1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-22 15:06:54 -05:00

fix(std/archive): untar (#6217)

- Fix `Untar` for tarballs with multiple files (Currently throwing `"checksum error"`)
- Fix parsing; all `ustar` versions should now be supported. A tarball created on Linux with `tar -cvf deno.tar dir/` currently fails to extract on master.
- Added `asyncIterator`
- Added README.md
This commit is contained in:
Marcos Casagrande 2020-06-10 23:10:47 +02:00 committed by GitHub
parent 408edbb065
commit fa6f10219e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 583 additions and 61 deletions

60
std/archive/README.md Normal file
View file

@ -0,0 +1,60 @@
# Usage
## Tar
```ts
import { Tar } from "https://deno.land/std/archive/tar.ts";
const tar = new Tar();
const content = new TextEncoder().encode("Deno.land");
await tar.append("deno.txt", {
reader: new Deno.Buffer(content),
contentSize: content.byteLength,
});
// Or specifying a filePath
await tar.append("land.txt", {
filePath: "./land.txt",
});
// use tar.getReader() to read the contents
const writer = await Deno.open("./out.tar", { write: true, create: true });
await Deno.copy(tar.getReader(), writer);
writer.close();
```
## Untar
```ts
import { Untar } from "https://deno.land/std/archive/tar.ts";
import { ensureFile } from "https://deno.land/std/fs/ensure_file.ts";
import { ensureDir } from "https://deno.land/std/fs/ensure_dir.ts";

const reader = await Deno.open("./out.tar", { read: true });
const untar = new Untar(reader);

for await (const entry of untar) {
  console.log(entry); // metadata
  /*
    fileName: "archive/deno.txt",
    fileMode: 33204,
    mtime: 1591657305,
    uid: 0,
    gid: 0,
    size: 24400,
    type: 'file'
  */

  if (entry.type === "directory") {
    await ensureDir(entry.fileName);
    continue;
  }

  await ensureFile(entry.fileName);
  const file = await Deno.open(entry.fileName, { write: true });
  try {
    // <entry> is a reader
    await Deno.copy(entry, file);
  } finally {
    // Close every extracted file, otherwise one handle leaks per entry.
    file.close();
  }
}
reader.close();
```

View file

@ -27,16 +27,42 @@
* THE SOFTWARE.
*/
import { MultiReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
import { PartialReadError } from "../io/bufio.ts";
import { assert } from "../_util/assert.ts";
type Reader = Deno.Reader;
type Seeker = Deno.Seeker;
// Size in bytes of one tar record: a header occupies one record and entry
// bodies are padded up to a multiple of it.
const recordSize = 512;
// Magic + version header value: "ustar", a NUL byte, then the version "00".
const ustar = "ustar\u000000";
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
// eight checksum bytes taken to be ascii spaces (decimal value 32)
const initialChecksum = 8 * 32;
/**
 * Keeps reading from `reader` until `p` has been filled.
 *
 * @param reader source of the bytes
 * @param p destination buffer; its length is the number of bytes requested
 * @returns the total number of bytes read once `p` is full, or `null` when
 * the reader reports EOF before a single byte was read
 * @throws PartialReadError when EOF is reported after `p` was only partly
 * filled
 */
async function readBlock(
  reader: Deno.Reader,
  p: Uint8Array
): Promise<number | null> {
  let filled = 0;
  for (; filled < p.length; ) {
    // Only hand the still-empty tail of the buffer to the reader.
    const chunk = await reader.read(p.subarray(filled));
    if (chunk !== null) {
      filled += chunk;
      continue;
    }
    // EOF: clean if nothing was read yet, otherwise the block is truncated.
    if (filled > 0) {
      throw new PartialReadError();
    }
    return null;
  }
  return filled;
}
/**
* Simple file reader
*/
class FileReader implements Deno.Reader {
class FileReader implements Reader {
private file?: Deno.File;
constructor(private filePath: string) {}
@ -79,24 +105,34 @@ function pad(num: number, bytes: number, base?: number): string {
return "000000000000".substr(numString.length + 12 - bytes) + numString;
}
/**
 * Maps the decoded ustar `typeflag` header character to a readable entry
 * type. The empty string is treated like "0" (regular file).
 *
 * Covers every standard ustar typeflag from "0" to "7", including FIFO ("6")
 * and contiguous file ("7").
 */
const types: { [key: string]: string } = {
  "": "file",
  "0": "file",
  "1": "link",
  "2": "symlink",
  "3": "character-device",
  "4": "block-device",
  "5": "directory",
  "6": "fifo",
  "7": "contiguous-file",
};
/*
struct posix_header { // byte offset
char name[100]; // 0
char mode[8]; // 100
char uid[8]; // 108
char gid[8]; // 116
char size[12]; // 124
char mtime[12]; // 136
char chksum[8]; // 148
char typeflag; // 156
char linkname[100]; // 157
char magic[6]; // 257
char version[2]; // 263
char uname[32]; // 265
char gname[32]; // 297
char devmajor[8]; // 329
char devminor[8]; // 337
char prefix[155]; // 345
char name[100]; // 0
char mode[8]; // 100
char uid[8]; // 108
char gid[8]; // 116
char size[12]; // 124
char mtime[12]; // 136
char chksum[8]; // 148
char typeflag; // 156
char linkname[100]; // 157
char magic[6]; // 257
char version[2]; // 263
char uname[32]; // 265
char gname[32]; // 297
char devmajor[8]; // 329
char devminor[8]; // 337
char prefix[155]; // 345
// 500
};
*/
@ -198,6 +234,10 @@ function parseHeader(buffer: Uint8Array): { [key: string]: Uint8Array } {
return data;
}
/**
 * Raw header of one tar entry: each key is a header field name and each value
 * is that field's still-undecoded bytes.
 */
interface TarHeader {
[key: string]: Uint8Array;
}
export interface TarData {
fileName?: string;
fileNamePrefix?: string;
@ -221,7 +261,7 @@ export interface TarDataWithSource extends TarData {
/**
* buffer to read
*/
reader?: Deno.Reader;
reader?: Reader;
}
export interface TarInfo {
@ -231,6 +271,7 @@ export interface TarInfo {
gid?: number;
owner?: string;
group?: string;
type?: string;
}
export interface TarOptions extends TarInfo {
@ -242,7 +283,7 @@ export interface TarOptions extends TarInfo {
/**
* append any arbitrary content
*/
reader?: Deno.Reader;
reader?: Reader;
/**
* size of the content to be appended
@ -250,10 +291,14 @@ export interface TarOptions extends TarInfo {
contentSize?: number;
}
export interface UntarOptions extends TarInfo {
/**
 * Decoded metadata of a single archive entry: the `TarInfo` fields plus the
 * entry name and size.
 */
export interface TarMeta extends TarInfo {
/** Entry name decoded from the header. */
fileName: string;
/** Size of the entry body in bytes, parsed from the octal header field. */
fileSize?: number;
}
// Intentionally empty: this interface is merged with the `TarEntry` class so
// that class instances are typed with the `TarMeta` fields that the class
// constructor copies onto them.
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface TarEntry extends TarMeta {}
/**
* A class to create a tar archive
*/
@ -364,8 +409,8 @@ export class Tar {
/**
* Get a Reader instance for this tar data
*/
getReader(): Deno.Reader {
const readers: Deno.Reader[] = [];
getReader(): Reader {
const readers: Reader[] = [];
this.data.forEach((tarData): void => {
let { reader } = tarData;
const { filePath } = tarData;
@ -395,44 +440,132 @@ export class Tar {
}
}
/**
 * Reader over the body of a single archive entry.
 *
 * The entry's metadata (`TarMeta`) is copied onto the instance, so fields such
 * as `fileName` and `fileSize` can be read directly from it. The body is read
 * from the shared archive reader, which must be positioned at the start of
 * this entry's content.
 */
class TarEntry implements Reader {
  #header: TarHeader;
  #reader: Reader | (Reader & Deno.Seeker);
  // Number of content bytes in the entry.
  #size: number;
  // Number of bytes pulled from the underlying reader so far (content + padding).
  #read = 0;
  #consumed = false;
  // Content size rounded up to a whole number of records.
  #entrySize: number;

  /**
   * @param meta decoded metadata; copied onto this instance
   * @param header raw header the metadata was decoded from
   * @param reader archive reader positioned at the entry body
   */
  constructor(
    meta: TarMeta,
    header: TarHeader,
    reader: Reader | (Reader & Deno.Seeker)
  ) {
    Object.assign(this, meta);
    this.#header = header;
    this.#reader = reader;

    // File size; `||` (not `??`) so that a missing or NaN size becomes 0.
    this.#size = this.fileSize || 0;
    // Entry size: the body occupies whole records, the last one zero padded.
    const blocks = Math.ceil(this.#size / recordSize);
    this.#entrySize = blocks * recordSize;
  }

  /** Whether the entry has been discarded or the archive reader hit EOF. */
  get consumed(): boolean {
    return this.#consumed;
  }

  /**
   * Reads entry content into `p`.
   *
   * Up to `p.length` bytes are pulled from the archive reader, never past the
   * end of the padded entry; only content bytes (not the zero padding) are
   * copied into `p`.
   *
   * @returns the number of content bytes copied into `p`, or `null` once the
   * content is exhausted or the archive reader reports EOF
   */
  async read(p: Uint8Array): Promise<number | null> {
    // Bytes left for entry (content + padding)
    const entryBytesLeft = this.#entrySize - this.#read;
    if (entryBytesLeft <= 0) return null;

    // bufSize can't be greater than p.length nor bytes left in the entry
    const bufSize = Math.min(p.length, entryBytesLeft);
    const block = new Uint8Array(bufSize);
    const n = await readBlock(this.#reader, block);

    // Content bytes that were still unread before this call.
    const bytesLeft = this.#size - this.#read;
    this.#read += n || 0;

    if (n === null || bytesLeft <= 0) {
      // The archive reader itself is exhausted: nothing is left to discard.
      if (n === null) this.#consumed = true;
      return null;
    }

    // Remove zero filled: hand out content bytes only. `bytesLeft` is positive
    // here, so the count can never be negative.
    const offset = Math.min(bytesLeft, n);
    p.set(block.subarray(0, offset), 0);
    return offset;
  }

  /**
   * Skips whatever remains of this entry (content and padding) so that the
   * next header can be read. Does nothing if the entry is already consumed.
   */
  async discard(): Promise<void> {
    // Discard current entry
    if (this.#consumed) return;
    this.#consumed = true;

    if (typeof (this.#reader as Seeker).seek === "function") {
      // Seekable source: jump over the unread remainder.
      await (this.#reader as Seeker).seek(
        this.#entrySize - this.#read,
        Deno.SeekMode.Current
      );
      this.#read = this.#entrySize;
    } else {
      // Non-seekable source: read the remainder and throw it away.
      await Deno.readAll(this);
    }
  }
}
/**
* A class to create a tar archive
* A class to extract a tar archive
*/
export class Untar {
reader: BufReader;
reader: Reader;
block: Uint8Array;
#entry: TarEntry | undefined;
constructor(reader: Deno.Reader) {
this.reader = new BufReader(reader);
constructor(reader: Reader) {
this.reader = reader;
this.block = new Uint8Array(recordSize);
}
async extract(writer: Deno.Writer): Promise<UntarOptions> {
await this.reader.readFull(this.block);
/**
 * Sums the bytes of a 512-byte header block, counting the eight checksum
 * bytes (offsets 148 to 155) as ASCII spaces instead of their stored values.
 */
#checksum = (header: Uint8Array): number => {
  // Start from the value contributed by the eight space-filled checksum bytes.
  let total = initialChecksum;
  // Bytes before the checksum field.
  for (let pos = 0; pos < 148; pos++) {
    total += header[pos];
  }
  // Bytes after the checksum field.
  for (let pos = 156; pos < 512; pos++) {
    total += header[pos];
  }
  return total;
};
/**
 * Reads the next header block from the archive and validates it.
 *
 * @returns the parsed header, or `null` at the end of the archive: either the
 * reader has no more data, or the block's non-checksum bytes are all zero
 * @throws Error "checksum error" when the stored checksum does not match
 * @throws Error "unsupported archive format" when the magic field does not
 * start with "ustar"
 */
#getHeader = async (): Promise<TarHeader | null> => {
  // `null` means the reader is exhausted and `this.block` still holds the
  // previous header; parsing it again would re-emit the last entry.
  if ((await readBlock(this.reader, this.block)) === null) {
    return null;
  }
  const header = parseHeader(this.block);

  // calculate the checksum
  const decoder = new TextDecoder();
  const checksum = this.#checksum(this.block);

  if (parseInt(decoder.decode(header.checksum), 8) !== checksum) {
    if (checksum === initialChecksum) {
      // EOF
      return null;
    }
    throw new Error("checksum error");
  }

  // Accept any ustar flavour: only the "ustar" prefix of the magic is checked.
  const magic = decoder.decode(header.ustar);
  if (!magic.startsWith("ustar")) {
    throw new Error(`unsupported archive format: ${magic}`);
  }

  return header;
};
#getMetadata = (header: TarHeader): TarMeta => {
const decoder = new TextDecoder();
// get meta data
const meta: UntarOptions = {
const meta: TarMeta = {
fileName: decoder.decode(trim(header.fileName)),
};
const fileNamePrefix = trim(header.fileNamePrefix);
@ -450,23 +583,45 @@ export class Untar {
meta[key] = parseInt(decoder.decode(arr), 8);
}
});
(["owner", "group"] as ["owner", "group"]).forEach((key): void => {
const arr = trim(header[key]);
if (arr.byteLength > 0) {
meta[key] = decoder.decode(arr);
(["owner", "group", "type"] as ["owner", "group", "type"]).forEach(
(key): void => {
const arr = trim(header[key]);
if (arr.byteLength > 0) {
meta[key] = decoder.decode(arr);
}
}
});
);
// read the file content
const len = parseInt(decoder.decode(header.fileSize), 8);
let rest = len;
while (rest > 0) {
await this.reader.readFull(this.block);
const arr = rest < recordSize ? this.block.subarray(0, rest) : this.block;
await Deno.copy(new Deno.Buffer(arr), writer);
rest -= recordSize;
}
meta.fileSize = parseInt(decoder.decode(header.fileSize), 8);
meta.type = types[meta.type as string] || meta.type;
return meta;
};
/**
 * Moves on to the next entry of the archive.
 *
 * @returns the next entry, or `null` when there are no more entries
 */
async extract(): Promise<TarEntry | null> {
  const previous = this.#entry;
  if (previous !== undefined && !previous.consumed) {
    // The previous body was not (fully) read: skip it so the reader is
    // positioned at the next header.
    await previous.discard();
  }

  const header = await this.#getHeader();
  if (header === null) {
    return null;
  }

  const meta = this.#getMetadata(header);
  const entry = new TarEntry(meta, header, this.reader);
  this.#entry = entry;
  return entry;
}
/** Yields every entry of the archive in order, stopping at the first `null`. */
async *[Symbol.asyncIterator](): AsyncIterableIterator<TarEntry> {
  for (
    let next = await this.extract();
    next !== null;
    next = await this.extract()
  ) {
    yield next;
  }
}
}

View file

@ -8,13 +8,40 @@
* **to run this test**
* deno run --allow-read archive/tar_test.ts
*/
import { assertEquals } from "../testing/asserts.ts";
import { assertEquals, assert } from "../testing/asserts.ts";
import { resolve } from "../path/mod.ts";
import { Tar, Untar } from "./tar.ts";
const filePath = resolve("archive", "testdata", "example.txt");
/** One file to put into a test archive, sourced from memory or from disk. */
interface TestEntry {
/** Name of the entry inside the archive. */
name: string;
/** In-memory content; when set it is appended through a `Deno.Buffer`. */
content?: Uint8Array;
/** Path of a file on disk to append; used when `content` is not set. */
filePath?: string;
}
/**
 * Builds a `Tar` holding the given entries. Entries with `content` are
 * appended from memory; all others are appended by `filePath`.
 */
async function createTar(entries: TestEntry[]): Promise<Tar> {
  const archive = new Tar();
  for (const entry of entries) {
    const source = entry.content
      ? {
          reader: new Deno.Buffer(entry.content),
          contentSize: entry.content.byteLength,
        }
      : { filePath: entry.filePath };
    await archive.append(entry.name, source);
  }
  return archive;
}
Deno.test("createTarArchive", async function (): Promise<void> {
// initialize
const tar = new Tar();
@ -54,10 +81,11 @@ Deno.test("deflateTarArchive", async function (): Promise<void> {
// read data from a tar archive
const untar = new Untar(tar.getReader());
const buf = new Deno.Buffer();
const result = await untar.extract(buf);
const untarText = new TextDecoder("utf-8").decode(buf.bytes());
const result = await untar.extract();
assert(result !== null);
const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result));
assertEquals(await untar.extract(), null); // EOF
// tests
assertEquals(result.fileName, fileName);
assertEquals(untarText, text);
@ -80,11 +108,290 @@ Deno.test("appendFileWithLongNameToTarArchive", async function (): Promise<
// read data from a tar archive
const untar = new Untar(tar.getReader());
const buf = new Deno.Buffer();
const result = await untar.extract(buf);
const untarText = new TextDecoder("utf-8").decode(buf.bytes());
const result = await untar.extract();
assert(result !== null);
const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result));
// tests
assertEquals(result.fileName, fileName);
assertEquals(untarText, text);
});
// Iterates an in-memory archive and checks each entry's content and name.
Deno.test("untarAsyncIterator", async function (): Promise<void> {
  const entries: TestEntry[] = [
    {
      name: "output.txt",
      content: new TextEncoder().encode("hello tar world!"),
    },
    { name: "dir/tar.ts", filePath },
  ];
  const archive = await createTar(entries);

  // Extract straight from the archive's reader.
  const untar = new Untar(archive.getReader());
  for await (const entry of untar) {
    const wanted = entries.shift();
    assert(wanted);

    // Entries backed by a file are compared against the file on disk.
    const wantedBytes = wanted.filePath
      ? await Deno.readFile(wanted.filePath)
      : wanted.content;

    assertEquals(wantedBytes, await Deno.readAll(entry));
    assertEquals(wanted.name, entry.fileName);
  }

  // Every expected entry must have been seen.
  assertEquals(entries.length, 0);
});
// Iterates an in-memory archive without reading any entry body, so each body
// has to be skipped before the next header is read.
Deno.test(
  "untarAsyncIteratorWithoutReadingBody",
  async (): Promise<void> => {
    const entries: TestEntry[] = [
      {
        name: "output.txt",
        content: new TextEncoder().encode("hello tar world!"),
      },
      { name: "dir/tar.ts", filePath },
    ];
    const archive = await createTar(entries);

    const untar = new Untar(archive.getReader());
    for await (const entry of untar) {
      const wanted = entries.shift();
      assert(wanted);
      assertEquals(wanted.name, entry.fileName);
    }

    // Every expected entry must have been seen.
    assertEquals(entries.length, 0);
  }
);
// Writes an archive to disk, then iterates it through a file reader without
// reading any entry body.
Deno.test(
  "untarAsyncIteratorWithoutReadingBodyFromFileReader",
  async function (): Promise<void> {
    const entries: TestEntry[] = [
      {
        name: "output.txt",
        content: new TextEncoder().encode("hello tar world!"),
      },
      {
        name: "dir/tar.ts",
        filePath,
      },
    ];

    const outputFile = resolve("archive", "testdata", "test.tar");
    const tar = await createTar(entries);
    const file = await Deno.open(outputFile, { create: true, write: true });
    try {
      try {
        await Deno.copy(tar.getReader(), file);
      } finally {
        file.close();
      }

      const reader = await Deno.open(outputFile, { read: true });
      try {
        // read data from a tar archive
        const untar = new Untar(reader);
        for await (const entry of untar) {
          const expected = entries.shift();
          assert(expected);
          assertEquals(expected.name, entry.fileName);
        }
      } finally {
        // Release the handle even when an assertion above fails.
        reader.close();
      }
    } finally {
      // Never leave the temporary archive behind, whatever the outcome.
      await Deno.remove(outputFile);
    }

    assertEquals(entries.length, 0);
  }
);
// Writes an archive to disk, then iterates it through a file reader and
// checks each entry's content and name.
Deno.test("untarAsyncIteratorFromFileReader", async function (): Promise<void> {
  const entries: TestEntry[] = [
    {
      name: "output.txt",
      content: new TextEncoder().encode("hello tar world!"),
    },
    {
      name: "dir/tar.ts",
      filePath,
    },
  ];

  const outputFile = resolve("archive", "testdata", "test.tar");
  const tar = await createTar(entries);
  const file = await Deno.open(outputFile, { create: true, write: true });
  try {
    try {
      await Deno.copy(tar.getReader(), file);
    } finally {
      file.close();
    }

    const reader = await Deno.open(outputFile, { read: true });
    try {
      // read data from a tar archive
      const untar = new Untar(reader);
      for await (const entry of untar) {
        const expected = entries.shift();
        assert(expected);

        let content = expected.content;
        if (expected.filePath) {
          content = await Deno.readFile(expected.filePath);
        }
        assertEquals(content, await Deno.readAll(entry));
        assertEquals(expected.name, entry.fileName);
      }
    } finally {
      // Release the handle even when an assertion above fails.
      reader.close();
    }
  } finally {
    // Never leave the temporary archive behind, whatever the outcome.
    await Deno.remove(outputFile);
  }

  assertEquals(entries.length, 0);
});
// Reads entry bodies through buffers smaller than one record.
Deno.test(
  "untarAsyncIteratorReadingLessThanRecordSize",
  async function (): Promise<void> {
    // All sizes are below the record size of 512.
    for (const bufSize of [1, 53, 256, 511]) {
      const encoder = new TextEncoder();
      const entries: TestEntry[] = [
        {
          name: "output.txt",
          content: encoder.encode("hello tar world!".repeat(100)),
        },
        // A second file is needed: if the first entry over-reads, the header
        // of the next one fails with a checksum error.
        {
          name: "deni.txt",
          content: encoder.encode("deno!".repeat(250)),
        },
      ];
      const archive = await createTar(entries);

      const untar = new Untar(archive.getReader());
      for await (const entry of untar) {
        const wanted = entries.shift();
        assert(wanted);
        assertEquals(wanted.name, entry.fileName);

        // Drain the entry through a fresh small buffer on every read.
        const sink = new Deno.Buffer();
        for (;;) {
          const chunk = new Uint8Array(bufSize);
          const count = await entry.read(chunk);
          if (count === null) {
            break;
          }
          await sink.write(chunk.subarray(0, count));
        }
        assertEquals(sink.bytes(), wanted.content);
      }

      // Every expected entry must have been seen.
      assertEquals(entries.length, 0);
    }
  }
);
// Extracts the checked-in `deno.tar` fixture and compares the metadata of
// every entry, plus the content of the file entries, with the expected values.
Deno.test("untarLinuxGeneratedTar", async function (): Promise<void> {
  const filePath = resolve("archive", "testdata", "deno.tar");
  const file = await Deno.open(filePath, { read: true });

  const expectedEntries = [
    {
      fileName: "archive/",
      fileSize: 0,
      fileMode: 509,
      mtime: 1591800767,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "directory",
    },
    {
      fileName: "archive/deno/",
      fileSize: 0,
      fileMode: 509,
      mtime: 1591799635,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "directory",
    },
    {
      fileName: "archive/deno/land/",
      fileSize: 0,
      fileMode: 509,
      mtime: 1591799660,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "directory",
    },
    {
      fileName: "archive/deno/land/land.txt",
      fileMode: 436,
      fileSize: 5,
      mtime: 1591799660,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "file",
      content: new TextEncoder().encode("land\n"),
    },
    {
      fileName: "archive/file.txt",
      fileMode: 436,
      fileSize: 5,
      mtime: 1591799626,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "file",
      content: new TextEncoder().encode("file\n"),
    },
    {
      fileName: "archive/deno.txt",
      fileMode: 436,
      fileSize: 5,
      mtime: 1591799642,
      uid: 1001,
      gid: 1001,
      owner: "deno",
      group: "deno",
      type: "file",
      content: new TextEncoder().encode("deno\n"),
    },
  ];

  try {
    const untar = new Untar(file);
    for await (const entry of untar) {
      const expected = expectedEntries.shift();
      assert(expected);

      // Split the expected body from the expected metadata without mutating
      // the fixture object.
      const { content, ...meta } = expected;
      assertEquals(entry, meta);

      if (content) {
        assertEquals(content, await Deno.readAll(entry));
      }
    }
  } finally {
    // Release the handle even when an assertion above fails.
    file.close();
  }
});

BIN
std/archive/testdata/deno.tar vendored Normal file

Binary file not shown.