From fa6f10219e096e0c2765628ae3d5bc733e489adb Mon Sep 17 00:00:00 2001 From: Marcos Casagrande Date: Wed, 10 Jun 2020 23:10:47 +0200 Subject: [PATCH] fix(std/archive): untar (#6217) - Fix `Untar` for tarballs with multiple files (Currently throwing `"checksum error"`) - Fix parsing, now all `ustar` versions should be supported. Linux `tar -cvf deno.tar dir/` is not working on master. - Added `asyncIterator` - Added README.md --- std/archive/README.md | 60 +++++++ std/archive/tar.ts | 263 ++++++++++++++++++++++------ std/archive/tar_test.ts | 321 +++++++++++++++++++++++++++++++++- std/archive/testdata/deno.tar | Bin 0 -> 10240 bytes 4 files changed, 583 insertions(+), 61 deletions(-) create mode 100644 std/archive/README.md create mode 100644 std/archive/testdata/deno.tar diff --git a/std/archive/README.md b/std/archive/README.md new file mode 100644 index 0000000000..efa258ca4c --- /dev/null +++ b/std/archive/README.md @@ -0,0 +1,60 @@ +# Usage + +## Tar + +```ts +import { Tar } from "https://deno.land/std/archive/tar.ts"; + +const tar = new Tar(); +const content = new TextEncoder().encode("Deno.land"); +await tar.append("deno.txt", { + reader: new Deno.Buffer(content), + contentSize: content.byteLength, +}); + +// Or specifying a filePath +await tar.append("land.txt", { + filePath: "./land.txt", +}); + +// use tar.getReader() to read the contents + +const writer = await Deno.open("./out.tar", { write: true, create: true }); +await Deno.copy(tar.getReader(), writer); +writer.close(); +``` + +## Untar + +```ts +import { Untar } from "https://deno.land/std/archive/tar.ts"; +import { ensureFile } from "https://deno.land/std/fs/ensure_file.ts"; +import { ensureDir } from "https://deno.land/std/fs/ensure_dir.ts"; + +const reader = await Deno.open("./out.tar", { read: true }); +const untar = new Untar(reader); + +for await (const entry of untar) { + console.log(entry); // metadata + /* + fileName: "archive/deno.txt", + fileMode: 33204, + mtime: 1591657305, + uid: 0, + gid: 0, + size: 24400, + type: 'file' + */ + + if (entry.type === "directory") { + await ensureDir(entry.fileName); + continue; + } + + await ensureFile(entry.fileName); + const file = await Deno.open(entry.fileName, { write: true }); + // is a reader + await Deno.copy(entry, file); +} +reader.close(); +``` diff --git a/std/archive/tar.ts b/std/archive/tar.ts index d549a4623b..8ec240764b 100644 --- a/std/archive/tar.ts +++ b/std/archive/tar.ts @@ -27,16 +27,42 @@ * THE SOFTWARE. */ import { MultiReader } from "../io/readers.ts"; -import { BufReader } from "../io/bufio.ts"; +import { PartialReadError } from "../io/bufio.ts"; import { assert } from "../_util/assert.ts"; +type Reader = Deno.Reader; +type Seeker = Deno.Seeker; + const recordSize = 512; const ustar = "ustar\u000000"; +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 +// eight checksum bytes taken to be ascii spaces (decimal value 32) +const initialChecksum = 8 * 32; + +async function readBlock( + reader: Deno.Reader, + p: Uint8Array +): Promise { + let bytesRead = 0; + while (bytesRead < p.length) { + const rr = await reader.read(p.subarray(bytesRead)); + if (rr === null) { + if (bytesRead === 0) { + return null; + } else { + throw new PartialReadError(); + } + } + bytesRead += rr; + } + return bytesRead; +} + /** * Simple file reader */ -class FileReader implements Deno.Reader { +class FileReader implements Reader { private file?: Deno.File; constructor(private filePath: string) {} @@ -79,24 +105,34 @@ function pad(num: number, bytes: number, base?: number): string { return "000000000000".substr(numString.length + 12 - bytes) + numString; } +const types: { [key: string]: string } = { + "": "file", + "0": "file", + "1": "link", + "2": "symlink", + "3": "character-device", + "4": "block-device", + "5": "directory", +}; + /* struct posix_header { // byte offset - char name[100]; // 0 - char mode[8]; // 100 - char uid[8]; // 108 - char gid[8]; // 116 - char size[12]; // 124 - char mtime[12]; // 136 - char chksum[8]; // 148 - char typeflag; // 156 - char linkname[100]; // 157 - char magic[6]; // 257 - char version[2]; // 263 - char uname[32]; // 265 - char gname[32]; // 297 - char devmajor[8]; // 329 - char devminor[8]; // 337 - char prefix[155]; // 345 + char name[100]; // 0 + char mode[8]; // 100 + char uid[8]; // 108 + char gid[8]; // 116 + char size[12]; // 124 + char mtime[12]; // 136 + char chksum[8]; // 148 + char typeflag; // 156 + char linkname[100]; // 157 + char magic[6]; // 257 + char version[2]; // 263 + char uname[32]; // 265 + char gname[32]; // 297 + char devmajor[8]; // 329 + char devminor[8]; // 337 + char prefix[155]; // 345 // 500 }; */ @@ -198,6 +234,10 @@ function parseHeader(buffer: Uint8Array): { [key: string]: Uint8Array } { return data; } +interface TarHeader { + [key: string]: Uint8Array; +} + export interface TarData { fileName?: string; fileNamePrefix?: string; @@ -221,7 +261,7 @@ export interface TarDataWithSource extends TarData { /** * buffer to read */ - reader?: Deno.Reader; + reader?: Reader; } export interface TarInfo { @@ -231,6 +271,7 @@ export interface TarInfo { gid?: number; owner?: string; group?: string; + type?: string; } export interface TarOptions extends TarInfo { @@ -242,7 +283,7 @@ export interface TarOptions extends TarInfo { /** * append any arbitrary content */ - reader?: Deno.Reader; + reader?: Reader; /** * size of the content to be appended @@ -250,10 +291,14 @@ export interface TarOptions extends TarInfo { contentSize?: number; } -export interface UntarOptions extends TarInfo { +export interface TarMeta extends TarInfo { fileName: string; + fileSize?: number; } +// eslint-disable-next-line @typescript-eslint/no-empty-interface +interface TarEntry extends TarMeta {} + /** * A class to create a tar archive */ @@ -364,8 +409,8 @@ export class Tar { /** * Get a Reader instance for this tar data */ - getReader(): Deno.Reader { - const readers: Deno.Reader[] = []; + getReader(): Reader { + const readers: Reader[] = []; this.data.forEach((tarData): void => { let { reader } = tarData; const { filePath } = tarData; @@ -395,44 +440,132 @@ export class Tar { } } +class TarEntry implements Reader { + #header: TarHeader; + #reader: Reader | (Reader & Deno.Seeker); + #size: number; + #read = 0; + #consumed = false; + #entrySize: number; + constructor( + meta: TarMeta, + header: TarHeader, + reader: Reader | (Reader & Deno.Seeker) + ) { + Object.assign(this, meta); + this.#header = header; + this.#reader = reader; + + // File Size + this.#size = this.fileSize || 0; + // Entry Size + const blocks = Math.ceil(this.#size / recordSize); + this.#entrySize = blocks * recordSize; + } + + get consumed(): boolean { + return this.#consumed; + } + + async read(p: Uint8Array): Promise { + // Bytes left for entry + const entryBytesLeft = this.#entrySize - this.#read; + const bufSize = Math.min( + // bufSize can't be greater than p.length nor bytes left in the entry + p.length, + entryBytesLeft + ); + + if (entryBytesLeft <= 0) return null; + + const block = new Uint8Array(bufSize); + const n = await readBlock(this.#reader, block); + const bytesLeft = this.#size - this.#read; + + this.#read += n || 0; + if (n === null || bytesLeft <= 0) { + if (null) this.#consumed = true; + return null; + } + + // Remove zero filled + const offset = bytesLeft < n ? bytesLeft : n; + p.set(block.subarray(0, offset), 0); + + return offset < 0 ? n - Math.abs(offset) : offset; + } + + async discard(): Promise { + // Discard current entry + if (this.#consumed) return; + this.#consumed = true; + + if (typeof (this.#reader as Seeker).seek === "function") { + await (this.#reader as Seeker).seek( + this.#entrySize - this.#read, + Deno.SeekMode.Current + ); + this.#read = this.#entrySize; + } else { + await Deno.readAll(this); + } + } +} + /** - * A class to create a tar archive + * A class to extract a tar archive */ export class Untar { - reader: BufReader; + reader: Reader; block: Uint8Array; + #entry: TarEntry | undefined; - constructor(reader: Deno.Reader) { - this.reader = new BufReader(reader); + constructor(reader: Reader) { + this.reader = reader; this.block = new Uint8Array(recordSize); } - async extract(writer: Deno.Writer): Promise { - await this.reader.readFull(this.block); + #checksum = (header: Uint8Array): number => { + let sum = initialChecksum; + for (let i = 0; i < 512; i++) { + if (i >= 148 && i < 156) { + // Ignore checksum header + continue; + } + sum += header[i]; + } + return sum; + }; + + #getHeader = async (): Promise => { + await readBlock(this.reader, this.block); const header = parseHeader(this.block); // calculate the checksum - let checksum = 0; - const encoder = new TextEncoder(), - decoder = new TextDecoder("ascii"); - Object.keys(header) - .filter((key): boolean => key !== "checksum") - .forEach(function (key): void { - checksum += header[key].reduce((p, c): number => p + c, 0); - }); - checksum += encoder.encode(" ").reduce((p, c): number => p + c, 0); + const decoder = new TextDecoder(); + const checksum = this.#checksum(this.block); if (parseInt(decoder.decode(header.checksum), 8) !== checksum) { + if (checksum === initialChecksum) { + // EOF + return null; + } throw new Error("checksum error"); } const magic = decoder.decode(header.ustar); - if (magic !== ustar) { + + if (magic.indexOf("ustar")) { throw new Error(`unsupported archive format: ${magic}`); } + return header; + }; + + #getMetadata = (header: TarHeader): TarMeta => { + const decoder = new TextDecoder(); // get meta data - const meta: UntarOptions = { + const meta: TarMeta = { fileName: decoder.decode(trim(header.fileName)), }; const fileNamePrefix = trim(header.fileNamePrefix); @@ -450,23 +583,45 @@ export class Untar { meta[key] = parseInt(decoder.decode(arr), 8); } }); - (["owner", "group"] as ["owner", "group"]).forEach((key): void => { - const arr = trim(header[key]); - if (arr.byteLength > 0) { - meta[key] = decoder.decode(arr); + (["owner", "group", "type"] as ["owner", "group", "type"]).forEach( + (key): void => { + const arr = trim(header[key]); + if (arr.byteLength > 0) { + meta[key] = decoder.decode(arr); + } } - }); + ); - // read the file content - const len = parseInt(decoder.decode(header.fileSize), 8); - let rest = len; - while (rest > 0) { - await this.reader.readFull(this.block); - const arr = rest < recordSize ? this.block.subarray(0, rest) : this.block; - await Deno.copy(new Deno.Buffer(arr), writer); - rest -= recordSize; - } + meta.fileSize = parseInt(decoder.decode(header.fileSize), 8); + meta.type = types[meta.type as string] || meta.type; return meta; + }; + + async extract(): Promise { + if (this.#entry && !this.#entry.consumed) { + // If entry body was not read, discard the body + // so we can read the next entry. + await this.#entry.discard(); + } + + const header = await this.#getHeader(); + if (header === null) return null; + + const meta = this.#getMetadata(header); + + this.#entry = new TarEntry(meta, header, this.reader); + + return this.#entry; + } + + async *[Symbol.asyncIterator](): AsyncIterableIterator { + while (true) { + const entry = await this.extract(); + + if (entry === null) return; + + yield entry; + } } } diff --git a/std/archive/tar_test.ts b/std/archive/tar_test.ts index 0df9956f2a..69f1ec3f3f 100644 --- a/std/archive/tar_test.ts +++ b/std/archive/tar_test.ts @@ -8,13 +8,40 @@ * **to run this test** * deno run --allow-read archive/tar_test.ts */ -import { assertEquals } from "../testing/asserts.ts"; +import { assertEquals, assert } from "../testing/asserts.ts"; import { resolve } from "../path/mod.ts"; import { Tar, Untar } from "./tar.ts"; const filePath = resolve("archive", "testdata", "example.txt"); +interface TestEntry { + name: string; + content?: Uint8Array; + filePath?: string; +} + +async function createTar(entries: TestEntry[]): Promise { + const tar = new Tar(); + // put data on memory + for (const file of entries) { + let options; + + if (file.content) { + options = { + reader: new Deno.Buffer(file.content), + contentSize: file.content.byteLength, + }; + } else { + options = { filePath: file.filePath }; + } + + await tar.append(file.name, options); + } + + return tar; +} + Deno.test("createTarArchive", async function (): Promise { // initialize const tar = new Tar(); @@ -54,10 +81,11 @@ Deno.test("deflateTarArchive", async function (): Promise { // read data from a tar archive const untar = new Untar(tar.getReader()); - const buf = new Deno.Buffer(); - const result = await untar.extract(buf); - const untarText = new TextDecoder("utf-8").decode(buf.bytes()); + const result = await untar.extract(); + assert(result !== null); + const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result)); + assertEquals(await untar.extract(), null); // EOF // tests assertEquals(result.fileName, fileName); assertEquals(untarText, text); @@ -80,11 +108,290 @@ Deno.test("appendFileWithLongNameToTarArchive", async function (): Promise< // read data from a tar archive const untar = new Untar(tar.getReader()); - const buf = new Deno.Buffer(); - const result = await untar.extract(buf); - const untarText = new TextDecoder("utf-8").decode(buf.bytes()); + const result = await untar.extract(); + assert(result !== null); + const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result)); // tests assertEquals(result.fileName, fileName); assertEquals(untarText, text); }); + +Deno.test("untarAsyncIterator", async function (): Promise { + const entries: TestEntry[] = [ + { + name: "output.txt", + content: new TextEncoder().encode("hello tar world!"), + }, + { + name: "dir/tar.ts", + filePath, + }, + ]; + + const tar = await createTar(entries); + + // read data from a tar archive + const untar = new Untar(tar.getReader()); + + for await (const entry of untar) { + const expected = entries.shift(); + assert(expected); + + let content = expected.content; + if (expected.filePath) { + content = await Deno.readFile(expected.filePath); + } + + assertEquals(content, await Deno.readAll(entry)); + assertEquals(expected.name, entry.fileName); + } + + assertEquals(entries.length, 0); +}); + +Deno.test("untarAsyncIteratorWithoutReadingBody", async function (): Promise< + void +> { + const entries: TestEntry[] = [ + { + name: "output.txt", + content: new TextEncoder().encode("hello tar world!"), + }, + { + name: "dir/tar.ts", + filePath, + }, + ]; + + const tar = await createTar(entries); + + // read data from a tar archive + const untar = new Untar(tar.getReader()); + + for await (const entry of untar) { + const expected = entries.shift(); + assert(expected); + assertEquals(expected.name, entry.fileName); + } + + assertEquals(entries.length, 0); +}); + +Deno.test( + "untarAsyncIteratorWithoutReadingBodyFromFileReader", + async function (): Promise { + const entries: TestEntry[] = [ + { + name: "output.txt", + content: new TextEncoder().encode("hello tar world!"), + }, + { + name: "dir/tar.ts", + filePath, + }, + ]; + + const outputFile = resolve("archive", "testdata", "test.tar"); + + const tar = await createTar(entries); + const file = await Deno.open(outputFile, { create: true, write: true }); + await Deno.copy(tar.getReader(), file); + file.close(); + + const reader = await Deno.open(outputFile, { read: true }); + // read data from a tar archive + const untar = new Untar(reader); + + for await (const entry of untar) { + const expected = entries.shift(); + assert(expected); + assertEquals(expected.name, entry.fileName); + } + + reader.close(); + await Deno.remove(outputFile); + assertEquals(entries.length, 0); + } +); + +Deno.test("untarAsyncIteratorFromFileReader", async function (): Promise { + const entries: TestEntry[] = [ + { + name: "output.txt", + content: new TextEncoder().encode("hello tar world!"), + }, + { + name: "dir/tar.ts", + filePath, + }, + ]; + + const outputFile = resolve("archive", "testdata", "test.tar"); + + const tar = await createTar(entries); + const file = await Deno.open(outputFile, { create: true, write: true }); + await Deno.copy(tar.getReader(), file); + file.close(); + + const reader = await Deno.open(outputFile, { read: true }); + // read data from a tar archive + const untar = new Untar(reader); + + for await (const entry of untar) { + const expected = entries.shift(); + assert(expected); + + let content = expected.content; + if (expected.filePath) { + content = await Deno.readFile(expected.filePath); + } + + assertEquals(content, await Deno.readAll(entry)); + assertEquals(expected.name, entry.fileName); + } + + reader.close(); + await Deno.remove(outputFile); + assertEquals(entries.length, 0); +}); + +Deno.test( + "untarAsyncIteratorReadingLessThanRecordSize", + async function (): Promise { + // record size is 512 + const bufSizes = [1, 53, 256, 511]; + + for (const bufSize of bufSizes) { + const entries: TestEntry[] = [ + { + name: "output.txt", + content: new TextEncoder().encode("hello tar world!".repeat(100)), + }, + // Need to test at least two files, to make sure the first entry doesn't over-read + // Causing the next to fail with: chesum error + { + name: "deni.txt", + content: new TextEncoder().encode("deno!".repeat(250)), + }, + ]; + + const tar = await createTar(entries); + + // read data from a tar archive + const untar = new Untar(tar.getReader()); + + for await (const entry of untar) { + const expected = entries.shift(); + assert(expected); + assertEquals(expected.name, entry.fileName); + + const writer = new Deno.Buffer(); + while (true) { + const buf = new Uint8Array(bufSize); + const n = await entry.read(buf); + if (n === null) break; + + await writer.write(buf.subarray(0, n)); + } + assertEquals(writer.bytes(), expected!.content); + } + + assertEquals(entries.length, 0); + } + } +); + +Deno.test("untarLinuxGeneratedTar", async function (): Promise { + const filePath = resolve("archive", "testdata", "deno.tar"); + const file = await Deno.open(filePath, { read: true }); + + const expectedEntries = [ + { + fileName: "archive/", + fileSize: 0, + fileMode: 509, + mtime: 1591800767, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "directory", + }, + { + fileName: "archive/deno/", + fileSize: 0, + fileMode: 509, + mtime: 1591799635, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "directory", + }, + { + fileName: "archive/deno/land/", + fileSize: 0, + fileMode: 509, + mtime: 1591799660, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "directory", + }, + { + fileName: "archive/deno/land/land.txt", + fileMode: 436, + fileSize: 5, + mtime: 1591799660, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "file", + content: new TextEncoder().encode("land\n"), + }, + { + fileName: "archive/file.txt", + fileMode: 436, + fileSize: 5, + mtime: 1591799626, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "file", + content: new TextEncoder().encode("file\n"), + }, + { + fileName: "archive/deno.txt", + fileMode: 436, + fileSize: 5, + mtime: 1591799642, + uid: 1001, + gid: 1001, + owner: "deno", + group: "deno", + type: "file", + content: new TextEncoder().encode("deno\n"), + }, + ]; + + const untar = new Untar(file); + + for await (const entry of untar) { + const expected = expectedEntries.shift(); + assert(expected); + const content = expected.content; + delete expected.content; + + assertEquals(entry, expected); + + if (content) { + assertEquals(content, await Deno.readAll(entry)); + } + } + + file.close(); +}); diff --git a/std/archive/testdata/deno.tar b/std/archive/testdata/deno.tar new file mode 100644 index 0000000000000000000000000000000000000000..300ce003b5bcc01dfbd5615d5fbcb7fe0c0f654c GIT binary patch literal 10240 zcmeH}(GG$j6o$F(DR_bc6wdRovgOulGiyD5oJy^gt2SX)KQ}O$px<|X)K~LWc_?Ng zVpE|JSrIx-R^RqK#ZBnUQJB?^NGPqfbwb*R&(tox-_^d7QY?ziw!Qdd@M`|aE#Gj- zU;Xpz#VP_8{GNYD)?~rIQ3U@<%ID7i+HV#Sir^#vHV^(YWfbzCq5vNKa}iAIV?EsN z?BBWIwfFqD*YiK*|Li}{$q1>2dxZX#{`~xB+mn7enE7!6P5zhVx)?U~9k`G3uML0y zok9LPfDuRR9r=$4c;rhr`RD)tQRF|n