encoding: add csv parse (denoland/deno_std#458)

Original: 167f529898
2024-11-23 15:16:54 -05:00 · 2019-05-30 15:50:29 +02:00 · 2019-05-30 15:50:29 +02:00 · 2487c45547
commit 2487c45547
parent a0ce25e606
4 changed files with 349 additions and 24 deletions
--- a/README.md
+++ b/README.md
@ -24,6 +24,7 @@ Here are the dedicated documentations of modules:
 - [colors](colors/README.md)
 - [datetime](datetime/README.md)
 - [encoding](encoding/README.md)
 - [examples](examples/README.md)
 - [flags](flags/README.md)
 - [fs](fs/README.md)
@ -33,7 +34,6 @@ Here are the dedicated documentations of modules:
 - [prettier](prettier/README.md)
 - [strings](strings/README.md)
 - [testing](testing/README.md)
 - [toml](encoding/toml/README.md)
 - [ws](ws/README.md)
 ## Contributing
--- a/encoding/README.md
+++ b/encoding/README.md
@ -1,11 +1,112 @@
-# TOML
+# Encoding
 ## CSV
 - **`readAll(reader: BufReader, opt: ParseOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false } ): Promise<[string[][], BufState]>`**:
  Read the whole buffer and output the structured CSV data.
 - **`parse(input: string | BufReader, opt: ExtendedParseOptions = { header: false }): Promise<unknown[]>`**:
  See [parse](#parse)
 ### Parse
 Parse the CSV string with the options provided.
 #### Options
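 ##### ExtendedParseOptions
 - **`header: boolean | string[] | HeaderOption[];`**: If `true`, the first line
  is used as the header definitions and is not returned as data. If `false`,
  every row is returned as a `string[]`. If a `string[]` or `HeaderOption[]` is
  provided, those names are used as the header definitions and every line is treated as data.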
 - **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which
  is executed after all columns of the row have been parsed. If no header is
  provided, its input is the raw row as `string[]`; otherwise it is the object built from the headers.
 ##### HeaderOption
 - **`name: string;`**: Name of the header to be used as property.
 - **`parse?: (input: string) => unknown;`**: Parse function for the column.
  This is executed on each entry of the header. This can be combined with the
  Parse function of the rows.
 #### Usage
 ```ts
 // input:
 // a,b,c
 // e,f,g
 const r = await parseFile(filepath, {
  header: false
 });
 // output:
 // [["a", "b", "c"], ["e", "f", "g"]]
 const r = await parseFile(filepath, {
  header: true
 });
 // output:
 // [{ a: "e", b: "f", c: "g" }]
 const r = await parseFile(filepath, {
  header: ["this", "is", "sparta"]
 });
 // output:
 // [
 //   { this: "a", is: "b", sparta: "c" },
 //   { this: "e", is: "f", sparta: "g" }
 // ]
 const r = await parseFile(filepath, {
  header: [
    {
      name: "this",
      parse: (e: string): string => {
        return `b${e}$$`;
      }
    },
    {
      name: "is",
      parse: (e: string): number => {
        return e.length;
      }
    },
    {
      name: "sparta",
      parse: (e: string): unknown => {
        return { bim: `boom-${e}` };
      }
    }
  ]
 });
 // output:
 // [
 //    { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
 //    { this: "be$$", is: 1, sparta: { bim: `boom-g` } }
 // ]
 const r = await parseFile(filepath, {
  header: ["this", "is", "sparta"],
  parse: (e: Record<string, unknown>) => {
    return { super: e.this, street: e.is, fighter: e.sparta };
  }
 });
 // output:
 // [
 //   { super: "a", street: "b", fighter: "c" },
 //   { super: "e", street: "f", fighter: "g" }
 // ]
 ```
 ## TOML
 This module parses TOML files. It follows the
 [TOML specs](https://github.com/toml-lang/toml) as closely as possible. Be sure to read the supported
 types, as not every spec is supported at the moment and the handling on the
 TypeScript side is a bit different.
-## Supported types and handling
+### Supported types and handling
 - :heavy_check_mark: [Keys](https://github.com/toml-lang/toml#string)
 - :exclamation: [String](https://github.com/toml-lang/toml#string)
@ -27,39 +128,39 @@ TypeScript side is a bit different.
 :exclamation: _Supported with warnings see [Warning](#Warning)._
-### :warning: Warning
+#### :warning: Warning
-#### String
+##### String
 - Regex : Due to the spec, there is no flag to detect regex properly
  in a TOML declaration. So the regex is stored as string.
-#### Integer
+##### Integer
 For **Binary** / **Octal** / **Hexadecimal** numbers,
 the values are stored as strings so that they are not interpreted as decimal.
-#### Local Time
+##### Local Time
 Because local time does not exist in JavaScript, the local time is stored as a string.
-#### Inline Table
+##### Inline Table
 Inline tables are supported. See below:
 ```toml
 animal = { type = { name = "pug" } }
-# Output
+## Output
 animal = { type.name = "pug" }
-# Output { animal : { type : { name : "pug" } }
+## Output { animal : { type : { name : "pug" } }
 animal.as.leaders = "tosin"
-# Output { animal: { as: { leaders: "tosin" } } }
+## Output { animal: { as: { leaders: "tosin" } } }
 "tosin.abasi" = "guitarist"
-# Output
+## Output
 "tosin.abasi" : "guitarist"
 ```
-#### Array of Tables
+##### Array of Tables
 At the moment only simple declarations like below are supported:
@ -89,9 +190,9 @@ will output:
 }
 ```
-## Usage
+### Usage
-### Parse
+#### Parse
 ```ts
 import { parse } from "./parser.ts";
@ -103,7 +204,7 @@ const tomlString = 'foo.bar = "Deno"';
 const tomlObject22 = parse(tomlString);
 ```
-### Stringify
+#### Stringify
 ```ts
 import { stringify } from "./parser.ts";
--- a/encoding/csv.ts
+++ b/encoding/csv.ts
@ -4,6 +4,7 @@
 import { BufReader, EOF } from "../io/bufio.ts";
 import { TextProtoReader } from "../textproto/mod.ts";
 import { StringReader } from "../io/readers.ts";
 const INVALID_RUNE = ["\r", "\n", '"'];
@ -17,28 +18,39 @@ export class ParseError extends Error {
  }
 }
 /**
 * Options understood by the CSV reader. Every property is optional.
 *
 * @property comma - Character which separates values. Defaults to ',' when
 *  left empty.
 * @property comment - Character to start a comment. Optional; the option check
 *  visible in this file assigns no default to it.
 * @property trimLeadingSpace - Flag to trim the leading space of the value.
 *  Defaults to 'false' when left unset.
 * @property lazyQuotes - Allow an unquoted quote in a quoted field or non-doubled
 *  quotes in a quoted field. Default: 'false'
 * @property fieldsPerRecord - Enables the check of the number of fields for each
 *  row. If == 0, the first row is used as the reference for the number of fields.
 */
 export interface ParseOptions {
-  comma: string;
+  comma?: string;
  comment?: string;
-  trimLeadingSpace: boolean;
+  trimLeadingSpace?: boolean;
  lazyQuotes?: boolean;
  fieldsPerRecord?: number;
 }
 /**
 * Fills in defaults on the given options object and validates its delimiters.
 *
 * The object is modified in place: an empty or missing `comma` becomes ",", and
 * a falsy `trimLeadingSpace` becomes `false`.
 *
 * @param opt Options to normalize and validate.
 * @throws Error("Invalid Delimiter") when `comma` or `comment` is one of the
 * characters in INVALID_RUNE, or when `comma` and `comment` are identical.
 */
 function chkOptions(opt: ParseOptions): void {
  if (!opt.comma) opt.comma = ",";
  if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;
  if (
-    INVALID_RUNE.includes(opt.comma) ||
+    INVALID_RUNE.includes(opt.comma!) ||
-    (opt.comment && INVALID_RUNE.includes(opt.comment)) ||
+    INVALID_RUNE.includes(opt.comment!) ||
    opt.comma === opt.comment
  ) {
    throw new Error("Invalid Delimiter");
  }
 }
-export async function read(
+async function read(
  Startline: number,
  reader: BufReader,
-  opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false }
+  opt: ParseOptions = { comma: ",", trimLeadingSpace: false }
 ): Promise<string[] | EOF> {
  const tp = new TextProtoReader(reader);
  let line: string;
@ -68,7 +80,7 @@ export async function read(
    return [];
  }
-  result = line.split(opt.comma);
+  result = line.split(opt.comma!);
  let quoteError = false;
  result = result.map(
@ -138,3 +150,105 @@ export async function readAll(
  }
  return result;
 }
 /**
 * HeaderOption provides the definition of a single column:
 * the property name used for it in the output object and an
 * optional parse function applied to each entry of the column.
 */
 export interface HeaderOption {
  /** Name of the column, used as the property key of the row object. */
  name: string;
  /** Optional converter called with the raw string value of each entry. */
  parse?: (input: string) => unknown;
 }
 /**
 * Options accepted by `parse`: the reader options from ParseOptions plus
 * the header mapping and an optional row-level parse function.
 */
 export interface ExtendedParseOptions extends ParseOptions {
  /**
   * Header handling. A boolean decides whether the first row is used as the
   * header definitions; an array supplies the column definitions directly.
   */
  header: boolean | string[] | HeaderOption[];
  /** Optional function applied to each row once its columns are processed. */
  parse?: (input: unknown) => unknown;
 }
 /**
 * Parses CSV input and optionally maps every row onto named columns.
 *
 * The input is read completely with `readAll`, which yields one `string[]`
 * per record. What happens next depends on `opt.header`:
 * - falsy: rows are returned as `string[]` (after `opt.parse`, if given).
 * - `true`: the first row provides the column names and is removed from the
 *   result; an input without any row yields an empty array.
 * - `string[]` / `HeaderOption[]`: the given names are used and every row is
 *   treated as data. A `HeaderOption.parse` function converts the raw value
 *   of its column.
 * With headers, each row becomes an object keyed by column name, which is
 * then passed through `opt.parse` when provided.
 *
 * @param input Input to parse. Can be a string or BufReader.
 * @param opt Options of the parser; also forwarded to `readAll`.
 * @param [opt.header=false] Header handling, see above.
 * @param [opt.parse] Parse function for rows, run after the column parsers.
 * @returns One entry per data row.
 * @throws A string of the form `Error number of fields line:N` when a row
 * does not have exactly as many fields as there are headers.
 * Example:
 *     const r = await parse('a,b,c\ne,f,g\n', {
 *      header: ["this", "is", "sparta"],
 *       parse: (e: Record<string, unknown>) => {
 *         return { super: e.this, street: e.is, fighter: e.sparta };
 *       }
 *     });
 * // output
 * [
 *   { super: "a", street: "b", fighter: "c" },
 *   { super: "e", street: "f", fighter: "g" }
 * ]
 */
 export async function parse(
  input: string | BufReader,
  opt: ExtendedParseOptions = {
    header: false
  }
 ): Promise<unknown[]> {
  const reader =
    input instanceof BufReader ? input : new BufReader(new StringReader(input));
  const rows: string[][] = await readAll(reader, opt);

  // No header mapping requested: hand back the raw rows, optionally converted.
  if (!opt.header) {
    const rowParse = opt.parse;
    if (rowParse) {
      return rows.map((row: string[]): unknown => rowParse(row));
    }
    return rows;
  }

  let headers: HeaderOption[] = [];
  // Counter reported in the field-count error message.
  let line = 0;
  if (Array.isArray(opt.header)) {
    if (typeof opt.header[0] !== "string") {
      headers = opt.header as HeaderOption[];
    } else {
      headers = (opt.header as string[]).map(
        (name): HeaderOption => ({ name })
      );
    }
  } else {
    // `header: true`: the first row carries the column names.
    const headerRow = rows.shift();
    if (headerRow === undefined) {
      // Nothing was read at all, so there is neither a header nor data.
      return [];
    }
    headers = headerRow.map((name): HeaderOption => ({ name }));
    line++;
  }

  return rows.map(
    (fields): unknown => {
      if (fields.length !== headers.length) {
        // NOTE(review): a plain string is thrown here; kept as-is so existing
        // callers that catch it keep seeing the same value.
        throw `Error number of fields line:${line}`;
      }
      line++;
      const record: Record<string, unknown> = {};
      for (let col = 0; col < fields.length; col++) {
        const header = headers[col];
        record[header.name] = header.parse
          ? header.parse(fields[col])
          : fields[col];
      }
      return opt.parse ? opt.parse(record) : record;
    }
  );
 }
--- a/encoding/csv_test.ts
+++ b/encoding/csv_test.ts
@ -2,7 +2,7 @@
 // https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
 import { test, runIfMain } from "../testing/mod.ts";
 import { assertEquals, assert } from "../testing/asserts.ts";
-import { readAll } from "./csv.ts";
+import { readAll, parse } from "./csv.ts";
 import { StringReader } from "../io/readers.ts";
 import { BufReader } from "../io/bufio.ts";
@ -468,4 +468,114 @@ for (const t of testCases) {
  });
 }
 // Table of cases for `parse`. Each entry provides the CSV input `in` (a string
 // or a BufReader), the `header` option, an optional row-level `parse`
 // function, and the expected `result`.
 const parseTestCases = [
  {
    name: "simple",
    in: "a,b,c",
    header: false,
    result: [["a", "b", "c"]]
  },
  {
    name: "simple Bufreader",
    in: new BufReader(new StringReader("a,b,c")),
    header: false,
    result: [["a", "b", "c"]]
  },
  {
    name: "multiline",
    in: "a,b,c\ne,f,g\n",
    header: false,
    result: [["a", "b", "c"], ["e", "f", "g"]]
  },
  {
    name: "header mapping boolean",
    in: "a,b,c\ne,f,g\n",
    header: true,
    result: [{ a: "e", b: "f", c: "g" }]
  },
  {
    name: "header mapping array",
    in: "a,b,c\ne,f,g\n",
    header: ["this", "is", "sparta"],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" }
    ]
  },
  {
    name: "header mapping object",
    in: "a,b,c\ne,f,g\n",
    header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" }
    ]
  },
  {
    name: "header mapping parse entry",
    in: "a,b,c\ne,f,g\n",
    header: [
      {
        name: "this",
        parse: (e: string): string => {
          return `b${e}$$`;
        }
      },
      {
        name: "is",
        parse: (e: string): number => {
          return e.length;
        }
      },
      {
        name: "sparta",
        parse: (e: string): unknown => {
          return { bim: `boom-${e}` };
        }
      }
    ],
    result: [
      { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
      { this: "be$$", is: 1, sparta: { bim: `boom-g` } }
    ]
  },
  {
    name: "multiline parse",
    in: "a,b,c\ne,f,g\n",
    parse: (e: string[]): unknown => {
      return { super: e[0], street: e[1], fighter: e[2] };
    },
    header: false,
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" }
    ]
  },
  {
    name: "header mapping object parseline",
    in: "a,b,c\ne,f,g\n",
    header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    parse: (e: Record<string, unknown>): unknown => {
      return { super: e.this, street: e.is, fighter: e.sparta };
    },
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" }
    ]
  }
 ];
 // Register one test per table entry; each runs `parse` with the entry's
 // header/parse options and compares the outcome with the expected result.
 for (const {
  name: caseName,
  in: csvInput,
  header,
  parse: rowParse,
  result: expected
 } of parseTestCases) {
  test({
    name: `[CSV] Parse ${caseName}`,
    async fn(): Promise<void> {
      const actual = await parse(csvInput, {
        header,
        parse: rowParse as (input: unknown) => unknown
      });
      assertEquals(actual, expected);
    }
  });
 }
 runIfMain(import.meta);