From 2487c455472d00880dfca82d0178bf299bdfec48 Mon Sep 17 00:00:00 2001 From: Vincent LE GOFF Date: Thu, 30 May 2019 15:50:29 +0200 Subject: [PATCH] encoding: add csv parse (denoland/deno_std#458) Original: https://github.com/denoland/deno_std/commit/167f5298983000e9aa9da560e566df6237f03f67 --- README.md | 2 +- encoding/README.md | 131 ++++++++++++++++++++++++++++++++++++++----- encoding/csv.ts | 128 +++++++++++++++++++++++++++++++++++++++--- encoding/csv_test.ts | 112 +++++++++++++++++++++++++++++++++++- 4 files changed, 349 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index a1e7bc3955..ac9969627a 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Here are the dedicated documentations of modules: - [colors](colors/README.md) - [datetime](datetime/README.md) +- [encoding](encoding/README.md) - [examples](examples/README.md) - [flags](flags/README.md) - [fs](fs/README.md) @@ -33,7 +34,6 @@ Here are the dedicated documentations of modules: - [prettier](prettier/README.md) - [strings](strings/README.md) - [testing](testing/README.md) -- [toml](encoding/toml/README.md) - [ws](ws/README.md) ## Contributing diff --git a/encoding/README.md b/encoding/README.md index e30d972f3f..f03e80ba25 100644 --- a/encoding/README.md +++ b/encoding/README.md @@ -1,11 +1,112 @@ -# TOML +# Encoding + +## CSV + +- **`readAll(reader: BufReader, opt: ParseOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false } ): Promise<[string[][], BufState]>`**: + Read the whole buffer and output the structured CSV data. +- **`parse(csvString: string, opt: ParseOption): Promise<unknown[]>`**: + See [Parse](#parse) + +### Parse + +Parse the CSV string with the options provided. + +#### Options + +##### ParseOption + +- **`header: boolean | string[] | HeaderOption[];`**: If `true` is provided, + the first line will be used as header definitions. If `string[]` or + `HeaderOption[]` is provided, + those names will be used as header definitions. 
+- **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which + will be executed after all columns have been parsed. Therefore, if you do not + provide a header, the input of this function will be a `string[]`. + +##### HeaderOption + +- **`name: string;`**: Name of the header to be used as property. +- **`parse?: (input: string) => unknown;`**: Parse function for the column. + This is executed on each entry of the column. This can be combined with the + Parse function of the rows. + +#### Usage + +```ts +// input: +// a,b,c +// e,f,g + +const r = await parseFile(filepath, { + header: false +}); +// output: +// [["a", "b", "c"], ["e", "f", "g"]] + +const r = await parseFile(filepath, { + header: true +}); +// output: +// [{ a: "e", b: "f", c: "g" }] + +const r = await parseFile(filepath, { + header: ["this", "is", "sparta"] +}); +// output: +// [ +// { this: "a", is: "b", sparta: "c" }, +// { this: "e", is: "f", sparta: "g" } +// ] + +const r = await parseFile(filepath, { + header: [ + { + name: "this", + parse: (e: string): string => { + return `b${e}$$`; + } + }, + { + name: "is", + parse: (e: string): number => { + return e.length; + } + }, + { + name: "sparta", + parse: (e: string): unknown => { + return { bim: `boom-${e}` }; + } + } + ] +}); +// output: +// [ +// { this: "ba$$", is: 1, sparta: { bim: `boom-c` } }, +// { this: "be$$", is: 1, sparta: { bim: `boom-g` } } +// ] + +const r = await parseFile(filepath, { + header: ["this", "is", "sparta"], + parse: (e: Record<string, unknown>) => { + return { super: e.this, street: e.is, fighter: e.sparta }; + } +}); +// output: +// [ +// { super: "a", street: "b", fighter: "c" }, +// { super: "e", street: "f", fighter: "g" } +// ] +``` + +## TOML This module parse TOML files. It follows as much as possible the [TOML specs](https://github.com/toml-lang/toml). Be sure to read the supported types as not every specs is supported at the moment and the handling in TypeScript side is a bit different. 
-## Supported types and handling +### Supported types and handling - :heavy_check_mark: [Keys](https://github.com/toml-lang/toml#string) - :exclamation: [String](https://github.com/toml-lang/toml#string) @@ -27,39 +128,39 @@ TypeScript side is a bit different. :exclamation: _Supported with warnings see [Warning](#Warning)._ -### :warning: Warning +#### :warning: Warning -#### String +##### String - Regex : Due to the spec, there is no flag to detect regex properly in a TOML declaration. So the regex is stored as string. -#### Integer +##### Integer For **Binary** / **Octal** / **Hexadecimal** numbers, they are stored as string to be not interpreted as Decimal. -#### Local Time +##### Local Time Because local time does not exist in JavaScript, the local time is stored as a string. -#### Inline Table +##### Inline Table Inline tables are supported. See below: ```toml animal = { type = { name = "pug" } } -# Output +## Output animal = { type.name = "pug" } -# Output { animal : { type : { name : "pug" } } +## Output { animal : { type : { name : "pug" } } animal.as.leaders = "tosin" -# Output { animal: { as: { leaders: "tosin" } } } +## Output { animal: { as: { leaders: "tosin" } } } "tosin.abasi" = "guitarist" -# Output +## Output "tosin.abasi" : "guitarist" ``` -#### Array of Tables +##### Array of Tables At the moment only simple declarations like below are supported: @@ -89,9 +190,9 @@ will output: } ``` -## Usage +### Usage -### Parse +#### Parse ```ts import { parse } from "./parser.ts"; @@ -103,7 +204,7 @@ const tomlString = 'foo.bar = "Deno"'; const tomlObject22 = parse(tomlString); ``` -### Stringify +#### Stringify ```ts import { stringify } from "./parser.ts"; diff --git a/encoding/csv.ts b/encoding/csv.ts index aa2ceb1cf0..1c4ae546b0 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -4,6 +4,7 @@ import { BufReader, EOF } from "../io/bufio.ts"; import { TextProtoReader } from "../textproto/mod.ts"; +import { StringReader } from "../io/readers.ts"; const 
INVALID_RUNE = ["\r", "\n", '"']; @@ -17,28 +18,39 @@ export class ParseError extends Error { } } +/** + * @property comma - Character which separates values. Default: ',' + * @property comment - Character to start a comment. Default: '#' + * @property trimLeadingSpace - Flag to trim the leading space of the value. Default: 'false' + * @property lazyQuotes - Allow unquoted quote in a quoted field or non double + * quoted quotes in quoted field Default: 'false' + * @property fieldsPerRecord - Enabling the check of fields for each row. If == 0 + * first row is used as referal for the number of fields. + */ export interface ParseOptions { - comma: string; + comma?: string; comment?: string; - trimLeadingSpace: boolean; + trimLeadingSpace?: boolean; lazyQuotes?: boolean; fieldsPerRecord?: number; } function chkOptions(opt: ParseOptions): void { + if (!opt.comma) opt.comma = ","; + if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false; if ( - INVALID_RUNE.includes(opt.comma) || - (opt.comment && INVALID_RUNE.includes(opt.comment)) || + INVALID_RUNE.includes(opt.comma!) || + INVALID_RUNE.includes(opt.comment!) || opt.comma === opt.comment ) { throw new Error("Invalid Delimiter"); } } -export async function read( +async function read( Startline: number, reader: BufReader, - opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false } + opt: ParseOptions = { comma: ",", trimLeadingSpace: false } ): Promise { const tp = new TextProtoReader(reader); let line: string; @@ -68,7 +80,7 @@ export async function read( return []; } - result = line.split(opt.comma); + result = line.split(opt.comma!); let quoteError = false; result = result.map( @@ -138,3 +150,105 @@ export async function readAll( } return result; } + +/** + * HeaderOption provides the column definition + * and the parse function for each entry of the + * column. 
+ */ +export interface HeaderOption { + name: string; + parse?: (input: string) => unknown; +} + +export interface ExtendedParseOptions extends ParseOptions { + header: boolean | string[] | HeaderOption[]; + parse?: (input: unknown) => unknown; +} + +/** + * Csv parse helper to manipulate data. + * Provides an auto/custom mapper for columns and parse function + * for columns and rows. + * @param input Input to parse. Can be a string or BufReader. + * @param opt options of the parser. + * @param [opt.header=false] HeaderOptions + * @param [opt.parse=null] Parse function for rows. + * Example: + * const r = await parseFile('a,b,c\ne,f,g\n', { + * header: ["this", "is", "sparta"], + * parse: (e: Record) => { + * return { super: e.this, street: e.is, fighter: e.sparta }; + * } + * }); + * // output + * [ + * { super: "a", street: "b", fighter: "c" }, + * { super: "e", street: "f", fighter: "g" } + * ] + */ +export async function parse( + input: string | BufReader, + opt: ExtendedParseOptions = { + header: false + } +): Promise { + let r: string[][]; + if (input instanceof BufReader) { + r = await readAll(input, opt); + } else { + r = await readAll(new BufReader(new StringReader(input)), opt); + } + if (opt.header) { + let headers: HeaderOption[] = []; + let i = 0; + if (Array.isArray(opt.header)) { + if (typeof opt.header[0] !== "string") { + headers = opt.header as HeaderOption[]; + } else { + const h = opt.header as string[]; + headers = h.map( + (e): HeaderOption => { + return { + name: e + }; + } + ); + } + } else { + headers = r.shift()!.map( + (e): HeaderOption => { + return { + name: e + }; + } + ); + i++; + } + return r.map( + (e): unknown => { + if (e.length !== headers.length) { + throw `Error number of fields line:${i}`; + } + i++; + let out: Record = {}; + for (let j = 0; j < e.length; j++) { + const h = headers[j]; + if (h.parse) { + out[h.name] = h.parse(e[j]); + } else { + out[h.name] = e[j]; + } + } + if (opt.parse) { + return opt.parse(out); + } + 
return out; + } + ); + } + if (opt.parse) { + return r.map((e: string[]): unknown => opt.parse!(e)); + } + return r; +} diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts index 0cf95b473a..a68b81dc8a 100644 --- a/encoding/csv_test.ts +++ b/encoding/csv_test.ts @@ -2,7 +2,7 @@ // https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go import { test, runIfMain } from "../testing/mod.ts"; import { assertEquals, assert } from "../testing/asserts.ts"; -import { readAll } from "./csv.ts"; +import { readAll, parse } from "./csv.ts"; import { StringReader } from "../io/readers.ts"; import { BufReader } from "../io/bufio.ts"; @@ -468,4 +468,114 @@ for (const t of testCases) { }); } +const parseTestCases = [ + { + name: "simple", + in: "a,b,c", + header: false, + result: [["a", "b", "c"]] + }, + { + name: "simple Bufreader", + in: new BufReader(new StringReader("a,b,c")), + header: false, + result: [["a", "b", "c"]] + }, + { + name: "multiline", + in: "a,b,c\ne,f,g\n", + header: false, + result: [["a", "b", "c"], ["e", "f", "g"]] + }, + { + name: "header mapping boolean", + in: "a,b,c\ne,f,g\n", + header: true, + result: [{ a: "e", b: "f", c: "g" }] + }, + { + name: "header mapping array", + in: "a,b,c\ne,f,g\n", + header: ["this", "is", "sparta"], + result: [ + { this: "a", is: "b", sparta: "c" }, + { this: "e", is: "f", sparta: "g" } + ] + }, + { + name: "header mapping object", + in: "a,b,c\ne,f,g\n", + header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + result: [ + { this: "a", is: "b", sparta: "c" }, + { this: "e", is: "f", sparta: "g" } + ] + }, + { + name: "header mapping parse entry", + in: "a,b,c\ne,f,g\n", + header: [ + { + name: "this", + parse: (e: string): string => { + return `b${e}$$`; + } + }, + { + name: "is", + parse: (e: string): number => { + return e.length; + } + }, + { + name: "sparta", + parse: (e: string): unknown => { + return { bim: `boom-${e}` }; + } + } + ], + result: [ + { this: "ba$$", is: 1, sparta: { bim: 
`boom-c` } }, + { this: "be$$", is: 1, sparta: { bim: `boom-g` } } + ] + }, + { + name: "multiline parse", + in: "a,b,c\ne,f,g\n", + parse: (e: string[]): unknown => { + return { super: e[0], street: e[1], fighter: e[2] }; + }, + header: false, + result: [ + { super: "a", street: "b", fighter: "c" }, + { super: "e", street: "f", fighter: "g" } + ] + }, + { + name: "header mapping object parseline", + in: "a,b,c\ne,f,g\n", + header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + parse: (e: Record): unknown => { + return { super: e.this, street: e.is, fighter: e.sparta }; + }, + result: [ + { super: "a", street: "b", fighter: "c" }, + { super: "e", street: "f", fighter: "g" } + ] + } +]; + +for (const testCase of parseTestCases) { + test({ + name: `[CSV] Parse ${testCase.name}`, + async fn(): Promise { + const r = await parse(testCase.in, { + header: testCase.header, + parse: testCase.parse as (input: unknown) => unknown + }); + assertEquals(r, testCase.result); + } + }); +} + runIfMain(import.meta);