1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-22 15:06:54 -05:00

encoding: add csv parse (denoland/deno_std#458)

Original: 167f529898
This commit is contained in:
Vincent LE GOFF 2019-05-30 15:50:29 +02:00 committed by Ryan Dahl
parent a0ce25e606
commit 2487c45547
4 changed files with 349 additions and 24 deletions

View file

@ -24,6 +24,7 @@ Here are the dedicated documentations of modules:
- [colors](colors/README.md)
- [datetime](datetime/README.md)
- [encoding](encoding/README.md)
- [examples](examples/README.md)
- [flags](flags/README.md)
- [fs](fs/README.md)
@ -33,7 +34,6 @@ Here are the dedicated documentations of modules:
- [prettier](prettier/README.md)
- [strings](strings/README.md)
- [testing](testing/README.md)
- [toml](encoding/toml/README.md)
- [ws](ws/README.md)
## Contributing

View file

@ -1,11 +1,112 @@
# TOML
# Encoding
## CSV
- **`readAll(reader: BufReader, opt: ParseOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false } ): Promise<[string[][], BufState]>`**:
Read the whole buffer and output the structured CSV data.
- **`parse(csvString: string, opt: ParseOption): Promise<unknown[]>`**:
See [Parse](#parse).
### Parse
Parse the CSV string with the options provided.
#### Options
##### ParseOption
- **`header: boolean | string[] | HeaderOption[];`**: If `true` is provided,
the first line will be used as the header definitions. If a `string[]` or
`HeaderOption[]` is provided,
those names will be used as the header definitions.
- **`parse?: (input: unknown) => unknown;`**: Parse function for each row, which
is executed after all columns of the row have been parsed. If you don't provide
a header, the input of this function is the raw `string[]` row; otherwise it is
the object built from the headers.
##### HeaderOption
- **`name: string;`**: Name of the header to be used as property.
- **`parse?: (input: string) => unknown;`**: Parse function for the column.
It is executed on each entry of the column. It can be combined with the
parse function of the rows.
#### Usage
```ts
// input:
// a,b,c
// e,f,g
const r = await parseFile(filepath, {
header: false
});
// output:
// [["a", "b", "c"], ["e", "f", "g"]]
const r = await parseFile(filepath, {
header: true
});
// output:
// [{ a: "e", b: "f", c: "g" }]
const r = await parseFile(filepath, {
header: ["this", "is", "sparta"]
});
// output:
// [
// { this: "a", is: "b", sparta: "c" },
// { this: "e", is: "f", sparta: "g" }
// ]
const r = await parseFile(filepath, {
header: [
{
name: "this",
parse: (e: string): string => {
return `b${e}$$`;
}
},
{
name: "is",
parse: (e: string): number => {
return e.length;
}
},
{
name: "sparta",
parse: (e: string): unknown => {
return { bim: `boom-${e}` };
}
}
]
});
// output:
// [
// { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
// { this: "be$$", is: 1, sparta: { bim: `boom-g` } }
// ]
const r = await parseFile(filepath, {
header: ["this", "is", "sparta"],
parse: (e: Record<string, unknown>) => {
return { super: e.this, street: e.is, fighter: e.sparta };
}
});
// output:
// [
// { super: "a", street: "b", fighter: "c" },
// { super: "e", street: "f", fighter: "g" }
// ]
```
## TOML
This module parses TOML files. It follows the
[TOML specs](https://github.com/toml-lang/toml) as closely as possible. Be sure to read the supported
types, as not every part of the spec is supported at the moment and the handling on
the TypeScript side is a bit different.
## Supported types and handling
### Supported types and handling
- :heavy_check_mark: [Keys](https://github.com/toml-lang/toml#string)
- :exclamation: [String](https://github.com/toml-lang/toml#string)
@ -27,39 +128,39 @@ TypeScript side is a bit different.
:exclamation: _Supported with warnings see [Warning](#Warning)._
### :warning: Warning
#### :warning: Warning
#### String
##### String
- Regex : Due to the spec, there is no flag to detect regex properly
in a TOML declaration. So the regex is stored as string.
#### Integer
##### Integer
For **Binary** / **Octal** / **Hexadecimal** numbers,
they are stored as strings so that they are not interpreted as decimal.
#### Local Time
##### Local Time
Because local time does not exist in JavaScript, the local time is stored as a string.
#### Inline Table
##### Inline Table
Inline tables are supported. See below:
```toml
animal = { type = { name = "pug" } }
# Output
## Output
animal = { type.name = "pug" }
# Output { animal : { type : { name : "pug" } }
## Output { animal : { type : { name : "pug" } }
animal.as.leaders = "tosin"
# Output { animal: { as: { leaders: "tosin" } } }
## Output { animal: { as: { leaders: "tosin" } } }
"tosin.abasi" = "guitarist"
# Output
## Output
"tosin.abasi" : "guitarist"
```
#### Array of Tables
##### Array of Tables
At the moment only simple declarations like below are supported:
@ -89,9 +190,9 @@ will output:
}
```
## Usage
### Usage
### Parse
#### Parse
```ts
import { parse } from "./parser.ts";
@ -103,7 +204,7 @@ const tomlString = 'foo.bar = "Deno"';
const tomlObject22 = parse(tomlString);
```
### Stringify
#### Stringify
```ts
import { stringify } from "./parser.ts";

View file

@ -4,6 +4,7 @@
import { BufReader, EOF } from "../io/bufio.ts";
import { TextProtoReader } from "../textproto/mod.ts";
import { StringReader } from "../io/readers.ts";
const INVALID_RUNE = ["\r", "\n", '"'];
@ -17,28 +18,39 @@ export class ParseError extends Error {
}
}
/**
* @property comma - Character which separates values. Default: ','
* @property comment - Character to start a comment. Default: '#'
* @property trimLeadingSpace - Flag to trim the leading space of the value. Default: 'false'
* @property lazyQuotes - Allow an unquoted quote in a quoted field or
* non-doubled quotes in a quoted field. Default: 'false'
* @property fieldsPerRecord - Enables the check of the number of fields for each row. If == 0,
* the first row is used as the reference for the number of fields.
*/
export interface ParseOptions {
comma: string;
comma?: string;
comment?: string;
trimLeadingSpace: boolean;
trimLeadingSpace?: boolean;
lazyQuotes?: boolean;
fieldsPerRecord?: number;
}
function chkOptions(opt: ParseOptions): void {
if (!opt.comma) opt.comma = ",";
if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;
if (
INVALID_RUNE.includes(opt.comma) ||
(opt.comment && INVALID_RUNE.includes(opt.comment)) ||
INVALID_RUNE.includes(opt.comma!) ||
INVALID_RUNE.includes(opt.comment!) ||
opt.comma === opt.comment
) {
throw new Error("Invalid Delimiter");
}
}
export async function read(
async function read(
Startline: number,
reader: BufReader,
opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false }
opt: ParseOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | EOF> {
const tp = new TextProtoReader(reader);
let line: string;
@ -68,7 +80,7 @@ export async function read(
return [];
}
result = line.split(opt.comma);
result = line.split(opt.comma!);
let quoteError = false;
result = result.map(
@ -138,3 +150,105 @@ export async function readAll(
}
return result;
}
/**
 * Describes a single CSV column: the property name its values are stored
 * under and an optional converter applied to every entry of that column.
 */
export interface HeaderOption {
  /** Property name used for this column in each output record. */
  name: string;
  /** Optional converter run on the raw string value of each entry. */
  parse?: (input: string) => unknown;
}
/**
 * Options accepted by `parse`: the base reader options plus header mapping
 * and an optional per-row conversion.
 */
export interface ExtendedParseOptions extends ParseOptions {
  /**
   * `true` takes the column names from the first row; a `string[]` or
   * `HeaderOption[]` supplies them explicitly; `false` disables the mapping
   * so rows stay `string[]`.
   */
  header: boolean | string[] | HeaderOption[];
  /** Optional converter applied to each row after its columns are handled. */
  parse?: (input: unknown) => unknown;
}
/**
 * Csv parse helper to manipulate data.
 * Provides an auto/custom mapper for columns and parse function
 * for columns and rows.
 *
 * Without a header (`opt.header` falsy) every row is a `string[]`, optionally
 * passed through `opt.parse`. With a header, every row is turned into an
 * object keyed by the header names (each value optionally converted by the
 * column's own `parse`), and that object is then optionally passed through
 * `opt.parse`.
 *
 * @param input Input to parse. Can be a string or BufReader.
 * @param opt options of the parser.
 * @param [opt.header=false] HeaderOptions
 * @param [opt.parse=null] Parse function for rows.
 * @returns The parsed rows. Empty input yields an empty array, including
 * when `opt.header` is `true` and there is no header row to read.
 * @throws Error if a row does not have as many fields as there are headers.
 * Example:
 *     const r = await parse('a,b,c\ne,f,g\n', {
 *      header: ["this", "is", "sparta"],
 *       parse: (e: Record<string, unknown>) => {
 *         return { super: e.this, street: e.is, fighter: e.sparta };
 *       }
 *     });
 * // output
 * [
 *   { super: "a", street: "b", fighter: "c" },
 *   { super: "e", street: "f", fighter: "g" }
 * ]
 */
export async function parse(
  input: string | BufReader,
  opt: ExtendedParseOptions = {
    header: false
  }
): Promise<unknown[]> {
  const reader =
    input instanceof BufReader ? input : new BufReader(new StringReader(input));
  const rows: string[][] = await readAll(reader, opt);

  if (!opt.header) {
    // No header mapping: rows stay `string[]`, optionally run through opt.parse.
    const rowParser = opt.parse;
    if (rowParser) {
      return rows.map((row: string[]): unknown => rowParser(row));
    }
    return rows;
  }

  let headers: HeaderOption[];
  // Index of the input row being mapped; reported in the field-count error.
  let line = 0;
  if (Array.isArray(opt.header)) {
    if (typeof opt.header[0] !== "string") {
      headers = opt.header as HeaderOption[];
    } else {
      headers = (opt.header as string[]).map(
        (name): HeaderOption => ({ name })
      );
    }
  } else {
    // `header: true`: the first row supplies the column names.
    const firstRow = rows.shift();
    if (firstRow === undefined) {
      // Empty input: there is no header row and therefore no records.
      return [];
    }
    headers = firstRow.map((name): HeaderOption => ({ name }));
    line++;
  }

  return rows.map(
    (row): unknown => {
      if (row.length !== headers.length) {
        // Throw a real Error (not a bare string) so callers get a stack trace.
        throw new Error(`Error number of fields line:${line}`);
      }
      line++;
      const out: Record<string, unknown> = {};
      for (let j = 0; j < row.length; j++) {
        const h = headers[j];
        out[h.name] = h.parse ? h.parse(row[j]) : row[j];
      }
      if (opt.parse) {
        return opt.parse(out);
      }
      return out;
    }
  );
}

View file

@ -2,7 +2,7 @@
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
import { test, runIfMain } from "../testing/mod.ts";
import { assertEquals, assert } from "../testing/asserts.ts";
import { readAll } from "./csv.ts";
import { readAll, parse } from "./csv.ts";
import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
@ -468,4 +468,114 @@ for (const t of testCases) {
});
}
// Table of inputs for `parse`: each entry gives the CSV input (`in`), the
// `header` option, an optional row-level `parse` function and the expected
// `result`.
const parseTestCases = [
  {
    name: "simple",
    in: "a,b,c",
    header: false,
    result: [["a", "b", "c"]]
  },
  {
    name: "simple Bufreader",
    in: new BufReader(new StringReader("a,b,c")),
    header: false,
    result: [["a", "b", "c"]]
  },
  {
    name: "multiline",
    in: "a,b,c\ne,f,g\n",
    header: false,
    result: [["a", "b", "c"], ["e", "f", "g"]]
  },
  {
    name: "header mapping boolean",
    in: "a,b,c\ne,f,g\n",
    header: true,
    result: [{ a: "e", b: "f", c: "g" }]
  },
  {
    name: "header mapping array",
    in: "a,b,c\ne,f,g\n",
    header: ["this", "is", "sparta"],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" }
    ]
  },
  {
    name: "header mapping object",
    in: "a,b,c\ne,f,g\n",
    header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" }
    ]
  },
  {
    name: "header mapping parse entry",
    in: "a,b,c\ne,f,g\n",
    header: [
      // Each column carries its own converter.
      { name: "this", parse: (e: string): string => `b${e}$$` },
      { name: "is", parse: (e: string): number => e.length },
      { name: "sparta", parse: (e: string): unknown => ({ bim: `boom-${e}` }) }
    ],
    result: [
      { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
      { this: "be$$", is: 1, sparta: { bim: `boom-g` } }
    ]
  },
  {
    name: "multiline parse",
    in: "a,b,c\ne,f,g\n",
    // Without headers the row parser receives the raw string[] row.
    parse: (e: string[]): unknown => ({
      super: e[0],
      street: e[1],
      fighter: e[2]
    }),
    header: false,
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" }
    ]
  },
  {
    name: "header mapping object parseline",
    in: "a,b,c\ne,f,g\n",
    header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    // With headers the row parser receives the object keyed by header names.
    parse: (e: Record<string, unknown>): unknown => ({
      super: e.this,
      street: e.is,
      fighter: e.sparta
    }),
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" }
    ]
  }
];
// Register one test per table entry: run `parse` with the entry's options
// and compare the outcome with the expected result.
parseTestCases.forEach(
  (testCase): void => {
    test({
      name: `[CSV] Parse ${testCase.name}`,
      async fn(): Promise<void> {
        const options = {
          header: testCase.header,
          parse: testCase.parse as (input: unknown) => unknown
        };
        const actual = await parse(testCase.in, options);
        assertEquals(actual, testCase.result);
      }
    });
  }
);
runIfMain(import.meta);