mirror of
https://github.com/denoland/deno.git
synced 2024-12-23 15:49:44 -05:00
BREAKING(std/encoding/csv): improve the definition of ParseOptions (#7714)
This commit is contained in:
parent
5db72dcaf3
commit
94dcef714d
3 changed files with 101 additions and 72 deletions
|
@ -37,25 +37,29 @@ writeVarbig(w: Deno.Writer, x: bigint, o: VarbigOptions = {}): Promise<number>
|
|||
Parse the CSV from the `reader` with the options provided and return
|
||||
`string[][]`.
|
||||
|
||||
#### `parse(input: string | BufReader, opt: ParseOptions = { header: false }): Promise<unknown[]>`:
|
||||
#### `parse(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>`:
|
||||
|
||||
Parse the CSV string/buffer with the options provided. The result of this
|
||||
function is as follows:
|
||||
|
||||
- If you don't provide both `opt.header` and `opt.parse`, it returns
|
||||
`string[][]`.
|
||||
- If you provide `opt.header` but not `opt.parse`, it returns `object[]`.
|
||||
- If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it
|
||||
returns `string[][]`.
|
||||
- If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it
|
||||
returns `object[]`.
|
||||
- If you provide `opt.parse`, it returns an array where each element is the
|
||||
value returned from `opt.parse`.
|
||||
|
||||
##### `ParseOptions`
|
||||
|
||||
- **`header: boolean | string[] | HeaderOptions[];`**: If a boolean is provided,
|
||||
the first line will be used as Header definitions. If `string[]` or
|
||||
`HeaderOptions[]` those names will be used for header definition.
|
||||
- **`skipFirstRow?: boolean;`**: If you provide `skipFirstRow: true` and
|
||||
`columns`, the first line will be skipped. If you provide `skipFirstRow: true`
|
||||
but not `columns`, the first line will be skipped and used as header
|
||||
definitions.
|
||||
- **`columns?: string[] | ColumnOptions[];`**: If you provide `string[]` or
|
||||
`ColumnOptions[]`, those names will be used for header definition.
|
||||
- **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which
|
||||
will be executed after parsing of all columns. Therefore if you don't provide
|
||||
header and parse function with headers, input will be `string[]`.
|
||||
`skipFirstRow` and `columns`, the input to the `parse` function will be `string[]`.
|
||||
|
||||
##### `ColumnOptions`
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ export class ParseError extends Error {
|
|||
}
|
||||
|
||||
/**
|
||||
* @property comma - Character which separates values. Default: ','
|
||||
* @property separator - Character which separates values. Default: ','
|
||||
* @property comment - Character to start a comment. Default: '#'
|
||||
* @property trimLeadingSpace - Flag to trim the leading space of the value.
|
||||
* Default: 'false'
|
||||
|
@ -62,7 +62,7 @@ export class ParseError extends Error {
|
|||
* If == 0, first row is used as referral for the number of fields.
|
||||
*/
|
||||
export interface ReadOptions {
|
||||
comma?: string;
|
||||
separator?: string;
|
||||
comment?: string;
|
||||
trimLeadingSpace?: boolean;
|
||||
lazyQuotes?: boolean;
|
||||
|
@ -70,16 +70,16 @@ export interface ReadOptions {
|
|||
}
|
||||
|
||||
function chkOptions(opt: ReadOptions): void {
|
||||
if (!opt.comma) {
|
||||
opt.comma = ",";
|
||||
if (!opt.separator) {
|
||||
opt.separator = ",";
|
||||
}
|
||||
if (!opt.trimLeadingSpace) {
|
||||
opt.trimLeadingSpace = false;
|
||||
}
|
||||
if (
|
||||
INVALID_RUNE.includes(opt.comma) ||
|
||||
INVALID_RUNE.includes(opt.separator) ||
|
||||
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
|
||||
opt.comma === opt.comment
|
||||
opt.separator === opt.comment
|
||||
) {
|
||||
throw new Error(ERR_INVALID_DELIM);
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ function chkOptions(opt: ReadOptions): void {
|
|||
async function readRecord(
|
||||
startLine: number,
|
||||
reader: BufReader,
|
||||
opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
|
||||
opt: ReadOptions = { separator: ",", trimLeadingSpace: false },
|
||||
): Promise<string[] | null> {
|
||||
const tp = new TextProtoReader(reader);
|
||||
let line = await readLine(tp);
|
||||
|
@ -103,13 +103,13 @@ async function readRecord(
|
|||
return [];
|
||||
}
|
||||
|
||||
assert(opt.comma != null);
|
||||
assert(opt.separator != null);
|
||||
|
||||
let fullLine = line;
|
||||
let quoteError: ParseError | null = null;
|
||||
const quote = '"';
|
||||
const quoteLen = quote.length;
|
||||
const commaLen = opt.comma.length;
|
||||
const separatorLen = opt.separator.length;
|
||||
let recordBuffer = "";
|
||||
const fieldIndexes = [] as number[];
|
||||
parseField:
|
||||
|
@ -120,7 +120,7 @@ async function readRecord(
|
|||
|
||||
if (line.length === 0 || !line.startsWith(quote)) {
|
||||
// Non-quoted string field
|
||||
const i = line.indexOf(opt.comma);
|
||||
const i = line.indexOf(opt.separator);
|
||||
let field = line;
|
||||
if (i >= 0) {
|
||||
field = field.substring(0, i);
|
||||
|
@ -144,7 +144,7 @@ async function readRecord(
|
|||
recordBuffer += field;
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
if (i >= 0) {
|
||||
line = line.substring(i + commaLen);
|
||||
line = line.substring(i + separatorLen);
|
||||
continue parseField;
|
||||
}
|
||||
break parseField;
|
||||
|
@ -161,9 +161,9 @@ async function readRecord(
|
|||
// `""` sequence (append quote).
|
||||
recordBuffer += quote;
|
||||
line = line.substring(quoteLen);
|
||||
} else if (line.startsWith(opt.comma)) {
|
||||
} else if (line.startsWith(opt.separator)) {
|
||||
// `","` sequence (end of field).
|
||||
line = line.substring(commaLen);
|
||||
line = line.substring(separatorLen);
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
continue parseField;
|
||||
} else if (0 === line.length) {
|
||||
|
@ -281,7 +281,7 @@ async function readLine(tp: TextProtoReader): Promise<string | null> {
|
|||
export async function readMatrix(
|
||||
reader: BufReader,
|
||||
opt: ReadOptions = {
|
||||
comma: ",",
|
||||
separator: ",",
|
||||
trimLeadingSpace: false,
|
||||
lazyQuotes: false,
|
||||
},
|
||||
|
@ -324,13 +324,13 @@ export async function readMatrix(
|
|||
/**
|
||||
* Parse the CSV string/buffer with the options provided.
|
||||
*
|
||||
* HeaderOptions provides the column definition
|
||||
* ColumnOptions provides the column definition
|
||||
* and the parse function for each entry of the
|
||||
* column.
|
||||
*/
|
||||
export interface HeaderOptions {
|
||||
export interface ColumnOptions {
|
||||
/**
|
||||
* Name of the header to be used as property
|
||||
* Name of the column to be used as property
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
|
@ -343,14 +343,20 @@ export interface HeaderOptions {
|
|||
|
||||
export interface ParseOptions extends ReadOptions {
|
||||
/**
|
||||
* If a boolean is provided, the first line will be used as Header definitions.
|
||||
* If `string[]` or `HeaderOptions[]` those names will be used for header definition.
|
||||
* If you provide `skipFirstRow: true` and `columns`, the first line will be skipped.
|
||||
* If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions.
|
||||
*/
|
||||
header: boolean | string[] | HeaderOptions[];
|
||||
skipFirstRow?: boolean;
|
||||
|
||||
/**
|
||||
* If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition.
|
||||
*/
|
||||
columns?: string[] | ColumnOptions[];
|
||||
|
||||
/** Parse function for rows.
|
||||
* Example:
|
||||
* const r = await parseFile('a,b,c\ne,f,g\n', {
|
||||
* header: ["this", "is", "sparta"],
|
||||
* columns: ["this", "is", "sparta"],
|
||||
* parse: (e: Record<string, unknown>) => {
|
||||
* return { super: e.this, street: e.is, fighter: e.sparta };
|
||||
* }
|
||||
|
@ -370,14 +376,14 @@ export interface ParseOptions extends ReadOptions {
|
|||
* for columns and rows.
|
||||
* @param input Input to parse. Can be a string or BufReader.
|
||||
* @param opt options of the parser.
|
||||
* @returns If you don't provide both `opt.header` and `opt.parse`, it returns `string[][]`.
|
||||
* If you provide `opt.header` but not `opt.parse`, it returns `object[]`.
|
||||
* @returns If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it returns `string[][]`.
|
||||
* If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`.
|
||||
* If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`.
|
||||
*/
|
||||
export async function parse(
|
||||
input: string | BufReader,
|
||||
opt: ParseOptions = {
|
||||
header: false,
|
||||
skipFirstRow: false,
|
||||
},
|
||||
): Promise<unknown[]> {
|
||||
let r: string[][];
|
||||
|
@ -386,27 +392,15 @@ export async function parse(
|
|||
} else {
|
||||
r = await readMatrix(new BufReader(new StringReader(input)), opt);
|
||||
}
|
||||
if (opt.header) {
|
||||
let headers: HeaderOptions[] = [];
|
||||
if (opt.skipFirstRow || opt.columns) {
|
||||
let headers: ColumnOptions[] = [];
|
||||
let i = 0;
|
||||
if (Array.isArray(opt.header)) {
|
||||
if (typeof opt.header[0] !== "string") {
|
||||
headers = opt.header as HeaderOptions[];
|
||||
} else {
|
||||
const h = opt.header as string[];
|
||||
headers = h.map(
|
||||
(e): HeaderOptions => {
|
||||
return {
|
||||
name: e,
|
||||
};
|
||||
},
|
||||
);
|
||||
}
|
||||
} else {
|
||||
|
||||
if (opt.skipFirstRow) {
|
||||
const head = r.shift();
|
||||
assert(head != null);
|
||||
headers = head.map(
|
||||
(e): HeaderOptions => {
|
||||
(e): ColumnOptions => {
|
||||
return {
|
||||
name: e,
|
||||
};
|
||||
|
@ -414,6 +408,21 @@ export async function parse(
|
|||
);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (opt.columns) {
|
||||
if (typeof opt.columns[0] !== "string") {
|
||||
headers = opt.columns as ColumnOptions[];
|
||||
} else {
|
||||
const h = opt.columns as string[];
|
||||
headers = h.map(
|
||||
(e): ColumnOptions => {
|
||||
return {
|
||||
name: e,
|
||||
};
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
return r.map((e): unknown => {
|
||||
if (e.length !== headers.length) {
|
||||
throw `Error number of fields line:${i}`;
|
||||
|
|
|
@ -17,6 +17,7 @@ import {
|
|||
import { StringReader } from "../io/readers.ts";
|
||||
import { BufReader } from "../io/bufio.ts";
|
||||
|
||||
// Test cases for `readMatrix()`
|
||||
const testCases = [
|
||||
{
|
||||
Name: "Simple",
|
||||
|
@ -60,7 +61,7 @@ zzz,yyy,xxx`,
|
|||
Name: "Semicolon",
|
||||
Input: "a;b;c\n",
|
||||
Output: [["a", "b", "c"]],
|
||||
Comma: ";",
|
||||
Separator: ";",
|
||||
},
|
||||
{
|
||||
Name: "MultiLine",
|
||||
|
@ -334,14 +335,14 @@ x,,,
|
|||
Input: "a£b,c£ \td,e\n€ comment\n",
|
||||
Output: [["a", "b,c", "d,e"]],
|
||||
TrimLeadingSpace: true,
|
||||
Comma: "£",
|
||||
Separator: "£",
|
||||
Comment: "€",
|
||||
},
|
||||
{
|
||||
Name: "NonASCIICommaAndCommentWithQuotes",
|
||||
Input: 'a€" b,"€ c\nλ comment\n',
|
||||
Output: [["a", " b,", " c"]],
|
||||
Comma: "€",
|
||||
Separator: "€",
|
||||
Comment: "λ",
|
||||
},
|
||||
{
|
||||
|
@ -350,7 +351,7 @@ x,,,
|
|||
Name: "NonASCIICommaConfusion",
|
||||
Input: '"abθcd"λefθgh',
|
||||
Output: [["abθcd", "efθgh"]],
|
||||
Comma: "λ",
|
||||
Separator: "λ",
|
||||
Comment: "€",
|
||||
},
|
||||
{
|
||||
|
@ -415,17 +416,17 @@ x,,,
|
|||
},
|
||||
{
|
||||
Name: "BadComma1",
|
||||
Comma: "\n",
|
||||
Separator: "\n",
|
||||
Error: new Error(ERR_INVALID_DELIM),
|
||||
},
|
||||
{
|
||||
Name: "BadComma2",
|
||||
Comma: "\r",
|
||||
Separator: "\r",
|
||||
Error: new Error(ERR_INVALID_DELIM),
|
||||
},
|
||||
{
|
||||
Name: "BadComma3",
|
||||
Comma: '"',
|
||||
Separator: '"',
|
||||
Error: new Error(ERR_INVALID_DELIM),
|
||||
},
|
||||
{
|
||||
|
@ -440,7 +441,7 @@ x,,,
|
|||
},
|
||||
{
|
||||
Name: "BadCommaComment",
|
||||
Comma: "X",
|
||||
Separator: "X",
|
||||
Comment: "X",
|
||||
Error: new Error(ERR_INVALID_DELIM),
|
||||
},
|
||||
|
@ -449,13 +450,13 @@ for (const t of testCases) {
|
|||
Deno.test({
|
||||
name: `[CSV] ${t.Name}`,
|
||||
async fn(): Promise<void> {
|
||||
let comma = ",";
|
||||
let separator = ",";
|
||||
let comment: string | undefined;
|
||||
let fieldsPerRec: number | undefined;
|
||||
let trim = false;
|
||||
let lazyquote = false;
|
||||
if (t.Comma) {
|
||||
comma = t.Comma;
|
||||
if (t.Separator) {
|
||||
separator = t.Separator;
|
||||
}
|
||||
if (t.Comment) {
|
||||
comment = t.Comment;
|
||||
|
@ -475,7 +476,7 @@ for (const t of testCases) {
|
|||
await readMatrix(
|
||||
new BufReader(new StringReader(t.Input ?? "")),
|
||||
{
|
||||
comma: comma,
|
||||
separator,
|
||||
comment: comment,
|
||||
trimLeadingSpace: trim,
|
||||
fieldsPerRecord: fieldsPerRec,
|
||||
|
@ -489,7 +490,7 @@ for (const t of testCases) {
|
|||
actual = await readMatrix(
|
||||
new BufReader(new StringReader(t.Input ?? "")),
|
||||
{
|
||||
comma: comma,
|
||||
separator,
|
||||
comment: comment,
|
||||
trimLeadingSpace: trim,
|
||||
fieldsPerRecord: fieldsPerRec,
|
||||
|
@ -507,19 +508,19 @@ const parseTestCases = [
|
|||
{
|
||||
name: "simple",
|
||||
in: "a,b,c",
|
||||
header: false,
|
||||
skipFirstRow: false,
|
||||
result: [["a", "b", "c"]],
|
||||
},
|
||||
{
|
||||
name: "simple Bufreader",
|
||||
in: new BufReader(new StringReader("a,b,c")),
|
||||
header: false,
|
||||
skipFirstRow: false,
|
||||
result: [["a", "b", "c"]],
|
||||
},
|
||||
{
|
||||
name: "multiline",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: false,
|
||||
skipFirstRow: false,
|
||||
result: [
|
||||
["a", "b", "c"],
|
||||
["e", "f", "g"],
|
||||
|
@ -528,13 +529,13 @@ const parseTestCases = [
|
|||
{
|
||||
name: "header mapping boolean",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: true,
|
||||
skipFirstRow: true,
|
||||
result: [{ a: "e", b: "f", c: "g" }],
|
||||
},
|
||||
{
|
||||
name: "header mapping array",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: ["this", "is", "sparta"],
|
||||
columns: ["this", "is", "sparta"],
|
||||
result: [
|
||||
{ this: "a", is: "b", sparta: "c" },
|
||||
{ this: "e", is: "f", sparta: "g" },
|
||||
|
@ -543,7 +544,7 @@ const parseTestCases = [
|
|||
{
|
||||
name: "header mapping object",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
|
||||
columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
|
||||
result: [
|
||||
{ this: "a", is: "b", sparta: "c" },
|
||||
{ this: "e", is: "f", sparta: "g" },
|
||||
|
@ -552,7 +553,7 @@ const parseTestCases = [
|
|||
{
|
||||
name: "header mapping parse entry",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: [
|
||||
columns: [
|
||||
{
|
||||
name: "this",
|
||||
parse: (e: string): string => {
|
||||
|
@ -583,7 +584,7 @@ const parseTestCases = [
|
|||
parse: (e: string[]): unknown => {
|
||||
return { super: e[0], street: e[1], fighter: e[2] };
|
||||
},
|
||||
header: false,
|
||||
skipFirstRow: false,
|
||||
result: [
|
||||
{ super: "a", street: "b", fighter: "c" },
|
||||
{ super: "e", street: "f", fighter: "g" },
|
||||
|
@ -592,7 +593,7 @@ const parseTestCases = [
|
|||
{
|
||||
name: "header mapping object parseline",
|
||||
in: "a,b,c\ne,f,g\n",
|
||||
header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
|
||||
columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
|
||||
parse: (e: Record<string, unknown>): unknown => {
|
||||
return { super: e.this, street: e.is, fighter: e.sparta };
|
||||
},
|
||||
|
@ -601,6 +602,20 @@ const parseTestCases = [
|
|||
{ super: "e", street: "f", fighter: "g" },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "provides both opts.skipFirstRow and opts.columns",
|
||||
in: "a,b,1\nc,d,2\ne,f,3",
|
||||
skipFirstRow: true,
|
||||
columns: [
|
||||
{ name: "foo" },
|
||||
{ name: "bar" },
|
||||
{ name: "baz", parse: (e: string) => Number(e) },
|
||||
],
|
||||
result: [
|
||||
{ foo: "c", bar: "d", baz: 2 },
|
||||
{ foo: "e", bar: "f", baz: 3 },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
for (const testCase of parseTestCases) {
|
||||
|
@ -608,7 +623,8 @@ for (const testCase of parseTestCases) {
|
|||
name: `[CSV] Parse ${testCase.name}`,
|
||||
async fn(): Promise<void> {
|
||||
const r = await parse(testCase.in, {
|
||||
header: testCase.header,
|
||||
skipFirstRow: testCase.skipFirstRow,
|
||||
columns: testCase.columns,
|
||||
parse: testCase.parse as (input: unknown) => unknown,
|
||||
});
|
||||
assertEquals(r, testCase.result);
|
||||
|
|
Loading…
Reference in a new issue