2019-05-24 09:33:42 -04:00
|
|
|
// Ported from Go:
|
|
|
|
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
|
2020-01-02 15:13:47 -05:00
|
|
|
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
2019-05-24 09:33:42 -04:00
|
|
|
|
2019-07-07 15:20:41 -04:00
|
|
|
import { BufReader } from "../io/bufio.ts";
|
2019-05-24 09:33:42 -04:00
|
|
|
import { TextProtoReader } from "../textproto/mod.ts";
|
2019-05-30 09:50:29 -04:00
|
|
|
import { StringReader } from "../io/readers.ts";
|
2020-02-08 15:15:59 -05:00
|
|
|
import { assert } from "../testing/asserts.ts";
|
2019-05-24 09:33:42 -04:00
|
|
|
|
|
|
|
/**
 * Characters that are rejected as `comma` or `comment` option values:
 * carriage return, line feed and the double quote.
 */
const INVALID_RUNE = [
  "\r",
  "\n",
  '"'
];
|
|
|
|
|
|
|
|
/**
 * Error thrown when a CSV record cannot be parsed.
 *
 * Besides the usual `message`, it carries the two line numbers handed to the
 * constructor so callers can report where the problem is.
 */
export class ParseError extends Error {
  /**
   * First constructor argument. Mirrors Go's `ParseError.StartLine`
   * (the line on which the record starts).
   */
  StartLine: number;
  /**
   * Second constructor argument. Mirrors Go's `ParseError.Line`
   * (the line on which the error was detected).
   */
  Line: number;

  /**
   * @param start value stored in `StartLine`
   * @param line value stored in `Line`
   * @param message human readable description of the failure
   */
  constructor(start: number, line: number, message: string) {
    super(message);
    // Without this the inherited name "Error" is used, so logs and
    // `String(err)` cannot tell a ParseError from a generic Error.
    this.name = "ParseError";
    this.StartLine = start;
    this.Line = line;
  }
}
|
|
|
|
|
2019-05-30 09:50:29 -04:00
|
|
|
/**
 * Options controlling how CSV input is split into records and fields.
 */
export interface ReadOptions {
  /**
   * Character which separates values. Default: ','.
   * Must not be "\r", "\n" or '"', and must differ from `comment`.
   */
  comma?: string;
  /**
   * Character that starts a comment line. There is no default: when it is
   * not set, no line is treated as a comment.
   * Must not be "\r", "\n" or '"', and must differ from `comma`.
   */
  comment?: string;
  /** Flag to trim the leading space of each value. Default: false. */
  trimLeadingSpace?: boolean;
  /**
   * When true, a '"' appearing inside a field is accepted instead of being
   * reported as a ParseError. Default: false.
   */
  lazyQuotes?: boolean;
  /**
   * Enables the check of the number of fields of each record.
   * If undefined, no check is made.
   * If 0, the first record is used as the referral for the number of fields.
   * If positive, every record must have exactly that many fields.
   */
  fieldsPerRecord?: number;
}
|
|
|
|
|
2019-12-20 15:21:30 -05:00
|
|
|
/**
 * Fills in default values on `opt` (in place) and validates its delimiters.
 *
 * - `comma` becomes "," when it is missing or empty.
 * - `trimLeadingSpace` becomes `false` when it is missing or falsy.
 *
 * Callers rely on the in-place mutation: after this returns, `opt.comma` is
 * always a non-empty string.
 *
 * @param opt options object to normalize; it is modified
 * @throws Error with message "Invalid Delimiter" when `comma` or `comment`
 *   is one of the characters in INVALID_RUNE, or when both are equal
 */
function chkOptions(opt: ReadOptions): void {
  if (!opt.comma) {
    opt.comma = ",";
  }
  if (!opt.trimLeadingSpace) {
    opt.trimLeadingSpace = false;
  }

  const comma = opt.comma;
  const comment = opt.comment;
  // `comment` is optional: only look it up when it is actually present so
  // that `includes` is always called with a string.
  const commentIsInvalid = comment != null && INVALID_RUNE.includes(comment);

  if (INVALID_RUNE.includes(comma) || commentIsInvalid || comma === comment) {
    throw new Error("Invalid Delimiter");
  }
}
|
|
|
|
|
2019-05-30 09:50:29 -04:00
|
|
|
/**
 * Reads a single line from `reader` and splits it into fields.
 *
 * The line is split on every occurrence of `opt.comma`; quotes do not protect
 * a separator. A leading '"' (and a matching trailing '"', if present) is
 * stripped from each field.
 *
 * @param Startline line number used for both positions of a thrown ParseError
 * @param reader buffered reader to take the next line from
 * @param opt read options; `opt.comma` must be set
 * @returns `Deno.EOF` when the reader is exhausted, an empty array for a
 *   blank line or a comment line, otherwise the fields of the line
 * @throws ParseError when `opt.lazyQuotes` is falsy and a field still
 *   contains a '"' that is not its first character after stripping
 */
async function read(
  Startline: number,
  reader: BufReader,
  opt: ReadOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | Deno.EOF> {
  const rawLine = await new TextProtoReader(reader).readLine();
  if (rawLine === Deno.EOF) {
    return Deno.EOF;
  }

  // Normalize a trailing \r\n to \n.
  const line = rawLine.endsWith("\r\n")
    ? rawLine.slice(0, -2) + "\n"
    : rawLine;

  const leftTrimmed = line.trimStart();
  // Blank lines produce no fields.
  if (leftTrimmed.length === 0) {
    return [];
  }
  // A line whose first non-space character is the comment character is ignored.
  if (opt.comment && leftTrimmed[0] === opt.comment) {
    return [];
  }

  assert(opt.comma != null);

  const fields: string[] = [];
  let sawBareQuote = false;
  for (let field of line.split(opt.comma)) {
    if (opt.trimLeadingSpace) {
      field = field.trimStart();
    }
    if (field.startsWith('"')) {
      // `substring` is used on purpose: for a field that is a single '"',
      // substring(1, 0) swaps its arguments and yields the quote itself.
      field = field.endsWith('"')
        ? field.substring(1, field.length - 1)
        : field.substring(1, field.length);
    }
    if (!opt.lazyQuotes && !field.startsWith('"') && field.includes('"')) {
      sawBareQuote = true;
    }
    fields.push(field);
  }

  if (sawBareQuote) {
    throw new ParseError(Startline, Startline, 'bare " in non-quoted-field');
  }
  return fields;
}
|
|
|
|
|
2019-12-20 15:21:30 -05:00
|
|
|
/**
 * Reads every record from `reader` and returns them as a matrix of strings.
 *
 * Blank lines and comment lines (for which `read` returns an empty array)
 * are skipped and never take part in the field-count check.
 *
 * Field-count check, driven by `opt.fieldsPerRecord`:
 * - undefined or negative: no check is made (negative follows Go's
 *   `encoding/csv`, from which this code is ported);
 * - 0: every record must have as many fields as the first non-empty record;
 * - positive: every record must have exactly that many fields.
 *
 * NOTE(review): `read` receives the zero-based line index, while the
 * "wrong number of fields" error reports the already incremented one.
 *
 * @param reader buffered reader providing the CSV text
 * @param opt read options; defaults are filled in place by `chkOptions`
 * @returns one `string[]` per non-empty record
 * @throws Error "Invalid Delimiter" when the options are invalid
 * @throws ParseError when a record has the wrong number of fields, or when
 *   `read` rejects a line
 */
export async function readMatrix(
  reader: BufReader,
  opt: ReadOptions = {
    comma: ",",
    trimLeadingSpace: false,
    lazyQuotes: false
  }
): Promise<string[][]> {
  const result: string[][] = [];
  // Stays undefined for as long as no field-count check applies.
  let expectedFields: number | undefined;
  let sawFirstRecord = false;
  let lineIndex = 0;
  chkOptions(opt);

  for (;;) {
    const record = await read(lineIndex, reader, opt);
    if (record === Deno.EOF) break;
    lineIndex++;

    // Blank or comment line: nothing to validate or store.
    if (record.length === 0) continue;

    if (!sawFirstRecord) {
      sawFirstRecord = true;
      const wanted = opt.fieldsPerRecord;
      if (wanted !== undefined) {
        if (wanted === 0) {
          // If fieldsPerRecord is 0, it is set to the number of fields in
          // the first real record. A leading blank line must not count,
          // otherwise the expected size would be 0 and the check disabled.
          expectedFields = record.length;
        } else if (wanted > 0) {
          expectedFields = wanted;
        }
      }
    }

    if (expectedFields !== undefined && expectedFields !== record.length) {
      throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
    }
    result.push(record);
  }
  return result;
}
|
2019-05-30 09:50:29 -04:00
|
|
|
|
|
|
|
/**
 * Definition of one column used by `parse` when rows are turned into
 * objects.
 */
export interface HeaderOptions {
  /** Key under which the value of this column is stored in each row object. */
  name: string;
  /**
   * Optional converter. When present it is called with the raw string of
   * every cell of this column and its result is stored instead of the string.
   */
  parse?: (input: string) => unknown;
}
|
|
|
|
|
2019-12-20 15:21:30 -05:00
|
|
|
/**
 * Options accepted by `parse`: every `ReadOptions` field plus the column
 * mapping and an optional per-row transformation.
 */
export interface ParseOptions extends ReadOptions {
  /**
   * Column mapping.
   * - `false`: rows are returned as arrays of strings.
   * - `true`: the first record of the input provides the column names.
   * - `string[]`: the given names are used as column names.
   * - `HeaderOptions[]`: the given definitions (name and optional per-cell
   *   parser) are used.
   */
  header: boolean | string[] | HeaderOptions[];
  /**
   * Optional function applied to every row. It receives the row object when
   * a header is in use, otherwise the array of strings of the row.
   */
  parse?: (input: unknown) => unknown;
}
|
|
|
|
|
|
|
|
/**
 * Csv parse helper to manipulate data.
 * Provides an auto/custom mapper for columns and parse function
 * for columns and rows.
 *
 * Without a header (`opt.header` falsy) every row is an array of strings.
 * With a header every row becomes an object keyed by column name; a column's
 * own `parse` function, when given, converts its cells. `opt.parse`, when
 * given, is finally applied to every row.
 *
 * @param input Input to parse. Can be a string or BufReader.
 * @param opt options of the parser.
 * @param [opt.header=false] HeaderOptions
 * @param [opt.parse=null] Parse function for rows.
 * @returns the parsed rows
 * @throws Error when a header is in use and a row does not have as many
 *   fields as there are columns
 *
 * Example:
 *     const r = await parse('a,b,c\ne,f,g\n', {
 *      header: ["this", "is", "sparta"],
 *       parse: (e: unknown) => {
 *         const row = e as Record<string, unknown>;
 *         return { super: row.this, street: row.is, fighter: row.sparta };
 *       }
 *     });
 * // output
 * [
 *   { super: "a", street: "b", fighter: "c" },
 *   { super: "e", street: "f", fighter: "g" }
 * ]
 */
export async function parse(
  input: string | BufReader,
  opt: ParseOptions = {
    header: false
  }
): Promise<unknown[]> {
  const source =
    input instanceof BufReader
      ? input
      : new BufReader(new StringReader(input));
  const r: string[][] = await readMatrix(source, opt);

  // Captured once so the narrowed type is still known inside the callbacks.
  const rowParser = opt.parse;

  if (opt.header) {
    let headers: HeaderOptions[] = [];
    // Index of the record being converted, reported in the error message.
    let i = 0;

    if (Array.isArray(opt.header)) {
      if (typeof opt.header[0] !== "string") {
        headers = opt.header as HeaderOptions[];
      } else {
        const names = opt.header as string[];
        headers = names.map((name): HeaderOptions => ({ name }));
      }
    } else {
      // header === true: the first record holds the column names.
      const head = r.shift();
      assert(head != null);
      headers = head.map((name): HeaderOptions => ({ name }));
      i++;
    }

    return r.map((e): unknown => {
      if (e.length !== headers.length) {
        // Throw a real Error (not a bare string) so callers get a stack
        // trace and `instanceof Error` checks work.
        throw new Error(`Error number of fields line:${i}`);
      }
      i++;

      const out: Record<string, unknown> = {};
      for (let j = 0; j < e.length; j++) {
        const h = headers[j];
        out[h.name] = h.parse ? h.parse(e[j]) : e[j];
      }
      return rowParser ? rowParser(out) : out;
    });
  }

  if (rowParser) {
    return r.map((e: string[]): unknown => rowParser(e));
  }
  return r;
}
|