mirror of
https://github.com/denoland/deno.git
synced 2024-12-12 18:42:18 -05:00
fix(std/encoding/csv): improve error message on ParseError (#7057)
This commit is contained in:
parent
b44b7a9a60
commit
95a6812e82
2 changed files with 125 additions and 53 deletions
|
@ -16,13 +16,38 @@ export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
|
||||||
export const ERR_INVALID_DELIM = "Invalid Delimiter";
|
export const ERR_INVALID_DELIM = "Invalid Delimiter";
|
||||||
export const ERR_FIELD_COUNT = "wrong number of fields";
|
export const ERR_FIELD_COUNT = "wrong number of fields";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A ParseError is returned for parsing errors.
|
||||||
|
* Line numbers are 1-indexed and columns are 0-indexed.
|
||||||
|
*/
|
||||||
export class ParseError extends Error {
|
export class ParseError extends Error {
|
||||||
StartLine: number;
|
/** Line where the record starts */
|
||||||
Line: number;
|
startLine: number;
|
||||||
constructor(start: number, line: number, message: string) {
|
/** Line where the error occurred */
|
||||||
super(message);
|
line: number;
|
||||||
this.StartLine = start;
|
/** Column (rune index) where the error occurred */
|
||||||
this.Line = line;
|
column: number | null;
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
start: number,
|
||||||
|
line: number,
|
||||||
|
column: number | null,
|
||||||
|
message: string,
|
||||||
|
) {
|
||||||
|
super();
|
||||||
|
this.startLine = start;
|
||||||
|
this.column = column;
|
||||||
|
this.line = line;
|
||||||
|
|
||||||
|
if (message === ERR_FIELD_COUNT) {
|
||||||
|
this.message = `record on line ${line}: ${message}`;
|
||||||
|
} else if (start !== line) {
|
||||||
|
this.message =
|
||||||
|
`record on line ${start}; parse error on line ${line}, column ${column}: ${message}`;
|
||||||
|
} else {
|
||||||
|
this.message =
|
||||||
|
`parse error on line ${line}, column ${column}: ${message}`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,13 +86,13 @@ function chkOptions(opt: ReadOptions): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function readRecord(
|
async function readRecord(
|
||||||
Startline: number,
|
startLine: number,
|
||||||
reader: BufReader,
|
reader: BufReader,
|
||||||
opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
|
opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
|
||||||
): Promise<string[] | null> {
|
): Promise<string[] | null> {
|
||||||
const tp = new TextProtoReader(reader);
|
const tp = new TextProtoReader(reader);
|
||||||
const lineIndex = Startline;
|
|
||||||
let line = await readLine(tp);
|
let line = await readLine(tp);
|
||||||
|
let lineIndex = startLine + 1;
|
||||||
|
|
||||||
if (line === null) return null;
|
if (line === null) return null;
|
||||||
if (line.length === 0) {
|
if (line.length === 0) {
|
||||||
|
@ -80,7 +105,8 @@ async function readRecord(
|
||||||
|
|
||||||
assert(opt.comma != null);
|
assert(opt.comma != null);
|
||||||
|
|
||||||
let quoteError: string | null = null;
|
let fullLine = line;
|
||||||
|
let quoteError: ParseError | null = null;
|
||||||
const quote = '"';
|
const quote = '"';
|
||||||
const quoteLen = quote.length;
|
const quoteLen = quote.length;
|
||||||
const commaLen = opt.comma.length;
|
const commaLen = opt.comma.length;
|
||||||
|
@ -103,7 +129,15 @@ async function readRecord(
|
||||||
if (!opt.lazyQuotes) {
|
if (!opt.lazyQuotes) {
|
||||||
const j = field.indexOf(quote);
|
const j = field.indexOf(quote);
|
||||||
if (j >= 0) {
|
if (j >= 0) {
|
||||||
quoteError = ERR_BARE_QUOTE;
|
const col = runeCount(
|
||||||
|
fullLine.slice(0, fullLine.length - line.slice(j).length),
|
||||||
|
);
|
||||||
|
quoteError = new ParseError(
|
||||||
|
startLine + 1,
|
||||||
|
lineIndex,
|
||||||
|
col,
|
||||||
|
ERR_BARE_QUOTE,
|
||||||
|
);
|
||||||
break parseField;
|
break parseField;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -141,27 +175,50 @@ async function readRecord(
|
||||||
recordBuffer += quote;
|
recordBuffer += quote;
|
||||||
} else {
|
} else {
|
||||||
// `"*` sequence (invalid non-escaped quote).
|
// `"*` sequence (invalid non-escaped quote).
|
||||||
quoteError = ERR_QUOTE;
|
const col = runeCount(
|
||||||
|
fullLine.slice(0, fullLine.length - line.length - quoteLen),
|
||||||
|
);
|
||||||
|
quoteError = new ParseError(
|
||||||
|
startLine + 1,
|
||||||
|
lineIndex,
|
||||||
|
col,
|
||||||
|
ERR_QUOTE,
|
||||||
|
);
|
||||||
break parseField;
|
break parseField;
|
||||||
}
|
}
|
||||||
} else if (line.length > 0 || !(await isEOF(tp))) {
|
} else if (line.length > 0 || !(await isEOF(tp))) {
|
||||||
// Hit end of line (copy all data so far).
|
// Hit end of line (copy all data so far).
|
||||||
recordBuffer += line;
|
recordBuffer += line;
|
||||||
const r = await readLine(tp);
|
const r = await readLine(tp);
|
||||||
|
lineIndex++;
|
||||||
|
line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
|
||||||
|
fullLine = line;
|
||||||
if (r === null) {
|
if (r === null) {
|
||||||
|
// Abrupt end of file (EOF or error).
|
||||||
if (!opt.lazyQuotes) {
|
if (!opt.lazyQuotes) {
|
||||||
quoteError = ERR_QUOTE;
|
const col = runeCount(fullLine);
|
||||||
|
quoteError = new ParseError(
|
||||||
|
startLine + 1,
|
||||||
|
lineIndex,
|
||||||
|
col,
|
||||||
|
ERR_QUOTE,
|
||||||
|
);
|
||||||
break parseField;
|
break parseField;
|
||||||
}
|
}
|
||||||
fieldIndexes.push(recordBuffer.length);
|
fieldIndexes.push(recordBuffer.length);
|
||||||
break parseField;
|
break parseField;
|
||||||
}
|
}
|
||||||
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
|
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
|
||||||
line = r;
|
|
||||||
} else {
|
} else {
|
||||||
// Abrupt end of file (EOF or error).
|
// Abrupt end of file (EOF or error).
|
||||||
if (!opt.lazyQuotes) {
|
if (!opt.lazyQuotes) {
|
||||||
quoteError = ERR_QUOTE;
|
const col = runeCount(fullLine);
|
||||||
|
quoteError = new ParseError(
|
||||||
|
startLine + 1,
|
||||||
|
lineIndex,
|
||||||
|
col,
|
||||||
|
ERR_QUOTE,
|
||||||
|
);
|
||||||
break parseField;
|
break parseField;
|
||||||
}
|
}
|
||||||
fieldIndexes.push(recordBuffer.length);
|
fieldIndexes.push(recordBuffer.length);
|
||||||
|
@ -171,7 +228,7 @@ async function readRecord(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (quoteError) {
|
if (quoteError) {
|
||||||
throw new ParseError(Startline, lineIndex, quoteError);
|
throw quoteError;
|
||||||
}
|
}
|
||||||
const result = [] as string[];
|
const result = [] as string[];
|
||||||
let preIdx = 0;
|
let preIdx = 0;
|
||||||
|
@ -186,6 +243,11 @@ async function isEOF(tp: TextProtoReader): Promise<boolean> {
|
||||||
return (await tp.r.peek(0)) === null;
|
return (await tp.r.peek(0)) === null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function runeCount(s: string): number {
|
||||||
|
// Array.from iterates a string by code point, so each surrogate pair counts as one rune.
|
||||||
|
return Array.from(s).length;
|
||||||
|
}
|
||||||
|
|
||||||
async function readLine(tp: TextProtoReader): Promise<string | null> {
|
async function readLine(tp: TextProtoReader): Promise<string | null> {
|
||||||
let line: string;
|
let line: string;
|
||||||
const r = await tp.readLine();
|
const r = await tp.readLine();
|
||||||
|
@ -251,7 +313,7 @@ export async function readMatrix(
|
||||||
|
|
||||||
if (lineResult.length > 0) {
|
if (lineResult.length > 0) {
|
||||||
if (_nbFields && _nbFields !== lineResult.length) {
|
if (_nbFields && _nbFields !== lineResult.length) {
|
||||||
throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT);
|
throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
|
||||||
}
|
}
|
||||||
result.push(lineResult);
|
result.push(lineResult);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
// https://github.com/golang/go/blob/master/LICENSE
|
// https://github.com/golang/go/blob/master/LICENSE
|
||||||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||||
|
|
||||||
import { assertEquals, assert } from "../testing/asserts.ts";
|
import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts";
|
||||||
import {
|
import {
|
||||||
readMatrix,
|
readMatrix,
|
||||||
parse,
|
parse,
|
||||||
|
@ -12,6 +12,7 @@ import {
|
||||||
ERR_QUOTE,
|
ERR_QUOTE,
|
||||||
ERR_INVALID_DELIM,
|
ERR_INVALID_DELIM,
|
||||||
ERR_FIELD_COUNT,
|
ERR_FIELD_COUNT,
|
||||||
|
ParseError,
|
||||||
} from "./csv.ts";
|
} from "./csv.ts";
|
||||||
import { StringReader } from "../io/readers.ts";
|
import { StringReader } from "../io/readers.ts";
|
||||||
import { BufReader } from "../io/bufio.ts";
|
import { BufReader } from "../io/bufio.ts";
|
||||||
|
@ -133,8 +134,7 @@ field"`,
|
||||||
{
|
{
|
||||||
Name: "BadDoubleQuotes",
|
Name: "BadDoubleQuotes",
|
||||||
Input: `a""b,c`,
|
Input: `a""b,c`,
|
||||||
Error: ERR_BARE_QUOTE,
|
Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE),
|
||||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "TrimQuote",
|
Name: "TrimQuote",
|
||||||
|
@ -145,33 +145,31 @@ field"`,
|
||||||
{
|
{
|
||||||
Name: "BadBareQuote",
|
Name: "BadBareQuote",
|
||||||
Input: `a "word","b"`,
|
Input: `a "word","b"`,
|
||||||
Error: ERR_BARE_QUOTE,
|
Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE),
|
||||||
// &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadTrailingQuote",
|
Name: "BadTrailingQuote",
|
||||||
Input: `"a word",b"`,
|
Input: `"a word",b"`,
|
||||||
Error: ERR_BARE_QUOTE,
|
Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "ExtraneousQuote",
|
Name: "ExtraneousQuote",
|
||||||
Input: `"a "word","b"`,
|
Input: `"a "word","b"`,
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(1, 1, 3, ERR_QUOTE),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadFieldCount",
|
Name: "BadFieldCount",
|
||||||
Input: "a,b,c\nd,e",
|
Input: "a,b,c\nd,e",
|
||||||
Error: ERR_FIELD_COUNT,
|
Error: new ParseError(2, 2, null, ERR_FIELD_COUNT),
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 0,
|
FieldsPerRecord: 0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadFieldCount1",
|
Name: "BadFieldCount1",
|
||||||
Input: `a,b,c`,
|
Input: `a,b,c`,
|
||||||
// Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
|
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 2,
|
FieldsPerRecord: 2,
|
||||||
Error: ERR_FIELD_COUNT,
|
Error: new ParseError(1, 1, null, ERR_FIELD_COUNT),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "FieldCount",
|
Name: "FieldCount",
|
||||||
|
@ -265,14 +263,12 @@ x,,,
|
||||||
{
|
{
|
||||||
Name: "StartLine1", // Issue 19019
|
Name: "StartLine1", // Issue 19019
|
||||||
Input: 'a,"b\nc"d,e',
|
Input: 'a,"b\nc"d,e',
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(1, 2, 1, ERR_QUOTE),
|
||||||
// Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "StartLine2",
|
Name: "StartLine2",
|
||||||
Input: 'a,b\n"d\n\n,e',
|
Input: 'a,b\n\"d\n\n,e',
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(2, 5, 0, ERR_QUOTE),
|
||||||
// Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "CRLFInQuotedField", // Issue 21201
|
Name: "CRLFInQuotedField", // Issue 21201
|
||||||
|
@ -297,8 +293,7 @@ x,,,
|
||||||
{
|
{
|
||||||
Name: "QuotedTrailingCRCR",
|
Name: "QuotedTrailingCRCR",
|
||||||
Input: '"field"\r\r',
|
Input: '"field"\r\r',
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(1, 1, 6, ERR_QUOTE),
|
||||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "FieldCR",
|
Name: "FieldCR",
|
||||||
|
@ -389,8 +384,7 @@ x,,,
|
||||||
{
|
{
|
||||||
Name: "QuoteWithTrailingCRLF",
|
Name: "QuoteWithTrailingCRLF",
|
||||||
Input: '"foo"bar"\r\n',
|
Input: '"foo"bar"\r\n',
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(1, 1, 4, ERR_QUOTE),
|
||||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "LazyQuoteWithTrailingCRLF",
|
Name: "LazyQuoteWithTrailingCRLF",
|
||||||
|
@ -411,8 +405,7 @@ x,,,
|
||||||
{
|
{
|
||||||
Name: "OddQuotes",
|
Name: "OddQuotes",
|
||||||
Input: `"""""""`,
|
Input: `"""""""`,
|
||||||
Error: ERR_QUOTE,
|
Error: new ParseError(1, 1, 7, ERR_QUOTE),
|
||||||
// Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "LazyOddQuotes",
|
Name: "LazyOddQuotes",
|
||||||
|
@ -423,33 +416,33 @@ x,,,
|
||||||
{
|
{
|
||||||
Name: "BadComma1",
|
Name: "BadComma1",
|
||||||
Comma: "\n",
|
Comma: "\n",
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadComma2",
|
Name: "BadComma2",
|
||||||
Comma: "\r",
|
Comma: "\r",
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadComma3",
|
Name: "BadComma3",
|
||||||
Comma: '"',
|
Comma: '"',
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadComment1",
|
Name: "BadComment1",
|
||||||
Comment: "\n",
|
Comment: "\n",
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadComment2",
|
Name: "BadComment2",
|
||||||
Comment: "\r",
|
Comment: "\r",
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "BadCommaComment",
|
Name: "BadCommaComment",
|
||||||
Comma: "X",
|
Comma: "X",
|
||||||
Comment: "X",
|
Comment: "X",
|
||||||
Error: ERR_INVALID_DELIM,
|
Error: new Error(ERR_INVALID_DELIM),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
for (const t of testCases) {
|
for (const t of testCases) {
|
||||||
|
@ -457,8 +450,8 @@ for (const t of testCases) {
|
||||||
name: `[CSV] ${t.Name}`,
|
name: `[CSV] ${t.Name}`,
|
||||||
async fn(): Promise<void> {
|
async fn(): Promise<void> {
|
||||||
let comma = ",";
|
let comma = ",";
|
||||||
let comment;
|
let comment: string | undefined;
|
||||||
let fieldsPerRec;
|
let fieldsPerRec: number | undefined;
|
||||||
let trim = false;
|
let trim = false;
|
||||||
let lazyquote = false;
|
let lazyquote = false;
|
||||||
if (t.Comma) {
|
if (t.Comma) {
|
||||||
|
@ -478,9 +471,8 @@ for (const t of testCases) {
|
||||||
}
|
}
|
||||||
let actual;
|
let actual;
|
||||||
if (t.Error) {
|
if (t.Error) {
|
||||||
let err;
|
const err = await assertThrowsAsync(async () => {
|
||||||
try {
|
await readMatrix(
|
||||||
actual = await readMatrix(
|
|
||||||
new BufReader(new StringReader(t.Input ?? "")),
|
new BufReader(new StringReader(t.Input ?? "")),
|
||||||
{
|
{
|
||||||
comma: comma,
|
comma: comma,
|
||||||
|
@ -490,11 +482,9 @@ for (const t of testCases) {
|
||||||
lazyQuotes: lazyquote,
|
lazyQuotes: lazyquote,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
} catch (e) {
|
});
|
||||||
err = e;
|
|
||||||
}
|
assertEquals(err, t.Error);
|
||||||
assert(err);
|
|
||||||
assertEquals(err.message, t.Error);
|
|
||||||
} else {
|
} else {
|
||||||
actual = await readMatrix(
|
actual = await readMatrix(
|
||||||
new BufReader(new StringReader(t.Input ?? "")),
|
new BufReader(new StringReader(t.Input ?? "")),
|
||||||
|
@ -625,3 +615,23 @@ for (const testCase of parseTestCases) {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Deno.test({
|
||||||
|
name: "[CSV] ParseError.message",
|
||||||
|
fn(): void {
|
||||||
|
assertEquals(
|
||||||
|
new ParseError(2, 2, null, ERR_FIELD_COUNT).message,
|
||||||
|
`record on line 2: ${ERR_FIELD_COUNT}`,
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
new ParseError(1, 2, 1, ERR_QUOTE).message,
|
||||||
|
`record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`,
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
new ParseError(1, 1, 7, ERR_QUOTE).message,
|
||||||
|
`parse error on line 1, column 7: ${ERR_QUOTE}`,
|
||||||
|
);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
Loading…
Reference in a new issue