1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-12 18:42:18 -05:00

fix(std/encoding/csv): improve error message on ParseError (#7057)

This commit is contained in:
uki00a 2020-08-18 06:47:01 +09:00 committed by GitHub
parent b44b7a9a60
commit 95a6812e82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 125 additions and 53 deletions

View file

@ -16,13 +16,38 @@ export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
export const ERR_INVALID_DELIM = "Invalid Delimiter"; export const ERR_INVALID_DELIM = "Invalid Delimiter";
export const ERR_FIELD_COUNT = "wrong number of fields"; export const ERR_FIELD_COUNT = "wrong number of fields";
/**
* A ParseError is thrown for parsing errors.
* Line numbers are 1-indexed and columns are 0-indexed.
*/
export class ParseError extends Error { export class ParseError extends Error {
StartLine: number; /** Line where the record starts*/
Line: number; startLine: number;
constructor(start: number, line: number, message: string) { /** Line where the error occurred */
super(message); line: number;
this.StartLine = start; /** Column (rune index) where the error occurred */
this.Line = line; column: number | null;
constructor(
start: number,
line: number,
column: number | null,
message: string,
) {
super();
this.startLine = start;
this.column = column;
this.line = line;
if (message === ERR_FIELD_COUNT) {
this.message = `record on line ${line}: ${message}`;
} else if (start !== line) {
this.message =
`record on line ${start}; parse error on line ${line}, column ${column}: ${message}`;
} else {
this.message =
`parse error on line ${line}, column ${column}: ${message}`;
}
} }
} }
@ -61,13 +86,13 @@ function chkOptions(opt: ReadOptions): void {
} }
async function readRecord( async function readRecord(
Startline: number, startLine: number,
reader: BufReader, reader: BufReader,
opt: ReadOptions = { comma: ",", trimLeadingSpace: false }, opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
): Promise<string[] | null> { ): Promise<string[] | null> {
const tp = new TextProtoReader(reader); const tp = new TextProtoReader(reader);
const lineIndex = Startline;
let line = await readLine(tp); let line = await readLine(tp);
let lineIndex = startLine + 1;
if (line === null) return null; if (line === null) return null;
if (line.length === 0) { if (line.length === 0) {
@ -80,7 +105,8 @@ async function readRecord(
assert(opt.comma != null); assert(opt.comma != null);
let quoteError: string | null = null; let fullLine = line;
let quoteError: ParseError | null = null;
const quote = '"'; const quote = '"';
const quoteLen = quote.length; const quoteLen = quote.length;
const commaLen = opt.comma.length; const commaLen = opt.comma.length;
@ -103,7 +129,15 @@ async function readRecord(
if (!opt.lazyQuotes) { if (!opt.lazyQuotes) {
const j = field.indexOf(quote); const j = field.indexOf(quote);
if (j >= 0) { if (j >= 0) {
quoteError = ERR_BARE_QUOTE; const col = runeCount(
fullLine.slice(0, fullLine.length - line.slice(j).length),
);
quoteError = new ParseError(
startLine + 1,
lineIndex,
col,
ERR_BARE_QUOTE,
);
break parseField; break parseField;
} }
} }
@ -141,27 +175,50 @@ async function readRecord(
recordBuffer += quote; recordBuffer += quote;
} else { } else {
// `"*` sequence (invalid non-escaped quote). // `"*` sequence (invalid non-escaped quote).
quoteError = ERR_QUOTE; const col = runeCount(
fullLine.slice(0, fullLine.length - line.length - quoteLen),
);
quoteError = new ParseError(
startLine + 1,
lineIndex,
col,
ERR_QUOTE,
);
break parseField; break parseField;
} }
} else if (line.length > 0 || !(await isEOF(tp))) { } else if (line.length > 0 || !(await isEOF(tp))) {
// Hit end of line (copy all data so far). // Hit end of line (copy all data so far).
recordBuffer += line; recordBuffer += line;
const r = await readLine(tp); const r = await readLine(tp);
lineIndex++;
line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
fullLine = line;
if (r === null) { if (r === null) {
// Abrupt end of file (EOF or error).
if (!opt.lazyQuotes) { if (!opt.lazyQuotes) {
quoteError = ERR_QUOTE; const col = runeCount(fullLine);
quoteError = new ParseError(
startLine + 1,
lineIndex,
col,
ERR_QUOTE,
);
break parseField; break parseField;
} }
fieldIndexes.push(recordBuffer.length); fieldIndexes.push(recordBuffer.length);
break parseField; break parseField;
} }
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.) recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
line = r;
} else { } else {
// Abrupt end of file (EOF on error). // Abrupt end of file (EOF on error).
if (!opt.lazyQuotes) { if (!opt.lazyQuotes) {
quoteError = ERR_QUOTE; const col = runeCount(fullLine);
quoteError = new ParseError(
startLine + 1,
lineIndex,
col,
ERR_QUOTE,
);
break parseField; break parseField;
} }
fieldIndexes.push(recordBuffer.length); fieldIndexes.push(recordBuffer.length);
@ -171,7 +228,7 @@ async function readRecord(
} }
} }
if (quoteError) { if (quoteError) {
throw new ParseError(Startline, lineIndex, quoteError); throw quoteError;
} }
const result = [] as string[]; const result = [] as string[];
let preIdx = 0; let preIdx = 0;
@ -186,6 +243,11 @@ async function isEOF(tp: TextProtoReader): Promise<boolean> {
return (await tp.r.peek(0)) === null; return (await tp.r.peek(0)) === null;
} }
/**
 * Counts the runes (Unicode code points) in `s`.
 *
 * Walking the string with its iterator yields one item per code point, so a
 * surrogate pair contributes 1 to the total instead of the 2 UTF-16 code
 * units that `s.length` would report.
 *
 * @param s The string to measure.
 * @returns The number of code points in `s`.
 */
function runeCount(s: string): number {
  let total = 0;
  for (const _rune of s) {
    total += 1;
  }
  return total;
}
async function readLine(tp: TextProtoReader): Promise<string | null> { async function readLine(tp: TextProtoReader): Promise<string | null> {
let line: string; let line: string;
const r = await tp.readLine(); const r = await tp.readLine();
@ -251,7 +313,7 @@ export async function readMatrix(
if (lineResult.length > 0) { if (lineResult.length > 0) {
if (_nbFields && _nbFields !== lineResult.length) { if (_nbFields && _nbFields !== lineResult.length) {
throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT); throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
} }
result.push(lineResult); result.push(lineResult);
} }

View file

@ -4,7 +4,7 @@
// https://github.com/golang/go/blob/master/LICENSE // https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. // Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { assertEquals, assert } from "../testing/asserts.ts"; import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts";
import { import {
readMatrix, readMatrix,
parse, parse,
@ -12,6 +12,7 @@ import {
ERR_QUOTE, ERR_QUOTE,
ERR_INVALID_DELIM, ERR_INVALID_DELIM,
ERR_FIELD_COUNT, ERR_FIELD_COUNT,
ParseError,
} from "./csv.ts"; } from "./csv.ts";
import { StringReader } from "../io/readers.ts"; import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts"; import { BufReader } from "../io/bufio.ts";
@ -133,8 +134,7 @@ field"`,
{ {
Name: "BadDoubleQuotes", Name: "BadDoubleQuotes",
Input: `a""b,c`, Input: `a""b,c`,
Error: ERR_BARE_QUOTE, Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE),
// Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
}, },
{ {
Name: "TrimQuote", Name: "TrimQuote",
@ -145,33 +145,31 @@ field"`,
{ {
Name: "BadBareQuote", Name: "BadBareQuote",
Input: `a "word","b"`, Input: `a "word","b"`,
Error: ERR_BARE_QUOTE, Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE),
// &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}
}, },
{ {
Name: "BadTrailingQuote", Name: "BadTrailingQuote",
Input: `"a word",b"`, Input: `"a word",b"`,
Error: ERR_BARE_QUOTE, Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE),
}, },
{ {
Name: "ExtraneousQuote", Name: "ExtraneousQuote",
Input: `"a "word","b"`, Input: `"a "word","b"`,
Error: ERR_QUOTE, Error: new ParseError(1, 1, 3, ERR_QUOTE),
}, },
{ {
Name: "BadFieldCount", Name: "BadFieldCount",
Input: "a,b,c\nd,e", Input: "a,b,c\nd,e",
Error: ERR_FIELD_COUNT, Error: new ParseError(2, 2, null, ERR_FIELD_COUNT),
UseFieldsPerRecord: true, UseFieldsPerRecord: true,
FieldsPerRecord: 0, FieldsPerRecord: 0,
}, },
{ {
Name: "BadFieldCount1", Name: "BadFieldCount1",
Input: `a,b,c`, Input: `a,b,c`,
// Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true, UseFieldsPerRecord: true,
FieldsPerRecord: 2, FieldsPerRecord: 2,
Error: ERR_FIELD_COUNT, Error: new ParseError(1, 1, null, ERR_FIELD_COUNT),
}, },
{ {
Name: "FieldCount", Name: "FieldCount",
@ -265,14 +263,12 @@ x,,,
{ {
Name: "StartLine1", // Issue 19019 Name: "StartLine1", // Issue 19019
Input: 'a,"b\nc"d,e', Input: 'a,"b\nc"d,e',
Error: ERR_QUOTE, Error: new ParseError(1, 2, 1, ERR_QUOTE),
// Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
}, },
{ {
Name: "StartLine2", Name: "StartLine2",
Input: 'a,b\n"d\n\n,e', Input: 'a,b\n\"d\n\n,e',
Error: ERR_QUOTE, Error: new ParseError(2, 5, 0, ERR_QUOTE),
// Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
}, },
{ {
Name: "CRLFInQuotedField", // Issue 21201 Name: "CRLFInQuotedField", // Issue 21201
@ -297,8 +293,7 @@ x,,,
{ {
Name: "QuotedTrailingCRCR", Name: "QuotedTrailingCRCR",
Input: '"field"\r\r', Input: '"field"\r\r',
Error: ERR_QUOTE, Error: new ParseError(1, 1, 6, ERR_QUOTE),
// Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
}, },
{ {
Name: "FieldCR", Name: "FieldCR",
@ -389,8 +384,7 @@ x,,,
{ {
Name: "QuoteWithTrailingCRLF", Name: "QuoteWithTrailingCRLF",
Input: '"foo"bar"\r\n', Input: '"foo"bar"\r\n',
Error: ERR_QUOTE, Error: new ParseError(1, 1, 4, ERR_QUOTE),
// Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
}, },
{ {
Name: "LazyQuoteWithTrailingCRLF", Name: "LazyQuoteWithTrailingCRLF",
@ -411,8 +405,7 @@ x,,,
{ {
Name: "OddQuotes", Name: "OddQuotes",
Input: `"""""""`, Input: `"""""""`,
Error: ERR_QUOTE, Error: new ParseError(1, 1, 7, ERR_QUOTE),
// Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
}, },
{ {
Name: "LazyOddQuotes", Name: "LazyOddQuotes",
@ -423,33 +416,33 @@ x,,,
{ {
Name: "BadComma1", Name: "BadComma1",
Comma: "\n", Comma: "\n",
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
{ {
Name: "BadComma2", Name: "BadComma2",
Comma: "\r", Comma: "\r",
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
{ {
Name: "BadComma3", Name: "BadComma3",
Comma: '"', Comma: '"',
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
{ {
Name: "BadComment1", Name: "BadComment1",
Comment: "\n", Comment: "\n",
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
{ {
Name: "BadComment2", Name: "BadComment2",
Comment: "\r", Comment: "\r",
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
{ {
Name: "BadCommaComment", Name: "BadCommaComment",
Comma: "X", Comma: "X",
Comment: "X", Comment: "X",
Error: ERR_INVALID_DELIM, Error: new Error(ERR_INVALID_DELIM),
}, },
]; ];
for (const t of testCases) { for (const t of testCases) {
@ -457,8 +450,8 @@ for (const t of testCases) {
name: `[CSV] ${t.Name}`, name: `[CSV] ${t.Name}`,
async fn(): Promise<void> { async fn(): Promise<void> {
let comma = ","; let comma = ",";
let comment; let comment: string | undefined;
let fieldsPerRec; let fieldsPerRec: number | undefined;
let trim = false; let trim = false;
let lazyquote = false; let lazyquote = false;
if (t.Comma) { if (t.Comma) {
@ -478,9 +471,8 @@ for (const t of testCases) {
} }
let actual; let actual;
if (t.Error) { if (t.Error) {
let err; const err = await assertThrowsAsync(async () => {
try { await readMatrix(
actual = await readMatrix(
new BufReader(new StringReader(t.Input ?? "")), new BufReader(new StringReader(t.Input ?? "")),
{ {
comma: comma, comma: comma,
@ -490,11 +482,9 @@ for (const t of testCases) {
lazyQuotes: lazyquote, lazyQuotes: lazyquote,
}, },
); );
} catch (e) { });
err = e;
} assertEquals(err, t.Error);
assert(err);
assertEquals(err.message, t.Error);
} else { } else {
actual = await readMatrix( actual = await readMatrix(
new BufReader(new StringReader(t.Input ?? "")), new BufReader(new StringReader(t.Input ?? "")),
@ -625,3 +615,23 @@ for (const testCase of parseTestCases) {
}, },
}); });
} }
// Verifies the text that ParseError assigns to `message` for each of its
// three formats: field-count errors, errors on a later line than the record
// start, and errors on the record's first line.
Deno.test({
  name: "[CSV] ParseError.message",
  fn(): void {
    // Each row pairs the constructor arguments (start, line, column, message)
    // with the exact `message` expected on the constructed error.
    const expectations: Array<
      [[number, number, number | null, string], string]
    > = [
      [
        [2, 2, null, ERR_FIELD_COUNT],
        `record on line 2: ${ERR_FIELD_COUNT}`,
      ],
      [
        [1, 2, 1, ERR_QUOTE],
        `record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`,
      ],
      [
        [1, 1, 7, ERR_QUOTE],
        `parse error on line 1, column 7: ${ERR_QUOTE}`,
      ],
    ];

    for (const [ctorArgs, expectedMessage] of expectations) {
      assertEquals(new ParseError(...ctorArgs).message, expectedMessage);
    }
  },
});