diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts index 27b40c5668..bab856b51e 100644 --- a/std/encoding/csv.ts +++ b/std/encoding/csv.ts @@ -16,13 +16,38 @@ export const ERR_QUOTE = 'extraneous or missing " in quoted-field'; export const ERR_INVALID_DELIM = "Invalid Delimiter"; export const ERR_FIELD_COUNT = "wrong number of fields"; +/** + * A ParseError is returned for parsing errors. + * Line numbers are 1-indexed and columns are 0-indexed. + */ export class ParseError extends Error { - StartLine: number; - Line: number; - constructor(start: number, line: number, message: string) { - super(message); - this.StartLine = start; - this.Line = line; + /** Line where the record starts*/ + startLine: number; + /** Line where the error occurred */ + line: number; + /** Column (rune index) where the error occurred */ + column: number | null; + + constructor( + start: number, + line: number, + column: number | null, + message: string, + ) { + super(); + this.startLine = start; + this.column = column; + this.line = line; + + if (message === ERR_FIELD_COUNT) { + this.message = `record on line ${line}: ${message}`; + } else if (start !== line) { + this.message = + `record on line ${start}; parse error on line ${line}, column ${column}: ${message}`; + } else { + this.message = + `parse error on line ${line}, column ${column}: ${message}`; + } } } @@ -61,13 +86,13 @@ function chkOptions(opt: ReadOptions): void { } async function readRecord( - Startline: number, + startLine: number, reader: BufReader, opt: ReadOptions = { comma: ",", trimLeadingSpace: false }, ): Promise { const tp = new TextProtoReader(reader); - const lineIndex = Startline; let line = await readLine(tp); + let lineIndex = startLine + 1; if (line === null) return null; if (line.length === 0) { @@ -80,7 +105,8 @@ async function readRecord( assert(opt.comma != null); - let quoteError: string | null = null; + let fullLine = line; + let quoteError: ParseError | null = null; const quote = '"'; const quoteLen = quote.length; const commaLen = opt.comma.length; @@ -103,7 +129,15 @@ async function readRecord( if (!opt.lazyQuotes) { const j = field.indexOf(quote); if (j >= 0) { - quoteError = ERR_BARE_QUOTE; + const col = runeCount( + fullLine.slice(0, fullLine.length - line.slice(j).length), + ); + quoteError = new ParseError( + startLine + 1, + lineIndex, + col, + ERR_BARE_QUOTE, + ); break parseField; } } @@ -141,27 +175,50 @@ async function readRecord( recordBuffer += quote; } else { // `"*` sequence (invalid non-escaped quote). - quoteError = ERR_QUOTE; + const col = runeCount( + fullLine.slice(0, fullLine.length - line.length - quoteLen), + ); + quoteError = new ParseError( + startLine + 1, + lineIndex, + col, + ERR_QUOTE, + ); break parseField; } } else if (line.length > 0 || !(await isEOF(tp))) { // Hit end of line (copy all data so far). recordBuffer += line; const r = await readLine(tp); + lineIndex++; + line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go. + fullLine = line; if (r === null) { + // Abrupt end of file (EOF or error). if (!opt.lazyQuotes) { - quoteError = ERR_QUOTE; + const col = runeCount(fullLine); + quoteError = new ParseError( + startLine + 1, + lineIndex, + col, + ERR_QUOTE, + ); break parseField; } fieldIndexes.push(recordBuffer.length); break parseField; } recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.) - line = r; } else { // Abrupt end of file (EOF on error). if (!opt.lazyQuotes) { - quoteError = ERR_QUOTE; + const col = runeCount(fullLine); + quoteError = new ParseError( + startLine + 1, + lineIndex, + col, + ERR_QUOTE, + ); break parseField; } fieldIndexes.push(recordBuffer.length); @@ -171,7 +228,7 @@ async function readRecord( } } if (quoteError) { - throw new ParseError(Startline, lineIndex, quoteError); + throw quoteError; } const result = [] as string[]; let preIdx = 0; @@ -186,6 +243,11 @@ async function isEOF(tp: TextProtoReader): Promise { return (await tp.r.peek(0)) === null; } +function runeCount(s: string): number { + // Array.from considers the surrogate pair. + return Array.from(s).length; +} + async function readLine(tp: TextProtoReader): Promise { let line: string; const r = await tp.readLine(); @@ -251,7 +313,7 @@ export async function readMatrix( if (lineResult.length > 0) { if (_nbFields && _nbFields !== lineResult.length) { - throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT); + throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT); } result.push(lineResult); } diff --git a/std/encoding/csv_test.ts b/std/encoding/csv_test.ts index 07a8e0965b..aab9e86b11 100644 --- a/std/encoding/csv_test.ts +++ b/std/encoding/csv_test.ts @@ -4,7 +4,7 @@ // https://github.com/golang/go/blob/master/LICENSE // Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. -import { assertEquals, assert } from "../testing/asserts.ts"; +import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts"; import { readMatrix, parse, @@ -12,6 +12,7 @@ import { ERR_QUOTE, ERR_INVALID_DELIM, ERR_FIELD_COUNT, + ParseError, } from "./csv.ts"; import { StringReader } from "../io/readers.ts"; import { BufReader } from "../io/bufio.ts"; @@ -133,8 +134,7 @@ field"`, { Name: "BadDoubleQuotes", Input: `a""b,c`, - Error: ERR_BARE_QUOTE, - // Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, + Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE), }, { Name: "TrimQuote", @@ -145,33 +145,31 @@ field"`, { Name: "BadBareQuote", Input: `a "word","b"`, - Error: ERR_BARE_QUOTE, - // &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote} + Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE), }, { Name: "BadTrailingQuote", Input: `"a word",b"`, - Error: ERR_BARE_QUOTE, + Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE), }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, - Error: ERR_QUOTE, + Error: new ParseError(1, 1, 3, ERR_QUOTE), }, { Name: "BadFieldCount", Input: "a,b,c\nd,e", - Error: ERR_FIELD_COUNT, + Error: new ParseError(2, 2, null, ERR_FIELD_COUNT), UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "BadFieldCount1", Input: `a,b,c`, - // Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, UseFieldsPerRecord: true, FieldsPerRecord: 2, - Error: ERR_FIELD_COUNT, + Error: new ParseError(1, 1, null, ERR_FIELD_COUNT), }, { Name: "FieldCount", @@ -265,14 +263,12 @@ x,,, { Name: "StartLine1", // Issue 19019 Input: 'a,"b\nc"d,e', - Error: ERR_QUOTE, - // Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, + Error: new ParseError(1, 2, 1, ERR_QUOTE), }, { Name: "StartLine2", - Input: 'a,b\n"d\n\n,e', - Error: ERR_QUOTE, - // Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, + Input: 'a,b\n\"d\n\n,e', + Error: new ParseError(2, 5, 0, ERR_QUOTE), }, { Name: "CRLFInQuotedField", // Issue 21201 @@ -297,8 +293,7 @@ x,,, { Name: "QuotedTrailingCRCR", Input: '"field"\r\r', - Error: ERR_QUOTE, - // Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, + Error: new ParseError(1, 1, 6, ERR_QUOTE), }, { Name: "FieldCR", @@ -389,8 +384,7 @@ x,,, { Name: "QuoteWithTrailingCRLF", Input: '"foo"bar"\r\n', - Error: ERR_QUOTE, - // Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, + Error: new ParseError(1, 1, 4, ERR_QUOTE), }, { Name: "LazyQuoteWithTrailingCRLF", @@ -411,8 +405,7 @@ x,,, { Name: "OddQuotes", Input: `"""""""`, - Error: ERR_QUOTE, - // Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}", + Error: new ParseError(1, 1, 7, ERR_QUOTE), }, { Name: "LazyOddQuotes", @@ -423,33 +416,33 @@ x,,, { Name: "BadComma1", Comma: "\n", - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma2", Comma: "\r", - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma3", Comma: '"', - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComment1", Comment: "\n", - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComment2", Comment: "\r", - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadCommaComment", Comma: "X", Comment: "X", - Error: ERR_INVALID_DELIM, + Error: new Error(ERR_INVALID_DELIM), }, ]; for (const t of testCases) { @@ -457,8 +450,8 @@ for (const t of testCases) { name: `[CSV] ${t.Name}`, async fn(): Promise { let comma = ","; - let comment; - let fieldsPerRec; + let comment: string | undefined; + let fieldsPerRec: number | undefined; let trim = false; let lazyquote = false; if (t.Comma) { @@ -478,9 +471,8 @@ for (const t of testCases) { } let actual; if (t.Error) { - let err; - try { - actual = await readMatrix( + const err = await assertThrowsAsync(async () => { + await readMatrix( new BufReader(new StringReader(t.Input ?? "")), { comma: comma, @@ -490,11 +482,9 @@ for (const t of testCases) { lazyQuotes: lazyquote, }, ); - } catch (e) { - err = e; - } - assert(err); - assertEquals(err.message, t.Error); + }); + + assertEquals(err, t.Error); } else { actual = await readMatrix( new BufReader(new StringReader(t.Input ?? "")), @@ -625,3 +615,23 @@ for (const testCase of parseTestCases) { }, }); } + +Deno.test({ + name: "[CSV] ParseError.message", + fn(): void { + assertEquals( + new ParseError(2, 2, null, ERR_FIELD_COUNT).message, + `record on line 2: ${ERR_FIELD_COUNT}`, + ); + + assertEquals( + new ParseError(1, 2, 1, ERR_QUOTE).message, + `record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`, + ); + + assertEquals( + new ParseError(1, 1, 7, ERR_QUOTE).message, + `parse error on line 1, column 7: ${ERR_QUOTE}`, + ); + }, +});