denoland-deno/std/encoding/csv.ts

// Ported from Go:
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.

import { BufReader } from "../io/bufio.ts";
import { TextProtoReader } from "../textproto/mod.ts";
import { StringReader } from "../io/readers.ts";

const INVALID_RUNE = ["\r", "\n", '"'];

export class ParseError extends Error {
  StartLine: number;
  Line: number;
  constructor(start: number, line: number, message: string) {
    super(message);
    this.StartLine = start;
    this.Line = line;
  }
}

/**
 * @property comma - Character which separates values. Default: ','
 * @property comment - Character to start a comment. Default: '#'
 * @property trimLeadingSpace - Flag to trim the leading space of the value.
 *           Default: 'false'
 * @property lazyQuotes - Allow unquoted quote in a quoted field or non double
 *           quoted quotes in quoted field Default: 'false'
 * @property fieldsPerRecord - Enabling the check of fields for each row.
 *           If == 0, first row is used as referal for the number of fields.
 */
export interface ParseOptions {
  comma?: string;
  comment?: string;
  trimLeadingSpace?: boolean;
  lazyQuotes?: boolean;
  fieldsPerRecord?: number;
}

function chkOptions(opt: ParseOptions): void {
  if (!opt.comma) opt.comma = ",";
  if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;
  if (
    INVALID_RUNE.includes(opt.comma!) ||
    INVALID_RUNE.includes(opt.comment!) ||
    opt.comma === opt.comment
  ) {
    throw new Error("Invalid Delimiter");
  }
}

async function read(
  Startline: number,
  reader: BufReader,
  opt: ParseOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | Deno.EOF> {
  const tp = new TextProtoReader(reader);
  let line: string;
  let result: string[] = [];
  const lineIndex = Startline;

  const r = await tp.readLine();
  if (r === Deno.EOF) return Deno.EOF;
  line = r;
  // Normalize \r\n to \n on all input lines.
  if (
    line.length >= 2 &&
    line[line.length - 2] === "\r" &&
    line[line.length - 1] === "\n"
  ) {
    line = line.substring(0, line.length - 2);
    line = line + "\n";
  }

  const trimmedLine = line.trimLeft();
  if (trimmedLine.length === 0) {
    return [];
  }

  // line starting with comment character is ignored
  if (opt.comment && trimmedLine[0] === opt.comment) {
    return [];
  }

  result = line.split(opt.comma!);

  let quoteError = false;
  result = result.map(
    (r): string => {
      if (opt.trimLeadingSpace) {
        r = r.trimLeft();
      }
      if (r[0] === '"' && r[r.length - 1] === '"') {
        r = r.substring(1, r.length - 1);
      } else if (r[0] === '"') {
        r = r.substring(1, r.length);
      }

      if (!opt.lazyQuotes) {
        if (r[0] !== '"' && r.indexOf('"') !== -1) {
          quoteError = true;
        }
      }
      return r;
    }
  );
  if (quoteError) {
    throw new ParseError(Startline, lineIndex, 'bare " in non-quoted-field');
  }
  return result;
}

export async function readAll(
  reader: BufReader,
  opt: ParseOptions = {
    comma: ",",
    trimLeadingSpace: false,
    lazyQuotes: false
  }
): Promise<string[][]> {
  const result: string[][] = [];
  let _nbFields: number;
  let lineResult: string[];
  let first = true;
  let lineIndex = 0;
  chkOptions(opt);

  for (;;) {
    const r = await read(lineIndex, reader, opt);
    if (r === Deno.EOF) break;
    lineResult = r;
    lineIndex++;
    // If fieldsPerRecord is 0, Read sets it to
    // the number of fields in the first record
    if (first) {
      first = false;
      if (opt.fieldsPerRecord !== undefined) {
        if (opt.fieldsPerRecord === 0) {
          _nbFields = lineResult.length;
        } else {
          _nbFields = opt.fieldsPerRecord;
        }
      }
    }

    if (lineResult.length > 0) {
      if (_nbFields! && _nbFields! !== lineResult.length) {
        throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
      }
      result.push(lineResult);
    }
  }
  return result;
}

/**
 * HeaderOption provides the column definition
 * and the parse function for each entry of the
 * column.
 */
export interface HeaderOption {
  name: string;
  parse?: (input: string) => unknown;
}

export interface ExtendedParseOptions extends ParseOptions {
  header: boolean | string[] | HeaderOption[];
  parse?: (input: unknown) => unknown;
}

/**
 * Csv parse helper to manipulate data.
 * Provides an auto/custom mapper for columns and parse function
 * for columns and rows.
 * @param input Input to parse. Can be a string or BufReader.
 * @param opt options of the parser.
 * @param [opt.header=false] HeaderOptions
 * @param [opt.parse=null] Parse function for rows.
 * Example:
 *     const r = await parseFile('a,b,c\ne,f,g\n', {
 *      header: ["this", "is", "sparta"],
 *       parse: (e: Record<string, unknown>) => {
 *         return { super: e.this, street: e.is, fighter: e.sparta };
 *       }
 *     });
 * // output
 * [
 *   { super: "a", street: "b", fighter: "c" },
 *   { super: "e", street: "f", fighter: "g" }
 * ]
 */
export async function parse(
  input: string | BufReader,
  opt: ExtendedParseOptions = {
    header: false
  }
): Promise<unknown[]> {
  let r: string[][];
  if (input instanceof BufReader) {
    r = await readAll(input, opt);
  } else {
    r = await readAll(new BufReader(new StringReader(input)), opt);
  }
  if (opt.header) {
    let headers: HeaderOption[] = [];
    let i = 0;
    if (Array.isArray(opt.header)) {
      if (typeof opt.header[0] !== "string") {
        headers = opt.header as HeaderOption[];
      } else {
        const h = opt.header as string[];
        headers = h.map(
          (e): HeaderOption => {
            return {
              name: e
            };
          }
        );
      }
    } else {
      headers = r.shift()!.map(
        (e): HeaderOption => {
          return {
            name: e
          };
        }
      );
      i++;
    }
    return r.map(
      (e): unknown => {
        if (e.length !== headers.length) {
          throw `Error number of fields line:${i}`;
        }
        i++;
        const out: Record<string, unknown> = {};
        for (let j = 0; j < e.length; j++) {
          const h = headers[j];
          if (h.parse) {
            out[h.name] = h.parse(e[j]);
          } else {
            out[h.name] = e[j];
          }
        }
        if (opt.parse) {
          return opt.parse(out);
        }
        return out;
      }
    );
  }
  if (opt.parse) {
    return r.map((e: string[]): unknown => opt.parse!(e));
  }
  return r;
}