1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-27 09:39:08 -05:00

feat(std/io): Export readDelim(), readStringDelim() and readLines() from bufio.ts (#4019)

Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
Nayeem Rahman 2020-02-18 00:51:13 +00:00 committed by GitHub
parent 7b9f6e9c45
commit 5a3292047c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 138 additions and 98 deletions

View file

@ -27,7 +27,7 @@ const xevalPath = "examples/xeval.ts";
Deno.test(async function xevalCliReplvar(): Promise<void> {
const p = run({
args: [execPath(), xevalPath, "--", "--replvar=abc", "console.log(abc)"],
args: [execPath(), xevalPath, "--replvar=abc", "console.log(abc)"],
stdin: "piped",
stdout: "piped",
stderr: "null"
@ -41,7 +41,7 @@ Deno.test(async function xevalCliReplvar(): Promise<void> {
Deno.test(async function xevalCliSyntaxError(): Promise<void> {
const p = run({
args: [execPath(), xevalPath, "--", "("],
args: [execPath(), xevalPath, "("],
stdin: "null",
stdout: "piped",
stderr: "piped"

View file

@ -1,5 +1,6 @@
import { parse } from "../flags/mod.ts";
const { Buffer, EOF, args, exit, stdin, writeAll } = Deno;
import { readStringDelim } from "../io/bufio.ts";
const { args, exit, stdin } = Deno;
type Reader = Deno.Reader;
/* eslint-disable-next-line max-len */
@ -10,18 +11,16 @@ const AsyncFunction = Object.getPrototypeOf(async function(): Promise<void> {})
/* eslint-disable max-len */
const HELP_MSG = `xeval
Eval a script on lines from stdin.
Read from standard input and eval code on each whitespace-delimited
string chunks.
Run a script for each new-line or otherwise delimited chunk of standard input.
Print all the usernames in /etc/passwd:
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts -- "a = $.split(':'); if (a) console.log(a[0])"
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts "a = $.split(':'); if (a) console.log(a[0])"
A complicated way to print the current git branch:
git branch | deno -A https://deno.land/std/examples/xeval.ts -- -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
git branch | deno -A https://deno.land/std/examples/xeval.ts -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
Demonstrates breaking the input up by space delimiter instead of by lines:
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -- -d " " "if ($ === 'MIT') console.log('MIT licensed')",
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -d " " "if ($ === 'MIT') console.log('MIT licensed')",
USAGE:
deno -A https://deno.land/std/examples/xeval.ts [OPTIONS] <code>
@ -40,98 +39,12 @@ export interface XevalOptions {
const DEFAULT_DELIMITER = "\n";
// Build the KMP failure table for `pat`: entry `i` is the length of the
// longest proper prefix of `pat[0..i]` that is also a suffix of it.
//
// Returns a table with the same length as `pat` (empty for an empty pattern).
// Entries are stored in a Uint8Array, so they wrap modulo 256; patterns whose
// prefix/suffix overlap exceeds 255 bytes are not represented correctly.
function createLPS(pat: Uint8Array): Uint8Array {
  const lps = new Uint8Array(pat.length);
  lps[0] = 0;
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current prefix/suffix overlap grows by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No overlap left to fall back to.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter overlap. This must be read from the
      // table itself; reading the pattern byte here yields an arbitrary
      // value and can make the loop never terminate.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
// Read from reader until EOF and emit string chunks separated
// by the given delimiter.
//
// The delimiter is encoded to bytes and matched against the raw input, so a
// delimiter that straddles two reads is still found: the scan position
// (inspectIndex) and the partial-match length (matchIndex) are kept across
// reads. The delimiter itself is never part of an emitted chunk.
//
// At EOF whatever is still buffered is yielded unconditionally as the last
// chunk. A negative read result ends the generator without yielding it.
async function* chunks(
reader: Reader,
delim: string
): AsyncIterableIterator<string> {
const encoder = new TextEncoder();
const decoder = new TextDecoder();
// Match on encoded bytes rather than on characters.
const delimArr = encoder.encode(delim);
const delimLen = delimArr.length;
// Fallback table used when a partial delimiter match fails.
const delimLPS = createLPS(delimArr);
// Bytes read so far that have not yet been emitted as a chunk.
let inputBuffer = new Buffer();
// Scratch space for a single read: at least 1024 bytes, and always larger
// than the delimiter.
const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1));
// Modified KMP
// inspectIndex: next byte of the pending data to examine.
// matchIndex: number of delimiter bytes matched so far.
let inspectIndex = 0;
let matchIndex = 0;
while (true) {
const result = await reader.read(inspectArr);
if (result === EOF) {
// Yield last chunk.
const lastChunk = inputBuffer.toString();
yield lastChunk;
return;
}
if ((result as number) < 0) {
// Discard all remaining and silently fail.
return;
}
// Append only the bytes actually read to the pending data.
const sliceRead = inspectArr.subarray(0, result as number);
await writeAll(inputBuffer, sliceRead);
let sliceToProcess = inputBuffer.bytes();
while (inspectIndex < sliceToProcess.length) {
if (sliceToProcess[inspectIndex] === delimArr[matchIndex]) {
inspectIndex++;
matchIndex++;
if (matchIndex === delimLen) {
// Full match
// Everything before the delimiter is the finished chunk.
const matchEnd = inspectIndex - delimLen;
const readyBytes = sliceToProcess.subarray(0, matchEnd);
// Copy
// The bytes after the delimiter become the new pending data.
const pendingBytes = sliceToProcess.slice(inspectIndex);
const readyChunk = decoder.decode(readyBytes);
yield readyChunk;
// Reset match, different from KMP.
// Scanning restarts at the beginning of the remaining bytes.
sliceToProcess = pendingBytes;
inspectIndex = 0;
matchIndex = 0;
}
} else {
if (matchIndex === 0) {
// Not inside a partial match: move on to the next input byte.
inspectIndex++;
} else {
// Partial match failed: shrink it via the table and retry this byte.
matchIndex = delimLPS[matchIndex - 1];
}
}
}
// Keep inspectIndex and matchIndex.
// Carry the unconsumed bytes over to the next read.
inputBuffer = new Buffer(sliceToProcess);
}
}
export async function xeval(
reader: Reader,
xevalFunc: XevalFunc,
{ delimiter = DEFAULT_DELIMITER }: XevalOptions = {}
): Promise<void> {
for await (const chunk of chunks(reader, delimiter)) {
for await (const chunk of readStringDelim(reader, delimiter)) {
// Ignore empty chunks.
if (chunk.length > 0) {
await xevalFunc(chunk);
@ -140,7 +53,7 @@ export async function xeval(
}
async function main(): Promise<void> {
const parsedArgs = parse(args.slice(1), {
const parsedArgs = parse(args, {
boolean: ["help"],
string: ["delim", "replvar"],
alias: {
@ -155,6 +68,7 @@ async function main(): Promise<void> {
});
if (parsedArgs._.length != 1) {
console.error(HELP_MSG);
console.log(parsedArgs._);
exit(1);
}
if (parsedArgs.help) {

View file

@ -508,3 +508,102 @@ export class BufWriter implements Writer {
return nn;
}
}
/**
 * Build the KMP failure table for `pat`: entry `i` is the length of the
 * longest proper prefix of `pat[0..i]` that is also a suffix of it.
 *
 * Returns a table with the same length as `pat` (empty for an empty pattern).
 * Entries are stored in a Uint8Array, so they wrap modulo 256; patterns whose
 * prefix/suffix overlap exceeds 255 bytes are not represented correctly.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  const lps = new Uint8Array(pat.length);
  lps[0] = 0;
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current prefix/suffix overlap grows by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No overlap left to fall back to.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter overlap. This must be read from the
      // table itself; reading the pattern byte here yields an arbitrary
      // value and can make the loop never terminate.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
/**
 * Read delimited bytes from a Reader.
 *
 * Yields the bytes found between occurrences of `delim`; the delimiter itself
 * is never part of a yielded chunk. The scan position and the partial-match
 * length are kept across reads, so a delimiter that straddles two reads is
 * still found.
 *
 * At EOF whatever is still buffered is yielded unconditionally as the last
 * chunk. A negative read result ends the generator without yielding it.
 *
 * Chunks yielded on a delimiter match are `subarray` views rather than copies.
 */
export async function* readDelim(
reader: Reader,
delim: Uint8Array
): AsyncIterableIterator<Uint8Array> {
// The delimiter is already raw bytes, so matching is done byte for byte.
const delimLen = delim.length;
// Fallback table used when a partial delimiter match fails.
const delimLPS = createLPS(delim);
// Bytes read so far that have not yet been emitted as a chunk.
let inputBuffer = new Deno.Buffer();
// Scratch space for a single read: at least 1024 bytes, and always larger
// than the delimiter.
const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1));
// Modified KMP
// inspectIndex: next byte of the pending data to examine.
// matchIndex: number of delimiter bytes matched so far.
let inspectIndex = 0;
let matchIndex = 0;
while (true) {
const result = await reader.read(inspectArr);
if (result === Deno.EOF) {
// Yield last chunk.
yield inputBuffer.bytes();
return;
}
if ((result as number) < 0) {
// Discard all remaining and silently fail.
return;
}
// Append only the bytes actually read to the pending data.
const sliceRead = inspectArr.subarray(0, result as number);
await Deno.writeAll(inputBuffer, sliceRead);
let sliceToProcess = inputBuffer.bytes();
while (inspectIndex < sliceToProcess.length) {
if (sliceToProcess[inspectIndex] === delim[matchIndex]) {
inspectIndex++;
matchIndex++;
if (matchIndex === delimLen) {
// Full match
// Everything before the delimiter is the finished chunk.
const matchEnd = inspectIndex - delimLen;
const readyBytes = sliceToProcess.subarray(0, matchEnd);
// Copy
// The bytes after the delimiter become the new pending data.
const pendingBytes = sliceToProcess.slice(inspectIndex);
yield readyBytes;
// Reset match, different from KMP.
// Scanning restarts at the beginning of the remaining bytes.
sliceToProcess = pendingBytes;
inspectIndex = 0;
matchIndex = 0;
}
} else {
if (matchIndex === 0) {
// Not inside a partial match: move on to the next input byte.
inspectIndex++;
} else {
// Partial match failed: shrink it via the table and retry this byte.
matchIndex = delimLPS[matchIndex - 1];
}
}
}
// Keep inspectIndex and matchIndex.
// Carry the unconsumed bytes over to the next read.
inputBuffer = new Deno.Buffer(sliceToProcess);
}
}
/**
 * Yield the pieces of a Reader's content that are separated by `delim`,
 * each decoded to a string.
 *
 * The delimiter is encoded to bytes, the splitting is delegated to
 * `readDelim`, and every byte chunk it produces is decoded on its own.
 */
export async function* readStringDelim(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const toBytes = new TextEncoder();
  const toText = new TextDecoder();
  const delimBytes = toBytes.encode(delim);
  for await (const bytes of readDelim(reader, delimBytes)) {
    const text = toText.decode(bytes);
    yield text;
  }
}
/**
 * Yield the content of a Reader one line at a time.
 *
 * Lines are split on "\n" by delegating to `readStringDelim`; the separator
 * is not included in the yielded strings.
 */
export async function* readLines(
  reader: Reader
): AsyncIterableIterator<string> {
  const lineSeparator = "\n";
  const lines = readStringDelim(reader, lineSeparator);
  yield* lines;
}

View file

@ -15,7 +15,9 @@ import {
BufReader,
BufWriter,
BufferFullError,
UnexpectedEOFError
UnexpectedEOFError,
readStringDelim,
readLines
} from "./bufio.ts";
import * as iotest from "./iotest.ts";
import { charCode, copyBytes, stringsReader } from "./util.ts";
@ -381,3 +383,28 @@ Deno.test(async function bufReaderReadFull(): Promise<void> {
}
}
});
// Checks readStringDelim with a tab delimiter and readLines with
// newline-separated input; in both cases the input has no trailing delimiter.
Deno.test(async function readStringDelimAndLines(): Promise<void> {
  const encoder = new TextEncoder();

  // Three tab-separated fields.
  const tabbedBytes = encoder.encode("Hello World\tHello World 2\tHello World 3");
  const tabbedInput = new Buffer(tabbedBytes);
  const fields = [];
  for await (const field of readStringDelim(tabbedInput, "\t")) {
    fields.push(field);
  }

  assertEquals(fields.length, 3);
  assertEquals(fields, ["Hello World", "Hello World 2", "Hello World 3"]);

  // Ten single-digit lines.
  const digitBytes = encoder.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9");
  const digitInput = new Buffer(digitBytes);
  const digits = [];
  for await (const line of readLines(digitInput)) {
    digits.push(line);
  }

  assertEquals(digits.length, 10);
  assertEquals(digits, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
});