mirror of
https://github.com/denoland/deno.git
synced 2024-11-25 15:29:32 -05:00
feat(std/io): Export readDelim(), readStringDelim() and readLines() from bufio.ts (#4019)
Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
parent
7b9f6e9c45
commit
5a3292047c
4 changed files with 138 additions and 98 deletions
|
@ -27,7 +27,7 @@ const xevalPath = "examples/xeval.ts";
|
||||||
|
|
||||||
Deno.test(async function xevalCliReplvar(): Promise<void> {
|
Deno.test(async function xevalCliReplvar(): Promise<void> {
|
||||||
const p = run({
|
const p = run({
|
||||||
args: [execPath(), xevalPath, "--", "--replvar=abc", "console.log(abc)"],
|
args: [execPath(), xevalPath, "--replvar=abc", "console.log(abc)"],
|
||||||
stdin: "piped",
|
stdin: "piped",
|
||||||
stdout: "piped",
|
stdout: "piped",
|
||||||
stderr: "null"
|
stderr: "null"
|
||||||
|
@ -41,7 +41,7 @@ Deno.test(async function xevalCliReplvar(): Promise<void> {
|
||||||
|
|
||||||
Deno.test(async function xevalCliSyntaxError(): Promise<void> {
|
Deno.test(async function xevalCliSyntaxError(): Promise<void> {
|
||||||
const p = run({
|
const p = run({
|
||||||
args: [execPath(), xevalPath, "--", "("],
|
args: [execPath(), xevalPath, "("],
|
||||||
stdin: "null",
|
stdin: "null",
|
||||||
stdout: "piped",
|
stdout: "piped",
|
||||||
stderr: "piped"
|
stderr: "piped"
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import { parse } from "../flags/mod.ts";
|
import { parse } from "../flags/mod.ts";
|
||||||
const { Buffer, EOF, args, exit, stdin, writeAll } = Deno;
|
import { readStringDelim } from "../io/bufio.ts";
|
||||||
|
const { args, exit, stdin } = Deno;
|
||||||
type Reader = Deno.Reader;
|
type Reader = Deno.Reader;
|
||||||
|
|
||||||
/* eslint-disable-next-line max-len */
|
/* eslint-disable-next-line max-len */
|
||||||
|
@ -10,18 +11,16 @@ const AsyncFunction = Object.getPrototypeOf(async function(): Promise<void> {})
|
||||||
/* eslint-disable max-len */
|
/* eslint-disable max-len */
|
||||||
const HELP_MSG = `xeval
|
const HELP_MSG = `xeval
|
||||||
|
|
||||||
Eval a script on lines from stdin.
|
Run a script for each new-line or otherwise delimited chunk of standard input.
|
||||||
Read from standard input and eval code on each whitespace-delimited
|
|
||||||
string chunks.
|
|
||||||
|
|
||||||
Print all the usernames in /etc/passwd:
|
Print all the usernames in /etc/passwd:
|
||||||
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts -- "a = $.split(':'); if (a) console.log(a[0])"
|
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts "a = $.split(':'); if (a) console.log(a[0])"
|
||||||
|
|
||||||
A complicated way to print the current git branch:
|
A complicated way to print the current git branch:
|
||||||
git branch | deno -A https://deno.land/std/examples/xeval.ts -- -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
|
git branch | deno -A https://deno.land/std/examples/xeval.ts -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
|
||||||
|
|
||||||
Demonstrates breaking the input up by space delimiter instead of by lines:
|
Demonstrates breaking the input up by space delimiter instead of by lines:
|
||||||
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -- -d " " "if ($ === 'MIT') console.log('MIT licensed')",
|
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -d " " "if ($ === 'MIT') console.log('MIT licensed')",
|
||||||
|
|
||||||
USAGE:
|
USAGE:
|
||||||
deno -A https://deno.land/std/examples/xeval.ts [OPTIONS] <code>
|
deno -A https://deno.land/std/examples/xeval.ts [OPTIONS] <code>
|
||||||
|
@ -40,98 +39,12 @@ export interface XevalOptions {
|
||||||
|
|
||||||
const DEFAULT_DELIMITER = "\n";
|
const DEFAULT_DELIMITER = "\n";
|
||||||
|
|
||||||
/**
 * Build the Knuth-Morris-Pratt failure table for `pat`.
 *
 * `lps[i]` is the length of the longest proper prefix of `pat[0..i]` that is
 * also a suffix of it.
 *
 * NOTE: the table is a Uint8Array, so entries wrap modulo 256; patterns whose
 * prefix lengths exceed 255 are not represented correctly.
 *
 * @param pat Pattern bytes. An empty pattern yields an empty table.
 * @returns A table with the same length as `pat`.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays are zero-filled, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter border. This must be read from the
      // table (`lps`), not from the pattern bytes: a pattern byte is data,
      // not a prefix length, and using it here produces a wrong table or an
      // out-of-range index that never terminates.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
|
|
||||||
|
|
||||||
/**
 * Consume `reader` until EOF, yielding the decoded text found between
 * occurrences of `delim`.
 *
 * The delimiter is encoded with TextEncoder and matched byte-wise, so the
 * search never splits on a partial character of the delimiter. Whatever is
 * still buffered at EOF is yielded as the last chunk (possibly empty). A
 * negative read result ends the iteration without yielding the remainder.
 */
async function* chunks(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const textEncoder = new TextEncoder();
  const textDecoder = new TextDecoder();
  const pattern = textEncoder.encode(delim);
  const patternLength = pattern.length;
  const failure = createLPS(pattern);

  let pending = new Buffer();
  const readBuf = new Uint8Array(Math.max(1024, patternLength + 1));

  // KMP state. Both counters survive across reads so that a delimiter
  // straddling two reads is still found.
  let scanPos = 0; // next byte of the pending data to examine
  let matched = 0; // number of delimiter bytes matched so far

  for (;;) {
    const nread = await reader.read(readBuf);
    if (nread === EOF) {
      // Flush whatever is left as the final chunk.
      yield pending.toString();
      return;
    }
    if ((nread as number) < 0) {
      // Drop the remainder and stop quietly.
      return;
    }
    await writeAll(pending, readBuf.subarray(0, nread as number));

    let window = pending.bytes();
    while (scanPos < window.length) {
      if (window[scanPos] !== pattern[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          matched = failure[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== patternLength) {
        continue;
      }

      // Complete delimiter found: everything before it is one chunk.
      const head = window.subarray(0, scanPos - patternLength);
      // slice() copies, detaching the remainder from the old buffer.
      const tail = window.slice(scanPos);
      const text = textDecoder.decode(head);
      yield text;
      // Unlike plain KMP, restart the search from scratch on the remainder.
      window = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally left untouched here.
    pending = new Buffer(window);
  }
}
|
|
||||||
|
|
||||||
export async function xeval(
|
export async function xeval(
|
||||||
reader: Reader,
|
reader: Reader,
|
||||||
xevalFunc: XevalFunc,
|
xevalFunc: XevalFunc,
|
||||||
{ delimiter = DEFAULT_DELIMITER }: XevalOptions = {}
|
{ delimiter = DEFAULT_DELIMITER }: XevalOptions = {}
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
for await (const chunk of chunks(reader, delimiter)) {
|
for await (const chunk of readStringDelim(reader, delimiter)) {
|
||||||
// Ignore empty chunks.
|
// Ignore empty chunks.
|
||||||
if (chunk.length > 0) {
|
if (chunk.length > 0) {
|
||||||
await xevalFunc(chunk);
|
await xevalFunc(chunk);
|
||||||
|
@ -140,7 +53,7 @@ export async function xeval(
|
||||||
}
|
}
|
||||||
|
|
||||||
async function main(): Promise<void> {
|
async function main(): Promise<void> {
|
||||||
const parsedArgs = parse(args.slice(1), {
|
const parsedArgs = parse(args, {
|
||||||
boolean: ["help"],
|
boolean: ["help"],
|
||||||
string: ["delim", "replvar"],
|
string: ["delim", "replvar"],
|
||||||
alias: {
|
alias: {
|
||||||
|
@ -155,6 +68,7 @@ async function main(): Promise<void> {
|
||||||
});
|
});
|
||||||
if (parsedArgs._.length != 1) {
|
if (parsedArgs._.length != 1) {
|
||||||
console.error(HELP_MSG);
|
console.error(HELP_MSG);
|
||||||
|
console.log(parsedArgs._);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (parsedArgs.help) {
|
if (parsedArgs.help) {
|
||||||
|
|
|
@ -508,3 +508,102 @@ export class BufWriter implements Writer {
|
||||||
return nn;
|
return nn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the Knuth-Morris-Pratt failure table for `pat`.
 *
 * `lps[i]` is the length of the longest proper prefix of `pat[0..i]` that is
 * also a suffix of it.
 *
 * NOTE: the table is a Uint8Array, so entries wrap modulo 256; patterns whose
 * prefix lengths exceed 255 are not represented correctly.
 *
 * @param pat Pattern bytes. An empty pattern yields an empty table.
 * @returns A table with the same length as `pat`.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays are zero-filled, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter border. This must be read from the
      // table (`lps`), not from the pattern bytes: a pattern byte is data,
      // not a prefix length, and using it here produces a wrong table or an
      // out-of-range index that never terminates.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
|
||||||
|
|
||||||
|
/**
 * Read from `reader` until EOF, yielding the byte chunks found between
 * occurrences of `delim`.
 *
 * The delimiter itself is not included in the yielded chunks. Whatever is
 * still buffered at EOF is yielded as the last chunk (possibly empty). A
 * negative read result ends the iteration without yielding the remainder.
 */
export async function* readDelim(
  reader: Reader,
  delim: Uint8Array
): AsyncIterableIterator<Uint8Array> {
  const patternLength = delim.length;
  const failure = createLPS(delim);

  let pending = new Deno.Buffer();
  const readBuf = new Uint8Array(Math.max(1024, patternLength + 1));

  // KMP state. Both counters survive across reads so that a delimiter
  // straddling two reads is still found.
  let scanPos = 0; // next byte of the pending data to examine
  let matched = 0; // number of delimiter bytes matched so far

  for (;;) {
    const nread = await reader.read(readBuf);
    if (nread === Deno.EOF) {
      // Flush whatever is left as the final chunk.
      yield pending.bytes();
      return;
    }
    if ((nread as number) < 0) {
      // Drop the remainder and stop quietly.
      return;
    }
    await Deno.writeAll(pending, readBuf.subarray(0, nread as number));

    let window = pending.bytes();
    while (scanPos < window.length) {
      if (window[scanPos] !== delim[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          matched = failure[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== patternLength) {
        continue;
      }

      // Complete delimiter found: everything before it is one chunk.
      const head = window.subarray(0, scanPos - patternLength);
      // slice() copies, detaching the remainder from the old buffer.
      const tail = window.slice(scanPos);
      yield head;
      // Unlike plain KMP, restart the search from scratch on the remainder.
      window = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally left untouched here.
    pending = new Deno.Buffer(window);
  }
}
|
||||||
|
|
||||||
|
/**
 * Read from `reader` and yield the text between occurrences of `delim`.
 *
 * The delimiter is encoded with TextEncoder, the splitting is delegated to
 * `readDelim`, and each resulting byte chunk is decoded with TextDecoder.
 */
export async function* readStringDelim(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const delimBytes = new TextEncoder().encode(delim);
  const textDecoder = new TextDecoder();
  for await (const bytes of readDelim(reader, delimBytes)) {
    const text = textDecoder.decode(bytes);
    yield text;
  }
}
|
||||||
|
|
||||||
|
/**
 * Read from `reader` and yield its content split on "\n".
 *
 * This is `readStringDelim` with the delimiter fixed to a single line feed.
 */
export async function* readLines(
  reader: Reader
): AsyncIterableIterator<string> {
  for await (const line of readStringDelim(reader, "\n")) {
    yield line;
  }
}
|
||||||
|
|
|
@ -15,7 +15,9 @@ import {
|
||||||
BufReader,
|
BufReader,
|
||||||
BufWriter,
|
BufWriter,
|
||||||
BufferFullError,
|
BufferFullError,
|
||||||
UnexpectedEOFError
|
UnexpectedEOFError,
|
||||||
|
readStringDelim,
|
||||||
|
readLines
|
||||||
} from "./bufio.ts";
|
} from "./bufio.ts";
|
||||||
import * as iotest from "./iotest.ts";
|
import * as iotest from "./iotest.ts";
|
||||||
import { charCode, copyBytes, stringsReader } from "./util.ts";
|
import { charCode, copyBytes, stringsReader } from "./util.ts";
|
||||||
|
@ -381,3 +383,28 @@ Deno.test(async function bufReaderReadFull(): Promise<void> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks readStringDelim with a tab delimiter and readLines with
// newline-separated input; in both cases the input has no trailing delimiter.
Deno.test(async function readStringDelimAndLines(): Promise<void> {
  const encoder = new TextEncoder();

  // Three tab-separated fields.
  const tabSeparated = new Buffer(
    encoder.encode("Hello World\tHello World 2\tHello World 3")
  );
  const tabChunks: string[] = [];
  for await (const piece of readStringDelim(tabSeparated, "\t")) {
    tabChunks.push(piece);
  }
  assertEquals(tabChunks.length, 3);
  assertEquals(tabChunks, ["Hello World", "Hello World 2", "Hello World 3"]);

  // Ten single-digit lines.
  const digitLines = new Buffer(encoder.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9"));
  const collectedLines: string[] = [];
  for await (const line of readLines(digitLines)) {
    collectedLines.push(line);
  }
  assertEquals(collectedLines.length, 10);
  assertEquals(collectedLines, [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
  ]);
});
|
||||||
|
|
Loading…
Reference in a new issue