mirror of
https://github.com/denoland/deno.git
synced 2024-12-27 09:39:08 -05:00
feat(std/io): Export readDelim(), readStringDelim() and readLines() from bufio.ts (#4019)
Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
parent
7b9f6e9c45
commit
5a3292047c
4 changed files with 138 additions and 98 deletions
|
@ -27,7 +27,7 @@ const xevalPath = "examples/xeval.ts";
|
|||
|
||||
Deno.test(async function xevalCliReplvar(): Promise<void> {
|
||||
const p = run({
|
||||
args: [execPath(), xevalPath, "--", "--replvar=abc", "console.log(abc)"],
|
||||
args: [execPath(), xevalPath, "--replvar=abc", "console.log(abc)"],
|
||||
stdin: "piped",
|
||||
stdout: "piped",
|
||||
stderr: "null"
|
||||
|
@ -41,7 +41,7 @@ Deno.test(async function xevalCliReplvar(): Promise<void> {
|
|||
|
||||
Deno.test(async function xevalCliSyntaxError(): Promise<void> {
|
||||
const p = run({
|
||||
args: [execPath(), xevalPath, "--", "("],
|
||||
args: [execPath(), xevalPath, "("],
|
||||
stdin: "null",
|
||||
stdout: "piped",
|
||||
stderr: "piped"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import { parse } from "../flags/mod.ts";
|
||||
const { Buffer, EOF, args, exit, stdin, writeAll } = Deno;
|
||||
import { readStringDelim } from "../io/bufio.ts";
|
||||
const { args, exit, stdin } = Deno;
|
||||
type Reader = Deno.Reader;
|
||||
|
||||
/* eslint-disable-next-line max-len */
|
||||
|
@ -10,18 +11,16 @@ const AsyncFunction = Object.getPrototypeOf(async function(): Promise<void> {})
|
|||
/* eslint-disable max-len */
|
||||
const HELP_MSG = `xeval
|
||||
|
||||
Eval a script on lines from stdin.
|
||||
Read from standard input and eval code on each whitespace-delimited
|
||||
string chunks.
|
||||
Run a script for each new-line or otherwise delimited chunk of standard input.
|
||||
|
||||
Print all the usernames in /etc/passwd:
|
||||
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts -- "a = $.split(':'); if (a) console.log(a[0])"
|
||||
cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts "a = $.split(':'); if (a) console.log(a[0])"
|
||||
|
||||
A complicated way to print the current git branch:
|
||||
git branch | deno -A https://deno.land/std/examples/xeval.ts -- -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
|
||||
git branch | deno -A https://deno.land/std/examples/xeval.ts -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))"
|
||||
|
||||
Demonstrates breaking the input up by space delimiter instead of by lines:
|
||||
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -- -d " " "if ($ === 'MIT') console.log('MIT licensed')",
|
||||
cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -d " " "if ($ === 'MIT') console.log('MIT licensed')",
|
||||
|
||||
USAGE:
|
||||
deno -A https://deno.land/std/examples/xeval.ts [OPTIONS] <code>
|
||||
|
@ -40,98 +39,12 @@ export interface XevalOptions {
|
|||
|
||||
const DEFAULT_DELIMITER = "\n";
|
||||
|
||||
// Build the Knuth-Morris-Pratt failure table for `pat`.
//
// Entry `i` of the result is the length of the longest proper prefix of
// `pat[0..i]` that is also a suffix of `pat[0..i]`. An empty pattern yields
// an empty table.
//
// Note: the table is a Uint8Array, so a border length above 255 would wrap
// around; patterns are expected to be short delimiters.
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays start zero-filled, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current border can be extended by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border ends at position i.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter border. This must be read from the
      // table being built (lps), not from the pattern bytes: indexing `pat`
      // here yields a byte value rather than a border length and can keep
      // this loop from ever terminating (e.g. for the pattern "aab").
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
|
||||
|
||||
// Stream `reader` to EOF, yielding the decoded text found between
// occurrences of `delim`. The delimiter itself is never part of a chunk.
// Whatever is still buffered at EOF is yielded as the last chunk (it may be
// an empty string).
async function* chunks(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const utf8Decoder = new TextDecoder();
  // Match on the encoded bytes so multi-byte characters cannot confuse the
  // search.
  const needle = new TextEncoder().encode(delim);
  const needleLen = needle.length;
  const fallback = createLPS(needle);

  const readBuf = new Uint8Array(Math.max(1024, needleLen + 1));
  let pending = new Buffer();

  // KMP-style scan state. Both counters survive across reads so a delimiter
  // split over two reads is still recognised.
  let scanPos = 0;
  let matched = 0;

  for (;;) {
    const nread = await reader.read(readBuf);
    if (nread === EOF) {
      // Flush the tail.
      yield pending.toString();
      return;
    }
    if ((nread as number) < 0) {
      // Drop whatever is buffered and stop without reporting an error.
      return;
    }
    await writeAll(pending, readBuf.subarray(0, nread as number));

    let view = pending.bytes();
    while (scanPos < view.length) {
      if (view[scanPos] !== needle[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          // Retry the same byte against a shorter partial match.
          matched = fallback[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== needleLen) {
        continue;
      }

      // Complete delimiter found: everything before it is one chunk.
      const head = view.subarray(0, scanPos - needleLen);
      // Copy the remainder so it no longer aliases the old buffer.
      const tail = view.slice(scanPos);
      const text = utf8Decoder.decode(head);
      yield text;
      // Unlike plain KMP, matching restarts from scratch on the remainder.
      view = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally kept for the next read.
    pending = new Buffer(view);
  }
}
|
||||
|
||||
export async function xeval(
|
||||
reader: Reader,
|
||||
xevalFunc: XevalFunc,
|
||||
{ delimiter = DEFAULT_DELIMITER }: XevalOptions = {}
|
||||
): Promise<void> {
|
||||
for await (const chunk of chunks(reader, delimiter)) {
|
||||
for await (const chunk of readStringDelim(reader, delimiter)) {
|
||||
// Ignore empty chunks.
|
||||
if (chunk.length > 0) {
|
||||
await xevalFunc(chunk);
|
||||
|
@ -140,7 +53,7 @@ export async function xeval(
|
|||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const parsedArgs = parse(args.slice(1), {
|
||||
const parsedArgs = parse(args, {
|
||||
boolean: ["help"],
|
||||
string: ["delim", "replvar"],
|
||||
alias: {
|
||||
|
@ -155,6 +68,7 @@ async function main(): Promise<void> {
|
|||
});
|
||||
if (parsedArgs._.length != 1) {
|
||||
console.error(HELP_MSG);
|
||||
console.log(parsedArgs._);
|
||||
exit(1);
|
||||
}
|
||||
if (parsedArgs.help) {
|
||||
|
|
|
@ -508,3 +508,102 @@ export class BufWriter implements Writer {
|
|||
return nn;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the Knuth-Morris-Pratt failure table for `pat`.
 *
 * Entry `i` of the result is the length of the longest proper prefix of
 * `pat[0..i]` that is also a suffix of `pat[0..i]`. An empty pattern yields
 * an empty table.
 *
 * Note: the table is a `Uint8Array`, so a border length above 255 would wrap
 * around; patterns are expected to be short delimiters.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays start zero-filled, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current border can be extended by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border ends at position i.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter border. This must be read from the
      // table being built (lps), not from the pattern bytes: indexing `pat`
      // here yields a byte value rather than a border length and can keep
      // this loop from ever terminating (e.g. for the pattern "aab").
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
|
||||
|
||||
/**
 * Read delimited bytes from a Reader.
 *
 * Yields the bytes found between occurrences of `delim`; the delimiter
 * itself is never part of a chunk. When the reader reports EOF, whatever is
 * still buffered is yielded as the final chunk (it may be empty).
 *
 * Chunks yielded before EOF are `subarray` views into the buffer that was
 * current at the time of the match.
 */
export async function* readDelim(
  reader: Reader,
  delim: Uint8Array
): AsyncIterableIterator<Uint8Array> {
  const delimLen = delim.length;
  const fallback = createLPS(delim);

  const readBuf = new Uint8Array(Math.max(1024, delimLen + 1));
  let pending = new Deno.Buffer();

  // KMP-style scan state. Both counters survive across reads so a delimiter
  // split over two reads is still recognised.
  let scanPos = 0;
  let matched = 0;

  for (;;) {
    const nread = await reader.read(readBuf);
    if (nread === Deno.EOF) {
      // Flush the tail.
      yield pending.bytes();
      return;
    }
    if ((nread as number) < 0) {
      // Drop whatever is buffered and stop without reporting an error.
      return;
    }
    await Deno.writeAll(pending, readBuf.subarray(0, nread as number));

    let view = pending.bytes();
    while (scanPos < view.length) {
      if (view[scanPos] !== delim[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          // Retry the same byte against a shorter partial match.
          matched = fallback[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== delimLen) {
        continue;
      }

      // Complete delimiter found: everything before it is one chunk.
      const head = view.subarray(0, scanPos - delimLen);
      // Copy the remainder so it no longer aliases the old buffer.
      const tail = view.slice(scanPos);
      yield head;
      // Unlike plain KMP, matching restarts from scratch on the remainder.
      view = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally kept for the next read.
    pending = new Deno.Buffer(view);
  }
}
|
||||
|
||||
/**
 * Read delimited strings from a Reader.
 *
 * Encodes `delim` to bytes, splits the reader's output on it via
 * `readDelim`, and yields each resulting chunk decoded with `TextDecoder`.
 */
export async function* readStringDelim(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const delimBytes = new TextEncoder().encode(delim);
  const utf8Decoder = new TextDecoder();
  for await (const piece of readDelim(reader, delimBytes)) {
    const text = utf8Decoder.decode(piece);
    yield text;
  }
}
|
||||
|
||||
/**
 * Read strings line-by-line from a Reader.
 *
 * Lines are split on "\n" only; the separator is not included in the
 * yielded strings.
 */
export async function* readLines(
  reader: Reader
): AsyncIterableIterator<string> {
  const NEWLINE = "\n";
  for await (const line of readStringDelim(reader, NEWLINE)) {
    yield line;
  }
}
|
||||
|
|
|
@ -15,7 +15,9 @@ import {
|
|||
BufReader,
|
||||
BufWriter,
|
||||
BufferFullError,
|
||||
UnexpectedEOFError
|
||||
UnexpectedEOFError,
|
||||
readStringDelim,
|
||||
readLines
|
||||
} from "./bufio.ts";
|
||||
import * as iotest from "./iotest.ts";
|
||||
import { charCode, copyBytes, stringsReader } from "./util.ts";
|
||||
|
@ -381,3 +383,28 @@ Deno.test(async function bufReaderReadFull(): Promise<void> {
|
|||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Checks that readStringDelim() splits on an arbitrary delimiter and that
// readLines() splits on newlines, with no delimiter left in the output.
Deno.test(async function readStringDelimAndLines(): Promise<void> {
  const encoder = new TextEncoder();

  // Tab-separated input: three fields expected.
  const tabbed = new Buffer(
    encoder.encode("Hello World\tHello World 2\tHello World 3")
  );
  const fields = [];
  for await (const field of readStringDelim(tabbed, "\t")) {
    fields.push(field);
  }
  assertEquals(fields.length, 3);
  assertEquals(fields, ["Hello World", "Hello World 2", "Hello World 3"]);

  // Newline-separated digits with no trailing newline: ten lines expected.
  const digits = new Buffer(encoder.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9"));
  const seenLines = [];
  for await (const line of readLines(digits)) {
    seenLines.push(line);
  }
  assertEquals(seenLines.length, 10);
  assertEquals(seenLines, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
});
|
||||
|
|
Loading…
Reference in a new issue