1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-12-23 15:49:44 -05:00

feat(std/path): Align globToRegExp() with bash glob expansion (#7209)

- feat: Support escaping glob characters
- feat: Support more character classes
- feat: Match characters literally on segment parse failure
- fix: Match nothing for empty globs
- fix: Contain any glob syntax to its path segment
- perf: Remove extraneous separators from generated regex
- doc: Add detailed JSDoc
- chore: Remove old copyright headers
This commit is contained in:
Nayeem Rahman 2020-10-01 10:37:03 +01:00 committed by GitHub
parent b689e60b60
commit 326ccb1095
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 367 additions and 240 deletions

View file

@ -60,11 +60,13 @@ function comparePath(a: WalkEntry, b: WalkEntry): number {
return 0; return 0;
} }
/** /** Expand the glob string from the specified `root` directory and yield each
* Expand the glob string from the specified `root` directory and yield each
* result as a `WalkEntry` object. * result as a `WalkEntry` object.
* *
* Examples: * See [`globToRegExp()`](../path/glob.ts#globToRegExp) for details on supported
* syntax.
*
* Example:
* *
* for await (const file of expandGlob("**\/*.ts")) { * for await (const file of expandGlob("**\/*.ts")) {
* console.log(file); * console.log(file);
@ -168,10 +170,9 @@ export async function* expandGlob(
yield* currentMatches; yield* currentMatches;
} }
/** /** Synchronous version of `expandGlob()`.
* Synchronous version of `expandGlob()`.
* *
* Examples: * Example:
* *
* for (const file of expandGlobSync("**\/*.ts")) { * for (const file of expandGlobSync("**\/*.ts")) {
* console.log(file); * console.log(file);

View file

@ -1,5 +1,3 @@
// globToRegExp() is originall ported from globrex@0.1.2.
// Copyright 2018 Terkel Gjervig Nielsen. All rights reserved. MIT license.
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. // Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible. // This module is browser compatible.
@ -22,232 +20,296 @@ export interface GlobOptions {
export type GlobToRegExpOptions = GlobOptions; export type GlobToRegExpOptions = GlobOptions;
/** Convert a glob string to a regular expressions. // deno-fmt-ignore
const regExpEscapeChars = ["!", "$", "(", ")", "*", "+", ".", "=", "?", "[", "\\", "^", "{", "|"];
const rangeEscapeChars = ["-", "\\", "]"];
/** Convert a glob string to a regular expression.
* *
* // Looking for all the `ts` files: * Tries to match bash glob expansion as closely as possible.
* walkSync(".", {
* match: [globToRegExp("*.ts")]
* });
* *
* Looking for all the `.json` files in any subfolder: * Basic glob syntax:
* walkSync(".", { * - `*` - Matches everything without leaving the path segment.
* match: [globToRegExp(join("a", "**", "*.json"), { * - `{foo,bar}` - Matches `foo` or `bar`.
* extended: true, * - `[abcd]` - Matches `a`, `b`, `c` or `d`.
* globstar: true * - `[a-d]` - Matches `a`, `b`, `c` or `d`.
* })] * - `[!abcd]` - Matches any single character besides `a`, `b`, `c` or `d`.
* }); */ * - `[[:<class>:]]` - Matches any character belonging to `<class>`.
* - `[[:alnum:]]` - Matches any digit or letter.
* - `[[:digit:]abc]` - Matches any digit, `a`, `b` or `c`.
* - See https://facelessuser.github.io/wcmatch/glob/#posix-character-classes
* for a complete list of supported character classes.
* - `\` - Escapes the next character for an `os` other than `"windows"`.
* - \` - Escapes the next character for `os` set to `"windows"`.
* - `/` - Path separator.
* - `\` - Additional path separator only for `os` set to `"windows"`.
*
* Extended syntax:
* - Requires `{ extended: true }`.
* - `?(foo|bar)` - Matches 0 or 1 instance of `{foo,bar}`.
* - `@(foo|bar)` - Matches 1 instance of `{foo,bar}`. They behave the same.
* - `*(foo|bar)` - Matches _n_ instances of `{foo,bar}`.
* - `+(foo|bar)` - Matches _n > 0_ instances of `{foo,bar}`.
* - `!(foo|bar)` - Matches anything other than `{foo,bar}`.
* - See https://www.linuxjournal.com/content/bash-extended-globbing.
*
* Globstar syntax:
* - Requires `{ globstar: true }`.
* - `**` - Matches any number of any path segments.
* - Must comprise its entire path segment in the provided glob.
* - See https://www.linuxjournal.com/content/globstar-new-bash-globbing-option.
*
* Note the following properties:
* - The generated `RegExp` is anchored at both start and end.
* - Repeating and trailing separators are tolerated. Trailing separators in the
* provided glob have no meaning and are discarded.
* - Absolute globs will only match absolute paths, etc.
* - Empty globs will match nothing.
* - Any special glob syntax must be contained to one path segment. For example,
* `?(foo|bar/baz)` is invalid. The separator will take precedence and the
* first segment ends with an unclosed group.
* - If a path segment ends with unclosed groups or a dangling escape prefix, a
* parse error has occurred. Every character for that segment is taken
* literally in this event.
*
* Limitations:
* - A negative group like `!(foo|bar)` will wrongly be converted to a negative
* look-ahead followed by a wildcard. This means that `!(foo).js` will wrongly
* fail to match `foobar.js`, even though `foobar` is not `foo`. Effectively,
* `!(foo|bar)` is treated like `!(@(foo|bar)*)`. This will work correctly if
* the group occurs not nested at the end of the segment. */
export function globToRegExp( export function globToRegExp(
glob: string, glob: string,
{ extended = true, globstar: globstarOption = true, os = NATIVE_OS }: { extended = true, globstar: globstarOption = true, os = NATIVE_OS }:
GlobToRegExpOptions = {}, GlobToRegExpOptions = {},
): RegExp { ): RegExp {
const sep = os == "windows" ? `(?:\\\\|\\/)+` : `\\/+`; if (glob == "") {
const sepMaybe = os == "windows" ? `(?:\\\\|\\/)*` : `\\/*`; return /(?!)/;
}
const sep = os == "windows" ? "(?:\\\\|/)+" : "/+";
const sepMaybe = os == "windows" ? "(?:\\\\|/)*" : "/*";
const seps = os == "windows" ? ["\\", "/"] : ["/"]; const seps = os == "windows" ? ["\\", "/"] : ["/"];
const sepRaw = os == "windows" ? `\\` : `/`;
const globstar = os == "windows" const globstar = os == "windows"
? `(?:[^\\\\/]*(?:\\\\|\\/|$)+)*` ? "(?:[^\\\\/]*(?:\\\\|/|$)+)*"
: `(?:[^/]*(?:\\/|$)+)*`; : "(?:[^/]*(?:/|$)+)*";
const wildcard = os == "windows" ? `[^\\\\/]*` : `[^/]*`; const wildcard = os == "windows" ? "[^\\\\/]*" : "[^/]*";
const escapePrefix = os == "windows" ? "`" : "\\";
// Keep track of scope for extended syntaxes.
const extStack = [];
// If we are doing extended matching, this boolean is true when we are inside
// a group (eg {*.html,*.js}), and false otherwise.
let inGroup = false;
let inRange = false;
let regExpString = "";
// Remove trailing separators. // Remove trailing separators.
let newLength = glob.length; let newLength = glob.length;
for (; newLength > 0 && seps.includes(glob[newLength - 1]); newLength--); for (; newLength > 1 && seps.includes(glob[newLength - 1]); newLength--);
glob = glob.slice(0, newLength); glob = glob.slice(0, newLength);
let c, n; let regExpString = "";
for (let i = 0; i < glob.length; i++) {
c = glob[i];
n = glob[i + 1];
if (seps.includes(c)) { // Terminates correctly. Trust that `j` is incremented every iteration.
regExpString += sep; for (let j = 0; j < glob.length;) {
while (seps.includes(glob[i + 1])) i++; let segment = "";
const groupStack = [];
let inRange = false;
let inEscape = false;
let endsWithSep = false;
let i = j;
// Terminates with `i` at the non-inclusive end of the current segment.
for (; i < glob.length && !seps.includes(glob[i]); i++) {
if (inEscape) {
inEscape = false;
const escapeChars = inRange ? rangeEscapeChars : regExpEscapeChars;
segment += escapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i];
continue; continue;
} }
if (c == "[") { if (glob[i] == escapePrefix) {
if (inRange && n == ":") { inEscape = true;
i++; // skip [
let value = "";
while (glob[++i] !== ":") value += glob[i];
if (value == "alnum") regExpString += "\\w\\d";
else if (value == "space") regExpString += "\\s";
else if (value == "digit") regExpString += "\\d";
i++; // skip last ]
continue; continue;
} }
if (glob[i] == "[") {
if (!inRange) {
inRange = true; inRange = true;
regExpString += c; segment += "[";
if (glob[i + 1] == "!") {
i++;
segment += "^";
} else if (glob[i + 1] == "^") {
i++;
segment += "\\^";
}
continue;
} else if (glob[i + 1] == ":") {
let k = i + 1;
let value = "";
while (glob[k + 1] != null && glob[k + 1] != ":") {
value += glob[k + 1];
k++;
}
if (glob[k + 1] == ":" && glob[k + 2] == "]") {
i = k + 2;
if (value == "alnum") segment += "\\dA-Za-z";
else if (value == "alpha") segment += "A-Za-z";
else if (value == "ascii") segment += "\x00-\x7F";
else if (value == "blank") segment += "\t ";
else if (value == "cntrl") segment += "\x00-\x1F\x7F";
else if (value == "digit") segment += "\\d";
else if (value == "graph") segment += "\x21-\x7E";
else if (value == "lower") segment += "a-z";
else if (value == "print") segment += "\x20-\x7E";
else if (value == "punct") {
segment += "!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_{|}~";
} else if (value == "space") segment += "\\s\v";
else if (value == "upper") segment += "A-Z";
else if (value == "word") segment += "\\w";
else if (value == "xdigit") segment += "\\dA-Fa-f";
continue; continue;
} }
}
}
if (c == "]") { if (glob[i] == "]" && inRange) {
inRange = false; inRange = false;
regExpString += c; segment += "]";
continue; continue;
} }
if (c == "!") {
if (inRange) { if (inRange) {
if (glob[i - 1] == "[") { if (glob[i] == "\\") {
regExpString += "^"; segment += `\\\\`;
} else {
segment += glob[i];
}
continue; continue;
} }
} else if (extended) {
if (n == "(") { if (
extStack.push(c); glob[i] == ")" && groupStack.length > 0 &&
regExpString += "(?!"; groupStack[groupStack.length - 1] != "BRACE"
) {
segment += ")";
const type = groupStack.pop()!;
if (type == "!") {
segment += wildcard;
} else if (type != "@") {
segment += type;
}
continue;
}
if (
glob[i] == "|" && groupStack.length > 0 &&
groupStack[groupStack.length - 1] != "BRACE"
) {
segment += "|";
continue;
}
if (glob[i] == "+" && extended && glob[i + 1] == "(") {
i++; i++;
groupStack.push("+");
segment += "(?:";
continue; continue;
} }
regExpString += `\\${c}`;
if (glob[i] == "@" && extended && glob[i + 1] == "(") {
i++;
groupStack.push("@");
segment += "(?:";
continue; continue;
}
if (glob[i] == "?") {
if (extended && glob[i + 1] == "(") {
i++;
groupStack.push("?");
segment += "(?:";
} else { } else {
regExpString += `\\${c}`; segment += ".";
continue;
} }
}
if (inRange) {
if (c == "\\" || c == "^" && glob[i - 1] == "[") regExpString += `\\${c}`;
else regExpString += c;
continue; continue;
} }
if (["\\", "$", "^", ".", "="].includes(c)) { if (glob[i] == "!" && extended && glob[i + 1] == "(") {
regExpString += `\\${c}`; i++;
groupStack.push("!");
segment += "(?!";
continue; continue;
} }
if (c == "(") { if (glob[i] == "{") {
if (extStack.length) { groupStack.push("BRACE");
regExpString += `${c}?:`; segment += "(?:";
continue;
}
regExpString += `\\${c}`;
continue; continue;
} }
if (c == ")") { if (glob[i] == "}" && groupStack[groupStack.length - 1] == "BRACE") {
if (extStack.length) { groupStack.pop();
regExpString += c; segment += ")";
const type = extStack.pop()!; continue;
if (type == "@") { }
regExpString += "{1}";
} else if (type == "!") { if (glob[i] == "," && groupStack[groupStack.length - 1] == "BRACE") {
regExpString += wildcard; segment += "|";
continue;
}
if (glob[i] == "*") {
if (extended && glob[i + 1] == "(") {
i++;
groupStack.push("*");
segment += "(?:";
} else { } else {
regExpString += type;
}
continue;
}
regExpString += `\\${c}`;
continue;
}
if (c == "|") {
if (extStack.length) {
regExpString += c;
continue;
}
regExpString += `\\${c}`;
continue;
}
if (c == "+") {
if (n == "(" && extended) {
extStack.push(c);
continue;
}
regExpString += `\\${c}`;
continue;
}
if (c == "@" && extended) {
if (n == "(") {
extStack.push(c);
continue;
}
}
if (c == "?") {
if (extended) {
if (n == "(") {
extStack.push(c);
}
continue;
} else {
regExpString += ".";
continue;
}
}
if (c == "{") {
inGroup = true;
regExpString += "(?:";
continue;
}
if (c == "}") {
inGroup = false;
regExpString += ")";
continue;
}
if (c == ",") {
if (inGroup) {
regExpString += "|";
continue;
}
regExpString += `\\${c}`;
continue;
}
if (c == "*") {
if (n == "(" && extended) {
extStack.push(c);
continue;
}
// Move over all consecutive "*"'s.
// Also store the previous and next characters
const prevChar = glob[i - 1]; const prevChar = glob[i - 1];
let starCount = 1; let numStars = 1;
while (glob[i + 1] == "*") { while (glob[i + 1] == "*") {
starCount++;
i++; i++;
numStars++;
} }
const nextChar = glob[i + 1]; const nextChar = glob[i + 1];
const isGlobstar = globstarOption && starCount > 1 && if (
// from the start of the segment globstarOption && numStars == 2 &&
[sepRaw, "/", undefined].includes(prevChar) && [...seps, undefined].includes(prevChar) &&
// to the end of the segment [...seps, undefined].includes(nextChar)
[sepRaw, "/", undefined].includes(nextChar); ) {
if (isGlobstar) { segment += globstar;
// it's a globstar, so match zero or more path segments endsWithSep = true;
regExpString += globstar;
while (seps.includes(glob[i + 1])) i++;
} else { } else {
// it's not a globstar, so only match one path segment segment += wildcard;
regExpString += wildcard; }
} }
continue; continue;
} }
regExpString += c; segment += regExpEscapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i];
} }
regExpString = `^${regExpString}${regExpString != "" ? sepMaybe : ""}$`; // Check for unclosed groups or a dangling backslash.
if (groupStack.length > 0 || inRange || inEscape) {
// Parse failure. Take all characters from this segment literally.
segment = "";
for (const c of glob.slice(j, i)) {
segment += regExpEscapeChars.includes(c) ? `\\${c}` : c;
endsWithSep = false;
}
}
regExpString += segment;
if (!endsWithSep) {
regExpString += i < glob.length ? sep : sepMaybe;
endsWithSep = true;
}
// Terminates with `i` at the start of the next segment.
while (seps.includes(glob[i])) i++;
// Check that the next value of `j` is indeed higher than the current value.
if (!(i > j)) {
throw new Error("Assertion failure: i > j (potential infinite loop)");
}
j = i;
}
regExpString = `^${regExpString}$`;
return new RegExp(regExpString); return new RegExp(regExpString);
} }

View file

@ -44,7 +44,14 @@ function match(
Deno.test({ Deno.test({
name: "[path] globToRegExp() Basic RegExp", name: "[path] globToRegExp() Basic RegExp",
fn(): void { fn(): void {
assertEquals(globToRegExp(""), /^$/); assertEquals(globToRegExp("*.js", { os: "linux" }), /^[^/]*\.js\/*$/);
},
});
Deno.test({
name: "[path] globToRegExp() Empty glob",
fn(): void {
assertEquals(globToRegExp(""), /(?!)/);
assertEquals(globToRegExp("*.js", { os: "linux" }), /^[^/]*\.js\/*$/); assertEquals(globToRegExp("*.js", { os: "linux" }), /^[^/]*\.js\/*$/);
}, },
}); });
@ -108,27 +115,6 @@ Deno.test({
{ extended: false, globstar: false }, { extended: false, globstar: false },
), ),
); );
assert(
match(
"[[:digit:]]/bar.txt",
"1/bar.txt",
{ extended: false, globstar: false },
),
);
assert(
match(
"[[:digit:]b]/bar.txt",
"b/bar.txt",
{ extended: false, globstar: false },
),
);
assert(
match(
"[![:digit:]b]/bar.txt",
"a/bar.txt",
{ extended: false, globstar: false },
),
);
assert( assert(
!match( !match(
"[[:alnum:]]/bar.txt", "[[:alnum:]]/bar.txt",
@ -136,20 +122,48 @@ Deno.test({
{ extended: false, globstar: false }, { extended: false, globstar: false },
), ),
); );
assert( for (const c of "09AGZagz") {
!match( assert(match("[[:alnum:]]", c, { extended: false, globstar: false }), c);
"[[:digit:]]/bar.txt", }
"a/bar.txt", for (const c of "AGZagz") {
{ extended: false, globstar: false }, assert(match("[[:alpha:]]", c, { extended: false, globstar: false }), c);
), }
); for (const c of "\x00\x20\x7F") {
assert( assert(match("[[:ascii:]]", c, { extended: false, globstar: false }), c);
!match( }
"[[:digit:]b]/bar.txt", for (const c of "\t ") {
"a/bar.txt", assert(match("[[:blank:]]", c, { extended: false, globstar: false }), c);
{ extended: false, globstar: false }, }
), for (const c of "\x00\x1F\x7F") {
); assert(match("[[:cntrl:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "09") {
assert(match("[[:digit:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "\x21\x7E") {
assert(match("[[:graph:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "az") {
assert(match("[[:lower:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "\x20\x7E") {
assert(match("[[:print:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "!\"#$%&'()*+,-./:;<=>?@[\\]^_{|}~") {
assert(match("[[:punct:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "\t\n\v\f\r ") {
assert(match("[[:space:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "AZ") {
assert(match("[[:upper:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "09AZaz_") {
assert(match("[[:word:]]", c, { extended: false, globstar: false }), c);
}
for (const c of "09AFaf") {
assert(match("[[:xdigit:]]", c, { extended: false, globstar: false }), c);
}
}, },
}); });
@ -367,8 +381,11 @@ Deno.test({
name: "[path] globToRegExp() Special RegExp characters in range", name: "[path] globToRegExp() Special RegExp characters in range",
fn(): void { fn(): void {
// Excluding characters checked in the previous test. // Excluding characters checked in the previous test.
assertEquals(globToRegExp("[\\$^.=]", { os: "linux" }), /^[\\$^.=]\/*$/); assertEquals(globToRegExp("[\\\\$^.=]", { os: "linux" }), /^[\\$^.=]\/*$/);
assertEquals(globToRegExp("[!\\$^.=]", { os: "linux" }), /^[^\\$^.=]\/*$/); assertEquals(
globToRegExp("[!\\\\$^.=]", { os: "linux" }),
/^[^\\$^.=]\/*$/,
);
assertEquals(globToRegExp("[^^]", { os: "linux" }), /^[\^^]\/*$/); assertEquals(globToRegExp("[^^]", { os: "linux" }), /^[\^^]\/*$/);
}, },
}); });
@ -409,6 +426,53 @@ Deno.test({
}, },
}); });
Deno.test({
name: "[path] globToRegExp() Unclosed groups",
fn() {
assert(match("{foo,bar}/[ab", "foo/[ab"));
assert(match("{foo,bar}/{foo,bar", "foo/{foo,bar"));
assert(match("{foo,bar}/?(foo|bar", "foo/?(foo|bar"));
assert(match("{foo,bar}/@(foo|bar", "foo/@(foo|bar"));
assert(match("{foo,bar}/*(foo|bar", "foo/*(foo|bar"));
assert(match("{foo,bar}/+(foo|bar", "foo/+(foo|bar"));
assert(match("{foo,bar}/!(foo|bar", "foo/!(foo|bar"));
assert(match("{foo,bar}/?({)}", "foo/?({)}"));
assert(match("{foo,bar}/{?(})", "foo/{?(})"));
},
});
Deno.test({
name: "[path] globToRegExp() Escape glob characters",
fn() {
assert(match("\\[ab]", "[ab]", { os: "linux" }));
assert(match("`[ab]", "[ab]", { os: "windows" }));
assert(match("\\{foo,bar}", "{foo,bar}", { os: "linux" }));
assert(match("`{foo,bar}", "{foo,bar}", { os: "windows" }));
assert(match("\\?(foo|bar)", "?(foo|bar)", { os: "linux" }));
assert(match("`?(foo|bar)", "?(foo|bar)", { os: "windows" }));
assert(match("\\@(foo|bar)", "@(foo|bar)", { os: "linux" }));
assert(match("`@(foo|bar)", "@(foo|bar)", { os: "windows" }));
assert(match("\\*(foo|bar)", "*(foo|bar)", { os: "linux" }));
assert(match("`*(foo|bar)", "*(foo|bar)", { os: "windows" }));
assert(match("\\+(foo|bar)", "+(foo|bar)", { os: "linux" }));
assert(match("`+(foo|bar)", "+(foo|bar)", { os: "windows" }));
assert(match("\\!(foo|bar)", "!(foo|bar)", { os: "linux" }));
assert(match("`!(foo|bar)", "!(foo|bar)", { os: "windows" }));
assert(match("@\\(foo|bar)", "@(foo|bar)", { os: "linux" }));
assert(match("@`(foo|bar)", "@(foo|bar)", { os: "windows" }));
assert(match("{foo,bar}/[ab]\\", "foo/[ab]\\", { os: "linux" }));
assert(match("{foo,bar}/[ab]`", "foo/[ab]`", { os: "windows" }));
},
});
Deno.test({
name: "[path] globToRegExp() Dangling escape prefix",
fn() {
assert(match("{foo,bar}/[ab]\\", "foo/[ab]\\", { os: "linux" }));
assert(match("{foo,bar}/[ab]`", "foo/[ab]`", { os: "windows" }));
},
});
Deno.test({ Deno.test({
name: "[path] GlobToRegExpOptions::extended", name: "[path] GlobToRegExpOptions::extended",
fn() { fn() {