From 972b3e8e654e096f882693ff42b5c3cf4fed97e9 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 8 Jan 2024 12:18:42 -0500 Subject: [PATCH] perf: skip expanding exclude globs (#21817) We were calling `expand_glob` on our excludes, which is very expensive and unnecessary because we can pattern match while traversing instead. 1. Doesn't expand "exclude" globs. Instead pattern matches while walking the directory. 2. Splits up the "include" into base paths and applicable file patterns. This causes less pattern matching to occur because we're only pattern matching on patterns that might match and not ones in completely unrelated directories. --- cli/args/flags.rs | 29 ++ cli/args/mod.rs | 136 ++++---- cli/graph_util.rs | 14 +- cli/lsp/documents.rs | 202 +++++------ cli/lsp/language_server.rs | 6 +- cli/module_loader.rs | 48 +-- cli/tests/testdata/doc/invalid_url.out | 2 +- cli/tools/bench/mod.rs | 56 +++- cli/tools/coverage/mod.rs | 34 +- cli/tools/doc.rs | 40 ++- cli/tools/fmt.rs | 39 ++- cli/tools/lint.rs | 38 ++- cli/tools/test/mod.rs | 75 +++-- cli/util/fs.rs | 358 +++++++++++--------- cli/util/glob.rs | 443 ++++++++++++++++++++++--- 15 files changed, 1020 insertions(+), 500 deletions(-) diff --git a/cli/args/flags.rs b/cli/args/flags.rs index 8b796edc2f..a05d31ce14 100644 --- a/cli/args/flags.rs +++ b/cli/args/flags.rs @@ -34,6 +34,35 @@ pub struct FileFlags { pub include: Vec, } +impl FileFlags { + pub fn with_absolute_paths(self, base: &Path) -> Self { + fn to_absolute_path(path: PathBuf, base: &Path) -> PathBuf { + // todo(dsherret): don't store URLs in PathBufs + if path.starts_with("http:") + || path.starts_with("https:") + || path.starts_with("file:") + { + path + } else { + base.join(path) + } + } + + Self { + include: self + .include + .into_iter() + .map(|p| to_absolute_path(p, base)) + .collect(), + ignore: self + .ignore + .into_iter() + .map(|p| to_absolute_path(p, base)) + .collect(), + } + } +} + #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct BenchFlags { pub files: FileFlags, diff --git a/cli/args/mod.rs b/cli/args/mod.rs index dd9ae36e3b..23723d916f 100644 --- a/cli/args/mod.rs +++ b/cli/args/mod.rs @@ -69,7 +69,8 @@ use thiserror::Error; use crate::file_fetcher::FileFetcher; use crate::util::fs::canonicalize_path_maybe_not_exists; -use crate::util::glob::expand_globs; +use crate::util::glob::FilePatterns; +use crate::util::glob::PathOrPatternSet; use crate::version; use deno_config::FmtConfig; @@ -217,7 +218,7 @@ impl CacheSetting { #[derive(Clone, Debug, Eq, PartialEq)] pub struct BenchOptions { - pub files: FilesConfig, + pub files: FilePatterns, pub filter: Option, pub json: bool, pub no_run: bool, @@ -227,12 +228,14 @@ impl BenchOptions { pub fn resolve( maybe_bench_config: Option, maybe_bench_flags: Option, + initial_cwd: &Path, ) -> Result { let bench_flags = maybe_bench_flags.unwrap_or_default(); Ok(Self { files: resolve_files( maybe_bench_config.map(|c| c.files), Some(bench_flags.files), + initial_cwd, )?, filter: bench_flags.filter, json: bench_flags.json, @@ -245,13 +248,14 @@ impl BenchOptions { pub struct FmtOptions { pub check: bool, pub options: FmtOptionsConfig, - pub files: FilesConfig, + pub files: FilePatterns, } impl FmtOptions { pub fn resolve( maybe_fmt_config: Option, maybe_fmt_flags: Option, + initial_cwd: &Path, ) -> Result { let (maybe_config_options, maybe_config_files) = maybe_fmt_config.map(|c| (c.options, c.files)).unzip(); @@ -265,6 +269,7 @@ impl FmtOptions { files: resolve_files( maybe_config_files, maybe_fmt_flags.map(|f| f.files), + initial_cwd, )?, }) } @@ -311,26 +316,9 @@ fn resolve_fmt_options( options } -#[derive(Clone, Debug, Default)] -pub struct CheckOptions { - pub exclude: Vec, -} - -impl CheckOptions { - pub fn resolve( - maybe_files_config: Option, - ) -> Result { - Ok(Self { - exclude: expand_globs( - maybe_files_config.map(|c| c.exclude).unwrap_or_default(), - )?, - }) - } -} - #[derive(Clone)] pub struct TestOptions { - pub files: FilesConfig, + pub files: FilePatterns, pub doc: bool, pub no_run: bool, pub fail_fast: Option, @@ -347,6 +335,7 @@ impl TestOptions { pub fn resolve( maybe_test_config: Option, maybe_test_flags: Option, + initial_cwd: &Path, ) -> Result { let test_flags = maybe_test_flags.unwrap_or_default(); @@ -354,6 +343,7 @@ impl TestOptions { files: resolve_files( maybe_test_config.map(|c| c.files), Some(test_flags.files), + initial_cwd, )?, allow_none: test_flags.allow_none, concurrent_jobs: test_flags @@ -382,7 +372,7 @@ pub enum LintReporterKind { #[derive(Clone, Debug, Default)] pub struct LintOptions { pub rules: LintRulesConfig, - pub files: FilesConfig, + pub files: FilePatterns, pub reporter_kind: LintReporterKind, } @@ -390,6 +380,7 @@ impl LintOptions { pub fn resolve( maybe_lint_config: Option, maybe_lint_flags: Option, + initial_cwd: &Path, ) -> Result { let mut maybe_reporter_kind = maybe_lint_flags.as_ref().and_then(|lint_flags| { @@ -437,7 +428,11 @@ impl LintOptions { maybe_lint_config.map(|c| (c.files, c.rules)).unzip(); Ok(Self { reporter_kind: maybe_reporter_kind.unwrap_or_default(), - files: resolve_files(maybe_config_files, Some(maybe_file_flags))?, + files: resolve_files( + maybe_config_files, + Some(maybe_file_flags), + initial_cwd, + )?, rules: resolve_lint_rules_options( maybe_config_rules, maybe_rules_tags, @@ -1184,7 +1179,7 @@ impl CliOptions { } else { None }; - FmtOptions::resolve(maybe_fmt_config, Some(fmt_flags)) + FmtOptions::resolve(maybe_fmt_config, Some(fmt_flags), &self.initial_cwd) } pub fn resolve_lint_options( @@ -1196,17 +1191,20 @@ impl CliOptions { } else { None }; - LintOptions::resolve(maybe_lint_config, Some(lint_flags)) + LintOptions::resolve(maybe_lint_config, Some(lint_flags), &self.initial_cwd) } - pub fn resolve_check_options(&self) -> Result { + pub fn resolve_config_excludes(&self) -> Result { let maybe_files_config = if let Some(config_file) = &self.maybe_config_file { config_file.to_files_config()? } else { None }; - CheckOptions::resolve(maybe_files_config) + PathOrPatternSet::from_absolute_paths( + maybe_files_config.map(|c| c.exclude).unwrap_or_default(), + ) + .context("Invalid config file exclude pattern.") } pub fn resolve_test_options( @@ -1218,7 +1216,7 @@ impl CliOptions { } else { None }; - TestOptions::resolve(maybe_test_config, Some(test_flags)) + TestOptions::resolve(maybe_test_config, Some(test_flags), &self.initial_cwd) } pub fn resolve_bench_options( @@ -1231,7 +1229,11 @@ impl CliOptions { } else { None }; - BenchOptions::resolve(maybe_bench_config, Some(bench_flags)) + BenchOptions::resolve( + maybe_bench_config, + Some(bench_flags), + &self.initial_cwd, + ) } /// Vector of user script CLI arguments. @@ -1655,24 +1657,29 @@ impl StorageKeyResolver { fn resolve_files( maybe_files_config: Option, maybe_file_flags: Option, -) -> Result { - let mut result = maybe_files_config.unwrap_or_default(); + initial_cwd: &Path, +) -> Result { + let mut maybe_files_config = maybe_files_config.unwrap_or_default(); if let Some(file_flags) = maybe_file_flags { + let file_flags = file_flags.with_absolute_paths(initial_cwd); if !file_flags.include.is_empty() { - result.include = Some(file_flags.include); + maybe_files_config.include = Some(file_flags.include); } if !file_flags.ignore.is_empty() { - result.exclude = file_flags.ignore; + maybe_files_config.exclude = file_flags.ignore } } - // Now expand globs if there are any - result.include = match result.include { - Some(include) => Some(expand_globs(include)?), - None => None, - }; - result.exclude = expand_globs(result.exclude)?; - - Ok(result) + Ok(FilePatterns { + include: { + let files = match maybe_files_config.include { + Some(include) => include, + None => vec![initial_cwd.to_path_buf()], + }; + Some(PathOrPatternSet::from_absolute_paths(files)?) + }, + exclude: PathOrPatternSet::from_absolute_paths(maybe_files_config.exclude) + .context("Invalid exclude.")?, + }) } /// Resolves the no_prompt value based on the cli flags and environment. @@ -1694,6 +1701,8 @@ pub fn npm_pkg_req_ref_to_binary_command( #[cfg(test)] mod test { + use crate::util::fs::FileCollector; + use super::*; use pretty_assertions::assert_eq; @@ -1887,6 +1896,7 @@ mod test { exclude: vec![], }), None, + temp_dir_path, ) .unwrap_err(); assert!(error.to_string().starts_with("Failed to expand glob")); @@ -1902,32 +1912,36 @@ mod test { exclude: vec![temp_dir_path.join("nested/**/*bazz.ts")], }), None, + temp_dir_path, ) .unwrap(); + let mut files = FileCollector::new(|_, _| true) + .ignore_git_folder() + .ignore_node_modules() + .ignore_vendor_folder() + .collect_file_patterns(resolved_files) + .unwrap(); + + files.sort(); + assert_eq!( - resolved_files.include, - Some(vec![ - temp_dir_path.join("data/test1.js"), - temp_dir_path.join("data/test1.ts"), - temp_dir_path.join("nested/foo/bar.ts"), - temp_dir_path.join("nested/foo/bazz.ts"), - temp_dir_path.join("nested/foo/fizz.ts"), - temp_dir_path.join("nested/foo/foo.ts"), - temp_dir_path.join("nested/fizz/bar.ts"), - temp_dir_path.join("nested/fizz/bazz.ts"), - temp_dir_path.join("nested/fizz/fizz.ts"), - temp_dir_path.join("nested/fizz/foo.ts"), - temp_dir_path.join("pages/[id].ts"), - ]) - ); - assert_eq!( - resolved_files.exclude, + files, vec![ - temp_dir_path.join("nested/fizz/bazz.ts"), - temp_dir_path.join("nested/foo/bazz.ts"), + "data/test1.js", + "data/test1.ts", + "nested/fizz/bar.ts", + "nested/fizz/fizz.ts", + "nested/fizz/foo.ts", + "nested/foo/bar.ts", + "nested/foo/fizz.ts", + "nested/foo/foo.ts", + "pages/[id].ts", ] - ) + .into_iter() + .map(|p| normalize_path(temp_dir_path.join(p))) + .collect::>() + ); } #[test] diff --git a/cli/graph_util.rs b/cli/graph_util.rs index 5247998604..b4f4b939aa 100644 --- a/cli/graph_util.rs +++ b/cli/graph_util.rs @@ -16,6 +16,8 @@ use crate::resolver::SloppyImportsResolver; use crate::tools::check; use crate::tools::check::TypeChecker; use crate::util::file_watcher::WatcherCommunicator; +use crate::util::fs::canonicalize_path; +use crate::util::path::specifier_to_file_path; use crate::util::sync::TaskQueue; use crate::util::sync::TaskQueuePermit; @@ -677,7 +679,7 @@ impl ModuleGraphContainer { pub fn has_graph_root_local_dependent_changed( graph: &ModuleGraph, root: &ModuleSpecifier, - changed_specifiers: &HashSet, + canonicalized_changed_paths: &HashSet, ) -> bool { let roots = vec![root.clone()]; let mut dependent_specifiers = graph.walk( @@ -689,11 +691,15 @@ pub fn has_graph_root_local_dependent_changed( }, ); while let Some((s, _)) = dependent_specifiers.next() { - if s.scheme() != "file" { + if let Ok(path) = specifier_to_file_path(s) { + if let Ok(path) = canonicalize_path(&path) { + if canonicalized_changed_paths.contains(&path) { + return true; + } + } + } else { // skip walking this remote module's dependencies dependent_specifiers.skip_previous_dependencies(); - } else if changed_specifiers.contains(s) { - return true; } } false diff --git a/cli/lsp/documents.rs b/cli/lsp/documents.rs index 75bd54fc71..2084928004 100644 --- a/cli/lsp/documents.rs +++ b/cli/lsp/documents.rs @@ -24,6 +24,7 @@ use crate::resolver::SloppyImportsFsEntry; use crate::resolver::SloppyImportsResolution; use crate::resolver::SloppyImportsResolver; use crate::util::glob; +use crate::util::glob::FilePatterns; use crate::util::path::specifier_to_file_path; use crate::util::text_encoding; @@ -62,6 +63,7 @@ use std::fs::ReadDir; use std::ops::Range; use std::path::Path; use std::path::PathBuf; +use std::rc::Rc; use std::str::FromStr; use std::sync::Arc; use tower_lsp::lsp_types as lsp; @@ -1852,13 +1854,14 @@ fn analyze_module( } } +#[derive(Debug)] enum PendingEntry { /// File specified as a root url. SpecifiedRootFile(PathBuf), /// Directory that is queued to read. - Dir(PathBuf), + Dir(PathBuf, Rc), /// The current directory being read. - ReadDir(Box), + ReadDir(Box, Rc), } struct PreloadDocumentFinderOptions { @@ -1873,27 +1876,22 @@ struct PreloadDocumentFinder { limit: usize, entry_count: usize, pending_entries: VecDeque, - disabled_globs: glob::GlobSet, - disabled_paths: HashSet, + root_dir_entries: Vec, + visited_paths: HashSet, } impl PreloadDocumentFinder { pub fn new(options: PreloadDocumentFinderOptions) -> Self { fn paths_into_globs_and_paths( input_paths: Vec, - ) -> (glob::GlobSet, HashSet) { - let mut globs = Vec::with_capacity(input_paths.len()); - let mut paths = HashSet::with_capacity(input_paths.len()); + ) -> glob::PathOrPatternSet { + let mut result = Vec::with_capacity(input_paths.len()); for path in input_paths { - if let Ok(Some(glob)) = - glob::GlobPattern::new_if_pattern(&path.to_string_lossy()) - { - globs.push(glob); - } else { - paths.insert(path); + if let Ok(path_or_pattern) = glob::PathOrPattern::new(path) { + result.push(path_or_pattern); } } - (glob::GlobSet::new(globs), paths) + glob::PathOrPatternSet::new(result) } fn is_allowed_root_dir(dir_path: &Path) -> bool { @@ -1904,36 +1902,34 @@ impl PreloadDocumentFinder { true } - let (disabled_globs, disabled_paths) = - paths_into_globs_and_paths(options.disabled_paths); let mut finder = PreloadDocumentFinder { limit: options.limit, entry_count: 0, pending_entries: Default::default(), - disabled_globs, - disabled_paths, + root_dir_entries: Default::default(), + visited_paths: Default::default(), }; + let file_patterns = FilePatterns { + include: Some(paths_into_globs_and_paths(options.enabled_paths)), + exclude: paths_into_globs_and_paths(options.disabled_paths), + }; + let file_patterns_by_base = file_patterns.split_by_base(); + // initialize the finder with the initial paths - let mut dirs = Vec::with_capacity(options.enabled_paths.len()); - for path in options.enabled_paths { - if !finder.disabled_paths.contains(&path) - && !finder.disabled_globs.matches_path(&path) - { - if path.is_dir() { - if is_allowed_root_dir(&path) { - dirs.push(path); - } - } else { + for (path, file_patterns) in file_patterns_by_base { + if path.is_dir() { + if is_allowed_root_dir(&path) { finder - .pending_entries - .push_back(PendingEntry::SpecifiedRootFile(path)); + .root_dir_entries + .push(PendingEntry::Dir(path, Rc::new(file_patterns))); } + } else { + finder + .pending_entries + .push_back(PendingEntry::SpecifiedRootFile(path)); } } - for dir in sort_and_remove_non_leaf_dirs(dirs) { - finder.pending_entries.push_back(PendingEntry::Dir(dir)); - } finder } @@ -2015,48 +2011,60 @@ impl Iterator for PreloadDocumentFinder { } } - while let Some(entry) = self.pending_entries.pop_front() { - match entry { - PendingEntry::SpecifiedRootFile(file) => { - // since it was a file that was specified as a root url, only - // verify that it's valid - if let Some(specifier) = Self::get_valid_specifier(&file) { - return Some(specifier); - } - } - PendingEntry::Dir(dir_path) => { - if let Ok(read_dir) = fs::read_dir(&dir_path) { - self - .pending_entries - .push_back(PendingEntry::ReadDir(Box::new(read_dir))); - } - } - PendingEntry::ReadDir(mut entries) => { - while let Some(entry) = entries.next() { - self.entry_count += 1; - - if self.hit_limit() { - self.pending_entries.clear(); // stop searching - return None; + // This first drains all the pending entries then adds the root dir entries + // one at a time to the pending entries before draining them. This is because + // we're traversing based on directory depth, so we want to search deeper + // directories first + while !self.pending_entries.is_empty() || !self.root_dir_entries.is_empty() + { + while let Some(entry) = self.pending_entries.pop_front() { + match entry { + PendingEntry::SpecifiedRootFile(file) => { + // since it was a file that was specified as a root url, only + // verify that it's valid + if let Some(specifier) = Self::get_valid_specifier(&file) { + return Some(specifier); } + } + PendingEntry::Dir(dir_path, file_patterns) => { + if self.visited_paths.insert(dir_path.clone()) { + if let Ok(read_dir) = fs::read_dir(&dir_path) { + self.pending_entries.push_back(PendingEntry::ReadDir( + Box::new(read_dir), + file_patterns, + )); + } + } + } + PendingEntry::ReadDir(mut entries, file_patterns) => { + while let Some(entry) = entries.next() { + self.entry_count += 1; - if let Ok(entry) = entry { - let path = entry.path(); - if let Ok(file_type) = entry.file_type() { - if !self.disabled_paths.contains(&path) - && !self.disabled_globs.matches_path(&path) - { - if file_type.is_dir() && is_discoverable_dir(&path) { - self - .pending_entries - .push_back(PendingEntry::Dir(path.to_path_buf())); - } else if file_type.is_file() && is_discoverable_file(&path) { - if let Some(specifier) = Self::get_valid_specifier(&path) { - // restore the next entries for next time - self - .pending_entries - .push_front(PendingEntry::ReadDir(entries)); - return Some(specifier); + if self.hit_limit() { + self.pending_entries.clear(); // stop searching + return None; + } + + if let Ok(entry) = entry { + let path = entry.path(); + if let Ok(file_type) = entry.file_type() { + if file_patterns.matches_path(&path) { + if file_type.is_dir() && is_discoverable_dir(&path) { + self.pending_entries.push_back(PendingEntry::Dir( + path.to_path_buf(), + file_patterns.clone(), + )); + } else if file_type.is_file() && is_discoverable_file(&path) + { + if let Some(specifier) = Self::get_valid_specifier(&path) + { + // restore the next entries for next time + self.pending_entries.push_front(PendingEntry::ReadDir( + entries, + file_patterns.clone(), + )); + return Some(specifier); + } } } } @@ -2065,31 +2073,16 @@ impl Iterator for PreloadDocumentFinder { } } } + + if let Some(entry) = self.root_dir_entries.pop() { + self.pending_entries.push_back(entry); + } } None } } -/// Removes any directories that are a descendant of another directory in the collection. -fn sort_and_remove_non_leaf_dirs(mut dirs: Vec) -> Vec { - if dirs.is_empty() { - return dirs; - } - - dirs.sort(); - if !dirs.is_empty() { - for i in (0..dirs.len() - 1).rev() { - let prev = &dirs[i + 1]; - if prev.starts_with(&dirs[i]) { - dirs.remove(i + 1); - } - } - } - - dirs -} - #[cfg(test)] mod tests { use crate::cache::GlobalHttpCache; @@ -2435,29 +2428,4 @@ console.log(b, "hello deno"); assert_eq!(paths, vec![]); } } - - #[test] - fn test_sort_and_remove_non_leaf_dirs() { - fn run_test(paths: Vec<&str>, expected_output: Vec<&str>) { - let paths = sort_and_remove_non_leaf_dirs( - paths.into_iter().map(PathBuf::from).collect(), - ); - let dirs: Vec<_> = - paths.iter().map(|dir| dir.to_string_lossy()).collect(); - assert_eq!(dirs, expected_output); - } - - run_test( - vec![ - "/test/asdf/test/asdf/", - "/test/asdf/test/asdf/test.ts", - "/test/asdf/", - "/test/asdf/", - "/testing/456/893/", - "/testing/456/893/test/", - ], - vec!["/test/asdf/", "/testing/456/893/"], - ); - run_test(vec![], vec![]); - } } diff --git a/cli/lsp/language_server.rs b/cli/lsp/language_server.rs index 754ccd680d..1271d8fd97 100644 --- a/cli/lsp/language_server.rs +++ b/cli/lsp/language_server.rs @@ -1046,10 +1046,12 @@ impl Inner { self.fmt_options = Default::default(); self.lint_options = Default::default(); if let Some(config_file) = self.get_config_file()? { + // this doesn't need to be an actual directory because flags is specified as `None` + let dummy_args_cwd = PathBuf::from("/"); let lint_options = config_file .to_lint_config() .and_then(|maybe_lint_config| { - LintOptions::resolve(maybe_lint_config, None) + LintOptions::resolve(maybe_lint_config, None, &dummy_args_cwd) }) .map_err(|err| { anyhow!("Unable to update lint configuration: {:?}", err) @@ -1057,7 +1059,7 @@ impl Inner { let fmt_options = config_file .to_fmt_config() .and_then(|maybe_fmt_config| { - FmtOptions::resolve(maybe_fmt_config, None) + FmtOptions::resolve(maybe_fmt_config, None, &dummy_args_cwd) }) .map_err(|err| { anyhow!("Unable to update formatter configuration: {:?}", err) diff --git a/cli/module_loader.rs b/cli/module_loader.rs index f795761086..9a2c511ff3 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -224,7 +224,7 @@ impl ModuleLoadPreparer { ) -> Result<(), AnyError> { let lib = self.options.ts_type_lib_window(); - let specifiers = self.collect_specifiers(files); + let specifiers = self.collect_specifiers(files)?; self .prepare_module_load( specifiers, @@ -235,28 +235,30 @@ impl ModuleLoadPreparer { .await } - fn collect_specifiers(&self, files: &[String]) -> Vec { - let excludes = match self.options.resolve_check_options() { - Ok(o) => o.exclude, - Err(_) => vec![], - }; - files - .iter() - .filter_map(|file| { - let file_url = - resolve_url_or_path(file, self.options.initial_cwd()).ok()?; - if file_url.scheme() != "file" { - return Some(file_url); - } - // ignore local files that match any of files listed in `exclude` option - let file_path = file_url.to_file_path().ok()?; - if excludes.iter().any(|e| file_path.starts_with(e)) { - None - } else { - Some(file_url) - } - }) - .collect::>() + fn collect_specifiers( + &self, + files: &[String], + ) -> Result, AnyError> { + let excludes = self.options.resolve_config_excludes()?; + Ok( + files + .iter() + .filter_map(|file| { + let file_url = + resolve_url_or_path(file, self.options.initial_cwd()).ok()?; + if file_url.scheme() != "file" { + return Some(file_url); + } + // ignore local files that match any of files listed in `exclude` option + let file_path = file_url.to_file_path().ok()?; + if excludes.matches_path(&file_path) { + None + } else { + Some(file_url) + } + }) + .collect::>(), + ) } } diff --git a/cli/tests/testdata/doc/invalid_url.out b/cli/tests/testdata/doc/invalid_url.out index 8be787e90e..038c53177a 100644 --- a/cli/tests/testdata/doc/invalid_url.out +++ b/cli/tests/testdata/doc/invalid_url.out @@ -1,4 +1,4 @@ -error: invalid URL: invalid domain character +error: Invalid URL 'https://raw.githubusercontent.com%2Fdyedgreen%2Fdeno-sqlite%2Frework_api%2Fmod.ts' Caused by: invalid domain character diff --git a/cli/tools/bench/mod.rs b/cli/tools/bench/mod.rs index 4cfd902780..1eb703813b 100644 --- a/cli/tools/bench/mod.rs +++ b/cli/tools/bench/mod.rs @@ -15,6 +15,8 @@ use crate::tools::test::format_test_error; use crate::tools::test::TestFilter; use crate::util::file_watcher; use crate::util::fs::collect_specifiers; +use crate::util::glob::FilePatterns; +use crate::util::glob::PathOrPattern; use crate::util::path::is_script_ext; use crate::version::get_user_agent; use crate::worker::CliMainWorkerFactory; @@ -393,13 +395,33 @@ async fn bench_specifiers( } /// Checks if the path has a basename and extension Deno supports for benches. -fn is_supported_bench_path(path: &Path) -> bool { +fn is_supported_bench_path(path: &Path, patterns: &FilePatterns) -> bool { + if !is_script_ext(path) { + false + } else if has_supported_bench_path_name(path) { + true + } else { + // allow someone to explicitly specify a path + let matches_exact_path_or_pattern = patterns + .include + .as_ref() + .map(|p| { + p.inner().iter().any(|p| match p { + PathOrPattern::Path(p) => p == path, + PathOrPattern::Pattern(p) => p.matches_path(path), + }) + }) + .unwrap_or(false); + matches_exact_path_or_pattern + } +} + +fn has_supported_bench_path_name(path: &Path) -> bool { if let Some(name) = path.file_stem() { let basename = name.to_string_lossy(); - (basename.ends_with("_bench") + basename.ends_with("_bench") || basename.ends_with(".bench") - || basename == "bench") - && is_script_ext(path) + || basename == "bench" } else { false } @@ -420,7 +442,7 @@ pub async fn run_benchmarks( Permissions::from_options(&cli_options.permissions_options())?; let specifiers = - collect_specifiers(&bench_options.files, is_supported_bench_path)?; + collect_specifiers(bench_options.files, is_supported_bench_path)?; if specifiers.is_empty() { return Err(generic_error("No bench modules found")); @@ -480,16 +502,21 @@ pub async fn run_benchmarks_with_watch( let bench_options = cli_options.resolve_bench_options(bench_flags)?; let _ = watcher_communicator.watch_paths(cli_options.watch_paths()); - if let Some(include) = &bench_options.files.include { - let _ = watcher_communicator.watch_paths(include.clone()); + if let Some(set) = &bench_options.files.include { + let watch_paths = set.base_paths(); + if !watch_paths.is_empty() { + let _ = watcher_communicator.watch_paths(watch_paths); + } } let graph_kind = cli_options.type_check_mode().as_graph_kind(); let module_graph_builder = factory.module_graph_builder().await?; let module_load_preparer = factory.module_load_preparer().await?; - let bench_modules = - collect_specifiers(&bench_options.files, is_supported_bench_path)?; + let bench_modules = collect_specifiers( + bench_options.files.clone(), + is_supported_bench_path, + )?; // Various bench files should not share the same permissions in terms of // `PermissionsContainer` - otherwise granting/revoking permissions in one @@ -509,16 +536,13 @@ pub async fn run_benchmarks_with_watch( let bench_modules_to_reload = if let Some(changed_paths) = changed_paths { - let changed_specifiers = changed_paths - .into_iter() - .filter_map(|p| ModuleSpecifier::from_file_path(p).ok()) - .collect::>(); + let changed_paths = changed_paths.into_iter().collect::>(); let mut result = Vec::new(); for bench_module_specifier in bench_modules { if has_graph_root_local_dependent_changed( &graph, &bench_module_specifier, - &changed_specifiers, + &changed_paths, ) { result.push(bench_module_specifier.clone()); } @@ -531,8 +555,10 @@ pub async fn run_benchmarks_with_watch( let worker_factory = Arc::new(factory.create_cli_main_worker_factory().await?); + // todo(THIS PR): why are we collecting specifiers twice in a row? + // Seems like a perf bug. let specifiers = - collect_specifiers(&bench_options.files, is_supported_bench_path)? + collect_specifiers(bench_options.files, is_supported_bench_path)? .into_iter() .filter(|specifier| bench_modules_to_reload.contains(specifier)) .collect::>(); diff --git a/cli/tools/coverage/mod.rs b/cli/tools/coverage/mod.rs index 9f5c142e7d..49bb5d5dee 100644 --- a/cli/tools/coverage/mod.rs +++ b/cli/tools/coverage/mod.rs @@ -9,6 +9,8 @@ use crate::npm::CliNpmResolver; use crate::tools::fmt::format_json; use crate::tools::test::is_supported_test_path; use crate::util::fs::FileCollector; +use crate::util::glob::FilePatterns; +use crate::util::glob::PathOrPatternSet; use crate::util::text_encoding::source_map_from_code; use deno_ast::MediaType; @@ -371,9 +373,23 @@ fn range_to_src_line_index( fn collect_coverages( files: FileFlags, + initial_cwd: &Path, ) -> Result, AnyError> { + let files = files.with_absolute_paths(initial_cwd); let mut coverages: Vec = Vec::new(); - let file_paths = FileCollector::new(|file_path| { + let file_patterns = FilePatterns { + include: Some({ + let files = if files.include.is_empty() { + vec![initial_cwd.to_path_buf()] + } else { + files.include + }; + PathOrPatternSet::from_absolute_paths(files)? + }), + exclude: PathOrPatternSet::from_absolute_paths(files.ignore) + .context("Invalid ignore pattern.")?, + }; + let file_paths = FileCollector::new(|file_path, _| { file_path .extension() .map(|ext| ext == "json") @@ -382,16 +398,13 @@ fn collect_coverages( .ignore_git_folder() .ignore_node_modules() .ignore_vendor_folder() - .add_ignore_paths(&files.ignore) - .collect_files(if files.include.is_empty() { - None - } else { - Some(&files.include) - })?; + .collect_file_patterns(file_patterns)?; for file_path in file_paths { - let json = fs::read_to_string(file_path.as_path())?; - let new_coverage: cdp::ScriptCoverage = serde_json::from_str(&json)?; + let new_coverage = fs::read_to_string(file_path.as_path()) + .map_err(AnyError::from) + .and_then(|json| serde_json::from_str(&json).map_err(AnyError::from)) + .with_context(|| format!("Failed reading '{}'", file_path.display()))?; coverages.push(new_coverage); } @@ -451,7 +464,8 @@ pub async fn cover_files( // Use the first include path as the default output path. let coverage_root = coverage_flags.files.include[0].clone(); - let script_coverages = collect_coverages(coverage_flags.files)?; + let script_coverages = + collect_coverages(coverage_flags.files, cli_options.initial_cwd())?; if script_coverages.is_empty() { return Err(generic_error("No coverage files found")); } diff --git a/cli/tools/doc.rs b/cli/tools/doc.rs index b0eecd044b..b8d6b8a87a 100644 --- a/cli/tools/doc.rs +++ b/cli/tools/doc.rs @@ -12,12 +12,13 @@ use crate::factory::CliFactory; use crate::graph_util::graph_lock_or_exit; use crate::graph_util::CreateGraphOptions; use crate::tsc::get_types_declaration_file_text; -use crate::util::glob::expand_globs; +use crate::util::fs::collect_specifiers; +use crate::util::glob::FilePatterns; +use crate::util::glob::PathOrPatternSet; use deno_core::anyhow::bail; use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::futures::FutureExt; -use deno_core::resolve_url_or_path; use deno_doc as doc; use deno_graph::CapturingModuleParser; use deno_graph::DefaultParsedSourceStore; @@ -100,19 +101,28 @@ pub async fn doc(flags: Flags, doc_flags: DocFlags) -> Result<(), AnyError> { let module_graph_builder = factory.module_graph_builder().await?; let maybe_lockfile = factory.maybe_lockfile(); - let expanded_globs = - expand_globs(source_files.iter().map(PathBuf::from).collect())?; - let module_specifiers: Result, AnyError> = - expanded_globs - .iter() - .map(|source_file| { - Ok(resolve_url_or_path( - &source_file.to_string_lossy(), - cli_options.initial_cwd(), - )?) - }) - .collect(); - let module_specifiers = module_specifiers?; + let module_specifiers = collect_specifiers( + FilePatterns { + include: Some(PathOrPatternSet::from_absolute_paths( + source_files + .iter() + .map(|p| { + if p.starts_with("https:") + || p.starts_with("http:") + || p.starts_with("file:") + { + // todo(dsherret): don't store URLs in PathBufs + PathBuf::from(p) + } else { + cli_options.initial_cwd().join(p) + } + }) + .collect(), + )?), + exclude: Default::default(), + }, + |_, _| true, + )?; let mut loader = module_graph_builder.create_graph_loader(); let graph = module_graph_builder .create_graph_with_options(CreateGraphOptions { diff --git a/cli/tools/fmt.rs b/cli/tools/fmt.rs index 111632d4ae..c35c72844f 100644 --- a/cli/tools/fmt.rs +++ b/cli/tools/fmt.rs @@ -8,7 +8,6 @@ //! the same functions as ops available in JS runtime. use crate::args::CliOptions; -use crate::args::FilesConfig; use crate::args::Flags; use crate::args::FmtFlags; use crate::args::FmtOptions; @@ -18,7 +17,9 @@ use crate::colors; use crate::factory::CliFactory; use crate::util::diff::diff; use crate::util::file_watcher; +use crate::util::fs::canonicalize_path; use crate::util::fs::FileCollector; +use crate::util::glob::FilePatterns; use crate::util::path::get_extension; use crate::util::text_encoding; use deno_ast::ParsedSource; @@ -72,7 +73,7 @@ pub async fn format(flags: Flags, fmt_flags: FmtFlags) -> Result<(), AnyError> { let cli_options = factory.cli_options(); let fmt_options = cli_options.resolve_fmt_options(fmt_flags)?; let files = - collect_fmt_files(&fmt_options.files).and_then(|files| { + collect_fmt_files(fmt_options.files.clone()).and_then(|files| { if files.is_empty() { Err(generic_error("No target files found.")) } else { @@ -85,13 +86,21 @@ pub async fn format(flags: Flags, fmt_flags: FmtFlags) -> Result<(), AnyError> { // check all files on any changed (https://github.com/denoland/deno/issues/12446) files .iter() - .any(|path| paths.contains(path)) + .any(|path| { + canonicalize_path(path) + .map(|path| paths.contains(&path)) + .unwrap_or(false) + }) .then_some(files) .unwrap_or_else(|| [].to_vec()) } else { files .into_iter() - .filter(|path| paths.contains(path)) + .filter(|path| { + canonicalize_path(path) + .map(|path| paths.contains(&path)) + .unwrap_or(false) + }) .collect::>() } } else { @@ -108,13 +117,14 @@ pub async fn format(flags: Flags, fmt_flags: FmtFlags) -> Result<(), AnyError> { let factory = CliFactory::from_flags(flags).await?; let cli_options = factory.cli_options(); let fmt_options = cli_options.resolve_fmt_options(fmt_flags)?; - let files = collect_fmt_files(&fmt_options.files).and_then(|files| { - if files.is_empty() { - Err(generic_error("No target files found.")) - } else { - Ok(files) - } - })?; + let files = + collect_fmt_files(fmt_options.files.clone()).and_then(|files| { + if files.is_empty() { + Err(generic_error("No target files found.")) + } else { + Ok(files) + } + })?; format_files(factory, fmt_options, files).await?; } @@ -144,13 +154,12 @@ async fn format_files( Ok(()) } -fn collect_fmt_files(files: &FilesConfig) -> Result, AnyError> { - FileCollector::new(is_supported_ext_fmt) +fn collect_fmt_files(files: FilePatterns) -> Result, AnyError> { + FileCollector::new(|path, _| is_supported_ext_fmt(path)) .ignore_git_folder() .ignore_node_modules() .ignore_vendor_folder() - .add_ignore_paths(&files.exclude) - .collect_files(files.include.as_deref()) + .collect_file_patterns(files) } /// Formats markdown (using ) and its code blocks diff --git a/cli/tools/lint.rs b/cli/tools/lint.rs index 7981fec090..a91f14ad89 100644 --- a/cli/tools/lint.rs +++ b/cli/tools/lint.rs @@ -2,7 +2,6 @@ //! This module provides file linting utilities using //! [`deno_lint`](https://github.com/denoland/deno_lint). -use crate::args::FilesConfig; use crate::args::Flags; use crate::args::LintFlags; use crate::args::LintOptions; @@ -12,7 +11,9 @@ use crate::colors; use crate::factory::CliFactory; use crate::tools::fmt::run_parallelized; use crate::util::file_watcher; +use crate::util::fs::canonicalize_path; use crate::util::fs::FileCollector; +use crate::util::glob::FilePatterns; use crate::util::path::is_script_ext; use crate::util::sync::AtomicFlag; use deno_ast::MediaType; @@ -66,21 +67,26 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> { let factory = CliFactory::from_flags(flags).await?; let cli_options = factory.cli_options(); let lint_options = cli_options.resolve_lint_options(lint_flags)?; - let files = - collect_lint_files(&lint_options.files).and_then(|files| { + let files = collect_lint_files(lint_options.files.clone()).and_then( + |files| { if files.is_empty() { Err(generic_error("No target files found.")) } else { Ok(files) } - })?; + }, + )?; _ = watcher_communicator.watch_paths(files.clone()); let lint_paths = if let Some(paths) = changed_paths { // lint all files on any changed (https://github.com/denoland/deno/issues/12446) files .iter() - .any(|path| paths.contains(path)) + .any(|path| { + canonicalize_path(path) + .map(|p| paths.contains(&p)) + .unwrap_or(false) + }) .then_some(files) .unwrap_or_else(|| [].to_vec()) } else { @@ -109,13 +115,14 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> { reporter_lock.lock().unwrap().close(1); success } else { - let target_files = collect_lint_files(files).and_then(|files| { - if files.is_empty() { - Err(generic_error("No target files found.")) - } else { - Ok(files) - } - })?; + let target_files = + collect_lint_files(files.clone()).and_then(|files| { + if files.is_empty() { + Err(generic_error("No target files found.")) + } else { + Ok(files) + } + })?; debug!("Found {} files", target_files.len()); lint_files(factory, lint_options, target_files).await? }; @@ -191,13 +198,12 @@ async fn lint_files( Ok(!has_error.is_raised()) } -fn collect_lint_files(files: &FilesConfig) -> Result, AnyError> { - FileCollector::new(is_script_ext) +fn collect_lint_files(files: FilePatterns) -> Result, AnyError> { + FileCollector::new(|path, _| is_script_ext(path)) .ignore_git_folder() .ignore_node_modules() .ignore_vendor_folder() - .add_ignore_paths(&files.exclude) - .collect_files(files.include.as_deref()) + .collect_file_patterns(files) } pub fn print_rules_list(json: bool, maybe_rules_tags: Option>) { diff --git a/cli/tools/test/mod.rs b/cli/tools/test/mod.rs index 2a5e87b2a1..2c226db4d7 100644 --- a/cli/tools/test/mod.rs +++ b/cli/tools/test/mod.rs @@ -1,7 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use crate::args::CliOptions; -use crate::args::FilesConfig; use crate::args::Flags; use crate::args::TestFlags; use crate::args::TestReporterConfig; @@ -17,6 +16,8 @@ use crate::module_loader::ModuleLoadPreparer; use crate::ops; use crate::util::file_watcher; use crate::util::fs::collect_specifiers; +use crate::util::glob::FilePatterns; +use crate::util::glob::PathOrPattern; use crate::util::path::get_extension; use crate::util::path::is_script_ext; use crate::util::path::mapped_specifier_for_tsc; @@ -1048,14 +1049,41 @@ pub async fn report_tests( (Ok(()), receiver) } +fn is_supported_test_path_predicate( + path: &Path, + patterns: &FilePatterns, +) -> bool { + if !is_script_ext(path) { + false + } else if has_supported_test_path_name(path) { + true + } else { + // allow someone to explicitly specify a path + let matches_exact_path_or_pattern = patterns + .include + .as_ref() + .map(|p| { + p.inner().iter().any(|p| match p { + PathOrPattern::Path(p) => p == path, + PathOrPattern::Pattern(p) => p.matches_path(path), + }) + }) + .unwrap_or(false); + matches_exact_path_or_pattern + } +} + /// Checks if the path has a basename and extension Deno supports for tests. pub(crate) fn is_supported_test_path(path: &Path) -> bool { + has_supported_test_path_name(path) && is_script_ext(path) +} + +fn has_supported_test_path_name(path: &Path) -> bool { if let Some(name) = path.file_stem() { let basename = name.to_string_lossy(); - (basename.ends_with("_test") + basename.ends_with("_test") || basename.ends_with(".test") - || basename == "test") - && is_script_ext(path) + || basename == "test" } else { false } @@ -1094,13 +1122,15 @@ fn is_supported_test_ext(path: &Path) -> bool { /// - Specifiers matching the `is_supported_test_path` are marked as `TestMode::Executable`. /// - Specifiers matching both predicates are marked as `TestMode::Both` fn collect_specifiers_with_test_mode( - files: &FilesConfig, + files: FilePatterns, include_inline: &bool, ) -> Result, AnyError> { - let module_specifiers = collect_specifiers(files, is_supported_test_path)?; + // todo(dsherret): there's no need to collect twice as it's slow + let module_specifiers = + collect_specifiers(files.clone(), is_supported_test_path_predicate)?; if *include_inline { - return collect_specifiers(files, is_supported_test_ext).map( + return collect_specifiers(files, |p, _| is_supported_test_ext(p)).map( |specifiers| { specifiers .into_iter() @@ -1136,7 +1166,7 @@ fn collect_specifiers_with_test_mode( /// as well. async fn fetch_specifiers_with_test_mode( file_fetcher: &FileFetcher, - files: &FilesConfig, + files: FilePatterns, doc: &bool, ) -> Result, AnyError> { let mut specifiers_with_mode = collect_specifiers_with_test_mode(files, doc)?; @@ -1174,7 +1204,7 @@ pub async fn run_tests( let specifiers_with_mode = fetch_specifiers_with_test_mode( file_fetcher, - &test_options.files, + test_options.files.clone(), &test_options.doc, ) .await?; @@ -1264,8 +1294,11 @@ pub async fn run_tests_with_watch( let test_options = cli_options.resolve_test_options(test_flags)?; let _ = watcher_communicator.watch_paths(cli_options.watch_paths()); - if let Some(include) = &test_options.files.include { - let _ = watcher_communicator.watch_paths(include.clone()); + if let Some(set) = &test_options.files.include { + let watch_paths = set.base_paths(); + if !watch_paths.is_empty() { + let _ = watcher_communicator.watch_paths(watch_paths); + } } let graph_kind = cli_options.type_check_mode().as_graph_kind(); @@ -1274,13 +1307,18 @@ pub async fn run_tests_with_watch( let module_graph_builder = factory.module_graph_builder().await?; let file_fetcher = factory.file_fetcher()?; let test_modules = if test_options.doc { - collect_specifiers(&test_options.files, is_supported_test_ext) + collect_specifiers(test_options.files.clone(), |p, _| { + is_supported_test_ext(p) + }) } else { - collect_specifiers(&test_options.files, is_supported_test_path) + collect_specifiers( + test_options.files.clone(), + is_supported_test_path_predicate, + ) }?; + let permissions = Permissions::from_options(&cli_options.permissions_options())?; - let graph = module_graph_builder .create_graph(graph_kind, test_modules.clone()) .await?; @@ -1293,16 +1331,13 @@ pub async fn run_tests_with_watch( let test_modules_to_reload = if let Some(changed_paths) = changed_paths { - let changed_specifiers = changed_paths - .into_iter() - .filter_map(|p| ModuleSpecifier::from_file_path(p).ok()) - .collect::>(); let mut result = Vec::new(); + let changed_paths = changed_paths.into_iter().collect::>(); for test_module_specifier in test_modules { if has_graph_root_local_dependent_changed( &graph, &test_module_specifier, - &changed_specifiers, + &changed_paths, ) { result.push(test_module_specifier.clone()); } @@ -1317,7 +1352,7 @@ pub async fn run_tests_with_watch( let module_load_preparer = factory.module_load_preparer().await?; let specifiers_with_mode = fetch_specifiers_with_test_mode( file_fetcher, - &test_options.files, + test_options.files.clone(), &test_options.doc, ) .await? diff --git a/cli/util/fs.rs b/cli/util/fs.rs index f9fe9424f8..86b17754bf 100644 --- a/cli/util/fs.rs +++ b/cli/util/fs.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use deno_core::anyhow::anyhow; use deno_core::anyhow::Context; use deno_core::error::AnyError; pub use deno_core::normalize_path; @@ -8,7 +9,7 @@ use deno_core::ModuleSpecifier; use deno_runtime::deno_crypto::rand; use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::PathClean; -use std::borrow::Cow; +use std::collections::HashSet; use std::env::current_dir; use std::fmt::Write as FmtWrite; use std::fs::OpenOptions; @@ -21,11 +22,13 @@ use std::sync::Arc; use std::time::Duration; use walkdir::WalkDir; -use crate::args::FilesConfig; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; use crate::util::progress_bar::ProgressMessagePrompt; +use super::glob::FilePatterns; +use super::glob::PathOrPattern; +use super::glob::PathOrPatternSet; use super::path::specifier_to_file_path; /// Writes the file to the file system at a temporary path, then @@ -244,18 +247,16 @@ pub fn resolve_from_cwd(path: &Path) -> Result { /// Collects file paths that satisfy the given predicate, by recursively walking `files`. /// If the walker visits a path that is listed in `ignore`, it skips descending into the directory. -pub struct FileCollector bool> { - canonicalized_ignore: Vec, +pub struct FileCollector bool> { file_filter: TFilter, ignore_git_folder: bool, ignore_node_modules: bool, ignore_vendor_folder: bool, } -impl bool> FileCollector { +impl bool> FileCollector { pub fn new(file_filter: TFilter) -> Self { Self { - canonicalized_ignore: Default::default(), file_filter, ignore_git_folder: false, ignore_node_modules: false, @@ -263,14 +264,6 @@ impl bool> FileCollector { } } - pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self { - // retain only the paths which exist and ignore the rest - self - .canonicalized_ignore - .extend(paths.iter().filter_map(|i| canonicalize_path(i).ok())); - self - } - pub fn ignore_node_modules(mut self) -> Self { self.ignore_node_modules = true; self @@ -286,58 +279,62 @@ impl bool> FileCollector { self } - pub fn collect_files( + pub fn collect_file_patterns( &self, - files: Option<&[PathBuf]>, + file_patterns: FilePatterns, ) -> Result, AnyError> { let mut target_files = Vec::new(); - let files = if let Some(files) = files { - Cow::Borrowed(files) - } else { - Cow::Owned(vec![PathBuf::from(".")]) - }; - for file in files.iter() { - if let Ok(file) = canonicalize_path(file) { - // use an iterator like this in order to minimize the number of file system operations - let mut iterator = WalkDir::new(&file).into_iter(); - loop { - let e = match iterator.next() { - None => break, - Some(Err(_)) => continue, - Some(Ok(entry)) => entry, - }; - let file_type = e.file_type(); - let is_dir = file_type.is_dir(); - if let Ok(c) = canonicalize_path(e.path()) { - if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) { - if is_dir { - iterator.skip_current_dir(); - } - } else if is_dir { - let should_ignore_dir = c - .file_name() - .map(|dir_name| { - let dir_name = dir_name.to_string_lossy().to_lowercase(); - let is_ignored_file = match dir_name.as_str() { - "node_modules" => self.ignore_node_modules, - "vendor" => self.ignore_vendor_folder, - ".git" => self.ignore_git_folder, - _ => false, - }; - // allow the user to opt out of ignoring by explicitly specifying the dir - file != c && is_ignored_file - }) - .unwrap_or(false); - if should_ignore_dir { - iterator.skip_current_dir(); - } - } else if (self.file_filter)(e.path()) { - target_files.push(c); - } - } else if is_dir { - // failed canonicalizing, so skip it + let mut visited_paths = HashSet::new(); + let file_patterns_by_base = file_patterns.split_by_base(); + for (base, file_patterns) in file_patterns_by_base { + let file = normalize_path(base); + // use an iterator in order to minimize the number of file system operations + let mut iterator = WalkDir::new(&file) + .follow_links(false) // the default, but be explicit + .into_iter(); + loop { + let e = match iterator.next() { + None => break, + Some(Err(_)) => continue, + Some(Ok(entry)) => entry, + }; + let file_type = e.file_type(); + let is_dir = file_type.is_dir(); + let c = e.path().to_path_buf(); + if file_patterns.exclude.matches_path(&c) + || !is_dir + && !file_patterns + .include + .as_ref() + .map(|i| i.matches_path(&c)) + .unwrap_or(true) + { + if is_dir { iterator.skip_current_dir(); } + } else if is_dir { + let should_ignore_dir = c + .file_name() + .map(|dir_name| { + let dir_name = dir_name.to_string_lossy().to_lowercase(); + let is_ignored_file = match dir_name.as_str() { + "node_modules" => self.ignore_node_modules, + "vendor" => self.ignore_vendor_folder, + ".git" => self.ignore_git_folder, + _ => false, + }; + // allow the user to opt out of ignoring by explicitly specifying the dir + file != c && is_ignored_file + }) + .unwrap_or(false) + || !visited_paths.insert(c.clone()); + if should_ignore_dir { + iterator.skip_current_dir(); + } + } else if (self.file_filter)(&c, &file_patterns) + && visited_paths.insert(c.clone()) + { + target_files.push(c); } } } @@ -349,53 +346,67 @@ impl bool> FileCollector { /// Specifiers that start with http and https are left intact. /// Note: This ignores all .git and node_modules folders. pub fn collect_specifiers( - files: &FilesConfig, - predicate: impl Fn(&Path) -> bool, + mut files: FilePatterns, + predicate: impl Fn(&Path, &FilePatterns) -> bool, ) -> Result, AnyError> { let mut prepared = vec![]; - let file_collector = FileCollector::new(predicate) - .add_ignore_paths(&files.exclude) + + // break out the remote specifiers + if let Some(include_mut) = &mut files.include { + let includes = std::mem::take(include_mut); + let path_or_patterns = includes.into_path_or_patterns(); + let mut result = Vec::with_capacity(path_or_patterns.len()); + for path_or_pattern in path_or_patterns { + match path_or_pattern { + PathOrPattern::Path(path) => { + // todo(dsherret): we should improve this to not store URLs in a PathBuf + let path_str = path.to_string_lossy(); + let lowercase_path = path_str.to_lowercase(); + if lowercase_path.starts_with("http://") + || lowercase_path.starts_with("https://") + { + // take out the url + let url = ModuleSpecifier::parse(&path_str) + .with_context(|| format!("Invalid URL '{}'", path_str))?; + prepared.push(url); + } else if lowercase_path.starts_with("file://") { + let url = ModuleSpecifier::parse(&path_str) + .with_context(|| format!("Invalid URL '{}'", path_str))?; + let p = specifier_to_file_path(&url)?; + if p.is_dir() { + result.push(PathOrPattern::Path(p)); + } else { + prepared.push(url) + } + } else if path.is_dir() { + result.push(PathOrPattern::Path(path)); + } else if !files.exclude.matches_path(&path) { + let url = ModuleSpecifier::from_file_path(&path) + .map_err(|_| anyhow!("Invalid file path '{}'", path.display()))?; + prepared.push(url); + } + } + PathOrPattern::Pattern(pattern) => { + // add it back + result.push(PathOrPattern::Pattern(pattern)); + } + } + } + *include_mut = PathOrPatternSet::new(result); + } + + let collected_files = FileCollector::new(predicate) .ignore_git_folder() .ignore_node_modules() - .ignore_vendor_folder(); + .ignore_vendor_folder() + .collect_file_patterns(files)?; + let mut collected_files_as_urls = collected_files + .iter() + .map(|f| ModuleSpecifier::from_file_path(f).unwrap()) + .collect::>(); - let root_path = current_dir()?; - let include_files = if let Some(include) = &files.include { - Cow::Borrowed(include) - } else { - Cow::Owned(vec![root_path.clone()]) - }; - for path in include_files.iter() { - let path = path.to_string_lossy(); - let lowercase_path = path.to_lowercase(); - if lowercase_path.starts_with("http://") - || lowercase_path.starts_with("https://") - { - let url = ModuleSpecifier::parse(&path)?; - prepared.push(url); - continue; - } - - let p = if lowercase_path.starts_with("file://") { - specifier_to_file_path(&ModuleSpecifier::parse(&path)?)? - } else { - root_path.join(path.as_ref()) - }; - let p = normalize_path(p); - if p.is_dir() { - let test_files = file_collector.collect_files(Some(&[p]))?; - let mut test_files_as_urls = test_files - .iter() - .map(|f| ModuleSpecifier::from_file_path(f).unwrap()) - .collect::>(); - - test_files_as_urls.sort(); - prepared.extend(test_files_as_urls); - } else { - let url = ModuleSpecifier::from_file_path(p).unwrap(); - prepared.push(url); - } - } + collected_files_as_urls.sort(); + prepared.extend(collected_files_as_urls); Ok(prepared) } @@ -812,18 +823,29 @@ mod tests { let ignore_dir_files = ["g.d.ts", ".gitignore"]; create_files(&ignore_dir_path, &ignore_dir_files); - let file_collector = FileCollector::new(|path| { + let file_patterns = FilePatterns { + include: Some( + PathOrPatternSet::from_absolute_paths( + vec![root_dir_path.to_path_buf()], + ) + .unwrap(), + ), + exclude: PathOrPatternSet::from_absolute_paths(vec![ + ignore_dir_path.to_path_buf() + ]) + .unwrap(), + }; + let file_collector = FileCollector::new(|path, _| { // exclude dotfiles path .file_name() .and_then(|f| f.to_str()) .map(|f| !f.starts_with('.')) .unwrap_or(false) - }) - .add_ignore_paths(&[ignore_dir_path.to_path_buf()]); + }); let result = file_collector - .collect_files(Some(&[root_dir_path.to_path_buf()])) + .collect_file_patterns(file_patterns.clone()) .unwrap(); let expected = [ "README.md", @@ -850,7 +872,7 @@ mod tests { .ignore_node_modules() .ignore_vendor_folder(); let result = file_collector - .collect_files(Some(&[root_dir_path.to_path_buf()])) + .collect_file_patterns(file_patterns.clone()) .unwrap(); let expected = [ "README.md", @@ -869,12 +891,20 @@ mod tests { assert_eq!(file_names, expected); // test opting out of ignoring by specifying the dir - let result = file_collector - .collect_files(Some(&[ - root_dir_path.to_path_buf(), - root_dir_path.to_path_buf().join("child/node_modules/"), - ])) - .unwrap(); + let file_patterns = FilePatterns { + include: Some( + PathOrPatternSet::from_absolute_paths(vec![ + root_dir_path.to_path_buf(), + root_dir_path.to_path_buf().join("child/node_modules/"), + ]) + .unwrap(), + ), + exclude: PathOrPatternSet::from_absolute_paths(vec![ + ignore_dir_path.to_path_buf() + ]) + .unwrap(), + }; + let result = file_collector.collect_file_patterns(file_patterns).unwrap(); let expected = [ "README.md", "a.ts", @@ -930,7 +960,7 @@ mod tests { let ignore_dir_files = ["g.d.ts", ".gitignore"]; create_files(&ignore_dir_path, &ignore_dir_files); - let predicate = |path: &Path| { + let predicate = |path: &Path, _: &FilePatterns| { // exclude dotfiles path .file_name() @@ -940,38 +970,46 @@ mod tests { }; let result = collect_specifiers( - &FilesConfig { - include: Some(vec![ - PathBuf::from("http://localhost:8080"), - root_dir_path.to_path_buf(), - PathBuf::from("https://localhost:8080".to_string()), - ]), - exclude: vec![ignore_dir_path.to_path_buf()], + FilePatterns { + include: Some( + PathOrPatternSet::from_absolute_paths(vec![ + PathBuf::from("http://localhost:8080"), + root_dir_path.to_path_buf(), + PathBuf::from("https://localhost:8080".to_string()), + ]) + .unwrap(), + ), + exclude: PathOrPatternSet::from_absolute_paths(vec![ + ignore_dir_path.to_path_buf() + ]) + .unwrap(), }, predicate, ) .unwrap(); - let root_dir_url = - ModuleSpecifier::from_file_path(root_dir_path.canonicalize()) - .unwrap() - .to_string(); - let expected: Vec = [ - "http://localhost:8080", - &format!("{root_dir_url}/a.ts"), - &format!("{root_dir_url}/b.js"), - &format!("{root_dir_url}/c.tsx"), - &format!("{root_dir_url}/child/README.md"), - &format!("{root_dir_url}/child/e.mjs"), - &format!("{root_dir_url}/child/f.mjsx"), - &format!("{root_dir_url}/d.jsx"), - "https://localhost:8080", - ] - .iter() - .map(|f| ModuleSpecifier::parse(f).unwrap()) - .collect::>(); + let root_dir_url = ModuleSpecifier::from_file_path(&root_dir_path) + .unwrap() + .to_string(); + let expected = vec![ + "http://localhost:8080/".to_string(), + "https://localhost:8080/".to_string(), + format!("{root_dir_url}/a.ts"), + format!("{root_dir_url}/b.js"), + format!("{root_dir_url}/c.tsx"), + format!("{root_dir_url}/child/README.md"), + format!("{root_dir_url}/child/e.mjs"), + format!("{root_dir_url}/child/f.mjsx"), + format!("{root_dir_url}/d.jsx"), + ]; - assert_eq!(result, expected); + assert_eq!( + result + .into_iter() + .map(|s| s.to_string()) + .collect::>(), + expected + ); let scheme = if cfg!(target_os = "windows") { "file:///" @@ -979,28 +1017,34 @@ mod tests { "file://" }; let result = collect_specifiers( - &FilesConfig { - include: Some(vec![PathBuf::from(format!( - "{}{}", - scheme, - root_dir_path.join("child").to_string().replace('\\', "/") - ))]), - exclude: vec![], + FilePatterns { + include: Some( + PathOrPatternSet::from_absolute_paths(vec![PathBuf::from(format!( + "{}{}", + scheme, + root_dir_path.join("child").to_string().replace('\\', "/") + ))]) + .unwrap(), + ), + exclude: Default::default(), }, predicate, ) .unwrap(); - let expected: Vec = [ - &format!("{root_dir_url}/child/README.md"), - &format!("{root_dir_url}/child/e.mjs"), - &format!("{root_dir_url}/child/f.mjsx"), - ] - .iter() - .map(|f| ModuleSpecifier::parse(f).unwrap()) - .collect::>(); + let expected = vec![ + format!("{root_dir_url}/child/README.md"), + format!("{root_dir_url}/child/e.mjs"), + format!("{root_dir_url}/child/f.mjsx"), + ]; - assert_eq!(result, expected); + assert_eq!( + result + .into_iter() + .map(|s| s.to_string()) + .collect::>(), + expected + ); } #[tokio::test] diff --git a/cli/util/glob.rs b/cli/util/glob.rs index f0cabc2ece..7bd600167a 100644 --- a/cli/util/glob.rs +++ b/cli/util/glob.rs @@ -5,30 +5,238 @@ use std::path::PathBuf; use deno_core::anyhow::Context; use deno_core::error::AnyError; +use deno_core::normalize_path; +use deno_core::url::Url; +use indexmap::IndexMap; -pub fn expand_globs(paths: Vec) -> Result, AnyError> { - let mut new_paths = vec![]; - for path in paths { - let path_str = path.to_string_lossy(); - if is_glob_pattern(&path_str) { - let globbed_paths = glob(&path_str)?; +use super::path::specifier_to_file_path; - for globbed_path_result in globbed_paths { - new_paths.push(globbed_path_result?); +#[derive(Clone, Default, Debug, Eq, PartialEq)] +pub struct FilePatterns { + pub include: Option, + pub exclude: PathOrPatternSet, +} + +impl FilePatterns { + pub fn matches_specifier(&self, specifier: &Url) -> bool { + let path = match specifier_to_file_path(specifier) { + Ok(path) => path, + Err(_) => return true, + }; + self.matches_path(&path) + } + + pub fn matches_path(&self, path: &Path) -> bool { + // Skip files in the exclude list. + if self.exclude.matches_path(path) { + return false; + } + + // Ignore files not in the include list if it's present. + self + .include + .as_ref() + .map(|m| m.matches_path(path)) + .unwrap_or(true) + } + + /// Creates a collection of `FilePatterns` by base where the containing patterns + /// are only the ones applicable to the base. + /// + /// The order these are returned in is the order that the directory traversal + /// should occur in. + pub fn split_by_base(&self) -> Vec<(PathBuf, Self)> { + let Some(include) = &self.include else { + return Vec::new(); + }; + + let mut include_paths = Vec::new(); + let mut include_patterns = Vec::new(); + for path_or_pattern in &include.0 { + match path_or_pattern { + PathOrPattern::Path(path) => include_paths.push((path.is_file(), path)), + PathOrPattern::Pattern(pattern) => include_patterns.push(pattern), } - } else { - new_paths.push(path); + } + let include_patterns_by_base_path = include_patterns.into_iter().fold( + IndexMap::new(), + |mut map: IndexMap<_, Vec<_>>, p| { + map.entry(p.base_path()).or_default().push(p); + map + }, + ); + let exclude_by_base_path = self + .exclude + .0 + .iter() + .map(|s| (s.base_path(), s)) + .collect::>(); + let get_applicable_excludes = + |is_file_path: bool, base_path: &PathBuf| -> Vec { + exclude_by_base_path + .iter() + .filter_map(|(exclude_base_path, exclude)| { + match exclude { + PathOrPattern::Path(exclude_path) => { + // For explicitly specified files, ignore when the exclude path starts + // with it. Regardless, include excludes that are on a sub path of the dir. + if is_file_path && base_path.starts_with(exclude_path) + || exclude_path.starts_with(base_path) + { + Some((*exclude).clone()) + } else { + None + } + } + PathOrPattern::Pattern(_) => { + // include globs that's are sub paths or a parent path + if exclude_base_path.starts_with(base_path) + || base_path.starts_with(exclude_base_path) + { + Some((*exclude).clone()) + } else { + None + } + } + } + }) + .collect::>() + }; + + let mut result = Vec::with_capacity( + include_paths.len() + include_patterns_by_base_path.len(), + ); + for (is_file, path) in include_paths { + let applicable_excludes = get_applicable_excludes(is_file, path); + result.push(( + path.clone(), + Self { + include: Some(PathOrPatternSet::new(vec![PathOrPattern::Path( + path.clone(), + )])), + exclude: PathOrPatternSet::new(applicable_excludes), + }, + )); + } + + // todo(dsherret): This could be further optimized by not including + // patterns that will only ever match another base. + for base_path in include_patterns_by_base_path.keys() { + let applicable_excludes = get_applicable_excludes(false, base_path); + let mut applicable_includes = Vec::new(); + // get all patterns that apply to the current or ancestor directories + for path in base_path.ancestors() { + if let Some(patterns) = include_patterns_by_base_path.get(path) { + applicable_includes.extend( + patterns + .iter() + .map(|p| PathOrPattern::Pattern((*p).clone())), + ); + } + } + result.push(( + base_path.clone(), + Self { + include: Some(PathOrPatternSet::new(applicable_includes)), + exclude: PathOrPatternSet::new(applicable_excludes), + }, + )); + } + + // Sort by the longest base path first. This ensures that we visit opted into + // nested directories first before visiting the parent directory. The directory + // traverser will handle not going into directories it's already been in. + result.sort_by(|a, b| b.0.as_os_str().len().cmp(&a.0.as_os_str().len())); + + result + } +} + +#[derive(Clone, Default, Debug, Eq, PartialEq)] +pub struct PathOrPatternSet(Vec); + +impl PathOrPatternSet { + pub fn new(elements: Vec) -> Self { + Self(elements) + } + + pub fn from_absolute_paths(path: Vec) -> Result { + Ok(Self( + path + .into_iter() + .map(PathOrPattern::new) + .collect::, _>>()?, + )) + } + + pub fn inner(&self) -> &Vec { + &self.0 + } + + pub fn into_path_or_patterns(self) -> Vec { + self.0 + } + + pub fn matches_path(&self, path: &Path) -> bool { + self.0.iter().any(|p| p.matches_path(path)) + } + + pub fn base_paths(&self) -> Vec { + let mut result = Vec::with_capacity(self.0.len()); + for element in &self.0 { + match element { + PathOrPattern::Path(path) => { + result.push(path.to_path_buf()); + } + PathOrPattern::Pattern(pattern) => { + result.push(pattern.base_path()); + } + } + } + result + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum PathOrPattern { + Path(PathBuf), + Pattern(GlobPattern), +} + +impl PathOrPattern { + pub fn new(path: PathBuf) -> Result { + let path_str = path.to_string_lossy(); + // todo(dsherret): don't store URLs in PathBufs + if path_str.starts_with("http:") + || path_str.starts_with("https:") + || path_str.starts_with("file:") + { + return Ok(Self::Path(path)); + } + + GlobPattern::new_if_pattern(&path_str).map(|maybe_pattern| { + maybe_pattern + .map(PathOrPattern::Pattern) + .unwrap_or_else(|| PathOrPattern::Path(normalize_path(path))) + }) + } + + pub fn matches_path(&self, path: &Path) -> bool { + match self { + PathOrPattern::Path(p) => path.starts_with(p), + PathOrPattern::Pattern(p) => p.matches_path(path), } } - Ok(new_paths) -} - -pub fn glob(pattern: &str) -> Result { - glob::glob_with(&escape_brackets(pattern), match_options()) - .with_context(|| format!("Failed to expand glob: \"{}\"", pattern)) + pub fn base_path(&self) -> PathBuf { + match self { + PathOrPattern::Path(p) => p.clone(), + PathOrPattern::Pattern(p) => p.base_path(), + } + } } +#[derive(Debug, Clone, Eq, PartialEq)] pub struct GlobPattern(glob::Pattern); impl GlobPattern { @@ -40,35 +248,38 @@ impl GlobPattern { } pub fn new(pattern: &str) -> Result { - let pattern = glob::Pattern::new(pattern) - .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?; + let pattern = + glob::Pattern::new(&escape_brackets(pattern).replace('\\', "/")) + .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?; Ok(Self(pattern)) } pub fn matches_path(&self, path: &Path) -> bool { - self.0.matches_path(path) - } -} - -pub struct GlobSet(Vec); - -impl GlobSet { - pub fn new(matchers: Vec) -> Self { - Self(matchers) + self.0.matches_path_with(path, match_options()) } - pub fn matches_path(&self, path: &Path) -> bool { - for pattern in &self.0 { - if pattern.matches_path(path) { - return true; - } - } - false + pub fn base_path(&self) -> PathBuf { + let base_path = self + .0 + .as_str() + .split('/') + .take_while(|c| !has_glob_chars(c)) + .collect::>() + .join(std::path::MAIN_SEPARATOR_STR); + PathBuf::from(base_path) } } pub fn is_glob_pattern(path: &str) -> bool { - path.chars().any(|c| matches!(c, '*' | '?')) + !path.starts_with("http:") + && !path.starts_with("https:") + && !path.starts_with("file:") + && has_glob_chars(path) +} + +fn has_glob_chars(pattern: &str) -> bool { + // we don't support [ and ] + pattern.chars().any(|c| matches!(c, '*' | '?')) } fn escape_brackets(pattern: &str) -> String { @@ -92,17 +303,161 @@ fn match_options() -> glob::MatchOptions { #[cfg(test)] mod test { + use pretty_assertions::assert_eq; + use test_util::TempDir; + use super::*; - #[test] - pub fn glob_set_matches_path() { - let glob_set = GlobSet::new(vec![ - GlobPattern::new("foo/bar").unwrap(), - GlobPattern::new("foo/baz").unwrap(), - ]); + // For easier comparisons in tests. + #[derive(Debug, PartialEq, Eq)] + struct ComparableFilePatterns { + include: Option>, + exclude: Vec, + } - assert!(glob_set.matches_path(Path::new("foo/bar"))); - assert!(glob_set.matches_path(Path::new("foo/baz"))); - assert!(!glob_set.matches_path(Path::new("foo/qux"))); + impl ComparableFilePatterns { + pub fn new(root: &Path, file_patterns: &FilePatterns) -> Self { + fn path_or_pattern_to_string(root: &Path, p: &PathOrPattern) -> String { + match p { + PathOrPattern::Path(p) => p + .strip_prefix(root) + .unwrap() + .to_string_lossy() + .replace('\\', "/"), + PathOrPattern::Pattern(p) => p + .0 + .as_str() + .strip_prefix(&format!( + "{}/", + root.to_string_lossy().replace('\\', "/") + )) + .unwrap() + .to_string(), + } + } + + Self { + include: file_patterns.include.as_ref().map(|p| { + p.0 + .iter() + .map(|p| path_or_pattern_to_string(root, p)) + .collect() + }), + exclude: file_patterns + .exclude + .0 + .iter() + .map(|p| path_or_pattern_to_string(root, p)) + .collect(), + } + } + + pub fn from_split( + root: &Path, + patterns_by_base: &[(PathBuf, FilePatterns)], + ) -> Vec<(String, ComparableFilePatterns)> { + patterns_by_base + .iter() + .map(|(base_path, file_patterns)| { + ( + base_path + .strip_prefix(root) + .unwrap() + .to_string_lossy() + .replace('\\', "/"), + ComparableFilePatterns::new(root, file_patterns), + ) + }) + .collect() + } + } + + #[test] + fn should_split_globs_by_base_dir() { + let temp_dir = TempDir::new(); + let patterns = FilePatterns { + include: Some(PathOrPatternSet::new(vec![ + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/**/*.ts", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/sub/deeper/**/*.js", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/other/**/*.js", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Path(temp_dir.path().join("sub/file.ts").to_path_buf()), + ])), + exclude: PathOrPatternSet::new(vec![ + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/other/**/*.ts", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Path( + temp_dir + .path() + .join("inner/sub/deeper/file.js") + .to_path_buf(), + ), + ]), + }; + let split = ComparableFilePatterns::from_split( + temp_dir.path().as_path(), + &patterns.split_by_base(), + ); + assert_eq!( + split, + vec![ + ( + "inner/sub/deeper".to_string(), + ComparableFilePatterns { + include: Some(vec![ + "inner/sub/deeper/**/*.js".to_string(), + "inner/**/*.ts".to_string(), + ]), + exclude: vec!["inner/sub/deeper/file.js".to_string()], + } + ), + ( + "sub/file.ts".to_string(), + ComparableFilePatterns { + include: Some(vec!["sub/file.ts".to_string()]), + exclude: vec![], + } + ), + ( + "inner".to_string(), + ComparableFilePatterns { + include: Some(vec!["inner/**/*.ts".to_string()]), + exclude: vec![ + "inner/other/**/*.ts".to_string(), + "inner/sub/deeper/file.js".to_string(), + ], + } + ), + ( + "other".to_string(), + ComparableFilePatterns { + include: Some(vec!["other/**/*.js".to_string()]), + exclude: vec![], + } + ) + ] + ); } }