1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2024-11-24 15:19:26 -05:00

fix(lsp): add a document preload file system entry limit (#18553)

I was testing this out and it's badly needed. For now, it's not
configurable and limited to 1,000 file system entries.

Related to #18538
This commit is contained in:
David Sherret 2023-04-01 15:10:30 -04:00 committed by GitHub
parent 30065af0f2
commit 0210c1cadf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 237 additions and 154 deletions

View file

@ -485,8 +485,12 @@ impl Config {
.unwrap_or_else(|| self.settings.workspace.enable)
}
/// Gets the root directories or file paths based on the workspace config.
pub fn enabled_root_urls(&self) -> Vec<Url> {
/// Gets the directories or specifically enabled file paths based on the
/// workspace config.
///
/// WARNING: This may incorrectly have some directory urls as being
/// represented as file urls.
pub fn enabled_urls(&self) -> Vec<Url> {
let mut urls: Vec<Url> = Vec::new();
if !self.settings.workspace.enable && self.enabled_paths.is_empty() {
@ -501,12 +505,15 @@ impl Config {
urls.push(workspace.clone());
}
}
if urls.is_empty() {
if let Some(root_dir) = &self.root_uri {
urls.push(root_dir.clone())
}
}
sort_and_remove_non_leaf_urls(&mut urls);
// sort for determinism
urls.sort();
urls
}
@ -646,26 +653,12 @@ impl Config {
}
}
/// Removes any URLs that are a descendant of another URL in the collection.
fn sort_and_remove_non_leaf_urls(dirs: &mut Vec<Url>) {
if dirs.is_empty() {
return;
}
dirs.sort();
for i in (0..dirs.len() - 1).rev() {
let prev = &dirs[i + 1];
if prev.as_str().starts_with(dirs[i].as_str()) {
dirs.remove(i + 1);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use deno_core::resolve_url;
use deno_core::serde_json::json;
use pretty_assertions::assert_eq;
#[test]
fn test_config_specifier_enabled() {
@ -827,41 +820,16 @@ mod tests {
}
#[test]
fn test_sort_and_remove_non_leaf_urls() {
fn run_test(dirs: Vec<&str>, expected_output: Vec<&str>) {
let mut dirs = dirs
.into_iter()
.map(|dir| Url::parse(dir).unwrap())
.collect();
sort_and_remove_non_leaf_urls(&mut dirs);
let dirs: Vec<_> = dirs.iter().map(|dir| dir.as_str()).collect();
assert_eq!(dirs, expected_output);
}
run_test(
vec![
"file:///test/asdf/test/asdf/",
"file:///test/asdf/",
"file:///test/asdf/",
"file:///testing/456/893/",
"file:///testing/456/893/test/",
],
vec!["file:///test/asdf/", "file:///testing/456/893/"],
);
run_test(vec![], vec![]);
}
#[test]
fn config_enabled_root_urls() {
fn config_enabled_urls() {
let mut config = Config::new();
let root_dir = Url::parse("file:///example/").unwrap();
config.root_uri = Some(root_dir.clone());
config.settings.workspace.enable = false;
config.settings.workspace.enable_paths = Vec::new();
assert_eq!(config.enabled_root_urls(), vec![]);
assert_eq!(config.enabled_urls(), vec![]);
config.settings.workspace.enable = true;
assert_eq!(config.enabled_root_urls(), vec![root_dir]);
assert_eq!(config.enabled_urls(), vec![root_dir]);
config.settings.workspace.enable = false;
let root_dir1 = Url::parse("file:///root1/").unwrap();
@ -871,8 +839,8 @@ mod tests {
(
root_dir1.clone(),
vec![
root_dir1.join("sub_dir").unwrap(),
root_dir1.join("sub_dir/other").unwrap(),
root_dir1.join("sub_dir/").unwrap(),
root_dir1.join("sub_dir/other/").unwrap(),
root_dir1.join("test.ts").unwrap(),
],
),
@ -881,9 +849,10 @@ mod tests {
]);
assert_eq!(
config.enabled_root_urls(),
config.enabled_urls(),
vec![
root_dir1.join("sub_dir").unwrap(),
root_dir1.join("sub_dir/").unwrap(),
root_dir1.join("sub_dir/other/").unwrap(),
root_dir1.join("test.ts").unwrap(),
root_dir2.join("other.ts").unwrap(),
root_dir3

View file

@ -16,6 +16,7 @@ use crate::cache::HttpCache;
use crate::file_fetcher::get_source_from_bytes;
use crate::file_fetcher::map_content_type;
use crate::file_fetcher::SUPPORTED_SCHEMES;
use crate::lsp::logging::lsp_warn;
use crate::node;
use crate::node::node_resolve_npm_reference;
use crate::node::NodeResolution;
@ -1160,7 +1161,7 @@ impl Documents {
pub fn update_config(
&mut self,
root_urls: Vec<Url>,
enabled_urls: Vec<Url>,
maybe_import_map: Option<Arc<import_map::ImportMap>>,
maybe_config_file: Option<&ConfigFile>,
maybe_package_json: Option<&PackageJson>,
@ -1168,17 +1169,17 @@ impl Documents {
npm_resolution: NpmResolution,
) {
fn calculate_resolver_config_hash(
root_urls: &[Url],
enabled_urls: &[Url],
maybe_import_map: Option<&import_map::ImportMap>,
maybe_jsx_config: Option<&JsxImportSourceConfig>,
maybe_package_json_deps: Option<&PackageJsonDeps>,
) -> u64 {
let mut hasher = FastInsecureHasher::default();
hasher.write_hashable(&{
// ensure these are sorted (they should be, but this is a safeguard)
let mut root_urls = root_urls.to_vec();
root_urls.sort_unstable();
root_urls
// ensure these are sorted so the hashing is deterministic
let mut enabled_urls = enabled_urls.to_vec();
enabled_urls.sort_unstable();
enabled_urls
});
if let Some(import_map) = maybe_import_map {
hasher.write_str(&import_map.to_json());
@ -1195,7 +1196,7 @@ impl Documents {
let maybe_jsx_config =
maybe_config_file.and_then(|cf| cf.to_maybe_jsx_import_source_config());
let new_resolver_config_hash = calculate_resolver_config_hash(
&root_urls,
&enabled_urls,
maybe_import_map.as_deref(),
maybe_jsx_config.as_ref(),
maybe_package_json_deps.as_ref(),
@ -1235,14 +1236,14 @@ impl Documents {
// only refresh the dependencies if the underlying configuration has changed
if self.resolver_config_hash != new_resolver_config_hash {
self.refresh_dependencies(root_urls);
self.refresh_dependencies(enabled_urls);
self.resolver_config_hash = new_resolver_config_hash;
}
self.dirty = true;
}
fn refresh_dependencies(&mut self, root_urls: Vec<Url>) {
fn refresh_dependencies(&mut self, enabled_urls: Vec<Url>) {
let resolver = self.resolver.as_graph_resolver();
for doc in self.open_docs.values_mut() {
if let Some(new_doc) = doc.maybe_with_new_resolver(resolver) {
@ -1258,8 +1259,9 @@ impl Documents {
fs_docs.docs.keys().cloned().collect::<HashSet<_>>();
let open_docs = &mut self.open_docs;
log::debug!("Preloading documents from root urls...");
for specifier in PreloadDocumentFinder::from_root_urls(&root_urls) {
log::debug!("Preloading documents from enabled urls...");
for specifier in PreloadDocumentFinder::from_enabled_urls(&enabled_urls)
{
// mark this document as having been found
not_found_docs.remove(&specifier);
@ -1528,89 +1530,67 @@ fn analyze_module(
}
}
enum PendingEntry {
/// File specified as a root url.
SpecifiedRootFile(PathBuf),
/// Directory that is queued to read.
Dir(PathBuf),
/// The current directory being read.
ReadDir(Box<ReadDir>),
}
/// Iterator that finds documents that can be preloaded into
/// the LSP on startup.
struct PreloadDocumentFinder {
pending_dirs: Vec<PathBuf>,
pending_files: Vec<PathBuf>,
current_entries: Option<ReadDir>,
limit: u16,
entry_count: u16,
pending_entries: VecDeque<PendingEntry>,
}
impl PreloadDocumentFinder {
fn is_allowed_root_dir(dir_path: &Path) -> bool {
if dir_path.parent().is_none() {
// never search the root directory of a drive
return false;
}
true
pub fn from_enabled_urls(enabled_urls: &Vec<Url>) -> Self {
Self::from_enabled_urls_with_limit(enabled_urls, 1_000)
}
pub fn from_root_urls(root_urls: &Vec<Url>) -> Self {
pub fn from_enabled_urls_with_limit(
enabled_urls: &Vec<Url>,
limit: u16,
) -> Self {
fn is_allowed_root_dir(dir_path: &Path) -> bool {
if dir_path.parent().is_none() {
// never search the root directory of a drive
return false;
}
true
}
let mut finder = PreloadDocumentFinder {
pending_dirs: Default::default(),
pending_files: Default::default(),
current_entries: Default::default(),
limit,
entry_count: 0,
pending_entries: Default::default(),
};
for root_url in root_urls {
if let Ok(path) = root_url.to_file_path() {
let mut dirs = Vec::with_capacity(enabled_urls.len());
for enabled_url in enabled_urls {
if let Ok(path) = enabled_url.to_file_path() {
if path.is_dir() {
if Self::is_allowed_root_dir(&path) {
finder.pending_dirs.push(path);
if is_allowed_root_dir(&path) {
dirs.push(path);
}
} else {
finder.pending_files.push(path);
finder
.pending_entries
.push_back(PendingEntry::SpecifiedRootFile(path));
}
}
}
for dir in sort_and_remove_non_leaf_dirs(dirs) {
finder.pending_entries.push_back(PendingEntry::Dir(dir));
}
finder
}
fn read_next_file_entry(&mut self) -> Option<ModuleSpecifier> {
fn is_discoverable_dir(dir_path: &Path) -> bool {
if let Some(dir_name) = dir_path.file_name() {
let dir_name = dir_name.to_string_lossy().to_lowercase();
!matches!(dir_name.as_str(), "node_modules" | ".git")
} else {
false
}
}
if let Some(mut entries) = self.current_entries.take() {
while let Some(entry) = entries.next() {
if let Ok(entry) = entry {
let path = entry.path();
if let Ok(file_type) = entry.file_type() {
if file_type.is_dir() && is_discoverable_dir(&path) {
self.pending_dirs.push(path);
} else if file_type.is_file() {
if let Some(specifier) = Self::get_valid_specifier(&path) {
// restore the next entries for next time
self.current_entries = Some(entries);
return Some(specifier);
}
}
}
}
}
}
None
}
fn get_valid_specifier(path: &Path) -> Option<ModuleSpecifier> {
fn is_discoverable_file(file_path: &Path) -> bool {
// Don't auto-discover minified files as they are likely to be very large
// and likely not to have dependencies on code outside them that would
// be useful in the LSP
if let Some(file_name) = file_path.file_name() {
let file_name = file_name.to_string_lossy().to_lowercase();
!file_name.as_str().contains(".min.")
} else {
false
}
}
fn is_discoverable_media_type(media_type: MediaType) -> bool {
fn is_allowed_media_type(media_type: MediaType) -> bool {
match media_type {
MediaType::JavaScript
| MediaType::Jsx
@ -1632,49 +1612,136 @@ impl PreloadDocumentFinder {
}
let media_type = MediaType::from_path(path);
if is_discoverable_media_type(media_type) && is_discoverable_file(path) {
if is_allowed_media_type(media_type) {
if let Ok(specifier) = ModuleSpecifier::from_file_path(path) {
return Some(specifier);
}
}
None
}
fn queue_next_file_entries(&mut self) {
debug_assert!(self.current_entries.is_none());
while let Some(dir_path) = self.pending_dirs.pop() {
if let Ok(entries) = fs::read_dir(&dir_path) {
self.current_entries = Some(entries);
break;
}
}
}
}
impl Iterator for PreloadDocumentFinder {
type Item = ModuleSpecifier;
fn next(&mut self) -> Option<Self::Item> {
// drain the pending files
while let Some(path) = self.pending_files.pop() {
if let Some(specifier) = Self::get_valid_specifier(&path) {
return Some(specifier);
fn is_discoverable_dir(dir_path: &Path) -> bool {
if let Some(dir_name) = dir_path.file_name() {
let dir_name = dir_name.to_string_lossy().to_lowercase();
// We ignore these directories by default because there is a
// high likelihood they aren't relevant. Someone can opt-into
// them by specifying one of them as an enabled path.
if matches!(dir_name.as_str(), "node_modules" | ".git") {
return false;
}
// ignore cargo target directories for anyone using Deno with Rust
if dir_name == "target"
&& dir_path
.parent()
.map(|p| p.join("Cargo.toml").exists())
.unwrap_or(false)
{
return false;
}
true
} else {
false
}
}
// then go through the current entries and directories
while !self.pending_dirs.is_empty() || self.current_entries.is_some() {
match self.read_next_file_entry() {
Some(entry) => return Some(entry),
None => {
self.queue_next_file_entries();
fn is_discoverable_file(file_path: &Path) -> bool {
// Don't auto-discover minified files as they are likely to be very large
// and likely not to have dependencies on code outside them that would
// be useful in the LSP
if let Some(file_name) = file_path.file_name() {
let file_name = file_name.to_string_lossy().to_lowercase();
!file_name.as_str().contains(".min.")
} else {
false
}
}
while let Some(entry) = self.pending_entries.pop_front() {
match entry {
PendingEntry::SpecifiedRootFile(file) => {
// since it was a file that was specified as a root url, only
// verify that it's valid
if let Some(specifier) = Self::get_valid_specifier(&file) {
return Some(specifier);
}
}
PendingEntry::Dir(dir_path) => {
if let Ok(read_dir) = fs::read_dir(&dir_path) {
self
.pending_entries
.push_back(PendingEntry::ReadDir(Box::new(read_dir)));
}
}
PendingEntry::ReadDir(mut entries) => {
while let Some(entry) = entries.next() {
self.entry_count += 1;
if self.entry_count >= self.limit {
lsp_warn!(
concat!(
"Hit the language server document preload limit of {} file system entries. ",
"You may want to use the \"deno.enablePaths\" configuration setting to only have Deno ",
"partially enable a workspace."
),
self.limit,
);
self.pending_entries.clear(); // stop searching
return None;
}
if let Ok(entry) = entry {
let path = entry.path();
if let Ok(file_type) = entry.file_type() {
if file_type.is_dir() && is_discoverable_dir(&path) {
self
.pending_entries
.push_back(PendingEntry::Dir(path.to_path_buf()));
} else if file_type.is_file() && is_discoverable_file(&path) {
if let Some(specifier) = Self::get_valid_specifier(&path) {
// restore the next entries for next time
self
.pending_entries
.push_front(PendingEntry::ReadDir(entries));
return Some(specifier);
}
}
}
}
}
}
}
}
None
}
}
/// Removes any directorys that are a descendant of another directory in the collection.
fn sort_and_remove_non_leaf_dirs(mut dirs: Vec<PathBuf>) -> Vec<PathBuf> {
if dirs.is_empty() {
return dirs;
}
dirs.sort();
if !dirs.is_empty() {
for i in (0..dirs.len() - 1).rev() {
let prev = &dirs[i + 1];
if prev.starts_with(&dirs[i]) {
dirs.remove(i + 1);
}
}
}
dirs
}
#[cfg(test)]
mod tests {
use crate::npm::NpmResolution;
@ -1884,6 +1951,8 @@ console.log(b, "hello deno");
temp_dir.write("root1/node_modules/mod.ts", ""); // no, node_modules
temp_dir.create_dir_all("root1/sub_dir");
temp_dir.create_dir_all("root1/target");
temp_dir.create_dir_all("root1/node_modules");
temp_dir.create_dir_all("root1/.git");
temp_dir.create_dir_all("root1/file.ts"); // no, directory
temp_dir.write("root1/mod1.ts", ""); // yes
@ -1897,26 +1966,33 @@ console.log(b, "hello deno");
temp_dir.write("root1/other.json", ""); // no, json
temp_dir.write("root1/other.txt", ""); // no, text file
temp_dir.write("root1/other.wasm", ""); // no, don't load wasm
temp_dir.write("root1/Cargo.toml", ""); // no
temp_dir.write("root1/sub_dir/mod.ts", ""); // yes
temp_dir.write("root1/sub_dir/data.min.ts", ""); // no, minified file
temp_dir.write("root1/.git/main.ts", ""); // no, .git folder
temp_dir.write("root1/node_modules/main.ts", ""); // no, because it's in a node_modules folder
temp_dir.write("root1/target/main.ts", ""); // no, because there is a Cargo.toml in the root directory
temp_dir.create_dir_all("root2/folder");
temp_dir.write("root2/file1.ts", ""); // yes, provided
temp_dir.write("root2/file2.ts", ""); // no, not provided
temp_dir.write("root2/main.min.ts", ""); // yes, provided
temp_dir.write("root2/folder/main.ts", ""); // yes, provided
temp_dir.create_dir_all("root3/");
temp_dir.write("root3/mod.ts", ""); // no, not provided
let mut urls = PreloadDocumentFinder::from_root_urls(&vec![
let mut urls = PreloadDocumentFinder::from_enabled_urls(&vec![
temp_dir.uri().join("root1/").unwrap(),
temp_dir.uri().join("root2/file1.ts").unwrap(),
temp_dir.uri().join("root2/main.min.ts").unwrap(),
temp_dir.uri().join("root2/folder/").unwrap(),
])
.collect::<Vec<_>>();
// order doesn't matter
// Ideally we would test for order here, which should be BFS, but
// different file systems have different directory iteration
// so we sort the results
urls.sort();
assert_eq!(
@ -1933,26 +2009,64 @@ console.log(b, "hello deno");
temp_dir.uri().join("root1/sub_dir/mod.ts").unwrap(),
temp_dir.uri().join("root2/file1.ts").unwrap(),
temp_dir.uri().join("root2/folder/main.ts").unwrap(),
temp_dir.uri().join("root2/main.min.ts").unwrap(),
]
);
// now try iterating with a low limit
let urls = PreloadDocumentFinder::from_enabled_urls_with_limit(
&vec![temp_dir.uri()],
10, // entries and not results
)
.collect::<Vec<_>>();
// since different file system have different iteration
// order, the number here may vary, so just assert it's below
// a certain amount
assert!(urls.len() < 5, "Actual length: {}", urls.len());
}
#[test]
pub fn test_pre_load_document_finder_disallowed_dirs() {
if cfg!(windows) {
let paths =
PreloadDocumentFinder::from_root_urls(&vec![
Url::parse("file:///c:/").unwrap()
])
.collect::<Vec<_>>();
let paths = PreloadDocumentFinder::from_enabled_urls(&vec![Url::parse(
"file:///c:/",
)
.unwrap()])
.collect::<Vec<_>>();
assert_eq!(paths, vec![]);
} else {
let paths =
PreloadDocumentFinder::from_root_urls(&vec![
PreloadDocumentFinder::from_enabled_urls(&vec![
Url::parse("file:///").unwrap()
])
.collect::<Vec<_>>();
assert_eq!(paths, vec![]);
}
}
#[test]
fn test_sort_and_remove_non_leaf_dirs() {
fn run_test(paths: Vec<&str>, expected_output: Vec<&str>) {
let paths = sort_and_remove_non_leaf_dirs(
paths.into_iter().map(PathBuf::from).collect(),
);
let dirs: Vec<_> =
paths.iter().map(|dir| dir.to_string_lossy()).collect();
assert_eq!(dirs, expected_output);
}
run_test(
vec![
"/test/asdf/test/asdf/",
"/test/asdf/test/asdf/test.ts",
"/test/asdf/",
"/test/asdf/",
"/testing/456/893/",
"/testing/456/893/test/",
],
vec!["/test/asdf/", "/testing/456/893/"],
);
run_test(vec![], vec![]);
}
}

View file

@ -1138,7 +1138,7 @@ impl Inner {
fn refresh_documents_config(&mut self) {
self.documents.update_config(
self.config.enabled_root_urls(),
self.config.enabled_urls(),
self.maybe_import_map.clone(),
self.maybe_config_file.as_ref(),
self.maybe_package_json.as_ref(),