From 7a1a082876298a4c9e37237074ea62942180d083 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 22 Aug 2022 12:14:59 -0400 Subject: [PATCH] perf: cache swc dependency analysis and don't hold onto `ParsedSource`s in memory (#15502) --- Cargo.lock | 16 +- cli/Cargo.toml | 8 +- cli/cache/mod.rs | 2 + cli/cache/parsed_source.rs | 403 +++++++++++++++++++++++++++++++++ cli/deno_dir.rs | 6 + cli/emit.rs | 166 ++++++++++++-- cli/graph_util.rs | 41 +--- cli/lsp/documents.rs | 158 ++++++++----- cli/main.rs | 7 +- cli/module_loader.rs | 14 +- cli/proc_state.rs | 18 +- cli/tools/doc.rs | 45 ++-- cli/tools/vendor/build.rs | 20 +- cli/tools/vendor/import_map.rs | 30 ++- cli/tools/vendor/mod.rs | 1 + cli/tools/vendor/test.rs | 17 +- test_util/src/pty.rs | 7 +- 17 files changed, 784 insertions(+), 175 deletions(-) create mode 100644 cli/cache/parsed_source.rs diff --git a/Cargo.lock b/Cargo.lock index c8ddbfcbd2..ac73c49ecc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -982,9 +982,9 @@ dependencies = [ [[package]] name = "deno_doc" -version = "0.40.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8afd542bb96b192f9cc640f0bc04daab2f7f93508627330511d5791bef86e44" +checksum = "b8abb2f83fb63564ded9f9eb46c25a9c900f5a7c1beac30e4fae12009c3837b6" dependencies = [ "cfg-if", "deno_ast", @@ -1000,9 +1000,9 @@ dependencies = [ [[package]] name = "deno_emit" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c73eaad7056d2fb8473236c8edecd3e24ad2eafc637d06534fe1620da4fd6cff" +checksum = "71f7501c229a6ce6f28c8c36d5f0c67a82c2fc47fa490198bd4e5de437fca158" dependencies = [ "anyhow", "base64 0.13.0", @@ -1061,9 +1061,9 @@ dependencies = [ [[package]] name = "deno_graph" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9f3c87eee6a5cf75a74f64d05ee34a3e17a13cec72c708c5b0251daf1423ef7" +checksum = "f99778595687fca36ef4c28702b7e64f45508f2af0644ad9ad99fd12fa6d6e5f" dependencies = [ "anyhow", "cfg-if", @@ -1601,9 +1601,9 @@ dependencies = [ [[package]] name = "eszip" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe2b413131dc7b90bc12e0e2cd4971b766c078ebb735ce38546ddb587c4109d" +checksum = "d902a48c7edaf6b26fd9df6bdf205935162690e03848607b671ae1c66a3f0d3b" dependencies = [ "anyhow", "base64 0.13.0", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 0940dd0b55..5bfc4663a9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -48,9 +48,9 @@ winres = "=0.1.12" [dependencies] deno_ast = { version = "0.17.0", features = ["bundler", "cjs", "codegen", "dep_graph", "module_specifier", "proposal", "react", "sourcemap", "transforms", "transpiling", "typescript", "view", "visit"] } deno_core = { version = "0.147.0", path = "../core" } -deno_doc = "0.40.0" -deno_emit = "0.5.0" -deno_graph = "0.30.0" +deno_doc = "0.42.0" +deno_emit = "0.6.0" +deno_graph = "0.31.0" deno_lint = { version = "0.32.0", features = ["docs"] } deno_runtime = { version = "0.73.0", path = "../runtime" } deno_task_shell = "0.5.0" @@ -69,7 +69,7 @@ dprint-plugin-markdown = "=0.14.0" dprint-plugin-typescript = "=0.71.2" encoding_rs = "=0.8.31" env_logger = "=0.9.0" -eszip = "=0.23.0" +eszip = "=0.24.0" fancy-regex = "=0.10.0" flate2 = "=1.0.24" http = "=0.2.6" diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index d7cf8aca46..6769f153dd 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -19,12 +19,14 @@ mod common; mod disk_cache; mod emit; mod incremental; +mod parsed_source; pub use check::TypeCheckCache; pub use common::FastInsecureHasher; pub use disk_cache::DiskCache; pub use emit::EmitCache; pub use incremental::IncrementalCache; +pub use parsed_source::ParsedSourceCache; /// A "wrapper" for the FileFetcher and DiskCache for the Deno CLI that provides /// a concise interface to the DENO_DIR when building module graphs. diff --git a/cli/cache/parsed_source.rs b/cli/cache/parsed_source.rs new file mode 100644 index 0000000000..b35a89a149 --- /dev/null +++ b/cli/cache/parsed_source.rs @@ -0,0 +1,403 @@ +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use deno_ast::MediaType; +use deno_ast::ModuleSpecifier; +use deno_ast::ParsedSource; +use deno_core::error::AnyError; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_graph::CapturingModuleParser; +use deno_graph::DefaultModuleAnalyzer; +use deno_graph::ModuleInfo; +use deno_graph::ModuleParser; +use deno_graph::ParsedSourceStore; +use deno_runtime::deno_webstorage::rusqlite::params; +use deno_runtime::deno_webstorage::rusqlite::Connection; + +use super::common::run_sqlite_pragma; +use super::FastInsecureHasher; + +#[derive(Clone, Default)] +struct ParsedSourceCacheSources( + Arc>>, +); + +/// It's ok that this is racy since in non-LSP situations +/// this will only ever store one form of a parsed source +/// and in LSP settings the concurrency will be enforced +/// at a higher level to ensure this will have the latest +/// parsed source. +impl deno_graph::ParsedSourceStore for ParsedSourceCacheSources { + fn set_parsed_source( + &self, + specifier: deno_graph::ModuleSpecifier, + parsed_source: ParsedSource, + ) -> Option { + self.0.lock().insert(specifier, parsed_source) + } + + fn get_parsed_source( + &self, + specifier: &deno_graph::ModuleSpecifier, + ) -> Option { + self.0.lock().get(specifier).cloned() + } +} + +/// A cache of `ParsedSource`s, which may be used with `deno_graph` +/// for cached dependency analysis. +#[derive(Clone)] +pub struct ParsedSourceCache { + db_cache_path: Option, + cli_version: String, + sources: ParsedSourceCacheSources, +} + +impl ParsedSourceCache { + pub fn new(sql_cache_path: Option) -> Self { + Self { + db_cache_path: sql_cache_path, + cli_version: crate::version::deno(), + sources: Default::default(), + } + } + + pub fn get_parsed_source_from_module( + &self, + module: &deno_graph::Module, + ) -> Result, AnyError> { + if let Some(source) = &module.maybe_source { + Ok(Some(self.get_or_parse_module( + &module.specifier, + source.clone(), + module.media_type, + )?)) + } else { + Ok(None) + } + } + + /// Gets the matching `ParsedSource` from the cache + /// or parses a new one and stores that in the cache. + pub fn get_or_parse_module( + &self, + specifier: &deno_graph::ModuleSpecifier, + source: Arc, + media_type: MediaType, + ) -> deno_core::anyhow::Result { + let parser = CapturingModuleParser::new(None, &self.sources); + // this will conditionally parse because it's using a CapturingModuleParser + parser.parse_module(specifier, source, media_type) + } + + /// Frees the parsed source from memory. + pub fn free(&self, specifier: &ModuleSpecifier) { + self.sources.0.lock().remove(specifier); + } + + /// Gets this cache as a `deno_graph::ParsedSourceStore`. + pub fn as_store(&self) -> Box { + // This trait is not implemented directly on ParsedSourceCache + // in order to prevent its methods from being accidentally used. + // Generally, people should prefer the methods found that will + // lazily parse if necessary. + Box::new(self.sources.clone()) + } + + pub fn as_analyzer(&self) -> Box { + match ParsedSourceCacheModuleAnalyzer::new( + self.db_cache_path.as_deref(), + self.cli_version.clone(), + self.sources.clone(), + ) { + Ok(analyzer) => Box::new(analyzer), + Err(err) => { + log::debug!("Could not create cached module analyzer. {:#}", err); + // fallback to not caching if it can't be created + Box::new(deno_graph::CapturingModuleAnalyzer::new( + None, + Some(self.as_store()), + )) + } + } + } +} + +struct ParsedSourceCacheModuleAnalyzer { + conn: Connection, + sources: ParsedSourceCacheSources, +} + +impl ParsedSourceCacheModuleAnalyzer { + pub fn new( + db_file_path: Option<&Path>, + cli_version: String, + sources: ParsedSourceCacheSources, + ) -> Result { + let conn = match db_file_path { + Some(path) => Connection::open(path)?, + None => Connection::open_in_memory()?, + }; + Self::from_connection(conn, cli_version, sources) + } + + fn from_connection( + conn: Connection, + cli_version: String, + sources: ParsedSourceCacheSources, + ) -> Result { + run_sqlite_pragma(&conn)?; + create_tables(&conn, cli_version)?; + + Ok(Self { conn, sources }) + } + + pub fn get_module_info( + &self, + specifier: &ModuleSpecifier, + media_type: MediaType, + expected_source_hash: &str, + ) -> Result, AnyError> { + let query = " + SELECT + module_info + FROM + moduleinfocache + WHERE + specifier=?1 + AND media_type=?2 + AND source_hash=?3 + LIMIT 1"; + let mut stmt = self.conn.prepare_cached(query)?; + let mut rows = stmt.query(params![ + &specifier.as_str(), + &media_type.to_string(), + &expected_source_hash, + ])?; + if let Some(row) = rows.next()? { + let module_info: String = row.get(0)?; + let module_info = serde_json::from_str(&module_info)?; + Ok(Some(module_info)) + } else { + Ok(None) + } + } + + pub fn set_module_info( + &self, + specifier: &ModuleSpecifier, + media_type: MediaType, + source_hash: &str, + module_info: &ModuleInfo, + ) -> Result<(), AnyError> { + let sql = " + INSERT OR REPLACE INTO + moduleinfocache (specifier, media_type, source_hash, module_info) + VALUES + (?1, ?2, ?3, ?4)"; + let mut stmt = self.conn.prepare_cached(sql)?; + stmt.execute(params![ + specifier.as_str(), + &media_type.to_string(), + &source_hash.to_string(), + &serde_json::to_string(&module_info)?, + ])?; + Ok(()) + } +} + +impl deno_graph::ModuleAnalyzer for ParsedSourceCacheModuleAnalyzer { + fn analyze( + &self, + specifier: &ModuleSpecifier, + source: Arc, + media_type: MediaType, + ) -> Result { + // attempt to load from the cache + let source_hash = compute_source_hash(source.as_bytes()); + match self.get_module_info(specifier, media_type, &source_hash) { + Ok(Some(info)) => return Ok(info), + Ok(None) => {} + Err(err) => { + log::debug!( + "Error loading module cache info for {}. {:#}", + specifier, + err + ); + } + } + + // otherwise, get the module info from the parsed source cache + let parser = CapturingModuleParser::new(None, &self.sources); + let analyzer = DefaultModuleAnalyzer::new(&parser); + + let module_info = analyzer.analyze(specifier, source, media_type)?; + + // then attempt to cache it + if let Err(err) = + self.set_module_info(specifier, media_type, &source_hash, &module_info) + { + log::debug!( + "Error saving module cache info for {}. {:#}", + specifier, + err + ); + } + + Ok(module_info) + } +} + +fn create_tables( + conn: &Connection, + cli_version: String, +) -> Result<(), AnyError> { + // INT doesn't store up to u64, so use TEXT for source_hash + conn.execute( + "CREATE TABLE IF NOT EXISTS moduleinfocache ( + specifier TEXT PRIMARY KEY, + media_type TEXT NOT NULL, + source_hash TEXT NOT NULL, + module_info TEXT NOT NULL + )", + [], + )?; + conn.execute( + "CREATE TABLE IF NOT EXISTS info ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + )", + [], + )?; + + // delete the cache when the CLI version changes + let data_cli_version: Option = conn + .query_row( + "SELECT value FROM info WHERE key='CLI_VERSION' LIMIT 1", + [], + |row| row.get(0), + ) + .ok(); + if data_cli_version != Some(cli_version.to_string()) { + conn.execute("DELETE FROM moduleinfocache", params![])?; + let mut stmt = conn + .prepare("INSERT OR REPLACE INTO info (key, value) VALUES (?1, ?2)")?; + stmt.execute(params!["CLI_VERSION", &cli_version])?; + } + + Ok(()) +} + +fn compute_source_hash(bytes: &[u8]) -> String { + FastInsecureHasher::new().write(bytes).finish().to_string() +} + +#[cfg(test)] +mod test { + use deno_graph::PositionRange; + use deno_graph::SpecifierWithRange; + + use super::*; + + #[test] + pub fn parsed_source_cache_module_analyzer_general_use() { + let conn = Connection::open_in_memory().unwrap(); + let cache = ParsedSourceCacheModuleAnalyzer::from_connection( + conn, + "1.0.0".to_string(), + Default::default(), + ) + .unwrap(); + let specifier1 = + ModuleSpecifier::parse("https://localhost/mod.ts").unwrap(); + let specifier2 = + ModuleSpecifier::parse("https://localhost/mod2.ts").unwrap(); + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::JavaScript, "1") + .unwrap(), + None + ); + + let mut module_info = ModuleInfo::default(); + module_info.jsdoc_imports.push(SpecifierWithRange { + range: PositionRange { + start: deno_graph::Position { + line: 0, + character: 3, + }, + end: deno_graph::Position { + line: 1, + character: 2, + }, + }, + text: "test".to_string(), + }); + cache + .set_module_info(&specifier1, MediaType::JavaScript, "1", &module_info) + .unwrap(); + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::JavaScript, "1") + .unwrap(), + Some(module_info.clone()) + ); + assert_eq!( + cache + .get_module_info(&specifier2, MediaType::JavaScript, "1") + .unwrap(), + None, + ); + // different media type + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::TypeScript, "1") + .unwrap(), + None, + ); + // different source hash + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::JavaScript, "2") + .unwrap(), + None, + ); + + // try recreating with the same version + let conn = cache.conn; + let cache = ParsedSourceCacheModuleAnalyzer::from_connection( + conn, + "1.0.0".to_string(), + Default::default(), + ) + .unwrap(); + + // should get it + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::JavaScript, "1") + .unwrap(), + Some(module_info) + ); + + // try recreating with a different version + let conn = cache.conn; + let cache = ParsedSourceCacheModuleAnalyzer::from_connection( + conn, + "1.0.1".to_string(), + Default::default(), + ) + .unwrap(); + + // should no longer exist + assert_eq!( + cache + .get_module_info(&specifier1, MediaType::JavaScript, "1") + .unwrap(), + None, + ); + } +} diff --git a/cli/deno_dir.rs b/cli/deno_dir.rs index 303ad2c11c..c87ed450cf 100644 --- a/cli/deno_dir.rs +++ b/cli/deno_dir.rs @@ -58,6 +58,12 @@ impl DenoDir { self.root.join("lint_incremental_cache_v1") } + /// Path for caching swc dependency analysis. + pub fn dep_analysis_db_file_path(&self) -> PathBuf { + // bump this version name to invalidate the entire cache + self.root.join("dep_analysis_cache_v1") + } + /// Path for the cache used for type checking. pub fn type_checking_cache_db_file_path(&self) -> PathBuf { // bump this version name to invalidate the entire cache diff --git a/cli/emit.rs b/cli/emit.rs index 4c347a09c5..eb8a56ad03 100644 --- a/cli/emit.rs +++ b/cli/emit.rs @@ -11,6 +11,7 @@ use crate::args::TsConfig; use crate::args::TypeCheckMode; use crate::cache::EmitCache; use crate::cache::FastInsecureHasher; +use crate::cache::ParsedSourceCache; use crate::cache::TypeCheckCache; use crate::colors; use crate::diagnostics::Diagnostics; @@ -22,7 +23,6 @@ use crate::version; use deno_ast::swc::bundler::Hook; use deno_ast::swc::bundler::ModuleRecord; use deno_ast::swc::common::Span; -use deno_ast::ParsedSource; use deno_core::error::AnyError; use deno_core::parking_lot::RwLock; use deno_core::serde::Deserialize; @@ -36,6 +36,8 @@ use deno_graph::MediaType; use deno_graph::ModuleGraphError; use deno_graph::ModuleKind; use deno_graph::ResolutionError; +use once_cell::sync::Lazy; +use regex::Regex; use std::fmt; use std::sync::Arc; @@ -206,17 +208,15 @@ fn get_tsc_roots( .into_iter() .filter_map(|(specifier, module_entry)| match module_entry { ModuleEntry::Module { - media_type, - ts_check, - .. - } => match &media_type { + media_type, code, .. + } => match media_type { MediaType::TypeScript | MediaType::Tsx | MediaType::Mts | MediaType::Cts | MediaType::Jsx => Some((specifier.clone(), *media_type)), MediaType::JavaScript | MediaType::Mjs | MediaType::Cjs - if check_js || *ts_check => + if check_js || has_ts_check(*media_type, code) => { Some((specifier.clone(), *media_type)) } @@ -238,21 +238,30 @@ pub fn get_source_hash(source_text: &str, emit_options_hash: u64) -> u64 { } pub fn emit_parsed_source( - cache: &EmitCache, + emit_cache: &EmitCache, + parsed_source_cache: &ParsedSourceCache, specifier: &ModuleSpecifier, - parsed_source: &ParsedSource, + media_type: MediaType, + source: &Arc, emit_options: &deno_ast::EmitOptions, emit_config_hash: u64, ) -> Result { - let source_hash = - get_source_hash(parsed_source.text_info().text_str(), emit_config_hash); + let source_hash = get_source_hash(source, emit_config_hash); - if let Some(emit_code) = cache.get_emit_code(specifier, Some(source_hash)) { + if let Some(emit_code) = + emit_cache.get_emit_code(specifier, Some(source_hash)) + { Ok(emit_code) } else { + // this will use a cached version if it exists + let parsed_source = parsed_source_cache.get_or_parse_module( + specifier, + source.clone(), + media_type, + )?; let transpiled_source = parsed_source.transpile(emit_options)?; debug_assert!(transpiled_source.source_map.is_none()); - cache.set_emit_code(specifier, source_hash, &transpiled_source.text); + emit_cache.set_emit_code(specifier, source_hash, &transpiled_source.text); Ok(transpiled_source.text) } } @@ -397,13 +406,11 @@ fn get_check_hash( let mut has_file_to_type_check = false; for (specifier, module_entry) in sorted_entries { if let ModuleEntry::Module { - code, - media_type, - ts_check, - .. + code, media_type, .. } = module_entry { - if *ts_check { + let ts_check = has_ts_check(*media_type, code); + if ts_check { has_file_to_type_check = true; } @@ -549,3 +556,128 @@ impl From for deno_ast::EmitOptions { } } } + +/// Matches the `@ts-check` pragma. +static TS_CHECK_RE: Lazy = + Lazy::new(|| Regex::new(r#"(?i)^\s*@ts-check(?:\s+|$)"#).unwrap()); + +fn has_ts_check(media_type: MediaType, file_text: &str) -> bool { + match &media_type { + MediaType::JavaScript + | MediaType::Mjs + | MediaType::Cjs + | MediaType::Jsx => get_leading_comments(file_text) + .iter() + .any(|text| TS_CHECK_RE.is_match(text)), + _ => false, + } +} + +fn get_leading_comments(file_text: &str) -> Vec { + let mut chars = file_text.chars().peekable(); + + // skip over the shebang + if file_text.starts_with("#!") { + // skip until the end of the line + for c in chars.by_ref() { + if c == '\n' { + break; + } + } + } + + let mut results = Vec::new(); + // now handle the comments + while chars.peek().is_some() { + // skip over any whitespace + while chars + .peek() + .map(|c| char::is_whitespace(*c)) + .unwrap_or(false) + { + chars.next(); + } + + if chars.next() != Some('/') { + break; + } + match chars.next() { + Some('/') => { + let mut text = String::new(); + for c in chars.by_ref() { + if c == '\n' { + break; + } else { + text.push(c); + } + } + results.push(text); + } + Some('*') => { + let mut text = String::new(); + while let Some(c) = chars.next() { + if c == '*' && chars.peek() == Some(&'/') { + chars.next(); + break; + } else { + text.push(c); + } + } + results.push(text); + } + _ => break, + } + } + results +} + +#[cfg(test)] +mod test { + use deno_ast::MediaType; + + use super::get_leading_comments; + use super::has_ts_check; + + #[test] + fn get_leading_comments_test() { + assert_eq!( + get_leading_comments( + "#!/usr/bin/env deno\r\n// test\n/* 1 *//*2*///3\n//\n /**/ /*4 */" + ), + vec![ + " test".to_string(), + " 1 ".to_string(), + "2".to_string(), + "3".to_string(), + "".to_string(), + "".to_string(), + "4 ".to_string(), + ] + ); + assert_eq!( + get_leading_comments("//1 /* */ \na;"), + vec!["1 /* */ ".to_string(),] + ); + assert_eq!(get_leading_comments("//"), vec!["".to_string()]); + } + + #[test] + fn has_ts_check_test() { + assert!(has_ts_check( + MediaType::JavaScript, + "// @ts-check\nconsole.log(5);" + )); + assert!(has_ts_check( + MediaType::JavaScript, + "// deno-lint-ignore\n// @ts-check\n" + )); + assert!(!has_ts_check( + MediaType::JavaScript, + "test;\n// @ts-check\n" + )); + assert!(!has_ts_check( + MediaType::JavaScript, + "// ts-check\nconsole.log(5);" + )); + } +} diff --git a/cli/graph_util.rs b/cli/graph_util.rs index 1c58fd9b6f..1f49c22a30 100644 --- a/cli/graph_util.rs +++ b/cli/graph_util.rs @@ -6,7 +6,6 @@ use crate::errors::get_error_class_name; use crate::npm::NpmPackageReference; use crate::npm::NpmPackageReq; -use deno_ast::ParsedSource; use deno_core::error::custom_error; use deno_core::error::AnyError; use deno_core::ModuleSpecifier; @@ -18,18 +17,12 @@ use deno_graph::ModuleGraphError; use deno_graph::ModuleKind; use deno_graph::Range; use deno_graph::Resolved; -use once_cell::sync::Lazy; -use regex::Regex; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; use std::collections::VecDeque; use std::sync::Arc; -/// Matches the `@ts-check` pragma. -static TS_CHECK_RE: Lazy = - Lazy::new(|| Regex::new(r#"(?i)^\s*@ts-check(?:\s+|$)"#).unwrap()); - pub fn contains_specifier( v: &[(ModuleSpecifier, ModuleKind)], specifier: &ModuleSpecifier, @@ -42,11 +35,8 @@ pub fn contains_specifier( pub enum ModuleEntry { Module { code: Arc, - maybe_parsed_source: Option, dependencies: BTreeMap, media_type: MediaType, - /// Whether or not this is a JS/JSX module with a `@ts-check` directive. - ts_check: bool, /// A set of type libs that the module has passed a type check with this /// session. This would consist of window, worker or both. checked_libs: HashSet, @@ -83,11 +73,7 @@ impl GraphData { } } } - // TODO(nayeemrmn): Implement `Clone` on `GraphImport`. - self.graph_imports.push(GraphImport { - referrer: graph_import.referrer.clone(), - dependencies: graph_import.dependencies.clone(), - }); + self.graph_imports.push(graph_import.clone()) } for (specifier, result) in graph.specifiers() { @@ -139,24 +125,9 @@ impl GraphData { } } } - let ts_check = match &media_type { - MediaType::JavaScript - | MediaType::Mjs - | MediaType::Cjs - | MediaType::Jsx => { - let parsed_source = module.maybe_parsed_source.as_ref().unwrap(); - parsed_source - .get_leading_comments() - .iter() - .any(|c| TS_CHECK_RE.is_match(&c.text)) - } - _ => false, - }; let module_entry = ModuleEntry::Module { code, - maybe_parsed_source: module.maybe_parsed_source.clone(), dependencies: module.dependencies.clone(), - ts_check, media_type, checked_libs: Default::default(), maybe_types, @@ -305,15 +276,7 @@ impl GraphData { modules, npm_packages: self.npm_packages.clone(), referrer_map, - // TODO(nayeemrmn): Implement `Clone` on `GraphImport`. - graph_imports: self - .graph_imports - .iter() - .map(|i| GraphImport { - referrer: i.referrer.clone(), - dependencies: i.dependencies.clone(), - }) - .collect(), + graph_imports: self.graph_imports.to_vec(), cjs_esm_translations: Default::default(), }) } diff --git a/cli/lsp/documents.rs b/cli/lsp/documents.rs index ca771e6565..18c3710fb6 100644 --- a/cli/lsp/documents.rs +++ b/cli/lsp/documents.rs @@ -17,6 +17,7 @@ use crate::resolver::JsxResolver; use crate::text_encoding; use deno_ast::MediaType; +use deno_ast::ParsedSource; use deno_ast::SourceTextInfo; use deno_core::error::custom_error; use deno_core::error::AnyError; @@ -71,30 +72,6 @@ static TSX_HEADERS: Lazy> = Lazy::new(|| { .collect() }); -/// The default parser from `deno_graph` does not include the configuration -/// options we require here, and so implementing an empty struct that provides -/// the trait. -#[derive(Debug, Default)] -struct SourceParser {} - -impl deno_graph::SourceParser for SourceParser { - fn parse_module( - &self, - specifier: &ModuleSpecifier, - source: Arc, - media_type: MediaType, - ) -> Result { - deno_ast::parse_module(deno_ast::ParseParams { - specifier: specifier.to_string(), - text_info: SourceTextInfo::new(source), - media_type, - capture_tokens: true, - scope_analysis: true, - maybe_syntax: None, - }) - } -} - #[derive(Debug, Clone, PartialEq, Eq)] pub enum LanguageId { JavaScript, @@ -218,7 +195,7 @@ impl AssetOrDocument { pub fn maybe_parsed_source( &self, - ) -> Option> { + ) -> Option> { self.document().and_then(|d| d.maybe_parsed_source()) } @@ -231,6 +208,11 @@ impl AssetOrDocument { } } +type MaybeModuleResult = + Option>; +type MaybeParsedSourceResult = + Option>; + #[derive(Debug, Clone)] struct DocumentInner { /// contains the last-known-good set of dependencies from parsing the module @@ -239,9 +221,9 @@ struct DocumentInner { line_index: Arc, maybe_language_id: Option, maybe_lsp_version: Option, - maybe_module: - Option>, + maybe_module: MaybeModuleResult, maybe_navigation_tree: Option>, + maybe_parsed_source: MaybeParsedSourceResult, specifier: ModuleSpecifier, text_info: SourceTextInfo, } @@ -257,23 +239,21 @@ impl Document { content: Arc, maybe_resolver: Option<&dyn deno_graph::source::Resolver>, ) -> Self { - let parser = SourceParser::default(); // we only ever do `Document::new` on on disk resources that are supposed to // be diagnosable, unlike `Document::open`, so it is safe to unconditionally // parse the module. - let maybe_module = Some(deno_graph::parse_module( + let (maybe_module, maybe_parsed_source) = lsp_deno_graph_analyze( &specifier, - maybe_headers, content.clone(), - Some(&deno_graph::ModuleKind::Esm), + maybe_headers, maybe_resolver, - Some(&parser), - )); + ); let dependencies = if let Some(Ok(module)) = &maybe_module { Arc::new(module.dependencies.clone()) } else { Arc::new(BTreeMap::new()) }; + // todo(dsherret): retrieve this from the parsed source if it let text_info = SourceTextInfo::new(content); let line_index = Arc::new(LineIndex::new(text_info.text_str())); Self(Arc::new(DocumentInner { @@ -284,6 +264,7 @@ impl Document { maybe_lsp_version: None, maybe_module, maybe_navigation_tree: None, + maybe_parsed_source, text_info, specifier, })) @@ -297,18 +278,15 @@ impl Document { maybe_resolver: Option<&dyn deno_graph::source::Resolver>, ) -> Self { let maybe_headers = language_id.as_headers(); - let parser = SourceParser::default(); - let maybe_module = if language_id.is_diagnosable() { - Some(deno_graph::parse_module( + let (maybe_module, maybe_parsed_source) = if language_id.is_diagnosable() { + lsp_deno_graph_analyze( &specifier, - maybe_headers, content.clone(), - Some(&deno_graph::ModuleKind::Esm), + maybe_headers, maybe_resolver, - Some(&parser), - )) + ) } else { - None + (None, None) }; let dependencies = if let Some(Ok(module)) = &maybe_module { Arc::new(module.dependencies.clone()) @@ -325,6 +303,7 @@ impl Document { maybe_lsp_version: Some(version), maybe_module, maybe_navigation_tree: None, + maybe_parsed_source, text_info: source, specifier, })) @@ -353,7 +332,7 @@ impl Document { } } let content: Arc = content.into(); - let maybe_module = if self + let (maybe_module, maybe_parsed_source) = if self .0 .maybe_language_id .as_ref() @@ -365,17 +344,14 @@ impl Document { .maybe_language_id .as_ref() .and_then(|li| li.as_headers()); - let parser = SourceParser::default(); - Some(deno_graph::parse_module( + lsp_deno_graph_analyze( &self.0.specifier, - maybe_headers, content.clone(), - Some(&deno_graph::ModuleKind::Esm), + maybe_headers, maybe_resolver, - Some(&parser), - )) + ) } else { - None + (None, None) }; let dependencies = if let Some(Ok(module)) = &maybe_module { Arc::new(module.dependencies.clone()) @@ -393,6 +369,7 @@ impl Document { text_info, line_index, maybe_module, + maybe_parsed_source, maybe_lsp_version: Some(version), maybe_navigation_tree: None, ..(*self.0).clone() @@ -493,12 +470,8 @@ impl Document { pub fn maybe_parsed_source( &self, - ) -> Option> { - let module_result = self.maybe_module()?; - match module_result { - Ok(module) => Some(Ok(module.maybe_parsed_source.clone()?)), - Err(err) => Some(Err(err.clone())), - } + ) -> Option> { + self.0.maybe_parsed_source.clone() } pub fn maybe_navigation_tree(&self) -> Option> { @@ -1138,6 +1111,81 @@ impl Documents { } } +/// The default parser from `deno_graph` does not include the configuration +/// options we require for the lsp. +#[derive(Debug, Default)] +struct LspModuleParser; + +impl deno_graph::ModuleParser for LspModuleParser { + fn parse_module( + &self, + specifier: &deno_graph::ModuleSpecifier, + source: Arc, + media_type: MediaType, + ) -> deno_core::anyhow::Result { + deno_ast::parse_module(deno_ast::ParseParams { + specifier: specifier.to_string(), + text_info: SourceTextInfo::new(source), + media_type, + capture_tokens: true, + scope_analysis: true, + maybe_syntax: None, + }) + } +} + +fn lsp_deno_graph_analyze( + specifier: &ModuleSpecifier, + content: Arc, + maybe_headers: Option<&HashMap>, + maybe_resolver: Option<&dyn deno_graph::source::Resolver>, +) -> (MaybeModuleResult, MaybeParsedSourceResult) { + use deno_graph::ModuleParser; + + let analyzer = deno_graph::CapturingModuleAnalyzer::new( + Some(Box::new(LspModuleParser::default())), + None, + ); + let parsed_source_result = analyzer.parse_module( + specifier, + content.clone(), + get_media_type(specifier, maybe_headers), + ); + let module_result = match &parsed_source_result { + Ok(_) => deno_graph::parse_module( + specifier, + maybe_headers, + content, + Some(&deno_graph::ModuleKind::Esm), + maybe_resolver, + Some(&analyzer), + ), + Err(err) => Err(deno_graph::ModuleGraphError::ParseErr( + specifier.clone(), + err.clone(), + )), + }; + + (Some(module_result), Some(parsed_source_result)) +} + +// todo(dsherret): use `MediaType::from_specifier_and_headers` once +// https://github.com/denoland/deno_ast/pull/108 is merged +fn get_media_type( + specifier: &ModuleSpecifier, + maybe_headers: Option<&HashMap>, +) -> MediaType { + if let Some(headers) = maybe_headers { + if let Some(content_type) = headers.get("content-type") { + MediaType::from_content_type(specifier, content_type) + } else { + MediaType::from(specifier) + } + } else { + MediaType::from(specifier) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cli/main.rs b/cli/main.rs index 8e53d1f0cf..d384033844 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -245,7 +245,8 @@ async fn compile_command( graph.valid().unwrap(); - let eszip = eszip::EszipV2::from_graph(graph, Default::default())?; + let store = ps.parsed_source_cache.as_store(); + let eszip = eszip::EszipV2::from_graph(graph, &*store, Default::default())?; info!( "{} {}", @@ -462,6 +463,7 @@ async fn create_graph_and_maybe_check( .as_ref() .map(|im| im.as_resolver()) }; + let analyzer = ps.parsed_source_cache.as_analyzer(); let graph = Arc::new( deno_graph::create_graph( vec![(root, deno_graph::ModuleKind::Esm)], @@ -470,7 +472,7 @@ async fn create_graph_and_maybe_check( &mut cache, maybe_resolver, maybe_locker, - None, + Some(&*analyzer), None, ) .await, @@ -556,7 +558,6 @@ async fn bundle_command( debug!(">>>>> bundle START"); let ps = ProcState::from_options(cli_options).await?; - let graph = create_graph_and_maybe_check(module_specifier, &ps, debug).await?; diff --git a/cli/module_loader.rs b/cli/module_loader.rs index 235c35fcd8..05dd5bd73b 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -67,10 +67,7 @@ impl CliModuleLoader { let found_url = graph_data.follow_redirect(specifier); match graph_data.get(&found_url) { Some(ModuleEntry::Module { - code, - media_type, - maybe_parsed_source, - .. + code, media_type, .. }) => { let code = match media_type { MediaType::JavaScript @@ -92,11 +89,12 @@ impl CliModuleLoader { | MediaType::Jsx | MediaType::Tsx => { // get emit text - let parsed_source = maybe_parsed_source.as_ref().unwrap(); // should always be set emit_parsed_source( &self.ps.emit_cache, + &self.ps.parsed_source_cache, &found_url, - parsed_source, + *media_type, + code, &self.ps.emit_options, self.ps.emit_options_hash, )? @@ -105,6 +103,10 @@ impl CliModuleLoader { panic!("Unexpected media type {} for {}", media_type, found_url) } }; + + // at this point, we no longer need the parsed source in memory, so free it + self.ps.parsed_source_cache.free(specifier); + Ok(ModuleCodeSource { code, found_url, diff --git a/cli/proc_state.rs b/cli/proc_state.rs index 0ffca1fa9c..6821e81835 100644 --- a/cli/proc_state.rs +++ b/cli/proc_state.rs @@ -7,6 +7,7 @@ use crate::args::TypeCheckMode; use crate::cache; use crate::cache::EmitCache; use crate::cache::FastInsecureHasher; +use crate::cache::ParsedSourceCache; use crate::cache::TypeCheckCache; use crate::compat; use crate::compat::NodeEsmResolver; @@ -82,6 +83,7 @@ pub struct Inner { pub broadcast_channel: InMemoryBroadcastChannel, pub shared_array_buffer_store: SharedArrayBufferStore, pub compiled_wasm_module_store: CompiledWasmModuleStore, + pub parsed_source_cache: ParsedSourceCache, maybe_resolver: Option>, maybe_file_watcher_reporter: Option, pub npm_resolver: GlobalNpmPackageResolver, @@ -217,6 +219,8 @@ impl ProcState { warn!("{}", ignored_options); } let emit_cache = EmitCache::new(dir.gen_cache.clone()); + let parsed_source_cache = + ParsedSourceCache::new(Some(dir.dep_analysis_db_file_path())); let npm_resolver = GlobalNpmPackageResolver::from_deno_dir( &dir, cli_options.reload_flag(), @@ -242,6 +246,7 @@ impl ProcState { broadcast_channel, shared_array_buffer_store, compiled_wasm_module_store, + parsed_source_cache, maybe_resolver, maybe_file_watcher_reporter, npm_resolver, @@ -367,6 +372,7 @@ impl ProcState { None }; + let analyzer = self.parsed_source_cache.as_analyzer(); let graph = create_graph( roots.clone(), is_dynamic, @@ -374,7 +380,7 @@ impl ProcState { &mut loader, maybe_resolver, maybe_locker, - None, + Some(&*analyzer), maybe_file_watcher_reporter, ) .await; @@ -589,14 +595,15 @@ impl ProcState { let graph_data = self.graph_data.read(); for (specifier, entry) in graph_data.entries() { if let ModuleEntry::Module { - maybe_parsed_source: Some(parsed_source), - .. + code, media_type, .. } = entry { emit_parsed_source( &self.emit_cache, + &self.parsed_source_cache, specifier, - parsed_source, + *media_type, + code, &self.emit_options, self.emit_options_hash, )?; @@ -630,6 +637,7 @@ impl ProcState { .as_ref() .map(|im| im.as_resolver()) }; + let analyzer = self.parsed_source_cache.as_analyzer(); let graph = create_graph( roots, @@ -638,7 +646,7 @@ impl ProcState { &mut cache, maybe_resolver, maybe_locker, - None, + Some(&*analyzer), None, ) .await; diff --git a/cli/tools/doc.rs b/cli/tools/doc.rs index ae0f37f1b7..aa22781317 100644 --- a/cli/tools/doc.rs +++ b/cli/tools/doc.rs @@ -25,21 +25,38 @@ pub async fn print_docs( let source_file = doc_flags .source_file .unwrap_or_else(|| "--builtin".to_string()); - let source_parser = deno_graph::DefaultSourceParser::new(); let mut doc_nodes = if source_file == "--builtin" { + // todo(dsherret): change this back to deno://lib.deno.d.ts once + // https://github.com/denoland/deno_ast/issues/109 is fixed let source_file_specifier = - ModuleSpecifier::parse("deno://lib.deno.d.ts").unwrap(); - let graph = ps - .create_graph(vec![(source_file_specifier.clone(), ModuleKind::Esm)]) - .await?; - let doc_parser = - doc::DocParser::new(graph, doc_flags.private, &source_parser); - doc_parser.parse_source( - &source_file_specifier, - MediaType::Dts, - get_types(ps.options.unstable()).into(), - )? + ModuleSpecifier::parse("deno://dts/lib.deno.d.ts").unwrap(); + let content = get_types(ps.options.unstable()); + let mut loader = deno_graph::source::MemoryLoader::new( + vec![( + source_file_specifier.to_string(), + deno_graph::source::Source::Module { + specifier: source_file_specifier.to_string(), + content, + maybe_headers: None, + }, + )], + Vec::new(), + ); + let analyzer = deno_graph::CapturingModuleAnalyzer::default(); + let graph = deno_graph::create_graph( + vec![(source_file_specifier.clone(), ModuleKind::Esm)], + false, + None, + &mut loader, + None, + None, + Some(&analyzer), + None, + ) + .await; + let doc_parser = doc::DocParser::new(graph, doc_flags.private, &analyzer); + doc_parser.parse_module(&source_file_specifier)?.definitions } else { let module_specifier = resolve_url_or_path(&source_file)?; @@ -61,8 +78,8 @@ pub async fn print_docs( let graph = ps .create_graph(vec![(root_specifier.clone(), ModuleKind::Esm)]) .await?; - let doc_parser = - doc::DocParser::new(graph, doc_flags.private, &source_parser); + let store = ps.parsed_source_cache.as_store(); + let doc_parser = doc::DocParser::new(graph, doc_flags.private, &*store); doc_parser.parse_with_reexports(&root_specifier)? }; diff --git a/cli/tools/vendor/build.rs b/cli/tools/vendor/build.rs index f7921251eb..33f5227b56 100644 --- a/cli/tools/vendor/build.rs +++ b/cli/tools/vendor/build.rs @@ -14,6 +14,8 @@ use deno_graph::ModuleKind; use import_map::ImportMap; use import_map::SpecifierMap; +use crate::cache::ParsedSourceCache; + use super::analyze::has_default_export; use super::import_map::build_import_map; use super::mappings::Mappings; @@ -52,6 +54,7 @@ impl VendorEnvironment for RealVendorEnvironment { /// Vendors remote modules and returns how many were vendored. pub fn build( graph: ModuleGraph, + parsed_source_cache: &ParsedSourceCache, output_dir: &Path, original_import_map: Option<&ImportMap>, environment: &impl VendorEnvironment, @@ -110,7 +113,8 @@ pub fn build( for (specifier, proxied_module) in mappings.proxied_modules() { let proxy_path = mappings.local_path(specifier); let module = graph.get(specifier).unwrap(); - let text = build_proxy_module_source(module, proxied_module); + let text = + build_proxy_module_source(module, proxied_module, parsed_source_cache)?; environment.write_file(&proxy_path, &text)?; } @@ -124,7 +128,8 @@ pub fn build( &all_modules, &mappings, original_import_map, - ); + parsed_source_cache, + )?; environment.write_file(&import_map_path, &import_map_text)?; } @@ -171,7 +176,8 @@ fn validate_original_import_map( fn build_proxy_module_source( module: &Module, proxied_module: &ProxiedModule, -) -> String { + parsed_source_cache: &ParsedSourceCache, +) -> Result { let mut text = String::new(); writeln!( text, @@ -194,8 +200,10 @@ fn build_proxy_module_source( writeln!(text, "export * from \"{}\";", relative_specifier).unwrap(); // add a default export if one exists in the module - if let Some(parsed_source) = module.maybe_parsed_source.as_ref() { - if has_default_export(parsed_source) { + if let Some(parsed_source) = + parsed_source_cache.get_parsed_source_from_module(module)? + { + if has_default_export(&parsed_source) { writeln!( text, "export {{ default }} from \"{}\";", @@ -205,7 +213,7 @@ fn build_proxy_module_source( } } - text + Ok(text) } #[cfg(test)] diff --git a/cli/tools/vendor/import_map.rs b/cli/tools/vendor/import_map.rs index 7e72693a1e..cc759b8595 100644 --- a/cli/tools/vendor/import_map.rs +++ b/cli/tools/vendor/import_map.rs @@ -3,6 +3,7 @@ use deno_ast::LineAndColumnIndex; use deno_ast::ModuleSpecifier; use deno_ast::SourceTextInfo; +use deno_core::error::AnyError; use deno_graph::Module; use deno_graph::ModuleGraph; use deno_graph::Position; @@ -13,6 +14,8 @@ use import_map::SpecifierMap; use indexmap::IndexMap; use log::warn; +use crate::cache::ParsedSourceCache; + use super::mappings::Mappings; use super::specifiers::is_remote_specifier; use super::specifiers::is_remote_specifier_text; @@ -179,9 +182,10 @@ pub fn build_import_map( modules: &[&Module], mappings: &Mappings, original_import_map: Option<&ImportMap>, -) -> String { + parsed_source_cache: &ParsedSourceCache, +) -> Result { let mut builder = ImportMapBuilder::new(base_dir, mappings); - visit_modules(graph, modules, mappings, &mut builder); + visit_modules(graph, modules, mappings, &mut builder, parsed_source_cache)?; for base_specifier in mappings.base_specifiers() { builder @@ -189,7 +193,7 @@ pub fn build_import_map( .add(base_specifier.to_string(), base_specifier); } - builder.into_import_map(original_import_map).to_json() + Ok(builder.into_import_map(original_import_map).to_json()) } fn visit_modules( @@ -197,12 +201,14 @@ fn visit_modules( modules: &[&Module], mappings: &Mappings, import_map: &mut ImportMapBuilder, -) { + parsed_source_cache: &ParsedSourceCache, +) -> Result<(), AnyError> { for module in modules { - let text_info = match &module.maybe_parsed_source { - Some(source) => source.text_info(), - None => continue, - }; + let text_info = + match parsed_source_cache.get_parsed_source_from_module(module)? { + Some(source) => source.text_info().clone(), + None => continue, + }; let source_text = match &module.maybe_source { Some(source) => source, None => continue, @@ -215,7 +221,7 @@ fn visit_modules( import_map, &module.specifier, mappings, - text_info, + &text_info, source_text, ); visit_maybe_resolved( @@ -224,7 +230,7 @@ fn visit_modules( import_map, &module.specifier, mappings, - text_info, + &text_info, source_text, ); } @@ -236,11 +242,13 @@ fn visit_modules( import_map, &module.specifier, mappings, - text_info, + &text_info, source_text, ); } } + + Ok(()) } fn visit_maybe_resolved( diff --git a/cli/tools/vendor/mod.rs b/cli/tools/vendor/mod.rs index a81965a153..05fee531a5 100644 --- a/cli/tools/vendor/mod.rs +++ b/cli/tools/vendor/mod.rs @@ -45,6 +45,7 @@ pub async fn vendor( let graph = create_graph(&ps, &vendor_flags).await?; let vendored_count = build::build( graph, + &ps.parsed_source_cache, &output_dir, ps.maybe_import_map.as_deref(), &build::RealVendorEnvironment, diff --git a/cli/tools/vendor/test.rs b/cli/tools/vendor/test.rs index 836fb579bd..036dd66637 100644 --- a/cli/tools/vendor/test.rs +++ b/cli/tools/vendor/test.rs @@ -20,6 +20,7 @@ use deno_graph::ModuleGraph; use deno_graph::ModuleKind; use import_map::ImportMap; +use crate::cache::ParsedSourceCache; use crate::resolver::ImportMapResolver; use super::build::VendorEnvironment; @@ -219,11 +220,18 @@ impl VendorTestBuilder { .map(|s| (s.to_owned(), deno_graph::ModuleKind::Esm)) .collect(); let loader = self.loader.clone(); - let graph = - build_test_graph(roots, self.original_import_map.clone(), loader.clone()) - .await; + let parsed_source_cache = ParsedSourceCache::new(None); + let analyzer = parsed_source_cache.as_analyzer(); + let graph = build_test_graph( + roots, + self.original_import_map.clone(), + loader.clone(), + &*analyzer, + ) + .await; super::build::build( graph, + &parsed_source_cache, &output_dir, self.original_import_map.as_ref(), &self.environment, @@ -254,6 +262,7 @@ async fn build_test_graph( roots: Vec<(ModuleSpecifier, ModuleKind)>, original_import_map: Option, mut loader: TestLoader, + analyzer: &dyn deno_graph::ModuleAnalyzer, ) -> ModuleGraph { let resolver = original_import_map.map(|m| ImportMapResolver::new(Arc::new(m))); @@ -264,7 +273,7 @@ async fn build_test_graph( &mut loader, resolver.as_ref().map(|im| im.as_resolver()), None, - None, + Some(analyzer), None, ) .await diff --git a/test_util/src/pty.rs b/test_util/src/pty.rs index f69fc8b310..83a27926bc 100644 --- a/test_util/src/pty.rs +++ b/test_util/src/pty.rs @@ -82,7 +82,7 @@ pub fn create_pty( let pty = windows::WinPseudoConsole::new( program, args, - &cwd.as_ref().to_string_lossy().to_string(), + &cwd.as_ref().to_string_lossy(), env_vars, ); Box::new(pty) @@ -338,14 +338,15 @@ mod windows { } } + // SAFETY: These handles are ok to send across threads. unsafe impl Send for WinHandle {} + // SAFETY: These handles are ok to send across threads. unsafe impl Sync for WinHandle {} impl Drop for WinHandle { fn drop(&mut self) { if !self.inner.is_null() && self.inner != INVALID_HANDLE_VALUE { - // SAFETY: - // winapi call + // SAFETY: winapi call unsafe { winapi::um::handleapi::CloseHandle(self.inner); }