From 52ad1ef154d352529c4ad4857ab82d8478aeb105 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Wed, 24 Jan 2024 22:24:52 +0100 Subject: [PATCH] feat(publish): give diagnostic on invalid package files (#22082) --- cli/diagnostics.rs | 46 ++-- cli/tests/integration/publish_tests.rs | 34 ++- cli/tests/testdata/publish/invalid_path.out | 11 + .../testdata/publish/invalid_path/deno.json | 7 + .../testdata/publish/invalid_path/mod.ts | 3 + .../publish/invalid_path/path with spaces.txt | 0 cli/tests/testdata/publish/symlink.out | 11 + cli/tests/testdata/publish/symlink/deno.json | 7 + cli/tests/testdata/publish/symlink/mod.ts | 3 + cli/tests/testdata/publish/symlink/symlink | 1 + cli/tools/doc.rs | 2 +- cli/tools/lint.rs | 2 +- cli/tools/registry/diagnostics.rs | 68 +++++- cli/tools/registry/mod.rs | 3 +- cli/tools/registry/paths.rs | 198 ++++++++++++++++++ cli/tools/registry/tar.rs | 131 ++++++++---- 16 files changed, 441 insertions(+), 86 deletions(-) create mode 100644 cli/tests/testdata/publish/invalid_path.out create mode 100644 cli/tests/testdata/publish/invalid_path/deno.json create mode 100644 cli/tests/testdata/publish/invalid_path/mod.ts create mode 100644 cli/tests/testdata/publish/invalid_path/path with spaces.txt create mode 100644 cli/tests/testdata/publish/symlink.out create mode 100644 cli/tests/testdata/publish/symlink/deno.json create mode 100644 cli/tests/testdata/publish/symlink/mod.ts create mode 120000 cli/tests/testdata/publish/symlink/symlink create mode 100644 cli/tools/registry/paths.rs diff --git a/cli/diagnostics.rs b/cli/diagnostics.rs index eb8a0de60a..015dea1787 100644 --- a/cli/diagnostics.rs +++ b/cli/diagnostics.rs @@ -4,6 +4,7 @@ use std::borrow::Cow; use std::fmt; use std::fmt::Display; use std::fmt::Write as _; +use std::path::PathBuf; use deno_ast::ModuleSpecifier; use deno_ast::SourcePos; @@ -61,17 +62,19 @@ impl DiagnosticSourcePos { #[derive(Clone, Debug)] pub enum DiagnosticLocation<'a> { - /// The diagnostic is relevant to an entire file. - File { + /// The diagnostic is relevant to a specific path. + Path { path: PathBuf }, + /// The diagnostic is relevant to an entire module. + Module { /// The specifier of the module that contains the diagnostic. specifier: Cow<'a, ModuleSpecifier>, }, - /// The diagnostic is relevant to a specific position in a file. + /// The diagnostic is relevant to a specific position in a module. /// /// This variant will get the relevant `SouceTextInfo` from the cache using /// the given specifier, and will then calculate the line and column numbers /// from the given `SourcePos`. - PositionInFile { + ModulePosition { /// The specifier of the module that contains the diagnostic. specifier: Cow<'a, ModuleSpecifier>, /// The source position of the diagnostic. @@ -80,13 +83,6 @@ pub enum DiagnosticLocation<'a> { } impl<'a> DiagnosticLocation<'a> { - fn specifier(&self) -> &ModuleSpecifier { - match self { - DiagnosticLocation::File { specifier } => specifier, - DiagnosticLocation::PositionInFile { specifier, .. } => specifier, - } - } - /// Return the line and column number of the diagnostic. /// /// The line number is 1-indexed. @@ -97,8 +93,9 @@ impl<'a> DiagnosticLocation<'a> { /// everyone uses VS Code. :) fn position(&self, sources: &dyn SourceTextStore) -> Option<(usize, usize)> { match self { - DiagnosticLocation::File { .. } => None, - DiagnosticLocation::PositionInFile { + DiagnosticLocation::Path { .. } => None, + DiagnosticLocation::Module { .. } => None, + DiagnosticLocation::ModulePosition { specifier, source_pos, } => { @@ -384,7 +381,7 @@ fn print_diagnostic( write!( io, "{}", - colors::yellow(format_args!("warning[{}]", diagnostic.code())) + colors::yellow_bold(format_args!("warning[{}]", diagnostic.code())) )?; } } @@ -410,11 +407,18 @@ fn print_diagnostic( RepeatingCharFmt(' ', max_line_number_digits as usize), colors::intense_blue("-->"), )?; - let location_specifier = location.specifier(); - if let Ok(path) = location_specifier.to_file_path() { - write!(io, " {}", colors::cyan(path.display()))?; - } else { - write!(io, " {}", colors::cyan(location_specifier.as_str()))?; + match &location { + DiagnosticLocation::Path { path } => { + write!(io, " {}", colors::cyan(path.display()))?; + } + DiagnosticLocation::Module { specifier } + | DiagnosticLocation::ModulePosition { specifier, .. } => { + if let Ok(path) = specifier.to_file_path() { + write!(io, " {}", colors::cyan(path.display()))?; + } else { + write!(io, " {}", colors::cyan(specifier.as_str()))?; + } + } } if let Some((line, column)) = location.position(sources) { write!( @@ -614,7 +618,7 @@ mod tests { specifier: specifier.clone(), text_info, }; - let location = super::DiagnosticLocation::PositionInFile { + let location = super::DiagnosticLocation::ModulePosition { specifier: Cow::Borrowed(&specifier), source_pos: super::DiagnosticSourcePos::SourcePos(pos), }; @@ -631,7 +635,7 @@ mod tests { specifier: specifier.clone(), text_info, }; - let location = super::DiagnosticLocation::PositionInFile { + let location = super::DiagnosticLocation::ModulePosition { specifier: Cow::Borrowed(&specifier), source_pos: super::DiagnosticSourcePos::SourcePos(pos), }; diff --git a/cli/tests/integration/publish_tests.rs b/cli/tests/integration/publish_tests.rs index 92c65681a2..0e917eab97 100644 --- a/cli/tests/integration/publish_tests.rs +++ b/cli/tests/integration/publish_tests.rs @@ -25,101 +25,95 @@ itest!(missing_deno_json { args: "publish --token 'sadfasdf'", output: "publish/missing_deno_json.out", cwd: Some("publish/missing_deno_json"), - copy_temp_dir: Some("publish/missing_deno_json"), exit_code: 1, - temp_cwd: true, }); itest!(invalid_fast_check { args: "publish --token 'sadfasdf'", output: "publish/invalid_fast_check.out", cwd: Some("publish/invalid_fast_check"), - copy_temp_dir: Some("publish/invalid_fast_check"), exit_code: 1, - temp_cwd: true, +}); + +itest!(invalid_path { + args: "publish --token 'sadfasdf'", + output: "publish/invalid_path.out", + cwd: Some("publish/invalid_path"), + exit_code: 1, +}); + +itest!(symlink { + args: "publish --token 'sadfasdf' --dry-run", + output: "publish/symlink.out", + cwd: Some("publish/symlink"), + exit_code: 0, }); itest!(javascript_missing_decl_file { args: "publish --token 'sadfasdf'", output: "publish/javascript_missing_decl_file.out", cwd: Some("publish/javascript_missing_decl_file"), - copy_temp_dir: Some("publish/javascript_missing_decl_file"), envs: env_vars_for_registry(), exit_code: 0, http_server: true, - temp_cwd: true, }); itest!(unanalyzable_dynamic_import { args: "publish --token 'sadfasdf'", output: "publish/unanalyzable_dynamic_import.out", cwd: Some("publish/unanalyzable_dynamic_import"), - copy_temp_dir: Some("publish/unanalyzable_dynamic_import"), envs: env_vars_for_registry(), exit_code: 0, http_server: true, - temp_cwd: true, }); itest!(javascript_decl_file { args: "publish --token 'sadfasdf'", output: "publish/javascript_decl_file.out", cwd: Some("publish/javascript_decl_file"), - copy_temp_dir: Some("publish/javascript_decl_file"), envs: env_vars_for_registry(), http_server: true, exit_code: 0, - temp_cwd: true, }); itest!(successful { args: "publish --token 'sadfasdf'", output: "publish/successful.out", cwd: Some("publish/successful"), - copy_temp_dir: Some("publish/successful"), envs: env_vars_for_registry(), http_server: true, - temp_cwd: true, }); itest!(config_file_jsonc { args: "publish --token 'sadfasdf'", output: "publish/deno_jsonc.out", cwd: Some("publish/deno_jsonc"), - copy_temp_dir: Some("publish/deno_jsonc"), envs: env_vars_for_registry(), http_server: true, - temp_cwd: true, }); itest!(workspace_all { args: "publish --token 'sadfasdf'", output: "publish/workspace.out", cwd: Some("publish/workspace"), - copy_temp_dir: Some("publish/workspace"), envs: env_vars_for_registry(), http_server: true, - temp_cwd: true, }); itest!(workspace_individual { args: "publish --token 'sadfasdf'", output: "publish/workspace_individual.out", cwd: Some("publish/workspace/bar"), - copy_temp_dir: Some("publish/workspace"), envs: env_vars_for_registry(), http_server: true, - temp_cwd: true, }); itest!(dry_run { args: "publish --token 'sadfasdf' --dry-run", cwd: Some("publish/successful"), - copy_temp_dir: Some("publish/successful"), output: "publish/dry_run.out", envs: env_vars_for_registry(), http_server: true, - temp_cwd: true, }); #[test] diff --git a/cli/tests/testdata/publish/invalid_path.out b/cli/tests/testdata/publish/invalid_path.out new file mode 100644 index 0000000000..cd3e92e0cc --- /dev/null +++ b/cli/tests/testdata/publish/invalid_path.out @@ -0,0 +1,11 @@ +Checking fast check type graph for errors... +Ensuring type checks... +Check file://[WILDCARD]mod.ts +error[invalid-path]: package path must not contain whitespace (found ' ') + --> [WILDCARD]path with spaces.txt + = hint: rename or remove the file, or add it to 'publish.exclude' in the config file + + info: to portably support all platforms, including windows, the allowed characters in package paths are limited + docs: https://jsr.io/go/invalid-path + +error: Found 1 problem diff --git a/cli/tests/testdata/publish/invalid_path/deno.json b/cli/tests/testdata/publish/invalid_path/deno.json new file mode 100644 index 0000000000..213a7cec63 --- /dev/null +++ b/cli/tests/testdata/publish/invalid_path/deno.json @@ -0,0 +1,7 @@ +{ + "name": "@foo/bar", + "version": "1.0.0", + "exports": { + ".": "./mod.ts" + } +} diff --git a/cli/tests/testdata/publish/invalid_path/mod.ts b/cli/tests/testdata/publish/invalid_path/mod.ts new file mode 100644 index 0000000000..9e217d9b01 --- /dev/null +++ b/cli/tests/testdata/publish/invalid_path/mod.ts @@ -0,0 +1,3 @@ +export function foobar(): string { + return "string"; +} diff --git a/cli/tests/testdata/publish/invalid_path/path with spaces.txt b/cli/tests/testdata/publish/invalid_path/path with spaces.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cli/tests/testdata/publish/symlink.out b/cli/tests/testdata/publish/symlink.out new file mode 100644 index 0000000000..5befec4f94 --- /dev/null +++ b/cli/tests/testdata/publish/symlink.out @@ -0,0 +1,11 @@ +Checking fast check type graph for errors... +Ensuring type checks... +Check [WILDCARD]mod.ts +warning[unsupported-file-type]: unsupported file type 'symlink' + --> [WILDCARD]symlink + = hint: remove the file, or add it to 'publish.exclude' in the config file + + info: only files and directories are supported + info: the file was ignored and will not be published + +Warning Aborting due to --dry-run diff --git a/cli/tests/testdata/publish/symlink/deno.json b/cli/tests/testdata/publish/symlink/deno.json new file mode 100644 index 0000000000..213a7cec63 --- /dev/null +++ b/cli/tests/testdata/publish/symlink/deno.json @@ -0,0 +1,7 @@ +{ + "name": "@foo/bar", + "version": "1.0.0", + "exports": { + ".": "./mod.ts" + } +} diff --git a/cli/tests/testdata/publish/symlink/mod.ts b/cli/tests/testdata/publish/symlink/mod.ts new file mode 100644 index 0000000000..9e217d9b01 --- /dev/null +++ b/cli/tests/testdata/publish/symlink/mod.ts @@ -0,0 +1,3 @@ +export function foobar(): string { + return "string"; +} diff --git a/cli/tests/testdata/publish/symlink/symlink b/cli/tests/testdata/publish/symlink/symlink new file mode 120000 index 0000000000..0df9bcd04e --- /dev/null +++ b/cli/tests/testdata/publish/symlink/symlink @@ -0,0 +1 @@ +./mod.ts \ No newline at end of file diff --git a/cli/tools/doc.rs b/cli/tools/doc.rs index 2cb9ddfbae..f14d6f5f57 100644 --- a/cli/tools/doc.rs +++ b/cli/tools/doc.rs @@ -339,7 +339,7 @@ impl Diagnostic for DocDiagnostic { fn location(&self) -> DiagnosticLocation { let specifier = Url::parse(&self.location.filename).unwrap(); - DiagnosticLocation::PositionInFile { + DiagnosticLocation::ModulePosition { specifier: Cow::Owned(specifier), source_pos: DiagnosticSourcePos::ByteIndex(self.location.byte_index), } diff --git a/cli/tools/lint.rs b/cli/tools/lint.rs index 8de8160de8..e9f84fd77d 100644 --- a/cli/tools/lint.rs +++ b/cli/tools/lint.rs @@ -374,7 +374,7 @@ impl Diagnostic for LintDiagnostic { fn location(&self) -> DiagnosticLocation { let specifier = url::Url::from_file_path(&self.filename).unwrap(); - DiagnosticLocation::PositionInFile { + DiagnosticLocation::ModulePosition { specifier: Cow::Owned(specifier), source_pos: DiagnosticSourcePos::ByteIndex(self.range.start.byte_index), } diff --git a/cli/tools/registry/diagnostics.rs b/cli/tools/registry/diagnostics.rs index 0a847c46b1..45090aa2ce 100644 --- a/cli/tools/registry/diagnostics.rs +++ b/cli/tools/registry/diagnostics.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::fmt::Display; +use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex; @@ -10,6 +11,7 @@ use deno_core::anyhow::anyhow; use deno_core::error::AnyError; use deno_graph::FastCheckDiagnostic; use deno_graph::ParsedSourceStore; +use lsp_types::Url; use crate::diagnostics::Diagnostic; use crate::diagnostics::DiagnosticLevel; @@ -61,6 +63,9 @@ impl PublishDiagnosticsCollector { pub enum PublishDiagnostic { FastCheck(FastCheckDiagnostic), ImportMapUnfurl(ImportMapUnfurlDiagnostic), + InvalidPath { path: PathBuf, message: String }, + DuplicatePath { path: PathBuf }, + UnsupportedFileType { specifier: Url, kind: String }, } impl Diagnostic for PublishDiagnostic { @@ -71,6 +76,9 @@ impl Diagnostic for PublishDiagnostic { ) => DiagnosticLevel::Warning, PublishDiagnostic::FastCheck(_) => DiagnosticLevel::Error, PublishDiagnostic::ImportMapUnfurl(_) => DiagnosticLevel::Warning, + PublishDiagnostic::InvalidPath { .. } => DiagnosticLevel::Error, + PublishDiagnostic::DuplicatePath { .. } => DiagnosticLevel::Error, + PublishDiagnostic::UnsupportedFileType { .. } => DiagnosticLevel::Warning, } } @@ -78,6 +86,11 @@ impl Diagnostic for PublishDiagnostic { match &self { PublishDiagnostic::FastCheck(diagnostic) => diagnostic.code(), PublishDiagnostic::ImportMapUnfurl(diagnostic) => diagnostic.code(), + PublishDiagnostic::InvalidPath { .. } => "invalid-path", + PublishDiagnostic::DuplicatePath { .. } => { + "case-insensitive-duplicate-path" + } + PublishDiagnostic::UnsupportedFileType { .. } => "unsupported-file-type", } } @@ -89,17 +102,26 @@ impl Diagnostic for PublishDiagnostic { PublishDiagnostic::ImportMapUnfurl(diagnostic) => { Cow::Borrowed(diagnostic.message()) } + PublishDiagnostic::InvalidPath { message, .. } => { + Cow::Borrowed(message.as_str()) + } + PublishDiagnostic::DuplicatePath { .. } => { + Cow::Borrowed("package path is a case insensitive duplicate of another path in the package") + } + PublishDiagnostic::UnsupportedFileType { kind, .. } => { + Cow::Owned(format!("unsupported file type '{kind}'",)) + } } } fn location(&self) -> DiagnosticLocation { match &self { PublishDiagnostic::FastCheck(diagnostic) => match diagnostic.range() { - Some(range) => DiagnosticLocation::PositionInFile { + Some(range) => DiagnosticLocation::ModulePosition { specifier: Cow::Borrowed(diagnostic.specifier()), source_pos: DiagnosticSourcePos::SourcePos(range.range.start), }, - None => DiagnosticLocation::File { + None => DiagnosticLocation::Module { specifier: Cow::Borrowed(diagnostic.specifier()), }, }, @@ -107,11 +129,22 @@ impl Diagnostic for PublishDiagnostic { ImportMapUnfurlDiagnostic::UnanalyzableDynamicImport { specifier, range, - } => DiagnosticLocation::PositionInFile { + } => DiagnosticLocation::ModulePosition { specifier: Cow::Borrowed(specifier), source_pos: DiagnosticSourcePos::SourcePos(range.start), }, }, + PublishDiagnostic::InvalidPath { path, .. } => { + DiagnosticLocation::Path { path: path.clone() } + } + PublishDiagnostic::DuplicatePath { path, .. } => { + DiagnosticLocation::Path { path: path.clone() } + } + PublishDiagnostic::UnsupportedFileType { specifier, .. } => { + DiagnosticLocation::Module { + specifier: Cow::Borrowed(specifier), + } + } } } @@ -148,6 +181,9 @@ impl Diagnostic for PublishDiagnostic { }, }), }, + PublishDiagnostic::InvalidPath { .. } => None, + PublishDiagnostic::DuplicatePath { .. } => None, + PublishDiagnostic::UnsupportedFileType { .. } => None, } } @@ -155,6 +191,15 @@ impl Diagnostic for PublishDiagnostic { match &self { PublishDiagnostic::FastCheck(diagnostic) => Some(diagnostic.fix_hint()), PublishDiagnostic::ImportMapUnfurl(_) => None, + PublishDiagnostic::InvalidPath { .. } => Some( + "rename or remove the file, or add it to 'publish.exclude' in the config file", + ), + PublishDiagnostic::DuplicatePath { .. } => Some( + "rename or remove the file", + ), + PublishDiagnostic::UnsupportedFileType { .. } => Some( + "remove the file, or add it to 'publish.exclude' in the config file", + ), } } @@ -179,6 +224,16 @@ impl Diagnostic for PublishDiagnostic { Cow::Borrowed("make sure the dynamic import is resolvable at runtime without an import map") ]), }, + PublishDiagnostic::InvalidPath { .. } => Cow::Borrowed(&[ + Cow::Borrowed("to portably support all platforms, including windows, the allowed characters in package paths are limited"), + ]), + PublishDiagnostic::DuplicatePath { .. } => Cow::Borrowed(&[ + Cow::Borrowed("to support case insensitive file systems, no two package paths may differ only by case"), + ]), + PublishDiagnostic::UnsupportedFileType { .. } => Cow::Borrowed(&[ + Cow::Borrowed("only files and directories are supported"), + Cow::Borrowed("the file was ignored and will not be published") + ]), } } @@ -190,6 +245,13 @@ impl Diagnostic for PublishDiagnostic { PublishDiagnostic::ImportMapUnfurl(diagnostic) => match diagnostic { ImportMapUnfurlDiagnostic::UnanalyzableDynamicImport { .. } => None, }, + PublishDiagnostic::InvalidPath { .. } => { + Some("https://jsr.io/go/invalid-path".to_owned()) + } + PublishDiagnostic::DuplicatePath { .. } => { + Some("https://jsr.io/go/case-insensitive-duplicate-path".to_owned()) + } + PublishDiagnostic::UnsupportedFileType { .. } => None, } } } diff --git a/cli/tools/registry/mod.rs b/cli/tools/registry/mod.rs index 990f910a83..1c5344d271 100644 --- a/cli/tools/registry/mod.rs +++ b/cli/tools/registry/mod.rs @@ -44,6 +44,7 @@ mod api; mod auth; mod diagnostics; mod graph; +mod paths; mod publish_order; mod tar; @@ -474,7 +475,7 @@ async fn perform_publish( log::debug!( " Tarball file {} {}", human_size(file.size as f64), - file.path.display() + file.specifier ); } } diff --git a/cli/tools/registry/paths.rs b/cli/tools/registry/paths.rs new file mode 100644 index 0000000000..86c04a7cb5 --- /dev/null +++ b/cli/tools/registry/paths.rs @@ -0,0 +1,198 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +// Validation logic in this file is shared with registry/api/src/ids.rs + +use thiserror::Error; + +/// A package path, like '/foo' or '/foo/bar'. The path is prefixed with a slash +/// and does not end with a slash. +/// +/// The path must not contain any double slashes, dot segments, or dot dot +/// segments. +/// +/// The path must be less than 160 characters long, including the slash prefix. +/// +/// The path must not contain any windows reserved characters, like CON, PRN, +/// AUX, NUL, or COM1. +/// +/// The path must not contain any windows path separators, like backslash or +/// colon. +/// +/// The path must only contain ascii alphanumeric characters, and the characters +/// '$', '(', ')', '+', '-', '.', '@', '[', ']', '_', '{', '}', '~'. +/// +/// Path's are case sensitive, but comparisons and hashing are case insensitive. +/// This matches the behaviour of the Windows FS APIs. +#[derive(Clone, Default)] +pub struct PackagePath { + path: String, + lower: Option, +} + +impl PartialEq for PackagePath { + fn eq(&self, other: &Self) -> bool { + let self_lower = self.lower.as_ref().unwrap_or(&self.path); + let other_lower = other.lower.as_ref().unwrap_or(&other.path); + self_lower == other_lower + } +} + +impl Eq for PackagePath {} + +impl std::hash::Hash for PackagePath { + fn hash(&self, state: &mut H) { + let lower = self.lower.as_ref().unwrap_or(&self.path); + lower.hash(state); + } +} + +impl PackagePath { + pub fn new(path: String) -> Result { + let len = path.len(); + if len > 160 { + return Err(PackagePathValidationError::TooLong(len)); + } + + if len == 0 { + return Err(PackagePathValidationError::MissingPrefix); + } + + let mut components = path.split('/').peekable(); + let Some("") = components.next() else { + return Err(PackagePathValidationError::MissingPrefix); + }; + + let mut has_upper = false; + let mut valid_char_mapper = |c: char| { + if c.is_ascii_uppercase() { + has_upper = true; + } + valid_char(c) + }; + while let Some(component) = components.next() { + if component.is_empty() { + if components.peek().is_none() { + return Err(PackagePathValidationError::TrailingSlash); + } + return Err(PackagePathValidationError::EmptyComponent); + } + + if component == "." || component == ".." { + return Err(PackagePathValidationError::DotSegment); + } + + if let Some(err) = component.chars().find_map(&mut valid_char_mapper) { + return Err(err); + } + + let basename = match component.rsplit_once('.') { + Some((_, "")) => { + return Err(PackagePathValidationError::TrailingDot( + component.to_owned(), + )); + } + Some((basename, _)) => basename, + None => component, + }; + + let lower_basename = basename.to_ascii_lowercase(); + if WINDOWS_RESERVED_NAMES + .binary_search(&&*lower_basename) + .is_ok() + { + return Err(PackagePathValidationError::ReservedName( + component.to_owned(), + )); + } + } + + let lower = has_upper.then(|| path.to_ascii_lowercase()); + + Ok(Self { path, lower }) + } +} + +const WINDOWS_RESERVED_NAMES: [&str; 22] = [ + "aux", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", + "com9", "con", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", + "lpt8", "lpt9", "nul", "prn", +]; + +fn valid_char(c: char) -> Option { + match c { + 'a'..='z' + | 'A'..='Z' + | '0'..='9' + | '$' + | '(' + | ')' + | '+' + | '-' + | '.' + | '@' + | '[' + | ']' + | '_' + | '{' + | '}' + | '~' => None, + // informative error messages for some invalid characters + '\\' | ':' => Some( + PackagePathValidationError::InvalidWindowsPathSeparatorChar(c), + ), + '<' | '>' | '"' | '|' | '?' | '*' => { + Some(PackagePathValidationError::InvalidWindowsChar(c)) + } + ' ' | '\t' | '\n' | '\r' => { + Some(PackagePathValidationError::InvalidWhitespace(c)) + } + '%' | '#' => Some(PackagePathValidationError::InvalidSpecialUrlChar(c)), + // other invalid characters + c => Some(PackagePathValidationError::InvalidOtherChar(c)), + } +} + +#[derive(Debug, Clone, Error)] +pub enum PackagePathValidationError { + #[error("package path must be at most 160 characters long, but is {0} characters long")] + TooLong(usize), + + #[error("package path must be prefixed with a slash")] + MissingPrefix, + + #[error("package path must not end with a slash")] + TrailingSlash, + + #[error("package path must not contain empty components")] + EmptyComponent, + + #[error("package path must not contain dot segments like '.' or '..'")] + DotSegment, + + #[error( + "package path must not contain windows reserved names like 'CON' or 'PRN' (found '{0}')" + )] + ReservedName(String), + + #[error("path segment must not end in a dot (found '{0}')")] + TrailingDot(String), + + #[error( + "package path must not contain windows path separators like '\\' or ':' (found '{0}')" + )] + InvalidWindowsPathSeparatorChar(char), + + #[error( + "package path must not contain windows reserved characters like '<', '>', '\"', '|', '?', or '*' (found '{0}')" + )] + InvalidWindowsChar(char), + + #[error("package path must not contain whitespace (found '{}')", .0.escape_debug())] + InvalidWhitespace(char), + + #[error("package path must not contain special URL characters (found '{}')", .0.escape_debug())] + InvalidSpecialUrlChar(char), + + #[error("package path must not contain invalid characters (found '{}')", .0.escape_debug())] + InvalidOtherChar(char), +} diff --git a/cli/tools/registry/tar.rs b/cli/tools/registry/tar.rs index c3fafa4b21..1dcfe2949b 100644 --- a/cli/tools/registry/tar.rs +++ b/cli/tools/registry/tar.rs @@ -2,17 +2,18 @@ use bytes::Bytes; use deno_config::glob::FilePatterns; -use deno_core::anyhow; use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::url::Url; use sha2::Digest; +use std::collections::HashSet; +use std::ffi::OsStr; use std::fmt::Write as FmtWrite; use std::io::Write; use std::path::Path; -use std::path::PathBuf; use tar::Header; +use crate::tools::registry::paths::PackagePath; use crate::util::import_map::ImportMapUnfurler; use super::diagnostics::PublishDiagnostic; @@ -20,14 +21,13 @@ use super::diagnostics::PublishDiagnosticsCollector; #[derive(Debug, Clone, PartialEq)] pub struct PublishableTarballFile { - pub path: PathBuf, + pub specifier: Url, pub size: usize, } #[derive(Debug, Clone, PartialEq)] pub struct PublishableTarball { pub files: Vec, - pub diagnostics: Vec, pub hash: String, pub bytes: Bytes, } @@ -40,67 +40,121 @@ pub fn create_gzipped_tarball( file_patterns: Option, ) -> Result { let mut tar = TarGzArchive::new(); - let mut diagnostics = vec![]; let mut files = vec![]; + let mut paths = HashSet::new(); + let mut iterator = walkdir::WalkDir::new(dir).follow_links(false).into_iter(); while let Some(entry) = iterator.next() { let entry = entry?; - if let Some(file_patterns) = &file_patterns { - if !file_patterns.matches_path(entry.path()) { - if entry.file_type().is_dir() { - iterator.skip_current_dir(); - } - continue; + let path = entry.path(); + let file_type = entry.file_type(); + + let matches_pattern = file_patterns + .as_ref() + .map(|p| p.matches_path(path)) + .unwrap_or(true); + if !matches_pattern + || path.file_name() == Some(OsStr::new(".git")) + || path.file_name() == Some(OsStr::new("node_modules")) + { + if file_type.is_dir() { + iterator.skip_current_dir(); } + continue; } - if entry.file_type().is_file() { - let url = Url::from_file_path(entry.path()) - .map_err(|_| anyhow::anyhow!("Unable to convert path to url"))?; - let relative_path = entry - .path() - .strip_prefix(dir) - .map_err(|err| anyhow::anyhow!("Unable to strip prefix: {err:#}"))?; - let relative_path_str = relative_path.to_str().ok_or_else(|| { - anyhow::anyhow!( - "Unable to convert path to string '{}'", - relative_path.display() - ) - })?; - let data = std::fs::read(entry.path()).with_context(|| { + let Ok(specifier) = Url::from_file_path(path) else { + diagnostics_collector + .to_owned() + .push(PublishDiagnostic::InvalidPath { + path: path.to_path_buf(), + message: "unable to convert path to url".to_string(), + }); + continue; + }; + + if file_type.is_file() { + let Ok(relative_path) = path.strip_prefix(dir) else { + diagnostics_collector + .to_owned() + .push(PublishDiagnostic::InvalidPath { + path: path.to_path_buf(), + message: "path is not in publish directory".to_string(), + }); + continue; + }; + + let path_str = relative_path.components().fold( + "".to_string(), + |mut path, component| { + path.push('/'); + match component { + std::path::Component::Normal(normal) => { + path.push_str(&normal.to_string_lossy()) + } + std::path::Component::CurDir => path.push('.'), + std::path::Component::ParentDir => path.push_str(".."), + _ => unreachable!(), + } + path + }, + ); + + match PackagePath::new(path_str.clone()) { + Ok(package_path) => { + if !paths.insert(package_path) { + diagnostics_collector.to_owned().push( + PublishDiagnostic::DuplicatePath { + path: path.to_path_buf(), + }, + ); + } + } + Err(err) => { + diagnostics_collector.to_owned().push( + PublishDiagnostic::InvalidPath { + path: path.to_path_buf(), + message: err.to_string(), + }, + ); + } + } + + let data = std::fs::read(path).with_context(|| { format!("Unable to read file '{}'", entry.path().display()) })?; files.push(PublishableTarballFile { - path: relative_path.to_path_buf(), + specifier: specifier.clone(), size: data.len(), }); - let content = match source_cache.get_parsed_source(&url) { + let content = match source_cache.get_parsed_source(&specifier) { Some(parsed_source) => { let mut reporter = |diagnostic| { diagnostics_collector .push(PublishDiagnostic::ImportMapUnfurl(diagnostic)); }; - let content = unfurler.unfurl(&url, &parsed_source, &mut reporter); + let content = + unfurler.unfurl(&specifier, &parsed_source, &mut reporter); content.into_bytes() } None => data, }; tar - .add_file(relative_path_str.to_string(), &content) + .add_file(format!(".{}", path_str), &content) .with_context(|| { format!("Unable to add file to tarball '{}'", entry.path().display()) })?; - } else if entry.file_type().is_dir() { - if entry.file_name() == ".git" || entry.file_name() == "node_modules" { - iterator.skip_current_dir(); - } - } else { - diagnostics.push(format!( - "Unsupported file type at path '{}'", - entry.path().display() - )); + } else if !file_type.is_dir() { + diagnostics_collector.push(PublishDiagnostic::UnsupportedFileType { + specifier, + kind: if file_type.is_symlink() { + "symlink".to_owned() + } else { + format!("{file_type:?}") + }, + }); } } @@ -113,7 +167,6 @@ pub fn create_gzipped_tarball( Ok(PublishableTarball { files, - diagnostics, hash, bytes: Bytes::from(v), })