Mirror of https://github.com/denoland/deno.git, synced 2024-11-21 15:04:11 -05:00
fix(npm): make tarball extraction more reliable (#23759)
1. Extracts to a directory beside the destination.
2. Renames to the destination with retries.
parent: c0e3b6ed9d
commit: f16b4d4df8

4 changed files with 153 additions and 29 deletions
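The core idea is small enough to sketch standalone. Below is a minimal sketch of the pattern this commit adopts — extract into a hidden sibling temp directory, then `fs::rename` into place, treating an already-existing destination as success. This is not the Deno code: the helper names (`sibling_temp_dir`, `install_dir`) and the process-id suffix are illustrative only; the actual implementation in the diffs below uses a random hex component (`gen_rand_path_component`) and lives in `crate::util::path` and the tarball module.

```rust
use std::fs;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::thread;
use std::time::Duration;

// Illustrative naming scheme: a hidden sibling of the destination. The real
// code appends a random hex component (see get_atomic_dir_path in the diff);
// the process id here just keeps this sketch dependency-free.
fn sibling_temp_dir(dest: &Path) -> PathBuf {
  let name = dest
    .file_name()
    .map(|f| f.to_string_lossy().into_owned())
    .unwrap_or_default();
  dest.with_file_name(format!(".{}_{:08x}", name, std::process::id()))
}

// Extract beside the destination, then rename into place with retries.
fn install_dir(
  extract: impl Fn(&Path) -> std::io::Result<()>,
  dest: &Path,
) -> std::io::Result<()> {
  let temp = sibling_temp_dir(dest);
  extract(&temp)?; // the destination is never observed half-written
  for attempt in 1u64.. {
    match fs::rename(&temp, dest) {
      Ok(()) => return Ok(()),
      // Another process won the race and moved an equivalent directory into
      // place (Windows may surface this as an access error, hence the
      // dest.exists() check); discard our copy and report success.
      Err(e) if e.kind() == ErrorKind::AlreadyExists || dest.exists() => {
        let _ = fs::remove_dir_all(&temp);
        return Ok(());
      }
      Err(e) if attempt >= 5 => {
        let _ = fs::remove_dir_all(&temp);
        return Err(e);
      }
      // Transient failure: back off briefly and retry.
      Err(_) => thread::sleep(Duration::from_millis(20 * attempt)),
    }
  }
  unreachable!()
}
```

Here `extract` stands in for the tarball extraction step; the important property is that every failure path cleans up the temp directory rather than the destination.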
Changes to the npm cache module (`super::cache`):

@@ -25,6 +25,7 @@ use crate::util::fs::hard_link_dir_recursive;
 use crate::util::progress_bar::ProgressBar;
 
 use super::tarball::verify_and_extract_tarball;
+use super::tarball::TarballExtractionMode;
 
 /// Stores a single copy of npm packages in a cache.
 #[derive(Debug)]
@@ -69,7 +70,7 @@ impl NpmCache {
   /// to ensure a package is only downloaded once per run of the CLI. This
   /// prevents downloads from re-occurring when someone has `--reload` and
   /// and imports a dynamic import that imports the same package again for example.
-  fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool {
+  fn should_use_cache_for_package(&self, package: &PackageNv) -> bool {
     self.cache_setting.should_use_for_npm_package(&package.name)
       || !self
         .previously_reloaded_packages
@@ -91,26 +92,23 @@ impl NpmCache {
 
   async fn ensure_package_inner(
     &self,
-    package: &PackageNv,
+    package_nv: &PackageNv,
     dist: &NpmPackageVersionDistInfo,
     registry_url: &Url,
   ) -> Result<(), AnyError> {
     let package_folder = self
       .cache_dir
-      .package_folder_for_name_and_version(package, registry_url);
-    if self.should_use_global_cache_for_package(package)
-      && self.fs.exists_sync(&package_folder)
-      // if this file exists, then the package didn't successfully extract
-      // the first time, or another process is currently extracting the zip file
-      && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME))
-    {
+      .package_folder_for_name_and_version(package_nv, registry_url);
+    let should_use_cache = self.should_use_cache_for_package(package_nv);
+    let package_folder_exists = self.fs.exists_sync(&package_folder);
+    if should_use_cache && package_folder_exists {
       return Ok(());
     } else if self.cache_setting == CacheSetting::Only {
       return Err(custom_error(
         "NotCached",
         format!(
           "An npm specifier not found in cache: \"{}\", --cached-only is specified.",
-          &package.name
+          &package_nv.name
         )
       )
     );
@@ -127,7 +125,31 @@ impl NpmCache {
       .await?;
     match maybe_bytes {
       Some(bytes) => {
-        verify_and_extract_tarball(package, &bytes, dist, &package_folder)
+        let extraction_mode = if should_use_cache || !package_folder_exists {
+          TarballExtractionMode::SiblingTempDir
+        } else {
+          // The user ran with `--reload`, so overwrite the package instead of
+          // deleting it since the package might get corrupted if a user kills
+          // their deno process while it's deleting a package directory
+          //
+          // We can't rename this folder and delete it because the folder
+          // may be in use by another process or may now contain hardlinks,
+          // which will cause windows to throw an "AccessDenied" error when
+          // renaming. So we settle for overwriting.
+          TarballExtractionMode::Overwrite
+        };
+        let dist = dist.clone();
+        let package_nv = package_nv.clone();
+        deno_core::unsync::spawn_blocking(move || {
+          verify_and_extract_tarball(
+            &package_nv,
+            &bytes,
+            &dist,
+            &package_folder,
+            extraction_mode,
+          )
+        })
+        .await?
       }
       None => {
         bail!("Could not find npm package tarball at: {}", dist.tarball);
@@ -150,7 +172,7 @@ impl NpmCache {
       .package_folder_for_id(folder_id, registry_url);
 
     if package_folder.exists()
-      // if this file exists, then the package didn't successfully extract
+      // if this file exists, then the package didn't successfully initialize
      // the first time, or another process is currently extracting the zip file
       && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
       && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name)
@@ -161,6 +183,9 @@ impl NpmCache {
     let original_package_folder = self
       .cache_dir
       .package_folder_for_name_and_version(&folder_id.nv, registry_url);
 
+    // it seems Windows does an "AccessDenied" error when moving a
+    // directory with hard links, so that's why this solution is done
     with_folder_sync_lock(&folder_id.nv, &package_folder, || {
       hard_link_dir_recursive(&original_package_folder, &package_folder)
     })?;
@@ -206,7 +231,7 @@ impl NpmCache {
 
 const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
 
-pub fn with_folder_sync_lock(
+fn with_folder_sync_lock(
   package: &PackageNv,
   output_folder: &Path,
   action: impl FnOnce() -> Result<(), AnyError>,
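To restate the mode selection above as a table (this is just the diff's own condition rewritten for clarity, not new behavior): `SiblingTempDir` is chosen whenever the cache may be used or the folder does not exist yet, and `Overwrite` only when `--reload` forces a re-download over an existing folder.

```rust
// The extraction_mode condition from ensure_package_inner, restated.
// Note: should_use_cache && package_folder_exists returns early with Ok(())
// before any download, so that combination never reaches this point.
fn extraction_mode(should_use_cache: bool, folder_exists: bool) -> &'static str {
  if should_use_cache || !folder_exists {
    "SiblingTempDir" // safe: extract beside the destination, then rename
  } else {
    "Overwrite" // --reload over an existing folder: overwrite in place
  }
}

fn main() {
  assert_eq!(extraction_mode(false, false), "SiblingTempDir"); // reload, fresh dir
  assert_eq!(extraction_mode(false, true), "Overwrite");       // reload, existing dir
  assert_eq!(extraction_mode(true, false), "SiblingTempDir");  // normal first download
}
```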
Changes to the tarball module (`super::tarball`):

@@ -2,12 +2,14 @@
 
 use std::collections::HashSet;
 use std::fs;
+use std::io::ErrorKind;
 use std::path::Path;
 use std::path::PathBuf;
 
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use deno_core::anyhow::bail;
+use deno_core::anyhow::Context;
 use deno_core::error::AnyError;
 use deno_npm::registry::NpmPackageVersionDistInfo;
 use deno_npm::registry::NpmPackageVersionDistInfoIntegrity;
@@ -16,19 +18,76 @@ use flate2::read::GzDecoder;
 use tar::Archive;
 use tar::EntryType;
 
-use super::cache::with_folder_sync_lock;
+use crate::util::path::get_atomic_dir_path;
 
+#[derive(Debug, Copy, Clone)]
+pub enum TarballExtractionMode {
+  /// Overwrites the destination directory without deleting any files.
+  Overwrite,
+  /// Creates and writes to a sibling temporary directory. When done, moves
+  /// it to the final destination.
+  ///
+  /// This is more robust than `Overwrite` as it better handles multiple
+  /// processes writing to the directory at the same time.
+  SiblingTempDir,
+}
+
 pub fn verify_and_extract_tarball(
-  package: &PackageNv,
+  package_nv: &PackageNv,
   data: &[u8],
   dist_info: &NpmPackageVersionDistInfo,
   output_folder: &Path,
+  extraction_mode: TarballExtractionMode,
 ) -> Result<(), AnyError> {
-  verify_tarball_integrity(package, data, &dist_info.integrity())?;
+  verify_tarball_integrity(package_nv, data, &dist_info.integrity())?;
 
-  with_folder_sync_lock(package, output_folder, || {
-    extract_tarball(data, output_folder)
-  })
+  match extraction_mode {
+    TarballExtractionMode::Overwrite => extract_tarball(data, output_folder),
+    TarballExtractionMode::SiblingTempDir => {
+      let temp_dir = get_atomic_dir_path(output_folder);
+      extract_tarball(data, &temp_dir)?;
+      rename_with_retries(&temp_dir, output_folder)
+        .map_err(AnyError::from)
+        .context("Failed moving extracted tarball to final destination.")
+    }
+  }
+}
+
+fn rename_with_retries(
+  temp_dir: &Path,
+  output_folder: &Path,
+) -> Result<(), std::io::Error> {
+  fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool {
+    // Windows will do an "Access is denied" error
+    err.kind() == ErrorKind::AlreadyExists || output_folder.exists()
+  }
+
+  let mut count = 0;
+  // renaming might be flaky if a lot of processes are trying
+  // to do this, so retry a few times
+  loop {
+    match fs::rename(temp_dir, output_folder) {
+      Ok(_) => return Ok(()),
+      Err(err) if already_exists(&err, output_folder) => {
+        // another process copied here, just cleanup
+        let _ = fs::remove_dir_all(temp_dir);
+        return Ok(());
+      }
+      Err(err) => {
+        count += 1;
+        if count > 5 {
+          // too many retries, cleanup and return the error
+          let _ = fs::remove_dir_all(temp_dir);
+          return Err(err);
+        }
+
+        // wait a bit before retrying... this should be very rare or only
+        // in error cases, so ok to sleep a bit
+        let sleep_ms = std::cmp::min(100, 20 * count);
+        std::thread::sleep(std::time::Duration::from_millis(sleep_ms));
+      }
+    }
+  }
+}
 
 fn verify_tarball_integrity(
@@ -150,6 +209,7 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> {
 #[cfg(test)]
 mod test {
   use deno_semver::Version;
+  use test_util::TempDir;
 
   use super::*;
 
@@ -240,4 +300,25 @@ mod test {
     )
     .is_ok());
   }
+
+  #[test]
+  fn rename_with_retries_succeeds_exists() {
+    let temp_dir = TempDir::new();
+    let folder_1 = temp_dir.path().join("folder_1");
+    let folder_2 = temp_dir.path().join("folder_2");
+
+    folder_1.create_dir_all();
+    folder_1.join("a.txt").write("test");
+    folder_2.create_dir_all();
+    // this will not end up in the output as rename_with_retries assumes
+    // the folders ending up at the destination are the same
+    folder_2.join("b.txt").write("test2");
+
+    let dest_folder = temp_dir.path().join("dest_folder");
+
+    rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap();
+    rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap();
+    assert!(dest_folder.join("a.txt").exists());
+    assert!(!dest_folder.join("b.txt").exists());
+  }
 }
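A note on the retry budget in `rename_with_retries`: `count` is incremented before the `count > 5` check, so at most five sleeps happen, at `min(100, 20 * count)` ms each — 20, 40, 60, 80, 100 ms — for roughly 300 ms of waiting across six rename attempts before the error is surfaced. A quick check of that arithmetic:

```rust
fn main() {
  // Sleeps occur for count = 1..=5; count = 6 returns the error instead.
  let waits: Vec<u64> = (1u64..=5).map(|count| std::cmp::min(100, 20 * count)).collect();
  assert_eq!(waits, [20, 40, 60, 80, 100]);
  assert_eq!(waits.iter().sum::<u64>(), 300); // worst-case total back-off in ms
}
```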
Changes to `crate::util::fs`:

@@ -2,7 +2,6 @@
 
 use std::collections::HashSet;
 use std::env::current_dir;
-use std::fmt::Write as FmtWrite;
 use std::fs::FileType;
 use std::fs::OpenOptions;
 use std::io::Error;
@@ -23,12 +22,12 @@ use deno_core::error::AnyError;
 pub use deno_core::normalize_path;
 use deno_core::unsync::spawn_blocking;
 use deno_core::ModuleSpecifier;
-use deno_runtime::deno_crypto::rand;
 use deno_runtime::deno_fs::FileSystem;
 use deno_runtime::deno_node::PathClean;
 
 use crate::util::gitignore::DirGitIgnores;
 use crate::util::gitignore::GitIgnoreTree;
+use crate::util::path::get_atomic_file_path;
 use crate::util::progress_bar::ProgressBar;
 use crate::util::progress_bar::ProgressBarStyle;
 use crate::util::progress_bar::ProgressMessagePrompt;
@@ -56,14 +55,7 @@ pub fn atomic_write_file<T: AsRef<[u8]>>(
 }
 
 fn inner(file_path: &Path, data: &[u8], mode: u32) -> std::io::Result<()> {
-  let temp_file_path = {
-    let rand: String = (0..4).fold(String::new(), |mut output, _| {
-      let _ = write!(output, "{:02x}", rand::random::<u8>());
-      output
-    });
-    let extension = format!("{rand}.tmp");
-    file_path.with_extension(extension)
-  };
+  let temp_file_path = get_atomic_file_path(file_path);
 
   if let Err(write_err) =
     atomic_write_file_raw(&temp_file_path, file_path, data, mode)
Changes to `crate::util::path`:

@@ -42,6 +42,32 @@ pub fn get_extension(file_path: &Path) -> Option<String> {
     .map(|e| e.to_lowercase());
 }
 
+pub fn get_atomic_dir_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let new_file_name = format!(
+    ".{}_{}",
+    file_path
+      .file_name()
+      .map(|f| f.to_string_lossy())
+      .unwrap_or(Cow::Borrowed("")),
+    rand
+  );
+  file_path.with_file_name(new_file_name)
+}
+
+pub fn get_atomic_file_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let extension = format!("{rand}.tmp");
+  file_path.with_extension(extension)
+}
+
+fn gen_rand_path_component() -> String {
+  (0..4).fold(String::new(), |mut output, _| {
+    output.push_str(&format!("{:02x}", rand::random::<u8>()));
+    output
+  })
+}
+
 /// TypeScript figures out the type of file based on the extension, but we take
 /// other factors into account like the file headers. The hack here is to map the
 /// specifier passed to TypeScript to a new specifier with the file extension.
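For concreteness, here is the shape of path these two helpers produce. The example paths and the fixed `a1b2c3d4` component are illustrative only (`gen_rand_path_component` emits four random bytes as eight hex characters); the snippet reproduces the transformations with the same `with_file_name`/`with_extension` calls the diff uses:

```rust
use std::path::Path;

fn main() {
  // get_atomic_dir_path: hidden sibling of the target directory.
  let dir = Path::new("/cache/npm/chalk/5.0.1");
  let sibling = dir.with_file_name(format!(".{}_{}", "5.0.1", "a1b2c3d4"));
  assert_eq!(sibling, Path::new("/cache/npm/chalk/.5.0.1_a1b2c3d4"));

  // get_atomic_file_path: the random component plus ".tmp" replaces the
  // existing extension (so "registry.json" loses its ".json" here).
  let file = Path::new("/cache/npm/registry.json");
  let tmp = file.with_extension("a1b2c3d4.tmp");
  assert_eq!(tmp, Path::new("/cache/npm/registry.a1b2c3d4.tmp"));
}
```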