Mirror of https://github.com/denoland/deno.git, synced 2024-11-21 15:04:11 -05:00
fix(npm): make tarball extraction more reliable (#23759)
1. Extracts to a directory beside the destination. 2. Renames to the destination with retries.
This commit is contained in:
parent c0e3b6ed9d
commit f16b4d4df8

4 changed files with 153 additions and 29 deletions
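In outline, the new extraction path works like this (a minimal sketch using only the standard library; install_dir and its temp-dir naming are illustrative, not the commit's actual API, which is in the diff below):

use std::fs;
use std::io;
use std::path::Path;
use std::thread;
use std::time::Duration;

// Extract into a hidden sibling directory, then rename it over the
// destination, retrying briefly. `extract` stands in for tarball extraction.
fn install_dir(
  extract: impl Fn(&Path) -> io::Result<()>,
  dest: &Path,
) -> io::Result<()> {
  // 1. Extract beside the destination so the rename stays on one filesystem.
  let file_name = dest.file_name().unwrap().to_string_lossy();
  let temp = dest.with_file_name(format!(".{}_tmp", file_name));
  extract(&temp)?;
  // 2. Rename into place with a short retry loop.
  for attempt in 1..=5u64 {
    match fs::rename(&temp, dest) {
      Ok(()) => return Ok(()),
      // Another process already moved an equivalent copy here; clean up.
      Err(_) if dest.exists() => {
        let _ = fs::remove_dir_all(&temp);
        return Ok(());
      }
      Err(err) if attempt == 5 => {
        let _ = fs::remove_dir_all(&temp);
        return Err(err);
      }
      Err(_) => thread::sleep(Duration::from_millis(20 * attempt)),
    }
  }
  unreachable!("the loop always returns by the fifth attempt")
}

Extracting next to the destination keeps half-written packages out of the final path, and because the temp directory is on the same filesystem, fs::rename swaps it in atomically.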
cli/npm/managed/cache.rs
@@ -25,6 +25,7 @@ use crate::util::fs::hard_link_dir_recursive;
 use crate::util::progress_bar::ProgressBar;
 
 use super::tarball::verify_and_extract_tarball;
+use super::tarball::TarballExtractionMode;
 
 /// Stores a single copy of npm packages in a cache.
 #[derive(Debug)]
@@ -69,7 +70,7 @@ impl NpmCache {
   /// to ensure a package is only downloaded once per run of the CLI. This
   /// prevents downloads from re-occurring when someone has `--reload` and
   /// and imports a dynamic import that imports the same package again for example.
-  fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool {
+  fn should_use_cache_for_package(&self, package: &PackageNv) -> bool {
     self.cache_setting.should_use_for_npm_package(&package.name)
       || !self
         .previously_reloaded_packages
@@ -91,26 +92,23 @@ impl NpmCache {
 
   async fn ensure_package_inner(
     &self,
-    package: &PackageNv,
+    package_nv: &PackageNv,
     dist: &NpmPackageVersionDistInfo,
     registry_url: &Url,
   ) -> Result<(), AnyError> {
     let package_folder = self
       .cache_dir
-      .package_folder_for_name_and_version(package, registry_url);
-    if self.should_use_global_cache_for_package(package)
-      && self.fs.exists_sync(&package_folder)
-      // if this file exists, then the package didn't successfully extract
-      // the first time, or another process is currently extracting the zip file
-      && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME))
-    {
+      .package_folder_for_name_and_version(package_nv, registry_url);
+    let should_use_cache = self.should_use_cache_for_package(package_nv);
+    let package_folder_exists = self.fs.exists_sync(&package_folder);
+    if should_use_cache && package_folder_exists {
       return Ok(());
     } else if self.cache_setting == CacheSetting::Only {
       return Err(custom_error(
         "NotCached",
         format!(
           "An npm specifier not found in cache: \"{}\", --cached-only is specified.",
-          &package.name
+          &package_nv.name
         )
       )
     );
@@ -127,7 +125,31 @@ impl NpmCache {
         .await?;
       match maybe_bytes {
         Some(bytes) => {
-          verify_and_extract_tarball(package, &bytes, dist, &package_folder)
+          let extraction_mode = if should_use_cache || !package_folder_exists {
+            TarballExtractionMode::SiblingTempDir
+          } else {
+            // The user ran with `--reload`, so overwrite the package instead of
+            // deleting it since the package might get corrupted if a user kills
+            // their deno process while it's deleting a package directory
+            //
+            // We can't rename this folder and delete it because the folder
+            // may be in use by another process or may now contain hardlinks,
+            // which will cause windows to throw an "AccessDenied" error when
+            // renaming. So we settle for overwriting.
+            TarballExtractionMode::Overwrite
+          };
+          let dist = dist.clone();
+          let package_nv = package_nv.clone();
+          deno_core::unsync::spawn_blocking(move || {
+            verify_and_extract_tarball(
+              &package_nv,
+              &bytes,
+              &dist,
+              &package_folder,
+              extraction_mode,
+            )
+          })
+          .await?
         }
         None => {
           bail!("Could not find npm package tarball at: {}", dist.tarball);
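The hunk above also moves extraction off the async executor: verifying and unpacking a tarball is synchronous, CPU-bound work, so it now runs under deno_core::unsync::spawn_blocking, with the inputs cloned into the closure. A self-contained sketch of that pattern, shown here with tokio's spawn_blocking (assumed to be the analogous API) and placeholder work:

use std::time::Duration;

// Clone/move what the closure needs, run the blocking work on a worker
// thread, then await the result back on the async task.
#[tokio::main]
async fn main() {
  let bytes: Vec<u8> = vec![0u8; 1024];
  let len = tokio::task::spawn_blocking(move || {
    // stand-in for verify_and_extract_tarball(..)
    std::thread::sleep(Duration::from_millis(10));
    bytes.len()
  })
  .await
  .unwrap();
  assert_eq!(len, 1024);
}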
@@ -150,7 +172,7 @@ impl NpmCache {
       .package_folder_for_id(folder_id, registry_url);
 
     if package_folder.exists()
-      // if this file exists, then the package didn't successfully extract
+      // if this file exists, then the package didn't successfully initialize
       // the first time, or another process is currently extracting the zip file
       && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
       && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name)
@@ -161,6 +183,9 @@ impl NpmCache {
       let original_package_folder = self
         .cache_dir
         .package_folder_for_name_and_version(&folder_id.nv, registry_url);
+
+      // it seems Windows does an "AccessDenied" error when moving a
+      // directory with hard links, so that's why this solution is done
       with_folder_sync_lock(&folder_id.nv, &package_folder, || {
         hard_link_dir_recursive(&original_package_folder, &package_folder)
       })?;
@@ -206,7 +231,7 @@ impl NpmCache {
 
 const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
 
-pub fn with_folder_sync_lock(
+fn with_folder_sync_lock(
   package: &PackageNv,
   output_folder: &Path,
   action: impl FnOnce() -> Result<(), AnyError>,
cli/npm/managed/tarball.rs
@@ -2,12 +2,14 @@
 
 use std::collections::HashSet;
 use std::fs;
+use std::io::ErrorKind;
 use std::path::Path;
+use std::path::PathBuf;
 
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use deno_core::anyhow::bail;
 use deno_core::anyhow::Context;
 use deno_core::error::AnyError;
 use deno_npm::registry::NpmPackageVersionDistInfo;
 use deno_npm::registry::NpmPackageVersionDistInfoIntegrity;
@@ -16,19 +18,76 @@ use flate2::read::GzDecoder;
 use tar::Archive;
 use tar::EntryType;
 
-use super::cache::with_folder_sync_lock;
+use crate::util::path::get_atomic_dir_path;
+
+#[derive(Debug, Copy, Clone)]
+pub enum TarballExtractionMode {
+  /// Overwrites the destination directory without deleting any files.
+  Overwrite,
+  /// Creates and writes to a sibling temporary directory. When done, moves
+  /// it to the final destination.
+  ///
+  /// This is more robust than `Overwrite` as it better handles multiple
+  /// processes writing to the directory at the same time.
+  SiblingTempDir,
+}
 
 pub fn verify_and_extract_tarball(
-  package: &PackageNv,
+  package_nv: &PackageNv,
   data: &[u8],
   dist_info: &NpmPackageVersionDistInfo,
   output_folder: &Path,
+  extraction_mode: TarballExtractionMode,
 ) -> Result<(), AnyError> {
-  verify_tarball_integrity(package, data, &dist_info.integrity())?;
+  verify_tarball_integrity(package_nv, data, &dist_info.integrity())?;
 
-  with_folder_sync_lock(package, output_folder, || {
-    extract_tarball(data, output_folder)
-  })
+  match extraction_mode {
+    TarballExtractionMode::Overwrite => extract_tarball(data, output_folder),
+    TarballExtractionMode::SiblingTempDir => {
+      let temp_dir = get_atomic_dir_path(output_folder);
+      extract_tarball(data, &temp_dir)?;
+      rename_with_retries(&temp_dir, output_folder)
+        .map_err(AnyError::from)
+        .context("Failed moving extracted tarball to final destination.")
+    }
+  }
 }
 
+fn rename_with_retries(
+  temp_dir: &Path,
+  output_folder: &Path,
+) -> Result<(), std::io::Error> {
+  fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool {
+    // Windows will do an "Access is denied" error
+    err.kind() == ErrorKind::AlreadyExists || output_folder.exists()
+  }
+
+  let mut count = 0;
+  // renaming might be flaky if a lot of processes are trying
+  // to do this, so retry a few times
+  loop {
+    match fs::rename(temp_dir, output_folder) {
+      Ok(_) => return Ok(()),
+      Err(err) if already_exists(&err, output_folder) => {
+        // another process copied here, just cleanup
+        let _ = fs::remove_dir_all(temp_dir);
+        return Ok(());
+      }
+      Err(err) => {
+        count += 1;
+        if count > 5 {
+          // too many retries, cleanup and return the error
+          let _ = fs::remove_dir_all(temp_dir);
+          return Err(err);
+        }
+
+        // wait a bit before retrying... this should be very rare or only
+        // in error cases, so ok to sleep a bit
+        let sleep_ms = std::cmp::min(100, 20 * count);
+        std::thread::sleep(std::time::Duration::from_millis(sleep_ms));
+      }
+    }
+  }
+}
+
 fn verify_tarball_integrity(
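For a sense of the retry budget in rename_with_retries above: with the count > 5 cutoff and the min(100, 20 * count) delay, a rename that keeps failing sleeps 20, 40, 60, 80, then 100 ms before the error is surfaced, roughly 300 ms in total. A quick sketch of that arithmetic (not a test from this commit):

#[test]
fn backoff_schedule_sketch() {
  // Mirrors `std::cmp::min(100, 20 * count)` from rename_with_retries.
  let delays: Vec<u64> =
    (1..=5).map(|count| std::cmp::min(100, 20 * count)).collect();
  assert_eq!(delays, vec![20, 40, 60, 80, 100]);
  assert_eq!(delays.iter().sum::<u64>(), 300);
}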
@@ -150,6 +209,7 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> {
 #[cfg(test)]
 mod test {
+  use deno_semver::Version;
   use test_util::TempDir;
 
   use super::*;
@@ -240,4 +300,25 @@ mod test {
     )
     .is_ok());
   }
+
+  #[test]
+  fn rename_with_retries_succeeds_exists() {
+    let temp_dir = TempDir::new();
+    let folder_1 = temp_dir.path().join("folder_1");
+    let folder_2 = temp_dir.path().join("folder_2");
+
+    folder_1.create_dir_all();
+    folder_1.join("a.txt").write("test");
+    folder_2.create_dir_all();
+    // this will not end up in the output as rename_with_retries assumes
+    // the folders ending up at the destination are the same
+    folder_2.join("b.txt").write("test2");
+
+    let dest_folder = temp_dir.path().join("dest_folder");
+
+    rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap();
+    rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap();
+    assert!(dest_folder.join("a.txt").exists());
+    assert!(!dest_folder.join("b.txt").exists());
+  }
 }
cli/util/fs.rs
@@ -2,7 +2,6 @@
 
 use std::collections::HashSet;
 use std::env::current_dir;
-use std::fmt::Write as FmtWrite;
 use std::fs::FileType;
 use std::fs::OpenOptions;
 use std::io::Error;
@@ -23,12 +22,12 @@ use deno_core::error::AnyError;
 pub use deno_core::normalize_path;
 use deno_core::unsync::spawn_blocking;
 use deno_core::ModuleSpecifier;
-use deno_runtime::deno_crypto::rand;
 use deno_runtime::deno_fs::FileSystem;
 use deno_runtime::deno_node::PathClean;
 
 use crate::util::gitignore::DirGitIgnores;
 use crate::util::gitignore::GitIgnoreTree;
+use crate::util::path::get_atomic_file_path;
 use crate::util::progress_bar::ProgressBar;
 use crate::util::progress_bar::ProgressBarStyle;
 use crate::util::progress_bar::ProgressMessagePrompt;
@@ -56,14 +55,7 @@ pub fn atomic_write_file<T: AsRef<[u8]>>(
   }
 
   fn inner(file_path: &Path, data: &[u8], mode: u32) -> std::io::Result<()> {
-    let temp_file_path = {
-      let rand: String = (0..4).fold(String::new(), |mut output, _| {
-        let _ = write!(output, "{:02x}", rand::random::<u8>());
-        output
-      });
-      let extension = format!("{rand}.tmp");
-      file_path.with_extension(extension)
-    };
+    let temp_file_path = get_atomic_file_path(file_path);
 
     if let Err(write_err) =
       atomic_write_file_raw(&temp_file_path, file_path, data, mode)
cli/util/path.rs
@@ -42,6 +42,32 @@ pub fn get_extension(file_path: &Path) -> Option<String> {
     .map(|e| e.to_lowercase());
 }
 
+pub fn get_atomic_dir_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let new_file_name = format!(
+    ".{}_{}",
+    file_path
+      .file_name()
+      .map(|f| f.to_string_lossy())
+      .unwrap_or(Cow::Borrowed("")),
+    rand
+  );
+  file_path.with_file_name(new_file_name)
+}
+
+pub fn get_atomic_file_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let extension = format!("{rand}.tmp");
+  file_path.with_extension(extension)
+}
+
+fn gen_rand_path_component() -> String {
+  (0..4).fold(String::new(), |mut output, _| {
+    output.push_str(&format!("{:02x}", rand::random::<u8>()));
+    output
+  })
+}
+
 /// TypeScript figures out the type of file based on the extension, but we take
 /// other factors into account like the file headers. The hack here is to map the
 /// specifier passed to TypeScript to a new specifier with the file extension.
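To make the naming scheme above concrete: get_atomic_dir_path produces a hidden sibling of the destination, while get_atomic_file_path swaps the extension for a random `.tmp` one. A deterministic sketch with the random component pinned (the paths and the "a1b2c3d4" suffix are made up for illustration):

use std::borrow::Cow;
use std::path::{Path, PathBuf};

// Same logic as get_atomic_dir_path, but with the random component fixed
// so the output is predictable. Illustration only; the real helper is in
// the hunk above.
fn atomic_dir_path(file_path: &Path, rand: &str) -> PathBuf {
  let name = format!(
    ".{}_{}",
    file_path
      .file_name()
      .map(|f| f.to_string_lossy())
      .unwrap_or(Cow::Borrowed("")),
    rand
  );
  file_path.with_file_name(name)
}

fn main() {
  // The temp dir is a hidden sibling of the destination, so the final
  // fs::rename never crosses a filesystem boundary.
  assert_eq!(
    atomic_dir_path(Path::new("/cache/chalk/5.0.1"), "a1b2c3d4"),
    PathBuf::from("/cache/chalk/.5.0.1_a1b2c3d4")
  );
  // get_atomic_file_path swaps the extension instead:
  assert_eq!(
    Path::new("/cache/registry.json").with_extension("a1b2c3d4.tmp"),
    PathBuf::from("/cache/registry.a1b2c3d4.tmp")
  );
}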