// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use crate::util::progress_bar::UpdateGuard; use crate::version::get_user_agent; use cache_control::Cachability; use cache_control::CacheControl; use chrono::DateTime; use deno_core::anyhow::bail; use deno_core::error::custom_error; use deno_core::error::generic_error; use deno_core::error::AnyError; use deno_core::futures::StreamExt; use deno_core::url::Url; use deno_runtime::deno_fetch::create_http_client; use deno_runtime::deno_fetch::reqwest; use deno_runtime::deno_fetch::reqwest::header::LOCATION; use deno_runtime::deno_fetch::reqwest::Response; use deno_runtime::deno_fetch::CreateHttpClientOptions; use deno_runtime::deno_tls::RootCertStoreProvider; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use std::time::SystemTime; /// Construct the next uri based on base uri and location header fragment /// See <https://tools.ietf.org/html/rfc3986#section-4.2> fn resolve_url_from_location(base_url: &Url, location: &str) -> Url { if location.starts_with("http://") || location.starts_with("https://") { // absolute uri Url::parse(location).expect("provided redirect url should be a valid url") } else if location.starts_with("//") { // "//" authority path-abempty Url::parse(&format!("{}:{}", base_url.scheme(), location)) .expect("provided redirect url should be a valid url") } else if location.starts_with('/') { // path-absolute base_url .join(location) .expect("provided redirect url should be a valid url") } else { // assuming path-noscheme | path-empty let base_url_path_str = base_url.path().to_owned(); // Pop last part or url (after last slash) let segs: Vec<&str> = base_url_path_str.rsplitn(2, '/').collect(); let new_path = format!("{}/{}", segs.last().unwrap_or(&""), location); base_url .join(&new_path) .expect("provided redirect url should be a valid url") } } pub fn resolve_redirect_from_response( request_url: &Url, response: &Response, ) -> Result<Url, AnyError> { debug_assert!(response.status().is_redirection()); if let Some(location) = response.headers().get(LOCATION) { let location_string = location.to_str()?; log::debug!("Redirecting to {:?}...", &location_string); let new_url = resolve_url_from_location(request_url, location_string); Ok(new_url) } else { Err(generic_error(format!( "Redirection from '{request_url}' did not provide location header" ))) } } // TODO(ry) HTTP headers are not unique key, value pairs. There may be more than // one header line with the same key. This should be changed to something like // Vec<(String, String)> pub type HeadersMap = HashMap<String, String>; /// A structure used to determine if a entity in the http cache can be used. /// /// This is heavily influenced by /// <https://github.com/kornelski/rusty-http-cache-semantics> which is BSD /// 2-Clause Licensed and copyright Kornel LesiĆski pub struct CacheSemantics { cache_control: CacheControl, cached: SystemTime, headers: HashMap<String, String>, now: SystemTime, } impl CacheSemantics { pub fn new( headers: HashMap<String, String>, cached: SystemTime, now: SystemTime, ) -> Self { let cache_control = headers .get("cache-control") .map(|v| CacheControl::from_value(v).unwrap_or_default()) .unwrap_or_default(); Self { cache_control, cached, headers, now, } } fn age(&self) -> Duration { let mut age = self.age_header_value(); if let Ok(resident_time) = self.now.duration_since(self.cached) { age += resident_time; } age } fn age_header_value(&self) -> Duration { Duration::from_secs( self .headers .get("age") .and_then(|v| v.parse().ok()) .unwrap_or(0), ) } fn is_stale(&self) -> bool { self.max_age() <= self.age() } fn max_age(&self) -> Duration { if self.cache_control.cachability == Some(Cachability::NoCache) { return Duration::from_secs(0); } if self.headers.get("vary").map(|s| s.trim()) == Some("*") { return Duration::from_secs(0); } if let Some(max_age) = self.cache_control.max_age { return max_age; } let default_min_ttl = Duration::from_secs(0); let server_date = self.raw_server_date(); if let Some(expires) = self.headers.get("expires") { return match DateTime::parse_from_rfc2822(expires) { Err(_) => Duration::from_secs(0), Ok(expires) => { let expires = SystemTime::UNIX_EPOCH + Duration::from_secs(expires.timestamp().max(0) as _); return default_min_ttl .max(expires.duration_since(server_date).unwrap_or_default()); } }; } if let Some(last_modified) = self.headers.get("last-modified") { if let Ok(last_modified) = DateTime::parse_from_rfc2822(last_modified) { let last_modified = SystemTime::UNIX_EPOCH + Duration::from_secs(last_modified.timestamp().max(0) as _); if let Ok(diff) = server_date.duration_since(last_modified) { let secs_left = diff.as_secs() as f64 * 0.1; return default_min_ttl.max(Duration::from_secs(secs_left as _)); } } } default_min_ttl } fn raw_server_date(&self) -> SystemTime { self .headers .get("date") .and_then(|d| DateTime::parse_from_rfc2822(d).ok()) .and_then(|d| { SystemTime::UNIX_EPOCH .checked_add(Duration::from_secs(d.timestamp() as _)) }) .unwrap_or(self.cached) } /// Returns true if the cached value is "fresh" respecting cached headers, /// otherwise returns false. pub fn should_use(&self) -> bool { if self.cache_control.cachability == Some(Cachability::NoCache) { return false; } if let Some(max_age) = self.cache_control.max_age { if self.age() > max_age { return false; } } if let Some(min_fresh) = self.cache_control.min_fresh { if self.time_to_live() < min_fresh { return false; } } if self.is_stale() { let has_max_stale = self.cache_control.max_stale.is_some(); let allows_stale = has_max_stale && self .cache_control .max_stale .map(|val| val > self.age() - self.max_age()) .unwrap_or(true); if !allows_stale { return false; } } true } fn time_to_live(&self) -> Duration { self.max_age().checked_sub(self.age()).unwrap_or_default() } } pub struct HttpClient { options: CreateHttpClientOptions, root_cert_store_provider: Option<Arc<dyn RootCertStoreProvider>>, cell: once_cell::sync::OnceCell<reqwest::Client>, } impl std::fmt::Debug for HttpClient { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("HttpClient") .field("options", &self.options) .finish() } } impl HttpClient { pub fn new( root_cert_store_provider: Option<Arc<dyn RootCertStoreProvider>>, unsafely_ignore_certificate_errors: Option<Vec<String>>, ) -> Self { Self { options: CreateHttpClientOptions { unsafely_ignore_certificate_errors, ..Default::default() }, root_cert_store_provider, cell: Default::default(), } } #[cfg(test)] pub fn from_client(client: reqwest::Client) -> Self { let result = Self { options: Default::default(), root_cert_store_provider: Default::default(), cell: Default::default(), }; result.cell.set(client).unwrap(); result } pub(crate) fn client(&self) -> Result<&reqwest::Client, AnyError> { self.cell.get_or_try_init(|| { create_http_client( get_user_agent(), CreateHttpClientOptions { root_cert_store: match &self.root_cert_store_provider { Some(provider) => Some(provider.get_or_try_init()?.clone()), None => None, }, ..self.options.clone() }, ) }) } /// Do a GET request without following redirects. pub fn get_no_redirect<U: reqwest::IntoUrl>( &self, url: U, ) -> Result<reqwest::RequestBuilder, AnyError> { Ok(self.client()?.get(url)) } pub async fn download_text<U: reqwest::IntoUrl>( &self, url: U, ) -> Result<String, AnyError> { let bytes = self.download(url).await?; Ok(String::from_utf8(bytes)?) } pub async fn download<U: reqwest::IntoUrl>( &self, url: U, ) -> Result<Vec<u8>, AnyError> { let maybe_bytes = self.inner_download(url, None).await?; match maybe_bytes { Some(bytes) => Ok(bytes), None => Err(custom_error("Http", "Not found.")), } } pub async fn download_with_progress<U: reqwest::IntoUrl>( &self, url: U, progress_guard: &UpdateGuard, ) -> Result<Option<Vec<u8>>, AnyError> { self.inner_download(url, Some(progress_guard)).await } async fn inner_download<U: reqwest::IntoUrl>( &self, url: U, progress_guard: Option<&UpdateGuard>, ) -> Result<Option<Vec<u8>>, AnyError> { let response = self.get_redirected_response(url).await?; if response.status() == 404 { return Ok(None); } else if !response.status().is_success() { let status = response.status(); let maybe_response_text = response.text().await.ok(); bail!( "Bad response: {:?}{}", status, match maybe_response_text { Some(text) => format!("\n\n{text}"), None => String::new(), } ); } get_response_body_with_progress(response, progress_guard) .await .map(Some) } pub async fn get_redirected_response<U: reqwest::IntoUrl>( &self, url: U, ) -> Result<Response, AnyError> { let mut url = url.into_url()?; let mut response = self.get_no_redirect(url.clone())?.send().await?; let status = response.status(); if status.is_redirection() { for _ in 0..5 { let new_url = resolve_redirect_from_response(&url, &response)?; let new_response = self.get_no_redirect(new_url.clone())?.send().await?; let status = new_response.status(); if status.is_redirection() { response = new_response; url = new_url; } else { return Ok(new_response); } } Err(custom_error("Http", "Too many redirects.")) } else { Ok(response) } } } pub async fn get_response_body_with_progress( response: reqwest::Response, progress_guard: Option<&UpdateGuard>, ) -> Result<Vec<u8>, AnyError> { if let Some(progress_guard) = progress_guard { if let Some(total_size) = response.content_length() { progress_guard.set_total_size(total_size); let mut current_size = 0; let mut data = Vec::with_capacity(total_size as usize); let mut stream = response.bytes_stream(); while let Some(item) = stream.next().await { let bytes = item?; current_size += bytes.len() as u64; progress_guard.set_position(current_size); data.extend(bytes.into_iter()); } return Ok(data); } } let bytes = response.bytes().await?; Ok(bytes.into()) } #[cfg(test)] mod test { use super::*; #[tokio::test] async fn test_http_client_download_redirect() { let _http_server_guard = test_util::http_server(); let client = HttpClient::new(None, None); // make a request to the redirect server let text = client .download_text("http://localhost:4546/subdir/redirects/redirect1.js") .await .unwrap(); assert_eq!(text, "export const redirect = 1;\n"); // now make one to the infinite redirects server let err = client .download_text("http://localhost:4549/subdir/redirects/redirect1.js") .await .err() .unwrap(); assert_eq!(err.to_string(), "Too many redirects."); } #[test] fn test_resolve_url_from_location_full_1() { let url = "http://deno.land".parse::<Url>().unwrap(); let new_uri = resolve_url_from_location(&url, "http://golang.org"); assert_eq!(new_uri.host_str().unwrap(), "golang.org"); } #[test] fn test_resolve_url_from_location_full_2() { let url = "https://deno.land".parse::<Url>().unwrap(); let new_uri = resolve_url_from_location(&url, "https://golang.org"); assert_eq!(new_uri.host_str().unwrap(), "golang.org"); } #[test] fn test_resolve_url_from_location_relative_1() { let url = "http://deno.land/x".parse::<Url>().unwrap(); let new_uri = resolve_url_from_location(&url, "//rust-lang.org/en-US"); assert_eq!(new_uri.host_str().unwrap(), "rust-lang.org"); assert_eq!(new_uri.path(), "/en-US"); } #[test] fn test_resolve_url_from_location_relative_2() { let url = "http://deno.land/x".parse::<Url>().unwrap(); let new_uri = resolve_url_from_location(&url, "/y"); assert_eq!(new_uri.host_str().unwrap(), "deno.land"); assert_eq!(new_uri.path(), "/y"); } #[test] fn test_resolve_url_from_location_relative_3() { let url = "http://deno.land/x".parse::<Url>().unwrap(); let new_uri = resolve_url_from_location(&url, "z"); assert_eq!(new_uri.host_str().unwrap(), "deno.land"); assert_eq!(new_uri.path(), "/z"); } }