2023-01-02 16:00:42 -05:00
|
|
|
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
|
2022-12-12 20:52:10 -05:00
|
|
|
use crate::util::progress_bar::UpdateGuard;
|
2022-11-18 17:28:14 -05:00
|
|
|
use crate::version::get_user_agent;
|
2021-02-15 21:50:27 -05:00
|
|
|
|
2021-12-09 06:16:17 -05:00
|
|
|
use cache_control::Cachability;
|
|
|
|
use cache_control::CacheControl;
|
|
|
|
use chrono::DateTime;
|
2022-12-12 20:52:10 -05:00
|
|
|
use deno_core::anyhow::bail;
|
2021-11-08 20:26:39 -05:00
|
|
|
use deno_core::error::custom_error;
|
2020-09-14 12:48:57 -04:00
|
|
|
use deno_core::error::generic_error;
|
|
|
|
use deno_core::error::AnyError;
|
2022-12-12 20:52:10 -05:00
|
|
|
use deno_core::futures::StreamExt;
|
2020-09-16 14:28:07 -04:00
|
|
|
use deno_core::url::Url;
|
2022-11-18 17:28:14 -05:00
|
|
|
use deno_runtime::deno_fetch::create_http_client;
|
|
|
|
use deno_runtime::deno_fetch::reqwest;
|
2020-12-13 13:45:53 -05:00
|
|
|
use deno_runtime::deno_fetch::reqwest::header::LOCATION;
|
2022-12-14 08:47:18 -05:00
|
|
|
use deno_runtime::deno_fetch::reqwest::Response;
|
2022-11-18 17:28:14 -05:00
|
|
|
use deno_runtime::deno_tls::rustls::RootCertStore;
|
2020-02-19 08:17:13 -05:00
|
|
|
use std::collections::HashMap;
|
2021-12-09 06:16:17 -05:00
|
|
|
use std::time::Duration;
|
|
|
|
use std::time::SystemTime;
|
2019-04-25 13:29:21 -04:00
|
|
|
|
2018-11-29 22:01:01 -05:00
|
|
|
/// Construct the next uri based on base uri and location header fragment
|
2018-11-30 03:30:49 -05:00
|
|
|
/// See <https://tools.ietf.org/html/rfc3986#section-4.2>
|
2019-04-25 13:29:21 -04:00
|
|
|
fn resolve_url_from_location(base_url: &Url, location: &str) -> Url {
|
2018-11-29 22:01:01 -05:00
|
|
|
if location.starts_with("http://") || location.starts_with("https://") {
|
|
|
|
// absolute uri
|
2019-04-25 13:29:21 -04:00
|
|
|
Url::parse(location).expect("provided redirect url should be a valid url")
|
2018-11-29 22:01:01 -05:00
|
|
|
} else if location.starts_with("//") {
|
|
|
|
// "//" authority path-abempty
|
2019-04-25 13:29:21 -04:00
|
|
|
Url::parse(&format!("{}:{}", base_url.scheme(), location))
|
2018-11-30 03:30:49 -05:00
|
|
|
.expect("provided redirect url should be a valid url")
|
|
|
|
} else if location.starts_with('/') {
|
2018-11-29 22:01:01 -05:00
|
|
|
// path-absolute
|
2019-04-25 13:29:21 -04:00
|
|
|
base_url
|
|
|
|
.join(location)
|
|
|
|
.expect("provided redirect url should be a valid url")
|
2018-11-29 22:01:01 -05:00
|
|
|
} else {
|
|
|
|
// assuming path-noscheme | path-empty
|
2019-04-25 13:29:21 -04:00
|
|
|
let base_url_path_str = base_url.path().to_owned();
|
|
|
|
// Pop last part or url (after last slash)
|
|
|
|
let segs: Vec<&str> = base_url_path_str.rsplitn(2, '/').collect();
|
|
|
|
let new_path = format!("{}/{}", segs.last().unwrap_or(&""), location);
|
|
|
|
base_url
|
|
|
|
.join(&new_path)
|
|
|
|
.expect("provided redirect url should be a valid url")
|
2018-11-29 22:01:01 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 08:47:18 -05:00
|
|
|
pub fn resolve_redirect_from_response(
|
|
|
|
request_url: &Url,
|
|
|
|
response: &Response,
|
|
|
|
) -> Result<Url, AnyError> {
|
|
|
|
debug_assert!(response.status().is_redirection());
|
|
|
|
if let Some(location) = response.headers().get(LOCATION) {
|
|
|
|
let location_string = location.to_str().unwrap();
|
|
|
|
log::debug!("Redirecting to {:?}...", &location_string);
|
|
|
|
let new_url = resolve_url_from_location(request_url, location_string);
|
|
|
|
Ok(new_url)
|
|
|
|
} else {
|
|
|
|
Err(generic_error(format!(
|
|
|
|
"Redirection from '{}' did not provide location header",
|
|
|
|
request_url
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-16 07:34:16 -04:00
|
|
|
/// HTTP headers stored as a map from a header key to a single value.
///
/// TODO(ry): HTTP headers are not unique key, value pairs. There may be more
/// than one header line with the same key. This should be changed to
/// something like `Vec<(String, String)>`.
pub type HeadersMap = HashMap<String, String>;
|
2020-01-26 13:59:41 -05:00
|
|
|
|
2021-12-09 06:16:17 -05:00
|
|
|
/// A structure used to determine if a entity in the http cache can be used.
|
|
|
|
///
|
|
|
|
/// This is heavily influenced by
|
2022-07-30 06:20:29 -04:00
|
|
|
/// <https://github.com/kornelski/rusty-http-cache-semantics> which is BSD
|
2021-12-09 06:16:17 -05:00
|
|
|
/// 2-Clause Licensed and copyright Kornel Lesiński
|
2022-03-23 09:54:22 -04:00
|
|
|
pub struct CacheSemantics {
|
2021-12-09 06:16:17 -05:00
|
|
|
cache_control: CacheControl,
|
|
|
|
cached: SystemTime,
|
|
|
|
headers: HashMap<String, String>,
|
|
|
|
now: SystemTime,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl CacheSemantics {
|
|
|
|
pub fn new(
|
|
|
|
headers: HashMap<String, String>,
|
|
|
|
cached: SystemTime,
|
|
|
|
now: SystemTime,
|
|
|
|
) -> Self {
|
|
|
|
let cache_control = headers
|
|
|
|
.get("cache-control")
|
|
|
|
.map(|v| CacheControl::from_value(v).unwrap_or_default())
|
|
|
|
.unwrap_or_default();
|
|
|
|
Self {
|
|
|
|
cache_control,
|
|
|
|
cached,
|
|
|
|
headers,
|
|
|
|
now,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn age(&self) -> Duration {
|
|
|
|
let mut age = self.age_header_value();
|
|
|
|
|
|
|
|
if let Ok(resident_time) = self.now.duration_since(self.cached) {
|
|
|
|
age += resident_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
age
|
|
|
|
}
|
|
|
|
|
|
|
|
fn age_header_value(&self) -> Duration {
|
|
|
|
Duration::from_secs(
|
|
|
|
self
|
|
|
|
.headers
|
|
|
|
.get("age")
|
|
|
|
.and_then(|v| v.parse().ok())
|
|
|
|
.unwrap_or(0),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_stale(&self) -> bool {
|
|
|
|
self.max_age() <= self.age()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn max_age(&self) -> Duration {
|
|
|
|
if self.cache_control.cachability == Some(Cachability::NoCache) {
|
|
|
|
return Duration::from_secs(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.headers.get("vary").map(|s| s.trim()) == Some("*") {
|
|
|
|
return Duration::from_secs(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(max_age) = self.cache_control.max_age {
|
|
|
|
return max_age;
|
|
|
|
}
|
|
|
|
|
|
|
|
let default_min_ttl = Duration::from_secs(0);
|
|
|
|
|
|
|
|
let server_date = self.raw_server_date();
|
|
|
|
if let Some(expires) = self.headers.get("expires") {
|
|
|
|
return match DateTime::parse_from_rfc2822(expires) {
|
|
|
|
Err(_) => Duration::from_secs(0),
|
|
|
|
Ok(expires) => {
|
|
|
|
let expires = SystemTime::UNIX_EPOCH
|
|
|
|
+ Duration::from_secs(expires.timestamp().max(0) as _);
|
|
|
|
return default_min_ttl
|
|
|
|
.max(expires.duration_since(server_date).unwrap_or_default());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(last_modified) = self.headers.get("last-modified") {
|
|
|
|
if let Ok(last_modified) = DateTime::parse_from_rfc2822(last_modified) {
|
|
|
|
let last_modified = SystemTime::UNIX_EPOCH
|
|
|
|
+ Duration::from_secs(last_modified.timestamp().max(0) as _);
|
|
|
|
if let Ok(diff) = server_date.duration_since(last_modified) {
|
|
|
|
let secs_left = diff.as_secs() as f64 * 0.1;
|
|
|
|
return default_min_ttl.max(Duration::from_secs(secs_left as _));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
default_min_ttl
|
|
|
|
}
|
|
|
|
|
|
|
|
fn raw_server_date(&self) -> SystemTime {
|
|
|
|
self
|
|
|
|
.headers
|
|
|
|
.get("date")
|
|
|
|
.and_then(|d| DateTime::parse_from_rfc2822(d).ok())
|
|
|
|
.and_then(|d| {
|
|
|
|
SystemTime::UNIX_EPOCH
|
|
|
|
.checked_add(Duration::from_secs(d.timestamp() as _))
|
|
|
|
})
|
|
|
|
.unwrap_or(self.cached)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if the cached value is "fresh" respecting cached headers,
|
|
|
|
/// otherwise returns false.
|
|
|
|
pub fn should_use(&self) -> bool {
|
|
|
|
if self.cache_control.cachability == Some(Cachability::NoCache) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(max_age) = self.cache_control.max_age {
|
|
|
|
if self.age() > max_age {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(min_fresh) = self.cache_control.min_fresh {
|
|
|
|
if self.time_to_live() < min_fresh {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.is_stale() {
|
|
|
|
let has_max_stale = self.cache_control.max_stale.is_some();
|
|
|
|
let allows_stale = has_max_stale
|
|
|
|
&& self
|
|
|
|
.cache_control
|
|
|
|
.max_stale
|
|
|
|
.map_or(true, |val| val > self.age() - self.max_age());
|
|
|
|
if !allows_stale {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
true
|
|
|
|
}
|
|
|
|
|
|
|
|
fn time_to_live(&self) -> Duration {
|
|
|
|
self.max_age().checked_sub(self.age()).unwrap_or_default()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-18 17:28:14 -05:00
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct HttpClient(reqwest::Client);
|
2019-04-25 13:29:21 -04:00
|
|
|
|
2022-11-18 17:28:14 -05:00
|
|
|
impl HttpClient {
|
|
|
|
pub fn new(
|
|
|
|
root_cert_store: Option<RootCertStore>,
|
|
|
|
unsafely_ignore_certificate_errors: Option<Vec<String>>,
|
|
|
|
) -> Result<Self, AnyError> {
|
|
|
|
Ok(HttpClient::from_client(create_http_client(
|
|
|
|
get_user_agent(),
|
|
|
|
root_cert_store,
|
|
|
|
vec![],
|
|
|
|
None,
|
|
|
|
unsafely_ignore_certificate_errors,
|
|
|
|
None,
|
|
|
|
)?))
|
2021-02-15 21:50:27 -05:00
|
|
|
}
|
2022-11-18 17:28:14 -05:00
|
|
|
|
|
|
|
pub fn from_client(client: reqwest::Client) -> Self {
|
|
|
|
Self(client)
|
2021-12-20 21:40:22 -05:00
|
|
|
}
|
2020-01-11 05:11:05 -05:00
|
|
|
|
2022-12-14 08:47:18 -05:00
|
|
|
/// Do a GET request without following redirects.
|
|
|
|
pub fn get_no_redirect<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
) -> reqwest::RequestBuilder {
|
2022-11-18 17:28:14 -05:00
|
|
|
self.0.get(url)
|
2020-09-04 06:43:20 -04:00
|
|
|
}
|
2019-12-30 08:57:17 -05:00
|
|
|
|
2022-12-14 08:47:18 -05:00
|
|
|
pub async fn download_text<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
) -> Result<String, AnyError> {
|
|
|
|
let bytes = self.download(url).await?;
|
|
|
|
Ok(String::from_utf8(bytes)?)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn download<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
) -> Result<Vec<u8>, AnyError> {
|
|
|
|
let maybe_bytes = self.inner_download(url, None).await?;
|
|
|
|
match maybe_bytes {
|
|
|
|
Some(bytes) => Ok(bytes),
|
|
|
|
None => Err(custom_error("Http", "Not found.")),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-12 20:52:10 -05:00
|
|
|
pub async fn download_with_progress<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
progress_guard: &UpdateGuard,
|
|
|
|
) -> Result<Option<Vec<u8>>, AnyError> {
|
2022-12-14 08:47:18 -05:00
|
|
|
self.inner_download(url, Some(progress_guard)).await
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn inner_download<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
progress_guard: Option<&UpdateGuard>,
|
|
|
|
) -> Result<Option<Vec<u8>>, AnyError> {
|
|
|
|
let response = self.get_redirected_response(url).await?;
|
2022-12-12 20:52:10 -05:00
|
|
|
|
|
|
|
if response.status() == 404 {
|
2022-12-14 08:47:18 -05:00
|
|
|
return Ok(None);
|
2022-12-12 20:52:10 -05:00
|
|
|
} else if !response.status().is_success() {
|
|
|
|
let status = response.status();
|
|
|
|
let maybe_response_text = response.text().await.ok();
|
|
|
|
bail!(
|
|
|
|
"Bad response: {:?}{}",
|
|
|
|
status,
|
|
|
|
match maybe_response_text {
|
|
|
|
Some(text) => format!("\n\n{}", text),
|
|
|
|
None => String::new(),
|
|
|
|
}
|
|
|
|
);
|
2022-11-18 17:28:14 -05:00
|
|
|
}
|
|
|
|
|
2022-12-19 14:31:19 -05:00
|
|
|
get_response_body_with_progress(response, progress_guard)
|
|
|
|
.await
|
|
|
|
.map(Some)
|
2022-12-14 08:47:18 -05:00
|
|
|
}
|
2022-11-18 17:28:14 -05:00
|
|
|
|
2022-12-14 08:47:18 -05:00
|
|
|
async fn get_redirected_response<U: reqwest::IntoUrl>(
|
|
|
|
&self,
|
|
|
|
url: U,
|
|
|
|
) -> Result<Response, AnyError> {
|
|
|
|
let mut url = url.into_url()?;
|
|
|
|
let mut response = self.get_no_redirect(url.clone()).send().await?;
|
|
|
|
let status = response.status();
|
|
|
|
if status.is_redirection() {
|
|
|
|
for _ in 0..5 {
|
|
|
|
let new_url = resolve_redirect_from_response(&url, &response)?;
|
|
|
|
let new_response = self.get_no_redirect(new_url.clone()).send().await?;
|
|
|
|
let status = new_response.status();
|
|
|
|
if status.is_redirection() {
|
|
|
|
response = new_response;
|
|
|
|
url = new_url;
|
|
|
|
} else {
|
|
|
|
return Ok(new_response);
|
|
|
|
}
|
2022-11-18 17:28:14 -05:00
|
|
|
}
|
2022-12-14 08:47:18 -05:00
|
|
|
Err(custom_error("Http", "Too many redirects."))
|
|
|
|
} else {
|
|
|
|
Ok(response)
|
2022-11-18 17:28:14 -05:00
|
|
|
}
|
|
|
|
}
|
2019-04-01 21:46:40 -04:00
|
|
|
}
|
|
|
|
|
2022-12-19 14:31:19 -05:00
|
|
|
pub async fn get_response_body_with_progress(
|
|
|
|
response: reqwest::Response,
|
|
|
|
progress_guard: Option<&UpdateGuard>,
|
|
|
|
) -> Result<Vec<u8>, AnyError> {
|
|
|
|
if let Some(progress_guard) = progress_guard {
|
|
|
|
if let Some(total_size) = response.content_length() {
|
|
|
|
progress_guard.set_total_size(total_size);
|
|
|
|
let mut current_size = 0;
|
|
|
|
let mut data = Vec::with_capacity(total_size as usize);
|
|
|
|
let mut stream = response.bytes_stream();
|
|
|
|
while let Some(item) = stream.next().await {
|
|
|
|
let bytes = item?;
|
|
|
|
current_size += bytes.len() as u64;
|
|
|
|
progress_guard.set_position(current_size);
|
|
|
|
data.extend(bytes.into_iter());
|
|
|
|
}
|
|
|
|
return Ok(data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let bytes = response.bytes().await?;
|
|
|
|
Ok(bytes.into())
|
|
|
|
}
|
|
|
|
|
2019-04-01 21:46:40 -04:00
|
|
|
#[cfg(test)]
mod test {
  use super::*;

  /// Parses `base` and resolves `location` against it.
  fn resolve(base: &str, location: &str) -> Url {
    let base_url = base.parse::<Url>().unwrap();
    resolve_url_from_location(&base_url, location)
  }

  #[tokio::test]
  async fn test_http_client_download_redirect() {
    // keep the guard alive for the whole test so the server stays up
    let _http_server_guard = test_util::http_server();
    let http_client = HttpClient::new(None, None).unwrap();

    // make a request to the redirect server
    let body = http_client
      .download_text("http://localhost:4546/subdir/redirects/redirect1.js")
      .await
      .unwrap();
    assert_eq!(body, "export const redirect = 1;\n");

    // now make one to the infinite redirects server
    let error = http_client
      .download_text("http://localhost:4549/subdir/redirects/redirect1.js")
      .await
      .unwrap_err();
    assert_eq!(error.to_string(), "Too many redirects.");
  }

  #[test]
  fn test_resolve_url_from_location_full_1() {
    let resolved = resolve("http://deno.land", "http://golang.org");
    assert_eq!(resolved.host_str().unwrap(), "golang.org");
  }

  #[test]
  fn test_resolve_url_from_location_full_2() {
    let resolved = resolve("https://deno.land", "https://golang.org");
    assert_eq!(resolved.host_str().unwrap(), "golang.org");
  }

  #[test]
  fn test_resolve_url_from_location_relative_1() {
    let resolved = resolve("http://deno.land/x", "//rust-lang.org/en-US");
    assert_eq!(resolved.host_str().unwrap(), "rust-lang.org");
    assert_eq!(resolved.path(), "/en-US");
  }

  #[test]
  fn test_resolve_url_from_location_relative_2() {
    let resolved = resolve("http://deno.land/x", "/y");
    assert_eq!(resolved.host_str().unwrap(), "deno.land");
    assert_eq!(resolved.path(), "/y");
  }

  #[test]
  fn test_resolve_url_from_location_relative_3() {
    let resolved = resolve("http://deno.land/x", "z");
    assert_eq!(resolved.host_str().unwrap(), "deno.land");
    assert_eq!(resolved.path(), "/z");
  }
}
|