use std::collections::HashMap;
use std::collections::HashSet;
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::SystemTime;
use deno_media_type::MediaType;
use indexmap::IndexMap;
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use url::Url;
use crate::cache::CacheReadFileError;
use crate::cache::GlobalToLocalCopy;
use super::common::base_url_to_filename_parts;
use super::common::checksum;
use super::common::HeadersMap;
use super::global::GlobalHttpCache;
use super::Checksum;
use super::DenoCacheEnv;
use super::HttpCache;
use super::HttpCacheItemKey;
/// A wrapper around [`LocalHttpCache`] whose manifest is created via
/// `LocalCacheManifest::new_for_lsp`, i.e. with the reverse (local path to
/// url) mapping enabled. That mapping is what lets `get_remote_url` resolve
/// hashed (`#`-prefixed) paths back to the url they were cached from.
#[derive(Debug)]
pub struct LocalLspHttpCache<Env: DenoCacheEnv> {
// The wrapped local cache; the `HttpCache` implementation forwards to it.
cache: LocalHttpCache<Env>,
}
impl<Env: DenoCacheEnv> LocalLspHttpCache<Env> {
  /// Creates a cache rooted at `path` that falls back to `global_cache`.
  ///
  /// The manifest is loaded from `<path>/manifest.json` with the reverse
  /// mapping enabled.
  ///
  /// # Panics
  ///
  /// Panics when `path` is not absolute (unless the `wasm` feature is on).
  pub fn new(path: PathBuf, global_cache: Arc<GlobalHttpCache<Env>>) -> Self {
    #[cfg(not(feature = "wasm"))]
    assert!(path.is_absolute());
    let manifest_path = path.join("manifest.json");
    let env = global_cache.env.clone();
    let manifest = LocalCacheManifest::new_for_lsp(manifest_path, env);
    let cache = LocalHttpCache {
      path,
      manifest,
      global_cache,
    };
    Self { cache }
  }

  /// Returns the `file:` url of the locally cached copy of `url`, or `None`
  /// when the url can't be mapped to a path or no such file exists.
  #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
  pub fn get_file_url(&self, url: &Url) -> Option<Url> {
    let sub_path = {
      // the read guard only needs to live while the content type is borrowed
      let manifest_data = self.cache.manifest.data.read();
      let content_type = match manifest_data.get(url) {
        Some(module) => module.content_type_header(),
        None => None,
      };
      url_to_local_sub_path(url, content_type).ok()?
    };
    let file_path = sub_path.as_path_from_root(&self.cache.path);
    if !self.cache.fs().is_file(&file_path) {
      return None;
    }
    Url::from_file_path(file_path).ok()
  }

  /// Maps a path inside the cache directory back to the remote url it was
  /// cached from. Returns `None` for paths outside of the cache directory
  /// and for paths that can't be resolved.
  pub fn get_remote_url(&self, path: &Path) -> Option<Url> {
    // anything that isn't below the cache root is not ours to map
    let relative = path.strip_prefix(&self.cache.path).ok()?;
    let names = relative
      .components()
      .map(|component| component.as_os_str().to_string_lossy())
      .collect::<Vec<_>>();
    let last_hashed = names.iter().rposition(|name| name.starts_with('#'));
    match last_hashed {
      // the file name itself is hashed: look the whole path up
      Some(index) if index + 1 == names.len() => self
        .cache
        .manifest
        .data
        .read()
        .get_reverse_mapping(relative),
      // only a directory is hashed: look up the deepest hashed directory
      // and append the remaining components to its url
      Some(index) => {
        let mut dir_path = PathBuf::new();
        for name in &names[..index + 1] {
          dir_path.push(name.as_ref());
        }
        let dir_url = self
          .cache
          .manifest
          .data
          .read()
          .get_reverse_mapping(&dir_path)?;
        dir_url.join(&names[index + 1..].join("/")).ok()
      }
      // nothing is hashed: rebuild the url text straight from the path
      None => {
        let mut url_text = String::new();
        for (i, name) in names.iter().enumerate() {
          if i > 0 {
            url_text.push('/');
            url_text.push_str(name);
            continue;
          }
          // the first component carries the scheme and the host
          let (scheme, host) = match name.strip_prefix("http_") {
            Some(rest) => ("http://", rest),
            None => ("https://", &**name),
          };
          url_text.push_str(scheme);
          // a trailing `_<port>` is turned back into `:<port>`
          match host.rsplit_once('_') {
            Some((domain, port)) => {
              url_text.push_str(domain);
              url_text.push(':');
              url_text.push_str(port);
            }
            None => url_text.push_str(host),
          }
        }
        Url::parse(&url_text).ok()
      }
    }
  }
}
/// `HttpCache` implementation that forwards every call to the wrapped
/// [`LocalHttpCache`], each method delegating to its same-named counterpart.
impl<Env: DenoCacheEnv> HttpCache for LocalLspHttpCache<Env> {
  /// Builds the cache key for `url`.
  fn cache_item_key<'a>(
    &self,
    url: &'a Url,
  ) -> std::io::Result<HttpCacheItemKey<'a>> {
    self.cache.cache_item_key(url)
  }

  /// Returns whether `url` is known to the wrapped cache.
  fn contains(&self, url: &Url) -> bool {
    self.cache.contains(url)
  }

  /// Stores `content` and `headers` for `url` in the wrapped cache.
  fn set(
    &self,
    url: &Url,
    headers: HeadersMap,
    content: &[u8],
  ) -> std::io::Result<()> {
    self.cache.set(url, headers, content)
  }

  /// Returns the modification time reported by the wrapped cache.
  fn read_modified_time(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<SystemTime>> {
    self.cache.read_modified_time(key)
  }

  /// Reads the cached bytes for `key` from the wrapped cache.
  fn read_file_bytes(
    &self,
    key: &HttpCacheItemKey,
    maybe_checksum: Option<Checksum>,
    allow_global_to_local: GlobalToLocalCopy,
  ) -> Result<Option<Vec<u8>>, CacheReadFileError> {
    self
      .cache
      .read_file_bytes(key, maybe_checksum, allow_global_to_local)
  }

  /// Reads the stored headers for `key` from the wrapped cache.
  fn read_headers(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<HeadersMap>> {
    self.cache.read_headers(key)
  }

  /// Returns the download time reported by the wrapped cache.
  fn read_download_time(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<SystemTime>> {
    // Delegate to the matching method rather than `read_modified_time` so
    // this wrapper keeps agreeing with the inner cache if the two ever differ.
    self.cache.read_download_time(key)
  }
}
/// An HTTP cache stored in a local directory and backed by a manifest file
/// plus a global cache that is consulted when an entry is missing locally.
#[derive(Debug)]
pub struct LocalHttpCache<Env: DenoCacheEnv> {
// Root directory of the local cache; `new` asserts it is absolute unless
// the `wasm` feature is enabled.
path: PathBuf,
// Manifest loaded from `manifest.json` inside `path`.
manifest: LocalCacheManifest<Env>,
// Fallback cache; its `env` is also used for all file system access.
global_cache: Arc<GlobalHttpCache<Env>>,
}
impl<Env: DenoCacheEnv> LocalHttpCache<Env> {
  /// Creates a cache rooted at `path` that falls back to `global_cache`.
  ///
  /// The manifest is loaded from `<path>/manifest.json`.
  ///
  /// # Panics
  ///
  /// Panics when `path` is not absolute (unless the `wasm` feature is on).
  pub fn new(path: PathBuf, global_cache: Arc<GlobalHttpCache<Env>>) -> Self {
    #[cfg(not(feature = "wasm"))]
    assert!(path.is_absolute());
    let manifest_path = path.join("manifest.json");
    let env = global_cache.env.clone();
    Self {
      manifest: LocalCacheManifest::new(manifest_path, env),
      path,
      global_cache,
    }
  }

  /// The environment used for file system access (shared with the global
  /// cache).
  #[inline]
  fn fs(&self) -> &Env {
    &self.global_cache.env
  }

  /// Resolves the headers known for `url`.
  ///
  /// Lookup order: the local manifest, then the global cache (whose headers
  /// get copied into the local manifest), then the bare existence of the
  /// local file, which yields an empty header map. `Ok(None)` means the url
  /// is not cached at all.
  fn get_url_headers(&self, url: &Url) -> std::io::Result<Option<HeadersMap>> {
    if let Some(stored) = self.manifest.get_stored_headers(url) {
      return Ok(Some(stored));
    }

    let global_key = self.global_cache.cache_item_key(url)?;
    match self.global_cache.read_headers(&global_key)? {
      Some(global_headers) => {
        let sub_path =
          url_to_local_sub_path(url, headers_content_type(&global_headers))?;
        self
          .manifest
          .insert_data(sub_path, url.clone(), global_headers);
        // the manifest may have kept nothing for this url; report that as
        // an empty set of headers
        let stored = self.manifest.get_stored_headers(url).unwrap_or_default();
        Ok(Some(stored))
      }
      None => {
        let sub_path = url_to_local_sub_path(url, None)?;
        let file_exists =
          self.fs().is_file(&sub_path.as_path_from_root(&self.path));
        Ok(file_exists.then(Default::default))
      }
    }
  }
}
impl<Env: DenoCacheEnv> HttpCache for LocalHttpCache<Env> {
  /// Builds a local cache key for `url`; no file path is precomputed.
  fn cache_item_key<'a>(
    &self,
    url: &'a Url,
  ) -> std::io::Result<HttpCacheItemKey<'a>> {
    let key = HttpCacheItemKey {
      #[cfg(debug_assertions)]
      is_local_key: true,
      url,
      file_path: None,
    };
    Ok(key)
  }

  /// Returns `true` when headers can be resolved for `url`; lookup errors
  /// count as "not contained".
  fn contains(&self, url: &Url) -> bool {
    matches!(self.get_url_headers(url), Ok(Some(_)))
  }

  /// Returns the modification time of the local file when it can be read,
  /// otherwise whatever the global cache reports.
  fn read_modified_time(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<SystemTime>> {
    #[cfg(debug_assertions)]
    debug_assert!(key.is_local_key);

    if let Some(headers) = self.get_url_headers(key.url)? {
      let sub_path =
        url_to_local_sub_path(key.url, headers_content_type(&headers))?;
      let file_path = sub_path.as_path_from_root(&self.path);
      // failures here are not fatal; fall through to the global cache
      if let Ok(Some(modified_time)) = self.fs().modified(&file_path) {
        return Ok(Some(modified_time));
      }
    }

    let global_key = self.global_cache.cache_item_key(key.url)?;
    self.global_cache.read_modified_time(&global_key)
  }

  /// Writes `content` to the local file for `url` (skipped for redirects,
  /// i.e. when a `location` header is present) and records the headers in
  /// the manifest.
  fn set(
    &self,
    url: &Url,
    headers: HeadersMap,
    content: &[u8],
  ) -> std::io::Result<()> {
    let sub_path = url_to_local_sub_path(url, headers_content_type(&headers))?;
    let is_redirect = headers.contains_key("location");
    if !is_redirect {
      let file_path = sub_path.as_path_from_root(&self.path);
      self.fs().atomic_write_file(&file_path, content)?;
    }
    self.manifest.insert_data(sub_path, url.clone(), headers);
    Ok(())
  }

  /// Reads the cached bytes for `key`.
  ///
  /// Redirect entries yield an empty buffer. When the local file is missing
  /// and `allow_global_to_local` permits it, the bytes are read from the
  /// global cache and copied into the local cache.
  fn read_file_bytes(
    &self,
    key: &HttpCacheItemKey,
    maybe_checksum: Option<Checksum>,
    allow_global_to_local: GlobalToLocalCopy,
  ) -> Result<Option<Vec<u8>>, CacheReadFileError> {
    #[cfg(debug_assertions)]
    debug_assert!(key.is_local_key);

    let Some(headers) = self.get_url_headers(key.url)? else {
      return Ok(None);
    };
    if headers.contains_key("location") {
      // redirects have no body stored on disk
      return Ok(Some(Vec::new()));
    }

    let local_file_path =
      url_to_local_sub_path(key.url, headers_content_type(&headers))?
        .as_path_from_root(&self.path);
    if let Some(bytes) = self.fs().read_file_bytes(&local_file_path)? {
      return Ok(Some(bytes));
    }
    if !allow_global_to_local.is_true() {
      return Ok(None);
    }

    let global_key = self.global_cache.cache_item_key(key.url)?;
    let maybe_file_bytes = self.global_cache.read_file_bytes(
      &global_key,
      maybe_checksum,
      allow_global_to_local,
    )?;
    if let Some(bytes) = &maybe_file_bytes {
      self.fs().atomic_write_file(&local_file_path, bytes)?;
    }
    Ok(maybe_file_bytes)
  }

  /// Returns the headers resolved for the key's url.
  fn read_headers(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<HeadersMap>> {
    #[cfg(debug_assertions)]
    debug_assert!(key.is_local_key);
    self.get_url_headers(key.url)
  }

  /// The local cache reports the modification time as the download time.
  fn read_download_time(
    &self,
    key: &HttpCacheItemKey,
  ) -> std::io::Result<Option<SystemTime>> {
    self.read_modified_time(key)
  }
}
/// Location of a cached url relative to the root of the local cache
/// directory, as computed by `url_to_local_sub_path`.
pub(super) struct LocalCacheSubPath {
// `true` when at least one part was replaced by a hashed, `#`-prefixed name.
pub has_hash: bool,
// Path components in order, starting with the scheme/host part.
pub parts: Vec<String>,
}
impl LocalCacheSubPath {
  /// Returns `root_path` with every part of this sub path appended.
  pub fn as_path_from_root(&self, root_path: &Path) -> PathBuf {
    let mut path = root_path.to_path_buf();
    path.extend(&self.parts);
    path
  }

  /// Returns the parts joined into a relative path.
  pub fn as_relative_path(&self) -> PathBuf {
    // `PathBuf::with_capacity` expects a capacity in bytes, not a component
    // count, so reserve the length of every part plus one separator each.
    let byte_len = self.parts.iter().map(|part| part.len() + 1).sum();
    let mut path = PathBuf::with_capacity(byte_len);
    path.extend(&self.parts);
    path
  }
}
/// Returns the value stored under the `content-type` key, if any.
fn headers_content_type(headers: &HeadersMap) -> Option<&str> {
  let value = headers.get("content-type")?;
  Some(value.as_str())
}
fn url_to_local_sub_path(
url: &Url,
content_type: Option<&str>,
) -> std::io::Result<LocalCacheSubPath> {
static FORBIDDEN_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
HashSet::from(['?', '<', '>', ':', '*', '|', '\\', ':', '"', '\'', '/'])
});
static FORBIDDEN_WINDOWS_NAMES: Lazy<HashSet<&'static str>> =
Lazy::new(|| {
let set = HashSet::from([
"con", "prn", "aux", "nul", "com0", "com1", "com2", "com3", "com4",
"com5", "com6", "com7", "com8", "com9", "lpt0", "lpt1", "lpt2", "lpt3",
"lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
]);
debug_assert!(set.iter().all(|s| s.to_lowercase() == *s));
set
});
fn has_forbidden_chars(segment: &str) -> bool {
segment.chars().any(|c| {
let is_uppercase = c.is_ascii_alphabetic() && !c.is_ascii_lowercase();
FORBIDDEN_CHARS.contains(&c)
|| is_uppercase
})
}
fn has_known_extension(path: &str) -> bool {
let path = path.to_lowercase();
path.ends_with(".js")
|| path.ends_with(".ts")
|| path.ends_with(".jsx")
|| path.ends_with(".tsx")
|| path.ends_with(".mts")
|| path.ends_with(".mjs")
|| path.ends_with(".json")
|| path.ends_with(".wasm")
}
fn get_extension(url: &Url, content_type: Option<&str>) -> &'static str {
MediaType::from_specifier_and_content_type(url, content_type)
.as_ts_extension()
}
fn short_hash(data: &str, last_ext: Option<&str>) -> String {
let hash = checksum(data.as_bytes());
const MAX_LENGTH: usize = 20;
let mut sub = String::with_capacity(MAX_LENGTH);
for c in data.chars().take(MAX_LENGTH) {
if c == '?' {
break;
}
if FORBIDDEN_CHARS.contains(&c) {
sub.push('_');
} else {
sub.extend(c.to_lowercase());
}
}
let sub = match last_ext {
Some(ext) => sub.strip_suffix(ext).unwrap_or(&sub),
None => &sub,
};
let ext = last_ext.unwrap_or("");
if sub.is_empty() {
format!("#{}{}", &hash[..7], ext)
} else {
format!("#{}_{}{}", &sub, &hash[..5], ext)
}
}
fn should_hash_part(part: &str, last_ext: Option<&str>) -> bool {
if part.is_empty() || part.len() > 30 {
return true;
}
let hash_context_specific = if let Some(last_ext) = last_ext {
!has_known_extension(part) || !part.ends_with(last_ext)
} else {
has_known_extension(part)
};
hash_context_specific
|| part.starts_with('#')
|| has_forbidden_chars(part)
|| last_ext.is_none() && FORBIDDEN_WINDOWS_NAMES.contains(part)
|| part.ends_with('.')
}
let port_separator = "_"; let Some(mut base_parts) = base_url_to_filename_parts(url, port_separator)
else {
return Err(std::io::Error::new(
ErrorKind::InvalidInput,
format!("Can't convert url (\"{}\") to filename.", url),
));
};
if base_parts[0] == "https" {
base_parts.remove(0);
} else {
let scheme = base_parts.remove(0);
base_parts[0] = format!("{}_{}", scheme, base_parts[0]);
}
let path_segments = url_path_segments(url);
let mut parts = base_parts
.into_iter()
.chain(path_segments.map(|s| s.to_string()))
.collect::<Vec<_>>();
if let Some(query) = url.query() {
let last_part = parts.last_mut().unwrap();
last_part.push('?');
last_part.push_str(query);
}
let mut has_hash = false;
let parts_len = parts.len();
let parts = parts
.into_iter()
.enumerate()
.map(|(i, part)| {
let is_last = i == parts_len - 1;
let last_ext = if is_last {
Some(get_extension(url, content_type))
} else {
None
};
if should_hash_part(&part, last_ext) {
has_hash = true;
short_hash(&part, last_ext)
} else {
part
}
})
.collect::<Vec<_>>();
Ok(LocalCacheSubPath { has_hash, parts })
}
/// In-memory view of the local cache manifest together with what is needed
/// to write it back to disk.
#[derive(Debug)]
struct LocalCacheManifest<Env: DenoCacheEnv> {
// Environment used to read and atomically write the manifest file.
env: Env,
// Location of the manifest file.
file_path: PathBuf,
// Manifest contents, guarded by a lock so it can be updated through `&self`.
data: RwLock<manifest::LocalCacheManifestData>,
}
impl<Env: DenoCacheEnv> LocalCacheManifest<Env> {
  /// Loads the manifest at `file_path` without a reverse mapping.
  pub fn new(file_path: PathBuf, env: Env) -> Self {
    Self::new_internal(file_path, false, env)
  }

  /// Loads the manifest at `file_path` and also builds the reverse (local
  /// path to url) mapping.
  pub fn new_for_lsp(file_path: PathBuf, env: Env) -> Self {
    Self::new_internal(file_path, true, env)
  }

  /// Reads the manifest file; a missing, unreadable or non-UTF-8 file is
  /// treated as "no manifest text".
  fn new_internal(
    file_path: PathBuf,
    use_reverse_mapping: bool,
    env: Env,
  ) -> Self {
    let text = match env.read_file_bytes(&file_path) {
      Ok(Some(bytes)) => String::from_utf8(bytes).ok(),
      _ => None,
    };
    let data = manifest::LocalCacheManifestData::new(
      text.as_deref(),
      use_reverse_mapping,
    );
    Self {
      env,
      file_path,
      data: RwLock::new(data),
    }
  }

  /// Records `url` (cached at `sub_path`) in the manifest, keeping only the
  /// headers the local cache needs, and saves the manifest when something
  /// changed. A failed save is only logged.
  pub fn insert_data(
    &self,
    sub_path: LocalCacheSubPath,
    url: Url,
    mut original_headers: HashMap<String, String>,
  ) {
    const HEADER_KEYS_TO_KEEP: [&str; 4] = [
      "content-type",
      "location",
      "x-deno-warning",
      "x-typescript-types",
    ];

    // the content type is only worth storing when it changes the media type
    // that the url alone would produce
    let keep_content_type = MediaType::from_specifier(&url)
      != MediaType::from_specifier_and_headers(&url, Some(&original_headers));

    let mut headers_subset = IndexMap::new();
    for key in HEADER_KEYS_TO_KEEP {
      let wanted = key != "content-type" || keep_content_type;
      if !wanted {
        continue;
      }
      if let Some((name, value)) = original_headers.remove_entry(key) {
        headers_subset.insert(name, value);
      }
    }

    let mut data = self.data.write();

    // an entry without headers whose file name is not hashed carries no
    // information, so such entries are removed rather than stored
    let file_name_is_hashed = sub_path
      .parts
      .last()
      .map(|name| name.starts_with('#'))
      .unwrap_or(false);
    let entry_is_redundant = headers_subset.is_empty() && !file_name_is_hashed;

    let mut has_changed = if entry_is_redundant {
      data.remove(&url, &sub_path)
    } else {
      let new_data = manifest::SerializedLocalCacheManifestDataModule {
        headers: headers_subset,
      };
      let differs = data.get(&url) != Some(&new_data);
      if differs {
        data.insert(url.clone(), &sub_path, new_data);
      }
      differs
    };

    if sub_path.has_hash {
      // record the url of every hashed directory between the host part and
      // the file name
      let url_path_parts = url_path_segments(&url).collect::<Vec<_>>();
      let mut base_url = url.clone();
      base_url.set_path("/");
      base_url.set_query(None);
      base_url.set_fragment(None);

      let dir_parts = &sub_path.parts[1..sub_path.parts.len() - 1];
      for (i, local_part) in dir_parts.iter().enumerate() {
        if !local_part.starts_with('#') {
          continue;
        }
        let mut dir_url = base_url.clone();
        dir_url.set_path(&format!("{}/", url_path_parts[..i + 1].join("/")));
        let local_dir = sub_path.parts[..i + 2].join("/");
        if data.add_directory(dir_url, local_dir) {
          has_changed = true;
        }
      }
    }

    if !has_changed {
      return;
    }
    let save_result = self
      .env
      .atomic_write_file(&self.file_path, data.as_json().as_bytes());
    if let Err(err) = save_result {
      log::debug!("Failed saving local cache manifest: {:#}", err);
    }
  }

  /// Returns a copy of the headers stored for `url`, or `None` when the
  /// manifest has no entry for it.
  pub fn get_stored_headers(&self, url: &Url) -> Option<HeadersMap> {
    let data = self.data.read();
    let module = data.get(url)?;
    let headers = module
      .headers
      .iter()
      .map(|(name, value)| (name.to_string(), value.to_string()))
      .collect::<HashMap<_, _>>();
    Some(headers)
  }
}
/// Serialized form of the local cache manifest plus the optional in-memory
/// reverse mapping from local paths to urls.
mod manifest {
  use std::collections::HashMap;
  use std::path::Path;
  use std::path::PathBuf;
  use indexmap::IndexMap;
  use serde::Deserialize;
  use serde::Serialize;
  use url::Url;
  use super::url_to_local_sub_path;
  use super::LocalCacheSubPath;

  /// Manifest entry for a single url.
  #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  pub struct SerializedLocalCacheManifestDataModule {
    #[serde(
      default = "IndexMap::new",
      skip_serializing_if = "IndexMap::is_empty"
    )]
    pub headers: IndexMap<String, String>,
  }

  impl SerializedLocalCacheManifestDataModule {
    /// The stored `content-type` header value, if any.
    pub fn content_type_header(&self) -> Option<&str> {
      let value = self.headers.get("content-type")?;
      Some(value.as_str())
    }
  }

  /// The data that gets written to the manifest file.
  #[derive(Debug, Default, Clone, Serialize, Deserialize)]
  struct SerializedLocalCacheManifestData {
    /// Directory url to its `/`-separated local path.
    #[serde(
      default = "IndexMap::new",
      skip_serializing_if = "IndexMap::is_empty"
    )]
    pub folders: IndexMap<Url, String>,
    /// Module url to its stored data.
    #[serde(
      default = "IndexMap::new",
      skip_serializing_if = "IndexMap::is_empty"
    )]
    pub modules: IndexMap<Url, SerializedLocalCacheManifestDataModule>,
  }

  /// Manifest data with an optional local path to url lookup table.
  #[derive(Debug, Default, Clone)]
  pub(super) struct LocalCacheManifestData {
    serialized: SerializedLocalCacheManifestData,
    reverse_mapping: Option<HashMap<PathBuf, Url>>,
  }

  /// Turns a `/`-separated manifest path into a path using the separator
  /// of the platform this was compiled for.
  fn native_path(slash_path: &str) -> PathBuf {
    if cfg!(windows) {
      PathBuf::from(slash_path.replace('/', "\\"))
    } else {
      PathBuf::from(slash_path)
    }
  }

  impl LocalCacheManifestData {
    /// Parses `maybe_text` as manifest JSON. Missing or invalid text yields
    /// an empty manifest (the parse error is logged at debug level).
    pub fn new(maybe_text: Option<&str>, use_reverse_mapping: bool) -> Self {
      let parsed = maybe_text.map(|text| {
        serde_json::from_str::<SerializedLocalCacheManifestData>(text)
      });
      let serialized = match parsed {
        Some(Ok(data)) => data,
        Some(Err(err)) => {
          log::debug!("Failed deserializing local cache manifest: {:#}", err);
          Default::default()
        }
        None => Default::default(),
      };

      let reverse_mapping = use_reverse_mapping.then(|| {
        let separator = if cfg!(windows) { "\\" } else { "/" };
        let mut mapping = HashMap::new();
        for (url, module) in &serialized.modules {
          // entries with a `location` header (redirects) are left out
          if module.headers.contains_key("location") {
            continue;
          }
          let content_type = module.content_type_header();
          if let Ok(local_path) = url_to_local_sub_path(url, content_type) {
            let path = PathBuf::from(local_path.parts.join(separator));
            mapping.insert(path, url.clone());
          }
        }
        // folders are inserted last so they win on a duplicate path
        for (url, local_path) in &serialized.folders {
          mapping.insert(native_path(local_path), url.clone());
        }
        mapping
      });

      Self {
        serialized,
        reverse_mapping,
      }
    }

    /// Returns the stored module data for `url`.
    pub fn get(
      &self,
      url: &Url,
    ) -> Option<&SerializedLocalCacheManifestDataModule> {
      self.serialized.modules.get(url)
    }

    /// Looks up the url recorded for a relative local `path`. Must only be
    /// called when the reverse mapping was requested at construction.
    pub fn get_reverse_mapping(&self, path: &Path) -> Option<Url> {
      debug_assert!(self.reverse_mapping.is_some());
      let mapping = self.reverse_mapping.as_ref()?;
      mapping.get(path).cloned()
    }

    /// Records that directory `url` lives at `local_path`. Returns `false`
    /// when exactly that association was already stored.
    pub fn add_directory(&mut self, url: Url, local_path: String) -> bool {
      if self.serialized.folders.get(&url) == Some(&local_path) {
        return false;
      }
      if let Some(mapping) = self.reverse_mapping.as_mut() {
        mapping.insert(native_path(&local_path), url.clone());
      }
      self.serialized.folders.insert(url, local_path);
      true
    }

    /// Stores `new_data` for `url`, updating the reverse mapping if present.
    pub fn insert(
      &mut self,
      url: Url,
      sub_path: &LocalCacheSubPath,
      new_data: SerializedLocalCacheManifestDataModule,
    ) {
      if let Some(mapping) = self.reverse_mapping.as_mut() {
        mapping.insert(sub_path.as_relative_path(), url.clone());
      }
      self.serialized.modules.insert(url, new_data);
    }

    /// Removes the entry for `url`. Returns whether an entry existed.
    pub fn remove(&mut self, url: &Url, sub_path: &LocalCacheSubPath) -> bool {
      let existed = self.serialized.modules.remove(url).is_some();
      if existed {
        if let Some(mapping) = self.reverse_mapping.as_mut() {
          mapping.remove(&sub_path.as_relative_path());
        }
      }
      existed
    }

    /// Serializes the manifest as pretty-printed JSON.
    pub fn as_json(&self) -> String {
      serde_json::to_string_pretty(&self.serialized).unwrap()
    }
  }
}
/// Splits the url's path on `/`, ignoring a single leading slash. Always
/// yields at least one (possibly empty) segment.
fn url_path_segments(url: &Url) -> impl Iterator<Item = &str> {
  let path = url.path();
  let without_leading_slash = path.strip_prefix('/').unwrap_or(path);
  without_leading_slash.split('/')
}
#[cfg(test)]
mod test {
use super::*;
use pretty_assertions::assert_eq;
// Pins the exact sub paths produced by `url_to_local_sub_path`; every case
// below exercises one of the rules that decide whether a part is hashed.
#[test]
fn test_url_to_local_sub_path() {
// https is implied, nothing needs hashing
run_test("https://deno.land/x/mod.ts", &[], "deno.land/x/mod.ts");
// any other scheme is prefixed to the host part
run_test(
"http://deno.land/x/mod.ts",
&[],
"http_deno.land/x/mod.ts",
);
// uppercase letters force a hash; the readable prefix is lowercased
run_test(
"https://deno.land/x/MOD.ts",
&[],
"deno.land/x/#mod_fa860.ts",
);
// the query string is part of the last segment and only affects the hash
run_test(
"https://deno.land/x/mod.ts?testing=1",
&[],
"deno.land/x/#mod_2eb80.ts",
);
// directories are hashed by the same uppercase rule
run_test(
"https://deno.land/OTHER/mod.ts",
&[],
"deno.land/#other_1c55d/mod.ts",
);
// exactly 30 bytes: still within the length limit
run_test(
"https://deno.land/x/012345678901234567890123456.js",
&[],
"deno.land/x/012345678901234567890123456.js",
);
// 31 bytes: hashed, with the readable prefix cut at 20 characters
run_test(
"https://deno.land/x/0123456789012345678901234567.js",
&[],
"deno.land/x/#01234567890123456789_836de.js",
);
// a forbidden character becomes `_` in the readable prefix
run_test(
"https://deno.land/x/mod's.js",
&[],
"deno.land/x/#mod_s_44fc8.js",
);
// a file name without a known extension is hashed and gets the extension
// derived from the content type
run_test(
"https://deno.land/x/mod",
&[("content-type", "application/typescript")],
"deno.land/x/#mod_e55cf.ts",
);
// a directory that looks like a file name is hashed
run_test(
"https://deno.land/x/mod.js/mod.js",
&[],
"deno.land/x/#mod.js_59c58/mod.js",
);
// an empty segment is hashed; with no readable prefix the longer hash
// form is used
run_test(
"http://localhost//mod.js",
&[],
"http_localhost/#e3b0c44/mod.js",
);
// content type agrees with the extension: unchanged
run_test(
"https://deno.land/x/mod.ts",
&[("content-type", "application/typescript")],
"deno.land/x/mod.ts",
);
// content type disagrees with the extension: hashed with the extension
// derived from the content type
run_test(
"https://deno.land/x/mod.ts",
&[("content-type", "application/javascript")],
"deno.land/x/#mod.ts_e8c36.js",
);
// reserved Windows names are hashed for directories only
run_test(
"https://deno.land/x/con/con.ts",
&[],
"deno.land/x/#con_1143d/con.ts",
);
// a trailing dot forces a hash
run_test(
"https://deno.land/x/test./main.ts",
&[],
"deno.land/x/#test._4ee3d/main.ts",
);
// Runs one case and also checks that `has_hash` agrees with the presence
// of a `#`-prefixed part.
#[track_caller]
fn run_test(url: &str, headers: &[(&str, &str)], expected: &str) {
let url = Url::parse(url).unwrap();
let headers = headers
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
let result =
url_to_local_sub_path(&url, headers_content_type(&headers)).unwrap();
let parts = result.parts.join("/");
assert_eq!(parts, expected);
assert_eq!(
result.parts.iter().any(|p| p.starts_with('#')),
result.has_hash
)
}
}
}