#![deny(missing_docs)]
use std::ops::Bound;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Duration, Instant, SystemTime};
use headers::{
CacheControl, ContentRange, Expires, HeaderMapExt, LastModified, Pragma, Range, Vary,
};
use http::header::HeaderValue;
use http::{HeaderMap, Method, StatusCode, header};
use log::{debug, error};
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps, MallocUnconditionalSizeOf};
use malloc_size_of_derive::MallocSizeOf;
use net_traits::http_status::HttpStatus;
use net_traits::request::Request;
use net_traits::response::{HttpsState, Response, ResponseBody};
use net_traits::{CacheEntryDescriptor, FetchMetadata, Metadata, ResourceFetchTiming};
use parking_lot::Mutex as ParkingLotMutex;
use quick_cache::sync::{Cache, DefaultLifecycle, PlaceholderGuard};
use quick_cache::{DefaultHashBuilder, UnitWeighter};
use servo_arc::Arc;
use servo_config::pref;
use servo_url::ServoUrl;
use tokio::sync::mpsc::{UnboundedSender as TokioSender, unbounded_channel as unbounded};
use tokio::sync::{OwnedRwLockWriteGuard, RwLock as TokioRwLock};
use crate::fetch::methods::{Data, DoneChannel};
/// The key under which cached resources are stored and looked up.
///
/// Equality and hashing are derived from the wrapped URL, which is the only field.
#[derive(Clone, Eq, Hash, MallocSizeOf, PartialEq)]
pub struct CacheKey {
/// The URL identifying the cache entry.
url: ServoUrl,
}
impl CacheKey {
pub fn new(request: &Request) -> CacheKey {
CacheKey {
url: request.current_url(),
}
}
pub fn from_url(url: ServoUrl) -> CacheKey {
CacheKey { url }
}
}
/// A response stored in the cache, together with the state needed to reuse it.
#[derive(Clone)]
pub struct CachedResource {
/// Headers of the request that produced this resource; compared against new requests
/// when the stored response carries a `Vary` header.
request_headers: Arc<ParkingLotMutex<HeaderMap>>,
/// The response body, shared with the response it was stored from.
body: Arc<ParkingLotMutex<ResponseBody>>,
/// Abort flag shared with the response it was stored from; aborted resources are never served.
aborted: Arc<AtomicBool>,
/// Senders to notify once a body that is still being received has finished (or was aborted).
awaiting_body: Arc<ParkingLotMutex<Vec<TokioSender<Data>>>>,
/// Metadata (headers, final URL, content type, ...) captured when the response was stored.
metadata: CachedMetadata,
/// The `location_url` of the stored response.
location_url: Option<Result<ServoUrl, String>>,
/// The HTTPS state of the stored response.
https_state: HttpsState,
/// The status of the stored response.
status: HttpStatus,
/// The URL list of the stored response.
url_list: Vec<ServoUrl>,
/// Freshness lifetime of the resource, measured from `last_validated`.
expires: Duration,
/// The instant from which `expires` is measured.
last_validated: Instant,
}
impl MallocSizeOf for CachedResource {
    /// Reports the heap usage of this resource.
    ///
    /// `request_headers` is not included in the total.
    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
        // Fields held behind an `Arc` are measured unconditionally.
        let shared = self.body.unconditional_size_of(ops) +
            self.aborted.unconditional_size_of(ops) +
            self.awaiting_body.unconditional_size_of(ops);
        // Fields owned directly by the resource.
        let owned = self.metadata.size_of(ops) +
            self.location_url.size_of(ops) +
            self.https_state.size_of(ops) +
            self.status.size_of(ops) +
            self.url_list.size_of(ops) +
            self.expires.size_of(ops) +
            self.last_validated.size_of(ops);
        shared + owned
    }
}
/// Metadata of a cached response.
#[derive(Clone, MallocSizeOf)]
struct CachedMetadata {
/// The stored response headers; updated in place when the resource is revalidated.
#[ignore_malloc_size_of = "Defined in `http` and has private members"]
pub headers: Arc<ParkingLotMutex<HeaderMap>>,
/// The final URL of the stored response.
pub final_url: ServoUrl,
/// The content type of the stored response, as a string.
pub content_type: Option<String>,
/// The character set of the stored response.
pub charset: Option<String>,
/// The status recorded in the response metadata.
pub status: HttpStatus,
}
/// A response constructed from a cached resource.
pub(crate) struct CachedResponse {
/// The response built from the cached resource.
pub response: Response,
/// Whether the cached resource had expired when the response was constructed,
/// meaning it must be validated before use.
pub needs_validation: bool,
}
/// All resources stored under one cache key, behind an async read/write lock.
type CacheEntry = std::sync::Arc<TokioRwLock<Vec<CachedResource>>>;
/// The concurrent cache mapping a `CacheKey` to its `CacheEntry`.
type QuickCache = Cache<CacheKey, CacheEntry, UnitWeighter>;
/// The lifecycle type used by the cache for its keys and entries.
type OurLifecycle = DefaultLifecycle<CacheKey, CacheEntry>;
/// The placeholder guard handed out by the cache when a key has no entry yet.
type QuickCachePlaceeholderGuard<'a> =
PlaceholderGuard<'a, CacheKey, CacheEntry, UnitWeighter, DefaultHashBuilder, OurLifecycle>;
/// An in-memory HTTP cache.
pub struct HttpCache {
/// Cached resources, keyed by `CacheKey`.
entries: QuickCache,
}
impl MallocSizeOf for HttpCache {
    /// Reports the combined heap usage of every cache entry.
    ///
    /// Each entry is read with a blocking read lock while it is measured.
    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
        let mut total = 0;
        for (_key, entry) in self.entries.iter() {
            total += entry.blocking_read().size_of(ops);
        }
        total
    }
}
impl Default for HttpCache {
    /// Creates an empty cache whose capacity comes from the `network_http_cache_size` preference.
    ///
    /// # Panics
    ///
    /// Panics if the preference value cannot be converted to the capacity type expected by
    /// the underlying cache.
    fn default() -> Self {
        Self {
            entries: Cache::new(
                pref!(network_http_cache_size)
                    .try_into()
                    .expect("http_cache_size needs to fit into u64"),
            ),
        }
    }
}
/// Returns `true` if `status_code` is one of the status codes treated as cacheable by default.
fn is_cacheable_by_default(status_code: StatusCode) -> bool {
    const CACHEABLE_BY_DEFAULT: [u16; 11] =
        [200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501];
    CACHEABLE_BY_DEFAULT.contains(&status_code.as_u16())
}
fn response_is_cacheable(metadata: &Metadata) -> bool {
let mut is_cacheable = false;
let headers = metadata.headers.as_ref().unwrap();
if headers.contains_key(header::EXPIRES) ||
headers.contains_key(header::LAST_MODIFIED) ||
headers.contains_key(header::ETAG)
{
is_cacheable = true;
}
if let Some(ref directive) = headers.typed_get::<CacheControl>() {
if directive.no_store() {
return false;
}
if directive.public() ||
directive.s_max_age().is_some() ||
directive.max_age().is_some() ||
directive.no_cache()
{
return true;
}
}
if let Some(pragma) = headers.typed_get::<Pragma>() {
if pragma.is_no_cache() {
return false;
}
}
is_cacheable
}
/// Returns the age of `response` as advertised by its `Age` header.
///
/// A missing, non-textual or non-numeric header yields a zero duration.
fn calculate_response_age(response: &Response) -> Duration {
    let Some(age_header) = response.headers.get(header::AGE) else {
        return Duration::default();
    };
    let Ok(age_string) = age_header.to_str() else {
        return Duration::default();
    };
    match age_string.parse::<u64>() {
        Ok(seconds) => Duration::from_secs(seconds),
        Err(_) => Duration::default(),
    }
}
fn get_response_expiry(response: &Response) -> Duration {
let age = calculate_response_age(response);
let now = SystemTime::now();
if let Some(directives) = response.headers.typed_get::<CacheControl>() {
if directives.no_cache() {
return Duration::ZERO;
}
if let Some(max_age) = directives.max_age().or(directives.s_max_age()) {
return max_age.saturating_sub(age);
}
}
match response.headers.typed_get::<Expires>() {
Some(expiry) => {
let expiry_time: SystemTime = expiry.into();
return expiry_time.duration_since(now).unwrap_or(Duration::ZERO);
},
None if response.headers.contains_key(header::EXPIRES) => return Duration::ZERO,
_ => {},
}
if let Some(ref code) = response.status.try_code() {
let max_heuristic = Duration::from_secs(24 * 60 * 60).saturating_sub(age);
let heuristic_freshness = if let Some(last_modified) =
response.headers.typed_get::<LastModified>()
{
let last_modified: SystemTime = last_modified.into();
let time_since_last_modified = now.duration_since(last_modified).unwrap_or_default();
let raw_heuristic_calc = time_since_last_modified / 10;
if raw_heuristic_calc < max_heuristic {
raw_heuristic_calc
} else {
max_heuristic
}
} else {
max_heuristic
};
if is_cacheable_by_default(*code) {
return heuristic_freshness;
}
if let Some(ref directives) = response.headers.typed_get::<CacheControl>() {
if directives.public() {
return heuristic_freshness;
}
}
}
Duration::ZERO
}
/// Adjusts the stored freshness lifetime `expires` according to the `Cache-Control`
/// directives of `request`.
///
/// The first matching directive wins, in this order:
/// - `max-stale`: the lifetime is extended by the given amount.
/// - `max-age`: the lifetime is reduced by the given amount, bottoming out at zero.
/// - `min-fresh`: the lifetime is reduced by the given amount, bottoming out at zero.
/// - `no-cache` / `no-store`: the lifetime is zero.
///
/// Without a `Cache-Control` header, or with none of these directives, `expires` is
/// returned unchanged.
///
/// All arithmetic saturates: `Duration`'s `+` and `-` operators panic on overflow and
/// underflow, and the directive values come straight from request headers.
fn get_expiry_adjustment_from_request_headers(request: &Request, expires: Duration) -> Duration {
    let Some(directive) = request.headers.typed_get::<CacheControl>() else {
        return expires;
    };

    if let Some(max_stale) = directive.max_stale() {
        return expires.saturating_add(max_stale);
    }

    if let Some(max_age) = directive.max_age() {
        return if expires > max_age {
            Duration::ZERO
        } else {
            // `expires <= max_age` here, so a plain subtraction would panic
            // whenever the two differ.
            expires.saturating_sub(max_age)
        };
    }

    if let Some(min_fresh) = directive.min_fresh() {
        return expires.saturating_sub(min_fresh);
    }

    if directive.no_cache() || directive.no_store() {
        return Duration::ZERO;
    }

    expires
}
/// Builds a response for `request` out of `cached_resource`, using `cached_headers` as
/// the response headers.
///
/// Returns `None` if the cached resource was aborted. If the cached body is still being
/// received, a fresh channel is installed in `done_chan` and its sender is registered
/// with the resource so the consumer can be notified once the body completes.
///
/// The returned `needs_validation` flag is set when the freshness lifetime, adjusted for
/// the request's own cache directives, has elapsed since the resource was last validated.
fn create_cached_response(
    request: &Request,
    cached_resource: &CachedResource,
    cached_headers: &HeaderMap,
    done_chan: &mut DoneChannel,
) -> Option<CachedResponse> {
    debug!("creating a cached response for {:?}", request.url());
    let was_aborted = cached_resource.aborted.load(Ordering::Acquire);
    if was_aborted {
        return None;
    }

    let timing = ResourceFetchTiming::new(request.timing_type());
    let mut response = Response::new(cached_resource.metadata.final_url.clone(), timing);
    response.headers = cached_headers.clone();
    response.body = cached_resource.body.clone();

    // The body lock stays held while the sender is registered.
    if let ResponseBody::Receiving(_) = *cached_resource.body.lock() {
        debug!("existing body is in progress");
        let (done_sender, done_receiver) = unbounded();
        let registered_sender = done_sender.clone();
        *done_chan = Some((registered_sender, done_receiver));
        cached_resource.awaiting_body.lock().push(done_sender);
    }

    // Copy over the remaining state of the stored response.
    response.status = cached_resource.status.clone();
    response.location_url = cached_resource.location_url.clone();
    response.url_list = cached_resource.url_list.clone();
    response.https_state = cached_resource.https_state;
    response.aborted = cached_resource.aborted.clone();

    // Referrer information always comes from the current request.
    response.referrer = request.referrer.to_url().cloned();
    response.referrer_policy = request.referrer_policy;

    let adjusted_expires =
        get_expiry_adjustment_from_request_headers(request, cached_resource.expires);
    let time_since_validated = cached_resource.last_validated.elapsed();

    Some(CachedResponse {
        response,
        needs_validation: adjusted_expires <= time_since_validated,
    })
}
/// Creates a partial-content (206) resource whose body is a copy of `bytes`.
///
/// The body, abort flag and list of awaiting consumers are fresh; everything else
/// (request headers, metadata, URLs, HTTPS state and freshness information) is taken
/// over from `resource`.
fn create_resource_with_bytes_from_resource(
    bytes: &[u8],
    resource: &CachedResource,
) -> CachedResource {
    CachedResource {
        body: Arc::new(ParkingLotMutex::new(ResponseBody::Done(bytes.to_vec()))),
        aborted: Arc::new(AtomicBool::new(false)),
        awaiting_body: Arc::new(ParkingLotMutex::new(Vec::new())),
        status: StatusCode::PARTIAL_CONTENT.into(),
        ..resource.clone()
    }
}
/// Returns the part of `body` selected by `bound`.
///
/// `body_offset` is the absolute position, within the full resource, of the first byte
/// of `body`. Only `start-end` and `start-` bounds are supported. `None` is returned for
/// any other bound, for ranges that are inverted or lie outside of `body`, and whenever
/// an offset does not fit the platform's `usize`.
fn slice_body_for_bound(
    body: &[u8],
    bound: (Bound<u64>, Bound<u64>),
    body_offset: u64,
) -> Option<&[u8]> {
    match bound {
        (Bound::Included(beginning), Bound::Included(end)) => {
            let start = usize::try_from(beginning.checked_sub(body_offset)?).ok()?;
            // `end` is inclusive, so the exclusive upper bound is one past it.
            let stop = usize::try_from(end.checked_sub(body_offset)?)
                .ok()?
                .checked_add(1)?;
            body.get(start..stop)
        },
        (Bound::Included(beginning), Bound::Unbounded) => {
            let start = usize::try_from(beginning.checked_sub(body_offset)?).ok()?;
            body.get(start..)
        },
        _ => None,
    }
}

/// Tries to answer a range `request` from the cached `candidates`.
///
/// Only the first satisfiable range of `range_spec` is considered.
///
/// If a complete (200) resource is among the candidates, the requested bytes are taken
/// from its body; when that fails, `None` is returned without looking at partial
/// resources. Otherwise every partial (206) resource is tried in turn, and the first one
/// whose `Content-Range` covers the requested range and whose body has been fully
/// received is used.
///
/// Returns `None` when the range cannot be satisfied from the cache. Unsatisfiable or
/// malformed ranges never panic.
fn handle_range_request(
    request: &Request,
    candidates: &[&CachedResource],
    range_spec: &Range,
    done_chan: &mut DoneChannel,
) -> Option<CachedResponse> {
    let complete_resource = candidates
        .iter()
        .find(|resource| resource.status == StatusCode::OK);

    if let Some(complete_resource) = complete_resource {
        let body_guard = complete_resource.body.lock();
        // Bodies that are still being received cannot be sliced yet.
        let ResponseBody::Done(body) = &*body_guard else {
            return None;
        };
        let body_len = u64::try_from(body.len()).ok()?;
        let bound = range_spec.satisfiable_ranges(body_len).next()?;
        let bytes = slice_body_for_bound(body, bound, 0)?;
        let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
        let cached_headers = new_resource.metadata.headers.lock();
        return create_cached_response(request, &new_resource, &cached_headers, done_chan);
    }

    let partial_cached_resources = candidates
        .iter()
        .filter(|resource| resource.status == StatusCode::PARTIAL_CONTENT);
    for partial_resource in partial_cached_resources {
        let headers = partial_resource.metadata.headers.lock();
        let Some(content_range) = headers.typed_get::<ContentRange>() else {
            continue;
        };
        // Both the stored byte range and the total length of the resource are needed.
        let (Some((res_beginning, res_end)), Some(total)) =
            (content_range.bytes_range(), content_range.bytes_len())
        else {
            continue;
        };
        // `satisfiable_ranges` expects the total length of the resource, as on the
        // complete-resource path above.
        let Some(bound) = range_spec.satisfiable_ranges(total).next() else {
            continue;
        };

        let is_covered = match bound {
            (Bound::Included(beginning), Bound::Included(end)) => {
                res_beginning <= beginning && res_end >= end
            },
            // An open-ended range needs a stored part that reaches the last byte.
            (Bound::Included(beginning), Bound::Unbounded) => {
                total != 0 && res_beginning <= beginning && res_end == total - 1
            },
            _ => false,
        };
        if !is_covered {
            continue;
        }

        let body_guard = partial_resource.body.lock();
        let ResponseBody::Done(body) = &*body_guard else {
            continue;
        };
        let Some(bytes) = slice_body_for_bound(body, bound, res_beginning) else {
            continue;
        };
        if let (Bound::Included(beginning), Bound::Unbounded) = bound {
            // `total` is non-zero here, see `is_covered`.
            let available_end = u64::try_from(bytes.len())
                .unwrap_or(u64::MAX)
                .saturating_add(beginning);
            if available_end < total - 1 {
                // The requested range goes beyond what is actually stored.
                continue;
            }
        }

        let new_resource = create_resource_with_bytes_from_resource(bytes, partial_resource);
        // The stored headers are already locked above; reuse that guard rather than
        // locking the same (shared) mutex again through `new_resource`.
        let cached_response = create_cached_response(request, &new_resource, &headers, done_chan);
        if let Some(cached_response) = cached_response {
            return Some(cached_response);
        }
    }
    None
}
/// Tries to construct a response for `request` from the resources in `cache_result`.
///
/// Returns `None` when the cache is disabled by preference, when the request is not a
/// `GET`, or when no stored resource can be used. Aborted resources and resources whose
/// `Vary` header does not match the request are skipped. Range requests are delegated to
/// `handle_range_request`; for other requests, partial (206) resources and resources
/// without a status code are never used.
pub(crate) fn construct_response(
    request: &Request,
    done_chan: &mut DoneChannel,
    cache_result: &[CachedResource],
) -> Option<CachedResponse> {
    if pref!(network_http_cache_disabled) {
        return None;
    }
    debug!("trying to construct cache response for {:?}", request.url());
    if request.method != Method::GET {
        debug!("non-GET method, not caching");
        return None;
    }

    let mut candidates = Vec::new();
    for stored in cache_result {
        if stored.aborted.load(Ordering::Relaxed) {
            continue;
        }
        let stored_headers = stored.metadata.headers.lock();
        let stored_request_headers = stored.request_headers.lock();

        let matches_vary = match stored_headers.typed_get::<Vary>() {
            None => true,
            Some(vary) if vary.is_any() => {
                debug!("vary value is any, not caching");
                false
            },
            Some(vary) => {
                let mut all_match = true;
                for name in vary.iter_strs() {
                    match (request.headers.get(name), stored_request_headers.get(name)) {
                        // Present in both requests: the values have to agree.
                        (Some(current), Some(original)) if original != current => {
                            debug!("headers don't match, not caching");
                            all_match = false;
                        },
                        // Absent now, but present in the original request.
                        (None, Some(_)) => {
                            debug!("vary header present, not caching");
                            all_match = false;
                        },
                        _ => {},
                    }
                    if !all_match {
                        break;
                    }
                }
                all_match
            },
        };

        if matches_vary {
            candidates.push(stored);
        }
    }

    if let Some(range_spec) = request.headers.typed_get::<Range>() {
        return handle_range_request(request, candidates.as_slice(), &range_spec, done_chan);
    }

    // Candidates are tried from the most recently collected one backwards.
    for cached_resource in candidates.into_iter().rev() {
        match cached_resource.status.try_code() {
            Some(code) if code != StatusCode::PARTIAL_CONTENT => {},
            _ => continue,
        }
        let cached_headers = cached_resource.metadata.headers.lock();
        let cached_response =
            create_cached_response(request, cached_resource, &cached_headers, done_chan);
        if cached_response.is_some() {
            return cached_response;
        }
    }

    debug!("couldn't find an appropriate response, not caching");
    None
}
pub fn refresh(
request: &Request,
response: Response,
done_chan: &mut DoneChannel,
cached_resources: &mut [CachedResource],
) -> Option<Response> {
assert_eq!(response.status, StatusCode::NOT_MODIFIED);
let cached_resource = cached_resources.iter_mut().next()?;
let mut constructed_response = if let Some(range_spec) = request.headers.typed_get::<Range>() {
handle_range_request(request, &[cached_resource], &range_spec, done_chan)
.map(|cached_response| cached_response.response)
} else {
let in_progress_channel = match &*cached_resource.body.lock() {
ResponseBody::Receiving(..) => Some(unbounded()),
ResponseBody::Empty | ResponseBody::Done(..) => None,
};
match in_progress_channel {
Some((done_sender, done_receiver)) => {
*done_chan = Some((done_sender.clone(), done_receiver));
cached_resource.awaiting_body.lock().push(done_sender);
},
None => *done_chan = None,
}
let resource_timing = ResourceFetchTiming::new(request.timing_type());
let mut constructed_response =
Response::new(cached_resource.metadata.final_url.clone(), resource_timing);
constructed_response.body = cached_resource.body.clone();
constructed_response
.status
.clone_from(&cached_resource.status);
constructed_response.https_state = cached_resource.https_state;
constructed_response.referrer = request.referrer.to_url().cloned();
constructed_response.referrer_policy = request.referrer_policy;
constructed_response
.status
.clone_from(&cached_resource.status);
constructed_response
.url_list
.clone_from(&cached_resource.url_list);
Some(constructed_response)
};
if let Some(constructed_response) = constructed_response.as_mut() {
cached_resource.expires = get_response_expiry(constructed_response);
let mut stored_headers = cached_resource.metadata.headers.lock();
stored_headers.extend(response.headers);
constructed_response.headers = stored_headers.clone();
}
constructed_response
}
/// Invalidates `cached_resources` in reaction to `response`.
///
/// The resources are invalidated once for each of the `Location` and `Content-Location`
/// headers of `response` that resolves against the request's current URL, and then once
/// more unconditionally.
///
/// NOTE(review): because of the final unconditional pass, the header-driven passes do not
/// change the outcome; confirm whether they are still needed here.
pub(crate) async fn invalidate(
    request: &Request,
    response: &Response,
    cached_resources: &mut [CachedResource],
) {
    for header_name in &[header::LOCATION, header::CONTENT_LOCATION] {
        let target = response
            .headers
            .get(header_name)
            .and_then(|value| value.to_str().ok());
        if let Some(target) = target {
            if request.current_url().join(target).is_ok() {
                invalidate_cached_resources(cached_resources).await;
            }
        }
    }
    invalidate_cached_resources(cached_resources).await;
}
/// Marks every resource in `cached_resources` as expired by zeroing its freshness lifetime.
async fn invalidate_cached_resources(cached_resources: &mut [CachedResource]) {
    cached_resources
        .iter_mut()
        .for_each(|cached_resource| cached_resource.expires = Duration::ZERO);
}
/// Resolves the value of the `header_name` header of `response` against the current URL
/// of `request`.
///
/// Returns `None` if the header is missing, is not valid text, or cannot be joined with
/// the request URL.
fn resolve_location_url(
    request: &Request,
    response: &Response,
    header_name: header::HeaderName,
) -> Option<ServoUrl> {
    let raw_value = response.headers.get(header_name)?;
    let location = raw_value.to_str().ok()?;
    request.current_url().join(location).ok()
}
impl HttpCache {
pub(crate) async fn update_awaiting_consumers(&self, request: &Request, response: &Response) {
let entry_key = CacheKey::new(request);
let cached_resources = match self.entries.get(&entry_key) {
None => return,
Some(resources) => resources,
};
let actual_response = response.actual_response();
let lock = cached_resources.read().await;
let relevant_cached_resources = lock.iter().filter(|resource| {
if actual_response.is_network_error() {
return *resource.body.lock() == ResponseBody::Empty;
}
resource.status == actual_response.status
});
for cached_resource in relevant_cached_resources {
let mut awaiting_consumers = cached_resource.awaiting_body.lock();
if awaiting_consumers.is_empty() {
continue;
}
let to_send = if cached_resource.aborted.load(Ordering::Acquire) {
Data::Cancelled
} else {
match *cached_resource.body.lock() {
ResponseBody::Done(_) | ResponseBody::Empty => Data::Done,
ResponseBody::Receiving(_) => {
continue;
},
}
};
for done_sender in awaiting_consumers.drain(..) {
let _ = done_sender.send(to_send.clone());
}
}
}
pub(crate) fn cache_entry_descriptors(&self) -> Vec<CacheEntryDescriptor> {
self.entries
.iter()
.map(|(key, _)| CacheEntryDescriptor::new(key.url.to_string()))
.collect()
}
pub(crate) fn clear(&self) {
self.entries.clear();
}
pub async fn store(&self, request: &Request, response: &Response) {
let guard = self.get_or_guard(CacheKey::new(request)).await;
guard.insert(request, response);
}
pub async fn construct_response(
&self,
request: &Request,
done_chan: &mut DoneChannel,
) -> Option<Response> {
let entry = self.entries.get(&CacheKey::new(request))?;
let cached_resources = entry.read().await;
construct_response(request, done_chan, cached_resources.as_slice())
.map(|cached| cached.response)
}
pub(crate) async fn invalidate_related_urls(
&self,
request: &Request,
response: &Response,
skip_key: &CacheKey,
) {
for header_name in &[header::LOCATION, header::CONTENT_LOCATION] {
if let Some(location_url) = resolve_location_url(request, response, header_name.clone())
{
let location_key = CacheKey::from_url(location_url);
if &location_key != skip_key {
self.invalidate_entry(&location_key).await;
}
}
}
}
async fn invalidate_entry(&self, key: &CacheKey) {
if let Some(entry) = self.entries.get(key) {
let mut guarded_resources = entry.write().await;
invalidate_cached_resources(guarded_resources.as_mut_slice()).await;
}
}
pub async fn get_or_guard(&self, entry_key: CacheKey) -> CachedResourcesOrGuard<'_> {
match self.entries.get_value_or_guard_async(&entry_key).await {
Ok(val) => CachedResourcesOrGuard::Value(val.write_owned().await),
Err(guard) => CachedResourcesOrGuard::Guard(guard),
}
}
}
/// The result of looking up a cache key: either write access to the resources already
/// stored under it, or a placeholder guard for inserting the first one.
pub enum CachedResourcesOrGuard<'a> {
/// Exclusive access to the resources of an existing cache entry.
Value(OwnedRwLockWriteGuard<Vec<CachedResource>>),
/// A placeholder for a key that has no entry yet.
Guard(QuickCachePlaceeholderGuard<'a>),
}
impl<'a> CachedResourcesOrGuard<'a> {
    /// Stores `response` as a new cached resource for `request`.
    ///
    /// Nothing is stored when the cache is disabled by preference, when the request is
    /// not a `GET` or carries an `Authorization` header, when the response has no usable
    /// metadata, or when the response is not cacheable.
    ///
    /// The resource is appended to the existing entry, or inserted as the first resource
    /// of a new entry through the placeholder guard. A failed insertion is logged.
    pub fn insert(self, request: &Request, response: &Response) {
        if pref!(network_http_cache_disabled) ||
            request.method != Method::GET ||
            request.headers.contains_key(header::AUTHORIZATION)
        {
            return;
        }

        let metadata = match response.metadata() {
            Ok(FetchMetadata::Unfiltered(metadata)) => metadata,
            Ok(FetchMetadata::Filtered { unsafe_, .. }) => unsafe_,
            _ => return,
        };
        if !response_is_cacheable(&metadata) {
            return;
        }

        let expires = get_response_expiry(response);
        let entry_resource = CachedResource {
            request_headers: Arc::new(ParkingLotMutex::new(request.headers.clone())),
            // The body and the abort flag stay shared with the live response.
            body: response.body.clone(),
            aborted: response.aborted.clone(),
            awaiting_body: Arc::new(ParkingLotMutex::new(Vec::new())),
            metadata: CachedMetadata {
                headers: Arc::new(ParkingLotMutex::new(response.headers.clone())),
                final_url: metadata.final_url,
                content_type: metadata.content_type.map(|v| v.0.to_string()),
                charset: metadata.charset,
                status: metadata.status,
            },
            location_url: response.location_url.clone(),
            https_state: response.https_state,
            status: response.status.clone(),
            url_list: response.url_list.clone(),
            expires,
            last_validated: Instant::now(),
        };

        match self {
            Self::Value(mut resources) => resources.push(entry_resource),
            Self::Guard(placeholder) => {
                let new_entry = std::sync::Arc::new(TokioRwLock::new(vec![entry_resource]));
                if placeholder.insert(new_entry).is_err() {
                    error!("Could not insert into cache");
                }
            },
        }
    }

    /// Returns mutable access to the stored resources, or `None` if this is only a
    /// placeholder for an entry that does not exist yet.
    pub fn try_as_mut(&mut self) -> Option<&mut Vec<CachedResource>> {
        if let Self::Value(resources) = self {
            Some(resources.as_mut())
        } else {
            None
        }
    }
}