pingora_cache/
filters.rs

1// Copyright 2025 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utility functions to help process HTTP headers for caching
16
17use super::*;
18use crate::cache_control::{CacheControl, Cacheable, InterpretCacheControl};
19use crate::RespCacheable::*;
20
21use cache_control::DELTA_SECONDS_OVERFLOW_VALUE;
22use http::{header, HeaderValue};
23use httpdate::HttpDate;
24use log::warn;
25use pingora_http::RequestHeader;
26
27/// Decide if the request can be cacheable
28pub fn request_cacheable(req_header: &ReqHeader) -> bool {
29    // TODO: the check is incomplete
30    matches!(req_header.method, Method::GET | Method::HEAD)
31}
32
33/// Decide if the response is cacheable.
34///
35/// `cache_control` is the parsed [CacheControl] from the response header. It is a standalone
36/// argument so that caller has the flexibility to choose to use, change or ignore it.
37pub fn resp_cacheable(
38    cache_control: Option<&CacheControl>,
39    mut resp_header: ResponseHeader,
40    authorization_present: bool,
41    defaults: &CacheMetaDefaults,
42) -> RespCacheable {
43    let now = SystemTime::now();
44    let expire_time = calculate_fresh_until(
45        now,
46        cache_control,
47        &resp_header,
48        authorization_present,
49        defaults,
50    );
51    if let Some(fresh_until) = expire_time {
52        let (stale_while_revalidate_duration, stale_if_error_duration) =
53            calculate_serve_stale_durations(cache_control, defaults);
54
55        if let Some(cc) = cache_control {
56            cc.strip_private_headers(&mut resp_header);
57        }
58        return Cacheable(CacheMeta::new(
59            fresh_until,
60            now,
61            stale_while_revalidate_duration,
62            stale_if_error_duration,
63            resp_header,
64        ));
65    }
66    Uncacheable(NoCacheReason::OriginNotCache)
67}
68
69/// Calculate the [SystemTime] at which the asset expires
70///
71/// Return None when not cacheable.
72pub fn calculate_fresh_until(
73    now: SystemTime,
74    cache_control: Option<&CacheControl>,
75    resp_header: &RespHeader,
76    authorization_present: bool,
77    defaults: &CacheMetaDefaults,
78) -> Option<SystemTime> {
79    fn freshness_ttl_to_time(now: SystemTime, fresh: Duration) -> Option<SystemTime> {
80        if fresh.is_zero() {
81            // ensure that the response is treated as stale
82            now.checked_sub(Duration::from_secs(1))
83        } else {
84            now.checked_add(fresh)
85        }
86    }
87
88    // A request with Authorization is normally not cacheable, unless Cache-Control allows it
89    if authorization_present {
90        let uncacheable = cache_control
91            .as_ref()
92            .map_or(true, |cc| !cc.allow_caching_authorized_req());
93        if uncacheable {
94            return None;
95        }
96    }
97
98    let uncacheable = cache_control
99        .as_ref()
100        .is_some_and(|cc| cc.is_cacheable() == Cacheable::No);
101    if uncacheable {
102        return None;
103    }
104
105    // For TTL check cache-control first, then expires header, then defaults
106    cache_control
107        .and_then(|cc| {
108            cc.fresh_duration()
109                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
110        })
111        .or_else(|| calculate_expires_header_time(resp_header))
112        .or_else(|| {
113            defaults
114                .fresh_sec(resp_header.status)
115                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
116        })
117}
118
119/// Calculate the expire time from the `Expires` header only
120pub fn calculate_expires_header_time(resp_header: &RespHeader) -> Option<SystemTime> {
121    // according to RFC 7234:
122    // https://datatracker.ietf.org/doc/html/rfc7234#section-4.2.1
123    // - treat multiple expires headers as invalid
124    // https://datatracker.ietf.org/doc/html/rfc7234#section-5.3
125    // - "MUST interpret invalid date formats... as representing a time in the past"
126    fn parse_expires_value(expires_value: &HeaderValue) -> Option<SystemTime> {
127        let expires = expires_value.to_str().ok()?;
128        Some(SystemTime::from(
129            expires
130                .parse::<HttpDate>()
131                .map_err(|e| warn!("Invalid HttpDate in Expires: {}, error: {}", expires, e))
132                .ok()?,
133        ))
134    }
135
136    let mut expires_iter = resp_header.headers.get_all("expires").iter();
137    let expires_header = expires_iter.next();
138    if expires_header.is_none() || expires_iter.next().is_some() {
139        return None;
140    }
141    parse_expires_value(expires_header.unwrap()).or(Some(SystemTime::UNIX_EPOCH))
142}
143
144/// Calculates stale-while-revalidate and stale-if-error seconds from Cache-Control or the [CacheMetaDefaults].
145pub fn calculate_serve_stale_durations(
146    cache_control: Option<&impl InterpretCacheControl>,
147    defaults: &CacheMetaDefaults,
148) -> (u32, u32) {
149    let serve_stale_while_revalidate = cache_control
150        .and_then(|cc| cc.serve_stale_while_revalidate_duration())
151        .unwrap_or_else(|| Duration::from_secs(defaults.serve_stale_while_revalidate_sec() as u64));
152    let serve_stale_if_error = cache_control
153        .and_then(|cc| cc.serve_stale_if_error_duration())
154        .unwrap_or_else(|| Duration::from_secs(defaults.serve_stale_if_error_sec() as u64));
155    (
156        serve_stale_while_revalidate
157            .as_secs()
158            .try_into()
159            .unwrap_or(DELTA_SECONDS_OVERFLOW_VALUE),
160        serve_stale_if_error
161            .as_secs()
162            .try_into()
163            .unwrap_or(DELTA_SECONDS_OVERFLOW_VALUE),
164    )
165}
166
167/// Filters to run when sending requests to upstream
168pub mod upstream {
169    use super::*;
170
171    /// Adjust the request header for cacheable requests
172    ///
173    /// This filter does the following in order to fetch the entire response to cache
174    /// - Convert HEAD to GET
175    /// - `If-*` headers are removed
176    /// - `Range` header is removed
177    ///
178    /// When `meta` is set, this function will inject `If-modified-since` according to the `Last-Modified` header
179    /// and inject `If-none-match` according to `Etag` header
180    pub fn request_filter(req: &mut RequestHeader, meta: Option<&CacheMeta>) -> Result<()> {
181        // change HEAD to GET, HEAD itself is not semantically cacheable
182        if req.method == Method::HEAD {
183            req.set_method(Method::GET);
184        }
185
186        // remove downstream precondition headers https://datatracker.ietf.org/doc/html/rfc7232#section-3
187        // we'd like to cache the 200 not the 304
188        req.remove_header(&header::IF_MATCH);
189        req.remove_header(&header::IF_NONE_MATCH);
190        req.remove_header(&header::IF_MODIFIED_SINCE);
191        req.remove_header(&header::IF_UNMODIFIED_SINCE);
192        // see below range header
193        req.remove_header(&header::IF_RANGE);
194
195        // remove downstream range header as we'd like to cache the entire response (this might change in the future)
196        req.remove_header(&header::RANGE);
197
198        // we have a presumably staled response already, add precondition headers for revalidation
199        if let Some(m) = meta {
200            // rfc7232: "SHOULD send both validators in cache validation" but
201            // there have been weird cases that an origin has matching etag but not Last-Modified
202            if let Some(since) = m.headers().get(&header::LAST_MODIFIED) {
203                req.insert_header(header::IF_MODIFIED_SINCE, since).unwrap();
204            }
205            if let Some(etag) = m.headers().get(&header::ETAG) {
206                req.insert_header(header::IF_NONE_MATCH, etag).unwrap();
207            }
208        }
209
210        Ok(())
211    }
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RespCacheable::Cacheable;
    use http::header::{HeaderName, CACHE_CONTROL, EXPIRES, SET_COOKIE};
    use http::StatusCode;
    use httpdate::fmt_http_date;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    // Per-status TTLs, no stale-while-revalidate, "infinite" stale-if-error
    const DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(
        |status| {
            let ttl_secs = match status {
                StatusCode::OK => Some(10),
                StatusCode::NOT_FOUND => Some(5),
                StatusCode::PARTIAL_CONTENT => None,
                _ => Some(1),
            };
            ttl_secs.map(Duration::from_secs)
        },
        0,
        DELTA_SECONDS_OVERFLOW_VALUE, /* "infinite" stale-if-error */
    );

    // Cache nothing, by default
    const BYPASS_CACHE_DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(|_| None, 0, 0);

    /// The point in time `secs` seconds after the current time.
    fn secs_from_now(secs: u64) -> SystemTime {
        SystemTime::now()
            .checked_add(Duration::from_secs(secs))
            .unwrap()
    }

    /// Build a response header with the given status and headers, appended in order.
    fn build_response(status: u16, headers: &[(HeaderName, &str)]) -> ResponseHeader {
        let mut resp = ResponseHeader::build(status, Some(headers.len())).unwrap();
        for (name, value) in headers {
            resp.append_header(name.to_string(), *value).unwrap();
        }
        resp
    }

    /// Run `resp_cacheable` with the Cache-Control parsed from `resp` itself and return the
    /// resulting meta, or `None` when the response is not cacheable.
    fn resp_cacheable_wrapper(
        resp: ResponseHeader,
        defaults: &CacheMetaDefaults,
        authorization_present: bool,
    ) -> Option<CacheMeta> {
        let cache_control = CacheControl::from_resp_headers(&resp);
        match resp_cacheable(cache_control.as_ref(), resp, authorization_present, defaults) {
            Cacheable(meta) => Some(meta),
            _ => None,
        }
    }

    #[test]
    fn test_resp_cacheable() {
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=12345")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        assert!(meta.is_fresh(SystemTime::now()));
        assert!(meta.is_fresh(secs_from_now(12)));
        assert!(!meta.is_fresh(secs_from_now(12346)));
    }

    #[test]
    fn test_resp_uncacheable_directives() {
        for directives in ["private, max-age=12345", "no-store, max-age=12345"] {
            let resp = build_response(200, &[(CACHE_CONTROL, directives)]);
            let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
            assert!(meta.is_none());
        }
    }

    #[test]
    fn test_resp_cache_authorization() {
        // no Cache-Control at all: not cacheable
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, true);
        assert!(meta.is_none());

        // max-age alone does not allow caching an authorized request
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, true);
        assert!(meta.is_none());

        // these directives do allow it
        for directives in ["s-maxage=10", "public, max-age=10", "must-revalidate"] {
            let resp = build_response(200, &[(CACHE_CONTROL, directives)]);
            let meta = resp_cacheable_wrapper(resp, &DEFAULTS, true);
            assert!(meta.unwrap().is_fresh(SystemTime::now()));
        }
    }

    #[test]
    fn test_resp_zero_max_age() {
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=0, public")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);

        // cacheable, but needs revalidation
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_expires() {
        let five_sec_time = secs_from_now(5);

        // future expires is cacheable
        let resp = build_response(200, &[(EXPIRES, &fmt_http_date(five_sec_time))]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(secs_from_now(6)));

        // even on default uncacheable statuses
        let resp = build_response(206, &[(EXPIRES, &fmt_http_date(five_sec_time))]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_past_expires() {
        // cacheable, but expired
        let resp = build_response(200, &[(EXPIRES, "Fri, 15 May 2015 15:34:21 GMT")]);
        let meta = resp_cacheable_wrapper(resp, &BYPASS_CACHE_DEFAULTS, false);
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_nonstandard_expires() {
        // init log to allow inspecting warnings
        init_log();

        // invalid cases, according to parser
        // (but should be stale according to RFC)
        let invalid_values = [
            "Mon, 13 Feb 0002 12:00:00 GMT",
            "Fri, 01 Dec 99999 16:00:00 GMT",
            "0",
        ];
        for expires in invalid_values {
            let resp = build_response(200, &[(EXPIRES, expires)]);
            let meta = resp_cacheable_wrapper(resp, &BYPASS_CACHE_DEFAULTS, false);
            assert!(!meta.unwrap().is_fresh(SystemTime::now()));
        }
    }

    #[test]
    fn test_resp_multiple_expires() {
        let five_sec_time = secs_from_now(5);
        let ten_sec_time = secs_from_now(10);

        // multiple expires = uncacheable
        let resp = build_response(
            200,
            &[
                (EXPIRES, &fmt_http_date(five_sec_time)),
                (EXPIRES, &fmt_http_date(ten_sec_time)),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &BYPASS_CACHE_DEFAULTS, false);
        assert!(meta.is_none());

        // unless the default is cacheable
        let resp = build_response(
            200,
            &[
                (EXPIRES, &fmt_http_date(five_sec_time)),
                (EXPIRES, &fmt_http_date(ten_sec_time)),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_cache_control_with_expires() {
        let five_sec_time = secs_from_now(5);

        // cache-control takes precedence over expires
        let resp = build_response(
            200,
            &[
                (EXPIRES, &fmt_http_date(five_sec_time)),
                (CACHE_CONTROL, "max-age=0"),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_stale_while_revalidate() {
        // respect defaults
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = secs_from_now(11);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(SystemTime::now()));
        assert!(!meta.serve_stale_while_revalidate(eleven_sec_time));

        // override with stale-while-revalidate
        let resp = build_response(
            200,
            &[(CACHE_CONTROL, "max-age=10, stale-while-revalidate=5")],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = secs_from_now(11);
        let sixteen_sec_time = secs_from_now(16);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_while_revalidate(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(sixteen_sec_time));
    }

    #[test]
    fn test_resp_stale_if_error() {
        // respect defaults
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let fifty_years_time = secs_from_now(86400 * 365 * 50);
        assert!(!meta.is_fresh(fifty_years_time));
        assert!(meta.serve_stale_if_error(fifty_years_time));

        // override with stale-if-error
        let resp = build_response(
            200,
            &[(
                CACHE_CONTROL,
                "max-age=10, stale-while-revalidate=5, stale-if-error=60",
            )],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = secs_from_now(11);
        let seventy_sec_time = secs_from_now(70);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_if_error(SystemTime::now()));
        assert!(meta.serve_stale_if_error(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(seventy_sec_time));

        // never serve stale
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10, stale-if-error=0")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = secs_from_now(11);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(eleven_sec_time));
    }

    #[test]
    fn test_resp_status_cache_defaults() {
        // 200 response
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(secs_from_now(9)));
        assert!(!meta.is_fresh(secs_from_now(11)));

        // 404 response, different ttl
        let meta = resp_cacheable_wrapper(build_response(404, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(secs_from_now(4)));
        assert!(!meta.is_fresh(secs_from_now(6)));

        // 206 marked uncacheable (no cache TTL)
        let meta = resp_cacheable_wrapper(build_response(206, &[]), &DEFAULTS, false);
        assert!(meta.is_none());

        // default uncacheable status with explicit Cache-Control is cacheable
        let resp = build_response(206, &[(CACHE_CONTROL, "public, max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(secs_from_now(9)));
        assert!(!meta.is_fresh(secs_from_now(11)));

        // 416 matches any status
        let meta = resp_cacheable_wrapper(build_response(416, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(secs_from_now(2)));
    }

    #[test]
    fn test_resp_cache_no_cache_fields() {
        // check #field-names are stripped from the cache header
        let resp = build_response(
            200,
            &[
                (SET_COOKIE, "my-cookie"),
                (CACHE_CONTROL, "private=\"something\", max-age=10"),
                (HeaderName::from_bytes(b"Something").unwrap(), "foo"),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();
        assert!(meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("Something"));

        let resp = build_response(
            200,
            &[
                (SET_COOKIE, "my-cookie"),
                (
                    CACHE_CONTROL,
                    "max-age=0, no-cache=\"meta1, SeT-Cookie ,meta2\"",
                ),
                (HeaderName::from_bytes(b"meta1").unwrap(), "foo"),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();
        assert!(!meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("meta1"));
    }
}