pingora_cache/
filters.rs

1// Copyright 2025 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utility functions to help process HTTP headers for caching
16
17use super::*;
18use crate::cache_control::{CacheControl, Cacheable, InterpretCacheControl};
19use crate::RespCacheable::*;
20
21use http::{header, HeaderValue};
22use httpdate::HttpDate;
23use log::warn;
24use pingora_http::RequestHeader;
25
26/// Decide if the request can be cacheable
27pub fn request_cacheable(req_header: &ReqHeader) -> bool {
28    // TODO: the check is incomplete
29    matches!(req_header.method, Method::GET | Method::HEAD)
30}
31
32/// Decide if the response is cacheable.
33///
34/// `cache_control` is the parsed [CacheControl] from the response header. It is a standalone
35/// argument so that caller has the flexibility to choose to use, change or ignore it.
36pub fn resp_cacheable(
37    cache_control: Option<&CacheControl>,
38    mut resp_header: ResponseHeader,
39    authorization_present: bool,
40    defaults: &CacheMetaDefaults,
41) -> RespCacheable {
42    let now = SystemTime::now();
43    let expire_time = calculate_fresh_until(
44        now,
45        cache_control,
46        &resp_header,
47        authorization_present,
48        defaults,
49    );
50    if let Some(fresh_until) = expire_time {
51        let (stale_while_revalidate_sec, stale_if_error_sec) =
52            calculate_serve_stale_sec(cache_control, defaults);
53
54        if let Some(cc) = cache_control {
55            cc.strip_private_headers(&mut resp_header);
56        }
57        return Cacheable(CacheMeta::new(
58            fresh_until,
59            now,
60            stale_while_revalidate_sec,
61            stale_if_error_sec,
62            resp_header,
63        ));
64    }
65    Uncacheable(NoCacheReason::OriginNotCache)
66}
67
68/// Calculate the [SystemTime] at which the asset expires
69///
70/// Return None when not cacheable.
71pub fn calculate_fresh_until(
72    now: SystemTime,
73    cache_control: Option<&CacheControl>,
74    resp_header: &RespHeader,
75    authorization_present: bool,
76    defaults: &CacheMetaDefaults,
77) -> Option<SystemTime> {
78    fn freshness_ttl_to_time(now: SystemTime, fresh_sec: u32) -> Option<SystemTime> {
79        if fresh_sec == 0 {
80            // ensure that the response is treated as stale
81            now.checked_sub(Duration::from_secs(1))
82        } else {
83            now.checked_add(Duration::from_secs(fresh_sec.into()))
84        }
85    }
86
87    // A request with Authorization is normally not cacheable, unless Cache-Control allows it
88    if authorization_present {
89        let uncacheable = cache_control
90            .as_ref()
91            .map_or(true, |cc| !cc.allow_caching_authorized_req());
92        if uncacheable {
93            return None;
94        }
95    }
96
97    let uncacheable = cache_control
98        .as_ref()
99        .is_some_and(|cc| cc.is_cacheable() == Cacheable::No);
100    if uncacheable {
101        return None;
102    }
103
104    // For TTL check cache-control first, then expires header, then defaults
105    cache_control
106        .and_then(|cc| {
107            cc.fresh_sec()
108                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
109        })
110        .or_else(|| calculate_expires_header_time(resp_header))
111        .or_else(|| {
112            defaults
113                .fresh_sec(resp_header.status)
114                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
115        })
116}
117
118/// Calculate the expire time from the `Expires` header only
119pub fn calculate_expires_header_time(resp_header: &RespHeader) -> Option<SystemTime> {
120    // according to RFC 7234:
121    // https://datatracker.ietf.org/doc/html/rfc7234#section-4.2.1
122    // - treat multiple expires headers as invalid
123    // https://datatracker.ietf.org/doc/html/rfc7234#section-5.3
124    // - "MUST interpret invalid date formats... as representing a time in the past"
125    fn parse_expires_value(expires_value: &HeaderValue) -> Option<SystemTime> {
126        let expires = expires_value.to_str().ok()?;
127        Some(SystemTime::from(
128            expires
129                .parse::<HttpDate>()
130                .map_err(|e| warn!("Invalid HttpDate in Expires: {}, error: {}", expires, e))
131                .ok()?,
132        ))
133    }
134
135    let mut expires_iter = resp_header.headers.get_all("expires").iter();
136    let expires_header = expires_iter.next();
137    if expires_header.is_none() || expires_iter.next().is_some() {
138        return None;
139    }
140    parse_expires_value(expires_header.unwrap()).or(Some(SystemTime::UNIX_EPOCH))
141}
142
143/// Calculates stale-while-revalidate and stale-if-error seconds from Cache-Control or the [CacheMetaDefaults].
144pub fn calculate_serve_stale_sec(
145    cache_control: Option<&impl InterpretCacheControl>,
146    defaults: &CacheMetaDefaults,
147) -> (u32, u32) {
148    let serve_stale_while_revalidate_sec = cache_control
149        .and_then(|cc| cc.serve_stale_while_revalidate_sec())
150        .unwrap_or_else(|| defaults.serve_stale_while_revalidate_sec());
151    let serve_stale_if_error_sec = cache_control
152        .and_then(|cc| cc.serve_stale_if_error_sec())
153        .unwrap_or_else(|| defaults.serve_stale_if_error_sec());
154    (serve_stale_while_revalidate_sec, serve_stale_if_error_sec)
155}
156
157/// Filters to run when sending requests to upstream
158pub mod upstream {
159    use super::*;
160
161    /// Adjust the request header for cacheable requests
162    ///
163    /// This filter does the following in order to fetch the entire response to cache
164    /// - Convert HEAD to GET
165    /// - `If-*` headers are removed
166    /// - `Range` header is removed
167    ///
168    /// When `meta` is set, this function will inject `If-modified-since` according to the `Last-Modified` header
169    /// and inject `If-none-match` according to `Etag` header
170    pub fn request_filter(req: &mut RequestHeader, meta: Option<&CacheMeta>) -> Result<()> {
171        // change HEAD to GET, HEAD itself is not semantically cacheable
172        if req.method == Method::HEAD {
173            req.set_method(Method::GET);
174        }
175
176        // remove downstream precondition headers https://datatracker.ietf.org/doc/html/rfc7232#section-3
177        // we'd like to cache the 200 not the 304
178        req.remove_header(&header::IF_MATCH);
179        req.remove_header(&header::IF_NONE_MATCH);
180        req.remove_header(&header::IF_MODIFIED_SINCE);
181        req.remove_header(&header::IF_UNMODIFIED_SINCE);
182        // see below range header
183        req.remove_header(&header::IF_RANGE);
184
185        // remove downstream range header as we'd like to cache the entire response (this might change in the future)
186        req.remove_header(&header::RANGE);
187
188        // we have a presumably staled response already, add precondition headers for revalidation
189        if let Some(m) = meta {
190            // rfc7232: "SHOULD send both validators in cache validation" but
191            // there have been weird cases that an origin has matching etag but not Last-Modified
192            if let Some(since) = m.headers().get(&header::LAST_MODIFIED) {
193                req.insert_header(header::IF_MODIFIED_SINCE, since).unwrap();
194            }
195            if let Some(etag) = m.headers().get(&header::ETAG) {
196                req.insert_header(header::IF_NONE_MATCH, etag).unwrap();
197            }
198        }
199
200        Ok(())
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RespCacheable::Cacheable;
    use http::header::{HeaderName, CACHE_CONTROL, EXPIRES, SET_COOKIE};
    use http::StatusCode;
    use httpdate::fmt_http_date;

    /// Set up the test logger; repeated initialization is ignored.
    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    /// Defaults that cache 200 for 10s, 404 for 5s, never cache 206 and cache the rest for 1s.
    const DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(
        |status| match status {
            StatusCode::OK => Some(10),
            StatusCode::NOT_FOUND => Some(5),
            StatusCode::PARTIAL_CONTENT => None,
            _ => Some(1),
        },
        0,
        u32::MAX, /* "infinite" stale-if-error */
    );

    // Cache nothing, by default
    const BYPASS_CACHE_DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(|_| None, 0, 0);

    /// The point in time `secs` seconds after the moment of the call.
    fn from_now(secs: u64) -> SystemTime {
        SystemTime::now()
            .checked_add(Duration::from_secs(secs))
            .unwrap()
    }

    /// Build a response header with the given status and headers (appended in order).
    fn build_response(status: u16, headers: &[(HeaderName, &str)]) -> ResponseHeader {
        let mut resp = ResponseHeader::build(status, Some(headers.len())).unwrap();
        for (name, value) in headers.iter() {
            resp.append_header(name.to_string(), *value).unwrap();
        }
        resp
    }

    /// Run `resp_cacheable` with the Cache-Control parsed from `resp` itself.
    ///
    /// Returns the resulting meta when cacheable, `None` otherwise.
    fn resp_cacheable_wrapper(
        resp: ResponseHeader,
        defaults: &CacheMetaDefaults,
        authorization_present: bool,
    ) -> Option<CacheMeta> {
        let cache_control = CacheControl::from_resp_headers(&resp);
        match resp_cacheable(cache_control.as_ref(), resp, authorization_present, defaults) {
            Cacheable(meta) => Some(meta),
            _ => None,
        }
    }

    #[test]
    fn test_resp_cacheable() {
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=12345")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        assert!(meta.is_fresh(SystemTime::now()));
        assert!(meta.is_fresh(from_now(12)));
        assert!(!meta.is_fresh(from_now(12346)));
    }

    #[test]
    fn test_resp_uncacheable_directives() {
        for directives in ["private, max-age=12345", "no-store, max-age=12345"].iter() {
            let resp = build_response(200, &[(CACHE_CONTROL, *directives)]);
            let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
            assert!(meta.is_none(), "expected uncacheable: {}", directives);
        }
    }

    #[test]
    fn test_resp_cache_authorization() {
        // without Cache-Control, or with a plain max-age, authorized requests are not cached
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, true);
        assert!(meta.is_none());

        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, true);
        assert!(meta.is_none());

        // these directives allow caching despite Authorization
        for directives in ["s-maxage=10", "public, max-age=10", "must-revalidate"].iter() {
            let resp = build_response(200, &[(CACHE_CONTROL, *directives)]);
            let meta = resp_cacheable_wrapper(resp, &DEFAULTS, true);
            assert!(
                meta.unwrap().is_fresh(SystemTime::now()),
                "expected fresh: {}",
                directives
            );
        }
    }

    #[test]
    fn test_resp_zero_max_age() {
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=0, public")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);

        // cacheable, but needs revalidation
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_expires() {
        let five_sec_time = from_now(5);

        // future expires is cacheable
        let expires = fmt_http_date(five_sec_time);
        let resp = build_response(200, &[(EXPIRES, expires.as_str())]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(from_now(6)));

        // even on default uncacheable statuses
        let expires = fmt_http_date(five_sec_time);
        let resp = build_response(206, &[(EXPIRES, expires.as_str())]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_past_expires() {
        // cacheable, but expired
        let resp = build_response(200, &[(EXPIRES, "Fri, 15 May 2015 15:34:21 GMT")]);
        let meta = resp_cacheable_wrapper(resp, &BYPASS_CACHE_DEFAULTS, false);
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_nonstandard_expires() {
        // init log to allow inspecting warnings
        init_log();

        // invalid cases, according to parser
        // (but should be stale according to RFC)
        let invalid_values = [
            "Mon, 13 Feb 0002 12:00:00 GMT",
            "Fri, 01 Dec 99999 16:00:00 GMT",
            "0",
        ];
        for value in invalid_values.iter() {
            let resp = build_response(200, &[(EXPIRES, *value)]);
            let meta = resp_cacheable_wrapper(resp, &BYPASS_CACHE_DEFAULTS, false);
            assert!(
                !meta.unwrap().is_fresh(SystemTime::now()),
                "expected stale: {}",
                value
            );
        }
    }

    #[test]
    fn test_resp_multiple_expires() {
        let five_sec_time = from_now(5);
        let ten_sec_time = from_now(10);

        let first = fmt_http_date(five_sec_time);
        let second = fmt_http_date(ten_sec_time);
        let duplicated = [(EXPIRES, first.as_str()), (EXPIRES, second.as_str())];

        // multiple expires = uncacheable
        let meta = resp_cacheable_wrapper(
            build_response(200, &duplicated),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(meta.is_none());

        // unless the default is cacheable
        let meta = resp_cacheable_wrapper(build_response(200, &duplicated), &DEFAULTS, false);
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_cache_control_with_expires() {
        let five_sec_time = from_now(5);
        let expires = fmt_http_date(five_sec_time);

        // cache-control takes precedence over expires
        let resp = build_response(
            200,
            &[(EXPIRES, expires.as_str()), (CACHE_CONTROL, "max-age=0")],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_stale_while_revalidate() {
        // respect defaults
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = from_now(11);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(SystemTime::now()));
        assert!(!meta.serve_stale_while_revalidate(eleven_sec_time));

        // override with stale-while-revalidate
        let resp = build_response(
            200,
            &[(CACHE_CONTROL, "max-age=10, stale-while-revalidate=5")],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = from_now(11);
        let sixteen_sec_time = from_now(16);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_while_revalidate(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(sixteen_sec_time));
    }

    #[test]
    fn test_resp_stale_if_error() {
        // respect defaults
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let hundred_years_time = from_now(86400 * 365 * 100);
        assert!(!meta.is_fresh(hundred_years_time));
        assert!(meta.serve_stale_if_error(hundred_years_time));

        // override with stale-if-error
        let resp = build_response(
            200,
            &[(
                CACHE_CONTROL,
                "max-age=10, stale-while-revalidate=5, stale-if-error=60",
            )],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = from_now(11);
        let seventy_sec_time = from_now(70);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_if_error(SystemTime::now()));
        assert!(meta.serve_stale_if_error(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(seventy_sec_time));

        // never serve stale
        let resp = build_response(200, &[(CACHE_CONTROL, "max-age=10, stale-if-error=0")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();

        let eleven_sec_time = from_now(11);
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(eleven_sec_time));
    }

    #[test]
    fn test_resp_status_cache_defaults() {
        // 200 response
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(from_now(9)));
        assert!(!meta.is_fresh(from_now(11)));

        // 404 response, different ttl
        let meta = resp_cacheable_wrapper(build_response(404, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(from_now(4)));
        assert!(!meta.is_fresh(from_now(6)));

        // 206 marked uncacheable (no cache TTL)
        let meta = resp_cacheable_wrapper(build_response(206, &[]), &DEFAULTS, false);
        assert!(meta.is_none());

        // default uncacheable status with explicit Cache-Control is cacheable
        let resp = build_response(206, &[(CACHE_CONTROL, "public, max-age=10")]);
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(from_now(9)));
        assert!(!meta.is_fresh(from_now(11)));

        // 416 matches any status
        let meta = resp_cacheable_wrapper(build_response(416, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(from_now(2)));
    }

    #[test]
    fn test_resp_cache_no_cache_fields() {
        // check #field-names are stripped from the cache header
        let resp = build_response(
            200,
            &[
                (SET_COOKIE, "my-cookie"),
                (CACHE_CONTROL, "private=\"something\", max-age=10"),
                (HeaderName::from_bytes(b"Something").unwrap(), "foo"),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();
        assert!(meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("Something"));

        let resp = build_response(
            200,
            &[
                (SET_COOKIE, "my-cookie"),
                (
                    CACHE_CONTROL,
                    "max-age=0, no-cache=\"meta1, SeT-Cookie ,meta2\"",
                ),
                (HeaderName::from_bytes(b"meta1").unwrap(), "foo"),
            ],
        );
        let meta = resp_cacheable_wrapper(resp, &DEFAULTS, false).unwrap();
        assert!(!meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("meta1"));
    }
}