gr/cache/
filesystem.rs

1use std::fs::{self, File};
2use std::io::{BufRead, BufReader, BufWriter, Read, Write};
3use std::path::Path;
4use std::rc::Rc;
5use std::sync::Arc;
6
7use flate2::bufread::GzDecoder;
8use sha2::{Digest, Sha256};
9
10use crate::cache::Cache;
11use crate::http::{Headers, Resource};
12use crate::io::{self, FlowControlHeaders, HttpResponse};
13use crate::time::Seconds;
14
15use super::CacheState;
16
17use crate::config::ConfigProperties;
18
19use crate::error::{self, AddContext, GRError};
20use crate::Result;
21
22use flate2::write::GzEncoder;
23use flate2::Compression;
24
25pub struct FileCache {
26    config: Arc<dyn ConfigProperties>,
27}
28
29impl FileCache {
30    pub fn new(config: Arc<dyn ConfigProperties>) -> Self {
31        FileCache { config }
32    }
33
34    pub fn validate_cache_location(&self) -> Result<()> {
35        let cache_location = self
36            .config
37            .cache_location()
38            .ok_or(GRError::ConfigurationNotFound)?;
39
40        let path = Path::new(cache_location);
41
42        if !path.exists() {
43            return Err(GRError::CacheLocationDoesNotExist(format!(
44                "Cache directory does not exist: {cache_location}"
45            ))
46            .into());
47        }
48
49        if !path.is_dir() {
50            return Err(GRError::CacheLocationIsNotADirectory(format!(
51                "Cache location is not a directory: {cache_location}"
52            ))
53            .into());
54        }
55
56        // Check if we can write to the directory
57        let test_file_path = path.join(".write_test_cache_file");
58        match File::create(&test_file_path) {
59            Ok(_) => {
60                // Successfully created the file, now remove it
61                if let Err(e) = fs::remove_file(&test_file_path) {
62                    return Err(GRError::CacheLocationWriteTestFailed(format!(
63                        "Failed to remove cache test file {}: {}",
64                        test_file_path.to_string_lossy(),
65                        e
66                    ))
67                    .into());
68                }
69            }
70            Err(e) => {
71                return Err(GRError::CacheLocationIsNotWriteable(format!(
72                    "No write permission for cache directory {cache_location}: {e}"
73                ))
74                .into());
75            }
76        }
77        Ok(())
78    }
79
80    pub fn get_cache_file(&self, url: &str) -> String {
81        let mut hasher = Sha256::new();
82        hasher.update(url);
83        let hash = hasher.finalize();
84        let cache_location = self.config.cache_location().unwrap();
85        let location = cache_location.strip_suffix('/').unwrap_or(cache_location);
86        format!("{location}/{hash:x}")
87    }
88
89    fn get_cache_data(&self, mut reader: impl BufRead) -> Result<HttpResponse> {
90        let decompressed_data = GzDecoder::new(&mut reader);
91        let mut reader = BufReader::new(decompressed_data);
92        let mut headers = String::new();
93        reader.read_line(&mut headers)?;
94        let mut status_code = String::new();
95        reader.read_line(&mut status_code)?;
96        let status_code = status_code.trim();
97        let status_code = match status_code.parse::<i32>() {
98            Ok(value) => value,
99            Err(err) => {
100                // parse error in here could be hard to find/debug. Send a clear
101                // error trace over to the client.
102                // TODO should we just treat it as a cache miss?
103                let trace = format!("Could not parse the response status code from cache {err}");
104                return Err(error::gen(trace));
105            }
106        };
107        let mut body = Vec::new();
108        reader.read_to_end(&mut body)?;
109        let body = String::from_utf8(body)?.trim().to_string();
110        let headers_map = serde_json::from_str::<Headers>(&headers)?;
111        // Gather cached link headers for pagination.
112        // We don't need rate limit headers as we are not querying the API at
113        // this point.
114        let page_header = io::parse_page_headers(Some(&headers_map));
115        let flow_control_headers = FlowControlHeaders::new(Rc::new(page_header), Rc::new(None));
116
117        let response = HttpResponse::builder()
118            .status(status_code)
119            .body(body)
120            .headers(headers_map)
121            .flow_control_headers(flow_control_headers)
122            .build()?;
123        Ok(response)
124    }
125
126    fn persist_cache_data(&self, value: &HttpResponse, f: BufWriter<File>) -> Result<()> {
127        let headers_map = value.headers.as_ref().unwrap();
128        let headers = serde_json::to_string(headers_map).unwrap();
129        let status = value.status.to_string();
130        let file_data = format!("{}\n{}\n{}", headers, status, value.body);
131        let mut encoder = GzEncoder::new(f, Compression::default());
132        encoder.write_all(file_data.as_bytes())?;
133        Ok(())
134    }
135
136    fn expired(
137        &self,
138        key: &Resource,
139        path: String,
140        cache_control: Option<CacheControl>,
141    ) -> Result<bool> {
142        let cache_expiration = self
143            .config
144            .get_cache_expiration(key.api_operation.as_ref().unwrap())
145            .try_into()
146            .err_context(GRError::ConfigurationError(format!(
147                "Cannot retrieve cache expiration time. \
148                 Check your configuration file and make sure the key \
149                 <domain>.cache_api_{}_expiration has a valid time format.",
150                &key.api_operation.as_ref().unwrap()
151            )))?;
152        expired(
153            || get_file_mtime_elapsed(path.as_str()),
154            cache_expiration,
155            cache_control,
156        )
157    }
158}
159
160impl Cache<Resource> for FileCache {
161    fn get(&self, key: &Resource) -> Result<CacheState> {
162        let path = self.get_cache_file(&key.url);
163        if let Ok(f) = File::open(&path) {
164            let mut f = BufReader::new(f);
165            let response = self.get_cache_data(&mut f)?;
166
167            let cache_control = response.headers.as_ref().and_then(parse_cache_control);
168
169            if self.expired(key, path, cache_control)? {
170                return Ok(CacheState::Stale(response));
171            }
172            Ok(CacheState::Fresh(response))
173        } else {
174            Ok(CacheState::None)
175        }
176    }
177
178    fn set(&self, key: &Resource, value: &HttpResponse) -> Result<()> {
179        let path = self.get_cache_file(&key.url);
180        let f = File::create(path)?;
181        let f = BufWriter::new(f);
182        self.persist_cache_data(value, f)?;
183        Ok(())
184    }
185
186    fn update(
187        &self,
188        key: &Resource,
189        value: &HttpResponse,
190        field: &io::ResponseField,
191    ) -> Result<()> {
192        let path = self.get_cache_file(&key.url);
193        if let Ok(f) = File::open(path) {
194            let mut f = BufReader::new(f);
195            let mut response = self.get_cache_data(&mut f)?;
196            match field {
197                io::ResponseField::Body => response.body.clone_from(&value.body),
198                io::ResponseField::Headers => {
199                    // update existing headers with new ones. Not guaranteed
200                    // that a 304 will actually contain *all* the headers that
201                    // we got from an original 200 response. Update existing and
202                    // maintain old ones. Github wipes link headers on 304s that
203                    // actually existed in 200s.
204                    response
205                        .headers
206                        .as_mut()
207                        .unwrap()
208                        .extend(value.headers.as_ref().unwrap().clone());
209                }
210                io::ResponseField::Status => response.status = value.status,
211            }
212            return self.set(key, &response);
213        }
214        Ok(())
215    }
216}
217
218struct CacheControl {
219    max_age: Option<Seconds>,
220    no_cache: bool,
221    no_store: bool,
222}
223
224fn parse_cache_control(headers: &Headers) -> Option<CacheControl> {
225    headers.get("cache-control").map(|cc| {
226        let mut max_age = None;
227        let mut no_cache = false;
228        let mut no_store = false;
229
230        for directive in cc.split(',') {
231            let directive = directive.trim().to_lowercase();
232            if directive == "no-cache" {
233                no_cache = true;
234            } else if directive == "no-store" {
235                no_store = true;
236            } else if let Some(exp) = directive.strip_prefix("max-age=") {
237                max_age = exp.parse().ok();
238            }
239        }
240
241        CacheControl {
242            max_age,
243            no_cache,
244            no_store,
245        }
246    })
247}
248
249fn expired<F: Fn() -> Result<Seconds>>(
250    get_file_mtime_elapsed: F,
251    refresh_every: Seconds,
252    cache_control: Option<CacheControl>,
253) -> Result<bool> {
254    let elapsed = get_file_mtime_elapsed()?;
255
256    // Check user-defined expiration first
257    if elapsed < refresh_every {
258        return Ok(false);
259    }
260
261    // If user-defined expiration is reached, then consider cache-control
262    if let Some(cc) = cache_control {
263        if cc.no_store {
264            return Ok(true);
265        }
266        if cc.no_cache {
267            return Ok(true);
268        }
269        if let Some(max_age) = cc.max_age {
270            return Ok(elapsed >= max_age);
271        }
272    }
273
274    // If no cache-control or no relevant directives, it's expired
275    Ok(true)
276}
277
278fn get_file_mtime_elapsed(path: &str) -> Result<Seconds> {
279    let metadata = std::fs::metadata(path)?;
280    let mtime = metadata.modified()?.elapsed()?.as_secs();
281    Ok(Seconds::new(mtime))
282}
283
284// test
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal configuration double whose cache location can be chosen per
    /// test.
    struct ConfigMock {
        cache_location: &'static str,
    }

    impl ConfigMock {
        /// Mock pointing at the default cache location used by most tests.
        fn new() -> Self {
            ConfigMock::with_cache_location("/home/user/.cache")
        }

        /// Mock pointing at an arbitrary cache location.
        fn with_cache_location(cache_location: &'static str) -> Self {
            ConfigMock { cache_location }
        }
    }

    impl ConfigProperties for ConfigMock {
        fn api_token(&self) -> &str {
            "1234"
        }
        fn cache_location(&self) -> Option<&str> {
            // The location should probably be sanitized on the Config struct
            // itself.
            Some(self.cache_location)
        }
    }

    #[test]
    fn test_get_cache_file() {
        let config = ConfigMock::new();
        let file_cache = FileCache::new(Arc::new(config));
        let url = "https://gitlab.org/api/v4/projects/jordilin%2Fmr";
        let cache_file = file_cache.get_cache_file(url);
        assert_eq!(
            "/home/user/.cache/b677b4f27bfd83c168c62cb1b629ac06e9444c29c0380a20ea2f2cad266f7dd9",
            cache_file
        );
    }

    #[test]
    fn test_get_cache_file_location_with_trailing_slash() {
        // A trailing slash in the configured location must not end up as a
        // doubled separator in the cache file path.
        let config = ConfigMock::with_cache_location("/home/user/.cache/");
        let file_cache = FileCache::new(Arc::new(config));
        let url = "https://gitlab.org/api/v4/projects/jordilin%2Fmr";
        let cache_file = file_cache.get_cache_file(url);
        assert_eq!(
            "/home/user/.cache/b677b4f27bfd83c168c62cb1b629ac06e9444c29c0380a20ea2f2cad266f7dd9",
            cache_file
        );
    }

    #[test]
    fn test_get_cache_data() {
        let cached_data = r#"{"vary":"Accept-Encoding","cache-control":"max-age=0, private, must-revalidate","server":"nginx","transfer-encoding":"chunked","x-content-type-options":"nosniff","etag":"W/\"9ef5b79701ae0a753b6f08dc9229cdb6\"","x-per-page":"20","date":"Sat, 13 Jan 2024 19:50:23 GMT","connection":"keep-alive","x-next-page":"","x-runtime":"0.050489","content-type":"application/json","x-total-pages":"2","strict-transport-security":"max-age=63072000","referrer-policy":"strict-origin-when-cross-origin","x-prev-page":"1","x-request-id":"01HM260622PFEYAHAZQQWNT1WG","x-total":"22","x-page":"2","link":"<http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"prev\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"first\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=2&per_page=20>; rel=\"last\"","x-frame-options":"SAMEORIGIN"}
        200
        {"name":"385db2892449a18ca075c40344e6e9b418e3b16c","path":"tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c","location":"localhost:4567/tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c","revision":"791d4b6a13f90f0e48dd68fa1c758b79a6936f3854139eb01c9f251eded7c98d","short_revision":"791d4b6a1","digest":"sha256:41c70f2fcb036dfc6ca7da19b25cb660055268221b9d5db666bdbc7ad1ca2029","created_at":"2022-06-29T15:56:01.580+00:00","total_size":2819312
        "#;
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(cached_data.as_bytes()).unwrap();
        let reader = std::io::Cursor::new(enc.finish().unwrap());
        let fc = FileCache::new(Arc::new(ConfigMock::new()));
        let response = fc.get_cache_data(reader).unwrap();

        assert_eq!(200, response.status);
        assert_eq!(
            "<http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"prev\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"first\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=2&per_page=20>; rel=\"last\"",
            response.headers.as_ref().unwrap().get(io::LINK_HEADER).unwrap()
        );
        assert_eq!(
            "{\"name\":\"385db2892449a18ca075c40344e6e9b418e3b16c\",\"path\":\"tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c\",\"location\":\"localhost:4567/tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c\",\"revision\":\"791d4b6a13f90f0e48dd68fa1c758b79a6936f3854139eb01c9f251eded7c98d\",\"short_revision\":\"791d4b6a1\",\"digest\":\"sha256:41c70f2fcb036dfc6ca7da19b25cb660055268221b9d5db666bdbc7ad1ca2029\",\"created_at\":\"2022-06-29T15:56:01.580+00:00\",\"total_size\":2819312",
            response.body
        );
    }

    fn mock_file_mtime_elapsed(m_time: u64) -> Result<Seconds> {
        Ok(Seconds::new(m_time))
    }

    #[test]
    fn test_expired_cache_beyond_refresh_time() {
        assert!(expired(|| mock_file_mtime_elapsed(500), Seconds::new(300), None).unwrap())
    }

    #[test]
    fn test_expired_diff_now_and_cache_same_as_refresh() {
        assert!(expired(|| mock_file_mtime_elapsed(300), Seconds::new(300), None).unwrap())
    }

    #[test]
    fn test_not_expired_diff_now_and_cache_less_than_refresh() {
        assert!(!expired(|| mock_file_mtime_elapsed(100), Seconds::new(1000), None).unwrap())
    }

    #[test]
    fn test_expired_get_m_time_result_err() {
        assert!(expired(
            || Err(error::gen("Could not get file mtime")),
            Seconds::new(1000),
            None
        )
        .is_err())
    }

    fn cc(max_age: Option<Seconds>, no_cache: bool, no_store: bool) -> Option<CacheControl> {
        Some(CacheControl {
            max_age,
            no_cache,
            no_store,
        })
    }

    #[test]
    fn test_cache_not_expired_according_to_user_cache_control_ignored() {
        let user_refresh = Seconds::new(3600);

        assert!(!expired(
            || Ok(Seconds::new(3000)),
            user_refresh,
            cc(Some(Seconds::new(2000)), false, false)
        )
        .unwrap());
    }

    #[test]
    fn test_cache_expired_according_to_user_checks_http_cache_control() {
        let user_refresh = Seconds::new(3600);

        assert!(!expired(
            || Ok(Seconds::new(3601)),
            user_refresh,
            cc(Some(Seconds::new(4000)), false, false)
        )
        .unwrap());

        assert!(expired(
            || Ok(Seconds::new(4001)),
            user_refresh,
            cc(Some(Seconds::new(4000)), false, false)
        )
        .unwrap());
    }

    #[test]
    fn test_cache_expired_according_to_user_no_cache_control_directive() {
        let user_refresh = Seconds::new(3600);

        assert!(expired(
            || Ok(Seconds::new(3601)),
            user_refresh,
            cc(None, true, false)
        )
        .unwrap());
    }

    #[test]
    fn test_cache_expired_according_to_user_no_store_directive() {
        let user_refresh = Seconds::new(3600);

        assert!(expired(
            || Ok(Seconds::new(3601)),
            user_refresh,
            cc(None, false, true)
        )
        .unwrap());
    }

    #[test]
    fn test_cache_expired_according_to_user_no_cache_control_whatsoever() {
        let user_refresh = Seconds::new(3600);
        assert!(expired(|| Ok(Seconds::new(3601)), user_refresh, None).unwrap());
    }

    #[test]
    fn test_user_expires_cache_but_http_cache_control_not_expired() {
        let user_refresh = Seconds::new(3600);

        assert!(!expired(
            || Ok(Seconds::new(5000)),
            user_refresh,
            cc(Some(Seconds::new(6000)), false, false)
        )
        .unwrap());
    }

    #[test]
    fn test_cache_expired_both_user_and_cache_control() {
        let user_refresh = Seconds::new(3600);

        assert!(expired(
            || Ok(Seconds::new(7000)),
            user_refresh,
            cc(Some(Seconds::new(6000)), false, false)
        )
        .unwrap());
    }

    #[test]
    fn test_parse_cache_control() {
        let mut headers = Headers::new();

        let test_table = vec![
            (
                "max-age=3600, no-cache, no-store",
                Some(Seconds::new(3600)),
                true,
                true,
            ),
            (
                "max-age=3600, no-cache",
                Some(Seconds::new(3600)),
                true,
                false,
            ),
            (
                "max-age=3600, no-store",
                Some(Seconds::new(3600)),
                false,
                true,
            ),
            ("no-cache, no-store", None, true, true),
            ("no-cache", None, true, false),
            ("no-store", None, false, true),
            ("max-age=0", Some(Seconds::new(0)), false, false),
        ];

        for (header, max_age, no_cache, no_store) in test_table {
            headers.set("cache-control".to_string(), header.to_string());
            let cc = parse_cache_control(&headers).unwrap();
            assert_eq!(cc.max_age, max_age);
            assert_eq!(cc.no_cache, no_cache);
            assert_eq!(cc.no_store, no_store);
        }
    }
}