gr/cache/
filesystem.rs

1use std::fs::{self, File};
2use std::io::{BufRead, BufReader, BufWriter, Read, Write};
3use std::path::Path;
4use std::rc::Rc;
5use std::sync::Arc;
6
7use flate2::bufread::GzDecoder;
8use sha2::{Digest, Sha256};
9
10use crate::cache::Cache;
11use crate::http::{Headers, Resource};
12use crate::io::{self, FlowControlHeaders, HttpResponse};
13use crate::time::Seconds;
14
15use super::CacheState;
16
17use crate::config::ConfigProperties;
18
19use crate::error::{self, AddContext, GRError};
20use crate::Result;
21
22use flate2::write::GzEncoder;
23use flate2::Compression;
24
/// File-system backed cache for HTTP responses.
///
/// Entries are stored under the directory returned by the configuration's
/// `cache_location`.
pub struct FileCache {
    /// Shared configuration; provides the cache location and the per-operation
    /// cache expiration.
    config: Arc<dyn ConfigProperties>,
}
28
29impl FileCache {
30    pub fn new(config: Arc<dyn ConfigProperties>) -> Self {
31        FileCache { config }
32    }
33
34    pub fn validate_cache_location(&self) -> Result<()> {
35        let cache_location = self
36            .config
37            .cache_location()
38            .ok_or(GRError::ConfigurationNotFound)?;
39
40        let path = Path::new(cache_location);
41
42        if !path.exists() {
43            return Err(GRError::CacheLocationDoesNotExist(format!(
44                "Cache directory does not exist: {}",
45                cache_location
46            ))
47            .into());
48        }
49
50        if !path.is_dir() {
51            return Err(GRError::CacheLocationIsNotADirectory(format!(
52                "Cache location is not a directory: {}",
53                cache_location
54            ))
55            .into());
56        }
57
58        // Check if we can write to the directory
59        let test_file_path = path.join(".write_test_cache_file");
60        match File::create(&test_file_path) {
61            Ok(_) => {
62                // Successfully created the file, now remove it
63                if let Err(e) = fs::remove_file(&test_file_path) {
64                    return Err(GRError::CacheLocationWriteTestFailed(format!(
65                        "Failed to remove cache test file {}: {}",
66                        test_file_path.to_string_lossy(),
67                        e
68                    ))
69                    .into());
70                }
71            }
72            Err(e) => {
73                return Err(GRError::CacheLocationIsNotWriteable(format!(
74                    "No write permission for cache directory {}: {}",
75                    cache_location, e
76                ))
77                .into());
78            }
79        }
80        Ok(())
81    }
82
83    pub fn get_cache_file(&self, url: &str) -> String {
84        let mut hasher = Sha256::new();
85        hasher.update(url);
86        let hash = hasher.finalize();
87        let cache_location = self.config.cache_location().unwrap();
88        let location = cache_location.strip_suffix('/').unwrap_or(cache_location);
89        format!("{}/{:x}", location, hash)
90    }
91
92    fn get_cache_data(&self, mut reader: impl BufRead) -> Result<HttpResponse> {
93        let decompressed_data = GzDecoder::new(&mut reader);
94        let mut reader = BufReader::new(decompressed_data);
95        let mut headers = String::new();
96        reader.read_line(&mut headers)?;
97        let mut status_code = String::new();
98        reader.read_line(&mut status_code)?;
99        let status_code = status_code.trim();
100        let status_code = match status_code.parse::<i32>() {
101            Ok(value) => value,
102            Err(err) => {
103                // parse error in here could be hard to find/debug. Send a clear
104                // error trace over to the client.
105                // TODO should we just treat it as a cache miss?
106                let trace = format!(
107                    "Could not parse the response status code from cache {}",
108                    err
109                );
110                return Err(error::gen(trace));
111            }
112        };
113        let mut body = Vec::new();
114        reader.read_to_end(&mut body)?;
115        let body = String::from_utf8(body)?.trim().to_string();
116        let headers_map = serde_json::from_str::<Headers>(&headers)?;
117        // Gather cached link headers for pagination.
118        // We don't need rate limit headers as we are not querying the API at
119        // this point.
120        let page_header = io::parse_page_headers(Some(&headers_map));
121        let flow_control_headers = FlowControlHeaders::new(Rc::new(page_header), Rc::new(None));
122
123        let response = HttpResponse::builder()
124            .status(status_code)
125            .body(body)
126            .headers(headers_map)
127            .flow_control_headers(flow_control_headers)
128            .build()?;
129        Ok(response)
130    }
131
132    fn persist_cache_data(&self, value: &HttpResponse, f: BufWriter<File>) -> Result<()> {
133        let headers_map = value.headers.as_ref().unwrap();
134        let headers = serde_json::to_string(headers_map).unwrap();
135        let status = value.status.to_string();
136        let file_data = format!("{}\n{}\n{}", headers, status, value.body);
137        let mut encoder = GzEncoder::new(f, Compression::default());
138        encoder.write_all(file_data.as_bytes())?;
139        Ok(())
140    }
141
142    fn expired(
143        &self,
144        key: &Resource,
145        path: String,
146        cache_control: Option<CacheControl>,
147    ) -> Result<bool> {
148        let cache_expiration = self
149            .config
150            .get_cache_expiration(key.api_operation.as_ref().unwrap())
151            .try_into()
152            .err_context(GRError::ConfigurationError(format!(
153                "Cannot retrieve cache expiration time. \
154                 Check your configuration file and make sure the key \
155                 <domain>.cache_api_{}_expiration has a valid time format.",
156                &key.api_operation.as_ref().unwrap()
157            )))?;
158        expired(
159            || get_file_mtime_elapsed(path.as_str()),
160            cache_expiration,
161            cache_control,
162        )
163    }
164}
165
166impl Cache<Resource> for FileCache {
167    fn get(&self, key: &Resource) -> Result<CacheState> {
168        let path = self.get_cache_file(&key.url);
169        if let Ok(f) = File::open(&path) {
170            let mut f = BufReader::new(f);
171            let response = self.get_cache_data(&mut f)?;
172
173            let cache_control = response.headers.as_ref().and_then(parse_cache_control);
174
175            if self.expired(key, path, cache_control)? {
176                return Ok(CacheState::Stale(response));
177            }
178            Ok(CacheState::Fresh(response))
179        } else {
180            Ok(CacheState::None)
181        }
182    }
183
184    fn set(&self, key: &Resource, value: &HttpResponse) -> Result<()> {
185        let path = self.get_cache_file(&key.url);
186        let f = File::create(path)?;
187        let f = BufWriter::new(f);
188        self.persist_cache_data(value, f)?;
189        Ok(())
190    }
191
192    fn update(
193        &self,
194        key: &Resource,
195        value: &HttpResponse,
196        field: &io::ResponseField,
197    ) -> Result<()> {
198        let path = self.get_cache_file(&key.url);
199        if let Ok(f) = File::open(path) {
200            let mut f = BufReader::new(f);
201            let mut response = self.get_cache_data(&mut f)?;
202            match field {
203                io::ResponseField::Body => response.body.clone_from(&value.body),
204                io::ResponseField::Headers => {
205                    // update existing headers with new ones. Not guaranteed
206                    // that a 304 will actually contain *all* the headers that
207                    // we got from an original 200 response. Update existing and
208                    // maintain old ones. Github wipes link headers on 304s that
209                    // actually existed in 200s.
210                    response
211                        .headers
212                        .as_mut()
213                        .unwrap()
214                        .extend(value.headers.as_ref().unwrap().clone());
215                }
216                io::ResponseField::Status => response.status = value.status,
217            }
218            return self.set(key, &response);
219        }
220        Ok(())
221    }
222}
223
/// Directives of the `cache-control` response header that are taken into
/// account when deciding whether a cached entry is expired.
struct CacheControl {
    /// Value of the `max-age` directive; `None` when absent or unparseable.
    max_age: Option<Seconds>,
    /// Whether the `no-cache` directive is present.
    no_cache: bool,
    /// Whether the `no-store` directive is present.
    no_store: bool,
}
229
230fn parse_cache_control(headers: &Headers) -> Option<CacheControl> {
231    headers.get("cache-control").map(|cc| {
232        let mut max_age = None;
233        let mut no_cache = false;
234        let mut no_store = false;
235
236        for directive in cc.split(',') {
237            let directive = directive.trim().to_lowercase();
238            if directive == "no-cache" {
239                no_cache = true;
240            } else if directive == "no-store" {
241                no_store = true;
242            } else if let Some(exp) = directive.strip_prefix("max-age=") {
243                max_age = exp.parse().ok();
244            }
245        }
246
247        CacheControl {
248            max_age,
249            no_cache,
250            no_store,
251        }
252    })
253}
254
255fn expired<F: Fn() -> Result<Seconds>>(
256    get_file_mtime_elapsed: F,
257    refresh_every: Seconds,
258    cache_control: Option<CacheControl>,
259) -> Result<bool> {
260    let elapsed = get_file_mtime_elapsed()?;
261
262    // Check user-defined expiration first
263    if elapsed < refresh_every {
264        return Ok(false);
265    }
266
267    // If user-defined expiration is reached, then consider cache-control
268    if let Some(cc) = cache_control {
269        if cc.no_store {
270            return Ok(true);
271        }
272        if cc.no_cache {
273            return Ok(true);
274        }
275        if let Some(max_age) = cc.max_age {
276            return Ok(elapsed >= max_age);
277        }
278    }
279
280    // If no cache-control or no relevant directives, it's expired
281    Ok(true)
282}
283
284fn get_file_mtime_elapsed(path: &str) -> Result<Seconds> {
285    let metadata = std::fs::metadata(path)?;
286    let mtime = metadata.modified()?.elapsed()?.as_secs();
287    Ok(Seconds::new(mtime))
288}
289
// Unit tests for the file-system cache.
291#[cfg(test)]
292mod tests {
293    use super::*;
294
    // Configuration stub used to build a `FileCache` in the tests.
    struct ConfigMock;
296
297    impl ConfigMock {
298        fn new() -> Self {
299            ConfigMock {}
300        }
301    }
302
    // Returns fixed values for the configuration properties the tests rely on.
    impl ConfigProperties for ConfigMock {
        // Fixed dummy API token.
        fn api_token(&self) -> &str {
            "1234"
        }
        // Fixed cache directory, without a trailing slash.
        fn cache_location(&self) -> Option<&str> {
            // TODO test with suffix /
            // should probably be sanitized on the Config struct itself.
            Some("/home/user/.cache")
        }
    }
313
314    #[test]
315    fn test_get_cache_file() {
316        let config = ConfigMock::new();
317        let file_cache = FileCache::new(Arc::new(config));
318        let url = "https://gitlab.org/api/v4/projects/jordilin%2Fmr";
319        let cache_file = file_cache.get_cache_file(url);
320        assert_eq!(
321            "/home/user/.cache/b677b4f27bfd83c168c62cb1b629ac06e9444c29c0380a20ea2f2cad266f7dd9",
322            cache_file
323        );
324    }
325
    // Round-trips a cache entry: the raw entry below (headers JSON line,
    // status line, body) is gzip-compressed in memory and then decoded with
    // `get_cache_data`. The status and body lines carry leading whitespace
    // that the decoder is expected to trim.
    #[test]
    fn test_get_cache_data() {
        let cached_data = r#"{"vary":"Accept-Encoding","cache-control":"max-age=0, private, must-revalidate","server":"nginx","transfer-encoding":"chunked","x-content-type-options":"nosniff","etag":"W/\"9ef5b79701ae0a753b6f08dc9229cdb6\"","x-per-page":"20","date":"Sat, 13 Jan 2024 19:50:23 GMT","connection":"keep-alive","x-next-page":"","x-runtime":"0.050489","content-type":"application/json","x-total-pages":"2","strict-transport-security":"max-age=63072000","referrer-policy":"strict-origin-when-cross-origin","x-prev-page":"1","x-request-id":"01HM260622PFEYAHAZQQWNT1WG","x-total":"22","x-page":"2","link":"<http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"prev\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"first\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=2&per_page=20>; rel=\"last\"","x-frame-options":"SAMEORIGIN"}
        200
        {"name":"385db2892449a18ca075c40344e6e9b418e3b16c","path":"tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c","location":"localhost:4567/tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c","revision":"791d4b6a13f90f0e48dd68fa1c758b79a6936f3854139eb01c9f251eded7c98d","short_revision":"791d4b6a1","digest":"sha256:41c70f2fcb036dfc6ca7da19b25cb660055268221b9d5db666bdbc7ad1ca2029","created_at":"2022-06-29T15:56:01.580+00:00","total_size":2819312
        "#;
        // Compress the raw entry in memory, as a stored cache file would be.
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(cached_data.as_bytes()).unwrap();
        let reader = std::io::Cursor::new(enc.finish().unwrap());
        let fc = FileCache::new(Arc::new(ConfigMock::new()));
        let response = fc.get_cache_data(reader).unwrap();

        assert_eq!(200, response.status);
        // The link header is returned exactly as it was stored.
        assert_eq!(
                    "<http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"prev\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=1&per_page=20>; rel=\"first\", <http://gitlab-web/api/v4/projects/tooling%2Fcli/members/all?id=tooling%2Fcli&page=2&per_page=20>; rel=\"last\"",
                    response.headers.as_ref().unwrap().get(io::LINK_HEADER).unwrap()
                );
        // The body comes back without the surrounding whitespace.
        assert_eq!(
                    "{\"name\":\"385db2892449a18ca075c40344e6e9b418e3b16c\",\"path\":\"tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c\",\"location\":\"localhost:4567/tooling/cli:385db2892449a18ca075c40344e6e9b418e3b16c\",\"revision\":\"791d4b6a13f90f0e48dd68fa1c758b79a6936f3854139eb01c9f251eded7c98d\",\"short_revision\":\"791d4b6a1\",\"digest\":\"sha256:41c70f2fcb036dfc6ca7da19b25cb660055268221b9d5db666bdbc7ad1ca2029\",\"created_at\":\"2022-06-29T15:56:01.580+00:00\",\"total_size\":2819312",
                    response.body
                );
    }
348
349    fn mock_file_mtime_elapsed(m_time: u64) -> Result<Seconds> {
350        Ok(Seconds::new(m_time))
351    }
352
353    #[test]
354    fn test_expired_cache_beyond_refresh_time() {
355        assert!(expired(|| mock_file_mtime_elapsed(500), Seconds::new(300), None).unwrap())
356    }
357
358    #[test]
359    fn test_expired_diff_now_and_cache_same_as_refresh() {
360        assert!(expired(|| mock_file_mtime_elapsed(300), Seconds::new(300), None).unwrap())
361    }
362
363    #[test]
364    fn test_not_expired_diff_now_and_cache_less_than_refresh() {
365        assert!(!expired(|| mock_file_mtime_elapsed(100), Seconds::new(1000), None).unwrap())
366    }
367
368    #[test]
369    fn test_expired_get_m_time_result_err() {
370        assert!(expired(
371            || Err(error::gen("Could not get file mtime")),
372            Seconds::new(1000),
373            None
374        )
375        .is_err())
376    }
377
378    fn cc(max_age: Option<Seconds>, no_cache: bool, no_store: bool) -> Option<CacheControl> {
379        Some(CacheControl {
380            max_age,
381            no_cache,
382            no_store,
383        })
384    }
385
386    #[test]
387    fn test_cache_not_expired_according_to_user_cache_control_ignored() {
388        let user_refresh = Seconds::new(3600);
389
390        assert!(!expired(
391            || Ok(Seconds::new(3000)),
392            user_refresh,
393            cc(Some(Seconds::new(2000)), false, false)
394        )
395        .unwrap());
396    }
397
398    #[test]
399    fn test_cache_expired_according_to_user_checks_http_cache_control() {
400        let user_refresh = Seconds::new(3600);
401
402        assert!(!expired(
403            || Ok(Seconds::new(3601)),
404            user_refresh,
405            cc(Some(Seconds::new(4000)), false, false)
406        )
407        .unwrap());
408
409        assert!(expired(
410            || Ok(Seconds::new(4001)),
411            user_refresh,
412            cc(Some(Seconds::new(4000)), false, false)
413        )
414        .unwrap());
415    }
416
417    #[test]
418    fn test_cache_expired_according_to_user_no_cache_control_directive() {
419        let user_refresh = Seconds::new(3600);
420
421        assert!(expired(
422            || Ok(Seconds::new(3601)),
423            user_refresh,
424            cc(None, true, false)
425        )
426        .unwrap());
427    }
428
429    #[test]
430    fn test_cache_expired_according_to_user_no_store_directive() {
431        let user_refresh = Seconds::new(3600);
432
433        assert!(expired(
434            || Ok(Seconds::new(3601)),
435            user_refresh,
436            cc(None, false, true)
437        )
438        .unwrap());
439    }
440
441    #[test]
442    fn test_cache_expired_according_to_user_no_cache_control_whatsoever() {
443        let user_refresh = Seconds::new(3600);
444        assert!(expired(|| Ok(Seconds::new(3601)), user_refresh, None).unwrap());
445    }
446
447    #[test]
448    fn test_user_expires_cache_but_http_cache_control_not_expired() {
449        let user_refresh = Seconds::new(3600);
450
451        assert!(!expired(
452            || Ok(Seconds::new(5000)),
453            user_refresh,
454            cc(Some(Seconds::new(6000)), false, false)
455        )
456        .unwrap());
457    }
458
459    #[test]
460    fn test_cache_expired_both_user_and_cache_control() {
461        let user_refresh = Seconds::new(3600);
462
463        assert!(expired(
464            || Ok(Seconds::new(7000)),
465            user_refresh,
466            cc(Some(Seconds::new(6000)), false, false)
467        )
468        .unwrap());
469    }
470
471    #[test]
472    fn test_parse_cache_control() {
473        let mut headers = Headers::new();
474
475        let test_table = vec![
476            (
477                "max-age=3600, no-cache, no-store",
478                Some(Seconds::new(3600)),
479                true,
480                true,
481            ),
482            (
483                "max-age=3600, no-cache",
484                Some(Seconds::new(3600)),
485                true,
486                false,
487            ),
488            (
489                "max-age=3600, no-store",
490                Some(Seconds::new(3600)),
491                false,
492                true,
493            ),
494            ("no-cache, no-store", None, true, true),
495            ("no-cache", None, true, false),
496            ("no-store", None, false, true),
497            ("max-age=0", Some(Seconds::new(0)), false, false),
498        ];
499
500        for (header, max_age, no_cache, no_store) in test_table {
501            headers.set("cache-control".to_string(), header.to_string());
502            let cc = parse_cache_control(&headers).unwrap();
503            assert_eq!(cc.max_age, max_age);
504            assert_eq!(cc.no_cache, no_cache);
505            assert_eq!(cc.no_store, no_store);
506        }
507    }
508}