Skip to main content

openjd_snapshots/
hash_cache.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5use std::path::{Path, PathBuf};
6use std::sync::Mutex;
7
/// Sentinel `range_end` value used for cache entries that describe a whole file
/// rather than a bounded byte range.
pub const WHOLE_FILE_RANGE_END: i64 = -1;
9
/// Build the byte-string key under which a path is stored in the cache.
///
/// The path is rendered as text (lossily, if it is not valid Unicode). On Windows
/// every backslash is then rewritten to a forward slash, so that keys derived from
/// filesystem paths agree with the already-normalized paths found in the manifest.
/// On other platforms the text is used unchanged.
fn normalize_cache_key(path: &Path) -> Vec<u8> {
    let key: String = path.to_string_lossy().into_owned();
    // Only Windows needs the separator rewrite; elsewhere `key` is left as rendered.
    #[cfg(windows)]
    let key = key.replace('\\', "/");
    key.into_bytes()
}
22
23pub struct HashCache {
24    conn: Mutex<rusqlite::Connection>,
25}
26
27impl HashCache {
28    pub fn new(cache_dir: impl AsRef<Path>) -> crate::Result<Self> {
29        let dir = cache_dir.as_ref();
30        std::fs::create_dir_all(dir)?;
31        let db_path = dir.join("hash_cache.db");
32        let conn = rusqlite::Connection::open(&db_path)
33            .map_err(|e| crate::SnapshotError::Cache(e.to_string()))?;
34        conn.pragma_update(None, "journal_mode", "WAL")
35            .map_err(|e| crate::SnapshotError::Cache(e.to_string()))?;
36        conn.execute_batch(
37            "CREATE TABLE IF NOT EXISTS hashesV4(
38                file_path blob,
39                hash_algorithm text,
40                range_start integer,
41                range_end integer,
42                file_hash text,
43                last_modified_time timestamp,
44                PRIMARY KEY (file_path, hash_algorithm, range_start, range_end)
45            );",
46        )
47        .map_err(|e| crate::SnapshotError::Cache(e.to_string()))?;
48        Ok(Self {
49            conn: Mutex::new(conn),
50        })
51    }
52
53    pub fn open_default() -> crate::Result<Self> {
54        let home = std::env::var("HOME").map_err(|_| {
55            crate::SnapshotError::Cache("$HOME is not set, cannot locate default cache".into())
56        })?;
57        Self::new(PathBuf::from(home).join(".deadline/job_attachments"))
58    }
59
60    pub fn get(
61        &self,
62        file_path: &Path,
63        algorithm: &str,
64        range_start: i64,
65        range_end: i64,
66    ) -> Option<(String, u64)> {
67        let path_bytes = normalize_cache_key(file_path);
68        let conn = self.conn.lock().unwrap();
69        conn.query_row(
70            "SELECT file_hash, last_modified_time FROM hashesV4
71                 WHERE file_path = ?1 AND hash_algorithm = ?2
72                   AND range_start = ?3 AND range_end = ?4",
73            rusqlite::params![path_bytes, algorithm, range_start, range_end],
74            |row| {
75                let hash: String = row.get(0)?;
76                let mtime: u64 = match row.get_ref(1)?.as_str() {
77                    Ok(s) => s.parse::<u64>().unwrap_or(0),
78                    Err(_) => row.get::<_, i64>(1)? as u64,
79                };
80                Ok((hash, mtime))
81            },
82        )
83        .ok()
84    }
85
86    pub fn put(
87        &self,
88        file_path: &Path,
89        algorithm: &str,
90        range_start: i64,
91        range_end: i64,
92        hash: &str,
93        mtime: u64,
94    ) -> crate::Result<()> {
95        let path_bytes = normalize_cache_key(file_path);
96        let mtime_text = rusqlite::types::Value::Text(mtime.to_string());
97        let conn = self.conn.lock().unwrap();
98        conn.execute(
99            "INSERT OR REPLACE INTO hashesV4
100                 (file_path, hash_algorithm, range_start, range_end, file_hash, last_modified_time)
101                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
102            rusqlite::params![
103                path_bytes,
104                algorithm,
105                range_start,
106                range_end,
107                hash,
108                mtime_text
109            ],
110        )
111        .map_err(|e| crate::SnapshotError::Cache(e.to_string()))?;
112        Ok(())
113    }
114
115    pub fn get_if_fresh(
116        &self,
117        file_path: &Path,
118        algorithm: &str,
119        range_start: i64,
120        range_end: i64,
121        current_mtime: u64,
122    ) -> Option<String> {
123        let (hash, cached_mtime) = self.get(file_path, algorithm, range_start, range_end)?;
124        if cached_mtime == current_mtime {
125            Some(hash)
126        } else {
127            None
128        }
129    }
130}
131
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Hash algorithm name shared by every test entry.
    const ALGO: &str = "xxh128";

    /// Creates a cache backed by a fresh temporary directory.
    ///
    /// The `TempDir` is handed back as well so that the directory stays alive for as
    /// long as the cache under test is in use.
    fn make_cache() -> (TempDir, HashCache) {
        let dir = TempDir::new().unwrap();
        let cache = HashCache::new(dir.path()).unwrap();
        (dir, cache)
    }

    /// A stored entry is returned with both its hash and its mtime.
    #[test]
    fn put_and_get() {
        let (_dir, cache) = make_cache();
        let file = Path::new("/tmp/test.txt");
        cache
            .put(file, ALGO, 0, WHOLE_FILE_RANGE_END, "abc123", 1000)
            .unwrap();

        let stored = cache.get(file, ALGO, 0, WHOLE_FILE_RANGE_END);
        assert_eq!(stored, Some(("abc123".to_string(), 1000)));
    }

    /// An entry whose mtime equals the current one counts as fresh.
    #[test]
    fn get_if_fresh_matching_mtime() {
        let (_dir, cache) = make_cache();
        let file = Path::new("/tmp/test.txt");
        cache
            .put(file, ALGO, 0, WHOLE_FILE_RANGE_END, "abc123", 1000)
            .unwrap();

        let fresh = cache.get_if_fresh(file, ALGO, 0, WHOLE_FILE_RANGE_END, 1000);
        assert_eq!(fresh.as_deref(), Some("abc123"));
    }

    /// An entry recorded with a different mtime is not returned.
    #[test]
    fn get_if_fresh_mismatched_mtime() {
        let (_dir, cache) = make_cache();
        let file = Path::new("/tmp/test.txt");
        cache
            .put(file, ALGO, 0, WHOLE_FILE_RANGE_END, "abc123", 1000)
            .unwrap();

        let stale = cache.get_if_fresh(file, ALGO, 0, WHOLE_FILE_RANGE_END, 2000);
        assert_eq!(stale, None);
    }

    /// Looking up a path that was never stored yields nothing.
    #[test]
    fn missing_entry_returns_none() {
        let (_dir, cache) = make_cache();
        let lookup = cache.get(Path::new("/no/such/file"), ALGO, 0, WHOLE_FILE_RANGE_END);
        assert!(lookup.is_none());
    }

    /// A whole-file entry and a ranged entry for the same path do not overwrite
    /// each other.
    #[test]
    fn whole_file_and_range_coexist() {
        let (_dir, cache) = make_cache();
        let file = Path::new("/tmp/big.bin");
        let entries = [(WHOLE_FILE_RANGE_END, "whole_hash"), (1024, "chunk_hash")];

        // Store both entries first, then read both back, so that the second write
        // has a chance to clobber the first before anything is checked.
        for (range_end, hash) in entries {
            cache.put(file, ALGO, 0, range_end, hash, 500).unwrap();
        }
        for (range_end, expected) in entries {
            let (hash, _) = cache.get(file, ALGO, 0, range_end).unwrap();
            assert_eq!(hash, expected);
        }
    }
}