Skip to main content

mdql_core/
checksums.rs

1//! Sidecar checksum file for tamper detection.
2
3use std::collections::BTreeMap;
4use std::path::Path;
5
6use serde::{Deserialize, Serialize};
7
8use crate::schema::MDQL_FILENAME;
9
/// Name of the JSON sidecar file, located directly inside a table directory,
/// in which the per-file checksums are stored.
const CHECKSUMS_FILENAME: &str = "_checksums.json";
11
12#[derive(Debug, Serialize, Deserialize)]
13pub struct ChecksumFile {
14    pub algorithm: String,
15    pub files: BTreeMap<String, String>,
16}
17
18impl ChecksumFile {
19    fn new() -> Self {
20        ChecksumFile {
21            algorithm: "xxhash64".to_string(),
22            files: BTreeMap::new(),
23        }
24    }
25}
26
27pub fn hash_content(content: &[u8]) -> String {
28    format!("{:016x}", xxhash_rust::xxh64::xxh64(content, 0))
29}
30
31pub fn load_checksums(table_dir: &Path) -> Option<ChecksumFile> {
32    let path = table_dir.join(CHECKSUMS_FILENAME);
33    let content = std::fs::read_to_string(&path).ok()?;
34    serde_json::from_str(&content).ok()
35}
36
37pub fn save_checksums(table_dir: &Path, checksums: &ChecksumFile) -> crate::errors::Result<()> {
38    let path = table_dir.join(CHECKSUMS_FILENAME);
39    let tmp = path.with_extension("json.tmp");
40    let content = serde_json::to_string_pretty(checksums)
41        .map_err(|e| crate::errors::MdqlError::General(e.to_string()))?;
42    std::fs::write(&tmp, &content)?;
43    std::fs::rename(&tmp, &path)?;
44    Ok(())
45}
46
47pub fn update_checksum(table_dir: &Path, filename: &str, content: &[u8]) -> crate::errors::Result<()> {
48    let mut checksums = load_checksums(table_dir).unwrap_or_else(ChecksumFile::new);
49    checksums.files.insert(filename.to_string(), hash_content(content));
50    save_checksums(table_dir, &checksums)
51}
52
53pub fn remove_checksum(table_dir: &Path, filename: &str) -> crate::errors::Result<()> {
54    if let Some(mut checksums) = load_checksums(table_dir) {
55        checksums.files.remove(filename);
56        save_checksums(table_dir, &checksums)?;
57    }
58    Ok(())
59}
60
61pub fn regenerate_checksums(table_dir: &Path) -> crate::errors::Result<usize> {
62    let mut checksums = ChecksumFile::new();
63    let mut count = 0;
64
65    let mut entries: Vec<_> = std::fs::read_dir(table_dir)?
66        .filter_map(|e| e.ok())
67        .filter(|e| {
68            let name = e.file_name();
69            let name_str = name.to_string_lossy();
70            name_str.ends_with(".md") && name_str != MDQL_FILENAME
71        })
72        .collect();
73    entries.sort_by_key(|e| e.file_name());
74
75    for entry in entries {
76        let content = std::fs::read(entry.path())?;
77        let name = entry.file_name().to_string_lossy().to_string();
78        checksums.files.insert(name, hash_content(&content));
79        count += 1;
80    }
81
82    save_checksums(table_dir, &checksums)?;
83    Ok(count)
84}
85
86pub fn check_file(table_dir: &Path, filename: &str) -> Option<bool> {
87    let checksums = load_checksums(table_dir)?;
88    let expected = checksums.files.get(filename)?;
89    let file_path = table_dir.join(filename);
90    let content = std::fs::read(&file_path).ok()?;
91    Some(hash_content(&content) == *expected)
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Small markdown document with front matter shared by several tests.
    const SAMPLE_DOC: &[u8] = b"---\ntitle: Test\n---\n# Test\n";

    // Hashing the same bytes twice yields the same 16-character digest.
    #[test]
    fn test_hash_deterministic() {
        let first = hash_content(b"hello world");
        let second = hash_content(b"hello world");
        assert_eq!(first, second);
        assert_eq!(first.len(), 16);
    }

    // Different inputs produce different digests.
    #[test]
    fn test_hash_different_content() {
        assert_ne!(hash_content(b"hello"), hash_content(b"world"));
    }

    // An updated checksum can be loaded back from the sidecar.
    #[test]
    fn test_roundtrip() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), SAMPLE_DOC).unwrap();

        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        let loaded = load_checksums(root).unwrap();
        assert_eq!(loaded.algorithm, "xxhash64");
        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.md"));
    }

    // An unmodified file verifies against its recorded checksum.
    #[test]
    fn test_check_file_match() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), SAMPLE_DOC).unwrap();
        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        assert_eq!(check_file(root, "test.md"), Some(true));
    }

    // Rewriting the file after recording its checksum is detected.
    #[test]
    fn test_check_file_tampered() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), SAMPLE_DOC).unwrap();
        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        fs::write(root.join("test.md"), "---\ntitle: Changed\n---\n# Changed\n").unwrap();
        assert_eq!(check_file(root, "test.md"), Some(false));
    }

    // Without a sidecar there is no verdict.
    #[test]
    fn test_check_file_no_checksums() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), "content").unwrap();

        assert_eq!(check_file(root, "test.md"), None);
    }

    // Removing one entry leaves the other in place.
    #[test]
    fn test_remove_checksum() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        update_checksum(root, "a.md", b"a").unwrap();
        update_checksum(root, "b.md", b"b").unwrap();

        let before = load_checksums(root).unwrap();
        assert_eq!(before.files.len(), 2);

        remove_checksum(root, "a.md").unwrap();

        let after = load_checksums(root).unwrap();
        assert_eq!(after.files.len(), 1);
        assert!(!after.files.contains_key("a.md"));
    }

    // Regeneration hashes every `.md` file except the schema file.
    #[test]
    fn test_regenerate() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("_mdql.md"), "schema").unwrap();
        fs::write(root.join("a.md"), "aaa").unwrap();
        fs::write(root.join("b.md"), "bbb").unwrap();

        let hashed = regenerate_checksums(root).unwrap();
        assert_eq!(hashed, 2);

        let loaded = load_checksums(root).unwrap();
        assert_eq!(loaded.files.len(), 2);
        assert!(loaded.files.contains_key("a.md"));
        assert!(loaded.files.contains_key("b.md"));
        assert!(!loaded.files.contains_key("_mdql.md"));
    }
}