1use std::collections::BTreeMap;
4use std::path::Path;
5
6use serde::{Deserialize, Serialize};
7
8use crate::schema::MDQL_FILENAME;
9
/// File name, relative to a table directory, under which the checksum manifest is stored.
const CHECKSUMS_FILENAME: &str = "_checksums.json";
11
12#[derive(Debug, Serialize, Deserialize)]
13pub struct ChecksumFile {
14 pub algorithm: String,
15 pub files: BTreeMap<String, String>,
16}
17
18impl ChecksumFile {
19 fn new() -> Self {
20 ChecksumFile {
21 algorithm: "xxhash64".to_string(),
22 files: BTreeMap::new(),
23 }
24 }
25}
26
27pub fn hash_content(content: &[u8]) -> String {
28 format!("{:016x}", xxhash_rust::xxh64::xxh64(content, 0))
29}
30
31pub fn load_checksums(table_dir: &Path) -> Option<ChecksumFile> {
32 let path = table_dir.join(CHECKSUMS_FILENAME);
33 let content = std::fs::read_to_string(&path).ok()?;
34 serde_json::from_str(&content).ok()
35}
36
37pub fn save_checksums(table_dir: &Path, checksums: &ChecksumFile) -> crate::errors::Result<()> {
38 let path = table_dir.join(CHECKSUMS_FILENAME);
39 let tmp = path.with_extension("json.tmp");
40 let content = serde_json::to_string_pretty(checksums)
41 .map_err(|e| crate::errors::MdqlError::General(e.to_string()))?;
42 std::fs::write(&tmp, &content)?;
43 std::fs::rename(&tmp, &path)?;
44 Ok(())
45}
46
47pub fn update_checksum(table_dir: &Path, filename: &str, content: &[u8]) -> crate::errors::Result<()> {
48 let mut checksums = load_checksums(table_dir).unwrap_or_else(ChecksumFile::new);
49 checksums.files.insert(filename.to_string(), hash_content(content));
50 save_checksums(table_dir, &checksums)
51}
52
53pub fn remove_checksum(table_dir: &Path, filename: &str) -> crate::errors::Result<()> {
54 if let Some(mut checksums) = load_checksums(table_dir) {
55 checksums.files.remove(filename);
56 save_checksums(table_dir, &checksums)?;
57 }
58 Ok(())
59}
60
61pub fn regenerate_checksums(table_dir: &Path) -> crate::errors::Result<usize> {
62 let mut checksums = ChecksumFile::new();
63 let mut count = 0;
64
65 let mut entries: Vec<_> = std::fs::read_dir(table_dir)?
66 .filter_map(|e| e.ok())
67 .filter(|e| {
68 let name = e.file_name();
69 let name_str = name.to_string_lossy();
70 name_str.ends_with(".md") && name_str != MDQL_FILENAME
71 })
72 .collect();
73 entries.sort_by_key(|e| e.file_name());
74
75 for entry in entries {
76 let content = std::fs::read(entry.path())?;
77 let name = entry.file_name().to_string_lossy().to_string();
78 checksums.files.insert(name, hash_content(&content));
79 count += 1;
80 }
81
82 save_checksums(table_dir, &checksums)?;
83 Ok(count)
84}
85
86pub fn check_file(table_dir: &Path, filename: &str) -> Option<bool> {
87 let checksums = load_checksums(table_dir)?;
88 let expected = checksums.files.get(filename)?;
89 let file_path = table_dir.join(filename);
90 let content = std::fs::read(&file_path).ok()?;
91 Some(hash_content(&content) == *expected)
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Markdown document used by the tests that hash a realistic file.
    const SAMPLE_DOC: &[u8] = b"---\ntitle: Test\n---\n# Test\n";

    /// Hashing the same bytes twice yields the same 16-character string.
    #[test]
    fn test_hash_deterministic() {
        let first = hash_content(b"hello world");
        let second = hash_content(b"hello world");
        assert_eq!(first, second);
        assert_eq!(first.len(), 16);
    }

    /// Different inputs produce different hashes.
    #[test]
    fn test_hash_different_content() {
        assert_ne!(hash_content(b"hello"), hash_content(b"world"));
    }

    /// An updated checksum can be loaded back from disk.
    #[test]
    fn test_roundtrip() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), SAMPLE_DOC).unwrap();

        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        let loaded = load_checksums(root).unwrap();
        assert_eq!(loaded.algorithm, "xxhash64");
        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.md"));
    }

    /// An unmodified file verifies successfully.
    #[test]
    fn test_check_file_match() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), SAMPLE_DOC).unwrap();
        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        assert_eq!(check_file(root, "test.md"), Some(true));
    }

    /// A file changed after its checksum was recorded fails verification.
    #[test]
    fn test_check_file_tampered() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let target = root.join("test.md");
        fs::write(&target, SAMPLE_DOC).unwrap();
        update_checksum(root, "test.md", SAMPLE_DOC).unwrap();

        fs::write(&target, "---\ntitle: Changed\n---\n# Changed\n").unwrap();
        assert_eq!(check_file(root, "test.md"), Some(false));
    }

    /// Without a manifest there is no verdict.
    #[test]
    fn test_check_file_no_checksums() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("test.md"), "content").unwrap();
        assert_eq!(check_file(root, "test.md"), None);
    }

    /// Removing one entry leaves the others in place.
    #[test]
    fn test_remove_checksum() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        update_checksum(root, "a.md", b"a").unwrap();
        update_checksum(root, "b.md", b"b").unwrap();

        let before = load_checksums(root).unwrap();
        assert_eq!(before.files.len(), 2);

        remove_checksum(root, "a.md").unwrap();
        let after = load_checksums(root).unwrap();
        assert_eq!(after.files.len(), 1);
        assert!(!after.files.contains_key("a.md"));
    }

    /// Regeneration hashes every `.md` file except the schema file.
    #[test]
    fn test_regenerate() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        fs::write(root.join("_mdql.md"), "schema").unwrap();
        fs::write(root.join("a.md"), "aaa").unwrap();
        fs::write(root.join("b.md"), "bbb").unwrap();

        let hashed = regenerate_checksums(root).unwrap();
        assert_eq!(hashed, 2);

        let manifest = load_checksums(root).unwrap();
        assert_eq!(manifest.files.len(), 2);
        assert!(manifest.files.contains_key("a.md"));
        assert!(manifest.files.contains_key("b.md"));
        assert!(!manifest.files.contains_key("_mdql.md"));
    }
}