kardo_core/scanner/
cache.rs1use sha2::{Digest, Sha256};
7use std::collections::HashMap;
8use std::time::Instant;
9
/// Summary of one [`ContentCache::process_batch`] run.
///
/// `PartialEq` and `Default` are derived so results can be compared in tests
/// and built with struct-update syntax (`ScanResult { score, ..Default::default() }`).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct ScanResult {
    /// Number of files that were handed to the processor.
    pub files_scanned: usize,
    /// Number of files skipped because their content hash was unchanged.
    pub files_skipped: usize,
    /// Elapsed wall-clock time of the run, in milliseconds.
    pub duration_ms: u64,
    /// Optional score attached to the run; `process_batch` leaves it as `None`.
    pub score: Option<f64>,
}
18
/// Remembers one content hash per relative file path so unchanged files can
/// be detected and skipped.
///
/// `Debug` and `Clone` are derived so the cache can be logged and snapshotted.
#[derive(Debug, Clone)]
pub struct ContentCache {
    /// Relative path -> hash string last recorded for that path.
    hashes: HashMap<String, String>,
}
23
24impl ContentCache {
25 pub fn new() -> Self {
26 Self {
27 hashes: HashMap::new(),
28 }
29 }
30
31 pub fn from_entries(entries: Vec<(String, String)>) -> Self {
33 Self {
34 hashes: entries.into_iter().collect(),
35 }
36 }
37
38 pub fn compute_hash(content: &[u8]) -> String {
40 let mut hasher = Sha256::new();
41 hasher.update(content);
42 format!("{:x}", hasher.finalize())
43 }
44
45 pub fn needs_processing(&self, relative_path: &str, content: &[u8]) -> bool {
48 let new_hash = Self::compute_hash(content);
49 match self.hashes.get(relative_path) {
50 Some(stored) => stored != &new_hash,
51 None => true, }
53 }
54
55 pub fn update(&mut self, relative_path: String, content: &[u8]) {
57 let hash = Self::compute_hash(content);
58 self.hashes.insert(relative_path, hash);
59 }
60
61 pub fn entries(&self) -> Vec<(&str, &str)> {
63 self.hashes
64 .iter()
65 .map(|(k, v)| (k.as_str(), v.as_str()))
66 .collect()
67 }
68
69 pub fn process_batch<F>(
76 &mut self,
77 files: &[(String, Vec<u8>)],
78 mut processor: F,
79 ) -> ScanResult
80 where
81 F: FnMut(&str, &[u8]),
82 {
83 let start = Instant::now();
84 let mut scanned = 0;
85 let mut skipped = 0;
86
87 for (path, content) in files {
88 if self.needs_processing(path, content) {
89 processor(path, content);
90 self.update(path.clone(), content);
91 scanned += 1;
92 } else {
93 skipped += 1;
94 }
95 }
96
97 ScanResult {
98 files_scanned: scanned,
99 files_skipped: skipped,
100 duration_ms: start.elapsed().as_millis() as u64,
101 score: None,
102 }
103 }
104}
105
106impl Default for ContentCache {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// `compute_hash` must yield the well-known SHA-256 digest in lowercase hex.
    #[test]
    fn test_compute_hash() {
        let hash = ContentCache::compute_hash(b"hello world");
        assert_eq!(
            hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    /// A path the cache has never seen always needs processing.
    #[test]
    fn test_needs_processing_new_file() {
        let cache = ContentCache::new();
        assert!(cache.needs_processing("new.md", b"content"));
    }

    /// Identical content under a recorded path is skipped.
    #[test]
    fn test_needs_processing_unchanged() {
        let mut cache = ContentCache::new();
        cache.update("file.md".to_string(), b"content");
        assert!(!cache.needs_processing("file.md", b"content"));
    }

    /// Different content under a recorded path needs processing again.
    #[test]
    fn test_needs_processing_changed() {
        let mut cache = ContentCache::new();
        cache.update("file.md".to_string(), b"old content");
        assert!(cache.needs_processing("file.md", b"new content"));
    }

    /// Only changed or new files reach the processor, and processed files are
    /// recorded so a second pass over the same input skips everything.
    #[test]
    fn test_process_batch_skips_unchanged() {
        let mut cache = ContentCache::new();
        cache.update("unchanged.md".to_string(), b"same");

        let files = vec![
            ("unchanged.md".to_string(), b"same".to_vec()),
            ("new.md".to_string(), b"new content".to_vec()),
        ];

        let mut processed = Vec::new();
        let result = cache.process_batch(&files, |path, _content| {
            processed.push(path.to_string());
        });

        assert_eq!(result.files_scanned, 1);
        assert_eq!(result.files_skipped, 1);
        assert_eq!(processed, vec!["new.md"]);

        let second = cache.process_batch(&files, |_path, _content| {});
        assert_eq!(second.files_scanned, 0);
        assert_eq!(second.files_skipped, 2);
    }

    /// With an empty cache every file is scanned and then recorded.
    #[test]
    fn test_process_batch_all_new() {
        let mut cache = ContentCache::new();
        let files = vec![
            ("a.md".to_string(), b"aaa".to_vec()),
            ("b.md".to_string(), b"bbb".to_vec()),
        ];

        let result = cache.process_batch(&files, |_path, _content| {});
        assert_eq!(result.files_scanned, 2);
        assert_eq!(result.files_skipped, 0);
        assert_eq!(result.score, None);

        assert!(!cache.needs_processing("a.md", b"aaa"));
        assert!(!cache.needs_processing("b.md", b"bbb"));
    }

    /// A cache restored from stored pairs recognises matching content.
    #[test]
    fn test_from_entries() {
        let hash = ContentCache::compute_hash(b"content");
        let cache = ContentCache::from_entries(vec![("file.md".to_string(), hash)]);
        assert!(!cache.needs_processing("file.md", b"content"));
    }

    /// `entries` output fed back through `from_entries` yields an equivalent cache.
    #[test]
    fn test_entries_roundtrip() {
        let mut cache = ContentCache::new();
        cache.update("a.md".to_string(), b"aaa");
        cache.update("b.md".to_string(), b"bbb");

        let entries = cache.entries();
        assert_eq!(entries.len(), 2);

        let owned: Vec<(String, String)> = entries
            .into_iter()
            .map(|(path, hash)| (path.to_string(), hash.to_string()))
            .collect();
        let restored = ContentCache::from_entries(owned);

        assert!(!restored.needs_processing("a.md", b"aaa"));
        assert!(!restored.needs_processing("b.md", b"bbb"));
        assert!(restored.needs_processing("a.md", b"changed"));
    }

    /// The `Default` impl produces an empty cache.
    #[test]
    fn test_default_is_empty() {
        assert!(ContentCache::default().entries().is_empty());
    }
}