Skip to main content

aaai_core/diff/
engine.rs

1//! Folder diff engine — Phase 4: parallel processing + binary detection + diff stats.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use rayon::prelude::*;
7use sha2::{Digest, Sha256};
8use walkdir::WalkDir;
9
10use super::entry::{DiffEntry, DiffStats, DiffType};
11use super::ignore::IgnoreRules;
12use super::progress::{DiffProgress, NullProgress, ProgressSink};
13
14pub struct DiffEngine;
15
16impl DiffEngine {
17    /// Compare two directory trees (sequential — for small trees).
18    pub fn compare(before_root: &Path, after_root: &Path) -> anyhow::Result<Vec<DiffEntry>> {
19        Self::compare_with_ignore(before_root, after_root, &IgnoreRules::default())
20    }
21
22    /// Compare with ignore rules.
23    /// Uses parallel processing for the per-file comparison step.
24    pub fn compare_with_ignore(
25        before_root: &Path,
26        after_root: &Path,
27        ignore: &IgnoreRules,
28    ) -> anyhow::Result<Vec<DiffEntry>> {
29        Self::compare_with_progress(before_root, after_root, ignore, &NullProgress)
30    }
31
32    /// Compare with ignore rules and a progress sink.
33    pub fn compare_with_progress(
34        before_root: &Path,
35        after_root: &Path,
36        ignore: &IgnoreRules,
37        progress: &dyn ProgressSink,
38    ) -> anyhow::Result<Vec<DiffEntry>> {
39        let before_map = collect_paths(before_root)?;
40        let after_map  = collect_paths(after_root)?;
41
42        let all_paths: BTreeSet<String> = before_map.keys()
43            .chain(after_map.keys())
44            .cloned()
45            .collect();
46
47        // Filter ignored paths eagerly.
48        let paths_to_compare: Vec<String> = all_paths
49            .into_iter()
50            .filter(|p| !ignore.is_ignored(p))
51            .collect();
52
53        let total = paths_to_compare.len();
54        progress.emit(DiffProgress::Started { total });
55
56        // ── Parallel per-file comparison ───────────────────────────────────
57        use std::sync::atomic::{AtomicUsize, Ordering};
58        let processed = AtomicUsize::new(0);
59
60        let mut entries: Vec<DiffEntry> = paths_to_compare
61            .into_par_iter()
62            .map(|rel_path| {
63                let diff_entry = match (before_map.get(&rel_path), after_map.get(&rel_path)) {
64                    (None,    Some(a)) => build_added(rel_path, a),
65                    (Some(b), None)    => build_removed(rel_path, b),
66                    (Some(b), Some(a)) => build_compared(rel_path, b, a),
67                    (None,    None)    => unreachable!(),
68                };
69                let n = processed.fetch_add(1, Ordering::Relaxed) + 1;
70                progress.emit(DiffProgress::File {
71                    path: diff_entry.path.clone(), processed: n, total,
72                });
73                diff_entry
74            })
75            .collect();
76
77        // Restore deterministic sort (parallel iter may reorder).
78        progress.emit(DiffProgress::Sorting);
79        entries.sort_by(|a, b| a.path.cmp(&b.path));
80        progress.emit(DiffProgress::Done { total_files: entries.len() });
81        Ok(entries)
82    }
83}
84
85// ── Path collection ───────────────────────────────────────────────────────
86
87fn collect_paths(root: &Path) -> anyhow::Result<BTreeMap<String, PathBuf>> {
88    if !root.is_dir() {
89        anyhow::bail!("Not a directory: {}", root.display());
90    }
91    let mut map = BTreeMap::new();
92    for entry in WalkDir::new(root).into_iter() {
93        let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {e}"))?;
94        if entry.path() == root { continue; }
95        let rel = entry.path()
96            .strip_prefix(root).unwrap()
97            .to_string_lossy()
98            .replace('\\', "/");
99        map.insert(rel, entry.path().to_path_buf());
100    }
101    Ok(map)
102}
103
104// ── Per-file builders ─────────────────────────────────────────────────────
105
106fn build_added(rel: String, after: &Path) -> DiffEntry {
107    if after.is_dir() {
108        return dir_entry(rel, DiffType::Added);
109    }
110    let (bytes, sha, size, error) = read_file(after);
111    let (text, is_binary) = classify_bytes(&bytes);
112    DiffEntry {
113        path: rel, diff_type: DiffType::Added, is_dir: false,
114        before_text: None, after_text: text.clone(),
115        is_binary,
116        before_size: None, after_size: size,
117        before_sha256: None, after_sha256: sha,
118        stats: None, // no before to diff against
119        error_detail: error,
120    }
121}
122
123fn build_removed(rel: String, before: &Path) -> DiffEntry {
124    if before.is_dir() {
125        return dir_entry(rel, DiffType::Removed);
126    }
127    let (bytes, sha, size, error) = read_file(before);
128    let (text, is_binary) = classify_bytes(&bytes);
129    DiffEntry {
130        path: rel, diff_type: DiffType::Removed, is_dir: false,
131        before_text: text, after_text: None,
132        is_binary,
133        before_size: size, after_size: None,
134        before_sha256: sha, after_sha256: None,
135        stats: None,
136        error_detail: error,
137    }
138}
139
140fn build_compared(rel: String, before: &Path, after: &Path) -> DiffEntry {
141    if before.is_dir() != after.is_dir() {
142        return DiffEntry {
143            path: rel, diff_type: DiffType::TypeChanged, is_dir: false,
144            before_text: None, after_text: None,
145            is_binary: false,
146            before_size: None, after_size: None,
147            before_sha256: None, after_sha256: None,
148            stats: None,
149            error_detail: Some("Path kind changed (file ↔ directory).".into()),
150        };
151    }
152    if before.is_dir() {
153        return dir_entry(rel, DiffType::Unchanged);
154    }
155
156    let (before_bytes, before_sha, before_size, before_err) = read_file(before);
157    if let Some(e) = before_err {
158        return unreadable(rel, format!("Cannot read before-file: {e}"));
159    }
160    let (after_bytes, after_sha, after_size, after_err) = read_file(after);
161    if let Some(e) = after_err {
162        return unreadable(rel, format!("Cannot read after-file: {e}"));
163    }
164
165    let diff_type = if before_bytes == after_bytes { DiffType::Unchanged } else { DiffType::Modified };
166
167    let (before_text, before_is_binary) = classify_bytes(&before_bytes);
168    let (after_text,  after_is_binary)  = classify_bytes(&after_bytes);
169    let is_binary = before_is_binary || after_is_binary;
170
171    // Compute line stats for text-Modified files.
172    let stats = if diff_type == DiffType::Modified && !is_binary {
173        let bt = before_text.as_deref().unwrap_or("");
174        let at = after_text.as_deref().unwrap_or("");
175        Some(DiffStats::compute(bt, at))
176    } else {
177        None
178    };
179
180    DiffEntry {
181        path: rel, diff_type, is_dir: false,
182        before_text, after_text,
183        is_binary,
184        before_size, after_size,
185        before_sha256: before_sha, after_sha256: after_sha,
186        stats,
187        error_detail: None,
188    }
189}
190
191// ── Helpers ───────────────────────────────────────────────────────────────
192
193fn dir_entry(rel: String, diff_type: DiffType) -> DiffEntry {
194    DiffEntry {
195        path: rel, diff_type, is_dir: true,
196        before_text: None, after_text: None,
197        is_binary: false,
198        before_size: None, after_size: None,
199        before_sha256: None, after_sha256: None,
200        stats: None, error_detail: None,
201    }
202}
203
204fn unreadable(rel: String, detail: String) -> DiffEntry {
205    DiffEntry {
206        path: rel, diff_type: DiffType::Unreadable, is_dir: false,
207        before_text: None, after_text: None,
208        is_binary: false,
209        before_size: None, after_size: None,
210        before_sha256: None, after_sha256: None,
211        stats: None, error_detail: Some(detail),
212    }
213}
214
215/// Read a file returning (bytes, sha256_hex, size_bytes, error).
216fn read_file(path: &Path) -> (Vec<u8>, Option<String>, Option<u64>, Option<String>) {
217    match std::fs::read(path) {
218        Ok(bytes) => {
219            let sha  = hex::encode(Sha256::digest(&bytes));
220            let size = bytes.len() as u64;
221            (bytes, Some(sha), Some(size), None)
222        }
223        Err(e) => (Vec::new(), None, None, Some(e.to_string())),
224    }
225}
226
/// Classify a byte buffer as text or binary.
///
/// Returns `(text_content, is_binary)`:
/// - empty input is treated as empty text: `(Some(""), false)`;
/// - a NUL byte anywhere in the first 8 KiB marks the buffer as binary:
///   `(None, true)`;
/// - otherwise the buffer is text only if the *whole* buffer is valid
///   UTF-8: `(Some(text), false)`, else `(None, true)`.
///
/// The buffer is validated in place and copied into a `String` only when it
/// turns out to be valid UTF-8, so non-UTF-8 data costs no allocation.
fn classify_bytes(bytes: &[u8]) -> (Option<String>, bool) {
    // Size of the leading window scanned for NUL bytes.
    const SNIFF_LEN: usize = 8192;

    if bytes.is_empty() {
        return (Some(String::new()), false);
    }

    // Heuristic: a NUL byte near the start is a strong sign of binary data.
    let sample = &bytes[..bytes.len().min(SNIFF_LEN)];
    if sample.contains(&0u8) {
        return (None, true);
    }

    match std::str::from_utf8(bytes) {
        Ok(text) => (Some(text.to_owned()), false),
        Err(_) => (None, true),
    }
}