Skip to main content

aaai_core/diff/
engine.rs

//! Folder diff engine — Phase 4: parallel processing + binary detection + diff stats.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use rayon::prelude::*;
7use sha2::{Digest, Sha256};
8use walkdir::WalkDir;
9
10use super::entry::{DiffEntry, DiffStats, DiffType};
11use super::ignore::IgnoreRules;
12
13pub struct DiffEngine;
14
15impl DiffEngine {
16    /// Compare two directory trees (sequential — for small trees).
17    pub fn compare(before_root: &Path, after_root: &Path) -> anyhow::Result<Vec<DiffEntry>> {
18        Self::compare_with_ignore(before_root, after_root, &IgnoreRules::default())
19    }
20
21    /// Compare with ignore rules.
22    /// Uses parallel processing for the per-file comparison step.
23    pub fn compare_with_ignore(
24        before_root: &Path,
25        after_root: &Path,
26        ignore: &IgnoreRules,
27    ) -> anyhow::Result<Vec<DiffEntry>> {
28        let before_map = collect_paths(before_root)?;
29        let after_map  = collect_paths(after_root)?;
30
31        let all_paths: BTreeSet<String> = before_map.keys()
32            .chain(after_map.keys())
33            .cloned()
34            .collect();
35
36        // Filter ignored paths eagerly.
37        let paths_to_compare: Vec<String> = all_paths
38            .into_iter()
39            .filter(|p| !ignore.is_ignored(p))
40            .collect();
41
42        // ── Parallel per-file comparison ───────────────────────────────────
43        let mut entries: Vec<DiffEntry> = paths_to_compare
44            .into_par_iter()
45            .map(|rel_path| {
46                match (before_map.get(&rel_path), after_map.get(&rel_path)) {
47                    (None,    Some(a)) => build_added(rel_path, a),
48                    (Some(b), None)    => build_removed(rel_path, b),
49                    (Some(b), Some(a)) => build_compared(rel_path, b, a),
50                    (None,    None)    => unreachable!(),
51                }
52            })
53            .collect();
54
55        // Restore deterministic sort (parallel iter may reorder).
56        entries.sort_by(|a, b| a.path.cmp(&b.path));
57        Ok(entries)
58    }
59}
60
// ── Path collection ───────────────────────────────────────────────────────
62
63fn collect_paths(root: &Path) -> anyhow::Result<BTreeMap<String, PathBuf>> {
64    if !root.is_dir() {
65        anyhow::bail!("Not a directory: {}", root.display());
66    }
67    let mut map = BTreeMap::new();
68    for entry in WalkDir::new(root).into_iter() {
69        let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {e}"))?;
70        if entry.path() == root { continue; }
71        let rel = entry.path()
72            .strip_prefix(root).unwrap()
73            .to_string_lossy()
74            .replace('\\', "/");
75        map.insert(rel, entry.path().to_path_buf());
76    }
77    Ok(map)
78}
79
// ── Per-file builders ─────────────────────────────────────────────────────
81
82fn build_added(rel: String, after: &Path) -> DiffEntry {
83    if after.is_dir() {
84        return dir_entry(rel, DiffType::Added);
85    }
86    let (bytes, sha, size, error) = read_file(after);
87    let (text, is_binary) = classify_bytes(&bytes);
88    DiffEntry {
89        path: rel, diff_type: DiffType::Added, is_dir: false,
90        before_text: None, after_text: text.clone(),
91        is_binary,
92        before_size: None, after_size: size,
93        before_sha256: None, after_sha256: sha,
94        stats: None, // no before to diff against
95        error_detail: error,
96    }
97}
98
99fn build_removed(rel: String, before: &Path) -> DiffEntry {
100    if before.is_dir() {
101        return dir_entry(rel, DiffType::Removed);
102    }
103    let (bytes, sha, size, error) = read_file(before);
104    let (text, is_binary) = classify_bytes(&bytes);
105    DiffEntry {
106        path: rel, diff_type: DiffType::Removed, is_dir: false,
107        before_text: text, after_text: None,
108        is_binary,
109        before_size: size, after_size: None,
110        before_sha256: sha, after_sha256: None,
111        stats: None,
112        error_detail: error,
113    }
114}
115
116fn build_compared(rel: String, before: &Path, after: &Path) -> DiffEntry {
117    if before.is_dir() != after.is_dir() {
118        return DiffEntry {
119            path: rel, diff_type: DiffType::TypeChanged, is_dir: false,
120            before_text: None, after_text: None,
121            is_binary: false,
122            before_size: None, after_size: None,
123            before_sha256: None, after_sha256: None,
124            stats: None,
125            error_detail: Some("Path kind changed (file ↔ directory).".into()),
126        };
127    }
128    if before.is_dir() {
129        return dir_entry(rel, DiffType::Unchanged);
130    }
131
132    let (before_bytes, before_sha, before_size, before_err) = read_file(before);
133    if let Some(e) = before_err {
134        return unreadable(rel, format!("Cannot read before-file: {e}"));
135    }
136    let (after_bytes, after_sha, after_size, after_err) = read_file(after);
137    if let Some(e) = after_err {
138        return unreadable(rel, format!("Cannot read after-file: {e}"));
139    }
140
141    let diff_type = if before_bytes == after_bytes { DiffType::Unchanged } else { DiffType::Modified };
142
143    let (before_text, before_is_binary) = classify_bytes(&before_bytes);
144    let (after_text,  after_is_binary)  = classify_bytes(&after_bytes);
145    let is_binary = before_is_binary || after_is_binary;
146
147    // Compute line stats for text-Modified files.
148    let stats = if diff_type == DiffType::Modified && !is_binary {
149        let bt = before_text.as_deref().unwrap_or("");
150        let at = after_text.as_deref().unwrap_or("");
151        Some(DiffStats::compute(bt, at))
152    } else {
153        None
154    };
155
156    DiffEntry {
157        path: rel, diff_type, is_dir: false,
158        before_text, after_text,
159        is_binary,
160        before_size, after_size,
161        before_sha256: before_sha, after_sha256: after_sha,
162        stats,
163        error_detail: None,
164    }
165}
166
// ── Helpers ───────────────────────────────────────────────────────────────
168
169fn dir_entry(rel: String, diff_type: DiffType) -> DiffEntry {
170    DiffEntry {
171        path: rel, diff_type, is_dir: true,
172        before_text: None, after_text: None,
173        is_binary: false,
174        before_size: None, after_size: None,
175        before_sha256: None, after_sha256: None,
176        stats: None, error_detail: None,
177    }
178}
179
180fn unreadable(rel: String, detail: String) -> DiffEntry {
181    DiffEntry {
182        path: rel, diff_type: DiffType::Unreadable, is_dir: false,
183        before_text: None, after_text: None,
184        is_binary: false,
185        before_size: None, after_size: None,
186        before_sha256: None, after_sha256: None,
187        stats: None, error_detail: Some(detail),
188    }
189}
190
191/// Read a file returning (bytes, sha256_hex, size_bytes, error).
192fn read_file(path: &Path) -> (Vec<u8>, Option<String>, Option<u64>, Option<String>) {
193    match std::fs::read(path) {
194        Ok(bytes) => {
195            let sha  = hex::encode(Sha256::digest(&bytes));
196            let size = bytes.len() as u64;
197            (bytes, Some(sha), Some(size), None)
198        }
199        Err(e) => (Vec::new(), None, None, Some(e.to_string())),
200    }
201}
202
/// Classify bytes as text or binary.
///
/// Returns `(text_content, is_binary)`:
/// * empty input → `(Some(""), false)`;
/// * a NUL byte within the first 8 KB → `(None, true)`;
/// * otherwise the whole buffer must be valid UTF-8 to count as text; if it
///   is not, the result is `(None, true)`.
fn classify_bytes(bytes: &[u8]) -> (Option<String>, bool) {
    /// Number of leading bytes scanned for NUL by the binary heuristic.
    const SNIFF_LEN: usize = 8192;

    if bytes.is_empty() {
        return (Some(String::new()), false);
    }
    // Heuristic: if any of the first 8 KB contains a null byte, treat as binary.
    let sample = &bytes[..bytes.len().min(SNIFF_LEN)];
    if sample.contains(&0u8) {
        return (None, true);
    }
    // Validate the borrowed slice first so invalid UTF-8 never pays for a copy.
    match std::str::from_utf8(bytes) {
        Ok(text) => (Some(text.to_owned()), false),
        Err(_) => (None, true),
    }
}