1use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use rayon::prelude::*;
7use sha2::{Digest, Sha256};
8use walkdir::WalkDir;
9
10use super::entry::{DiffEntry, DiffStats, DiffType};
11use super::ignore::IgnoreRules;
12use super::progress::{DiffProgress, NullProgress, ProgressSink};
13
/// Entry point for comparing two directory trees.
///
/// This is a field-less unit struct: the comparison functions in its `impl`
/// block take no `self` and are called as `DiffEngine::compare(..)` etc.
pub struct DiffEngine;
15
16impl DiffEngine {
17 pub fn compare(before_root: &Path, after_root: &Path) -> anyhow::Result<Vec<DiffEntry>> {
19 Self::compare_with_ignore(before_root, after_root, &IgnoreRules::default())
20 }
21
22 pub fn compare_with_ignore(
25 before_root: &Path,
26 after_root: &Path,
27 ignore: &IgnoreRules,
28 ) -> anyhow::Result<Vec<DiffEntry>> {
29 Self::compare_with_progress(before_root, after_root, ignore, &NullProgress)
30 }
31
32 pub fn compare_with_progress(
34 before_root: &Path,
35 after_root: &Path,
36 ignore: &IgnoreRules,
37 progress: &dyn ProgressSink,
38 ) -> anyhow::Result<Vec<DiffEntry>> {
39 let before_map = collect_paths(before_root)?;
40 let after_map = collect_paths(after_root)?;
41
42 let all_paths: BTreeSet<String> = before_map.keys()
43 .chain(after_map.keys())
44 .cloned()
45 .collect();
46
47 let paths_to_compare: Vec<String> = all_paths
49 .into_iter()
50 .filter(|p| !ignore.is_ignored(p))
51 .collect();
52
53 let total = paths_to_compare.len();
54 progress.emit(DiffProgress::Started { total });
55
56 use std::sync::atomic::{AtomicUsize, Ordering};
58 let processed = AtomicUsize::new(0);
59
60 let mut entries: Vec<DiffEntry> = paths_to_compare
61 .into_par_iter()
62 .map(|rel_path| {
63 let diff_entry = match (before_map.get(&rel_path), after_map.get(&rel_path)) {
64 (None, Some(a)) => build_added(rel_path, a),
65 (Some(b), None) => build_removed(rel_path, b),
66 (Some(b), Some(a)) => build_compared(rel_path, b, a),
67 (None, None) => unreachable!(),
68 };
69 let n = processed.fetch_add(1, Ordering::Relaxed) + 1;
70 progress.emit(DiffProgress::File {
71 path: diff_entry.path.clone(), processed: n, total,
72 });
73 diff_entry
74 })
75 .collect();
76
77 progress.emit(DiffProgress::Sorting);
79 entries.sort_by(|a, b| a.path.cmp(&b.path));
80 progress.emit(DiffProgress::Done { total_files: entries.len() });
81 Ok(entries)
82 }
83}
84
85fn collect_paths(root: &Path) -> anyhow::Result<BTreeMap<String, PathBuf>> {
88 if !root.is_dir() {
89 anyhow::bail!("Not a directory: {}", root.display());
90 }
91 let mut map = BTreeMap::new();
92 for entry in WalkDir::new(root).into_iter() {
93 let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {e}"))?;
94 if entry.path() == root { continue; }
95 let rel = entry.path()
96 .strip_prefix(root).unwrap()
97 .to_string_lossy()
98 .replace('\\', "/");
99 map.insert(rel, entry.path().to_path_buf());
100 }
101 Ok(map)
102}
103
104fn build_added(rel: String, after: &Path) -> DiffEntry {
107 if after.is_dir() {
108 return dir_entry(rel, DiffType::Added);
109 }
110 let (bytes, sha, size, error) = read_file(after);
111 let (text, is_binary) = classify_bytes(&bytes);
112 DiffEntry {
113 path: rel, diff_type: DiffType::Added, is_dir: false,
114 before_text: None, after_text: text.clone(),
115 is_binary,
116 before_size: None, after_size: size,
117 before_sha256: None, after_sha256: sha,
118 stats: None, error_detail: error,
120 }
121}
122
123fn build_removed(rel: String, before: &Path) -> DiffEntry {
124 if before.is_dir() {
125 return dir_entry(rel, DiffType::Removed);
126 }
127 let (bytes, sha, size, error) = read_file(before);
128 let (text, is_binary) = classify_bytes(&bytes);
129 DiffEntry {
130 path: rel, diff_type: DiffType::Removed, is_dir: false,
131 before_text: text, after_text: None,
132 is_binary,
133 before_size: size, after_size: None,
134 before_sha256: sha, after_sha256: None,
135 stats: None,
136 error_detail: error,
137 }
138}
139
140fn build_compared(rel: String, before: &Path, after: &Path) -> DiffEntry {
141 if before.is_dir() != after.is_dir() {
142 return DiffEntry {
143 path: rel, diff_type: DiffType::TypeChanged, is_dir: false,
144 before_text: None, after_text: None,
145 is_binary: false,
146 before_size: None, after_size: None,
147 before_sha256: None, after_sha256: None,
148 stats: None,
149 error_detail: Some("Path kind changed (file ↔ directory).".into()),
150 };
151 }
152 if before.is_dir() {
153 return dir_entry(rel, DiffType::Unchanged);
154 }
155
156 let (before_bytes, before_sha, before_size, before_err) = read_file(before);
157 if let Some(e) = before_err {
158 return unreadable(rel, format!("Cannot read before-file: {e}"));
159 }
160 let (after_bytes, after_sha, after_size, after_err) = read_file(after);
161 if let Some(e) = after_err {
162 return unreadable(rel, format!("Cannot read after-file: {e}"));
163 }
164
165 let diff_type = if before_bytes == after_bytes { DiffType::Unchanged } else { DiffType::Modified };
166
167 let (before_text, before_is_binary) = classify_bytes(&before_bytes);
168 let (after_text, after_is_binary) = classify_bytes(&after_bytes);
169 let is_binary = before_is_binary || after_is_binary;
170
171 let stats = if diff_type == DiffType::Modified && !is_binary {
173 let bt = before_text.as_deref().unwrap_or("");
174 let at = after_text.as_deref().unwrap_or("");
175 Some(DiffStats::compute(bt, at))
176 } else {
177 None
178 };
179
180 DiffEntry {
181 path: rel, diff_type, is_dir: false,
182 before_text, after_text,
183 is_binary,
184 before_size, after_size,
185 before_sha256: before_sha, after_sha256: after_sha,
186 stats,
187 error_detail: None,
188 }
189}
190
191fn dir_entry(rel: String, diff_type: DiffType) -> DiffEntry {
194 DiffEntry {
195 path: rel, diff_type, is_dir: true,
196 before_text: None, after_text: None,
197 is_binary: false,
198 before_size: None, after_size: None,
199 before_sha256: None, after_sha256: None,
200 stats: None, error_detail: None,
201 }
202}
203
204fn unreadable(rel: String, detail: String) -> DiffEntry {
205 DiffEntry {
206 path: rel, diff_type: DiffType::Unreadable, is_dir: false,
207 before_text: None, after_text: None,
208 is_binary: false,
209 before_size: None, after_size: None,
210 before_sha256: None, after_sha256: None,
211 stats: None, error_detail: Some(detail),
212 }
213}
214
215fn read_file(path: &Path) -> (Vec<u8>, Option<String>, Option<u64>, Option<String>) {
217 match std::fs::read(path) {
218 Ok(bytes) => {
219 let sha = hex::encode(Sha256::digest(&bytes));
220 let size = bytes.len() as u64;
221 (bytes, Some(sha), Some(size), None)
222 }
223 Err(e) => (Vec::new(), None, None, Some(e.to_string())),
224 }
225}
226
/// Decides whether a byte buffer should be treated as text or as binary.
///
/// Returns `(text, is_binary)`:
/// * empty input → `(Some(""), false)`;
/// * a NUL byte within the first 8192 bytes → `(None, true)`;
/// * otherwise the entire buffer is validated as UTF-8: valid →
///   `(Some(text), false)`, invalid → `(None, true)`.
///
/// Only the leading window is scanned for NUL, so a NUL byte beyond it does
/// not by itself mark the buffer as binary.
fn classify_bytes(bytes: &[u8]) -> (Option<String>, bool) {
    /// Number of leading bytes scanned for a NUL byte.
    const SNIFF_LEN: usize = 8192;

    if bytes.is_empty() {
        return (Some(String::new()), false);
    }

    let sample = &bytes[..bytes.len().min(SNIFF_LEN)];
    if sample.contains(&0u8) {
        return (None, true);
    }

    // Validate in place and copy only when the data really is UTF-8, so that
    // non-UTF-8 content never costs a throw-away allocation of the whole buffer.
    match std::str::from_utf8(bytes) {
        Ok(text) => (Some(text.to_owned()), false),
        Err(_) => (None, true),
    }
}