1use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use rayon::prelude::*;
7use sha2::{Digest, Sha256};
8use walkdir::WalkDir;
9
10use super::entry::{DiffEntry, DiffStats, DiffType};
11use super::ignore::IgnoreRules;
12
13pub struct DiffEngine;
14
15impl DiffEngine {
16 pub fn compare(before_root: &Path, after_root: &Path) -> anyhow::Result<Vec<DiffEntry>> {
18 Self::compare_with_ignore(before_root, after_root, &IgnoreRules::default())
19 }
20
21 pub fn compare_with_ignore(
24 before_root: &Path,
25 after_root: &Path,
26 ignore: &IgnoreRules,
27 ) -> anyhow::Result<Vec<DiffEntry>> {
28 let before_map = collect_paths(before_root)?;
29 let after_map = collect_paths(after_root)?;
30
31 let all_paths: BTreeSet<String> = before_map.keys()
32 .chain(after_map.keys())
33 .cloned()
34 .collect();
35
36 let paths_to_compare: Vec<String> = all_paths
38 .into_iter()
39 .filter(|p| !ignore.is_ignored(p))
40 .collect();
41
42 let mut entries: Vec<DiffEntry> = paths_to_compare
44 .into_par_iter()
45 .map(|rel_path| {
46 match (before_map.get(&rel_path), after_map.get(&rel_path)) {
47 (None, Some(a)) => build_added(rel_path, a),
48 (Some(b), None) => build_removed(rel_path, b),
49 (Some(b), Some(a)) => build_compared(rel_path, b, a),
50 (None, None) => unreachable!(),
51 }
52 })
53 .collect();
54
55 entries.sort_by(|a, b| a.path.cmp(&b.path));
57 Ok(entries)
58 }
59}
60
61fn collect_paths(root: &Path) -> anyhow::Result<BTreeMap<String, PathBuf>> {
64 if !root.is_dir() {
65 anyhow::bail!("Not a directory: {}", root.display());
66 }
67 let mut map = BTreeMap::new();
68 for entry in WalkDir::new(root).into_iter() {
69 let entry = entry.map_err(|e| anyhow::anyhow!("Walk error: {e}"))?;
70 if entry.path() == root { continue; }
71 let rel = entry.path()
72 .strip_prefix(root).unwrap()
73 .to_string_lossy()
74 .replace('\\', "/");
75 map.insert(rel, entry.path().to_path_buf());
76 }
77 Ok(map)
78}
79
80fn build_added(rel: String, after: &Path) -> DiffEntry {
83 if after.is_dir() {
84 return dir_entry(rel, DiffType::Added);
85 }
86 let (bytes, sha, size, error) = read_file(after);
87 let (text, is_binary) = classify_bytes(&bytes);
88 DiffEntry {
89 path: rel, diff_type: DiffType::Added, is_dir: false,
90 before_text: None, after_text: text.clone(),
91 is_binary,
92 before_size: None, after_size: size,
93 before_sha256: None, after_sha256: sha,
94 stats: None, error_detail: error,
96 }
97}
98
99fn build_removed(rel: String, before: &Path) -> DiffEntry {
100 if before.is_dir() {
101 return dir_entry(rel, DiffType::Removed);
102 }
103 let (bytes, sha, size, error) = read_file(before);
104 let (text, is_binary) = classify_bytes(&bytes);
105 DiffEntry {
106 path: rel, diff_type: DiffType::Removed, is_dir: false,
107 before_text: text, after_text: None,
108 is_binary,
109 before_size: size, after_size: None,
110 before_sha256: sha, after_sha256: None,
111 stats: None,
112 error_detail: error,
113 }
114}
115
116fn build_compared(rel: String, before: &Path, after: &Path) -> DiffEntry {
117 if before.is_dir() != after.is_dir() {
118 return DiffEntry {
119 path: rel, diff_type: DiffType::TypeChanged, is_dir: false,
120 before_text: None, after_text: None,
121 is_binary: false,
122 before_size: None, after_size: None,
123 before_sha256: None, after_sha256: None,
124 stats: None,
125 error_detail: Some("Path kind changed (file ↔ directory).".into()),
126 };
127 }
128 if before.is_dir() {
129 return dir_entry(rel, DiffType::Unchanged);
130 }
131
132 let (before_bytes, before_sha, before_size, before_err) = read_file(before);
133 if let Some(e) = before_err {
134 return unreadable(rel, format!("Cannot read before-file: {e}"));
135 }
136 let (after_bytes, after_sha, after_size, after_err) = read_file(after);
137 if let Some(e) = after_err {
138 return unreadable(rel, format!("Cannot read after-file: {e}"));
139 }
140
141 let diff_type = if before_bytes == after_bytes { DiffType::Unchanged } else { DiffType::Modified };
142
143 let (before_text, before_is_binary) = classify_bytes(&before_bytes);
144 let (after_text, after_is_binary) = classify_bytes(&after_bytes);
145 let is_binary = before_is_binary || after_is_binary;
146
147 let stats = if diff_type == DiffType::Modified && !is_binary {
149 let bt = before_text.as_deref().unwrap_or("");
150 let at = after_text.as_deref().unwrap_or("");
151 Some(DiffStats::compute(bt, at))
152 } else {
153 None
154 };
155
156 DiffEntry {
157 path: rel, diff_type, is_dir: false,
158 before_text, after_text,
159 is_binary,
160 before_size, after_size,
161 before_sha256: before_sha, after_sha256: after_sha,
162 stats,
163 error_detail: None,
164 }
165}
166
167fn dir_entry(rel: String, diff_type: DiffType) -> DiffEntry {
170 DiffEntry {
171 path: rel, diff_type, is_dir: true,
172 before_text: None, after_text: None,
173 is_binary: false,
174 before_size: None, after_size: None,
175 before_sha256: None, after_sha256: None,
176 stats: None, error_detail: None,
177 }
178}
179
180fn unreadable(rel: String, detail: String) -> DiffEntry {
181 DiffEntry {
182 path: rel, diff_type: DiffType::Unreadable, is_dir: false,
183 before_text: None, after_text: None,
184 is_binary: false,
185 before_size: None, after_size: None,
186 before_sha256: None, after_sha256: None,
187 stats: None, error_detail: Some(detail),
188 }
189}
190
191fn read_file(path: &Path) -> (Vec<u8>, Option<String>, Option<u64>, Option<String>) {
193 match std::fs::read(path) {
194 Ok(bytes) => {
195 let sha = hex::encode(Sha256::digest(&bytes));
196 let size = bytes.len() as u64;
197 (bytes, Some(sha), Some(size), None)
198 }
199 Err(e) => (Vec::new(), None, None, Some(e.to_string())),
200 }
201}
202
/// Classifies raw file contents as text or binary.
///
/// Returns `(Some(text), false)` for text and `(None, true)` for binary:
///
/// * empty input is text (the empty string);
/// * a NUL byte within the first `SNIFF_LEN` bytes means binary;
/// * otherwise the whole input must be valid UTF-8 to count as text.
fn classify_bytes(bytes: &[u8]) -> (Option<String>, bool) {
    /// Number of leading bytes inspected for NUL.
    const SNIFF_LEN: usize = 8192;

    if bytes.is_empty() {
        return (Some(String::new()), false);
    }
    if bytes.iter().take(SNIFF_LEN).any(|&byte| byte == 0) {
        return (None, true);
    }
    // Validate the borrowed slice first so the copy into a `String` happens
    // only when the data really is UTF-8.
    match std::str::from_utf8(bytes) {
        Ok(text) => (Some(text.to_owned()), false),
        Err(_) => (None, true),
    }
}