srctrait_common_tooling/path/
diff.rs

1//! Compares differences between two paths
2use std::{fmt::Display, fs::File, io::{self, BufReader, Read}, path::{Path, PathBuf}};
3use walkdir::{self, WalkDir};
4use srctrait_common_stdx::path::tree::{PathTree, PathTreeTrait};
5
6
7/// Compares differences between two subject paths.
8pub fn path_diff<P1,P2>(first: P1, second: P2) -> io::Result<Option<Vec<Difference>>>
9where
10    P1: AsRef<Path>,
11    P2: AsRef<Path>
12{
13    _path_diff(first.as_ref(), second.as_ref(), false)
14}
15
16/// Determines whether two paths differ or not
17///
18/// Returns immediately after the first difference is found.
19pub fn paths_differ<P1,P2>(first: P1, second: P2) -> io::Result<bool>
20where
21    P1: AsRef<Path>,
22    P2: AsRef<Path>
23{
24    _path_diff(first.as_ref(), second.as_ref(), true)
25        .map(|o| o.is_some())
26}
27
/// Identifies one of the two compared paths by its argument position.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Subject {
    /// The path passed as the first argument.
    First,
    /// The path passed as the second argument.
    Second,
}
34
35impl Display for Subject {
36    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37        match self {
38            Subject::First => write!(f, "first"),
39            Subject::Second => write!(f, "second"),
40        }
41    }
42}
43
44/// Describes a difference between two [Subject] paths.
45///
46/// All paths are relative to the subjects,
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub enum Difference {
49    /// One argument is a file and the other is a directory
50    SubjectTypesDiffer,
51    /// One path is a file and the other is a directory
52    TypesDiffer(PathBuf),
53    /// [Subject] is missing a file that the other has
54    FileMissing(PathBuf, Subject),
55    /// [Subject] is missing a directory that the other has
56    DirectoryMissing(PathBuf, Subject),
57    /// File is different between subjects
58    FileDiffers(PathBuf),
59}
60
61impl Display for Difference {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Difference::SubjectTypesDiffer => write!(f, "One argument is a file and the other is a directory"),
65            Difference::TypesDiffer(path) => write!(f, "Path type differs: {}", path.display()),
66            Difference::FileMissing(path, subject) => write!(f, "File is missing in {subject}: {}", path.display()),
67            Difference::DirectoryMissing(path, subject) => write!(f, "Directory is missing in {subject}: {}", path.display()),
68            Difference::FileDiffers(path) => write!(f, "File differs: {}", path.display()),
69        }
70    }
71}
72
73/// early: returns on first difference
74fn _path_diff(first: &Path, second: &Path, early: bool)  -> io::Result<Option<Vec<Difference>>> {
75    if !first.exists() {
76        return Err(io::Error::new(io::ErrorKind::NotFound, format!("First path not found: {}", first.display())))
77    } else if !second.exists() {
78        return Err(io::Error::new(io::ErrorKind::NotFound, format!("Second path not found: {}", second.display())))
79    }
80
81    match (first.is_dir(), second.is_dir()) {
82        (false, false) => {
83            return compare_files(second, first, second)
84                .map(|o| o.map(|d| vec![d]))
85        },
86        (true, false) => return Ok(Some(vec![Difference::SubjectTypesDiffer])),
87        (false, true) => return Ok(Some(vec![Difference::SubjectTypesDiffer])),
88        (true, true) => {},
89    }
90
91    let mut differences: Vec<Difference> = Vec::new();
92    let mut paths_compared = PathTree::new_relative();
93    let mut paths_missing = Vec::new();
94
95    for first_entry in walk(first).into_iter() {
96        let first_entry = first_entry.map_err(|e| walk_err(e))?;
97        let relpath = rel(first, first_entry.path());
98        let second_path = second.join(&relpath);
99
100        paths_compared.insert(&relpath);
101
102        if let Some(parent) = second_path.parent() {
103            let parent_rel = rel(second, parent);
104            if parent != second && is_path_missing(&parent_rel, &paths_missing) {
105                continue;
106            }
107        }
108
109        let second_meta = match second_path.metadata() {
110            Ok(m) => m,
111            Err(e) if e.kind() == io::ErrorKind::NotFound => {
112                if first_entry.file_type().is_dir() {
113                    paths_missing.push(relpath.to_path_buf());
114                    differences.push(Difference::DirectoryMissing(relpath.to_path_buf(), Subject::Second));
115                    match early {
116                        true => return Ok(Some(differences)),
117                        false => continue
118                    }
119                } else {
120                    paths_missing.push(relpath.to_path_buf());
121                    differences.push(Difference::FileMissing(relpath.to_path_buf(), Subject::Second));
122                    match early {
123                        true => return Ok(Some(differences)),
124                        false => continue
125                    }
126                }
127            },
128            Err(e) => return Err(e)
129        };
130
131        let first_is_dir = first_entry.file_type().is_dir();
132        let second_is_dir = second_meta.is_dir();
133        match (first_is_dir, second_is_dir) {
134            (true, true) => {},
135            (true, false) => {
136                differences.push(Difference::TypesDiffer(relpath.to_path_buf()));
137                paths_missing.push(relpath.to_path_buf());
138                if early {
139                    return Ok(Some(differences));
140                }
141            },
142            (false, true) => {
143                differences.push(Difference::TypesDiffer(relpath.to_path_buf()));
144                paths_missing.push(relpath.to_path_buf());
145                if early {
146                    return Ok(Some(differences));
147                }
148            },
149            (false, false) => {
150                let diff = compare_files(&relpath, first_entry.path(), &second_path)?;
151                if let Some(diff) = diff {
152                    differences.push(diff);
153                    if early {
154                        return Ok(Some(differences));
155                    }
156                }
157            },
158        }
159    }
160
161    for second_entry in walk(second).into_iter() {
162        let second_entry = second_entry.map_err(|e| walk_err(e))?;
163        let relpath = rel(second, second_entry.path());
164        if paths_compared.contains(&relpath) {
165            continue;
166        }
167
168        let first_path = first.join(&relpath);
169
170        if let Some(parent) = first_path.parent() {
171            let parent_rel = rel(first, parent);
172            if parent != first && is_path_missing(&parent_rel, &paths_missing) {
173                continue;
174            }
175        }
176
177        match first_path.metadata() {
178            Ok(_) => unreachable!(),
179            Err(e) if e.kind() == io::ErrorKind::NotFound => {
180                if second_entry.file_type().is_dir() {
181                    differences.push(Difference::DirectoryMissing(relpath.to_path_buf(), Subject::First));
182                    paths_missing.push(relpath.to_path_buf());
183                    match early {
184                        true => return Ok(Some(differences)),
185                        false => continue
186                    }
187                } else {
188                    differences.push(Difference::FileMissing(relpath.to_path_buf(), Subject::First));
189                    match early {
190                        true => return Ok(Some(differences)),
191                        false => continue
192                    }
193                }
194            },
195            Err(e) => return Err(e)
196        }
197    }
198
199    if differences.is_empty() {
200        Ok(None)
201    } else {
202        Ok(Some(differences))
203    }
204}
205
/// Returns `true` when `path` equals, or lies beneath, any entry of
/// `paths_missing`.
///
/// The comparison is component-wise, so `a/bc` is not considered to be inside
/// `a/b`.
fn is_path_missing(path: &Path, paths_missing: &[PathBuf]) -> bool {
    paths_missing.iter().any(|missing| path.starts_with(missing))
}
215
216fn walk_err(e: walkdir::Error) -> io::Error {
217    if e.io_error().is_some() {
218        e.into_io_error().unwrap()
219    } else {
220        io::Error::new(io::ErrorKind::TooManyLinks, e.to_string())
221    }
222}
223
224fn walk(dir: &Path) -> WalkDir {
225    WalkDir::new(dir)
226        .follow_links(true)
227        .sort_by_file_name()
228}
229
230fn compare_files(relpath: &Path, first: &Path, second: &Path) -> io::Result<Option<Difference>> {
231    const MAX_BUF_SIZE: usize = 8388608; // MiB
232
233    let (first_file, second_file) = match (File::open(first), File::open(second)) {
234        (Ok(_), Err(e)) => return match e.kind() {
235            io::ErrorKind::NotFound => Ok(Some(Difference::FileMissing(relpath.to_path_buf(), Subject::Second))),
236            _ => Err(e)
237        },
238        (Err(e), Ok(_)) => return match e.kind() {
239            io::ErrorKind::NotFound => Ok(Some(Difference::FileMissing(relpath.to_path_buf(), Subject::First))),
240            _ => Err(e)
241        },
242        (Err(e1), Err(e2)) => return match e1.kind() {
243            io::ErrorKind::NotFound => Err(e2),
244            _ => Err(e1)
245        },
246        (Ok(first), Ok(second)) => (first, second),
247    };
248
249    let mut remaining = first.metadata()?.len() as usize;
250    if remaining != second.metadata()?.len() as usize {
251        return Ok(Some(Difference::FileDiffers(relpath.to_path_buf())));
252    }
253
254    let mut first_buf_reader = BufReader::new(first_file);
255    let mut second_buf_reader = BufReader::new(second_file);
256
257    while remaining > 0 {
258        let buf_size = std::cmp::min(MAX_BUF_SIZE, remaining);
259        let mut first_buf = vec![0; buf_size];
260        let mut second_buf = vec![0; buf_size];
261
262        first_buf_reader.read_exact(&mut first_buf)?;
263        second_buf_reader.read_exact(&mut second_buf)?;
264
265        if first_buf != second_buf {
266            return Ok(Some(Difference::FileDiffers(relpath.to_path_buf())));
267        }
268
269        remaining -= buf_size;
270    }
271
272    Ok(None)
273}
274
/// Returns `child` with the `base` prefix removed.
///
/// When `child` does not start with `base`, `child` is returned unchanged.
fn rel(base: &Path, child: &Path) -> PathBuf {
    match child.strip_prefix(base) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => child.to_path_buf(),
    }
}
278}