baz_difftastic/
lib.rs

1use std::env;
2use std::path::{Path, PathBuf};
3
4use glob::Pattern;
5use humansize::{BINARY, format_size};
6use log::info;
7use rayon::iter::{IntoParallelIterator, ParallelIterator};
8use typed_arena::Arena;
9
10use crate::diff::changes::ChangeMap;
11use crate::diff::dijkstra::{ExceededGraphLimit, mark_syntax};
12use crate::diff::sliders::fix_all_sliders;
13use crate::diff::unchanged;
14use crate::display::context::opposite_positions;
15use crate::display::hunks::{matched_pos_to_hunks, merge_adjacent};
16use crate::files::{guess_content, ProbableFileKind, read_files_or_die, relative_paths_in_either};
17use crate::lines::MaxLine;
18use crate::options::{DiffOptions, DisplayOptions, FileArgument};
19use crate::parse::{syntax, tree_sitter_parser as tsp};
20use crate::parse::guess_language::{guess, language_name, LanguageOverride};
21use crate::parse::syntax::init_next_prev;
22use crate::summary::{DiffResult, FileContent, FileFormat};
23
24pub mod conflicts;
25pub mod constants;
26pub mod diff;
27pub mod display;
28pub mod exit_codes;
29pub mod files;
30pub mod hash;
31pub mod line_parser;
32pub mod lines;
33pub mod options;
34pub mod parse;
35pub mod summary;
36pub mod version;
37pub mod words;
38
39pub fn diff<'a>(lhs_path: &'a PathBuf, rhs_path: &'a PathBuf, display_options: DisplayOptions,
40                diff_options: DiffOptions, language_overrides: Vec<(LanguageOverride, Vec<Pattern>)>) -> impl ParallelIterator<Item=DiffResult> + 'a {
41    if lhs_path == rhs_path {
42        let is_dir = lhs_path.is_dir();
43
44        eprintln!(
45            "warning: You've specified the same {} twice.\n",
46            if is_dir { "directory" } else { "file" }
47        );
48    }
49
50    let diff_iter = diff_directories(
51        lhs_path,
52        rhs_path,
53        &display_options,
54        &diff_options,
55        &language_overrides,
56    );
57
58    diff_iter
59}
60
61/// Given two directories that contain the files, compare them
62/// pairwise. Returns an iterator, so we can print results
63/// incrementally.
64///
65/// When more than one file is modified, the hg extdiff extension passes directory
66/// paths with the all the modified files.
67fn diff_directories<'a>(
68    lhs_dir: &'a Path,
69    rhs_dir: &'a Path,
70    display_options: &DisplayOptions,
71    diff_options: &DiffOptions,
72    overrides: &[(LanguageOverride, Vec<Pattern>)],
73) -> impl ParallelIterator<Item=DiffResult> + 'a {
74    let diff_options = diff_options.clone();
75    let display_options = display_options.clone();
76    let overrides: Vec<_> = overrides.into();
77
78    // We greedily list all files in the directory, and then diff them
79    // in parallel. This is assuming that diffing is slower than
80    // enumerating files, so it benefits more from parallelism.
81    let paths = relative_paths_in_either(lhs_dir, rhs_dir);
82
83    paths.into_par_iter().map(move |rel_path| {
84        info!("Relative path is {:?} inside {:?}", rel_path, lhs_dir);
85
86        let lhs_path = Path::new(lhs_dir).join(&rel_path);
87        let rhs_path = Path::new(rhs_dir).join(&rel_path);
88
89        diff_file(
90            &rel_path.display().to_string(),
91            None,
92            &FileArgument::NamedPath(lhs_path),
93            &FileArgument::NamedPath(rhs_path),
94            &display_options,
95            &diff_options,
96            true,
97            &overrides,
98        )
99    })
100}
101
102/// Print a diff between two files.
103pub fn diff_file(
104    display_path: &str,
105    extra_info: Option<String>,
106    lhs_path: &FileArgument,
107    rhs_path: &FileArgument,
108    display_options: &DisplayOptions,
109    diff_options: &DiffOptions,
110    missing_as_empty: bool,
111    overrides: &[(LanguageOverride, Vec<Pattern>)],
112) -> DiffResult {
113    let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
114    let (mut lhs_src, mut rhs_src) = match (guess_content(&lhs_bytes), guess_content(&rhs_bytes)) {
115        (ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
116            return DiffResult {
117                extra_info,
118                display_path: display_path.to_owned(),
119                file_format: FileFormat::Binary,
120                lhs_src: FileContent::Binary,
121                rhs_src: FileContent::Binary,
122                lhs_positions: vec![],
123                rhs_positions: vec![],
124                hunks: vec![],
125                has_byte_changes: lhs_bytes != rhs_bytes,
126                has_syntactic_changes: false,
127            };
128        }
129        (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
130    };
131
132    if diff_options.strip_cr {
133        lhs_src.retain(|c| c != '\r');
134        rhs_src.retain(|c| c != '\r');
135    }
136
137    diff_file_content(
138        display_path,
139        extra_info,
140        lhs_path,
141        rhs_path,
142        &lhs_src,
143        &rhs_src,
144        display_options,
145        diff_options,
146        overrides,
147    )
148}
149
150pub fn diff_file_content(
151    display_path: &str,
152    extra_info: Option<String>,
153    _lhs_path: &FileArgument,
154    rhs_path: &FileArgument,
155    lhs_src: &str,
156    rhs_src: &str,
157    display_options: &DisplayOptions,
158    diff_options: &DiffOptions,
159    overrides: &[(LanguageOverride, Vec<Pattern>)],
160) -> DiffResult {
161    let (guess_src, guess_path) = match rhs_path {
162        FileArgument::NamedPath(path) => (&rhs_src, Path::new(path)),
163        FileArgument::Stdin => (&rhs_src, Path::new(&display_path)),
164        FileArgument::DevNull => (&lhs_src, Path::new(&display_path)),
165    };
166
167    let language = guess(guess_path, guess_src, overrides);
168    let lang_config = language.map(|lang| (lang.clone(), tsp::from_language(lang)));
169
170    if lhs_src == rhs_src {
171        let file_format = match language {
172            Some(language) => FileFormat::SupportedLanguage(language),
173            None => FileFormat::PlainText,
174        };
175
176        // If the two files are completely identical, return early
177        // rather than doing any more work.
178        return DiffResult {
179            extra_info,
180            display_path: display_path.to_string(),
181            file_format,
182            lhs_src: FileContent::Text("".into()),
183            rhs_src: FileContent::Text("".into()),
184            lhs_positions: vec![],
185            rhs_positions: vec![],
186            hunks: vec![],
187            has_byte_changes: false,
188            has_syntactic_changes: false,
189        };
190    }
191
192    let (file_format, lhs_positions, rhs_positions) = match lang_config {
193        None => {
194            let file_format = FileFormat::PlainText;
195            if diff_options.check_only {
196                return check_only_text(&file_format, display_path, extra_info, lhs_src, rhs_src);
197            }
198
199            let lhs_positions = line_parser::change_positions(lhs_src, rhs_src);
200            let rhs_positions = line_parser::change_positions(rhs_src, lhs_src);
201            (file_format, lhs_positions, rhs_positions)
202        }
203        Some((language, lang_config)) => {
204            let arena = Arena::new();
205            match tsp::to_tree_with_limit(diff_options, &lang_config, lhs_src, rhs_src) {
206                Ok((lhs_tree, rhs_tree)) => {
207                    match tsp::to_syntax_with_limit(
208                        lhs_src,
209                        rhs_src,
210                        &lhs_tree,
211                        &rhs_tree,
212                        &arena,
213                        &lang_config,
214                        diff_options,
215                    ) {
216                        Ok((lhs, rhs)) => {
217                            if diff_options.check_only {
218                                let has_syntactic_changes = lhs != rhs;
219                                return DiffResult {
220                                    extra_info,
221                                    display_path: display_path.to_string(),
222                                    file_format: FileFormat::SupportedLanguage(language),
223                                    lhs_src: FileContent::Text(lhs_src.to_owned()),
224                                    rhs_src: FileContent::Text(rhs_src.to_owned()),
225                                    lhs_positions: vec![],
226                                    rhs_positions: vec![],
227                                    hunks: vec![],
228                                    has_byte_changes: true,
229                                    has_syntactic_changes,
230                                };
231                            }
232
233                            let mut change_map = ChangeMap::default();
234                            let possibly_changed = if env::var("DFT_DBG_KEEP_UNCHANGED").is_ok() {
235                                vec![(lhs.clone(), rhs.clone())]
236                            } else {
237                                unchanged::mark_unchanged(&lhs, &rhs, &mut change_map)
238                            };
239
240                            let mut exceeded_graph_limit = false;
241
242                            for (lhs_section_nodes, rhs_section_nodes) in possibly_changed {
243                                init_next_prev(&lhs_section_nodes);
244                                init_next_prev(&rhs_section_nodes);
245
246                                match mark_syntax(
247                                    lhs_section_nodes.get(0).copied(),
248                                    rhs_section_nodes.get(0).copied(),
249                                    &mut change_map,
250                                    diff_options.graph_limit,
251                                ) {
252                                    Ok(()) => {}
253                                    Err(ExceededGraphLimit {}) => {
254                                        exceeded_graph_limit = true;
255                                        break;
256                                    }
257                                }
258                            }
259
260                            if exceeded_graph_limit {
261                                let lhs_positions = line_parser::change_positions(lhs_src, rhs_src);
262                                let rhs_positions = line_parser::change_positions(rhs_src, lhs_src);
263                                (
264                                    FileFormat::TextFallback {
265                                        reason: "exceeded DFT_GRAPH_LIMIT".into(),
266                                    },
267                                    lhs_positions,
268                                    rhs_positions,
269                                )
270                            } else {
271                                fix_all_sliders(language, &lhs, &mut change_map);
272                                fix_all_sliders(language, &rhs, &mut change_map);
273
274                                let mut lhs_positions = syntax::change_positions(&lhs, &change_map);
275                                let mut rhs_positions = syntax::change_positions(&rhs, &change_map);
276
277                                if diff_options.ignore_comments {
278                                    let lhs_comments =
279                                        tsp::comment_positions(&lhs_tree, lhs_src, &lang_config);
280                                    lhs_positions.extend(lhs_comments);
281
282                                    let rhs_comments =
283                                        tsp::comment_positions(&rhs_tree, rhs_src, &lang_config);
284                                    rhs_positions.extend(rhs_comments);
285                                }
286
287                                (
288                                    FileFormat::SupportedLanguage(language),
289                                    lhs_positions,
290                                    rhs_positions,
291                                )
292                            }
293                        }
294                        Err(tsp::ExceededParseErrorLimit(error_count)) => {
295                            let file_format = FileFormat::TextFallback {
296                                reason: format!(
297                                    "{} {} parse error{}, exceeded DFT_PARSE_ERROR_LIMIT",
298                                    error_count,
299                                    language_name(language),
300                                    if error_count == 1 { "" } else { "s" }
301                                ),
302                            };
303
304                            if diff_options.check_only {
305                                return check_only_text(
306                                    &file_format,
307                                    display_path,
308                                    extra_info,
309                                    lhs_src,
310                                    rhs_src,
311                                );
312                            }
313
314                            let lhs_positions = line_parser::change_positions(lhs_src, rhs_src);
315                            let rhs_positions = line_parser::change_positions(rhs_src, lhs_src);
316                            (file_format, lhs_positions, rhs_positions)
317                        }
318                    }
319                }
320                Err(tsp::ExceededByteLimit(num_bytes)) => {
321                    let file_format = FileFormat::TextFallback {
322                        reason: format!(
323                            "{} exceeded DFT_BYTE_LIMIT",
324                            &format_size(num_bytes, BINARY)
325                        ),
326                    };
327
328                    if diff_options.check_only {
329                        return check_only_text(
330                            &file_format,
331                            display_path,
332                            extra_info,
333                            lhs_src,
334                            rhs_src,
335                        );
336                    }
337
338                    let lhs_positions = line_parser::change_positions(lhs_src, rhs_src);
339                    let rhs_positions = line_parser::change_positions(rhs_src, lhs_src);
340                    (file_format, lhs_positions, rhs_positions)
341                }
342            }
343        }
344    };
345
346    let opposite_to_lhs = opposite_positions(&lhs_positions);
347    let opposite_to_rhs = opposite_positions(&rhs_positions);
348
349    let hunks = matched_pos_to_hunks(&lhs_positions, &rhs_positions);
350    let hunks = merge_adjacent(
351        &hunks,
352        &opposite_to_lhs,
353        &opposite_to_rhs,
354        lhs_src.max_line(),
355        rhs_src.max_line(),
356        display_options.num_context_lines as usize,
357    );
358    let has_syntactic_changes = !hunks.is_empty();
359
360    DiffResult {
361        extra_info,
362        display_path: display_path.to_string(),
363        file_format,
364        lhs_src: FileContent::Text(lhs_src.to_owned()),
365        rhs_src: FileContent::Text(rhs_src.to_owned()),
366        lhs_positions,
367        rhs_positions,
368        hunks,
369        has_byte_changes: true,
370        has_syntactic_changes,
371    }
372}
373
374fn check_only_text(
375    file_format: &FileFormat,
376    display_path: &str,
377    extra_info: Option<String>,
378    lhs_src: &str,
379    rhs_src: &str,
380) -> DiffResult {
381    let has_changes = lhs_src != rhs_src;
382
383    DiffResult {
384        display_path: display_path.to_string(),
385        extra_info,
386        file_format: file_format.clone(),
387        lhs_src: FileContent::Text(lhs_src.into()),
388        rhs_src: FileContent::Text(rhs_src.into()),
389        lhs_positions: vec![],
390        rhs_positions: vec![],
391        hunks: vec![],
392        has_byte_changes: has_changes,
393        has_syntactic_changes: has_changes,
394    }
395}
396
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;

    use super::*;

    /// Diffing a source against itself must report no changed positions on
    /// either side.
    #[test]
    fn test_diff_identical_content() {
        let src = "foo";
        let lhs_arg = FileArgument::from_path_argument(OsStr::new("foo.el"));
        let rhs_arg = FileArgument::from_path_argument(OsStr::new("foo.el"));
        let display_options = DisplayOptions::default();
        let diff_options = DiffOptions::default();

        let res = diff_file_content(
            "foo.el",
            None,
            &lhs_arg,
            &rhs_arg,
            src,
            src,
            &display_options,
            &diff_options,
            &[],
        );

        assert_eq!(res.lhs_positions, vec![]);
        assert_eq!(res.rhs_positions, vec![]);
    }
}