difft_lib/
mainfn.rs

1//! Difftastic is a syntactic diff tool.
2//!
3//! For usage instructions and advice on contributing, see [the
4//! manual](http://difftastic.wilfred.me.uk/).
5//!
6
// This tends to trigger on larger tuples of simple types, and naming
// them would probably be worse for readability.
#![allow(clippy::type_complexity)]
// == "" is often clearer when dealing with strings.
#![allow(clippy::comparison_to_empty)]
// It's common to have pairs foo_lhs and foo_rhs, leading to double
// the number of arguments and triggering this lint.
#![allow(clippy::too_many_arguments)]
// Has false positives on else if chains that sometimes have the same
// body for readability.
//
// Note: the lint path is `clippy::<lint>`; a doubled `clippy::clippy::`
// prefix does not name any lint, so the allow would silently not apply.
#![allow(clippy::if_same_then_else)]
18
19use crate::diff::{
20    changes::ChangeMap, dijkstra::mark_syntax, dijkstra::ExceededGraphLimit,
21    sliders::fix_all_sliders, unchanged,
22};
23use crate::display;
24use crate::display::context::opposite_positions;
25use crate::display::hunks::{matched_pos_to_hunks, merge_adjacent};
26use crate::files::{
27    guess_content, read_files_or_die, read_or_die, relative_paths_in_either, ProbableFileKind,
28};
29use crate::line_parser;
30use crate::lines::MaxLine;
31use crate::options;
32use crate::options::{DisplayMode, DisplayOptions, Mode, DEFAULT_TAB_WIDTH};
33use crate::parse;
34use crate::parse::guess_language::LANG_EXTENSIONS;
35use crate::parse::guess_language::{guess, language_name};
36use crate::parse::syntax;
37use crate::parse::syntax::init_all_info;
38use crate::parse::tree_sitter_parser as tsp;
39use crate::summary::{DiffResult, FileContent};
40use log::info;
41use mimalloc::MiMalloc;
42
/// The global allocator used by difftastic.
///
/// Because of the `#[global_allocator]` attribute, heap allocations made
/// through Rust's global allocation API are served by `MiMalloc` rather
/// than the default allocator.
///
/// Diffing allocates a large amount of memory, and `MiMalloc` performs
/// better.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
49
50use owo_colors::OwoColorize;
51use rayon::prelude::*;
52use std::{env, path::Path};
53use syntax::init_next_prev;
54use typed_arena::Arena;
55
/// Terminate the process if we get SIGPIPE.
///
/// Sets the SIGPIPE disposition for this process to `SIG_DFL` (the
/// platform default). The value returned by `signal` is discarded.
#[cfg(unix)]
fn reset_sigpipe() {
    // SAFETY: `SIG_DFL` is a disposition constant provided by libc, so
    // no Rust callback is being registered as a signal handler here.
    // NOTE(review): presumably this is needed because the Rust runtime
    // ignores SIGPIPE by default — confirm against the std documentation.
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
    }
}

/// No-op counterpart of the Unix `reset_sigpipe`, so the caller does not
/// need its own `cfg` guard on non-Unix targets.
#[cfg(not(unix))]
fn reset_sigpipe() {
    // Do nothing.
}
68
69/// The entrypoint.
70pub fn mainfn() {
71    pretty_env_logger::init_timed();
72    reset_sigpipe();
73
74    match options::parse_args() {
75        Mode::DumpTreeSitter {
76            path,
77            language_override,
78        } => {
79            let path = Path::new(&path);
80            let bytes = read_or_die(path);
81            let src = String::from_utf8_lossy(&bytes).to_string();
82            // TODO: Load display options rather than hard-coding.
83            let src = replace_tabs(&src, DEFAULT_TAB_WIDTH);
84
85            let language = language_override.or_else(|| guess(path, &src));
86            match language {
87                Some(lang) => {
88                    let ts_lang = tsp::from_language(lang);
89                    let tree = tsp::parse_to_tree(&src, &ts_lang);
90                    tsp::print_tree(&src, &tree);
91                }
92                None => {
93                    eprintln!("No tree-sitter parser for file: {:?}", path);
94                }
95            }
96        }
97        Mode::DumpSyntax {
98            path,
99            language_override,
100        } => {
101            let path = Path::new(&path);
102            let bytes = read_or_die(path);
103            let src = String::from_utf8_lossy(&bytes).to_string();
104            // TODO: Load display options rather than hard-coding.
105            let src = replace_tabs(&src, DEFAULT_TAB_WIDTH);
106
107            let language = language_override.or_else(|| guess(path, &src));
108            match language {
109                Some(lang) => {
110                    let ts_lang = tsp::from_language(lang);
111                    let arena = Arena::new();
112                    let ast = tsp::parse(&arena, &src, &ts_lang);
113                    init_all_info(&ast, &[]);
114                    println!("{:#?}", ast);
115                }
116                None => {
117                    eprintln!("No tree-sitter parser for file: {:?}", path);
118                }
119            }
120        }
121        Mode::ListLanguages => {
122            for (language, extensions) in LANG_EXTENSIONS {
123                print!("{}", language_name(*language).bold());
124
125                let mut extensions: Vec<&str> = (*extensions).into();
126                extensions.sort();
127
128                for extension in extensions {
129                    print!(" .{}", extension);
130                }
131                println!();
132            }
133        }
134        Mode::Diff {
135            graph_limit,
136            byte_limit,
137            display_options,
138            missing_as_empty,
139            language_override,
140            lhs_path,
141            rhs_path,
142            lhs_display_path,
143            rhs_display_path,
144        } => {
145            let lhs_path = Path::new(&lhs_path);
146            let rhs_path = Path::new(&rhs_path);
147
148            if lhs_path == rhs_path {
149                eprintln!(
150                    "warning: You've specified the same {} twice.\n",
151                    if lhs_path.is_dir() {
152                        "directory"
153                    } else {
154                        "file"
155                    }
156                );
157            }
158
159            if lhs_path.is_dir() && rhs_path.is_dir() {
160                diff_directories(
161                    lhs_path,
162                    rhs_path,
163                    &display_options,
164                    graph_limit,
165                    byte_limit,
166                    language_override,
167                )
168                .for_each(|diff_result| {
169                    print_diff_result(&display_options, &diff_result);
170                });
171            } else {
172                let diff_result = diff_file(
173                    &lhs_display_path,
174                    &rhs_display_path,
175                    lhs_path,
176                    rhs_path,
177                    &display_options,
178                    missing_as_empty,
179                    graph_limit,
180                    byte_limit,
181                    language_override,
182                );
183                print_diff_result(&display_options, &diff_result);
184            }
185        }
186    };
187}
188
/// Return a copy of `src` in which every tab character has been
/// expanded to `tab_width` space characters.
///
/// A `tab_width` of zero simply removes the tabs.
///
/// TODO: This breaks parsers that require tabs, such as Makefile
/// parsing. We shouldn't do this transform until after parsing.
fn replace_tabs(src: &str, tab_width: usize) -> String {
    let mut expanded = String::with_capacity(src.len());
    for ch in src.chars() {
        if ch == '\t' {
            expanded.extend(std::iter::repeat(' ').take(tab_width));
        } else {
            expanded.push(ch);
        }
    }
    expanded
}
198
199/// Print a diff between two files.
200pub fn diff_file(
201    lhs_display_path: &str,
202    rhs_display_path: &str,
203    lhs_path: &Path,
204    rhs_path: &Path,
205    display_options: &DisplayOptions,
206    missing_as_empty: bool,
207    graph_limit: usize,
208    byte_limit: usize,
209    language_override: Option<parse::guess_language::Language>,
210) -> DiffResult {
211    let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
212    diff_file_content(
213        lhs_display_path,
214        rhs_display_path,
215        &lhs_bytes,
216        &rhs_bytes,
217        display_options.tab_width,
218        graph_limit,
219        byte_limit,
220        language_override,
221    )
222}
223
224fn diff_file_content(
225    lhs_display_path: &str,
226    rhs_display_path: &str,
227    lhs_bytes: &[u8],
228    rhs_bytes: &[u8],
229    tab_width: usize,
230    graph_limit: usize,
231    byte_limit: usize,
232    language_override: Option<parse::guess_language::Language>,
233) -> DiffResult {
234    let (mut lhs_src, mut rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
235        (ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
236            return DiffResult {
237                lhs_display_path: lhs_display_path.into(),
238                rhs_display_path: rhs_display_path.into(),
239                language: None,
240                lhs_src: FileContent::Binary(lhs_bytes.to_vec()),
241                rhs_src: FileContent::Binary(rhs_bytes.to_vec()),
242                lhs_positions: vec![],
243                rhs_positions: vec![],
244            };
245        }
246        (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
247    };
248
249    // TODO: don't replace tab characters inside string literals.
250    lhs_src = replace_tabs(&lhs_src, tab_width);
251    rhs_src = replace_tabs(&rhs_src, tab_width);
252
253    // Ignore the trailing newline, if present.
254    // TODO: highlight if this has changes (#144).
255    // TODO: factor out a string cleaning function.
256    if lhs_src.ends_with('\n') {
257        lhs_src.pop();
258    }
259    if rhs_src.ends_with('\n') {
260        rhs_src.pop();
261    }
262
263    // Prefer the RHS path for language detection, unless it's /dev/null.
264    let (guess_src, guess_path) = if rhs_display_path == "/dev/null" {
265        // TODO: take a Path directly instead.
266        (&lhs_src, Path::new(&lhs_display_path))
267    } else {
268        (&rhs_src, Path::new(&rhs_display_path))
269    };
270    let language = language_override.or_else(|| guess(guess_path, guess_src));
271    let lang_config = language.map(tsp::from_language);
272
273    if lhs_bytes == rhs_bytes {
274        // If the two files are completely identical, return early
275        // rather than doing any more work.
276        return DiffResult {
277            lhs_display_path: lhs_display_path.into(),
278            rhs_display_path: rhs_display_path.into(),
279            language: language.map(|l| language_name(l).into()),
280            lhs_src: FileContent::Text("".into()),
281            rhs_src: FileContent::Text("".into()),
282            lhs_positions: vec![],
283            rhs_positions: vec![],
284        };
285    }
286
287    let (lang_name, lhs_positions, rhs_positions) = match lang_config {
288        _ if lhs_bytes.len() > byte_limit || rhs_bytes.len() > byte_limit => {
289            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
290            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
291            (
292                Some("Text (exceeded DFT_BYTE_LIMIT)".into()),
293                lhs_positions,
294                rhs_positions,
295            )
296        }
297        Some(ts_lang) => {
298            let arena = Arena::new();
299            let lhs = tsp::parse(&arena, &lhs_src, &ts_lang);
300            let rhs = tsp::parse(&arena, &rhs_src, &ts_lang);
301
302            init_all_info(&lhs, &rhs);
303
304            let mut change_map = ChangeMap::default();
305            let possibly_changed = if env::var("DFT_DBG_KEEP_UNCHANGED").is_ok() {
306                vec![(lhs.clone(), rhs.clone())]
307            } else {
308                unchanged::mark_unchanged(&lhs, &rhs, &mut change_map)
309            };
310
311            let mut exceeded_graph_limit = false;
312
313            for (lhs_section_nodes, rhs_section_nodes) in possibly_changed {
314                init_next_prev(&lhs_section_nodes);
315                init_next_prev(&rhs_section_nodes);
316
317                match mark_syntax(
318                    lhs_section_nodes.get(0).copied(),
319                    rhs_section_nodes.get(0).copied(),
320                    &mut change_map,
321                    graph_limit,
322                ) {
323                    Ok(()) => {}
324                    Err(ExceededGraphLimit {}) => {
325                        exceeded_graph_limit = true;
326                        break;
327                    }
328                }
329            }
330
331            if exceeded_graph_limit {
332                let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
333                let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
334                (
335                    Some("Text (exceeded DFT_GRAPH_LIMIT)".into()),
336                    lhs_positions,
337                    rhs_positions,
338                )
339            } else {
340                // TODO: Make this .expect() unnecessary.
341                let language =
342                    language.expect("If we had a ts_lang, we must have guessed the language");
343                fix_all_sliders(language, &lhs, &mut change_map);
344                fix_all_sliders(language, &rhs, &mut change_map);
345
346                let lhs_positions = syntax::change_positions(&lhs, &change_map);
347                let rhs_positions = syntax::change_positions(&rhs, &change_map);
348                (
349                    Some(language_name(language).into()),
350                    lhs_positions,
351                    rhs_positions,
352                )
353            }
354        }
355        None => {
356            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
357            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
358            (None, lhs_positions, rhs_positions)
359        }
360    };
361
362    DiffResult {
363        lhs_display_path: lhs_display_path.into(),
364        rhs_display_path: rhs_display_path.into(),
365        language: lang_name,
366        lhs_src: FileContent::Text(lhs_src),
367        rhs_src: FileContent::Text(rhs_src),
368        lhs_positions,
369        rhs_positions,
370    }
371}
372
373/// Given two directories that contain the files, compare them
374/// pairwise. Returns an iterator, so we can print results
375/// incrementally.
376///
377/// When more than one file is modified, the hg extdiff extension passes directory
378/// paths with the all the modified files.
379fn diff_directories<'a>(
380    lhs_dir: &'a Path,
381    rhs_dir: &'a Path,
382    display_options: &DisplayOptions,
383    graph_limit: usize,
384    byte_limit: usize,
385    language_override: Option<parse::guess_language::Language>,
386) -> impl ParallelIterator<Item = DiffResult> + 'a {
387    let display_options = display_options.clone();
388
389    // We greedily list all files in the directory, and then diff them
390    // in parallel. This is assuming that diffing is slower than
391    // enumerating files, so it benefits more from parallelism.
392    let paths = relative_paths_in_either(lhs_dir, rhs_dir);
393
394    paths.into_par_iter().map(move |rel_path| {
395        info!("Relative path is {:?} inside {:?}", rel_path, lhs_dir);
396
397        let lhs_path = Path::new(lhs_dir).join(&rel_path);
398        let rhs_path = Path::new(rhs_dir).join(&rel_path);
399
400        diff_file(
401            &rel_path.to_string_lossy(),
402            &rel_path.to_string_lossy(), // todo
403            &lhs_path,
404            &rhs_path,
405            &display_options,
406            true,
407            graph_limit,
408            byte_limit,
409            language_override,
410        )
411    })
412}
413
414pub fn print_diff_result(display_options: &DisplayOptions, summary: &DiffResult) {
415    match (&summary.lhs_src, &summary.rhs_src) {
416        (FileContent::Text(lhs_src), FileContent::Text(rhs_src)) => {
417            let opposite_to_lhs = opposite_positions(&summary.lhs_positions);
418            let opposite_to_rhs = opposite_positions(&summary.rhs_positions);
419
420            let hunks = matched_pos_to_hunks(&summary.lhs_positions, &summary.rhs_positions);
421            let hunks = merge_adjacent(
422                &hunks,
423                &opposite_to_lhs,
424                &opposite_to_rhs,
425                lhs_src.max_line(),
426                rhs_src.max_line(),
427            );
428
429            let lang_name = summary.language.clone().unwrap_or_else(|| "Text".into());
430            if hunks.is_empty() {
431                if display_options.print_unchanged {
432                    println!(
433                        "{}",
434                        display::style::header(
435                            &summary.lhs_display_path,
436                            &summary.rhs_display_path,
437                            1,
438                            1,
439                            &lang_name,
440                            display_options
441                        )
442                    );
443                    if lang_name == "Text" || summary.lhs_src == summary.rhs_src {
444                        // TODO: there are other Text names now, so
445                        // they will hit the second case incorrectly.
446                        println!("No changes.\n");
447                    } else {
448                        println!("No syntactic changes.\n");
449                    }
450                }
451                return;
452            }
453
454            match display_options.display_mode {
455                DisplayMode::Inline => {
456                    display::inline::print(
457                        lhs_src,
458                        rhs_src,
459                        display_options,
460                        &summary.lhs_positions,
461                        &summary.rhs_positions,
462                        &hunks,
463                        &summary.lhs_display_path,
464                        &summary.rhs_display_path,
465                        &lang_name,
466                    );
467                }
468                DisplayMode::SideBySide | DisplayMode::SideBySideShowBoth => {
469                    display::side_by_side::print(
470                        &hunks,
471                        display_options,
472                        &summary.lhs_display_path,
473                        &summary.rhs_display_path,
474                        &lang_name,
475                        lhs_src,
476                        rhs_src,
477                        &summary.lhs_positions,
478                        &summary.rhs_positions,
479                    );
480                }
481            }
482        }
483        (FileContent::Binary(lhs_bytes), FileContent::Binary(rhs_bytes)) => {
484            let changed = lhs_bytes != rhs_bytes;
485            if display_options.print_unchanged || changed {
486                println!(
487                    "{}",
488                    display::style::header(
489                        &summary.lhs_display_path,
490                        &summary.rhs_display_path,
491                        1,
492                        1,
493                        "binary",
494                        display_options
495                    )
496                );
497                if changed {
498                    println!("Binary contents changed.");
499                } else {
500                    println!("No changes.");
501                }
502            }
503        }
504        (_, FileContent::Binary(_)) | (FileContent::Binary(_), _) => {
505            // We're diffing a binary file against a text file.
506            println!(
507                "{}",
508                display::style::header(
509                    &summary.lhs_display_path,
510                    &summary.rhs_display_path,
511                    1,
512                    1,
513                    "binary",
514                    display_options
515                )
516            );
517            println!("Binary contents changed.");
518        }
519    }
520}
521
/// Foreground colour attached to each text fragment returned by
/// `tui_diff_result`.
///
/// NOTE(review): within this file only `White` is constructed (for
/// status messages); `Red` and `Green` are presumably used by the TUI
/// side-by-side renderer for removed and added text — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FgColor {
    /// Default, unhighlighted text.
    White,
    /// Red foreground.
    Red,
    /// Green foreground.
    Green,
}
527
528pub fn tui_diff_result(
529    display_options: &DisplayOptions,
530    summary: &DiffResult,
531) -> (Vec<Vec<(String, FgColor)>>, Vec<Vec<(String, FgColor)>>) {
532    if let (FileContent::Text(lhs_src), FileContent::Text(rhs_src)) =
533        (&summary.lhs_src, &summary.rhs_src)
534    {
535        let opposite_to_lhs = opposite_positions(&summary.lhs_positions);
536        let opposite_to_rhs = opposite_positions(&summary.rhs_positions);
537
538        let hunks = matched_pos_to_hunks(&summary.lhs_positions, &summary.rhs_positions);
539        let hunks = merge_adjacent(
540            &hunks,
541            &opposite_to_lhs,
542            &opposite_to_rhs,
543            lhs_src.max_line(),
544            rhs_src.max_line(),
545        );
546
547        let lang_name = summary.language.clone().unwrap_or_else(|| "Text".into());
548        if hunks.is_empty() {
549            if display_options.print_unchanged {
550                println!(
551                    "{}",
552                    display::style::header(
553                        &summary.lhs_display_path,
554                        &summary.rhs_display_path,
555                        1,
556                        1,
557                        &lang_name,
558                        display_options
559                    )
560                );
561                if lang_name == "Text" || summary.lhs_src == summary.rhs_src {
562                    // TODO: there are other Text names now, so
563                    // they will hit the second case incorrectly.
564                    return (
565                        vec![vec![("No changes.".to_string(), FgColor::White)]],
566                        vec![vec![]],
567                    );
568                } else {
569                    return (
570                        vec![vec![("No syntactic changes.".to_string(), FgColor::White)]],
571                        vec![vec![]],
572                    );
573                }
574            }
575            return (vec![vec![]], vec![vec![]]);
576        }
577
578        display::side_by_side::tui_print(
579            &hunks,
580            display_options,
581            &summary.lhs_display_path,
582            &summary.rhs_display_path,
583            &lang_name,
584            lhs_src,
585            rhs_src,
586            &summary.lhs_positions,
587            &summary.rhs_positions,
588        )
589    } else {
590        (
591            vec![vec![("No support source.".to_string(), FgColor::White)]],
592            vec![vec![]],
593        )
594    }
595}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::{DEFAULT_BYTE_LIMIT, DEFAULT_GRAPH_LIMIT, DEFAULT_TAB_WIDTH};

    /// Diffing content against a byte-identical copy of itself must
    /// report no changed positions on either side.
    #[test]
    fn test_diff_identical_content() {
        let content = "foo".as_bytes();
        let result = diff_file_content(
            "foo.el",
            "foo.el",
            content,
            content,
            DEFAULT_TAB_WIDTH,
            DEFAULT_GRAPH_LIMIT,
            DEFAULT_BYTE_LIMIT,
            None,
        );

        assert_eq!(result.lhs_positions, vec![]);
        assert_eq!(result.rhs_positions, vec![]);
    }
}
618}