diffutilslib/
side_diff.rs

1// This file is part of the uutils diffutils package.
2//
3// For the full copyright and license information, please view the LICENSE-*
4// files that was distributed with this source code.
5
6use core::cmp::{max, min};
7use diff::Result;
8use std::{io::Write, vec};
9use unicode_width::UnicodeWidthStr;
10
11use crate::params::Params;
12
/// Minimum number of columns reserved for the gutter that separates the left
/// and right halves of a side-by-side line.
const GUTTER_WIDTH_MIN: usize = 3;
14
/// Cursor over a byte slice that is consumed one "character" at a time by its
/// `Iterator` implementation.
struct CharIter<'a> {
    /// Bytes that have not been yielded yet.
    current: &'a [u8],
}
18
/// Column layout used to render one side-by-side output line.
struct Config {
    /// Maximum number of columns available to each half of the line.
    sdiff_half_width: usize,
    /// Column at which the right half starts.
    sdiff_column_two_offset: usize,
    /// Distance, in columns, between two consecutive tab stops.
    tab_size: usize,
    /// When `true`, padding and tabs are written as spaces instead of `\t`.
    expanded: bool,
    /// Column of the separator symbol (`<`, `>`, ...) inside the gutter.
    separator_pos: usize,
}
26
27impl<'a> From<&'a [u8]> for CharIter<'a> {
28    fn from(value: &'a [u8]) -> Self {
29        CharIter { current: value }
30    }
31}
32
33impl<'a> Iterator for CharIter<'a> {
34    // (bytes for the next char, visible width)
35    type Item = (&'a [u8], usize);
36
37    fn next(&mut self) -> Option<Self::Item> {
38        let max = self.current.len().min(4);
39
40        // We reached the end.
41        if max == 0 {
42            return None;
43        }
44
45        // Try to find the next utf-8 character, if present in the next 4 bytes.
46        let mut index = 1;
47        let mut view = &self.current[..index];
48        let mut char = str::from_utf8(view);
49        while char.is_err() {
50            index += 1;
51            if index > max {
52                break;
53            }
54            view = &self.current[..index];
55            char = str::from_utf8(view)
56        }
57
58        match char {
59            Ok(c) => {
60                self.current = self
61                    .current
62                    .get(view.len()..)
63                    .unwrap_or(&self.current[0..0]);
64                Some((view, UnicodeWidthStr::width(c)))
65            }
66            Err(_) => {
67                // We did not find an utf-8 char within the next 4 bytes, return the single byte.
68                self.current = &self.current[1..];
69                Some((&view[..1], 1))
70            }
71        }
72    }
73}
74
75impl Config {
76    pub fn new(full_width: usize, tab_size: usize, expanded: bool) -> Self {
77        // diff uses this calculation to calculate the size of a half line
78        // based on the options passed (like -w, -t, etc.). It's actually
79        // pretty useless, because we (actually) don't have any size modifiers
80        // that can change this, however I just want to leave the calculate
81        // here, since it's not very clear and may cause some confusion
82
83        let w = full_width as isize;
84        let t = tab_size as isize;
85        let t_plus_g = t + GUTTER_WIDTH_MIN as isize;
86        let unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
87        let off = unaligned_off - unaligned_off % t;
88        let hw = max(0, min(off - GUTTER_WIDTH_MIN as isize, w - off)) as usize;
89        let c2o = if hw != 0 { off as usize } else { w as usize };
90
91        Self {
92            expanded,
93            sdiff_column_two_offset: c2o,
94            tab_size,
95            sdiff_half_width: hw,
96            separator_pos: ((hw + c2o - 1) >> 1),
97        }
98    }
99}
100
101fn format_tabs_and_spaces<T: Write>(
102    from: usize,
103    to: usize,
104    config: &Config,
105    buf: &mut T,
106) -> std::io::Result<()> {
107    let expanded = config.expanded;
108    let tab_size = config.tab_size;
109    let mut current = from;
110
111    if current > to {
112        return Ok(());
113    }
114
115    if expanded {
116        while current < to {
117            buf.write_all(b" ")?;
118            current += 1;
119        }
120        return Ok(());
121    }
122
123    while current + (tab_size - current % tab_size) <= to {
124        let next_tab = current + (tab_size - current % tab_size);
125        buf.write_all(b"\t")?;
126        current = next_tab;
127    }
128
129    while current < to {
130        buf.write_all(b" ")?;
131        current += 1;
132    }
133
134    Ok(())
135}
136
137fn process_half_line<T: Write>(
138    s: &[u8],
139    max_width: usize,
140    is_right: bool,
141    white_space_gutter: bool,
142    config: &Config,
143    buf: &mut T,
144) -> std::io::Result<()> {
145    if s.is_empty() {
146        if !is_right {
147            format_tabs_and_spaces(
148                0,
149                max_width
150                    + if white_space_gutter {
151                        GUTTER_WIDTH_MIN
152                    } else {
153                        1
154                    },
155                config,
156                buf,
157            )?;
158        }
159
160        return Ok(());
161    }
162
163    if max_width > config.sdiff_half_width {
164        return Ok(());
165    }
166
167    if max_width > config.sdiff_column_two_offset && !is_right {
168        return Ok(());
169    }
170
171    let expanded = config.expanded;
172    let tab_size = config.tab_size;
173    let sdiff_column_two_offset = config.sdiff_column_two_offset;
174    let mut current_width = 0;
175    let iter = CharIter::from(s);
176
177    // the encoding will probably be compatible with utf8, so we can take advantage
178    // of that to get the size of the columns and iterate without breaking the encoding of anything.
179    // It seems like a good trade, since there is still a fallback in case it is not utf8.
180    // But I think it would be better if we used some lib that would allow us to handle this
181    // in the best way possible, in order to avoid overhead (currently 2 for loops are needed).
182    // There is a library called mcel (mcel.h) that is used in GNU diff, but the documentation
183    // about it is very scarce, nor is its use documented on the internet. In fact, from my
184    // research I didn't even find any information about it in the GNU lib's own documentation.
185
186    for c in iter {
187        let (char, c_width) = c;
188
189        if current_width + c_width > max_width {
190            break;
191        }
192
193        match char {
194            b"\t" => {
195                if expanded && (current_width + tab_size - (current_width % tab_size)) <= max_width
196                {
197                    let mut spaces = tab_size - (current_width % tab_size);
198                    while spaces > 0 {
199                        buf.write_all(b" ")?;
200                        current_width += 1;
201                        spaces -= 1;
202                    }
203                } else if current_width + tab_size - (current_width % tab_size) <= max_width {
204                    buf.write_all(b"\t")?;
205                    current_width += tab_size - (current_width % tab_size);
206                }
207            }
208            b"\n" => {
209                break;
210            }
211            b"\r" => {
212                buf.write_all(b"\r")?;
213                format_tabs_and_spaces(0, sdiff_column_two_offset, config, buf)?;
214                current_width = 0;
215            }
216            b"\0" | b"\x07" | b"\x0C" | b"\x0B" => {
217                buf.write_all(char)?;
218            }
219            _ => {
220                buf.write_all(char)?;
221                current_width += c_width;
222            }
223        }
224    }
225
226    // gnu sdiff do not tabulate the hole empty right line, instead, just keep the line empty
227    if !is_right {
228        // we always sum + 1 or + GUTTER_WIDTH_MIN cause we want to expand
229        // up to the third column of the gutter column if the gutter is gutter white space,
230        // otherwise we can expand to only the first column of the gutter middle column, cause
231        // the next is the sep char
232        format_tabs_and_spaces(
233            current_width,
234            max_width
235                + if white_space_gutter {
236                    GUTTER_WIDTH_MIN
237                } else {
238                    1
239                },
240            config,
241            buf,
242        )?;
243    }
244
245    Ok(())
246}
247
248fn push_output<T: Write>(
249    left_ln: &[u8],
250    right_ln: &[u8],
251    symbol: u8,
252    output: &mut T,
253    config: &Config,
254) -> std::io::Result<()> {
255    if left_ln.is_empty() && right_ln.is_empty() {
256        writeln!(output)?;
257        return Ok(());
258    }
259
260    let white_space_gutter = symbol == b' ';
261    let half_width = config.sdiff_half_width;
262    let column_two_offset = config.sdiff_column_two_offset;
263    let separator_pos = config.separator_pos;
264    let put_new_line = true; // should be false when | is allowed
265
266    // this involves a lot of the '|' mark, however, as it is not active,
267    // it is better to deactivate it as it introduces visual bug if
268    // the line is empty.
269    // if !left_ln.is_empty() {
270    //     put_new_line = put_new_line || (left_ln.last() == Some(&b'\n'));
271    // }
272    // if !right_ln.is_empty() {
273    //     put_new_line = put_new_line || (right_ln.last() == Some(&b'\n'));
274    // }
275
276    process_half_line(
277        left_ln,
278        half_width,
279        false,
280        white_space_gutter,
281        config,
282        output,
283    )?;
284    if symbol != b' ' {
285        // the diff always want to put all tabs possible in the usable are,
286        // even in the middle space between the gutters if possible.
287
288        output.write_all(&[symbol])?;
289        if !right_ln.is_empty() {
290            format_tabs_and_spaces(separator_pos + 1, column_two_offset, config, output)?;
291        }
292    }
293    process_half_line(
294        right_ln,
295        half_width,
296        true,
297        white_space_gutter,
298        config,
299        output,
300    )?;
301
302    if put_new_line {
303        writeln!(output)?;
304    }
305
306    Ok(())
307}
308
309pub fn diff<T: Write>(
310    from_file: &[u8],
311    to_file: &[u8],
312    output: &mut T,
313    params: &Params,
314) -> Vec<u8> {
315    //      ^ The left file  ^ The right file
316
317    let mut left_lines: Vec<&[u8]> = from_file.split_inclusive(|&c| c == b'\n').collect();
318    let mut right_lines: Vec<&[u8]> = to_file.split_inclusive(|&c| c == b'\n').collect();
319    let config = Config::new(params.width, params.tabsize, params.expand_tabs);
320
321    if left_lines.last() == Some(&&b""[..]) {
322        left_lines.pop();
323    }
324
325    if right_lines.last() == Some(&&b""[..]) {
326        right_lines.pop();
327    }
328
329    /*
330    DISCLAIMER:
331    Currently the diff engine does not produce results like the diff engine used in GNU diff,
332    so some results may be inaccurate. For example, the line difference marker "|", according
333    to the GNU documentation, appears when the same lines (only the actual line, although the
334    relative line may change the result, so occasionally '|' markers appear with the same lines)
335    are different but exist in both files. In the current solution the same result cannot be
336    obtained because the diff engine does not return Both if both exist but are different,
337    but instead returns a Left and a Right for each one, implying that two lines were added
338    and deleted. Furthermore, the GNU diff program apparently stores some internal state
339    (this internal state is just a note about how the diff engine works) about the lines.
340    For example, an added or removed line directly counts in the line query of the original
341    lines to be printed in the output. Because of this imbalance caused by additions and
342    deletions, the characters ( and ) are introduced. They basically represent lines without
343    context, which have lost their pair in the other file due to additions or deletions. Anyway,
344    my goal with this disclaimer is to warn that for some reason, whether it's the diff engine's
345    inability to determine and predict/precalculate the result of GNU's sdiff, with this software it's
346    not possible to reproduce results that are 100% faithful to GNU's, however, the basic premise
347    e of side diff of showing added and removed lines and creating edit scripts is totally possible.
348    More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of
349    the goals of this project : )
350    */
351    for result in diff::slice(&left_lines, &right_lines) {
352        match result {
353            Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(),
354            Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(),
355            Result::Both(left_ln, right_ln) => {
356                push_output(left_ln, right_ln, b' ', output, &config).unwrap()
357            }
358        }
359    }
360
361    vec![]
362}
363
364#[cfg(test)]
365mod tests {
366    const DEF_TAB_SIZE: usize = 4;
367
368    use super::*;
369
370    mod format_tabs_and_spaces {
371        use super::*;
372
373        const CONFIG_E_T: Config = Config {
374            sdiff_half_width: 60,
375            tab_size: DEF_TAB_SIZE,
376            expanded: true,
377            sdiff_column_two_offset: 0,
378            separator_pos: 0,
379        };
380
381        const CONFIG_E_F: Config = Config {
382            sdiff_half_width: 60,
383            tab_size: DEF_TAB_SIZE,
384            expanded: false,
385            sdiff_column_two_offset: 0,
386            separator_pos: 0,
387        };
388
389        #[test]
390        fn test_format_tabs_and_spaces_expanded_false() {
391            let mut buf = vec![];
392            format_tabs_and_spaces(0, 5, &CONFIG_E_F, &mut buf).unwrap();
393            assert_eq!(buf, vec![b'\t', b' ']);
394        }
395
396        #[test]
397        fn test_format_tabs_and_spaces_expanded_true() {
398            let mut buf = vec![];
399            format_tabs_and_spaces(0, 5, &CONFIG_E_T, &mut buf).unwrap();
400            assert_eq!(buf, vec![b' '; 5]);
401        }
402
403        #[test]
404        fn test_format_tabs_and_spaces_from_greater_than_to() {
405            let mut buf = vec![];
406            format_tabs_and_spaces(6, 5, &CONFIG_E_F, &mut buf).unwrap();
407            assert!(buf.is_empty());
408        }
409
410        #[test]
411        fn test_format_from_non_zero_position() {
412            let mut buf = vec![];
413            format_tabs_and_spaces(2, 7, &CONFIG_E_F, &mut buf).unwrap();
414            assert_eq!(buf, vec![b'\t', b' ', b' ', b' ']);
415        }
416
417        #[test]
418        fn test_multiple_full_tabs_needed() {
419            let mut buf = vec![];
420            format_tabs_and_spaces(0, 12, &CONFIG_E_F, &mut buf).unwrap();
421            assert_eq!(buf, vec![b'\t', b'\t', b'\t']);
422        }
423
424        #[test]
425        fn test_uneven_tab_boundary_with_spaces() {
426            let mut buf = vec![];
427            format_tabs_and_spaces(3, 10, &CONFIG_E_F, &mut buf).unwrap();
428            assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ']);
429        }
430
431        #[test]
432        fn test_expanded_true_with_offset() {
433            let mut buf = vec![];
434            format_tabs_and_spaces(3, 9, &CONFIG_E_T, &mut buf).unwrap();
435            assert_eq!(buf, vec![b' '; 6]);
436        }
437
438        #[test]
439        fn test_exact_tab_boundary_from_midpoint() {
440            let mut buf = vec![];
441            format_tabs_and_spaces(4, 8, &CONFIG_E_F, &mut buf).unwrap();
442            assert_eq!(buf, vec![b'\t']);
443        }
444
445        #[test]
446        fn test_mixed_tabs_and_spaces_edge_case() {
447            let mut buf = vec![];
448            format_tabs_and_spaces(5, 9, &CONFIG_E_F, &mut buf).unwrap();
449            assert_eq!(buf, vec![b'\t', b' ']);
450        }
451
452        #[test]
453        fn test_minimal_gap_with_tab() {
454            let mut buf = vec![];
455            format_tabs_and_spaces(7, 8, &CONFIG_E_F, &mut buf).unwrap();
456            assert_eq!(buf, vec![b'\t']);
457        }
458
459        #[test]
460        fn test_expanded_false_with_tab_at_end() {
461            let mut buf = vec![];
462            format_tabs_and_spaces(6, 8, &CONFIG_E_F, &mut buf).unwrap();
463            assert_eq!(buf, vec![b'\t']);
464        }
465    }
466
467    mod process_half_line {
468        use super::*;
469
470        fn create_test_config(expanded: bool, tab_size: usize) -> Config {
471            Config {
472                sdiff_half_width: 30,
473                sdiff_column_two_offset: 60,
474                tab_size,
475                expanded,
476                separator_pos: 15,
477            }
478        }
479
480        #[test]
481        fn test_empty_line_left_expanded_false() {
482            let config = create_test_config(false, DEF_TAB_SIZE);
483            let mut buf = vec![];
484            process_half_line(b"", 10, false, false, &config, &mut buf).unwrap();
485            assert_eq!(buf.len(), 5);
486            assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']);
487        }
488
489        #[test]
490        fn test_tabs_unexpanded() {
491            let config = create_test_config(false, DEF_TAB_SIZE);
492            let mut buf = vec![];
493            process_half_line(b"\tabc", 8, false, false, &config, &mut buf).unwrap();
494            assert_eq!(buf, vec![b'\t', b'a', b'b', b'c', b'\t', b' ']);
495        }
496
497        #[test]
498        fn test_utf8_multibyte() {
499            let config = create_test_config(false, DEF_TAB_SIZE);
500            let mut buf = vec![];
501            let s = "😉😉😉".as_bytes();
502            process_half_line(s, 3, false, false, &config, &mut buf).unwrap();
503            let mut r = vec![];
504            r.write_all("😉\t".as_bytes()).unwrap();
505            assert_eq!(buf, r)
506        }
507
508        #[test]
509        fn test_newline_handling() {
510            let config = create_test_config(false, DEF_TAB_SIZE);
511            let mut buf = vec![];
512            process_half_line(b"abc\ndef", 5, false, false, &config, &mut buf).unwrap();
513            assert_eq!(buf, vec![b'a', b'b', b'c', b'\t', b' ', b' ']);
514        }
515
516        #[test]
517        fn test_carriage_return() {
518            let config = create_test_config(false, DEF_TAB_SIZE);
519            let mut buf = vec![];
520            process_half_line(b"\rxyz", 5, true, false, &config, &mut buf).unwrap();
521            let mut r = vec![b'\r'];
522            r.extend(vec![b'\t'; 15]);
523            r.extend(vec![b'x', b'y', b'z']);
524            assert_eq!(buf, r);
525        }
526
527        #[test]
528        fn test_exact_width_fit() {
529            let config = create_test_config(true, DEF_TAB_SIZE);
530            let mut buf = vec![];
531            process_half_line(b"abcd", 4, false, false, &config, &mut buf).unwrap();
532            assert_eq!(buf.len(), 5);
533            assert_eq!(buf, b"abcd ".to_vec());
534        }
535
536        #[test]
537        fn test_non_utf8_bytes() {
538            let config = create_test_config(false, DEF_TAB_SIZE);
539            let mut buf = vec![];
540            // ISO-8859-1
541            process_half_line(
542                &[0x63, 0x61, 0x66, 0xE9],
543                5,
544                false,
545                false,
546                &config,
547                &mut buf,
548            )
549            .unwrap();
550            assert_eq!(&buf, &[0x63, 0x61, 0x66, 0xE9, b' ', b' ']);
551            assert!(String::from_utf8(buf).is_err());
552        }
553
554        #[test]
555        fn test_non_utf8_bytes_ignore_padding_bytes() {
556            let config = create_test_config(false, DEF_TAB_SIZE);
557            let mut buf = vec![];
558
559            let utf32le_bytes = [
560                0x63, 0x00, 0x00, 0x00, // 'c'
561                0x61, 0x00, 0x00, 0x00, // 'a'
562                0x66, 0x00, 0x00, 0x00, // 'f'
563                0xE9, 0x00, 0x00, 0x00, // 'é'
564            ];
565            // utf8 little endiand 32 bits (or 4 bytes per char)
566            process_half_line(&utf32le_bytes, 6, false, false, &config, &mut buf).unwrap();
567            let mut r = utf32le_bytes.to_vec();
568            r.extend(vec![b' '; 3]);
569            assert_eq!(buf, r);
570        }
571
572        #[test]
573        fn test_non_utf8_non_preserve_ascii_bytes_cut() {
574            let config = create_test_config(false, DEF_TAB_SIZE);
575            let mut buf = vec![];
576
577            let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding
578                                                   //                                   ^ é char, start multi byte
579            process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap();
580            assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter
581        }
582
583        #[test]
584        fn test_right_line_padding() {
585            let config = create_test_config(false, DEF_TAB_SIZE);
586            let mut buf = vec![];
587            process_half_line(b"xyz", 5, true, true, &config, &mut buf).unwrap();
588            assert_eq!(buf.len(), 3);
589        }
590
591        #[test]
592        fn test_mixed_tabs_spaces() {
593            let config = create_test_config(false, DEF_TAB_SIZE);
594            let mut buf = vec![];
595            process_half_line(b"\t  \t", 10, false, false, &config, &mut buf).unwrap();
596            assert_eq!(buf, vec![b'\t', b' ', b' ', b'\t', b' ', b' ', b' ']);
597        }
598
599        #[test]
600        fn test_overflow_multibyte() {
601            let config = create_test_config(false, DEF_TAB_SIZE);
602            let mut buf = vec![];
603            let s = "日本語".as_bytes();
604            process_half_line(s, 5, false, false, &config, &mut buf).unwrap();
605            assert_eq!(buf, "日本  ".as_bytes());
606        }
607
608        #[test]
609        fn test_white_space_gutter() {
610            let config = create_test_config(false, DEF_TAB_SIZE);
611            let mut buf = vec![];
612            let s = b"abc";
613            process_half_line(s, 3, false, true, &config, &mut buf).unwrap();
614            assert_eq!(buf, b"abc\t  ");
615        }
616
617        #[test]
618        fn test_expanded_true() {
619            let config = create_test_config(true, DEF_TAB_SIZE);
620            let mut buf = vec![];
621            let s = b"abc";
622            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
623            assert_eq!(buf, b"abc        ")
624        }
625
626        #[test]
627        fn test_expanded_true_with_gutter() {
628            let config = create_test_config(true, DEF_TAB_SIZE);
629            let mut buf = vec![];
630            let s = b"abc";
631            process_half_line(s, 10, false, true, &config, &mut buf).unwrap();
632            assert_eq!(buf, b"abc          ")
633        }
634
635        #[test]
636        fn test_width0_chars() {
637            let config = create_test_config(false, DEF_TAB_SIZE);
638            let mut buf = vec![];
639            let s = b"abc\0\x0B\x07\x0C";
640            process_half_line(s, 4, false, false, &config, &mut buf).unwrap();
641            assert_eq!(buf, b"abc\0\x0B\x07\x0C\t ")
642        }
643
644        #[test]
645        fn test_left_empty_white_space_gutter() {
646            let config = create_test_config(false, DEF_TAB_SIZE);
647            let mut buf = vec![];
648            let s = b"";
649            process_half_line(s, 9, false, true, &config, &mut buf).unwrap();
650            assert_eq!(buf, b"\t\t\t");
651        }
652
653        #[test]
654        fn test_s_size_eq_max_width_p1() {
655            let config = create_test_config(false, DEF_TAB_SIZE);
656            let mut buf = vec![];
657            let s = b"abcdefghij";
658            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
659            assert_eq!(buf, b"abcdefghij ");
660        }
661
662        #[test]
663        fn test_mixed_tabs_and_spaces_inversion() {
664            let config = create_test_config(false, DEF_TAB_SIZE);
665            let mut buf = vec![];
666            let s = b" \t \t ";
667            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
668            assert_eq!(buf, b" \t \t   ");
669        }
670
671        #[test]
672        fn test_expanded_with_tabs() {
673            let config = create_test_config(true, DEF_TAB_SIZE);
674            let mut buf = vec![];
675            let s = b" \t \t ";
676            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
677            assert_eq!(buf, b"           ");
678        }
679
680        #[test]
681        fn test_expanded_with_tabs_and_space_gutter() {
682            let config = create_test_config(true, DEF_TAB_SIZE);
683            let mut buf = vec![];
684            let s = b" \t \t ";
685            process_half_line(s, 10, false, true, &config, &mut buf).unwrap();
686            assert_eq!(buf, b"             ");
687        }
688
689        #[test]
690        fn test_zero_width_unicode_chars() {
691            let config = create_test_config(false, DEF_TAB_SIZE);
692            let mut buf = vec![];
693            let s = "\u{200B}".as_bytes();
694            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
695            assert_eq!(buf, "\u{200B}\t\t   ".as_bytes());
696        }
697
698        #[test]
699        fn test_multiple_carriage_returns() {
700            let config = create_test_config(false, DEF_TAB_SIZE);
701            let mut buf = vec![];
702            let s = b"\r\r";
703            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
704            let mut r = vec![b'\r'];
705            r.extend(vec![b'\t'; 15]);
706            r.push(b'\r');
707            r.extend(vec![b'\t'; 15]);
708            r.extend(vec![b'\t'; 2]);
709            r.extend(vec![b' '; 3]);
710            assert_eq!(buf, r);
711        }
712
713        #[test]
714        fn test_multiple_carriage_returns_is_right_true() {
715            let config = create_test_config(false, DEF_TAB_SIZE);
716            let mut buf = vec![];
717            let s = b"\r\r";
718            process_half_line(s, 10, true, false, &config, &mut buf).unwrap();
719            let mut r = vec![b'\r'];
720            r.extend(vec![b'\t'; 15]);
721            r.push(b'\r');
722            r.extend(vec![b'\t'; 15]);
723            assert_eq!(buf, r);
724        }
725
726        #[test]
727        fn test_mixed_invalid_utf8_with_valid() {
728            let config = create_test_config(false, DEF_TAB_SIZE);
729            let mut buf = vec![];
730            let s = b"abc\xFF\xFEdef";
731            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
732            assert!(String::from_utf8(s.to_vec()).is_err());
733            assert_eq!(buf, b"abc\xFF\xFEdef   ");
734        }
735
736        #[test]
737        fn test_max_width_zero() {
738            let config = create_test_config(false, DEF_TAB_SIZE);
739            let mut buf = vec![];
740            let s = b"foo bar";
741            process_half_line(s, 0, false, false, &config, &mut buf).unwrap();
742            assert_eq!(buf, vec![b' ']);
743        }
744
745        #[test]
746        fn test_line_only_with_tabs() {
747            let config = create_test_config(false, DEF_TAB_SIZE);
748            let mut buf = vec![];
749            let s = b"\t\t\t";
750            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
751            assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' '])
752        }
753
754        #[test]
755        fn test_tabs_expanded() {
756            let config = create_test_config(true, DEF_TAB_SIZE);
757            let mut buf = vec![];
758            let s = b"\t\t\t";
759            process_half_line(s, 12, false, false, &config, &mut buf).unwrap();
760            assert_eq!(buf, b" ".repeat(13));
761        }
762
763        #[test]
764        fn test_mixed_tabs() {
765            let config = create_test_config(false, DEF_TAB_SIZE);
766            let mut buf = vec![];
767            let s = b"a\tb\tc\t";
768            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
769            assert_eq!(buf, b"a\tb\tc  ");
770        }
771
772        #[test]
773        fn test_mixed_tabs_with_gutter() {
774            let config = create_test_config(false, DEF_TAB_SIZE);
775            let mut buf = vec![];
776            let s = b"a\tb\tc\t";
777            process_half_line(s, 10, false, true, &config, &mut buf).unwrap();
778            assert_eq!(buf, b"a\tb\tc\t ");
779        }
780
781        #[test]
782        fn test_mixed_tabs_expanded() {
783            let config = create_test_config(true, DEF_TAB_SIZE);
784            let mut buf = vec![];
785            let s = b"a\tb\tc\t";
786            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
787            assert_eq!(buf, b"a   b   c  ");
788        }
789
790        #[test]
791        fn test_mixed_tabs_expanded_with_gutter() {
792            let config = create_test_config(true, DEF_TAB_SIZE);
793            let mut buf = vec![];
794            let s = b"a\tb\tc\t";
795            process_half_line(s, 10, false, true, &config, &mut buf).unwrap();
796            assert_eq!(buf, b"a   b   c    ");
797        }
798
799        #[test]
800        fn test_break_if_invalid_max_width() {
801            let config = create_test_config(true, DEF_TAB_SIZE);
802            let mut buf = vec![];
803            let s = b"a\tb\tc\t";
804            process_half_line(s, 61, false, true, &config, &mut buf).unwrap();
805            assert_eq!(buf, b"");
806            assert_eq!(buf.len(), 0);
807        }
808
809        #[test]
810        fn test_new_line() {
811            let config = create_test_config(false, DEF_TAB_SIZE);
812            let mut buf = vec![];
813            let s = b"abc";
814            process_half_line(s, 10, false, false, &config, &mut buf).unwrap();
815            assert_eq!(buf, b"abc\t\t   ");
816        }
817    }
818
819    mod push_output {
820        // almost all behavior of the push_output was tested with tests on process_half_line
821
822        use super::*;
823
824        impl Default for Config {
825            fn default() -> Self {
826                Config::new(130, 8, false)
827            }
828        }
829
830        fn create_test_config_def() -> Config {
831            Config::default()
832        }
833
834        #[test]
835        fn test_left_empty_right_not_added() {
836            let config = create_test_config_def();
837            let left_ln = b"";
838            let right_ln = b"bar";
839            let symbol = b'>';
840            let mut buf = vec![];
841            push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap();
842            assert_eq!(buf, b"\t\t\t\t\t\t\t      >\tbar\n");
843        }
844
845        #[test]
846        fn test_right_empty_left_not_del() {
847            let config = create_test_config_def();
848            let left_ln = b"bar";
849            let right_ln = b"";
850            let symbol = b'>';
851            let mut buf = vec![];
852            push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap();
853            assert_eq!(buf, b"bar\t\t\t\t\t\t\t      >\n");
854        }
855
856        #[test]
857        fn test_both_empty() {
858            let config = create_test_config_def();
859            let left_ln = b"";
860            let right_ln = b"";
861            let symbol = b' ';
862            let mut buf = vec![];
863            push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap();
864            assert_eq!(buf, b"\n");
865        }
866
// Two 62-byte lines are each cut to 61 bytes; the halves are separated by a
// single tab and the output ends with a newline.
#[test]
fn test_output_cut_with_maximization() {
    let cfg = create_test_config_def();
    let line = vec![b'a'; 62];
    let mut rendered: Vec<u8> = Vec::new();

    push_output(&line, &line, b' ', &mut rendered, &cfg).unwrap();

    // 61 bytes per side, plus the tab and the trailing newline.
    assert_eq!(rendered.len(), 61 * 2 + 2);

    let (left_half, rest) = rendered.split_at(61);
    assert_eq!(left_half, &[b'a'; 61][..]);
    assert_eq!(rest[0], b'\t');

    let mut expected_tail = vec![b'a'; 61];
    expected_tail.push(b'\n');
    assert_eq!(&rest[1..], expected_tail.as_slice());
}
882
// Short lines with a blank marker: the left text is followed by eight tabs
// and then the right text.
#[test]
fn test_both_lines_non_empty_with_space_symbol_max_tabs() {
    let cfg = create_test_config_def();
    let mut rendered: Vec<u8> = Vec::new();

    push_output(b"left", b"right", b' ', &mut rendered, &cfg).unwrap();

    let mut expected = b"left".to_vec();
    expected.extend_from_slice(&[b'\t'; 8]);
    expected.extend_from_slice(b"right\n");
    assert_eq!(rendered, expected);
}
895
// A non-blank marker is placed after the left column padding even when the
// right line is empty.
#[test]
fn test_non_space_symbol_with_padding() {
    let cfg = create_test_config_def();
    // Impossible case in practice; it only exercises a different symbol.
    let marker = b'<';
    let mut rendered: Vec<u8> = Vec::new();

    push_output(b"data", b"", marker, &mut rendered, &cfg).unwrap();

    assert_eq!(rendered, b"data\t\t\t\t\t\t\t      <\n");
}
906
// 100-byte lines on both sides are each truncated to 61 bytes, separated by
// one tab and terminated by a newline.
#[test]
fn test_lines_exceeding_half_width() {
    let cfg = create_test_config_def();
    let left_ln = [b'a'; 100];
    let right_ln = [b'b'; 100];
    let mut rendered: Vec<u8> = Vec::new();

    push_output(&left_ln, &right_ln, b' ', &mut rendered, &cfg).unwrap();

    assert_eq!(rendered.len(), 61 + 1 + 61 + 1);
    assert_eq!(&rendered[..61], &[b'a'; 61][..]);
    assert_eq!(rendered[61], b'\t');
    assert_eq!(&rendered[62..123], &[b'b'; 61][..]);
    assert_eq!(&rendered[123..], b"\n");
}
925
// With `expanded` set, leading tabs come out as eight spaces, the left column
// is space-padded to 61 characters and the gutter is three spaces.
#[test]
fn test_tabs_in_lines_expanded() {
    let mut cfg = create_test_config_def();
    cfg.expanded = true;
    let mut rendered: Vec<u8> = Vec::new();

    push_output(b"\tleft", b"\tright", b' ', &mut rendered, &cfg).unwrap();

    let indent = " ".repeat(8);
    let gutter = " ".repeat(3);
    let left_column = format!("{:<61}", format!("{indent}left"));
    let expected = format!("{left_column}{gutter}{indent}right\n");
    assert_eq!(rendered, expected.as_bytes());
}
942
// Multi-byte UTF-8 text is emitted unchanged on both sides, with eight tabs
// between the left and the right text.
#[test]
fn test_unicode_characters() {
    let cfg = create_test_config_def();
    let (left_text, right_text) = ("áéíóú", "😀😃😄");
    let mut rendered: Vec<u8> = Vec::new();

    push_output(
        left_text.as_bytes(),
        right_text.as_bytes(),
        b' ',
        &mut rendered,
        &cfg,
    )
    .unwrap();

    let expected = format!("{left_text}{}{right_text}\n", "\t".repeat(8));
    assert_eq!(rendered, expected.as_bytes());
}
955    }
956
957    mod diff {
/*
Probably this whole section should be refactored when the complete sdiff
arrives. I would say that these tests serve more to document the
behavior of the engine than to actually test whether it is right,
because it is right, but only up to its limitations.
*/
964
965        use super::*;
966
967        fn generate_params() -> Params {
968            Params {
969                tabsize: 8,
970                expand_tabs: false,
971                width: 130,
972                ..Default::default()
973            }
974        }
975
/// Counts how many times `s` occurs in `vec` as a contiguous byte sequence.
///
/// Every starting offset is checked, so overlapping occurrences are counted
/// separately. An empty `s` yields `0`: `slice::windows` panics when asked
/// for windows of size zero, so that case is handled before scanning.
fn contains_string(vec: &[u8], s: &str) -> usize {
    let pattern = s.as_bytes();
    if pattern.is_empty() {
        return 0;
    }

    vec.windows(pattern.len())
        .filter(|window| *window == pattern)
        .count()
}
980
/// Returns the number of newline (`\n`) bytes in `input`.
///
/// Takes a slice rather than `&Vec<u8>`, so any byte buffer can be passed;
/// existing `&Vec<u8>` arguments still work through deref coercion.
fn calc_lines(input: &[u8]) -> usize {
    input.iter().filter(|&&byte| byte == b'\n').count()
}
992
// Identical one-line inputs give a single output line that holds the text
// twice and carries neither change marker.
#[test]
fn test_equal_lines() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"equal", b"equal", &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 1);
    assert!(rendered.iter().all(|&byte| byte != b'<'));
    assert!(rendered.iter().all(|&byte| byte != b'>'));
    assert_eq!(contains_string(&rendered, "equal"), 2);
}
1005
// Two unrelated one-line inputs give two output lines, one per marker, each
// input text appearing once.
#[test]
fn test_different_lines() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"eq", b"ne", &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 2);
    for marker in [b'>', b'<'] {
        assert!(rendered.contains(&marker));
    }
    for word in ["eq", "ne"] {
        assert_eq!(contains_string(&rendered, word), 1);
    }
}
1019
// An empty source against a one-line target gives one output line with a
// single `>` marker and the new text.
#[test]
fn test_added_line() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"", b"new line", &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 1);
    for needle in [">", "new line"] {
        assert_eq!(contains_string(&rendered, needle), 1);
    }
}
1032
// A one-line source against an empty target gives one output line with a
// single `<` marker and the old text.
#[test]
fn test_removed_line() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"old line", b"", &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 1);
    for needle in ["<", "old line"] {
        assert_eq!(contains_string(&rendered, needle), 1);
    }
}
1045
// One common line followed by two differing lines: five output lines, two of
// each marker.
#[test]
fn test_multiple_changes() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (b"line1\nline2\nline3", b"line1\nmodified\nline4");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 5);
    for marker in ["<", ">"] {
        assert_eq!(contains_string(&rendered, marker), 2);
    }
}
1058
// Multi-byte characters and tabs survive the diff: every character shows up
// in the output, with one marker of each kind.
#[test]
fn test_unicode_and_special_chars() {
    let params = generate_params();
    let (before, after) = ("á\t€", "€\t😊");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before.as_bytes(), after.as_bytes(), &mut rendered, &params);

    let text = String::from_utf8_lossy(&rendered);
    for glyph in ["á", "€", "😊"] {
        assert!(text.contains(glyph));
    }
    assert_eq!(contains_string(&rendered, "<"), 1);
    assert_eq!(contains_string(&rendered, ">"), 1);
}
1073
// Lines that differ in their leading mix of spaces and tabs are reported with
// both markers, and the words themselves remain in the output.
#[test]
fn test_mixed_whitespace() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"  \tspaces", b"\t\t tabs", &mut rendered, &params);

    for marker in [b'<', b'>'] {
        assert!(rendered.contains(&marker));
    }
    let text = String::from_utf8_lossy(&rendered);
    assert!(text.contains("spaces"));
    assert!(text.contains("tabs"));
}
1087
// Diffing two empty inputs writes nothing at all.
#[test]
fn test_empty_files() {
    let params = generate_params();
    let mut rendered: Vec<u8> = Vec::new();

    diff(b"", b"", &mut rendered, &params);

    assert!(rendered.is_empty());
}
1098
// A shared first line and a changed second line: three output lines, the
// shared text twice, and one marker of each kind.
#[test]
fn test_partially_matching_lines() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (b"match\nchange", b"match\nupdated");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 3);
    for (needle, expected) in [("match", 2), ("<", 1), (">", 1)] {
        assert_eq!(contains_string(&rendered, needle), expected);
    }
}
1112
// Removals and additions interleaved with common lines: seven output lines,
// the unique letters once each, and three markers of each kind.
#[test]
fn test_interleaved_add_remove() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (b"A\nB\nC\nD", b"B\nX\nD\nY");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 7);
    for (needle, expected) in [("A", 1), ("X", 1), ("Y", 1), ("<", 3), (">", 3)] {
        assert_eq!(contains_string(&rendered, needle), expected);
    }
}
1128
// A fully reversed file: seven output lines with three markers of each kind.
#[test]
fn test_swapped_lines() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (b"1\n2\n3\n4", b"4\n3\n2\n1");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 7);
    for marker in ["<", ">"] {
        assert_eq!(contains_string(&rendered, marker), 3);
    }
}
1141
// Changes separated by unchanged lines: seven output lines, one removal and
// two additions, each changed text appearing once.
#[test]
fn test_gap_between_changes() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (
        b"Start\nKeep1\nRemove\nKeep2\nEnd",
        b"Start\nNew1\nKeep1\nKeep2\nNew2\nEnd",
    );
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 7);
    for (needle, expected) in [("Remove", 1), ("New1", 1), ("New2", 1), ("<", 1), (">", 2)] {
        assert_eq!(contains_string(&rendered, needle), expected);
    }
}
1157
// Replacements mixed with an extra insertion: eight output lines, two `<`
// markers and three `>` markers.
#[test]
fn test_mixed_operations_complex() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (
        b"Same\nOld1\nSameMid\nOld2\nSameEnd",
        b"Same\nNew1\nSameMid\nNew2\nNew3\nSameEnd",
    );
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 8);
    for (marker, expected) in [("<", 2), (">", 3)] {
        assert_eq!(contains_string(&rendered, marker), expected);
    }
}
1170
// A single changed line between identical header and footer: four output
// lines, each version of the content once, one marker of each kind.
#[test]
fn test_insert_remove_middle() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) =
        (b"Header\nContent1\nFooter", b"Header\nContent2\nFooter");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 4);
    for needle in ["Content1", "Content2", "<", ">"] {
        assert_eq!(contains_string(&rendered, needle), 1);
    }
}
1185
// Two adjacent changed lines plus a changed last line: eight output lines
// with three markers of each kind.
#[test]
fn test_multiple_adjacent_changes() {
    let params = generate_params();
    let (before, after): (&[u8], &[u8]) = (b"A\nB\nC\nD\nE", b"A\nX\nY\nD\nZ");
    let mut rendered: Vec<u8> = Vec::new();

    diff(before, after, &mut rendered, &params);

    assert_eq!(calc_lines(&rendered), 8);
    for marker in ["<", ">"] {
        assert_eq!(contains_string(&rendered, marker), 3);
    }
}
1198    }
1199
1200    mod config {
1201        use super::*;
1202
1203        fn create_config(full_width: usize, tab_size: usize, expanded: bool) -> Config {
1204            Config::new(full_width, tab_size, expanded)
1205        }
1206
// Full width 80, tab size 4: checks (half width, column-two offset, separator position).
#[test]
fn test_full_width_80_tab_4() {
    let cfg = create_config(80, 4, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (37, 40, 38)
    );
}
1214
// Full width 40, tab size 8, tabs expanded: checks (half width, column-two
// offset, separator position).
#[test]
fn test_full_width_40_tab_8() {
    let cfg = create_config(40, 8, true);
    // Separator: (16 + 24 - 1) / 2 = 19.5, and the expected value is 19.
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (16, 24, 19)
    );
}
1222
// Full width 30, tab size 2: checks (half width, column-two offset, separator position).
#[test]
fn test_full_width_30_tab_2() {
    let cfg = create_config(30, 2, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (13, 16, 14)
    );
}
1230
// Full width 10, tab size 4: checks (half width, column-two offset, separator position).
#[test]
fn test_small_width_10_tab_4() {
    let cfg = create_config(10, 4, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (2, 8, 4)
    );
}
1238
// Full width 3, tab size 4: the half width is expected to be zero.
#[test]
fn test_minimal_width_3_tab_4() {
    let cfg = create_config(3, 4, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (0, 3, 1)
    );
}
1246
// Odd full width 7 with tab size 3: checks (half width, column-two offset,
// separator position).
#[test]
fn test_odd_width_7_tab_3() {
    let cfg = create_config(7, 3, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (1, 6, 3)
    );
}
1254
// Tab size 10 exceeds the full width of 5: the half width is expected to be
// zero and the column-two offset to equal the full width.
#[test]
fn test_tab_size_larger_than_width() {
    let cfg = create_config(5, 10, false);
    assert_eq!(
        (cfg.sdiff_half_width, cfg.sdiff_column_two_offset, cfg.separator_pos),
        (0, 5, 2)
    );
}
1262    }
1263}