Skip to main content

semdiff_differ_text/
lib.rs

1use memmap2::Mmap;
2use mime::Mime;
3use semdiff_core::fs::FileLeaf;
4use semdiff_core::{Diff, DiffCalculator, MayUnsupported};
5use similar::TextDiffConfig;
6use std::convert;
7use std::sync::Arc;
8
9pub mod report_html;
10pub mod report_json;
11pub mod report_summary;
12
13#[cfg(test)]
14mod tests;
15
/// Marker type naming the reporter for text diffs.
///
/// It carries no state. NOTE(review): the reporting trait implementations
/// presumably live in the `report_*` submodules — confirm.
pub struct TextDiffReporter;
17
18#[derive(Debug)]
19pub struct TextDiff {
20    equal: bool,
21    expected: Arc<Mmap>,
22    actual: Arc<Mmap>,
23}
24
25impl Diff for TextDiff {
26    fn equal(&self) -> bool {
27        self.equal
28    }
29}
30
31impl TextDiff {
32    fn diff(&self) -> similar::TextDiff<'_, '_, '_, [u8]> {
33        text_diff_lines(&self.expected[..], &self.actual[..])
34    }
35}
36
37fn text_diff_lines<'a>(expected: &'a [u8], actual: &'a [u8]) -> similar::TextDiff<'a, 'a, 'a, [u8]> {
38    TextDiffConfig::default()
39        .algorithm(similar::Algorithm::Patience)
40        .diff_lines(expected, actual)
41}
42
43fn is_text_file(kind: &Mime, body: &[u8]) -> bool {
44    if is_text_mime(kind) {
45        return true;
46    }
47    if is_binary_mime(kind) {
48        return false;
49    }
50
51    let Ok(text) = str::from_utf8(body) else {
52        return false;
53    };
54
55    text.chars()
56        .all(|ch| !ch.is_control() || matches!(ch, '\n' | '\r' | '\t'))
57}
58
59fn is_text_mime(kind: &Mime) -> bool {
60    kind.type_() == mime::TEXT
61        || matches!(
62            kind.essence_str(),
63            "application/json"
64                | "application/xml"
65                | "application/javascript"
66                | "application/x-javascript"
67                | "application/x-www-form-urlencoded"
68                | "application/yaml"
69                | "application/x-yaml"
70                | "application/toml"
71        )
72}
73
74fn is_binary_mime(kind: &Mime) -> bool {
75    kind == &mime::APPLICATION_OCTET_STREAM
76        || kind.type_() == mime::IMAGE
77        || kind.type_() == mime::AUDIO
78        || kind.type_() == mime::VIDEO
79        || matches!(
80            kind.essence_str(),
81            "application/pdf"
82                | "application/zip"
83                | "application/gzip"
84                | "application/x-tar"
85                | "application/x-7z-compressed"
86                | "application/x-rar-compressed"
87                | "application/x-bzip2"
88        )
89}
90
/// Stateless calculator that compares two files as text.
///
/// It has no configuration, so `TextDiffCalculator::default()` and the bare
/// `TextDiffCalculator` value are interchangeable.
#[derive(Default)]
pub struct TextDiffCalculator;
93
94impl DiffCalculator<FileLeaf> for TextDiffCalculator {
95    type Error = convert::Infallible;
96    type Diff = TextDiff;
97
98    fn diff(
99        &self,
100        _name: &str,
101        expected: FileLeaf,
102        actual: FileLeaf,
103    ) -> Result<MayUnsupported<Self::Diff>, Self::Error> {
104        'available: {
105            if is_text_mime(&expected.kind) && is_text_mime(&actual.kind) {
106                break 'available;
107            }
108            if is_binary_mime(&expected.kind) || is_binary_mime(&actual.kind) {
109                return Ok(MayUnsupported::Unsupported);
110            }
111            let Ok(expected) = str::from_utf8(&expected.content) else {
112                return Ok(MayUnsupported::Unsupported);
113            };
114            let Ok(actual) = str::from_utf8(&actual.content) else {
115                return Ok(MayUnsupported::Unsupported);
116            };
117
118            if expected
119                .chars()
120                .all(|ch| ch.is_control() && !matches!(ch, '\n' | '\r' | '\t'))
121            {
122                return Ok(MayUnsupported::Unsupported);
123            }
124            if actual
125                .chars()
126                .all(|ch| ch.is_control() && !matches!(ch, '\n' | '\r' | '\t'))
127            {
128                return Ok(MayUnsupported::Unsupported);
129            }
130        }
131        Ok(MayUnsupported::Ok(TextDiff {
132            equal: <[u8] as PartialEq<[u8]>>::eq(&expected.content, &actual.content),
133            expected: expected.content,
134            actual: actual.content,
135        }))
136    }
137}