gix_filter/eol/
utils.rs

1use crate::eol::{AttributesDigest, AutoCrlf, Configuration, Mode, Stats};
2
3impl Default for Mode {
4    fn default() -> Self {
5        if cfg!(windows) {
6            Mode::CrLf
7        } else {
8            Mode::Lf
9        }
10    }
11}
12
13impl AttributesDigest {
14    /// Return the end-of-line mode this digest would require, or `None` if no conversion would be performed.
15    pub fn to_eol(&self, config: Configuration) -> Option<Mode> {
16        Some(match self {
17            AttributesDigest::Binary => return None,
18            AttributesDigest::TextInput | AttributesDigest::TextAutoInput => Mode::Lf,
19            AttributesDigest::TextCrlf | AttributesDigest::TextAutoCrlf => Mode::CrLf,
20            AttributesDigest::Text | AttributesDigest::TextAuto => config.to_eol(),
21        })
22    }
23
24    /// Return true if this digest allows for auto-determination of CRLF text conversion.
25    pub fn is_auto_text(&self) -> bool {
26        matches!(
27            self,
28            AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput
29        )
30    }
31}
32
33impl Configuration {
34    /// Return the line-ending mode that is configured here.
35    pub fn to_eol(&self) -> Mode {
36        match self.auto_crlf {
37            AutoCrlf::Enabled => Mode::CrLf,
38            AutoCrlf::Input => Mode::Lf,
39            AutoCrlf::Disabled => self.eol.unwrap_or_default(),
40        }
41    }
42}
43
44impl Stats {
45    /// Gather statistics from the given `bytes`.
46    ///
47    /// Note that the entire buffer will be scanned.
48    pub fn from_bytes(bytes: &[u8]) -> Self {
49        let mut bytes = bytes.iter().peekable();
50        let mut null = 0;
51        let mut lone_cr = 0;
52        let mut lone_lf = 0;
53        let mut crlf = 0;
54        let mut printable = 0;
55        let mut non_printable = 0;
56        while let Some(b) = bytes.next() {
57            if *b == b'\r' {
58                match bytes.peek() {
59                    Some(n) if **n == b'\n' => {
60                        bytes.next();
61                        crlf += 1;
62                    }
63                    _ => lone_cr += 1,
64                }
65                continue;
66            }
67            if *b == b'\n' {
68                lone_lf += 1;
69                continue;
70            }
71            if *b == 127 {
72                non_printable += 1;
73            } else if *b < 32 {
74                match *b {
75                    8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1,
76                    0 => {
77                        non_printable += 1;
78                        null += 1;
79                    },
80                    _ => non_printable += 1,
81                }
82            } else {
83                printable += 1;
84            }
85        }
86
87        Self {
88            null,
89            lone_cr,
90            lone_lf,
91            crlf,
92            printable,
93            non_printable,
94        }
95    }
96
97    /// Returns `true` if these statistics are typical for a binary file.
98    pub fn is_binary(&self) -> bool {
99        self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable
100    }
101
102    /// Return `true` if we would convert the buffer from which these stats are derived, knowing only the digest
103    pub fn will_convert_lf_to_crlf(&self, digest: AttributesDigest, config: Configuration) -> bool {
104        if digest.to_eol(config) != Some(Mode::CrLf) {
105            return false;
106        }
107
108        // nothing to do?
109        if self.lone_lf == 0 {
110            return false;
111        }
112
113        if digest.is_auto_text() {
114            if self.is_binary() {
115                return false;
116            }
117            // Lone `\r` or mixed LF and CRLF isn't safe as it won't round-trip, and in auto-mode we don't touch it.
118            if self.lone_cr > 0 || self.crlf > 0 {
119                return false;
120            }
121        }
122        true
123    }
124}