diffy_fork_filenames/
utils.rs

//! Common utilities: line classification, line iteration, and the [`Text`] abstraction over `str` and `[u8]`.
2
3use std::{
4    collections::{hash_map::Entry, HashMap},
5    hash::Hash,
6};
7
/// Interns records (typically lines of text) as `u64` identifiers.
///
/// Records that compare equal share one identifier and distinct records get distinct
/// ones, so two records can be compared by comparing their `u64`s instead of their
/// contents.
pub struct Classifier<'a, T: ?Sized> {
    /// Identifier that will be handed to the next record not seen before.
    next_id: u64,
    /// Identifier already assigned to each record seen so far.
    unique_ids: HashMap<&'a T, u64>,
}

impl<'a, T> Classifier<'a, T>
where
    T: ?Sized + Eq + Hash,
{
    /// Returns the identifier for `record`.
    ///
    /// A record that was seen before gets its stored identifier back. A new record is
    /// stored under the current value of `next_id`, which is then incremented.
    fn classify(&mut self, record: &'a T) -> u64 {
        let slot = match self.unique_ids.entry(record) {
            Entry::Occupied(known) => return *known.get(),
            Entry::Vacant(slot) => slot,
        };

        // `slot` only borrows `unique_ids`, so the counter can still be advanced here.
        let fresh = self.next_id;
        self.next_id += 1;
        slot.insert(fresh);
        fresh
    }
}
26
27impl<'a, T: ?Sized + Text> Classifier<'a, T> {
28    pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
29        LineIter::new(text)
30            .map(|line| (line, self.classify(line)))
31            .unzip()
32    }
33}
34
35impl<T: Eq + Hash + ?Sized> Default for Classifier<'_, T> {
36    fn default() -> Self {
37        Self {
38            next_id: 0,
39            unique_ids: HashMap::default(),
40        }
41    }
42}
43
/// Iterator over the lines of a piece of text; each line includes its `\n` character.
///
/// The wrapped reference is the part of the text that has not been yielded yet.
pub struct LineIter<'a, T: ?Sized>(&'a T);

impl<'a, T> LineIter<'a, T>
where
    T: ?Sized,
{
    /// Creates an iterator positioned at the start of `text`.
    pub fn new(text: &'a T) -> Self {
        LineIter(text)
    }
}
52
53impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> {
54    type Item = &'a T;
55
56    fn next(&mut self) -> Option<Self::Item> {
57        if self.0.is_empty() {
58            return None;
59        }
60
61        let end = if let Some(idx) = self.0.find("\n") {
62            idx + 1
63        } else {
64            self.0.len()
65        };
66
67        let (line, remaining) = self.0.split_at(end);
68        self.0 = remaining;
69        Some(line)
70    }
71}
72
73/// A helper trait for processing text like `str` and `[u8]`
74/// Useful for abstracting over those types for parsing as well as breaking input into lines
75pub trait Text: Eq + Hash {
76    fn is_empty(&self) -> bool;
77    fn len(&self) -> usize;
78    fn starts_with(&self, prefix: &str) -> bool;
79    fn ends_with(&self, suffix: &str) -> bool;
80    fn strip_prefix(&self, prefix: &str) -> Option<&Self>;
81    fn strip_suffix(&self, suffix: &str) -> Option<&Self>;
82    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)>;
83    fn find(&self, needle: &str) -> Option<usize>;
84    fn split_at(&self, mid: usize) -> (&Self, &Self);
85    fn as_str(&self) -> Option<&str>;
86    fn as_bytes(&self) -> &[u8];
87    fn lines(&self) -> LineIter<Self>;
88
89    fn parse<T: std::str::FromStr>(&self) -> Option<T> {
90        self.as_str().and_then(|s| s.parse().ok())
91    }
92}
93
94impl Text for str {
95    fn is_empty(&self) -> bool {
96        self.is_empty()
97    }
98
99    fn len(&self) -> usize {
100        self.len()
101    }
102
103    fn starts_with(&self, prefix: &str) -> bool {
104        self.starts_with(prefix)
105    }
106
107    fn ends_with(&self, suffix: &str) -> bool {
108        self.ends_with(suffix)
109    }
110
111    fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
112        self.strip_prefix(prefix)
113    }
114
115    fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
116        self.strip_suffix(suffix)
117    }
118
119    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
120        self.find(needle)
121            .map(|idx| (&self[..idx], &self[idx + needle.len()..]))
122    }
123
124    fn find(&self, needle: &str) -> Option<usize> {
125        self.find(needle)
126    }
127
128    fn split_at(&self, mid: usize) -> (&Self, &Self) {
129        self.split_at(mid)
130    }
131
132    fn as_str(&self) -> Option<&str> {
133        Some(self)
134    }
135
136    fn as_bytes(&self) -> &[u8] {
137        self.as_bytes()
138    }
139
140    fn lines(&self) -> LineIter<Self> {
141        LineIter::new(self)
142    }
143}
144
145impl Text for [u8] {
146    fn is_empty(&self) -> bool {
147        self.is_empty()
148    }
149
150    fn len(&self) -> usize {
151        self.len()
152    }
153
154    fn starts_with(&self, prefix: &str) -> bool {
155        self.starts_with(prefix.as_bytes())
156    }
157
158    fn ends_with(&self, suffix: &str) -> bool {
159        self.ends_with(suffix.as_bytes())
160    }
161
162    fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
163        self.strip_prefix(prefix.as_bytes())
164    }
165
166    fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
167        self.strip_suffix(suffix.as_bytes())
168    }
169
170    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
171        find_bytes(self, needle.as_bytes()).map(|idx| (&self[..idx], &self[idx + needle.len()..]))
172    }
173
174    fn find(&self, needle: &str) -> Option<usize> {
175        find_bytes(self, needle.as_bytes())
176    }
177
178    fn split_at(&self, mid: usize) -> (&Self, &Self) {
179        self.split_at(mid)
180    }
181
182    fn as_str(&self) -> Option<&str> {
183        std::str::from_utf8(self).ok()
184    }
185
186    fn as_bytes(&self) -> &[u8] {
187        self
188    }
189
190    fn lines(&self) -> LineIter<Self> {
191        LineIter::new(self)
192    }
193}
194
/// Returns the index of the first occurrence of `needle` in `haystack`, if any.
///
/// An empty `needle` matches at index `0`, and a one-byte `needle` is a plain byte
/// search. Longer needles are located by repeatedly finding their first byte and
/// comparing the bytes that follow.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let first = match needle {
        [] => return Some(0),
        [only] => return find_byte(haystack, *only),
        [first, ..] => *first,
    };

    let needle_len = needle.len();
    if needle_len > haystack.len() {
        return None;
    }

    // Absolute index in `haystack` from which the next candidate is searched.
    let mut start = 0;
    loop {
        let candidate = start + find_byte(&haystack[start..], first)?;

        // If a full-length window no longer fits here, it cannot fit at any later
        // position either, so the search is over.
        let window = haystack.get(candidate..candidate + needle_len)?;
        if window == needle {
            return Some(candidate);
        }

        start = candidate + 1;
    }
}

/// Returns the index of the first element of `haystack` equal to `byte`, if any.
// XXX Maybe use `memchr`?
fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
    haystack.iter().position(|candidate| *candidate == byte)
}
227}