// diffy_fork_filenames/utils.rs

use std::{
    collections::{hash_map::Entry, HashMap},
    hash::Hash,
};
7
/// Hands out a `u64` id per distinct record so that records can later be
/// compared by id instead of by content.
///
/// Records are stored by reference (`&'a T`), so the classified data must
/// outlive the classifier.
pub struct Classifier<'a, T: ?Sized> {
    /// Id that will be given to the next record that has not been seen yet.
    next_id: u64,
    /// Ids assigned so far, keyed by the borrowed record.
    unique_ids: HashMap<&'a T, u64>,
}
13
14impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> {
15 fn classify(&mut self, record: &'a T) -> u64 {
16 match self.unique_ids.entry(record) {
17 Entry::Occupied(o) => *o.get(),
18 Entry::Vacant(v) => {
19 let id = self.next_id;
20 self.next_id += 1;
21 *v.insert(id)
22 }
23 }
24 }
25}
26
27impl<'a, T: ?Sized + Text> Classifier<'a, T> {
28 pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec<u64>) {
29 LineIter::new(text)
30 .map(|line| (line, self.classify(line)))
31 .unzip()
32 }
33}
34
35impl<T: Eq + Hash + ?Sized> Default for Classifier<'_, T> {
36 fn default() -> Self {
37 Self {
38 next_id: 0,
39 unique_ids: HashMap::default(),
40 }
41 }
42}
43
/// Iterator over the lines of a piece of text.
///
/// The wrapped reference is the part of the text that has not been yielded
/// yet.
pub struct LineIter<'a, T: ?Sized>(&'a T);
46
47impl<'a, T: ?Sized> LineIter<'a, T> {
48 pub fn new(text: &'a T) -> Self {
49 Self(text)
50 }
51}
52
53impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> {
54 type Item = &'a T;
55
56 fn next(&mut self) -> Option<Self::Item> {
57 if self.0.is_empty() {
58 return None;
59 }
60
61 let end = if let Some(idx) = self.0.find("\n") {
62 idx + 1
63 } else {
64 self.0.len()
65 };
66
67 let (line, remaining) = self.0.split_at(end);
68 self.0 = remaining;
69 Some(line)
70 }
71}
72
73pub trait Text: Eq + Hash {
76 fn is_empty(&self) -> bool;
77 fn len(&self) -> usize;
78 fn starts_with(&self, prefix: &str) -> bool;
79 fn ends_with(&self, suffix: &str) -> bool;
80 fn strip_prefix(&self, prefix: &str) -> Option<&Self>;
81 fn strip_suffix(&self, suffix: &str) -> Option<&Self>;
82 fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)>;
83 fn find(&self, needle: &str) -> Option<usize>;
84 fn split_at(&self, mid: usize) -> (&Self, &Self);
85 fn as_str(&self) -> Option<&str>;
86 fn as_bytes(&self) -> &[u8];
87 fn lines(&self) -> LineIter<Self>;
88
89 fn parse<T: std::str::FromStr>(&self) -> Option<T> {
90 self.as_str().and_then(|s| s.parse().ok())
91 }
92}
93
94impl Text for str {
95 fn is_empty(&self) -> bool {
96 self.is_empty()
97 }
98
99 fn len(&self) -> usize {
100 self.len()
101 }
102
103 fn starts_with(&self, prefix: &str) -> bool {
104 self.starts_with(prefix)
105 }
106
107 fn ends_with(&self, suffix: &str) -> bool {
108 self.ends_with(suffix)
109 }
110
111 fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
112 self.strip_prefix(prefix)
113 }
114
115 fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
116 self.strip_suffix(suffix)
117 }
118
119 fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
120 self.find(needle)
121 .map(|idx| (&self[..idx], &self[idx + needle.len()..]))
122 }
123
124 fn find(&self, needle: &str) -> Option<usize> {
125 self.find(needle)
126 }
127
128 fn split_at(&self, mid: usize) -> (&Self, &Self) {
129 self.split_at(mid)
130 }
131
132 fn as_str(&self) -> Option<&str> {
133 Some(self)
134 }
135
136 fn as_bytes(&self) -> &[u8] {
137 self.as_bytes()
138 }
139
140 fn lines(&self) -> LineIter<Self> {
141 LineIter::new(self)
142 }
143}
144
145impl Text for [u8] {
146 fn is_empty(&self) -> bool {
147 self.is_empty()
148 }
149
150 fn len(&self) -> usize {
151 self.len()
152 }
153
154 fn starts_with(&self, prefix: &str) -> bool {
155 self.starts_with(prefix.as_bytes())
156 }
157
158 fn ends_with(&self, suffix: &str) -> bool {
159 self.ends_with(suffix.as_bytes())
160 }
161
162 fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
163 self.strip_prefix(prefix.as_bytes())
164 }
165
166 fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
167 self.strip_suffix(suffix.as_bytes())
168 }
169
170 fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
171 find_bytes(self, needle.as_bytes()).map(|idx| (&self[..idx], &self[idx + needle.len()..]))
172 }
173
174 fn find(&self, needle: &str) -> Option<usize> {
175 find_bytes(self, needle.as_bytes())
176 }
177
178 fn split_at(&self, mid: usize) -> (&Self, &Self) {
179 self.split_at(mid)
180 }
181
182 fn as_str(&self) -> Option<&str> {
183 std::str::from_utf8(self).ok()
184 }
185
186 fn as_bytes(&self) -> &[u8] {
187 self
188 }
189
190 fn lines(&self) -> LineIter<Self> {
191 LineIter::new(self)
192 }
193}
194
195fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
196 match needle.len() {
197 0 => Some(0),
198 1 => find_byte(haystack, needle[0]),
199 len if len > haystack.len() => None,
200 needle_len => {
201 let mut offset = 0;
202 let mut haystack = haystack;
203
204 while let Some(position) = find_byte(haystack, needle[0]) {
205 offset += position;
206
207 if let Some(haystack) = haystack.get(position..position + needle_len) {
208 if haystack == needle {
209 return Some(offset);
210 }
211 } else {
212 return None;
213 }
214
215 haystack = &haystack[position + 1..];
216 offset += 1;
217 }
218
219 None
220 }
221 }
222}
223
/// Returns the index of the first element of `haystack` equal to `byte`,
/// or `None` when there is no such element.
fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
    haystack.iter().position(|&b| b == byte)
}