1use {
6 bstr::ByteSlice,
7 grep_matcher::{LineTerminator, Match},
8};
9
10#[derive(Debug)]
17pub struct LineIter<'b> {
18 bytes: &'b [u8],
19 stepper: LineStep,
20}
21
22impl<'b> LineIter<'b> {
23 pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
26 let stepper = LineStep::new(line_term, 0, bytes.len());
27 LineIter { bytes, stepper }
28 }
29}
30
31impl<'b> Iterator for LineIter<'b> {
32 type Item = &'b [u8];
33
34 fn next(&mut self) -> Option<&'b [u8]> {
35 self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
36 }
37}
38
39#[derive(Debug)]
49pub struct LineStep {
50 line_term: u8,
51 pos: usize,
52 end: usize,
53}
54
55impl LineStep {
56 pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
64 LineStep { line_term, pos: start, end }
65 }
66
67 pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
75 self.next_impl(bytes)
76 }
77
78 #[inline(always)]
80 pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
81 self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
82 }
83
84 #[inline(always)]
85 fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
86 bytes = &bytes[..self.end];
87 match bytes[self.pos..].find_byte(self.line_term) {
88 None => {
89 if self.pos < bytes.len() {
90 let m = (self.pos, bytes.len());
91 assert!(m.0 <= m.1);
92
93 self.pos = m.1;
94 Some(m)
95 } else {
96 None
97 }
98 }
99 Some(line_end) => {
100 let m = (self.pos, self.pos + line_end + 1);
101 assert!(m.0 <= m.1);
102
103 self.pos = m.1;
104 Some(m)
105 }
106 }
107 }
108}
109
110pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 {
112 memchr::memchr_iter(line_term, bytes).count() as u64
113}
114
115#[inline(always)]
118pub(crate) fn without_terminator(
119 bytes: &[u8],
120 line_term: LineTerminator,
121) -> &[u8] {
122 let line_term = line_term.as_bytes();
123 let start = bytes.len().saturating_sub(line_term.len());
124 if bytes.get(start..) == Some(line_term) {
125 return &bytes[..bytes.len() - line_term.len()];
126 }
127 bytes
128}
129
130#[inline(always)]
135pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
136 let line_start =
137 bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
138 let line_end =
139 if range.end() > line_start && bytes[range.end() - 1] == line_term {
140 range.end()
141 } else {
142 bytes[range.end()..]
143 .find_byte(line_term)
144 .map_or(bytes.len(), |i| range.end() + i + 1)
145 };
146 Match::new(line_start, line_end)
147}
148
149pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
158 preceding_by_pos(bytes, bytes.len(), line_term, count)
159}
160
161fn preceding_by_pos(
171 bytes: &[u8],
172 mut pos: usize,
173 line_term: u8,
174 mut count: usize,
175) -> usize {
176 if pos == 0 {
177 return 0;
178 } else if bytes[pos - 1] == line_term {
179 pos -= 1;
180 }
181 loop {
182 match bytes[..pos].rfind_byte(line_term) {
183 None => {
184 return 0;
185 }
186 Some(i) => {
187 if count == 0 {
188 return i + 1;
189 } else if i == 0 {
190 return 0;
191 }
192 count -= 1;
193 pos = i;
194 }
195 }
196 }
197}
198
199#[cfg(test)]
200mod tests {
201 use super::*;
202
203 const SHERLOCK: &'static str = "\
204For the Doctor Watsons of this world, as opposed to the Sherlock
205Holmeses, success in the province of detective work must always
206be, to a very large extent, the result of luck. Sherlock Holmes
207can extract a clew from a wisp of straw or a flake of cigar ash;
208but Doctor Watson has to have it taken out for him and dusted,
209and exhibited clearly, with a label attached.\
210";
211
212 fn m(start: usize, end: usize) -> Match {
213 Match::new(start, end)
214 }
215
216 fn lines(text: &str) -> Vec<&str> {
217 let mut results = vec![];
218 let mut it = LineStep::new(b'\n', 0, text.len());
219 while let Some(m) = it.next_match(text.as_bytes()) {
220 results.push(&text[m]);
221 }
222 results
223 }
224
225 fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
226 let mut results = vec![];
227 let mut it = LineStep::new(b'\n', 0, text.len());
228 while let Some(m) = it.next_match(text.as_bytes()) {
229 results.push(m.start()..m.end());
230 }
231 results
232 }
233
234 fn prev(text: &str, pos: usize, count: usize) -> usize {
235 preceding_by_pos(text.as_bytes(), pos, b'\n', count)
236 }
237
238 fn loc(text: &str, start: usize, end: usize) -> Match {
239 locate(text.as_bytes(), b'\n', Match::new(start, end))
240 }
241
242 #[test]
243 fn line_count() {
244 assert_eq!(0, count(b"", b'\n'));
245 assert_eq!(1, count(b"\n", b'\n'));
246 assert_eq!(2, count(b"\n\n", b'\n'));
247 assert_eq!(2, count(b"a\nb\nc", b'\n'));
248 }
249
250 #[test]
251 fn line_locate() {
252 let t = SHERLOCK;
253 let lines = line_ranges(t);
254
255 assert_eq!(
256 loc(t, lines[0].start, lines[0].end),
257 m(lines[0].start, lines[0].end)
258 );
259 assert_eq!(
260 loc(t, lines[0].start + 1, lines[0].end),
261 m(lines[0].start, lines[0].end)
262 );
263 assert_eq!(
264 loc(t, lines[0].end - 1, lines[0].end),
265 m(lines[0].start, lines[0].end)
266 );
267 assert_eq!(
268 loc(t, lines[0].end, lines[0].end),
269 m(lines[1].start, lines[1].end)
270 );
271
272 assert_eq!(
273 loc(t, lines[5].start, lines[5].end),
274 m(lines[5].start, lines[5].end)
275 );
276 assert_eq!(
277 loc(t, lines[5].start + 1, lines[5].end),
278 m(lines[5].start, lines[5].end)
279 );
280 assert_eq!(
281 loc(t, lines[5].end - 1, lines[5].end),
282 m(lines[5].start, lines[5].end)
283 );
284 assert_eq!(
285 loc(t, lines[5].end, lines[5].end),
286 m(lines[5].start, lines[5].end)
287 );
288 }
289
290 #[test]
291 fn line_locate_weird() {
292 assert_eq!(loc("", 0, 0), m(0, 0));
293
294 assert_eq!(loc("\n", 0, 1), m(0, 1));
295 assert_eq!(loc("\n", 1, 1), m(1, 1));
296
297 assert_eq!(loc("\n\n", 0, 0), m(0, 1));
298 assert_eq!(loc("\n\n", 0, 1), m(0, 1));
299 assert_eq!(loc("\n\n", 1, 1), m(1, 2));
300 assert_eq!(loc("\n\n", 1, 2), m(1, 2));
301 assert_eq!(loc("\n\n", 2, 2), m(2, 2));
302
303 assert_eq!(loc("a\nb\nc", 0, 1), m(0, 2));
304 assert_eq!(loc("a\nb\nc", 1, 2), m(0, 2));
305 assert_eq!(loc("a\nb\nc", 2, 3), m(2, 4));
306 assert_eq!(loc("a\nb\nc", 3, 4), m(2, 4));
307 assert_eq!(loc("a\nb\nc", 4, 5), m(4, 5));
308 assert_eq!(loc("a\nb\nc", 5, 5), m(4, 5));
309 }
310
311 #[test]
312 fn line_iter() {
313 assert_eq!(lines("abc"), vec!["abc"]);
314
315 assert_eq!(lines("abc\n"), vec!["abc\n"]);
316 assert_eq!(lines("abc\nxyz"), vec!["abc\n", "xyz"]);
317 assert_eq!(lines("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
318
319 assert_eq!(lines("abc\n\n"), vec!["abc\n", "\n"]);
320 assert_eq!(lines("abc\n\n\n"), vec!["abc\n", "\n", "\n"]);
321 assert_eq!(lines("abc\n\nxyz"), vec!["abc\n", "\n", "xyz"]);
322 assert_eq!(lines("abc\n\nxyz\n"), vec!["abc\n", "\n", "xyz\n"]);
323 assert_eq!(lines("abc\nxyz\n\n"), vec!["abc\n", "xyz\n", "\n"]);
324
325 assert_eq!(lines("\n"), vec!["\n"]);
326 assert_eq!(lines(""), Vec::<&str>::new());
327 }
328
329 #[test]
330 fn line_iter_empty() {
331 let mut it = LineStep::new(b'\n', 0, 0);
332 assert_eq!(it.next(b"abc"), None);
333 }
334
335 #[test]
336 fn preceding_lines_doc() {
337 let bytes = b"abc\nxyz\n";
339 assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
340 assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
341 assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
342 assert_eq!(0, preceding_by_pos(bytes, 8, b'\n', 1));
343 }
344
345 #[test]
346 fn preceding_lines_sherlock() {
347 let t = SHERLOCK;
348 let lines = line_ranges(t);
349
350 assert_eq!(0, prev(t, 0, 0));
353 assert_eq!(0, prev(t, 1, 0));
354 assert_eq!(0, prev(t, lines[0].end - 1, 0));
357 assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
358 assert_eq!(lines[1].start, prev(t, lines[0].end + 1, 0));
361
362 assert_eq!(0, prev(t, 0, 1));
364 assert_eq!(0, prev(t, 0, 2));
365 assert_eq!(0, prev(t, 1, 1));
366 assert_eq!(0, prev(t, 1, 2));
367 assert_eq!(0, prev(t, lines[0].end - 1, 1));
368 assert_eq!(0, prev(t, lines[0].end - 1, 2));
369 assert_eq!(0, prev(t, lines[0].end, 1));
370 assert_eq!(0, prev(t, lines[0].end, 2));
371 assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
372 assert_eq!(lines[3].start, prev(t, lines[4].end, 1));
373 assert_eq!(lines[4].start, prev(t, lines[4].end + 1, 1));
374
375 assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
377 assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
378 assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
379 assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
380 }
381
382 #[test]
383 fn preceding_lines_short() {
384 let t = "a\nb\nc\nd\ne\nf\n";
385 let lines = line_ranges(t);
386 assert_eq!(12, t.len());
387
388 assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
389 assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
390 assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
391 assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
392 assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
393 assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
394 assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
395
396 assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
397 assert_eq!(lines[4].start, prev(t, lines[5].end - 1, 1));
398 assert_eq!(lines[3].start, prev(t, lines[5].end - 1, 2));
399 assert_eq!(lines[2].start, prev(t, lines[5].end - 1, 3));
400 assert_eq!(lines[1].start, prev(t, lines[5].end - 1, 4));
401 assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 5));
402 assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 6));
403
404 assert_eq!(lines[4].start, prev(t, lines[5].start, 0));
405 assert_eq!(lines[3].start, prev(t, lines[5].start, 1));
406 assert_eq!(lines[2].start, prev(t, lines[5].start, 2));
407 assert_eq!(lines[1].start, prev(t, lines[5].start, 3));
408 assert_eq!(lines[0].start, prev(t, lines[5].start, 4));
409 assert_eq!(lines[0].start, prev(t, lines[5].start, 5));
410
411 assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
412 assert_eq!(lines[2].start, prev(t, lines[4].start, 1));
413
414 assert_eq!(lines[2].start, prev(t, lines[3].end - 1, 1));
415 assert_eq!(lines[1].start, prev(t, lines[3].start, 1));
416
417 assert_eq!(lines[1].start, prev(t, lines[2].end - 1, 1));
418 assert_eq!(lines[0].start, prev(t, lines[2].start, 1));
419
420 assert_eq!(lines[0].start, prev(t, lines[1].end - 1, 1));
421 assert_eq!(lines[0].start, prev(t, lines[1].start, 1));
422
423 assert_eq!(lines[0].start, prev(t, lines[0].end - 1, 1));
424 assert_eq!(lines[0].start, prev(t, lines[0].start, 1));
425 }
426
427 #[test]
428 fn preceding_lines_empty1() {
429 let t = "\n\n\nd\ne\nf\n";
430 let lines = line_ranges(t);
431 assert_eq!(9, t.len());
432
433 assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
434 assert_eq!(lines[0].start, prev(t, lines[0].end, 1));
435 assert_eq!(lines[1].start, prev(t, lines[1].end, 0));
436 assert_eq!(lines[0].start, prev(t, lines[1].end, 1));
437
438 assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
439 assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
440 assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
441 assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
442 assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
443 assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
444 assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
445 }
446
447 #[test]
448 fn preceding_lines_empty2() {
449 let t = "a\n\n\nd\ne\nf\n";
450 let lines = line_ranges(t);
451 assert_eq!(10, t.len());
452
453 assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
454 assert_eq!(lines[0].start, prev(t, lines[0].end, 1));
455 assert_eq!(lines[1].start, prev(t, lines[1].end, 0));
456 assert_eq!(lines[0].start, prev(t, lines[1].end, 1));
457
458 assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
459 assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
460 assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
461 assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
462 assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
463 assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
464 assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
465 }
466}