1use {
6 bstr::ByteSlice,
7 grep_matcher::{LineTerminator, Match},
8};
9
/// A cursor that walks a byte range one line at a time.
///
/// The cursor stores only offsets and the terminator byte; it never holds a
/// reference to the haystack. Callers hand the bytes to the stepping methods
/// on every call instead.
#[derive(Debug)]
pub struct LineStep {
    /// The byte that marks the end of a line.
    line_term: u8,
    /// Offset at which the next reported line will start.
    pos: usize,
    /// Exclusive upper bound of the range being stepped through.
    end: usize,
}
23
24impl LineStep {
25 pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
28 LineStep {
29 line_term,
30 pos: start,
31 end,
32 }
33 }
34
35 #[inline(always)]
37 pub fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
38 self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
39 }
40
41 #[inline(always)]
42 fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
43 bytes = &bytes[..self.end];
44 match bytes[self.pos..].find_byte(self.line_term) {
45 None => {
46 if self.pos < bytes.len() {
47 let m = (self.pos, bytes.len());
48 assert!(m.0 <= m.1);
49
50 self.pos = m.1;
51 Some(m)
52 } else {
53 None
54 }
55 }
56 Some(line_end) => {
57 let m = (self.pos, self.pos + line_end + 1);
58 assert!(m.0 <= m.1);
59
60 self.pos = m.1;
61 Some(m)
62 }
63 }
64 }
65}
66
67pub fn count(bytes: &[u8], line_term: u8) -> u64 {
69 memchr::memchr_iter(line_term, bytes).count() as u64
70}
71
72#[inline(always)]
75pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
76 let line_term = line_term.as_bytes();
77 let start = bytes.len().saturating_sub(line_term.len());
78 if bytes.get(start..) == Some(line_term) {
79 return &bytes[..bytes.len() - line_term.len()];
80 }
81 bytes
82}
83
84#[inline(always)]
89pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
90 let line_start = bytes[..range.start()]
91 .rfind_byte(line_term)
92 .map_or(0, |i| i + 1);
93 let line_end = if range.end() > line_start && bytes[range.end() - 1] == line_term {
94 range.end()
95 } else {
96 bytes[range.end()..]
97 .find_byte(line_term)
98 .map_or(bytes.len(), |i| range.end() + i + 1)
99 };
100 Match::new(line_start, line_end)
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    // Six lines of sample text; only the last line has no trailing `\n`.
    const SHERLOCK: &str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// Shorthand for building a `Match`.
    fn m(start: usize, end: usize) -> Match {
        Match::new(start, end)
    }

    /// Steps through all of `text` using `\n` as the terminator and collects
    /// every span the stepper reports.
    fn spans(text: &str) -> Vec<Match> {
        let haystack = text.as_bytes();
        let mut stepper = LineStep::new(b'\n', 0, haystack.len());
        std::iter::from_fn(|| stepper.next_match(haystack)).collect()
    }

    /// The lines of `text` as string slices, terminators included.
    fn lines(text: &str) -> Vec<&str> {
        spans(text).into_iter().map(|span| &text[span]).collect()
    }

    /// The lines of `text` as byte-offset ranges, terminators included.
    fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
        spans(text)
            .into_iter()
            .map(|span| span.start()..span.end())
            .collect()
    }

    /// Runs `locate` over `text` with `\n` as the terminator.
    fn loc(text: &str, start: usize, end: usize) -> Match {
        locate(text.as_bytes(), b'\n', Match::new(start, end))
    }

    #[test]
    fn line_count() {
        assert_eq!(0, count(b"", b'\n'));
        assert_eq!(1, count(b"\n", b'\n'));
        assert_eq!(2, count(b"\n\n", b'\n'));
        // The unterminated trailing "c" is not counted.
        assert_eq!(2, count(b"a\nb\nc", b'\n'));
    }

    #[test]
    fn line_locate() {
        let t = SHERLOCK;
        let ranges = line_ranges(t);
        let (first, second, last) = (&ranges[0], &ranges[1], &ranges[5]);

        // Any non-empty range within a line resolves to that whole line.
        let assert_within = |line: &std::ops::Range<usize>| {
            let whole = m(line.start, line.end);
            assert_eq!(loc(t, line.start, line.end), whole);
            assert_eq!(loc(t, line.start + 1, line.end), whole);
            assert_eq!(loc(t, line.end - 1, line.end), whole);
        };

        assert_within(first);
        // An empty range just past the first terminator belongs to the
        // second line.
        assert_eq!(
            loc(t, first.end, first.end),
            m(second.start, second.end)
        );

        assert_within(last);
        // An empty range at the very end of the text still resolves to the
        // final, unterminated line.
        assert_eq!(loc(t, last.end, last.end), m(last.start, last.end));
    }

    #[test]
    fn line_locate_weird() {
        // Empty haystack.
        assert_eq!(loc("", 0, 0), m(0, 0));

        // A single empty line.
        assert_eq!(loc("\n", 0, 1), m(0, 1));
        assert_eq!(loc("\n", 1, 1), m(1, 1));

        // Two consecutive empty lines.
        assert_eq!(loc("\n\n", 0, 0), m(0, 1));
        assert_eq!(loc("\n\n", 0, 1), m(0, 1));
        assert_eq!(loc("\n\n", 1, 1), m(1, 2));
        assert_eq!(loc("\n\n", 1, 2), m(1, 2));
        assert_eq!(loc("\n\n", 2, 2), m(2, 2));

        // Short lines where the last one is unterminated.
        assert_eq!(loc("a\nb\nc", 0, 1), m(0, 2));
        assert_eq!(loc("a\nb\nc", 1, 2), m(0, 2));
        assert_eq!(loc("a\nb\nc", 2, 3), m(2, 4));
        assert_eq!(loc("a\nb\nc", 3, 4), m(2, 4));
        assert_eq!(loc("a\nb\nc", 4, 5), m(4, 5));
        assert_eq!(loc("a\nb\nc", 5, 5), m(4, 5));
    }

    #[test]
    fn line_iter() {
        // No terminator at all.
        assert_eq!(lines("abc"), vec!["abc"]);

        // Terminated and unterminated final lines.
        assert_eq!(lines("abc\n"), vec!["abc\n"]);
        assert_eq!(lines("abc\nxyz"), vec!["abc\n", "xyz"]);
        assert_eq!(lines("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);

        // Empty lines are reported as their bare terminator.
        assert_eq!(lines("abc\n\n"), vec!["abc\n", "\n"]);
        assert_eq!(lines("abc\n\n\n"), vec!["abc\n", "\n", "\n"]);
        assert_eq!(lines("abc\n\nxyz"), vec!["abc\n", "\n", "xyz"]);
        assert_eq!(lines("abc\n\nxyz\n"), vec!["abc\n", "\n", "xyz\n"]);
        assert_eq!(lines("abc\nxyz\n\n"), vec!["abc\n", "xyz\n", "\n"]);

        // Degenerate inputs.
        assert_eq!(lines("\n"), vec!["\n"]);
        assert_eq!(lines(""), Vec::<&str>::new());
    }

    #[test]
    fn line_iter_empty() {
        // An empty range yields nothing even when the haystack has bytes.
        let mut stepper = LineStep::new(b'\n', 0, 0);
        assert_eq!(stepper.next_match(b"abc"), None);
    }
}