1use bstr::ByteSlice;
6
7use crate::matcher::{LineTerminator, Match};
8
9#[derive(Debug)]
17pub struct LineStep {
18 line_term: u8,
19 pos: usize,
20 end: usize,
21}
22
23impl LineStep {
24 pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
27 LineStep {
28 line_term,
29 pos: start,
30 end,
31 }
32 }
33
34 #[inline(always)]
36 pub fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
37 self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
38 }
39
40 #[inline(always)]
41 fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
42 bytes = &bytes[..self.end];
43 match bytes[self.pos..].find_byte(self.line_term) {
44 None => {
45 if self.pos < bytes.len() {
46 let m = (self.pos, bytes.len());
47 assert!(m.0 <= m.1);
48
49 self.pos = m.1;
50 Some(m)
51 } else {
52 None
53 }
54 }
55 Some(line_end) => {
56 let m = (self.pos, self.pos + line_end + 1);
57 assert!(m.0 <= m.1);
58
59 self.pos = m.1;
60 Some(m)
61 }
62 }
63 }
64}
65
66pub fn count(bytes: &[u8], line_term: u8) -> u64 {
68 memchr::memchr_iter(line_term, bytes).count() as u64
69}
70
71#[inline(always)]
74pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
75 let line_term = line_term.as_bytes();
76 let start = bytes.len().saturating_sub(line_term.len());
77 if bytes.get(start..) == Some(line_term) {
78 return &bytes[..bytes.len() - line_term.len()];
79 }
80 bytes
81}
82
83#[inline(always)]
88pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
89 let line_start = bytes[..range.start()]
90 .rfind_byte(line_term)
91 .map_or(0, |i| i + 1);
92 let line_end = if range.end() > line_start && bytes[range.end() - 1] == line_term {
93 range.end()
94 } else {
95 bytes[range.end()..]
96 .find_byte(line_term)
97 .map_or(bytes.len(), |i| range.end() + i + 1)
98 };
99 Match::new(line_start, line_end)
100}
101
#[cfg(test)]
mod tests {
    use super::*;

    const SHERLOCK: &str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// Shorthand for constructing a `Match`.
    fn m(start: usize, end: usize) -> Match {
        Match::new(start, end)
    }

    /// Runs a `LineStep` over all of `text` and collects every match it
    /// reports, in order.
    fn step_all(text: &str) -> Vec<Match> {
        let mut stepper = LineStep::new(b'\n', 0, text.len());
        let mut found = Vec::new();
        loop {
            match stepper.next_match(text.as_bytes()) {
                Some(hit) => found.push(hit),
                None => return found,
            }
        }
    }

    /// The lines of `text` as string slices, terminators included.
    fn lines(text: &str) -> Vec<&str> {
        step_all(text).into_iter().map(|hit| &text[hit]).collect()
    }

    /// The lines of `text` as byte-offset ranges, terminators included.
    fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
        step_all(text)
            .into_iter()
            .map(|hit| hit.start()..hit.end())
            .collect()
    }

    /// Calls `locate` on `text` with `\n` as the terminator.
    fn loc(text: &str, start: usize, end: usize) -> Match {
        locate(text.as_bytes(), b'\n', Match::new(start, end))
    }

    #[test]
    fn line_count() {
        let cases = [("", 0), ("\n", 1), ("\n\n", 2), ("a\nb\nc", 2)];
        for &(text, expected) in cases.iter() {
            assert_eq!(expected, count(text.as_bytes(), b'\n'), "text: {:?}", text);
        }
    }

    #[test]
    fn line_locate() {
        let t = SHERLOCK;
        let lines = line_ranges(t);
        let (first, second, last) = (&lines[0], &lines[1], &lines[5]);

        // Any range that ends where the first line ends maps to the first line.
        for &start in [first.start, first.start + 1, first.end - 1].iter() {
            assert_eq!(loc(t, start, first.end), m(first.start, first.end));
        }
        // An empty range at the boundary belongs to the following line.
        assert_eq!(loc(t, first.end, first.end), m(second.start, second.end));

        // The last line has no terminator, so even the empty range at the very
        // end of the text maps back to it.
        for &start in [last.start, last.start + 1, last.end - 1, last.end].iter() {
            assert_eq!(loc(t, start, last.end), m(last.start, last.end));
        }
    }

    #[test]
    fn line_locate_weird() {
        // (text, range start, range end, expected start, expected end)
        let cases = [
            ("", 0, 0, 0, 0),
            ("\n", 0, 1, 0, 1),
            ("\n", 1, 1, 1, 1),
            ("\n\n", 0, 0, 0, 1),
            ("\n\n", 0, 1, 0, 1),
            ("\n\n", 1, 1, 1, 2),
            ("\n\n", 1, 2, 1, 2),
            ("\n\n", 2, 2, 2, 2),
            ("a\nb\nc", 0, 1, 0, 2),
            ("a\nb\nc", 1, 2, 0, 2),
            ("a\nb\nc", 2, 3, 2, 4),
            ("a\nb\nc", 3, 4, 2, 4),
            ("a\nb\nc", 4, 5, 4, 5),
            ("a\nb\nc", 5, 5, 4, 5),
        ];
        for &(text, start, end, want_start, want_end) in cases.iter() {
            assert_eq!(
                loc(text, start, end),
                m(want_start, want_end),
                "text: {:?}, range: {}..{}",
                text,
                start,
                end
            );
        }
    }

    #[test]
    fn line_iter() {
        let cases: Vec<(&str, Vec<&str>)> = vec![
            ("abc", vec!["abc"]),
            ("abc\n", vec!["abc\n"]),
            ("abc\nxyz", vec!["abc\n", "xyz"]),
            ("abc\nxyz\n", vec!["abc\n", "xyz\n"]),
            ("abc\n\n", vec!["abc\n", "\n"]),
            ("abc\n\n\n", vec!["abc\n", "\n", "\n"]),
            ("abc\n\nxyz", vec!["abc\n", "\n", "xyz"]),
            ("abc\n\nxyz\n", vec!["abc\n", "\n", "xyz\n"]),
            ("abc\nxyz\n\n", vec!["abc\n", "xyz\n", "\n"]),
            ("\n", vec!["\n"]),
            ("", vec![]),
        ];
        for (text, expected) in cases {
            assert_eq!(lines(text), expected, "text: {:?}", text);
        }
    }

    #[test]
    fn line_iter_empty() {
        // An empty range yields nothing, even when the buffer has content.
        let mut stepper = LineStep::new(b'\n', 0, 0);
        assert!(stepper.next_match(b"abc").is_none());
    }
}