1use crate::location::{ByteOffset, BytePosition, LineAndCharPosition, LineOffset};
4use smallvec::{smallvec, SmallVec};
5use std::ops::Range;
6
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35pub struct LineOffsetTracker {
36 line_starts: SmallVec<[ByteOffset; 16]>,
37}
38
39impl Default for LineOffsetTracker {
40 fn default() -> Self {
41 LineOffsetTracker {
42 line_starts: smallvec![ByteOffset(0)], }
44 }
45}
46
47#[derive(Debug, Clone, PartialEq, Eq, Hash)]
49#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
50pub enum LineOffsetError {
51 EndOfInput,
53 InsideUnicodeCodepoint,
55}
56
57impl LineOffsetTracker {
58 #[inline(always)]
60 pub fn record(&mut self, line_start: ByteOffset) {
61 self.line_starts.push(line_start);
62 }
63
64 #[inline(always)]
66 pub fn append(&mut self, other: &LineOffsetTracker, offset: ByteOffset) {
67 for start in &other.line_starts[1..] {
69 self.record(offset + *start);
70 }
71 }
72
73 #[inline(always)]
75 #[must_use]
76 pub fn num_lines(&self) -> usize {
77 self.line_starts.len()
78 }
79
80 #[inline(always)]
85 fn byte_span_from_line_num(&self, num: LineOffset, max: ByteOffset) -> Range<ByteOffset> {
86 let start = self.line_starts[num.to_usize()];
87 let end = self
88 .line_starts
89 .get((num + 1).to_usize())
90 .unwrap_or(&max)
91 .min(&max);
92 start..*end
93 }
94
95 #[inline(always)]
99 fn line_num_from_byte_offset(&self, offset: ByteOffset) -> LineOffset {
100 match self.line_starts.binary_search(&offset) {
101 Err(i) => i - 1,
102 Ok(i) => i,
103 }
104 .into()
105 }
106
107 pub fn at(
115 &self,
116 source: &str,
117 BytePosition(offset): BytePosition,
118 ) -> Result<LineAndCharPosition, LineOffsetError> {
119 let full_len = source.len() as u32;
120 match offset {
121 ByteOffset(0) => Ok(LineAndCharPosition::new(0, 0)),
122 ByteOffset(n) if n > full_len => Err(LineOffsetError::EndOfInput),
123 _ => {
124 let line_num = self.line_num_from_byte_offset(offset);
125 let line_span = self.byte_span_from_line_num(line_num, source.len().into());
126 let limit = (offset - line_span.start).0 as usize;
127 let line = &source[line_span.start.0 as usize..line_span.end.0 as usize];
128 let column_num = line
129 .char_indices()
130 .enumerate()
131 .find(|(_i, (idx, _char))| idx == &limit);
132
133 match column_num {
134 None if limit == line.len() => Ok(LineAndCharPosition::new(
135 line_num.to_usize(),
136 line.char_indices().count(),
137 )),
138 None => Err(LineOffsetError::InsideUnicodeCodepoint),
139 Some((column_num, (_idx, _char))) => {
140 Ok(LineAndCharPosition::new(line_num.to_usize(), column_num))
141 }
142 }
143 }
144 }
145 }
146}
147
148#[cfg(test)]
149mod tests {
150 use super::*;
151
152 fn tracker_from_str(s: &str) -> LineOffsetTracker {
153 let mut tracker = LineOffsetTracker::default();
154 let mut start = 0;
155 while let Some(l) = s[start..].find('\n') {
156 let span = (start + l)..(start + l + 1);
157 tracker.record(span.end.into());
158 start += l + 1;
159 }
160 tracker
161 }
162
163 #[test]
164 fn single() {
165 let s = "1";
166 let tracker = tracker_from_str(s);
167
168 assert_eq!(tracker.num_lines(), 1);
169 assert_eq!(&s[0..1], "1");
170 assert_eq!(
171 tracker.at(s, 0.into()).unwrap(),
172 LineAndCharPosition::new(0, 0)
173 );
174 assert_eq!(
175 tracker.at(s, 1.into()).unwrap(),
176 LineAndCharPosition::new(0, 1)
177 );
178 }
179
180 #[test]
181 fn simple() {
182 let s = "01\n345";
183 let tracker = tracker_from_str(s);
184
185 assert_eq!(tracker.num_lines(), 2);
186
187 assert_eq!(&s[0..1], "0");
188 assert_eq!(
189 tracker.at(s, 0.into()).unwrap(),
190 LineAndCharPosition::new(0, 0)
191 );
192 assert_eq!(&s[1..2], "1");
193 assert_eq!(
194 tracker.at(s, 1.into()).unwrap(),
195 LineAndCharPosition::new(0, 1)
196 );
197 assert_eq!(&s[2..3], "\n");
198 assert_eq!(
199 tracker.at(s, 2.into()).unwrap(),
200 LineAndCharPosition::new(0, 2)
201 );
202 assert_eq!(&s[3..4], "3");
203 assert_eq!(
204 tracker.at(s, 3.into()).unwrap(),
205 LineAndCharPosition::new(1, 0)
206 );
207 assert_eq!(&s[4..5], "4");
208 assert_eq!(
209 tracker.at(s, 4.into()).unwrap(),
210 LineAndCharPosition::new(1, 1)
211 );
212 assert_eq!(&s[5..6], "5");
213 assert_eq!(
214 tracker.at(s, 5.into()).unwrap(),
215 LineAndCharPosition::new(1, 2)
216 );
217 assert_eq!(s.len(), 6);
218 assert_eq!(
219 tracker.at(s, 6.into()).unwrap(),
220 LineAndCharPosition::new(1, 3)
221 );
222 assert_eq!(tracker.at(s, 7.into()), Err(LineOffsetError::EndOfInput));
223 }
224
225 #[test]
226 fn append() {
227 let s = "01234\nab`de\nqr`tu";
228 let s1 = 0;
229 let s2 = s.find('`').unwrap();
230 let s3 = s.rfind('`').unwrap();
231 let s4 = s.len();
232
233 let mut tracker1 = tracker_from_str(&s[s1..s2]);
234 let mut tracker2 = tracker_from_str(&s[s2..s3]);
235 let tracker3 = tracker_from_str(&s[s3..s4]);
236
237 assert_eq!(tracker1.num_lines(), 2);
238 assert_eq!(tracker2.num_lines(), 2);
239 assert_eq!(tracker3.num_lines(), 1);
240
241 tracker2.append(&tracker3, (s3 - s2).into());
242 tracker1.append(&tracker2, (s2 - s1).into());
243
244 assert_eq!(tracker1.num_lines(), 3);
245 assert_eq!(&s[9..10], "d");
246 assert_eq!(
247 tracker1.at(s, 9.into()).unwrap(),
248 LineAndCharPosition::new(1, 3)
249 );
250 assert_eq!(&s[16..17], "u");
251 assert_eq!(
252 tracker1.at(s, 16.into()).unwrap(),
253 LineAndCharPosition::new(2, 4)
254 );
255 }
256
257 #[test]
258 fn complex() {
259 let s = "0123456789\n0123456789\n012345\n012345\n🤷\n\n";
260 let tracker = tracker_from_str(s);
261
262 assert_eq!(tracker.num_lines(), 7);
263
264 assert_eq!(
266 tracker.at(s, 0.into()).unwrap(),
267 LineAndCharPosition::new(0, 0)
268 );
269 assert_eq!(
270 tracker.at(s, s.len().into()).unwrap(),
271 LineAndCharPosition::new(6, 0)
272 );
273 assert_eq!(
274 tracker.at(s, (s.len() + 1).into()),
275 Err(LineOffsetError::EndOfInput)
276 );
277
278 let idx = s.find('2').unwrap();
280 assert_eq!(&s[idx..=idx], "2");
281 assert_eq!(
282 tracker.at(s, idx.into()).unwrap(),
283 LineAndCharPosition::new(0, 2)
284 );
285
286 let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
287 assert_eq!(&s[idx..=idx], "2");
288 assert_eq!(
289 tracker.at(s, idx.into()).unwrap(),
290 LineAndCharPosition::new(1, 2)
291 );
292
293 let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
294 assert_eq!(&s[idx..=idx], "2");
295 assert_eq!(
296 tracker.at(s, idx.into()).unwrap(),
297 LineAndCharPosition::new(2, 2)
298 );
299
300 let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
301 assert_eq!(&s[idx..=idx], "2");
302 assert_eq!(
303 tracker.at(s, idx.into()).unwrap(),
304 LineAndCharPosition::new(3, 2)
305 );
306
307 let idx = s.find('🤷').unwrap();
308 assert_eq!(&s[idx..idx + '🤷'.len_utf8()], "🤷");
309 assert_eq!(
310 tracker.at(s, idx.into()).unwrap(),
311 LineAndCharPosition::new(4, 0)
312 );
313 }
314}