partiql_source_map/
line_offset_tracker.rs

1//! [`LineOffsetTracker`] and related types for mapping locations in source `str`s.
2
3use crate::location::{ByteOffset, BytePosition, LineAndCharPosition, LineOffset};
4use smallvec::{smallvec, SmallVec};
5use std::ops::Range;
6
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10/// Keeps track of source offsets of newlines for the purposes of later calculating
11/// line and column information
12///
13///
14/// ## Example
15///
16/// ```rust
17/// use partiql_source_map::location::{ByteOffset, LineAndCharPosition};
18/// use partiql_source_map::line_offset_tracker::{LineOffsetError, LineOffsetTracker};
19///
20/// let source = "12345\n789012345\n789012345\n789012345";
21/// let mut tracker = LineOffsetTracker::default();
22/// tracker.record(6.into());
23/// tracker.record(16.into());
24/// tracker.record(26.into());
25///
26/// // We added 3 newlines, so there should be 4 lines of source
27/// assert_eq!(tracker.num_lines(), 4);
28/// assert_eq!(tracker.at(source, ByteOffset(0).into()), Ok(LineAndCharPosition::new(0,0)));
29/// assert_eq!(tracker.at(source, ByteOffset(6).into()), Ok(LineAndCharPosition::new(1,0)));
30/// assert_eq!(tracker.at(source, ByteOffset(30).into()), Ok(LineAndCharPosition::new(3,4)));
31/// assert_eq!(tracker.at(source, ByteOffset(300).into()), Err(LineOffsetError::EndOfInput));
32/// ```
33#[derive(Debug, Clone)]
34#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
35pub struct LineOffsetTracker {
36    line_starts: SmallVec<[ByteOffset; 16]>,
37}
38
39impl Default for LineOffsetTracker {
40    fn default() -> Self {
41        LineOffsetTracker {
42            line_starts: smallvec![ByteOffset(0)], // line 1 starts at offset `0`
43        }
44    }
45}
46
47/// Errors that can be encountered when indexing by byte offset.
48#[derive(Debug, Clone, PartialEq, Eq, Hash)]
49#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
50pub enum LineOffsetError {
51    /// Requested `offset` is past end of input
52    EndOfInput,
53    /// Requested `offset` falls inside a unicode codepoint
54    InsideUnicodeCodepoint,
55}
56
57impl LineOffsetTracker {
58    /// Record a newline at `span` in the source
59    #[inline(always)]
60    pub fn record(&mut self, line_start: ByteOffset) {
61        self.line_starts.push(line_start);
62    }
63
64    /// Append the line starts from another [`LineOffsetTracker`] to this one, adding `offset` to each.
65    #[inline(always)]
66    pub fn append(&mut self, other: &LineOffsetTracker, offset: ByteOffset) {
67        // skip the first offset in `other`; it is the `0` added by `LineOffsetTracker::default()`
68        for start in &other.line_starts[1..] {
69            self.record(offset + *start);
70        }
71    }
72
73    /// Calculate the number of lines of source seen so far.
74    #[inline(always)]
75    #[must_use]
76    pub fn num_lines(&self) -> usize {
77        self.line_starts.len()
78    }
79
80    /// Calculates the byte offset span ([`Range`]) of a line.
81    ///
82    /// `num` is the line number (0-indexed) for which to  calculate the span
83    /// `max` is the largest value allowable in the returned [`Range's end`](core::ops::Range)
84    #[inline(always)]
85    fn byte_span_from_line_num(&self, num: LineOffset, max: ByteOffset) -> Range<ByteOffset> {
86        let start = self.line_starts[num.to_usize()];
87        let end = self
88            .line_starts
89            .get((num + 1).to_usize())
90            .unwrap_or(&max)
91            .min(&max);
92        start..*end
93    }
94
95    /// Calculates the line number (0-indexed) in which a byte offset is contained.
96    ///
97    /// `offset` is the byte offset
98    #[inline(always)]
99    fn line_num_from_byte_offset(&self, offset: ByteOffset) -> LineOffset {
100        match self.line_starts.binary_search(&offset) {
101            Err(i) => i - 1,
102            Ok(i) => i,
103        }
104        .into()
105    }
106
107    /// Calculates a [`LineAndCharPosition`] for a byte offset from the given `&str`
108    ///
109    /// `source` is source `&str` into which the byte offset applies
110    /// `offset` is the byte offset for which to find the [`LineAndCharPosition`]
111    ///
112    /// If `offset` is larger than `source.len()`, then [`LineOffsetError::EndOfInput`] is returned
113    /// If `offset` is in the middle of a unicode codepoint, then [`LineOffsetError::InsideUnicodeCodepoint`] is returned
114    pub fn at(
115        &self,
116        source: &str,
117        BytePosition(offset): BytePosition,
118    ) -> Result<LineAndCharPosition, LineOffsetError> {
119        let full_len = source.len() as u32;
120        match offset {
121            ByteOffset(0) => Ok(LineAndCharPosition::new(0, 0)),
122            ByteOffset(n) if n > full_len => Err(LineOffsetError::EndOfInput),
123            _ => {
124                let line_num = self.line_num_from_byte_offset(offset);
125                let line_span = self.byte_span_from_line_num(line_num, source.len().into());
126                let limit = (offset - line_span.start).0 as usize;
127                let line = &source[line_span.start.0 as usize..line_span.end.0 as usize];
128                let column_num = line
129                    .char_indices()
130                    .enumerate()
131                    .find(|(_i, (idx, _char))| idx == &limit);
132
133                match column_num {
134                    None if limit == line.len() => Ok(LineAndCharPosition::new(
135                        line_num.to_usize(),
136                        line.char_indices().count(),
137                    )),
138                    None => Err(LineOffsetError::InsideUnicodeCodepoint),
139                    Some((column_num, (_idx, _char))) => {
140                        Ok(LineAndCharPosition::new(line_num.to_usize(), column_num))
141                    }
142                }
143            }
144        }
145    }
146}
147
148#[cfg(test)]
149mod tests {
150    use super::*;
151
152    fn tracker_from_str(s: &str) -> LineOffsetTracker {
153        let mut tracker = LineOffsetTracker::default();
154        let mut start = 0;
155        while let Some(l) = s[start..].find('\n') {
156            let span = (start + l)..(start + l + 1);
157            tracker.record(span.end.into());
158            start += l + 1;
159        }
160        tracker
161    }
162
163    #[test]
164    fn single() {
165        let s = "1";
166        let tracker = tracker_from_str(s);
167
168        assert_eq!(tracker.num_lines(), 1);
169        assert_eq!(&s[0..1], "1");
170        assert_eq!(
171            tracker.at(s, 0.into()).unwrap(),
172            LineAndCharPosition::new(0, 0)
173        );
174        assert_eq!(
175            tracker.at(s, 1.into()).unwrap(),
176            LineAndCharPosition::new(0, 1)
177        );
178    }
179
180    #[test]
181    fn simple() {
182        let s = "01\n345";
183        let tracker = tracker_from_str(s);
184
185        assert_eq!(tracker.num_lines(), 2);
186
187        assert_eq!(&s[0..1], "0");
188        assert_eq!(
189            tracker.at(s, 0.into()).unwrap(),
190            LineAndCharPosition::new(0, 0)
191        );
192        assert_eq!(&s[1..2], "1");
193        assert_eq!(
194            tracker.at(s, 1.into()).unwrap(),
195            LineAndCharPosition::new(0, 1)
196        );
197        assert_eq!(&s[2..3], "\n");
198        assert_eq!(
199            tracker.at(s, 2.into()).unwrap(),
200            LineAndCharPosition::new(0, 2)
201        );
202        assert_eq!(&s[3..4], "3");
203        assert_eq!(
204            tracker.at(s, 3.into()).unwrap(),
205            LineAndCharPosition::new(1, 0)
206        );
207        assert_eq!(&s[4..5], "4");
208        assert_eq!(
209            tracker.at(s, 4.into()).unwrap(),
210            LineAndCharPosition::new(1, 1)
211        );
212        assert_eq!(&s[5..6], "5");
213        assert_eq!(
214            tracker.at(s, 5.into()).unwrap(),
215            LineAndCharPosition::new(1, 2)
216        );
217        assert_eq!(s.len(), 6);
218        assert_eq!(
219            tracker.at(s, 6.into()).unwrap(),
220            LineAndCharPosition::new(1, 3)
221        );
222        assert_eq!(tracker.at(s, 7.into()), Err(LineOffsetError::EndOfInput));
223    }
224
225    #[test]
226    fn append() {
227        let s = "01234\nab`de\nqr`tu";
228        let s1 = 0;
229        let s2 = s.find('`').unwrap();
230        let s3 = s.rfind('`').unwrap();
231        let s4 = s.len();
232
233        let mut tracker1 = tracker_from_str(&s[s1..s2]);
234        let mut tracker2 = tracker_from_str(&s[s2..s3]);
235        let tracker3 = tracker_from_str(&s[s3..s4]);
236
237        assert_eq!(tracker1.num_lines(), 2);
238        assert_eq!(tracker2.num_lines(), 2);
239        assert_eq!(tracker3.num_lines(), 1);
240
241        tracker2.append(&tracker3, (s3 - s2).into());
242        tracker1.append(&tracker2, (s2 - s1).into());
243
244        assert_eq!(tracker1.num_lines(), 3);
245        assert_eq!(&s[9..10], "d");
246        assert_eq!(
247            tracker1.at(s, 9.into()).unwrap(),
248            LineAndCharPosition::new(1, 3)
249        );
250        assert_eq!(&s[16..17], "u");
251        assert_eq!(
252            tracker1.at(s, 16.into()).unwrap(),
253            LineAndCharPosition::new(2, 4)
254        );
255    }
256
257    #[test]
258    fn complex() {
259        let s = "0123456789\n0123456789\n012345\n012345\n🤷\n\n";
260        let tracker = tracker_from_str(s);
261
262        assert_eq!(tracker.num_lines(), 7);
263
264        // boundaries
265        assert_eq!(
266            tracker.at(s, 0.into()).unwrap(),
267            LineAndCharPosition::new(0, 0)
268        );
269        assert_eq!(
270            tracker.at(s, s.len().into()).unwrap(),
271            LineAndCharPosition::new(6, 0)
272        );
273        assert_eq!(
274            tracker.at(s, (s.len() + 1).into()),
275            Err(LineOffsetError::EndOfInput)
276        );
277
278        //lines
279        let idx = s.find('2').unwrap();
280        assert_eq!(&s[idx..=idx], "2");
281        assert_eq!(
282            tracker.at(s, idx.into()).unwrap(),
283            LineAndCharPosition::new(0, 2)
284        );
285
286        let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
287        assert_eq!(&s[idx..=idx], "2");
288        assert_eq!(
289            tracker.at(s, idx.into()).unwrap(),
290            LineAndCharPosition::new(1, 2)
291        );
292
293        let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
294        assert_eq!(&s[idx..=idx], "2");
295        assert_eq!(
296            tracker.at(s, idx.into()).unwrap(),
297            LineAndCharPosition::new(2, 2)
298        );
299
300        let idx = 1 + idx + s[idx + 1..].find('2').unwrap();
301        assert_eq!(&s[idx..=idx], "2");
302        assert_eq!(
303            tracker.at(s, idx.into()).unwrap(),
304            LineAndCharPosition::new(3, 2)
305        );
306
307        let idx = s.find('🤷').unwrap();
308        assert_eq!(&s[idx..idx + '🤷'.len_utf8()], "🤷");
309        assert_eq!(
310            tracker.at(s, idx.into()).unwrap(),
311            LineAndCharPosition::new(4, 0)
312        );
313    }
314}