line_numbers/
lib.rs

1//! Efficiently find line numbers and line spans within a string.
2//!
3//! ```rust
4//! use line_numbers::LinePositions;
5//!
6//! let s = "foo\nbar\nbaz\n";
7//! let s_lines: Vec<_> = s.lines().collect();
8//!
9//! let line_positions = LinePositions::from(s);
10//!
11//! let offset = 5;
12//! let (line_num, column) = line_positions.from_offset(offset);
13//!
14//! println!(
15//!     "Offset {} is on line {} (column {}), and the text of that line is {:?}.",
16//!     offset,
17//!     line_num.display(),
18//!     column,
19//!     s_lines[line_num.as_usize()]
20//! );
21//! ```
22
23// The `from_offset*` methods on NewlinePositions are sensible names,
24// and the docs clippy cites:
25// https://rust-lang.github.io/api-guidelines/naming.html#ad-hoc-conversions-follow-as_-to_-into_-conventions-c-conv
26// don't actually have an opinion on `from_foo` names.
27#![allow(clippy::wrong_self_convention)]
28
29use std::cmp::Ordering;
30use std::fmt;
31
32/// A distinct number type for line numbers, to prevent confusion with
33/// other numerical data.
34///
35/// Zero-indexed internally.
36///
37/// We use a 32-bit integer, so a file cannot have more than 4 billion
38/// lines. This keeps the size of the struct small. It's common to
39/// have a lot of `LineNumber`s when analysing large files, so the
40/// struct size is more important than handling crazy big files.
41#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
42pub struct LineNumber(pub u32);
43
44impl LineNumber {
45    pub fn display(self) -> String {
46        format!("{}", self.0 + 1)
47    }
48
49    pub fn as_usize(self) -> usize {
50        self.0 as usize
51    }
52}
53
54impl fmt::Debug for LineNumber {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        write!(
57            f,
58            "LineNumber: {} (zero-indexed: {})",
59            self.display(),
60            self.0
61        )
62    }
63}
64
65impl From<u32> for LineNumber {
66    fn from(number: u32) -> Self {
67        Self(number)
68    }
69}
70
71/// A range within a single line of a string.
72#[derive(Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash)]
73pub struct SingleLineSpan {
74    pub line: LineNumber,
75    /// Start column.
76    pub start_col: u32,
77    /// End column.
78    pub end_col: u32,
79}
80
81/// A struct for efficiently converting absolute string positions to
82/// line-relative positions.
83#[derive(Debug)]
84pub struct LinePositions {
85    /// A vector of the start and end positions (in bytes) of all the
86    /// lines in a string. Positions include the newline character
87    /// itself.
88    positions: Vec<(usize, usize)>,
89}
90
91impl From<&str> for LinePositions {
92    fn from(s: &str) -> Self {
93        let mut line_start = 0;
94        let mut positions = vec![];
95        for line in s.split('\n') {
96            let line_end = line_start + line.len() + "\n".len();
97            // TODO: this assumes lines terminate with \n, not \r\n.
98            positions.push((line_start, line_end - 1));
99            line_start = line_end;
100        }
101
102        LinePositions { positions }
103    }
104}
105
106impl LinePositions {
107    /// Return the line and column corresponding to this
108    /// `offset`. `offset` is measured in bytes.
109    ///
110    /// # Panics
111    ///
112    /// Panics if `offset` is out of bounds.
113    pub fn from_offset(&self, offset: usize) -> (LineNumber, usize) {
114        if let Some((_, s_end)) = self.positions.last() {
115            assert!(
116                offset <= *s_end,
117                "Offset {} is out of bounds for a string of length {}",
118                offset,
119                s_end
120            );
121        }
122
123        let idx = self
124            .positions
125            .binary_search_by(|(line_start, line_end)| {
126                if *line_end < offset {
127                    return Ordering::Less;
128                }
129                if *line_start > offset {
130                    return Ordering::Greater;
131                }
132
133                Ordering::Equal
134            })
135            .expect("line should be present");
136
137        let (line_start_offset, _) = self.positions.get(idx).unwrap();
138        let column = offset - line_start_offset;
139
140        (LineNumber::from(idx as u32), column)
141    }
142
143    /// Convert this region into line spans. If the region includes a
144    /// newline, the vec will contain multiple items.
145    ///
146    /// # Panics
147    ///
148    /// Panics if `region_start` or `region_end` are out of bounds, or
149    /// if `region_start` is greater than `region_end`.
150    pub fn from_region(&self, region_start: usize, region_end: usize) -> Vec<SingleLineSpan> {
151        assert!(region_start <= region_end);
152
153        let (first_line, _) = self.from_offset(region_start);
154        let (last_line, _) = self.from_offset(region_end);
155
156        let mut res = vec![];
157        for idx in first_line.0..=last_line.0 {
158            let (line_start, line_end) = self.positions[idx as usize];
159            res.push(SingleLineSpan {
160                line: idx.into(),
161                start_col: region_start.saturating_sub(line_start) as u32,
162                end_col: if region_end < line_end {
163                    region_end - line_start
164                } else {
165                    line_end - line_start
166                } as u32,
167            });
168        }
169
170        res
171    }
172
173    /// Given a region in the current LinePositions, convert it to be
174    /// relative to a `start` offset in a larger, enclosing string.
175    ///
176    /// # Panics
177    ///
178    /// Panics if `region_start` or `region_end` are out of bounds, or
179    /// if `region_start` is greater than `region_end`.
180    pub fn from_region_relative_to(
181        &self,
182        start: SingleLineSpan,
183        region_start: usize,
184        region_end: usize,
185    ) -> Vec<SingleLineSpan> {
186        assert!(region_start <= region_end);
187
188        let mut res = vec![];
189        for pos in self.from_region(region_start, region_end) {
190            if pos.line.0 == 0 {
191                res.push(SingleLineSpan {
192                    line: (pos.line.0 + start.line.0).into(),
193                    // On the first line of the inner string, the
194                    // inner column offset may not match the column
195                    // offset of the enclosing string.
196                    start_col: pos.start_col + start.start_col,
197                    end_col: pos.end_col + start.start_col,
198                });
199            } else {
200                res.push(SingleLineSpan {
201                    line: (pos.line.0 + start.line.0).into(),
202                    // On later lines in the inner string, since we've
203                    // seen a newline, we know the column offsets are
204                    // the same as the enclosing string.
205                    start_col: pos.start_col,
206                    end_col: pos.end_col,
207                });
208            }
209        }
210
211        res
212    }
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    #[test]
220    fn test_display_one_indexed() {
221        let ln = LineNumber(0);
222        assert_eq!(ln.display(), "1");
223    }
224
225    #[test]
226    fn from_offset() {
227        let newline_positions: LinePositions = "foo\nbar".into();
228        let offset = 5;
229
230        let (line, column) = newline_positions.from_offset(offset);
231        assert_eq!(line.as_usize(), 1);
232        assert_eq!(column, 1);
233    }
234
235    #[test]
236    fn from_region_first_line() {
237        let newline_positions: LinePositions = "foo".into();
238        let line_spans = newline_positions.from_region(1, 3);
239        assert_eq!(
240            line_spans,
241            vec![SingleLineSpan {
242                line: 0.into(),
243                start_col: 1,
244                end_col: 3
245            }]
246        );
247    }
248
249    #[test]
250    fn from_region_first_char() {
251        let newline_positions: LinePositions = "foo".into();
252        let line_spans = newline_positions.from_region(0, 0);
253        assert_eq!(
254            line_spans,
255            vec![SingleLineSpan {
256                line: 0.into(),
257                start_col: 0,
258                end_col: 0
259            }]
260        );
261    }
262
263    #[test]
264    fn from_region_split_over_multiple_lines() {
265        let newline_positions: LinePositions = "foo\nbar\nbaz\naaaaaaaaaaa".into();
266        let line_spans = newline_positions.from_region(5, 10);
267
268        assert_eq!(
269            line_spans,
270            vec![
271                SingleLineSpan {
272                    line: 1.into(),
273                    start_col: 1,
274                    end_col: 3
275                },
276                SingleLineSpan {
277                    line: 2.into(),
278                    start_col: 0,
279                    end_col: 2
280                }
281            ]
282        );
283    }
284
285    #[test]
286    fn from_region_relative_to() {
287        let newline_positions: LinePositions = "foo\nbar".into();
288
289        let pos = SingleLineSpan {
290            line: 100.into(),
291            start_col: 1,
292            end_col: 1,
293        };
294
295        let line_spans = newline_positions.from_region_relative_to(pos, 1, 7);
296        assert_eq!(
297            line_spans,
298            vec![
299                SingleLineSpan {
300                    line: 100.into(),
301                    start_col: 2,
302                    end_col: 4
303                },
304                SingleLineSpan {
305                    line: 101.into(),
306                    start_col: 0,
307                    end_col: 3
308                }
309            ]
310        );
311    }
312
313    #[test]
314    #[should_panic(expected = "out of bounds for a string")]
315    fn test_from_offset_out_of_bounds() {
316        let newline_positions: LinePositions = "foo".into();
317        let _ = newline_positions.from_offset(4);
318    }
319}