Skip to main content

markdown_syntax/
span.rs

1//! Source locations: byte-offset [`Span`]s and their [`LineIndex`] translation
2//! into human-readable line/column [`LinePosition`]s.
3
4use alloc::vec::Vec;
5
/// A half-open byte range `start..end` into the original source string.
///
/// Both bounds are absolute UTF-8 byte offsets measured from the start of the
/// document (not line/column); use [`LineIndex`] to translate an offset into a
/// line and column.
///
/// Nothing enforces `start <= end`: [`Span::new`] stores whatever it is given.
/// A reversed span is reported as zero-length and empty; [`Span::is_valid`]
/// tells such spans apart from well-formed ones.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset (one past the last byte).
    pub end: usize,
}

impl Span {
    /// Construct a span from a start and end byte offset.
    ///
    /// The bounds are stored as given; no ordering check is performed.
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// The length of the span in bytes (`0` if `end <= start`).
    pub const fn len(self) -> usize {
        // Saturating so that a reversed span yields 0 instead of underflowing.
        self.end.saturating_sub(self.start)
    }

    /// Whether the span covers zero bytes, i.e. whether [`Span::len`] is `0`.
    ///
    /// This is `true` for a reversed span (`end < start`) as well as for
    /// `start == end`, so it always agrees with `len() == 0`.
    pub const fn is_empty(self) -> bool {
        self.end <= self.start
    }

    /// Whether `other` lies entirely within this span.
    ///
    /// Only the bounds are compared: `other` must start at or after
    /// `self.start` and end at or before `self.end`.
    pub const fn contains(self, other: Span) -> bool {
        self.start <= other.start && other.end <= self.end
    }

    /// Whether `start <= end` (a well-formed range).
    pub const fn is_valid(self) -> bool {
        self.start <= self.end
    }
}
43
/// A human-readable location in a source string, expressed as a 1-based line
/// and column. Values of this type are produced from byte offsets by
/// [`LineIndex`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LinePosition {
    /// Line number, starting at `1` for the first line.
    pub line: usize,
    /// Column number, starting at `1`; counted in bytes from the start of the
    /// line.
    pub column: usize,
}
52
53/// A precomputed map from byte offsets to line/column positions for one source
54/// string. Build it once with [`LineIndex::new`], then query repeatedly.
55#[derive(Clone, Debug, Eq, PartialEq)]
56pub struct LineIndex {
57    line_starts: Vec<usize>,
58    len: usize,
59}
60
61impl LineIndex {
62    /// Build a line index for `source`, scanning its line breaks
63    /// (`\n`, `\r`, and `\r\n`).
64    pub fn new(source: &str) -> Self {
65        let bytes = source.as_bytes();
66        let mut starts = Vec::new();
67        starts.push(0);
68
69        let mut index = 0;
70        while index < bytes.len() {
71            match bytes[index] {
72                b'\r' => {
73                    if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
74                        index += 2;
75                    } else {
76                        index += 1;
77                    }
78                    starts.push(index);
79                }
80                b'\n' => {
81                    index += 1;
82                    starts.push(index);
83                }
84                _ => index += 1,
85            }
86        }
87
88        Self {
89            line_starts: starts,
90            len: source.len(),
91        }
92    }
93
94    /// Translate a byte `offset` into its 1-based line and column (clamped to the
95    /// end of the source).
96    pub fn position(&self, offset: usize) -> LinePosition {
97        let offset = offset.min(self.len);
98        let line_index = match self.line_starts.binary_search(&offset) {
99            Ok(index) => index,
100            Err(index) => index.saturating_sub(1),
101        };
102        let line_start = self.line_starts[line_index];
103
104        LinePosition {
105            line: line_index + 1,
106            column: offset.saturating_sub(line_start) + 1,
107        }
108    }
109
110    /// Translate a [`Span`] into its start and end [`LinePosition`]s.
111    pub fn span(&self, span: Span) -> (LinePosition, LinePosition) {
112        (self.position(span.start), self.position(span.end))
113    }
114}