markdown_syntax/span.rs
1//! Source locations: byte-offset [`Span`]s and their [`LineIndex`] translation
2//! into human-readable line/column [`LinePosition`]s.
3
4use alloc::vec::Vec;
5
/// A half-open byte range `start..end` into the original source string.
///
/// Both bounds are absolute UTF-8 byte offsets from the start of the document
/// (not line/column); use [`LineIndex`] to translate an offset into a line and
/// column.
///
/// The fields are public and [`Span::new`] performs no validation, so an
/// *inverted* span (`end < start`) can be constructed. An inverted span is
/// treated as covering zero bytes by both [`Span::len`] and
/// [`Span::is_empty`]; use [`Span::is_valid`] to detect it.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset (one past the last byte).
    pub end: usize,
}

impl Span {
    /// Construct a span from a start and end byte offset.
    ///
    /// The offsets are stored as given; no ordering check is performed.
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// The length of the span in bytes (`0` if `end <= start`).
    pub const fn len(self) -> usize {
        // Saturate so that an inverted span yields 0 instead of underflowing.
        self.end.saturating_sub(self.start)
    }

    /// Whether the span covers zero bytes.
    ///
    /// Always agrees with [`Span::len`]: this returns `true` exactly when
    /// `len()` is `0`, which includes inverted spans (`end < start`).
    pub const fn is_empty(self) -> bool {
        self.len() == 0
    }

    /// Whether `other` lies entirely within this span.
    ///
    /// The comparison is bound-by-bound (`self.start <= other.start` and
    /// `other.end <= self.end`); neither span is checked for validity.
    pub const fn contains(self, other: Span) -> bool {
        self.start <= other.start && other.end <= self.end
    }

    /// Whether `start <= end` (a well-formed range).
    pub const fn is_valid(self) -> bool {
        self.start <= self.end
    }
}
43
/// A human-readable source location: a line and a column, both 1-based.
///
/// Values of this type are produced by [`LineIndex`] when it translates a byte
/// offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LinePosition {
    /// Line number, starting at 1 for the first line.
    pub line: usize,
    /// Column number, starting at 1 and counted in bytes from the line start.
    pub column: usize,
}
52
53/// A precomputed map from byte offsets to line/column positions for one source
54/// string. Build it once with [`LineIndex::new`], then query repeatedly.
55#[derive(Clone, Debug, Eq, PartialEq)]
56pub struct LineIndex {
57 line_starts: Vec<usize>,
58 len: usize,
59}
60
61impl LineIndex {
62 /// Build a line index for `source`, scanning its line breaks
63 /// (`\n`, `\r`, and `\r\n`).
64 pub fn new(source: &str) -> Self {
65 let bytes = source.as_bytes();
66 let mut starts = Vec::new();
67 starts.push(0);
68
69 let mut index = 0;
70 while index < bytes.len() {
71 match bytes[index] {
72 b'\r' => {
73 if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
74 index += 2;
75 } else {
76 index += 1;
77 }
78 starts.push(index);
79 }
80 b'\n' => {
81 index += 1;
82 starts.push(index);
83 }
84 _ => index += 1,
85 }
86 }
87
88 Self {
89 line_starts: starts,
90 len: source.len(),
91 }
92 }
93
94 /// Translate a byte `offset` into its 1-based line and column (clamped to the
95 /// end of the source).
96 pub fn position(&self, offset: usize) -> LinePosition {
97 let offset = offset.min(self.len);
98 let line_index = match self.line_starts.binary_search(&offset) {
99 Ok(index) => index,
100 Err(index) => index.saturating_sub(1),
101 };
102 let line_start = self.line_starts[line_index];
103
104 LinePosition {
105 line: line_index + 1,
106 column: offset.saturating_sub(line_start) + 1,
107 }
108 }
109
110 /// Translate a [`Span`] into its start and end [`LinePosition`]s.
111 pub fn span(&self, span: Span) -> (LinePosition, LinePosition) {
112 (self.position(span.start), self.position(span.end))
113 }
114}