Skip to main content

php_rs_parser/
source_map.rs

/// Maps byte offsets (as used in [`Span`]) to line/column positions.
///
/// Build once per source file, then query as many offsets as needed; each
/// lookup is a binary search over the line starts, i.e. O(log n) in the
/// number of lines.
///
/// Lines and columns are **0-based** — the LSP convention. Call
/// [`LineCol::to_one_based`] if you need 1-based positions.
///
/// # Example
///
/// ```
/// use php_rs_parser::source_map::{SourceMap, LineCol};
///
/// let src = "<?php\necho 'hi';\n";
/// let map = SourceMap::new(src);
///
/// assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
/// assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
/// ```
19use php_ast::Span;
20
/// A 0-based line/column position.
///
/// Both fields count from zero; use [`LineCol::to_one_based`] when a
/// human-facing (1-based) position is needed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// 0-based line number.
    pub line: u32,
    /// 0-based UTF-8 byte column offset within the line.
    pub col: u32,
}

impl LineCol {
    /// Convert to 1-based `(line, column)` (e.g. for human-readable diagnostics).
    ///
    /// Each component is incremented with saturating arithmetic, so a field
    /// that is already `u32::MAX` stays at `u32::MAX` instead of panicking in
    /// debug builds or wrapping around to `0` in release builds.
    pub fn to_one_based(self) -> (u32, u32) {
        (self.line.saturating_add(1), self.col.saturating_add(1))
    }
}
36
37/// A line/column range corresponding to a [`Span`].
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
39pub struct LineColSpan {
40    pub start: LineCol,
41    pub end: LineCol,
42}
43
/// Index of the byte offsets at which each line of a source string begins.
///
/// Building the index is a single O(n) pass over the source. A lookup is a
/// binary search over the recorded line starts, i.e. O(log n) in the number
/// of lines — effectively constant for typical files.
pub struct SourceMap {
    /// Starting byte offset of every line, in ascending order.
    /// The first entry, `line_starts[0]`, is always 0.
    line_starts: Vec<u32>,
}
52
53impl SourceMap {
54    /// Build an index from the given source text.
55    pub fn new(source: &str) -> Self {
56        let mut line_starts = Vec::with_capacity(source.len() / 40 + 1);
57        line_starts.push(0u32);
58        for pos in memchr::memchr_iter(b'\n', source.as_bytes()) {
59            line_starts.push((pos + 1) as u32);
60        }
61        Self { line_starts }
62    }
63
64    /// A no-op map for callers that never query line/column positions.
65    /// Returned by [`parse_arena_raw`](crate::parse_arena_raw).
66    /// `offset_to_line_col` on an empty map returns `LineCol { line: 0, col: offset }`.
67    pub fn empty() -> Self {
68        Self {
69            line_starts: vec![0u32],
70        }
71    }
72
73    /// Total number of lines in the source.
74    pub fn line_count(&self) -> usize {
75        self.line_starts.len()
76    }
77
78    /// Byte offset where the given 0-based line starts.
79    /// Returns `None` if the line is out of range.
80    pub fn line_start(&self, line: u32) -> Option<u32> {
81        self.line_starts.get(line as usize).copied()
82    }
83
84    /// Convert a byte offset to a 0-based line/column.
85    ///
86    /// If `offset` is past the end of the source, the position is clamped to
87    /// the last line.
88    pub fn offset_to_line_col(&self, offset: u32) -> LineCol {
89        let line = match self.line_starts.binary_search(&offset) {
90            Ok(exact) => exact,
91            Err(after) => after - 1,
92        };
93        let col = offset - self.line_starts[line];
94        LineCol {
95            line: line as u32,
96            col,
97        }
98    }
99
100    /// Convert a [`Span`] to a start/end [`LineColSpan`].
101    pub fn span_to_line_col(&self, span: Span) -> LineColSpan {
102        LineColSpan {
103            start: self.offset_to_line_col(span.start),
104            end: self.offset_to_line_col(span.end),
105        }
106    }
107
108    /// Convert a 0-based line/column back to a byte offset.
109    /// Returns `None` if the line is out of range.
110    pub fn line_col_to_offset(&self, lc: LineCol) -> Option<u32> {
111        self.line_starts
112            .get(lc.line as usize)
113            .map(|start| start + lc.col)
114    }
115}
116
/// Unit tests for [`SourceMap`] and [`LineCol`].
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_source() {
        let map = SourceMap::new("");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
    }

    #[test]
    fn empty_map_reports_everything_on_line_zero() {
        let map = SourceMap::empty();
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(42), LineCol { line: 0, col: 42 });
    }

    #[test]
    fn single_line_no_newline() {
        let map = SourceMap::new("<?php echo 1;");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 0, col: 6 });
    }

    #[test]
    fn multiple_lines() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 4); // 3 lines + trailing empty line after last \n

        // First char of line 0
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        // The '\n' byte itself still belongs to the line it terminates
        assert_eq!(map.offset_to_line_col(5), LineCol { line: 0, col: 5 });
        // First char of line 1 ('e' of echo)
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
        // 'h' of echo, in the middle of line 1
        assert_eq!(map.offset_to_line_col(8), LineCol { line: 1, col: 2 });
        // First char of line 2
        assert_eq!(map.offset_to_line_col(17), LineCol { line: 2, col: 0 });
        // End of input: start of the trailing empty line
        assert_eq!(map.offset_to_line_col(25), LineCol { line: 3, col: 0 });
    }

    #[test]
    fn offset_past_end_clamps_to_last_line() {
        let map = SourceMap::new("ab\ncd");
        // Last line starts at offset 3; the column is not clamped.
        assert_eq!(map.offset_to_line_col(100), LineCol { line: 1, col: 97 });
    }

    #[test]
    fn span_conversion() {
        let src = "<?php\necho 'hi';\n";
        let map = SourceMap::new(src);
        let span = Span::new(6, 10); // "echo"
        let lc = map.span_to_line_col(span);
        assert_eq!(lc.start, LineCol { line: 1, col: 0 });
        assert_eq!(lc.end, LineCol { line: 1, col: 4 });
    }

    #[test]
    fn round_trip() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        let lc = LineCol { line: 1, col: 5 };
        let offset = map.line_col_to_offset(lc).unwrap();
        assert_eq!(offset, 11);
        assert_eq!(map.offset_to_line_col(offset), lc);
    }

    #[test]
    fn line_col_to_offset_out_of_range_line() {
        let map = SourceMap::new("aaa\nbbb\nccc");
        assert_eq!(map.line_col_to_offset(LineCol { line: 2, col: 1 }), Some(9));
        assert_eq!(map.line_col_to_offset(LineCol { line: 3, col: 0 }), None);
    }

    #[test]
    fn one_based() {
        let lc = LineCol { line: 0, col: 0 };
        assert_eq!(lc.to_one_based(), (1, 1));
        let lc = LineCol { line: 2, col: 5 };
        assert_eq!(lc.to_one_based(), (3, 6));
    }

    #[test]
    fn line_start_lookup() {
        let src = "aaa\nbbb\nccc";
        let map = SourceMap::new(src);
        assert_eq!(map.line_start(0), Some(0));
        assert_eq!(map.line_start(1), Some(4));
        assert_eq!(map.line_start(2), Some(8));
        assert_eq!(map.line_start(3), None);
    }

    #[test]
    fn crlf_treated_as_two_bytes() {
        // "a\r\nb": 'a' at offset 0, '\r' at offset 1, '\n' at offset 2, so
        // '\r' is an ordinary byte of line 0 and line 1 starts at offset 3.
        let src = "a\r\nb";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 2);
        // '\r' and '\n' both sit on line 0
        assert_eq!(map.offset_to_line_col(1), LineCol { line: 0, col: 1 });
        assert_eq!(map.offset_to_line_col(2), LineCol { line: 0, col: 2 });
        // 'b' is at offset 3, which is where line 1 starts
        assert_eq!(map.offset_to_line_col(3), LineCol { line: 1, col: 0 });
    }
}