Skip to main content

php_rs_parser/
source_map.rs

1/// Maps byte offsets (as used in [`Span`]) to line/column positions.
2///
/// Build once per source file, then query as many offsets as needed in O(log n) each
/// (binary search over the line starts).
4///
5/// Lines and columns are **0-based** — the LSP convention. Call
6/// [`LineCol::to_one_based`] if you need 1-based positions.
7///
8/// # Example
9///
10/// ```
11/// use php_rs_parser::source_map::{SourceMap, LineCol};
12///
13/// let src = "<?php\necho 'hi';\n";
14/// let map = SourceMap::new(src);
15///
16/// assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
17/// assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
18/// ```
19use php_ast::Span;
20
/// A position in source text, expressed as a 0-based line and column.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Line index, counted from 0.
    pub line: u32,
    /// Column within the line, counted from 0 and measured in UTF-8 bytes.
    pub col: u32,
}

impl LineCol {
    /// Returns `(line, column)` shifted to 1-based numbering, the form
    /// usually shown to humans in diagnostics.
    pub fn to_one_based(self) -> (u32, u32) {
        let Self { line, col } = self;
        (line + 1, col + 1)
    }
}
36
/// A line/column range corresponding to a [`Span`].
///
/// Both endpoints are 0-based [`LineCol`] positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineColSpan {
    /// Position of the span's `start` byte offset.
    pub start: LineCol,
    /// Position of the span's `end` byte offset.
    pub end: LineCol,
}
43
44/// Pre-computed index of line-start byte offsets for a source string.
45///
46/// Construction is O(n) in the source length. Each lookup is O(log n) in the
47/// number of lines (binary search), which is effectively O(1) for typical files.
48pub struct SourceMap {
49    /// Byte offset of the start of each line. `line_starts[0]` is always 0.
50    line_starts: Vec<u32>,
51}
52
53impl SourceMap {
54    /// Build an index from the given source text.
55    pub fn new(source: &str) -> Self {
56        let mut line_starts = vec![0u32];
57        for pos in memchr::memchr_iter(b'\n', source.as_bytes()) {
58            line_starts.push((pos + 1) as u32);
59        }
60        Self { line_starts }
61    }
62
63    /// Total number of lines in the source.
64    pub fn line_count(&self) -> usize {
65        self.line_starts.len()
66    }
67
68    /// Byte offset where the given 0-based line starts.
69    /// Returns `None` if the line is out of range.
70    pub fn line_start(&self, line: u32) -> Option<u32> {
71        self.line_starts.get(line as usize).copied()
72    }
73
74    /// Convert a byte offset to a 0-based line/column.
75    ///
76    /// If `offset` is past the end of the source, the position is clamped to
77    /// the last line.
78    pub fn offset_to_line_col(&self, offset: u32) -> LineCol {
79        let line = match self.line_starts.binary_search(&offset) {
80            Ok(exact) => exact,
81            Err(after) => after - 1,
82        };
83        let col = offset - self.line_starts[line];
84        LineCol {
85            line: line as u32,
86            col,
87        }
88    }
89
90    /// Convert a [`Span`] to a start/end [`LineColSpan`].
91    pub fn span_to_line_col(&self, span: Span) -> LineColSpan {
92        LineColSpan {
93            start: self.offset_to_line_col(span.start),
94            end: self.offset_to_line_col(span.end),
95        }
96    }
97
98    /// Convert a 0-based line/column back to a byte offset.
99    /// Returns `None` if the line is out of range.
100    pub fn line_col_to_offset(&self, lc: LineCol) -> Option<u32> {
101        self.line_starts
102            .get(lc.line as usize)
103            .map(|start| start + lc.col)
104    }
105}
106
#[cfg(test)]
mod tests {
    //! Unit tests for [`SourceMap`] and [`LineCol`].

    use super::*;

    /// An empty source still has exactly one (empty) line.
    #[test]
    fn empty_source() {
        let map = SourceMap::new("");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
    }

    /// Without any newline, every offset is on line 0.
    #[test]
    fn single_line_no_newline() {
        let map = SourceMap::new("<?php echo 1;");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 0, col: 6 });
    }

    /// Line starts, mid-line offsets and the newline byte itself.
    #[test]
    fn multiple_lines() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 4); // 3 lines + trailing empty line after last \n

        // First char of line 0
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        // First char of line 1 ('e' of echo)
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
        // 'h' of echo, in the middle of line 1
        assert_eq!(map.offset_to_line_col(8), LineCol { line: 1, col: 2 });
        // The '\n' ending line 1 still belongs to line 1
        assert_eq!(map.offset_to_line_col(16), LineCol { line: 1, col: 10 });
        // First char of line 2
        assert_eq!(map.offset_to_line_col(17), LineCol { line: 2, col: 0 });
    }

    /// Both ends of a span are converted independently.
    #[test]
    fn span_conversion() {
        let src = "<?php\necho 'hi';\n";
        let map = SourceMap::new(src);
        let span = Span::new(6, 10); // "echo"
        let lc = map.span_to_line_col(span);
        assert_eq!(lc.start, LineCol { line: 1, col: 0 });
        assert_eq!(lc.end, LineCol { line: 1, col: 4 });
    }

    /// `line_col_to_offset` is the inverse of `offset_to_line_col`.
    #[test]
    fn round_trip() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        let lc = LineCol { line: 1, col: 5 };
        let offset = map.line_col_to_offset(lc).unwrap();
        assert_eq!(map.offset_to_line_col(offset), lc);
    }

    /// 1-based conversion adds one to both components.
    #[test]
    fn one_based() {
        let lc = LineCol { line: 0, col: 0 };
        assert_eq!(lc.to_one_based(), (1, 1));
        let lc = LineCol { line: 2, col: 5 };
        assert_eq!(lc.to_one_based(), (3, 6));
    }

    /// `line_start` returns each line's first byte offset, or `None` past the end.
    #[test]
    fn line_start_lookup() {
        let src = "aaa\nbbb\nccc";
        let map = SourceMap::new(src);
        assert_eq!(map.line_start(0), Some(0));
        assert_eq!(map.line_start(1), Some(4));
        assert_eq!(map.line_start(2), Some(8));
        assert_eq!(map.line_start(3), None);
    }

    /// Only `\n` ends a line; the `\r` of a CRLF pair is an ordinary byte.
    #[test]
    fn crlf_treated_as_two_bytes() {
        // "a\r\nb": 'a' = offset 0, '\r' = offset 1, '\n' = offset 2, 'b' = offset 3.
        let src = "a\r\nb";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 2);
        // '\r' is col 1 on line 0
        assert_eq!(map.offset_to_line_col(1), LineCol { line: 0, col: 1 });
        // '\n' is col 2 on line 0; the next line starts right after it
        assert_eq!(map.offset_to_line_col(2), LineCol { line: 0, col: 2 });
        // 'b' is at offset 3, which is where line 1 starts
        assert_eq!(map.offset_to_line_col(3), LineCol { line: 1, col: 0 });
    }
}