Skip to main content

php_rs_parser/
source_map.rs

/// Maps byte offsets (as used in [`Span`]) to line/column positions.
///
/// Build once per source file, then query as many offsets as needed; each
/// lookup is a binary search over the line starts, i.e. O(log n) in the
/// number of lines.
///
/// Lines and columns are **0-based** — the LSP convention. Call
/// [`LineCol::to_one_based`] if you need 1-based positions.
///
/// # Example
///
/// ```
/// use php_rs_parser::source_map::{SourceMap, LineCol};
///
/// let src = "<?php\necho 'hi';\n";
/// let map = SourceMap::new(src);
///
/// assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
/// assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
/// ```
19use php_ast::Span;
20
/// A position in source text, expressed as a 0-based line and column.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Line index, counted from 0.
    pub line: u32,
    /// Column within the line, counted from 0 in UTF-8 bytes.
    pub col: u32,
}

impl LineCol {
    /// Returns `(line, column)` shifted to 1-based numbering, the form
    /// usually shown in human-readable diagnostics.
    pub fn to_one_based(self) -> (u32, u32) {
        let LineCol { line, col } = self;
        (line + 1, col + 1)
    }
}
36
37/// A line/column range corresponding to a [`Span`].
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
39pub struct LineColSpan {
40    pub start: LineCol,
41    pub end: LineCol,
42}
43
44/// Pre-computed index of line-start byte offsets for a source string.
45///
46/// Construction is O(n) in the source length. Each lookup is O(log n) in the
47/// number of lines (binary search), which is effectively O(1) for typical files.
48pub struct SourceMap {
49    /// Byte offset of the start of each line. `line_starts[0]` is always 0.
50    line_starts: Vec<u32>,
51}
52
53impl SourceMap {
54    /// Build an index from the given source text.
55    pub fn new(source: &str) -> Self {
56        let mut line_starts = vec![0u32];
57        for (i, byte) in source.bytes().enumerate() {
58            if byte == b'\n' {
59                line_starts.push((i + 1) as u32);
60            }
61        }
62        Self { line_starts }
63    }
64
65    /// Total number of lines in the source.
66    pub fn line_count(&self) -> usize {
67        self.line_starts.len()
68    }
69
70    /// Byte offset where the given 0-based line starts.
71    /// Returns `None` if the line is out of range.
72    pub fn line_start(&self, line: u32) -> Option<u32> {
73        self.line_starts.get(line as usize).copied()
74    }
75
76    /// Convert a byte offset to a 0-based line/column.
77    ///
78    /// If `offset` is past the end of the source, the position is clamped to
79    /// the last line.
80    pub fn offset_to_line_col(&self, offset: u32) -> LineCol {
81        let line = match self.line_starts.binary_search(&offset) {
82            Ok(exact) => exact,
83            Err(after) => after - 1,
84        };
85        let col = offset - self.line_starts[line];
86        LineCol {
87            line: line as u32,
88            col,
89        }
90    }
91
92    /// Convert a [`Span`] to a start/end [`LineColSpan`].
93    pub fn span_to_line_col(&self, span: Span) -> LineColSpan {
94        LineColSpan {
95            start: self.offset_to_line_col(span.start),
96            end: self.offset_to_line_col(span.end),
97        }
98    }
99
100    /// Convert a 0-based line/column back to a byte offset.
101    /// Returns `None` if the line is out of range.
102    pub fn line_col_to_offset(&self, lc: LineCol) -> Option<u32> {
103        self.line_starts
104            .get(lc.line as usize)
105            .map(|start| start + lc.col)
106    }
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty source still has exactly one (empty) line.
    #[test]
    fn empty_source() {
        let map = SourceMap::new("");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
    }

    /// Without any `\n`, every offset is on line 0.
    #[test]
    fn single_line_no_newline() {
        let map = SourceMap::new("<?php echo 1;");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 0, col: 6 });
    }

    /// Line starts are at offsets 0, 6, 17 and 25 for this source.
    #[test]
    fn multiple_lines() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 4); // 3 lines + trailing empty line after last \n

        // First char of line 0
        assert_eq!(map.offset_to_line_col(0), LineCol { line: 0, col: 0 });
        // The '\n' that terminates line 0 still belongs to line 0
        assert_eq!(map.offset_to_line_col(5), LineCol { line: 0, col: 5 });
        // 'e' of echo: first char of line 1
        assert_eq!(map.offset_to_line_col(6), LineCol { line: 1, col: 0 });
        // 'h' of echo: a mid-line offset on line 1
        assert_eq!(map.offset_to_line_col(8), LineCol { line: 1, col: 2 });
        // First char of line 2
        assert_eq!(map.offset_to_line_col(17), LineCol { line: 2, col: 0 });
        // Start of the empty trailing line after the final '\n'
        assert_eq!(map.offset_to_line_col(25), LineCol { line: 3, col: 0 });
    }

    /// Offsets past the end land on the last line; the column is not clamped.
    #[test]
    fn offset_past_end_stays_on_last_line() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        assert_eq!(map.offset_to_line_col(100), LineCol { line: 3, col: 75 });
    }

    #[test]
    fn span_conversion() {
        let src = "<?php\necho 'hi';\n";
        let map = SourceMap::new(src);
        let span = Span::new(6, 10); // "echo"
        let lc = map.span_to_line_col(span);
        assert_eq!(lc.start, LineCol { line: 1, col: 0 });
        assert_eq!(lc.end, LineCol { line: 1, col: 4 });
    }

    #[test]
    fn round_trip() {
        let src = "<?php\necho 'hi';\nreturn;\n";
        let map = SourceMap::new(src);
        let lc = LineCol { line: 1, col: 5 };
        let offset = map.line_col_to_offset(lc).unwrap();
        assert_eq!(offset, 11);
        assert_eq!(map.offset_to_line_col(offset), lc);
    }

    #[test]
    fn line_col_to_offset_lookup() {
        let src = "aaa\nbbb\nccc";
        let map = SourceMap::new(src);
        assert_eq!(map.line_col_to_offset(LineCol { line: 0, col: 0 }), Some(0));
        assert_eq!(map.line_col_to_offset(LineCol { line: 1, col: 2 }), Some(6));
        // Line 3 does not exist.
        assert_eq!(map.line_col_to_offset(LineCol { line: 3, col: 0 }), None);
    }

    #[test]
    fn one_based() {
        let lc = LineCol { line: 0, col: 0 };
        assert_eq!(lc.to_one_based(), (1, 1));
        let lc = LineCol { line: 2, col: 5 };
        assert_eq!(lc.to_one_based(), (3, 6));
    }

    #[test]
    fn line_start_lookup() {
        let src = "aaa\nbbb\nccc";
        let map = SourceMap::new(src);
        assert_eq!(map.line_start(0), Some(0));
        assert_eq!(map.line_start(1), Some(4));
        assert_eq!(map.line_start(2), Some(8));
        assert_eq!(map.line_start(3), None);
    }

    #[test]
    fn crlf_treated_as_two_bytes() {
        // Bytes: 'a' = 0, '\r' = 1, '\n' = 2, 'b' = 3. Only '\n' ends a line,
        // so '\r' is an ordinary byte at col 1 of line 0 and line 1 starts
        // at offset 3.
        let src = "a\r\nb";
        let map = SourceMap::new(src);
        assert_eq!(map.line_count(), 2);
        assert_eq!(map.offset_to_line_col(1), LineCol { line: 0, col: 1 });
        // 'b' is at offset 3, which is exactly where line 1 starts
        assert_eq!(map.offset_to_line_col(3), LineCol { line: 1, col: 0 });
    }
}
190}