// rlsp_yaml_parser/pos.rs

// SPDX-License-Identifier: MIT

/// A position within the input stream.
///
/// `line` is 1-based; `column` is 0-based (codepoints from the start of the line).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Pos {
    /// Byte offset from the start of the input; grows by each character's
    /// UTF-8 encoded length.
    pub byte_offset: usize,
    /// 1-based line number.
    pub line: usize,
    /// 0-based column, counted in codepoints from the start of the line.
    pub column: usize,
}

impl Pos {
    /// The position representing the start of a document.
    pub const ORIGIN: Self = Self {
        byte_offset: 0,
        line: 1,
        column: 0,
    };

    /// Advance the position by one character.
    ///
    /// If `ch` is a line feed (`\n`) the line counter is incremented and the
    /// column is reset to 0.  For all other characters the column advances by
    /// one.  `byte_offset` advances by `ch.len_utf8()`.
    ///
    /// Returns the new position; `self` is taken by value (`Pos` is `Copy`),
    /// so the caller's original position is left untouched.
    #[must_use]
    pub const fn advance(self, ch: char) -> Self {
        // The byte offset moves forward regardless of which branch is taken.
        let byte_offset = self.byte_offset + ch.len_utf8();
        if ch == '\n' {
            // A line feed starts a new line: bump the line, rewind the column.
            Self {
                byte_offset,
                line: self.line + 1,
                column: 0,
            }
        } else {
            // Any other character occupies exactly one column (one codepoint).
            Self {
                byte_offset,
                line: self.line,
                column: self.column + 1,
            }
        }
    }
}

/// Compute the 0-based column (codepoint count) for a position within a line.
///
/// `byte_offset_in_line` must be a valid byte-boundary index into `line_content`.
/// Uses an ASCII fast path: if the prefix is pure ASCII, the column equals the
/// byte offset (1 byte = 1 codepoint).
///
/// # Panics
///
/// Panics if `byte_offset_in_line` is greater than `line_content.len()` or does
/// not fall on a UTF-8 character boundary, because the prefix is obtained by
/// slicing `line_content`.
#[must_use]
pub fn column_at(line_content: &str, byte_offset_in_line: usize) -> usize {
    let prefix = &line_content[..byte_offset_in_line];
    if prefix.is_ascii() {
        // Every ASCII character is one byte, so bytes and codepoints coincide.
        byte_offset_in_line
    } else {
        // At least one multi-byte character: count codepoints explicitly.
        prefix.chars().count()
    }
}

59/// Advance `pos` past `content`, assuming `content` contains no line break.
60/// Uses the ASCII fast path in [`column_at`].
61pub fn advance_within_line(pos: Pos, content: &str) -> Pos {
62    Pos {
63        byte_offset: pos.byte_offset + content.len(),
64        line: pos.line,
65        column: pos.column + column_at(content, content.len()),
66    }
67}
68
69/// A half-open span `[start, end)` within the input stream.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct Span {
72    pub start: Pos,
73    pub end: Pos,
74}
75
/// Unit tests for [`Pos`], [`Span`], [`column_at`] and [`advance_within_line`].
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // Pos / Span basics
    // -----------------------------------------------------------------------

    #[test]
    fn pos_origin_is_start_of_document() {
        let pos = Pos::ORIGIN;
        assert_eq!(pos.byte_offset, 0);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 0);
    }

    #[test]
    fn pos_fields_are_accessible() {
        let pos = Pos {
            byte_offset: 10,
            line: 3,
            column: 4,
        };
        assert_eq!(pos.byte_offset, 10);
        assert_eq!(pos.line, 3);
        assert_eq!(pos.column, 4);
    }

    #[test]
    fn pos_is_copy() {
        // Using `pos` after it was assigned to `pos2` only compiles if `Pos: Copy`.
        let pos = Pos::ORIGIN;
        let pos2 = pos;
        let _ = pos.byte_offset;
        let _ = pos2.byte_offset;
    }

    #[test]
    fn span_is_copy() {
        // Using `span` after it was assigned to `span2` only compiles if `Span: Copy`.
        let span = Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        };
        let span2 = span;
        let _ = span.start;
        let _ = span2.start;
    }

    // -----------------------------------------------------------------------
    // Pos::advance
    // -----------------------------------------------------------------------

    #[test]
    fn advance_ascii_increments_byte_and_column() {
        let pos = Pos::ORIGIN.advance('a');
        assert_eq!(pos.byte_offset, 1);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 1);
    }

    #[test]
    fn advance_newline_increments_line_and_resets_column() {
        let pos = Pos::ORIGIN.advance('a').advance('\n');
        assert_eq!(pos.byte_offset, 2);
        assert_eq!(pos.line, 2);
        assert_eq!(pos.column, 0);
    }

    #[test]
    fn advance_multibyte_char_increments_byte_offset_by_utf8_len() {
        // '中' is 3 bytes in UTF-8
        let pos = Pos::ORIGIN.advance('中');
        assert_eq!(pos.byte_offset, 3);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 1);
    }

    #[test]
    fn advance_multiple_lines() {
        let pos = Pos::ORIGIN
            .advance('a')
            .advance('\n')
            .advance('b')
            .advance('\n')
            .advance('c');
        assert_eq!(pos.line, 3);
        assert_eq!(pos.column, 1);
    }

    // -----------------------------------------------------------------------
    // column_at
    // -----------------------------------------------------------------------

    #[test]
    fn column_at_empty_prefix_is_zero() {
        assert_eq!(column_at("hello", 0), 0);
    }

    #[test]
    fn column_at_ascii_only_line_returns_byte_offset() {
        assert_eq!(column_at("hello world", 5), 5);
    }

    #[test]
    fn column_at_ascii_full_line_returns_byte_len() {
        assert_eq!(column_at("abc", 3), 3);
    }

    #[test]
    fn column_at_multibyte_prefix_counts_chars() {
        // "日本語xyz": 日本語 = 9 bytes / 3 chars
        assert_eq!(column_at("日本語xyz", 9), 3);
    }

    #[test]
    fn column_at_mixed_prefix_ascii_then_multibyte() {
        // "ab日本": ab = 2 bytes, 日本 = 6 bytes; prefix = 8 bytes = 4 chars
        assert_eq!(column_at("ab日本", 8), 4);
    }

    #[test]
    fn column_at_multibyte_then_ascii() {
        // "日ab": 日 = 3 bytes, ab = 2 bytes; prefix = first 5 bytes = "日ab" = 3 chars
        assert_eq!(column_at("日ab", 5), 3);
    }

    #[test]
    fn column_at_full_multibyte_line() {
        // "日本語" = 9 bytes / 3 chars; prefix = entire string
        assert_eq!(column_at("日本語", 9), 3);
    }

    // -----------------------------------------------------------------------
    // advance_within_line
    // -----------------------------------------------------------------------

    #[test]
    fn advance_within_line_empty_content_returns_pos_unchanged() {
        let pos = Pos {
            byte_offset: 5,
            line: 2,
            column: 3,
        };
        assert_eq!(advance_within_line(pos, ""), pos);
    }

    #[test]
    fn advance_within_line_ascii_only_advances_byte_and_column() {
        let result = advance_within_line(Pos::ORIGIN, "hello");
        assert_eq!(result.byte_offset, 5);
        assert_eq!(result.line, 1);
        assert_eq!(result.column, 5);
    }

    #[test]
    fn advance_within_line_ascii_mid_line_accumulates_correctly() {
        let pos = Pos {
            byte_offset: 10,
            line: 3,
            column: 4,
        };
        let result = advance_within_line(pos, "abc");
        assert_eq!(result.byte_offset, 13);
        assert_eq!(result.line, 3);
        assert_eq!(result.column, 7);
    }

    #[test]
    fn advance_within_line_multibyte_utf8_column_counts_codepoints() {
        // "日本語" = 9 bytes / 3 codepoints
        let result = advance_within_line(Pos::ORIGIN, "日本語");
        assert_eq!(result.byte_offset, 9);
        assert_eq!(result.line, 1);
        assert_eq!(result.column, 3);
    }

    #[test]
    fn advance_within_line_multibyte_mid_line_accumulates_correctly() {
        let pos = Pos {
            byte_offset: 4,
            line: 1,
            column: 2,
        };
        let result = advance_within_line(pos, "日本語");
        assert_eq!(result.byte_offset, 13);
        assert_eq!(result.line, 1);
        assert_eq!(result.column, 5);
    }

    #[test]
    fn advance_within_line_mixed_ascii_then_multibyte() {
        // "ab日" = 2 + 3 = 5 bytes / 3 codepoints
        let result = advance_within_line(Pos::ORIGIN, "ab日");
        assert_eq!(result.byte_offset, 5);
        assert_eq!(result.line, 1);
        assert_eq!(result.column, 3);
    }

    #[test]
    fn advance_within_line_line_field_is_preserved() {
        let pos = Pos {
            byte_offset: 0,
            line: 7,
            column: 0,
        };
        let result = advance_within_line(pos, "xyz");
        assert_eq!(result.line, 7);
    }

    #[test]
    fn advance_within_line_matches_advance_loop_ascii() {
        let pos = Pos {
            byte_offset: 2,
            line: 1,
            column: 2,
        };
        let content = "abc";
        // Reference result: step through the content one character at a time.
        let expected = content.chars().fold(pos, Pos::advance);
        assert_eq!(advance_within_line(pos, content), expected);
    }

    #[test]
    fn advance_within_line_matches_advance_loop_multibyte() {
        let pos = Pos {
            byte_offset: 0,
            line: 1,
            column: 0,
        };
        let content = "日本語xyz";
        // Reference result: step through the content one character at a time.
        let expected = content.chars().fold(pos, Pos::advance);
        assert_eq!(advance_within_line(pos, content), expected);
    }
}