Skip to main content

rlsp_yaml_parser/
pos.rs

// SPDX-License-Identifier: MIT
2
/// A location inside the input text.
///
/// Lines are numbered from 1, while columns are numbered from 0 and count
/// codepoints (not bytes) since the beginning of the current line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Pos {
    /// Offset in bytes; [`Pos::advance`] grows it by the UTF-8 length of each character.
    pub byte_offset: usize,
    /// 1-based line number.
    pub line: usize,
    /// 0-based column, measured in codepoints from the start of the line.
    pub column: usize,
}

impl Pos {
    /// The very first position of a document: byte 0, line 1, column 0.
    pub const ORIGIN: Self = Self {
        byte_offset: 0,
        line: 1,
        column: 0,
    };

    /// Return the position that follows `self` once `ch` has been consumed.
    ///
    /// `byte_offset` always grows by `ch.len_utf8()`.  A line feed (`\n`)
    /// moves to the next line and resets the column to 0; every other
    /// character stays on the same line and bumps the column by one.
    #[must_use]
    pub const fn advance(self, ch: char) -> Self {
        let next_offset = self.byte_offset + ch.len_utf8();
        // Only `\n` is treated as a line terminator here.
        let (next_line, next_column) = match ch {
            '\n' => (self.line + 1, 0),
            _ => (self.line, self.column + 1),
        };
        Self {
            byte_offset: next_offset,
            line: next_line,
            column: next_column,
        }
    }
}
44
/// Return the 0-based column, counted in codepoints, that corresponds to
/// `byte_offset_in_line` within `line_content`.
///
/// The prefix `line_content[..byte_offset_in_line]` is inspected: when it is
/// pure ASCII every codepoint occupies exactly one byte, so the column is
/// simply the byte offset (fast path).  Otherwise the codepoints of the
/// prefix are counted.
///
/// # Panics
///
/// Panics if `byte_offset_in_line` is greater than `line_content.len()` or
/// does not fall on a UTF-8 character boundary, because the prefix is taken
/// with a string slice.
pub fn column_at(line_content: &str, byte_offset_in_line: usize) -> usize {
    let head = &line_content[..byte_offset_in_line];
    if !head.is_ascii() {
        // Slow path: multi-byte characters present, count codepoints.
        return head.chars().count();
    }
    // Fast path: one byte per codepoint, so the prefix length is the column.
    head.len()
}
58
59/// Advance `pos` past `content`, assuming `content` contains no line break.
60/// Uses the ASCII fast path in [`column_at`].
61pub fn advance_within_line(pos: Pos, content: &str) -> Pos {
62    Pos {
63        byte_offset: pos.byte_offset + content.len(),
64        line: pos.line,
65        column: pos.column + column_at(content, content.len()),
66    }
67}
68
69/// A half-open span `[start, end)` within the input stream.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct Span {
72    pub start: Pos,
73    pub end: Pos,
74}
75
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Flatten a `Pos` into `(byte_offset, line, column)` so all three fields
    /// can be compared in a single assertion.
    fn fields(pos: Pos) -> (usize, usize, usize) {
        (pos.byte_offset, pos.line, pos.column)
    }

    #[test]
    fn pos_origin_is_start_of_document() {
        assert_eq!(fields(Pos::ORIGIN), (0, 1, 0));
    }

    #[test]
    fn pos_fields_are_accessible() {
        let Pos {
            byte_offset,
            line,
            column,
        } = Pos {
            byte_offset: 10,
            line: 3,
            column: 4,
        };
        assert_eq!((byte_offset, line, column), (10, 3, 4));
    }

    #[test]
    fn pos_is_copy() {
        let original = Pos::ORIGIN;
        let duplicate = original;
        // Both bindings stay usable only because `Pos` is `Copy`.
        let _ = (original.byte_offset, duplicate.byte_offset);
    }

    #[test]
    fn span_is_copy() {
        let original = Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        };
        let duplicate = original;
        // Both bindings stay usable only because `Span` is `Copy`.
        let _ = (original.start, duplicate.start);
    }

    #[rstest]
    #[case::ascii_char('a', 1, 1, 1)]
    #[case::newline('\n', 1, 2, 0)]
    #[case::multibyte_cjk('中', 3, 1, 1)]
    fn advance_basic(
        #[case] ch: char,
        #[case] expected_byte_offset: usize,
        #[case] expected_line: usize,
        #[case] expected_column: usize,
    ) {
        let advanced = Pos::ORIGIN.advance(ch);
        assert_eq!(
            fields(advanced),
            (expected_byte_offset, expected_line, expected_column)
        );
    }

    #[test]
    fn advance_multiple_lines() {
        let end = "a\nb\nc".chars().fold(Pos::ORIGIN, Pos::advance);
        assert_eq!((end.line, end.column), (3, 1));
    }

    // -----------------------------------------------------------------------
    // column_at
    // -----------------------------------------------------------------------

    #[rstest]
    #[case::empty_prefix("hello", 0, 0)]
    #[case::ascii_mid_line("hello world", 5, 5)]
    #[case::ascii_full_line("abc", 3, 3)]
    #[case::multibyte_only_prefix("日本語xyz", 9, 3)]
    #[case::ascii_then_multibyte("ab日本", 8, 4)]
    #[case::multibyte_then_ascii("日ab", 5, 3)]
    #[case::full_multibyte_line("日本語", 9, 3)]
    fn column_at_cases(
        #[case] line_content: &str,
        #[case] byte_offset: usize,
        #[case] expected: usize,
    ) {
        let actual = column_at(line_content, byte_offset);
        assert_eq!(actual, expected);
    }

    // -----------------------------------------------------------------------
    // advance_within_line
    // -----------------------------------------------------------------------

    #[rstest]
    #[case::empty_content(Pos { byte_offset: 5, line: 2, column: 3 }, "", 5, 2, 3)]
    #[case::ascii_from_origin(Pos::ORIGIN, "hello", 5, 1, 5)]
    #[case::ascii_mid_line(Pos { byte_offset: 10, line: 3, column: 4 }, "abc", 13, 3, 7)]
    #[case::multibyte_from_origin(Pos::ORIGIN, "日本語", 9, 1, 3)]
    #[case::multibyte_mid_line(Pos { byte_offset: 4, line: 1, column: 2 }, "日本語", 13, 1, 5)]
    #[case::mixed_ascii_then_multibyte(Pos::ORIGIN, "ab日", 5, 1, 3)]
    fn advance_within_line_fields(
        #[case] start: Pos,
        #[case] content: &str,
        #[case] expected_byte_offset: usize,
        #[case] expected_line: usize,
        #[case] expected_column: usize,
    ) {
        let advanced = advance_within_line(start, content);
        assert_eq!(
            fields(advanced),
            (expected_byte_offset, expected_line, expected_column)
        );
    }

    #[test]
    fn advance_within_line_line_field_is_preserved() {
        let start = Pos {
            line: 7,
            ..Pos::ORIGIN
        };
        assert_eq!(advance_within_line(start, "xyz").line, 7);
    }

    #[test]
    fn advance_within_line_matches_advance_loop_ascii() {
        let start = Pos {
            byte_offset: 2,
            line: 1,
            column: 2,
        };
        let content = "abc";
        // Reference result: step through the content one character at a time.
        let mut expected = start;
        for ch in content.chars() {
            expected = expected.advance(ch);
        }
        assert_eq!(advance_within_line(start, content), expected);
    }

    #[test]
    fn advance_within_line_matches_advance_loop_multibyte() {
        let start = Pos {
            byte_offset: 0,
            line: 1,
            column: 0,
        };
        let content = "日本語xyz";
        // Reference result: step through the content one character at a time.
        let mut expected = start;
        for ch in content.chars() {
            expected = expected.advance(ch);
        }
        assert_eq!(advance_within_line(start, content), expected);
    }
}