Skip to main content

rlsp_yaml_parser/
pos.rs

1// SPDX-License-Identifier: MIT
2
/// A position within the input stream.
///
/// Three coordinates are tracked together:
///
/// * `byte_offset` — 0-based byte offset from the start of the input,
/// * `line` — 1-based line number,
/// * `column` — 0-based column, counted in codepoints from the start of the
///   line.
///
/// `Hash` is derived alongside `Eq` so that positions can be used as keys in
/// hashed collections such as `HashMap` and `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Pos {
    /// Byte offset from the start of the input (0-based).
    pub byte_offset: usize,
    /// Line number (1-based).
    pub line: usize,
    /// Codepoint column within the current line (0-based).
    pub column: usize,
}
15
16impl Pos {
17    /// The position representing the start of a document.
18    pub const ORIGIN: Self = Self {
19        byte_offset: 0,
20        line: 1,
21        column: 0,
22    };
23
24    /// Advance the position by one character.
25    ///
26    /// If `ch` is a line feed (`\n`) the line counter is incremented and the
27    /// column is reset to 0.  For all other characters the column advances by
28    /// one.  `byte_offset` advances by `ch.len_utf8()`.
29    #[must_use]
30    pub const fn advance(self, ch: char) -> Self {
31        let byte_offset = self.byte_offset + ch.len_utf8();
32        if ch == '\n' {
33            Self {
34                byte_offset,
35                line: self.line + 1,
36                column: 0,
37            }
38        } else {
39            Self {
40                byte_offset,
41                line: self.line,
42                column: self.column + 1,
43            }
44        }
45    }
46}
47
/// Return the 0-based codepoint column of a byte offset within a single line.
///
/// The column is the number of codepoints in
/// `line_content[..byte_offset_in_line]`.  When that prefix is pure ASCII every
/// codepoint occupies exactly one byte, so the byte offset itself is returned
/// without counting characters.
///
/// # Panics
///
/// Panics if `byte_offset_in_line` is greater than `line_content.len()` or does
/// not fall on a UTF-8 character boundary of `line_content`.
pub fn column_at(line_content: &str, byte_offset_in_line: usize) -> usize {
    let head = &line_content[..byte_offset_in_line];
    if head.is_ascii() {
        // Fast path: one byte per codepoint.
        return byte_offset_in_line;
    }
    head.chars().count()
}
61
62/// Advance `pos` past `content`, assuming `content` contains no line break.
63/// Uses the ASCII fast path in [`column_at`].
64pub fn advance_within_line(pos: Pos, content: &str) -> Pos {
65    Pos {
66        byte_offset: pos.byte_offset + content.len(),
67        line: pos.line,
68        column: pos.column + column_at(content, content.len()),
69    }
70}
71
72/// A half-open span `[start, end)` within the input stream.
73#[derive(Debug, Clone, Copy, PartialEq, Eq)]
74pub struct Span {
75    /// Inclusive start position of the span.
76    pub start: Pos,
77    /// Exclusive end position of the span.
78    pub end: Pos,
79}
80
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// `Pos::ORIGIN` is byte 0 on line 1 at column 0.
    #[test]
    fn pos_origin_is_start_of_document() {
        let Pos {
            byte_offset,
            line,
            column,
        } = Pos::ORIGIN;
        assert_eq!(byte_offset, 0);
        assert_eq!(line, 1);
        assert_eq!(column, 0);
    }

    /// All three fields of `Pos` can be set and read back directly.
    #[test]
    fn pos_fields_are_accessible() {
        let Pos {
            byte_offset,
            line,
            column,
        } = Pos {
            byte_offset: 10,
            line: 3,
            column: 4,
        };
        assert_eq!(byte_offset, 10);
        assert_eq!(line, 3);
        assert_eq!(column, 4);
    }

    /// Compiles only if `Pos` is `Copy`: the source binding is used after
    /// being assigned to a second binding.
    #[test]
    fn pos_is_copy() {
        let original = Pos::ORIGIN;
        let duplicate = original;
        let _ = (original.byte_offset, duplicate.byte_offset);
    }

    /// Compiles only if `Span` is `Copy`: the source binding is used after
    /// being assigned to a second binding.
    #[test]
    fn span_is_copy() {
        let original = Span {
            start: Pos::ORIGIN,
            end: Pos::ORIGIN,
        };
        let duplicate = original;
        let _ = (original.start, duplicate.start);
    }

    /// A single `advance` from the origin for an ASCII character, a line
    /// feed, and a three-byte character.
    #[rstest]
    #[case::ascii_char('a', 1, 1, 1)]
    #[case::newline('\n', 1, 2, 0)]
    #[case::multibyte_cjk('中', 3, 1, 1)]
    fn advance_basic(
        #[case] ch: char,
        #[case] want_byte_offset: usize,
        #[case] want_line: usize,
        #[case] want_column: usize,
    ) {
        let got = Pos::ORIGIN.advance(ch);
        assert_eq!(
            (got.byte_offset, got.line, got.column),
            (want_byte_offset, want_line, want_column)
        );
    }

    /// Two line feeds followed by one character end on line 3, column 1.
    #[test]
    fn advance_multiple_lines() {
        let end = "a\nb\nc".chars().fold(Pos::ORIGIN, Pos::advance);
        assert_eq!(end.line, 3);
        assert_eq!(end.column, 1);
    }

    // -----------------------------------------------------------------------
    // column_at
    // -----------------------------------------------------------------------

    #[rstest]
    #[case::empty_prefix("hello", 0, 0)]
    #[case::ascii_mid_line("hello world", 5, 5)]
    #[case::ascii_full_line("abc", 3, 3)]
    #[case::multibyte_only_prefix("日本語xyz", 9, 3)]
    #[case::ascii_then_multibyte("ab日本", 8, 4)]
    #[case::multibyte_then_ascii("日ab", 5, 3)]
    #[case::full_multibyte_line("日本語", 9, 3)]
    fn column_at_cases(#[case] line: &str, #[case] offset: usize, #[case] want: usize) {
        let got = column_at(line, offset);
        assert_eq!(got, want);
    }

    // -----------------------------------------------------------------------
    // advance_within_line
    // -----------------------------------------------------------------------

    #[rstest]
    #[case::empty_content(Pos { byte_offset: 5, line: 2, column: 3 }, "", 5, 2, 3)]
    #[case::ascii_from_origin(Pos::ORIGIN, "hello", 5, 1, 5)]
    #[case::ascii_mid_line(Pos { byte_offset: 10, line: 3, column: 4 }, "abc", 13, 3, 7)]
    #[case::multibyte_from_origin(Pos::ORIGIN, "日本語", 9, 1, 3)]
    #[case::multibyte_mid_line(Pos { byte_offset: 4, line: 1, column: 2 }, "日本語", 13, 1, 5)]
    #[case::mixed_ascii_then_multibyte(Pos::ORIGIN, "ab日", 5, 1, 3)]
    fn advance_within_line_fields(
        #[case] start: Pos,
        #[case] content: &str,
        #[case] want_byte_offset: usize,
        #[case] want_line: usize,
        #[case] want_column: usize,
    ) {
        let want = Pos {
            byte_offset: want_byte_offset,
            line: want_line,
            column: want_column,
        };
        assert_eq!(advance_within_line(start, content), want);
    }

    /// The line number of the starting position is carried through untouched.
    #[test]
    fn advance_within_line_line_field_is_preserved() {
        let start = Pos {
            byte_offset: 0,
            line: 7,
            column: 0,
        };
        assert_eq!(advance_within_line(start, "xyz").line, 7);
    }

    /// The bulk helper agrees with stepping one character at a time over
    /// ASCII content.
    #[test]
    fn advance_within_line_matches_advance_loop_ascii() {
        let start = Pos {
            byte_offset: 2,
            line: 1,
            column: 2,
        };
        let content = "abc";
        let mut stepped = start;
        for ch in content.chars() {
            stepped = stepped.advance(ch);
        }
        assert_eq!(advance_within_line(start, content), stepped);
    }

    /// The bulk helper agrees with stepping one character at a time over
    /// content that mixes multibyte and ASCII characters.
    #[test]
    fn advance_within_line_matches_advance_loop_multibyte() {
        let start = Pos {
            byte_offset: 0,
            line: 1,
            column: 0,
        };
        let content = "日本語xyz";
        let mut stepped = start;
        for ch in content.chars() {
            stepped = stepped.advance(ch);
        }
        assert_eq!(advance_within_line(start, content), stepped);
    }
}