1#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub struct Pos {
11 pub byte_offset: usize,
13 pub line: usize,
15 pub column: usize,
17}
18
19impl Pos {
20 pub const ORIGIN: Self = Self {
22 byte_offset: 0,
23 line: 1,
24 column: 0,
25 };
26
27 #[must_use]
33 pub const fn advance(self, ch: char) -> Self {
34 let byte_offset = self.byte_offset + ch.len_utf8();
35 if ch == '\n' {
36 Self {
37 byte_offset,
38 line: self.line + 1,
39 column: 0,
40 }
41 } else {
42 Self {
43 byte_offset,
44 line: self.line,
45 column: self.column + 1,
46 }
47 }
48 }
49}
50
51pub fn column_at(line_content: &str, byte_offset_in_line: usize) -> usize {
57 let prefix = &line_content[..byte_offset_in_line];
58 if prefix.is_ascii() {
59 byte_offset_in_line
60 } else {
61 prefix.chars().count()
62 }
63}
64
65pub fn advance_within_line(pos: Pos, content: &str) -> Pos {
68 Pos {
69 byte_offset: pos.byte_offset + content.len(),
70 line: pos.line,
71 column: pos.column + column_at(content, content.len()),
72 }
73}
74
75#[derive(Debug, Clone, PartialEq, Eq)]
88pub struct LineIndex {
89 newlines: Vec<u32>,
92 source: String,
94}
95
96impl LineIndex {
97 #[must_use]
99 pub fn new(source: &str) -> Self {
100 let mut newlines = Vec::new();
101 let mut chars = source.char_indices().peekable();
102 while let Some((i, ch)) = chars.next() {
103 match ch {
104 '\r' => {
105 #[expect(
106 clippy::cast_possible_truncation,
107 reason = "YAML files <= 4 GB; u32 offset is sufficient"
108 )]
109 newlines.push(i as u32);
110 if chars.peek().is_some_and(|(_, next)| *next == '\n') {
112 let _ = chars.next();
113 }
114 }
115 '\n' => {
116 #[expect(
117 clippy::cast_possible_truncation,
118 reason = "YAML files <= 4 GB; u32 offset is sufficient"
119 )]
120 newlines.push(i as u32);
121 }
122 _ => {}
123 }
124 }
125 Self {
126 newlines,
127 source: source.to_owned(),
128 }
129 }
130
131 #[must_use]
138 pub fn line_column(&self, offset: u32) -> (u32, u32) {
139 let newline_idx = self.newlines.partition_point(|&nl| nl < offset);
143 #[expect(
144 clippy::cast_possible_truncation,
145 reason = "line count fits u32 for any realistic document"
146 )]
147 let line = (newline_idx as u32) + 1;
148
149 let line_start_byte = if newline_idx == 0 {
151 0usize
152 } else {
153 #[expect(clippy::indexing_slicing, reason = "newline_idx > 0 is checked above")]
155 let nl_byte = self.newlines[newline_idx - 1] as usize;
156 let nl_char = self
159 .source
160 .get(nl_byte..)
161 .and_then(|s| s.chars().next())
162 .unwrap_or('\n');
163 if nl_char == '\r'
164 && self
165 .source
166 .get(nl_byte + 1..nl_byte + 2)
167 .is_some_and(|s| s == "\n")
168 {
169 nl_byte + 2
170 } else {
171 nl_byte + 1
172 }
173 };
174
175 let col_prefix = self
177 .source
178 .get(line_start_byte..offset as usize)
179 .unwrap_or("");
180 #[expect(
181 clippy::cast_possible_truncation,
182 reason = "column fits u32 for any realistic line"
183 )]
184 let column = if col_prefix.is_ascii() {
185 col_prefix.len() as u32
186 } else {
187 col_prefix.chars().count() as u32
188 };
189
190 (line, column)
191 }
192}
193
194#[derive(Debug, Clone, Copy, PartialEq, Eq)]
204pub struct Span {
205 pub start: u32,
207 pub end: u32,
209}
210
211impl Span {
212 #[must_use]
217 #[expect(
218 clippy::cast_possible_truncation,
219 reason = "YAML files <= 4 GB; u32 offset is sufficient"
220 )]
221 pub(crate) const fn from_pos(start: Pos, end: Pos) -> Self {
222 Self {
223 start: start.byte_offset as u32,
224 end: end.byte_offset as u32,
225 }
226 }
227
228 #[must_use]
232 pub fn start_line_column(&self, index: &LineIndex) -> (u32, u32) {
233 index.line_column(self.start)
234 }
235
236 #[must_use]
240 pub fn end_line_column(&self, index: &LineIndex) -> (u32, u32) {
241 index.line_column(self.end)
242 }
243}
244
245#[cfg(test)]
250mod tests {
251 use proptest::prelude::*;
252 use rstest::rstest;
253
254 use super::*;
255
256 #[test]
261 fn pos_origin_is_start_of_document() {
262 let pos = Pos::ORIGIN;
263 assert_eq!(pos.byte_offset, 0);
264 assert_eq!(pos.line, 1);
265 assert_eq!(pos.column, 0);
266 }
267
268 #[test]
269 fn pos_fields_are_accessible() {
270 let pos = Pos {
271 byte_offset: 10,
272 line: 3,
273 column: 4,
274 };
275 assert_eq!(pos.byte_offset, 10);
276 assert_eq!(pos.line, 3);
277 assert_eq!(pos.column, 4);
278 }
279
280 #[test]
281 fn pos_is_copy() {
282 let pos = Pos::ORIGIN;
283 let pos2 = pos;
284 let _ = pos.byte_offset;
285 let _ = pos2.byte_offset;
286 }
287
288 #[rstest]
289 #[case::ascii_char('a', 1, 1, 1)]
290 #[case::newline('\n', 1, 2, 0)]
291 #[case::multibyte_cjk('中', 3, 1, 1)]
292 fn advance_basic(
293 #[case] ch: char,
294 #[case] expected_byte_offset: usize,
295 #[case] expected_line: usize,
296 #[case] expected_column: usize,
297 ) {
298 let pos = Pos::ORIGIN.advance(ch);
299 assert_eq!(pos.byte_offset, expected_byte_offset);
300 assert_eq!(pos.line, expected_line);
301 assert_eq!(pos.column, expected_column);
302 }
303
304 #[test]
305 fn advance_multiple_lines() {
306 let pos = Pos::ORIGIN
307 .advance('a')
308 .advance('\n')
309 .advance('b')
310 .advance('\n')
311 .advance('c');
312 assert_eq!(pos.line, 3);
313 assert_eq!(pos.column, 1);
314 }
315
316 #[test]
322 fn span_size_is_eight_bytes() {
323 assert_eq!(std::mem::size_of::<Span>(), 8);
324 }
325
326 const _: () = assert!(
327 std::mem::size_of::<Span>() == 8,
328 "Span must be exactly 8 bytes"
329 );
330
331 #[test]
332 fn span_is_copy() {
333 let span = Span { start: 0, end: 0 };
334 let span2 = span;
335 let _ = span.start;
336 let _ = span2.start;
337 }
338
339 #[rstest]
344 #[case::empty_prefix("hello", 0, 0)]
345 #[case::ascii_mid_line("hello world", 5, 5)]
346 #[case::ascii_full_line("abc", 3, 3)]
347 #[case::multibyte_only_prefix("日本語xyz", 9, 3)]
348 #[case::ascii_then_multibyte("ab日本", 8, 4)]
349 #[case::multibyte_then_ascii("日ab", 5, 3)]
350 #[case::full_multibyte_line("日本語", 9, 3)]
351 fn column_at_cases(
352 #[case] line_content: &str,
353 #[case] byte_offset: usize,
354 #[case] expected: usize,
355 ) {
356 assert_eq!(column_at(line_content, byte_offset), expected);
357 }
358
359 #[rstest]
364 #[case::empty_content(Pos { byte_offset: 5, line: 2, column: 3 }, "", 5, 2, 3)]
365 #[case::ascii_from_origin(Pos::ORIGIN, "hello", 5, 1, 5)]
366 #[case::ascii_mid_line(Pos { byte_offset: 10, line: 3, column: 4 }, "abc", 13, 3, 7)]
367 #[case::multibyte_from_origin(Pos::ORIGIN, "日本語", 9, 1, 3)]
368 #[case::multibyte_mid_line(Pos { byte_offset: 4, line: 1, column: 2 }, "日本語", 13, 1, 5)]
369 #[case::mixed_ascii_then_multibyte(Pos::ORIGIN, "ab日", 5, 1, 3)]
370 fn advance_within_line_fields(
371 #[case] start: Pos,
372 #[case] content: &str,
373 #[case] expected_byte_offset: usize,
374 #[case] expected_line: usize,
375 #[case] expected_column: usize,
376 ) {
377 let result = advance_within_line(start, content);
378 assert_eq!(result.byte_offset, expected_byte_offset);
379 assert_eq!(result.line, expected_line);
380 assert_eq!(result.column, expected_column);
381 }
382
383 #[test]
384 fn advance_within_line_line_field_is_preserved() {
385 let pos = Pos {
386 byte_offset: 0,
387 line: 7,
388 column: 0,
389 };
390 let result = advance_within_line(pos, "xyz");
391 assert_eq!(result.line, 7);
392 }
393
394 #[test]
395 fn advance_within_line_matches_advance_loop_ascii() {
396 let pos = Pos {
397 byte_offset: 2,
398 line: 1,
399 column: 2,
400 };
401 let content = "abc";
402 let expected = content.chars().fold(pos, super::Pos::advance);
403 assert_eq!(advance_within_line(pos, content), expected);
404 }
405
406 #[test]
407 fn advance_within_line_matches_advance_loop_multibyte() {
408 let pos = Pos {
409 byte_offset: 0,
410 line: 1,
411 column: 0,
412 };
413 let content = "日本語xyz";
414 let expected = content.chars().fold(pos, super::Pos::advance);
415 assert_eq!(advance_within_line(pos, content), expected);
416 }
417
418 #[test]
424 fn line_index_empty_string_produces_no_newlines() {
425 let idx = LineIndex::new("");
426 assert!(idx.newlines.is_empty());
427 assert_eq!(idx.line_column(0), (1, 0));
428 }
429
430 #[test]
432 fn line_index_single_line_no_newline() {
433 let idx = LineIndex::new("hello");
434 assert_eq!(idx.line_column(0), (1, 0));
435 assert_eq!(idx.line_column(4), (1, 4));
436 assert_eq!(idx.line_column(5), (1, 5));
437 }
438
439 #[test]
441 fn line_index_single_newline_at_end() {
442 let idx = LineIndex::new("hello\n");
443 assert_eq!(idx.line_column(0), (1, 0));
444 assert_eq!(idx.line_column(4), (1, 4));
445 assert_eq!(idx.line_column(5), (1, 5)); assert_eq!(idx.line_column(6), (2, 0)); }
448
449 #[test]
451 fn line_index_multiple_lines_line_numbers_correct() {
452 let idx = LineIndex::new("a\nb\nc");
453 assert_eq!(idx.line_column(0), (1, 0));
454 assert_eq!(idx.line_column(2), (2, 0));
455 assert_eq!(idx.line_column(4), (3, 0));
456 }
457
458 #[test]
460 fn line_index_column_is_codepoint_count_not_byte_count() {
461 let idx = LineIndex::new("日本語\nfoo");
463 assert_eq!(idx.line_column(9), (1, 3)); assert_eq!(idx.line_column(10), (2, 0));
465 assert_eq!(idx.line_column(11), (2, 1));
466 }
467
468 #[test]
470 fn line_index_ascii_fast_path_matches_general_path() {
471 let idx = LineIndex::new("abc\nxyz");
473 assert_eq!(idx.line_column(5), (2, 1)); }
475
476 #[test]
478 fn line_index_multibyte_mid_line() {
479 let idx = LineIndex::new("ab日xyz\nok");
481 assert_eq!(idx.line_column(2), (1, 2));
482 assert_eq!(idx.line_column(5), (1, 3)); assert_eq!(idx.line_column(6), (1, 4)); assert_eq!(idx.line_column(8), (1, 6)); assert_eq!(idx.line_column(9), (2, 0)); }
487
488 #[test]
490 fn line_index_crlf_line_endings() {
491 let idx = LineIndex::new("a\r\nb");
493 assert_eq!(idx.line_column(0), (1, 0));
494 assert_eq!(idx.line_column(3), (2, 0));
495 }
496
497 #[test]
499 fn line_index_bare_cr_line_endings() {
500 let idx = LineIndex::new("a\rb");
502 assert_eq!(idx.line_column(0), (1, 0));
503 assert_eq!(idx.line_column(2), (2, 0));
504 }
505
506 fn eager_line_column(source: &str, offset: usize) -> (u32, u32) {
512 let mut pos = Pos::ORIGIN;
513 for ch in source.chars() {
514 if pos.byte_offset >= offset {
515 break;
516 }
517 pos = pos.advance(ch);
518 }
519 #[expect(
520 clippy::cast_possible_truncation,
521 reason = "test oracle: values fit u32 in realistic inputs"
522 )]
523 (pos.line as u32, pos.column as u32)
524 }
525
526 proptest! {
527 #[test]
529 #[expect(
530 clippy::unwrap_used,
531 clippy::cast_possible_truncation,
532 reason = "proptest: regex strategy is valid; offset fits u32 for any string of length <= 50"
533 )]
534 fn line_index_line_column_matches_advance_loop_ascii(
535 input in proptest::string::string_regex("[a-z\n]{0,50}").unwrap()
536 ) {
537 let idx = LineIndex::new(&input);
538 let mut offset = 0usize;
539 for ch in input.chars() {
540 let expected = eager_line_column(&input, offset);
541 let got = idx.line_column(offset as u32);
542 prop_assert_eq!(
543 got, expected,
544 "mismatch at offset {} in {:?}", offset, input
545 );
546 offset += ch.len_utf8();
547 }
548 if offset <= input.len() {
550 let expected = eager_line_column(&input, offset);
551 let got = idx.line_column(offset as u32);
552 prop_assert_eq!(got, expected, "mismatch at end offset {}", offset);
553 }
554 }
555
556 #[test]
558 #[expect(
559 clippy::unwrap_used,
560 clippy::cast_possible_truncation,
561 reason = "proptest: regex strategy is valid; offset fits u32 for any string of length <= 30"
562 )]
563 fn line_index_line_column_matches_advance_loop_multibyte(
564 input in proptest::string::string_regex("[a-z\n\u{4E00}-\u{4E10}]{0,30}").unwrap()
565 ) {
566 let idx = LineIndex::new(&input);
567 let mut offset = 0usize;
568 for ch in input.chars() {
569 let expected = eager_line_column(&input, offset);
570 let got = idx.line_column(offset as u32);
571 prop_assert_eq!(
572 got, expected,
573 "mismatch at offset {} in {:?}", offset, input
574 );
575 offset += ch.len_utf8();
576 }
577 }
578 }
579}