rustpython_parser_vendored/source_location/
line_index.rs1use super::SourceLocation;
2use crate::text_size::{TextLen, TextRange, TextSize};
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5use std::fmt;
6use std::fmt::{Debug, Formatter};
7use std::num::NonZeroU32;
8use std::ops::Deref;
9use std::sync::Arc;
10
11#[derive(Clone)]
15pub struct LineIndex {
16 inner: Arc<LineIndexInner>,
17}
18
19struct LineIndexInner {
20 line_starts: Vec<TextSize>,
21 kind: IndexKind,
22}
23
24impl LineIndex {
25 pub fn from_source_text(text: &str) -> Self {
27 let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
28 line_starts.push(TextSize::default());
29
30 let bytes = text.as_bytes();
31 let mut utf8 = false;
32
33 assert!(u32::try_from(bytes.len()).is_ok());
34
35 for (i, byte) in bytes.iter().enumerate() {
36 utf8 |= !byte.is_ascii();
37
38 match byte {
39 b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
41 b'\n' | b'\r' => {
42 #[allow(clippy::cast_possible_truncation)]
44 line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
45 }
46 _ => {}
47 }
48 }
49
50 let kind = if utf8 {
51 IndexKind::Utf8
52 } else {
53 IndexKind::Ascii
54 };
55
56 Self {
57 inner: Arc::new(LineIndexInner { line_starts, kind }),
58 }
59 }
60
61 fn kind(&self) -> IndexKind {
62 self.inner.kind
63 }
64
65 pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
94 match self.binary_search_line(&offset) {
95 Ok(row) => SourceLocation {
97 row: OneIndexed::from_zero_indexed(row),
98 column: OneIndexed::from_zero_indexed(0),
99 },
100 Err(next_row) => {
101 let row = next_row - 1;
103 let mut line_start = self.line_starts()[row as usize];
104
105 let column = if self.kind().is_ascii() {
106 u32::from(offset - line_start)
107 } else {
108 if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
110 line_start = '\u{feff}'.text_len();
111 }
112
113 let range = TextRange::new(line_start, offset);
114 content[range].chars().count().try_into().unwrap()
115 };
116
117 SourceLocation {
118 row: OneIndexed::from_zero_indexed(row),
119 column: OneIndexed::from_zero_indexed(column),
120 }
121 }
122 }
123 }
124
125 pub(crate) fn line_count(&self) -> usize {
127 self.line_starts().len()
128 }
129
130 pub fn line_index(&self, offset: TextSize) -> OneIndexed {
149 match self.binary_search_line(&offset) {
150 Ok(row) => OneIndexed::from_zero_indexed(row),
152 Err(row) => {
153 OneIndexed::from_zero_indexed(row - 1)
155 }
156 }
157 }
158
159 pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
161 let row_index = line.to_zero_indexed_usize();
162 let starts = self.line_starts();
163
164 if row_index == starts.len() {
166 contents.text_len()
167 } else {
168 starts[row_index]
169 }
170 }
171
172 pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
175 let row_index = line.to_zero_indexed_usize();
176 let starts = self.line_starts();
177
178 if row_index.saturating_add(1) >= starts.len() {
180 contents.text_len()
181 } else {
182 starts[row_index + 1]
183 }
184 }
185
186 pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
190 let starts = self.line_starts();
191
192 if starts.len() == line.to_zero_indexed_usize() {
193 TextRange::empty(contents.text_len())
194 } else {
195 TextRange::new(
196 self.line_start(line, contents),
197 self.line_start(line.saturating_add(1), contents),
198 )
199 }
200 }
201
202 pub fn line_starts(&self) -> &[TextSize] {
204 &self.inner.line_starts
205 }
206
207 #[allow(clippy::trivially_copy_pass_by_ref)] fn binary_search_line(&self, offset: &TextSize) -> Result<u32, u32> {
209 match self.line_starts().binary_search(offset) {
211 Ok(index) => Ok(index.try_into().unwrap()),
212 Err(index) => Err(index.try_into().unwrap()),
213 }
214 }
215}
216
217impl Deref for LineIndex {
218 type Target = [TextSize];
219
220 fn deref(&self) -> &Self::Target {
221 self.line_starts()
222 }
223}
224
225impl Debug for LineIndex {
226 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
227 f.debug_list().entries(self.line_starts()).finish()
228 }
229}
230
231#[derive(Debug, Clone, Copy)]
232enum IndexKind {
233 Ascii,
235
236 Utf8,
238}
239
240impl IndexKind {
241 const fn is_ascii(self) -> bool {
242 matches!(self, IndexKind::Ascii)
243 }
244}
245
246#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
252#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
253pub struct OneIndexed(NonZeroU32);
254
255#[allow(clippy::cast_possible_truncation)] const fn try_to_u32(value: usize) -> Result<u32, usize> {
257 if value <= u32::MAX as usize {
258 Ok(value as u32)
259 } else {
260 Err(value)
261 }
262}
263
264impl OneIndexed {
265 pub const MIN: Self = unwrap(Self::new(1));
268 pub const MAX: Self = unwrap(Self::new(u32::MAX));
270
271 const ONE: NonZeroU32 = unwrap(NonZeroU32::new(1));
272
273 pub const fn new(value: u32) -> Option<Self> {
275 match NonZeroU32::new(value) {
276 Some(value) => Some(Self(value)),
277 None => None,
278 }
279 }
280
281 pub const fn from_zero_indexed(value: u32) -> Self {
283 Self(Self::ONE.saturating_add(value))
284 }
285
286 pub const fn try_from_zero_indexed(value: usize) -> Result<Self, usize> {
288 match try_to_u32(value) {
289 Ok(value) => Ok(Self(Self::ONE.saturating_add(value))),
290 Err(value) => Err(value),
291 }
292 }
293
294 pub const fn get(self) -> u32 {
296 self.0.get()
297 }
298
299 pub const fn to_usize(self) -> usize {
301 self.get() as _
302 }
303
304 pub const fn to_zero_indexed(self) -> u32 {
306 self.0.get() - 1
307 }
308
309 pub const fn to_zero_indexed_usize(self) -> usize {
311 self.to_zero_indexed() as _
312 }
313
314 #[must_use]
317 pub const fn saturating_add(self, rhs: u32) -> Self {
318 match NonZeroU32::new(self.0.get().saturating_add(rhs)) {
319 Some(value) => Self(value),
320 None => Self::MAX,
321 }
322 }
323
324 #[must_use]
327 pub const fn saturating_sub(self, rhs: u32) -> Self {
328 match NonZeroU32::new(self.0.get().saturating_sub(rhs)) {
329 Some(value) => Self(value),
330 None => Self::MIN,
331 }
332 }
333}
334
335impl std::fmt::Display for OneIndexed {
336 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337 std::fmt::Debug::fmt(&self.0.get(), f)
338 }
339}
340
341const fn unwrap<T: Copy>(option: Option<T>) -> T {
344 match option {
345 Some(value) => value,
346 None => panic!("unwrapping None"),
347 }
348}
349
350#[cfg(test)]
351mod tests {
352 use crate::source_location::line_index::LineIndex;
353 use crate::source_location::{OneIndexed, SourceLocation};
354 use crate::text_size::TextSize;
355
356 #[test]
357 fn ascii_index() {
358 let index = LineIndex::from_source_text("");
359 assert_eq!(index.line_starts(), &[TextSize::from(0)]);
360
361 let index = LineIndex::from_source_text("x = 1");
362 assert_eq!(index.line_starts(), &[TextSize::from(0)]);
363
364 let index = LineIndex::from_source_text("x = 1\n");
365 assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
366
367 let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
368 assert_eq!(
369 index.line_starts(),
370 &[
371 TextSize::from(0),
372 TextSize::from(6),
373 TextSize::from(12),
374 TextSize::from(22)
375 ]
376 );
377 }
378
379 #[test]
380 fn ascii_source_location() {
381 let contents = "x = 1\ny = 2";
382 let index = LineIndex::from_source_text(contents);
383
384 let loc = index.source_location(TextSize::from(2), contents);
386 assert_eq!(
387 loc,
388 SourceLocation {
389 row: OneIndexed::from_zero_indexed(0),
390 column: OneIndexed::from_zero_indexed(2)
391 }
392 );
393
394 let loc = index.source_location(TextSize::from(6), contents);
396 assert_eq!(
397 loc,
398 SourceLocation {
399 row: OneIndexed::from_zero_indexed(1),
400 column: OneIndexed::from_zero_indexed(0)
401 }
402 );
403
404 let loc = index.source_location(TextSize::from(11), contents);
405 assert_eq!(
406 loc,
407 SourceLocation {
408 row: OneIndexed::from_zero_indexed(1),
409 column: OneIndexed::from_zero_indexed(5)
410 }
411 );
412 }
413
414 #[test]
415 fn ascii_carriage_return() {
416 let contents = "x = 4\ry = 3";
417 let index = LineIndex::from_source_text(contents);
418 assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
419
420 assert_eq!(
421 index.source_location(TextSize::from(4), contents),
422 SourceLocation {
423 row: OneIndexed::from_zero_indexed(0),
424 column: OneIndexed::from_zero_indexed(4)
425 }
426 );
427 assert_eq!(
428 index.source_location(TextSize::from(6), contents),
429 SourceLocation {
430 row: OneIndexed::from_zero_indexed(1),
431 column: OneIndexed::from_zero_indexed(0)
432 }
433 );
434 assert_eq!(
435 index.source_location(TextSize::from(7), contents),
436 SourceLocation {
437 row: OneIndexed::from_zero_indexed(1),
438 column: OneIndexed::from_zero_indexed(1)
439 }
440 );
441 }
442
443 #[test]
444 fn ascii_carriage_return_newline() {
445 let contents = "x = 4\r\ny = 3";
446 let index = LineIndex::from_source_text(contents);
447 assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
448
449 assert_eq!(
450 index.source_location(TextSize::from(4), contents),
451 SourceLocation {
452 row: OneIndexed::from_zero_indexed(0),
453 column: OneIndexed::from_zero_indexed(4)
454 }
455 );
456 assert_eq!(
457 index.source_location(TextSize::from(7), contents),
458 SourceLocation {
459 row: OneIndexed::from_zero_indexed(1),
460 column: OneIndexed::from_zero_indexed(0)
461 }
462 );
463 assert_eq!(
464 index.source_location(TextSize::from(8), contents),
465 SourceLocation {
466 row: OneIndexed::from_zero_indexed(1),
467 column: OneIndexed::from_zero_indexed(1)
468 }
469 );
470 }
471
472 #[test]
473 fn utf8_index() {
474 let index = LineIndex::from_source_text("x = '🫣'");
475 assert_eq!(index.line_count(), 1);
476 assert_eq!(index.line_starts(), &[TextSize::from(0)]);
477
478 let index = LineIndex::from_source_text("x = '🫣'\n");
479 assert_eq!(index.line_count(), 2);
480 assert_eq!(
481 index.line_starts(),
482 &[TextSize::from(0), TextSize::from(11)]
483 );
484
485 let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
486 assert_eq!(index.line_count(), 4);
487 assert_eq!(
488 index.line_starts(),
489 &[
490 TextSize::from(0),
491 TextSize::from(11),
492 TextSize::from(17),
493 TextSize::from(27)
494 ]
495 );
496
497 let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
498 assert_eq!(index.line_count(), 3);
499 assert_eq!(
500 index.line_starts(),
501 &[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
502 );
503 }
504
505 #[test]
506 fn utf8_carriage_return() {
507 let contents = "x = '🫣'\ry = 3";
508 let index = LineIndex::from_source_text(contents);
509 assert_eq!(index.line_count(), 2);
510 assert_eq!(
511 index.line_starts(),
512 &[TextSize::from(0), TextSize::from(11)]
513 );
514
515 assert_eq!(
517 index.source_location(TextSize::from(9), contents),
518 SourceLocation {
519 row: OneIndexed::from_zero_indexed(0),
520 column: OneIndexed::from_zero_indexed(6)
521 }
522 );
523 assert_eq!(
524 index.source_location(TextSize::from(11), contents),
525 SourceLocation {
526 row: OneIndexed::from_zero_indexed(1),
527 column: OneIndexed::from_zero_indexed(0)
528 }
529 );
530 assert_eq!(
531 index.source_location(TextSize::from(12), contents),
532 SourceLocation {
533 row: OneIndexed::from_zero_indexed(1),
534 column: OneIndexed::from_zero_indexed(1)
535 }
536 );
537 }
538
539 #[test]
540 fn utf8_carriage_return_newline() {
541 let contents = "x = '🫣'\r\ny = 3";
542 let index = LineIndex::from_source_text(contents);
543 assert_eq!(index.line_count(), 2);
544 assert_eq!(
545 index.line_starts(),
546 &[TextSize::from(0), TextSize::from(12)]
547 );
548
549 assert_eq!(
551 index.source_location(TextSize::from(9), contents),
552 SourceLocation {
553 row: OneIndexed::from_zero_indexed(0),
554 column: OneIndexed::from_zero_indexed(6)
555 }
556 );
557 assert_eq!(
558 index.source_location(TextSize::from(12), contents),
559 SourceLocation {
560 row: OneIndexed::from_zero_indexed(1),
561 column: OneIndexed::from_zero_indexed(0)
562 }
563 );
564 assert_eq!(
565 index.source_location(TextSize::from(13), contents),
566 SourceLocation {
567 row: OneIndexed::from_zero_indexed(1),
568 column: OneIndexed::from_zero_indexed(1)
569 }
570 );
571 }
572
573 #[test]
574 fn utf8_byte_offset() {
575 let contents = "x = '☃'\ny = 2";
576 let index = LineIndex::from_source_text(contents);
577 assert_eq!(
578 index.line_starts(),
579 &[TextSize::from(0), TextSize::from(10)]
580 );
581
582 let loc = index.source_location(TextSize::from(0), contents);
584 assert_eq!(
585 loc,
586 SourceLocation {
587 row: OneIndexed::from_zero_indexed(0),
588 column: OneIndexed::from_zero_indexed(0)
589 }
590 );
591
592 let loc = index.source_location(TextSize::from(5), contents);
593 assert_eq!(
594 loc,
595 SourceLocation {
596 row: OneIndexed::from_zero_indexed(0),
597 column: OneIndexed::from_zero_indexed(5)
598 }
599 );
600
601 let loc = index.source_location(TextSize::from(8), contents);
602 assert_eq!(
603 loc,
604 SourceLocation {
605 row: OneIndexed::from_zero_indexed(0),
606 column: OneIndexed::from_zero_indexed(6)
607 }
608 );
609
610 let loc = index.source_location(TextSize::from(10), contents);
612 assert_eq!(
613 loc,
614 SourceLocation {
615 row: OneIndexed::from_zero_indexed(1),
616 column: OneIndexed::from_zero_indexed(0)
617 }
618 );
619
620 let loc = index.source_location(TextSize::from(15), contents);
622 assert_eq!(
623 loc,
624 SourceLocation {
625 row: OneIndexed::from_zero_indexed(1),
626 column: OneIndexed::from_zero_indexed(5)
627 }
628 );
629 }
630}