rustpython_ruff_source_file/line_index.rs
1use std::fmt;
2use std::fmt::{Debug, Formatter};
3use std::num::{NonZeroUsize, ParseIntError};
4use std::ops::Deref;
5use std::str::FromStr;
6use std::sync::Arc;
7
8use crate::{LineColumn, SourceLocation};
9use ruff_text_size::{TextLen, TextRange, TextSize};
10#[cfg(feature = "serde")]
11use serde::{Deserialize, Serialize};
12
13/// Index for fast [byte offset](TextSize) to [`LineColumn`] conversions.
14///
15/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
16#[derive(Clone, Eq, PartialEq)]
17#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
18pub struct LineIndex {
19 inner: Arc<LineIndexInner>,
20}
21
22#[derive(Eq, PartialEq)]
23#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
24struct LineIndexInner {
25 line_starts: Vec<TextSize>,
26 kind: IndexKind,
27}
28
29impl LineIndex {
30 /// Builds the [`LineIndex`] from the source text of a file.
31 pub fn from_source_text(text: &str) -> Self {
32 let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
33 line_starts.push(TextSize::default());
34
35 let bytes = text.as_bytes();
36
37 assert!(u32::try_from(bytes.len()).is_ok());
38
39 for i in memchr::memchr2_iter(b'\n', b'\r', bytes) {
40 // Skip `\r` in `\r\n` sequences (only count the `\n`).
41 if bytes[i] == b'\r' && bytes.get(i + 1) == Some(&b'\n') {
42 continue;
43 }
44 // SAFETY: Assertion above guarantees `i <= u32::MAX`
45 #[expect(clippy::cast_possible_truncation)]
46 line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
47 }
48
49 // Determine whether the source text is ASCII.
50 //
51 // Empirically, this simple loop is auto-vectorized by LLVM and benchmarks faster than both
52 // `str::is_ascii()` and hand-written SIMD.
53 let mut has_non_ascii = false;
54 for byte in bytes {
55 has_non_ascii |= !byte.is_ascii();
56 }
57
58 let kind = if has_non_ascii {
59 IndexKind::Utf8
60 } else {
61 IndexKind::Ascii
62 };
63
64 Self {
65 inner: Arc::new(LineIndexInner { line_starts, kind }),
66 }
67 }
68
69 fn kind(&self) -> IndexKind {
70 self.inner.kind
71 }
72
73 /// Returns the line and column number for an UTF-8 byte offset.
74 ///
75 /// The `column` number is the nth-character of the line, except for the first line
76 /// where it doesn't include the UTF-8 BOM marker at the start of the file.
77 ///
78 /// ### BOM handling
79 ///
80 /// For files starting with a UTF-8 BOM marker, the byte offsets
81 /// in the range `0...3` are all mapped to line 0 and column 0.
/// Because of this, the conversion isn't lossless.
83 ///
84 /// ## Examples
85 ///
86 /// ```
87 /// # use ruff_text_size::TextSize;
88 /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
89 /// let source = format!("\u{FEFF}{}", "def a():\n pass");
90 /// let index = LineIndex::from_source_text(&source);
91 ///
92 /// // Before BOM, maps to after BOM
93 /// assert_eq!(
94 /// index.line_column(TextSize::from(0), &source),
95 /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
96 /// );
97 ///
98 /// // After BOM, maps to after BOM
99 /// assert_eq!(
100 /// index.line_column(TextSize::from(3), &source),
101 /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
102 /// );
103 ///
104 /// assert_eq!(
105 /// index.line_column(TextSize::from(7), &source),
106 /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
107 /// );
108 /// assert_eq!(
109 /// index.line_column(TextSize::from(16), &source),
110 /// LineColumn { line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
111 /// );
112 /// ```
113 ///
114 /// ## Panics
115 ///
116 /// If the byte offset isn't within the bounds of `content`.
117 pub fn line_column(&self, offset: TextSize, content: &str) -> LineColumn {
118 let location = self.source_location(offset, content, PositionEncoding::Utf32);
119
120 // Don't count the BOM character as a column, but only on the first line.
121 let column = if location.line.to_zero_indexed() == 0 && content.starts_with('\u{feff}') {
122 location.character_offset.saturating_sub(1)
123 } else {
124 location.character_offset
125 };
126
127 LineColumn {
128 line: location.line,
129 column,
130 }
131 }
132
133 /// Given a UTF-8 byte offset, returns the line and character offset according to the given encoding.
134 ///
135 /// ### BOM handling
136 ///
137 /// Unlike [`Self::line_column`], this method does not skip the BOM character at the start of the file.
138 /// This allows for bidirectional mapping between [`SourceLocation`] and [`TextSize`] (see [`Self::offset`]).
139 ///
140 /// ## Examples
141 ///
142 /// ```
143 /// # use ruff_text_size::TextSize;
144 /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn, SourceLocation, PositionEncoding, Line};
145 /// let source = format!("\u{FEFF}{}", "def a():\n pass");
146 /// let index = LineIndex::from_source_text(&source);
147 ///
148 /// // Before BOM, maps to character 0
149 /// assert_eq!(
150 /// index.source_location(TextSize::from(0), &source, PositionEncoding::Utf32),
151 /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(0) }
152 /// );
153 ///
154 /// // After BOM, maps to after BOM
155 /// assert_eq!(
156 /// index.source_location(TextSize::from(3), &source, PositionEncoding::Utf32),
157 /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(1) }
158 /// );
159 ///
160 /// assert_eq!(
161 /// index.source_location(TextSize::from(7), &source, PositionEncoding::Utf32),
162 /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(5) }
163 /// );
164 /// assert_eq!(
165 /// index.source_location(TextSize::from(16), &source, PositionEncoding::Utf32),
166 /// SourceLocation { line: OneIndexed::from_zero_indexed(1), character_offset: OneIndexed::from_zero_indexed(4) }
167 /// );
168 /// ```
169 ///
170 /// ## Panics
171 ///
172 /// If the UTF-8 byte offset is out of bounds of `text`.
173 pub fn source_location(
174 &self,
175 offset: TextSize,
176 text: &str,
177 encoding: PositionEncoding,
178 ) -> SourceLocation {
179 let line = self.line_index(offset);
180 let line_start = self.line_start(line, text);
181
182 let character_offset =
183 self.characters_between(TextRange::new(line_start, offset), text, encoding);
184
185 SourceLocation {
186 line,
187 character_offset: OneIndexed::from_zero_indexed(character_offset),
188 }
189 }
190
191 fn characters_between(
192 &self,
193 range: TextRange,
194 text: &str,
195 encoding: PositionEncoding,
196 ) -> usize {
197 if self.is_ascii() {
198 return (range.end() - range.start()).to_usize();
199 }
200
201 match encoding {
202 PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(),
203 PositionEncoding::Utf16 => {
204 let up_to_character = &text[range];
205 up_to_character.encode_utf16().count()
206 }
207 PositionEncoding::Utf32 => {
208 let up_to_character = &text[range];
209 up_to_character.chars().count()
210 }
211 }
212 }
213
214 /// Returns the length of the line in characters, respecting the given encoding
215 pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize {
216 let line_range = self.line_range(line, text);
217
218 self.characters_between(line_range, text, encoding)
219 }
220
221 /// Return the number of lines in the source code.
222 pub fn line_count(&self) -> usize {
223 self.line_starts().len()
224 }
225
226 /// Returns `true` if the text only consists of ASCII characters
227 pub fn is_ascii(&self) -> bool {
228 self.kind().is_ascii()
229 }
230
231 /// Returns the row number for a given offset.
232 ///
233 /// ## Examples
234 ///
235 /// ```
236 /// # use ruff_text_size::TextSize;
237 /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
238 /// let source = "def a():\n pass";
239 /// let index = LineIndex::from_source_text(source);
240 ///
241 /// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
242 /// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
243 /// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
244 /// ```
245 ///
246 /// ## Panics
247 ///
248 /// If the offset is out of bounds.
249 pub fn line_index(&self, offset: TextSize) -> OneIndexed {
250 match self.line_starts().binary_search(&offset) {
251 // Offset is at the start of a line
252 Ok(row) => OneIndexed::from_zero_indexed(row),
253 Err(row) => {
254 // SAFETY: Safe because the index always contains an entry for the offset 0
255 OneIndexed::from_zero_indexed(row - 1)
256 }
257 }
258 }
259
260 /// Returns the [byte offset](TextSize) for the `line` with the given index.
261 pub fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
262 let row_index = line.to_zero_indexed();
263 let starts = self.line_starts();
264
265 // If start-of-line position after last line
266 if row_index == starts.len() {
267 contents.text_len()
268 } else {
269 starts[row_index]
270 }
271 }
272
273 /// Returns the [byte offset](TextSize) of the `line`'s end.
274 /// The offset is the end of the line, up to and including the newline character ending the line (if any).
275 pub fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
276 let row_index = line.to_zero_indexed();
277 let starts = self.line_starts();
278
279 // If start-of-line position after last line
280 if row_index.saturating_add(1) >= starts.len() {
281 contents.text_len()
282 } else {
283 starts[row_index + 1]
284 }
285 }
286
287 /// Returns the [byte offset](TextSize) of the `line`'s end.
288 /// The offset is the end of the line, excluding the newline character ending the line (if any).
289 pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize {
290 let row_index = line.to_zero_indexed();
291 let starts = self.line_starts();
292
293 // If start-of-line position after last line
294 if row_index.saturating_add(1) >= starts.len() {
295 contents.text_len()
296 } else {
297 starts[row_index + 1] - TextSize::new(1)
298 }
299 }
300
301 /// Returns the [`TextRange`] of the `line` with the given index.
302 /// The start points to the first character's [byte offset](TextSize), the end up to, and including
303 /// the newline character ending the line (if any).
304 pub fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
305 let starts = self.line_starts();
306
307 if starts.len() == line.to_zero_indexed() {
308 TextRange::empty(contents.text_len())
309 } else {
310 TextRange::new(
311 self.line_start(line, contents),
312 self.line_start(line.saturating_add(1), contents),
313 )
314 }
315 }
316
317 /// Returns the [UTF-8 byte offset](TextSize) at `line` and `character` where character is counted using the given encoding.
318 ///
319 /// ## Examples
320 ///
321 /// ### ASCII only source text
322 ///
323 /// ```
324 /// # use ruff_source_file::{SourceLocation, LineIndex, OneIndexed, PositionEncoding};
325 /// # use ruff_text_size::TextSize;
326 /// let source = r#"a = 4
327 /// c = "some string"
328 /// x = b"#;
329 ///
330 /// let index = LineIndex::from_source_text(source);
331 ///
332 /// // First line, first character
333 /// assert_eq!(
334 /// index.offset(
335 /// SourceLocation {
336 /// line: OneIndexed::from_zero_indexed(0),
337 /// character_offset: OneIndexed::from_zero_indexed(0)
338 /// },
339 /// source,
340 /// PositionEncoding::Utf32,
341 /// ),
342 /// TextSize::new(0)
343 /// );
344 ///
345 /// assert_eq!(
346 /// index.offset(
347 /// SourceLocation {
348 /// line: OneIndexed::from_zero_indexed(1),
349 /// character_offset: OneIndexed::from_zero_indexed(4)
350 /// },
351 /// source,
352 /// PositionEncoding::Utf32,
353 /// ),
354 /// TextSize::new(10)
355 /// );
356 ///
357 /// // Offset past the end of the first line
358 /// assert_eq!(
359 /// index.offset(
360 /// SourceLocation {
361 /// line: OneIndexed::from_zero_indexed(0),
362 /// character_offset: OneIndexed::from_zero_indexed(10)
363 /// },
364 /// source,
365 /// PositionEncoding::Utf32,
366 /// ),
367 /// TextSize::new(6)
368 /// );
369 ///
370 /// // Offset past the end of the file
371 /// assert_eq!(
372 /// index.offset(
373 /// SourceLocation {
374 /// line: OneIndexed::from_zero_indexed(3),
375 /// character_offset: OneIndexed::from_zero_indexed(0)
376 /// },
377 /// source,
378 /// PositionEncoding::Utf32,
379 /// ),
380 /// TextSize::new(29)
381 /// );
382 /// ```
383 ///
384 /// ### Non-ASCII source text
385 ///
386 /// ```
387 /// use ruff_source_file::{LineIndex, OneIndexed, SourceLocation, PositionEncoding};
388 /// use ruff_text_size::TextSize;
389 /// let source = format!("\u{FEFF}{}", r#"a = 4
390 /// c = "❤️"
391 /// x = b"#);
392 ///
393 /// let index = LineIndex::from_source_text(&source);
394 ///
395 /// // First line, first character, points at the BOM
396 /// assert_eq!(
397 /// index.offset(
398 /// SourceLocation {
399 /// line: OneIndexed::from_zero_indexed(0),
400 /// character_offset: OneIndexed::from_zero_indexed(0)
401 /// },
402 /// &source,
403 /// PositionEncoding::Utf32,
404 /// ),
405 /// TextSize::new(0)
406 /// );
407 ///
408 /// // First line, after the BOM
409 /// assert_eq!(
410 /// index.offset(
411 /// SourceLocation {
412 /// line: OneIndexed::from_zero_indexed(0),
413 /// character_offset: OneIndexed::from_zero_indexed(1)
414 /// },
415 /// &source,
416 /// PositionEncoding::Utf32,
417 /// ),
418 /// TextSize::new(3)
419 /// );
420 ///
421 /// // second line, 7th character, after emoji, UTF32
422 /// assert_eq!(
423 /// index.offset(
424 /// SourceLocation {
425 /// line: OneIndexed::from_zero_indexed(1),
426 /// character_offset: OneIndexed::from_zero_indexed(7)
427 /// },
428 /// &source,
429 /// PositionEncoding::Utf32,
430 /// ),
431 /// TextSize::new(20)
432 /// );
433 ///
434 /// // Second line, 7th character, after emoji, UTF 16
435 /// assert_eq!(
436 /// index.offset(
437 /// SourceLocation {
438 /// line: OneIndexed::from_zero_indexed(1),
439 /// character_offset: OneIndexed::from_zero_indexed(7)
440 /// },
441 /// &source,
442 /// PositionEncoding::Utf16,
443 /// ),
444 /// TextSize::new(20)
445 /// );
446 ///
447 ///
448 /// // Offset past the end of the second line
449 /// assert_eq!(
450 /// index.offset(
451 /// SourceLocation {
452 /// line: OneIndexed::from_zero_indexed(1),
453 /// character_offset: OneIndexed::from_zero_indexed(10)
454 /// },
455 /// &source,
456 /// PositionEncoding::Utf32,
457 /// ),
458 /// TextSize::new(22)
459 /// );
460 ///
461 /// // Offset past the end of the file
462 /// assert_eq!(
463 /// index.offset(
464 /// SourceLocation {
465 /// line: OneIndexed::from_zero_indexed(3),
466 /// character_offset: OneIndexed::from_zero_indexed(0)
467 /// },
468 /// &source,
469 /// PositionEncoding::Utf32,
470 /// ),
471 /// TextSize::new(27)
472 /// );
473 /// ```
474 pub fn offset(
475 &self,
476 position: SourceLocation,
477 text: &str,
478 position_encoding: PositionEncoding,
479 ) -> TextSize {
480 // If start-of-line position after last line
481 if position.line.to_zero_indexed() > self.line_starts().len() {
482 return text.text_len();
483 }
484
485 let line_range = self.line_range(position.line, text);
486
487 let character_offset = position.character_offset.to_zero_indexed();
488 let character_byte_offset = if self.is_ascii() {
489 TextSize::try_from(character_offset).unwrap()
490 } else {
491 let line = &text[line_range];
492
493 match position_encoding {
494 PositionEncoding::Utf8 => {
495 TextSize::try_from(position.character_offset.to_zero_indexed()).unwrap()
496 }
497 PositionEncoding::Utf16 => {
498 let mut byte_offset = TextSize::new(0);
499 let mut utf16_code_unit_offset = 0;
500
501 for c in line.chars() {
502 if utf16_code_unit_offset >= character_offset {
503 break;
504 }
505
506 // Count characters encoded as two 16 bit words as 2 characters.
507 byte_offset += c.text_len();
508 utf16_code_unit_offset += c.len_utf16();
509 }
510
511 byte_offset
512 }
513 PositionEncoding::Utf32 => line
514 .chars()
515 .take(position.character_offset.to_zero_indexed())
516 .map(ruff_text_size::TextLen::text_len)
517 .sum(),
518 }
519 };
520
521 line_range.start() + character_byte_offset.clamp(TextSize::new(0), line_range.len())
522 }
523
524 /// Returns the [byte offsets](TextSize) for every line
525 pub fn line_starts(&self) -> &[TextSize] {
526 &self.inner.line_starts
527 }
528}
529
530impl Deref for LineIndex {
531 type Target = [TextSize];
532
533 fn deref(&self) -> &Self::Target {
534 self.line_starts()
535 }
536}
537
538impl Debug for LineIndex {
539 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
540 f.debug_list().entries(self.line_starts()).finish()
541 }
542}
543
/// Records whether the indexed document needs multi-byte aware handling.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
enum IndexKind {
    /// Optimized index for an ASCII only document
    Ascii,

    /// Index for UTF8 documents
    Utf8,
}

impl IndexKind {
    /// Returns `true` for [`IndexKind::Ascii`] and `false` for [`IndexKind::Utf8`].
    const fn is_ascii(self) -> bool {
        match self {
            Self::Ascii => true,
            Self::Utf8 => false,
        }
    }
}
559
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file.
///
/// The value is stored as a [`NonZeroUsize`], which enables some memory
/// optimizations.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroUsize);

impl OneIndexed {
    /// The largest value that can be represented by this integer type.
    pub const MAX: Self = Self(NonZeroUsize::MAX);
    /// The smallest value that can be represented by this integer type.
    pub const MIN: Self = Self(NonZeroUsize::MIN);
    /// The raw non-zero value `1`.
    pub const ONE: NonZeroUsize = NonZeroUsize::MIN;

    /// Wraps `value`, returning `None` if it is zero.
    pub const fn new(value: usize) -> Option<Self> {
        if let Some(non_zero) = NonZeroUsize::new(value) {
            Some(Self(non_zero))
        } else {
            None
        }
    }

    /// Constructs a new [`OneIndexed`] from a zero-indexed value.
    ///
    /// The conversion saturates: `usize::MAX` maps to [`Self::MAX`].
    pub const fn from_zero_indexed(value: usize) -> Self {
        Self(Self::ONE.saturating_add(value))
    }

    /// Returns the one-indexed value as a primitive type.
    pub const fn get(self) -> usize {
        self.0.get()
    }

    /// Returns the zero-indexed primitive value for this [`OneIndexed`].
    pub const fn to_zero_indexed(self) -> usize {
        self.get() - 1
    }

    /// Saturating integer addition. Computes `self + rhs`, saturating at
    /// the numeric bounds instead of overflowing.
    #[must_use]
    pub const fn saturating_add(self, rhs: usize) -> Self {
        Self(self.0.saturating_add(rhs))
    }

    /// Saturating integer subtraction. Computes `self - rhs`, saturating
    /// at [`Self::MIN`] instead of reaching zero or overflowing.
    #[must_use]
    pub const fn saturating_sub(self, rhs: usize) -> Self {
        // A difference of zero is not representable, so it is raised to the minimum.
        if let Some(difference) = Self::new(self.get().saturating_sub(rhs)) {
            difference
        } else {
            Self::MIN
        }
    }

    /// Checked addition. Returns `None` if overflow occurred.
    #[must_use]
    pub fn checked_add(self, rhs: Self) -> Option<Self> {
        let sum = self.0.checked_add(rhs.get())?;
        Some(Self(sum))
    }

    /// Checked subtraction. Returns `None` if overflow occurred or if the
    /// difference is zero, which a [`OneIndexed`] cannot represent.
    #[must_use]
    pub fn checked_sub(self, rhs: Self) -> Option<Self> {
        let difference = self.get().checked_sub(rhs.get())?;
        Self::new(difference)
    }

    /// Calculate the number of digits in `self`.
    ///
    /// This is primarily intended for computing the length of the string representation for
    /// formatted printing.
    ///
    /// # Examples
    ///
    /// ```
    /// use ruff_source_file::OneIndexed;
    ///
    /// let one = OneIndexed::new(1).unwrap();
    /// assert_eq!(one.digits().get(), 1);
    ///
    /// let hundred = OneIndexed::new(100).unwrap();
    /// assert_eq!(hundred.digits().get(), 3);
    ///
    /// let thousand = OneIndexed::new(1000).unwrap();
    /// assert_eq!(thousand.digits().get(), 4);
    /// ```
    pub const fn digits(self) -> NonZeroUsize {
        // `ilog10` is one less than the number of decimal digits. Starting from one keeps
        // the result non-zero, and the logarithm is far too small to overflow the addition
        // (`u64::MAX.ilog10()` is 19), so widening the `u32` to `usize` is lossless.
        Self::ONE.saturating_add(self.get().ilog10() as usize)
    }
}

impl Default for OneIndexed {
    /// The default is the first position, [`OneIndexed::MIN`].
    fn default() -> Self {
        Self::MIN
    }
}

impl fmt::Display for OneIndexed {
    /// Prints the one-indexed value as a plain integer.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.get(), f)
    }
}

impl FromStr for OneIndexed {
    type Err = ParseIntError;

    /// Parses a non-zero decimal integer; `"0"` and non-numeric input are errors.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.parse::<NonZeroUsize>().map(Self)
    }
}
678
/// The unit in which character offsets within a line are counted.
#[derive(Copy, Clone, Debug)]
pub enum PositionEncoding {
    /// Character offsets count the number of bytes from the start of the line.
    Utf8,

    /// Character offsets count the number of UTF-16 code units from the start of the line.
    Utf16,

    /// Character offsets count the number of UTF-32 code units (code points, the same as the
    /// number of `char`s in Rust) from the start of the line.
    Utf32,
}
691
#[cfg(test)]
mod tests {
    use ruff_text_size::TextSize;

    use crate::line_index::LineIndex;
    use crate::{LineColumn, OneIndexed};

    /// Asserts that `index` recorded exactly the line starts given as raw byte offsets.
    #[track_caller]
    fn assert_line_starts(index: &LineIndex, expected: &[u32]) {
        let expected: Vec<TextSize> = expected.iter().copied().map(TextSize::from).collect();
        assert_eq!(index.line_starts(), expected.as_slice());
    }

    /// Asserts that the byte `offset` maps to the zero-indexed `line` and `column`.
    #[track_caller]
    fn assert_line_column(
        index: &LineIndex,
        contents: &str,
        offset: u32,
        line: usize,
        column: usize,
    ) {
        assert_eq!(
            index.line_column(TextSize::from(offset), contents),
            LineColumn {
                line: OneIndexed::from_zero_indexed(line),
                column: OneIndexed::from_zero_indexed(column)
            }
        );
    }

    #[test]
    fn ascii_index() {
        assert_line_starts(&LineIndex::from_source_text(""), &[0]);
        assert_line_starts(&LineIndex::from_source_text("x = 1"), &[0]);
        assert_line_starts(&LineIndex::from_source_text("x = 1\n"), &[0, 6]);
        assert_line_starts(
            &LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n"),
            &[0, 6, 12, 22],
        );
    }

    #[test]
    fn ascii_source_location() {
        let contents = "x = 1\ny = 2";
        let index = LineIndex::from_source_text(contents);

        // First row.
        assert_line_column(&index, contents, 2, 0, 2);

        // Second row.
        assert_line_column(&index, contents, 6, 1, 0);
        assert_line_column(&index, contents, 11, 1, 5);
    }

    #[test]
    fn ascii_carriage_return() {
        let contents = "x = 4\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 6]);

        assert_line_column(&index, contents, 4, 0, 4);
        assert_line_column(&index, contents, 6, 1, 0);
        assert_line_column(&index, contents, 7, 1, 1);
    }

    #[test]
    fn ascii_carriage_return_newline() {
        let contents = "x = 4\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 7]);

        assert_line_column(&index, contents, 4, 0, 4);
        assert_line_column(&index, contents, 7, 1, 0);
        assert_line_column(&index, contents, 8, 1, 1);
    }

    #[test]
    fn utf8_index() {
        let index = LineIndex::from_source_text("x = '🫣'");
        assert_eq!(index.line_count(), 1);
        assert_line_starts(&index, &[0]);

        let index = LineIndex::from_source_text("x = '🫣'\n");
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 11]);

        let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
        assert_eq!(index.line_count(), 4);
        assert_line_starts(&index, &[0, 11, 17, 27]);

        let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
        assert_eq!(index.line_count(), 3);
        assert_line_starts(&index, &[0, 7, 18]);
    }

    #[test]
    fn utf8_carriage_return() {
        let contents = "x = '🫣'\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 11]);

        // Second '
        assert_line_column(&index, contents, 9, 0, 6);
        assert_line_column(&index, contents, 11, 1, 0);
        assert_line_column(&index, contents, 12, 1, 1);
    }

    #[test]
    fn utf8_carriage_return_newline() {
        let contents = "x = '🫣'\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 12]);

        // Second '
        assert_line_column(&index, contents, 9, 0, 6);
        assert_line_column(&index, contents, 12, 1, 0);
        assert_line_column(&index, contents, 13, 1, 1);
    }

    #[test]
    fn utf8_byte_offset() {
        let contents = "x = '☃'\ny = 2";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 10]);

        // First row.
        assert_line_column(&index, contents, 0, 0, 0);
        assert_line_column(&index, contents, 5, 0, 5);
        assert_line_column(&index, contents, 8, 0, 6);

        // Second row.
        assert_line_column(&index, contents, 10, 1, 0);

        // One-past-the-end.
        assert_line_column(&index, contents, 15, 1, 5);
    }
}