rustpython_ruff_source_file/
line_index.rs

1use std::fmt;
2use std::fmt::{Debug, Formatter};
3use std::num::{NonZeroUsize, ParseIntError};
4use std::ops::Deref;
5use std::str::FromStr;
6use std::sync::Arc;
7
8use crate::{LineColumn, SourceLocation};
9use ruff_text_size::{TextLen, TextRange, TextSize};
10#[cfg(feature = "serde")]
11use serde::{Deserialize, Serialize};
12
/// Index for fast [byte offset](TextSize) to [`LineColumn`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
pub struct LineIndex {
    // Shared index data; held behind an `Arc` so that clones share one allocation.
    inner: Arc<LineIndexInner>,
}
21
/// The data shared by all clones of a [`LineIndex`].
#[derive(Eq, PartialEq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
struct LineIndexInner {
    // Byte offset at which each line starts. `LineIndex::from_source_text` always pushes
    // offset 0 first, so an index built that way is never empty.
    line_starts: Vec<TextSize>,
    // Whether the indexed text was pure ASCII; lets character counting skip decoding.
    kind: IndexKind,
}
28
29impl LineIndex {
30    /// Builds the [`LineIndex`] from the source text of a file.
31    pub fn from_source_text(text: &str) -> Self {
32        let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
33        line_starts.push(TextSize::default());
34
35        let bytes = text.as_bytes();
36
37        assert!(u32::try_from(bytes.len()).is_ok());
38
39        for i in memchr::memchr2_iter(b'\n', b'\r', bytes) {
40            // Skip `\r` in `\r\n` sequences (only count the `\n`).
41            if bytes[i] == b'\r' && bytes.get(i + 1) == Some(&b'\n') {
42                continue;
43            }
44            // SAFETY: Assertion above guarantees `i <= u32::MAX`
45            #[expect(clippy::cast_possible_truncation)]
46            line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
47        }
48
49        // Determine whether the source text is ASCII.
50        //
51        // Empirically, this simple loop is auto-vectorized by LLVM and benchmarks faster than both
52        // `str::is_ascii()` and hand-written SIMD.
53        let mut has_non_ascii = false;
54        for byte in bytes {
55            has_non_ascii |= !byte.is_ascii();
56        }
57
58        let kind = if has_non_ascii {
59            IndexKind::Utf8
60        } else {
61            IndexKind::Ascii
62        };
63
64        Self {
65            inner: Arc::new(LineIndexInner { line_starts, kind }),
66        }
67    }
68
69    fn kind(&self) -> IndexKind {
70        self.inner.kind
71    }
72
73    /// Returns the line and column number for an UTF-8 byte offset.
74    ///
75    /// The `column` number is the nth-character of the line, except for the first line
76    /// where it doesn't include the UTF-8 BOM marker at the start of the file.
77    ///
78    /// ### BOM handling
79    ///
80    /// For files starting with a UTF-8 BOM marker, the byte offsets
81    /// in the range `0...3` are all mapped to line 0 and column 0.
82    /// Because of this, the conversion isn't losless.
83    ///
84    /// ## Examples
85    ///
86    /// ```
87    /// # use ruff_text_size::TextSize;
88    /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
89    /// let source = format!("\u{FEFF}{}", "def a():\n    pass");
90    /// let index = LineIndex::from_source_text(&source);
91    ///
92    /// // Before BOM, maps to after BOM
93    /// assert_eq!(
94    ///     index.line_column(TextSize::from(0), &source),
95    ///     LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
96    /// );
97    ///
98    /// // After BOM, maps to after BOM
99    /// assert_eq!(
100    ///     index.line_column(TextSize::from(3), &source),
101    ///     LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
102    /// );
103    ///
104    /// assert_eq!(
105    ///     index.line_column(TextSize::from(7), &source),
106    ///     LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
107    /// );
108    /// assert_eq!(
109    ///     index.line_column(TextSize::from(16), &source),
110    ///     LineColumn { line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
111    /// );
112    /// ```
113    ///
114    /// ## Panics
115    ///
116    /// If the byte offset isn't within the bounds of `content`.
117    pub fn line_column(&self, offset: TextSize, content: &str) -> LineColumn {
118        let location = self.source_location(offset, content, PositionEncoding::Utf32);
119
120        // Don't count the BOM character as a column, but only on the first line.
121        let column = if location.line.to_zero_indexed() == 0 && content.starts_with('\u{feff}') {
122            location.character_offset.saturating_sub(1)
123        } else {
124            location.character_offset
125        };
126
127        LineColumn {
128            line: location.line,
129            column,
130        }
131    }
132
133    /// Given a UTF-8 byte offset, returns the line and character offset according to the given encoding.
134    ///
135    /// ### BOM handling
136    ///
137    /// Unlike [`Self::line_column`], this method does not skip the BOM character at the start of the file.
138    /// This allows for bidirectional mapping between [`SourceLocation`] and [`TextSize`] (see [`Self::offset`]).
139    ///
140    /// ## Examples
141    ///
142    /// ```
143    /// # use ruff_text_size::TextSize;
144    /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn, SourceLocation, PositionEncoding, Line};
145    /// let source = format!("\u{FEFF}{}", "def a():\n    pass");
146    /// let index = LineIndex::from_source_text(&source);
147    ///
148    /// // Before BOM, maps to character 0
149    /// assert_eq!(
150    ///     index.source_location(TextSize::from(0), &source, PositionEncoding::Utf32),
151    ///     SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(0) }
152    /// );
153    ///
154    /// // After BOM, maps to after BOM
155    /// assert_eq!(
156    ///     index.source_location(TextSize::from(3), &source, PositionEncoding::Utf32),
157    ///     SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(1) }
158    /// );
159    ///
160    /// assert_eq!(
161    ///     index.source_location(TextSize::from(7), &source, PositionEncoding::Utf32),
162    ///     SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(5) }
163    /// );
164    /// assert_eq!(
165    ///     index.source_location(TextSize::from(16), &source, PositionEncoding::Utf32),
166    ///     SourceLocation { line: OneIndexed::from_zero_indexed(1), character_offset: OneIndexed::from_zero_indexed(4) }
167    /// );
168    /// ```
169    ///
170    /// ## Panics
171    ///
172    /// If the UTF-8 byte offset is out of bounds of `text`.
173    pub fn source_location(
174        &self,
175        offset: TextSize,
176        text: &str,
177        encoding: PositionEncoding,
178    ) -> SourceLocation {
179        let line = self.line_index(offset);
180        let line_start = self.line_start(line, text);
181
182        let character_offset =
183            self.characters_between(TextRange::new(line_start, offset), text, encoding);
184
185        SourceLocation {
186            line,
187            character_offset: OneIndexed::from_zero_indexed(character_offset),
188        }
189    }
190
191    fn characters_between(
192        &self,
193        range: TextRange,
194        text: &str,
195        encoding: PositionEncoding,
196    ) -> usize {
197        if self.is_ascii() {
198            return (range.end() - range.start()).to_usize();
199        }
200
201        match encoding {
202            PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(),
203            PositionEncoding::Utf16 => {
204                let up_to_character = &text[range];
205                up_to_character.encode_utf16().count()
206            }
207            PositionEncoding::Utf32 => {
208                let up_to_character = &text[range];
209                up_to_character.chars().count()
210            }
211        }
212    }
213
214    /// Returns the length of the line in characters, respecting the given encoding
215    pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize {
216        let line_range = self.line_range(line, text);
217
218        self.characters_between(line_range, text, encoding)
219    }
220
221    /// Return the number of lines in the source code.
222    pub fn line_count(&self) -> usize {
223        self.line_starts().len()
224    }
225
226    /// Returns `true` if the text only consists of ASCII characters
227    pub fn is_ascii(&self) -> bool {
228        self.kind().is_ascii()
229    }
230
231    /// Returns the row number for a given offset.
232    ///
233    /// ## Examples
234    ///
235    /// ```
236    /// # use ruff_text_size::TextSize;
237    /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
238    /// let source = "def a():\n    pass";
239    /// let index = LineIndex::from_source_text(source);
240    ///
241    /// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
242    /// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
243    /// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
244    /// ```
245    ///
246    /// ## Panics
247    ///
248    /// If the offset is out of bounds.
249    pub fn line_index(&self, offset: TextSize) -> OneIndexed {
250        match self.line_starts().binary_search(&offset) {
251            // Offset is at the start of a line
252            Ok(row) => OneIndexed::from_zero_indexed(row),
253            Err(row) => {
254                // SAFETY: Safe because the index always contains an entry for the offset 0
255                OneIndexed::from_zero_indexed(row - 1)
256            }
257        }
258    }
259
260    /// Returns the [byte offset](TextSize) for the `line` with the given index.
261    pub fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
262        let row_index = line.to_zero_indexed();
263        let starts = self.line_starts();
264
265        // If start-of-line position after last line
266        if row_index == starts.len() {
267            contents.text_len()
268        } else {
269            starts[row_index]
270        }
271    }
272
273    /// Returns the [byte offset](TextSize) of the `line`'s end.
274    /// The offset is the end of the line, up to and including the newline character ending the line (if any).
275    pub fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
276        let row_index = line.to_zero_indexed();
277        let starts = self.line_starts();
278
279        // If start-of-line position after last line
280        if row_index.saturating_add(1) >= starts.len() {
281            contents.text_len()
282        } else {
283            starts[row_index + 1]
284        }
285    }
286
287    /// Returns the [byte offset](TextSize) of the `line`'s end.
288    /// The offset is the end of the line, excluding the newline character ending the line (if any).
289    pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize {
290        let row_index = line.to_zero_indexed();
291        let starts = self.line_starts();
292
293        // If start-of-line position after last line
294        if row_index.saturating_add(1) >= starts.len() {
295            contents.text_len()
296        } else {
297            starts[row_index + 1] - TextSize::new(1)
298        }
299    }
300
301    /// Returns the [`TextRange`] of the `line` with the given index.
302    /// The start points to the first character's [byte offset](TextSize), the end up to, and including
303    /// the newline character ending the line (if any).
304    pub fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
305        let starts = self.line_starts();
306
307        if starts.len() == line.to_zero_indexed() {
308            TextRange::empty(contents.text_len())
309        } else {
310            TextRange::new(
311                self.line_start(line, contents),
312                self.line_start(line.saturating_add(1), contents),
313            )
314        }
315    }
316
317    /// Returns the [UTF-8 byte offset](TextSize) at `line` and `character` where character is counted using the given encoding.
318    ///
319    /// ## Examples
320    ///
321    /// ### ASCII only source text
322    ///
323    /// ```
324    /// # use ruff_source_file::{SourceLocation, LineIndex, OneIndexed, PositionEncoding};
325    /// # use ruff_text_size::TextSize;
326    /// let source = r#"a = 4
327    /// c = "some string"
328    /// x = b"#;
329    ///
330    /// let index = LineIndex::from_source_text(source);
331    ///
332    /// // First line, first character
333    /// assert_eq!(
334    ///     index.offset(
335    ///         SourceLocation {
336    ///             line: OneIndexed::from_zero_indexed(0),
337    ///             character_offset: OneIndexed::from_zero_indexed(0)
338    ///         },
339    ///         source,
340    ///         PositionEncoding::Utf32,
341    ///     ),
342    ///     TextSize::new(0)
343    ///  );
344    ///
345    /// assert_eq!(
346    ///     index.offset(
347    ///         SourceLocation {
348    ///             line: OneIndexed::from_zero_indexed(1),
349    ///             character_offset: OneIndexed::from_zero_indexed(4)
350    ///         },
351    ///         source,
352    ///         PositionEncoding::Utf32,
353    ///     ),
354    ///     TextSize::new(10)
355    ///  );
356    ///
357    /// // Offset past the end of the first line
358    /// assert_eq!(
359    ///     index.offset(
360    ///         SourceLocation {
361    ///             line: OneIndexed::from_zero_indexed(0),
362    ///             character_offset: OneIndexed::from_zero_indexed(10)
363    ///         },
364    ///         source,
365    ///         PositionEncoding::Utf32,
366    ///     ),
367    ///     TextSize::new(6)
368    ///  );
369    ///
370    /// // Offset past the end of the file
371    /// assert_eq!(
372    ///     index.offset(
373    ///         SourceLocation {
374    ///             line: OneIndexed::from_zero_indexed(3),
375    ///             character_offset: OneIndexed::from_zero_indexed(0)
376    ///         },
377    ///         source,
378    ///         PositionEncoding::Utf32,
379    ///     ),
380    ///     TextSize::new(29)
381    ///  );
382    /// ```
383    ///
384    /// ### Non-ASCII source text
385    ///
386    /// ```
387    /// use ruff_source_file::{LineIndex, OneIndexed, SourceLocation, PositionEncoding};
388    /// use ruff_text_size::TextSize;
389    /// let source = format!("\u{FEFF}{}", r#"a = 4
390    /// c = "❤️"
391    /// x = b"#);
392    ///
393    /// let index = LineIndex::from_source_text(&source);
394    ///
395    /// // First line, first character, points at the BOM
396    /// assert_eq!(
397    ///     index.offset(
398    ///         SourceLocation {
399    ///             line: OneIndexed::from_zero_indexed(0),
400    ///             character_offset: OneIndexed::from_zero_indexed(0)
401    ///         },
402    ///         &source,
403    ///         PositionEncoding::Utf32,
404    ///     ),
405    ///     TextSize::new(0)
406    ///  );
407    ///
408    /// // First line, after the BOM
409    /// assert_eq!(
410    ///     index.offset(
411    ///         SourceLocation {
412    ///             line: OneIndexed::from_zero_indexed(0),
413    ///             character_offset: OneIndexed::from_zero_indexed(1)
414    ///         },
415    ///         &source,
416    ///         PositionEncoding::Utf32,
417    ///     ),
418    ///     TextSize::new(3)
419    ///  );
420    ///
421    /// // second line, 7th character, after emoji, UTF32
422    /// assert_eq!(
423    ///     index.offset(
424    ///         SourceLocation {
425    ///             line: OneIndexed::from_zero_indexed(1),
426    ///             character_offset: OneIndexed::from_zero_indexed(7)
427    ///         },
428    ///         &source,
429    ///         PositionEncoding::Utf32,
430    ///     ),
431    ///     TextSize::new(20)
432    ///  );
433    ///
434    /// // Second line, 7th character, after emoji, UTF 16
435    /// assert_eq!(
436    ///     index.offset(
437    ///         SourceLocation {
438    ///             line: OneIndexed::from_zero_indexed(1),
439    ///             character_offset: OneIndexed::from_zero_indexed(7)
440    ///         },
441    ///         &source,
442    ///         PositionEncoding::Utf16,
443    ///     ),
444    ///     TextSize::new(20)
445    ///  );
446    ///
447    ///
448    /// // Offset past the end of the second line
449    /// assert_eq!(
450    ///     index.offset(
451    ///         SourceLocation {
452    ///             line: OneIndexed::from_zero_indexed(1),
453    ///             character_offset: OneIndexed::from_zero_indexed(10)
454    ///         },
455    ///         &source,
456    ///         PositionEncoding::Utf32,
457    ///     ),
458    ///     TextSize::new(22)
459    ///  );
460    ///
461    /// // Offset past the end of the file
462    /// assert_eq!(
463    ///     index.offset(
464    ///         SourceLocation {
465    ///             line: OneIndexed::from_zero_indexed(3),
466    ///             character_offset: OneIndexed::from_zero_indexed(0)
467    ///         },
468    ///         &source,
469    ///         PositionEncoding::Utf32,
470    ///     ),
471    ///     TextSize::new(27)
472    ///  );
473    /// ```
474    pub fn offset(
475        &self,
476        position: SourceLocation,
477        text: &str,
478        position_encoding: PositionEncoding,
479    ) -> TextSize {
480        // If start-of-line position after last line
481        if position.line.to_zero_indexed() > self.line_starts().len() {
482            return text.text_len();
483        }
484
485        let line_range = self.line_range(position.line, text);
486
487        let character_offset = position.character_offset.to_zero_indexed();
488        let character_byte_offset = if self.is_ascii() {
489            TextSize::try_from(character_offset).unwrap()
490        } else {
491            let line = &text[line_range];
492
493            match position_encoding {
494                PositionEncoding::Utf8 => {
495                    TextSize::try_from(position.character_offset.to_zero_indexed()).unwrap()
496                }
497                PositionEncoding::Utf16 => {
498                    let mut byte_offset = TextSize::new(0);
499                    let mut utf16_code_unit_offset = 0;
500
501                    for c in line.chars() {
502                        if utf16_code_unit_offset >= character_offset {
503                            break;
504                        }
505
506                        // Count characters encoded as two 16 bit words as 2 characters.
507                        byte_offset += c.text_len();
508                        utf16_code_unit_offset += c.len_utf16();
509                    }
510
511                    byte_offset
512                }
513                PositionEncoding::Utf32 => line
514                    .chars()
515                    .take(position.character_offset.to_zero_indexed())
516                    .map(ruff_text_size::TextLen::text_len)
517                    .sum(),
518            }
519        };
520
521        line_range.start() + character_byte_offset.clamp(TextSize::new(0), line_range.len())
522    }
523
524    /// Returns the [byte offsets](TextSize) for every line
525    pub fn line_starts(&self) -> &[TextSize] {
526        &self.inner.line_starts
527    }
528}
529
530impl Deref for LineIndex {
531    type Target = [TextSize];
532
533    fn deref(&self) -> &Self::Target {
534        self.line_starts()
535    }
536}
537
538impl Debug for LineIndex {
539    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
540        f.debug_list().entries(self.line_starts()).finish()
541    }
542}
543
/// Records whether the indexed document needs UTF-8 aware character counting.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
enum IndexKind {
    /// Optimized index for an ASCII only document
    Ascii,

    /// Index for UTF8 documents
    Utf8,
}

impl IndexKind {
    /// Returns `true` for [`IndexKind::Ascii`].
    const fn is_ascii(self) -> bool {
        match self {
            Self::Ascii => true,
            Self::Utf8 => false,
        }
    }
}
559
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file
///
/// Internally this is represented as a [`NonZeroUsize`], this enables some
/// memory optimizations
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroUsize);

impl OneIndexed {
    /// The largest value that can be represented by this integer type
    pub const MAX: Self = Self(NonZeroUsize::MAX);
    /// The smallest value that can be represented by this integer type.
    pub const MIN: Self = Self(NonZeroUsize::MIN);
    /// The raw non-zero value `1`.
    pub const ONE: NonZeroUsize = NonZeroUsize::MIN;

    /// Creates a [`OneIndexed`] from a one-based `value`, or `None` if `value` is zero.
    pub const fn new(value: usize) -> Option<Self> {
        // `Option::map` isn't usable in a `const fn`, hence the explicit branches.
        if let Some(non_zero) = NonZeroUsize::new(value) {
            Some(Self(non_zero))
        } else {
            None
        }
    }

    /// Construct a new [`OneIndexed`] from a zero-indexed value
    ///
    /// Saturates at [`Self::MAX`] if `value + 1` would overflow.
    pub const fn from_zero_indexed(value: usize) -> Self {
        Self(NonZeroUsize::MIN.saturating_add(value))
    }

    /// Returns the one-based value as a primitive type.
    pub const fn get(self) -> usize {
        self.0.get()
    }

    /// Return the zero-indexed primitive value for this [`OneIndexed`]
    pub const fn to_zero_indexed(self) -> usize {
        // The wrapped value is at least 1, so this cannot underflow.
        self.get() - 1
    }

    /// Saturating integer addition. Computes `self + rhs`, saturating at
    /// the numeric bounds instead of overflowing.
    #[must_use]
    pub const fn saturating_add(self, rhs: usize) -> Self {
        Self(self.0.saturating_add(rhs))
    }

    /// Saturating integer subtraction. Computes `self - rhs`, saturating
    /// at [`Self::MIN`] instead of reaching zero or overflowing.
    #[must_use]
    pub const fn saturating_sub(self, rhs: usize) -> Self {
        let difference = self.get().saturating_sub(rhs);

        if let Some(non_zero) = NonZeroUsize::new(difference) {
            Self(non_zero)
        } else {
            Self::MIN
        }
    }

    /// Checked addition. Returns `None` if overflow occurred.
    #[must_use]
    pub fn checked_add(self, rhs: Self) -> Option<Self> {
        let sum = self.0.checked_add(rhs.get())?;
        Some(Self(sum))
    }

    /// Checked subtraction. Returns `None` if overflow occurred or if the result would be zero.
    #[must_use]
    pub fn checked_sub(self, rhs: Self) -> Option<Self> {
        let difference = self.get().checked_sub(rhs.get())?;
        Self::new(difference)
    }

    /// Calculate the number of digits in `self`.
    ///
    /// This is primarily intended for computing the length of the string representation for
    /// formatted printing.
    ///
    /// # Examples
    ///
    /// ```
    /// use ruff_source_file::OneIndexed;
    ///
    /// let one = OneIndexed::new(1).unwrap();
    /// assert_eq!(one.digits().get(), 1);
    ///
    /// let hundred = OneIndexed::new(100).unwrap();
    /// assert_eq!(hundred.digits().get(), 3);
    ///
    /// let thousand = OneIndexed::new(1000).unwrap();
    /// assert_eq!(thousand.digits().get(), 4);
    /// ```
    pub const fn digits(self) -> NonZeroUsize {
        // `ilog10` of a `usize` is tiny (`u64::MAX.ilog10()` is 19), so widening it from `u32`
        // is lossless and adding it to 1 can never actually saturate.
        Self::ONE.saturating_add(self.0.ilog10() as usize)
    }
}
659
660impl Default for OneIndexed {
661    fn default() -> Self {
662        Self::MIN
663    }
664}
665
666impl fmt::Display for OneIndexed {
667    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
668        std::fmt::Debug::fmt(&self.0.get(), f)
669    }
670}
671
672impl FromStr for OneIndexed {
673    type Err = ParseIntError;
674    fn from_str(s: &str) -> Result<Self, Self::Err> {
675        Ok(OneIndexed(NonZeroUsize::from_str(s)?))
676    }
677}
678
/// The unit in which character offsets within a line are counted.
#[derive(Copy, Clone, Debug)]
pub enum PositionEncoding {
    /// Character offsets count the number of bytes from the start of the line.
    Utf8,

    /// Character offsets count the number of UTF-16 code units from the start of the line.
    Utf16,

    /// Character offsets count the number of UTF-32 code points units (the same as number of characters in Rust)
    /// from the start of the line.
    Utf32,
}
691
#[cfg(test)]
mod tests {
    use ruff_text_size::TextSize;

    use crate::line_index::LineIndex;
    use crate::{LineColumn, OneIndexed};

    /// Asserts that `index` recorded exactly the line starts in `expected` (byte offsets).
    #[track_caller]
    fn assert_line_starts(index: &LineIndex, expected: &[u32]) {
        let expected: Vec<TextSize> = expected.iter().copied().map(TextSize::from).collect();
        assert_eq!(index.line_starts(), expected.as_slice());
    }

    /// Asserts that byte `offset` maps to the given zero-based `line` and `column`.
    #[track_caller]
    fn assert_line_column(
        index: &LineIndex,
        contents: &str,
        offset: u32,
        line: usize,
        column: usize,
    ) {
        assert_eq!(
            index.line_column(TextSize::from(offset), contents),
            LineColumn {
                line: OneIndexed::from_zero_indexed(line),
                column: OneIndexed::from_zero_indexed(column)
            }
        );
    }

    #[test]
    fn ascii_index() {
        assert_line_starts(&LineIndex::from_source_text(""), &[0]);
        assert_line_starts(&LineIndex::from_source_text("x = 1"), &[0]);
        assert_line_starts(&LineIndex::from_source_text("x = 1\n"), &[0, 6]);
        assert_line_starts(
            &LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n"),
            &[0, 6, 12, 22],
        );
    }

    #[test]
    fn ascii_source_location() {
        let contents = "x = 1\ny = 2";
        let index = LineIndex::from_source_text(contents);

        // First row.
        assert_line_column(&index, contents, 2, 0, 2);

        // Second row.
        assert_line_column(&index, contents, 6, 1, 0);
        assert_line_column(&index, contents, 11, 1, 5);
    }

    #[test]
    fn ascii_carriage_return() {
        let contents = "x = 4\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 6]);

        assert_line_column(&index, contents, 4, 0, 4);
        assert_line_column(&index, contents, 6, 1, 0);
        assert_line_column(&index, contents, 7, 1, 1);
    }

    #[test]
    fn ascii_carriage_return_newline() {
        let contents = "x = 4\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 7]);

        assert_line_column(&index, contents, 4, 0, 4);
        assert_line_column(&index, contents, 7, 1, 0);
        assert_line_column(&index, contents, 8, 1, 1);
    }

    #[test]
    fn utf8_index() {
        let index = LineIndex::from_source_text("x = '🫣'");
        assert_eq!(index.line_count(), 1);
        assert_line_starts(&index, &[0]);

        let index = LineIndex::from_source_text("x = '🫣'\n");
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 11]);

        let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
        assert_eq!(index.line_count(), 4);
        assert_line_starts(&index, &[0, 11, 17, 27]);

        let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n    \"\"\".\"\"\"");
        assert_eq!(index.line_count(), 3);
        assert_line_starts(&index, &[0, 7, 18]);
    }

    #[test]
    fn utf8_carriage_return() {
        let contents = "x = '🫣'\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 11]);

        // Second '
        assert_line_column(&index, contents, 9, 0, 6);
        assert_line_column(&index, contents, 11, 1, 0);
        assert_line_column(&index, contents, 12, 1, 1);
    }

    #[test]
    fn utf8_carriage_return_newline() {
        let contents = "x = '🫣'\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_line_starts(&index, &[0, 12]);

        // Second '
        assert_line_column(&index, contents, 9, 0, 6);
        assert_line_column(&index, contents, 12, 1, 0);
        assert_line_column(&index, contents, 13, 1, 1);
    }

    #[test]
    fn utf8_byte_offset() {
        let contents = "x = '☃'\ny = 2";
        let index = LineIndex::from_source_text(contents);
        assert_line_starts(&index, &[0, 10]);

        // First row.
        assert_line_column(&index, contents, 0, 0, 0);
        assert_line_column(&index, contents, 5, 0, 5);
        assert_line_column(&index, contents, 8, 0, 6);

        // Second row.
        assert_line_column(&index, contents, 10, 1, 0);

        // One-past-the-end.
        assert_line_column(&index, contents, 15, 1, 5);
    }
}
rustpython_ruff_source_file/line_index.rs

rustpython_ruff_source_file/
line_index.rs