Skip to main content

rustpython_ruff_source_file/
newlines.rs

1use std::iter::FusedIterator;
2use std::ops::Deref;
3
4use memchr::{memchr2, memrchr2};
5use ruff_text_size::{TextLen, TextRange, TextSize};
6
7/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
8pub trait UniversalNewlines {
9    fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
10}
11
12impl UniversalNewlines for str {
13    fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
14        UniversalNewlineIterator::from(self)
15    }
16}
17
18/// Like [`str::lines`], but accommodates LF, CRLF, and CR line endings,
19/// the latter of which are not supported by [`str::lines`].
20///
21/// ## Examples
22///
23/// ```rust
24/// # use ruff_text_size::TextSize;
25/// # use ruff_source_file::{Line, UniversalNewlineIterator};
26/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
27///
28/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
29/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
30/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
31/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
32/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
33/// assert_eq!(lines.next(), None);
34/// ```
35#[derive(Clone)]
36pub struct UniversalNewlineIterator<'a> {
37    text: &'a str,
38    offset: TextSize,
39    offset_back: TextSize,
40}
41
42impl<'a> UniversalNewlineIterator<'a> {
43    pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
44        UniversalNewlineIterator {
45            text,
46            offset,
47            offset_back: offset + text.text_len(),
48        }
49    }
50
51    pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
52        Self::with_offset(text, TextSize::default())
53    }
54}
55
56/// Finds the next newline character. Returns its position and the [`LineEnding`].
57#[inline]
58pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
59    let bytes = text.as_bytes();
60    if let Some(position) = memchr2(b'\n', b'\r', bytes) {
61        let line_ending = match bytes[position] {
62            // Explicit branch for `\n` as this is the most likely path
63            b'\n' => LineEnding::Lf,
64            // '\r\n'
65            b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
66            // '\r'
67            _ => LineEnding::Cr,
68        };
69
70        Some((position, line_ending))
71    } else {
72        None
73    }
74}
75
76impl<'a> Iterator for UniversalNewlineIterator<'a> {
77    type Item = Line<'a>;
78
79    #[inline]
80    fn next(&mut self) -> Option<Line<'a>> {
81        if self.text.is_empty() {
82            return None;
83        }
84
85        let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
86            let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
87
88            let line = Line {
89                offset: self.offset,
90                text,
91            };
92
93            self.text = remainder;
94            self.offset += text.text_len();
95
96            line
97        }
98        // Last line
99        else {
100            Line {
101                offset: self.offset,
102                text: std::mem::take(&mut self.text),
103            }
104        };
105
106        Some(line)
107    }
108
109    fn last(mut self) -> Option<Self::Item> {
110        self.next_back()
111    }
112}
113
114impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
115    #[inline]
116    fn next_back(&mut self) -> Option<Self::Item> {
117        if self.text.is_empty() {
118            return None;
119        }
120
121        let len = self.text.len();
122
123        // Trim any trailing newlines.
124        let haystack = match self.text.as_bytes()[len - 1] {
125            b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
126            b'\n' | b'\r' => &self.text[..len - 1],
127            _ => self.text,
128        };
129
130        // Find the end of the previous line. The previous line is the text up to, but not including
131        // the newline character.
132        let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
133            // '\n' or '\r' or '\r\n'
134            let (remainder, line) = self.text.split_at(line_end + 1);
135            self.text = remainder;
136            self.offset_back -= line.text_len();
137
138            Line {
139                text: line,
140                offset: self.offset_back,
141            }
142        } else {
143            // Last line
144            let offset = self.offset_back - self.text.text_len();
145            Line {
146                text: std::mem::take(&mut self.text),
147                offset,
148            }
149        };
150
151        Some(line)
152    }
153}
154
155impl FusedIterator for UniversalNewlineIterator<'_> {}
156
157/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
158pub struct NewlineWithTrailingNewline<'a> {
159    trailing: Option<Line<'a>>,
160    underlying: UniversalNewlineIterator<'a>,
161}
162
163impl<'a> NewlineWithTrailingNewline<'a> {
164    pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
165        Self::with_offset(input, TextSize::default())
166    }
167
168    pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
169        NewlineWithTrailingNewline {
170            underlying: UniversalNewlineIterator::with_offset(input, offset),
171            trailing: if input.ends_with(['\r', '\n']) {
172                Some(Line {
173                    text: "",
174                    offset: offset + input.text_len(),
175                })
176            } else {
177                None
178            },
179        }
180    }
181}
182
183impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
184    type Item = Line<'a>;
185
186    #[inline]
187    fn next(&mut self) -> Option<Self::Item> {
188        self.underlying.next().or_else(|| self.trailing.take())
189    }
190}
191
192impl DoubleEndedIterator for NewlineWithTrailingNewline<'_> {
193    #[inline]
194    fn next_back(&mut self) -> Option<Self::Item> {
195        self.trailing.take().or_else(|| self.underlying.next_back())
196    }
197}
198
199#[derive(Debug, Clone, Eq, PartialEq)]
200pub struct Line<'a> {
201    text: &'a str,
202    offset: TextSize,
203}
204
205impl<'a> Line<'a> {
206    pub fn new(text: &'a str, offset: TextSize) -> Self {
207        Self { text, offset }
208    }
209
210    #[inline]
211    pub const fn start(&self) -> TextSize {
212        self.offset
213    }
214
215    /// Returns the byte offset where the line ends, including its terminating new line character.
216    #[inline]
217    pub fn full_end(&self) -> TextSize {
218        self.offset + self.full_text_len()
219    }
220
221    /// Returns the byte offset where the line ends, excluding its new line character
222    #[inline]
223    pub fn end(&self) -> TextSize {
224        self.offset + self.as_str().text_len()
225    }
226
227    /// Returns the range of the line, including its terminating new line character.
228    #[inline]
229    pub fn full_range(&self) -> TextRange {
230        TextRange::at(self.offset, self.text.text_len())
231    }
232
233    /// Returns the range of the line, excluding its terminating new line character
234    #[inline]
235    pub fn range(&self) -> TextRange {
236        TextRange::new(self.start(), self.end())
237    }
238
239    /// Returns the line's new line character, if any.
240    #[inline]
241    pub fn line_ending(&self) -> Option<LineEnding> {
242        let mut bytes = self.text.bytes().rev();
243        match bytes.next() {
244            Some(b'\n') => {
245                if bytes.next() == Some(b'\r') {
246                    Some(LineEnding::CrLf)
247                } else {
248                    Some(LineEnding::Lf)
249                }
250            }
251            Some(b'\r') => Some(LineEnding::Cr),
252            _ => None,
253        }
254    }
255
256    /// Returns the text of the line, excluding the terminating new line character.
257    #[inline]
258    pub fn as_str(&self) -> &'a str {
259        let newline_len = self
260            .line_ending()
261            .map_or(0, |line_ending| line_ending.len());
262        &self.text[..self.text.len() - newline_len]
263    }
264
265    /// Returns the line's text, including the terminating new line character.
266    #[inline]
267    pub fn as_full_str(&self) -> &'a str {
268        self.text
269    }
270
271    #[inline]
272    pub fn full_text_len(&self) -> TextSize {
273        self.text.text_len()
274    }
275}
276
277impl Deref for Line<'_> {
278    type Target = str;
279
280    fn deref(&self) -> &Self::Target {
281        self.as_str()
282    }
283}
284
285impl PartialEq<&str> for Line<'_> {
286    fn eq(&self, other: &&str) -> bool {
287        self.as_str() == *other
288    }
289}
290
291impl PartialEq<Line<'_>> for &str {
292    fn eq(&self, other: &Line<'_>) -> bool {
293        *self == other.as_str()
294    }
295}
296
/// The line ending style used in Python source code.
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// A single line feed (`\n`).
    Lf,
    /// A single carriage return (`\r`).
    Cr,
    /// A carriage return followed by a line feed (`\r\n`).
    CrLf,
}

impl Default for LineEnding {
    /// Returns `CrLf` when compiled for Windows and `Lf` for every other target.
    fn default() -> Self {
        if cfg!(not(windows)) {
            Self::Lf
        } else {
            Self::CrLf
        }
    }
}
315
316impl LineEnding {
317    pub const fn as_str(&self) -> &'static str {
318        match self {
319            LineEnding::Lf => "\n",
320            LineEnding::CrLf => "\r\n",
321            LineEnding::Cr => "\r",
322        }
323    }
324
325    #[expect(clippy::len_without_is_empty)]
326    pub const fn len(&self) -> usize {
327        match self {
328            LineEnding::Lf | LineEnding::Cr => 1,
329            LineEnding::CrLf => 2,
330        }
331    }
332
333    pub const fn text_len(&self) -> TextSize {
334        match self {
335            LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
336            LineEnding::CrLf => TextSize::new(2),
337        }
338    }
339}
340
341impl Deref for LineEnding {
342    type Target = str;
343
344    fn deref(&self) -> &Self::Target {
345        self.as_str()
346    }
347}
348
#[cfg(test)]
mod tests {
    use ruff_text_size::TextSize;

    use super::{Line, UniversalNewlineIterator};

    /// An empty input yields no lines in either direction.
    #[test]
    fn universal_newlines_empty_str() {
        let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
        assert_eq!(lines, Vec::<Line>::new());

        let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
        assert_eq!(lines, Vec::<Line>::new());
    }

    /// Forward iteration over mixed line endings, with and without trailing newlines.
    #[test]
    fn universal_newlines_forward() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
                Line::new("\n", TextSize::from(18)),
            ]
        );
    }

    /// Backward iteration over mixed line endings, with and without a trailing newline.
    #[test]
    fn universal_newlines_backwards() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
            .rev()
            .collect();
        assert_eq!(
            lines,
            vec![
                Line::new("bop", TextSize::from(14)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );

        // Collect the `Line`s themselves rather than their `as_str()` text: comparing
        // `&str` against `Line` only checks the text without its terminator, which would
        // leave the offsets and line endings below unverified.
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
            .rev()
            .collect();

        assert_eq!(
            lines,
            vec![
                Line::new("bop\n", TextSize::from(13)),
                Line::new("baz\r", TextSize::from(9)),
                Line::new("\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );
    }

    /// Alternating between the front and the back never yields a line twice.
    #[test]
    fn universal_newlines_mixed() {
        let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");

        assert_eq!(
            lines.next_back(),
            Some(Line::new("bop", TextSize::from(14)))
        );
        assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("baz\r", TextSize::from(10)))
        );
        assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("\r\n", TextSize::from(8)))
        );
        assert_eq!(lines.next(), None);
    }
}