// rustpython_ruff_source_file/newlines.rs
use std::iter::FusedIterator;
use std::ops::Deref;

use memchr::{memchr2, memrchr2};
use ruff_text_size::{TextLen, TextRange, TextSize};
6
7pub trait UniversalNewlines {
9 fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
10}
11
12impl UniversalNewlines for str {
13 fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
14 UniversalNewlineIterator::from(self)
15 }
16}
17
18#[derive(Clone)]
36pub struct UniversalNewlineIterator<'a> {
37 text: &'a str,
38 offset: TextSize,
39 offset_back: TextSize,
40}
41
42impl<'a> UniversalNewlineIterator<'a> {
43 pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
44 UniversalNewlineIterator {
45 text,
46 offset,
47 offset_back: offset + text.text_len(),
48 }
49 }
50
51 pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
52 Self::with_offset(text, TextSize::default())
53 }
54}
55
56#[inline]
58pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
59 let bytes = text.as_bytes();
60 if let Some(position) = memchr2(b'\n', b'\r', bytes) {
61 let line_ending = match bytes[position] {
62 b'\n' => LineEnding::Lf,
64 b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
66 _ => LineEnding::Cr,
68 };
69
70 Some((position, line_ending))
71 } else {
72 None
73 }
74}
75
76impl<'a> Iterator for UniversalNewlineIterator<'a> {
77 type Item = Line<'a>;
78
79 #[inline]
80 fn next(&mut self) -> Option<Line<'a>> {
81 if self.text.is_empty() {
82 return None;
83 }
84
85 let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
86 let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
87
88 let line = Line {
89 offset: self.offset,
90 text,
91 };
92
93 self.text = remainder;
94 self.offset += text.text_len();
95
96 line
97 }
98 else {
100 Line {
101 offset: self.offset,
102 text: std::mem::take(&mut self.text),
103 }
104 };
105
106 Some(line)
107 }
108
109 fn last(mut self) -> Option<Self::Item> {
110 self.next_back()
111 }
112}
113
114impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
115 #[inline]
116 fn next_back(&mut self) -> Option<Self::Item> {
117 if self.text.is_empty() {
118 return None;
119 }
120
121 let len = self.text.len();
122
123 let haystack = match self.text.as_bytes()[len - 1] {
125 b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
126 b'\n' | b'\r' => &self.text[..len - 1],
127 _ => self.text,
128 };
129
130 let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
133 let (remainder, line) = self.text.split_at(line_end + 1);
135 self.text = remainder;
136 self.offset_back -= line.text_len();
137
138 Line {
139 text: line,
140 offset: self.offset_back,
141 }
142 } else {
143 let offset = self.offset_back - self.text.text_len();
145 Line {
146 text: std::mem::take(&mut self.text),
147 offset,
148 }
149 };
150
151 Some(line)
152 }
153}
154
155impl FusedIterator for UniversalNewlineIterator<'_> {}
156
157pub struct NewlineWithTrailingNewline<'a> {
159 trailing: Option<Line<'a>>,
160 underlying: UniversalNewlineIterator<'a>,
161}
162
163impl<'a> NewlineWithTrailingNewline<'a> {
164 pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
165 Self::with_offset(input, TextSize::default())
166 }
167
168 pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
169 NewlineWithTrailingNewline {
170 underlying: UniversalNewlineIterator::with_offset(input, offset),
171 trailing: if input.ends_with(['\r', '\n']) {
172 Some(Line {
173 text: "",
174 offset: offset + input.text_len(),
175 })
176 } else {
177 None
178 },
179 }
180 }
181}
182
183impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
184 type Item = Line<'a>;
185
186 #[inline]
187 fn next(&mut self) -> Option<Self::Item> {
188 self.underlying.next().or_else(|| self.trailing.take())
189 }
190}
191
192impl DoubleEndedIterator for NewlineWithTrailingNewline<'_> {
193 #[inline]
194 fn next_back(&mut self) -> Option<Self::Item> {
195 self.trailing.take().or_else(|| self.underlying.next_back())
196 }
197}
198
199#[derive(Debug, Clone, Eq, PartialEq)]
200pub struct Line<'a> {
201 text: &'a str,
202 offset: TextSize,
203}
204
205impl<'a> Line<'a> {
206 pub fn new(text: &'a str, offset: TextSize) -> Self {
207 Self { text, offset }
208 }
209
210 #[inline]
211 pub const fn start(&self) -> TextSize {
212 self.offset
213 }
214
215 #[inline]
217 pub fn full_end(&self) -> TextSize {
218 self.offset + self.full_text_len()
219 }
220
221 #[inline]
223 pub fn end(&self) -> TextSize {
224 self.offset + self.as_str().text_len()
225 }
226
227 #[inline]
229 pub fn full_range(&self) -> TextRange {
230 TextRange::at(self.offset, self.text.text_len())
231 }
232
233 #[inline]
235 pub fn range(&self) -> TextRange {
236 TextRange::new(self.start(), self.end())
237 }
238
239 #[inline]
241 pub fn line_ending(&self) -> Option<LineEnding> {
242 let mut bytes = self.text.bytes().rev();
243 match bytes.next() {
244 Some(b'\n') => {
245 if bytes.next() == Some(b'\r') {
246 Some(LineEnding::CrLf)
247 } else {
248 Some(LineEnding::Lf)
249 }
250 }
251 Some(b'\r') => Some(LineEnding::Cr),
252 _ => None,
253 }
254 }
255
256 #[inline]
258 pub fn as_str(&self) -> &'a str {
259 let newline_len = self
260 .line_ending()
261 .map_or(0, |line_ending| line_ending.len());
262 &self.text[..self.text.len() - newline_len]
263 }
264
265 #[inline]
267 pub fn as_full_str(&self) -> &'a str {
268 self.text
269 }
270
271 #[inline]
272 pub fn full_text_len(&self) -> TextSize {
273 self.text.text_len()
274 }
275}
276
277impl Deref for Line<'_> {
278 type Target = str;
279
280 fn deref(&self) -> &Self::Target {
281 self.as_str()
282 }
283}
284
285impl PartialEq<&str> for Line<'_> {
286 fn eq(&self, other: &&str) -> bool {
287 self.as_str() == *other
288 }
289}
290
291impl PartialEq<Line<'_>> for &str {
292 fn eq(&self, other: &Line<'_>) -> bool {
293 *self == other.as_str()
294 }
295}
296
/// The kinds of line terminator recognised by this module.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// Line feed, `\n`.
    Lf,
    /// Carriage return, `\r`.
    Cr,
    /// Carriage return followed by line feed, `\r\n`.
    CrLf,
}
305
306impl Default for LineEnding {
307 fn default() -> Self {
308 if cfg!(windows) {
309 LineEnding::CrLf
310 } else {
311 LineEnding::Lf
312 }
313 }
314}
315
316impl LineEnding {
317 pub const fn as_str(&self) -> &'static str {
318 match self {
319 LineEnding::Lf => "\n",
320 LineEnding::CrLf => "\r\n",
321 LineEnding::Cr => "\r",
322 }
323 }
324
325 #[expect(clippy::len_without_is_empty)]
326 pub const fn len(&self) -> usize {
327 match self {
328 LineEnding::Lf | LineEnding::Cr => 1,
329 LineEnding::CrLf => 2,
330 }
331 }
332
333 pub const fn text_len(&self) -> TextSize {
334 match self {
335 LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
336 LineEnding::CrLf => TextSize::new(2),
337 }
338 }
339}
340
341impl Deref for LineEnding {
342 type Target = str;
343
344 fn deref(&self) -> &Self::Target {
345 self.as_str()
346 }
347}
348
#[cfg(test)]
mod tests {
    use ruff_text_size::TextSize;

    use super::{Line, NewlineWithTrailingNewline, UniversalNewlineIterator};

    /// An empty input yields no lines in either direction.
    #[test]
    fn universal_newlines_empty_str() {
        let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
        assert_eq!(lines, Vec::<Line>::new());

        let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
        assert_eq!(lines, Vec::<Line>::new());
    }

    /// Forward iteration splits on `\n`, `\r\n`, and `\r` and reports start offsets.
    #[test]
    fn universal_newlines_forward() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
                Line::new("\n", TextSize::from(18)),
            ]
        );
    }

    /// Backward iteration yields the same lines and offsets in reverse order.
    #[test]
    fn universal_newlines_backwards() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
            .rev()
            .collect();
        assert_eq!(
            lines,
            vec![
                Line::new("bop", TextSize::from(14)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );

        // Compare full `Line` values rather than `&str`s: the `&str == Line` comparison
        // ignores offsets and terminators, which would leave them unchecked here.
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
            .rev()
            .collect();

        assert_eq!(
            lines,
            vec![
                Line::new("bop\n", TextSize::from(13)),
                Line::new("baz\r", TextSize::from(9)),
                Line::new("\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );
    }

    /// Alternating between both ends never yields a line twice.
    #[test]
    fn universal_newlines_mixed() {
        let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");

        assert_eq!(
            lines.next_back(),
            Some(Line::new("bop", TextSize::from(14)))
        );
        assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("baz\r", TextSize::from(10)))
        );
        assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("\r\n", TextSize::from(8)))
        );
        assert_eq!(lines.next(), None);
    }

    /// Input ending in a terminator gets an extra empty line at its end; other input
    /// does not.
    #[test]
    fn newline_with_trailing_newline() {
        let lines: Vec<_> = NewlineWithTrailingNewline::from("foo\nbar\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("", TextSize::from(8)),
            ]
        );

        let lines: Vec<_> = NewlineWithTrailingNewline::from("foo\nbar\n")
            .rev()
            .collect();
        assert_eq!(
            lines,
            vec![
                Line::new("", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );

        let lines: Vec<_> = NewlineWithTrailingNewline::from("foo").collect();
        assert_eq!(lines, vec![Line::new("foo", TextSize::from(0))]);
    }
}