rustpython_parser_vendored/source_location/
newlines.rs1use crate::text_size::{TextLen, TextRange, TextSize};
2use memchr::{memchr2, memrchr2};
3use std::iter::FusedIterator;
4use std::ops::Deref;
5
/// Extension trait that adds universal-newline line iteration to string types.
pub trait StrExt {
    /// Returns a [`UniversalNewlineIterator`] over the lines of `self`.
    fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
}
10
11impl StrExt for str {
12 fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
13 UniversalNewlineIterator::from(self)
14 }
15}
16
17pub struct UniversalNewlineIterator<'a> {
35 text: &'a str,
36 offset: TextSize,
37 offset_back: TextSize,
38}
39
40impl<'a> UniversalNewlineIterator<'a> {
41 pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
42 UniversalNewlineIterator {
43 text,
44 offset,
45 offset_back: offset + text.text_len(),
46 }
47 }
48
49 pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
50 Self::with_offset(text, TextSize::default())
51 }
52}
53
54#[inline]
56pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
57 let bytes = text.as_bytes();
58 if let Some(position) = memchr2(b'\n', b'\r', bytes) {
59 #[allow(unsafe_code)]
61 let newline_character = unsafe { *bytes.get_unchecked(position) };
62
63 let line_ending = match newline_character {
64 b'\n' => LineEnding::Lf,
66 b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
68 _ => LineEnding::Cr,
70 };
71
72 Some((position, line_ending))
73 } else {
74 None
75 }
76}
77
78impl<'a> Iterator for UniversalNewlineIterator<'a> {
79 type Item = Line<'a>;
80
81 #[inline]
82 fn next(&mut self) -> Option<Line<'a>> {
83 if self.text.is_empty() {
84 return None;
85 }
86
87 let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
88 let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
89
90 let line = Line {
91 offset: self.offset,
92 text,
93 };
94
95 self.text = remainder;
96 self.offset += text.text_len();
97
98 line
99 }
100 else {
102 Line {
103 offset: self.offset,
104 text: std::mem::take(&mut self.text),
105 }
106 };
107
108 Some(line)
109 }
110
111 fn last(mut self) -> Option<Self::Item> {
112 self.next_back()
113 }
114}
115
116impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
117 #[inline]
118 fn next_back(&mut self) -> Option<Self::Item> {
119 if self.text.is_empty() {
120 return None;
121 }
122
123 let len = self.text.len();
124
125 let haystack = match self.text.as_bytes()[len - 1] {
127 b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
128 b'\n' | b'\r' => &self.text[..len - 1],
129 _ => self.text,
130 };
131
132 let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
135 let (remainder, line) = self.text.split_at(line_end + 1);
137 self.text = remainder;
138 self.offset_back -= line.text_len();
139
140 Line {
141 text: line,
142 offset: self.offset_back,
143 }
144 } else {
145 let offset = self.offset_back - self.text.text_len();
147 Line {
148 text: std::mem::take(&mut self.text),
149 offset,
150 }
151 };
152
153 Some(line)
154 }
155}
156
// Once `text` is empty, both `next` and `next_back` return `None` immediately, and neither of
// them ever refills `text`, so the iterator keeps returning `None` after it is exhausted.
impl FusedIterator for UniversalNewlineIterator<'_> {}
158
/// Line iterator that yields the same lines as [`UniversalNewlineIterator`] and, when the input
/// ends with a newline character, one additional empty line located at the end of the input.
pub struct NewlineWithTrailingNewline<'a> {
    /// The empty final line to emit once `underlying` is exhausted. `None` when the input does
    /// not end with a newline character, or after the line has been emitted.
    trailing: Option<Line<'a>>,
    /// Iterator that produces the regular lines of the input.
    underlying: UniversalNewlineIterator<'a>,
}
164
165impl<'a> NewlineWithTrailingNewline<'a> {
166 pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
167 Self::with_offset(input, TextSize::default())
168 }
169
170 pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
171 NewlineWithTrailingNewline {
172 underlying: UniversalNewlineIterator::with_offset(input, offset),
173 trailing: if input.ends_with(['\r', '\n']) {
174 Some(Line {
175 text: "",
176 offset: offset + input.text_len(),
177 })
178 } else {
179 None
180 },
181 }
182 }
183}
184
185impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
186 type Item = Line<'a>;
187
188 #[inline]
189 fn next(&mut self) -> Option<Line<'a>> {
190 self.underlying.next().or_else(|| self.trailing.take())
191 }
192}
193
/// A single line of text together with the offset at which it starts.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Line<'a> {
    /// The full text of the line, including its trailing newline character(s) if it has any.
    text: &'a str,
    /// The offset of the first byte of `text`.
    offset: TextSize,
}
199
200impl<'a> Line<'a> {
201 pub fn new(text: &'a str, offset: TextSize) -> Self {
202 Self { text, offset }
203 }
204
205 #[inline]
206 pub const fn start(&self) -> TextSize {
207 self.offset
208 }
209
210 #[inline]
212 pub fn full_end(&self) -> TextSize {
213 self.offset + self.full_text_len()
214 }
215
216 #[inline]
218 pub fn end(&self) -> TextSize {
219 self.offset + self.as_str().text_len()
220 }
221
222 #[inline]
224 pub fn full_range(&self) -> TextRange {
225 TextRange::at(self.offset, self.text.text_len())
226 }
227
228 #[inline]
230 pub fn range(&self) -> TextRange {
231 TextRange::new(self.start(), self.end())
232 }
233
234 #[inline]
236 pub fn as_str(&self) -> &'a str {
237 let mut bytes = self.text.bytes().rev();
238
239 let newline_len = match bytes.next() {
240 Some(b'\n') => {
241 if bytes.next() == Some(b'\r') {
242 2
243 } else {
244 1
245 }
246 }
247 Some(b'\r') => 1,
248 _ => 0,
249 };
250
251 &self.text[..self.text.len() - newline_len]
252 }
253
254 #[inline]
256 pub fn as_full_str(&self) -> &'a str {
257 self.text
258 }
259
260 #[inline]
261 pub fn full_text_len(&self) -> TextSize {
262 self.text.text_len()
263 }
264}
265
266impl Deref for Line<'_> {
267 type Target = str;
268
269 fn deref(&self) -> &Self::Target {
270 self.as_str()
271 }
272}
273
274impl PartialEq<&str> for Line<'_> {
275 fn eq(&self, other: &&str) -> bool {
276 self.as_str() == *other
277 }
278}
279
280impl PartialEq<Line<'_>> for &str {
281 fn eq(&self, other: &Line<'_>) -> bool {
282 *self == other.as_str()
283 }
284}
285
/// The kinds of line terminator recognized by this module.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// Line feed: `\n`.
    Lf,
    /// Carriage return: `\r`.
    Cr,
    /// Carriage return followed by line feed: `\r\n`.
    CrLf,
}
294
295impl Default for LineEnding {
296 fn default() -> Self {
297 if cfg!(windows) {
298 LineEnding::CrLf
299 } else {
300 LineEnding::Lf
301 }
302 }
303}
304
305impl LineEnding {
306 pub const fn as_str(&self) -> &'static str {
307 match self {
308 LineEnding::Lf => "\n",
309 LineEnding::CrLf => "\r\n",
310 LineEnding::Cr => "\r",
311 }
312 }
313
314 #[allow(clippy::len_without_is_empty)]
315 pub const fn len(&self) -> usize {
316 match self {
317 LineEnding::Lf | LineEnding::Cr => 1,
318 LineEnding::CrLf => 2,
319 }
320 }
321
322 pub const fn text_len(&self) -> TextSize {
323 match self {
324 LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
325 LineEnding::CrLf => TextSize::new(2),
326 }
327 }
328}
329
330impl Deref for LineEnding {
331 type Target = str;
332
333 fn deref(&self) -> &Self::Target {
334 self.as_str()
335 }
336}
337
#[cfg(test)]
mod tests {
    use super::Line;
    use super::UniversalNewlineIterator;
    use crate::text_size::TextSize;

    /// An empty input yields no lines in either direction.
    #[test]
    fn universal_newlines_empty_str() {
        let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
        assert_eq!(lines, Vec::<Line>::new());

        let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
        assert_eq!(lines, Vec::<Line>::new());
    }

    /// Forward iteration splits on `\n`, `\r\n`, and `\r`, with and without trailing newlines.
    #[test]
    fn universal_newlines_forward() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
                Line::new("\n", TextSize::from(18)),
            ]
        );
    }

    /// Backward iteration yields the same lines and offsets as forward iteration, in reverse.
    #[test]
    fn universal_newlines_backwards() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
            .rev()
            .collect();
        assert_eq!(
            lines,
            vec![
                Line::new("bop", TextSize::from(14)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );

        // Compare whole `Line` values. Mapping each line through `as_str()` first would route the
        // comparison through `PartialEq<Line> for &str`, which ignores both the offsets and the
        // line terminators listed below.
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
            .rev()
            .collect();

        assert_eq!(
            lines,
            vec![
                Line::new("bop\n", TextSize::from(13)),
                Line::new("baz\r", TextSize::from(9)),
                Line::new("\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );
    }

    /// Alternating between both ends consumes every line exactly once.
    #[test]
    fn universal_newlines_mixed() {
        let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");

        assert_eq!(
            lines.next_back(),
            Some(Line::new("bop", TextSize::from(14)))
        );
        assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("baz\r", TextSize::from(10)))
        );
        assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("\r\n", TextSize::from(8)))
        );
        assert_eq!(lines.next(), None);
    }
}