1use std::{iter::Peekable, ops::Range};
2
3use lapce_xi_rope::{DeltaBuilder, Rope, RopeDelta};
4use memchr::{memchr, memchr2};
5use std::sync::LazyLock;
6
/// Rope containing the two-byte line ending `\r\n`; constructed lazily on first access.
static CR_LF: LazyLock<Rope> = LazyLock::new(|| Rope::from("\r\n"));
/// Rope containing the one-byte line ending `\n`; constructed lazily on first access.
static LF: LazyLock<Rope> = LazyLock::new(|| Rope::from("\n"));
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
12pub enum LineEnding {
13 CrLf,
15 Lf,
17}
18impl LineEnding {
19 pub fn normalize(self, text: &Rope) -> Rope {
22 self.normalize_delta(text)
23 .map(|d| d.apply(text))
24 .unwrap_or_else(|| text.clone())
25 }
26
27 pub fn normalize_delta(self, text: &Rope) -> Option<RopeDelta> {
28 let mut builder = DeltaBuilder::new(text.len());
29
30 let le = if self == LineEnding::Lf {
31 LF.clone()
32 } else {
33 CR_LF.clone()
34 };
35
36 let mut had_entries = false;
37 for (range, kind) in FullLeChunkSearch::new(text.iter_chunks(..)) {
38 had_entries = true;
39 match kind {
40 LeChunkKind::CrLf => {
41 if self == LineEnding::Lf {
42 builder.replace(range, LF.clone());
43 }
44 }
45 LeChunkKind::Lf => {
46 if self == LineEnding::CrLf {
47 builder.replace(range, CR_LF.clone());
48 }
49 }
50 LeChunkKind::Cr => {
51 builder.replace(range, le.clone());
52 }
53 }
54 }
55
56 if had_entries {
57 let delta = builder.build();
58 Some(delta)
59 } else {
60 None
61 }
62 }
63
64 pub fn normalize_limited(self, text: &Rope) -> Rope {
66 let mut builder = DeltaBuilder::new(text.len());
67
68 let le = if self == LineEnding::Lf {
69 LF.clone()
70 } else {
71 CR_LF.clone()
72 };
73
74 let mut had_entries = false;
75 for offset in LoneCrChunkSearch::new(text.iter_chunks(..)) {
76 had_entries = true;
77 builder.replace(offset..offset + 1, le.clone());
78 }
79
80 if had_entries {
81 let delta = builder.build();
82 delta.apply(text)
83 } else {
84 text.clone()
85 }
86 }
87
88 pub fn as_str(&self) -> &'static str {
90 match self {
91 LineEnding::CrLf => "CRLF",
92 LineEnding::Lf => "LF",
93 }
94 }
95}
96
97#[derive(Debug, Clone, Copy)]
98pub enum LineEndingDetermination {
99 CrLf,
100 Lf,
101 Mixed,
102 Unknown,
103}
104impl LineEndingDetermination {
105 pub fn determine(text: &Rope) -> Self {
108 let mut crlf = false;
109 let mut lf = false;
110
111 for chunk in text.iter_chunks(..) {
112 match LineEndingDetermination::determine_str(chunk) {
113 LineEndingDetermination::CrLf => crlf = true,
114 LineEndingDetermination::Lf => lf = true,
115 LineEndingDetermination::Mixed => {
116 return LineEndingDetermination::Mixed;
117 }
118 LineEndingDetermination::Unknown => {}
119 }
120 }
121
122 match (crlf, lf) {
123 (true, true) => LineEndingDetermination::Mixed,
124 (true, false) => LineEndingDetermination::CrLf,
125 (false, true) => LineEndingDetermination::Lf,
126 (false, false) => LineEndingDetermination::Unknown,
127 }
128 }
129
130 fn determine_str(chunk: &str) -> LineEndingDetermination {
131 let bytes = chunk.as_bytes();
132 let newline = memchr2(b'\n', b'\r', bytes);
133 match newline {
134 Some(x) if bytes[x] == b'\r' && bytes.len() > x + 1 && bytes[x + 1] == b'\n' => {
135 LineEndingDetermination::CrLf
136 }
137 Some(x) if bytes[x] == b'\n' => LineEndingDetermination::Lf,
138 Some(_) => LineEndingDetermination::Mixed,
139 None => LineEndingDetermination::Unknown,
140 }
141 }
142
143 pub fn unwrap_or(self, le: LineEnding) -> LineEnding {
144 match self {
145 LineEndingDetermination::CrLf => LineEnding::CrLf,
146 LineEndingDetermination::Lf => LineEnding::Lf,
147 LineEndingDetermination::Mixed | LineEndingDetermination::Unknown => le,
148 }
149 }
150}
151
/// The kind of line ending reported by `FullLeChunkSearch`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum LeChunkKind {
    /// A `\r\n` pair.
    CrLf,
    /// A `\n` that is not part of a `\r\n` pair.
    Lf,
    /// A `\r` that is not followed by `\n`.
    Cr,
}
158
159struct FullLeChunkSearch<'a, I: Iterator<Item = &'a str>> {
161 offset: usize,
162 chunk_pos: usize,
164 chunks: Peekable<I>,
165}
166impl<'a, I: Iterator<Item = &'a str>> FullLeChunkSearch<'a, I> {
167 fn new(chunks: I) -> Self {
168 Self {
169 offset: 0,
170 chunk_pos: 0,
171 chunks: chunks.peekable(),
172 }
173 }
174
175 fn get_chunk(&mut self) -> Option<&'a str> {
177 let chunk = self.chunks.peek()?;
178 if self.chunk_pos >= chunk.len() {
179 self.advance_chunk();
180 Some(*self.chunks.peek()?)
181 } else {
182 Some(chunk)
183 }
184 }
185
186 fn advance_chunk(&mut self) -> Option<()> {
187 let chunk = self.chunks.next()?;
188 self.offset += chunk.len();
189 self.chunk_pos = 0;
190
191 Some(())
192 }
193}
194impl<'a, I: Iterator<Item = &'a str>> Iterator for FullLeChunkSearch<'a, I> {
195 type Item = (Range<usize>, LeChunkKind);
196
197 fn next(&mut self) -> Option<Self::Item> {
198 let chunk = self.get_chunk()?;
199
200 let bytes = &chunk.as_bytes()[self.chunk_pos..];
201
202 let newline = memchr2(b'\n', b'\r', bytes);
203 match newline {
204 Some(x) if bytes[x] == b'\r' && bytes.len() > x + 1 && bytes[x + 1] == b'\n' => {
206 let start = self.offset + self.chunk_pos + x;
207 let end = start + 2;
208
209 self.chunk_pos += x + 2;
210 Some((start..end, LeChunkKind::CrLf))
211 }
212 Some(x) if bytes[x] == b'\n' => {
214 let start = self.offset + self.chunk_pos + x;
215 let end = start + 1;
216
217 self.chunk_pos += x + 1;
218 Some((start..end, LeChunkKind::Lf))
219 }
220 Some(x) => {
221 assert_eq!(bytes[x], b'\r');
225
226 let start = self.offset + self.chunk_pos + x;
227 self.chunk_pos += x + 1;
228
229 let v = if self.chunk_pos == chunk.len() {
230 if let Some(next_chunk) = self.get_chunk() {
231 let next_chunk = &next_chunk.as_bytes()[self.chunk_pos..];
232 if next_chunk.starts_with(b"\n") {
233 self.chunk_pos += 1;
234 Some((start..start + 2, LeChunkKind::CrLf))
235 } else {
236 None
237 }
238 } else {
239 None
240 }
241 } else {
242 None
243 };
244
245 Some(v.unwrap_or_else(|| {
246 let end = start + 1;
249 (start..end, LeChunkKind::Cr)
250 }))
251 }
252 None => {
253 self.advance_chunk();
254 self.next()
255 }
256 }
257 }
258}
259
260struct LoneCrChunkSearch<'a, I: Iterator<Item = &'a str>> {
262 offset: usize,
264 chunk_pos: usize,
265 chunks: Peekable<I>,
266}
267
268impl<'a, I: Iterator<Item = &'a str>> LoneCrChunkSearch<'a, I> {
269 fn new(chunks: I) -> Self {
270 Self {
271 offset: 0,
272 chunk_pos: 0,
273 chunks: chunks.peekable(),
274 }
275 }
276
277 fn get_chunk(&mut self) -> Option<&'a str> {
280 let chunk = self.chunks.peek()?;
281 if self.chunk_pos >= chunk.len() {
282 self.advance_chunk();
283 Some(*self.chunks.peek()?)
284 } else {
285 Some(chunk)
286 }
287 }
288
289 fn advance_chunk(&mut self) -> Option<()> {
290 let chunk = self.chunks.next()?;
291 self.offset += chunk.len();
292 self.chunk_pos = 0;
293
294 Some(())
295 }
296}
297
298impl<'a, I: Iterator<Item = &'a str>> Iterator for LoneCrChunkSearch<'a, I> {
299 type Item = usize;
300
301 fn next(&mut self) -> Option<Self::Item> {
302 loop {
303 let chunk = self.get_chunk()?;
304
305 let bytes = &chunk.as_bytes()[self.chunk_pos..];
306
307 let newline = memchr(b'\r', bytes);
308 match newline {
309 Some(x) => {
310 let offset = self.offset + self.chunk_pos + x;
311
312 self.chunk_pos += x + 1;
314 if self.chunk_pos < chunk.len() && chunk.as_bytes()[self.chunk_pos] == b'\n' {
315 self.chunk_pos += 1;
317 } else if let Some(chunk_b) = self.get_chunk() {
318 let chunk_b = &chunk_b.as_bytes()[self.chunk_pos..];
319 if chunk_b.starts_with(b"\n") {
320 self.chunk_pos += 1;
322 } else {
323 return Some(offset);
325 }
326 } else {
327 return Some(offset);
329 }
330 }
331 None => {
332 self.advance_chunk();
333 }
334 }
335 }
336 }
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Text with one `\r\n` and two `\n` line endings, shared by several checks.
    const SAMPLE: &str = "hello\r\nworld toast and jam\nthe end\nhi";

    /// Runs `LineEnding::normalize` on `input` and returns the result as a `String`.
    fn normalized(ending: LineEnding, input: &str) -> String {
        ending
            .normalize(&Rope::from(input))
            .slice_to_cow(..)
            .into_owned()
    }

    /// Runs `LineEnding::normalize_limited` on `input` and returns the result as a `String`.
    fn normalized_limited(ending: LineEnding, input: &str) -> String {
        ending
            .normalize_limited(&Rope::from(input))
            .slice_to_cow(..)
            .into_owned()
    }

    #[test]
    fn normalize() {
        assert_eq!(
            normalized(LineEnding::CrLf, SAMPLE),
            "hello\r\nworld toast and jam\r\nthe end\r\nhi"
        );

        // (input, expected after normalizing to LF, expected after normalizing to CRLF)
        let cases = [
            ("\n", "\n", "\r\n"),
            ("\r\n", "\n", "\r\n"),
            ("\r", "\n", "\r\n"),
            ("\rtest", "\ntest", "\r\ntest"),
        ];
        for (input, as_lf, as_crlf) in cases {
            assert_eq!(normalized(LineEnding::Lf, input), as_lf);
            assert_eq!(normalized(LineEnding::CrLf, input), as_crlf);
        }

        // The limited variant only has to deal with lone carriage returns.
        for (input, expected) in [("\r", "\n"), ("\rtest", "\ntest")] {
            assert_eq!(normalized_limited(LineEnding::Lf, input), expected);
        }
    }

    #[test]
    fn chunk_search() {
        use super::LeChunkKind::{Cr, CrLf, Lf};

        // A rope built from a single string.
        let rope = Rope::from(SAMPLE);
        let endings: Vec<_> = FullLeChunkSearch::new(rope.iter_chunks(..)).collect();
        assert_eq!(endings, vec![(5..7, CrLf), (26..27, Lf), (34..35, Lf)]);
        let lone_crs: Vec<usize> = LoneCrChunkSearch::new(rope.iter_chunks(..)).collect();
        assert_eq!(lone_crs, Vec::<usize>::new());

        // Hand-made chunks, including `\r\n` pairs that are split across a chunk boundary.
        let chunks = ["a\n", "\n5", "\r\ne\r", "\ntest\r", "\rv"];
        let endings: Vec<_> = FullLeChunkSearch::new(chunks.into_iter()).collect();
        assert_eq!(
            endings,
            vec![
                (1..2, Lf),
                (2..3, Lf),
                (4..6, CrLf),
                (7..9, CrLf),
                (13..14, Cr),
                (14..15, Cr),
            ]
        );
        let lone_crs: Vec<usize> = LoneCrChunkSearch::new(chunks.into_iter()).collect();
        assert_eq!(lone_crs, vec![13, 14]);

        // `\n` directly followed by a lone `\r` inside one chunk.
        let single = ["\n\rb"];
        let endings: Vec<_> = FullLeChunkSearch::new(single.into_iter()).collect();
        assert_eq!(endings, vec![(0..1, Lf), (1..2, Cr)]);
        let lone_crs: Vec<usize> = LoneCrChunkSearch::new(single.into_iter()).collect();
        assert_eq!(lone_crs, vec![1]);
    }
}