line_ending/
lib.rs

// Only compiled when rustdoc is collecting doctests (`cfg(doctest)`). The
// README is handed to the `doc_comment` crate's `doctest!` macro, presumably so
// that the code examples in the README are compiled and run as doctests too.
#[cfg(doctest)]
doc_comment::doctest!("../README.md");
3
4use std::collections::HashMap;
5
/// Enum representing the detected line ending style.
///
/// The variants are named after the conventional abbreviations of the control
/// characters they stand for. The derived `Eq` and `Hash` implementations allow
/// a `LineEnding` to be used as a `HashMap` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// The all-caps variant names are intentional (they mirror the well-known
// abbreviations), so the acronym-casing lint is silenced for this enum.
#[allow(clippy::upper_case_acronyms)]
pub enum LineEnding {
    /// Line Feed (LF) - Common on Unix, Linux, and macOS (`\n`).
    LF,
    /// Carriage Return + Line Feed (CRLF) - Used on Windows (`\r\n`).
    CRLF,
    /// Carriage Return (CR) - Used in older Mac OS (pre-OS X) (`\r`).
    CR,
}
17
/// A mapping of line ending types to their respective occurrence counts.
///
/// This type alias represents a `HashMap<LineEnding, usize>`, where each
/// `LineEnding` key maps to the number of times that specific line ending
/// appears in a given string.
///
/// It is the return type of [`LineEnding::score_mixed_types`], which inserts an
/// entry for every variant (with a count of `0` when that line ending does not
/// occur), and it is read by the `From<&str>` implementation of `LineEnding`
/// to pick the predominant line ending of a text.
pub type LineEndingScores = HashMap<LineEnding, usize>;
27
28impl From<&str> for LineEnding {
29    /// Detects the predominant line ending style used in the input string.
30    ///
31    /// Note: This assumes that the input string is not of varying types, in
32    /// which case there is really
33    ///
34    /// # Example
35    ///
36    /// ```
37    /// use line_ending::LineEnding;
38    ///
39    /// let sample = "first line\r\nsecond line\r\nthird line";
40    /// assert_eq!(LineEnding::from(sample), LineEnding::CRLF);
41    /// ```
42    fn from(s: &str) -> Self {
43        let scores = Self::score_mixed_types(s);
44
45        let crlf_score = *scores.get(&Self::CRLF).unwrap_or(&0);
46        let cr_score = *scores.get(&Self::CR).unwrap_or(&0);
47        let lf_score = *scores.get(&Self::LF).unwrap_or(&0);
48
49        // Select the highest count
50        let max_score = crlf_score.max(cr_score).max(lf_score);
51
52        if max_score == 0 || crlf_score == max_score {
53            // `CRLF` is chosen as a tie-breaker because it represents both `CR`
54            // and `LF`, making it the most inclusive option
55            Self::CRLF
56        } else if cr_score == max_score {
57            Self::CR
58        } else {
59            Self::LF
60        }
61    }
62}
63
64impl LineEnding {
65    /// Counts occurrences of each line ending type in the given string.
66    ///
67    /// This function analyzes the input string and returns a `LineEndingScores`
68    /// (a `HashMap<LineEnding, usize>`) containing the number of times each
69    /// line ending appears.
70    ///
71    /// - `CRLF (\r\n)` is counted first to ensure `\r` inside `\r\n` is not
72    ///   double-counted.
73    /// - `CR (\r)` is counted separately, subtracting occurrences of `CRLF`.
74    /// - `LF (\n)` is counted separately, also subtracting occurrences of `CRLF`.
75    ///
76    /// # Example
77    ///
78    /// ```
79    /// use line_ending::{LineEnding, LineEndingScores};
80    ///
81    /// let text = "line1\r\nline2\r\nline3\nline4\r";
82    /// let scores = LineEnding::score_mixed_types(text);
83    ///
84    /// assert_eq!(scores[&LineEnding::CRLF], 2);
85    /// assert_eq!(scores[&LineEnding::LF], 1);
86    /// assert_eq!(scores[&LineEnding::CR], 1);
87    /// ```
88    pub fn score_mixed_types(s: &str) -> LineEndingScores {
89        let crlf_score = Self::CRLF.split_with(s).len().saturating_sub(1);
90
91        // Ensure CR is not double-counted when it's part of CRLF
92        let cr_score = Self::CR.split_with(s).len().saturating_sub(1) - crlf_score;
93
94        // Ensure LF is not double-counted when it's part of CRLF
95        let lf_score = Self::LF.split_with(s).len().saturating_sub(1) - crlf_score;
96
97        [
98            (LineEnding::CRLF, crlf_score),
99            (LineEnding::CR, cr_score),
100            (LineEnding::LF, lf_score),
101        ]
102        .into_iter()
103        .collect()
104    }
105
106    /// Returns the string representation of the line ending (`\n`, `\r\n`, or `\r`).
107    ///
108    /// # Example
109    ///
110    /// ```
111    /// use line_ending::LineEnding;
112    ///
113    /// assert_eq!(LineEnding::LF.as_str(), "\n");
114    /// assert_eq!(LineEnding::CRLF.as_str(), "\r\n");
115    /// assert_eq!(LineEnding::CR.as_str(), "\r");
116    /// ```
117    pub fn as_str(&self) -> &'static str {
118        match self {
119            Self::LF => "\n",
120            Self::CRLF => "\r\n",
121            Self::CR => "\r",
122        }
123    }
124
125    /// Converts all line endings in a string to LF (`\n`) for consistent processing.
126    ///
127    /// # Example
128    ///
129    /// ```
130    /// use line_ending::LineEnding;
131    ///
132    /// let mixed = "first\r\nsecond\rthird\n";
133    /// assert_eq!(LineEnding::normalize(mixed), "first\nsecond\nthird\n");
134    /// ```
135    pub fn normalize(s: &str) -> String {
136        s.replace("\r\n", "\n").replace("\r", "\n")
137    }
138
139    /// Restores line endings in a string to the specified type.
140    ///
141    /// # Example
142    ///
143    /// ```
144    /// use line_ending::LineEnding;
145    ///
146    /// let normalized = "first\nsecond\nthird";
147    /// assert_eq!(LineEnding::CRLF.denormalize(normalized), "first\r\nsecond\r\nthird");
148    /// assert_eq!(LineEnding::CR.denormalize(normalized), "first\rsecond\rthird");
149    /// ```
150    pub fn denormalize(&self, s: &str) -> String {
151        s.replace("\n", self.as_str())
152    }
153
154    /// Splits a string into a vector of strings using the auto-detected line ending
155    /// parsed from the string.
156    ///
157    /// # Example
158    ///
159    /// ```
160    /// use line_ending::LineEnding;
161    ///
162    /// let text = "line1\r\nline2\r\nline3";
163    /// let lines = LineEnding::split(text);
164    /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
165    /// ```
166    pub fn split(s: &str) -> Vec<String> {
167        let line_ending = Self::from(s).as_str();
168        s.split(line_ending).map(String::from).collect()
169    }
170
171    /// Splits a string into lines using the specified line ending.
172    ///
173    /// In most cases, `split` is the preferred method as it automatically detects the
174    /// line ending to use.
175    ///
176    /// Unlike [`LineEnding::split`], which detects the line ending type from the input,
177    /// this method explicitly uses the line ending type of `self` to split the string.
178    ///
179    /// # Example
180    ///
181    /// ```
182    /// use line_ending::LineEnding;
183    ///
184    /// let text = "line1\r\nline2\r\nline3";
185    /// let lines = LineEnding::CRLF.split_with(text);
186    /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
187    ///
188    /// let text = "line1\nline2\nline3";
189    /// let lines = LineEnding::LF.split_with(text);
190    /// assert_eq!(lines, vec!["line1", "line2", "line3"]);
191    /// ```
192    pub fn split_with(&self, s: &str) -> Vec<String> {
193        s.split(self.as_str()).map(String::from).collect()
194    }
195
196    /// Joins a vector of strings using the specified line ending.
197    ///
198    /// # Example
199    ///
200    /// ```
201    /// use line_ending::LineEnding;
202    ///
203    /// let lines = vec!["line1".to_string(), "line2".to_string(), "line3".to_string()];
204    /// assert_eq!(LineEnding::CRLF.join(lines.clone()), "line1\r\nline2\r\nline3");
205    /// assert_eq!(LineEnding::LF.join(lines.clone()), "line1\nline2\nline3");
206    /// ```
207    pub fn join(&self, lines: Vec<String>) -> String {
208        lines.join(self.as_str())
209    }
210
211    /// Applies a specific line ending type to an existing string.
212    ///
213    /// # Example
214    ///
215    /// ```
216    /// use line_ending::LineEnding;
217    ///
218    /// let mixed_text = "first line\r\nsecond line\rthird line\n";
219    /// assert_eq!(LineEnding::CRLF.apply(mixed_text), "first line\r\nsecond line\r\nthird line\r\n");
220    /// assert_eq!(LineEnding::LF.apply(mixed_text), "first line\nsecond line\nthird line\n");
221    /// ```
222    pub fn apply(&self, s: &str) -> String {
223        let normalized = Self::normalize(s);
224        normalized.replace("\n", self.as_str())
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the project README into a string, panicking with the underlying
    /// I/O error if it cannot be read.
    ///
    /// The path is relative, so it is resolved against the working directory
    /// of the test process (presumably the crate root when run through cargo).
    fn get_readme_contents() -> String {
        let readme_file = "README.md";

        std::fs::read_to_string(readme_file)
            .unwrap_or_else(|err| panic!("Failed to read {}: {}", readme_file, err))
    }

    /// Builds the expected score map for the given per-variant counts.
    fn expected_scores(crlf: usize, cr: usize, lf: usize) -> LineEndingScores {
        [
            (LineEnding::CRLF, crlf),
            (LineEnding::CR, cr),
            (LineEnding::LF, lf),
        ]
        .into_iter()
        .collect()
    }

    #[test]
    fn detects_platform_line_ending_correctly() {
        // Determine line ending from file contents
        let detected = LineEnding::from(get_readme_contents().as_str());

        // Assert expected line ending based on platform
        #[cfg(target_os = "windows")]
        assert_eq!(detected, LineEnding::CRLF, "Windows should detect CRLF");

        #[cfg(target_family = "unix")]
        assert_eq!(detected, LineEnding::LF, "Unix/macOS should detect LF");
    }

    #[test]
    fn detects_lf_correctly() {
        let sample = "first line\nsecond line\nthird line";
        assert_eq!(LineEnding::from(sample), LineEnding::LF);
    }

    #[test]
    fn detects_crlf_correctly() {
        let sample = "first line\r\nsecond line\r\nthird line";
        assert_eq!(LineEnding::from(sample), LineEnding::CRLF);
    }

    #[test]
    fn detects_cr_correctly() {
        let sample = "first line\rsecond line\rthird line";
        assert_eq!(LineEnding::from(sample), LineEnding::CR);
    }

    #[test]
    fn normalize_converts_all_to_lf() {
        let crlf = "first\r\nsecond\r\nthird";
        let cr = "first\rsecond\rthird";
        let lf = "first\nsecond\nthird";

        assert_eq!(LineEnding::normalize(crlf), lf);
        assert_eq!(LineEnding::normalize(cr), lf);
        assert_eq!(LineEnding::normalize(lf), lf);
    }

    #[test]
    fn splits_into_lines() {
        let readme_contents = get_readme_contents();
        let readme_lines = LineEnding::split(&readme_contents);

        assert_eq!(readme_lines.first().unwrap(), "# Rust Line Endings");

        let crlf_lines = LineEnding::split("first\r\nsecond\r\nthird");
        let cr_lines = LineEnding::split("first\rsecond\rthird");
        let lf_lines = LineEnding::split("first\nsecond\nthird");

        let expected = vec!["first", "second", "third"];

        assert_eq!(crlf_lines, expected);
        assert_eq!(cr_lines, expected);
        assert_eq!(lf_lines, expected);
    }

    #[test]
    fn restore_correctly_applies_line_endings() {
        let text = "first\nsecond\nthird";
        let crlf_restored = LineEnding::CRLF.denormalize(text);
        let cr_restored = LineEnding::CR.denormalize(text);
        let lf_restored = LineEnding::LF.denormalize(text);

        assert_eq!(crlf_restored, "first\r\nsecond\r\nthird");
        assert_eq!(cr_restored, "first\rsecond\rthird");
        assert_eq!(lf_restored, "first\nsecond\nthird");
    }

    #[test]
    fn applies_correct_line_endings() {
        let lines = vec![
            "first".to_string(),
            "second".to_string(),
            "third".to_string(),
        ];

        assert_eq!(
            LineEnding::CRLF.join(lines.clone()),
            "first\r\nsecond\r\nthird"
        );
        assert_eq!(LineEnding::CR.join(lines.clone()), "first\rsecond\rthird");
        // Last use of `lines`, so it can be moved instead of cloned.
        assert_eq!(LineEnding::LF.join(lines), "first\nsecond\nthird");
    }

    #[test]
    fn apply_correctly_applies_line_endings() {
        let mixed_text = "first line\r\nsecond line\rthird line\nfourth line\n";

        assert_eq!(
            LineEnding::CRLF.apply(mixed_text),
            "first line\r\nsecond line\r\nthird line\r\nfourth line\r\n"
        );
        assert_eq!(
            LineEnding::CR.apply(mixed_text),
            "first line\rsecond line\rthird line\rfourth line\r"
        );
        assert_eq!(
            LineEnding::LF.apply(mixed_text),
            "first line\nsecond line\nthird line\nfourth line\n"
        );
    }

    #[test]
    fn handles_mixed_line_endings() {
        // Mixed with some CRLF and CR, but LF is dominant
        let mostly_lf = "line1\nline2\r\nline3\rline4\nline5\nline6\n";
        assert_eq!(LineEnding::from(mostly_lf), LineEnding::LF);
        assert_eq!(
            LineEnding::score_mixed_types(mostly_lf),
            expected_scores(1, 1, 4)
        );

        // Mixed with some LF and CR, but CRLF is dominant
        let mostly_crlf = "line1\r\nline2\r\nline3\nline4\rline5\r\nline6\r\n";
        assert_eq!(LineEnding::from(mostly_crlf), LineEnding::CRLF);
        assert_eq!(
            LineEnding::score_mixed_types(mostly_crlf),
            expected_scores(4, 1, 1)
        );

        // Mixed with some LF and CRLF, but CR is dominant
        let mostly_cr = "line1\rline2\r\nline3\rline4\nline5\rline6\r";
        assert_eq!(LineEnding::from(mostly_cr), LineEnding::CR);
        assert_eq!(
            LineEnding::score_mixed_types(mostly_cr),
            expected_scores(1, 4, 1)
        );
    }

    #[test]
    fn handles_mixed_line_edge_cases() {
        // Case 1: One Line Ending Type is Clearly Dominant
        let mostly_crlf = "line1\r\nline2\r\nline3\nline4\r\nline5\r\n";
        assert_eq!(LineEnding::from(mostly_crlf), LineEnding::CRLF); // CRLF is the most common

        // Case 2: All Line Endings Appear Equally
        let equal_mixed = "line1\r\nline2\nline3\rline4\r\nline5\nline6\r";
        assert_eq!(LineEnding::from(equal_mixed), LineEnding::CRLF); // CRLF > CR > LF

        // Case 3: CRLF and CR interleaved, CRLF occurring more often
        let mixed_on_one_line = "line1\r\nline2\rline3\r\nline4\r\nline5\r";
        assert_eq!(LineEnding::from(mixed_on_one_line), LineEnding::CRLF); // CRLF appears the most overall

        // Case 4: Empty Input Defaults to CRLF
        let empty_text = "";
        assert_eq!(LineEnding::from(empty_text), LineEnding::CRLF); // Defaults to CRLF
    }

    #[test]
    fn ignores_escaped_line_endings_in_split() {
        let input_lf = "First\\nSecond\\nThird";
        let input_crlf = "First\\r\\nSecond\\r\\nThird";
        let input_cr = "First\\rSecond\\rThird";

        // Expected output: The input should NOT be split since these are escaped sequences
        assert_eq!(LineEnding::split(input_lf), vec!["First\\nSecond\\nThird"]);
        assert_eq!(
            LineEnding::split(input_crlf),
            vec!["First\\r\\nSecond\\r\\nThird"]
        );
        assert_eq!(LineEnding::split(input_cr), vec!["First\\rSecond\\rThird"]);
    }

    #[test]
    fn split_does_not_split_on_escaped_line_endings() {
        let input_lf = "First\\nSecond\\nThird";
        let input_crlf = "First\\r\\nSecond\\r\\nThird";
        let input_cr = "First\\rSecond\\rThird";

        // Complements the auto-detecting test above: with an explicitly chosen
        // line ending the escaped sequences must also stay unsplit.
        assert_eq!(
            LineEnding::LF.split_with(input_lf),
            vec!["First\\nSecond\\nThird"]
        );
        assert_eq!(
            LineEnding::CRLF.split_with(input_crlf),
            vec!["First\\r\\nSecond\\r\\nThird"]
        );
        assert_eq!(
            LineEnding::CR.split_with(input_cr),
            vec!["First\\rSecond\\rThird"]
        );
    }

    #[test]
    fn split_correctly_splits_on_actual_line_endings() {
        let input_lf = "First\nSecond\nThird";
        let input_crlf = "First\r\nSecond\r\nThird";
        let input_cr = "First\rSecond\rThird";

        // Each input should split correctly based on its actual line endings
        assert_eq!(
            LineEnding::split(input_lf),
            vec!["First", "Second", "Third"]
        );
        assert_eq!(
            LineEnding::split(input_crlf),
            vec!["First", "Second", "Third"]
        );
        assert_eq!(
            LineEnding::split(input_cr),
            vec!["First", "Second", "Third"]
        );
    }

    #[test]
    fn split_detects_mixed_escaped_and_actual_line_endings() {
        // LF test case (escaped `\\n` should not trigger a split, actual `\n` should)
        let input_lf = "First\\nSecond\nThird";
        assert_eq!(LineEnding::split(input_lf), vec!["First\\nSecond", "Third"]);

        // CRLF test case (escaped `\\r\\n` should be ignored, actual `\r\n` should split)
        let input_crlf = "First\\r\\nSecond\r\nThird";
        assert_eq!(
            LineEnding::split(input_crlf),
            vec!["First\\r\\nSecond", "Third"]
        );

        // CR test case (escaped `\\r` should be ignored, actual `\r` should split)
        let input_cr = "First\\rSecond\rThird";
        assert_eq!(LineEnding::split(input_cr), vec!["First\\rSecond", "Third"]);
    }
}