Skip to main content

oxihuman_core/
whitespace_normalizer.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! Normalize whitespace and line endings in text buffers.
6//!
7//! Handles CRLF → LF, trailing whitespace trimming, collapsing of
8//! redundant blank lines, and EOF newline normalization.
9
/// The desired line-ending style.
///
/// The variants carry no data, so the type is `Copy` and can be compared,
/// hashed, and passed by value without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineEnding {
    /// Line feed only (`"\n"`).
    Lf,
    /// Carriage return followed by line feed (`"\r\n"`).
    CrLf,
    /// Carriage return only (`"\r"`).
    Cr,
}
17
18impl LineEnding {
19    pub fn as_str(&self) -> &'static str {
20        match self {
21            LineEnding::Lf => "\n",
22            LineEnding::CrLf => "\r\n",
23            LineEnding::Cr => "\r",
24        }
25    }
26}
27
28/// Configuration for whitespace normalization.
29#[derive(Debug, Clone)]
30pub struct NormalizerConfig {
31    pub target_ending: LineEnding,
32    pub trim_trailing: bool,
33    pub max_blank_lines: usize,
34    pub ensure_final_newline: bool,
35}
36
37impl Default for NormalizerConfig {
38    fn default() -> Self {
39        Self {
40            target_ending: LineEnding::Lf,
41            trim_trailing: true,
42            max_blank_lines: 2,
43            ensure_final_newline: true,
44        }
45    }
46}
47
/// Statistics about whitespace issues found in text.
///
/// Derives `PartialEq`/`Eq` so that a whole result can be compared against an
/// expected value in one assertion.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WhitespaceStats {
    /// Number of `"\r\n"` sequences found in the text.
    pub crlf_count: usize,
    /// Number of lines that end in whitespace.
    pub trailing_whitespace_lines: usize,
    /// Number of blank lines that exceed the configured maximum run length.
    pub excess_blank_lines: usize,
    /// `true` when a final newline is required but the text does not end
    /// with one.
    pub missing_final_newline: bool,
}
56
57impl WhitespaceStats {
58    pub fn has_issues(&self) -> bool {
59        self.crlf_count > 0
60            || self.trailing_whitespace_lines > 0
61            || self.excess_blank_lines > 0
62            || self.missing_final_newline
63    }
64}
65
66/// Detect whitespace issues in `text` relative to `cfg`.
67pub fn detect_issues(text: &str, cfg: &NormalizerConfig) -> WhitespaceStats {
68    let mut stats = WhitespaceStats::default();
69    let mut blank_run = 0usize;
70
71    stats.crlf_count = text.matches("\r\n").count();
72
73    for line in text.lines() {
74        if line.is_empty() {
75            blank_run += 1;
76            if blank_run > cfg.max_blank_lines {
77                stats.excess_blank_lines += 1;
78            }
79        } else {
80            blank_run = 0;
81            if line != line.trim_end() {
82                stats.trailing_whitespace_lines += 1;
83            }
84        }
85    }
86
87    stats.missing_final_newline =
88        cfg.ensure_final_newline && !text.ends_with('\n') && !text.ends_with("\r\n");
89    stats
90}
91
92/// Normalize whitespace in `text` according to `cfg`.
93pub fn normalize(text: &str, cfg: &NormalizerConfig) -> String {
94    /* Convert all endings to LF first */
95    let unified = text.replace("\r\n", "\n").replace('\r', "\n");
96    let ending = cfg.target_ending.as_str();
97
98    let mut out = String::with_capacity(unified.len());
99    let mut blank_run = 0usize;
100
101    for line in unified.lines() {
102        let processed = if cfg.trim_trailing {
103            line.trim_end()
104        } else {
105            line
106        };
107        if processed.is_empty() {
108            blank_run += 1;
109            if blank_run <= cfg.max_blank_lines {
110                out.push_str(ending);
111            }
112        } else {
113            blank_run = 0;
114            out.push_str(processed);
115            out.push_str(ending);
116        }
117    }
118
119    /* Handle ensure_final_newline */
120    if cfg.ensure_final_newline && !out.ends_with(ending) {
121        out.push_str(ending);
122    }
123
124    out
125}
126
/// Count trailing whitespace characters on a single line.
///
/// The result is a number of characters (Unicode scalar values), not bytes:
/// a trailing multi-byte whitespace character such as U+3000 counts as one.
/// Whitespace is classified with `char::is_whitespace`.
pub fn trailing_whitespace_count(line: &str) -> usize {
    line.chars()
        .rev()
        .take_while(|c| c.is_whitespace())
        .count()
}
131
/// Strip all trailing whitespace from every line, joining with `\n`.
///
/// The result always ends with a single `\n` appended after the last line,
/// so empty input yields `"\n"`.
pub fn strip_trailing(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len() + 1);
    for (idx, raw) in text.lines().enumerate() {
        // Separator goes between lines only; the final `\n` is added below.
        if idx > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(raw.trim_end());
    }
    cleaned.push('\n');
    cleaned
}
140
/// Collapse runs of blank lines to a maximum of `max` consecutive.
///
/// A line counts as blank when it is empty or contains only whitespace; kept
/// blank lines are written as a bare `\n`. Non-blank lines are copied
/// unchanged and each is followed by `\n`.
pub fn collapse_blank_lines(text: &str, max: usize) -> String {
    let mut run = 0usize;
    text.lines().fold(String::new(), |mut acc, line| {
        if line.trim().is_empty() {
            run += 1;
            if run > max {
                // Over the limit: drop this blank line entirely.
                return acc;
            }
        } else {
            run = 0;
            acc.push_str(line);
        }
        acc.push('\n');
        acc
    })
}
159
#[cfg(test)]
mod tests {
    //! Unit tests for the whitespace normalizer. Expectations are pinned to
    //! exact values so that a regression in any step is caught.

    use super::*;

    #[test]
    fn test_detect_crlf() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("line1\r\nline2\r\n", &cfg);
        assert_eq!(stats.crlf_count, 2);
    }

    #[test]
    fn test_detect_trailing_whitespace() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("line   \nclean\n", &cfg);
        assert_eq!(stats.trailing_whitespace_lines, 1);
    }

    #[test]
    fn test_detect_excess_blank_lines() {
        let cfg = NormalizerConfig::default();
        // Four blank lines in a row with a limit of two: two are excess.
        let stats = detect_issues("a\n\n\n\n\nb\n", &cfg);
        assert_eq!(stats.excess_blank_lines, 2);
    }

    #[test]
    fn test_detect_missing_final_newline() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("no newline", &cfg);
        assert!(stats.missing_final_newline);
        assert!(stats.has_issues());
    }

    #[test]
    fn test_normalize_crlf_to_lf() {
        let cfg = NormalizerConfig::default();
        let result = normalize("a\r\nb\r\n", &cfg);
        assert_eq!(result, "a\nb\n");
    }

    #[test]
    fn test_normalize_lf_to_crlf() {
        let cfg = NormalizerConfig {
            target_ending: LineEnding::CrLf,
            ..NormalizerConfig::default()
        };
        let result = normalize("a\nb\n", &cfg);
        assert_eq!(result, "a\r\nb\r\n");
    }

    #[test]
    fn test_normalize_trim_trailing() {
        let cfg = NormalizerConfig::default();
        let result = normalize("hello   \n", &cfg);
        assert_eq!(result, "hello\n");
    }

    #[test]
    fn test_normalize_collapses_blank_lines() {
        let cfg = NormalizerConfig::default();
        let result = normalize("a\n\n\n\n\nb\n", &cfg);
        assert_eq!(result, "a\n\n\nb\n");
    }

    #[test]
    fn test_normalize_adds_final_newline() {
        let cfg = NormalizerConfig::default();
        let result = normalize("a", &cfg);
        assert_eq!(result, "a\n");
    }

    #[test]
    fn test_collapse_blank_lines() {
        let text = "a\n\n\n\nb\n";
        let result = collapse_blank_lines(text, 1);
        assert_eq!(result, "a\n\nb\n");
    }

    #[test]
    fn test_strip_trailing() {
        let result = strip_trailing("  hi   \n");
        assert_eq!(result, "  hi\n");
    }

    #[test]
    fn test_trailing_whitespace_count() {
        assert_eq!(trailing_whitespace_count("abc   "), 3);
        assert_eq!(trailing_whitespace_count("abc"), 0);
    }

    #[test]
    fn test_has_issues_false_for_clean() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("clean line\n", &cfg);
        assert!(!stats.has_issues());
    }

    #[test]
    fn test_line_ending_as_str() {
        assert_eq!(LineEnding::Lf.as_str(), "\n");
        assert_eq!(LineEnding::CrLf.as_str(), "\r\n");
        assert_eq!(LineEnding::Cr.as_str(), "\r");
    }
}