Skip to main content

grit_lib/
stripspace.rs

1//! Core logic for `git stripspace`.
2//!
3//! Provides whitespace stripping and comment-line prefixing transformations
4//! that match Git's behaviour:
5//!
6//! - Strip trailing whitespace from every line.
7//! - Collapse multiple consecutive blank lines into one.
8//! - Remove leading and trailing blank lines.
9//! - Ensure non-empty output ends with a newline.
10//! - Optionally strip lines that start with a comment prefix string.
11//! - Optionally prefix every line with the comment character.
12
/// Selects which transformation [`process`] applies to its input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Mode {
    /// Trim trailing whitespace from each line, squeeze runs of blank lines
    /// down to a single one, and drop blank lines at the start and end.
    Default,
    /// Everything [`Mode::Default`] does, plus dropping comment lines.
    ///
    /// The payload is the comment prefix (e.g. `"#"`); a line counts as a
    /// comment when its leading bytes equal that prefix.
    StripComments(String),
    /// Turn every input line into a comment line.
    ///
    /// The payload is the comment prefix.  Lines that are non-empty and do
    /// not begin with a tab are emitted as `prefix + " " + line`; empty lines
    /// and tab-led lines are emitted as `prefix + line`, so the output never
    /// contains the `SP-HT` sequence (`# \t…`), which Git avoids as well.
    CommentLines(String),
}
30
31/// Process `input` bytes according to `mode` and return the result.
32///
33/// # Parameters
34///
35/// - `input`: raw bytes read from stdin.
36/// - `mode`: controls whether to strip, strip-and-remove-comments, or add comments.
37///
38/// # Returns
39///
40/// A `Vec<u8>` with the transformed content.  Returns an empty vector when the
41/// input consists entirely of whitespace (in strip modes) or is itself empty.
42///
43/// # Examples
44///
45/// ```
46/// use grit_lib::stripspace::{process, Mode};
47///
48/// let out = process(b"hello   \n\n\nworld\n", &Mode::Default);
49/// assert_eq!(out, b"hello\n\nworld\n");
50///
51/// let out = process(b"# comment\ntext\n", &Mode::StripComments("#".into()));
52/// assert_eq!(out, b"text\n");
53///
54/// let out = process(b"foo\n\nbar\n", &Mode::CommentLines("#".into()));
55/// assert_eq!(out, b"# foo\n#\n# bar\n");
56/// ```
57#[must_use]
58pub fn process(input: &[u8], mode: &Mode) -> Vec<u8> {
59    match mode {
60        Mode::Default => strip(input, None),
61        Mode::StripComments(prefix) => strip(input, Some(prefix.as_str())),
62        Mode::CommentLines(prefix) => comment_lines(input, prefix.as_str()),
63    }
64}
65
/// Returns a copy of `line` with trailing whitespace removed.
///
/// Trailing space (`' '`), tab (`'\t'`) and carriage-return (`'\r'`) bytes
/// are dropped.  Carriage returns are included because Git's own
/// `stripspace` trims with its `isspace`, which treats `'\r'` as whitespace;
/// without this, CRLF input would keep its `'\r'` and a bare `"\r\n"` line
/// would not be recognised as blank.
///
/// If `line` contains a `'\n'`, everything from the last `'\n'` onwards is
/// replaced by a single `'\n'` after trimming; callers are expected to pass
/// one line whose only newline is its final byte.  If there is no `'\n'`,
/// the trimmed content is returned without one.
fn strip_trailing(line: &[u8]) -> Vec<u8> {
    // Split off the terminator so it is never mistaken for trailing
    // whitespace and can be re-attached afterwards.
    let (content, terminated) = match line.iter().rposition(|&b| b == b'\n') {
        Some(nl) => (&line[..nl], true),
        None => (line, false),
    };

    // Length of the content up to and including the last non-whitespace byte;
    // zero when the content is whitespace only.
    let keep = content
        .iter()
        .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\r'))
        .map_or(0, |last| last + 1);

    let mut result = Vec::with_capacity(keep + usize::from(terminated));
    result.extend_from_slice(&content[..keep]);
    if terminated {
        result.push(b'\n');
    }
    result
}
87
88/// Core strip implementation shared by [`Mode::Default`] and
89/// [`Mode::StripComments`].
90///
91/// When `comment_prefix` is `Some(s)`, lines whose bytes begin with `s` are
92/// discarded before any other processing.
93fn strip(input: &[u8], comment_prefix: Option<&str>) -> Vec<u8> {
94    if input.is_empty() {
95        return Vec::new();
96    }
97
98    // Ensure the data ends with a newline so every line is terminated.
99    let owned;
100    let data: &[u8] = if input.last() != Some(&b'\n') {
101        owned = {
102            let mut v = input.to_vec();
103            v.push(b'\n');
104            v
105        };
106        &owned
107    } else {
108        input
109    };
110
111    let mut result: Vec<u8> = Vec::new();
112    let mut pending_blank: usize = 0;
113    let mut saw_content = false;
114
115    let mut pos = 0;
116    while pos < data.len() {
117        let next = data[pos..]
118            .iter()
119            .position(|&b| b == b'\n')
120            .map(|p| pos + p + 1)
121            .unwrap_or(data.len());
122        let raw_line = &data[pos..next];
123        pos = next;
124
125        // Discard comment lines when requested.
126        if let Some(prefix) = comment_prefix {
127            if raw_line.starts_with(prefix.as_bytes()) {
128                continue;
129            }
130        }
131
132        // Strip trailing whitespace; the result ends with '\n'.
133        let stripped = strip_trailing(raw_line);
134
135        // A line that reduces to just '\n' is blank.
136        if stripped == [b'\n'] {
137            if saw_content {
138                pending_blank += 1;
139            }
140            // Skip leading blank lines (before any real content).
141            continue;
142        }
143
144        // Non-blank line: flush at most one pending blank, then emit the line.
145        if saw_content && pending_blank > 0 {
146            result.push(b'\n');
147        }
148        pending_blank = 0;
149        saw_content = true;
150        result.extend_from_slice(&stripped);
151    }
152
153    result
154}
155
/// Turn every line of `input` into a comment line.
///
/// Each line is emitted as `comment_prefix`, an optional single space, the
/// line's content, and a `'\n'`:
///
/// - the space is inserted only when the content is non-empty and does not
///   begin with `'\t'`;
/// - empty lines and tab-led lines therefore get the bare `comment_prefix`.
///
/// A final line without a terminating newline is treated like any other line
/// and gains one in the output.  Empty input produces empty output.
///
/// This mirrors `strbuf_add_commented_lines` in Git, which avoids the
/// `SP-HT` sequence `"# \t…"`.
fn comment_lines(input: &[u8], comment_prefix: &str) -> Vec<u8> {
    let marker = comment_prefix.as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(input.len());

    for line in input.split_inclusive(|&byte| byte == b'\n') {
        // Peel off the terminator (absent only on an unterminated last line).
        let body = match line.split_last() {
            Some((&b'\n', rest)) => rest,
            _ => line,
        };

        out.extend_from_slice(marker);
        // No separator for empty or tab-led content, so that neither a
        // trailing space nor the SP-HT sequence is produced.
        if !matches!(body.first(), None | Some(&b'\t')) {
            out.push(b' ');
        }
        out.extend_from_slice(body);
        out.push(b'\n');
    }

    out
}
211
/// Unit tests driving the public [`process`] entry point in each [`Mode`].
#[cfg(test)]
mod tests {
    use super::*;

    // ── Mode::Default ────────────────────────────────────────────────────────

    #[test]
    fn default_strips_trailing_whitespace() {
        let out = process(b"hello   \n", &Mode::Default);
        assert_eq!(out, b"hello\n");
    }

    #[test]
    fn default_collapses_consecutive_blank_lines() {
        let out = process(b"a\n\n\n\nb\n", &Mode::Default);
        assert_eq!(out, b"a\n\nb\n");
    }

    #[test]
    fn default_removes_leading_blank_lines() {
        let out = process(b"\n\n\ntext\n", &Mode::Default);
        assert_eq!(out, b"text\n");
    }

    #[test]
    fn default_removes_trailing_blank_lines() {
        let out = process(b"text\n\n\n", &Mode::Default);
        assert_eq!(out, b"text\n");
    }

    #[test]
    fn default_all_whitespace_yields_empty() {
        assert_eq!(process(b"   \n  \n\n", &Mode::Default), b"");
        assert_eq!(process(b"\n", &Mode::Default), b"");
        assert_eq!(process(b"", &Mode::Default), b"");
    }

    #[test]
    fn default_adds_trailing_newline_when_missing() {
        let out = process(b"text", &Mode::Default);
        assert_eq!(out, b"text\n");
    }

    #[test]
    fn default_preserves_leading_spaces_on_line() {
        let out = process(b"  indented\n", &Mode::Default);
        assert_eq!(out, b"  indented\n");
    }

    #[test]
    fn default_blank_lines_between_whitespace_only_lines() {
        // Lines with only spaces count as blank.
        let out = process(b"a\n   \n   \nb\n", &Mode::Default);
        assert_eq!(out, b"a\n\nb\n");
    }

    // ── Mode::StripComments ──────────────────────────────────────────────────

    #[test]
    fn strip_comments_removes_hash_lines() {
        // Comment lines are simply removed; no blank is inserted in their place.
        let out = process(b"text\n# comment\nmore\n", &Mode::StripComments("#".into()));
        assert_eq!(out, b"text\nmore\n");
    }

    #[test]
    fn strip_comments_only_comment_lines_yields_empty() {
        let out = process(b"# comment\n", &Mode::StripComments("#".into()));
        assert_eq!(out, b"");
    }

    #[test]
    fn strip_comments_keeps_non_comment_lines() {
        // Only a prefix at the very start of a line marks a comment: a `#`
        // later in the line, or after leading whitespace, leaves it intact.
        let out = process(
            b"text # not a comment\n  # indented\n",
            &Mode::StripComments("#".into()),
        );
        assert_eq!(out, b"text # not a comment\n  # indented\n");
    }

    #[test]
    fn strip_comments_collapses_blanks_around_removed_comment() {
        // The blank lines on either side of a removed comment merge into one.
        let out = process(b"a\n\n# gone\n\nb\n", &Mode::StripComments("#".into()));
        assert_eq!(out, b"a\n\nb\n");
    }

    #[test]
    fn strip_comments_multichar_prefix() {
        let out = process(
            b"// removed\nnormal line\n",
            &Mode::StripComments("//".into()),
        );
        assert_eq!(out, b"normal line\n");
    }

    // ── Mode::CommentLines ───────────────────────────────────────────────────

    #[test]
    fn comment_lines_prefixes_non_empty() {
        let out = process(b"foo\n", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"# foo\n");
    }

    #[test]
    fn comment_lines_empty_line_gets_bare_prefix() {
        let out = process(b"\n", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"#\n");
    }

    #[test]
    fn comment_lines_tab_line_avoids_sp_ht() {
        // "\tone" → "#\tone", not "# \tone"
        let out = process(b"\tone\n", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"#\tone\n");
    }

    #[test]
    fn comment_lines_adds_trailing_newline() {
        let out = process(b"foo", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"# foo\n");
    }

    #[test]
    fn comment_lines_empty_input_yields_empty() {
        let out = process(b"", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"");
    }

    #[test]
    fn comment_lines_multiple_lines() {
        let out = process(b"\tone\n\ntwo\n", &Mode::CommentLines("#".into()));
        assert_eq!(out, b"#\tone\n#\n# two\n");
    }
}