Skip to main content

grit_lib/
ws.rs

1//! Git-compatible whitespace rules (`core.whitespace`, `whitespace` attribute).
2//!
3//! Ported from Git's `ws.c` / `ws.h` for `git apply` parity.
4
/// Flag: blanks (spaces/tabs) left at the end of a line, just before the newline.
pub const WS_BLANK_AT_EOL: u32 = 1 << 6;
/// Flag: a space that precedes a tab inside the indentation.
pub const WS_SPACE_BEFORE_TAB: u32 = 1 << 7;
/// Flag: indentation made of spaces where tabs are expected (run ≥ tab width).
pub const WS_INDENT_WITH_NON_TAB: u32 = 1 << 8;
/// Flag: tolerate a CR right before the LF that ends the line.
pub const WS_CR_AT_EOL: u32 = 1 << 9;
/// Flag: blank lines at the end of the file (dealt with by the apply layer, not `ws_check`).
pub const WS_BLANK_AT_EOF: u32 = 1 << 10;
/// Flag: a tab character inside the indentation.
pub const WS_TAB_IN_INDENT: u32 = 1 << 11;
/// Flag: the file's last line lacks its terminating newline.
pub const WS_INCOMPLETE_LINE: u32 = 1 << 12;

/// Both trailing-blank flags together: [`WS_BLANK_AT_EOL`] and [`WS_BLANK_AT_EOF`].
pub const WS_TRAILING_SPACE: u32 = WS_BLANK_AT_EOF | WS_BLANK_AT_EOL;
/// Mask selecting the six low bits of a rule, which carry the tab width.
pub const WS_TAB_WIDTH_MASK: u32 = 0x3f;
/// Rule used when `core.whitespace` is unset: trailing space, space-before-tab, tab width 8.
pub const WS_DEFAULT_RULE: u32 = 8 | WS_SPACE_BEFORE_TAB | WS_TRAILING_SPACE;
24
/// State of the `whitespace` gitattribute for a path.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum WhitespaceGitAttr {
    /// The attribute does not apply; fall back to `core.whitespace` (or the default rule).
    #[default]
    Unspecified,
    /// The attribute is negated: `-whitespace` (Git's `ATTR_FALSE`).
    False,
    /// The attribute is set without a value: bare `whitespace` (Git's `ATTR_TRUE`).
    True,
    /// The attribute carries an explicit rule list: `whitespace=<rules>`.
    String(String),
}
37
38impl WhitespaceGitAttr {
39    /// Combine with `core.whitespace` the same way Git's `whitespace_rule()` does.
40    pub fn merge_with_config(self, cfg_rule: u32) -> Result<u32, WhitespaceRuleError> {
41        match self {
42            WhitespaceGitAttr::Unspecified => Ok(cfg_rule),
43            WhitespaceGitAttr::False => Ok(tab_width_only(cfg_rule)),
44            WhitespaceGitAttr::True => {
45                let mut all = tab_width_only(cfg_rule);
46                for entry in WS_RULE_ENTRIES {
47                    if !entry.loosens_error && !entry.exclude_default {
48                        all |= entry.bits;
49                    }
50                }
51                Ok(all)
52            }
53            // Git `whitespace_rule()`: a string attribute replaces the rule entirely via
54            // `parse_whitespace_rule(value)` — it does not merge with `core.whitespace`.
55            WhitespaceGitAttr::String(s) => parse_whitespace_rule(&s),
56        }
57    }
58}
59
/// Error returned when a whitespace rule specification cannot be honoured.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it composes with `?`
/// and boxed/dynamic error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WhitespaceRuleError {
    /// The rule enables both `tab-in-indent` and `indent-with-non-tab`, which contradict
    /// each other.
    ConflictingTabAndIndentRules,
}

impl std::fmt::Display for WhitespaceRuleError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Same wording Git uses when it rejects this combination.
            Self::ConflictingTabAndIndentRules => {
                f.write_str("cannot enforce both tab-in-indent and indent-with-non-tab")
            }
        }
    }
}

impl std::error::Error for WhitespaceRuleError {}
64
/// One row of the keyword table used to parse and expand whitespace rules.
struct WsRuleEntry {
    /// Keyword as written in `core.whitespace` / `whitespace=`.
    name: &'static str,
    /// Flag bits set (or cleared, when negated) by the keyword.
    bits: u32,
    /// When true, a bare `whitespace` attribute does not switch this entry on.
    loosens_error: bool,
    /// When true, a bare `whitespace` attribute does not switch this entry on either.
    exclude_default: bool,
}
71
72const WS_RULE_ENTRIES: &[WsRuleEntry] = &[
73    WsRuleEntry {
74        name: "trailing-space",
75        bits: WS_TRAILING_SPACE,
76        loosens_error: false,
77        exclude_default: false,
78    },
79    WsRuleEntry {
80        name: "space-before-tab",
81        bits: WS_SPACE_BEFORE_TAB,
82        loosens_error: false,
83        exclude_default: false,
84    },
85    WsRuleEntry {
86        name: "indent-with-non-tab",
87        bits: WS_INDENT_WITH_NON_TAB,
88        loosens_error: false,
89        exclude_default: false,
90    },
91    WsRuleEntry {
92        name: "cr-at-eol",
93        bits: WS_CR_AT_EOL,
94        loosens_error: true,
95        exclude_default: false,
96    },
97    WsRuleEntry {
98        name: "blank-at-eol",
99        bits: WS_BLANK_AT_EOL,
100        loosens_error: false,
101        exclude_default: false,
102    },
103    WsRuleEntry {
104        name: "blank-at-eof",
105        bits: WS_BLANK_AT_EOF,
106        loosens_error: false,
107        exclude_default: false,
108    },
109    WsRuleEntry {
110        name: "tab-in-indent",
111        bits: WS_TAB_IN_INDENT,
112        loosens_error: false,
113        exclude_default: true,
114    },
115    WsRuleEntry {
116        name: "incomplete-line",
117        bits: WS_INCOMPLETE_LINE,
118        loosens_error: false,
119        exclude_default: false,
120    },
121];
122
123/// Tab width embedded in the low bits of a whitespace rule (1–63).
124#[must_use]
125pub fn ws_tab_width(rule: u32) -> usize {
126    (rule & WS_TAB_WIDTH_MASK) as usize
127}
128
129fn tab_width_only(rule: u32) -> u32 {
130    rule & WS_TAB_WIDTH_MASK
131}
132
133/// Parse a `core.whitespace` / `whitespace=` attribute value into rule bits.
134pub fn parse_whitespace_rule(string: &str) -> Result<u32, WhitespaceRuleError> {
135    let mut rule = WS_DEFAULT_RULE;
136    let mut s = string;
137
138    while !s.is_empty() {
139        s = s.trim_start_matches([',', ' ', '\t', '\n', '\r']);
140        if s.is_empty() {
141            break;
142        }
143        let (negated, rest) = if let Some(r) = s.strip_prefix('-') {
144            (true, r)
145        } else {
146            (false, s)
147        };
148        let end = rest.find(',').unwrap_or(rest.len());
149        let token = rest[..end].trim();
150        s = &rest[end..];
151
152        if token.is_empty() {
153            continue;
154        }
155
156        if let Some(arg) = token.strip_prefix("tabwidth=") {
157            if let Ok(w) = arg.parse::<u32>() {
158                if (0 < w) && (w < 0o100) {
159                    rule &= !WS_TAB_WIDTH_MASK;
160                    rule |= w & WS_TAB_WIDTH_MASK;
161                }
162            }
163            continue;
164        }
165
166        let mut matched = false;
167        for entry in WS_RULE_ENTRIES {
168            // Git matches with `strncmp(rule_name, token, token.len())`: the config
169            // token is a prefix (e.g. `trailing` → `trailing-space`).
170            if entry.name.starts_with(token) {
171                if negated {
172                    rule &= !entry.bits;
173                } else {
174                    rule |= entry.bits;
175                }
176                matched = true;
177                break;
178            }
179        }
180        if !matched {
181            // Unknown token: Git ignores (with optional warning); we ignore.
182        }
183    }
184
185    if (rule & WS_TAB_IN_INDENT) != 0 && (rule & WS_INDENT_WITH_NON_TAB) != 0 {
186        return Err(WhitespaceRuleError::ConflictingTabAndIndentRules);
187    }
188    Ok(rule)
189}
190
191/// Human-readable summary of `ws_check` result flags (Git `whitespace_error_string`).
192#[must_use]
193pub fn whitespace_error_string(ws: u32) -> String {
194    let mut parts: Vec<&'static str> = Vec::new();
195    if (ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE {
196        parts.push("trailing whitespace");
197    } else {
198        if (ws & WS_BLANK_AT_EOL) != 0 {
199            parts.push("trailing whitespace");
200        }
201        if (ws & WS_BLANK_AT_EOF) != 0 {
202            parts.push("new blank line at EOF");
203        }
204    }
205    if (ws & WS_SPACE_BEFORE_TAB) != 0 {
206        parts.push("space before tab in indent");
207    }
208    if (ws & WS_INDENT_WITH_NON_TAB) != 0 {
209        parts.push("indent with spaces");
210    }
211    if (ws & WS_TAB_IN_INDENT) != 0 {
212        parts.push("tab in indent");
213    }
214    if (ws & WS_INCOMPLETE_LINE) != 0 {
215        parts.push("no newline at the end of file");
216    }
217    parts.join(", ")
218}
219
/// Git's locale-independent `isspace`: space, tab, LF and CR only.
///
/// Unlike C's `isspace`, Git's `sane_ctype` table does not classify vertical tab (0x0b)
/// or form feed (0x0c) as whitespace, so neither does this helper.
fn is_space_git(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\n' | b'\r')
}
223
224/// Check one line of patch body (without the leading `+`/`-`/` ` prefix) for whitespace issues.
225///
226/// Matches Git `ws_check_emit_1` (`ws.c`): `WS_INCOMPLETE_LINE` is set only when the line has no
227/// trailing newline in the patch (so context lines that end at `\` are not flagged).
228#[must_use]
229pub fn ws_check(line: &str, ws_rule: u32) -> u32 {
230    let mut result = 0u32;
231    let bytes = line.as_bytes();
232    let mut len = bytes.len();
233
234    let mut trailing_newline = false;
235    if len > 0 && bytes[len - 1] == b'\n' {
236        trailing_newline = true;
237        len -= 1;
238    }
239
240    let mut trailing_carriage_return = false;
241    if (ws_rule & WS_CR_AT_EOL) != 0 && len > 0 && bytes[len - 1] == b'\r' {
242        trailing_carriage_return = true;
243        len -= 1;
244    }
245
246    let mut trailing_whitespace: isize = -1;
247    if (ws_rule & WS_BLANK_AT_EOL) != 0 {
248        let mut i = len as isize - 1;
249        while i >= 0 {
250            if bytes[i as usize].is_ascii_whitespace() {
251                trailing_whitespace = i;
252                result |= WS_BLANK_AT_EOL;
253            } else {
254                break;
255            }
256            i -= 1;
257        }
258    }
259    let tw_end = if trailing_whitespace < 0 {
260        len
261    } else {
262        trailing_whitespace as usize
263    };
264
265    if !trailing_newline && (ws_rule & WS_INCOMPLETE_LINE) != 0 {
266        result |= WS_INCOMPLETE_LINE;
267    }
268
269    let mut i = 0usize;
270    let mut written = 0usize;
271    while i < tw_end {
272        let c = bytes[i];
273        if c == b' ' {
274            i += 1;
275            continue;
276        }
277        if c != b'\t' {
278            break;
279        }
280        if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && written < i {
281            result |= WS_SPACE_BEFORE_TAB;
282        } else if (ws_rule & WS_TAB_IN_INDENT) != 0 {
283            result |= WS_TAB_IN_INDENT;
284        }
285        written = i + 1;
286        i += 1;
287    }
288
289    if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 && i - written >= ws_tab_width(ws_rule) {
290        result |= WS_INDENT_WITH_NON_TAB;
291    }
292
293    let _ = trailing_carriage_return;
294    result
295}
296
297/// Returns true if the line is empty or only ASCII whitespace.
298#[must_use]
299pub fn ws_blank_line(line: &str) -> bool {
300    line.bytes().all(is_space_git)
301}
302
/// Whitespace test used when stripping trailing blanks, matching the `isspace` that
/// Git's `ws.c` sees: space, tab, LF and CR.
///
/// Git replaces the C library's `isspace` with its own locale-independent table, which
/// does not include vertical tab (0x0b) or form feed (0x0c).
fn isspace_c(ch: u8) -> bool {
    matches!(ch, b' ' | b'\t' | b'\n' | b'\r')
}
306
307/// Fix whitespace on one line, matching Git `ws_fix_copy` (patch `+` line body without prefix).
308pub fn ws_fix_copy_line(src: &str, ws_rule: u32) -> (String, bool) {
309    let mut dst = String::new();
310    let mut fixed = false;
311    let mut len = src.len();
312    if len == 0 {
313        return (dst, false);
314    }
315    let bytes = src.as_bytes();
316
317    let mut add_nl_to_tail = false;
318    let mut add_cr_to_tail = false;
319
320    if (ws_rule & WS_INCOMPLETE_LINE) != 0 && bytes[len - 1] != b'\n' {
321        fixed = true;
322        add_nl_to_tail = true;
323    }
324
325    if (ws_rule & WS_BLANK_AT_EOL) != 0 {
326        if len > 0 && bytes[len - 1] == b'\n' {
327            add_nl_to_tail = true;
328            len -= 1;
329            if len > 0 && bytes[len - 1] == b'\r' {
330                add_cr_to_tail = (ws_rule & WS_CR_AT_EOL) != 0;
331                len -= 1;
332            }
333        }
334        if len > 0 && isspace_c(bytes[len - 1]) {
335            while len > 0 && isspace_c(bytes[len - 1]) {
336                len -= 1;
337            }
338            fixed = true;
339        }
340    }
341
342    let mut last_tab_in_indent: i32 = -1;
343    let mut last_space_in_indent: i32 = -1;
344    let mut need_fix_leading_space = false;
345    let mut i = 0usize;
346    while i < len {
347        let ch = bytes[i];
348        if ch == b'\t' {
349            last_tab_in_indent = i as i32;
350            if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && last_space_in_indent >= 0 {
351                need_fix_leading_space = true;
352            }
353        } else if ch == b' ' {
354            last_space_in_indent = i as i32;
355            if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0
356                && (i as i32 - last_tab_in_indent) >= ws_tab_width(ws_rule) as i32
357            {
358                need_fix_leading_space = true;
359            }
360        } else {
361            break;
362        }
363        i += 1;
364    }
365
366    let mut src_rest = &src[..len];
367    let mut rest_len = len;
368
369    if need_fix_leading_space {
370        let mut last = (last_tab_in_indent + 1) as usize;
371        if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 {
372            if last_tab_in_indent < last_space_in_indent {
373                last = (last_space_in_indent + 1) as usize;
374            } else {
375                last = (last_tab_in_indent + 1) as usize;
376            }
377        }
378
379        let mut consecutive_spaces = 0i32;
380        let tw = ws_tab_width(ws_rule);
381        for idx in 0..last {
382            let ch = bytes[idx];
383            if ch != b' ' {
384                consecutive_spaces = 0;
385                dst.push(ch as char);
386            } else {
387                consecutive_spaces += 1;
388                if consecutive_spaces == tw as i32 {
389                    dst.push('\t');
390                    consecutive_spaces = 0;
391                }
392            }
393        }
394        while consecutive_spaces > 0 {
395            dst.push(' ');
396            consecutive_spaces -= 1;
397        }
398        src_rest = &src[last..len];
399        rest_len = src_rest.len();
400        fixed = true;
401    } else if (ws_rule & WS_TAB_IN_INDENT) != 0 && last_tab_in_indent >= 0 {
402        let last = (last_tab_in_indent + 1) as usize;
403        let start = dst.len();
404        for idx in 0..last {
405            if bytes[idx] == b'\t' {
406                loop {
407                    dst.push(' ');
408                    if (dst.len() - start).is_multiple_of(ws_tab_width(ws_rule)) {
409                        break;
410                    }
411                }
412            } else {
413                dst.push(bytes[idx] as char);
414            }
415        }
416        src_rest = &src[last..len];
417        rest_len = src_rest.len();
418        fixed = true;
419    }
420
421    dst.push_str(&src_rest[..rest_len]);
422    if add_cr_to_tail {
423        dst.push('\r');
424    }
425    if add_nl_to_tail {
426        dst.push('\n');
427    }
428    (dst, fixed)
429}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty specification keeps the default flags and the default tab width.
    #[test]
    fn default_rule_parses() {
        let parsed = parse_whitespace_rule("").unwrap();
        assert_ne!(parsed & WS_TRAILING_SPACE, 0);
        assert_ne!(parsed & WS_SPACE_BEFORE_TAB, 0);
        assert_eq!(ws_tab_width(parsed), 8);
    }

    /// A blank before the newline is flagged; a clean line is not.
    #[test]
    fn ws_check_trailing_space() {
        let rule = WS_BLANK_AT_EOL | 8;
        let flagged = ws_check("hello \n", rule);
        let clean = ws_check("hello\n", rule);
        assert_eq!(flagged, WS_BLANK_AT_EOL);
        assert_eq!(clean, 0);
    }

    /// With `tab-in-indent`, a leading tab is expanded to a full tab stop of spaces.
    #[test]
    fn ws_fix_tab_in_indent() {
        let rule = WS_TAB_IN_INDENT | 8;
        let (fixed_line, changed) = ws_fix_copy_line("\tfoo\n", rule);
        assert!(changed);
        assert_eq!(fixed_line, "        foo\n");
    }
}
453        let (out, fx) = ws_fix_copy_line("\tfoo\n", rule);
454        assert!(fx);
455        assert_eq!(out, "        foo\n");
456    }
457}