// sley_diff_merge/ws.rs

//! Whitespace-rule engine — a port of git's `ws.c` / `ws.h`.
//!
//! This is the single source of truth for git's `core.whitespace` rules. It
//! powers three consumers, all of which used to be stubbed out in sley:
//!
//! * `git diff --check` (and `diff-index`/`diff-tree --check`) — reports
//!   whitespace errors introduced by the `+` lines of a diff;
//! * `--ws-error-highlight` — paints whitespace errors in the patch body;
//! * `git apply --whitespace=warn|error|fix|strip|nowarn` — warns about,
//!   errors on, or fixes whitespace errors in the patch being applied.
//!
//! The surface mirrors git's exactly:
//!
//! * [`parse_whitespace_rule`] parses a `core.whitespace` / attribute value
//!   into a [`WsRule`] bitmask (the low 6 bits are the tab width);
//! * [`ws_check`] classifies a single line, returning the [`WsRule`] bits that
//!   fired (so the caller can format them with [`whitespace_error_string`]);
//! * [`ws_check_emit`] does the same but also writes the line with
//!   whitespace-error spans painted (the `--ws-error-highlight` path);
//! * [`ws_fix_copy`] copies a line while fixing its whitespace errors (the
//!   `apply --whitespace=fix` path);
//! * [`ws_blank_line`] / [`count_trailing_blank`] support the blank-at-EOF
//!   detection that lives outside the per-line check in git.
24
/// `core.whitespace` rule mask. The low 6 bits encode the tab width (git
/// supports up to 63); the high bits are the individual error rules.
///
/// This mirrors git's `unsigned ws_rule` exactly so the bit values match
/// git's `ws.h` constants byte-for-byte.
pub type WsRule = u32;

/// Trailing whitespace at end of line.
pub const WS_BLANK_AT_EOL: WsRule = 1 << 6;
/// A space appearing before a tab in the indentation.
pub const WS_SPACE_BEFORE_TAB: WsRule = 1 << 7;
/// Indentation that uses spaces instead of tabs (tab-width worth of leading
/// spaces beyond any tabs).
pub const WS_INDENT_WITH_NON_TAB: WsRule = 1 << 8;
/// A carriage return at end of line.
pub const WS_CR_AT_EOL: WsRule = 1 << 9;
/// A new blank line at the end of the file.
pub const WS_BLANK_AT_EOF: WsRule = 1 << 10;
/// A tab appearing in the indentation.
pub const WS_TAB_IN_INDENT: WsRule = 1 << 11;
/// An incomplete final line (no trailing newline).
pub const WS_INCOMPLETE_LINE: WsRule = 1 << 12;

/// `trailing-space` = blank-at-eol + blank-at-eof.
pub const WS_TRAILING_SPACE: WsRule = WS_BLANK_AT_EOL | WS_BLANK_AT_EOF;
/// git's default rule: trailing-space + space-before-tab + tab width 8.
/// The literal `8` occupies the tab-width field in the low 6 bits.
pub const WS_DEFAULT_RULE: WsRule = WS_TRAILING_SPACE | WS_SPACE_BEFORE_TAB | 8;
/// The low 6 bits hold the tab width, so the largest encodable width is 63.
pub const WS_TAB_WIDTH_MASK: WsRule = (1 << 6) - 1;
/// Mask covering the low 16 bits of a rule: the tab-width field plus every
/// rule bit defined above.
pub const WS_RULE_MASK: WsRule = (1 << 16) - 1;
56
57/// Extract the tab width from a rule (git's `ws_tab_width` macro).
58#[inline]
59pub fn ws_tab_width(rule: WsRule) -> usize {
60    (rule & WS_TAB_WIDTH_MASK) as usize
61}
62
63struct RuleName {
64    name: &'static str,
65    bits: WsRule,
66    /// Loosens (rather than tightens) error checking; excluded from the
67    /// "all rules" set built for a `whitespace`-true attribute.
68    loosens_error: bool,
69    /// Excluded from the default rule set even when not loosening.
70    exclude_default: bool,
71}
72
73const RULE_NAMES: &[RuleName] = &[
74    RuleName {
75        name: "trailing-space",
76        bits: WS_TRAILING_SPACE,
77        loosens_error: false,
78        exclude_default: false,
79    },
80    RuleName {
81        name: "space-before-tab",
82        bits: WS_SPACE_BEFORE_TAB,
83        loosens_error: false,
84        exclude_default: false,
85    },
86    RuleName {
87        name: "indent-with-non-tab",
88        bits: WS_INDENT_WITH_NON_TAB,
89        loosens_error: false,
90        exclude_default: false,
91    },
92    RuleName {
93        name: "cr-at-eol",
94        bits: WS_CR_AT_EOL,
95        loosens_error: true,
96        exclude_default: false,
97    },
98    RuleName {
99        name: "blank-at-eol",
100        bits: WS_BLANK_AT_EOL,
101        loosens_error: false,
102        exclude_default: false,
103    },
104    RuleName {
105        name: "blank-at-eof",
106        bits: WS_BLANK_AT_EOF,
107        loosens_error: false,
108        exclude_default: false,
109    },
110    RuleName {
111        name: "tab-in-indent",
112        bits: WS_TAB_IN_INDENT,
113        loosens_error: false,
114        exclude_default: true,
115    },
116    RuleName {
117        name: "incomplete-line",
118        bits: WS_INCOMPLETE_LINE,
119        loosens_error: false,
120        exclude_default: false,
121    },
122];
123
124/// Parse a `core.whitespace` / `whitespace` attribute value into a [`WsRule`].
125///
126/// Port of git's `parse_whitespace_rule`. Comma/whitespace-separated tokens,
127/// each optionally `-`-negated; `tabwidth=N` sets the tab width (1..=63). A
128/// rule combining both `tab-in-indent` and `indent-with-non-tab` is rejected
129/// (git `die`s); we return [`None`] so the caller can surface the error.
130pub fn parse_whitespace_rule(string: &str) -> Option<WsRule> {
131    let bytes = string.as_bytes();
132    let mut rule = WS_DEFAULT_RULE;
133    let mut pos = 0usize;
134
135    while pos < bytes.len() {
136        // Skip leading separators (`, \t\n\r`).
137        while pos < bytes.len() && matches!(bytes[pos], b',' | b' ' | b'\t' | b'\n' | b'\r') {
138            pos += 1;
139        }
140        if pos >= bytes.len() {
141            break;
142        }
143        // Token runs to the next comma (or end).
144        let token_start = pos;
145        let token_end = bytes[token_start..]
146            .iter()
147            .position(|&b| b == b',')
148            .map(|off| token_start + off)
149            .unwrap_or(bytes.len());
150
151        let mut name_start = token_start;
152        let mut negated = false;
153        if bytes[name_start] == b'-' {
154            negated = true;
155            name_start += 1;
156        }
157        let name = &bytes[name_start..token_end];
158        if name.is_empty() {
159            break;
160        }
161
162        // git uses strncmp with the token length: a token matches a rule whose
163        // name *starts with* the token bytes. (e.g. `incomplete` matches
164        // `incomplete-line`.)
165        for entry in RULE_NAMES {
166            if entry.name.as_bytes().starts_with(name) {
167                if negated {
168                    rule &= !entry.bits;
169                } else {
170                    rule |= entry.bits;
171                }
172                break;
173            }
174        }
175
176        // `tabwidth=N`. git tests the token after the negation strip
177        // (`string` points past any leading `-`), so match from `name_start`.
178        if let Some(arg) = token_starts_with_tabwidth(&bytes[name_start..token_end]) {
179            let digits: String = arg
180                .iter()
181                .take_while(|b| b.is_ascii_digit())
182                .map(|&b| b as char)
183                .collect();
184            let tabwidth: u32 = digits.parse().unwrap_or(0);
185            if tabwidth > 0 && tabwidth < 0o100 {
186                rule &= !WS_TAB_WIDTH_MASK;
187                rule |= tabwidth;
188            }
189            // Out-of-range tab widths are silently ignored here (git warns).
190        }
191
192        pos = token_end;
193    }
194
195    if rule & WS_TAB_IN_INDENT != 0 && rule & WS_INDENT_WITH_NON_TAB != 0 {
196        return None;
197    }
198    Some(rule)
199}
200
/// If `token` begins with the literal `tabwidth=`, return the bytes that
/// follow it; otherwise return [`None`].
fn token_starts_with_tabwidth(token: &[u8]) -> Option<&[u8]> {
    let marker = b"tabwidth=";
    if token.starts_with(marker) {
        Some(&token[marker.len()..])
    } else {
        None
    }
}
205
/// The `whitespace` gitattribute state for a path, the way git's
/// `whitespace_rule` interprets it.
///
/// The type only borrows the attribute value, so it is cheap to copy; it
/// derives the usual comparison and debug traits so callers can log and test
/// attribute states.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WsAttr<'a> {
    /// `path whitespace` — true: enforce all (non-loosening, non-excluded)
    /// rules at the config's tab width.
    True,
    /// `path -whitespace` — false: enforce nothing (just the config tab width).
    False,
    /// `path !whitespace` or unattributed — use the config rule as-is.
    Unset,
    /// `path whitespace=<value>` — parse the value as a rule string.
    Value(&'a str),
}
219
220/// Resolve the effective whitespace rule for a path. Port of git's
221/// `whitespace_rule`: `config_rule` is the `core.whitespace` value (or
222/// [`WS_DEFAULT_RULE`]), and `attr` is the per-path `whitespace` attribute.
223///
224/// Returns [`None`] only when an explicit attribute *value* names a
225/// conflicting rule (git would `die`).
226pub fn resolve_whitespace_rule(config_rule: WsRule, attr: WsAttr<'_>) -> Option<WsRule> {
227    match attr {
228        WsAttr::True => {
229            // All enforcing rules at the config tab width.
230            let mut all = config_rule & WS_TAB_WIDTH_MASK;
231            for entry in RULE_NAMES {
232                if !entry.loosens_error && !entry.exclude_default {
233                    all |= entry.bits;
234                }
235            }
236            Some(all)
237        }
238        // `-whitespace`: enforce nothing but keep the config tab width.
239        WsAttr::False => Some(config_rule & WS_TAB_WIDTH_MASK),
240        // `!whitespace` / unattributed: the config rule as-is.
241        WsAttr::Unset => Some(config_rule),
242        WsAttr::Value(value) => parse_whitespace_rule(value),
243    }
244}
245
246/// Format the set of fired rule bits into git's human-readable error string.
247///
248/// Port of `whitespace_error_string`. The order and the `trailing whitespace`
249/// collapsing of `WS_TRAILING_SPACE` (blank-at-eol + blank-at-eof together)
250/// match git exactly.
251pub fn whitespace_error_string(ws: WsRule) -> String {
252    let mut err = String::new();
253    if (ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE {
254        err.push_str("trailing whitespace");
255    } else {
256        if ws & WS_BLANK_AT_EOL != 0 {
257            err.push_str("trailing whitespace");
258        }
259        if ws & WS_BLANK_AT_EOF != 0 {
260            if !err.is_empty() {
261                err.push_str(", ");
262            }
263            err.push_str("new blank line at EOF");
264        }
265    }
266    if ws & WS_SPACE_BEFORE_TAB != 0 {
267        if !err.is_empty() {
268            err.push_str(", ");
269        }
270        err.push_str("space before tab in indent");
271    }
272    if ws & WS_INDENT_WITH_NON_TAB != 0 {
273        if !err.is_empty() {
274            err.push_str(", ");
275        }
276        err.push_str("indent with spaces");
277    }
278    if ws & WS_TAB_IN_INDENT != 0 {
279        if !err.is_empty() {
280            err.push_str(", ");
281        }
282        err.push_str("tab in indent");
283    }
284    if ws & WS_INCOMPLETE_LINE != 0 {
285        if !err.is_empty() {
286            err.push_str(", ");
287        }
288        err.push_str("no newline at the end of file");
289    }
290    err
291}
292
/// git's `isspace`: true only for space, `\t`, `\n` and `\r`.
///
/// git does not use the C library's `isspace`; it replaces it with a lookup
/// in its own `sane_ctype` table, where vertical tab (`\x0b`) and form feed
/// (`\x0c`) are control characters but *not* whitespace. Treating them as
/// whitespace here would report trailing-whitespace errors (and blank lines)
/// that git itself does not.
#[inline]
pub fn is_space(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
}
299
300/// Whitespace-fix a whole line (including any trailing newline) and return the
301/// fixed bytes — [`ws_fix_copy`] into a fresh buffer. Used by `git apply`'s
302/// whitespace-corrected fragment matching.
303pub fn ws_fix_bytes(src: &[u8], ws_rule: WsRule) -> Vec<u8> {
304    let mut out = Vec::with_capacity(src.len());
305    ws_fix_copy(&mut out, src, ws_rule);
306    out
307}
308
/// The color escapes [`ws_check_emit`] paints with for `--ws-error-highlight`.
///
/// `set`/`reset`/`ws` are the color escapes for the normal line color, the
/// reset, and the whitespace-error highlight. When color is off they are all
/// empty and the output is just the original line bytes.
///
/// The struct only holds borrowed strings, so it is `Copy`; `Debug` is
/// derived so a palette can be logged.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WsEmitColors<'a> {
    /// `color.diff.new` (or the relevant line color) — normal text.
    pub set: &'a str,
    /// The reset escape.
    pub reset: &'a str,
    /// `color.diff.whitespace` — the whitespace-error highlight.
    pub ws: &'a str,
}
322
323/// Classify a single line's whitespace errors, returning the rule bits that
324/// fired. Port of `ws_check` (`ws_check_emit_1` with no stream).
325///
326/// `line` is the raw line content *without* the diff `+` prefix, and may
327/// include a trailing `\n`.
328pub fn ws_check(line: &[u8], ws_rule: WsRule) -> WsRule {
329    ws_check_emit_inner(line, ws_rule, None)
330}
331
332/// Like [`ws_check`] but also appends the line to `out` with whitespace-error
333/// spans painted using `colors`. Port of `ws_check_emit`.
334pub fn ws_check_emit(
335    line: &[u8],
336    ws_rule: WsRule,
337    out: &mut Vec<u8>,
338    colors: &WsEmitColors<'_>,
339) -> WsRule {
340    ws_check_emit_inner(line, ws_rule, Some((out, colors)))
341}
342
343fn ws_check_emit_inner(
344    line: &[u8],
345    ws_rule: WsRule,
346    mut stream: Option<(&mut Vec<u8>, &WsEmitColors<'_>)>,
347) -> WsRule {
348    let mut result: WsRule = 0;
349    let mut written = 0usize;
350    let mut trailing_whitespace: isize = -1;
351    let mut trailing_newline = false;
352    let mut trailing_carriage_return = false;
353
354    let mut len = line.len();
355
356    // Logic is simpler if we temporarily ignore the trailing newline.
357    if len > 0 && line[len - 1] == b'\n' {
358        trailing_newline = true;
359        len -= 1;
360    }
361    if (ws_rule & WS_CR_AT_EOL) != 0 && len > 0 && line[len - 1] == b'\r' {
362        trailing_carriage_return = true;
363        len -= 1;
364    }
365
366    // Check for trailing whitespace.
367    if ws_rule & WS_BLANK_AT_EOL != 0 {
368        let mut i = len as isize - 1;
369        while i >= 0 {
370            if is_space(line[i as usize]) {
371                trailing_whitespace = i;
372                result |= WS_BLANK_AT_EOL;
373            } else {
374                break;
375            }
376            i -= 1;
377        }
378    }
379
380    if trailing_whitespace == -1 {
381        trailing_whitespace = len as isize;
382    }
383    let trailing_whitespace = trailing_whitespace as usize;
384
385    if !trailing_newline && (ws_rule & WS_INCOMPLETE_LINE) != 0 {
386        result |= WS_INCOMPLETE_LINE;
387    }
388
389    // Check indentation.
390    let mut i = 0usize;
391    while i < trailing_whitespace {
392        if line[i] == b' ' {
393            i += 1;
394            continue;
395        }
396        if line[i] != b'\t' {
397            break;
398        }
399        if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && written < i {
400            result |= WS_SPACE_BEFORE_TAB;
401            if let Some((out, colors)) = stream.as_mut() {
402                out.extend_from_slice(colors.ws.as_bytes());
403                out.extend_from_slice(&line[written..i]);
404                out.extend_from_slice(colors.reset.as_bytes());
405                out.push(line[i]);
406            }
407        } else if (ws_rule & WS_TAB_IN_INDENT) != 0 {
408            result |= WS_TAB_IN_INDENT;
409            if let Some((out, colors)) = stream.as_mut() {
410                out.extend_from_slice(&line[written..i]);
411                out.extend_from_slice(colors.ws.as_bytes());
412                out.push(line[i]);
413                out.extend_from_slice(colors.reset.as_bytes());
414            }
415        } else if let Some((out, _)) = stream.as_mut() {
416            out.extend_from_slice(&line[written..=i]);
417        }
418        written = i + 1;
419        i += 1;
420    }
421
422    // Check for indent using non-tab.
423    if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 && i - written >= ws_tab_width(ws_rule) {
424        result |= WS_INDENT_WITH_NON_TAB;
425        if let Some((out, colors)) = stream.as_mut() {
426            out.extend_from_slice(colors.ws.as_bytes());
427            out.extend_from_slice(&line[written..i]);
428            out.extend_from_slice(colors.reset.as_bytes());
429        }
430        written = i;
431    }
432
433    if let Some((out, colors)) = stream.as_mut() {
434        // Emit non-highlighted (middle) segment.
435        if trailing_whitespace > written {
436            out.extend_from_slice(colors.set.as_bytes());
437            out.extend_from_slice(&line[written..trailing_whitespace]);
438            out.extend_from_slice(colors.reset.as_bytes());
439        }
440        // Highlight errors in trailing whitespace.
441        if trailing_whitespace != len {
442            out.extend_from_slice(colors.ws.as_bytes());
443            out.extend_from_slice(&line[trailing_whitespace..len]);
444            out.extend_from_slice(colors.reset.as_bytes());
445        }
446        if trailing_carriage_return {
447            out.push(b'\r');
448        }
449        if trailing_newline {
450            out.push(b'\n');
451        }
452    }
453
454    result
455}
456
457/// Is the line entirely blank (whitespace only)? Port of `ws_blank_line`.
458pub fn ws_blank_line(line: &[u8]) -> bool {
459    line.iter().all(|&b| is_space(b))
460}
461
462/// Count the trailing run of blank lines in a buffer. Port of
463/// `count_trailing_blank` (diff.c) — used by the blank-at-EOF detection.
464///
465/// The final newline is skipped (it does not count as a blank line); an
466/// incomplete final line is treated as content. Returns the number of blank
467/// lines at the very end of the buffer.
468pub fn count_trailing_blank(buf: &[u8]) -> usize {
469    let size = buf.len();
470    if size == 0 {
471        return 0;
472    }
473    let mut cnt = 0usize;
474    // `ptr` is an index pointing at the last byte considered.
475    let mut ptr: isize = size as isize - 1;
476    if buf[ptr as usize] == b'\n' {
477        ptr -= 1; // skip the last LF
478    }
479    // else: incomplete final line — the byte at ptr is part of it.
480    let base: isize = 0;
481    while base < ptr {
482        // Find the previous LF at or below ptr (but above base-1).
483        let mut prev_eol = ptr;
484        while base <= prev_eol {
485            if buf[prev_eol as usize] == b'\n' {
486                break;
487            }
488            prev_eol -= 1;
489        }
490        // The line is buf[prev_eol+1 ..= ptr].
491        let start = (prev_eol + 1) as usize;
492        let end = (ptr + 1) as usize;
493        if !ws_blank_line(&buf[start..end]) {
494            break;
495        }
496        cnt += 1;
497        ptr = prev_eol - 1;
498    }
499    cnt
500}
501
/// Number of lines in `buf`, counted like git's `count_lines`: every `\n`
/// ends a line, and a non-empty tail with no final `\n` counts as one more.
/// An empty buffer has zero lines.
pub fn count_lines(buf: &[u8]) -> usize {
    let Some(&last) = buf.last() else {
        return 0;
    };
    let newlines = buf.iter().filter(|&&b| b == b'\n').count();
    newlines + usize::from(last != b'\n')
}
515
516/// Copy `src` onto the end of `dst` while fixing whitespace errors per
517/// `ws_rule`. Port of `ws_fix_copy`. Returns whether anything was fixed (git's
518/// `error_count` increment) so callers can count fixes.
519///
520/// `src` is the line content (typically ending in `\n`, unless it is the
521/// incomplete last line).
522pub fn ws_fix_copy(dst: &mut Vec<u8>, src: &[u8], ws_rule: WsRule) -> bool {
523    let mut len = src.len();
524    let mut src_off = 0usize;
525    let mut add_nl_to_tail = false;
526    let mut add_cr_to_tail = false;
527    let mut fixed = false;
528    let mut last_tab_in_indent: isize = -1;
529    let mut last_space_in_indent: isize = -1;
530    let mut need_fix_leading_space = false;
531
532    // An incomplete line is fixed by remembering to add the trailing newline.
533    if ws_rule & WS_INCOMPLETE_LINE != 0 && len > 0 && src[len - 1] != b'\n' {
534        fixed = true;
535        add_nl_to_tail = true;
536    }
537
538    // Strip trailing whitespace.
539    if ws_rule & WS_BLANK_AT_EOL != 0 {
540        if len > 0 && src[len - 1] == b'\n' {
541            add_nl_to_tail = true;
542            len -= 1;
543            if len > 0 && src[len - 1] == b'\r' {
544                add_cr_to_tail = ws_rule & WS_CR_AT_EOL != 0;
545                len -= 1;
546            }
547        }
548        if len > 0 && is_space(src[len - 1]) {
549            while len > 0 && is_space(src[len - 1]) {
550                len -= 1;
551            }
552            fixed = true;
553        }
554    }
555
556    // Check leading whitespace (indent).
557    {
558        let mut i = 0usize;
559        while i < len {
560            let ch = src[i];
561            if ch == b'\t' {
562                last_tab_in_indent = i as isize;
563                if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && last_space_in_indent >= 0 {
564                    need_fix_leading_space = true;
565                }
566            } else if ch == b' ' {
567                last_space_in_indent = i as isize;
568                if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0
569                    && (i as isize - last_tab_in_indent) >= ws_tab_width(ws_rule) as isize
570                {
571                    need_fix_leading_space = true;
572                }
573            } else {
574                break;
575            }
576            i += 1;
577        }
578    }
579
580    if need_fix_leading_space {
581        // Process indent ourselves.
582        let mut consecutive_spaces = 0usize;
583        let mut last = (last_tab_in_indent + 1) as usize;
584        if ws_rule & WS_INDENT_WITH_NON_TAB != 0 {
585            // Point `last` one past the indent.
586            if last_tab_in_indent < last_space_in_indent {
587                last = (last_space_in_indent + 1) as usize;
588            } else {
589                last = (last_tab_in_indent + 1) as usize;
590            }
591        }
592        let tabw = ws_tab_width(ws_rule);
593        for &ch in &src[src_off..src_off + last] {
594            if ch != b' ' {
595                consecutive_spaces = 0;
596                dst.push(ch);
597            } else {
598                consecutive_spaces += 1;
599                if tabw != 0 && consecutive_spaces == tabw {
600                    dst.push(b'\t');
601                    consecutive_spaces = 0;
602                }
603            }
604        }
605        while consecutive_spaces > 0 {
606            dst.push(b' ');
607            consecutive_spaces -= 1;
608        }
609        len -= last;
610        src_off += last;
611        fixed = true;
612    } else if (ws_rule & WS_TAB_IN_INDENT) != 0 && last_tab_in_indent >= 0 {
613        // Expand tabs into spaces.
614        let start = dst.len();
615        let last = (last_tab_in_indent + 1) as usize;
616        let tabw = ws_tab_width(ws_rule).max(1);
617        for &ch in &src[src_off..src_off + last] {
618            if ch == b'\t' {
619                loop {
620                    dst.push(b' ');
621                    if (dst.len() - start).is_multiple_of(tabw) {
622                        break;
623                    }
624                }
625            } else {
626                dst.push(ch);
627            }
628        }
629        len -= last;
630        src_off += last;
631        fixed = true;
632    }
633
634    dst.extend_from_slice(&src[src_off..src_off + len]);
635    if add_cr_to_tail {
636        dst.push(b'\r');
637    }
638    if add_nl_to_tail {
639        dst.push(b'\n');
640    }
641    fixed
642}
643
644/// Fix the whitespace of a single line's *content* (no trailing newline — the
645/// caller stores newlines separately). Returns the fixed bytes; the caller can
646/// compare against the input to know whether anything changed.
647///
648/// This is [`ws_fix_copy`] applied to a newline-free line: the trailing-newline
649/// bookkeeping is inert, so the result is just the indent-fixed, trailing-ws-
650/// stripped content.
651pub fn ws_fix_line_content(content: &[u8], ws_rule: WsRule) -> Vec<u8> {
652    let mut out = Vec::with_capacity(content.len());
653    ws_fix_copy(&mut out, content, ws_rule);
654    out
655}
656
/// Unit tests for the whitespace-rule engine: rule parsing, per-line
/// classification, error-string formatting, fixing, and trailing-blank
/// counting.
#[cfg(test)]
mod tests {
    use super::*;

    /// The default rule has exactly the expected bit pattern and tab width.
    #[test]
    fn default_rule_constant() {
        // trailing-space (eol|eof) + space-before-tab + tabwidth 8.
        assert_eq!(WS_DEFAULT_RULE, (1 << 6) | (1 << 10) | (1 << 7) | 8);
        assert_eq!(ws_tab_width(WS_DEFAULT_RULE), 8);
    }

    /// Negated, abbreviated rule names clear the corresponding bits.
    #[test]
    fn parse_basic() {
        // -trailing,-space-before,-indent disables those.
        let r = parse_whitespace_rule("-trailing,-space-before,-indent")
            .expect("valid whitespace rule");
        assert_eq!(r & WS_BLANK_AT_EOL, 0);
        assert_eq!(r & WS_SPACE_BEFORE_TAB, 0);
    }

    /// The `tab` abbreviation enables tab-in-indent, and `tabwidth=N`
    /// overrides the tab width.
    #[test]
    fn parse_tab_in_indent_and_tabwidth() {
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        assert_ne!(r & WS_TAB_IN_INDENT, 0);
        let r2 = parse_whitespace_rule("tab-in-indent,tabwidth=16").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(r2), 16);
    }

    /// tab-in-indent and indent-with-non-tab cannot be enabled together.
    #[test]
    fn parse_conflicting_rule_rejected() {
        assert!(parse_whitespace_rule("tab-in-indent,indent-with-non-tab").is_none());
    }

    /// A space before the newline fires blank-at-eol; a clean line does not.
    #[test]
    fn trailing_whitespace_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b"foo(); \n", r) & WS_BLANK_AT_EOL, 0);
        assert_eq!(ws_check(b"foo();\n", r) & WS_BLANK_AT_EOL, 0);
    }

    /// A space followed by a tab in the indent fires space-before-tab.
    #[test]
    fn space_before_tab_detected() {
        let r = WS_DEFAULT_RULE;
        // " \tfoo();" -> space before tab.
        assert_ne!(ws_check(b" \tfoo();\n", r) & WS_SPACE_BEFORE_TAB, 0);
    }

    /// indent-with-non-tab fires only once the leading spaces reach the tab
    /// width.
    #[test]
    fn indent_with_non_tab() {
        let r = parse_whitespace_rule("indent-with-non-tab").expect("valid whitespace rule");
        // 8 leading spaces (tab width 8) -> indent with spaces.
        assert_ne!(ws_check(b"        eight\n", r) & WS_INDENT_WITH_NON_TAB, 0);
        // 7 leading spaces -> not enough.
        assert_eq!(ws_check(b"       seven\n", r) & WS_INDENT_WITH_NON_TAB, 0);
    }

    /// Error strings collapse the trailing-space pair and keep a fixed order.
    #[test]
    fn error_string_order() {
        assert_eq!(
            whitespace_error_string(WS_TRAILING_SPACE),
            "trailing whitespace"
        );
        assert_eq!(
            whitespace_error_string(WS_BLANK_AT_EOF),
            "new blank line at EOF"
        );
        assert_eq!(
            whitespace_error_string(WS_SPACE_BEFORE_TAB | WS_TAB_IN_INDENT),
            "space before tab in indent, tab in indent"
        );
    }

    /// Fixing removes trailing whitespace, keeps the newline, and reports
    /// that a fix happened.
    #[test]
    fn fix_strips_trailing() {
        let mut out = Vec::new();
        let fixed = ws_fix_copy(&mut out, b"foo(); \n", WS_DEFAULT_RULE);
        assert!(fixed);
        assert_eq!(out, b"foo();\n");
    }

    /// With tab-in-indent enabled, fixing expands an indent tab to spaces.
    #[test]
    fn fix_tab_in_indent_expands() {
        let mut out = Vec::new();
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        // A leading tab expands to 8 spaces.
        ws_fix_copy(&mut out, b"\tfoo();\n", r);
        assert_eq!(out, b"        foo();\n");
    }

    /// Trailing blank lines are counted; a whitespace-only line counts too.
    #[test]
    fn count_trailing_blank_basic() {
        assert_eq!(count_trailing_blank(b"a\nb\n"), 0);
        assert_eq!(count_trailing_blank(b"a\nb\n\n"), 1);
        assert_eq!(count_trailing_blank(b"a\n\n\n"), 2);
        assert_eq!(count_trailing_blank(b"a\n   \n"), 1);
    }

    /// Placeholder escapes make the painted spans visible: the middle segment
    /// uses `set`, the trailing whitespace uses `ws`, each followed by
    /// `reset`, and the newline is written last, outside any span.
    #[test]
    fn ws_check_emit_paints_trailing() {
        let colors = WsEmitColors {
            set: "<S>",
            reset: "<R>",
            ws: "<W>",
        };
        let mut out = Vec::new();
        ws_check_emit(b"foo(); \n", WS_DEFAULT_RULE, &mut out, &colors);
        assert_eq!(out, b"<S>foo();<R><W> <R>\n".to_vec());
    }
}