/// Bit set describing a whitespace policy.
///
/// The bits covered by [`WS_TAB_WIDTH_MASK`] hold the tab width; the bits
/// above them are the individual `WS_*` check flags.
pub type WsRule = u32;

/// Whitespace at the end of a line.
pub const WS_BLANK_AT_EOL: WsRule = 1 << 6;
/// Space characters followed by a tab inside the leading indent.
pub const WS_SPACE_BEFORE_TAB: WsRule = 1 << 7;
/// A run of at least one tab width of spaces in the leading indent.
pub const WS_INDENT_WITH_NON_TAB: WsRule = 1 << 8;
/// Treat a carriage return right before the line end as part of the line
/// terminator instead of as trailing whitespace.
pub const WS_CR_AT_EOL: WsRule = 1 << 9;
/// Blank lines at the end of the file.
pub const WS_BLANK_AT_EOF: WsRule = 1 << 10;
/// A tab character inside the leading indent.
pub const WS_TAB_IN_INDENT: WsRule = 1 << 11;
/// A line that does not end with a newline.
pub const WS_INCOMPLETE_LINE: WsRule = 1 << 12;

/// Both trailing-blank checks (end of line and end of file) combined.
pub const WS_TRAILING_SPACE: WsRule = WS_BLANK_AT_EOL | WS_BLANK_AT_EOF;
/// Rule used when nothing is configured: trailing-space and
/// space-before-tab checks with a tab width of 8.
pub const WS_DEFAULT_RULE: WsRule = WS_TRAILING_SPACE | WS_SPACE_BEFORE_TAB | 8;
/// Mask selecting the tab width, i.e. every bit below the lowest flag bit.
pub const WS_TAB_WIDTH_MASK: WsRule = WS_BLANK_AT_EOL - 1;
/// Mask selecting the low 16 bits of a rule value.
pub const WS_RULE_MASK: WsRule = 0xFFFF;
56
57#[inline]
59pub fn ws_tab_width(rule: WsRule) -> usize {
60 (rule & WS_TAB_WIDTH_MASK) as usize
61}
62
63struct RuleName {
64 name: &'static str,
65 bits: WsRule,
66 loosens_error: bool,
69 exclude_default: bool,
71}
72
73const RULE_NAMES: &[RuleName] = &[
74 RuleName {
75 name: "trailing-space",
76 bits: WS_TRAILING_SPACE,
77 loosens_error: false,
78 exclude_default: false,
79 },
80 RuleName {
81 name: "space-before-tab",
82 bits: WS_SPACE_BEFORE_TAB,
83 loosens_error: false,
84 exclude_default: false,
85 },
86 RuleName {
87 name: "indent-with-non-tab",
88 bits: WS_INDENT_WITH_NON_TAB,
89 loosens_error: false,
90 exclude_default: false,
91 },
92 RuleName {
93 name: "cr-at-eol",
94 bits: WS_CR_AT_EOL,
95 loosens_error: true,
96 exclude_default: false,
97 },
98 RuleName {
99 name: "blank-at-eol",
100 bits: WS_BLANK_AT_EOL,
101 loosens_error: false,
102 exclude_default: false,
103 },
104 RuleName {
105 name: "blank-at-eof",
106 bits: WS_BLANK_AT_EOF,
107 loosens_error: false,
108 exclude_default: false,
109 },
110 RuleName {
111 name: "tab-in-indent",
112 bits: WS_TAB_IN_INDENT,
113 loosens_error: false,
114 exclude_default: true,
115 },
116 RuleName {
117 name: "incomplete-line",
118 bits: WS_INCOMPLETE_LINE,
119 loosens_error: false,
120 exclude_default: false,
121 },
122];
123
124pub fn parse_whitespace_rule(string: &str) -> Option<WsRule> {
131 let bytes = string.as_bytes();
132 let mut rule = WS_DEFAULT_RULE;
133 let mut pos = 0usize;
134
135 while pos < bytes.len() {
136 while pos < bytes.len() && matches!(bytes[pos], b',' | b' ' | b'\t' | b'\n' | b'\r') {
138 pos += 1;
139 }
140 if pos >= bytes.len() {
141 break;
142 }
143 let token_start = pos;
145 let token_end = bytes[token_start..]
146 .iter()
147 .position(|&b| b == b',')
148 .map(|off| token_start + off)
149 .unwrap_or(bytes.len());
150
151 let mut name_start = token_start;
152 let mut negated = false;
153 if bytes[name_start] == b'-' {
154 negated = true;
155 name_start += 1;
156 }
157 let name = &bytes[name_start..token_end];
158 if name.is_empty() {
159 break;
160 }
161
162 for entry in RULE_NAMES {
166 if entry.name.as_bytes().starts_with(name) {
167 if negated {
168 rule &= !entry.bits;
169 } else {
170 rule |= entry.bits;
171 }
172 break;
173 }
174 }
175
176 if let Some(arg) = token_starts_with_tabwidth(&bytes[name_start..token_end]) {
179 let digits: String = arg
180 .iter()
181 .take_while(|b| b.is_ascii_digit())
182 .map(|&b| b as char)
183 .collect();
184 let tabwidth: u32 = digits.parse().unwrap_or(0);
185 if tabwidth > 0 && tabwidth < 0o100 {
186 rule &= !WS_TAB_WIDTH_MASK;
187 rule |= tabwidth;
188 }
189 }
191
192 pos = token_end;
193 }
194
195 if rule & WS_TAB_IN_INDENT != 0 && rule & WS_INDENT_WITH_NON_TAB != 0 {
196 return None;
197 }
198 Some(rule)
199}
200
/// Returns the bytes following `tabwidth=` when `token` starts with that
/// prefix, and `None` otherwise.
fn token_starts_with_tabwidth(token: &[u8]) -> Option<&[u8]> {
    let prefix = b"tabwidth=";
    if token.starts_with(prefix) {
        Some(&token[prefix.len()..])
    } else {
        None
    }
}
205
/// State of a per-path whitespace attribute, as consumed by
/// `resolve_whitespace_rule`.
///
/// The type is a cheap tag plus an optional borrowed string, so it derives
/// the common value traits (`Debug`, `Clone`, `Copy`, equality).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WsAttr<'a> {
    /// Enable every rule that is not marked as excluded in the rule table,
    /// keeping the configured tab width.
    True,
    /// Disable all checks, keeping only the configured tab width.
    False,
    /// Use the configured rule unchanged.
    Unset,
    /// Parse the contained rule string with `parse_whitespace_rule`.
    Value(&'a str),
}
219
220pub fn resolve_whitespace_rule(config_rule: WsRule, attr: WsAttr<'_>) -> Option<WsRule> {
227 match attr {
228 WsAttr::True => {
229 let mut all = config_rule & WS_TAB_WIDTH_MASK;
231 for entry in RULE_NAMES {
232 if !entry.loosens_error && !entry.exclude_default {
233 all |= entry.bits;
234 }
235 }
236 Some(all)
237 }
238 WsAttr::False => Some(config_rule & WS_TAB_WIDTH_MASK),
240 WsAttr::Unset => Some(config_rule),
242 WsAttr::Value(value) => parse_whitespace_rule(value),
243 }
244}
245
246pub fn whitespace_error_string(ws: WsRule) -> String {
252 let mut err = String::new();
253 if (ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE {
254 err.push_str("trailing whitespace");
255 } else {
256 if ws & WS_BLANK_AT_EOL != 0 {
257 err.push_str("trailing whitespace");
258 }
259 if ws & WS_BLANK_AT_EOF != 0 {
260 if !err.is_empty() {
261 err.push_str(", ");
262 }
263 err.push_str("new blank line at EOF");
264 }
265 }
266 if ws & WS_SPACE_BEFORE_TAB != 0 {
267 if !err.is_empty() {
268 err.push_str(", ");
269 }
270 err.push_str("space before tab in indent");
271 }
272 if ws & WS_INDENT_WITH_NON_TAB != 0 {
273 if !err.is_empty() {
274 err.push_str(", ");
275 }
276 err.push_str("indent with spaces");
277 }
278 if ws & WS_TAB_IN_INDENT != 0 {
279 if !err.is_empty() {
280 err.push_str(", ");
281 }
282 err.push_str("tab in indent");
283 }
284 if ws & WS_INCOMPLETE_LINE != 0 {
285 if !err.is_empty() {
286 err.push_str(", ");
287 }
288 err.push_str("no newline at the end of file");
289 }
290 err
291}
292
/// Returns `true` for the ASCII space and for the control characters from
/// horizontal tab (0x09) through carriage return (0x0d), i.e. tab, line
/// feed, vertical tab, form feed and carriage return.
#[inline]
fn is_space(b: u8) -> bool {
    matches!(b, b' ' | b'\t'..=b'\r')
}
299
/// Marker strings written around the segments of a line by `ws_check_emit`.
///
/// The struct only holds borrowed strings, so it derives the common value
/// traits (`Debug`, `Clone`, `Copy`, equality).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WsEmitColors<'a> {
    /// Written before the regular (non-error) part of the line.
    pub set: &'a str,
    /// Written after every marked segment, regular or erroneous.
    pub reset: &'a str,
    /// Written before a segment that violates a whitespace rule.
    pub ws: &'a str,
}
313
314pub fn ws_check(line: &[u8], ws_rule: WsRule) -> WsRule {
320 ws_check_emit_inner(line, ws_rule, None)
321}
322
323pub fn ws_check_emit(
326 line: &[u8],
327 ws_rule: WsRule,
328 out: &mut Vec<u8>,
329 colors: &WsEmitColors<'_>,
330) -> WsRule {
331 ws_check_emit_inner(line, ws_rule, Some((out, colors)))
332}
333
334fn ws_check_emit_inner(
335 line: &[u8],
336 ws_rule: WsRule,
337 mut stream: Option<(&mut Vec<u8>, &WsEmitColors<'_>)>,
338) -> WsRule {
339 let mut result: WsRule = 0;
340 let mut written = 0usize;
341 let mut trailing_whitespace: isize = -1;
342 let mut trailing_newline = false;
343 let mut trailing_carriage_return = false;
344
345 let mut len = line.len();
346
347 if len > 0 && line[len - 1] == b'\n' {
349 trailing_newline = true;
350 len -= 1;
351 }
352 if (ws_rule & WS_CR_AT_EOL) != 0 && len > 0 && line[len - 1] == b'\r' {
353 trailing_carriage_return = true;
354 len -= 1;
355 }
356
357 if ws_rule & WS_BLANK_AT_EOL != 0 {
359 let mut i = len as isize - 1;
360 while i >= 0 {
361 if is_space(line[i as usize]) {
362 trailing_whitespace = i;
363 result |= WS_BLANK_AT_EOL;
364 } else {
365 break;
366 }
367 i -= 1;
368 }
369 }
370
371 if trailing_whitespace == -1 {
372 trailing_whitespace = len as isize;
373 }
374 let trailing_whitespace = trailing_whitespace as usize;
375
376 if !trailing_newline && (ws_rule & WS_INCOMPLETE_LINE) != 0 {
377 result |= WS_INCOMPLETE_LINE;
378 }
379
380 let mut i = 0usize;
382 while i < trailing_whitespace {
383 if line[i] == b' ' {
384 i += 1;
385 continue;
386 }
387 if line[i] != b'\t' {
388 break;
389 }
390 if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && written < i {
391 result |= WS_SPACE_BEFORE_TAB;
392 if let Some((out, colors)) = stream.as_mut() {
393 out.extend_from_slice(colors.ws.as_bytes());
394 out.extend_from_slice(&line[written..i]);
395 out.extend_from_slice(colors.reset.as_bytes());
396 out.push(line[i]);
397 }
398 } else if (ws_rule & WS_TAB_IN_INDENT) != 0 {
399 result |= WS_TAB_IN_INDENT;
400 if let Some((out, colors)) = stream.as_mut() {
401 out.extend_from_slice(&line[written..i]);
402 out.extend_from_slice(colors.ws.as_bytes());
403 out.push(line[i]);
404 out.extend_from_slice(colors.reset.as_bytes());
405 }
406 } else if let Some((out, _)) = stream.as_mut() {
407 out.extend_from_slice(&line[written..=i]);
408 }
409 written = i + 1;
410 i += 1;
411 }
412
413 if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 && i - written >= ws_tab_width(ws_rule) {
415 result |= WS_INDENT_WITH_NON_TAB;
416 if let Some((out, colors)) = stream.as_mut() {
417 out.extend_from_slice(colors.ws.as_bytes());
418 out.extend_from_slice(&line[written..i]);
419 out.extend_from_slice(colors.reset.as_bytes());
420 }
421 written = i;
422 }
423
424 if let Some((out, colors)) = stream.as_mut() {
425 if trailing_whitespace > written {
427 out.extend_from_slice(colors.set.as_bytes());
428 out.extend_from_slice(&line[written..trailing_whitespace]);
429 out.extend_from_slice(colors.reset.as_bytes());
430 }
431 if trailing_whitespace != len {
433 out.extend_from_slice(colors.ws.as_bytes());
434 out.extend_from_slice(&line[trailing_whitespace..len]);
435 out.extend_from_slice(colors.reset.as_bytes());
436 }
437 if trailing_carriage_return {
438 out.push(b'\r');
439 }
440 if trailing_newline {
441 out.push(b'\n');
442 }
443 }
444
445 result
446}
447
448pub fn ws_blank_line(line: &[u8]) -> bool {
450 line.iter().all(|&b| is_space(b))
451}
452
453pub fn count_trailing_blank(buf: &[u8]) -> usize {
460 let size = buf.len();
461 if size == 0 {
462 return 0;
463 }
464 let mut cnt = 0usize;
465 let mut ptr: isize = size as isize - 1;
467 if buf[ptr as usize] == b'\n' {
468 ptr -= 1; }
470 let base: isize = 0;
472 while base < ptr {
473 let mut prev_eol = ptr;
475 while base <= prev_eol {
476 if buf[prev_eol as usize] == b'\n' {
477 break;
478 }
479 prev_eol -= 1;
480 }
481 let start = (prev_eol + 1) as usize;
483 let end = (ptr + 1) as usize;
484 if !ws_blank_line(&buf[start..end]) {
485 break;
486 }
487 cnt += 1;
488 ptr = prev_eol - 1;
489 }
490 cnt
491}
492
/// Counts the lines in `buf`.
///
/// Every `\n` ends a line; a non-empty final piece without a terminating
/// newline counts as one more line. An empty buffer has zero lines.
pub fn count_lines(buf: &[u8]) -> usize {
    let newlines = buf.iter().filter(|&&b| b == b'\n').count();
    match buf.last() {
        None => 0,
        Some(&b'\n') => newlines,
        Some(_) => newlines + 1,
    }
}
506
507pub fn ws_fix_copy(dst: &mut Vec<u8>, src: &[u8], ws_rule: WsRule) -> bool {
514 let mut len = src.len();
515 let mut src_off = 0usize;
516 let mut add_nl_to_tail = false;
517 let mut add_cr_to_tail = false;
518 let mut fixed = false;
519 let mut last_tab_in_indent: isize = -1;
520 let mut last_space_in_indent: isize = -1;
521 let mut need_fix_leading_space = false;
522
523 if ws_rule & WS_INCOMPLETE_LINE != 0 && len > 0 && src[len - 1] != b'\n' {
525 fixed = true;
526 add_nl_to_tail = true;
527 }
528
529 if ws_rule & WS_BLANK_AT_EOL != 0 {
531 if len > 0 && src[len - 1] == b'\n' {
532 add_nl_to_tail = true;
533 len -= 1;
534 if len > 0 && src[len - 1] == b'\r' {
535 add_cr_to_tail = ws_rule & WS_CR_AT_EOL != 0;
536 len -= 1;
537 }
538 }
539 if len > 0 && is_space(src[len - 1]) {
540 while len > 0 && is_space(src[len - 1]) {
541 len -= 1;
542 }
543 fixed = true;
544 }
545 }
546
547 {
549 let mut i = 0usize;
550 while i < len {
551 let ch = src[i];
552 if ch == b'\t' {
553 last_tab_in_indent = i as isize;
554 if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && last_space_in_indent >= 0 {
555 need_fix_leading_space = true;
556 }
557 } else if ch == b' ' {
558 last_space_in_indent = i as isize;
559 if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0
560 && (i as isize - last_tab_in_indent) >= ws_tab_width(ws_rule) as isize
561 {
562 need_fix_leading_space = true;
563 }
564 } else {
565 break;
566 }
567 i += 1;
568 }
569 }
570
571 if need_fix_leading_space {
572 let mut consecutive_spaces = 0usize;
574 let mut last = (last_tab_in_indent + 1) as usize;
575 if ws_rule & WS_INDENT_WITH_NON_TAB != 0 {
576 if last_tab_in_indent < last_space_in_indent {
578 last = (last_space_in_indent + 1) as usize;
579 } else {
580 last = (last_tab_in_indent + 1) as usize;
581 }
582 }
583 let tabw = ws_tab_width(ws_rule);
584 for &ch in &src[src_off..src_off + last] {
585 if ch != b' ' {
586 consecutive_spaces = 0;
587 dst.push(ch);
588 } else {
589 consecutive_spaces += 1;
590 if tabw != 0 && consecutive_spaces == tabw {
591 dst.push(b'\t');
592 consecutive_spaces = 0;
593 }
594 }
595 }
596 while consecutive_spaces > 0 {
597 dst.push(b' ');
598 consecutive_spaces -= 1;
599 }
600 len -= last;
601 src_off += last;
602 fixed = true;
603 } else if (ws_rule & WS_TAB_IN_INDENT) != 0 && last_tab_in_indent >= 0 {
604 let start = dst.len();
606 let last = (last_tab_in_indent + 1) as usize;
607 let tabw = ws_tab_width(ws_rule).max(1);
608 for &ch in &src[src_off..src_off + last] {
609 if ch == b'\t' {
610 loop {
611 dst.push(b' ');
612 if (dst.len() - start).is_multiple_of(tabw) {
613 break;
614 }
615 }
616 } else {
617 dst.push(ch);
618 }
619 }
620 len -= last;
621 src_off += last;
622 fixed = true;
623 }
624
625 dst.extend_from_slice(&src[src_off..src_off + len]);
626 if add_cr_to_tail {
627 dst.push(b'\r');
628 }
629 if add_nl_to_tail {
630 dst.push(b'\n');
631 }
632 fixed
633}
634
635pub fn ws_fix_line_content(content: &[u8], ws_rule: WsRule) -> Vec<u8> {
643 let mut out = Vec::with_capacity(content.len());
644 ws_fix_copy(&mut out, content, ws_rule);
645 out
646}
647
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `count` spaces followed by `rest`, so that the amount of
    /// leading whitespace in a fixture is explicit.
    fn indented(count: usize, rest: &str) -> Vec<u8> {
        format!("{}{}", " ".repeat(count), rest).into_bytes()
    }

    #[test]
    fn default_rule_constant() {
        assert_eq!(WS_DEFAULT_RULE, (1 << 6) | (1 << 10) | (1 << 7) | 8);
        assert_eq!(ws_tab_width(WS_DEFAULT_RULE), 8);
    }

    #[test]
    fn parse_basic() {
        let r = parse_whitespace_rule("-trailing,-space-before,-indent")
            .expect("valid whitespace rule");
        assert_eq!(r & WS_BLANK_AT_EOL, 0);
        assert_eq!(r & WS_SPACE_BEFORE_TAB, 0);
    }

    #[test]
    fn parse_tab_in_indent_and_tabwidth() {
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        assert_ne!(r & WS_TAB_IN_INDENT, 0);
        let r2 = parse_whitespace_rule("tab-in-indent,tabwidth=16").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(r2), 16);
    }

    #[test]
    fn parse_conflicting_rule_rejected() {
        assert!(parse_whitespace_rule("tab-in-indent,indent-with-non-tab").is_none());
    }

    #[test]
    fn trailing_whitespace_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b"foo(); \n", r) & WS_BLANK_AT_EOL, 0);
        assert_eq!(ws_check(b"foo();\n", r) & WS_BLANK_AT_EOL, 0);
    }

    #[test]
    fn space_before_tab_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b" \tfoo();\n", r) & WS_SPACE_BEFORE_TAB, 0);
    }

    #[test]
    fn indent_with_non_tab() {
        let r = parse_whitespace_rule("indent-with-non-tab").expect("valid whitespace rule");
        // The default tab width is 8: exactly eight leading spaces trigger
        // the rule, seven do not.
        assert_ne!(
            ws_check(&indented(8, "eight\n"), r) & WS_INDENT_WITH_NON_TAB,
            0
        );
        assert_eq!(
            ws_check(&indented(7, "seven\n"), r) & WS_INDENT_WITH_NON_TAB,
            0
        );
    }

    #[test]
    fn error_string_order() {
        assert_eq!(
            whitespace_error_string(WS_TRAILING_SPACE),
            "trailing whitespace"
        );
        assert_eq!(
            whitespace_error_string(WS_BLANK_AT_EOF),
            "new blank line at EOF"
        );
        assert_eq!(
            whitespace_error_string(WS_SPACE_BEFORE_TAB | WS_TAB_IN_INDENT),
            "space before tab in indent, tab in indent"
        );
    }

    #[test]
    fn fix_strips_trailing() {
        let mut out = Vec::new();
        let fixed = ws_fix_copy(&mut out, b"foo(); \n", WS_DEFAULT_RULE);
        assert!(fixed);
        assert_eq!(out, b"foo();\n");
    }

    #[test]
    fn fix_tab_in_indent_expands() {
        let mut out = Vec::new();
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        ws_fix_copy(&mut out, b"\tfoo();\n", r);
        // A leading tab expands to one full tab stop of eight spaces.
        assert_eq!(out, indented(8, "foo();\n"));
    }

    #[test]
    fn count_trailing_blank_basic() {
        assert_eq!(count_trailing_blank(b"a\nb\n"), 0);
        assert_eq!(count_trailing_blank(b"a\nb\n\n"), 1);
        assert_eq!(count_trailing_blank(b"a\n\n\n"), 2);
        assert_eq!(count_trailing_blank(b"a\n \n"), 1);
    }

    #[test]
    fn ws_check_emit_paints_trailing() {
        let colors = WsEmitColors {
            set: "<S>",
            reset: "<R>",
            ws: "<W>",
        };
        let mut out = Vec::new();
        ws_check_emit(b"foo(); \n", WS_DEFAULT_RULE, &mut out, &colors);
        assert_eq!(out, b"<S>foo();<R><W> <R>\n".to_vec());
    }
}