/// Bit set describing which whitespace checks are enabled.
///
/// The low six bits carry the tab width (see [`WS_TAB_WIDTH_MASK`]); each
/// `WS_*` flag below occupies one of the bits above them.
pub type WsRule = u32;

/// Whitespace at the end of a line (reported as "trailing whitespace").
pub const WS_BLANK_AT_EOL: WsRule = 0o100;
/// A space followed by a tab inside the leading indentation.
pub const WS_SPACE_BEFORE_TAB: WsRule = 0o200;
/// Indentation that uses at least a tab width's worth of spaces.
pub const WS_INDENT_WITH_NON_TAB: WsRule = 0o400;
/// When set, a carriage return directly before the newline is set aside
/// before trailing whitespace is examined.
pub const WS_CR_AT_EOL: WsRule = 0o1000;
/// Blank lines at the end of a file (reported as "new blank line at EOF").
pub const WS_BLANK_AT_EOF: WsRule = 0o2000;
/// A tab anywhere in the leading indentation.
pub const WS_TAB_IN_INDENT: WsRule = 0o4000;
/// A final line that lacks its terminating newline.
pub const WS_INCOMPLETE_LINE: WsRule = 0o10000;

/// Both trailing-blank checks: end of line and end of file.
pub const WS_TRAILING_SPACE: WsRule = WS_BLANK_AT_EOF | WS_BLANK_AT_EOL;
/// Starting rule for parsing: trailing-space and space-before-tab checks with
/// a tab width of 8.
pub const WS_DEFAULT_RULE: WsRule = 8 | WS_SPACE_BEFORE_TAB | WS_TRAILING_SPACE;
/// Mask selecting the six tab-width bits of a [`WsRule`].
pub const WS_TAB_WIDTH_MASK: WsRule = 0o77;
/// Mask selecting the low sixteen bits: the tab width plus every flag above.
pub const WS_RULE_MASK: WsRule = 0o177_777;
56
57#[inline]
59pub fn ws_tab_width(rule: WsRule) -> usize {
60 (rule & WS_TAB_WIDTH_MASK) as usize
61}
62
63struct RuleName {
64 name: &'static str,
65 bits: WsRule,
66 loosens_error: bool,
69 exclude_default: bool,
71}
72
73const RULE_NAMES: &[RuleName] = &[
74 RuleName {
75 name: "trailing-space",
76 bits: WS_TRAILING_SPACE,
77 loosens_error: false,
78 exclude_default: false,
79 },
80 RuleName {
81 name: "space-before-tab",
82 bits: WS_SPACE_BEFORE_TAB,
83 loosens_error: false,
84 exclude_default: false,
85 },
86 RuleName {
87 name: "indent-with-non-tab",
88 bits: WS_INDENT_WITH_NON_TAB,
89 loosens_error: false,
90 exclude_default: false,
91 },
92 RuleName {
93 name: "cr-at-eol",
94 bits: WS_CR_AT_EOL,
95 loosens_error: true,
96 exclude_default: false,
97 },
98 RuleName {
99 name: "blank-at-eol",
100 bits: WS_BLANK_AT_EOL,
101 loosens_error: false,
102 exclude_default: false,
103 },
104 RuleName {
105 name: "blank-at-eof",
106 bits: WS_BLANK_AT_EOF,
107 loosens_error: false,
108 exclude_default: false,
109 },
110 RuleName {
111 name: "tab-in-indent",
112 bits: WS_TAB_IN_INDENT,
113 loosens_error: false,
114 exclude_default: true,
115 },
116 RuleName {
117 name: "incomplete-line",
118 bits: WS_INCOMPLETE_LINE,
119 loosens_error: false,
120 exclude_default: false,
121 },
122];
123
124pub fn parse_whitespace_rule(string: &str) -> Option<WsRule> {
131 let bytes = string.as_bytes();
132 let mut rule = WS_DEFAULT_RULE;
133 let mut pos = 0usize;
134
135 while pos < bytes.len() {
136 while pos < bytes.len() && matches!(bytes[pos], b',' | b' ' | b'\t' | b'\n' | b'\r') {
138 pos += 1;
139 }
140 if pos >= bytes.len() {
141 break;
142 }
143 let token_start = pos;
145 let token_end = bytes[token_start..]
146 .iter()
147 .position(|&b| b == b',')
148 .map(|off| token_start + off)
149 .unwrap_or(bytes.len());
150
151 let mut name_start = token_start;
152 let mut negated = false;
153 if bytes[name_start] == b'-' {
154 negated = true;
155 name_start += 1;
156 }
157 let name = &bytes[name_start..token_end];
158 if name.is_empty() {
159 break;
160 }
161
162 for entry in RULE_NAMES {
166 if entry.name.as_bytes().starts_with(name) {
167 if negated {
168 rule &= !entry.bits;
169 } else {
170 rule |= entry.bits;
171 }
172 break;
173 }
174 }
175
176 if let Some(arg) = token_starts_with_tabwidth(&bytes[name_start..token_end]) {
179 let digits: String = arg
180 .iter()
181 .take_while(|b| b.is_ascii_digit())
182 .map(|&b| b as char)
183 .collect();
184 let tabwidth: u32 = digits.parse().unwrap_or(0);
185 if tabwidth > 0 && tabwidth < 0o100 {
186 rule &= !WS_TAB_WIDTH_MASK;
187 rule |= tabwidth;
188 }
189 }
191
192 pos = token_end;
193 }
194
195 if rule & WS_TAB_IN_INDENT != 0 && rule & WS_INDENT_WITH_NON_TAB != 0 {
196 return None;
197 }
198 Some(rule)
199}
200
/// Returns the bytes that follow a leading `tabwidth=` in `token`, or `None`
/// when `token` does not begin with that prefix.
fn token_starts_with_tabwidth(token: &[u8]) -> Option<&[u8]> {
    let prefix = b"tabwidth=";
    if token.starts_with(prefix) {
        Some(&token[prefix.len()..])
    } else {
        None
    }
}
205
/// State of a `whitespace` attribute as consumed by `resolve_whitespace_rule`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WsAttr<'a> {
    /// Attribute is set to true: the configured tab width is kept and every
    /// rule not marked `loosens_error` or `exclude_default` is enabled.
    True,
    /// Attribute is set to false: only the configured tab width is kept.
    False,
    /// Attribute is unset: the configured rule is used unchanged.
    Unset,
    /// Attribute carries a string that is parsed as a rule specification.
    Value(&'a str),
}
219
220pub fn resolve_whitespace_rule(config_rule: WsRule, attr: WsAttr<'_>) -> Option<WsRule> {
227 match attr {
228 WsAttr::True => {
229 let mut all = config_rule & WS_TAB_WIDTH_MASK;
231 for entry in RULE_NAMES {
232 if !entry.loosens_error && !entry.exclude_default {
233 all |= entry.bits;
234 }
235 }
236 Some(all)
237 }
238 WsAttr::False => Some(config_rule & WS_TAB_WIDTH_MASK),
240 WsAttr::Unset => Some(config_rule),
242 WsAttr::Value(value) => parse_whitespace_rule(value),
243 }
244}
245
246pub fn whitespace_error_string(ws: WsRule) -> String {
252 let mut err = String::new();
253 if (ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE {
254 err.push_str("trailing whitespace");
255 } else {
256 if ws & WS_BLANK_AT_EOL != 0 {
257 err.push_str("trailing whitespace");
258 }
259 if ws & WS_BLANK_AT_EOF != 0 {
260 if !err.is_empty() {
261 err.push_str(", ");
262 }
263 err.push_str("new blank line at EOF");
264 }
265 }
266 if ws & WS_SPACE_BEFORE_TAB != 0 {
267 if !err.is_empty() {
268 err.push_str(", ");
269 }
270 err.push_str("space before tab in indent");
271 }
272 if ws & WS_INDENT_WITH_NON_TAB != 0 {
273 if !err.is_empty() {
274 err.push_str(", ");
275 }
276 err.push_str("indent with spaces");
277 }
278 if ws & WS_TAB_IN_INDENT != 0 {
279 if !err.is_empty() {
280 err.push_str(", ");
281 }
282 err.push_str("tab in indent");
283 }
284 if ws & WS_INCOMPLETE_LINE != 0 {
285 if !err.is_empty() {
286 err.push_str(", ");
287 }
288 err.push_str("no newline at the end of file");
289 }
290 err
291}
292
/// Returns `true` for space, horizontal tab, line feed, vertical tab,
/// form feed and carriage return.
#[inline]
pub fn is_space(b: u8) -> bool {
    // `is_ascii_whitespace` covers everything in the set except vertical tab.
    b == 0x0b || b.is_ascii_whitespace()
}
299
300pub fn ws_fix_bytes(src: &[u8], ws_rule: WsRule) -> Vec<u8> {
304 let mut out = Vec::with_capacity(src.len());
305 ws_fix_copy(&mut out, src, ws_rule);
306 out
307}
308
/// Colour strings used by `ws_check_emit` when writing a checked line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WsEmitColors<'a> {
    /// Written before the part of the line that has no whitespace error.
    pub set: &'a str,
    /// Written after every coloured segment.
    pub reset: &'a str,
    /// Written before each segment that is flagged as a whitespace error.
    pub ws: &'a str,
}
322
323pub fn ws_check(line: &[u8], ws_rule: WsRule) -> WsRule {
329 ws_check_emit_inner(line, ws_rule, None)
330}
331
332pub fn ws_check_emit(
335 line: &[u8],
336 ws_rule: WsRule,
337 out: &mut Vec<u8>,
338 colors: &WsEmitColors<'_>,
339) -> WsRule {
340 ws_check_emit_inner(line, ws_rule, Some((out, colors)))
341}
342
343fn ws_check_emit_inner(
344 line: &[u8],
345 ws_rule: WsRule,
346 mut stream: Option<(&mut Vec<u8>, &WsEmitColors<'_>)>,
347) -> WsRule {
348 let mut result: WsRule = 0;
349 let mut written = 0usize;
350 let mut trailing_whitespace: isize = -1;
351 let mut trailing_newline = false;
352 let mut trailing_carriage_return = false;
353
354 let mut len = line.len();
355
356 if len > 0 && line[len - 1] == b'\n' {
358 trailing_newline = true;
359 len -= 1;
360 }
361 if (ws_rule & WS_CR_AT_EOL) != 0 && len > 0 && line[len - 1] == b'\r' {
362 trailing_carriage_return = true;
363 len -= 1;
364 }
365
366 if ws_rule & WS_BLANK_AT_EOL != 0 {
368 let mut i = len as isize - 1;
369 while i >= 0 {
370 if is_space(line[i as usize]) {
371 trailing_whitespace = i;
372 result |= WS_BLANK_AT_EOL;
373 } else {
374 break;
375 }
376 i -= 1;
377 }
378 }
379
380 if trailing_whitespace == -1 {
381 trailing_whitespace = len as isize;
382 }
383 let trailing_whitespace = trailing_whitespace as usize;
384
385 if !trailing_newline && (ws_rule & WS_INCOMPLETE_LINE) != 0 {
386 result |= WS_INCOMPLETE_LINE;
387 }
388
389 let mut i = 0usize;
391 while i < trailing_whitespace {
392 if line[i] == b' ' {
393 i += 1;
394 continue;
395 }
396 if line[i] != b'\t' {
397 break;
398 }
399 if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && written < i {
400 result |= WS_SPACE_BEFORE_TAB;
401 if let Some((out, colors)) = stream.as_mut() {
402 out.extend_from_slice(colors.ws.as_bytes());
403 out.extend_from_slice(&line[written..i]);
404 out.extend_from_slice(colors.reset.as_bytes());
405 out.push(line[i]);
406 }
407 } else if (ws_rule & WS_TAB_IN_INDENT) != 0 {
408 result |= WS_TAB_IN_INDENT;
409 if let Some((out, colors)) = stream.as_mut() {
410 out.extend_from_slice(&line[written..i]);
411 out.extend_from_slice(colors.ws.as_bytes());
412 out.push(line[i]);
413 out.extend_from_slice(colors.reset.as_bytes());
414 }
415 } else if let Some((out, _)) = stream.as_mut() {
416 out.extend_from_slice(&line[written..=i]);
417 }
418 written = i + 1;
419 i += 1;
420 }
421
422 if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 && i - written >= ws_tab_width(ws_rule) {
424 result |= WS_INDENT_WITH_NON_TAB;
425 if let Some((out, colors)) = stream.as_mut() {
426 out.extend_from_slice(colors.ws.as_bytes());
427 out.extend_from_slice(&line[written..i]);
428 out.extend_from_slice(colors.reset.as_bytes());
429 }
430 written = i;
431 }
432
433 if let Some((out, colors)) = stream.as_mut() {
434 if trailing_whitespace > written {
436 out.extend_from_slice(colors.set.as_bytes());
437 out.extend_from_slice(&line[written..trailing_whitespace]);
438 out.extend_from_slice(colors.reset.as_bytes());
439 }
440 if trailing_whitespace != len {
442 out.extend_from_slice(colors.ws.as_bytes());
443 out.extend_from_slice(&line[trailing_whitespace..len]);
444 out.extend_from_slice(colors.reset.as_bytes());
445 }
446 if trailing_carriage_return {
447 out.push(b'\r');
448 }
449 if trailing_newline {
450 out.push(b'\n');
451 }
452 }
453
454 result
455}
456
457pub fn ws_blank_line(line: &[u8]) -> bool {
459 line.iter().all(|&b| is_space(b))
460}
461
462pub fn count_trailing_blank(buf: &[u8]) -> usize {
469 let size = buf.len();
470 if size == 0 {
471 return 0;
472 }
473 let mut cnt = 0usize;
474 let mut ptr: isize = size as isize - 1;
476 if buf[ptr as usize] == b'\n' {
477 ptr -= 1; }
479 let base: isize = 0;
481 while base < ptr {
482 let mut prev_eol = ptr;
484 while base <= prev_eol {
485 if buf[prev_eol as usize] == b'\n' {
486 break;
487 }
488 prev_eol -= 1;
489 }
490 let start = (prev_eol + 1) as usize;
492 let end = (ptr + 1) as usize;
493 if !ws_blank_line(&buf[start..end]) {
494 break;
495 }
496 cnt += 1;
497 ptr = prev_eol - 1;
498 }
499 cnt
500}
501
/// Counts the lines in `buf`: one per newline byte, plus one more when the
/// buffer does not end in a newline. An empty buffer has zero lines.
pub fn count_lines(buf: &[u8]) -> usize {
    match buf.last() {
        None => 0,
        Some(&last) => {
            let newlines = buf.iter().filter(|&&b| b == b'\n').count();
            // A final line without a terminator still counts as a line.
            newlines + usize::from(last != b'\n')
        }
    }
}
515
516pub fn ws_fix_copy(dst: &mut Vec<u8>, src: &[u8], ws_rule: WsRule) -> bool {
523 let mut len = src.len();
524 let mut src_off = 0usize;
525 let mut add_nl_to_tail = false;
526 let mut add_cr_to_tail = false;
527 let mut fixed = false;
528 let mut last_tab_in_indent: isize = -1;
529 let mut last_space_in_indent: isize = -1;
530 let mut need_fix_leading_space = false;
531
532 if ws_rule & WS_INCOMPLETE_LINE != 0 && len > 0 && src[len - 1] != b'\n' {
534 fixed = true;
535 add_nl_to_tail = true;
536 }
537
538 if ws_rule & WS_BLANK_AT_EOL != 0 {
540 if len > 0 && src[len - 1] == b'\n' {
541 add_nl_to_tail = true;
542 len -= 1;
543 if len > 0 && src[len - 1] == b'\r' {
544 add_cr_to_tail = ws_rule & WS_CR_AT_EOL != 0;
545 len -= 1;
546 }
547 }
548 if len > 0 && is_space(src[len - 1]) {
549 while len > 0 && is_space(src[len - 1]) {
550 len -= 1;
551 }
552 fixed = true;
553 }
554 }
555
556 {
558 let mut i = 0usize;
559 while i < len {
560 let ch = src[i];
561 if ch == b'\t' {
562 last_tab_in_indent = i as isize;
563 if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && last_space_in_indent >= 0 {
564 need_fix_leading_space = true;
565 }
566 } else if ch == b' ' {
567 last_space_in_indent = i as isize;
568 if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0
569 && (i as isize - last_tab_in_indent) >= ws_tab_width(ws_rule) as isize
570 {
571 need_fix_leading_space = true;
572 }
573 } else {
574 break;
575 }
576 i += 1;
577 }
578 }
579
580 if need_fix_leading_space {
581 let mut consecutive_spaces = 0usize;
583 let mut last = (last_tab_in_indent + 1) as usize;
584 if ws_rule & WS_INDENT_WITH_NON_TAB != 0 {
585 if last_tab_in_indent < last_space_in_indent {
587 last = (last_space_in_indent + 1) as usize;
588 } else {
589 last = (last_tab_in_indent + 1) as usize;
590 }
591 }
592 let tabw = ws_tab_width(ws_rule);
593 for &ch in &src[src_off..src_off + last] {
594 if ch != b' ' {
595 consecutive_spaces = 0;
596 dst.push(ch);
597 } else {
598 consecutive_spaces += 1;
599 if tabw != 0 && consecutive_spaces == tabw {
600 dst.push(b'\t');
601 consecutive_spaces = 0;
602 }
603 }
604 }
605 while consecutive_spaces > 0 {
606 dst.push(b' ');
607 consecutive_spaces -= 1;
608 }
609 len -= last;
610 src_off += last;
611 fixed = true;
612 } else if (ws_rule & WS_TAB_IN_INDENT) != 0 && last_tab_in_indent >= 0 {
613 let start = dst.len();
615 let last = (last_tab_in_indent + 1) as usize;
616 let tabw = ws_tab_width(ws_rule).max(1);
617 for &ch in &src[src_off..src_off + last] {
618 if ch == b'\t' {
619 loop {
620 dst.push(b' ');
621 if (dst.len() - start).is_multiple_of(tabw) {
622 break;
623 }
624 }
625 } else {
626 dst.push(ch);
627 }
628 }
629 len -= last;
630 src_off += last;
631 fixed = true;
632 }
633
634 dst.extend_from_slice(&src[src_off..src_off + len]);
635 if add_cr_to_tail {
636 dst.push(b'\r');
637 }
638 if add_nl_to_tail {
639 dst.push(b'\n');
640 }
641 fixed
642}
643
644pub fn ws_fix_line_content(content: &[u8], ws_rule: WsRule) -> Vec<u8> {
652 let mut out = Vec::with_capacity(content.len());
653 ws_fix_copy(&mut out, content, ws_rule);
654 out
655}
656
#[cfg(test)]
mod tests {
    use super::*;

    /// The default rule is trailing-space + space-before-tab with tab width 8.
    #[test]
    fn default_rule_constant() {
        assert_eq!(WS_DEFAULT_RULE, (1 << 6) | (1 << 10) | (1 << 7) | 8);
        assert_eq!(ws_tab_width(WS_DEFAULT_RULE), 8);
    }

    /// Negated prefixes clear the matching rule bits and leave the width alone.
    #[test]
    fn parse_basic() {
        let r = parse_whitespace_rule("-trailing,-space-before,-indent")
            .expect("valid whitespace rule");
        assert_eq!(r & WS_BLANK_AT_EOL, 0);
        assert_eq!(r & WS_BLANK_AT_EOF, 0);
        assert_eq!(r & WS_SPACE_BEFORE_TAB, 0);
        assert_eq!(r & WS_INDENT_WITH_NON_TAB, 0);
        assert_eq!(ws_tab_width(r), 8);
    }

    #[test]
    fn parse_tab_in_indent_and_tabwidth() {
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        assert_ne!(r & WS_TAB_IN_INDENT, 0);
        let r2 = parse_whitespace_rule("tab-in-indent,tabwidth=16").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(r2), 16);
    }

    /// Widths outside `1..64` are ignored and the previous width is kept.
    #[test]
    fn parse_ignores_out_of_range_tabwidth() {
        let too_big = parse_whitespace_rule("tabwidth=64").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(too_big), 8);
        let zero = parse_whitespace_rule("tabwidth=0").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(zero), 8);
    }

    #[test]
    fn parse_conflicting_rule_rejected() {
        assert!(parse_whitespace_rule("tab-in-indent,indent-with-non-tab").is_none());
    }

    /// Each attribute state maps the configured rule as documented.
    #[test]
    fn resolve_attribute_states() {
        let config = parse_whitespace_rule("tabwidth=4").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(config), 4);

        let all = resolve_whitespace_rule(config, WsAttr::True).expect("always resolves");
        assert_eq!(
            all,
            4 | WS_TRAILING_SPACE
                | WS_SPACE_BEFORE_TAB
                | WS_INDENT_WITH_NON_TAB
                | WS_INCOMPLETE_LINE
        );
        assert_eq!(all & WS_CR_AT_EOL, 0);
        assert_eq!(all & WS_TAB_IN_INDENT, 0);

        assert_eq!(resolve_whitespace_rule(config, WsAttr::False), Some(4));
        assert_eq!(resolve_whitespace_rule(config, WsAttr::Unset), Some(config));
        assert_eq!(
            resolve_whitespace_rule(config, WsAttr::Value("-trailing")),
            Some(WS_SPACE_BEFORE_TAB | 8)
        );
        assert_eq!(
            resolve_whitespace_rule(config, WsAttr::Value("tab,indent")),
            None
        );
    }

    #[test]
    fn trailing_whitespace_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b"foo(); \n", r) & WS_BLANK_AT_EOL, 0);
        assert_eq!(ws_check(b"foo();\n", r) & WS_BLANK_AT_EOL, 0);
    }

    #[test]
    fn space_before_tab_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b" \tfoo();\n", r) & WS_SPACE_BEFORE_TAB, 0);
    }

    #[test]
    fn indent_with_non_tab() {
        let r = parse_whitespace_rule("indent-with-non-tab").expect("valid whitespace rule");
        // Eight leading spaces reach the default tab width and are flagged.
        assert_ne!(ws_check(b"        eight\n", r) & WS_INDENT_WITH_NON_TAB, 0);
        // Seven leading spaces stay below it.
        assert_eq!(ws_check(b"       seven\n", r) & WS_INDENT_WITH_NON_TAB, 0);
    }

    /// A line without a final newline is flagged and repaired.
    #[test]
    fn incomplete_line_detected_and_fixed() {
        let r = parse_whitespace_rule("incomplete-line").expect("valid whitespace rule");
        assert_ne!(ws_check(b"foo", r) & WS_INCOMPLETE_LINE, 0);
        assert_eq!(ws_check(b"foo\n", r) & WS_INCOMPLETE_LINE, 0);

        let mut out = Vec::new();
        assert!(ws_fix_copy(&mut out, b"foo", r));
        assert_eq!(out, b"foo\n");
    }

    #[test]
    fn error_string_order() {
        assert_eq!(
            whitespace_error_string(WS_TRAILING_SPACE),
            "trailing whitespace"
        );
        assert_eq!(
            whitespace_error_string(WS_BLANK_AT_EOF),
            "new blank line at EOF"
        );
        assert_eq!(
            whitespace_error_string(WS_SPACE_BEFORE_TAB | WS_TAB_IN_INDENT),
            "space before tab in indent, tab in indent"
        );
    }

    #[test]
    fn fix_strips_trailing() {
        let mut out = Vec::new();
        let fixed = ws_fix_copy(&mut out, b"foo(); \n", WS_DEFAULT_RULE);
        assert!(fixed);
        assert_eq!(out, b"foo();\n");
    }

    #[test]
    fn fix_tab_in_indent_expands() {
        let mut out = Vec::new();
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        ws_fix_copy(&mut out, b"\tfoo();\n", r);
        // One tab at column 0 expands to a full tab width (8) of spaces.
        assert_eq!(out, b"        foo();\n");
    }

    #[test]
    fn count_trailing_blank_basic() {
        assert_eq!(count_trailing_blank(b"a\nb\n"), 0);
        assert_eq!(count_trailing_blank(b"a\nb\n\n"), 1);
        assert_eq!(count_trailing_blank(b"a\n\n\n"), 2);
        assert_eq!(count_trailing_blank(b"a\n \n"), 1);
    }

    #[test]
    fn count_lines_basic() {
        assert_eq!(count_lines(b""), 0);
        assert_eq!(count_lines(b"a"), 1);
        assert_eq!(count_lines(b"a\n"), 1);
        assert_eq!(count_lines(b"a\nb"), 2);
        assert_eq!(count_lines(b"a\nb\n"), 2);
    }

    #[test]
    fn ws_check_emit_paints_trailing() {
        let colors = WsEmitColors {
            set: "<S>",
            reset: "<R>",
            ws: "<W>",
        };
        let mut out = Vec::new();
        ws_check_emit(b"foo(); \n", WS_DEFAULT_RULE, &mut out, &colors);
        assert_eq!(out, b"<S>foo();<R><W> <R>\n".to_vec());
    }
}