/// Bit set describing which whitespace checks are active.
///
/// The low six bits hold the tab width; the bits above them are rule flags.
pub type WsRule = u32;

/// Flag: whitespace at the end of a line.
pub const WS_BLANK_AT_EOL: WsRule = 0o100;
/// Flag: a space that is followed by a tab inside the indentation.
pub const WS_SPACE_BEFORE_TAB: WsRule = 0o200;
/// Flag: indentation made of a run of spaces at least one tab width long.
pub const WS_INDENT_WITH_NON_TAB: WsRule = 0o400;
/// Flag: a carriage return right before the line feed is accepted.
pub const WS_CR_AT_EOL: WsRule = 0o1000;
/// Flag: blank lines at the end of the file.
pub const WS_BLANK_AT_EOF: WsRule = 0o2000;
/// Flag: a tab inside the indentation.
pub const WS_TAB_IN_INDENT: WsRule = 0o4000;
/// Flag: a final line that lacks its terminating newline.
pub const WS_INCOMPLETE_LINE: WsRule = 0o10000;

/// Both "blank at end of line" and "blank at end of file".
pub const WS_TRAILING_SPACE: WsRule = WS_BLANK_AT_EOF | WS_BLANK_AT_EOL;
/// Rule used when nothing is configured: trailing space, space before tab,
/// and a tab width of 8.
pub const WS_DEFAULT_RULE: WsRule = 8 | WS_SPACE_BEFORE_TAB | WS_TRAILING_SPACE;

/// Mask selecting the tab-width bits (the six bits below the first flag).
pub const WS_TAB_WIDTH_MASK: WsRule = 0o77;
/// Mask selecting the low sixteen bits of a rule value.
pub const WS_RULE_MASK: WsRule = 0xFFFF;
/// Extracts the tab width stored in the low bits of `rule`.
#[inline]
pub fn ws_tab_width(rule: WsRule) -> usize {
    let width_bits = rule & WS_TAB_WIDTH_MASK;
    width_bits as usize
}
/// One named whitespace rule together with the flag bits it controls.
struct RuleName {
/// Full rule name; `parse_whitespace_rule` accepts any prefix of it.
name: &'static str,
/// Bits that are set (or cleared, for a `-`-negated token) when the rule is named.
bits: WsRule,
/// When `true`, `resolve_whitespace_rule` leaves this rule out of the set it
/// builds for `WsAttr::True`.
loosens_error: bool,
/// When `true`, the rule is likewise left out of the `WsAttr::True` set.
exclude_default: bool,
}
/// Table of every rule name understood by `parse_whitespace_rule`.
///
/// Order matters: the parser applies the FIRST entry whose name starts with
/// the given token, so an abbreviation such as `"tab"` or `"space"` resolves
/// to whichever matching entry appears earliest here.
const RULE_NAMES: &[RuleName] = &[
RuleName {
name: "trailing-space",
bits: WS_TRAILING_SPACE,
loosens_error: false,
exclude_default: false,
},
RuleName {
name: "space-before-tab",
bits: WS_SPACE_BEFORE_TAB,
loosens_error: false,
exclude_default: false,
},
RuleName {
name: "indent-with-non-tab",
bits: WS_INDENT_WITH_NON_TAB,
loosens_error: false,
exclude_default: false,
},
// The only entry flagged `loosens_error`: it is skipped when building the
// "all rules" set.
RuleName {
name: "cr-at-eol",
bits: WS_CR_AT_EOL,
loosens_error: true,
exclude_default: false,
},
RuleName {
name: "blank-at-eol",
bits: WS_BLANK_AT_EOL,
loosens_error: false,
exclude_default: false,
},
RuleName {
name: "blank-at-eof",
bits: WS_BLANK_AT_EOF,
loosens_error: false,
exclude_default: false,
},
// Excluded from the "all rules" set; the parser rejects it in combination
// with `indent-with-non-tab`.
RuleName {
name: "tab-in-indent",
bits: WS_TAB_IN_INDENT,
loosens_error: false,
exclude_default: true,
},
RuleName {
name: "incomplete-line",
bits: WS_INCOMPLETE_LINE,
loosens_error: false,
exclude_default: false,
},
];
/// Parses a comma-separated whitespace rule specification.
///
/// Parsing starts from [`WS_DEFAULT_RULE`]. Each token may be any prefix of a
/// name in `RULE_NAMES` (the first matching entry wins), optionally preceded
/// by `-` to clear the rule instead of setting it. A `tabwidth=N` token with
/// `0 < N < 64` replaces the tab width; other values are ignored. Tokens that
/// match nothing are ignored, and a token that is empty after its optional
/// `-` stops parsing.
///
/// Returns `None` when the result would enable both `tab-in-indent` and
/// `indent-with-non-tab`, which contradict each other.
pub fn parse_whitespace_rule(string: &str) -> Option<WsRule> {
    const SEPARATORS: &[u8] = b", \t\n\r";

    let mut rule = WS_DEFAULT_RULE;
    let mut rest = string.as_bytes();

    loop {
        // Drop separators in front of the next token.
        let skipped = rest
            .iter()
            .take_while(|&&b| SEPARATORS.contains(&b))
            .count();
        rest = &rest[skipped..];
        if rest.is_empty() {
            break;
        }

        // A token runs up to (not including) the next comma.
        let token_len = rest
            .iter()
            .position(|&b| b == b',')
            .unwrap_or(rest.len());
        let (token, tail) = rest.split_at(token_len);
        rest = tail;

        let (negated, name) = match token.split_first() {
            Some((&b'-', stripped)) => (true, stripped),
            _ => (false, token),
        };
        if name.is_empty() {
            break;
        }

        // Only the first table entry the token abbreviates is applied.
        let matched = RULE_NAMES
            .iter()
            .find(|entry| entry.name.as_bytes().starts_with(name));
        if let Some(entry) = matched {
            if negated {
                rule &= !entry.bits;
            } else {
                rule |= entry.bits;
            }
        }

        if let Some(arg) = token_starts_with_tabwidth(name) {
            // Leading decimal digits only; no digits or overflow counts as 0,
            // which falls outside the accepted range below.
            let tabwidth = arg
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .try_fold(0u32, |acc, &digit| {
                    acc.checked_mul(10)?.checked_add(u32::from(digit - b'0'))
                })
                .unwrap_or(0);
            if (1..0o100).contains(&tabwidth) {
                rule = (rule & !WS_TAB_WIDTH_MASK) | tabwidth;
            }
        }
    }

    let wants_no_tabs = rule & WS_TAB_IN_INDENT != 0;
    let wants_only_tabs = rule & WS_INDENT_WITH_NON_TAB != 0;
    if wants_no_tabs && wants_only_tabs {
        None
    } else {
        Some(rule)
    }
}
/// Returns the bytes following `tabwidth=` when `token` begins with that
/// key, or `None` otherwise.
fn token_starts_with_tabwidth(token: &[u8]) -> Option<&[u8]> {
    let key: &[u8] = b"tabwidth=";
    token.strip_prefix(key)
}
/// State of a path's `whitespace` attribute, as consumed by
/// `resolve_whitespace_rule`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WsAttr<'a> {
    /// Attribute set to true: enable every rule that is not excluded.
    True,
    /// Attribute set to false: disable all rules, keeping only the tab width.
    False,
    /// Attribute not set: fall back to the configured rule unchanged.
    Unset,
    /// Attribute carries a rule specification string to be parsed.
    Value(&'a str),
}
/// Combines the configured rule with a path's `whitespace` attribute.
///
/// * `WsAttr::True`  – the configured tab width plus every rule in
///   `RULE_NAMES` that is neither `loosens_error` nor `exclude_default`.
/// * `WsAttr::False` – only the configured tab width, no rule flags.
/// * `WsAttr::Unset` – `config_rule` as is.
/// * `WsAttr::Value` – the result of `parse_whitespace_rule` on the string,
///   which is `None` for a contradictory specification.
pub fn resolve_whitespace_rule(config_rule: WsRule, attr: WsAttr<'_>) -> Option<WsRule> {
    let tab_width_bits = config_rule & WS_TAB_WIDTH_MASK;
    match attr {
        WsAttr::Value(spec) => parse_whitespace_rule(spec),
        WsAttr::Unset => Some(config_rule),
        WsAttr::False => Some(tab_width_bits),
        WsAttr::True => {
            let everything = RULE_NAMES
                .iter()
                .filter(|entry| !(entry.loosens_error || entry.exclude_default))
                .fold(tab_width_bits, |acc, entry| acc | entry.bits);
            Some(everything)
        }
    }
}
/// Renders the violations in `ws` as a human-readable, comma-separated list.
///
/// Messages appear in a fixed order. "trailing whitespace" is reported once
/// whenever the end-of-line bit is set (with or without the end-of-file bit);
/// "new blank line at EOF" is reported only when the end-of-file bit is set
/// on its own. An empty string is returned when no known bit is set.
pub fn whitespace_error_string(ws: WsRule) -> String {
    let has = |bits: WsRule| ws & bits != 0;
    let mut messages: Vec<&str> = Vec::new();

    if has(WS_BLANK_AT_EOL) {
        messages.push("trailing whitespace");
    } else if has(WS_BLANK_AT_EOF) {
        messages.push("new blank line at EOF");
    }

    let remaining: [(WsRule, &str); 4] = [
        (WS_SPACE_BEFORE_TAB, "space before tab in indent"),
        (WS_INDENT_WITH_NON_TAB, "indent with spaces"),
        (WS_TAB_IN_INDENT, "tab in indent"),
        (WS_INCOMPLETE_LINE, "no newline at the end of file"),
    ];
    for (bits, text) in remaining {
        if has(bits) {
            messages.push(text);
        }
    }

    messages.join(", ")
}
/// Reports whether `b` is a space or one of the control characters from
/// horizontal tab (0x09) through carriage return (0x0d).
#[inline]
fn is_space(b: u8) -> bool {
    b == b' ' || (b'\t'..=b'\r').contains(&b)
}
/// Escape sequences used by `ws_check_emit` when writing a checked line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WsEmitColors<'a> {
    /// Written before the ordinary (non-flagged) middle part of the line.
    pub set: &'a str,
    /// Written after every coloured segment to end it.
    pub reset: &'a str,
    /// Written before a run of flagged whitespace.
    pub ws: &'a str,
}
/// Checks `line` against `ws_rule` and returns the bitmask of violated rules.
///
/// Nothing is written anywhere; this calls `ws_check_emit_inner` without an
/// output stream.
pub fn ws_check(line: &[u8], ws_rule: WsRule) -> WsRule {
ws_check_emit_inner(line, ws_rule, None)
}
/// Checks `line` against `ws_rule` like `ws_check`, and additionally appends
/// the line to `out` with the escape sequences from `colors` wrapped around
/// its segments.
///
/// Returns the bitmask of violated rules.
pub fn ws_check_emit(
line: &[u8],
ws_rule: WsRule,
out: &mut Vec<u8>,
colors: &WsEmitColors<'_>,
) -> WsRule {
ws_check_emit_inner(line, ws_rule, Some((out, colors)))
}
/// Shared implementation behind `ws_check` and `ws_check_emit`.
///
/// Scans `line` for the whitespace problems enabled in `ws_rule` and returns
/// the bitmask of those found. When `stream` is `Some((out, colors))` the
/// line is also appended to `out`: flagged whitespace is wrapped in
/// `colors.ws` … `colors.reset`, the text between the indentation and any
/// trailing whitespace is wrapped in `colors.set` … `colors.reset`, and
/// unflagged indentation is copied as is.
fn ws_check_emit_inner(
line: &[u8],
ws_rule: WsRule,
mut stream: Option<(&mut Vec<u8>, &WsEmitColors<'_>)>,
) -> WsRule {
let mut result: WsRule = 0;
// Number of leading bytes of `line` already copied to the stream.
let mut written = 0usize;
// Index of the first byte of the trailing whitespace run; -1 means "none found yet".
let mut trailing_whitespace: isize = -1;
let mut trailing_newline = false;
let mut trailing_carriage_return = false;
let mut len = line.len();
// Set the line terminator aside for the checks; it is re-emitted at the very end.
if len > 0 && line[len - 1] == b'\n' {
trailing_newline = true;
len -= 1;
}
// With cr-at-eol enabled a final '\r' belongs to the terminator, so it is
// kept out of the trailing-whitespace scan below.
if (ws_rule & WS_CR_AT_EOL) != 0 && len > 0 && line[len - 1] == b'\r' {
trailing_carriage_return = true;
len -= 1;
}
// Walk backwards over the trailing whitespace, remembering where it starts.
if ws_rule & WS_BLANK_AT_EOL != 0 {
let mut i = len as isize - 1;
while i >= 0 {
if is_space(line[i as usize]) {
trailing_whitespace = i;
result |= WS_BLANK_AT_EOL;
} else {
break;
}
i -= 1;
}
}
// No trailing whitespace (or the rule is off): the content runs to `len`.
if trailing_whitespace == -1 {
trailing_whitespace = len as isize;
}
let trailing_whitespace = trailing_whitespace as usize;
if !trailing_newline && (ws_rule & WS_INCOMPLETE_LINE) != 0 {
result |= WS_INCOMPLETE_LINE;
}
// Scan the indentation: spaces are skipped over, each tab is handled, and
// the first byte that is neither ends the scan.
let mut i = 0usize;
while i < trailing_whitespace {
if line[i] == b' ' {
i += 1;
continue;
}
if line[i] != b'\t' {
break;
}
// `written < i` means there are spaces between the last emitted byte and this tab.
if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && written < i {
result |= WS_SPACE_BEFORE_TAB;
if let Some((out, colors)) = stream.as_mut() {
// Highlight the offending spaces; the tab itself is emitted plain.
out.extend_from_slice(colors.ws.as_bytes());
out.extend_from_slice(&line[written..i]);
out.extend_from_slice(colors.reset.as_bytes());
out.push(line[i]);
}
} else if (ws_rule & WS_TAB_IN_INDENT) != 0 {
result |= WS_TAB_IN_INDENT;
if let Some((out, colors)) = stream.as_mut() {
// Preceding bytes are emitted plain; only the tab is highlighted.
out.extend_from_slice(&line[written..i]);
out.extend_from_slice(colors.ws.as_bytes());
out.push(line[i]);
out.extend_from_slice(colors.reset.as_bytes());
}
} else if let Some((out, _)) = stream.as_mut() {
// Nothing to flag: copy everything up to and including the tab.
out.extend_from_slice(&line[written..=i]);
}
written = i + 1;
i += 1;
}
// `i - written` is the number of spaces left over after the last tab of the
// indentation; a full tab width or more of them is flagged.
if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0 && i - written >= ws_tab_width(ws_rule) {
result |= WS_INDENT_WITH_NON_TAB;
if let Some((out, colors)) = stream.as_mut() {
out.extend_from_slice(colors.ws.as_bytes());
out.extend_from_slice(&line[written..i]);
out.extend_from_slice(colors.reset.as_bytes());
}
written = i;
}
if let Some((out, colors)) = stream.as_mut() {
// The remainder starts at `written`; its ordinary part ends at `trailing_whitespace`.
if trailing_whitespace > written {
out.extend_from_slice(colors.set.as_bytes());
out.extend_from_slice(&line[written..trailing_whitespace]);
out.extend_from_slice(colors.reset.as_bytes());
}
// Highlight the trailing whitespace, if any was found.
if trailing_whitespace != len {
out.extend_from_slice(colors.ws.as_bytes());
out.extend_from_slice(&line[trailing_whitespace..len]);
out.extend_from_slice(colors.reset.as_bytes());
}
// Put back the terminator bytes that were set aside at the top.
if trailing_carriage_return {
out.push(b'\r');
}
if trailing_newline {
out.push(b'\n');
}
}
result
}
pub fn ws_blank_line(line: &[u8]) -> bool {
line.iter().all(|&b| is_space(b))
}
/// Counts the blank lines at the end of `buf`.
///
/// The newline terminating the final line is not itself a blank line. Lines
/// are examined from the end towards the start and counting stops at the
/// first line that is not blank. The scan also stops once at most one byte
/// remains unexamined, so a blank line consisting solely of the buffer's
/// first byte is not counted.
pub fn count_trailing_blank(buf: &[u8]) -> usize {
    // `end` is the exclusive end of the line currently under inspection.
    let mut end = match buf.split_last() {
        None => return 0,
        Some((&b'\n', head)) => head.len(),
        Some(_) => buf.len(),
    };

    let mut blank_lines = 0usize;
    while end > 1 {
        // The line starts right after the previous newline, or at 0 if none.
        let start = buf[..end]
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |newline| newline + 1);
        if !ws_blank_line(&buf[start..end]) {
            break;
        }
        blank_lines += 1;
        if start == 0 {
            break;
        }
        // Step over the newline that precedes this line.
        end = start - 1;
    }
    blank_lines
}
/// Counts the lines in `buf`.
///
/// Every `\n` ends a line, and a non-empty remainder after the last `\n`
/// counts as one more line. An empty buffer has zero lines.
pub fn count_lines(buf: &[u8]) -> usize {
    let newlines = buf.iter().filter(|&&b| b == b'\n').count();
    match buf.last() {
        None => 0,
        Some(&b'\n') => newlines,
        Some(_) => newlines + 1,
    }
}
/// Appends a copy of the line `src` to `dst`, repairing the whitespace
/// problems enabled in `ws_rule`.
///
/// Depending on the rule this adds a missing final newline, strips trailing
/// whitespace, converts runs of indentation spaces into tabs, or expands
/// indentation tabs into spaces. Returns `true` when one of those fixes was
/// recorded.
///
/// NOTE(review): with blank-at-eol enabled and cr-at-eol disabled, a line
/// ending in "\r\n" loses its '\r' without `fixed` being set, so the function
/// can return `false` even though the output differs from the input — confirm
/// whether callers rely on that.
pub fn ws_fix_copy(dst: &mut Vec<u8>, src: &[u8], ws_rule: WsRule) -> bool {
// `len` is the number of bytes of `src` (from `src_off`) still to be copied.
let mut len = src.len();
let mut src_off = 0usize;
let mut add_nl_to_tail = false;
let mut add_cr_to_tail = false;
let mut fixed = false;
// Positions of the last tab / last space seen in the indentation; -1 means none.
let mut last_tab_in_indent: isize = -1;
let mut last_space_in_indent: isize = -1;
let mut need_fix_leading_space = false;
// Remembering to append '\n' at the end is all it takes to fix an incomplete line.
if ws_rule & WS_INCOMPLETE_LINE != 0 && len > 0 && src[len - 1] != b'\n' {
fixed = true;
add_nl_to_tail = true;
}
// Strip trailing whitespace, setting the line terminator aside first.
if ws_rule & WS_BLANK_AT_EOL != 0 {
if len > 0 && src[len - 1] == b'\n' {
add_nl_to_tail = true;
len -= 1;
if len > 0 && src[len - 1] == b'\r' {
// The '\r' is restored at the end only when cr-at-eol permits it.
add_cr_to_tail = ws_rule & WS_CR_AT_EOL != 0;
len -= 1;
}
}
if len > 0 && is_space(src[len - 1]) {
while len > 0 && is_space(src[len - 1]) {
len -= 1;
}
fixed = true;
}
}
// Examine the indentation (leading tabs and spaces) to decide whether it needs rewriting.
{
let mut i = 0usize;
while i < len {
let ch = src[i];
if ch == b'\t' {
last_tab_in_indent = i as isize;
// A tab after any earlier space is a space-before-tab violation.
if (ws_rule & WS_SPACE_BEFORE_TAB) != 0 && last_space_in_indent >= 0 {
need_fix_leading_space = true;
}
} else if ch == b' ' {
last_space_in_indent = i as isize;
// A run of spaces since the last tab that reaches a full tab width.
if (ws_rule & WS_INDENT_WITH_NON_TAB) != 0
&& (i as isize - last_tab_in_indent) >= ws_tab_width(ws_rule) as isize
{
need_fix_leading_space = true;
}
} else {
break;
}
i += 1;
}
}
if need_fix_leading_space {
// Rewrite the indentation: every full tab width of consecutive spaces
// becomes one tab.
let mut consecutive_spaces = 0usize;
// `last` is one past the end of the indentation prefix being rewritten.
let mut last = (last_tab_in_indent + 1) as usize;
if ws_rule & WS_INDENT_WITH_NON_TAB != 0 {
// Extend the prefix to cover spaces that follow the last tab as well.
if last_tab_in_indent < last_space_in_indent {
last = (last_space_in_indent + 1) as usize;
} else {
last = (last_tab_in_indent + 1) as usize;
}
}
let tabw = ws_tab_width(ws_rule);
for &ch in &src[src_off..src_off + last] {
if ch != b' ' {
// A tab is copied through; spaces pending before it are dropped.
consecutive_spaces = 0;
dst.push(ch);
} else {
consecutive_spaces += 1;
if tabw != 0 && consecutive_spaces == tabw {
dst.push(b'\t');
consecutive_spaces = 0;
}
}
}
// Spaces that did not add up to a full tab width are kept as spaces.
while consecutive_spaces > 0 {
dst.push(b' ');
consecutive_spaces -= 1;
}
len -= last;
src_off += last;
fixed = true;
} else if (ws_rule & WS_TAB_IN_INDENT) != 0 && last_tab_in_indent >= 0 {
// Expand each indentation tab into spaces up to the next tab stop,
// measured from where this line starts in `dst`.
let start = dst.len();
let last = (last_tab_in_indent + 1) as usize;
// `.max(1)` keeps the tab-stop computation valid for a zero tab width.
let tabw = ws_tab_width(ws_rule).max(1);
for &ch in &src[src_off..src_off + last] {
if ch == b'\t' {
loop {
dst.push(b' ');
if (dst.len() - start).is_multiple_of(tabw) {
break;
}
}
} else {
dst.push(ch);
}
}
len -= last;
src_off += last;
fixed = true;
}
// Copy what remains of the line, then restore the terminator bytes set aside above.
dst.extend_from_slice(&src[src_off..src_off + len]);
if add_cr_to_tail {
dst.push(b'\r');
}
if add_nl_to_tail {
dst.push(b'\n');
}
fixed
}
/// Returns a whitespace-fixed copy of the line `content`.
///
/// Convenience wrapper around `ws_fix_copy` that writes into a fresh buffer
/// and discards the "was anything fixed" flag.
pub fn ws_fix_line_content(content: &[u8], ws_rule: WsRule) -> Vec<u8> {
let mut out = Vec::with_capacity(content.len());
ws_fix_copy(&mut out, content, ws_rule);
out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default rule is trailing-space + space-before-tab with tab width 8.
    #[test]
    fn default_rule_constant() {
        assert_eq!(WS_DEFAULT_RULE, (1 << 6) | (1 << 10) | (1 << 7) | 8);
        assert_eq!(ws_tab_width(WS_DEFAULT_RULE), 8);
    }

    /// Negated, abbreviated rule names clear the matching bits.
    #[test]
    fn parse_basic() {
        let r = parse_whitespace_rule("-trailing,-space-before,-indent")
            .expect("valid whitespace rule");
        assert_eq!(r & WS_BLANK_AT_EOL, 0);
        assert_eq!(r & WS_SPACE_BEFORE_TAB, 0);
    }

    #[test]
    fn parse_tab_in_indent_and_tabwidth() {
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        assert_ne!(r & WS_TAB_IN_INDENT, 0);
        let r2 = parse_whitespace_rule("tab-in-indent,tabwidth=16").expect("valid whitespace rule");
        assert_eq!(ws_tab_width(r2), 16);
    }

    #[test]
    fn parse_conflicting_rule_rejected() {
        assert!(parse_whitespace_rule("tab-in-indent,indent-with-non-tab").is_none());
    }

    #[test]
    fn trailing_whitespace_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b"foo(); \n", r) & WS_BLANK_AT_EOL, 0);
        assert_eq!(ws_check(b"foo();\n", r) & WS_BLANK_AT_EOL, 0);
    }

    #[test]
    fn space_before_tab_detected() {
        let r = WS_DEFAULT_RULE;
        assert_ne!(ws_check(b" \tfoo();\n", r) & WS_SPACE_BEFORE_TAB, 0);
    }

    /// The rule fires only once the space run reaches the tab width (8 here),
    /// so the fixtures need exactly eight and seven leading spaces.
    #[test]
    fn indent_with_non_tab() {
        let r = parse_whitespace_rule("indent-with-non-tab").expect("valid whitespace rule");
        assert_ne!(ws_check(b"        eight\n", r) & WS_INDENT_WITH_NON_TAB, 0);
        assert_eq!(ws_check(b"       seven\n", r) & WS_INDENT_WITH_NON_TAB, 0);
    }

    #[test]
    fn error_string_order() {
        assert_eq!(
            whitespace_error_string(WS_TRAILING_SPACE),
            "trailing whitespace"
        );
        assert_eq!(
            whitespace_error_string(WS_BLANK_AT_EOF),
            "new blank line at EOF"
        );
        assert_eq!(
            whitespace_error_string(WS_SPACE_BEFORE_TAB | WS_TAB_IN_INDENT),
            "space before tab in indent, tab in indent"
        );
    }

    #[test]
    fn fix_strips_trailing() {
        let mut out = Vec::new();
        let fixed = ws_fix_copy(&mut out, b"foo(); \n", WS_DEFAULT_RULE);
        assert!(fixed);
        assert_eq!(out, b"foo();\n");
    }

    /// A leading tab expands to a full tab stop, i.e. eight spaces at the
    /// default tab width.
    #[test]
    fn fix_tab_in_indent_expands() {
        let mut out = Vec::new();
        let r =
            parse_whitespace_rule("-trailing,-space,-indent,tab").expect("valid whitespace rule");
        ws_fix_copy(&mut out, b"\tfoo();\n", r);
        assert_eq!(out, b"        foo();\n");
    }

    #[test]
    fn count_trailing_blank_basic() {
        assert_eq!(count_trailing_blank(b"a\nb\n"), 0);
        assert_eq!(count_trailing_blank(b"a\nb\n\n"), 1);
        assert_eq!(count_trailing_blank(b"a\n\n\n"), 2);
        assert_eq!(count_trailing_blank(b"a\n \n"), 1);
    }

    #[test]
    fn ws_check_emit_paints_trailing() {
        let colors = WsEmitColors {
            set: "<S>",
            reset: "<R>",
            ws: "<W>",
        };
        let mut out = Vec::new();
        ws_check_emit(b"foo(); \n", WS_DEFAULT_RULE, &mut out, &colors);
        assert_eq!(out, b"<S>foo();<R><W> <R>\n".to_vec());
    }
}