Skip to main content

alint_rules/
no_zero_width_chars.rs

//! `no_zero_width_chars` — flag invisible zero-width characters
//! that can hide text, break identifiers, or leak data.
//!
//! Codepoints flagged:
//!   - U+200B ZERO WIDTH SPACE
//!   - U+200C ZERO WIDTH NON-JOINER
//!   - U+200D ZERO WIDTH JOINER
//!   - U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM) — *but only when
//!     not at byte position 0*. A leading BOM is `no_bom`'s
//!     territory; this rule stays focused on body-internal ZWs
//!     so the two rules don't double-report.
12
13use alint_core::{Context, Error, FixSpec, Fixer, Level, Result, Rule, RuleSpec, Scope, Violation};
14
15use crate::fixers::FileStripZeroWidthFixer;
16
/// Decides whether `c` is one of the zero-width codepoints this rule
/// reports: U+200B, U+200C, U+200D, or U+FEFF.
///
/// `is_leading_feff` should be `true` only when `c` is a U+FEFF sitting
/// at byte 0 of the file (i.e. a BOM). In that case U+FEFF is
/// deliberately *not* flagged; the flag has no effect on the other
/// codepoints.
pub fn is_flagged_zero_width(c: char, is_leading_feff: bool) -> bool {
    // U+FEFF is the only codepoint whose verdict depends on position.
    if c == '\u{FEFF}' {
        return !is_leading_feff;
    }
    matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}')
}
27
28#[derive(Debug)]
29pub struct NoZeroWidthCharsRule {
30    id: String,
31    level: Level,
32    policy_url: Option<String>,
33    message: Option<String>,
34    scope: Scope,
35    fixer: Option<FileStripZeroWidthFixer>,
36}
37
38impl Rule for NoZeroWidthCharsRule {
39    fn id(&self) -> &str {
40        &self.id
41    }
42    fn level(&self) -> Level {
43        self.level
44    }
45    fn policy_url(&self) -> Option<&str> {
46        self.policy_url.as_deref()
47    }
48
49    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
50        let mut violations = Vec::new();
51        for entry in ctx.index.files() {
52            if !self.scope.matches(&entry.path) {
53                continue;
54            }
55            let full = ctx.root.join(&entry.path);
56            let Ok(bytes) = std::fs::read(&full) else {
57                continue;
58            };
59            let Ok(text) = std::str::from_utf8(&bytes) else {
60                continue;
61            };
62            if let Some((line_no, col, codepoint)) = first_zero_width(text) {
63                let msg = self.message.clone().unwrap_or_else(|| {
64                    format!("zero-width character U+{codepoint:04X} at line {line_no} col {col}")
65                });
66                violations.push(
67                    Violation::new(msg)
68                        .with_path(&entry.path)
69                        .with_location(line_no, col),
70                );
71            }
72        }
73        Ok(violations)
74    }
75
76    fn fixer(&self) -> Option<&dyn Fixer> {
77        self.fixer.as_ref().map(|f| f as &dyn Fixer)
78    }
79}
80
81fn first_zero_width(text: &str) -> Option<(usize, usize, u32)> {
82    let mut line = 1usize;
83    let mut col = 1usize;
84    let mut first_char = true;
85    for c in text.chars() {
86        let is_leading = first_char && c == '\u{FEFF}';
87        if !is_leading && is_flagged_zero_width(c, false) {
88            return Some((line, col, c as u32));
89        }
90        first_char = false;
91        if c == '\n' {
92            line += 1;
93            col = 1;
94        } else {
95            col += 1;
96        }
97    }
98    None
99}
100
101pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
102    let paths = spec.paths.as_ref().ok_or_else(|| {
103        Error::rule_config(&spec.id, "no_zero_width_chars requires a `paths` field")
104    })?;
105    let fixer = match &spec.fix {
106        Some(FixSpec::FileStripZeroWidth { .. }) => Some(FileStripZeroWidthFixer),
107        Some(other) => {
108            return Err(Error::rule_config(
109                &spec.id,
110                format!(
111                    "fix.{} is not compatible with no_zero_width_chars",
112                    other.op_name()
113                ),
114            ));
115        }
116        None => None,
117    };
118    Ok(Box::new(NoZeroWidthCharsRule {
119        id: spec.id.clone(),
120        level: spec.level,
121        policy_url: spec.policy_url.clone(),
122        message: spec.message.clone(),
123        scope: Scope::from_paths_spec(paths)?,
124        fixer,
125    }))
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// A ZWSP in the middle of a line is reported with its 1-based position.
    #[test]
    fn flags_zwsp() {
        assert_eq!(first_zero_width("ab\u{200B}cd"), Some((1, 3, 0x200B)));
    }

    /// A ZWJ is reported with its own codepoint.
    #[test]
    fn flags_zwj() {
        let hit = first_zero_width("\u{200D}x");
        assert_eq!(hit.map(|(_, _, cp)| cp), Some(0x200D));
    }

    /// U+FEFF as the very first character is a BOM and is left alone.
    #[test]
    fn leading_bom_is_not_flagged() {
        assert_eq!(first_zero_width("\u{FEFF}hello\n"), None);
    }

    /// U+FEFF anywhere else is reported.
    #[test]
    fn midstream_feff_is_flagged() {
        assert_eq!(
            first_zero_width("hello\u{FEFF}world"),
            Some((1, 6, 0xFEFF))
        );
    }

    /// Plain ASCII text produces no hit.
    #[test]
    fn clean_ascii_passes() {
        assert_eq!(first_zero_width("nothing hidden here\n"), None);
    }
}