Skip to main content

alint_rules/
no_zero_width_chars.rs

1//! `no_zero_width_chars` — flag invisible zero-width characters
2//! that can hide text, break identifiers, or leak data.
3//!
4//! Codepoints flagged:
5//!   - U+200B ZERO WIDTH SPACE
6//!   - U+200C ZERO WIDTH NON-JOINER
7//!   - U+200D ZERO WIDTH JOINER
8//!   - U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM) — *but only when
9//!     not at byte position 0*. A leading BOM is `no_bom`'s
10//!     territory; this rule stays focused on body-internal ZWs
11//!     so the two rules don't double-report.
12
13use std::path::Path;
14
15use alint_core::{
16    Context, Error, FixSpec, Fixer, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation,
17    eval_per_file,
18};
19
20use crate::fixers::FileStripZeroWidthFixer;
21
/// Decides whether `c` is one of the zero-width codepoints this rule
/// reports.
///
/// U+200B, U+200C and U+200D always count. U+FEFF counts unless
/// `is_leading_feff` is `true`, meaning the caller has identified it
/// as the BOM at byte 0 of the file; that case is deliberately left
/// unflagged.
pub fn is_flagged_zero_width(c: char, is_leading_feff: bool) -> bool {
    // U+FEFF is the only codepoint whose verdict depends on position.
    if c == '\u{FEFF}' {
        return !is_leading_feff;
    }
    matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}')
}
32
/// State for one configured `no_zero_width_chars` rule.
///
/// Instances are assembled by `build` from a `RuleSpec`.
#[derive(Debug)]
pub struct NoZeroWidthCharsRule {
    /// Copied from the spec's `id`.
    /// NOTE(review): presumably surfaced through `rule_common_impl!` —
    /// the macro expansion is not visible here; confirm.
    id: String,
    /// Copied from the spec's `level`.
    level: Level,
    /// Copied from the spec's `policy_url`, if any.
    policy_url: Option<String>,
    /// Optional replacement for the violation text. When `None`,
    /// `evaluate_file` generates a message naming the codepoint and
    /// its line/column.
    message: Option<String>,
    /// Scope handed back by `path_scope`.
    scope: Scope,
    /// Fixer exposed through `Rule::fixer`; present only when the spec
    /// requested the `FileStripZeroWidth` fix.
    fixer: Option<FileStripZeroWidthFixer>,
}
42
43impl Rule for NoZeroWidthCharsRule {
44    alint_core::rule_common_impl!();
45
46    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
47        eval_per_file(self, ctx)
48    }
49
50    fn fixer(&self) -> Option<&dyn Fixer> {
51        self.fixer.as_ref().map(|f| f as &dyn Fixer)
52    }
53
54    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
55        Some(self)
56    }
57}
58
59impl PerFileRule for NoZeroWidthCharsRule {
60    fn path_scope(&self) -> &Scope {
61        &self.scope
62    }
63
64    fn evaluate_file(
65        &self,
66        _ctx: &Context<'_>,
67        path: &Path,
68        bytes: &[u8],
69    ) -> Result<Vec<Violation>> {
70        let Ok(text) = std::str::from_utf8(bytes) else {
71            return Ok(Vec::new());
72        };
73        let Some((line_no, col, codepoint)) = first_zero_width(text) else {
74            return Ok(Vec::new());
75        };
76        let msg = self.message.clone().unwrap_or_else(|| {
77            format!("zero-width character U+{codepoint:04X} at line {line_no} col {col}")
78        });
79        Ok(vec![
80            Violation::new(msg)
81                .with_path(std::sync::Arc::<Path>::from(path))
82                .with_location(line_no, col),
83        ])
84    }
85}
86
87fn first_zero_width(text: &str) -> Option<(usize, usize, u32)> {
88    let mut line = 1usize;
89    let mut col = 1usize;
90    let mut first_char = true;
91    for c in text.chars() {
92        let is_leading = first_char && c == '\u{FEFF}';
93        if !is_leading && is_flagged_zero_width(c, false) {
94            return Some((line, col, c as u32));
95        }
96        first_char = false;
97        if c == '\n' {
98            line += 1;
99            col = 1;
100        } else {
101            col += 1;
102        }
103    }
104    None
105}
106
107pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
108    let _paths = spec.paths.as_ref().ok_or_else(|| {
109        Error::rule_config(&spec.id, "no_zero_width_chars requires a `paths` field")
110    })?;
111    let fixer = match &spec.fix {
112        Some(FixSpec::FileStripZeroWidth { .. }) => Some(FileStripZeroWidthFixer),
113        Some(other) => {
114            return Err(Error::rule_config(
115                &spec.id,
116                format!(
117                    "fix.{} is not compatible with no_zero_width_chars",
118                    other.op_name()
119                ),
120            ));
121        }
122        None => None,
123    };
124    Ok(Box::new(NoZeroWidthCharsRule {
125        id: spec.id.clone(),
126        level: spec.level,
127        policy_url: spec.policy_url.clone(),
128        message: spec.message.clone(),
129        scope: Scope::from_spec(spec)?,
130        fixer,
131    }))
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// A ZWSP between ASCII letters is reported at its 1-based column.
    #[test]
    fn flags_zwsp() {
        assert_eq!(first_zero_width("ab\u{200B}cd"), Some((1, 3, 0x200B)));
    }

    /// A ZWJ is recognised; only the reported codepoint is checked.
    #[test]
    fn flags_zwj() {
        let codepoint = first_zero_width("\u{200D}x").map(|hit| hit.2);
        assert_eq!(codepoint, Some(0x200D));
    }

    /// U+FEFF as the very first character is skipped.
    #[test]
    fn leading_bom_is_not_flagged() {
        assert_eq!(first_zero_width("\u{FEFF}hello\n"), None);
    }

    /// U+FEFF anywhere after the first character is reported.
    #[test]
    fn midstream_feff_is_flagged() {
        assert_eq!(
            first_zero_width("hello\u{FEFF}world"),
            Some((1, 6, 0xFEFF))
        );
    }

    /// Plain ASCII text yields no hit.
    #[test]
    fn clean_ascii_passes() {
        assert_eq!(first_zero_width("nothing hidden here\n"), None);
    }
}