Skip to main content

alint_rules/fixers/
strip.rs

1use alint_core::{Error, FixContext, FixOutcome, Fixer, Result, Violation};
2
3/// Strips Unicode bidi control characters (the Trojan Source
4/// codepoints U+202A–202E, U+2066–2069) from the file's content.
5#[derive(Debug)]
6pub struct FileStripBidiFixer;
7
8impl Fixer for FileStripBidiFixer {
9    fn describe(&self) -> String {
10        "strip Unicode bidi control characters".to_string()
11    }
12
13    fn apply(&self, violation: &Violation, ctx: &FixContext<'_>) -> Result<FixOutcome> {
14        apply_char_filter(
15            "bidi",
16            "stripped bidi controls from",
17            violation,
18            ctx,
19            crate::no_bidi_controls::is_bidi_control,
20            /* preserve_leading_feff = */ false,
21        )
22    }
23}
24
25/// Strips zero-width characters (U+200B / U+200C / U+200D, plus
26/// body-internal U+FEFF — a leading BOM is preserved so
27/// `no_bom` can own that concern).
28#[derive(Debug)]
29pub struct FileStripZeroWidthFixer;
30
31impl Fixer for FileStripZeroWidthFixer {
32    fn describe(&self) -> String {
33        "strip zero-width characters (U+200B/C/D, body-internal U+FEFF)".to_string()
34    }
35
36    fn apply(&self, violation: &Violation, ctx: &FixContext<'_>) -> Result<FixOutcome> {
37        apply_char_filter(
38            "zero-width",
39            "stripped zero-width chars from",
40            violation,
41            ctx,
42            |c| matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}'),
43            /* preserve_leading_feff = */ true,
44        )
45    }
46}
47
48/// Strips a leading BOM (UTF-8 / UTF-16 / UTF-32 LE & BE) from
49/// the violating file.
50#[derive(Debug)]
51pub struct FileStripBomFixer;
52
53impl Fixer for FileStripBomFixer {
54    fn describe(&self) -> String {
55        "strip leading BOM".to_string()
56    }
57
58    fn apply(&self, violation: &Violation, ctx: &FixContext<'_>) -> Result<FixOutcome> {
59        let Some(path) = &violation.path else {
60            return Ok(FixOutcome::Skipped(
61                "violation did not carry a path".to_string(),
62            ));
63        };
64        let abs = ctx.root.join(path);
65        if ctx.dry_run {
66            return Ok(FixOutcome::Applied(format!(
67                "would strip BOM from {}",
68                path.display()
69            )));
70        }
71        let existing = match alint_core::read_for_fix(&abs, path, ctx)? {
72            alint_core::ReadForFix::Bytes(b) => b,
73            alint_core::ReadForFix::Skipped(outcome) => return Ok(outcome),
74        };
75        let Some(bom) = crate::no_bom::detect_bom(&existing) else {
76            return Ok(FixOutcome::Skipped(format!(
77                "{} has no BOM",
78                path.display()
79            )));
80        };
81        let stripped = &existing[bom.byte_len()..];
82        std::fs::write(&abs, stripped).map_err(|source| Error::Io {
83            path: abs.clone(),
84            source,
85        })?;
86        Ok(FixOutcome::Applied(format!(
87            "stripped {} BOM from {}",
88            bom.name(),
89            path.display()
90        )))
91    }
92}
93
94/// Shared read-modify-write helper for "remove every char that
95/// matches `predicate`" fix ops.
96fn apply_char_filter(
97    label: &str,
98    verb: &str,
99    violation: &Violation,
100    ctx: &FixContext<'_>,
101    predicate: impl Fn(char) -> bool,
102    preserve_leading_feff: bool,
103) -> Result<FixOutcome> {
104    let Some(path) = &violation.path else {
105        return Ok(FixOutcome::Skipped(
106            "violation did not carry a path".to_string(),
107        ));
108    };
109    let abs = ctx.root.join(path);
110    if ctx.dry_run {
111        return Ok(FixOutcome::Applied(format!(
112            "would strip {label} chars from {}",
113            path.display()
114        )));
115    }
116    let existing = match alint_core::read_for_fix(&abs, path, ctx)? {
117        alint_core::ReadForFix::Bytes(b) => b,
118        alint_core::ReadForFix::Skipped(outcome) => return Ok(outcome),
119    };
120    let Ok(text) = std::str::from_utf8(&existing) else {
121        return Ok(FixOutcome::Skipped(format!(
122            "{} is not UTF-8; cannot filter {label} chars",
123            path.display()
124        )));
125    };
126    let mut out = String::with_capacity(text.len());
127    let mut first_char = true;
128    for c in text.chars() {
129        let keep_because_leading_bom = preserve_leading_feff && first_char && c == '\u{FEFF}';
130        if keep_because_leading_bom || !predicate(c) {
131            out.push(c);
132        }
133        first_char = false;
134    }
135    if out.as_bytes() == existing {
136        return Ok(FixOutcome::Skipped(format!(
137            "{} has no {label} chars to strip",
138            path.display()
139        )));
140    }
141    std::fs::write(&abs, out.as_bytes()).map_err(|source| Error::Io {
142        path: abs.clone(),
143        source,
144    })?;
145    Ok(FixOutcome::Applied(format!("{verb} {}", path.display())))
146}