Skip to main content

fallow_extract/
css_classes.rs

1//! Markup CSS-class reference scanning and class-name similarity.
2//!
3//! Supports the `fallow health --css` class-reach candidates (the CSS analogue
4//! of `unresolved-import`). [`scan_markup_class_tokens`] pulls the STATIC class
5//! tokens out of `class` / `className` attributes across every markup surface
6//! fallow visits (JSX/TSX, HTML, Vue/Svelte/Astro), and flags whether the file
7//! also constructs classes DYNAMICALLY (`clsx(...)`, `` `btn-${x}` ``,
8//! `:class`, spread props), which downstream consumers use to abstain.
9//!
10//! The scanner is intentionally regex-based and conservative: it only collects
11//! tokens from a fully-static quoted attribute value, and treats anything that
12//! could be an interpolation as a dynamic signal rather than a token. It never
13//! tries to evaluate a dynamic expression.
14
15use std::sync::LazyLock;
16
/// One class name found in a fully-static `class` / `className` attribute,
/// paired with the source line the attribute starts on.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MarkupClassToken {
    /// Class name exactly as written in the attribute, without a leading dot
    /// (for example `card-title`).
    pub value: String,
    /// Line number of the owning attribute, counted from 1.
    pub line: u32,
}
25
26/// The result of scanning one markup source for class references.
27#[derive(Debug, Clone, Default, PartialEq, Eq)]
28pub struct MarkupClassScan {
29    /// Class tokens from fully-static `class` / `className` attribute values.
30    pub static_tokens: Vec<MarkupClassToken>,
31    /// True when the file constructs classes dynamically anywhere (`clsx(...)`,
32    /// template literals, `:class`, spread/computed props). Consumers that need
33    /// to prove a class unused must abstain on dynamic files; a typo check on a
34    /// static token can still fire.
35    pub has_dynamic: bool,
36}
37
38/// Matches a fully-static `class="..."` / `className="..."` attribute (double or
39/// single quoted) and captures the raw value. The value is split into tokens by
40/// the caller; a value containing `{`, `}`, `$`, or a backtick is treated as a
41/// dynamic interpolation (Svelte `class="a-{b}"`, Vue mustache) and skipped for
42/// token extraction.
43static STATIC_CLASS_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
44    crate::static_regex(r#"(?:\bclass|\bclassName)\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
45});
46
/// Literal substrings whose presence anywhere in a markup file is taken as
/// proof that class names are built dynamically. A single hit is enough to set
/// [`MarkupClassScan::has_dynamic`].
const DYNAMIC_CLASS_MARKERS: &[&str] = &[
    // JSX / Svelte expression containers, with and without a space before `=`.
    "className={",
    "className ={",
    "class={",
    "class ={",
    // Vue `v-bind`, shorthand and long form.
    ":class",
    "v-bind:class",
    // Angular property binding and `ngClass`.
    "[class]",
    "[ngClass]",
    // Svelte class directive, e.g. `class:active`.
    "class:",
    // Common class-combiner helpers.
    "clsx(",
    "classnames(",
    "classNames(",
    "cx(",
    "cva(",
    "twMerge(",
    // Tailwind tagged template.
    "tw`",
    // DOM `classList` manipulation.
    "classList",
];
68
/// Reports whether a quoted class value contains interpolation punctuation
/// (`{`, `}`, `$`, or a backtick). Splitting such a value would produce partial
/// or wrong tokens, so it must not be tokenized; its presence also means the
/// file builds classes dynamically.
fn value_is_interpolated(value: &str) -> bool {
    value
        .chars()
        .any(|c| matches!(c, '{' | '}' | '$' | '`'))
}
75
/// Decides whether an already whitespace-split token looks like an authored
/// class name. Empty tokens are rejected, as is any token containing markup or
/// interpolation punctuation (braces, `$`, backtick, quotes, parentheses,
/// angle brackets, `=`). Tailwind variant (`hover:`) and opacity (`/50`)
/// shapes are deliberately kept: they simply never match an authored CSS class
/// or a near miss downstream.
fn is_plausible_class_token(token: &str) -> bool {
    if token.is_empty() {
        return false;
    }
    token.chars().all(|c| {
        !matches!(
            c,
            '{' | '}' | '$' | '`' | '"' | '\'' | '(' | ')' | '<' | '>' | '='
        )
    })
}
84
85/// Scan a markup source for static class tokens and a dynamic-construction flag.
86///
87/// `class="a b c"` yields three tokens; `className={clsx(...)}` and
88/// `class="a-{x}"` yield no tokens but set `has_dynamic`.
89#[must_use]
90pub fn scan_markup_class_tokens(source: &str) -> MarkupClassScan {
91    let has_dynamic = DYNAMIC_CLASS_MARKERS.iter().any(|m| source.contains(m));
92    let mut static_tokens = Vec::new();
93    let mut any_interpolated = false;
94
95    for caps in STATIC_CLASS_ATTR_RE.captures_iter(source) {
96        let Some(m) = caps.get(0) else { continue };
97        let value = caps
98            .get(1)
99            .or_else(|| caps.get(2))
100            .map_or("", |g| g.as_str());
101        if value_is_interpolated(value) {
102            any_interpolated = true;
103            continue;
104        }
105        let line = 1 + source[..m.start()].bytes().filter(|&b| b == b'\n').count();
106        let line = u32::try_from(line).unwrap_or(u32::MAX);
107        for token in value.split_whitespace() {
108            if is_plausible_class_token(token) {
109                static_tokens.push(MarkupClassToken {
110                    value: token.to_owned(),
111                    line,
112                });
113            }
114        }
115    }
116
117    MarkupClassScan {
118        static_tokens,
119        has_dynamic: has_dynamic || any_interpolated,
120    }
121}
122
/// True when `a` and `b` differ by exactly one single-character edit (one
/// substitution, insertion, or deletion). Equal strings return false.
///
/// Characters are compared as Unicode scalar values (`char`), not bytes, so an
/// edit that touches a multi-byte character (`café` vs `cafe`) still counts as
/// one edit. Runs in time linear in the input length, without allocating and
/// without building a full edit-distance matrix.
///
/// Used to surface a likely className typo: a markup token that matches no
/// defined class but is one edit from a class that IS defined (`card-tite` vs
/// `card-title`). Restricting to distance one keeps the suggestion near-zero
/// false-positive.
#[must_use]
pub fn is_edit_distance_one(a: &str, b: &str) -> bool {
    // Byte length of the longest common prefix. Equal chars encode to the same
    // number of bytes, so the offset is a char boundary in both strings.
    let shared: usize = a
        .chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x.len_utf8())
        .sum();
    let (rest_a, rest_b) = (&a[shared..], &b[shared..]);
    let (mut tail_a, mut tail_b) = (rest_a.chars(), rest_b.chars());

    match (tail_a.next(), tail_b.next()) {
        // No difference at all: distance zero.
        (None, None) => false,
        // One string is a prefix of the other: exactly one trailing char extra.
        (Some(_), None) => tail_a.as_str().is_empty(),
        (None, Some(_)) => tail_b.as_str().is_empty(),
        // First differing position: everything after it must line up as a
        // substitution, or as a single extra char on one side.
        (Some(_), Some(_)) => {
            let (after_a, after_b) = (tail_a.as_str(), tail_b.as_str());
            after_a == after_b || after_a == rest_b || rest_a == after_b
        }
    }
}
168
/// Decides whether `defined` is a plausible TYPO target for `token`: the two
/// are exactly one edit apart (compared byte-wise) AND that edit looks like a
/// slip of the keyboard rather than a deliberate naming variation. This is
/// stricter than [`is_edit_distance_one`], because real codebases contain many
/// one-edit class pairs that are intentional:
///
/// - **Numeric-scale families** (`col-lg-6` vs `col-lg-4`, `display-4` vs
///   `display-5`, `gap-2` vs `gap-3`): neighbours on a Bootstrap / utility
///   scale differ by one digit yet are distinct classes. An edit is rejected
///   whenever the substituted, inserted, or deleted byte is an ASCII digit.
/// - **Singular/plural pairs** (`button` vs `buttons`): one trailing `s` is a
///   morphological variant, not a typo, and is rejected.
///
/// Alphabetic slips (`card-tite` vs `card-title`, `sidebar-nev` vs
/// `sidebar-nav`) pass. The extra rules came out of real-world smoke testing
/// on Bootstrap, where the bare near-miss check produced 117 false positives,
/// all of them numeric-scale or plural.
#[must_use]
pub fn is_typo_edit(token: &str, defined: &str) -> bool {
    let (tok, def) = (token.as_bytes(), defined.as_bytes());

    if tok.len() == def.len() {
        // Substitution: there must be exactly one mismatching position, and
        // neither byte at that position may be a digit.
        let mut mismatches = tok.iter().zip(def).filter(|(x, y)| x != y);
        return match (mismatches.next(), mismatches.next()) {
            (Some((x, y)), None) => !x.is_ascii_digit() && !y.is_ascii_digit(),
            _ => false,
        };
    }

    let (shorter, longer) = if tok.len() < def.len() {
        (tok, def)
    } else {
        (def, tok)
    };
    if longer.len() - shorter.len() != 1 {
        return false;
    }

    // The extra byte sits right after the common prefix; whatever follows it
    // in the longer string must equal the remainder of the shorter one.
    let split = shorter
        .iter()
        .zip(longer)
        .take_while(|(x, y)| x == y)
        .count();
    if shorter[split..] != longer[split + 1..] {
        return false;
    }
    let extra = longer[split];

    // Shorter string plus a trailing `s` is a plural, not a typo.
    let is_plural = split == shorter.len() && extra == b's';
    // An inserted/deleted digit is a numeric-scale variant, not a typo.
    !is_plural && !extra.is_ascii_digit()
}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `source` and keeps only the token values, discarding line info.
    fn tokens(source: &str) -> Vec<String> {
        let scan = scan_markup_class_tokens(source);
        scan.static_tokens.into_iter().map(|tok| tok.value).collect()
    }

    #[test]
    fn extracts_static_class_and_classname_tokens() {
        let cases = [
            (
                r#"<div class="card card-title">x</div>"#,
                vec!["card", "card-title"],
            ),
            (
                r#"<div className="btn btn-primary">x</div>"#,
                vec!["btn", "btn-primary"],
            ),
            (r"<i class='solo'></i>", vec!["solo"]),
        ];
        for (markup, expected) in cases {
            assert_eq!(tokens(markup), expected);
        }
    }

    #[test]
    fn reports_one_based_line() {
        let source = "\n\n<i class=\"on-line-three\"></i>";
        let MarkupClassScan { static_tokens, .. } = scan_markup_class_tokens(source);
        let lines: Vec<u32> = static_tokens.iter().map(|tok| tok.line).collect();
        // Exactly one token, sitting on the third line.
        assert_eq!(lines, vec![3]);
    }

    #[test]
    fn flags_dynamic_construction_and_skips_its_tokens() {
        let dynamic_sources = [
            r#"<div className={clsx("a", x)}>y</div>"#,
            r"<div className={`btn-${size}`}>y</div>",
            r#"<div :class="{ active: isOn }">y</div>"#,
            r#"<div class="a-{cls}">y</div>"#, // Svelte interpolation
            r#"el.classList.add("toggled")"#,
        ];
        for src in dynamic_sources {
            let MarkupClassScan { has_dynamic, .. } = scan_markup_class_tokens(src);
            assert!(has_dynamic, "expected dynamic for {src:?}");
        }
    }

    #[test]
    fn static_attr_in_dynamic_file_still_yields_its_tokens() {
        // The file is dynamic because of `clsx`, yet the static attribute is
        // still tokenized: the typo check needs that token.
        let source = r#"<div className={clsx(x)}>a</div><span class="card-tite">b</span>"#;
        let scan = scan_markup_class_tokens(source);
        assert!(scan.has_dynamic);
        let values: Vec<&str> = scan
            .static_tokens
            .iter()
            .map(|tok| tok.value.as_str())
            .collect();
        assert_eq!(values, vec!["card-tite"]);
    }

    #[test]
    fn edit_distance_one_substitution() {
        let cases = [
            ("card-tite", "card-tit=", true),      // sanity, one sub
            ("btn-primary", "btn-primery", true),
            ("btn", "btn", false),                 // equal is not distance one
            ("btn-primary", "btn-secondary", false),
        ];
        for (a, b, expected) in cases {
            assert_eq!(is_edit_distance_one(a, b), expected, "{a:?} vs {b:?}");
        }
    }

    #[test]
    fn edit_distance_one_insertion_deletion() {
        let cases = [
            ("card-title", "card-titl", true), // deletion
            ("card-titl", "card-title", true), // insertion
            ("nav", "navs", true),             // append
            ("nav", "navxs", false),           // distance two
            ("nav", "xyz", false),             // unrelated
        ];
        for (a, b, expected) in cases {
            assert_eq!(is_edit_distance_one(a, b), expected, "{a:?} vs {b:?}");
        }
    }

    #[test]
    fn typo_edit_accepts_real_alphabetic_typos() {
        let typos = [
            ("card-tite", "card-title"),     // missing letter
            ("sidebar-nev", "sidebar-nav"),  // wrong letter
            ("widget-labl", "widget-label"), // dropped letter (not plural)
            ("headar", "header"),            // one letter substitution
        ];
        for (token, defined) in typos {
            assert!(is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }

    #[test]
    fn typo_edit_rejects_numeric_scale_families() {
        // Neighbouring Bootstrap / utility scale members sit one digit apart
        // but are distinct intentional classes, never typos.
        let scale_pairs = [
            ("col-lg-6", "col-lg-4"), // digit substitution
            ("display-4", "display-5"),
            ("gap-2", "gap-3"),
            ("display-4", "display-"), // digit deletion
            ("z-10", "z-50"),          // digit substitution
        ];
        for (token, defined) in scale_pairs {
            assert!(!is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }

    #[test]
    fn typo_edit_rejects_singular_plural() {
        let plural_pairs = [("button", "buttons"), ("buttons", "button"), ("card", "cards")];
        for (token, defined) in plural_pairs {
            assert!(!is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }
}