fallow_extract/css_classes.rs
1//! Markup CSS-class reference scanning and class-name similarity.
2//!
3//! Supports the `fallow health --css` class-reach candidates (the CSS analogue
4//! of `unresolved-import`). [`scan_markup_class_tokens`] pulls the STATIC class
5//! tokens out of `class` / `className` attributes across every markup surface
6//! fallow visits (JSX/TSX, HTML, Vue/Svelte/Astro), and flags whether the file
7//! also constructs classes DYNAMICALLY (`clsx(...)`, `` `btn-${x}` ``,
8//! `:class`, spread props), which downstream consumers use to abstain.
9//!
10//! The scanner is intentionally regex-based and conservative: it only collects
11//! tokens from a fully-static quoted attribute value, and treats anything that
12//! could be an interpolation as a dynamic signal rather than a token. It never
13//! tries to evaluate a dynamic expression.
14
15use std::sync::LazyLock;
16
/// One statically written class name found in markup, paired with the 1-based
/// source line on which its attribute begins.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MarkupClassToken {
    /// The class name exactly as authored, without a leading dot
    /// (for example `card-title`).
    pub value: String,
    /// Line number (counting from 1) of the attribute that carried the token.
    pub line: u32,
}
25
26/// The result of scanning one markup source for class references.
27#[derive(Debug, Clone, Default, PartialEq, Eq)]
28pub struct MarkupClassScan {
29 /// Class tokens from fully-static `class` / `className` attribute values.
30 pub static_tokens: Vec<MarkupClassToken>,
31 /// True when the file constructs classes dynamically anywhere (`clsx(...)`,
32 /// template literals, `:class`, spread/computed props). Consumers that need
33 /// to prove a class unused must abstain on dynamic files; a typo check on a
34 /// static token can still fire.
35 pub has_dynamic: bool,
36}
37
38/// Matches a fully-static `class="..."` / `className="..."` attribute (double or
39/// single quoted) and captures the raw value. The value is split into tokens by
40/// the caller; a value containing `{`, `}`, `$`, or a backtick is treated as a
41/// dynamic interpolation (Svelte `class="a-{b}"`, Vue mustache) and skipped for
42/// token extraction.
43static STATIC_CLASS_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
44 crate::static_regex(r#"(?:\bclass|\bclassName)\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
45});
46
/// Literal substrings whose presence shows that a markup file assembles class
/// names at runtime. A single hit is enough to set
/// [`MarkupClassScan::has_dynamic`].
const DYNAMIC_CLASS_MARKERS: &[&str] = &[
    // JSX / Svelte expression containers, with and without a space before `=`.
    "className={",
    "className ={",
    "class={",
    "class ={",
    // Vue `v-bind` shorthand and long form.
    ":class",
    "v-bind:class",
    // Angular property binding and `ngClass`.
    "[class]",
    "[ngClass]",
    // Svelte class directive, e.g. `class:active`.
    "class:",
    // Common class-combiner libraries.
    "clsx(",
    "classnames(",
    "classNames(",
    "cx(",
    "cva(",
    "twMerge(",
    // Tailwind tagged template.
    "tw`",
    // DOM `classList` manipulation.
    "classList",
];
68
/// Reports whether an attribute value contains interpolation punctuation
/// (`{`, `}`, `$`, or a backtick). Such a value must not be tokenized, because
/// the pieces would be partial or wrong, and it also marks the file as dynamic.
fn value_is_interpolated(value: &str) -> bool {
    value.chars().any(|c| matches!(c, '{' | '}' | '$' | '`'))
}
75
/// Decides whether an already whitespace-split token looks like an authored
/// class name: it must be non-empty and free of markup / interpolation
/// punctuation. Tailwind variant (`hover:`) and opacity (`/50`) shapes are
/// deliberately kept (they simply never match an authored CSS class or a near
/// miss downstream); only obvious non-class noise is rejected.
fn is_plausible_class_token(token: &str) -> bool {
    const REJECTED: [char; 11] = ['{', '}', '$', '`', '"', '\'', '(', ')', '<', '>', '='];
    if token.is_empty() {
        return false;
    }
    token.chars().all(|c| !REJECTED.contains(&c))
}
84
85/// Scan a markup source for static class tokens and a dynamic-construction flag.
86///
87/// `class="a b c"` yields three tokens; `className={clsx(...)}` and
88/// `class="a-{x}"` yield no tokens but set `has_dynamic`.
89#[must_use]
90pub fn scan_markup_class_tokens(source: &str) -> MarkupClassScan {
91 let has_dynamic = DYNAMIC_CLASS_MARKERS.iter().any(|m| source.contains(m));
92 let mut static_tokens = Vec::new();
93 let mut any_interpolated = false;
94
95 for caps in STATIC_CLASS_ATTR_RE.captures_iter(source) {
96 let Some(m) = caps.get(0) else { continue };
97 let value = caps
98 .get(1)
99 .or_else(|| caps.get(2))
100 .map_or("", |g| g.as_str());
101 if value_is_interpolated(value) {
102 any_interpolated = true;
103 continue;
104 }
105 let line = 1 + source[..m.start()].bytes().filter(|&b| b == b'\n').count();
106 let line = u32::try_from(line).unwrap_or(u32::MAX);
107 for token in value.split_whitespace() {
108 if is_plausible_class_token(token) {
109 static_tokens.push(MarkupClassToken {
110 value: token.to_owned(),
111 line,
112 });
113 }
114 }
115 }
116
117 MarkupClassScan {
118 static_tokens,
119 has_dynamic: has_dynamic || any_interpolated,
120 }
121}
122
/// True when `a` and `b` differ by exactly one single-character edit (one
/// substitution, insertion, or deletion). Equal strings return false.
///
/// Characters are compared as Unicode scalar values, not bytes, so a non-ASCII
/// class name is handled the same way as an ASCII one (`café` vs `cafe` is one
/// substitution). Runs in time linear in the input length and allocates
/// nothing: the shared prefix is skipped, then the remainders are compared as
/// slices.
///
/// Used to surface a likely className typo: a markup token that matches no
/// defined class but is one edit from a class that IS defined (`card-tite` vs
/// `card-title`). Restricting to distance one keeps the suggestion near-zero
/// false-positive.
#[must_use]
pub fn is_edit_distance_one(a: &str, b: &str) -> bool {
    // Byte length of the longest common prefix, measured in whole characters.
    let shared: usize = a
        .chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x.len_utf8())
        .sum();
    let (a_tail, b_tail) = (&a[shared..], &b[shared..]);

    let mut a_rest = a_tail.chars();
    let mut b_rest = b_tail.chars();
    match (a_rest.next(), b_rest.next()) {
        // Identical strings: distance zero.
        (None, None) => false,
        // One string is a prefix of the other: exactly one trailing char allowed.
        (Some(_), None) => a_rest.as_str().is_empty(),
        (None, Some(_)) => b_rest.as_str().is_empty(),
        // First differing position: the edit must be here, and everything after
        // it must line up as a substitution, or as an extra char in `a` or `b`.
        (Some(_), Some(_)) => {
            let (a_after, b_after) = (a_rest.as_str(), b_rest.as_str());
            a_after == b_after || a_after == b_tail || a_tail == b_after
        }
    }
}
168
/// True when `defined` is a likely TYPO target for `token`: exactly one edit
/// apart AND that edit is a believable mistake, not a deliberate naming
/// variation. This is stricter than [`is_edit_distance_one`] because real
/// codebases are full of one-edit class pairs that are NOT typos:
///
/// - **Numeric-scale families** (`col-lg-6` vs `col-lg-4`, `display-4` vs
///   `display-5`, `gap-2` vs `gap-3`): adjacent members of a Bootstrap /
///   utility scale differ by one digit but are distinct intentional classes.
///   Any edit whose changed / inserted / deleted character is an ASCII digit is
///   rejected.
/// - **Singular/plural pairs** (`button` vs `buttons`): a single trailing `s`
///   is a morphological variant, not a typo. Rejected.
///
/// Real typos (`card-tite` vs `card-title`, `sidebar-nev` vs `sidebar-nav`) are
/// alphabetic edits and pass. Caught by real-world smoke on Bootstrap, where the
/// bare near-miss produced 117 false positives, all numeric-scale or plural.
///
/// Characters are compared as Unicode scalar values rather than bytes, so a
/// one-character slip in a non-ASCII class name is recognised as one edit.
/// Equal strings return false.
#[must_use]
pub fn is_typo_edit(token: &str, defined: &str) -> bool {
    // Byte length of the longest common prefix, measured in whole characters.
    let shared: usize = token
        .chars()
        .zip(defined.chars())
        .take_while(|(t, d)| t == d)
        .map(|(t, _)| t.len_utf8())
        .sum();
    let (token_tail, defined_tail) = (&token[shared..], &defined[shared..]);

    // One string is the other plus trailing text: a typo only when that text is
    // a single character that is neither a plural `s` nor a scale digit.
    let trailing_is_typo =
        |extra: char, rest: &str| rest.is_empty() && extra != 's' && !extra.is_ascii_digit();

    let mut token_rest = token_tail.chars();
    let mut defined_rest = defined_tail.chars();
    match (token_rest.next(), defined_rest.next()) {
        (None, None) => false,
        (Some(extra), None) => trailing_is_typo(extra, token_rest.as_str()),
        (None, Some(extra)) => trailing_is_typo(extra, defined_rest.as_str()),
        (Some(t), Some(d)) => {
            let (token_after, defined_after) = (token_rest.as_str(), defined_rest.as_str());
            if token_after == defined_after {
                // Substitution: reject if a digit sits on either side.
                !t.is_ascii_digit() && !d.is_ascii_digit()
            } else if token_after == defined_tail {
                // `token` carries the extra character `t`.
                !t.is_ascii_digit()
            } else if token_tail == defined_after {
                // `defined` carries the extra character `d`.
                !d.is_ascii_digit()
            } else {
                // More than one edit apart.
                false
            }
        }
    }
}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `source` and returns only the token strings, in source order.
    fn tokens(source: &str) -> Vec<String> {
        let scan = scan_markup_class_tokens(source);
        let mut values = Vec::with_capacity(scan.static_tokens.len());
        for token in scan.static_tokens {
            values.push(token.value);
        }
        values
    }

    #[test]
    fn extracts_static_class_and_classname_tokens() {
        let cases: [(&str, &[&str]); 3] = [
            (
                r#"<div class="card card-title">x</div>"#,
                &["card", "card-title"],
            ),
            (
                r#"<div className="btn btn-primary">x</div>"#,
                &["btn", "btn-primary"],
            ),
            (r"<i class='solo'></i>", &["solo"]),
        ];
        for (markup, expected) in cases {
            assert_eq!(tokens(markup), expected);
        }
    }

    #[test]
    fn reports_one_based_line() {
        let MarkupClassScan { static_tokens, .. } =
            scan_markup_class_tokens("\n\n<i class=\"on-line-three\"></i>");
        assert_eq!(static_tokens.len(), 1);
        assert_eq!(static_tokens[0].line, 3);
    }

    #[test]
    fn flags_dynamic_construction_and_skips_its_tokens() {
        let dynamic_sources = [
            r#"<div className={clsx("a", x)}>y</div>"#,
            r"<div className={`btn-${size}`}>y</div>",
            r#"<div :class="{ active: isOn }">y</div>"#,
            // Svelte interpolation inside a quoted value.
            r#"<div class="a-{cls}">y</div>"#,
            r#"el.classList.add("toggled")"#,
        ];
        for src in dynamic_sources {
            assert!(
                scan_markup_class_tokens(src).has_dynamic,
                "expected dynamic for {src:?}"
            );
        }
    }

    #[test]
    fn static_attr_in_dynamic_file_still_yields_its_tokens() {
        // The typo check needs the static token, so a static attribute is still
        // tokenized even though the file as a whole is dynamic.
        let markup = r#"<div className={clsx(x)}>a</div><span class="card-tite">b</span>"#;
        let scan = scan_markup_class_tokens(markup);
        assert!(scan.has_dynamic);

        let found: Vec<&str> = scan
            .static_tokens
            .iter()
            .map(|token| token.value.as_str())
            .collect();
        assert_eq!(found, vec!["card-tite"]);
    }

    #[test]
    fn edit_distance_one_substitution() {
        let cases = [
            ("card-tite", "card-tit=", true), // sanity, one sub
            ("btn-primary", "btn-primery", true),
            ("btn", "btn", false), // equal is not distance one
            ("btn-primary", "btn-secondary", false),
        ];
        for (a, b, expected) in cases {
            assert_eq!(is_edit_distance_one(a, b), expected, "{a:?} vs {b:?}");
        }
    }

    #[test]
    fn edit_distance_one_insertion_deletion() {
        let cases = [
            ("card-title", "card-titl", true), // deletion
            ("card-titl", "card-title", true), // insertion
            ("nav", "navs", true),             // append
            ("nav", "navxs", false),           // distance two
            ("nav", "xyz", false),             // unrelated
        ];
        for (a, b, expected) in cases {
            assert_eq!(is_edit_distance_one(a, b), expected, "{a:?} vs {b:?}");
        }
    }

    #[test]
    fn typo_edit_accepts_real_alphabetic_typos() {
        let typos = [
            ("card-tite", "card-title"),     // missing letter
            ("sidebar-nev", "sidebar-nav"),  // wrong letter
            ("widget-labl", "widget-label"), // dropped letter (not plural)
            ("headar", "header"),            // one letter substitution
        ];
        for (token, defined) in typos {
            assert!(is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }

    #[test]
    fn typo_edit_rejects_numeric_scale_families() {
        // Neighbouring members of a Bootstrap / utility scale sit one digit
        // apart, yet each is a distinct intentional class and never a typo.
        let scale_pairs = [
            ("col-lg-6", "col-lg-4"), // digit substitution
            ("display-4", "display-5"),
            ("gap-2", "gap-3"),
            ("display-4", "display-"), // digit deletion
            ("z-10", "z-50"),          // digit substitution
        ];
        for (token, defined) in scale_pairs {
            assert!(!is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }

    #[test]
    fn typo_edit_rejects_singular_plural() {
        let plural_pairs = [("button", "buttons"), ("buttons", "button"), ("card", "cards")];
        for (token, defined) in plural_pairs {
            assert!(!is_typo_edit(token, defined), "{token:?} vs {defined:?}");
        }
    }
}
335}