fallow_extract/css_in_js/template.rs
1//! CSS-in-JS template-literal lifter for the styling-health analytics pipeline
2//! (CSS program Phase 3b).
3//!
4//! Styled-components / emotion / linaria write their CSS as the body of a tagged
5//! template (`` styled.div`...` ``, `` css`...` ``, `` keyframes`...` ``). That CSS
6//! never reaches the structural analytics that `.css` files and Vue/Svelte SFC
7//! `<style>` blocks flow through, so a styled-components app gets `null` styling
8//! analytics. This module is the tagged-template analogue of
9//! [`crate::sfc_css::sfc_virtual_stylesheet`]: it lexically scans JS/TS source for
10//! CSS-in-JS tagged templates, lifts each template body into a blank-line-padded
11//! virtual stylesheet (so metric line numbers map back onto the real source line),
12//! masks every `${...}` interpolation to a CSS-valid placeholder, and returns
13//! `None` when the source has no CSS-in-JS template.
14//!
15//! It is health-time-only: it runs over file SOURCE in the engine's CSS walk, like
16//! `sfc_virtual_stylesheet` and `compute_css_analytics`, and persists nothing to
17//! the extraction cache (no `CACHE_VERSION` bump).
18//!
19//! Scope (first cut): TEMPLATE-LITERAL form only. The object form
20//! (`css({ color: 'red' })`, `styled.div({...})`) is JS-object-to-CSS
//! serialization, a heavier and separate problem, and is deferred. The regex
//! anchor recognizes `styled.div`, `styled(Component)`, and bare `css` /
//! `keyframes` / `createGlobalStyle` / `injectGlobal` when the opening backtick
//! follows the tag directly (optional whitespace aside). Chains such as
//! `styled.div.attrs(...)`, whose backtick does NOT directly follow the tag, are
//! out of scope for the regex anchor (a documented deferral).
26
27use std::sync::LazyLock;
28
29use super::shared::{WRAPPER, count_newlines};
30
/// CSS-valid identifier substituted for every `${...}` interpolation.
///
/// An identifier is chosen on purpose: a value-position interpolation
/// (`color: ${x}` -> `color: fallowinterp`) then parses as an identifier rather
/// than a number / hex / color keyword, so it can never be mistaken for a
/// design-token color. An interpolation that `compute_css_analytics` cannot
/// make valid is simply dropped by its `error_recovery: true` parse.
const INTERP_PLACEHOLDER: &str = "fallowinterp";
38
39/// Matches the opening of a CSS-in-JS tagged template: a recognized tag
40/// (`styled.div`, `styled(Component)`, bare `css` / `keyframes` /
41/// `createGlobalStyle` / `injectGlobal`) immediately followed by a backtick. The
42/// match END is positioned at the opening backtick so the byte scanner takes over
43/// from there to find the matching close (handling `${}` and nested templates).
44static CSS_IN_JS_TAG_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45 crate::static_regex(
46 r"(?:\bstyled\.[A-Za-z_$][A-Za-z0-9_$]*|\bstyled\([^()`]*\)|\bcss|\bkeyframes|\bcreateGlobalStyle|\binjectGlobal)\s*`",
47 )
48});
49
50/// Build a virtual stylesheet from the CSS-in-JS tagged templates in a JS/TS
51/// source. Each template body is placed at its real line in the source via
52/// blank-line padding, so CSS metric line numbers from
53/// [`crate::compute_css_analytics`] map straight back onto the source. Every
54/// `${...}` interpolation is masked to a CSS-valid placeholder (newline count
55/// preserved so lines after a multi-line interpolation stay aligned). Returns
56/// `None` when the source has no CSS-in-JS tagged template, so callers skip the
57/// file entirely (no `files_analyzed` inflation).
58#[must_use]
59pub fn css_in_js_virtual_stylesheet(source: &str) -> Option<String> {
60 // Cheap pre-filter: no backtick means no tagged template at all.
61 if !source.contains('`') {
62 return None;
63 }
64
65 let bytes = source.as_bytes();
66 let mut out = String::new();
67 let mut current_line: usize = 1;
68 let mut found = false;
69 let mut search_from = 0;
70
71 while let Some(m) = CSS_IN_JS_TAG_RE.find_at(source, search_from) {
72 // The regex match ends at the opening backtick (its last byte).
73 let backtick = m.end() - 1;
74 let Some((body, after)) = scan_template_body(bytes, backtick) else {
75 // Unterminated template; stop scanning (the rest is malformed).
76 break;
77 };
78
79 // Blank-line-pad to the template body's real start line, so a metric on
80 // line N of the lifted sheet maps to line N of the source.
81 let body_start = backtick + 1;
82 let block_line = 1 + count_newlines(&source[..body_start]);
83 while current_line < block_line {
84 out.push('\n');
85 current_line += 1;
86 }
87 // Each lifted block is its own rule context. Wrapping the body in a
88 // synthetic selector keeps top-level declarations (the common
89 // `` styled.div`color: red` `` shape) inside a rule so they are counted,
90 // while a body that already contains full rules (`& { ... }`, `&:hover`)
91 // still parses under nesting. The wrapper selector occupies the body's
92 // start line; the body keeps its own lines.
93 out.push_str(WRAPPER);
94 out.push('{');
95 out.push_str(&body);
96 out.push('}');
97 current_line += count_newlines(&body);
98 found = true;
99
100 search_from = after;
101 }
102
103 found.then_some(out)
104}
105
106/// Scan a template literal whose opening backtick is at `open`. Returns the body
107/// text with every top-level `${...}` interpolation replaced by the placeholder
108/// (newline count preserved), plus the index immediately after the closing
109/// backtick. Returns `None` if the template is unterminated.
110fn scan_template_body(bytes: &[u8], open: usize) -> Option<(String, usize)> {
111 // The body is accumulated as raw bytes and converted to a `String` at the end.
112 // Every static byte (including the continuation bytes of a multi-byte UTF-8
113 // char) is copied verbatim and contiguously, and only ASCII bytes (the
114 // placeholder and newlines) are inserted at ASCII-boundary positions, so the
115 // accumulated buffer is always valid UTF-8. Pushing `byte as char` instead
116 // would Latin-1-mangle every non-ASCII char (e.g. a `content:`/`font-family`
117 // value), so byte accumulation is the correct mirror of `sfc_virtual_stylesheet`'s
118 // `&str` slicing.
119 let mut out: Vec<u8> = Vec::new();
120 let mut i = open + 1;
121 while i < bytes.len() {
122 match bytes[i] {
123 b'\\' => {
124 // Escaped char: copy the backslash and the escaped byte verbatim so
125 // an escaped backtick / `${` is not treated as a delimiter. A
126 // multi-byte escaped char's continuation bytes are picked up by the
127 // catch-all arm on the following iterations.
128 out.push(b'\\');
129 if i + 1 < bytes.len() {
130 out.push(bytes[i + 1]);
131 i += 2;
132 } else {
133 i += 1;
134 }
135 }
136 b'`' => return Some((String::from_utf8(out).unwrap_or_default(), i + 1)),
137 b'$' if i + 1 < bytes.len() && bytes[i + 1] == b'{' => {
138 let interp_end = scan_interpolation(bytes, i + 2)?;
139 // The span is bounded by ASCII `$` and the byte after `}`, so the
140 // sub-slice is always valid UTF-8. Count via the str helper to
141 // preserve newlines that lived inside nested templates/strings too.
142 let newlines =
143 count_newlines(std::str::from_utf8(&bytes[i..interp_end]).unwrap_or(""));
144 out.extend_from_slice(INTERP_PLACEHOLDER.as_bytes());
145 out.extend(std::iter::repeat_n(b'\n', newlines));
146 i = interp_end;
147 }
148 b => {
149 out.push(b);
150 i += 1;
151 }
152 }
153 }
154 None
155}
156
157/// Scan a `${...}` interpolation whose body starts at `start` (just after `{`).
158/// Returns the index immediately after the matching `}`. Handles nested braces,
159/// nested template literals (which may carry their own `${}`), and string
160/// literals so a `}` inside a string or nested template does not close early.
161fn scan_interpolation(bytes: &[u8], start: usize) -> Option<usize> {
162 let mut depth: usize = 1;
163 let mut i = start;
164 while i < bytes.len() {
165 match bytes[i] {
166 b'{' => {
167 depth += 1;
168 i += 1;
169 }
170 b'}' => {
171 depth -= 1;
172 i += 1;
173 if depth == 0 {
174 return Some(i);
175 }
176 }
177 b'`' => {
178 // Nested template literal: skip it wholesale (recurses for its
179 // own interpolations).
180 let (_, after) = scan_template_body(bytes, i)?;
181 i = after;
182 }
183 b'\'' | b'"' => {
184 i = skip_string(bytes, i)?;
185 }
186 // Skip the escaped byte; `saturating_add` guards a trailing backslash
187 // at end-of-input (the `while` guard then exits cleanly).
188 b'\\' => i = i.saturating_add(2),
189 _ => i += 1,
190 }
191 }
192 None
193}
194
/// Step over a quoted string literal that opens at `open`.
///
/// The byte at `open` is taken as the delimiter (the callers pass `'` or `"`).
/// A backslash skips itself and the byte after it, so an escaped delimiter does
/// not end the string. Returns the index just past the closing delimiter, or
/// `None` when the input runs out before the string is closed.
fn skip_string(bytes: &[u8], open: usize) -> Option<usize> {
    let delimiter = bytes[open];
    let mut cursor = open + 1;
    while let Some(&byte) = bytes.get(cursor) {
        if byte == b'\\' {
            // Escape: jump over the backslash and the escaped byte. Saturating
            // so a backslash at the very end simply ends the loop.
            cursor = cursor.saturating_add(2);
        } else if byte == delimiter {
            return Some(cursor + 1);
        } else {
            cursor += 1;
        }
    }
    None
}
209
// Unit tests for the CSS-in-JS lifter. They feed small JS/TS snippets through
// `css_in_js_virtual_stylesheet` and, where relevant, through
// `compute_css_analytics` to confirm the lifted sheet is usable.
// NOTE(review): the module is excluded under Miri; presumably because the
// tests run the full CSS parser — confirm.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;
    use crate::compute_css_analytics;

    /// Non-ASCII text in the template body must come out of the lift unchanged.
    #[test]
    fn preserves_multibyte_utf8_in_lifted_body() {
        // A non-ASCII `content:` value (2-byte, 3-byte, and 4-byte chars) must
        // survive the lift byte-for-byte (no Latin-1 mangling), and the result
        // stays valid UTF-8 / parseable CSS.
        let src = "const T = styled.div`\n\
                   content: \"café 日本 €\";\n\
                   font-family: \"Ñoño\";\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        assert!(
            vcss.contains("café 日本 €"),
            "multibyte content preserved: {vcss:?}"
        );
        assert!(
            vcss.contains("Ñoño"),
            "multibyte font-family preserved: {vcss:?}"
        );
        // Still valid UTF-8 and parseable (no None, no panic).
        assert!(compute_css_analytics(&vcss).is_some(), "lifted CSS parses");
    }

    /// A plain `styled.button` body lifts into CSS whose declarations are counted.
    #[test]
    fn lifts_styled_component_body_to_parseable_css() {
        let src = "import styled from 'styled-components';\n\
                   export const Button = styled.button`\n\
                   color: white;\n\
                   padding: 8px 16px;\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        let analytics = compute_css_analytics(&vcss).expect("masked CSS must parse, not None");
        assert!(
            analytics.total_declarations >= 2,
            "styled body declarations should be counted: {analytics:?}"
        );
    }

    /// Sources without a recognized CSS-in-JS tag yield `None`.
    #[test]
    fn none_without_any_css_in_js_template() {
        // No backtick at all: rejected by the pre-filter.
        assert!(css_in_js_virtual_stylesheet("const x = 1; function f() {}").is_none());
        // A plain (non-CSS-in-JS) template literal is not lifted.
        assert!(css_in_js_virtual_stylesheet("const s = `hello ${name}`;").is_none());
    }

    /// A body made almost entirely of interpolations still parses after masking.
    #[test]
    fn interpolation_heavy_template_does_not_return_none_or_garble() {
        // Every value is an interpolation; masking must keep the sheet parseable
        // and must not invent a structural finding.
        let src = "const T = styled.div`\n\
                   color: ${theme.primary};\n\
                   padding: ${y}px;\n\
                   ${mixin};\n\
                   margin: ${a} ${b};\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        let analytics =
            compute_css_analytics(&vcss).expect("interpolation-masked CSS must parse, not None");
        // No `!important`, no id-selector, no deep nesting was authored, so no
        // structural notable rule should be invented by the masking.
        assert!(
            analytics.important_declarations == 0,
            "masking must not invent !important: {analytics:?}"
        );
    }

    /// The bare `css` and `keyframes` tags are recognized, not just `styled.*`.
    #[test]
    fn emotion_css_and_keyframes_tags_are_lifted() {
        let src = "import { css, keyframes } from '@emotion/react';\n\
                   const fade = keyframes`\n\
                   from { opacity: 0; }\n\
                   to { opacity: 1; }\n\
                   `;\n\
                   const box = css`\n\
                   display: flex;\n\
                   gap: 8px;\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has css/keyframes templates");
        let analytics = compute_css_analytics(&vcss).expect("must parse");
        assert!(
            analytics.rule_count >= 1,
            "rules should be counted: {analytics:?}"
        );
    }

    /// The call form `styled(Component)` is recognized as a tag.
    #[test]
    fn styled_call_form_is_lifted() {
        let src = "const Primary = styled(Button)`\n\
                   font-weight: bold;\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("styled(Component) is lifted");
        assert!(vcss.contains("font-weight"), "vcss={vcss:?}");
    }

    /// A declaration sits on the same line in the lifted sheet as in the source.
    #[test]
    fn line_numbers_map_back_to_source() {
        // The `color` declaration is on source line 4; the lifted sheet must keep
        // a non-blank token on line 4 so metric line numbers map back.
        let src = "import styled from 'styled-components';\n\
                   \n\
                   const A = styled.div`\n\
                   color: red;\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a template");
        // 1-based line of `color` in the lifted sheet.
        let color_pos = vcss.find("color").expect("color present");
        let vcss_line = 1 + vcss[..color_pos].bytes().filter(|&b| b == b'\n').count();
        // 1-based line of the same declaration in the original source.
        let src_color = src.find("color: red").unwrap();
        let src_line = 1 + src[..src_color].bytes().filter(|&b| b == b'\n').count();
        assert_eq!(vcss_line, src_line, "vcss={vcss:?}");
    }

    /// A nested tagged template inside `${...}` does not cut the outer body short.
    #[test]
    fn nested_template_in_interpolation_does_not_break_extent() {
        // An interpolation containing a nested template literal must not end the
        // outer template early; the trailing `border` declaration must survive.
        let src = "const A = styled.div`\n\
                   color: ${(p) => css`color: ${p.c}`};\n\
                   border: 1px solid black;\n\
                   `;\n";
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a template");
        assert!(
            vcss.contains("border"),
            "outer template extent must include the post-interpolation decl: {vcss:?}"
        );
    }
}
339}