Skip to main content

fallow_extract/css_in_js/
template.rs

1//! CSS-in-JS template-literal lifter for the styling-health analytics pipeline
2//! (CSS program Phase 3b).
3//!
4//! Styled-components / emotion / linaria write their CSS as the body of a tagged
5//! template (`` styled.div`...` ``, `` css`...` ``, `` keyframes`...` ``). That CSS
6//! never reaches the structural analytics that `.css` files and Vue/Svelte SFC
7//! `<style>` blocks flow through, so a styled-components app gets `null` styling
8//! analytics. This module is the tagged-template analogue of
9//! [`crate::sfc_css::sfc_virtual_stylesheet`]: it lexically scans JS/TS source for
10//! CSS-in-JS tagged templates, lifts each template body into a blank-line-padded
11//! virtual stylesheet (so metric line numbers map back onto the real source line),
12//! masks every `${...}` interpolation to a CSS-valid placeholder, and returns
13//! `None` when the source has no CSS-in-JS template.
14//!
15//! It is health-time-only: it runs over file SOURCE in the engine's CSS walk, like
16//! `sfc_virtual_stylesheet` and `compute_css_analytics`, and persists nothing to
17//! the extraction cache (no `CACHE_VERSION` bump).
18//!
//! Scope (first cut): TEMPLATE-LITERAL form only. The object form
//! (`css({ color: 'red' })`, `styled.div({...})`) is JS-object-to-CSS
//! serialization, a heavier and separate problem, and is deferred. The regex
//! anchor recognizes `styled.div`, `styled(Component)`, and bare `css` /
//! `keyframes` / `createGlobalStyle` / `injectGlobal` tags. Chains such as
//! `styled.div.attrs(...)`, whose backtick does NOT directly follow the tag,
//! are out of scope (the `.attrs(...)` chain is a documented deferral).
26
27use std::sync::LazyLock;
28
29use super::shared::{WRAPPER, count_newlines};
30
/// Stand-in text written in place of each `${...}` interpolation when a
/// template body is lifted.
///
/// It is deliberately a bare CSS identifier rather than a number, hex value,
/// or color keyword: a value-position interpolation (`color: ${x}` becomes
/// `color: fallowinterp`) therefore cannot be mistaken for a design-token
/// color. Where the placeholder ends up somewhere `compute_css_analytics`
/// cannot make valid, that declaration is simply dropped by its
/// `error_recovery: true` parse.
const INTERP_PLACEHOLDER: &str = "fallowinterp";
38
39/// Matches the opening of a CSS-in-JS tagged template: a recognized tag
40/// (`styled.div`, `styled(Component)`, bare `css` / `keyframes` /
41/// `createGlobalStyle` / `injectGlobal`) immediately followed by a backtick. The
42/// match END is positioned at the opening backtick so the byte scanner takes over
43/// from there to find the matching close (handling `${}` and nested templates).
44static CSS_IN_JS_TAG_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
45    crate::static_regex(
46        r"(?:\bstyled\.[A-Za-z_$][A-Za-z0-9_$]*|\bstyled\([^()`]*\)|\bcss|\bkeyframes|\bcreateGlobalStyle|\binjectGlobal)\s*`",
47    )
48});
49
50/// Build a virtual stylesheet from the CSS-in-JS tagged templates in a JS/TS
51/// source. Each template body is placed at its real line in the source via
52/// blank-line padding, so CSS metric line numbers from
53/// [`crate::compute_css_analytics`] map straight back onto the source. Every
54/// `${...}` interpolation is masked to a CSS-valid placeholder (newline count
55/// preserved so lines after a multi-line interpolation stay aligned). Returns
56/// `None` when the source has no CSS-in-JS tagged template, so callers skip the
57/// file entirely (no `files_analyzed` inflation).
58#[must_use]
59pub fn css_in_js_virtual_stylesheet(source: &str) -> Option<String> {
60    // Cheap pre-filter: no backtick means no tagged template at all.
61    if !source.contains('`') {
62        return None;
63    }
64
65    let bytes = source.as_bytes();
66    let mut out = String::new();
67    let mut current_line: usize = 1;
68    let mut found = false;
69    let mut search_from = 0;
70
71    while let Some(m) = CSS_IN_JS_TAG_RE.find_at(source, search_from) {
72        // The regex match ends at the opening backtick (its last byte).
73        let backtick = m.end() - 1;
74        let Some((body, after)) = scan_template_body(bytes, backtick) else {
75            // Unterminated template; stop scanning (the rest is malformed).
76            break;
77        };
78
79        // Blank-line-pad to the template body's real start line, so a metric on
80        // line N of the lifted sheet maps to line N of the source.
81        let body_start = backtick + 1;
82        let block_line = 1 + count_newlines(&source[..body_start]);
83        while current_line < block_line {
84            out.push('\n');
85            current_line += 1;
86        }
87        // Each lifted block is its own rule context. Wrapping the body in a
88        // synthetic selector keeps top-level declarations (the common
89        // `` styled.div`color: red` `` shape) inside a rule so they are counted,
90        // while a body that already contains full rules (`& { ... }`, `&:hover`)
91        // still parses under nesting. The wrapper selector occupies the body's
92        // start line; the body keeps its own lines.
93        out.push_str(WRAPPER);
94        out.push('{');
95        out.push_str(&body);
96        out.push('}');
97        current_line += count_newlines(&body);
98        found = true;
99
100        search_from = after;
101    }
102
103    found.then_some(out)
104}
105
106/// Scan a template literal whose opening backtick is at `open`. Returns the body
107/// text with every top-level `${...}` interpolation replaced by the placeholder
108/// (newline count preserved), plus the index immediately after the closing
109/// backtick. Returns `None` if the template is unterminated.
110fn scan_template_body(bytes: &[u8], open: usize) -> Option<(String, usize)> {
111    // The body is accumulated as raw bytes and converted to a `String` at the end.
112    // Every static byte (including the continuation bytes of a multi-byte UTF-8
113    // char) is copied verbatim and contiguously, and only ASCII bytes (the
114    // placeholder and newlines) are inserted at ASCII-boundary positions, so the
115    // accumulated buffer is always valid UTF-8. Pushing `byte as char` instead
116    // would Latin-1-mangle every non-ASCII char (e.g. a `content:`/`font-family`
117    // value), so byte accumulation is the correct mirror of `sfc_virtual_stylesheet`'s
118    // `&str` slicing.
119    let mut out: Vec<u8> = Vec::new();
120    let mut i = open + 1;
121    while i < bytes.len() {
122        match bytes[i] {
123            b'\\' => {
124                // Escaped char: copy the backslash and the escaped byte verbatim so
125                // an escaped backtick / `${` is not treated as a delimiter. A
126                // multi-byte escaped char's continuation bytes are picked up by the
127                // catch-all arm on the following iterations.
128                out.push(b'\\');
129                if i + 1 < bytes.len() {
130                    out.push(bytes[i + 1]);
131                    i += 2;
132                } else {
133                    i += 1;
134                }
135            }
136            b'`' => return Some((String::from_utf8(out).unwrap_or_default(), i + 1)),
137            b'$' if i + 1 < bytes.len() && bytes[i + 1] == b'{' => {
138                let interp_end = scan_interpolation(bytes, i + 2)?;
139                // The span is bounded by ASCII `$` and the byte after `}`, so the
140                // sub-slice is always valid UTF-8. Count via the str helper to
141                // preserve newlines that lived inside nested templates/strings too.
142                let newlines =
143                    count_newlines(std::str::from_utf8(&bytes[i..interp_end]).unwrap_or(""));
144                out.extend_from_slice(INTERP_PLACEHOLDER.as_bytes());
145                out.extend(std::iter::repeat_n(b'\n', newlines));
146                i = interp_end;
147            }
148            b => {
149                out.push(b);
150                i += 1;
151            }
152        }
153    }
154    None
155}
156
157/// Scan a `${...}` interpolation whose body starts at `start` (just after `{`).
158/// Returns the index immediately after the matching `}`. Handles nested braces,
159/// nested template literals (which may carry their own `${}`), and string
160/// literals so a `}` inside a string or nested template does not close early.
161fn scan_interpolation(bytes: &[u8], start: usize) -> Option<usize> {
162    let mut depth: usize = 1;
163    let mut i = start;
164    while i < bytes.len() {
165        match bytes[i] {
166            b'{' => {
167                depth += 1;
168                i += 1;
169            }
170            b'}' => {
171                depth -= 1;
172                i += 1;
173                if depth == 0 {
174                    return Some(i);
175                }
176            }
177            b'`' => {
178                // Nested template literal: skip it wholesale (recurses for its
179                // own interpolations).
180                let (_, after) = scan_template_body(bytes, i)?;
181                i = after;
182            }
183            b'\'' | b'"' => {
184                i = skip_string(bytes, i)?;
185            }
186            // Skip the escaped byte; `saturating_add` guards a trailing backslash
187            // at end-of-input (the `while` guard then exits cleanly).
188            b'\\' => i = i.saturating_add(2),
189            _ => i += 1,
190        }
191    }
192    None
193}
194
/// Step over a quoted string literal.
///
/// `open` is the index of the opening quote; whichever byte sits there is the
/// delimiter looked for. A backslash skips itself and the byte after it, so an
/// escaped quote does not end the string. Returns the index just past the
/// closing quote, or `None` if the input ends before one is found.
fn skip_string(bytes: &[u8], open: usize) -> Option<usize> {
    let delimiter = bytes[open];
    let mut cursor = open + 1;
    while let Some(&byte) = bytes.get(cursor) {
        if byte == b'\\' {
            // `saturating_add` keeps a trailing backslash from overflowing; the
            // out-of-range cursor then ends the loop.
            cursor = cursor.saturating_add(2);
        } else if byte == delimiter {
            return Some(cursor + 1);
        } else {
            cursor += 1;
        }
    }
    None
}
209
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;
    use crate::compute_css_analytics;

    /// 1-based line number of byte `offset` within `text`.
    fn line_at(text: &str, offset: usize) -> usize {
        text[..offset].matches('\n').count() + 1
    }

    /// Non-ASCII `content:` / `font-family` values (2-, 3-, and 4-byte chars)
    /// must come through the lift unchanged, with no Latin-1 mangling, and the
    /// lifted sheet must still be valid UTF-8 that parses as CSS.
    #[test]
    fn preserves_multibyte_utf8_in_lifted_body() {
        let src = concat!(
            "const T = styled.div`\n",
            "content: \"café 日本 €\";\n",
            "font-family: \"Ñoño\";\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        for (needle, what) in [
            ("café 日本 €", "multibyte content"),
            ("Ñoño", "multibyte font-family"),
        ] {
            assert!(vcss.contains(needle), "{what} preserved: {vcss:?}");
        }
        // Still valid UTF-8 and parseable (no None, no panic).
        let parsed = compute_css_analytics(&vcss);
        assert!(parsed.is_some(), "lifted CSS parses");
    }

    /// A plain styled-components body lifts to CSS whose declarations are
    /// counted by the analytics pass.
    #[test]
    fn lifts_styled_component_body_to_parseable_css() {
        let src = concat!(
            "import styled from 'styled-components';\n",
            "export const Button = styled.button`\n",
            "color: white;\n",
            "padding: 8px 16px;\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        let analytics = compute_css_analytics(&vcss).expect("masked CSS must parse, not None");
        let declared = analytics.total_declarations;
        assert!(
            declared >= 2,
            "styled body declarations should be counted: {analytics:?}"
        );
    }

    /// Sources with no CSS-in-JS tag produce `None`, including one that only
    /// has an ordinary (untagged) template literal.
    #[test]
    fn none_without_any_css_in_js_template() {
        let untagged_sources = [
            "const x = 1; function f() {}",
            "const s = `hello ${name}`;",
        ];
        for plain in untagged_sources {
            assert!(css_in_js_virtual_stylesheet(plain).is_none());
        }
    }

    /// When every value is an interpolation, masking must leave the sheet
    /// parseable and must not invent a structural finding.
    #[test]
    fn interpolation_heavy_template_does_not_return_none_or_garble() {
        let src = concat!(
            "const T = styled.div`\n",
            "color: ${theme.primary};\n",
            "padding: ${y}px;\n",
            "${mixin};\n",
            "margin: ${a} ${b};\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a styled template");
        let analytics =
            compute_css_analytics(&vcss).expect("interpolation-masked CSS must parse, not None");
        // The author wrote no `!important`, no id-selector, and no deep
        // nesting, so masking must not conjure a notable structural rule.
        let no_important = analytics.important_declarations == 0;
        assert!(
            no_important,
            "masking must not invent !important: {analytics:?}"
        );
    }

    /// Emotion's bare `css` and `keyframes` tags are both picked up.
    #[test]
    fn emotion_css_and_keyframes_tags_are_lifted() {
        let src = concat!(
            "import { css, keyframes } from '@emotion/react';\n",
            "const fade = keyframes`\n",
            "from { opacity: 0; }\n",
            "to { opacity: 1; }\n",
            "`;\n",
            "const box = css`\n",
            "display: flex;\n",
            "gap: 8px;\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has css/keyframes templates");
        let analytics = compute_css_analytics(&vcss).expect("must parse");
        let rules = analytics.rule_count;
        assert!(rules >= 1, "rules should be counted: {analytics:?}");
    }

    /// The `styled(Component)` call form is recognized as a tag.
    #[test]
    fn styled_call_form_is_lifted() {
        let src = concat!(
            "const Primary = styled(Button)`\n",
            "font-weight: bold;\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("styled(Component) is lifted");
        let has_declaration = vcss.contains("font-weight");
        assert!(has_declaration, "vcss={vcss:?}");
    }

    /// The `color` declaration sits on source line 4; the lifted sheet must
    /// carry it on the same line so metric line numbers map back.
    #[test]
    fn line_numbers_map_back_to_source() {
        let src = concat!(
            "import styled from 'styled-components';\n",
            "\n",
            "const A = styled.div`\n",
            "color: red;\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a template");
        let lifted_offset = vcss.find("color").expect("color present");
        let source_offset = src.find("color: red").unwrap();
        let vcss_line = line_at(&vcss, lifted_offset);
        let src_line = line_at(src, source_offset);
        assert_eq!(vcss_line, src_line, "vcss={vcss:?}");
    }

    /// A nested template literal inside an interpolation must not end the outer
    /// template early: the trailing `border` declaration has to survive.
    #[test]
    fn nested_template_in_interpolation_does_not_break_extent() {
        let src = concat!(
            "const A = styled.div`\n",
            "color: ${(p) => css`color: ${p.c}`};\n",
            "border: 1px solid black;\n",
            "`;\n",
        );
        let vcss = css_in_js_virtual_stylesheet(src).expect("has a template");
        let kept_trailing_decl = vcss.contains("border");
        assert!(
            kept_trailing_decl,
            "outer template extent must include the post-interpolation decl: {vcss:?}"
        );
    }
}