Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13use rustc_hash::FxHashSet;
14
15use crate::asset_url::normalize_asset_url;
16use crate::parse::compute_unused_import_bindings;
17use crate::sfc_template::{SfcKind, collect_template_usage};
18use crate::visitor::ModuleInfoExtractor;
19use crate::{ImportInfo, ImportedName, ModuleInfo};
20use fallow_types::discover::FileId;
21use oxc_span::Span;
22
23/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
24/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
25static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
26    regex::Regex::new(
27        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
28    )
29    .expect("valid regex")
30});
31
32/// Regex to extract the `lang` attribute value from a script tag.
33static LANG_ATTR_RE: LazyLock<regex::Regex> =
34    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
35
36/// Regex to extract the `src` attribute value from a script tag.
37/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
38static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
39    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
40});
41
42/// Regex to detect Vue's bare `setup` attribute.
43static SETUP_ATTR_RE: LazyLock<regex::Regex> =
44    LazyLock::new(|| regex::Regex::new(r"(?:^|\s)setup(?:\s|$)").expect("valid regex"));
45
46/// Regex to detect Svelte's `context="module"` attribute.
47static CONTEXT_MODULE_ATTR_RE: LazyLock<regex::Regex> =
48    LazyLock::new(|| regex::Regex::new(r#"context\s*=\s*["']module["']"#).expect("valid regex"));
49
50/// Regex to match HTML comments for filtering script blocks inside comments.
51static HTML_COMMENT_RE: LazyLock<regex::Regex> =
52    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
53
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// All fields are derived from the opening tag's attributes and the text between the
/// opening and closing tags; nothing here has been parsed as JavaScript yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between `<script …>` and `</script>`).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from `src` attribute, if one was present.
    pub src: Option<String>,
    /// Whether this script is a Vue `<script setup>` block.
    pub is_setup: bool,
    /// Whether this script is a Svelte module-context block (`context="module"`).
    pub is_context_module: bool,
}
71
72/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
73pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
74    // Build HTML comment ranges to filter out <script> blocks inside comments.
75    // Using ranges instead of source replacement avoids corrupting script body content
76    // (e.g., string literals containing "<!--" would be destroyed by replacement).
77    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
78        .find_iter(source)
79        .map(|m| (m.start(), m.end()))
80        .collect();
81
82    SCRIPT_BLOCK_RE
83        .captures_iter(source)
84        .filter(|cap| {
85            let start = cap.get(0).map_or(0, |m| m.start());
86            !comment_ranges
87                .iter()
88                .any(|&(cs, ce)| start >= cs && start < ce)
89        })
90        .map(|cap| {
91            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
92            let body_match = cap.name("body");
93            let byte_offset = body_match.map_or(0, |m| m.start());
94            let body = body_match.map_or("", |m| m.as_str()).to_string();
95            let lang = LANG_ATTR_RE
96                .captures(attrs)
97                .and_then(|c| c.get(1))
98                .map(|m| m.as_str());
99            let is_typescript = matches!(lang, Some("ts" | "tsx"));
100            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
101            let src = SRC_ATTR_RE
102                .captures(attrs)
103                .and_then(|c| c.get(1))
104                .map(|m| m.as_str().to_string());
105            let is_setup = SETUP_ATTR_RE.is_match(attrs);
106            let is_context_module = CONTEXT_MODULE_ATTR_RE.is_match(attrs);
107            SfcScript {
108                body,
109                is_typescript,
110                is_jsx,
111                byte_offset,
112                src,
113                is_setup,
114                is_context_module,
115            }
116        })
117        .collect()
118}
119
/// Report whether `path` names a Vue or Svelte single-file component.
///
/// Only the final extension is inspected and the comparison is case-sensitive:
/// exactly `vue` or `svelte` qualifies.
#[must_use]
pub fn is_sfc_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("vue" | "svelte"))
}
127
128/// Parse an SFC file by extracting and combining all `<script>` blocks.
129pub(crate) fn parse_sfc_to_module(
130    file_id: FileId,
131    path: &Path,
132    source: &str,
133    content_hash: u64,
134) -> ModuleInfo {
135    let scripts = extract_sfc_scripts(source);
136    let kind = sfc_kind(path);
137    let mut combined = empty_sfc_module(file_id, source, content_hash);
138    let mut template_visible_imports: FxHashSet<String> = FxHashSet::default();
139
140    for script in &scripts {
141        merge_script_into_module(kind, script, &mut combined, &mut template_visible_imports);
142    }
143
144    apply_template_usage(kind, source, &template_visible_imports, &mut combined);
145    combined.unused_import_bindings.sort_unstable();
146    combined.unused_import_bindings.dedup();
147
148    combined
149}
150
151fn sfc_kind(path: &Path) -> SfcKind {
152    if path.extension().and_then(|ext| ext.to_str()) == Some("vue") {
153        SfcKind::Vue
154    } else {
155        SfcKind::Svelte
156    }
157}
158
159fn empty_sfc_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
160    // For SFC files, use string scanning for suppression comments since script block
161    // byte offsets don't correspond to the original file positions.
162    let suppressions = crate::suppress::parse_suppressions_from_source(source);
163
164    ModuleInfo {
165        file_id,
166        exports: Vec::new(),
167        imports: Vec::new(),
168        re_exports: Vec::new(),
169        dynamic_imports: Vec::new(),
170        dynamic_import_patterns: Vec::new(),
171        require_calls: Vec::new(),
172        member_accesses: Vec::new(),
173        whole_object_uses: Vec::new(),
174        has_cjs_exports: false,
175        content_hash,
176        suppressions,
177        unused_import_bindings: Vec::new(),
178        line_offsets: fallow_types::extract::compute_line_offsets(source),
179        complexity: Vec::new(),
180        flag_uses: Vec::new(),
181    }
182}
183
184fn merge_script_into_module(
185    kind: SfcKind,
186    script: &SfcScript,
187    combined: &mut ModuleInfo,
188    template_visible_imports: &mut FxHashSet<String>,
189) {
190    if let Some(src) = &script.src {
191        add_script_src_import(combined, src);
192    }
193
194    let allocator = Allocator::default();
195    let parser_return =
196        Parser::new(&allocator, &script.body, source_type_for_script(script)).parse();
197    let mut extractor = ModuleInfoExtractor::new();
198    extractor.visit_program(&parser_return.program);
199
200    let unused_import_bindings =
201        compute_unused_import_bindings(&parser_return.program, &extractor.imports);
202    combined
203        .unused_import_bindings
204        .extend(unused_import_bindings.iter().cloned());
205
206    if is_template_visible_script(kind, script) {
207        template_visible_imports.extend(
208            extractor
209                .imports
210                .iter()
211                .filter(|import| !import.local_name.is_empty())
212                .map(|import| import.local_name.clone()),
213        );
214    }
215
216    extractor.merge_into(combined);
217}
218
219fn add_script_src_import(module: &mut ModuleInfo, source: &str) {
220    // Normalize bare filenames (e.g., `<script src="logic.ts">`) so the
221    // resolver treats them as file-relative references, not npm packages.
222    module.imports.push(ImportInfo {
223        source: normalize_asset_url(source),
224        imported_name: ImportedName::SideEffect,
225        local_name: String::new(),
226        is_type_only: false,
227        span: Span::default(),
228        source_span: Span::default(),
229    });
230}
231
232fn source_type_for_script(script: &SfcScript) -> SourceType {
233    match (script.is_typescript, script.is_jsx) {
234        (true, true) => SourceType::tsx(),
235        (true, false) => SourceType::ts(),
236        (false, true) => SourceType::jsx(),
237        (false, false) => SourceType::mjs(),
238    }
239}
240
241fn apply_template_usage(
242    kind: SfcKind,
243    source: &str,
244    template_visible_imports: &FxHashSet<String>,
245    combined: &mut ModuleInfo,
246) {
247    if template_visible_imports.is_empty() {
248        return;
249    }
250
251    let template_usage = collect_template_usage(kind, source, template_visible_imports);
252    combined
253        .unused_import_bindings
254        .retain(|binding| !template_usage.used_bindings.contains(binding));
255    combined
256        .member_accesses
257        .extend(template_usage.member_accesses);
258    combined
259        .whole_object_uses
260        .extend(template_usage.whole_object_uses);
261}
262
263fn is_template_visible_script(kind: SfcKind, script: &SfcScript) -> bool {
264    match kind {
265        SfcKind::Vue => script.is_setup,
266        SfcKind::Svelte => !script.is_context_module,
267    }
268}
269
// SFC tests exercise regex-based HTML string extraction — no unsafe code,
// no Miri-specific value. Oxc parser tests are additionally ~1000x slower.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        assert!(is_sfc_file(Path::new("App.vue")));
    }

    #[test]
    fn is_sfc_file_svelte() {
        assert!(is_sfc_file(Path::new("Counter.svelte")));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        assert!(!is_sfc_file(Path::new("utils.ts")));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        assert!(!is_sfc_file(Path::new("App.jsx")));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        assert!(!is_sfc_file(Path::new("Layout.astro")));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
        assert!(scripts[0].src.is_none());
        // A tag without attributes is neither a setup nor a module-context block.
        assert!(!scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn single_ts_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].body.contains("export default"));
        assert!(scripts[1].body.contains("count"));
        // Only the second block carries the `setup` attribute.
        assert!(!scripts[0].is_setup);
        assert!(scripts[1].is_setup);
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let scripts =
            extract_sfc_scripts(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_setup);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let scripts = extract_sfc_scripts(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let source = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let source = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // A string in the script body containing <!-- should not cause filtering issues
        let source = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let source =
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let source = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let scripts = extract_sfc_scripts("<script lang='ts'>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let scripts = extract_sfc_scripts(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let scripts = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(scripts.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let scripts = extract_sfc_scripts("<script lang=\"ts\">\n  \n</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        // The byte_offset should point to where "code" starts in the source
        let offset = scripts[0].byte_offset;
        assert_eq!(&source[offset..offset + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let scripts = extract_sfc_scripts(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].src.is_none());
    }

    // ── Full parse tests (Oxc parser ~1000x slower under Miri) ──

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), Path::new("Dual.vue"), source, 0);
        // The non-setup block exports `version`
        assert!(
            info.exports
                .iter()
                .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version")),
            "export from <script> block should be extracted"
        );
        // The setup block imports `ref` from 'vue'
        assert!(
            info.imports.iter().any(|i| i.source == "vue"),
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript, "lang=tsx should be typescript");
        assert!(scripts[0].is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let source = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("External.vue"),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        assert!(
            info.imports
                .iter()
                .any(|i| i.source == "./external-logic.ts"
                    && matches!(i.imported_name, ImportedName::SideEffect)),
            "script src should generate a side-effect import"
        );
    }

    // ── Additional coverage ─────────────────────────────────────

    #[test]
    fn parse_sfc_no_script_returns_empty_module() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Empty.vue"),
            "<template><div>Hello</div></template>",
            42,
        );
        assert!(info.imports.is_empty());
        assert!(info.exports.is_empty());
        assert_eq!(info.content_hash, 42);
        assert_eq!(info.file_id, FileId(0));
    }

    #[test]
    fn parse_sfc_has_line_offsets() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("LineOffsets.vue"),
            r#"<script lang="ts">const x = 1;</script>"#,
            0,
        );
        assert!(!info.line_offsets.is_empty());
    }

    #[test]
    fn parse_sfc_has_suppressions() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Suppressions.vue"),
            r#"<script lang="ts">
// fallow-ignore-file
export const foo = 1;
</script>"#,
            0,
        );
        assert!(!info.suppressions.is_empty());
    }

    #[test]
    fn source_type_jsx_detection() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn source_type_plain_js_detection() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn is_sfc_file_rejects_no_extension() {
        assert!(!is_sfc_file(Path::new("Makefile")));
    }

    #[test]
    fn is_sfc_file_rejects_mdx() {
        assert!(!is_sfc_file(Path::new("post.mdx")));
    }

    #[test]
    fn is_sfc_file_rejects_css() {
        assert!(!is_sfc_file(Path::new("styles.css")));
    }

    #[test]
    fn multiple_script_blocks_both_have_offsets() {
        let source = r#"<script lang="ts">const a = 1;</script>
<script setup lang="ts">const b = 2;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        // Both scripts should have valid byte offsets
        let offset0 = scripts[0].byte_offset;
        let offset1 = scripts[1].byte_offset;
        assert_eq!(
            &source[offset0..offset0 + "const a = 1;".len()],
            "const a = 1;"
        );
        assert_eq!(
            &source[offset1..offset1 + "const b = 2;".len()],
            "const b = 2;"
        );
    }

    #[test]
    fn script_with_src_and_lang() {
        // src + lang should both be detected
        let scripts = extract_sfc_scripts(r#"<script src="./logic.ts" lang="tsx"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./logic.ts"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    // ── is_setup / is_context_module flags ──────────────────────

    #[test]
    fn bare_setup_attribute_sets_is_setup() {
        // `setup` as the last attribute, directly before `>`
        let scripts = extract_sfc_scripts("<script setup>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn data_setup_not_treated_as_setup() {
        let scripts =
            extract_sfc_scripts(r#"<script data-setup lang="ts">const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_setup);
        assert!(scripts[0].is_typescript);
    }

    #[test]
    fn svelte_context_module_detected() {
        let scripts = extract_sfc_scripts(
            r#"<script context="module">export const prerender = true;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_context_module);
        assert!(!scripts[0].is_setup);
    }
}