Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13use rustc_hash::FxHashSet;
14
15use crate::asset_url::normalize_asset_url;
16use crate::parse::compute_unused_import_bindings;
17use crate::sfc_template::{SfcKind, collect_template_usage};
18use crate::visitor::ModuleInfoExtractor;
19use crate::{ImportInfo, ImportedName, ModuleInfo};
20use fallow_types::discover::FileId;
21use oxc_span::Span;
22
23/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
24/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
25static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
26    regex::Regex::new(
27        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
28    )
29    .expect("valid regex")
30});
31
32/// Regex to extract the `lang` attribute value from a script tag.
33static LANG_ATTR_RE: LazyLock<regex::Regex> =
34    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
35
36/// Regex to extract the `src` attribute value from a script tag.
37/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
38static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
39    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
40});
41
42/// Regex to detect Vue's bare `setup` attribute.
43static SETUP_ATTR_RE: LazyLock<regex::Regex> =
44    LazyLock::new(|| regex::Regex::new(r"(?:^|\s)setup(?:\s|$)").expect("valid regex"));
45
46/// Regex to detect Svelte's `context="module"` attribute.
47static CONTEXT_MODULE_ATTR_RE: LazyLock<regex::Regex> =
48    LazyLock::new(|| regex::Regex::new(r#"context\s*=\s*["']module["']"#).expect("valid regex"));
49
50/// Regex to match HTML comments for filtering script blocks inside comments.
51static HTML_COMMENT_RE: LazyLock<regex::Regex> =
52    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
53
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so extracted blocks can be logged, copied and
/// compared directly (e.g., in test assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening and closing tags).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if present.
    pub src: Option<String>,
    /// Whether this script is a Vue `<script setup>` block.
    pub is_setup: bool,
    /// Whether this script is a Svelte module-context block (`context="module"`).
    pub is_context_module: bool,
}
71
72/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
73pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
74    // Build HTML comment ranges to filter out <script> blocks inside comments.
75    // Using ranges instead of source replacement avoids corrupting script body content
76    // (e.g., string literals containing "<!--" would be destroyed by replacement).
77    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
78        .find_iter(source)
79        .map(|m| (m.start(), m.end()))
80        .collect();
81
82    SCRIPT_BLOCK_RE
83        .captures_iter(source)
84        .filter(|cap| {
85            let start = cap.get(0).map_or(0, |m| m.start());
86            !comment_ranges
87                .iter()
88                .any(|&(cs, ce)| start >= cs && start < ce)
89        })
90        .map(|cap| {
91            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
92            let body_match = cap.name("body");
93            let byte_offset = body_match.map_or(0, |m| m.start());
94            let body = body_match.map_or("", |m| m.as_str()).to_string();
95            let lang = LANG_ATTR_RE
96                .captures(attrs)
97                .and_then(|c| c.get(1))
98                .map(|m| m.as_str());
99            let is_typescript = matches!(lang, Some("ts" | "tsx"));
100            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
101            let src = SRC_ATTR_RE
102                .captures(attrs)
103                .and_then(|c| c.get(1))
104                .map(|m| m.as_str().to_string());
105            let is_setup = SETUP_ATTR_RE.is_match(attrs);
106            let is_context_module = CONTEXT_MODULE_ATTR_RE.is_match(attrs);
107            SfcScript {
108                body,
109                is_typescript,
110                is_jsx,
111                byte_offset,
112                src,
113                is_setup,
114                is_context_module,
115            }
116        })
117        .collect()
118}
119
/// Report whether `path` names a Single File Component, i.e. its extension is exactly
/// `vue` or `svelte`.
///
/// The comparison is case-sensitive; paths without an extension (or with a non-UTF-8
/// extension) are not SFC files.
#[must_use]
pub fn is_sfc_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("vue" | "svelte")
    )
}
127
128/// Parse an SFC file by extracting and combining all `<script>` blocks.
129pub(crate) fn parse_sfc_to_module(
130    file_id: FileId,
131    path: &Path,
132    source: &str,
133    content_hash: u64,
134) -> ModuleInfo {
135    let scripts = extract_sfc_scripts(source);
136    let kind = sfc_kind(path);
137    let mut combined = empty_sfc_module(file_id, source, content_hash);
138    let mut template_visible_imports: FxHashSet<String> = FxHashSet::default();
139
140    for script in &scripts {
141        merge_script_into_module(kind, script, &mut combined, &mut template_visible_imports);
142    }
143
144    apply_template_usage(kind, source, &template_visible_imports, &mut combined);
145    combined.unused_import_bindings.sort_unstable();
146    combined.unused_import_bindings.dedup();
147
148    combined
149}
150
151fn sfc_kind(path: &Path) -> SfcKind {
152    if path.extension().and_then(|ext| ext.to_str()) == Some("vue") {
153        SfcKind::Vue
154    } else {
155        SfcKind::Svelte
156    }
157}
158
159fn empty_sfc_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
160    // For SFC files, use string scanning for suppression comments since script block
161    // byte offsets don't correspond to the original file positions.
162    let suppressions = crate::suppress::parse_suppressions_from_source(source);
163
164    ModuleInfo {
165        file_id,
166        exports: Vec::new(),
167        imports: Vec::new(),
168        re_exports: Vec::new(),
169        dynamic_imports: Vec::new(),
170        dynamic_import_patterns: Vec::new(),
171        require_calls: Vec::new(),
172        member_accesses: Vec::new(),
173        whole_object_uses: Vec::new(),
174        has_cjs_exports: false,
175        content_hash,
176        suppressions,
177        unused_import_bindings: Vec::new(),
178        line_offsets: fallow_types::extract::compute_line_offsets(source),
179        complexity: Vec::new(),
180        flag_uses: Vec::new(),
181        class_heritage: vec![],
182    }
183}
184
185fn merge_script_into_module(
186    kind: SfcKind,
187    script: &SfcScript,
188    combined: &mut ModuleInfo,
189    template_visible_imports: &mut FxHashSet<String>,
190) {
191    if let Some(src) = &script.src {
192        add_script_src_import(combined, src);
193    }
194
195    let allocator = Allocator::default();
196    let parser_return =
197        Parser::new(&allocator, &script.body, source_type_for_script(script)).parse();
198    let mut extractor = ModuleInfoExtractor::new();
199    extractor.visit_program(&parser_return.program);
200
201    let unused_import_bindings =
202        compute_unused_import_bindings(&parser_return.program, &extractor.imports);
203    combined
204        .unused_import_bindings
205        .extend(unused_import_bindings.iter().cloned());
206
207    if is_template_visible_script(kind, script) {
208        template_visible_imports.extend(
209            extractor
210                .imports
211                .iter()
212                .filter(|import| !import.local_name.is_empty())
213                .map(|import| import.local_name.clone()),
214        );
215    }
216
217    extractor.merge_into(combined);
218}
219
220fn add_script_src_import(module: &mut ModuleInfo, source: &str) {
221    // Normalize bare filenames (e.g., `<script src="logic.ts">`) so the
222    // resolver treats them as file-relative references, not npm packages.
223    module.imports.push(ImportInfo {
224        source: normalize_asset_url(source),
225        imported_name: ImportedName::SideEffect,
226        local_name: String::new(),
227        is_type_only: false,
228        span: Span::default(),
229        source_span: Span::default(),
230    });
231}
232
233fn source_type_for_script(script: &SfcScript) -> SourceType {
234    match (script.is_typescript, script.is_jsx) {
235        (true, true) => SourceType::tsx(),
236        (true, false) => SourceType::ts(),
237        (false, true) => SourceType::jsx(),
238        (false, false) => SourceType::mjs(),
239    }
240}
241
242fn apply_template_usage(
243    kind: SfcKind,
244    source: &str,
245    template_visible_imports: &FxHashSet<String>,
246    combined: &mut ModuleInfo,
247) {
248    if template_visible_imports.is_empty() {
249        return;
250    }
251
252    let template_usage = collect_template_usage(kind, source, template_visible_imports);
253    combined
254        .unused_import_bindings
255        .retain(|binding| !template_usage.used_bindings.contains(binding));
256    combined
257        .member_accesses
258        .extend(template_usage.member_accesses);
259    combined
260        .whole_object_uses
261        .extend(template_usage.whole_object_uses);
262}
263
264fn is_template_visible_script(kind: SfcKind, script: &SfcScript) -> bool {
265    match kind {
266        SfcKind::Vue => script.is_setup,
267        SfcKind::Svelte => !script.is_context_module,
268    }
269}
270
// SFC tests exercise regex-based HTML string extraction — no unsafe code,
// no Miri-specific value. Oxc parser tests are additionally ~1000x slower.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        assert!(is_sfc_file(Path::new("App.vue")));
    }

    #[test]
    fn is_sfc_file_svelte() {
        assert!(is_sfc_file(Path::new("Counter.svelte")));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        assert!(!is_sfc_file(Path::new("utils.ts")));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        assert!(!is_sfc_file(Path::new("App.jsx")));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        assert!(!is_sfc_file(Path::new("Layout.astro")));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
        assert!(scripts[0].src.is_none());
        assert!(!scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn single_ts_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].body.contains("export default"));
        assert!(scripts[1].body.contains("count"));
        // Only the second block carries the `setup` attribute.
        assert!(!scripts[0].is_setup);
        assert!(scripts[1].is_setup);
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let scripts =
            extract_sfc_scripts(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn script_setup_detected_as_trailing_attribute() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts" setup>const a = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_setup);
        assert!(scripts[0].is_typescript);
    }

    #[test]
    fn plain_ts_script_is_not_setup() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">const a = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_setup);
    }

    #[test]
    fn data_setup_not_treated_as_setup() {
        let scripts = extract_sfc_scripts(r#"<script data-setup lang="ts">const a = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_setup);
    }

    // ── Svelte context="module" ──────────────────────────────────

    #[test]
    fn svelte_context_module_detected() {
        let source = r#"<script context="module">export const prerender = true;</script>
<script>let count = 0;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].is_context_module);
        assert!(!scripts[0].is_setup);
        assert!(!scripts[1].is_context_module);
    }

    #[test]
    fn svelte_context_module_single_quoted() {
        let scripts = extract_sfc_scripts("<script context='module'>export const a = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_context_module);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let scripts = extract_sfc_scripts(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let source = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let source = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // A string in the script body containing <!-- should not cause filtering issues
        let source = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let source =
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let source = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let scripts = extract_sfc_scripts("<script lang='ts'>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let scripts = extract_sfc_scripts(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let scripts = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(scripts.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let scripts = extract_sfc_scripts("<script lang=\"ts\">\n  \n</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        // The byte_offset should point to where "code" starts in the source
        let offset = scripts[0].byte_offset;
        assert_eq!(&source[offset..offset + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let scripts = extract_sfc_scripts(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].src.is_none());
        // `type="module"` is unrelated to Svelte's `context="module"`.
        assert!(!scripts[0].is_context_module);
    }

    // ── Full parse tests (Oxc parser ~1000x slower under Miri) ──

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), Path::new("Dual.vue"), source, 0);
        // The non-setup block exports `version`
        assert!(
            info.exports
                .iter()
                .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version")),
            "export from <script> block should be extracted"
        );
        // The setup block imports `ref` from 'vue'
        assert!(
            info.imports.iter().any(|i| i.source == "vue"),
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript, "lang=tsx should be typescript");
        assert!(scripts[0].is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let source = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("External.vue"),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        assert!(
            info.imports
                .iter()
                .any(|i| i.source == "./external-logic.ts"
                    && matches!(i.imported_name, ImportedName::SideEffect)),
            "script src should generate a side-effect import"
        );
    }

    // ── Additional coverage ─────────────────────────────────────

    #[test]
    fn parse_sfc_no_script_returns_empty_module() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Empty.vue"),
            "<template><div>Hello</div></template>",
            42,
        );
        assert!(info.imports.is_empty());
        assert!(info.exports.is_empty());
        assert_eq!(info.content_hash, 42);
        assert_eq!(info.file_id, FileId(0));
    }

    #[test]
    fn parse_sfc_has_line_offsets() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("LineOffsets.vue"),
            r#"<script lang="ts">const x = 1;</script>"#,
            0,
        );
        assert!(!info.line_offsets.is_empty());
    }

    #[test]
    fn parse_sfc_has_suppressions() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Suppressions.vue"),
            r#"<script lang="ts">
// fallow-ignore-file
export const foo = 1;
</script>"#,
            0,
        );
        assert!(!info.suppressions.is_empty());
    }

    #[test]
    fn source_type_jsx_detection() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn source_type_plain_js_detection() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn is_sfc_file_rejects_no_extension() {
        assert!(!is_sfc_file(Path::new("Makefile")));
    }

    #[test]
    fn is_sfc_file_rejects_mdx() {
        assert!(!is_sfc_file(Path::new("post.mdx")));
    }

    #[test]
    fn is_sfc_file_rejects_css() {
        assert!(!is_sfc_file(Path::new("styles.css")));
    }

    #[test]
    fn multiple_script_blocks_both_have_offsets() {
        let source = r#"<script lang="ts">const a = 1;</script>
<script setup lang="ts">const b = 2;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        // Both scripts should have valid byte offsets
        let offset0 = scripts[0].byte_offset;
        let offset1 = scripts[1].byte_offset;
        assert_eq!(
            &source[offset0..offset0 + "const a = 1;".len()],
            "const a = 1;"
        );
        assert_eq!(
            &source[offset1..offset1 + "const b = 2;".len()],
            "const b = 2;"
        );
    }

    #[test]
    fn script_with_src_and_lang() {
        // src + lang should both be detected
        let scripts = extract_sfc_scripts(r#"<script src="./logic.ts" lang="tsx"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./logic.ts"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }
}