Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13use rustc_hash::FxHashSet;
14
15use crate::parse::compute_unused_import_bindings;
16use crate::sfc_template::{SfcKind, collect_template_usage};
17use crate::visitor::ModuleInfoExtractor;
18use crate::{ImportInfo, ImportedName, ModuleInfo};
19use fallow_types::discover::FileId;
20use oxc_span::Span;
21
22/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
23/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
24static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
25    regex::Regex::new(
26        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
27    )
28    .expect("valid regex")
29});
30
31/// Regex to extract the `lang` attribute value from a script tag.
32static LANG_ATTR_RE: LazyLock<regex::Regex> =
33    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
34
35/// Regex to extract the `src` attribute value from a script tag.
36/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
37static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
38    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
39});
40
41/// Regex to detect Vue's bare `setup` attribute.
42static SETUP_ATTR_RE: LazyLock<regex::Regex> =
43    LazyLock::new(|| regex::Regex::new(r"(?:^|\s)setup(?:\s|$)").expect("valid regex"));
44
45/// Regex to detect Svelte's `context="module"` attribute.
46static CONTEXT_MODULE_ATTR_RE: LazyLock<regex::Regex> =
47    LazyLock::new(|| regex::Regex::new(r#"context\s*=\s*["']module["']"#).expect("valid regex"));
48
49/// Regex to match HTML comments for filtering script blocks inside comments.
50static HTML_COMMENT_RE: LazyLock<regex::Regex> =
51    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
52
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// All fields are derived from the opening tag's attributes and the text between the
/// opening and closing tags; nothing is parsed as JavaScript at this stage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text.
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from `src` attribute.
    pub src: Option<String>,
    /// Whether this script is a Vue `<script setup>` block.
    pub is_setup: bool,
    /// Whether this script is a Svelte module-context block.
    pub is_context_module: bool,
}
70
71/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
72pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
73    // Build HTML comment ranges to filter out <script> blocks inside comments.
74    // Using ranges instead of source replacement avoids corrupting script body content
75    // (e.g., string literals containing "<!--" would be destroyed by replacement).
76    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
77        .find_iter(source)
78        .map(|m| (m.start(), m.end()))
79        .collect();
80
81    SCRIPT_BLOCK_RE
82        .captures_iter(source)
83        .filter(|cap| {
84            let start = cap.get(0).map_or(0, |m| m.start());
85            !comment_ranges
86                .iter()
87                .any(|&(cs, ce)| start >= cs && start < ce)
88        })
89        .map(|cap| {
90            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
91            let body_match = cap.name("body");
92            let byte_offset = body_match.map_or(0, |m| m.start());
93            let body = body_match.map_or("", |m| m.as_str()).to_string();
94            let lang = LANG_ATTR_RE
95                .captures(attrs)
96                .and_then(|c| c.get(1))
97                .map(|m| m.as_str());
98            let is_typescript = matches!(lang, Some("ts" | "tsx"));
99            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
100            let src = SRC_ATTR_RE
101                .captures(attrs)
102                .and_then(|c| c.get(1))
103                .map(|m| m.as_str().to_string());
104            let is_setup = SETUP_ATTR_RE.is_match(attrs);
105            let is_context_module = CONTEXT_MODULE_ATTR_RE.is_match(attrs);
106            SfcScript {
107                body,
108                is_typescript,
109                is_jsx,
110                byte_offset,
111                src,
112                is_setup,
113                is_context_module,
114            }
115        })
116        .collect()
117}
118
/// Returns `true` when `path` has a `.vue` or `.svelte` extension.
///
/// The comparison is exact (case-sensitive); a path whose extension is missing or is not
/// valid UTF-8 yields `false`.
#[must_use]
pub fn is_sfc_file(path: &Path) -> bool {
    let extension = path.extension().and_then(std::ffi::OsStr::to_str);
    matches!(extension, Some("vue" | "svelte"))
}
126
127/// Parse an SFC file by extracting and combining all `<script>` blocks.
128pub(crate) fn parse_sfc_to_module(
129    file_id: FileId,
130    path: &Path,
131    source: &str,
132    content_hash: u64,
133) -> ModuleInfo {
134    let scripts = extract_sfc_scripts(source);
135    let kind = sfc_kind(path);
136    let mut combined = empty_sfc_module(file_id, source, content_hash);
137    let mut template_visible_imports: FxHashSet<String> = FxHashSet::default();
138
139    for script in &scripts {
140        merge_script_into_module(kind, script, &mut combined, &mut template_visible_imports);
141    }
142
143    apply_template_usage(kind, source, &template_visible_imports, &mut combined);
144    combined.unused_import_bindings.sort_unstable();
145    combined.unused_import_bindings.dedup();
146
147    combined
148}
149
150fn sfc_kind(path: &Path) -> SfcKind {
151    if path.extension().and_then(|ext| ext.to_str()) == Some("vue") {
152        SfcKind::Vue
153    } else {
154        SfcKind::Svelte
155    }
156}
157
158fn empty_sfc_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
159    // For SFC files, use string scanning for suppression comments since script block
160    // byte offsets don't correspond to the original file positions.
161    let suppressions = crate::suppress::parse_suppressions_from_source(source);
162
163    ModuleInfo {
164        file_id,
165        exports: Vec::new(),
166        imports: Vec::new(),
167        re_exports: Vec::new(),
168        dynamic_imports: Vec::new(),
169        dynamic_import_patterns: Vec::new(),
170        require_calls: Vec::new(),
171        member_accesses: Vec::new(),
172        whole_object_uses: Vec::new(),
173        has_cjs_exports: false,
174        content_hash,
175        suppressions,
176        unused_import_bindings: Vec::new(),
177        line_offsets: fallow_types::extract::compute_line_offsets(source),
178        complexity: Vec::new(),
179        flag_uses: Vec::new(),
180    }
181}
182
183fn merge_script_into_module(
184    kind: SfcKind,
185    script: &SfcScript,
186    combined: &mut ModuleInfo,
187    template_visible_imports: &mut FxHashSet<String>,
188) {
189    if let Some(src) = &script.src {
190        add_script_src_import(combined, src);
191    }
192
193    let allocator = Allocator::default();
194    let parser_return =
195        Parser::new(&allocator, &script.body, source_type_for_script(script)).parse();
196    let mut extractor = ModuleInfoExtractor::new();
197    extractor.visit_program(&parser_return.program);
198
199    let unused_import_bindings =
200        compute_unused_import_bindings(&parser_return.program, &extractor.imports);
201    combined
202        .unused_import_bindings
203        .extend(unused_import_bindings.iter().cloned());
204
205    if is_template_visible_script(kind, script) {
206        template_visible_imports.extend(
207            extractor
208                .imports
209                .iter()
210                .filter(|import| !import.local_name.is_empty())
211                .map(|import| import.local_name.clone()),
212        );
213    }
214
215    extractor.merge_into(combined);
216}
217
218fn add_script_src_import(module: &mut ModuleInfo, source: &str) {
219    module.imports.push(ImportInfo {
220        source: source.to_string(),
221        imported_name: ImportedName::SideEffect,
222        local_name: String::new(),
223        is_type_only: false,
224        span: Span::default(),
225        source_span: Span::default(),
226    });
227}
228
229fn source_type_for_script(script: &SfcScript) -> SourceType {
230    match (script.is_typescript, script.is_jsx) {
231        (true, true) => SourceType::tsx(),
232        (true, false) => SourceType::ts(),
233        (false, true) => SourceType::jsx(),
234        (false, false) => SourceType::mjs(),
235    }
236}
237
238fn apply_template_usage(
239    kind: SfcKind,
240    source: &str,
241    template_visible_imports: &FxHashSet<String>,
242    combined: &mut ModuleInfo,
243) {
244    if template_visible_imports.is_empty() {
245        return;
246    }
247
248    let template_usage = collect_template_usage(kind, source, template_visible_imports);
249    combined
250        .unused_import_bindings
251        .retain(|binding| !template_usage.used_bindings.contains(binding));
252    combined
253        .member_accesses
254        .extend(template_usage.member_accesses);
255    combined
256        .whole_object_uses
257        .extend(template_usage.whole_object_uses);
258}
259
260fn is_template_visible_script(kind: SfcKind, script: &SfcScript) -> bool {
261    match kind {
262        SfcKind::Vue => script.is_setup,
263        SfcKind::Svelte => !script.is_context_module,
264    }
265}
266
// SFC tests exercise regex-based HTML string extraction — no unsafe code,
// no Miri-specific value. Oxc parser tests are additionally ~1000x slower.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        assert!(is_sfc_file(Path::new("App.vue")));
    }

    #[test]
    fn is_sfc_file_svelte() {
        assert!(is_sfc_file(Path::new("Counter.svelte")));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        assert!(!is_sfc_file(Path::new("utils.ts")));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        assert!(!is_sfc_file(Path::new("App.jsx")));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        assert!(!is_sfc_file(Path::new("Layout.astro")));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
        assert!(scripts[0].src.is_none());
        // A bare <script> is neither a Vue setup block nor a Svelte module block.
        assert!(!scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn single_ts_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].body.contains("export default"));
        assert!(scripts[1].body.contains("count"));
        // Only the second block carries the `setup` attribute.
        assert!(!scripts[0].is_setup);
        assert!(scripts[1].is_setup);
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let scripts =
            extract_sfc_scripts(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn setup_as_last_attribute_detected() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts" setup>const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_setup);
        assert!(scripts[0].is_typescript);
    }

    // ── Svelte context="module" ──────────────────────────────────

    #[test]
    fn svelte_context_module_detected() {
        let source = r#"<script context="module">export const prerender = true;</script>
<script>let count = 0;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].is_context_module);
        assert!(!scripts[0].is_setup);
        assert!(!scripts[1].is_context_module);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let scripts = extract_sfc_scripts(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let source = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let source = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // A string in the script body containing <!-- should not cause filtering issues
        let source = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let source =
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let source = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let scripts = extract_sfc_scripts("<script lang='ts'>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let scripts = extract_sfc_scripts(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let scripts = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(scripts.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let scripts = extract_sfc_scripts("<script lang=\"ts\">\n  \n</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        // The byte_offset should point to where "code" starts in the source
        let offset = scripts[0].byte_offset;
        assert_eq!(&source[offset..offset + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let scripts = extract_sfc_scripts(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].src.is_none());
        // `type="module"` is not Svelte's `context="module"`.
        assert!(!scripts[0].is_context_module);
    }

    // ── Full parse tests (Oxc parser ~1000x slower under Miri) ──

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), Path::new("Dual.vue"), source, 0);
        // The non-setup block exports `version`
        assert!(
            info.exports
                .iter()
                .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version")),
            "export from <script> block should be extracted"
        );
        // The setup block imports `ref` from 'vue'
        assert!(
            info.imports.iter().any(|i| i.source == "vue"),
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript, "lang=tsx should be typescript");
        assert!(scripts[0].is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let source = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("External.vue"),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        assert!(
            info.imports
                .iter()
                .any(|i| i.source == "./external-logic.ts"
                    && matches!(i.imported_name, ImportedName::SideEffect)),
            "script src should generate a side-effect import"
        );
    }

    // ── Additional coverage ─────────────────────────────────────

    #[test]
    fn parse_sfc_no_script_returns_empty_module() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Empty.vue"),
            "<template><div>Hello</div></template>",
            42,
        );
        assert!(info.imports.is_empty());
        assert!(info.exports.is_empty());
        assert_eq!(info.content_hash, 42);
        assert_eq!(info.file_id, FileId(0));
    }

    #[test]
    fn parse_sfc_has_line_offsets() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("LineOffsets.vue"),
            r#"<script lang="ts">const x = 1;</script>"#,
            0,
        );
        assert!(!info.line_offsets.is_empty());
    }

    #[test]
    fn parse_sfc_has_suppressions() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Suppressions.vue"),
            r#"<script lang="ts">
// fallow-ignore-file
export const foo = 1;
</script>"#,
            0,
        );
        assert!(!info.suppressions.is_empty());
    }

    #[test]
    fn source_type_jsx_detection() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn source_type_plain_js_detection() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn is_sfc_file_rejects_no_extension() {
        assert!(!is_sfc_file(Path::new("Makefile")));
    }

    #[test]
    fn is_sfc_file_rejects_mdx() {
        assert!(!is_sfc_file(Path::new("post.mdx")));
    }

    #[test]
    fn is_sfc_file_rejects_css() {
        assert!(!is_sfc_file(Path::new("styles.css")));
    }

    #[test]
    fn multiple_script_blocks_both_have_offsets() {
        let source = r#"<script lang="ts">const a = 1;</script>
<script setup lang="ts">const b = 2;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        // Both scripts should have valid byte offsets
        let offset0 = scripts[0].byte_offset;
        let offset1 = scripts[1].byte_offset;
        assert_eq!(
            &source[offset0..offset0 + "const a = 1;".len()],
            "const a = 1;"
        );
        assert_eq!(
            &source[offset1..offset1 + "const b = 2;".len()],
            "const b = 2;"
        );
    }

    #[test]
    fn script_with_src_and_lang() {
        // src + lang should both be detected
        let scripts = extract_sfc_scripts(r#"<script src="./logic.ts" lang="tsx"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./logic.ts"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }
}