Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13use rustc_hash::FxHashSet;
14
15use crate::parse::compute_unused_import_bindings;
16use crate::sfc_template::{SfcKind, collect_template_usage};
17use crate::visitor::ModuleInfoExtractor;
18use crate::{ImportInfo, ImportedName, ModuleInfo};
19use fallow_types::discover::FileId;
20use oxc_span::Span;
21
/// Regex to extract `<script>` blocks (attributes and body) from Vue/Svelte SFCs.
///
/// - `(?is)`: the match is case-insensitive (so `<SCRIPT>` is accepted) and `.` spans newlines.
/// - `attrs`: everything between `<script` and the closing `>` of the opening tag. Quoted
///   attribute values are consumed as a unit, so a `>` inside quotes
///   (e.g., `generic="T extends Foo<Bar>"`) does not terminate the tag early.
/// - `body`: matched lazily, so it ends at the first `</script>` that follows the opening tag.
static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(
        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
    )
    .expect("valid regex")
});
30
31/// Regex to extract the `lang` attribute value from a script tag.
32static LANG_ATTR_RE: LazyLock<regex::Regex> =
33    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
34
/// Regex to extract the `src` attribute value from a script tag.
///
/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
/// Capture group 1 is the non-empty attribute value, which may be single- or double-quoted.
static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
});
40
/// Regex to detect Vue's bare `setup` attribute.
///
/// `setup` must be delimited by whitespace or the string boundaries on both sides, so
/// longer names that contain it (e.g., `data-setup`) and `setup=...` forms do not match.
static SETUP_ATTR_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?:^|\s)setup(?:\s|$)").expect("valid regex"));
44
45/// Regex to detect Svelte's `context="module"` attribute.
46static CONTEXT_MODULE_ATTR_RE: LazyLock<regex::Regex> =
47    LazyLock::new(|| regex::Regex::new(r#"context\s*=\s*["']module["']"#).expect("valid regex"));
48
/// Regex to match HTML comments for filtering script blocks inside comments.
///
/// `(?s)` lets the comment span multiple lines; the lazy `.*?` ends each match at the
/// first `-->` after the opening `<!--`.
static HTML_COMMENT_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
52
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged, copied and
/// compared directly (e.g., with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening and closing tags).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if present.
    pub src: Option<String>,
    /// Whether this script is a Vue `<script setup>` block.
    pub is_setup: bool,
    /// Whether this script is a Svelte module-context block.
    pub is_context_module: bool,
}
70
71/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
72pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
73    // Build HTML comment ranges to filter out <script> blocks inside comments.
74    // Using ranges instead of source replacement avoids corrupting script body content
75    // (e.g., string literals containing "<!--" would be destroyed by replacement).
76    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
77        .find_iter(source)
78        .map(|m| (m.start(), m.end()))
79        .collect();
80
81    SCRIPT_BLOCK_RE
82        .captures_iter(source)
83        .filter(|cap| {
84            let start = cap.get(0).map_or(0, |m| m.start());
85            !comment_ranges
86                .iter()
87                .any(|&(cs, ce)| start >= cs && start < ce)
88        })
89        .map(|cap| {
90            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
91            let body_match = cap.name("body");
92            let byte_offset = body_match.map_or(0, |m| m.start());
93            let body = body_match.map_or("", |m| m.as_str()).to_string();
94            let lang = LANG_ATTR_RE
95                .captures(attrs)
96                .and_then(|c| c.get(1))
97                .map(|m| m.as_str());
98            let is_typescript = matches!(lang, Some("ts" | "tsx"));
99            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
100            let src = SRC_ATTR_RE
101                .captures(attrs)
102                .and_then(|c| c.get(1))
103                .map(|m| m.as_str().to_string());
104            let is_setup = SETUP_ATTR_RE.is_match(attrs);
105            let is_context_module = CONTEXT_MODULE_ATTR_RE.is_match(attrs);
106            SfcScript {
107                body,
108                is_typescript,
109                is_jsx,
110                byte_offset,
111                src,
112                is_setup,
113                is_context_module,
114            }
115        })
116        .collect()
117}
118
/// Report whether `path` names a Vue or Svelte single file component.
///
/// Only the final extension is inspected and the comparison is case-sensitive:
/// `.vue` and `.svelte` are accepted, everything else (including paths without an
/// extension or with a non-UTF-8 extension) is rejected.
#[must_use]
pub fn is_sfc_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("vue" | "svelte")
    )
}
126
127/// Parse an SFC file by extracting and combining all `<script>` blocks.
128pub(crate) fn parse_sfc_to_module(
129    file_id: FileId,
130    path: &Path,
131    source: &str,
132    content_hash: u64,
133) -> ModuleInfo {
134    let scripts = extract_sfc_scripts(source);
135    let kind = sfc_kind(path);
136    let mut combined = empty_sfc_module(file_id, source, content_hash);
137    let mut template_visible_imports: FxHashSet<String> = FxHashSet::default();
138
139    for script in &scripts {
140        merge_script_into_module(kind, script, &mut combined, &mut template_visible_imports);
141    }
142
143    apply_template_usage(kind, source, &template_visible_imports, &mut combined);
144    combined.unused_import_bindings.sort_unstable();
145    combined.unused_import_bindings.dedup();
146
147    combined
148}
149
150fn sfc_kind(path: &Path) -> SfcKind {
151    if path.extension().and_then(|ext| ext.to_str()) == Some("vue") {
152        SfcKind::Vue
153    } else {
154        SfcKind::Svelte
155    }
156}
157
158fn empty_sfc_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
159    // For SFC files, use string scanning for suppression comments since script block
160    // byte offsets don't correspond to the original file positions.
161    let suppressions = crate::suppress::parse_suppressions_from_source(source);
162
163    ModuleInfo {
164        file_id,
165        exports: Vec::new(),
166        imports: Vec::new(),
167        re_exports: Vec::new(),
168        dynamic_imports: Vec::new(),
169        dynamic_import_patterns: Vec::new(),
170        require_calls: Vec::new(),
171        member_accesses: Vec::new(),
172        whole_object_uses: Vec::new(),
173        has_cjs_exports: false,
174        content_hash,
175        suppressions,
176        unused_import_bindings: Vec::new(),
177        line_offsets: fallow_types::extract::compute_line_offsets(source),
178        complexity: Vec::new(),
179    }
180}
181
182fn merge_script_into_module(
183    kind: SfcKind,
184    script: &SfcScript,
185    combined: &mut ModuleInfo,
186    template_visible_imports: &mut FxHashSet<String>,
187) {
188    if let Some(src) = &script.src {
189        add_script_src_import(combined, src);
190    }
191
192    let allocator = Allocator::default();
193    let parser_return =
194        Parser::new(&allocator, &script.body, source_type_for_script(script)).parse();
195    let mut extractor = ModuleInfoExtractor::new();
196    extractor.visit_program(&parser_return.program);
197
198    let unused_import_bindings =
199        compute_unused_import_bindings(&parser_return.program, &extractor.imports);
200    combined
201        .unused_import_bindings
202        .extend(unused_import_bindings.iter().cloned());
203
204    if is_template_visible_script(kind, script) {
205        template_visible_imports.extend(
206            extractor
207                .imports
208                .iter()
209                .filter(|import| !import.local_name.is_empty())
210                .map(|import| import.local_name.clone()),
211        );
212    }
213
214    extractor.merge_into(combined);
215}
216
217fn add_script_src_import(module: &mut ModuleInfo, source: &str) {
218    module.imports.push(ImportInfo {
219        source: source.to_string(),
220        imported_name: ImportedName::SideEffect,
221        local_name: String::new(),
222        is_type_only: false,
223        span: Span::default(),
224        source_span: Span::default(),
225    });
226}
227
228fn source_type_for_script(script: &SfcScript) -> SourceType {
229    match (script.is_typescript, script.is_jsx) {
230        (true, true) => SourceType::tsx(),
231        (true, false) => SourceType::ts(),
232        (false, true) => SourceType::jsx(),
233        (false, false) => SourceType::mjs(),
234    }
235}
236
237fn apply_template_usage(
238    kind: SfcKind,
239    source: &str,
240    template_visible_imports: &FxHashSet<String>,
241    combined: &mut ModuleInfo,
242) {
243    if template_visible_imports.is_empty() {
244        return;
245    }
246
247    let template_usage = collect_template_usage(kind, source, template_visible_imports);
248    combined
249        .unused_import_bindings
250        .retain(|binding| !template_usage.used_bindings.contains(binding));
251    combined
252        .member_accesses
253        .extend(template_usage.member_accesses);
254    combined
255        .whole_object_uses
256        .extend(template_usage.whole_object_uses);
257}
258
259fn is_template_visible_script(kind: SfcKind, script: &SfcScript) -> bool {
260    match kind {
261        SfcKind::Vue => script.is_setup,
262        SfcKind::Svelte => !script.is_context_module,
263    }
264}
265
// SFC tests exercise regex-based HTML string extraction — no unsafe code,
// no Miri-specific value. Oxc parser tests are additionally ~1000x slower.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        assert!(is_sfc_file(Path::new("App.vue")));
    }

    #[test]
    fn is_sfc_file_svelte() {
        assert!(is_sfc_file(Path::new("Counter.svelte")));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        assert!(!is_sfc_file(Path::new("utils.ts")));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        assert!(!is_sfc_file(Path::new("App.jsx")));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        assert!(!is_sfc_file(Path::new("Layout.astro")));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
        assert!(scripts[0].src.is_none());
        assert!(!scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn single_ts_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        assert!(scripts[0].body.contains("export default"));
        assert!(scripts[1].body.contains("count"));
        // Only the second block carries the `setup` attribute.
        assert!(!scripts[0].is_setup);
        assert!(scripts[1].is_setup);
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let scripts =
            extract_sfc_scripts(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_setup);
        assert!(!scripts[0].is_context_module);
    }

    #[test]
    fn setup_after_other_attributes_detected() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts" setup>const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_setup);
    }

    // ── Svelte context="module" ──────────────────────────────────

    #[test]
    fn svelte_context_module_detected() {
        let scripts = extract_sfc_scripts(
            r#"<script context="module">export const prerender = true;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_context_module);
        assert!(!scripts[0].is_setup);
    }

    #[test]
    fn svelte_instance_script_is_not_context_module() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts">let count = 0;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_context_module);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let scripts = extract_sfc_scripts(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let source = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let source = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // A string in the script body containing <!-- should not cause filtering issues
        let source = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let source =
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let source = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let scripts = extract_sfc_scripts("<script lang='ts'>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let scripts = extract_sfc_scripts(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let scripts = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(scripts.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let scripts = extract_sfc_scripts(r#"<script lang="ts"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let scripts = extract_sfc_scripts("<script lang=\"ts\">\n  \n</script>");
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        // The byte_offset should point to where "code" starts in the source
        let offset = scripts[0].byte_offset;
        assert_eq!(&source[offset..offset + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let scripts = extract_sfc_scripts(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].src.is_none());
    }

    // ── Full parse tests (Oxc parser ~1000x slower under Miri) ──

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), Path::new("Dual.vue"), source, 0);
        // The non-setup block exports `version`
        assert!(
            info.exports
                .iter()
                .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version")),
            "export from <script> block should be extracted"
        );
        // The setup block imports `ref` from 'vue'
        assert!(
            info.imports.iter().any(|i| i.source == "vue"),
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let scripts =
            extract_sfc_scripts(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].is_typescript, "lang=tsx should be typescript");
        assert!(scripts[0].is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let source = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        assert!(scripts[0].body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("External.vue"),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        assert!(
            info.imports
                .iter()
                .any(|i| i.source == "./external-logic.ts"
                    && matches!(i.imported_name, ImportedName::SideEffect)),
            "script src should generate a side-effect import"
        );
    }

    // ── Additional coverage ─────────────────────────────────────

    #[test]
    fn parse_sfc_no_script_returns_empty_module() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Empty.vue"),
            "<template><div>Hello</div></template>",
            42,
        );
        assert!(info.imports.is_empty());
        assert!(info.exports.is_empty());
        assert_eq!(info.content_hash, 42);
        assert_eq!(info.file_id, FileId(0));
    }

    #[test]
    fn parse_sfc_has_line_offsets() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("LineOffsets.vue"),
            r#"<script lang="ts">const x = 1;</script>"#,
            0,
        );
        assert!(!info.line_offsets.is_empty());
    }

    #[test]
    fn parse_sfc_has_suppressions() {
        let info = parse_sfc_to_module(
            FileId(0),
            Path::new("Suppressions.vue"),
            r#"<script lang="ts">
// fallow-ignore-file
export const foo = 1;
</script>"#,
            0,
        );
        assert!(!info.suppressions.is_empty());
    }

    #[test]
    fn source_type_jsx_detection() {
        let scripts = extract_sfc_scripts(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }

    #[test]
    fn source_type_plain_js_detection() {
        let scripts = extract_sfc_scripts("<script>const x = 1;</script>");
        assert_eq!(scripts.len(), 1);
        assert!(!scripts[0].is_typescript);
        assert!(!scripts[0].is_jsx);
    }

    #[test]
    fn is_sfc_file_rejects_no_extension() {
        assert!(!is_sfc_file(Path::new("Makefile")));
    }

    #[test]
    fn is_sfc_file_rejects_mdx() {
        assert!(!is_sfc_file(Path::new("post.mdx")));
    }

    #[test]
    fn is_sfc_file_rejects_css() {
        assert!(!is_sfc_file(Path::new("styles.css")));
    }

    #[test]
    fn multiple_script_blocks_both_have_offsets() {
        let source = r#"<script lang="ts">const a = 1;</script>
<script setup lang="ts">const b = 2;</script>"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        // Both scripts should have valid byte offsets
        let offset0 = scripts[0].byte_offset;
        let offset1 = scripts[1].byte_offset;
        assert_eq!(
            &source[offset0..offset0 + "const a = 1;".len()],
            "const a = 1;"
        );
        assert_eq!(
            &source[offset1..offset1 + "const b = 2;".len()],
            "const b = 2;"
        );
    }

    #[test]
    fn script_with_src_and_lang() {
        // src + lang should both be detected
        let scripts = extract_sfc_scripts(r#"<script src="./logic.ts" lang="tsx"></script>"#);
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].src.as_deref(), Some("./logic.ts"));
        assert!(scripts[0].is_typescript);
        assert!(scripts[0].is_jsx);
    }
}