Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13
14use crate::visitor::ModuleInfoExtractor;
15use crate::{ImportInfo, ImportedName, ModuleInfo};
16use fallow_types::discover::FileId;
17use oxc_span::Span;
18
19/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
20/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
21static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
22    regex::Regex::new(
23        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
24    )
25    .expect("valid regex")
26});
27
28/// Regex to extract the `lang` attribute value from a script tag.
29static LANG_ATTR_RE: LazyLock<regex::Regex> =
30    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
31
32/// Regex to extract the `src` attribute value from a script tag.
33/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
34static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
35    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
36});
37
38/// Regex to match HTML comments for filtering script blocks inside comments.
39static HTML_COMMENT_RE: LazyLock<regex::Regex> =
40    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
41
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so extracted blocks can be logged,
/// duplicated and compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening tag and `</script>`).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if present.
    pub src: Option<String>,
}
55
56/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
57pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
58    // Build HTML comment ranges to filter out <script> blocks inside comments.
59    // Using ranges instead of source replacement avoids corrupting script body content
60    // (e.g., string literals containing "<!--" would be destroyed by replacement).
61    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
62        .find_iter(source)
63        .map(|m| (m.start(), m.end()))
64        .collect();
65
66    SCRIPT_BLOCK_RE
67        .captures_iter(source)
68        .filter(|cap| {
69            let start = cap.get(0).map_or(0, |m| m.start());
70            !comment_ranges
71                .iter()
72                .any(|&(cs, ce)| start >= cs && start < ce)
73        })
74        .map(|cap| {
75            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
76            let body_match = cap.name("body");
77            let byte_offset = body_match.map_or(0, |m| m.start());
78            let body = body_match.map_or("", |m| m.as_str()).to_string();
79            let lang = LANG_ATTR_RE
80                .captures(attrs)
81                .and_then(|c| c.get(1))
82                .map(|m| m.as_str());
83            let is_typescript = matches!(lang, Some("ts" | "tsx"));
84            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
85            let src = SRC_ATTR_RE
86                .captures(attrs)
87                .and_then(|c| c.get(1))
88                .map(|m| m.as_str().to_string());
89            SfcScript {
90                body,
91                is_typescript,
92                is_jsx,
93                byte_offset,
94                src,
95            }
96        })
97        .collect()
98}
99
/// Returns `true` when `path` has the extension `vue` or `svelte`.
///
/// The comparison is exact (case-sensitive); paths without an extension, or whose
/// extension is not valid UTF-8, yield `false`.
pub fn is_sfc_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("vue" | "svelte"))
}
106
107/// Parse an SFC file by extracting and combining all `<script>` blocks.
108pub(crate) fn parse_sfc_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
109    let scripts = extract_sfc_scripts(source);
110
111    // For SFC files, use string scanning for suppression comments since script block
112    // byte offsets don't correspond to the original file positions.
113    let suppressions = crate::suppress::parse_suppressions_from_source(source);
114
115    let mut combined = ModuleInfo {
116        file_id,
117        exports: Vec::new(),
118        imports: Vec::new(),
119        re_exports: Vec::new(),
120        dynamic_imports: Vec::new(),
121        dynamic_import_patterns: Vec::new(),
122        require_calls: Vec::new(),
123        member_accesses: Vec::new(),
124        whole_object_uses: Vec::new(),
125        has_cjs_exports: false,
126        content_hash,
127        suppressions,
128        unused_import_bindings: Vec::new(),
129        line_offsets: fallow_types::extract::compute_line_offsets(source),
130    };
131
132    for script in &scripts {
133        if let Some(src) = &script.src {
134            combined.imports.push(ImportInfo {
135                source: src.clone(),
136                imported_name: ImportedName::SideEffect,
137                local_name: String::new(),
138                is_type_only: false,
139                span: Span::default(),
140            });
141        }
142
143        let source_type = match (script.is_typescript, script.is_jsx) {
144            (true, true) => SourceType::tsx(),
145            (true, false) => SourceType::ts(),
146            (false, true) => SourceType::jsx(),
147            (false, false) => SourceType::mjs(),
148        };
149        let allocator = Allocator::default();
150        let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
151        let mut extractor = ModuleInfoExtractor::new();
152        extractor.visit_program(&parser_return.program);
153        extractor.merge_into(&mut combined);
154    }
155
156    combined
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor, asserts that exactly one block was found, and returns it.
    fn sole_script(source: &str) -> SfcScript {
        let mut found = extract_sfc_scripts(source);
        assert_eq!(found.len(), 1);
        found.pop().expect("length was just asserted to be 1")
    }

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        let path = Path::new("App.vue");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_svelte() {
        let path = Path::new("Counter.svelte");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        let path = Path::new("utils.ts");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        let path = Path::new("App.jsx");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        let path = Path::new("Layout.astro");
        assert!(!is_sfc_file(path));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let script = sole_script("<script>const x = 1;</script>");
        assert_eq!(script.body, "const x = 1;");
        assert!(!script.is_typescript);
        assert!(!script.is_jsx);
        assert!(script.src.is_none());
    }

    #[test]
    fn single_ts_script() {
        let script = sole_script(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert!(script.is_typescript);
        assert!(!script.is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let script = sole_script(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert!(script.is_typescript);
        assert!(script.is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let script = sole_script(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert!(!script.is_typescript);
        assert!(script.is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let sfc = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let found = extract_sfc_scripts(sfc);
        assert_eq!(found.len(), 2);
        let (first, second) = (&found[0], &found[1]);
        assert!(first.body.contains("export default"));
        assert!(second.body.contains("count"));
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let script = sole_script(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert!(script.body.contains("import"));
        assert!(script.is_typescript);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let script = sole_script(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(script.src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let script =
            sole_script(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert!(script.src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let sfc = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let script = sole_script(sfc);
        assert!(script.body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let sfc = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let script = sole_script(sfc);
        assert!(script.body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // Comment markers inside a string literal of the script body must not make
        // the extractor drop or mangle the block.
        let sfc = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let script = sole_script(sfc);
        assert!(script.body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let sfc = r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let script = sole_script(sfc);
        assert_eq!(script.body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let sfc = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let script = sole_script(sfc);
        assert_eq!(script.body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let script = sole_script("<script lang='ts'>const x = 1;</script>");
        assert!(script.is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let script = sole_script(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert!(script.is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let found = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(found.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let script = sole_script(r#"<script lang="ts"></script>"#);
        assert!(script.body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let script = sole_script("<script lang=\"ts\">\n  \n</script>");
        assert!(script.body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let sfc = r#"<template><div/></template><script lang="ts">code</script>"#;
        let script = sole_script(sfc);
        // The recorded offset must land exactly on the first byte of "code".
        let start = script.byte_offset;
        assert_eq!(&sfc[start..start + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let script = sole_script(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert!(script.is_typescript);
        assert!(script.src.is_none());
    }

    // ── Multiple script blocks: exports from both ───────────────

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let sfc = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let module = parse_sfc_to_module(FileId(0), sfc, 0);

        // `version` is exported by the plain (non-setup) block.
        let exports_version = module
            .exports
            .iter()
            .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version"));
        assert!(
            exports_version,
            "export from <script> block should be extracted"
        );

        // `ref` is imported from 'vue' by the setup block.
        let imports_vue = module.imports.iter().any(|i| i.source == "vue");
        assert!(
            imports_vue,
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let script = sole_script(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert!(script.is_typescript, "lang=tsx should be typescript");
        assert!(script.is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let sfc = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let script = sole_script(sfc);
        assert!(script.body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let sfc = r#"<script src="./external-logic.ts" lang="ts"></script>"#;
        let module = parse_sfc_to_module(FileId(0), sfc, 0);

        let has_side_effect_import = module.imports.iter().any(|i| {
            i.source == "./external-logic.ts" && matches!(i.imported_name, ImportedName::SideEffect)
        });
        assert!(
            has_side_effect_import,
            "script src should generate a side-effect import"
        );
    }
}