Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13
14use crate::visitor::ModuleInfoExtractor;
15use crate::{ImportInfo, ImportedName, ModuleInfo};
16use fallow_types::discover::FileId;
17use oxc_span::Span;
18
19/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
20/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
21static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
22    regex::Regex::new(
23        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
24    )
25    .expect("valid regex")
26});
27
28/// Regex to extract the `lang` attribute value from a script tag.
29static LANG_ATTR_RE: LazyLock<regex::Regex> =
30    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
31
32/// Regex to extract the `src` attribute value from a script tag.
33/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
34static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
35    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
36});
37
38/// Regex to match HTML comments for filtering script blocks inside comments.
39static HTML_COMMENT_RE: LazyLock<regex::Regex> =
40    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
41
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives the common value-type traits so blocks can be debug-printed, cloned, and compared
/// directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening and closing tags).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if present.
    pub src: Option<String>,
}
55
56/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
57pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
58    // Build HTML comment ranges to filter out <script> blocks inside comments.
59    // Using ranges instead of source replacement avoids corrupting script body content
60    // (e.g., string literals containing "<!--" would be destroyed by replacement).
61    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
62        .find_iter(source)
63        .map(|m| (m.start(), m.end()))
64        .collect();
65
66    SCRIPT_BLOCK_RE
67        .captures_iter(source)
68        .filter(|cap| {
69            let start = cap.get(0).map_or(0, |m| m.start());
70            !comment_ranges
71                .iter()
72                .any(|&(cs, ce)| start >= cs && start < ce)
73        })
74        .map(|cap| {
75            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
76            let body_match = cap.name("body");
77            let byte_offset = body_match.map_or(0, |m| m.start());
78            let body = body_match.map_or("", |m| m.as_str()).to_string();
79            let lang = LANG_ATTR_RE
80                .captures(attrs)
81                .and_then(|c| c.get(1))
82                .map(|m| m.as_str());
83            let is_typescript = matches!(lang, Some("ts" | "tsx"));
84            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
85            let src = SRC_ATTR_RE
86                .captures(attrs)
87                .and_then(|c| c.get(1))
88                .map(|m| m.as_str().to_string());
89            SfcScript {
90                body,
91                is_typescript,
92                is_jsx,
93                byte_offset,
94                src,
95            }
96        })
97        .collect()
98}
99
/// Report whether `path` names a Single File Component.
///
/// Returns `true` only when the extension is exactly `vue` or `svelte` (case-sensitive).
/// Paths with no extension, or with an extension that is not valid UTF-8, yield `false`.
pub fn is_sfc_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("vue" | "svelte")
    )
}
106
107/// Parse an SFC file by extracting and combining all `<script>` blocks.
108pub(crate) fn parse_sfc_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
109    let scripts = extract_sfc_scripts(source);
110
111    // For SFC files, use string scanning for suppression comments since script block
112    // byte offsets don't correspond to the original file positions.
113    let suppressions = crate::suppress::parse_suppressions_from_source(source);
114
115    let mut combined = ModuleInfo {
116        file_id,
117        exports: Vec::new(),
118        imports: Vec::new(),
119        re_exports: Vec::new(),
120        dynamic_imports: Vec::new(),
121        dynamic_import_patterns: Vec::new(),
122        require_calls: Vec::new(),
123        member_accesses: Vec::new(),
124        whole_object_uses: Vec::new(),
125        has_cjs_exports: false,
126        content_hash,
127        suppressions,
128        unused_import_bindings: Vec::new(),
129        line_offsets: fallow_types::extract::compute_line_offsets(source),
130        complexity: Vec::new(),
131    };
132
133    for script in &scripts {
134        if let Some(src) = &script.src {
135            combined.imports.push(ImportInfo {
136                source: src.clone(),
137                imported_name: ImportedName::SideEffect,
138                local_name: String::new(),
139                is_type_only: false,
140                span: Span::default(),
141                source_span: Span::default(),
142            });
143        }
144
145        let source_type = match (script.is_typescript, script.is_jsx) {
146            (true, true) => SourceType::tsx(),
147            (true, false) => SourceType::ts(),
148            (false, true) => SourceType::jsx(),
149            (false, false) => SourceType::mjs(),
150        };
151        let allocator = Allocator::default();
152        let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
153        let mut extractor = ModuleInfoExtractor::new();
154        extractor.visit_program(&parser_return.program);
155        extractor.merge_into(&mut combined);
156    }
157
158    combined
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs extraction and returns the only script block, failing unless exactly one was found.
    fn sole_script(source: &str) -> SfcScript {
        let mut found = extract_sfc_scripts(source);
        assert_eq!(found.len(), 1);
        found.pop().expect("length was asserted to be 1")
    }

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        let path = Path::new("App.vue");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_svelte() {
        let path = Path::new("Counter.svelte");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        let path = Path::new("utils.ts");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        let path = Path::new("App.jsx");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        let path = Path::new("Layout.astro");
        assert!(!is_sfc_file(path));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let script = sole_script("<script>const x = 1;</script>");
        assert_eq!(script.body, "const x = 1;");
        assert!(!script.is_typescript);
        assert!(!script.is_jsx);
        assert!(script.src.is_none());
    }

    #[test]
    fn single_ts_script() {
        let script = sole_script(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert!(script.is_typescript);
        assert!(!script.is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let script = sole_script(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert!(script.is_typescript);
        assert!(script.is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let script = sole_script(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert!(!script.is_typescript);
        assert!(script.is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let found = extract_sfc_scripts(source);
        assert_eq!(found.len(), 2);
        let (first, second) = (&found[0], &found[1]);
        assert!(first.body.contains("export default"));
        assert!(second.body.contains("count"));
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let script = sole_script(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert!(script.body.contains("import"));
        assert!(script.is_typescript);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let script = sole_script(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(script.src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let script =
            sole_script(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert!(script.src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let source = r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#;
        let survivor = sole_script(source);
        assert!(survivor.body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let source = r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#;
        let survivor = sole_script(source);
        assert!(survivor.body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // Comment markers inside a string literal in the script body must not affect filtering.
        let source = r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#;
        let script = sole_script(source);
        assert!(script.body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let source =
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#;
        let script = sole_script(source);
        assert_eq!(script.body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let source = r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#;
        let script = sole_script(source);
        assert_eq!(script.body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let script = sole_script("<script lang='ts'>const x = 1;</script>");
        assert!(script.is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let script = sole_script(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert!(script.is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let found = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(found.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let script = sole_script(r#"<script lang="ts"></script>"#);
        assert!(script.body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let script = sole_script("<script lang=\"ts\">\n  \n</script>");
        assert!(script.body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        // The recorded offset must point at the first byte of "code" in the full source.
        let start = sole_script(source).byte_offset;
        assert_eq!(&source[start..start + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let script = sole_script(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert!(script.is_typescript);
        assert!(script.src.is_none());
    }

    // ── Multiple script blocks: exports from both ───────────────

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), source, 0);

        // The non-setup block exports `version`.
        let exports_version = info
            .exports
            .iter()
            .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version"));
        assert!(
            exports_version,
            "export from <script> block should be extracted"
        );

        // The setup block imports `ref` from 'vue'.
        let imports_vue = info.imports.iter().any(|i| i.source == "vue");
        assert!(
            imports_vue,
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let script = sole_script(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert!(script.is_typescript, "lang=tsx should be typescript");
        assert!(script.is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let source = r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#;
        let survivor = sole_script(source);
        assert!(survivor.body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        let has_side_effect_import = info.imports.iter().any(|i| {
            i.source == "./external-logic.ts" && matches!(i.imported_name, ImportedName::SideEffect)
        });
        assert!(
            has_side_effect_import,
            "script src should generate a side-effect import"
        );
    }
}
462                    && matches!(i.imported_name, ImportedName::SideEffect)),
463            "script src should generate a side-effect import"
464        );
465    }
466}