Skip to main content

fallow_extract/
sfc.rs

//! Vue/Svelte Single File Component (SFC) script extraction.
//!
//! Extracts `<script>` block content from `.vue` and `.svelte` files using regular expressions,
//! handling the `lang`, `src`, and `generic` attributes and skipping blocks inside HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13
14use crate::visitor::ModuleInfoExtractor;
15use crate::{ImportInfo, ImportedName, ModuleInfo};
16use fallow_types::discover::FileId;
17use oxc_span::Span;
18
19/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
20/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
21static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
22    regex::Regex::new(
23        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
24    )
25    .expect("valid regex")
26});
27
28/// Regex to extract the `lang` attribute value from a script tag.
29static LANG_ATTR_RE: LazyLock<regex::Regex> =
30    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
31
32/// Regex to extract the `src` attribute value from a script tag.
33/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
34static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
35    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
36});
37
38/// Regex to match HTML comments for filtering script blocks inside comments.
39static HTML_COMMENT_RE: LazyLock<regex::Regex> =
40    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
41
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives the common value traits so blocks can be printed with `{:?}`, cloned, and compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening and closing tags).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if one is present.
    pub src: Option<String>,
}
55
56/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
57pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
58    // Build HTML comment ranges to filter out <script> blocks inside comments.
59    // Using ranges instead of source replacement avoids corrupting script body content
60    // (e.g., string literals containing "<!--" would be destroyed by replacement).
61    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
62        .find_iter(source)
63        .map(|m| (m.start(), m.end()))
64        .collect();
65
66    SCRIPT_BLOCK_RE
67        .captures_iter(source)
68        .filter(|cap| {
69            let start = cap.get(0).map_or(0, |m| m.start());
70            !comment_ranges
71                .iter()
72                .any(|&(cs, ce)| start >= cs && start < ce)
73        })
74        .map(|cap| {
75            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
76            let body_match = cap.name("body");
77            let byte_offset = body_match.map_or(0, |m| m.start());
78            let body = body_match.map_or("", |m| m.as_str()).to_string();
79            let lang = LANG_ATTR_RE
80                .captures(attrs)
81                .and_then(|c| c.get(1))
82                .map(|m| m.as_str());
83            let is_typescript = matches!(lang, Some("ts" | "tsx"));
84            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
85            let src = SRC_ATTR_RE
86                .captures(attrs)
87                .and_then(|c| c.get(1))
88                .map(|m| m.as_str().to_string());
89            SfcScript {
90                body,
91                is_typescript,
92                is_jsx,
93                byte_offset,
94                src,
95            }
96        })
97        .collect()
98}
99
/// Report whether `path` names a Vue or Svelte single-file component.
///
/// Returns `true` only when the extension is exactly `vue` or `svelte`. The comparison is
/// case-sensitive, and paths without a UTF-8 extension yield `false`.
pub fn is_sfc_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("vue" | "svelte"))
}
106
107/// Parse an SFC file by extracting and combining all `<script>` blocks.
108pub(crate) fn parse_sfc_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
109    let scripts = extract_sfc_scripts(source);
110
111    // For SFC files, use string scanning for suppression comments since script block
112    // byte offsets don't correspond to the original file positions.
113    let suppressions = crate::suppress::parse_suppressions_from_source(source);
114
115    let mut combined = ModuleInfo {
116        file_id,
117        exports: Vec::new(),
118        imports: Vec::new(),
119        re_exports: Vec::new(),
120        dynamic_imports: Vec::new(),
121        dynamic_import_patterns: Vec::new(),
122        require_calls: Vec::new(),
123        member_accesses: Vec::new(),
124        whole_object_uses: Vec::new(),
125        has_cjs_exports: false,
126        content_hash,
127        suppressions,
128        unused_import_bindings: Vec::new(),
129        line_offsets: fallow_types::extract::compute_line_offsets(source),
130        complexity: Vec::new(),
131    };
132
133    for script in &scripts {
134        if let Some(src) = &script.src {
135            combined.imports.push(ImportInfo {
136                source: src.clone(),
137                imported_name: ImportedName::SideEffect,
138                local_name: String::new(),
139                is_type_only: false,
140                span: Span::default(),
141            });
142        }
143
144        let source_type = match (script.is_typescript, script.is_jsx) {
145            (true, true) => SourceType::tsx(),
146            (true, false) => SourceType::ts(),
147            (false, true) => SourceType::jsx(),
148            (false, false) => SourceType::mjs(),
149        };
150        let allocator = Allocator::default();
151        let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
152        let mut extractor = ModuleInfoExtractor::new();
153        extractor.visit_program(&parser_return.program);
154        extractor.merge_into(&mut combined);
155    }
156
157    combined
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts the scripts from `source`, asserts that exactly one block was found, and
    /// returns that block.
    #[track_caller]
    fn sole_script(source: &str) -> SfcScript {
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 1);
        scripts
            .into_iter()
            .next()
            .expect("length was asserted to be one")
    }

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        let path = Path::new("App.vue");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_svelte() {
        let path = Path::new("Counter.svelte");
        assert!(is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        let path = Path::new("utils.ts");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        let path = Path::new("App.jsx");
        assert!(!is_sfc_file(path));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        let path = Path::new("Layout.astro");
        assert!(!is_sfc_file(path));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let script = sole_script("<script>const x = 1;</script>");
        assert_eq!(script.body, "const x = 1;");
        assert!(!script.is_typescript);
        assert!(!script.is_jsx);
        assert!(script.src.is_none());
    }

    #[test]
    fn single_ts_script() {
        let script = sole_script(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert!(script.is_typescript);
        assert!(!script.is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let script = sole_script(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert!(script.is_typescript);
        assert!(script.is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let script = sole_script(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert!(!script.is_typescript);
        assert!(script.is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let source = r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#;
        let scripts = extract_sfc_scripts(source);
        assert_eq!(scripts.len(), 2);
        let (first, second) = (&scripts[0], &scripts[1]);
        assert!(first.body.contains("export default"));
        assert!(second.body.contains("count"));
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let script = sole_script(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert!(script.body.contains("import"));
        assert!(script.is_typescript);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let script = sole_script(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(script.src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let script =
            sole_script(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert!(script.src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let script = sole_script(
            r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#,
        );
        assert!(script.body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let script = sole_script(
            r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#,
        );
        assert!(script.body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // Comment markers that appear inside a script string literal must not hide the block.
        let script = sole_script(
            r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#,
        );
        assert!(script.body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let script = sole_script(
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#,
        );
        assert_eq!(script.body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let script = sole_script(
            r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#,
        );
        assert_eq!(script.body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let script = sole_script("<script lang='ts'>const x = 1;</script>");
        assert!(script.is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let script = sole_script(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert!(script.is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let found = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(found.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let script = sole_script(r#"<script lang="ts"></script>"#);
        assert!(script.body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let script = sole_script("<script lang=\"ts\">\n  \n</script>");
        assert!(script.body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let script = sole_script(source);
        // The recorded offset must point at the first byte of "code" in the full source.
        let start = script.byte_offset;
        assert_eq!(&source[start..start + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let script = sole_script(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert!(script.is_typescript);
        assert!(script.src.is_none());
    }

    // ── Multiple script blocks: exports from both ───────────────

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let source = r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#;
        let info = parse_sfc_to_module(FileId(0), source, 0);

        // The plain block contributes the `version` export.
        let exports_version = info
            .exports
            .iter()
            .any(|export| matches!(&export.name, crate::ExportName::Named(name) if name == "version"));
        assert!(
            exports_version,
            "export from <script> block should be extracted"
        );

        // The setup block contributes the import from 'vue'.
        let imports_vue = info.imports.iter().any(|import| import.source == "vue");
        assert!(
            imports_vue,
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let script = sole_script(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert!(script.is_typescript, "lang=tsx should be typescript");
        assert!(script.is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let script = sole_script(
            r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#,
        );
        assert!(script.body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let source = r#"<script src="./external-logic.ts" lang="ts"></script>"#;
        let info = parse_sfc_to_module(FileId(0), source, 0);

        let has_side_effect_import = info.imports.iter().any(|import| {
            import.source == "./external-logic.ts"
                && matches!(import.imported_name, ImportedName::SideEffect)
        });
        assert!(
            has_side_effect_import,
            "script src should generate a side-effect import"
        );
    }
}