Skip to main content

fallow_extract/
sfc.rs

1//! Vue/Svelte Single File Component (SFC) script extraction.
2//!
3//! Extracts `<script>` block content from `.vue` and `.svelte` files using regex,
4//! handling `lang`, `src`, and `generic` attributes, and filtering HTML comments.
5
6use std::path::Path;
7use std::sync::LazyLock;
8
9use oxc_allocator::Allocator;
10use oxc_ast_visit::Visit;
11use oxc_parser::Parser;
12use oxc_span::SourceType;
13
14use crate::visitor::ModuleInfoExtractor;
15use crate::{ImportInfo, ImportedName, ModuleInfo};
16use fallow_types::discover::FileId;
17use oxc_span::Span;
18
19/// Regex to extract `<script>` block content from Vue/Svelte SFCs.
20/// The attrs pattern handles `>` inside quoted attribute values (e.g., `generic="T extends Foo<Bar>"`).
21static SCRIPT_BLOCK_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
22    regex::Regex::new(
23        r#"(?is)<script\b(?P<attrs>(?:[^>"']|"[^"]*"|'[^']*')*)>(?P<body>[\s\S]*?)</script>"#,
24    )
25    .expect("valid regex")
26});
27
28/// Regex to extract the `lang` attribute value from a script tag.
29static LANG_ATTR_RE: LazyLock<regex::Regex> =
30    LazyLock::new(|| regex::Regex::new(r#"lang\s*=\s*["'](\w+)["']"#).expect("valid regex"));
31
32/// Regex to extract the `src` attribute value from a script tag.
33/// Requires whitespace (or start of string) before `src` to avoid matching `data-src` etc.
34static SRC_ATTR_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
35    regex::Regex::new(r#"(?:^|\s)src\s*=\s*["']([^"']+)["']"#).expect("valid regex")
36});
37
38/// Regex to match HTML comments for filtering script blocks inside comments.
39static HTML_COMMENT_RE: LazyLock<regex::Regex> =
40    LazyLock::new(|| regex::Regex::new(r"(?s)<!--.*?-->").expect("valid regex"));
41
/// An extracted `<script>` block from a Vue or Svelte SFC.
///
/// Derives the common value-type traits so blocks can be logged, duplicated, and
/// compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SfcScript {
    /// The script body text (everything between the opening and closing tags).
    pub body: String,
    /// Whether the script uses TypeScript (`lang="ts"` or `lang="tsx"`).
    pub is_typescript: bool,
    /// Whether the script uses JSX syntax (`lang="tsx"` or `lang="jsx"`).
    pub is_jsx: bool,
    /// Byte offset of the script body within the full SFC source.
    pub byte_offset: usize,
    /// External script source path from the `src` attribute, if present.
    pub src: Option<String>,
}
55
56/// Extract all `<script>` blocks from a Vue/Svelte SFC source string.
57pub fn extract_sfc_scripts(source: &str) -> Vec<SfcScript> {
58    // Build HTML comment ranges to filter out <script> blocks inside comments.
59    // Using ranges instead of source replacement avoids corrupting script body content
60    // (e.g., string literals containing "<!--" would be destroyed by replacement).
61    let comment_ranges: Vec<(usize, usize)> = HTML_COMMENT_RE
62        .find_iter(source)
63        .map(|m| (m.start(), m.end()))
64        .collect();
65
66    SCRIPT_BLOCK_RE
67        .captures_iter(source)
68        .filter(|cap| {
69            let start = cap.get(0).map_or(0, |m| m.start());
70            !comment_ranges
71                .iter()
72                .any(|&(cs, ce)| start >= cs && start < ce)
73        })
74        .map(|cap| {
75            let attrs = cap.name("attrs").map_or("", |m| m.as_str());
76            let body_match = cap.name("body");
77            let byte_offset = body_match.map_or(0, |m| m.start());
78            let body = body_match.map_or("", |m| m.as_str()).to_string();
79            let lang = LANG_ATTR_RE
80                .captures(attrs)
81                .and_then(|c| c.get(1))
82                .map(|m| m.as_str());
83            let is_typescript = matches!(lang, Some("ts" | "tsx"));
84            let is_jsx = matches!(lang, Some("tsx" | "jsx"));
85            let src = SRC_ATTR_RE
86                .captures(attrs)
87                .and_then(|c| c.get(1))
88                .map(|m| m.as_str().to_string());
89            SfcScript {
90                body,
91                is_typescript,
92                is_jsx,
93                byte_offset,
94                src,
95            }
96        })
97        .collect()
98}
99
/// Report whether `path` names a Single File Component.
///
/// Returns `true` only when the extension is exactly `vue` or `svelte`; the comparison
/// is case-sensitive, and paths without a UTF-8 extension yield `false`.
#[must_use]
pub fn is_sfc_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("vue" | "svelte"))
}
107
108/// Parse an SFC file by extracting and combining all `<script>` blocks.
109pub(crate) fn parse_sfc_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
110    let scripts = extract_sfc_scripts(source);
111
112    // For SFC files, use string scanning for suppression comments since script block
113    // byte offsets don't correspond to the original file positions.
114    let suppressions = crate::suppress::parse_suppressions_from_source(source);
115
116    let mut combined = ModuleInfo {
117        file_id,
118        exports: Vec::new(),
119        imports: Vec::new(),
120        re_exports: Vec::new(),
121        dynamic_imports: Vec::new(),
122        dynamic_import_patterns: Vec::new(),
123        require_calls: Vec::new(),
124        member_accesses: Vec::new(),
125        whole_object_uses: Vec::new(),
126        has_cjs_exports: false,
127        content_hash,
128        suppressions,
129        unused_import_bindings: Vec::new(),
130        line_offsets: fallow_types::extract::compute_line_offsets(source),
131        complexity: Vec::new(),
132    };
133
134    for script in &scripts {
135        if let Some(src) = &script.src {
136            combined.imports.push(ImportInfo {
137                source: src.clone(),
138                imported_name: ImportedName::SideEffect,
139                local_name: String::new(),
140                is_type_only: false,
141                span: Span::default(),
142                source_span: Span::default(),
143            });
144        }
145
146        let source_type = match (script.is_typescript, script.is_jsx) {
147            (true, true) => SourceType::tsx(),
148            (true, false) => SourceType::ts(),
149            (false, true) => SourceType::jsx(),
150            (false, false) => SourceType::mjs(),
151        };
152        let allocator = Allocator::default();
153        let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
154        let mut extractor = ModuleInfoExtractor::new();
155        extractor.visit_program(&parser_return.program);
156        extractor.merge_into(&mut combined);
157    }
158
159    combined
160}
161
// These tests only drive regex-based extraction over HTML-like strings: there is no
// unsafe code for Miri to check, and the Oxc parser tests run ~1000x slower under it.
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;

    /// Runs the extractor and returns the only block, asserting that exactly one exists.
    fn sole_script(source: &str) -> SfcScript {
        let mut found = extract_sfc_scripts(source);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Shorthand for the extension check on a bare file name.
    fn is_sfc(name: &str) -> bool {
        is_sfc_file(Path::new(name))
    }

    // ── is_sfc_file ──────────────────────────────────────────────

    #[test]
    fn is_sfc_file_vue() {
        assert!(is_sfc("App.vue"));
    }

    #[test]
    fn is_sfc_file_svelte() {
        assert!(is_sfc("Counter.svelte"));
    }

    #[test]
    fn is_sfc_file_rejects_ts() {
        assert!(!is_sfc("utils.ts"));
    }

    #[test]
    fn is_sfc_file_rejects_jsx() {
        assert!(!is_sfc("App.jsx"));
    }

    #[test]
    fn is_sfc_file_rejects_astro() {
        assert!(!is_sfc("Layout.astro"));
    }

    // ── extract_sfc_scripts: single script block ─────────────────

    #[test]
    fn single_plain_script() {
        let script = sole_script("<script>const x = 1;</script>");
        assert_eq!(script.body, "const x = 1;");
        assert!(!script.is_typescript);
        assert!(!script.is_jsx);
        assert!(script.src.is_none());
    }

    #[test]
    fn single_ts_script() {
        let script = sole_script(r#"<script lang="ts">const x: number = 1;</script>"#);
        assert!(script.is_typescript);
        assert!(!script.is_jsx);
    }

    #[test]
    fn single_tsx_script() {
        let script = sole_script(r#"<script lang="tsx">const el = <div />;</script>"#);
        assert!(script.is_typescript);
        assert!(script.is_jsx);
    }

    #[test]
    fn single_jsx_script() {
        let script = sole_script(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert!(!script.is_typescript);
        assert!(script.is_jsx);
    }

    // ── Multiple script blocks ───────────────────────────────────

    #[test]
    fn two_script_blocks() {
        let found = extract_sfc_scripts(
            r#"
<script lang="ts">
export default {};
</script>
<script setup lang="ts">
const count = 0;
</script>
"#,
        );
        assert_eq!(found.len(), 2);
        assert!(found[0].body.contains("export default"));
        assert!(found[1].body.contains("count"));
    }

    // ── <script setup> ───────────────────────────────────────────

    #[test]
    fn script_setup_extracted() {
        let script = sole_script(r#"<script setup lang="ts">import { ref } from 'vue';</script>"#);
        assert!(script.body.contains("import"));
        assert!(script.is_typescript);
    }

    // ── <script src="..."> external script ───────────────────────

    #[test]
    fn script_src_detected() {
        let script = sole_script(r#"<script src="./component.ts" lang="ts"></script>"#);
        assert_eq!(script.src.as_deref(), Some("./component.ts"));
    }

    #[test]
    fn data_src_not_treated_as_src() {
        let script =
            sole_script(r#"<script lang="ts" data-src="./nope.ts">const x = 1;</script>"#);
        assert!(script.src.is_none());
    }

    // ── HTML comment filtering ───────────────────────────────────

    #[test]
    fn script_inside_html_comment_filtered() {
        let script = sole_script(
            r#"
<!-- <script lang="ts">import { bad } from 'bad';</script> -->
<script lang="ts">import { good } from 'good';</script>
"#,
        );
        assert!(script.body.contains("good"));
    }

    #[test]
    fn spanning_comment_filters_script() {
        let script = sole_script(
            r#"
<!-- disabled:
<script lang="ts">import { bad } from 'bad';</script>
-->
<script lang="ts">const ok = true;</script>
"#,
        );
        assert!(script.body.contains("ok"));
    }

    #[test]
    fn string_containing_comment_markers_not_corrupted() {
        // A `<!--` inside a string literal of the script body must not disturb filtering.
        let script = sole_script(
            r#"
<script setup lang="ts">
const marker = "<!-- not a comment -->";
import { ref } from 'vue';
</script>
"#,
        );
        assert!(script.body.contains("import"));
    }

    // ── Generic attributes with > in quoted values ───────────────

    #[test]
    fn generic_attr_with_angle_bracket() {
        let script = sole_script(
            r#"<script setup lang="ts" generic="T extends Foo<Bar>">const x = 1;</script>"#,
        );
        assert_eq!(script.body, "const x = 1;");
    }

    #[test]
    fn nested_generic_attr() {
        let script = sole_script(
            r#"<script setup lang="ts" generic="T extends Map<string, Set<number>>">const x = 1;</script>"#,
        );
        assert_eq!(script.body, "const x = 1;");
    }

    // ── lang attribute with single quotes ────────────────────────

    #[test]
    fn lang_single_quoted() {
        let script = sole_script("<script lang='ts'>const x = 1;</script>");
        assert!(script.is_typescript);
    }

    // ── Case-insensitive matching ────────────────────────────────

    #[test]
    fn uppercase_script_tag() {
        let script = sole_script(r#"<SCRIPT lang="ts">const x = 1;</SCRIPT>"#);
        assert!(script.is_typescript);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn no_script_block() {
        let found = extract_sfc_scripts("<template><div>Hello</div></template>");
        assert!(found.is_empty());
    }

    #[test]
    fn empty_script_body() {
        let script = sole_script(r#"<script lang="ts"></script>"#);
        assert!(script.body.is_empty());
    }

    #[test]
    fn whitespace_only_script() {
        let script = sole_script("<script lang=\"ts\">\n  \n</script>");
        assert!(script.body.trim().is_empty());
    }

    #[test]
    fn byte_offset_is_set() {
        let source = r#"<template><div/></template><script lang="ts">code</script>"#;
        let script = sole_script(source);
        // The offset must point at the first byte of "code" within the full source.
        let start = script.byte_offset;
        assert_eq!(&source[start..start + 4], "code");
    }

    #[test]
    fn script_with_extra_attributes() {
        let script = sole_script(
            r#"<script lang="ts" id="app" type="module" data-custom="val">const x = 1;</script>"#,
        );
        assert!(script.is_typescript);
        assert!(script.src.is_none());
    }

    // ── Full parse tests (Oxc parser ~1000x slower under Miri) ──

    #[test]
    fn multiple_script_blocks_exports_combined() {
        let info = parse_sfc_to_module(
            FileId(0),
            r#"
<script lang="ts">
export const version = '1.0';
</script>
<script setup lang="ts">
import { ref } from 'vue';
const count = ref(0);
</script>
"#,
            0,
        );

        // `version` is exported by the plain (non-setup) block.
        let exports_version = info
            .exports
            .iter()
            .any(|e| matches!(&e.name, crate::ExportName::Named(n) if n == "version"));
        assert!(
            exports_version,
            "export from <script> block should be extracted"
        );

        // `ref` is imported from 'vue' by the setup block.
        let imports_vue = info.imports.iter().any(|i| i.source == "vue");
        assert!(
            imports_vue,
            "import from <script setup> block should be extracted"
        );
    }

    // ── lang="tsx" detection ────────────────────────────────────

    #[test]
    fn lang_tsx_detected_as_typescript_jsx() {
        let script = sole_script(r#"<script lang="tsx">const el = <div>{x}</div>;</script>"#);
        assert!(script.is_typescript, "lang=tsx should be typescript");
        assert!(script.is_jsx, "lang=tsx should be jsx");
    }

    // ── HTML comment filtering of script blocks ─────────────────

    #[test]
    fn multiline_html_comment_filters_all_script_blocks_inside() {
        let script = sole_script(
            r#"
<!--
  This whole section is disabled:
  <script lang="ts">import { bad1 } from 'bad1';</script>
  <script lang="ts">import { bad2 } from 'bad2';</script>
-->
<script lang="ts">import { good } from 'good';</script>
"#,
        );
        assert!(script.body.contains("good"));
    }

    // ── <script src="..."> generates side-effect import ─────────

    #[test]
    fn script_src_generates_side_effect_import() {
        let info = parse_sfc_to_module(
            FileId(0),
            r#"<script src="./external-logic.ts" lang="ts"></script>"#,
            0,
        );
        let has_side_effect_import = info.imports.iter().any(|i| {
            i.source == "./external-logic.ts" && matches!(i.imported_name, ImportedName::SideEffect)
        });
        assert!(
            has_side_effect_import,
            "script src should generate a side-effect import"
        );
    }

    // ── Additional coverage ─────────────────────────────────────

    #[test]
    fn parse_sfc_no_script_returns_empty_module() {
        let info = parse_sfc_to_module(FileId(0), "<template><div>Hello</div></template>", 42);
        assert!(info.imports.is_empty());
        assert!(info.exports.is_empty());
        assert_eq!(info.content_hash, 42);
        assert_eq!(info.file_id, FileId(0));
    }

    #[test]
    fn parse_sfc_has_line_offsets() {
        let info = parse_sfc_to_module(FileId(0), r#"<script lang="ts">const x = 1;</script>"#, 0);
        assert!(!info.line_offsets.is_empty());
    }

    #[test]
    fn parse_sfc_has_suppressions() {
        let source = r#"<script lang="ts">
// fallow-ignore-file
export const foo = 1;
</script>"#;
        let info = parse_sfc_to_module(FileId(0), source, 0);
        assert!(!info.suppressions.is_empty());
    }

    #[test]
    fn source_type_jsx_detection() {
        let script = sole_script(r#"<script lang="jsx">const el = <div />;</script>"#);
        assert!(!script.is_typescript);
        assert!(script.is_jsx);
    }

    #[test]
    fn source_type_plain_js_detection() {
        let script = sole_script("<script>const x = 1;</script>");
        assert!(!script.is_typescript);
        assert!(!script.is_jsx);
    }

    #[test]
    fn is_sfc_file_rejects_no_extension() {
        assert!(!is_sfc("Makefile"));
    }

    #[test]
    fn is_sfc_file_rejects_mdx() {
        assert!(!is_sfc("post.mdx"));
    }

    #[test]
    fn is_sfc_file_rejects_css() {
        assert!(!is_sfc("styles.css"));
    }

    #[test]
    fn multiple_script_blocks_both_have_offsets() {
        let source = r#"<script lang="ts">const a = 1;</script>
<script setup lang="ts">const b = 2;</script>"#;
        let found = extract_sfc_scripts(source);
        assert_eq!(found.len(), 2);

        // Each offset must land exactly on its own body text within the source.
        let first = found[0].byte_offset;
        let second = found[1].byte_offset;
        assert_eq!(
            &source[first..first + "const a = 1;".len()],
            "const a = 1;"
        );
        assert_eq!(
            &source[second..second + "const b = 2;".len()],
            "const b = 2;"
        );
    }

    #[test]
    fn script_with_src_and_lang() {
        // `src` and `lang` on the same tag must both be picked up.
        let script = sole_script(r#"<script src="./logic.ts" lang="tsx"></script>"#);
        assert_eq!(script.src.as_deref(), Some("./logic.ts"));
        assert!(script.is_typescript);
        assert!(script.is_jsx);
    }
}