fob_graph/analysis/extractors/
vue.rs

1//! Vue Single File Component (SFC) script extractor.
2//!
3//! This module implements efficient extraction of JavaScript/TypeScript from Vue SFC
4//! `<script>` blocks.
5
6use memchr::memmem;
7
8use super::common::{
9    ExtractedScript, Extractor, ExtractorError, MAX_FILE_SIZE, MAX_SCRIPT_TAGS, ScriptContext,
10};
11
12/// Vue SFC script extractor
13#[derive(Debug, Clone, Copy)]
14pub struct VueExtractor;
15
16impl Extractor for VueExtractor {
17    fn extract<'a>(&self, source: &'a str) -> Result<Vec<ExtractedScript<'a>>, ExtractorError> {
18        // Enforce file size limit
19        if source.len() > MAX_FILE_SIZE {
20            return Err(ExtractorError::FileTooLarge {
21                size: source.len(),
22                max: MAX_FILE_SIZE,
23            });
24        }
25
26        let mut sources = Vec::new();
27        let mut pointer = 0;
28        let mut script_count = 0;
29
30        // Extract all script blocks
31        while let Some(script) = parse_script(source, &mut pointer)? {
32            sources.push(script);
33            script_count += 1;
34
35            // Enforce script tag count limit
36            if script_count > MAX_SCRIPT_TAGS {
37                return Err(ExtractorError::TooManyScriptTags {
38                    count: script_count,
39                    max: MAX_SCRIPT_TAGS,
40                });
41            }
42        }
43
44        Ok(sources)
45    }
46
47    fn file_extension(&self) -> &'static str {
48        ".vue"
49    }
50}
51
52/// Parses a single script block starting from the given position.
53fn parse_script<'a>(
54    source_text: &'a str,
55    pointer: &mut usize,
56) -> Result<Option<ExtractedScript<'a>>, ExtractorError> {
57    let bytes = source_text.as_bytes();
58
59    // Find the start of a <script tag
60    let script_start = match find_script_start(bytes, *pointer) {
61        Some(pos) => pos,
62        None => return Ok(None), // No more script tags
63    };
64
65    // Move pointer past "<script"
66    *pointer = script_start + 7; // 7 = "<script".len()
67
68    // Check if this is a script tag (not "scripts" or "scripting")
69    if *pointer < bytes.len() {
70        let next_char = bytes[*pointer];
71        if !matches!(next_char, b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/') {
72            // Not a script tag, keep searching
73            return parse_script(source_text, pointer);
74        }
75    }
76
77    // Find the end of the opening tag (the closing >)
78    let tag_end = match find_script_closing_angle(bytes, *pointer) {
79        Some(pos) => pos,
80        None => {
81            return Err(ExtractorError::UnclosedScriptTag {
82                position: script_start,
83            });
84        }
85    };
86
87    // Extract the tag attributes (between "<script" and ">")
88    let tag_content = &source_text[*pointer..tag_end];
89
90    // Parse attributes
91    let is_setup = tag_content.contains("setup");
92    let lang = extract_lang_attribute(tag_content);
93
94    // Check for self-closing tag <script ... />
95    if tag_end > 0 && bytes[tag_end - 1] == b'/' {
96        // Self-closing tag, no content
97        *pointer = tag_end + 1;
98        return Ok(Some(ExtractedScript::new(
99            "",
100            tag_end + 1,
101            if is_setup {
102                ScriptContext::VueSetup
103            } else {
104                ScriptContext::VueRegular
105            },
106            lang,
107        )));
108    }
109
110    // Move pointer past the closing >
111    *pointer = tag_end + 1;
112    let content_start = *pointer;
113
114    // Find the closing </script> tag
115    let script_end = match find_script_end(bytes, *pointer) {
116        Some(pos) => pos,
117        None => {
118            return Err(ExtractorError::UnclosedScriptTag {
119                position: script_start,
120            });
121        }
122    };
123
124    // Extract the script content
125    let source_text = &source_text[content_start..script_end];
126
127    // Move pointer past the closing </script>
128    *pointer = script_end + 9; // 9 = "</script>".len()
129
130    Ok(Some(ExtractedScript::new(
131        source_text,
132        content_start,
133        if is_setup {
134            ScriptContext::VueSetup
135        } else {
136            ScriptContext::VueRegular
137        },
138        lang,
139    )))
140}
141
142/// Finds the start of a `<script` tag using memchr.
143fn find_script_start(bytes: &[u8], start: usize) -> Option<usize> {
144    let search_slice = &bytes[start..];
145    memmem::find(search_slice, b"<script").map(|pos| start + pos)
146}
147
/// Returns the absolute index of the first `>` at or after `start` that is not
/// inside a single- or double-quoted attribute value, or `None` if there is none.
fn find_script_closing_angle(bytes: &[u8], start: usize) -> Option<usize> {
    // `Some(q)` while scanning inside a value opened by quote byte `q`.
    let mut open_quote: Option<u8> = None;

    bytes[start..]
        .iter()
        .position(|&byte| match open_quote {
            Some(quote) => {
                // Only the same quote character closes the value.
                if byte == quote {
                    open_quote = None;
                }
                false
            }
            None => match byte {
                b'"' | b'\'' => {
                    open_quote = Some(byte);
                    false
                }
                b'>' => true,
                _ => false,
            },
        })
        .map(|offset| start + offset)
}
170
171/// Finds the closing `</script>` tag.
172fn find_script_end(bytes: &[u8], start: usize) -> Option<usize> {
173    let search_slice = &bytes[start..];
174    memmem::find(search_slice, b"</script>").map(|pos| start + pos)
175}
176
/// Extracts the value of the `lang` attribute from the attribute text of a script tag.
///
/// `tag_content` is the text between `<script` and the closing `>`. Attributes are
/// scanned one at a time, so only an attribute whose name is exactly `lang` is
/// considered: `data-lang="ts"` and `title="lang=ts"` are ignored. Whitespace around
/// `=` is tolerated, and both quoted and unquoted values are supported.
///
/// Returns `"js"` when there is no `lang` attribute, when its value is empty
/// (`lang=`, `lang=""`), or when a quoted value is never closed.
fn extract_lang_attribute(tag_content: &str) -> &str {
    const DEFAULT_LANG: &str = "js";

    /// Characters that terminate an attribute name.
    fn ends_name(c: char) -> bool {
        c.is_whitespace() || matches!(c, '=' | '/' | '>')
    }

    /// Characters that terminate an unquoted attribute value.
    fn ends_unquoted_value(c: char) -> bool {
        c.is_whitespace() || c == '>'
    }

    let mut rest = tag_content;
    loop {
        // Skip separators between attributes (whitespace and a self-closing `/`).
        rest = rest.trim_start_matches(|c: char| c.is_whitespace() || c == '/');
        if rest.is_empty() || rest.starts_with('>') {
            return DEFAULT_LANG;
        }

        let name_len = rest.find(ends_name).unwrap_or(rest.len());
        let (name, after_name) = rest.split_at(name_len);

        // A valueless (boolean) attribute such as `setup`: move on to the next one.
        let after_equals = match after_name.trim_start().strip_prefix('=') {
            Some(tail) => tail.trim_start(),
            None => {
                rest = after_name;
                continue;
            }
        };

        let (value, remainder) = match after_equals.chars().next() {
            Some(quote) if quote == '"' || quote == '\'' => {
                let quoted = &after_equals[1..];
                match quoted.find(quote) {
                    Some(end) => (&quoted[..end], &quoted[end + 1..]),
                    // Unterminated quote: nothing reliable left to scan.
                    None => return DEFAULT_LANG,
                }
            }
            // Unquoted value (non-standard for `lang`, but handled).
            _ => {
                let end = after_equals
                    .find(ends_unquoted_value)
                    .unwrap_or(after_equals.len());
                after_equals.split_at(end)
            }
        };

        if name == "lang" {
            return if value.is_empty() { DEFAULT_LANG } else { value };
        }
        rest = remainder;
    }
}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the Vue extractor over `sfc`, panicking if extraction fails.
    fn extract_all(sfc: &str) -> Vec<ExtractedScript<'_>> {
        VueExtractor.extract(sfc).unwrap()
    }

    /// A plain `<script>` block is extracted as regular JavaScript.
    #[test]
    fn test_basic_script() {
        let scripts = extract_all(
            r#"
<template><div>Hello</div></template>
<script>
export default { name: 'Test' }
</script>
"#,
        );
        assert_eq!(scripts.len(), 1);

        let script = &scripts[0];
        assert_eq!(script.context, ScriptContext::VueRegular);
        assert_eq!(script.lang, "js");
        assert!(script.source_text.contains("export default"));
    }

    /// `<script setup>` is reported with the setup context.
    #[test]
    fn test_script_setup() {
        let scripts = extract_all(
            r#"
<script setup>
import { ref } from 'vue'
const count = ref(0)
</script>
"#,
        );
        assert_eq!(scripts.len(), 1);

        let script = &scripts[0];
        assert_eq!(script.context, ScriptContext::VueSetup);
        assert!(script.source_text.contains("const count"));
    }

    /// The `lang` attribute value is surfaced on the extracted script.
    #[test]
    fn test_typescript() {
        let scripts = extract_all(
            r#"
<script lang="ts">
export default defineComponent({ name: 'Test' })
</script>
"#,
        );
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "ts");
    }

    /// Regular and setup blocks in one file are both returned, in order.
    #[test]
    fn test_multiple_scripts() {
        let scripts = extract_all(
            r#"
<script>
export default { name: 'Test' }
</script>
<script setup lang="ts">
const count = ref<number>(0)
</script>
"#,
        );
        assert_eq!(scripts.len(), 2);

        let (regular, setup) = (&scripts[0], &scripts[1]);
        assert_eq!(regular.context, ScriptContext::VueRegular);
        assert_eq!(setup.context, ScriptContext::VueSetup);
        assert_eq!(setup.lang, "ts");
    }

    /// A template-only component yields no scripts.
    #[test]
    fn test_no_script() {
        let scripts = extract_all("<template><div>Hello</div></template>");
        assert_eq!(scripts.len(), 0);
    }

    /// Input one byte over the size limit is rejected.
    #[test]
    fn test_file_too_large() {
        let oversized = "x".repeat(MAX_FILE_SIZE + 1);
        let outcome = VueExtractor.extract(&oversized);
        assert!(matches!(outcome, Err(ExtractorError::FileTooLarge { .. })));
    }

    /// Malformed `lang` attributes fall back to the default language.
    #[test]
    fn test_malformed_lang_attribute() {
        let extractor = VueExtractor;

        // Empty lang attribute (lang=)
        let empty_lang = r#"
<script lang=>
const x = 1
</script>
"#;
        let scripts = extractor
            .extract(empty_lang)
            .expect("Should handle malformed lang");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "js"); // Should default to "js"

        // Lang attribute followed only by whitespace (lang=   )
        let blank_lang = r#"
<script lang=   >
const x = 1
</script>
"#;
        let scripts = extractor
            .extract(blank_lang)
            .expect("Should handle whitespace-only lang");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "js"); // Should default to "js"
    }
}
322}