fob_graph/analysis/extractors/
svelte.rs

1//! Svelte component script extractor.
2//!
3//! This module implements efficient extraction of JavaScript/TypeScript from Svelte
4//! component `<script>` blocks.
5
6use memchr::memmem;
7
8use super::common::{
9    ExtractedScript, Extractor, ExtractorError, MAX_FILE_SIZE, MAX_SCRIPT_TAGS, ScriptContext,
10};
11
/// Svelte component script extractor.
///
/// A stateless unit type: all extraction logic lives in its [`Extractor`]
/// implementation, which pulls the contents of `<script>` blocks out of a
/// `.svelte` source string.
#[derive(Debug, Clone, Copy)]
pub struct SvelteExtractor;
15
16impl Extractor for SvelteExtractor {
17    fn extract<'a>(&self, source: &'a str) -> Result<Vec<ExtractedScript<'a>>, ExtractorError> {
18        // Enforce file size limit
19        if source.len() > MAX_FILE_SIZE {
20            return Err(ExtractorError::FileTooLarge {
21                size: source.len(),
22                max: MAX_FILE_SIZE,
23            });
24        }
25
26        let mut sources = Vec::new();
27        let mut pointer = 0;
28        let mut script_count = 0;
29
30        // Extract all script blocks
31        while let Some(script) = parse_script(source, &mut pointer)? {
32            sources.push(script);
33            script_count += 1;
34
35            // Enforce script tag count limit
36            if script_count > MAX_SCRIPT_TAGS {
37                return Err(ExtractorError::TooManyScriptTags {
38                    count: script_count,
39                    max: MAX_SCRIPT_TAGS,
40                });
41            }
42        }
43
44        Ok(sources)
45    }
46
47    fn file_extension(&self) -> &'static str {
48        ".svelte"
49    }
50}
51
52/// Parses a single script block starting from the given position.
53fn parse_script<'a>(
54    source_text: &'a str,
55    pointer: &mut usize,
56) -> Result<Option<ExtractedScript<'a>>, ExtractorError> {
57    let bytes = source_text.as_bytes();
58
59    // Find the start of a <script tag
60    let script_start = match find_script_start(bytes, *pointer) {
61        Some(pos) => pos,
62        None => return Ok(None), // No more script tags
63    };
64
65    // Move pointer past "<script"
66    *pointer = script_start + 7; // 7 = "<script".len()
67
68    // Check if this is a script tag (not "scripts" or "scripting")
69    if *pointer < bytes.len() {
70        let next_char = bytes[*pointer];
71        if !matches!(next_char, b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/') {
72            // Not a script tag, keep searching
73            return parse_script(source_text, pointer);
74        }
75    }
76
77    // Find the end of the opening tag (the closing >)
78    let tag_end = match find_script_closing_angle(bytes, *pointer) {
79        Some(pos) => pos,
80        None => {
81            return Err(ExtractorError::UnclosedScriptTag {
82                position: script_start,
83            });
84        }
85    };
86
87    // Extract the tag attributes (between "<script" and ">")
88    let tag_content = &source_text[*pointer..tag_end];
89
90    // Parse attributes
91    let is_module_context =
92        tag_content.contains("context=\"module\"") || tag_content.contains("context='module'");
93    let lang = extract_lang_attribute(tag_content);
94
95    // Check for self-closing tag <script ... />
96    if tag_end > 0 && bytes[tag_end - 1] == b'/' {
97        // Self-closing tag, no content
98        *pointer = tag_end + 1;
99        return Ok(Some(ExtractedScript::new(
100            "",
101            tag_end + 1,
102            if is_module_context {
103                ScriptContext::SvelteModule
104            } else {
105                ScriptContext::SvelteComponent
106            },
107            lang,
108        )));
109    }
110
111    // Move pointer past the closing >
112    *pointer = tag_end + 1;
113    let content_start = *pointer;
114
115    // Find the closing </script> tag
116    let script_end = match find_script_end(bytes, *pointer) {
117        Some(pos) => pos,
118        None => {
119            return Err(ExtractorError::UnclosedScriptTag {
120                position: script_start,
121            });
122        }
123    };
124
125    // Extract the script content
126    let source_text = &source_text[content_start..script_end];
127
128    // Move pointer past the closing </script>
129    *pointer = script_end + 9; // 9 = "</script>".len()
130
131    Ok(Some(ExtractedScript::new(
132        source_text,
133        content_start,
134        if is_module_context {
135            ScriptContext::SvelteModule
136        } else {
137            ScriptContext::SvelteComponent
138        },
139        lang,
140    )))
141}
142
143/// Finds the start of a `<script` tag using memchr.
144fn find_script_start(bytes: &[u8], start: usize) -> Option<usize> {
145    let search_slice = &bytes[start..];
146    memmem::find(search_slice, b"<script").map(|pos| start + pos)
147}
148
/// Returns the absolute offset of the first `>` at or after `start` that is not
/// inside a single- or double-quoted attribute value.
///
/// A quote only closes a value opened by the same quote character, so `"it's"`
/// is treated as one quoted run. Returns `None` when no such `>` exists, which
/// includes the case of an unterminated quote.
fn find_script_closing_angle(bytes: &[u8], start: usize) -> Option<usize> {
    // `Some(q)` while scanning inside a value opened by quote byte `q`.
    let mut open_quote: Option<u8> = None;

    bytes[start..]
        .iter()
        .position(|&current| match (open_quote, current) {
            (None, b'>') => true,
            (None, b'"') | (None, b'\'') => {
                open_quote = Some(current);
                false
            }
            (Some(quote), _) if quote == current => {
                open_quote = None;
                false
            }
            _ => false,
        })
        .map(|offset| start + offset)
}
171
172/// Finds the closing `</script>` tag.
173fn find_script_end(bytes: &[u8], start: usize) -> Option<usize> {
174    let search_slice = &bytes[start..];
175    memmem::find(search_slice, b"</script>").map(|pos| start + pos)
176}
177
/// Extracts the value of the `lang` attribute from the attribute text of a
/// `<script>` tag (everything between `<script` and the closing `>`).
///
/// Only a standalone `lang=` is considered: it must sit at the start of
/// `tag_content` or be preceded by whitespace, so attributes such as
/// `data-lang="..."` are not mistaken for it. Whitespace after `=` is skipped.
/// The value may be double-quoted, single-quoted, or unquoted (terminated by
/// whitespace, `>`, or the end of the text).
///
/// Returns `"js"` when the attribute is absent, its value is empty, or its
/// opening quote is never closed.
///
/// Limitation: a ` lang=` sequence appearing inside the quoted value of a
/// different attribute is still matched.
fn extract_lang_attribute(tag_content: &str) -> &str {
    const DEFAULT_LANG: &str = "js";
    const ATTR_PREFIX: &str = "lang=";

    // First `lang=` occurrence that is an attribute name in its own right.
    let raw_value = tag_content
        .match_indices(ATTR_PREFIX)
        .map(|(pos, _)| pos)
        .find(|&pos| {
            let before = &tag_content[..pos];
            before.is_empty() || before.ends_with(char::is_whitespace)
        })
        .map(|pos| tag_content[pos + ATTR_PREFIX.len()..].trim_start());

    let raw_value = match raw_value {
        Some(value) => value,
        None => return DEFAULT_LANG,
    };

    let lang = match raw_value.chars().next() {
        // Quoted value: take everything up to the matching closing quote.
        Some(quote) if quote == '"' || quote == '\'' => {
            let inner = &raw_value[1..];
            match inner.find(quote) {
                Some(end) => &inner[..end],
                None => DEFAULT_LANG, // Unterminated quote
            }
        }
        // Unquoted value (non-standard but handled).
        Some(_) => {
            let end = raw_value
                .find(|c: char| c.is_whitespace() || c == '>')
                .unwrap_or(raw_value.len());
            &raw_value[..end]
        }
        // Nothing after `lang=`.
        None => DEFAULT_LANG,
    };

    // `lang=""` carries no more information than `lang=`; treat both the same.
    if lang.is_empty() {
        DEFAULT_LANG
    } else {
        lang
    }
}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone `<script>` block is reported as component-level JavaScript.
    #[test]
    fn test_basic_script() {
        let input = r#"
<script>
let count = 0
</script>
<div>{count}</div>
"#;
        let scripts = SvelteExtractor.extract(input).unwrap();
        assert_eq!(scripts.len(), 1);

        let script = &scripts[0];
        assert_eq!(script.context, ScriptContext::SvelteComponent);
        assert_eq!(script.lang, "js");
        assert!(script.source_text.contains("let count"));
    }

    /// `context="module"` blocks are distinguished from regular ones.
    #[test]
    fn test_module_context() {
        let input = r#"
<script context="module">
export const preload = () => ({ data: [] })
</script>
<script>
import { onMount } from 'svelte'
</script>
"#;
        let scripts = SvelteExtractor.extract(input).unwrap();
        assert_eq!(scripts.len(), 2);
        assert_eq!(scripts[0].context, ScriptContext::SvelteModule);
        assert_eq!(scripts[1].context, ScriptContext::SvelteComponent);
    }

    /// The `lang` attribute value is surfaced on the extracted script.
    #[test]
    fn test_typescript() {
        let input = r#"
<script lang="ts">
let count: number = 0
</script>
"#;
        let scripts = SvelteExtractor.extract(input).unwrap();
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "ts");
    }

    /// Markup without any `<script>` tag yields no scripts and no error.
    #[test]
    fn test_no_script() {
        let scripts = SvelteExtractor.extract("<div>Hello</div>").unwrap();
        assert_eq!(scripts.len(), 0);
    }

    /// Inputs one byte over the limit are rejected up front.
    #[test]
    fn test_file_too_large() {
        let oversized = "x".repeat(MAX_FILE_SIZE + 1);
        let outcome = SvelteExtractor.extract(&oversized);
        assert!(matches!(
            outcome,
            Err(ExtractorError::FileTooLarge { .. })
        ));
    }

    /// Empty or whitespace-only `lang` values fall back to "js".
    #[test]
    fn test_malformed_lang_attribute() {
        // Empty lang attribute (lang=)
        let empty_lang = r#"
<script lang=>
let x = 1
</script>
"#;
        let scripts = SvelteExtractor
            .extract(empty_lang)
            .expect("Should handle malformed lang");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "js");

        // Lang attribute followed only by whitespace (lang=   )
        let blank_lang = r#"
<script lang=   >
let x = 1
</script>
"#;
        let scripts = SvelteExtractor
            .extract(blank_lang)
            .expect("Should handle whitespace-only lang");
        assert_eq!(scripts.len(), 1);
        assert_eq!(scripts[0].lang, "js");
    }
}