fob_graph/analysis/extractors/
astro.rs1use memchr::memmem;
7
8use super::common::{
9 ExtractedScript, Extractor, ExtractorError, MAX_FILE_SIZE, MAX_SCRIPT_TAGS, ScriptContext,
10};
11
/// Stateless extractor that pulls JavaScript/TypeScript sources out of `.astro` files.
///
/// Handles the leading `---` fenced frontmatter block and every `<script>` element.
#[derive(Clone, Copy, Debug)]
pub struct AstroExtractor;
15
16impl Extractor for AstroExtractor {
17 fn extract<'a>(&self, source: &'a str) -> Result<Vec<ExtractedScript<'a>>, ExtractorError> {
18 if source.len() > MAX_FILE_SIZE {
20 return Err(ExtractorError::FileTooLarge {
21 size: source.len(),
22 max: MAX_FILE_SIZE,
23 });
24 }
25
26 let mut sources = Vec::new();
27 let mut pointer = 0;
28
29 if let Some(frontmatter) = parse_frontmatter(source, &mut pointer)? {
31 sources.push(frontmatter);
32 }
33
34 let mut script_count = 0;
36 while let Some(script) = parse_script(source, &mut pointer)? {
37 sources.push(script);
38 script_count += 1;
39
40 if script_count > MAX_SCRIPT_TAGS {
42 return Err(ExtractorError::TooManyScriptTags {
43 count: script_count,
44 max: MAX_SCRIPT_TAGS,
45 });
46 }
47 }
48
49 Ok(sources)
50 }
51
52 fn file_extension(&self) -> &'static str {
53 ".astro"
54 }
55}
56
57fn parse_frontmatter<'a>(
59 source_text: &'a str,
60 pointer: &mut usize,
61) -> Result<Option<ExtractedScript<'a>>, ExtractorError> {
62 let bytes = source_text.as_bytes();
63
64 while *pointer < bytes.len() && matches!(bytes[*pointer], b' ' | b'\t' | b'\n' | b'\r') {
66 *pointer += 1;
67 }
68
69 if *pointer + 3 > bytes.len() || &bytes[*pointer..*pointer + 3] != b"---" {
71 return Ok(None); }
73
74 let frontmatter_start = *pointer;
75 *pointer += 3; while *pointer < bytes.len() && bytes[*pointer] != b'\n' {
79 *pointer += 1;
80 }
81 if *pointer < bytes.len() {
82 *pointer += 1; }
84
85 let content_start = *pointer;
86
87 let closing_pos = match find_frontmatter_closing(bytes, *pointer) {
89 Some(pos) => pos,
90 None => {
91 return Err(ExtractorError::UnclosedFrontmatter {
92 position: frontmatter_start,
93 });
94 }
95 };
96
97 let source_text = &source_text[content_start..closing_pos];
99
100 *pointer = closing_pos + 3; while *pointer < bytes.len() && bytes[*pointer] != b'\n' {
105 *pointer += 1;
106 }
107 if *pointer < bytes.len() {
108 *pointer += 1; }
110
111 Ok(Some(ExtractedScript::new(
113 source_text,
114 content_start,
115 ScriptContext::AstroFrontmatter,
116 "ts",
117 )))
118}
119
120fn parse_script<'a>(
122 source_text: &'a str,
123 pointer: &mut usize,
124) -> Result<Option<ExtractedScript<'a>>, ExtractorError> {
125 let bytes = source_text.as_bytes();
126
127 let script_start = match find_script_start(bytes, *pointer) {
129 Some(pos) => pos,
130 None => return Ok(None), };
132
133 *pointer = script_start + 7; if *pointer < bytes.len() {
138 let next_char = bytes[*pointer];
139 if !matches!(next_char, b' ' | b'\t' | b'\n' | b'\r' | b'>' | b'/') {
140 return parse_script(source_text, pointer);
142 }
143 }
144
145 let tag_end = match find_script_closing_angle(bytes, *pointer) {
147 Some(pos) => pos,
148 None => {
149 return Err(ExtractorError::UnclosedScriptTag {
150 position: script_start,
151 });
152 }
153 };
154
155 if tag_end > 0 && bytes[tag_end - 1] == b'/' {
157 *pointer = tag_end + 1;
159 return Ok(Some(ExtractedScript::new(
160 "",
161 tag_end + 1,
162 ScriptContext::AstroScript,
163 "js",
164 )));
165 }
166
167 *pointer = tag_end + 1;
169 let content_start = *pointer;
170
171 let script_end = match find_script_end(bytes, *pointer) {
173 Some(pos) => pos,
174 None => {
175 return Err(ExtractorError::UnclosedScriptTag {
176 position: script_start,
177 });
178 }
179 };
180
181 let source_text = &source_text[content_start..script_end];
183
184 *pointer = script_end + 9; Ok(Some(ExtractedScript::new(
189 source_text,
190 content_start,
191 ScriptContext::AstroScript,
192 "js",
193 )))
194}
195
/// Locates the closing frontmatter fence at or after `start`.
///
/// A closing fence is `---` at the very start of the input or directly after a `\n`, and
/// followed by end of input, a newline, a carriage return, a space or a tab. Returns the
/// offset of the first dash, or `None` when no such fence exists.
fn find_frontmatter_closing(bytes: &[u8], start: usize) -> Option<usize> {
    // Last offset at which three bytes still fit; inputs shorter than a fence cannot match.
    let last_candidate = bytes.len().checked_sub(3)?;

    (start..=last_candidate).find(|&idx| {
        let begins_line = idx == 0 || bytes[idx - 1] == b'\n';
        if !begins_line || !bytes[idx..].starts_with(b"---") {
            return false;
        }

        // Reject longer dash runs such as `----` or `---foo`.
        match bytes.get(idx + 3) {
            None => true,
            Some(&next) => matches!(next, b'\n' | b'\r' | b' ' | b'\t'),
        }
    })
}
219
220fn find_script_start(bytes: &[u8], start: usize) -> Option<usize> {
222 let search_slice = &bytes[start..];
223 memmem::find(search_slice, b"<script").map(|pos| start + pos)
224}
225
/// Finds the `>` that ends an opening tag, scanning from `start`.
///
/// A `>` inside a single- or double-quoted attribute value is ignored. A quoted value is
/// closed only by the same quote character that opened it. Returns the absolute offset of
/// the `>`, or `None` if the input ends first.
fn find_script_closing_angle(bytes: &[u8], start: usize) -> Option<usize> {
    // `Some(q)` while inside a value opened by quote byte `q`.
    let mut open_quote: Option<u8> = None;

    for (offset, &current) in bytes[start..].iter().enumerate() {
        match open_quote {
            None => {
                if current == b'>' {
                    return Some(start + offset);
                }
                if current == b'"' || current == b'\'' {
                    open_quote = Some(current);
                }
            }
            Some(quote) => {
                if current == quote {
                    open_quote = None;
                }
            }
        }
    }

    None
}
248
249fn find_script_end(bytes: &[u8], start: usize) -> Option<usize> {
251 let search_slice = &bytes[start..];
252 memmem::find(search_slice, b"</script>").map(|pos| start + pos)
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the Astro extractor over `input`.
    fn run(input: &str) -> Result<Vec<ExtractedScript<'_>>, ExtractorError> {
        AstroExtractor.extract(input)
    }

    /// A file with only frontmatter yields a single TypeScript source.
    #[test]
    fn test_frontmatter_only() {
        let input = r#"---
const title = 'My Page'
const data = await fetch('/api').then(r => r.json())
---
<html><head><title>{title}</title></head></html>
"#;
        let found = run(input).unwrap();
        assert_eq!(found.len(), 1);

        let frontmatter = &found[0];
        assert_eq!(frontmatter.context, ScriptContext::AstroFrontmatter);
        assert_eq!(frontmatter.lang, "ts");
        assert!(frontmatter.source_text.contains("const title"));
    }

    /// A file with only a `<script>` element yields a single JavaScript source.
    #[test]
    fn test_script_only() {
        let input = r#"
<html>
  <body>
    <script>
      console.log('Hello, Astro!')
    </script>
  </body>
</html>
"#;
        let found = run(input).unwrap();
        assert_eq!(found.len(), 1);

        let script = &found[0];
        assert_eq!(script.context, ScriptContext::AstroScript);
        assert_eq!(script.lang, "js");
        assert!(script.source_text.contains("console.log"));
    }

    /// Frontmatter comes first, followed by scripts in document order.
    #[test]
    fn test_frontmatter_and_scripts() {
        let input = r#"---
const pageTitle = 'Home'
---
<html>
  <head><title>{pageTitle}</title></head>
  <body>
    <script>
      console.log('Script 1')
    </script>
    <script>
      console.log('Script 2')
    </script>
  </body>
</html>
"#;
        let found = run(input).unwrap();
        assert_eq!(found.len(), 3);

        assert_eq!(found[0].context, ScriptContext::AstroFrontmatter);
        assert_eq!(found[1].context, ScriptContext::AstroScript);
        assert_eq!(found[2].context, ScriptContext::AstroScript);

        assert!(found[1].source_text.contains("Script 1"));
        assert!(found[2].source_text.contains("Script 2"));
    }

    /// Plain markup produces no sources.
    #[test]
    fn test_no_frontmatter_or_scripts() {
        let found = run("<html><body><h1>Hello</h1></body></html>").unwrap();
        assert_eq!(found.len(), 0);
    }

    /// An opening fence without a closing fence is an error.
    #[test]
    fn test_unclosed_frontmatter() {
        let input = r#"---
const x = 1
<html></html>
"#;
        let outcome = run(input);
        assert!(matches!(
            outcome,
            Err(ExtractorError::UnclosedFrontmatter { .. })
        ));
    }

    /// A `<script>` without `</script>` is an error.
    #[test]
    fn test_unclosed_script() {
        let outcome = run(r#"<script>console.log('test')"#);
        assert!(matches!(
            outcome,
            Err(ExtractorError::UnclosedScriptTag { .. })
        ));
    }

    /// Input one byte over the size limit is rejected before any parsing.
    #[test]
    fn test_file_too_large() {
        let oversized = "x".repeat(MAX_FILE_SIZE + 1);
        let outcome = run(&oversized);
        assert!(matches!(outcome, Err(ExtractorError::FileTooLarge { .. })));
    }
}