1use crate::error::AqlError;
4use crate::manifest::{ExtractorConfig, Manifest};
5use crate::store::Annotation;
6use crate::types::{AttrName, Binding, ProjectRoot, RelativePath, TagName};
7use rustc_hash::{FxHashMap, FxHashSet};
8use serde::{Deserialize, Serialize};
9use serde_json::Value as JsonValue;
10use std::process::Command;
11use std::time::Duration;
12
/// Maximum wall-clock time an external extractor command may run before
/// `run_extractor` kills it and reports a timeout.
const EXTRACTOR_TIMEOUT: Duration = Duration::from_secs(30);
15
/// Outcome of running a single extractor.
///
/// A run can yield annotations and error messages at the same time: entries
/// that fail validation are reported in `errors` while the remaining entries
/// are still returned in `annotations`.
#[derive(Debug, Clone, Serialize)]
pub struct ExtractorResult {
    /// Annotations produced by the extractor.
    pub annotations: Vec<Annotation>,
    /// Human-readable error messages collected while running the extractor.
    pub errors: Vec<String>,
}
24
/// One annotation entry exactly as it appears in an extractor's JSON output,
/// before conversion into the store's [`Annotation`] type.
#[derive(Debug, Deserialize)]
struct RawExtractorAnnotation {
    /// File the annotation belongs to; converted into a `RelativePath`.
    file: String,
    /// Name the annotation is bound to; converted into a `Binding`.
    bind: String,
    /// Tag of the annotation; converted into a `TagName`.
    tag: String,
    /// Arbitrary JSON attributes; an absent key deserializes to an empty map.
    #[serde(default)]
    attrs: FxHashMap<String, JsonValue>,
    /// Nested annotations; an absent key deserializes to an empty list.
    #[serde(default)]
    children: Vec<RawExtractorAnnotation>,
}
36
/// Top-level JSON document an external extractor prints on stdout:
/// an object with a single `annotations` array.
#[derive(Debug, Deserialize)]
struct RawExtractorOutput {
    /// Top-level annotation entries, in the order the extractor emitted them.
    annotations: Vec<RawExtractorAnnotation>,
}
42
43fn convert_raw_annotation(raw: RawExtractorAnnotation) -> Annotation {
45 let attrs: FxHashMap<AttrName, JsonValue> = raw
46 .attrs
47 .into_iter()
48 .map(|(k, v)| (AttrName::from(k), v))
49 .collect();
50
51 let children: Vec<Annotation> = raw
52 .children
53 .into_iter()
54 .map(convert_raw_annotation)
55 .collect();
56
57 Annotation {
58 tag: TagName::from(raw.tag),
59 attrs,
60 binding: Binding::from(raw.bind),
61 file: RelativePath::from(raw.file),
62 children,
63 }
64}
65
66pub fn run_extractor(
72 config: &ExtractorConfig,
73 project_root: &ProjectRoot,
74) -> Result<ExtractorResult, AqlError> {
75 let mut child = Command::new("sh")
76 .arg("-c")
77 .arg(&config.run)
78 .current_dir(project_root.as_ref())
79 .stdout(std::process::Stdio::piped())
80 .stderr(std::process::Stdio::piped())
81 .spawn()
82 .map_err(|e| format!("Failed to execute extractor '{}': {e}", config.name))?;
83
84 let stdout_handle = child.stdout.take();
88 let stderr_handle = child.stderr.take();
89
90 let stdout_thread = std::thread::spawn(move || -> Vec<u8> {
91 use std::io::Read;
92 let mut buf = Vec::new();
93 if let Some(mut stdout) = stdout_handle {
94 let _ = stdout.read_to_end(&mut buf);
95 }
96 buf
97 });
98
99 let stderr_thread = std::thread::spawn(move || -> Vec<u8> {
100 use std::io::Read;
101 let mut buf = Vec::new();
102 if let Some(mut stderr) = stderr_handle {
103 let _ = stderr.read_to_end(&mut buf);
104 }
105 buf
106 });
107
108 let deadline = std::time::Instant::now() + EXTRACTOR_TIMEOUT;
109 loop {
110 match child.try_wait() {
111 Ok(Some(_)) => break,
112 Ok(None) => {
113 if std::time::Instant::now() >= deadline {
114 let _ = child.kill();
115 let _ = child.wait();
116 return Err(format!(
117 "Extractor '{}' timed out after {}s",
118 config.name,
119 EXTRACTOR_TIMEOUT.as_secs()
120 )
121 .into());
122 }
123 std::thread::sleep(Duration::from_millis(50));
124 }
125 Err(e) => {
126 return Err(format!("Failed to wait on extractor '{}': {e}", config.name).into());
127 }
128 }
129 }
130
131 let status = child
132 .wait()
133 .map_err(|e| format!("Failed to wait on extractor '{}': {e}", config.name))?;
134
135 let stdout_bytes = stdout_thread.join().unwrap_or_default();
136 let stderr_bytes = stderr_thread.join().unwrap_or_default();
137
138 let output = std::process::Output {
139 status,
140 stdout: stdout_bytes,
141 stderr: stderr_bytes,
142 };
143
144 if !output.status.success() {
145 let stderr = String::from_utf8_lossy(&output.stderr);
146 return Err(format!(
147 "Extractor '{}' exited with {}: {}",
148 config.name,
149 output.status,
150 stderr.trim()
151 )
152 .into());
153 }
154
155 let stdout = String::from_utf8_lossy(&output.stdout);
156 let raw: RawExtractorOutput = serde_json::from_str(&stdout)
157 .map_err(|e| format!("Extractor '{}' produced invalid JSON: {e}", config.name))?;
158
159 let mut annotations = Vec::with_capacity(raw.annotations.len());
160 let mut errors = Vec::new();
161
162 for entry in raw.annotations {
163 if entry.file.is_empty() {
164 errors.push(format!(
165 "Extractor '{}': annotation missing 'file' field",
166 config.name
167 ));
168 continue;
169 }
170 if entry.bind.is_empty() {
171 errors.push(format!(
172 "Extractor '{}': annotation for '{}' missing 'bind' field",
173 config.name, entry.file
174 ));
175 continue;
176 }
177
178 annotations.push(convert_raw_annotation(entry));
179 }
180
181 Ok(ExtractorResult {
182 annotations,
183 errors,
184 })
185}
186
187#[must_use = "extractor results contain annotations to merge into the store"]
194pub fn run_all_extractors(
195 manifest: &Manifest,
196 project_root: &ProjectRoot,
197 registry: &super::ExtractorRegistry,
198) -> Vec<ExtractorResult> {
199 manifest
200 .extractors
201 .iter()
202 .map(|config| {
203 if config.run.is_empty() {
204 match registry.get(&config.name) {
206 Some(builtin) => run_builtin(builtin, config, project_root),
207 None => ExtractorResult {
208 annotations: vec![],
209 errors: vec![format!(
210 "Extractor '{}': no 'run' command and no built-in found",
211 config.name
212 )],
213 },
214 }
215 } else {
216 match run_extractor(config, project_root) {
217 Ok(result) => result,
218 Err(e) => ExtractorResult {
219 annotations: vec![],
220 errors: vec![e.to_string()],
221 },
222 }
223 }
224 })
225 .collect()
226}
227
228fn run_builtin(
230 builtin: &dyn super::BuiltinExtractor,
231 config: &ExtractorConfig,
232 project_root: &ProjectRoot,
233) -> ExtractorResult {
234 let extensions: FxHashSet<String> = builtin
235 .extensions()
236 .iter()
237 .map(|ext| ext.trim_start_matches('.').to_string())
238 .collect();
239
240 let mut annotations = Vec::new();
241 let mut errors = Vec::new();
242
243 let walker = ignore::WalkBuilder::new(project_root.as_ref())
244 .hidden(false)
245 .git_ignore(true)
246 .git_global(true)
247 .git_exclude(true)
248 .filter_entry(|entry| {
249 let name = entry.file_name().to_string_lossy();
250 if entry.file_type().is_some_and(|ft| ft.is_dir()) {
251 return !crate::paths::SKIP_DIRS.contains(&name.as_ref());
252 }
253 true
254 })
255 .build();
256
257 for entry in walker {
258 let entry = match entry {
259 Ok(e) => e,
260 Err(e) => {
261 errors.push(format!("Extractor '{}': walk error: {e}", config.name));
262 continue;
263 }
264 };
265
266 let path = entry.path();
267 if !path.is_file() {
268 continue;
269 }
270
271 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
273 if !extensions.contains(ext) {
274 continue;
275 }
276
277 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
279 if name.ends_with(".d.ts") || name.ends_with(".d.mts") || name.ends_with(".d.cts") {
280 continue;
281 }
282
283 if !config.globs.is_empty() {
285 let relative_str = path
286 .strip_prefix(project_root.as_ref())
287 .ok()
288 .and_then(|r| r.to_str())
289 .unwrap_or("");
290 let matches_glob = config.globs.iter().any(|g| glob_matches(g, relative_str));
291 if !matches_glob {
292 continue;
293 }
294 }
295
296 let relative = match path.strip_prefix(project_root.as_ref()) {
297 Ok(r) => RelativePath::from(r.to_string_lossy().as_ref()),
298 Err(_) => continue,
299 };
300
301 let source = match std::fs::read_to_string(path) {
302 Ok(s) => s,
303 Err(e) => {
304 errors.push(format!(
305 "Extractor '{}': failed to read {}: {e}",
306 config.name,
307 path.display()
308 ));
309 continue;
310 }
311 };
312
313 let file_anns = builtin.extract(&source, &relative);
314 annotations.extend(file_anns);
315 }
316
317 ExtractorResult {
318 annotations,
319 errors,
320 }
321}
322
/// Reports whether the `/`-separated relative `path` matches the glob `pattern`.
///
/// Two rule sets are tried in order and the path matches if either accepts it:
///
/// 1. The loose rules this function has always applied (see
///    [`loose_glob_matches`]), so patterns that matched before keep matching.
/// 2. Segment-wise glob matching (see [`segments_match`]): `**` stands for any
///    number of whole path segments, including none, and `*` for any run of
///    characters inside one segment.
///
/// The second rule set is what makes patterns with more than one wildcard,
/// such as `src/**/*.ts`, usable; the loose rules compare everything after the
/// first wildcard literally and therefore never match them.
fn glob_matches(pattern: &str, path: &str) -> bool {
    if loose_glob_matches(pattern, path) {
        return true;
    }
    let pattern_segments: Vec<&str> = pattern.split('/').collect();
    let path_segments: Vec<&str> = path.split('/').collect();
    segments_match(&pattern_segments, &path_segments)
}

/// Loose single-wildcard matching kept for backward compatibility.
///
/// - `**/*.ext` matches any path ending in `.ext`.
/// - Any other pattern starting with `**` matches when the remainder of the
///   pattern occurs anywhere in the path.
/// - `prefix*suffix` matches a path that starts with `prefix` and whose
///   remainder ends with `suffix`; the `*` may span `/`.
/// - A pattern without `*` must equal the path.
fn loose_glob_matches(pattern: &str, path: &str) -> bool {
    if pattern.starts_with("**") {
        let suffix = pattern.trim_start_matches("**/").trim_start_matches("*/");
        return match suffix.strip_prefix('*') {
            Some(ext) if ext.starts_with('.') => path.ends_with(ext),
            _ => path.contains(suffix),
        };
    }
    match pattern.split_once('*') {
        // The suffix is checked against what is left after the prefix, so the
        // two cannot overlap (`ab*bc` must not match `abc`).
        Some((prefix, suffix)) => path
            .strip_prefix(prefix)
            .is_some_and(|rest| rest.ends_with(suffix)),
        None => path == pattern,
    }
}

/// Matches pattern segments against path segments; `**` consumes zero or more
/// whole path segments, every other segment is compared with [`segment_matches`].
fn segments_match(pattern: &[&str], path: &[&str]) -> bool {
    let Some((segment, rest)) = pattern.split_first() else {
        return path.is_empty();
    };
    if *segment == "**" {
        // Try every possible number of segments for `**` to swallow.
        return (0..=path.len()).any(|skipped| segments_match(rest, &path[skipped..]));
    }
    match path.split_first() {
        Some((component, remaining)) => {
            segment_matches(segment, component) && segments_match(rest, remaining)
        }
        None => false,
    }
}

/// Matches one path segment against one pattern segment in which every `*`
/// stands for any (possibly empty) run of characters.
fn segment_matches(pattern: &str, text: &str) -> bool {
    let Some((head, tail)) = pattern.split_once('*') else {
        return pattern == text;
    };
    // Anchor the literal before the first `*` and the one after the last `*`,
    // then look for the literals in between from left to right.
    let Some(rest) = text.strip_prefix(head) else {
        return false;
    };
    let (middles, last) = tail.rsplit_once('*').unwrap_or(("", tail));
    let Some(mut rest) = rest.strip_suffix(last) else {
        return false;
    };
    for piece in middles.split('*') {
        match rest.find(piece) {
            Some(at) => rest = &rest[at + piece.len()..],
            None => return false,
        }
    }
    true
}
341
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Project root used by tests that spawn a shell command.
    fn test_project_root() -> ProjectRoot {
        ProjectRoot::from(PathBuf::from("/tmp"))
    }

    /// Deserializes extractor JSON and converts its top-level entries.
    fn parse_annotations(json: &str) -> Vec<Annotation> {
        let raw: RawExtractorOutput = serde_json::from_str(json).unwrap();
        raw.annotations
            .into_iter()
            .map(convert_raw_annotation)
            .collect()
    }

    #[test]
    fn parses_valid_extractor_json() {
        let annotations = parse_annotations(
            r#"{"annotations":[
                {"file":"src/api.ts","bind":"createUser","tag":"controller","attrs":{"method":"POST","path":"/api/users"}},
                {"file":"src/api.ts","bind":"getUser","tag":"controller","attrs":{"method":"GET","path":"/api/users/:id"}}
            ]}"#,
        );

        assert_eq!(annotations.len(), 2, "should parse two annotations");
        assert_eq!(
            annotations[0].tag, "controller",
            "first tag should be controller"
        );
        assert_eq!(
            annotations[0].binding, "createUser",
            "first binding should be createUser"
        );
        assert_eq!(
            annotations[0].attrs.get("method"),
            Some(&JsonValue::String("POST".to_string())),
            "first method should be POST"
        );
        assert_eq!(
            annotations[1].binding, "getUser",
            "second binding should be getUser"
        );
    }

    #[test]
    fn parses_nested_children() {
        let annotations = parse_annotations(
            r#"{"annotations":[
                {"file":"src/app.test.ts","bind":"AppTests","tag":"describe","attrs":{},
                 "children":[
                    {"file":"src/app.test.ts","bind":"renders correctly","tag":"test","attrs":{}},
                    {"file":"src/app.test.ts","bind":"nested suite","tag":"describe","attrs":{},
                     "children":[
                        {"file":"src/app.test.ts","bind":"handles edge case","tag":"test","attrs":{}}
                     ]}
                 ]}
            ]}"#,
        );

        assert_eq!(
            annotations.len(),
            1,
            "should parse one top-level annotation"
        );
        assert_eq!(
            annotations[0].tag, "describe",
            "top-level should be describe"
        );
        assert_eq!(
            annotations[0].children.len(),
            2,
            "describe should have 2 children"
        );
        assert_eq!(
            annotations[0].children[0].tag, "test",
            "first child should be test"
        );
        assert_eq!(
            annotations[0].children[1].tag, "describe",
            "second child should be describe"
        );
        assert_eq!(
            annotations[0].children[1].children.len(),
            1,
            "nested describe should have 1 child"
        );
        assert_eq!(
            annotations[0].children[1].children[0].tag, "test",
            "deeply nested should be test"
        );
    }

    // Goes through `run_extractor` so the production validation is what gets
    // exercised, rather than a copy of it kept inside the test.
    #[test]
    fn skips_entries_with_missing_fields() {
        let config = ExtractorConfig {
            name: "test".to_string(),
            run: r#"echo '{"annotations":[{"file":"","bind":"x","tag":"t","attrs":{}},{"file":"f.ts","bind":"","tag":"t","attrs":{}},{"file":"f.ts","bind":"ok","tag":"t","attrs":{}}]}'"#.to_string(),
            globs: vec![],
        };

        let result = run_extractor(&config, &test_project_root()).unwrap();

        assert_eq!(
            result.annotations.len(),
            1,
            "should keep only valid annotation"
        );
        assert_eq!(result.errors.len(), 2, "should report two errors");
    }

    #[test]
    fn run_extractor_with_echo() {
        let config = ExtractorConfig {
            name: "echo-test".to_string(),
            run: r#"echo '{"annotations":[{"file":"src/app.ts","bind":"handler","tag":"endpoint","attrs":{"method":"GET"}}]}'"#.to_string(),
            globs: vec!["src/**/*.ts".to_string()],
        };

        let result = run_extractor(&config, &test_project_root()).unwrap();

        assert_eq!(result.annotations.len(), 1, "should produce one annotation");
        assert_eq!(result.errors.len(), 0, "should produce no errors");
        assert_eq!(
            result.annotations[0].tag, "endpoint",
            "tag should be endpoint"
        );
        assert_eq!(
            result.annotations[0].binding, "handler",
            "binding should be handler"
        );
    }

    #[test]
    fn run_extractor_handles_failure() {
        let config = ExtractorConfig {
            name: "fail-test".to_string(),
            run: "exit 1".to_string(),
            globs: vec![],
        };

        let result = run_extractor(&config, &test_project_root());

        assert!(result.is_err(), "should return error for failed extractor");
        assert!(
            result.unwrap_err().to_string().contains("fail-test"),
            "error should mention extractor name"
        );
    }

    #[test]
    fn run_extractor_handles_invalid_json() {
        let config = ExtractorConfig {
            name: "bad-json".to_string(),
            run: "echo 'not json'".to_string(),
            globs: vec![],
        };

        let result = run_extractor(&config, &test_project_root());

        assert!(result.is_err(), "should return error for invalid JSON");
        assert!(
            result.unwrap_err().to_string().contains("invalid JSON"),
            "error should mention invalid JSON"
        );
    }

    #[test]
    fn run_all_extractors_continues_on_failure() {
        let manifest = Manifest {
            version: "1.0".to_string(),
            tags: FxHashMap::default(),
            audiences: FxHashMap::default(),
            visibilities: FxHashMap::default(),
            extractors: vec![
                ExtractorConfig {
                    name: "failing".to_string(),
                    run: "exit 1".to_string(),
                    globs: vec![],
                },
                ExtractorConfig {
                    name: "passing".to_string(),
                    run: r#"echo '{"annotations":[{"file":"a.ts","bind":"b","tag":"t","attrs":{}}]}'"#.to_string(),
                    globs: vec![],
                },
            ],
        };

        let registry = crate::extractor::ExtractorRegistry::new();
        let results = run_all_extractors(&manifest, &test_project_root(), &registry);

        assert_eq!(results.len(), 2, "should return result for each extractor");
        assert!(!results[0].errors.is_empty(), "first should have errors");
        assert_eq!(
            results[1].annotations.len(),
            1,
            "second should have annotations"
        );
    }

    #[test]
    fn glob_matches_handles_simple_patterns() {
        assert!(
            glob_matches("**/*.ts", "src/api/users.ts"),
            "recursive extension glob should match nested file"
        );
        assert!(
            !glob_matches("**/*.ts", "src/api/users.rs"),
            "recursive extension glob should reject other extensions"
        );
        assert!(
            glob_matches("src/*.ts", "src/app.ts"),
            "prefix/suffix glob should match"
        );
        assert!(
            !glob_matches("src/*.ts", "lib/app.ts"),
            "prefix/suffix glob should reject other prefix"
        );
        assert!(
            glob_matches("Cargo.toml", "Cargo.toml"),
            "literal pattern should match identical path"
        );
        assert!(
            !glob_matches("Cargo.toml", "crates/Cargo.toml"),
            "literal pattern should reject different path"
        );
    }
}