Skip to main content

amql_engine/extractor/
mod.rs

1//! Extractors that discover annotations from framework conventions.
2//!
3//! Built-in extractors use tree-sitter to detect patterns (Express routes,
4//! test blocks). Subprocess extractors delegate to external scripts that
5//! emit JSON matching the extractor protocol.
6
7#[cfg(feature = "resolver")]
8mod express;
9#[cfg(feature = "resolver")]
10mod go_http;
11#[cfg(feature = "resolver")]
12mod go_structure;
13#[cfg(feature = "resolver")]
14mod go_test;
15#[cfg(feature = "resolver")]
16mod react;
17mod runner;
18#[cfg(feature = "resolver")]
19mod rust_structure;
20#[cfg(feature = "resolver")]
21mod test;
22#[cfg(feature = "resolver")]
23mod ts_structure;
24
25#[cfg(feature = "resolver")]
26pub use express::ExpressExtractor;
27#[cfg(feature = "resolver")]
28pub use go_http::GoHttpExtractor;
29#[cfg(feature = "resolver")]
30pub use go_structure::GoStructureExtractor;
31#[cfg(feature = "resolver")]
32pub use go_test::GoTestExtractor;
33#[cfg(feature = "resolver")]
34pub use react::ReactExtractor;
35pub use runner::{run_all_extractors, run_extractor, ExtractorResult};
36#[cfg(feature = "resolver")]
37pub use rust_structure::RustStructureExtractor;
38#[cfg(feature = "resolver")]
39pub use test::TestExtractor;
40#[cfg(feature = "resolver")]
41pub use ts_structure::TypeScriptStructureExtractor;
42
43use crate::store::Annotation;
44use crate::types::RelativePath;
45
46/// A built-in extractor that discovers annotations from source code.
47///
48/// Built-in extractors use tree-sitter for parsing and run in-process.
49/// They can be overridden by subprocess extractors in the manifest.
50pub trait BuiltinExtractor: Send + Sync {
51    /// Unique name for this extractor (matches manifest `name` attribute).
52    fn name(&self) -> &str;
53
54    /// File extensions this extractor applies to (including the dot).
55    fn extensions(&self) -> &[&str];
56
57    /// Extract annotations from source text.
58    fn extract(&self, source: &str, file: &RelativePath) -> Vec<Annotation>;
59}
60
61/// Registry of built-in extractors indexed by name.
62///
63/// When a manifest extractor has no `run` command, the registry is
64/// consulted for a matching built-in by name.
65pub struct ExtractorRegistry {
66    extractors: Vec<Box<dyn BuiltinExtractor>>,
67    by_name: rustc_hash::FxHashMap<String, usize>,
68}
69
70impl ExtractorRegistry {
71    /// Create an empty extractor registry.
72    pub fn new() -> Self {
73        Self {
74            extractors: Vec::new(),
75            by_name: rustc_hash::FxHashMap::default(),
76        }
77    }
78
79    /// Create a registry with all built-in extractors registered.
80    #[cfg(feature = "resolver")]
81    pub fn with_defaults() -> Self {
82        let mut reg = Self::new();
83        reg.register(Box::new(ExpressExtractor));
84        reg.register(Box::new(GoHttpExtractor));
85        reg.register(Box::new(GoStructureExtractor));
86        reg.register(Box::new(GoTestExtractor));
87        reg.register(Box::new(ReactExtractor));
88        reg.register(Box::new(RustStructureExtractor));
89        reg.register(Box::new(TestExtractor));
90        reg.register(Box::new(TypeScriptStructureExtractor));
91        reg.alias("vitest", "test");
92        reg.alias("jest", "test");
93        reg.alias("ts-structure", "structure");
94        reg
95    }
96
97    /// Register a built-in extractor.
98    pub fn register(&mut self, extractor: Box<dyn BuiltinExtractor>) {
99        let idx = self.extractors.len();
100        self.by_name.insert(extractor.name().to_string(), idx);
101        self.extractors.push(extractor);
102    }
103
104    /// Register an alias that resolves to an existing extractor.
105    pub fn alias(&mut self, alias: &str, target: &str) {
106        if let Some(&idx) = self.by_name.get(target) {
107            self.by_name.insert(alias.to_string(), idx);
108        }
109    }
110
111    /// Look up a built-in extractor by name.
112    pub fn get(&self, name: &str) -> Option<&dyn BuiltinExtractor> {
113        self.by_name
114            .get(name)
115            .and_then(|&idx| self.extractors.get(idx))
116            .map(|e| e.as_ref())
117    }
118
119    /// Run all extractors whose file extensions match the given file.
120    /// Returns the combined annotations from all matching extractors.
121    pub fn extract_all(&self, source: &str, file: &RelativePath) -> Vec<Annotation> {
122        let file_str: &str = file.as_ref();
123        let file_ext = std::path::Path::new(file_str)
124            .extension()
125            .and_then(|e| e.to_str())
126            .map(|e| format!(".{e}"));
127        let file_ext = match file_ext {
128            Some(e) => e,
129            None => return Vec::new(),
130        };
131
132        let mut annotations = Vec::new();
133        for extractor in &self.extractors {
134            if extractor.extensions().iter().any(|ext| *ext == file_ext) {
135                annotations.extend(extractor.extract(source, file));
136            }
137        }
138        annotations
139    }
140
141    /// Return all registered extractor names (including aliases).
142    pub fn names(&self) -> Vec<&str> {
143        let mut names: Vec<&str> = self.by_name.keys().map(|s| s.as_str()).collect();
144        names.sort_unstable();
145        names
146    }
147}
148
149impl Default for ExtractorRegistry {
150    fn default() -> Self {
151        Self::new()
152    }
153}