thread_services/traits/analyzer.rs
1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
3// SPDX-License-Identifier: AGPL-3.0-or-later
4
5//! # Code Analyzer Service Trait
6//!
7//! Defines the analyzer service interface that abstracts over ast-grep analysis
8//! functionality while preserving all matching and replacement capabilities.
9
10use crate::types::Doc;
11use async_trait::async_trait;
12use thread_utilities::RapidMap;
13
14use crate::error::{AnalysisError, ServiceResult};
15use crate::types::{AnalysisContext, CodeMatch, CrossFileRelationship, ParsedDocument};
16
17/// Core analyzer service trait that abstracts ast-grep analysis functionality.
18///
19/// This trait provides both single-file analysis (preserving all ast-grep matching
20/// and replacement capabilities) and codebase-level analysis (adding graph intelligence
21/// and cross-file relationships).
22///
23/// # Design Philosophy
24///
25/// - **Preserve Power**: All ast-grep Matcher and Replacer functionality accessible
26/// - **Bridge Levels**: Connect file-level AST operations to codebase-level graph intelligence
27/// - **Enable Intelligence**: Add cross-file relationships and codebase-wide analysis
28/// - **Abstract Execution**: Support different execution environments and strategies
29///
30/// # Examples
31///
32/// ## File-Level Pattern Matching (preserves ast-grep power)
33/// ```rust,no_run
34/// # use thread_services::traits::CodeAnalyzer;
35/// # use thread_services::types::{ParsedDocument, AnalysisContext};
36/// # struct MyAnalyzer;
37/// # #[async_trait::async_trait]
38/// # impl CodeAnalyzer for MyAnalyzer {
39/// # async fn find_pattern<D: thread_ast_engine::source::Doc>(&self, document: &ParsedDocument<D>, pattern: &str, context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
40/// # async fn find_all_patterns<D: thread_ast_engine::source::Doc>(&self, document: &ParsedDocument<D>, patterns: &[&str], context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
41/// # async fn replace_pattern<D: thread_ast_engine::source::Doc>(&self, document: &mut ParsedDocument<D>, pattern: &str, replacement: &str, context: &AnalysisContext) -> Result<usize, thread_services::error::ServiceError> { todo!() }
42/// # async fn analyze_cross_file_relationships(&self, documents: &[ParsedDocument<impl thread_ast_engine::source::Doc>], context: &AnalysisContext) -> Result<Vec<thread_services::types::CrossFileRelationship>, thread_services::error::ServiceError> { todo!() }
43/// # fn capabilities(&self) -> thread_services::traits::AnalyzerCapabilities { todo!() }
44/// # }
45/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
46/// # let document: ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>> = todo!();
47/// let analyzer = MyAnalyzer;
48/// let context = AnalysisContext::default();
49///
50/// // Find all function declarations - preserves ast-grep pattern power
51/// let matches = analyzer.find_pattern(
52/// &document,
53/// "fn $NAME($$$PARAMS) { $$$BODY }",
54/// &context
55/// ).await?;
56///
57/// for match_result in matches {
58/// // Access ast-grep NodeMatch functionality
59/// let node_match = match_result.ast_node_match();
60/// let env = node_match.get_env();
61///
62/// if let Some(name) = env.get_match("NAME") {
63/// println!("Function: {}", name.text());
64/// }
65///
66/// // Plus codebase-level context
67/// for relationship in match_result.relationships() {
68/// println!("Cross-file relationship: {:?}", relationship.kind);
69/// }
70/// }
71/// # Ok(())
72/// # }
73/// ```
74///
75/// ## Codebase-Level Analysis
76/// ```rust,no_run
77/// # use thread_services::traits::CodeAnalyzer;
78/// # use thread_services::types::{ParsedDocument, AnalysisContext, ExecutionScope};
79/// # struct MyAnalyzer;
80/// # #[async_trait::async_trait]
81/// # impl CodeAnalyzer for MyAnalyzer {
82/// # async fn find_pattern<D: thread_ast_engine::source::Doc>(&self, document: &ParsedDocument<D>, pattern: &str, context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
83/// # async fn find_all_patterns<D: thread_ast_engine::source::Doc>(&self, document: &ParsedDocument<D>, patterns: &[&str], context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
84/// # async fn replace_pattern<D: thread_ast_engine::source::Doc>(&self, document: &mut ParsedDocument<D>, pattern: &str, replacement: &str, context: &AnalysisContext) -> Result<usize, thread_services::error::ServiceError> { todo!() }
85/// # async fn analyze_cross_file_relationships(&self, documents: &[ParsedDocument<impl thread_ast_engine::source::Doc>], context: &AnalysisContext) -> Result<Vec<thread_services::types::CrossFileRelationship>, thread_services::error::ServiceError> { todo!() }
86/// # fn capabilities(&self) -> thread_services::traits::AnalyzerCapabilities { todo!() }
87/// # }
88/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
89/// # let documents: Vec<ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>> = vec![];
90/// let analyzer = MyAnalyzer;
91/// let mut context = AnalysisContext::default();
92/// context.scope = ExecutionScope::Codebase;
93///
94/// // Analyze relationships across entire codebase
95/// let relationships = analyzer.analyze_cross_file_relationships(
96/// &documents,
97/// &context
98/// ).await?;
99///
100/// // Build intelligence on top of ast-grep file-level analysis
101/// for rel in relationships {
102/// match rel.kind {
103/// thread_services::types::RelationshipKind::Calls => {
104/// println!("{} calls {} ({}->{})",
105/// rel.source_symbol, rel.target_symbol,
106/// rel.source_file.display(), rel.target_file.display());
107/// },
108/// thread_services::types::RelationshipKind::Imports => {
109/// println!("{} imports from {}",
110/// rel.source_file.display(), rel.target_file.display());
111/// },
112/// _ => {}
113/// }
114/// }
115/// # Ok(())
116/// # }
117/// ```
118#[async_trait]
119pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
120 /// Find matches for a pattern in a document.
121 ///
122 /// Preserves all ast-grep pattern matching power while adding codebase-level
123 /// context. Returns CodeMatch instances that wrap NodeMatch and add cross-file
124 /// relationship information.
125 ///
126 /// # Arguments
127 /// * `document` - ParsedDocument to search in
128 /// * `pattern` - AST pattern using ast-grep meta-variable syntax (e.g., "$VAR")
129 /// * `context` - Analysis context for execution configuration
130 ///
131 /// # Returns
132 /// Vector of CodeMatch instances with both ast-grep functionality and codebase context
133 async fn find_pattern(
134 &self,
135 document: &ParsedDocument<D>,
136 pattern: &str,
137 context: &AnalysisContext,
138 ) -> ServiceResult<Vec<CodeMatch<'_, D>>>;
139
140 /// Find matches for multiple patterns efficiently.
141 ///
142 /// Optimizes for multiple pattern searches by batching operations and
143 /// reusing AST traversals where possible.
144 ///
145 /// # Arguments
146 /// * `document` - ParsedDocument to search in
147 /// * `patterns` - Slice of AST patterns to match
148 /// * `context` - Analysis context for execution configuration
149 ///
150 /// # Returns
151 /// Vector of CodeMatch instances for all pattern matches
152 async fn find_all_patterns(
153 &self,
154 document: &ParsedDocument<D>,
155 patterns: &[&str],
156 context: &AnalysisContext,
157 ) -> ServiceResult<Vec<CodeMatch<'_, D>>>;
158
159 /// Replace matches for a pattern with replacement content.
160 ///
161 /// Preserves all ast-grep replacement power including template-based replacement
162 /// with meta-variable substitution and structural replacement.
163 ///
164 /// # Arguments
165 /// * `document` - ParsedDocument to perform replacements in (modified in-place)
166 /// * `pattern` - AST pattern to match for replacement
167 /// * `replacement` - Replacement template or content
168 /// * `context` - Analysis context for execution configuration
169 ///
170 /// # Returns
171 /// Number of replacements made
172 async fn replace_pattern(
173 &self,
174 document: &mut ParsedDocument<D>,
175 pattern: &str,
176 replacement: &str,
177 context: &AnalysisContext,
178 ) -> ServiceResult<usize>;
179
180 /// Analyze relationships across multiple files.
181 ///
182 /// This is where Thread extends ast-grep from file-level to codebase-level.
183 /// Builds graph intelligence on top of ast-grep's powerful file-level analysis.
184 ///
185 /// # Arguments
186 /// * `documents` - Collection of ParsedDocuments to analyze
187 /// * `context` - Analysis context with scope and execution configuration
188 ///
189 /// # Returns
190 /// Vector of CrossFileRelationship instances representing codebase-level connections
191 async fn analyze_cross_file_relationships(
192 &self,
193 documents: &[ParsedDocument<D>],
194 context: &AnalysisContext,
195 ) -> ServiceResult<Vec<CrossFileRelationship>>;
196
197 /// Get analyzer capabilities and configuration.
198 fn capabilities(&self) -> AnalyzerCapabilities;
199
200 /// Find specific AST node types efficiently.
201 ///
202 /// Default implementation uses pattern matching, but implementations can
203 /// override for more efficient node type searches.
204 async fn find_nodes_by_kind(
205 &self,
206 document: &ParsedDocument<D>,
207 node_kind: &str,
208 context: &AnalysisContext,
209 ) -> ServiceResult<Vec<CodeMatch<'_, D>>> {
210 // Default: use pattern matching based on node kind
211 let pattern = match node_kind {
212 "function_declaration" => "fn $NAME($$$PARAMS) { $$$BODY }",
213 "class_declaration" => "class $NAME { $$$BODY }",
214 "variable_declaration" => "let $VAR = $VALUE",
215 // Add more patterns as needed
216 _ => {
217 return Err(AnalysisError::InvalidPattern {
218 pattern: format!("Unknown node kind: {}", node_kind),
219 }
220 .into());
221 }
222 };
223
224 self.find_pattern(document, pattern, context).await
225 }
226
227 /// Validate pattern syntax before analysis.
228 ///
229 /// Default implementation performs basic validation.
230 /// Implementations can override for language-specific validation.
231 fn validate_pattern(&self, pattern: &str) -> ServiceResult<()> {
232 if pattern.is_empty() {
233 return Err(AnalysisError::InvalidPattern {
234 pattern: "Pattern cannot be empty".to_string(),
235 }
236 .into());
237 }
238
239 // Basic meta-variable validation
240 if pattern.contains('$') {
241 // Check for valid meta-variable format
242 let mut chars = pattern.chars();
243 let mut _found_metavar = false;
244
245 while let Some(ch) = chars.next() {
246 if ch == '$' {
247 _found_metavar = true;
248 // Next character should be alphabetic or underscore
249 if let Some(next_ch) = chars.next()
250 && !next_ch.is_alphabetic()
251 && next_ch != '_'
252 {
253 return Err(AnalysisError::MetaVariable {
254 variable: format!("${}", next_ch),
255 message: "Invalid meta-variable format".to_string(),
256 }
257 .into());
258 }
259 }
260 }
261 }
262
263 Ok(())
264 }
265
266 /// Compile pattern for efficient reuse.
267 ///
268 /// Default implementation returns the pattern as-is.
269 /// Implementations can override to pre-compile patterns for better performance.
270 #[cfg(feature = "matching")]
271 async fn compile_pattern(&self, pattern: &str) -> ServiceResult<CompiledPattern> {
272 Ok(CompiledPattern {
273 pattern: pattern.to_string(),
274 compiled_data: None,
275 })
276 }
277
278 /// Perform batch analysis operations efficiently.
279 ///
280 /// Optimizes for analyzing multiple documents with multiple patterns
281 /// by batching operations and using appropriate execution strategies.
282 async fn batch_analyze(
283 &self,
284 documents: &[ParsedDocument<D>],
285 patterns: &[&str],
286 context: &AnalysisContext,
287 ) -> ServiceResult<Vec<Vec<CodeMatch<'_, D>>>> {
288 let mut results = Vec::new();
289
290 for document in documents {
291 let doc_results = self.find_all_patterns(document, patterns, context).await?;
292 results.push(doc_results);
293 }
294
295 Ok(results)
296 }
297
298 /// Extract symbols and metadata from documents.
299 ///
300 /// Bridges ast-grep file-level analysis to codebase-level intelligence
301 /// by extracting symbols, imports, exports, and other metadata.
302 async fn extract_symbols(
303 &self,
304 _document: &mut ParsedDocument<D>,
305 _context: &AnalysisContext,
306 ) -> ServiceResult<()> {
307 // This will be implemented in the conversion utilities
308 // For now, this is a placeholder that preserves the interface
309 Ok(())
310 }
311}
312
313/// Analyzer capabilities and configuration information
314#[derive(Debug, Clone)]
315pub struct AnalyzerCapabilities {
316 /// Maximum number of patterns that can be analyzed concurrently
317 pub max_concurrent_patterns: Option<usize>,
318
319 /// Maximum number of matches to return per pattern
320 pub max_matches_per_pattern: Option<usize>,
321
322 /// Whether pattern compilation/caching is supported
323 pub supports_pattern_compilation: bool,
324
325 /// Whether cross-file analysis is supported
326 pub supports_cross_file_analysis: bool,
327
328 /// Whether batch operations are optimized
329 pub supports_batch_optimization: bool,
330
331 /// Whether incremental analysis is supported
332 pub supports_incremental_analysis: bool,
333
334 /// Supported analysis depth levels
335 pub supported_analysis_depths: Vec<AnalysisDepth>,
336
337 /// Performance characteristics
338 pub performance_profile: AnalysisPerformanceProfile,
339
340 /// Additional capability flags
341 pub capability_flags: RapidMap<String, bool>,
342}
343
344impl Default for AnalyzerCapabilities {
345 fn default() -> Self {
346 Self {
347 max_concurrent_patterns: Some(50),
348 max_matches_per_pattern: Some(1000),
349 supports_pattern_compilation: false,
350 supports_cross_file_analysis: false,
351 supports_batch_optimization: true,
352 supports_incremental_analysis: false,
353 supported_analysis_depths: vec![AnalysisDepth::Syntax, AnalysisDepth::Local],
354 performance_profile: AnalysisPerformanceProfile::Balanced,
355 capability_flags: thread_utilities::get_map(),
356 }
357 }
358}
359
/// Analysis depth levels.
///
/// Variants are declared from shallowest to deepest, and the derived ordering
/// follows that declaration order (`Syntax < Local < CrossFile < Deep`), so a
/// depth can be compared against a configured maximum. Keep new variants in
/// increasing-depth order to preserve this property.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum AnalysisDepth {
    /// Syntax-only analysis (AST patterns)
    Syntax,
    /// Include local scope analysis
    Local,
    /// Include cross-file dependencies
    CrossFile,
    /// Complete codebase analysis
    Deep,
}
372
/// Performance profile for analysis operations.
///
/// Implements `Eq` and `Hash` in addition to `PartialEq`, so profiles can be
/// used as set members or map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AnalysisPerformanceProfile {
    /// Optimized for low memory usage
    LowMemory,
    /// Optimized for fast analysis
    FastAnalysis,
    /// Balanced memory and speed
    Balanced,
    /// Optimized for complex pattern matching
    ComplexPatterns,
    /// Optimized for large codebases
    LargeCodebase,
}
387
/// A pattern paired with optional pre-compiled data, kept for reuse.
///
/// Cloning copies the pattern string and shares the compiled data through
/// its `Arc`.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
    /// The pattern text as originally supplied.
    pub pattern: String,
    /// Implementation-specific compiled form of the pattern, type-erased
    /// behind `Any`; `None` when nothing has been compiled.
    pub compiled_data: Option<std::sync::Arc<dyn std::any::Any + Send + Sync>>,
}
396
397/// Analysis configuration for specific use cases
398#[derive(Debug, Clone)]
399pub struct AnalysisConfig {
400 /// Maximum analysis depth
401 pub max_depth: AnalysisDepth,
402
403 /// Whether to collect cross-file relationships
404 pub collect_relationships: bool,
405
406 /// Whether to enable pattern caching
407 pub enable_pattern_caching: bool,
408
409 /// Preferred performance profile
410 pub performance_profile: Option<AnalysisPerformanceProfile>,
411
412 /// Custom configuration options
413 pub custom_options: RapidMap<String, String>,
414}
415
416impl Default for AnalysisConfig {
417 fn default() -> Self {
418 Self {
419 max_depth: AnalysisDepth::Local,
420 collect_relationships: false,
421 enable_pattern_caching: true,
422 performance_profile: None, // Auto-detect
423 custom_options: thread_utilities::get_map(),
424 }
425 }
426}
427
428/// Analyzer factory trait for creating configured analyzer instances
429pub trait AnalyzerFactory<D: Doc + Send + Sync>: Send + Sync {
430 /// Create a new analyzer instance with default configuration
431 fn create_analyzer(&self) -> Box<dyn CodeAnalyzer<D>>;
432
433 /// Create a new analyzer instance with specific configuration
434 fn create_configured_analyzer(&self, config: AnalysisConfig) -> Box<dyn CodeAnalyzer<D>>;
435
436 /// Get available analyzer types
437 fn available_analyzers(&self) -> Vec<String>;
438}
439
#[cfg(test)]
mod tests {
    use super::*;

    /// Default capabilities enable batch optimization only and use the
    /// balanced performance profile.
    #[test]
    fn test_analyzer_capabilities_default() {
        let AnalyzerCapabilities {
            supports_cross_file_analysis,
            supports_batch_optimization,
            supports_pattern_compilation,
            performance_profile,
            ..
        } = AnalyzerCapabilities::default();

        assert!(!supports_cross_file_analysis);
        assert!(supports_batch_optimization);
        assert!(!supports_pattern_compilation);
        assert_eq!(performance_profile, AnalysisPerformanceProfile::Balanced);
    }

    /// Default configuration analyzes at local depth, skips relationship
    /// collection and keeps pattern caching on.
    #[test]
    fn test_analysis_config_default() {
        let AnalysisConfig {
            max_depth,
            collect_relationships,
            enable_pattern_caching,
            ..
        } = AnalysisConfig::default();

        assert_eq!(max_depth, AnalysisDepth::Local);
        assert!(!collect_relationships);
        assert!(enable_pattern_caching);
    }
}