Skip to main content

thread_services/traits/
analyzer.rs

1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
3// SPDX-License-Identifier: AGPL-3.0-or-later
4
5//! # Code Analyzer Service Trait
6//!
7//! Defines the analyzer service interface that abstracts over ast-grep analysis
8//! functionality while preserving all matching and replacement capabilities.
9
10use crate::types::Doc;
11use async_trait::async_trait;
12use thread_utilities::RapidMap;
13
14use crate::error::{AnalysisError, ServiceResult};
15use crate::types::{AnalysisContext, CodeMatch, CrossFileRelationship, ParsedDocument};
16
17/// Core analyzer service trait that abstracts ast-grep analysis functionality.
18///
19/// This trait provides both single-file analysis (preserving all ast-grep matching
20/// and replacement capabilities) and codebase-level analysis (adding graph intelligence
21/// and cross-file relationships).
22///
23/// # Design Philosophy
24///
25/// - **Preserve Power**: All ast-grep Matcher and Replacer functionality accessible
26/// - **Bridge Levels**: Connect file-level AST operations to codebase-level graph intelligence
27/// - **Enable Intelligence**: Add cross-file relationships and codebase-wide analysis
28/// - **Abstract Execution**: Support different execution environments and strategies
29///
30/// # Examples
31///
32/// ## File-Level Pattern Matching (preserves ast-grep power)
33/// ```rust,no_run
34/// # use thread_services::traits::CodeAnalyzer;
35/// # use thread_services::types::{ParsedDocument, AnalysisContext};
36/// # struct MyAnalyzer;
37/// # #[async_trait::async_trait]
/// # impl<D: thread_services::types::Doc + Send + Sync> CodeAnalyzer<D> for MyAnalyzer {
/// #     async fn find_pattern(&self, document: &ParsedDocument<D>, pattern: &str, context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
/// #     async fn find_all_patterns(&self, document: &ParsedDocument<D>, patterns: &[&str], context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
/// #     async fn replace_pattern(&self, document: &mut ParsedDocument<D>, pattern: &str, replacement: &str, context: &AnalysisContext) -> Result<usize, thread_services::error::ServiceError> { todo!() }
/// #     async fn analyze_cross_file_relationships(&self, documents: &[ParsedDocument<D>], context: &AnalysisContext) -> Result<Vec<thread_services::types::CrossFileRelationship>, thread_services::error::ServiceError> { todo!() }
43/// #     fn capabilities(&self) -> thread_services::traits::AnalyzerCapabilities { todo!() }
44/// # }
45/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
46/// # let document: ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>> = todo!();
47/// let analyzer = MyAnalyzer;
48/// let context = AnalysisContext::default();
49///
50/// // Find all function declarations - preserves ast-grep pattern power
51/// let matches = analyzer.find_pattern(
52///     &document,
53///     "fn $NAME($$$PARAMS) { $$$BODY }",
54///     &context
55/// ).await?;
56///
57/// for match_result in matches {
58///     // Access ast-grep NodeMatch functionality
59///     let node_match = match_result.ast_node_match();
60///     let env = node_match.get_env();
61///
62///     if let Some(name) = env.get_match("NAME") {
63///         println!("Function: {}", name.text());
64///     }
65///
66///     // Plus codebase-level context
67///     for relationship in match_result.relationships() {
68///         println!("Cross-file relationship: {:?}", relationship.kind);
69///     }
70/// }
71/// # Ok(())
72/// # }
73/// ```
74///
75/// ## Codebase-Level Analysis
76/// ```rust,no_run
77/// # use thread_services::traits::CodeAnalyzer;
78/// # use thread_services::types::{ParsedDocument, AnalysisContext, ExecutionScope};
79/// # struct MyAnalyzer;
80/// # #[async_trait::async_trait]
/// # impl<D: thread_services::types::Doc + Send + Sync> CodeAnalyzer<D> for MyAnalyzer {
/// #     async fn find_pattern(&self, document: &ParsedDocument<D>, pattern: &str, context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
/// #     async fn find_all_patterns(&self, document: &ParsedDocument<D>, patterns: &[&str], context: &AnalysisContext) -> Result<Vec<thread_services::types::CodeMatch<'_, D>>, thread_services::error::ServiceError> { todo!() }
/// #     async fn replace_pattern(&self, document: &mut ParsedDocument<D>, pattern: &str, replacement: &str, context: &AnalysisContext) -> Result<usize, thread_services::error::ServiceError> { todo!() }
/// #     async fn analyze_cross_file_relationships(&self, documents: &[ParsedDocument<D>], context: &AnalysisContext) -> Result<Vec<thread_services::types::CrossFileRelationship>, thread_services::error::ServiceError> { todo!() }
86/// #     fn capabilities(&self) -> thread_services::traits::AnalyzerCapabilities { todo!() }
87/// # }
88/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
89/// # let documents: Vec<ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>> = vec![];
90/// let analyzer = MyAnalyzer;
91/// let mut context = AnalysisContext::default();
92/// context.scope = ExecutionScope::Codebase;
93///
94/// // Analyze relationships across entire codebase
95/// let relationships = analyzer.analyze_cross_file_relationships(
96///     &documents,
97///     &context
98/// ).await?;
99///
100/// // Build intelligence on top of ast-grep file-level analysis
101/// for rel in relationships {
102///     match rel.kind {
103///         thread_services::types::RelationshipKind::Calls => {
104///             println!("{} calls {} ({}->{})",
105///                 rel.source_symbol, rel.target_symbol,
106///                 rel.source_file.display(), rel.target_file.display());
107///         },
108///         thread_services::types::RelationshipKind::Imports => {
109///             println!("{} imports from {}",
110///                 rel.source_file.display(), rel.target_file.display());
111///         },
112///         _ => {}
113///     }
114/// }
115/// # Ok(())
116/// # }
117/// ```
118#[async_trait]
119pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
120    /// Find matches for a pattern in a document.
121    ///
122    /// Preserves all ast-grep pattern matching power while adding codebase-level
123    /// context. Returns CodeMatch instances that wrap NodeMatch and add cross-file
124    /// relationship information.
125    ///
126    /// # Arguments
127    /// * `document` - ParsedDocument to search in
128    /// * `pattern` - AST pattern using ast-grep meta-variable syntax (e.g., "$VAR")
129    /// * `context` - Analysis context for execution configuration
130    ///
131    /// # Returns
132    /// Vector of CodeMatch instances with both ast-grep functionality and codebase context
133    async fn find_pattern(
134        &self,
135        document: &ParsedDocument<D>,
136        pattern: &str,
137        context: &AnalysisContext,
138    ) -> ServiceResult<Vec<CodeMatch<'_, D>>>;
139
140    /// Find matches for multiple patterns efficiently.
141    ///
142    /// Optimizes for multiple pattern searches by batching operations and
143    /// reusing AST traversals where possible.
144    ///
145    /// # Arguments
146    /// * `document` - ParsedDocument to search in
147    /// * `patterns` - Slice of AST patterns to match
148    /// * `context` - Analysis context for execution configuration
149    ///
150    /// # Returns
151    /// Vector of CodeMatch instances for all pattern matches
152    async fn find_all_patterns(
153        &self,
154        document: &ParsedDocument<D>,
155        patterns: &[&str],
156        context: &AnalysisContext,
157    ) -> ServiceResult<Vec<CodeMatch<'_, D>>>;
158
159    /// Replace matches for a pattern with replacement content.
160    ///
161    /// Preserves all ast-grep replacement power including template-based replacement
162    /// with meta-variable substitution and structural replacement.
163    ///
164    /// # Arguments
165    /// * `document` - ParsedDocument to perform replacements in (modified in-place)
166    /// * `pattern` - AST pattern to match for replacement
167    /// * `replacement` - Replacement template or content
168    /// * `context` - Analysis context for execution configuration
169    ///
170    /// # Returns
171    /// Number of replacements made
172    async fn replace_pattern(
173        &self,
174        document: &mut ParsedDocument<D>,
175        pattern: &str,
176        replacement: &str,
177        context: &AnalysisContext,
178    ) -> ServiceResult<usize>;
179
180    /// Analyze relationships across multiple files.
181    ///
182    /// This is where Thread extends ast-grep from file-level to codebase-level.
183    /// Builds graph intelligence on top of ast-grep's powerful file-level analysis.
184    ///
185    /// # Arguments
186    /// * `documents` - Collection of ParsedDocuments to analyze
187    /// * `context` - Analysis context with scope and execution configuration
188    ///
189    /// # Returns
190    /// Vector of CrossFileRelationship instances representing codebase-level connections
191    async fn analyze_cross_file_relationships(
192        &self,
193        documents: &[ParsedDocument<D>],
194        context: &AnalysisContext,
195    ) -> ServiceResult<Vec<CrossFileRelationship>>;
196
197    /// Get analyzer capabilities and configuration.
198    fn capabilities(&self) -> AnalyzerCapabilities;
199
200    /// Find specific AST node types efficiently.
201    ///
202    /// Default implementation uses pattern matching, but implementations can
203    /// override for more efficient node type searches.
204    async fn find_nodes_by_kind(
205        &self,
206        document: &ParsedDocument<D>,
207        node_kind: &str,
208        context: &AnalysisContext,
209    ) -> ServiceResult<Vec<CodeMatch<'_, D>>> {
210        // Default: use pattern matching based on node kind
211        let pattern = match node_kind {
212            "function_declaration" => "fn $NAME($$$PARAMS) { $$$BODY }",
213            "class_declaration" => "class $NAME { $$$BODY }",
214            "variable_declaration" => "let $VAR = $VALUE",
215            // Add more patterns as needed
216            _ => {
217                return Err(AnalysisError::InvalidPattern {
218                    pattern: format!("Unknown node kind: {}", node_kind),
219                }
220                .into());
221            }
222        };
223
224        self.find_pattern(document, pattern, context).await
225    }
226
227    /// Validate pattern syntax before analysis.
228    ///
229    /// Default implementation performs basic validation.
230    /// Implementations can override for language-specific validation.
231    fn validate_pattern(&self, pattern: &str) -> ServiceResult<()> {
232        if pattern.is_empty() {
233            return Err(AnalysisError::InvalidPattern {
234                pattern: "Pattern cannot be empty".to_string(),
235            }
236            .into());
237        }
238
239        // Basic meta-variable validation
240        if pattern.contains('$') {
241            // Check for valid meta-variable format
242            let mut chars = pattern.chars();
243            let mut _found_metavar = false;
244
245            while let Some(ch) = chars.next() {
246                if ch == '$' {
247                    _found_metavar = true;
248                    // Next character should be alphabetic or underscore
249                    if let Some(next_ch) = chars.next()
250                        && !next_ch.is_alphabetic()
251                        && next_ch != '_'
252                    {
253                        return Err(AnalysisError::MetaVariable {
254                            variable: format!("${}", next_ch),
255                            message: "Invalid meta-variable format".to_string(),
256                        }
257                        .into());
258                    }
259                }
260            }
261        }
262
263        Ok(())
264    }
265
266    /// Compile pattern for efficient reuse.
267    ///
268    /// Default implementation returns the pattern as-is.
269    /// Implementations can override to pre-compile patterns for better performance.
270    #[cfg(feature = "matching")]
271    async fn compile_pattern(&self, pattern: &str) -> ServiceResult<CompiledPattern> {
272        Ok(CompiledPattern {
273            pattern: pattern.to_string(),
274            compiled_data: None,
275        })
276    }
277
278    /// Perform batch analysis operations efficiently.
279    ///
280    /// Optimizes for analyzing multiple documents with multiple patterns
281    /// by batching operations and using appropriate execution strategies.
282    async fn batch_analyze(
283        &self,
284        documents: &[ParsedDocument<D>],
285        patterns: &[&str],
286        context: &AnalysisContext,
287    ) -> ServiceResult<Vec<Vec<CodeMatch<'_, D>>>> {
288        let mut results = Vec::new();
289
290        for document in documents {
291            let doc_results = self.find_all_patterns(document, patterns, context).await?;
292            results.push(doc_results);
293        }
294
295        Ok(results)
296    }
297
298    /// Extract symbols and metadata from documents.
299    ///
300    /// Bridges ast-grep file-level analysis to codebase-level intelligence
301    /// by extracting symbols, imports, exports, and other metadata.
302    async fn extract_symbols(
303        &self,
304        _document: &mut ParsedDocument<D>,
305        _context: &AnalysisContext,
306    ) -> ServiceResult<()> {
307        // This will be implemented in the conversion utilities
308        // For now, this is a placeholder that preserves the interface
309        Ok(())
310    }
311}
312
/// Analyzer capabilities and configuration information.
///
/// This is the value returned by `CodeAnalyzer::capabilities`. It is a plain
/// description of what an analyzer supports; nothing in this file enforces the
/// limits it advertises.
#[derive(Debug, Clone)]
pub struct AnalyzerCapabilities {
    /// Maximum number of patterns that can be analyzed concurrently
    /// (`None` presumably means "no limit" — confirm with implementors)
    pub max_concurrent_patterns: Option<usize>,

    /// Maximum number of matches to return per pattern
    /// (`None` presumably means "no limit" — confirm with implementors)
    pub max_matches_per_pattern: Option<usize>,

    /// Whether pattern compilation/caching is supported
    pub supports_pattern_compilation: bool,

    /// Whether cross-file analysis is supported
    pub supports_cross_file_analysis: bool,

    /// Whether batch operations are optimized
    pub supports_batch_optimization: bool,

    /// Whether incremental analysis is supported
    pub supports_incremental_analysis: bool,

    /// Supported analysis depth levels
    pub supported_analysis_depths: Vec<AnalysisDepth>,

    /// Performance characteristics
    pub performance_profile: AnalysisPerformanceProfile,

    /// Additional capability flags, keyed by flag name
    pub capability_flags: RapidMap<String, bool>,
}
343
344impl Default for AnalyzerCapabilities {
345    fn default() -> Self {
346        Self {
347            max_concurrent_patterns: Some(50),
348            max_matches_per_pattern: Some(1000),
349            supports_pattern_compilation: false,
350            supports_cross_file_analysis: false,
351            supports_batch_optimization: true,
352            supports_incremental_analysis: false,
353            supported_analysis_depths: vec![AnalysisDepth::Syntax, AnalysisDepth::Local],
354            performance_profile: AnalysisPerformanceProfile::Balanced,
355            capability_flags: thread_utilities::get_map(),
356        }
357    }
358}
359
/// Analysis depth levels.
///
/// Variants are declared from shallowest to deepest, and the derived ordering
/// follows that declaration order (`Syntax < Local < CrossFile < Deep`), so a
/// depth can be compared against a configured maximum.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum AnalysisDepth {
    /// Syntax-only analysis (AST patterns)
    Syntax,
    /// Include local scope analysis
    Local,
    /// Include cross-file dependencies
    CrossFile,
    /// Complete codebase analysis
    Deep,
}
372
/// Performance profile for analysis operations.
///
/// A plain tag describing what an analyzer is tuned for. It derives `Eq` and
/// `Hash` so it can be used as a map/set key; no ordering is implied between
/// profiles.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AnalysisPerformanceProfile {
    /// Optimized for low memory usage
    LowMemory,
    /// Optimized for fast analysis
    FastAnalysis,
    /// Balanced memory and speed
    Balanced,
    /// Optimized for complex pattern matching
    ComplexPatterns,
    /// Optimized for large codebases
    LargeCodebase,
}
387
/// Compiled pattern for efficient reuse.
///
/// Pairs the original pattern text with an optional, type-erased payload that an
/// analyzer may attach after pre-compiling the pattern. Cloning is cheap for the
/// payload because it is held behind an `Arc`.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
    /// Original pattern string
    pub pattern: String,
    /// Compiled pattern data (implementation-specific); `None` when nothing was
    /// pre-compiled. Consumers must downcast the `Any` to the concrete type the
    /// producing analyzer stored.
    pub compiled_data: Option<std::sync::Arc<dyn std::any::Any + Send + Sync>>,
}
396
/// Analysis configuration for specific use cases.
///
/// Passed to `AnalyzerFactory::create_configured_analyzer` to request an
/// analyzer with non-default settings.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// Maximum analysis depth
    pub max_depth: AnalysisDepth,

    /// Whether to collect cross-file relationships
    pub collect_relationships: bool,

    /// Whether to enable pattern caching
    pub enable_pattern_caching: bool,

    /// Preferred performance profile; `None` leaves the choice to the analyzer
    /// (the `Default` impl marks this as "auto-detect")
    pub performance_profile: Option<AnalysisPerformanceProfile>,

    /// Custom configuration options as free-form key/value strings
    pub custom_options: RapidMap<String, String>,
}
415
416impl Default for AnalysisConfig {
417    fn default() -> Self {
418        Self {
419            max_depth: AnalysisDepth::Local,
420            collect_relationships: false,
421            enable_pattern_caching: true,
422            performance_profile: None, // Auto-detect
423            custom_options: thread_utilities::get_map(),
424        }
425    }
426}
427
/// Analyzer factory trait for creating configured analyzer instances.
///
/// Factories hand out analyzers as boxed `CodeAnalyzer<D>` trait objects, so
/// callers do not need to know the concrete analyzer type. The factory itself
/// must be `Send + Sync`.
pub trait AnalyzerFactory<D: Doc + Send + Sync>: Send + Sync {
    /// Create a new analyzer instance with default configuration
    fn create_analyzer(&self) -> Box<dyn CodeAnalyzer<D>>;

    /// Create a new analyzer instance with specific configuration
    ///
    /// Takes `config` by value; how each option is honored is up to the
    /// implementation.
    fn create_configured_analyzer(&self, config: AnalysisConfig) -> Box<dyn CodeAnalyzer<D>>;

    /// Get available analyzer types
    ///
    /// NOTE(review): presumably human-readable analyzer names or identifiers —
    /// the format of these strings is not defined in this file.
    fn available_analyzers(&self) -> Vec<String>;
}
439
#[cfg(test)]
mod tests {
    use super::*;

    /// The default capability set enables batch optimization only and uses the
    /// balanced performance profile.
    #[test]
    fn test_analyzer_capabilities_default() {
        let AnalyzerCapabilities {
            supports_cross_file_analysis,
            supports_batch_optimization,
            supports_pattern_compilation,
            performance_profile,
            ..
        } = AnalyzerCapabilities::default();

        assert!(!supports_cross_file_analysis);
        assert!(supports_batch_optimization);
        assert!(!supports_pattern_compilation);
        assert_eq!(performance_profile, AnalysisPerformanceProfile::Balanced);
    }

    /// The default analysis config stays at local depth, skips relationship
    /// collection and keeps pattern caching on.
    #[test]
    fn test_analysis_config_default() {
        let AnalysisConfig {
            max_depth,
            collect_relationships,
            enable_pattern_caching,
            ..
        } = AnalysisConfig::default();

        assert_eq!(max_depth, AnalysisDepth::Local);
        assert!(!collect_relationships);
        assert!(enable_pattern_caching);
    }
}