Skip to main content

thread_services/traits/
parser.rs

1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
3// SPDX-License-Identifier: AGPL-3.0-or-later
4
5//! # Code Parser Service Trait
6//!
7//! Defines the parser service interface that abstracts over ast-grep parsing
8//! functionality while preserving all its capabilities.
9
10use async_trait::async_trait;
11use std::path::Path;
12use thread_utilities::RapidMap;
13
14use crate::error::{ParseError, ServiceResult};
15use crate::types::{AnalysisContext, ParsedDocument};
16
17cfg_if::cfg_if!(
18    if #[cfg(feature = "ast-grep-backend")] {
19        use thread_ast_engine::source::Doc;
20        use thread_ast_engine::Language;
21        use thread_language::SupportLang;
22    } else {
23        use crate::types::{Doc, SupportLang};
24    }
25);
26
27/// Core parser service trait that abstracts ast-grep parsing functionality.
28///
29/// This trait provides async interfaces for parsing source code into ParsedDocument
30/// instances that preserve all ast-grep capabilities while enabling codebase-level
31/// analysis. The trait supports both single-file and multi-file parsing operations.
32///
33/// # Design Philosophy
34///
35/// - **Preserve Power**: All ast-grep functionality remains accessible through ParsedDocument
36/// - **Enable Intelligence**: Add metadata needed for codebase-level graph analysis
37/// - **Abstract Execution**: Support different execution environments
38/// - **Commercial Ready**: Clear extension points for commercial parsing features
39///
40/// # Examples
41///
42/// ## Single File Parsing
43/// ```rust,no_run
44/// # use thread_services::traits::CodeParser;
45/// # use thread_services::types::AnalysisContext;
46/// # use thread_language::SupportLang;
47/// # struct MyParser;
48/// # #[async_trait::async_trait]
/// # impl CodeParser<thread_ast_engine::tree_sitter::StrDoc<SupportLang>> for MyParser {
50/// #     async fn parse_content(&self, content: &str, language: SupportLang, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>, thread_services::error::ServiceError> { todo!() }
51/// #     async fn parse_file(&self, file_path: &std::path::Path, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>, thread_services::error::ServiceError> { todo!() }
52/// #     async fn parse_multiple_files(&self, file_paths: &[&std::path::Path], context: &AnalysisContext) -> Result<Vec<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>>, thread_services::error::ServiceError> { todo!() }
53/// #     fn capabilities(&self) -> thread_services::traits::ParserCapabilities { todo!() }
54/// #     fn supported_languages(&self) -> &[SupportLang] { todo!() }
55/// # }
56/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
57/// let parser = MyParser;
58/// let context = AnalysisContext::default();
59///
60/// // Parse a Rust file
61/// let document = parser.parse_file(
62///     std::path::Path::new("src/main.rs"),
63///     &context
64/// ).await?;
65///
66/// // Access underlying ast-grep functionality
67/// let root = document.ast_grep_root();
68/// let matches = root.root().find_all("fn $NAME($$$PARAMS) { $$$BODY }");
69/// # Ok(())
70/// # }
71/// ```
72///
73/// ## Multi-File Codebase Parsing
74/// ```rust,no_run
75/// # use thread_services::traits::CodeParser;
76/// # use thread_services::types::{AnalysisContext, ExecutionScope};
77/// # use std::path::PathBuf;
78/// # struct MyParser;
79/// # #[async_trait::async_trait]
/// # impl CodeParser<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>> for MyParser {
81/// #     async fn parse_content(&self, content: &str, language: thread_language::SupportLang, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>, thread_services::error::ServiceError> { todo!() }
82/// #     async fn parse_file(&self, file_path: &std::path::Path, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>, thread_services::error::ServiceError> { todo!() }
83/// #     async fn parse_multiple_files(&self, file_paths: &[&std::path::Path], context: &AnalysisContext) -> Result<Vec<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>>, thread_services::error::ServiceError> { todo!() }
84/// #     fn capabilities(&self) -> thread_services::traits::ParserCapabilities { todo!() }
85/// #     fn supported_languages(&self) -> &[thread_language::SupportLang] { todo!() }
86/// # }
87/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
88/// let parser = MyParser;
89/// let mut context = AnalysisContext::default();
90/// context.scope = ExecutionScope::Codebase;
91///
92/// // Parse entire codebase
93/// let files: Vec<&std::path::Path> = vec![
94///     std::path::Path::new("src/main.rs"),
95///     std::path::Path::new("src/lib.rs"),
96///     std::path::Path::new("src/parser.rs"),
97/// ];
98///
99/// let documents = parser.parse_multiple_files(&files, &context).await?;
100///
101/// // Each document preserves ast-grep capabilities + adds codebase metadata
102/// for doc in &documents {
103///     println!("File: {:?}", doc.file_path);
104///     println!("Symbols: {:?}", doc.metadata().defined_symbols.keys().collect::<Vec<_>>());
105/// }
106/// # Ok(())
107/// # }
108/// ```
109#[async_trait]
110pub trait CodeParser<D: Doc + Send + Sync>: Send + Sync {
111    /// Parse source content into a ParsedDocument.
112    ///
113    /// This method wraps ast-grep parsing with additional metadata collection
114    /// for codebase-level analysis while preserving all ast-grep functionality.
115    ///
116    /// # Arguments
117    /// * `content` - Source code to parse
118    /// * `language` - Programming language of the content
119    /// * `context` - Analysis context containing execution configuration
120    ///
121    /// # Returns
122    /// ParsedDocument that wraps ast-grep Root with additional metadata
123    async fn parse_content(
124        &self,
125        content: &str,
126        language: SupportLang,
127        context: &AnalysisContext,
128    ) -> ServiceResult<ParsedDocument<D>>;
129
130    /// Parse a single file into a ParsedDocument.
131    ///
132    /// Automatically detects language from file extension and reads file content.
133    /// Collects symbols, imports, and other metadata for codebase-level analysis.
134    ///
135    /// # Arguments
136    /// * `file_path` - Path to source file to parse
137    /// * `context` - Analysis context containing execution configuration
138    ///
139    /// # Returns
140    /// ParsedDocument with both ast-grep functionality and codebase metadata
141    async fn parse_file(
142        &self,
143        file_path: &Path,
144        context: &AnalysisContext,
145    ) -> ServiceResult<ParsedDocument<D>>;
146
147    /// Parse multiple files with efficient parallel execution.
148    ///
149    /// Uses execution strategy from context to optimize for different environments:
150    /// - Rayon for CLI parallel processing
151    /// - Chunked execution for cloud workers
152    /// - Sequential for single-threaded environments
153    ///
154    /// # Arguments
155    /// * `file_paths` - Slice of file paths to parse
156    /// * `context` - Analysis context with execution configuration
157    ///
158    /// # Returns
159    /// Vector of ParsedDocuments in same order as input paths
160    async fn parse_multiple_files(
161        &self,
162        file_paths: &[&Path],
163        context: &AnalysisContext,
164    ) -> ServiceResult<Vec<ParsedDocument<D>>>;
165
166    /// Get parser capabilities and configuration.
167    ///
168    /// Describes what features this parser implementation supports,
169    /// including performance characteristics and execution strategies.
170    fn capabilities(&self) -> ParserCapabilities;
171
172    /// Get list of supported programming languages.
173    ///
174    /// Returns slice of SupportLang values that this parser can handle.
175    /// Used for language detection and validation.
176    fn supported_languages(&self) -> &[SupportLang];
177
178    /// Detect language from file path.
179    ///
180    /// Default implementation uses file extension matching.
181    /// Implementations can override for more sophisticated detection.
182    fn detect_language(&self, file_path: &Path) -> ServiceResult<SupportLang> {
183        SupportLang::from_path(file_path).ok_or_else(|| {
184            ParseError::LanguageDetectionFailed {
185                file_path: file_path.to_path_buf(),
186            }
187            .into()
188        })
189    }
190
191    /// Validate content before parsing.
192    ///
193    /// Default implementation checks for basic validity.
194    /// Implementations can override for language-specific validation.
195    fn validate_content(&self, content: &str, _language: SupportLang) -> ServiceResult<()> {
196        if content.is_empty() {
197            return Err(ParseError::InvalidSource {
198                message: "Content is empty".into(),
199            }
200            .into());
201        }
202
203        // Check content size limits based on capabilities
204        let capabilities = self.capabilities();
205        if let Some(max_size) = capabilities.max_content_size
206            && content.len() > max_size
207        {
208            return Err(ParseError::ContentTooLarge {
209                size: content.len(),
210                max_size,
211            }
212            .into());
213        }
214
215        Ok(())
216    }
217
218    /// Pre-process content before parsing.
219    ///
220    /// Default implementation returns content unchanged.
221    /// Implementations can override for content normalization.
222    fn preprocess_content(&self, content: &str, _language: SupportLang) -> String {
223        content.to_string()
224    }
225
226    /// Post-process parsed document.
227    ///
228    /// Default implementation returns document unchanged.
229    /// Implementations can override to add custom metadata collection.
230    async fn postprocess_document(
231        &self,
232        mut document: ParsedDocument<D>,
233        context: &AnalysisContext,
234    ) -> ServiceResult<ParsedDocument<D>> {
235        // Default: collect basic metadata
236        self.collect_basic_metadata(&mut document, context).await?;
237        Ok(document)
238    }
239
240    /// Collect basic metadata for codebase-level analysis.
241    ///
242    /// Default implementation extracts symbols, imports, exports, and function calls.
243    /// This bridges ast-grep file-level analysis to codebase-level intelligence.
244    async fn collect_basic_metadata(
245        &self,
246        _document: &mut ParsedDocument<D>,
247        _context: &AnalysisContext,
248    ) -> ServiceResult<()> {
249        // This will be implemented in the conversion utilities
250        // For now, this is a placeholder that preserves the interface
251        Ok(())
252    }
253}
254
/// Parser capabilities and configuration information.
///
/// Returned by `CodeParser::capabilities` to describe what a parser
/// implementation supports.
#[derive(Debug, Clone)]
pub struct ParserCapabilities {
    /// Maximum content size this parser can handle (in bytes).
    ///
    /// The default `CodeParser::validate_content` rejects content longer than
    /// this; when `None`, that size check is skipped.
    pub max_content_size: Option<usize>,

    /// Maximum number of files that can be parsed concurrently
    // NOTE(review): `None` presumably means "no limit" — confirm with implementors.
    pub max_concurrent_files: Option<usize>,

    /// Supported execution strategies
    pub execution_strategies: Vec<ExecutionStrategy>,

    /// Whether incremental parsing is supported
    pub supports_incremental: bool,

    /// Whether error recovery during parsing is supported
    pub supports_error_recovery: bool,

    /// Whether codebase-level metadata collection is supported
    pub supports_metadata_collection: bool,

    /// Whether cross-file analysis is supported
    pub supports_cross_file_analysis: bool,

    /// Performance characteristics
    pub performance_profile: PerformanceProfile,

    /// Additional capability flags, keyed by flag name
    pub capability_flags: RapidMap<String, bool>,
}
285
286impl Default for ParserCapabilities {
287    fn default() -> Self {
288        Self {
289            max_content_size: Some(10 * 1024 * 1024), // 10MB default
290            max_concurrent_files: Some(100),
291            execution_strategies: vec![ExecutionStrategy::Sequential, ExecutionStrategy::Rayon],
292            supports_incremental: false,
293            supports_error_recovery: true,
294            supports_metadata_collection: true,
295            supports_cross_file_analysis: false,
296            performance_profile: PerformanceProfile::Balanced,
297            capability_flags: thread_utilities::get_map(),
298        }
299    }
300}
301
/// Execution strategy for parser operations.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so strategies can be
/// deduplicated in sets or used as map keys; every field type (`usize`,
/// `String`) supports both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ExecutionStrategy {
    /// Single-threaded sequential execution
    Sequential,
    /// Rayon-based parallel execution (for CLI)
    Rayon,
    /// Chunked execution
    // NOTE(review): `chunk_size` is presumably the number of files per chunk — confirm.
    Chunked { chunk_size: usize },
    /// Custom execution strategy, identified by an implementation-defined name
    Custom(String),
}
314
/// Performance profile for parser operations.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so profiles can be used
/// as set members or map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PerformanceProfile {
    /// Optimized for low memory usage
    LowMemory,
    /// Optimized for fast parsing speed
    FastParsing,
    /// Balanced memory usage and parsing speed
    Balanced,
    /// Optimized for high throughput
    HighThroughput,
}
327
/// Parser configuration for specific use cases.
///
/// Passed to `ParserFactory::create_configured_parser` to request a parser
/// with non-default behaviour.
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Whether to collect metadata during parsing
    pub collect_metadata: bool,

    /// Whether to enable error recovery
    pub enable_error_recovery: bool,

    /// Preferred execution strategy; `None` leaves the choice to the parser
    /// (the `Default` impl labels this "Auto-detect")
    pub execution_strategy: Option<ExecutionStrategy>,

    /// Custom configuration options as string key/value pairs
    // NOTE(review): recognised keys are implementation-defined — not visible in this file.
    pub custom_options: RapidMap<String, String>,
}
343
344impl Default for ParserConfig {
345    fn default() -> Self {
346        Self {
347            collect_metadata: true,
348            enable_error_recovery: true,
349            execution_strategy: None, // Auto-detect
350            custom_options: thread_utilities::get_map(),
351        }
352    }
353}
354
/// Parser factory trait for creating configured parser instances.
///
/// `D` is the document type produced by the created parsers. Factories hand
/// out parsers as boxed `CodeParser<D>` trait objects, so callers do not need
/// to know the concrete parser type.
pub trait ParserFactory<D: Doc + Send + Sync>: Send + Sync {
    /// Create a new parser instance with default configuration
    fn create_parser(&self) -> Box<dyn CodeParser<D>>;

    /// Create a new parser instance with specific configuration
    ///
    /// `config` is taken by value.
    fn create_configured_parser(&self, config: ParserConfig) -> Box<dyn CodeParser<D>>;

    /// Get available parser types
    // NOTE(review): presumably human-readable parser names — confirm with implementors.
    fn available_parsers(&self) -> Vec<String>;
}
366
#[cfg(test)]
mod tests {
    use super::*;

    /// The default capability set enables metadata collection and error
    /// recovery, disables cross-file analysis, and uses the balanced profile.
    #[test]
    fn test_parser_capabilities_default() {
        let ParserCapabilities {
            supports_metadata_collection,
            supports_error_recovery,
            supports_cross_file_analysis,
            performance_profile,
            ..
        } = ParserCapabilities::default();

        assert!(supports_metadata_collection);
        assert!(supports_error_recovery);
        assert!(!supports_cross_file_analysis);
        assert_eq!(performance_profile, PerformanceProfile::Balanced);
    }

    /// The default configuration enables metadata collection and error
    /// recovery and leaves the execution strategy unset.
    #[test]
    fn test_parser_config_default() {
        let ParserConfig {
            collect_metadata,
            enable_error_recovery,
            execution_strategy,
            ..
        } = ParserConfig::default();

        assert!(collect_metadata);
        assert!(enable_error_recovery);
        assert_eq!(execution_strategy, None);
    }
}