thread_services/traits/parser.rs
1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
3// SPDX-License-Identifier: AGPL-3.0-or-later
4
5//! # Code Parser Service Trait
6//!
7//! Defines the parser service interface that abstracts over ast-grep parsing
8//! functionality while preserving all its capabilities.
9
10use async_trait::async_trait;
11use std::path::Path;
12use thread_utilities::RapidMap;
13
14use crate::error::{ParseError, ServiceResult};
15use crate::types::{AnalysisContext, ParsedDocument};
16
17cfg_if::cfg_if!(
18 if #[cfg(feature = "ast-grep-backend")] {
19 use thread_ast_engine::source::Doc;
20 use thread_ast_engine::Language;
21 use thread_language::SupportLang;
22 } else {
23 use crate::types::{Doc, SupportLang};
24 }
25);
26
27/// Core parser service trait that abstracts ast-grep parsing functionality.
28///
29/// This trait provides async interfaces for parsing source code into ParsedDocument
30/// instances that preserve all ast-grep capabilities while enabling codebase-level
31/// analysis. The trait supports both single-file and multi-file parsing operations.
32///
33/// # Design Philosophy
34///
35/// - **Preserve Power**: All ast-grep functionality remains accessible through ParsedDocument
36/// - **Enable Intelligence**: Add metadata needed for codebase-level graph analysis
37/// - **Abstract Execution**: Support different execution environments
38/// - **Commercial Ready**: Clear extension points for commercial parsing features
39///
40/// # Examples
41///
42/// ## Single File Parsing
43/// ```rust,no_run
44/// # use thread_services::traits::CodeParser;
45/// # use thread_services::types::AnalysisContext;
46/// # use thread_language::SupportLang;
47/// # struct MyParser;
48/// # #[async_trait::async_trait]
49/// # impl CodeParser for MyParser {
50/// # async fn parse_content(&self, content: &str, language: SupportLang, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>, thread_services::error::ServiceError> { todo!() }
51/// # async fn parse_file(&self, file_path: &std::path::Path, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>, thread_services::error::ServiceError> { todo!() }
52/// # async fn parse_multiple_files(&self, file_paths: &[&std::path::Path], context: &AnalysisContext) -> Result<Vec<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<SupportLang>>>, thread_services::error::ServiceError> { todo!() }
53/// # fn capabilities(&self) -> thread_services::traits::ParserCapabilities { todo!() }
54/// # fn supported_languages(&self) -> &[SupportLang] { todo!() }
55/// # }
56/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
57/// let parser = MyParser;
58/// let context = AnalysisContext::default();
59///
60/// // Parse a Rust file
61/// let document = parser.parse_file(
62/// std::path::Path::new("src/main.rs"),
63/// &context
64/// ).await?;
65///
66/// // Access underlying ast-grep functionality
67/// let root = document.ast_grep_root();
68/// let matches = root.root().find_all("fn $NAME($$$PARAMS) { $$$BODY }");
69/// # Ok(())
70/// # }
71/// ```
72///
73/// ## Multi-File Codebase Parsing
74/// ```rust,no_run
75/// # use thread_services::traits::CodeParser;
76/// # use thread_services::types::{AnalysisContext, ExecutionScope};
77/// # use std::path::PathBuf;
78/// # struct MyParser;
79/// # #[async_trait::async_trait]
80/// # impl CodeParser for MyParser {
81/// # async fn parse_content(&self, content: &str, language: thread_language::SupportLang, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>, thread_services::error::ServiceError> { todo!() }
82/// # async fn parse_file(&self, file_path: &std::path::Path, context: &AnalysisContext) -> Result<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>, thread_services::error::ServiceError> { todo!() }
83/// # async fn parse_multiple_files(&self, file_paths: &[&std::path::Path], context: &AnalysisContext) -> Result<Vec<thread_services::types::ParsedDocument<thread_ast_engine::tree_sitter::StrDoc<thread_language::SupportLang>>>, thread_services::error::ServiceError> { todo!() }
84/// # fn capabilities(&self) -> thread_services::traits::ParserCapabilities { todo!() }
85/// # fn supported_languages(&self) -> &[thread_language::SupportLang] { todo!() }
86/// # }
87/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
88/// let parser = MyParser;
89/// let mut context = AnalysisContext::default();
90/// context.scope = ExecutionScope::Codebase;
91///
92/// // Parse entire codebase
93/// let files: Vec<&std::path::Path> = vec![
94/// std::path::Path::new("src/main.rs"),
95/// std::path::Path::new("src/lib.rs"),
96/// std::path::Path::new("src/parser.rs"),
97/// ];
98///
99/// let documents = parser.parse_multiple_files(&files, &context).await?;
100///
101/// // Each document preserves ast-grep capabilities + adds codebase metadata
102/// for doc in &documents {
103/// println!("File: {:?}", doc.file_path);
104/// println!("Symbols: {:?}", doc.metadata().defined_symbols.keys().collect::<Vec<_>>());
105/// }
106/// # Ok(())
107/// # }
108/// ```
109#[async_trait]
110pub trait CodeParser<D: Doc + Send + Sync>: Send + Sync {
111 /// Parse source content into a ParsedDocument.
112 ///
113 /// This method wraps ast-grep parsing with additional metadata collection
114 /// for codebase-level analysis while preserving all ast-grep functionality.
115 ///
116 /// # Arguments
117 /// * `content` - Source code to parse
118 /// * `language` - Programming language of the content
119 /// * `context` - Analysis context containing execution configuration
120 ///
121 /// # Returns
122 /// ParsedDocument that wraps ast-grep Root with additional metadata
123 async fn parse_content(
124 &self,
125 content: &str,
126 language: SupportLang,
127 context: &AnalysisContext,
128 ) -> ServiceResult<ParsedDocument<D>>;
129
130 /// Parse a single file into a ParsedDocument.
131 ///
132 /// Automatically detects language from file extension and reads file content.
133 /// Collects symbols, imports, and other metadata for codebase-level analysis.
134 ///
135 /// # Arguments
136 /// * `file_path` - Path to source file to parse
137 /// * `context` - Analysis context containing execution configuration
138 ///
139 /// # Returns
140 /// ParsedDocument with both ast-grep functionality and codebase metadata
141 async fn parse_file(
142 &self,
143 file_path: &Path,
144 context: &AnalysisContext,
145 ) -> ServiceResult<ParsedDocument<D>>;
146
147 /// Parse multiple files with efficient parallel execution.
148 ///
149 /// Uses execution strategy from context to optimize for different environments:
150 /// - Rayon for CLI parallel processing
151 /// - Chunked execution for cloud workers
152 /// - Sequential for single-threaded environments
153 ///
154 /// # Arguments
155 /// * `file_paths` - Slice of file paths to parse
156 /// * `context` - Analysis context with execution configuration
157 ///
158 /// # Returns
159 /// Vector of ParsedDocuments in same order as input paths
160 async fn parse_multiple_files(
161 &self,
162 file_paths: &[&Path],
163 context: &AnalysisContext,
164 ) -> ServiceResult<Vec<ParsedDocument<D>>>;
165
166 /// Get parser capabilities and configuration.
167 ///
168 /// Describes what features this parser implementation supports,
169 /// including performance characteristics and execution strategies.
170 fn capabilities(&self) -> ParserCapabilities;
171
172 /// Get list of supported programming languages.
173 ///
174 /// Returns slice of SupportLang values that this parser can handle.
175 /// Used for language detection and validation.
176 fn supported_languages(&self) -> &[SupportLang];
177
178 /// Detect language from file path.
179 ///
180 /// Default implementation uses file extension matching.
181 /// Implementations can override for more sophisticated detection.
182 fn detect_language(&self, file_path: &Path) -> ServiceResult<SupportLang> {
183 SupportLang::from_path(file_path).ok_or_else(|| {
184 ParseError::LanguageDetectionFailed {
185 file_path: file_path.to_path_buf(),
186 }
187 .into()
188 })
189 }
190
191 /// Validate content before parsing.
192 ///
193 /// Default implementation checks for basic validity.
194 /// Implementations can override for language-specific validation.
195 fn validate_content(&self, content: &str, _language: SupportLang) -> ServiceResult<()> {
196 if content.is_empty() {
197 return Err(ParseError::InvalidSource {
198 message: "Content is empty".into(),
199 }
200 .into());
201 }
202
203 // Check content size limits based on capabilities
204 let capabilities = self.capabilities();
205 if let Some(max_size) = capabilities.max_content_size
206 && content.len() > max_size
207 {
208 return Err(ParseError::ContentTooLarge {
209 size: content.len(),
210 max_size,
211 }
212 .into());
213 }
214
215 Ok(())
216 }
217
218 /// Pre-process content before parsing.
219 ///
220 /// Default implementation returns content unchanged.
221 /// Implementations can override for content normalization.
222 fn preprocess_content(&self, content: &str, _language: SupportLang) -> String {
223 content.to_string()
224 }
225
226 /// Post-process parsed document.
227 ///
228 /// Default implementation returns document unchanged.
229 /// Implementations can override to add custom metadata collection.
230 async fn postprocess_document(
231 &self,
232 mut document: ParsedDocument<D>,
233 context: &AnalysisContext,
234 ) -> ServiceResult<ParsedDocument<D>> {
235 // Default: collect basic metadata
236 self.collect_basic_metadata(&mut document, context).await?;
237 Ok(document)
238 }
239
240 /// Collect basic metadata for codebase-level analysis.
241 ///
242 /// Default implementation extracts symbols, imports, exports, and function calls.
243 /// This bridges ast-grep file-level analysis to codebase-level intelligence.
244 async fn collect_basic_metadata(
245 &self,
246 _document: &mut ParsedDocument<D>,
247 _context: &AnalysisContext,
248 ) -> ServiceResult<()> {
249 // This will be implemented in the conversion utilities
250 // For now, this is a placeholder that preserves the interface
251 Ok(())
252 }
253}
254
255/// Parser capabilities and configuration information
256#[derive(Debug, Clone)]
257pub struct ParserCapabilities {
258 /// Maximum content size this parser can handle (in bytes)
259 pub max_content_size: Option<usize>,
260
261 /// Maximum number of files that can be parsed concurrently
262 pub max_concurrent_files: Option<usize>,
263
264 /// Supported execution strategies
265 pub execution_strategies: Vec<ExecutionStrategy>,
266
267 /// Whether incremental parsing is supported
268 pub supports_incremental: bool,
269
270 /// Whether error recovery during parsing is supported
271 pub supports_error_recovery: bool,
272
273 /// Whether codebase-level metadata collection is supported
274 pub supports_metadata_collection: bool,
275
276 /// Whether cross-file analysis is supported
277 pub supports_cross_file_analysis: bool,
278
279 /// Performance characteristics
280 pub performance_profile: PerformanceProfile,
281
282 /// Additional capability flags
283 pub capability_flags: RapidMap<String, bool>,
284}
285
286impl Default for ParserCapabilities {
287 fn default() -> Self {
288 Self {
289 max_content_size: Some(10 * 1024 * 1024), // 10MB default
290 max_concurrent_files: Some(100),
291 execution_strategies: vec![ExecutionStrategy::Sequential, ExecutionStrategy::Rayon],
292 supports_incremental: false,
293 supports_error_recovery: true,
294 supports_metadata_collection: true,
295 supports_cross_file_analysis: false,
296 performance_profile: PerformanceProfile::Balanced,
297 capability_flags: thread_utilities::get_map(),
298 }
299 }
300}
301
/// Execution strategy for parser operations.
///
/// Every variant carries only `Eq + Hash` data, so the enum derives both and
/// can be used as a set member or map key in addition to being compared.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ExecutionStrategy {
    /// Single-threaded sequential execution
    Sequential,
    /// Rayon-based parallel execution (for CLI)
    Rayon,
    /// Chunked execution; `chunk_size` is the size of each chunk
    Chunked { chunk_size: usize },
    /// Custom execution strategy, identified by a free-form name
    Custom(String),
}
314
/// Performance profile for parser operations.
///
/// This is a fieldless enum, so it derives `Copy`, `Eq`, and `Hash`; values can
/// be passed by value and used as set members or map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PerformanceProfile {
    /// Optimized for low memory usage
    LowMemory,
    /// Optimized for fast parsing speed
    FastParsing,
    /// Balanced memory usage and parsing speed
    Balanced,
    /// Optimized for high throughput
    HighThroughput,
}
327
328/// Parser configuration for specific use cases
329#[derive(Debug, Clone)]
330pub struct ParserConfig {
331 /// Whether to collect metadata during parsing
332 pub collect_metadata: bool,
333
334 /// Whether to enable error recovery
335 pub enable_error_recovery: bool,
336
337 /// Preferred execution strategy
338 pub execution_strategy: Option<ExecutionStrategy>,
339
340 /// Custom configuration options
341 pub custom_options: RapidMap<String, String>,
342}
343
344impl Default for ParserConfig {
345 fn default() -> Self {
346 Self {
347 collect_metadata: true,
348 enable_error_recovery: true,
349 execution_strategy: None, // Auto-detect
350 custom_options: thread_utilities::get_map(),
351 }
352 }
353}
354
355/// Parser factory trait for creating configured parser instances
356pub trait ParserFactory<D: Doc + Send + Sync>: Send + Sync {
357 /// Create a new parser instance with default configuration
358 fn create_parser(&self) -> Box<dyn CodeParser<D>>;
359
360 /// Create a new parser instance with specific configuration
361 fn create_configured_parser(&self, config: ParserConfig) -> Box<dyn CodeParser<D>>;
362
363 /// Get available parser types
364 fn available_parsers(&self) -> Vec<String>;
365}
366
#[cfg(test)]
mod tests {
    use super::*;

    /// The default capability set enables metadata collection and error
    /// recovery, disables cross-file analysis, and uses the balanced profile.
    #[test]
    fn test_parser_capabilities_default() {
        let ParserCapabilities {
            supports_metadata_collection,
            supports_error_recovery,
            supports_cross_file_analysis,
            performance_profile,
            ..
        } = ParserCapabilities::default();

        assert!(supports_metadata_collection);
        assert!(supports_error_recovery);
        assert!(!supports_cross_file_analysis);
        assert_eq!(performance_profile, PerformanceProfile::Balanced);
    }

    /// The default configuration enables metadata collection and error
    /// recovery and leaves the execution strategy unset.
    #[test]
    fn test_parser_config_default() {
        let defaults = ParserConfig::default();

        assert!(defaults.collect_metadata);
        assert!(defaults.enable_error_recovery);
        assert!(defaults.execution_strategy.is_none());
    }
}