codegraph_parser_api/
traits.rs

1use crate::{config::ParserConfig, errors::ParserError, metrics::ParserMetrics};
2use codegraph::{CodeGraph, NodeId};
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use std::time::Duration;
6
7/// Information about a successfully parsed file
8#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
9pub struct FileInfo {
10    /// Path to the source file
11    pub file_path: PathBuf,
12
13    /// Node ID of the file/module in the graph
14    pub file_id: NodeId,
15
16    /// Node IDs of all functions extracted
17    pub functions: Vec<NodeId>,
18
19    /// Node IDs of all classes extracted
20    pub classes: Vec<NodeId>,
21
22    /// Node IDs of all traits/interfaces extracted
23    pub traits: Vec<NodeId>,
24
25    /// Node IDs of all imports extracted
26    pub imports: Vec<NodeId>,
27
28    /// Time taken to parse this file
29    #[serde(with = "duration_serde")]
30    pub parse_time: Duration,
31
32    /// Number of lines in the file
33    pub line_count: usize,
34
35    /// File size in bytes
36    pub byte_count: usize,
37}
38
39// Helper module for serializing Duration
40mod duration_serde {
41    use serde::{Deserialize, Deserializer, Serialize, Serializer};
42    use std::time::Duration;
43
44    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
45    where
46        S: Serializer,
47    {
48        duration.as_secs().serialize(serializer)
49    }
50
51    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
52    where
53        D: Deserializer<'de>,
54    {
55        let secs: u64 = u64::deserialize(deserializer)?;
56        Ok(Duration::from_secs(secs))
57    }
58}
59
60impl FileInfo {
61    /// Total number of entities extracted
62    pub fn entity_count(&self) -> usize {
63        self.functions.len() + self.classes.len() + self.traits.len()
64    }
65}
66
67/// Aggregate information about a parsed project
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
69pub struct ProjectInfo {
70    /// Information about each successfully parsed file
71    pub files: Vec<FileInfo>,
72
73    /// Total number of functions across all files
74    pub total_functions: usize,
75
76    /// Total number of classes across all files
77    pub total_classes: usize,
78
79    /// Total parse time for all files
80    #[serde(with = "duration_serde")]
81    pub total_parse_time: Duration,
82
83    /// Files that failed to parse (path, error message)
84    pub failed_files: Vec<(PathBuf, String)>,
85}
86
87impl ProjectInfo {
88    /// Total number of files processed (success + failure)
89    pub fn total_files(&self) -> usize {
90        self.files.len() + self.failed_files.len()
91    }
92
93    /// Success rate (0.0 to 1.0)
94    pub fn success_rate(&self) -> f64 {
95        if self.total_files() == 0 {
96            0.0
97        } else {
98            self.files.len() as f64 / self.total_files() as f64
99        }
100    }
101
102    /// Average parse time per file
103    pub fn avg_parse_time(&self) -> Duration {
104        if self.files.is_empty() {
105            Duration::ZERO
106        } else {
107            self.total_parse_time / self.files.len() as u32
108        }
109    }
110}
111
112/// Core trait that all language parsers must implement
113///
114/// This trait defines the contract for extracting code entities and relationships
115/// from source code and inserting them into a CodeGraph database.
116///
117/// # Thread Safety
118/// Implementations must be `Send + Sync` to support parallel parsing.
119///
120/// # Example
121/// ```rust,ignore
122/// use codegraph_parser_api::{CodeParser, ParserConfig};
123/// use codegraph::CodeGraph;
124///
125/// struct MyParser {
126///     config: ParserConfig,
127/// }
128///
129/// impl CodeParser for MyParser {
130///     fn language(&self) -> &str {
131///         "mylang"
132///     }
133///
134///     fn file_extensions(&self) -> &[&str] {
135///         &[".my"]
136///     }
137///
138///     // ... implement other required methods
139/// }
140/// ```
141pub trait CodeParser: Send + Sync {
142    /// Returns the language identifier (lowercase, e.g., "python", "rust")
143    fn language(&self) -> &str;
144
145    /// Returns supported file extensions (e.g., [".py", ".pyw"])
146    fn file_extensions(&self) -> &[&str];
147
148    /// Parse a single file and insert entities/relationships into the graph
149    ///
150    /// **Note on Metrics**: This method updates parser metrics
151    /// (files_attempted, files_succeeded, etc.). Use `metrics()` to retrieve
152    /// statistics after parsing operations.
153    ///
154    /// # Arguments
155    /// * `path` - Path to the source file
156    /// * `graph` - Mutable reference to the CodeGraph database
157    ///
158    /// # Returns
159    /// `FileInfo` containing metadata about parsed entities
160    ///
161    /// # Errors
162    /// Returns `ParserError` if:
163    /// - File cannot be read
164    /// - Source code has syntax errors
165    /// - Graph insertion fails
166    fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError>;
167
168    /// Parse source code string and insert into graph
169    ///
170    /// Useful for parsing code snippets or in-memory source.
171    ///
172    /// **Note on Metrics**: This method does NOT update parser metrics
173    /// (files_attempted, files_succeeded, etc.). Only `parse_file()` updates
174    /// metrics to avoid double-counting when `parse_source()` is called
175    /// internally by `parse_file()`.
176    ///
177    /// # Arguments
178    /// * `source` - Source code string
179    /// * `file_path` - Logical path for this source (used for graph nodes)
180    /// * `graph` - Mutable reference to the CodeGraph database
181    fn parse_source(
182        &self,
183        source: &str,
184        file_path: &Path,
185        graph: &mut CodeGraph,
186    ) -> Result<FileInfo, ParserError>;
187
188    /// Parse multiple files (can be overridden for parallel parsing)
189    ///
190    /// Default implementation parses files sequentially. Override this
191    /// for parallel parsing implementation.
192    ///
193    /// # Arguments
194    /// * `paths` - List of file paths to parse
195    /// * `graph` - Mutable reference to the CodeGraph database
196    ///
197    /// # Returns
198    /// `ProjectInfo` containing aggregate statistics
199    fn parse_files(
200        &self,
201        paths: &[PathBuf],
202        graph: &mut CodeGraph,
203    ) -> Result<ProjectInfo, ParserError> {
204        let mut files = Vec::new();
205        let mut failed_files = Vec::new();
206        let mut total_functions = 0;
207        let mut total_classes = 0;
208        let mut total_parse_time = Duration::ZERO;
209
210        for path in paths {
211            match self.parse_file(path, graph) {
212                Ok(info) => {
213                    total_functions += info.functions.len();
214                    total_classes += info.classes.len();
215                    total_parse_time += info.parse_time;
216                    files.push(info);
217                }
218                Err(e) => {
219                    failed_files.push((path.clone(), e.to_string()));
220                }
221            }
222        }
223
224        Ok(ProjectInfo {
225            files,
226            total_functions,
227            total_classes,
228            total_parse_time,
229            failed_files,
230        })
231    }
232
233    /// Parse a directory recursively
234    ///
235    /// # Arguments
236    /// * `dir` - Directory path to parse
237    /// * `graph` - Mutable reference to the CodeGraph database
238    fn parse_directory(
239        &self,
240        dir: &Path,
241        graph: &mut CodeGraph,
242    ) -> Result<ProjectInfo, ParserError> {
243        let paths = self.discover_files(dir)?;
244        self.parse_files(&paths, graph)
245    }
246
247    /// Discover parseable files in a directory
248    ///
249    /// Default implementation walks the directory and filters by extension.
250    /// Can be overridden for custom discovery logic.
251    fn discover_files(&self, dir: &Path) -> Result<Vec<PathBuf>, ParserError> {
252        use std::fs;
253
254        let mut files = Vec::new();
255        let extensions = self.file_extensions();
256
257        fn walk_dir(
258            dir: &Path,
259            extensions: &[&str],
260            files: &mut Vec<PathBuf>,
261        ) -> Result<(), ParserError> {
262            if !dir.is_dir() {
263                return Ok(());
264            }
265
266            for entry in
267                fs::read_dir(dir).map_err(|e| ParserError::IoError(dir.to_path_buf(), e))?
268            {
269                let entry = entry.map_err(|e| ParserError::IoError(dir.to_path_buf(), e))?;
270                let path = entry.path();
271
272                if path.is_dir() {
273                    walk_dir(&path, extensions, files)?;
274                } else if let Some(ext) = path.extension() {
275                    let ext_str = format!(".{}", ext.to_string_lossy());
276                    if extensions.contains(&ext_str.as_str()) {
277                        files.push(path);
278                    }
279                }
280            }
281
282            Ok(())
283        }
284
285        walk_dir(dir, extensions, &mut files)?;
286        Ok(files)
287    }
288
289    /// Check if this parser can handle the given file
290    ///
291    /// Default implementation checks file extension.
292    fn can_parse(&self, path: &Path) -> bool {
293        if let Some(ext) = path.extension() {
294            let ext_str = format!(".{}", ext.to_string_lossy());
295            self.file_extensions().contains(&ext_str.as_str())
296        } else {
297            false
298        }
299    }
300
301    /// Get parser configuration
302    fn config(&self) -> &ParserConfig;
303
304    /// Get accumulated metrics
305    ///
306    /// Returns current parsing metrics (files processed, time taken, etc.)
307    fn metrics(&self) -> ParserMetrics;
308
309    /// Reset metrics
310    ///
311    /// Clears accumulated metrics. Useful for benchmarking.
312    fn reset_metrics(&mut self);
313}