codegraph_parser_api/traits.rs
1use crate::{config::ParserConfig, errors::ParserError, metrics::ParserMetrics};
2use codegraph::{CodeGraph, NodeId};
3use serde::{Deserialize, Serialize};
4use std::path::{Path, PathBuf};
5use std::time::Duration;
6
7/// Information about a successfully parsed file
8#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
9pub struct FileInfo {
10 /// Path to the source file
11 pub file_path: PathBuf,
12
13 /// Node ID of the file/module in the graph
14 pub file_id: NodeId,
15
16 /// Node IDs of all functions extracted
17 pub functions: Vec<NodeId>,
18
19 /// Node IDs of all classes extracted
20 pub classes: Vec<NodeId>,
21
22 /// Node IDs of all traits/interfaces extracted
23 pub traits: Vec<NodeId>,
24
25 /// Node IDs of all imports extracted
26 pub imports: Vec<NodeId>,
27
28 /// Time taken to parse this file
29 #[serde(with = "duration_serde")]
30 pub parse_time: Duration,
31
32 /// Number of lines in the file
33 pub line_count: usize,
34
35 /// File size in bytes
36 pub byte_count: usize,
37}
38
39// Helper module for serializing Duration
40mod duration_serde {
41 use serde::{Deserialize, Deserializer, Serialize, Serializer};
42 use std::time::Duration;
43
44 pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
45 where
46 S: Serializer,
47 {
48 duration.as_secs().serialize(serializer)
49 }
50
51 pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
52 where
53 D: Deserializer<'de>,
54 {
55 let secs: u64 = u64::deserialize(deserializer)?;
56 Ok(Duration::from_secs(secs))
57 }
58}
59
60impl FileInfo {
61 /// Total number of entities extracted
62 pub fn entity_count(&self) -> usize {
63 self.functions.len() + self.classes.len() + self.traits.len()
64 }
65}
66
67/// Aggregate information about a parsed project
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
69pub struct ProjectInfo {
70 /// Information about each successfully parsed file
71 pub files: Vec<FileInfo>,
72
73 /// Total number of functions across all files
74 pub total_functions: usize,
75
76 /// Total number of classes across all files
77 pub total_classes: usize,
78
79 /// Total parse time for all files
80 #[serde(with = "duration_serde")]
81 pub total_parse_time: Duration,
82
83 /// Files that failed to parse (path, error message)
84 pub failed_files: Vec<(PathBuf, String)>,
85}
86
87impl ProjectInfo {
88 /// Total number of files processed (success + failure)
89 pub fn total_files(&self) -> usize {
90 self.files.len() + self.failed_files.len()
91 }
92
93 /// Success rate (0.0 to 1.0)
94 pub fn success_rate(&self) -> f64 {
95 if self.total_files() == 0 {
96 0.0
97 } else {
98 self.files.len() as f64 / self.total_files() as f64
99 }
100 }
101
102 /// Average parse time per file
103 pub fn avg_parse_time(&self) -> Duration {
104 if self.files.is_empty() {
105 Duration::ZERO
106 } else {
107 self.total_parse_time / self.files.len() as u32
108 }
109 }
110}
111
112/// Core trait that all language parsers must implement
113///
114/// This trait defines the contract for extracting code entities and relationships
115/// from source code and inserting them into a CodeGraph database.
116///
117/// # Thread Safety
118/// Implementations must be `Send + Sync` to support parallel parsing.
119///
120/// # Example
121/// ```rust,ignore
122/// use codegraph_parser_api::{CodeParser, ParserConfig};
123/// use codegraph::CodeGraph;
124///
125/// struct MyParser {
126/// config: ParserConfig,
127/// }
128///
129/// impl CodeParser for MyParser {
130/// fn language(&self) -> &str {
131/// "mylang"
132/// }
133///
134/// fn file_extensions(&self) -> &[&str] {
135/// &[".my"]
136/// }
137///
138/// // ... implement other required methods
139/// }
140/// ```
141pub trait CodeParser: Send + Sync {
142 /// Returns the language identifier (lowercase, e.g., "python", "rust")
143 fn language(&self) -> &str;
144
145 /// Returns supported file extensions (e.g., [".py", ".pyw"])
146 fn file_extensions(&self) -> &[&str];
147
148 /// Parse a single file and insert entities/relationships into the graph
149 ///
150 /// **Note on Metrics**: This method updates parser metrics
151 /// (files_attempted, files_succeeded, etc.). Use `metrics()` to retrieve
152 /// statistics after parsing operations.
153 ///
154 /// # Arguments
155 /// * `path` - Path to the source file
156 /// * `graph` - Mutable reference to the CodeGraph database
157 ///
158 /// # Returns
159 /// `FileInfo` containing metadata about parsed entities
160 ///
161 /// # Errors
162 /// Returns `ParserError` if:
163 /// - File cannot be read
164 /// - Source code has syntax errors
165 /// - Graph insertion fails
166 fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError>;
167
168 /// Parse source code string and insert into graph
169 ///
170 /// Useful for parsing code snippets or in-memory source.
171 ///
172 /// **Note on Metrics**: This method does NOT update parser metrics
173 /// (files_attempted, files_succeeded, etc.). Only `parse_file()` updates
174 /// metrics to avoid double-counting when `parse_source()` is called
175 /// internally by `parse_file()`.
176 ///
177 /// # Arguments
178 /// * `source` - Source code string
179 /// * `file_path` - Logical path for this source (used for graph nodes)
180 /// * `graph` - Mutable reference to the CodeGraph database
181 fn parse_source(
182 &self,
183 source: &str,
184 file_path: &Path,
185 graph: &mut CodeGraph,
186 ) -> Result<FileInfo, ParserError>;
187
188 /// Parse multiple files (can be overridden for parallel parsing)
189 ///
190 /// Default implementation parses files sequentially. Override this
191 /// for parallel parsing implementation.
192 ///
193 /// # Arguments
194 /// * `paths` - List of file paths to parse
195 /// * `graph` - Mutable reference to the CodeGraph database
196 ///
197 /// # Returns
198 /// `ProjectInfo` containing aggregate statistics
199 fn parse_files(
200 &self,
201 paths: &[PathBuf],
202 graph: &mut CodeGraph,
203 ) -> Result<ProjectInfo, ParserError> {
204 let mut files = Vec::new();
205 let mut failed_files = Vec::new();
206 let mut total_functions = 0;
207 let mut total_classes = 0;
208 let mut total_parse_time = Duration::ZERO;
209
210 for path in paths {
211 match self.parse_file(path, graph) {
212 Ok(info) => {
213 total_functions += info.functions.len();
214 total_classes += info.classes.len();
215 total_parse_time += info.parse_time;
216 files.push(info);
217 }
218 Err(e) => {
219 failed_files.push((path.clone(), e.to_string()));
220 }
221 }
222 }
223
224 Ok(ProjectInfo {
225 files,
226 total_functions,
227 total_classes,
228 total_parse_time,
229 failed_files,
230 })
231 }
232
233 /// Parse a directory recursively
234 ///
235 /// # Arguments
236 /// * `dir` - Directory path to parse
237 /// * `graph` - Mutable reference to the CodeGraph database
238 fn parse_directory(
239 &self,
240 dir: &Path,
241 graph: &mut CodeGraph,
242 ) -> Result<ProjectInfo, ParserError> {
243 let paths = self.discover_files(dir)?;
244 self.parse_files(&paths, graph)
245 }
246
247 /// Discover parseable files in a directory
248 ///
249 /// Default implementation walks the directory and filters by extension.
250 /// Can be overridden for custom discovery logic.
251 fn discover_files(&self, dir: &Path) -> Result<Vec<PathBuf>, ParserError> {
252 use std::fs;
253
254 let mut files = Vec::new();
255 let extensions = self.file_extensions();
256
257 fn walk_dir(
258 dir: &Path,
259 extensions: &[&str],
260 files: &mut Vec<PathBuf>,
261 ) -> Result<(), ParserError> {
262 if !dir.is_dir() {
263 return Ok(());
264 }
265
266 for entry in
267 fs::read_dir(dir).map_err(|e| ParserError::IoError(dir.to_path_buf(), e))?
268 {
269 let entry = entry.map_err(|e| ParserError::IoError(dir.to_path_buf(), e))?;
270 let path = entry.path();
271
272 if path.is_dir() {
273 walk_dir(&path, extensions, files)?;
274 } else if let Some(ext) = path.extension() {
275 let ext_str = format!(".{}", ext.to_string_lossy());
276 if extensions.contains(&ext_str.as_str()) {
277 files.push(path);
278 }
279 }
280 }
281
282 Ok(())
283 }
284
285 walk_dir(dir, extensions, &mut files)?;
286 Ok(files)
287 }
288
289 /// Check if this parser can handle the given file
290 ///
291 /// Default implementation checks file extension.
292 fn can_parse(&self, path: &Path) -> bool {
293 if let Some(ext) = path.extension() {
294 let ext_str = format!(".{}", ext.to_string_lossy());
295 self.file_extensions().contains(&ext_str.as_str())
296 } else {
297 false
298 }
299 }
300
301 /// Get parser configuration
302 fn config(&self) -> &ParserConfig;
303
304 /// Get accumulated metrics
305 ///
306 /// Returns current parsing metrics (files processed, time taken, etc.)
307 fn metrics(&self) -> ParserMetrics;
308
309 /// Reset metrics
310 ///
311 /// Clears accumulated metrics. Useful for benchmarking.
312 fn reset_metrics(&mut self);
313}