Skip to main content

cgx_engine/
parser.rs

1use std::collections::HashMap;
2
3use crate::walker::{Language, SourceFile};
4
/// The semantic category of a graph node.
///
/// No serde `rename_all` is applied, so the derived (de)serialization uses the
/// variant names exactly as written here; [`NodeKind::as_str`] returns the same
/// spellings.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum NodeKind {
    /// A source file on disk.
    File,
    /// A named function or method.
    Function,
    /// A class, struct, or interface definition.
    Class,
    /// A module-level variable or constant.
    Variable,
    /// A type alias, interface, or enum definition.
    Type,
    /// A package or module (e.g. Go package, Python module).
    Module,
    /// A git commit author, used in ownership edges.
    Author,
}
23
/// Syntactic form of the comment a [`CommentTag`] was extracted from.
///
/// The string label for each variant is given by [`CommentKind::as_str`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum CommentKind {
    /// Regular JS/JSDoc comment above a function or at the top level
    Standard,
    /// `{/* ... */}` expression comment inside JSX return body
    JsxExpression,
    /// JSX expression comment whose inner text starts with `<` — commented-out JSX code
    JsxCommentedCode,
}
33
34impl CommentKind {
35    pub fn as_str(&self) -> &'static str {
36        match self {
37            CommentKind::Standard => "code",
38            CommentKind::JsxExpression => "jsx",
39            CommentKind::JsxCommentedCode => "jsx_commented_code",
40        }
41    }
42}
43
/// A structured annotation extracted from a source comment (e.g. `@todo`, `@deprecated`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CommentTag {
    /// Tag name, e.g. `"todo"`, `"fixme"`, `"hack"`.
    pub tag_type: String,
    /// Full comment text following the tag marker.
    pub text: String,
    /// Source line the tag is associated with.
    ///
    /// NOTE(review): whether this is 0- or 1-based is not visible in this file —
    /// confirm against the language parsers that populate it.
    pub line: u32,
    /// Syntactic form of the comment the tag came from (see [`CommentKind`]).
    pub comment_kind: CommentKind,
}
54
55impl NodeKind {
56    pub fn as_str(&self) -> &'static str {
57        match self {
58            NodeKind::File => "File",
59            NodeKind::Function => "Function",
60            NodeKind::Class => "Class",
61            NodeKind::Variable => "Variable",
62            NodeKind::Type => "Type",
63            NodeKind::Module => "Module",
64            NodeKind::Author => "Author",
65        }
66    }
67}
68
69/// The semantic relationship represented by a graph edge.
70#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
71#[serde(rename_all = "UPPERCASE")]
72pub enum EdgeKind {
73    /// Function/method invocation.
74    Calls,
75    /// File imports another file or module.
76    Imports,
77    /// Class inherits from or implements another class/interface.
78    Inherits,
79    /// File exposes a symbol (file → function/class).
80    Exports,
81    /// Two files changed together in git history.
82    CoChanges,
83    /// Author owns a file (from git blame).
84    Owns,
85    /// File depends on an external package (from manifest parsing).
86    DependsOn,
87    /// Test file exercises a production symbol.
88    Tests,
89}
90
91impl EdgeKind {
92    pub fn as_str(&self) -> &'static str {
93        match self {
94            EdgeKind::Calls => "CALLS",
95            EdgeKind::Imports => "IMPORTS",
96            EdgeKind::Inherits => "INHERITS",
97            EdgeKind::Exports => "EXPORTS",
98            EdgeKind::CoChanges => "CO_CHANGES",
99            EdgeKind::Owns => "OWNS",
100            EdgeKind::DependsOn => "DEPENDS_ON",
101            EdgeKind::Tests => "TESTS",
102        }
103    }
104}
105
/// A node as produced by a language parser, before being written to the graph DB.
///
/// The `Default` implementation yields an empty `File` node with both line
/// fields set to 0 and `metadata` set to JSON null.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NodeDef {
    /// Stable, unique identifier — format: `<prefix>:<path>:<name>`, e.g. `fn:src/lib.rs:parse`.
    pub id: String,
    /// Semantic category of this node.
    pub kind: NodeKind,
    /// Name of the node (the `<name>` component of `id`).
    pub name: String,
    /// Repo-relative file path.
    pub path: String,
    /// First source line covered by the node.
    ///
    /// NOTE(review): line base (0 vs 1) and inclusivity are not visible in this
    /// file — confirm against the parsers.
    pub line_start: u32,
    /// Last source line covered by the node (same caveat as `line_start`).
    pub line_end: u32,
    /// Parser-specific extras (e.g. `{"exported": true, "complexity": 4.0}`).
    ///
    /// Optional on input: a missing field deserializes to JSON null.
    #[serde(default)]
    pub metadata: serde_json::Value,
}
121
122impl Default for NodeDef {
123    fn default() -> Self {
124        Self {
125            id: String::new(),
126            kind: NodeKind::File,
127            name: String::new(),
128            path: String::new(),
129            line_start: 0,
130            line_end: 0,
131            metadata: serde_json::Value::Null,
132        }
133    }
134}
135
/// A directed edge as produced by a language parser or the resolver.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EdgeDef {
    /// Identifier of the edge's source endpoint.
    ///
    /// NOTE(review): presumably a `NodeDef::id` — confirm against the resolver.
    pub src: String,
    /// Identifier of the edge's destination endpoint (same caveat as `src`).
    pub dst: String,
    /// Relationship this edge represents.
    pub kind: EdgeKind,
    /// Relative strength of the relationship (default 1.0).
    #[serde(default = "default_edge_weight")]
    pub weight: f64,
    /// Parser certainty that this edge is real, 0.0–1.0 (default 1.0, fuzzy matches use 0.8).
    // Deliberately shares `default_edge_weight` with `weight`: both default to 1.0.
    #[serde(default = "default_edge_weight")]
    pub confidence: f64,
}
149
150impl Default for EdgeDef {
151    fn default() -> Self {
152        Self {
153            src: String::new(),
154            dst: String::new(),
155            kind: EdgeKind::Calls,
156            weight: 1.0,
157            confidence: 1.0,
158        }
159    }
160}
161
/// Serde field default for `EdgeDef::weight` and `EdgeDef::confidence`: always 1.0.
fn default_edge_weight() -> f64 {
    const FULL: f64 = 1.0;
    FULL
}
165
/// The output of parsing a single source file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ParseResult {
    /// Graph nodes found in the file.
    pub nodes: Vec<NodeDef>,
    /// Graph edges found in the file.
    pub edges: Vec<EdgeDef>,
    /// Comment annotations found in the file.
    ///
    /// Optional on input: a missing field deserializes to an empty list.
    #[serde(default)]
    pub comment_tags: Vec<CommentTag>,
}
174
175impl ParseResult {
176    pub fn new() -> Self {
177        Self {
178            nodes: Vec::new(),
179            edges: Vec::new(),
180            comment_tags: Vec::new(),
181        }
182    }
183}
184
185impl Default for ParseResult {
186    fn default() -> Self {
187        Self::new()
188    }
189}
190
/// Trait implemented by every language-specific Tree-sitter parser.
///
/// The `Send + Sync` supertraits allow boxed parsers held by [`ParserRegistry`]
/// to be shared across threads, which `ParserRegistry::parse_all` relies on
/// when it parses files in parallel.
pub trait LanguageParser: Send + Sync {
    /// File extensions this parser handles, e.g. `&["ts", "tsx"]`.
    fn extensions(&self) -> &[&str];
    /// Parse a source file and return all nodes, edges, and comment tags found.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined. `ParserRegistry::parse`
    /// forwards the error unchanged; `ParserRegistry::parse_all` logs it as a
    /// warning and substitutes an empty [`ParseResult`].
    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult>;
}
198
/// Registry that maps [`Language`] variants to their [`LanguageParser`] implementations.
///
/// Constructed with all built-in parsers pre-registered.  Use [`ParserRegistry::parse`]
/// for a single file or [`ParserRegistry::parse_all`] for parallel batch processing.
///
/// A language with no registered parser is not an error: parsing such a file
/// yields an empty [`ParseResult`].
pub struct ParserRegistry {
    /// One boxed parser per supported language; populated in `new`.
    parsers: HashMap<Language, Box<dyn LanguageParser>>,
}
206
207impl ParserRegistry {
208    /// Create a registry with all built-in language parsers registered.
209    pub fn new() -> Self {
210        let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
211
212        parsers.insert(
213            Language::TypeScript,
214            Box::new(super::parsers::ts::TypeScriptParser::new()),
215        );
216        parsers.insert(
217            Language::JavaScript,
218            Box::new(super::parsers::ts::TypeScriptParser::new()),
219        );
220        parsers.insert(
221            Language::Python,
222            Box::new(super::parsers::py::PythonParser::new()),
223        );
224        parsers.insert(
225            Language::Rust,
226            Box::new(super::parsers::rust::RustParser::new()),
227        );
228        parsers.insert(Language::Go, Box::new(super::parsers::go::GoParser::new()));
229        parsers.insert(
230            Language::Java,
231            Box::new(super::parsers::java::JavaParser::new()),
232        );
233        parsers.insert(
234            Language::CSharp,
235            Box::new(super::parsers::java::JavaParser::new()),
236        );
237        parsers.insert(
238            Language::Php,
239            Box::new(super::parsers::php::PhpParser::new()),
240        );
241
242        Self { parsers }
243    }
244
245    /// Parse a single file, returning an empty result for unknown languages.
246    pub fn parse(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
247        if let Some(parser) = self.parsers.get(&file.language) {
248            parser.extract(file)
249        } else {
250            Ok(ParseResult::new())
251        }
252    }
253
254    /// Parse all files in parallel using Rayon, logging warnings on individual failures.
255    pub fn parse_all(&self, files: &[SourceFile]) -> Vec<ParseResult> {
256        use rayon::prelude::*;
257        files
258            .par_iter()
259            .map(|file| {
260                self.parse(file).unwrap_or_else(|e| {
261                    tracing::warn!("Parse error in {}: {}", file.relative_path, e);
262                    ParseResult::new()
263                })
264            })
265            .collect()
266    }
267}
268
269impl Default for ParserRegistry {
270    fn default() -> Self {
271        Self::new()
272    }
273}