Skip to main content

graphify_extract/
parser.rs

1//! Parser trait for pluggable extraction backends.
2//!
3//! The [`Parser`] trait allows swapping between regex-based extraction (current
4//! default) and future tree-sitter–backed extraction without changing the
5//! pipeline.
6
7use std::path::Path;
8
9use graphify_core::model::ExtractionResult;
10
11/// A source-file parser that produces graph nodes and edges.
12///
13/// Implementations must be `Send + Sync` so they can be shared across threads
14/// when processing files in parallel.
15pub trait Parser: Send + Sync {
16    /// Parse a single source file and return the extracted entities and
17    /// relationships.
18    fn parse(&self, path: &Path, source: &[u8]) -> ExtractionResult;
19
20    /// File extensions this parser can handle (e.g. `[".py", ".pyi"]`).
21    fn supported_extensions(&self) -> &[&str];
22}
23
/// The default regex-based parser that delegates to [`crate::ast_extract`].
///
/// This is a stateless unit struct, so it is cheap to copy and can be built
/// with [`Default::default`]. `Debug` is derived so values can appear in
/// diagnostics and in `Debug` output of types that embed a parser.
#[derive(Debug, Clone, Copy, Default)]
pub struct RegexParser;
26
27impl Parser for RegexParser {
28    fn parse(&self, path: &Path, source: &[u8]) -> ExtractionResult {
29        let lang = crate::language_for_path(path).unwrap_or("generic");
30        let source_str = String::from_utf8_lossy(source);
31        crate::ast_extract::extract_file(path, &source_str, lang)
32    }
33
34    fn supported_extensions(&self) -> &[&str] {
35        // All extensions from the DISPATCH table
36        &[
37            ".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".rs", ".java", ".c", ".h", ".cpp", ".cc",
38            ".cxx", ".hpp", ".rb", ".cs", ".kt", ".kts", ".scala", ".php", ".swift", ".lua",
39            ".toc", ".zig", ".ps1", ".ex", ".exs", ".m", ".mm", ".jl",
40        ]
41    }
42}
43
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Compile-time check: this test only builds if `RegexParser` is both
    /// `Send` and `Sync`.
    #[test]
    fn regex_parser_is_send_sync() {
        fn require_thread_safe<T>()
        where
            T: Send + Sync,
        {
        }
        require_thread_safe::<RegexParser>();
    }

    /// Smoke test: parsing a tiny Python snippet yields a non-empty node list.
    #[test]
    fn regex_parser_produces_output() {
        let python_source: &[u8] = b"def hello():\n    pass\n";
        let extracted = RegexParser.parse(Path::new("test.py"), python_source);
        // The extractor is expected to emit at least one node for this input;
        // only non-emptiness is asserted here.
        assert!(!extracted.nodes.is_empty());
    }
}