// cgx-engine 0.4.1
//
// Core engine for cgx — Tree-sitter parsing, DuckDB graph storage, git analysis, and clustering.
// Documentation
use std::collections::HashMap;

use crate::walker::{Language, SourceFile};

/// Semantic category assigned to a node in the code graph.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(serde::Serialize, serde::Deserialize)]
pub enum NodeKind {
    /// A single source file on disk.
    File,
    /// A function or method that has a name.
    Function,
    /// A class, struct, or interface definition.
    Class,
    /// A variable or constant declared at module level.
    Variable,
    /// A type alias, interface, or enum definition.
    Type,
    /// A package or module (for example a Go package or a Python module).
    Module,
    /// The author of git commits; used as an endpoint of ownership edges.
    Author,
}

/// Syntactic form of the comment a [`CommentTag`] was extracted from.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(serde::Serialize, serde::Deserialize)]
pub enum CommentKind {
    /// An ordinary JS/JSDoc comment, either above a function or at the top level.
    Standard,
    /// A `{/* ... */}` expression comment located inside a JSX return body.
    JsxExpression,
    /// A JSX expression comment whose inner text begins with `<`, i.e. JSX code
    /// that has been commented out.
    JsxCommentedCode,
}

impl CommentKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            CommentKind::Standard => "code",
            CommentKind::JsxExpression => "jsx",
            CommentKind::JsxCommentedCode => "jsx_commented_code",
        }
    }
}

/// A structured annotation pulled out of a source comment
/// (for example `@todo` or `@deprecated`).
#[derive(Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct CommentTag {
    /// Name of the tag, e.g. `"todo"`, `"fixme"`, `"hack"`.
    pub tag_type: String,
    /// The complete comment text that follows the tag marker.
    pub text: String,
    /// Line number recorded for the tag.
    /// NOTE(review): whether this is 0- or 1-based is not visible here — confirm.
    pub line: u32,
    /// Syntactic form of the comment the tag came from.
    pub comment_kind: CommentKind,
}

impl NodeKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            NodeKind::File => "File",
            NodeKind::Function => "Function",
            NodeKind::Class => "Class",
            NodeKind::Variable => "Variable",
            NodeKind::Type => "Type",
            NodeKind::Module => "Module",
            NodeKind::Author => "Author",
        }
    }
}

/// The semantic relationship represented by a graph edge.
///
/// Serde names are SCREAMING_SNAKE_CASE so that the serialized form of every
/// variant is the same string [`EdgeKind::as_str`] returns (`"CO_CHANGES"`,
/// `"DEPENDS_ON"`, …). The previous `UPPERCASE` rule only upper-cased the
/// variant identifier, yielding `"COCHANGES"` / `"DEPENDSON"`, which disagreed
/// with `as_str`. Those legacy spellings are still accepted when deserializing
/// (via `alias`) so data written by earlier versions keeps loading.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum EdgeKind {
    /// Function/method invocation.
    Calls,
    /// File imports another file or module.
    Imports,
    /// Class inherits from or implements another class/interface.
    Inherits,
    /// File exposes a symbol (file → function/class).
    Exports,
    /// Two files changed together in git history.
    // Legacy wire name produced by the old `UPPERCASE` rename rule.
    #[serde(alias = "COCHANGES")]
    CoChanges,
    /// Author owns a file (from git blame).
    Owns,
    /// File depends on an external package (from manifest parsing).
    // Legacy wire name produced by the old `UPPERCASE` rename rule.
    #[serde(alias = "DEPENDSON")]
    DependsOn,
    /// Test file exercises a production symbol.
    Tests,
}

impl EdgeKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            EdgeKind::Calls => "CALLS",
            EdgeKind::Imports => "IMPORTS",
            EdgeKind::Inherits => "INHERITS",
            EdgeKind::Exports => "EXPORTS",
            EdgeKind::CoChanges => "CO_CHANGES",
            EdgeKind::Owns => "OWNS",
            EdgeKind::DependsOn => "DEPENDS_ON",
            EdgeKind::Tests => "TESTS",
        }
    }
}

/// A graph node in the form a language parser emits it, prior to being
/// written to the graph DB.
#[derive(Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct NodeDef {
    /// Stable, unique identifier in the form `<prefix>:<path>:<name>`,
    /// e.g. `fn:src/lib.rs:parse`.
    pub id: String,
    /// Semantic category of the node.
    pub kind: NodeKind,
    /// Name of the node.
    pub name: String,
    /// File path relative to the repository root.
    pub path: String,
    /// Line on which the node starts.
    /// NOTE(review): 0- vs 1-based numbering is not visible here — confirm.
    pub line_start: u32,
    /// Line on which the node ends.
    /// NOTE(review): inclusive/exclusive is not visible here — confirm.
    pub line_end: u32,
    /// Extra, parser-specific data (e.g. `{"exported": true, "complexity": 4.0}`).
    /// Falls back to the `serde_json::Value` default when the field is missing
    /// from the input.
    #[serde(default)]
    pub metadata: serde_json::Value,
}

impl Default for NodeDef {
    fn default() -> Self {
        Self {
            id: String::new(),
            kind: NodeKind::File,
            name: String::new(),
            path: String::new(),
            line_start: 0,
            line_end: 0,
            metadata: serde_json::Value::Null,
        }
    }
}

/// A directed graph edge in the form a language parser or the resolver emits it.
#[derive(Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct EdgeDef {
    /// Origin of the edge — presumably a [`NodeDef::id`]; verify against callers.
    pub src: String,
    /// Target of the edge — presumably a [`NodeDef::id`]; verify against callers.
    pub dst: String,
    /// Relationship this edge expresses.
    pub kind: EdgeKind,
    /// How strong the relationship is, relatively speaking (default 1.0).
    #[serde(default = "default_edge_weight")]
    pub weight: f64,
    /// How certain the parser is that the edge is real, in 0.0–1.0
    /// (default 1.0; fuzzy matches use 0.8).
    // Shares `default_edge_weight` with `weight` because both default to 1.0.
    #[serde(default = "default_edge_weight")]
    pub confidence: f64,
}

impl Default for EdgeDef {
    fn default() -> Self {
        Self {
            src: String::new(),
            dst: String::new(),
            kind: EdgeKind::Calls,
            weight: 1.0,
            confidence: 1.0,
        }
    }
}

/// Serde fallback for `EdgeDef::weight` and `EdgeDef::confidence` when the
/// field is absent from the input: always 1.0.
fn default_edge_weight() -> f64 {
    1.0_f64
}

/// Everything extracted from one parsed source file.
#[derive(Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct ParseResult {
    /// Nodes found in the file.
    pub nodes: Vec<NodeDef>,
    /// Edges found in the file.
    pub edges: Vec<EdgeDef>,
    /// Comment annotations found in the file; an empty list is used when the
    /// field is missing from deserialized input.
    #[serde(default)]
    pub comment_tags: Vec<CommentTag>,
}

impl ParseResult {
    pub fn new() -> Self {
        Self {
            nodes: Vec::new(),
            edges: Vec::new(),
            comment_tags: Vec::new(),
        }
    }
}

impl Default for ParseResult {
    fn default() -> Self {
        Self::new()
    }
}

/// Trait implemented by every language-specific Tree-sitter parser.
///
/// Implementors must be `Send + Sync`; [`ParserRegistry`] stores them as boxed
/// trait objects and calls them from a Rayon parallel iterator.
pub trait LanguageParser: Send + Sync {
    /// File extensions this parser handles, e.g. `&["ts", "tsx"]`.
    fn extensions(&self) -> &[&str];
    /// Parse a source file and return all nodes, edges, and comment tags found.
    ///
    /// # Errors
    ///
    /// Returns an `anyhow` error when the implementation fails to process `file`.
    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult>;
}

/// Registry that maps [`Language`] variants to their [`LanguageParser`] implementations.
///
/// Constructed with all built-in parsers pre-registered.  Use [`ParserRegistry::parse`]
/// for a single file or [`ParserRegistry::parse_all`] for parallel batch processing.
pub struct ParserRegistry {
    /// Parser to dispatch to for each registered language; a language with no
    /// entry yields an empty [`ParseResult`] from [`ParserRegistry::parse`].
    parsers: HashMap<Language, Box<dyn LanguageParser>>,
}

impl ParserRegistry {
    /// Create a registry with all built-in language parsers registered.
    pub fn new() -> Self {
        let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();

        parsers.insert(
            Language::TypeScript,
            Box::new(super::parsers::ts::TypeScriptParser::new()),
        );
        parsers.insert(
            Language::JavaScript,
            Box::new(super::parsers::ts::TypeScriptParser::new()),
        );
        parsers.insert(
            Language::Python,
            Box::new(super::parsers::py::PythonParser::new()),
        );
        parsers.insert(
            Language::Rust,
            Box::new(super::parsers::rust::RustParser::new()),
        );
        parsers.insert(Language::Go, Box::new(super::parsers::go::GoParser::new()));
        parsers.insert(
            Language::Java,
            Box::new(super::parsers::java::JavaParser::new()),
        );
        parsers.insert(
            Language::CSharp,
            Box::new(super::parsers::java::JavaParser::new()),
        );
        parsers.insert(
            Language::Php,
            Box::new(super::parsers::php::PhpParser::new()),
        );

        Self { parsers }
    }

    /// Parse a single file, returning an empty result for unknown languages.
    pub fn parse(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
        if let Some(parser) = self.parsers.get(&file.language) {
            parser.extract(file)
        } else {
            Ok(ParseResult::new())
        }
    }

    /// Parse all files in parallel using Rayon, logging warnings on individual failures.
    pub fn parse_all(&self, files: &[SourceFile]) -> Vec<ParseResult> {
        use rayon::prelude::*;
        files
            .par_iter()
            .map(|file| {
                self.parse(file).unwrap_or_else(|e| {
                    tracing::warn!("Parse error in {}: {}", file.relative_path, e);
                    ParseResult::new()
                })
            })
            .collect()
    }
}

impl Default for ParserRegistry {
    fn default() -> Self {
        Self::new()
    }
}