Skip to main content

agentic_codebase/parse/
mod.rs

//! Multi-language parsing engine using tree-sitter.
//!
//! Converts source code into raw syntax information. One module per language.
//! No semantic analysis here — just syntax extraction.
5
6pub mod cpp;
7pub mod csharp;
8pub mod go;
9pub mod java;
10pub mod parser;
11pub mod python;
12pub mod rust;
13pub mod treesitter;
14pub mod typescript;
15
16pub use parser::{ParseOptions, ParseResult, ParseStats, Parser};
17
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20
21use crate::types::{AcbResult, CodeUnitType, Language, Span, Visibility};
22
23/// A code unit extracted from parsing, before semantic analysis.
24#[derive(Debug, Clone)]
25pub struct RawCodeUnit {
26    /// Temporary ID (reassigned during graph building).
27    pub temp_id: u64,
28    /// Type of code unit.
29    pub unit_type: CodeUnitType,
30    /// Programming language.
31    pub language: Language,
32    /// Simple name.
33    pub name: String,
34    /// Qualified name (may be partial, completed by semantic).
35    pub qualified_name: String,
36    /// Source file path.
37    pub file_path: PathBuf,
38    /// Location in source.
39    pub span: Span,
40    /// Type signature (raw, may need resolution).
41    pub signature: Option<String>,
42    /// Documentation.
43    pub doc: Option<String>,
44    /// Visibility.
45    pub visibility: Visibility,
46    /// Is async.
47    pub is_async: bool,
48    /// Is generator.
49    pub is_generator: bool,
50    /// Cyclomatic complexity.
51    pub complexity: u32,
52    /// Raw references found (names, not resolved IDs).
53    pub references: Vec<RawReference>,
54    /// Children temp_ids (for containers like modules, classes).
55    pub children: Vec<u64>,
56    /// Parent temp_id (for nested items).
57    pub parent: Option<u64>,
58    /// Language-specific metadata.
59    pub metadata: HashMap<String, String>,
60}
61
62impl RawCodeUnit {
63    /// Create a new raw code unit with minimal required fields.
64    pub fn new(
65        unit_type: CodeUnitType,
66        language: Language,
67        name: String,
68        file_path: PathBuf,
69        span: Span,
70    ) -> Self {
71        let qualified_name = name.clone();
72        Self {
73            temp_id: 0,
74            unit_type,
75            language,
76            name,
77            qualified_name,
78            file_path,
79            span,
80            signature: None,
81            doc: None,
82            visibility: Visibility::Unknown,
83            is_async: false,
84            is_generator: false,
85            complexity: 0,
86            references: Vec::new(),
87            children: Vec::new(),
88            parent: None,
89            metadata: HashMap::new(),
90        }
91    }
92}
93
94/// A raw reference found during parsing.
95#[derive(Debug, Clone)]
96pub struct RawReference {
97    /// The name being referenced.
98    pub name: String,
99    /// The kind of reference.
100    pub kind: ReferenceKind,
101    /// Where in the source.
102    pub span: Span,
103}
104
/// The kind of a raw reference.
///
/// `Hash` is derived alongside `Eq` so that values can be used as keys in
/// hashed collections (for example, to group or count references by kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ReferenceKind {
    /// Import statement.
    Import,
    /// Function call.
    Call,
    /// Type usage.
    TypeUse,
    /// Inheritance.
    Inherit,
    /// Interface implementation.
    Implement,
    /// Attribute/field access.
    Access,
}
121
122/// Parse error with severity.
123#[derive(Debug, Clone)]
124pub struct ParseFileError {
125    /// File path.
126    pub path: PathBuf,
127    /// Location in source.
128    pub span: Option<Span>,
129    /// Error message.
130    pub message: String,
131    /// Severity level.
132    pub severity: Severity,
133}
134
/// Severity of a parse issue.
///
/// `Hash` is derived alongside `Eq` so that values can be used as keys in
/// hashed collections (for example, to bucket issues by severity).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Severity {
    /// Could not parse at all.
    Error,
    /// Parsed but something odd.
    Warning,
    /// Informational.
    Info,
}
145
146/// Trait for language-specific parsers.
147pub trait LanguageParser: Send + Sync {
148    /// Extract code units from a parsed tree.
149    fn extract_units(
150        &self,
151        tree: &tree_sitter::Tree,
152        source: &str,
153        file_path: &Path,
154    ) -> AcbResult<Vec<RawCodeUnit>>;
155
156    /// Check if a file is a test file.
157    fn is_test_file(&self, path: &Path, source: &str) -> bool;
158}