1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
//! ast-grep parsing coordinator.
//!
//! Detects the language from a file's extension, selects the matching rules,
//! runs symbol and reference extraction via the unified engine, and splits
//! the file into code chunks.
use crate::index::chunker::{chunk_file, ChunkConfig, CodeChunk};
use crate::index::engine::AstGrepEngine;
use crate::index::symbol::{Reference, Symbol};
use ast_grep_core::tree_sitter::LanguageExt;
use std::path::Path;
/// Everything extracted from one source file by [`CodeParser::parse_file`].
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// The path that was passed to the parser, stored as an owned string.
    pub file_path: String,

    /// Canonical name of the language the file was parsed as.
    pub language: String,

    /// Symbols found in the file.
    pub symbols: Vec<Symbol>,

    /// References found in the file.
    pub references: Vec<Reference>,

    /// Code chunks produced for the file by the chunker.
    pub chunks: Vec<CodeChunk>,
}
/// Front end that drives ast-grep parsing for every supported language.
///
/// Holds the extraction engine together with the chunking configuration so a
/// single instance can be reused across many files.
pub struct CodeParser {
    /// Engine used for language lookup and symbol/reference extraction.
    engine: AstGrepEngine,

    /// Settings handed to the chunker on every parse.
    chunk_config: ChunkConfig,
}
impl CodeParser {
    /// Build a parser from a fresh [`AstGrepEngine`] and the default
    /// [`ChunkConfig`].
    pub fn new() -> Self {
        let engine = AstGrepEngine::new();
        let chunk_config = ChunkConfig::default();
        Self {
            engine,
            chunk_config,
        }
    }

    /// Extract symbols, references, and chunks from one file.
    ///
    /// `path` is used to pick the language (via its extension) and is recorded
    /// in the result; `content` is the raw file contents.
    ///
    /// Returns `None` when `path` has no extension (or the extension is not
    /// valid UTF-8), when the engine knows no language for that extension, or
    /// when `content` is not valid UTF-8.
    pub fn parse_file(&self, path: &str, content: &[u8]) -> Option<ParseResult> {
        let ext = Path::new(path).extension()?.to_str()?;
        let lang = self.engine.find_language(ext)?;
        let source = std::str::from_utf8(content).ok()?;

        // Build the syntax tree once; the same `root` is handed to all three
        // passes below instead of re-parsing for each of them.
        let root = lang.lang.ast_grep(source);

        let engine = &self.engine;
        let symbols = engine.extract_symbols_from_tree(lang, &root, source, path);
        let references = engine.extract_references_from_tree(lang, &root, source, path);
        let chunks = chunk_file(&root, source, path, &symbols, &self.chunk_config);

        // Consumers (graph nodes, MCP tools) expect one canonical name for the
        // JS/TS family, which shares the TypeScript extraction rules, so the
        // internal "tsx" and "javascript" names are reported as "typescript".
        let canonical = if matches!(lang.name, "tsx" | "javascript") {
            "typescript"
        } else {
            lang.name
        };

        let parsed = ParseResult {
            file_path: path.to_owned(),
            language: canonical.to_owned(),
            symbols,
            references,
            chunks,
        };
        Some(parsed)
    }

    /// Report whether the underlying engine supports the extension `ext`.
    pub fn supports_extension(&self, ext: &str) -> bool {
        let engine = &self.engine;
        engine.supports_extension(ext)
    }
}
impl Default for CodeParser {
fn default() -> Self {
Self::new()
}
}
// Unit tests for this module: compiled only in test builds, with the module
// body loaded from the file named by the `#[path]` attribute.
#[cfg(test)]
#[path = "tests/parser_tests.rs"]
mod tests;