codemem_engine/index/
parser.rs1use crate::index::chunker::{chunk_file, ChunkConfig, CodeChunk};
7use crate::index::engine::AstGrepEngine;
8use crate::index::symbol::{Reference, Symbol};
9use ast_grep_core::tree_sitter::LanguageExt;
10use std::path::Path;
11
12#[derive(Debug, Clone)]
14pub struct ParseResult {
15 pub file_path: String,
17 pub language: String,
19 pub symbols: Vec<Symbol>,
21 pub references: Vec<Reference>,
23 pub chunks: Vec<CodeChunk>,
25}
26
27pub struct CodeParser {
29 engine: AstGrepEngine,
30 chunk_config: ChunkConfig,
31}
32
33impl CodeParser {
34 pub fn new() -> Self {
36 Self {
37 engine: AstGrepEngine::new(),
38 chunk_config: ChunkConfig::default(),
39 }
40 }
41
42 pub fn parse_file(&self, path: &str, content: &[u8]) -> Option<ParseResult> {
46 let extension = Path::new(path).extension().and_then(|ext| ext.to_str())?;
47
48 let lang = self.engine.find_language(extension)?;
49 let source = std::str::from_utf8(content).ok()?;
50
51 let root = lang.lang.ast_grep(source);
53 let symbols = self
54 .engine
55 .extract_symbols_from_tree(lang, &root, source, path);
56 let references = self
57 .engine
58 .extract_references_from_tree(lang, &root, source, path);
59 let chunks = chunk_file(&root, source, path, &symbols, &self.chunk_config);
60
61 let language_name = match lang.name {
66 "tsx" | "javascript" => "typescript",
67 other => other,
68 };
69
70 Some(ParseResult {
71 file_path: path.to_string(),
72 language: language_name.to_string(),
73 symbols,
74 references,
75 chunks,
76 })
77 }
78
79 pub fn supports_extension(&self, ext: &str) -> bool {
81 self.engine.supports_extension(ext)
82 }
83}
84
85impl Default for CodeParser {
86 fn default() -> Self {
87 Self::new()
88 }
89}
90
91#[cfg(test)]
92#[path = "tests/parser_tests.rs"]
93mod tests;