use anyhow::Result;
use std::path::Path;
/// A named definition discovered in a source file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Symbol {
    /// Identifier of the definition. For Rust `impl` blocks this is the impl
    /// header text rather than a bare identifier.
    pub name: String,
    /// Category the definition was classified as.
    pub kind: SymbolKind,
    /// 1-based line on which the definition starts.
    pub line: usize,
    /// Declaration text for function-like symbols where the extractor
    /// provides one; `None` otherwise.
    pub signature: Option<String>,
}
/// Language-neutral category of a [`Symbol`].
///
/// The variant names follow Rust terminology; the extractors in this file
/// map constructs of other languages onto the closest variant.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum SymbolKind {
    /// Functions and methods.
    Function,
    /// Structs; also used for Python/TypeScript classes and Go struct types.
    Struct,
    /// Enumerations.
    Enum,
    /// Traits; also used for TypeScript interfaces and Go interface types.
    Trait,
    /// Rust `impl` blocks.
    Impl,
    /// Constants.
    Const,
    /// Type aliases and other named types.
    Type,
    /// Rust modules.
    Mod,
    /// Rust macro definitions.
    Macro,
}
/// Extracts the symbols defined in the file at `path`.
///
/// The parser is selected from the file extension; files with an unknown,
/// missing or non-UTF-8 extension go through the line-based generic scanner.
///
/// # Errors
///
/// Propagates whatever error the selected parser returns.
pub fn parse_file(path: &Path) -> Result<Vec<Symbol>> {
    let extension = path.extension().and_then(|e| e.to_str());
    match extension {
        Some("rs") => parse_rust(path),
        Some("py") => parse_python(path),
        // JavaScript files are handled by the TypeScript parser as well.
        Some("ts" | "tsx" | "js" | "jsx") => parse_typescript(path),
        Some("go") => parse_go(path),
        _ => parse_generic(path),
    }
}
fn parse_rust(path: &Path) -> Result<Vec<Symbol>> {
let source = std::fs::read_to_string(path)?;
let mut parser = tree_sitter::Parser::new();
let language = tree_sitter_rust::LANGUAGE;
parser.set_language(&language.into())?;
let tree = parser
.parse(&source, None)
.ok_or_else(|| anyhow::anyhow!("Failed to parse {}", path.display()))?;
let mut symbols = Vec::new();
extract_rust_symbols(&tree.root_node(), &source, &mut symbols);
Ok(symbols)
}
fn parse_python(path: &Path) -> Result<Vec<Symbol>> {
let source = std::fs::read_to_string(path)?;
let mut parser = tree_sitter::Parser::new();
let language = tree_sitter_python::LANGUAGE;
parser.set_language(&language.into())?;
let tree = parser
.parse(&source, None)
.ok_or_else(|| anyhow::anyhow!("Failed to parse {}", path.display()))?;
let mut symbols = Vec::new();
extract_python_symbols(&tree.root_node(), &source, &mut symbols);
Ok(symbols)
}
/// Recursively walks a Python syntax tree and appends functions and classes
/// to `symbols`.
///
/// Functions are recorded with a `def name(params) -> ret` signature and
/// classes as [`SymbolKind::Struct`]. After a function the walk continues
/// into the whole definition node, after a class into its `body` field only,
/// and into every other node unconditionally.
fn extract_python_symbols(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = node.walk();
    for item in node.children(&mut walker) {
        let line = item.start_position().row + 1;

        // Record the symbol (if any) and decide which node to descend into.
        let descend_into = match item.kind() {
            "function_definition" => {
                if let Some(ident) = item.child_by_field_name("name") {
                    let name = node_text(&ident, source);
                    let params = match item.child_by_field_name("parameters") {
                        Some(p) => node_text(&p, source),
                        None => String::new(),
                    };
                    let ret = match item.child_by_field_name("return_type") {
                        Some(r) => format!(" -> {}", node_text(&r, source)),
                        None => String::new(),
                    };
                    let signature = format!("def {}{}{}", name, params, ret);
                    symbols.push(Symbol {
                        name,
                        kind: SymbolKind::Function,
                        line,
                        signature: Some(signature),
                    });
                }
                Some(item)
            }
            "class_definition" => {
                if let Some(ident) = item.child_by_field_name("name") {
                    symbols.push(Symbol {
                        name: node_text(&ident, source),
                        kind: SymbolKind::Struct,
                        line,
                        signature: None,
                    });
                }
                item.child_by_field_name("body")
            }
            _ => Some(item),
        };

        if let Some(next) = descend_into {
            extract_python_symbols(&next, source, symbols);
        }
    }
}
fn parse_typescript(path: &Path) -> Result<Vec<Symbol>> {
let source = std::fs::read_to_string(path)?;
let mut parser = tree_sitter::Parser::new();
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
let language = if ext == "tsx" || ext == "jsx" {
tree_sitter_typescript::LANGUAGE_TSX
} else {
tree_sitter_typescript::LANGUAGE_TYPESCRIPT
};
parser.set_language(&language.into())?;
let tree = parser
.parse(&source, None)
.ok_or_else(|| anyhow::anyhow!("Failed to parse {}", path.display()))?;
let mut symbols = Vec::new();
extract_ts_symbols(&tree.root_node(), &source, &mut symbols);
Ok(symbols)
}
/// Recursively walks a TypeScript/JavaScript syntax tree and appends the
/// declarations it finds to `symbols`.
///
/// Function declarations carry a signature; methods, classes, interfaces,
/// type aliases and enums do not. Class bodies are searched for members,
/// `arrow_function` and `function` nodes are skipped entirely, and any other
/// node kind is searched recursively.
fn extract_ts_symbols(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = node.walk();
    for item in node.children(&mut walker) {
        let tag = item.kind();

        let symbol_kind = match tag {
            "function_declaration" | "method_definition" => SymbolKind::Function,
            "class_declaration" => SymbolKind::Struct,
            "interface_declaration" => SymbolKind::Trait,
            "type_alias_declaration" => SymbolKind::Type,
            "enum_declaration" => SymbolKind::Enum,
            // Function expressions are not recorded and not searched.
            "arrow_function" | "function" => continue,
            // Everything else (including export statements and lexical
            // declarations) is just a container to look inside.
            _ => {
                extract_ts_symbols(&item, source, symbols);
                continue;
            }
        };

        if let Some(ident) = item.child_by_field_name("name") {
            // Only free function declarations get a signature.
            let signature =
                (tag == "function_declaration").then(|| extract_ts_fn_signature(&item, source));
            symbols.push(Symbol {
                name: node_text(&ident, source),
                kind: symbol_kind,
                line: item.start_position().row + 1,
                signature,
            });
        }

        // Classes are the only declarations whose members are collected.
        if tag == "class_declaration" {
            if let Some(members) = item.child_by_field_name("body") {
                extract_ts_symbols(&members, source, symbols);
            }
        }
    }
}
fn extract_ts_fn_signature(node: &tree_sitter::Node, source: &str) -> String {
let start = node.start_byte();
let text = &source[start..];
if let Some(brace_pos) = text.find('{') {
text[..brace_pos]
.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
} else {
node_text(node, source)
}
}
/// Recursively walks a Rust syntax tree and appends the items it finds to
/// `symbols`.
///
/// Functions carry a signature; structs, enums, traits, consts, type
/// aliases, modules and macro definitions are recorded by name only. An
/// `impl` block is always recorded (named by its header) and its body is
/// scanned for methods. Recognised items are not searched further; any other
/// node kind is searched recursively.
fn extract_rust_symbols(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = node.walk();
    for item in node.children(&mut walker) {
        let tag = item.kind();
        let line = item.start_position().row + 1;

        // Impl blocks have no `name` field, so they are handled on their own.
        if tag == "impl_item" {
            symbols.push(Symbol {
                name: extract_impl_name(&item, source),
                kind: SymbolKind::Impl,
                line,
                signature: None,
            });
            if let Some(block) = item.child_by_field_name("body") {
                extract_impl_methods(&block, source, symbols);
            }
            continue;
        }

        let symbol_kind = match tag {
            "function_item" => SymbolKind::Function,
            "struct_item" => SymbolKind::Struct,
            "enum_item" => SymbolKind::Enum,
            "trait_item" => SymbolKind::Trait,
            "const_item" => SymbolKind::Const,
            "type_item" => SymbolKind::Type,
            "mod_item" => SymbolKind::Mod,
            "macro_definition" => SymbolKind::Macro,
            _ => {
                extract_rust_symbols(&item, source, symbols);
                continue;
            }
        };

        let Some(ident) = item.child_by_field_name("name") else {
            continue;
        };
        let signature =
            (symbol_kind == SymbolKind::Function).then(|| extract_fn_signature(&item, source));
        symbols.push(Symbol {
            name: node_text(&ident, source),
            kind: symbol_kind,
            line,
            signature,
        });
    }
}
/// Appends every named `function_item` that is a direct child of an impl
/// `body` to `symbols`, each with its signature.
///
/// Only direct children are inspected; nothing is searched recursively.
fn extract_impl_methods(body: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = body.walk();
    let methods = body
        .children(&mut walker)
        .filter(|member| member.kind() == "function_item")
        .filter_map(|method| {
            let ident = method.child_by_field_name("name")?;
            Some(Symbol {
                name: node_text(&ident, source),
                kind: SymbolKind::Function,
                line: method.start_position().row + 1,
                signature: Some(extract_fn_signature(&method, source)),
            })
        });
    symbols.extend(methods);
}
fn extract_fn_signature(node: &tree_sitter::Node, source: &str) -> String {
let start = node.start_byte();
let text = &source[start..];
if let Some(brace_pos) = text.find('{') {
let sig = text[..brace_pos].trim();
let sig: String = sig.split_whitespace().collect::<Vec<_>>().join(" ");
sig
} else {
node_text(node, source)
}
}
/// Returns the header of an `impl` block (e.g. `impl<T> Trait for Foo<T>`)
/// to use as the symbol name.
///
/// The header is the text from the start of `node` up to its `body` field,
/// with runs of whitespace collapsed so that headers spread over several
/// lines (long generics, `where` clauses) still yield a one-line name,
/// matching how function signatures are rendered. Bounding by the body node
/// avoids cutting at a brace inside the header and never reads past the end
/// of the item. Falls back to the literal `"impl"` when no body is present.
fn extract_impl_name(node: &tree_sitter::Node, source: &str) -> String {
    match node.child_by_field_name("body") {
        Some(body) => source[node.start_byte()..body.start_byte()]
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" "),
        None => "impl".to_string(),
    }
}
/// Copies the slice of `source` covered by `node` into an owned `String`.
///
/// # Panics
///
/// Panics if the node's byte range is out of bounds for `source` or does not
/// fall on UTF-8 character boundaries.
fn node_text(node: &tree_sitter::Node, source: &str) -> String {
    let (from, to) = (node.start_byte(), node.end_byte());
    source[from..to].to_owned()
}
fn parse_go(path: &Path) -> Result<Vec<Symbol>> {
let source = std::fs::read_to_string(path)?;
let mut parser = tree_sitter::Parser::new();
let language = tree_sitter_go::LANGUAGE;
parser.set_language(&language.into())?;
let tree = parser
.parse(&source, None)
.ok_or_else(|| anyhow::anyhow!("Failed to parse {}", path.display()))?;
let mut symbols = Vec::new();
extract_go_symbols(&tree.root_node(), &source, &mut symbols);
Ok(symbols)
}
/// Recursively walks a Go syntax tree and appends the declarations it finds
/// to `symbols`.
///
/// Functions and methods are both recorded as [`SymbolKind::Function`] with
/// a signature. Type and const declarations are delegated to their dedicated
/// helpers. Any other node kind is searched recursively.
fn extract_go_symbols(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = node.walk();
    for item in node.children(&mut walker) {
        match item.kind() {
            "function_declaration" | "method_declaration" => {
                let Some(ident) = item.child_by_field_name("name") else {
                    continue;
                };
                let signature = extract_go_fn_signature(&item, source);
                symbols.push(Symbol {
                    name: node_text(&ident, source),
                    kind: SymbolKind::Function,
                    line: item.start_position().row + 1,
                    signature: Some(signature),
                });
            }
            "type_declaration" => extract_go_type_decl(&item, source, symbols),
            "const_declaration" => extract_go_const_decl(&item, source, symbols),
            _ => extract_go_symbols(&item, source, symbols),
        }
    }
}
/// Appends one symbol per named `type_spec` directly inside a Go type
/// declaration.
///
/// The kind is derived from the spec's `type` field: struct types become
/// [`SymbolKind::Struct`], interface types [`SymbolKind::Trait`], and
/// anything else (or a missing `type` field) [`SymbolKind::Type`].
fn extract_go_type_decl(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut walker = node.walk();
    for spec in node.children(&mut walker) {
        if spec.kind() != "type_spec" {
            continue;
        }
        let Some(ident) = spec.child_by_field_name("name") else {
            continue;
        };

        let underlying = spec.child_by_field_name("type").map(|t| t.kind());
        let kind = match underlying {
            Some("struct_type") => SymbolKind::Struct,
            Some("interface_type") => SymbolKind::Trait,
            _ => SymbolKind::Type,
        };

        symbols.push(Symbol {
            name: node_text(&ident, source),
            kind,
            line: spec.start_position().row + 1,
            signature: None,
        });
    }
}
/// Appends one [`SymbolKind::Const`] symbol per constant name found in the
/// `const_spec` children of a Go const declaration.
///
/// A single spec may declare several names (`const A, B = 1, 2`), so every
/// child carrying the `name` field is visited rather than only the first.
/// Each symbol is reported on the line where its spec starts.
fn extract_go_const_decl(node: &tree_sitter::Node, source: &str, symbols: &mut Vec<Symbol>) {
    let mut cursor = node.walk();
    for spec in node.children(&mut cursor) {
        if spec.kind() != "const_spec" {
            continue;
        }
        let line = spec.start_position().row + 1;
        let mut name_cursor = spec.walk();
        for name_node in spec.children_by_field_name("name", &mut name_cursor) {
            // The `name` field covers the whole comma-separated list, so skip
            // anything that is not an identifier (i.e. the `,` separators).
            if name_node.kind() != "identifier" {
                continue;
            }
            symbols.push(Symbol {
                name: node_text(&name_node, source),
                kind: SymbolKind::Const,
                line,
                signature: None,
            });
        }
    }
}
fn extract_go_fn_signature(node: &tree_sitter::Node, source: &str) -> String {
let start = node.start_byte();
let text = &source[start..];
if let Some(brace_pos) = text.find('{') {
text[..brace_pos]
.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
} else {
node_text(node, source)
}
}
fn parse_generic(path: &Path) -> Result<Vec<Symbol>> {
let source = std::fs::read_to_string(path)?;
let mut symbols = Vec::new();
for (i, line) in source.lines().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with("def ") || trimmed.starts_with("async def ") {
if let Some(name) = extract_name_after(trimmed, "def ") {
symbols.push(Symbol {
name,
kind: SymbolKind::Function,
line: i + 1,
signature: Some(trimmed.to_string()),
});
}
} else if trimmed.starts_with("func ") || trimmed.starts_with("fn ") {
if let Some(name) = extract_name_after(
trimmed,
if trimmed.starts_with("func ") {
"func "
} else {
"fn "
},
) {
symbols.push(Symbol {
name,
kind: SymbolKind::Function,
line: i + 1,
signature: Some(trimmed.to_string()),
});
}
} else if trimmed.starts_with("function ") {
if let Some(name) = extract_name_after(trimmed, "function ") {
symbols.push(Symbol {
name,
kind: SymbolKind::Function,
line: i + 1,
signature: Some(trimmed.to_string()),
});
}
} else if trimmed.starts_with("class ") {
if let Some(name) = extract_name_after(trimmed, "class ") {
symbols.push(Symbol {
name,
kind: SymbolKind::Struct,
line: i + 1,
signature: None,
});
}
} else if trimmed.starts_with("interface ")
&& let Some(name) = extract_name_after(trimmed, "interface ")
{
symbols.push(Symbol {
name,
kind: SymbolKind::Trait,
line: i + 1,
signature: None,
});
}
}
Ok(symbols)
}
/// Returns the identifier that directly follows `keyword` at the start of
/// `line`.
///
/// The identifier is the longest run of alphanumeric characters and
/// underscores immediately after the keyword. When `keyword` is `"def "`,
/// a line starting with `"async def "` is accepted as well.
///
/// Returns `None` if `line` does not start with the keyword (or its async
/// form) or if no identifier character follows it.
fn extract_name_after(line: &str, keyword: &str) -> Option<String> {
    let rest = match line.strip_prefix(keyword) {
        Some(rest) => rest,
        // An `async def` line does not start with `def `, so it has to be
        // handled before giving up on the prefix.
        None if keyword == "def " => line.strip_prefix("async def ")?,
        None => return None,
    };
    let name: String = rest
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    (!name.is_empty()).then_some(name)
}