use anyhow::Result;
use serde::Serialize;
use tree_sitter::{Node, Parser, Tree};
use crate::languages::{LanguageConfig, get_language_config};
/// Everything extracted from one parsed source file, grouped by construct kind.
///
/// The vectors are filled while the syntax tree is walked parent-first, so
/// entries appear in the order their nodes are visited.
#[derive(Debug, Clone, Serialize, Default)]
pub struct CodeStructure {
/// Import statements found anywhere in the tree.
pub imports: Vec<Import>,
/// Struct-like definitions, each with its collected fields.
pub structs: Vec<StructDef>,
/// Function-like definitions.
pub functions: Vec<FunctionDef>,
/// Trait-like definitions.
pub traits: Vec<TraitDef>,
/// Enum-like definitions, each with its variant names.
pub enums: Vec<EnumDef>,
}
impl CodeStructure {
pub fn exports_only(&self) -> CodeStructure {
CodeStructure {
imports: self.imports.clone(),
structs: self
.structs
.iter()
.filter(|s| s.is_public)
.cloned()
.collect(),
functions: self
.functions
.iter()
.filter(|f| f.is_public)
.cloned()
.collect(),
traits: self
.traits
.iter()
.filter(|t| t.is_public)
.cloned()
.collect(),
enums: self.enums.iter().filter(|e| e.is_public).cloned().collect(),
}
}
}
/// A single import statement.
#[derive(Debug, Clone, Serialize)]
pub struct Import {
/// First line of the import node's source text, with surrounding whitespace trimmed.
pub name: String,
/// 1-based line on which the import node starts.
pub line: usize,
}
/// A struct-like definition.
#[derive(Debug, Clone, Serialize)]
pub struct StructDef {
/// Text of the node's `name` field.
pub name: String,
/// Visibility as decided by `check_visibility`.
pub is_public: bool,
/// 1-based line on which the definition starts.
pub start_line: usize,
/// 1-based line on which the definition ends.
pub end_line: usize,
/// Fields found among the node's children and grandchildren.
pub fields: Vec<FieldDef>,
}
/// A single field of a struct-like definition.
#[derive(Debug, Clone, Serialize)]
pub struct FieldDef {
/// Text of the field node's `name` field.
pub name: String,
/// Text of the field node's `type` field, or `"unknown"` when none can be read.
pub type_name: String,
/// Visibility as decided by `check_visibility` on the field node.
pub is_public: bool,
}
/// A function-like definition.
#[derive(Debug, Clone, Serialize)]
pub struct FunctionDef {
/// Text of the node's `name` field.
pub name: String,
/// Visibility as decided by `check_visibility`.
pub is_public: bool,
/// Whether `check_async` found an `async` marker on the definition.
pub is_async: bool,
/// One rendered string per parameter: `name: type`, `name`, or the raw parameter text.
pub params: Vec<String>,
/// Text of the `return_type` field with any leading `->` removed; `None` when absent.
pub return_type: Option<String>,
/// 1-based line on which the definition starts.
pub start_line: usize,
/// 1-based line on which the definition ends.
pub end_line: usize,
}
/// A trait-like definition.
#[derive(Debug, Clone, Serialize)]
pub struct TraitDef {
/// Text of the node's `name` field.
pub name: String,
/// Visibility as decided by `check_visibility`.
pub is_public: bool,
/// 1-based line on which the definition starts.
pub start_line: usize,
/// 1-based line on which the definition ends.
pub end_line: usize,
}
/// An enum-like definition.
#[derive(Debug, Clone, Serialize)]
pub struct EnumDef {
/// Text of the node's `name` field.
pub name: String,
/// Visibility as decided by `check_visibility`.
pub is_public: bool,
/// 1-based line on which the definition starts.
pub start_line: usize,
/// 1-based line on which the definition ends.
pub end_line: usize,
/// Variant labels collected from the enum node's descendants, in visit order.
pub variants: Vec<String>,
}
/// Parses `source` with the grammar selected by `extension` and extracts its structure.
///
/// # Errors
///
/// Returns an error when `get_language_config` rejects `extension`, when the
/// parser refuses the configured language, or when the parser produces no
/// tree (reported as "Failed to parse file").
pub fn parse_file(source: &str, extension: &str) -> Result<CodeStructure> {
    let config = get_language_config(extension)?;

    let mut parser = Parser::new();
    parser.set_language(&config.language)?;

    match parser.parse(source, None) {
        Some(tree) => extract_structure(&tree, source, &config),
        None => Err(anyhow::anyhow!("Failed to parse file")),
    }
}
/// Walks `tree` from its root and gathers every recognised construct.
///
/// Always returns `Ok`; the `Result` wrapper matches what `parse_file` returns.
fn extract_structure(tree: &Tree, source: &str, config: &LanguageConfig) -> Result<CodeStructure> {
    let mut collected = CodeStructure::default();
    extract_from_node(tree.root_node(), source, config, &mut collected);
    Ok(collected)
}
/// Records `node` in `structure` if its kind is one the language config
/// recognises, then recurses into every child.
///
/// A node is tested against each kind list independently, so a kind listed
/// in several lists is recorded once per list. Nodes are visited parent-first,
/// children in order.
fn extract_from_node(
    node: Node,
    source: &str,
    config: &LanguageConfig,
    structure: &mut CodeStructure,
) {
    let kind = node.kind();

    // `Vec::extend` with an `Option` pushes the value when present and is a
    // no-op otherwise.
    if config.import_kinds.contains(&kind) {
        structure.imports.extend(extract_import(node, source, config));
    }
    if config.struct_kinds.contains(&kind) {
        structure.structs.extend(extract_struct(node, source, config));
    }
    if config.function_kinds.contains(&kind) {
        structure
            .functions
            .extend(extract_function(node, source, config));
    }
    if config.trait_kinds.contains(&kind) {
        structure.traits.extend(extract_trait(node, source, config));
    }
    if config.enum_kinds.contains(&kind) {
        structure.enums.extend(extract_enum(node, source, config));
    }

    let mut walker = node.walk();
    for descendant in node.children(&mut walker) {
        extract_from_node(descendant, source, config, structure);
    }
}
/// Builds an [`Import`] from an import node.
///
/// The name is the first line of the node's text, trimmed; the line number is
/// 1-based. Returns `None` when the node's text is not valid UTF-8.
fn extract_import(node: Node, source: &str, _config: &LanguageConfig) -> Option<Import> {
    let raw = node.utf8_text(source.as_bytes()).ok()?;
    // Multi-line imports are summarised by their first line only.
    let first_line = match raw.lines().next() {
        Some(line) => line,
        None => raw,
    };
    Some(Import {
        name: first_line.trim().to_string(),
        line: node.start_position().row + 1,
    })
}
/// Builds a [`StructDef`] from a struct-like node.
///
/// Returns `None` when the node has no `name` field or the name is not valid
/// UTF-8. Line numbers are 1-based.
fn extract_struct(node: Node, source: &str, config: &LanguageConfig) -> Option<StructDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(StructDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
        fields: extract_fields(node, source, config),
    })
}
/// Collects the fields declared by a struct-like node.
///
/// Field nodes are searched at two depths only: the node's direct children
/// and each child's own children. Each direct child is considered before its
/// children, so the result follows source order.
fn extract_fields(node: Node, source: &str, config: &LanguageConfig) -> Vec<FieldDef> {
    let mut collected = Vec::new();
    let mut outer = node.walk();

    for direct in node.children(&mut outer) {
        let mut inner = direct.walk();
        // The direct child first, then its children.
        let candidates = std::iter::once(direct).chain(direct.children(&mut inner));
        collected.extend(
            candidates
                .filter(|candidate| config.field_kinds.contains(&candidate.kind()))
                .filter_map(|candidate| extract_single_field(candidate, source, config)),
        );
    }

    collected
}
/// Builds a [`FieldDef`] from a field node.
///
/// Returns `None` when the node has no `name` field or the name is not valid
/// UTF-8. The type falls back to `"unknown"` when no `type` field can be read.
///
/// Some grammars (e.g. TypeScript's `type_annotation`) include the leading
/// `:` in the node stored under `type`; that single colon is stripped so the
/// result is `string` rather than `: string`. A leading `::` (a Rust global
/// path such as `::std::string::String`) is left untouched.
fn extract_single_field(node: Node, source: &str, config: &LanguageConfig) -> Option<FieldDef> {
    let bytes = source.as_bytes();

    let name = find_child_by_field(node, "name")?
        .utf8_text(bytes)
        .ok()?
        .to_string();

    let type_name = find_child_by_field(node, "type")
        .and_then(|type_node| type_node.utf8_text(bytes).ok())
        .map(|raw| match raw.strip_prefix(':') {
            // Annotation-style node: drop the separator, keep the type.
            Some(rest) if !rest.starts_with(':') => rest.trim_start(),
            _ => raw,
        })
        .map(|ty| ty.to_string())
        .unwrap_or_else(|| "unknown".to_string());

    let is_public = check_visibility(node, source, config);

    Some(FieldDef {
        name,
        type_name,
        is_public,
    })
}
/// Builds a [`FunctionDef`] from a function-like node.
///
/// Returns `None` when the node has no `name` field or the name is not valid
/// UTF-8. Line numbers are 1-based.
fn extract_function(node: Node, source: &str, config: &LanguageConfig) -> Option<FunctionDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    let first_row = node.start_position().row;
    let last_row = node.end_position().row;

    Some(FunctionDef {
        name,
        is_public: check_visibility(node, source, config),
        is_async: check_async(node, source),
        params: extract_parameters(node, source, config),
        return_type: extract_return_type(node, source, config),
        start_line: first_row + 1,
        end_line: last_row + 1,
    })
}
/// Renders the parameters of a function-like node as display strings.
///
/// Only children of the `parameters` field whose kind is listed in
/// `config.param_kinds` are considered. A parameter with a `pattern` (or,
/// failing that, `name`) field is rendered as `name: type`, or just `name`
/// when no `type` field can be read. A parameter with neither field is
/// rendered as its trimmed source text (e.g. `&self`).
///
/// Some grammars (e.g. TypeScript's `type_annotation`) include the leading
/// `:` in the node stored under `type`; that single colon is stripped so the
/// result is `x: number` rather than `x: : number`. A leading `::` (a Rust
/// global path) is left untouched.
fn extract_parameters(node: Node, source: &str, config: &LanguageConfig) -> Vec<String> {
    let list = match find_child_by_field(node, "parameters") {
        Some(list) => list,
        None => return Vec::new(),
    };

    let bytes = source.as_bytes();
    let mut rendered = Vec::new();
    let mut walker = list.walk();

    for param in list.children(&mut walker) {
        if !config.param_kinds.contains(&param.kind()) {
            continue;
        }

        let label =
            find_child_by_field(param, "pattern").or_else(|| find_child_by_field(param, "name"));

        match label {
            Some(label) => {
                // A labelled parameter whose label is unreadable is skipped
                // rather than falling back to its raw text.
                if let Ok(name) = label.utf8_text(bytes) {
                    let annotation = find_child_by_field(param, "type")
                        .and_then(|ty| ty.utf8_text(bytes).ok())
                        .map(|ty| match ty.strip_prefix(':') {
                            // Annotation-style node: drop its own separator.
                            Some(rest) if !rest.starts_with(':') => rest.trim_start(),
                            _ => ty,
                        })
                        .map(|ty| format!(": {}", ty))
                        .unwrap_or_default();
                    rendered.push(format!("{}{}", name, annotation));
                }
            }
            None => {
                if let Ok(raw) = param.utf8_text(bytes) {
                    let raw = raw.trim();
                    if !raw.is_empty() && raw != "," {
                        rendered.push(raw.to_string());
                    }
                }
            }
        }
    }

    rendered
}
/// Returns the text of the node's `return_type` field, without leading
/// return-type punctuation.
///
/// Any leading `->` is removed, as before. A single leading `:` is removed
/// too, because some grammars (e.g. TypeScript's `type_annotation`) store the
/// colon inside the `return_type` node. A leading `::` (a Rust global path
/// such as `::std::io::Result<()>`) is left untouched.
///
/// Returns `None` when the field is absent or its text is not valid UTF-8.
fn extract_return_type(node: Node, source: &str, _config: &LanguageConfig) -> Option<String> {
    let type_node = find_child_by_field(node, "return_type")?;
    let raw = type_node.utf8_text(source.as_bytes()).ok()?;

    let without_arrow = raw.trim_start_matches("->").trim();
    let cleaned = match without_arrow.strip_prefix(':') {
        Some(rest) if !rest.starts_with(':') => rest.trim_start(),
        _ => without_arrow,
    };

    Some(cleaned.to_string())
}
/// Builds a [`TraitDef`] from a trait-like node.
///
/// Returns `None` when the node has no `name` field or the name is not valid
/// UTF-8. Line numbers are 1-based.
fn extract_trait(node: Node, source: &str, config: &LanguageConfig) -> Option<TraitDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(TraitDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
    })
}
/// Builds an [`EnumDef`] from an enum-like node.
///
/// Returns `None` when the node has no `name` field or the name is not valid
/// UTF-8. Line numbers are 1-based.
fn extract_enum(node: Node, source: &str, config: &LanguageConfig) -> Option<EnumDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(EnumDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
        variants: extract_enum_variants(node, source, config),
    })
}
/// Collects the variant labels of an enum-like node.
///
/// Every descendant of `node` (not `node` itself) whose kind is listed in
/// `config.enum_variant_kinds` contributes one label: the text of its `name`
/// field when it has one, otherwise its own trimmed text if non-empty.
/// Descendants are visited parent-first, and the search continues below
/// nodes that already matched.
fn extract_enum_variants(node: Node, source: &str, config: &LanguageConfig) -> Vec<String> {
    /// Records `current` if it is a variant node, then descends into its children.
    fn visit(current: Node, source: &str, config: &LanguageConfig, out: &mut Vec<String>) {
        if config.enum_variant_kinds.contains(&current.kind()) {
            let bytes = source.as_bytes();
            let label = match find_child_by_field(current, "name") {
                // A named variant is recorded verbatim.
                Some(name_node) => name_node.utf8_text(bytes).ok(),
                // Otherwise fall back to the whole node, skipping blank text.
                None => current
                    .utf8_text(bytes)
                    .ok()
                    .map(str::trim)
                    .filter(|text| !text.is_empty()),
            };
            if let Some(label) = label {
                out.push(label.to_string());
            }
        }

        let mut walker = current.walk();
        for child in current.children(&mut walker) {
            visit(child, source, config, out);
        }
    }

    let mut names = Vec::new();
    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        visit(child, source, config, &mut names);
    }
    names
}
/// Decides whether a definition node counts as publicly visible.
///
/// Checks are applied in this order, and the first one that applies wins:
///
/// 1. The first readable direct child of kind `visibility_modifier` decides
///    the result: public if and only if its text contains `pub`.
/// 2. A direct `decorator` child whose text contains `export` or `public`
///    makes the node public.
/// 3. A parent of kind `export_statement`, or a previous sibling of kind
///    `export` or `export_statement`, makes the node public.
/// 4. For the `python` config, the node is public unless its name starts
///    with `_` (and public when the name cannot be read).
/// 5. Every other language defaults to not public.
fn check_visibility(node: Node, source: &str, config: &LanguageConfig) -> bool {
    let bytes = source.as_bytes();

    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        match child.kind() {
            "visibility_modifier" => {
                if let Ok(text) = child.utf8_text(bytes) {
                    return text.contains("pub");
                }
            }
            "decorator" => {
                if let Ok(text) = child.utf8_text(bytes) {
                    if text.contains("export") || text.contains("public") {
                        return true;
                    }
                }
            }
            _ => {}
        }
    }

    let exported_by_parent = node
        .parent()
        .map_or(false, |parent| parent.kind() == "export_statement");
    if exported_by_parent {
        return true;
    }

    let exported_by_sibling = node.prev_sibling().map_or(false, |prev| {
        matches!(prev.kind(), "export" | "export_statement")
    });
    if exported_by_sibling {
        return true;
    }

    match config.name {
        "python" => match find_child_by_field(node, "name").map(|n| n.utf8_text(bytes)) {
            Some(Ok(name)) => !name.starts_with('_'),
            _ => true,
        },
        _ => false,
    }
}
/// Reports whether a function-like node is declared `async`.
///
/// Detection runs in two stages:
///
/// 1. Syntax: an `async` token that is a direct child of `node`, or a child
///    of a direct `function_modifiers` node. The Rust grammar nests `async`
///    there, so `pub async fn` is only visible through this path.
/// 2. Text fallback: the declaration header (the text before the `name`
///    field) is scanned for the whole word `async`. Matching whole words
///    keeps identifiers that merely start with `async` (e.g. a method named
///    `asyncHandler`) from being misreported. When the node has no `name`
///    field, only the first word of its text is considered, so an `async`
///    appearing later in the body cannot trigger a match.
fn check_async(node: Node, source: &str) -> bool {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "async" => return true,
            "function_modifiers" => {
                let mut modifier_cursor = child.walk();
                if child
                    .children(&mut modifier_cursor)
                    .any(|modifier| modifier.kind() == "async")
                {
                    return true;
                }
            }
            _ => {}
        }
    }

    let text = match node.utf8_text(source.as_bytes()) {
        Ok(text) => text,
        Err(_) => return false,
    };

    // Offset of the name relative to the start of this node's text.
    let name_offset = node
        .child_by_field_name("name")
        .map(|name| name.start_byte().saturating_sub(node.start_byte()));

    let is_word_char = |c: char| c.is_alphanumeric() || c == '_';

    match name_offset.and_then(|offset| text.get(..offset)) {
        Some(header) => header
            .split(|c: char| !is_word_char(c))
            .any(|word| word == "async"),
        None => {
            text.trim_start()
                .split(|c: char| !is_word_char(c))
                .next()
                == Some("async")
        }
    }
}
fn find_child_by_field<'a>(node: Node<'a>, field_name: &str) -> Option<Node<'a>> {
node.child_by_field_name(field_name)
}