use anyhow::Result;
use serde::Serialize;
use tree_sitter::{Node, Parser, Tree};
use crate::languages::{LanguageConfig, get_language_config};
/// Aggregated declarations collected from one parsed source file.
///
/// Derives `Default`, so a fresh value starts with every list empty.
#[derive(Debug, Clone, Serialize, Default)]
pub struct CodeStructure {
/// Import-like statements, in the order the tree walk encountered them.
pub imports: Vec<Import>,
/// Struct-like declarations.
pub structs: Vec<StructDef>,
/// Function-like declarations.
pub functions: Vec<FunctionDef>,
/// Trait-like declarations.
pub traits: Vec<TraitDef>,
/// Enum-like declarations.
pub enums: Vec<EnumDef>,
}
impl CodeStructure {
pub fn exports_only(&self) -> CodeStructure {
CodeStructure {
imports: self.imports.clone(),
structs: self
.structs
.iter()
.filter(|s| s.is_public)
.cloned()
.collect(),
functions: self
.functions
.iter()
.filter(|f| f.is_public)
.cloned()
.collect(),
traits: self
.traits
.iter()
.filter(|t| t.is_public)
.cloned()
.collect(),
enums: self.enums.iter().filter(|e| e.is_public).cloned().collect(),
}
}
}
/// A single import-like statement.
#[derive(Debug, Clone, Serialize)]
pub struct Import {
/// First line of the import node's source text, trimmed of surrounding whitespace.
pub name: String,
/// 1-based line on which the import node starts.
pub line: usize,
}
/// A struct-like declaration found in a parsed file.
#[derive(Debug, Clone, Serialize)]
pub struct StructDef {
/// Text of the declaration node's `name` field.
pub name: String,
/// Result of the visibility heuristics for this declaration.
pub is_public: bool,
/// 1-based line on which the declaration starts.
pub start_line: usize,
/// 1-based line on which the declaration ends.
pub end_line: usize,
/// Fields discovered among the declaration's children and grandchildren.
pub fields: Vec<FieldDef>,
/// Cleaned documentation text, if any; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub doc_comment: Option<String>,
}
impl StructDef {
    /// Renders a one-line, Rust-flavoured summary of the struct.
    ///
    /// The result is `[pub ]struct Name` when there are no fields, otherwise
    /// `[pub ]struct Name { [pub ]field: Type, ... }` with fields separated
    /// by `", "` in their stored order.
    pub fn signature_string(&self) -> String {
        let mut rendered = String::new();
        if self.is_public {
            rendered.push_str("pub ");
        }
        rendered.push_str("struct ");
        rendered.push_str(&self.name);

        if self.fields.is_empty() {
            return rendered;
        }

        rendered.push_str(" { ");
        for (position, field) in self.fields.iter().enumerate() {
            if position > 0 {
                rendered.push_str(", ");
            }
            if field.is_public {
                rendered.push_str("pub ");
            }
            rendered.push_str(&field.name);
            rendered.push_str(": ");
            rendered.push_str(&field.type_name);
        }
        rendered.push_str(" }");
        rendered
    }
}
/// One field of a struct-like declaration.
#[derive(Debug, Clone, Serialize)]
pub struct FieldDef {
/// Text of the field node's `name` field.
pub name: String,
/// Text of the field node's `type` field, or `"unknown"` when the node has none.
pub type_name: String,
/// Result of the visibility heuristics applied to the field node.
pub is_public: bool,
}
/// A function-like declaration found in a parsed file.
#[derive(Debug, Clone, Serialize)]
pub struct FunctionDef {
/// Text of the declaration node's `name` field.
pub name: String,
/// Result of the visibility heuristics for this declaration.
pub is_public: bool,
/// Whether the declaration was detected as asynchronous.
pub is_async: bool,
/// Rendered parameters, each either `name: type`, just `name`, or the parameter's raw text.
pub params: Vec<String>,
/// Text of the `return_type` field with any leading `->` removed, if present.
pub return_type: Option<String>,
/// 1-based line on which the declaration starts.
pub start_line: usize,
/// 1-based line on which the declaration ends.
pub end_line: usize,
/// Cleaned documentation text, if any; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub doc_comment: Option<String>,
}
impl FunctionDef {
    /// Renders a one-line, Rust-flavoured signature for the function.
    ///
    /// The shape is `[pub ][async ]fn name(p1, p2)[ -> Ret]`; parameters are
    /// joined with `", "` and the return type is appended only when present.
    pub fn signature_string(&self) -> String {
        let mut rendered = String::new();
        if self.is_public {
            rendered.push_str("pub ");
        }
        if self.is_async {
            rendered.push_str("async ");
        }
        rendered.push_str("fn ");
        rendered.push_str(&self.name);
        rendered.push('(');
        rendered.push_str(&self.params.join(", "));
        rendered.push(')');

        match self.return_type.as_ref() {
            Some(ret) => {
                rendered.push_str(" -> ");
                rendered.push_str(ret);
                rendered
            }
            None => rendered,
        }
    }
}
/// A trait-like declaration found in a parsed file.
#[derive(Debug, Clone, Serialize)]
pub struct TraitDef {
/// Text of the declaration node's `name` field.
pub name: String,
/// Result of the visibility heuristics for this declaration.
pub is_public: bool,
/// 1-based line on which the declaration starts.
pub start_line: usize,
/// 1-based line on which the declaration ends.
pub end_line: usize,
}
/// An enum-like declaration found in a parsed file.
#[derive(Debug, Clone, Serialize)]
pub struct EnumDef {
/// Text of the declaration node's `name` field.
pub name: String,
/// Result of the visibility heuristics for this declaration.
pub is_public: bool,
/// 1-based line on which the declaration starts.
pub start_line: usize,
/// 1-based line on which the declaration ends.
pub end_line: usize,
/// Variant names; for variant nodes without a `name` field, the node's trimmed text.
pub variants: Vec<String>,
}
/// Parses `source` with the grammar selected by `extension` and extracts its
/// declarations.
///
/// # Errors
///
/// Returns an error when `get_language_config` rejects the extension, when
/// the parser refuses the configured language, or when tree-sitter yields no
/// tree at all (reported as `"Failed to parse file"`).
pub fn parse_file(source: &str, extension: &str) -> Result<CodeStructure> {
    let config = get_language_config(extension)?;

    let mut parser = Parser::new();
    parser.set_language(&config.language)?;

    match parser.parse(source, None) {
        Some(tree) => extract_structure(&tree, source, &config),
        None => Err(anyhow::anyhow!("Failed to parse file")),
    }
}
/// Walks the whole syntax tree from its root and gathers every recognised
/// declaration into a fresh [`CodeStructure`].
///
/// Always returns `Ok`; the `Result` wrapper only matches the caller's
/// signature.
fn extract_structure(tree: &Tree, source: &str, config: &LanguageConfig) -> Result<CodeStructure> {
    let mut collected = CodeStructure::default();
    extract_from_node(tree.root_node(), source, config, &mut collected);
    Ok(collected)
}
/// Records `node` in `structure` if its kind is one the language config
/// recognises, then recurses into every child.
///
/// The category checks are independent, so a node kind listed in several
/// categories is recorded once per matching category. Traversal is pre-order,
/// which means nested declarations are picked up as well.
fn extract_from_node(
    node: Node,
    source: &str,
    config: &LanguageConfig,
    structure: &mut CodeStructure,
) {
    let kind = node.kind();

    // `Option` is iterable, so `extend` pushes the item only when extraction succeeded.
    if config.import_kinds.contains(&kind) {
        structure.imports.extend(extract_import(node, source, config));
    }
    if config.struct_kinds.contains(&kind) {
        structure.structs.extend(extract_struct(node, source, config));
    }
    if config.function_kinds.contains(&kind) {
        structure
            .functions
            .extend(extract_function(node, source, config));
    }
    if config.trait_kinds.contains(&kind) {
        structure.traits.extend(extract_trait(node, source, config));
    }
    if config.enum_kinds.contains(&kind) {
        structure.enums.extend(extract_enum(node, source, config));
    }

    let mut walker = node.walk();
    for descendant in node.children(&mut walker) {
        extract_from_node(descendant, source, config, structure);
    }
}
/// Builds an [`Import`] from an import-like node.
///
/// Only the first line of the node's text is kept (trimmed), together with
/// the 1-based line on which the node starts. Returns `None` when the node's
/// text cannot be read as UTF-8.
fn extract_import(node: Node, source: &str, _config: &LanguageConfig) -> Option<Import> {
    let raw = node.utf8_text(source.as_bytes()).ok()?;
    let first_line = match raw.lines().next() {
        Some(line) => line,
        // Empty text has no lines; fall back to the text itself.
        None => raw,
    };
    Some(Import {
        name: first_line.trim().to_string(),
        line: node.start_position().row + 1,
    })
}
/// Builds a [`StructDef`] from a struct-like node.
///
/// Returns `None` when the node has no readable `name` field. Line numbers
/// are converted from tree-sitter's 0-based rows to 1-based lines.
fn extract_struct(node: Node, source: &str, config: &LanguageConfig) -> Option<StructDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(StructDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
        fields: extract_fields(node, source, config),
        doc_comment: extract_doc_comment(node, source, config),
    })
}
/// Collects the fields of a struct-like node.
///
/// Looks exactly two levels deep: each direct child is considered first,
/// followed by that child's own children. Any node whose kind is listed in
/// `config.field_kinds` and that yields a field is recorded, in that order.
fn extract_fields(node: Node, source: &str, config: &LanguageConfig) -> Vec<FieldDef> {
    let mut found = Vec::new();
    let mut outer = node.walk();
    for child in node.children(&mut outer) {
        let mut inner = child.walk();
        // The child itself, then its children — the same visiting order as two nested loops.
        let candidates = std::iter::once(child).chain(child.children(&mut inner));
        for candidate in candidates {
            if config.field_kinds.contains(&candidate.kind()) {
                found.extend(extract_single_field(candidate, source, config));
            }
        }
    }
    found
}
/// Builds a [`FieldDef`] from a single field node.
///
/// Returns `None` when the node has no readable `name` field. The type is
/// taken from the node's `type` field and defaults to `"unknown"` when that
/// field is absent.
///
/// Some grammars (e.g. TypeScript) expose the type as an annotation node
/// whose text includes the leading `:` separator. That separator is removed
/// so callers rendering `name: type` do not end up with `name: : type`.
fn extract_single_field(node: Node, source: &str, config: &LanguageConfig) -> Option<FieldDef> {
    let name = find_child_by_field(node, "name")?
        .utf8_text(source.as_bytes())
        .ok()?
        .to_string();

    let type_name = find_child_by_field(node, "type")
        .and_then(|n| n.utf8_text(source.as_bytes()).ok())
        .map(|raw| {
            let cleaned = match raw.strip_prefix(':') {
                // A single leading `:` is an annotation separator; `::` is a
                // path qualifier (e.g. C++ `::ns::Type`) and must be kept.
                Some(rest) if !rest.starts_with(':') => rest.trim_start(),
                _ => raw,
            };
            cleaned.to_string()
        })
        .unwrap_or_else(|| "unknown".to_string());

    Some(FieldDef {
        name,
        type_name,
        is_public: check_visibility(node, source, config),
    })
}
/// Builds a [`FunctionDef`] from a function-like node.
///
/// Returns `None` when the node has no readable `name` field. Line numbers
/// are converted from tree-sitter's 0-based rows to 1-based lines.
fn extract_function(node: Node, source: &str, config: &LanguageConfig) -> Option<FunctionDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(FunctionDef {
        name,
        is_public: check_visibility(node, source, config),
        is_async: check_async(node, source),
        params: extract_parameters(node, source, config),
        return_type: extract_return_type(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
        doc_comment: extract_doc_comment(node, source, config),
    })
}
/// Renders the parameters of a function-like node as display strings.
///
/// Only children of the `parameters` field whose kind is listed in
/// `config.param_kinds` are considered. A parameter with a `pattern` (or,
/// failing that, `name`) field is rendered as `name: type`, or just `name`
/// when it has no `type` field. Any other parameter is rendered as its
/// trimmed source text, skipping empty text and bare commas.
///
/// Some grammars (e.g. TypeScript) expose the type as an annotation node
/// whose text already starts with `:`; that separator is dropped so the
/// result is `x: string` rather than `x: : string`.
fn extract_parameters(node: Node, source: &str, config: &LanguageConfig) -> Vec<String> {
    let params_node = match find_child_by_field(node, "parameters") {
        Some(found) => found,
        None => return Vec::new(),
    };

    let bytes = source.as_bytes();
    let mut params = Vec::new();
    let mut cursor = params_node.walk();
    for child in params_node.children(&mut cursor) {
        if !config.param_kinds.contains(&child.kind()) {
            continue;
        }

        let name_node =
            find_child_by_field(child, "pattern").or_else(|| find_child_by_field(child, "name"));

        match name_node {
            Some(name_node) => {
                if let Ok(name) = name_node.utf8_text(bytes) {
                    let type_suffix = find_child_by_field(child, "type")
                        .and_then(|t| t.utf8_text(bytes).ok())
                        .map(|raw| {
                            let ty = match raw.strip_prefix(':') {
                                // Keep `::`-qualified paths intact; drop only
                                // a lone annotation separator.
                                Some(rest) if !rest.starts_with(':') => rest.trim_start(),
                                _ => raw,
                            };
                            format!(": {}", ty)
                        })
                        .unwrap_or_default();
                    params.push(format!("{}{}", name, type_suffix));
                }
            }
            None => {
                // No structured name (e.g. a `self` receiver): keep the raw text.
                if let Ok(text) = child.utf8_text(bytes) {
                    let text = text.trim();
                    if !text.is_empty() && text != "," {
                        params.push(text.to_string());
                    }
                }
            }
        }
    }
    params
}
/// Reads the text of a function-like node's `return_type` field.
///
/// Leading `->` markers and surrounding whitespace are removed. A single
/// leading `:` is removed too, because some grammars (e.g. TypeScript) expose
/// the return type as an annotation node whose text begins with the
/// separator. Returns `None` when the field is missing or unreadable.
fn extract_return_type(node: Node, source: &str, _config: &LanguageConfig) -> Option<String> {
    let raw = find_child_by_field(node, "return_type")?
        .utf8_text(source.as_bytes())
        .ok()?;

    let ty = raw.trim_start_matches("->").trim();
    let ty = match ty.strip_prefix(':') {
        // `::` starts a qualified path and is part of the type itself.
        Some(rest) if !rest.starts_with(':') => rest.trim_start(),
        _ => ty,
    };
    Some(ty.to_string())
}
/// Builds a [`TraitDef`] from a trait-like node.
///
/// Returns `None` when the node has no readable `name` field. Line numbers
/// are converted from tree-sitter's 0-based rows to 1-based lines.
fn extract_trait(node: Node, source: &str, config: &LanguageConfig) -> Option<TraitDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(TraitDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
    })
}
/// Builds an [`EnumDef`] from an enum-like node.
///
/// Returns `None` when the node has no readable `name` field. Line numbers
/// are converted from tree-sitter's 0-based rows to 1-based lines.
fn extract_enum(node: Node, source: &str, config: &LanguageConfig) -> Option<EnumDef> {
    let name_node = find_child_by_field(node, "name")?;
    let name = name_node.utf8_text(source.as_bytes()).ok()?.to_string();

    Some(EnumDef {
        name,
        is_public: check_visibility(node, source, config),
        start_line: node.start_position().row + 1,
        end_line: node.end_position().row + 1,
        variants: extract_enum_variants(node, source, config),
    })
}
/// Collects the variant names of an enum-like node.
///
/// Every descendant of `node` (but not `node` itself) is visited in
/// pre-order. A descendant whose kind is listed in
/// `config.enum_variant_kinds` contributes the text of its `name` field, or,
/// when it has no such field, its own trimmed text if non-empty. Matching
/// nodes are still descended into.
fn extract_enum_variants(node: Node, source: &str, config: &LanguageConfig) -> Vec<String> {
    /// Records `current` if it is a variant, then visits its children.
    fn visit(current: Node, source: &str, config: &LanguageConfig, out: &mut Vec<String>) {
        if config.enum_variant_kinds.contains(&current.kind()) {
            match find_child_by_field(current, "name") {
                Some(name_node) => {
                    out.extend(
                        name_node
                            .utf8_text(source.as_bytes())
                            .ok()
                            .map(str::to_string),
                    );
                }
                None => {
                    // Unreadable text collapses to "" and is skipped like empty text.
                    let trimmed = current
                        .utf8_text(source.as_bytes())
                        .map(str::trim)
                        .unwrap_or("");
                    if !trimmed.is_empty() {
                        out.push(trimmed.to_string());
                    }
                }
            }
        }

        let mut walker = current.walk();
        for child in current.children(&mut walker) {
            visit(child, source, config, out);
        }
    }

    let mut names = Vec::new();
    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        visit(child, source, config, &mut names);
    }
    names
}
/// Decides whether a declaration node should be treated as public.
///
/// The checks run in this order and the first decisive one wins:
///
/// 1. A `visibility_modifier` child decides by whether its text contains `pub`.
/// 2. A `decorator` child whose text contains `export` or `public` means public.
/// 3. A parent of kind `export_statement`, or a previous sibling of kind
///    `export` / `export_statement`, means public.
/// 4. Otherwise a per-language naming rule applies:
///    * `python`: public unless the name starts with `_` (public when the
///      name cannot be read);
///    * `go`: public when the name starts with an uppercase letter, which is
///      how Go marks exported identifiers;
///    * anything else: not public.
fn check_visibility(node: Node, source: &str, config: &LanguageConfig) -> bool {
    let bytes = source.as_bytes();

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "visibility_modifier" => {
                if let Ok(text) = child.utf8_text(bytes) {
                    return text.contains("pub");
                }
            }
            "decorator" => {
                if let Ok(text) = child.utf8_text(bytes) {
                    if text.contains("export") || text.contains("public") {
                        return true;
                    }
                }
            }
            _ => {}
        }
    }

    let exported_by_parent = node
        .parent()
        .map_or(false, |parent| parent.kind() == "export_statement");
    if exported_by_parent {
        return true;
    }

    let exported_by_sibling = node
        .prev_sibling()
        .map_or(false, |prev| matches!(prev.kind(), "export" | "export_statement"));
    if exported_by_sibling {
        return true;
    }

    // Naming-convention fallbacks need the declared identifier.
    let declared_name =
        || find_child_by_field(node, "name").and_then(|n| n.utf8_text(bytes).ok());

    match config.name {
        "python" => declared_name().map_or(true, |name| !name.starts_with('_')),
        "go" => declared_name()
            .and_then(|name| name.chars().next())
            .map_or(false, char::is_uppercase),
        _ => false,
    }
}
/// Detects whether a function-like node is declared `async`.
///
/// Three signals are checked, in order:
///
/// 1. a direct child of kind `async`;
/// 2. an `async` token inside a `function_modifiers` child, which is where
///    grammars such as tree-sitter-rust place it — without this,
///    `pub async fn` is missed because its text starts with `pub`;
/// 3. as a fallback, the node text starting with the whole word `async`.
///    The word-boundary check keeps identifiers such as `asyncHandler` from
///    being mistaken for the keyword.
fn check_async(node: Node, source: &str) -> bool {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "async" => return true,
            "function_modifiers" => {
                let mut modifier_cursor = child.walk();
                let has_async = child
                    .children(&mut modifier_cursor)
                    .any(|modifier| modifier.kind() == "async");
                if has_async {
                    return true;
                }
            }
            _ => {}
        }
    }

    node.utf8_text(source.as_bytes())
        .ok()
        .and_then(|text| text.trim_start().strip_prefix("async"))
        .map_or(false, |rest| {
            // `async` must end here or be followed by a non-identifier character.
            rest.chars()
                .next()
                .map_or(true, |c| !(c.is_alphanumeric() || c == '_' || c == '$'))
        })
}
fn find_child_by_field<'a>(node: Node<'a>, field_name: &str) -> Option<Node<'a>> {
node.child_by_field_name(field_name)
}
/// Gathers the documentation attached to a declaration node.
///
/// Python is delegated to [`extract_python_docstring`]. For every other
/// language the function walks backwards over the siblings that precede the
/// declaration (or its enclosing `export_statement`, when there is one):
///
/// * comment nodes are collected,
/// * `attribute_item` and `decorator` nodes are skipped over,
/// * anything else ends the walk.
///
/// The collected comments are cleaned, restored to source order and joined
/// with newlines. Returns `None` when no comment was found.
fn extract_doc_comment(node: Node, source: &str, config: &LanguageConfig) -> Option<String> {
    if config.name == "python" {
        return extract_python_docstring(node, source);
    }

    // Comments on exported declarations sit before the export wrapper.
    let anchor = match node.parent() {
        Some(parent) if parent.kind() == "export_statement" => parent,
        _ => node,
    };

    let mut collected: Vec<String> = Vec::new();
    let mut cursor = anchor.prev_sibling();
    while let Some(sibling) = cursor {
        let kind = sibling.kind();
        if is_comment_node(kind) {
            if let Ok(text) = sibling.utf8_text(source.as_bytes()) {
                collected.push(clean_comment(text, config));
            }
        } else if kind != "attribute_item" && kind != "decorator" {
            break;
        }
        cursor = sibling.prev_sibling();
    }

    if collected.is_empty() {
        return None;
    }

    // Siblings were visited bottom-up; flip back to top-down order.
    collected.reverse();
    Some(collected.join("\n"))
}
/// Reports whether a tree-sitter node kind is one of the kinds this module
/// treats as a comment when collecting documentation.
fn is_comment_node(kind: &str) -> bool {
    const COMMENT_KINDS: [&str; 6] = [
        "line_comment",
        "block_comment",
        "comment",
        "doc_comment",
        "documentation_comment",
        "string_content",
    ];
    COMMENT_KINDS.contains(&kind)
}
/// Extracts the docstring of a Python definition node.
///
/// The docstring is the string literal that forms the first statement of the
/// node's `body`. Comment nodes at the top of the body are skipped, because
/// Python ignores them when deciding what the first statement is; stopping at
/// them would hide a docstring that follows a `#` comment.
///
/// Returns `None` when there is no `body` field, when the first statement is
/// not an expression statement, or when that statement holds no string.
fn extract_python_docstring(node: Node, source: &str) -> Option<String> {
    let body = find_child_by_field(node, "body")?;

    let mut cursor = body.walk();
    let first_statement = body
        .children(&mut cursor)
        .find(|child| child.kind() != "comment")?;
    if first_statement.kind() != "expression_statement" {
        return None;
    }

    let mut inner_cursor = first_statement.walk();
    let literal = first_statement
        .children(&mut inner_cursor)
        .find_map(|inner| match inner.kind() {
            "string" | "concatenated_string" => inner.utf8_text(source.as_bytes()).ok(),
            _ => None,
        })?;

    Some(clean_python_docstring(literal))
}
/// Strips the surrounding quotes from a Python string literal and trims the
/// remaining text.
///
/// Triple quotes are tried before single ones. A delimiter is removed only
/// when a distinct opening and closing occurrence both exist, so degenerate
/// inputs such as a lone `"` or `"""` are handled without panicking instead
/// of producing an inverted slice range. Text without matching quotes is
/// returned trimmed but otherwise unchanged.
fn clean_python_docstring(text: &str) -> String {
    // Longest delimiters first so `"""` is not consumed as `"`.
    const DELIMITERS: [&str; 4] = ["\"\"\"", "'''", "\"", "'"];

    let trimmed = text.trim();
    let content = DELIMITERS
        .iter()
        .find_map(|quote| {
            trimmed
                .strip_prefix(*quote)
                .and_then(|rest| rest.strip_suffix(*quote))
        })
        .unwrap_or(trimmed);

    content.trim().to_string()
}
fn clean_comment(text: &str, config: &LanguageConfig) -> String {
let trimmed = text.trim();
match config.name {
"rust" => clean_rust_comment(trimmed),
"go" => clean_go_comment(trimmed),
"java" | "typescript" | "javascript" | "c" | "cpp" => clean_c_style_comment(trimmed),
_ => clean_c_style_comment(trimmed),
}
}
/// Removes Rust comment markers from a single, already trimmed comment.
///
/// * `///`, `//!` and `//` line comments lose their marker and any leading
///   whitespace that follows it.
/// * Block comments delimited by `/*` … `*/` (including `/** … */` doc
///   blocks) are handed to `clean_block_comment`. Plain `/* … */` blocks are
///   cleaned too, so they are treated like plain `//` comments rather than
///   being returned with their delimiters.
/// * Anything else is returned unchanged.
fn clean_rust_comment(text: &str) -> String {
    // Longest markers first so `///` and `//!` are not mistaken for `//`.
    for marker in ["///", "//!", "//"].iter() {
        if let Some(rest) = text.strip_prefix(*marker) {
            return rest.trim_start().to_string();
        }
    }

    if text.starts_with("/*") && text.ends_with("*/") {
        return clean_block_comment(text);
    }

    text.to_string()
}
/// Removes Go comment markers from a single, already trimmed comment.
///
/// `//` comments lose the marker and the whitespace after it; `/* … */`
/// blocks are handed to `clean_block_comment`; any other text is returned
/// unchanged.
fn clean_go_comment(text: &str) -> String {
    match text.strip_prefix("//") {
        Some(rest) => rest.trim_start().to_string(),
        None if text.starts_with("/*") && text.ends_with("*/") => clean_block_comment(text),
        None => text.to_string(),
    }
}
fn clean_c_style_comment(text: &str) -> String {
if text.starts_with("/**") && text.ends_with("*/") {
clean_block_comment(text)
} else if text.starts_with("/*") && text.ends_with("*/") {
clean_block_comment(text)
} else if text.starts_with("//") {
text.strip_prefix("//")
.unwrap_or(text)
.trim_start()
.to_string()
} else {
text.to_string()
}
}
/// Strips the delimiters and per-line `*` gutters from a block comment.
///
/// The opener (`/**` preferred, else `/*`) and the closer `*/` are removed
/// as a pair whenever both fit without overlapping, so the empty comment
/// `/**/` cleans to an empty string. For malformed input, whichever
/// delimiter is present is still removed, rather than falling back to the
/// untouched text. Each remaining line is trimmed, loses one leading `*`,
/// and is trimmed again; the lines are re-joined with `\n`.
fn clean_block_comment(text: &str) -> String {
    // Well-formed case: an opener and a separate closer.
    let paired = text
        .strip_prefix("/**")
        .and_then(|rest| rest.strip_suffix("*/"))
        .or_else(|| {
            text.strip_prefix("/*")
                .and_then(|rest| rest.strip_suffix("*/"))
        });

    let content = match paired {
        Some(inner) => inner,
        None => {
            // Malformed: remove each delimiter independently if it is there.
            let rest = text
                .strip_prefix("/**")
                .or_else(|| text.strip_prefix("/*"))
                .unwrap_or(text);
            rest.strip_suffix("*/").unwrap_or(rest)
        }
    };

    let lines: Vec<&str> = content
        .trim()
        .lines()
        .map(|line| {
            let line = line.trim();
            line.strip_prefix('*').unwrap_or(line).trim()
        })
        .collect();

    lines.join("\n").trim().to_string()
}