use anyhow::Result;
use serde::Serialize;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::{Node, Parser};
use crate::indexer::{languages, path_utils};
/// Signature summary collected for a single file.
#[derive(Clone, Debug, Serialize)]
pub struct FileSignature {
    /// Path of the file in its display form.
    pub path: String,
    /// Name of the language the file was processed as (e.g. `"markdown"`).
    pub language: String,
    /// Leading comment / summary of the file, when one was found.
    pub file_comment: Option<String>,
    /// Items extracted from the file.
    pub signatures: Vec<SignatureItem>,
}
/// One extracted item: a code definition or a Markdown heading section.
#[derive(Clone, Debug, Serialize)]
pub struct SignatureItem {
    /// Simplified item kind, e.g. `"function"` or `"heading2"`.
    pub kind: String,
    /// Name of the item (identifier text or heading text).
    pub name: String,
    /// Source text captured for the item.
    pub signature: String,
    /// Comment found directly before the item, if any.
    pub description: Option<String>,
    /// Line on which the item starts.
    pub start_line: usize,
    /// Line on which the item ends.
    pub end_line: usize,
}
/// Extracts signatures from every file in `files` whose language is recognised.
///
/// Files are skipped silently when their language cannot be detected, when
/// they cannot be read as UTF-8 text, when no language implementation is
/// registered for them, or when the parser produces no tree at all.
/// Markdown files are handled by the line-based Markdown extractors; every
/// other language goes through tree-sitter.
///
/// # Errors
///
/// Returns an error if the current working directory cannot be determined or
/// if the parser rejects a language grammar.
pub fn extract_file_signatures(files: &[PathBuf]) -> Result<Vec<FileSignature>> {
    let mut all_signatures = Vec::new();
    let mut parser = Parser::new();
    let current_dir = std::env::current_dir()?;

    for file_path in files {
        let language = match detect_language(file_path) {
            Some(language) => language,
            None => continue,
        };
        let contents = match fs::read_to_string(file_path) {
            Ok(contents) => contents,
            // Best effort: unreadable files are simply left out of the result.
            Err(_) => continue,
        };
        let display_path = path_utils::PathUtils::for_display(file_path, &current_dir);

        if language == "markdown" {
            let signatures = extract_markdown_signatures(&contents);
            let file_comment = extract_markdown_file_comment(&contents);
            all_signatures.push(FileSignature {
                path: display_path,
                language: "markdown".to_string(),
                file_comment,
                signatures,
            });
            continue;
        }

        let lang_impl = match languages::get_language(language) {
            Some(lang_impl) => lang_impl,
            None => continue,
        };
        parser.set_language(&lang_impl.get_ts_language())?;

        // Fall back to parsing an empty document when the real contents yield
        // no tree; if even that fails, skip the file instead of panicking.
        let tree = match parser
            .parse(&contents, None)
            .or_else(|| parser.parse("", None))
        {
            Some(tree) => tree,
            None => continue,
        };

        let signatures = extract_signatures(tree.root_node(), &contents, lang_impl.as_ref());
        let file_comment = extract_file_comment(tree.root_node(), &contents);
        all_signatures.push(FileSignature {
            path: display_path,
            language: lang_impl.name().to_string(),
            file_comment,
            signatures,
        });
    }

    Ok(all_signatures)
}
/// Walks the syntax tree below `node` and returns one [`SignatureItem`] for
/// every node whose kind is listed by `lang_impl.get_meaningful_kinds()` and
/// for which a name can be determined.
///
/// The traversal is depth-first (parents before children); the result is then
/// stably sorted by starting line.
pub fn extract_signatures(
    node: Node,
    contents: &str,
    lang_impl: &dyn languages::Language,
) -> Vec<SignatureItem> {
    /// Records `current` if it qualifies, then recurses into its children.
    fn collect(
        current: Node,
        source: &str,
        language: &dyn languages::Language,
        wanted: &[&str],
        out: &mut Vec<SignatureItem>,
    ) {
        if wanted.contains(&current.kind()) {
            // Nodes without a resolvable name are not reported.
            if let Some(name) = extract_name(current, source, language) {
                out.push(SignatureItem {
                    kind: map_node_kind_to_simple_with_context(current, source),
                    name,
                    signature: node_text(current, source),
                    description: extract_preceding_comment(current, source),
                    start_line: current.start_position().row,
                    end_line: current.end_position().row,
                });
            }
        }

        let mut cursor = current.walk();
        for child in current.children(&mut cursor) {
            collect(child, source, language, wanted, out);
        }
    }

    let meaningful_kinds = lang_impl.get_meaningful_kinds();
    let mut signatures = Vec::new();
    collect(
        node,
        contents,
        lang_impl,
        &meaningful_kinds,
        &mut signatures,
    );
    signatures.sort_by_key(|item| item.start_line);
    signatures
}
/// Determines the name of `node`.
///
/// The first direct child whose kind is `identifier` or contains `name`, and
/// whose text is non-empty valid UTF-8, wins. If no child qualifies, the
/// first symbol reported by `lang_impl.extract_symbols` is used instead.
fn extract_name(node: Node, contents: &str, lang_impl: &dyn languages::Language) -> Option<String> {
    let mut cursor = node.walk();
    let from_child = node.children(&mut cursor).find_map(|child| {
        let kind = child.kind();
        // Kinds containing "function_name" are already covered by "name".
        if kind != "identifier" && !kind.contains("name") {
            return None;
        }
        child
            .utf8_text(contents.as_bytes())
            .ok()
            .filter(|text| !text.is_empty())
            .map(str::to_owned)
    });

    from_child.or_else(|| lang_impl.extract_symbols(node, contents).into_iter().next())
}
/// Returns the text of the comment node that immediately precedes `node`
/// among its parent's children, with common comment delimiters stripped.
///
/// Yields `None` when `node` has no parent, has no preceding sibling, the
/// preceding sibling's kind does not contain `comment`, or its text is not
/// valid UTF-8.
fn extract_preceding_comment(node: Node, contents: &str) -> Option<String> {
    let parent = node.parent()?;

    // Walk the parent's children, remembering the one seen just before `node`.
    let mut previous = None;
    let mut cursor = parent.walk();
    if cursor.goto_first_child() {
        loop {
            let current = cursor.node();
            if current.id() == node.id() {
                break;
            }
            previous = Some(current);
            if !cursor.goto_next_sibling() {
                break;
            }
        }
    }

    let candidate = previous?;
    if !candidate.kind().contains("comment") {
        return None;
    }

    let raw = candidate.utf8_text(contents.as_bytes()).ok()?;
    // Strip leading `/`, `*`, `/` runs and a trailing `*/`.
    let cleaned = raw
        .trim()
        .trim_start_matches("/")
        .trim_start_matches("*")
        .trim_start_matches("/")
        .trim_end_matches("*/")
        .trim();
    Some(cleaned.to_string())
}
/// Returns the file-level comment: the text of the first child of `root` when
/// that child's kind contains `comment`, with common comment delimiters
/// stripped. Yields `None` otherwise, or when the text is not valid UTF-8.
fn extract_file_comment(root: Node, contents: &str) -> Option<String> {
    let mut cursor = root.walk();
    if !cursor.goto_first_child() {
        return None;
    }

    let first = cursor.node();
    if !first.kind().contains("comment") {
        return None;
    }

    let raw = first.utf8_text(contents.as_bytes()).ok()?;
    // Strip leading `/`, `*`, `/` runs and a trailing `*/`.
    let cleaned = raw
        .trim()
        .trim_start_matches("/")
        .trim_start_matches("*")
        .trim_start_matches("/")
        .trim_end_matches("*/")
        .trim();
    Some(cleaned.to_string())
}
/// Returns the source text covered by `node`.
///
/// When the node's text cannot be read as UTF-8, the byte range is decoded
/// lossily instead; an empty or out-of-bounds range yields an empty string.
fn node_text(node: Node, contents: &str) -> String {
    match node.utf8_text(contents.as_bytes()) {
        Ok(text) => text.to_string(),
        Err(_) => {
            let bytes = contents.as_bytes();
            let (start, end) = (node.start_byte(), node.end_byte());
            if start >= end || end > bytes.len() {
                return String::new();
            }
            String::from_utf8_lossy(&bytes[start..end]).to_string()
        }
    }
}
/// Maps a grammar node kind to a simplified label by substring matching.
///
/// Needles are tried in table order and the first hit wins, so for example
/// `function_type` is labelled `function` and `constructor_declaration`
/// (which contains `struct`) is labelled `struct`. Kinds that match no needle
/// are returned unchanged.
fn map_node_kind_to_simple(kind: &str) -> String {
    // (substring to look for, label to report) in priority order.
    const LABELS: [(&str, &str); 11] = [
        ("function", "function"),
        ("method", "method"),
        ("class", "class"),
        ("struct", "struct"),
        ("enum", "enum"),
        ("interface", "interface"),
        ("trait", "trait"),
        // "mod" also covers every kind containing "module".
        ("mod", "module"),
        ("const", "constant"),
        ("macro", "macro"),
        ("type", "type"),
    ];

    LABELS
        .iter()
        .find(|(needle, _)| kind.contains(*needle))
        .map_or(kind, |&(_, label)| label)
        .to_string()
}
/// Maps `node` to a simplified kind label, consulting the node's children
/// where the kind alone is not enough.
///
/// A `declaration` with a direct `function_declarator` child is reported as
/// `function`, and `namespace_definition` as `namespace`. Everything else is
/// delegated to `map_node_kind_to_simple`. `_contents` is currently unused.
fn map_node_kind_to_simple_with_context(node: Node, _contents: &str) -> String {
    let kind = node.kind();
    match kind {
        "namespace_definition" => "namespace".to_string(),
        "declaration" => {
            let mut cursor = node.walk();
            let declares_function = node
                .children(&mut cursor)
                .any(|child| child.kind() == "function_declarator");
            if declares_function {
                "function".to_string()
            } else {
                map_node_kind_to_simple(kind)
            }
        }
        _ => map_node_kind_to_simple(kind),
    }
}
/// Returns the language detected for `path`, delegating to
/// `FileUtils::detect_language`; `None` when no language is detected.
fn detect_language(path: &Path) -> Option<&str> {
    crate::indexer::file_utils::FileUtils::detect_language(path)
}
/// Extracts one [`SignatureItem`] per Markdown heading in `contents`.
///
/// A heading is a line whose trimmed text starts with `#`, has non-empty text
/// after the `#` run, and is not inside a fenced code block delimited by
/// lines starting with three backticks. Lines such as shell comments inside a
/// fence are therefore neither reported as headings nor allowed to end the
/// surrounding section.
///
/// For each heading the item holds:
/// * `kind`: `heading` followed by the number of leading `#` characters,
/// * `name`: the heading text,
/// * `signature`: the heading line plus the following lines up to the next
///   heading, capped at 20 lines in total, with trailing blank lines removed,
/// * `start_line` / `end_line`: zero-based line indices of the captured span.
fn extract_markdown_signatures(contents: &str) -> Vec<SignatureItem> {
    // Upper bound on the lines captured per section, heading line included.
    const MAX_SECTION_LINES: usize = 20;

    let lines: Vec<&str> = contents.lines().collect();

    // Classify each line once, tracking whether we are inside a code fence.
    let mut in_fence = false;
    let is_heading: Vec<bool> = lines
        .iter()
        .map(|line| {
            let trimmed = line.trim();
            if trimmed.starts_with("```") {
                in_fence = !in_fence;
                false
            } else {
                !in_fence && trimmed.starts_with('#')
            }
        })
        .collect();

    let mut signatures = Vec::new();
    for (line_idx, line) in lines.iter().enumerate() {
        if !is_heading[line_idx] {
            continue;
        }

        let trimmed = line.trim();
        let heading_level = trimmed.chars().take_while(|&c| c == '#').count();
        let heading_text = trimmed.trim_start_matches('#').trim();
        if heading_text.is_empty() {
            continue;
        }

        // Number of body lines that follow before the next heading or the cap.
        let body_len = is_heading[line_idx + 1..]
            .iter()
            .take(MAX_SECTION_LINES - 1)
            .take_while(|&&heading| !heading)
            .count();

        // Drop trailing blank lines, but always keep the heading line itself.
        let mut section = &lines[line_idx..=line_idx + body_len];
        while section.len() > 1 && section[section.len() - 1].trim().is_empty() {
            section = &section[..section.len() - 1];
        }

        signatures.push(SignatureItem {
            kind: format!("heading{}", heading_level),
            name: heading_text.to_string(),
            signature: section.join("\n"),
            description: None,
            start_line: line_idx,
            end_line: line_idx + section.len() - 1,
        });
    }
    signatures
}
/// Derives a short file-level description from Markdown `contents`.
///
/// * If the first line is `---`, the lines up to the next `---` line (or the
///   end of the file when there is none) are treated as front matter and
///   returned joined with newlines, provided there is at least one.
/// * Otherwise, and also when the front-matter block is empty, the first
///   paragraph is used: leading headings and blank lines are skipped, then up
///   to three consecutive lines are collected until a heading or blank line,
///   and joined with single spaces.
///
/// An empty front-matter block (`---` directly followed by `---`) is skipped
/// rather than being mistaken for paragraph text.
///
/// Returns `None` when neither source yields any text.
fn extract_markdown_file_comment(contents: &str) -> Option<String> {
    // Maximum number of paragraph lines folded into the summary.
    const MAX_SUMMARY_LINES: usize = 3;

    /// Headings and blank lines delimit the summary paragraph.
    fn is_separator(line: &str) -> bool {
        let trimmed = line.trim();
        trimmed.is_empty() || trimmed.starts_with('#')
    }

    let lines: Vec<&str> = contents.lines().collect();
    if lines.is_empty() {
        return None;
    }

    let mut body_start = 0;
    if lines[0].trim() == "---" {
        let rest = &lines[1..];
        let closing = rest.iter().position(|line| line.trim() == "---");
        let front_matter = &rest[..closing.unwrap_or(rest.len())];
        if !front_matter.is_empty() {
            return Some(front_matter.join("\n"));
        }
        if closing.is_some() {
            // Opening and closing delimiter with nothing in between: start the
            // paragraph scan after them so they are not taken for content.
            body_start = 2;
        }
    }

    let summary: Vec<&str> = lines[body_start..]
        .iter()
        .copied()
        .skip_while(|line| is_separator(line))
        .take_while(|line| !is_separator(line))
        .take(MAX_SUMMARY_LINES)
        .collect();

    if summary.is_empty() {
        None
    } else {
        Some(summary.join(" ").trim().to_string())
    }
}