use crate::args::Cli;
use crate::commands::graph::loader::{GraphLoadConfig, load_unified_graph_for_cli};
use crate::index_discovery::find_nearest_index;
use crate::output::OutputStreams;
use anyhow::{Context, Result};
use serde::Serialize;
use std::collections::HashMap;
/// Complete result of a hierarchical search: the query, the matching symbols
/// grouped by file, and summary totals.
///
/// `run_hier_search` either serializes this to pretty-printed JSON or renders
/// it as plain text via `format_hier_text`.
#[derive(Debug, Serialize)]
struct HierarchicalOutput {
/// The search query exactly as the caller supplied it.
query: String,
/// Matching symbols, grouped by the file that contains them.
files: Vec<FileGroup>,
/// Summary totals for this result.
stats: HierarchicalStats,
}
/// All matched symbols that live in one file, plus a rough token estimate for
/// showing them.
#[derive(Debug, Serialize)]
struct FileGroup {
/// Display form of the file path; empty when the file id cannot be resolved.
path: String,
/// Language reported for the file, or `"Unknown"` when none is recorded.
language: String,
/// Matched symbols in this file; `run_hier_search` orders them by line.
symbols: Vec<HierSymbol>,
/// Sum of `estimate_symbol_tokens` over every symbol in `symbols`.
estimated_tokens: usize,
}
/// One matched symbol as it appears in hierarchical search output.
#[derive(Debug, Serialize)]
struct HierSymbol {
/// Resolved symbol name; empty when the name cannot be resolved.
name: String,
/// Resolved qualified name; falls back to `name` when none is available.
qualified_name: String,
/// `Debug` rendering of the graph node's kind.
kind: String,
/// Start line of the symbol, taken from the graph node.
line: u32,
/// Relevance score produced by `compute_relevance`.
score: f64,
}
/// Summary totals reported alongside a hierarchical search result.
#[derive(Debug, Serialize)]
// Every field shares the `total_` prefix, which is what the
// `struct_field_names` lint flags; the allow keeps those names.
#[allow(clippy::struct_field_names)] struct HierarchicalStats {
/// Number of file groups kept after the `max_files` limit is applied.
total_files: usize,
/// Number of matched symbols kept after the `max_results` limit. This is
/// counted before the `max_files` limit, so it can include symbols whose
/// file group was dropped from the output.
total_symbols: usize,
/// Sum of `estimated_tokens` over the file groups that were kept.
total_estimated_tokens: usize,
}
/// Runs a hierarchical symbol search and writes the matches grouped by file.
///
/// A symbol matches when its name or qualified name contains `query`,
/// compared case-insensitively. Matches are scored with `compute_relevance`,
/// the best `max_results` are kept, and the survivors are grouped by file.
/// File groups are ordered by estimated token cost (largest first, ties broken
/// by path so the output is reproducible) and limited to `max_files`.
///
/// # Arguments
///
/// * `cli` - Parsed CLI options; `cli.json` selects JSON instead of text output.
/// * `query` - Substring to look for in symbol names.
/// * `path` - Where to start looking for the index; defaults to the current
///   working directory.
/// * `max_results` - Maximum number of symbols kept after scoring.
/// * `max_files` - Maximum number of file groups in the output.
/// * `context_lines` - Context lines fed into the per-symbol token estimate.
/// * `kinds` - When non-empty, only symbols whose kind (its `Debug` rendering)
///   equals one of these, ignoring ASCII case, are considered.
/// * `languages` - When non-empty, only symbols from files whose language
///   equals one of these, ignoring ASCII case, are considered.
///
/// # Errors
///
/// Returns an error if the graph cannot be loaded, if JSON serialization
/// fails, or if writing to the output streams fails. A missing index is not an
/// error: a diagnostic is written and `Ok(())` is returned.
#[allow(clippy::too_many_lines)]
pub fn run_hier_search(
    cli: &Cli,
    query: &str,
    path: Option<&str>,
    max_results: usize,
    max_files: usize,
    context_lines: usize,
    kinds: &[String],
    languages: &[String],
) -> Result<()> {
    let mut streams = OutputStreams::new();

    let search_path = path.map_or_else(
        || std::env::current_dir().unwrap_or_default(),
        std::path::PathBuf::from,
    );
    let index_location = find_nearest_index(&search_path);
    let Some(ref loc) = index_location else {
        streams
            .write_diagnostic("No .sqry-index found. Run 'sqry index' first to build the index.")?;
        return Ok(());
    };

    let config = GraphLoadConfig::default();
    let graph = load_unified_graph_for_cli(&loc.index_root, &config, cli)
        .context("Failed to load graph. Run 'sqry index' to build the graph.")?;
    let strings = graph.strings();
    let files = graph.files();
    let query_lower = query.to_lowercase();

    // Filter and score in a single pass so each name is resolved only once.
    let mut scored_symbols: Vec<_> = graph
        .nodes()
        .iter()
        .filter_map(|(_, entry)| {
            if !kinds.is_empty() {
                let kind_str = format!("{:?}", entry.kind);
                if !kinds.iter().any(|k| k.eq_ignore_ascii_case(&kind_str)) {
                    return None;
                }
            }
            if !languages.is_empty() {
                let lang = files
                    .language_for_file(entry.file)
                    .map(|l| l.to_string())
                    .unwrap_or_default();
                if !languages.iter().any(|l| l.eq_ignore_ascii_case(&lang)) {
                    return None;
                }
            }

            let name = strings.resolve(entry.name);
            // The qualified name is only resolved when the short name misses.
            let matches_query = name
                .as_ref()
                .is_some_and(|n| n.to_lowercase().contains(&query_lower))
                || entry
                    .qualified_name
                    .and_then(|id| strings.resolve(id))
                    .is_some_and(|q| q.to_lowercase().contains(&query_lower));
            if !matches_query {
                return None;
            }

            let score = compute_relevance(
                name.as_deref().unwrap_or_default(),
                entry.visibility.and_then(|v| strings.resolve(v)),
                &query_lower,
            );
            Some((entry, score))
        })
        .collect();

    // Best score first. The sort is stable, so equally scored symbols keep
    // the order in which the graph yielded them.
    scored_symbols.sort_by(|a, b| b.1.total_cmp(&a.1));
    scored_symbols.truncate(max_results);

    let mut file_groups: HashMap<String, FileGroup> = HashMap::new();
    for (entry, score) in &scored_symbols {
        let file_path = files
            .resolve(entry.file)
            .map(|p| p.display().to_string())
            .unwrap_or_default();
        let file_group = file_groups
            .entry(file_path.clone())
            .or_insert_with(|| FileGroup {
                path: file_path,
                // Looked up only the first time a file is seen.
                language: files
                    .language_for_file(entry.file)
                    .map_or_else(|| "Unknown".to_string(), |l| l.to_string()),
                symbols: Vec::new(),
                estimated_tokens: 0,
            });

        let name = strings
            .resolve(entry.name)
            .map(|s| s.to_string())
            .unwrap_or_default();
        let qualified_name = entry
            .qualified_name
            .and_then(|id| strings.resolve(id))
            .map_or_else(|| name.clone(), |s| s.to_string());

        file_group.symbols.push(HierSymbol {
            name,
            qualified_name,
            kind: format!("{:?}", entry.kind),
            line: entry.start_line,
            score: *score,
        });
        file_group.estimated_tokens += estimate_symbol_tokens(entry, context_lines);
    }

    let files_vec = rank_file_groups(file_groups, max_files);

    let stats = HierarchicalStats {
        total_files: files_vec.len(),
        // NOTE(review): this counts every symbol kept after the `max_results`
        // cut, including symbols whose file was dropped by `max_files`; the
        // other two totals only cover the files that are actually printed.
        total_symbols: scored_symbols.len(),
        total_estimated_tokens: files_vec.iter().map(|f| f.estimated_tokens).sum(),
    };
    let output = HierarchicalOutput {
        query: query.to_string(),
        files: files_vec,
        stats,
    };

    if cli.json {
        let json = serde_json::to_string_pretty(&output).context("Failed to serialize to JSON")?;
        streams.write_result(&json)?;
    } else {
        let text = format_hier_text(&output);
        streams.write_result(&text)?;
    }
    Ok(())
}

/// Orders file groups for display and keeps at most `max_files` of them.
///
/// Groups are ordered by estimated tokens, largest first. Hash-map iteration
/// order is arbitrary, so ties are broken by path; without that, both the
/// printed order and the set of groups surviving truncation could change from
/// run to run. Symbols inside each kept group are ordered by line.
fn rank_file_groups(groups: HashMap<String, FileGroup>, max_files: usize) -> Vec<FileGroup> {
    let mut ranked: Vec<FileGroup> = groups.into_values().collect();
    // Paths are the map keys and therefore unique, so this is a total order
    // and an unstable sort cannot introduce nondeterminism.
    ranked.sort_unstable_by(|a, b| {
        b.estimated_tokens
            .cmp(&a.estimated_tokens)
            .then_with(|| a.path.cmp(&b.path))
    });
    ranked.truncate(max_files);
    for group in &mut ranked {
        group.symbols.sort_by_key(|sym| sym.line);
    }
    ranked
}
fn compute_relevance(name: &str, visibility: Option<std::sync::Arc<str>>, query: &str) -> f64 {
let mut score: f64 = 0.5; let name_lower = name.to_lowercase();
if name_lower == query {
score += 0.3;
} else if name_lower.starts_with(query) {
score += 0.2;
}
if visibility.is_some_and(|v| v.as_ref() == "public") {
score += 0.1;
}
score.min(1.0)
}
/// Roughly estimates the token cost of showing a symbol with its context.
///
/// The estimate is `(lines spanned by the symbol + 2 * context_lines) * 10`,
/// i.e. `context_lines` is counted once on each side of the symbol and every
/// line is assumed to cost a flat ten tokens.
///
/// All arithmetic saturates, so extreme line numbers or a huge
/// `context_lines` yield `usize::MAX` at worst instead of panicking in debug
/// builds or wrapping in release builds.
fn estimate_symbol_tokens(
    entry: &sqry_core::graph::unified::storage::arena::NodeEntry,
    context_lines: usize,
) -> usize {
    /// Flat per-line token allowance used by this heuristic.
    const TOKENS_PER_LINE: usize = 10;

    // `saturating_sub` makes an entry whose end precedes its start count as a
    // single line. The `+ 1` happens after widening so it cannot overflow the
    // narrower line-number type.
    let span = (entry.end_line.saturating_sub(entry.start_line) as usize).saturating_add(1);
    span.saturating_add(context_lines.saturating_mul(2))
        .saturating_mul(TOKENS_PER_LINE)
}
/// Renders a hierarchical search result as plain text.
///
/// The output starts with a two-line header (query, then totals) followed by
/// a blank line. Each file group then contributes a `File:` line, one line per
/// symbol, and a trailing blank line. Lines are joined with `\n`, so the text
/// ends with a newline.
fn format_hier_text(output: &HierarchicalOutput) -> String {
    let stats = &output.stats;
    let header = vec![
        format!("Hierarchical search: {}", output.query),
        format!(
            "Found {} symbols in {} files (~{} tokens)",
            stats.total_symbols, stats.total_files, stats.total_estimated_tokens
        ),
        String::new(),
    ];

    // One section per file: heading, its symbols, then a blank separator.
    let sections = output.files.iter().flat_map(|group| {
        let heading = format!(
            "File: {} [{}] (~{} tokens)",
            group.path, group.language, group.estimated_tokens
        );
        let entries = group.symbols.iter().map(|symbol| {
            format!(
                " {} [{}] line {} (score: {:.2})",
                symbol.qualified_name, symbol.kind, symbol.line, symbol.score
            )
        });
        std::iter::once(heading)
            .chain(entries)
            .chain(std::iter::once(String::new()))
    });

    header
        .into_iter()
        .chain(sections)
        .collect::<Vec<String>>()
        .join("\n")
}