use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crate::core::registry::{IndexHandle, IndexId, IndexRegistry};
#[derive(Debug, Default)]
pub struct IndexHierarchy {
pub parent_of: HashMap<IndexId, IndexId>,
pub children_of: HashMap<IndexId, Vec<IndexId>>,
}
impl IndexHierarchy {
pub fn from_root_paths(entries: &[(IndexId, PathBuf)]) -> Self {
let mut parent_of: HashMap<IndexId, IndexId> = HashMap::new();
let mut children_of: HashMap<IndexId, Vec<IndexId>> = HashMap::new();
for (child_id, child_root) in entries {
let mut best_parent: Option<(&IndexId, &PathBuf)> = None;
for (parent_id, parent_root) in entries {
if parent_id == child_id {
continue;
}
if !is_strict_sub_path(child_root, parent_root) {
continue;
}
let is_deeper = best_parent
.as_ref()
.map(|(_, p)| parent_root.as_os_str().len() > p.as_os_str().len())
.unwrap_or(true);
if is_deeper {
best_parent = Some((parent_id, parent_root));
}
}
if let Some((par_id, _)) = best_parent {
parent_of.insert(child_id.clone(), par_id.clone());
children_of
.entry(par_id.clone())
.or_default()
.push(child_id.clone());
}
}
Self {
parent_of,
children_of,
}
}
pub fn from_registry(registry: &IndexRegistry, index_ids: &[IndexId]) -> Self {
let entries: Vec<(IndexId, PathBuf)> = index_ids
.iter()
.filter_map(|id| {
let handle = registry.get(id)?;
let canonical = canonicalize_best_effort(&handle.root_path);
Some((id.clone(), canonical))
})
.collect();
Self::from_root_paths(&entries)
}
pub fn is_child(&self, id: &IndexId) -> bool {
self.parent_of.contains_key(id)
}
pub fn children(&self, parent_id: &IndexId) -> &[IndexId] {
self.children_of
.get(parent_id)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
}
pub const DEFAULT_SUB_INDEX_BOOST: f32 = 1.5;
pub fn effective_weight_for_index(
id: &IndexId,
cosine_weight: f32,
hierarchy: &IndexHierarchy,
) -> f32 {
let boost = if hierarchy.is_child(id) {
DEFAULT_SUB_INDEX_BOOST
} else {
1.0_f32
};
(cosine_weight * boost).clamp(1.0, 4.0)
}
pub fn dedup_nested_results(
fused: Vec<(String, f32)>,
chunk_lookup: &HashMap<String, crate::core::indexer::CodeChunk>,
registry: &IndexRegistry,
hierarchy: &IndexHierarchy,
) -> (Vec<(String, f32)>, usize) {
if hierarchy.parent_of.is_empty() {
return (fused, 0);
}
let mut hierarchy_ids: std::collections::HashSet<&IndexId> = std::collections::HashSet::new();
for (child, parent) in &hierarchy.parent_of {
hierarchy_ids.insert(child);
hierarchy_ids.insert(parent);
}
let input_len = fused.len();
let mut seen: HashMap<(PathBuf, usize, usize), ()> = HashMap::new();
let mut deduped: Vec<(String, f32)> = Vec::with_capacity(input_len);
for (namespaced_id, score) in fused {
let index_id_str = match namespaced_id.split_once("::") {
Some((idx, _)) => idx,
None => {
deduped.push((namespaced_id, score));
continue;
}
};
let index_id = IndexId::new(index_id_str);
if !hierarchy_ids.contains(&index_id) {
deduped.push((namespaced_id, score));
continue;
}
let Some(chunk) = chunk_lookup.get(&namespaced_id) else {
deduped.push((namespaced_id, score));
continue;
};
let abs_path = resolve_chunk_path(registry, &index_id, &chunk.file);
let key = (abs_path, chunk.start_line, chunk.end_line);
if seen.contains_key(&key) {
continue;
}
seen.insert(key, ());
deduped.push((namespaced_id, score));
}
let dropped = input_len - deduped.len();
(deduped, dropped)
}
fn resolve_chunk_path(registry: &IndexRegistry, index_id: &IndexId, file: &str) -> PathBuf {
let raw = if Path::new(file).is_absolute() {
PathBuf::from(file)
} else {
registry
.get(index_id)
.map(|h| h.root_path.join(file))
.unwrap_or_else(|| PathBuf::from(file))
};
canonicalize_best_effort(&raw)
}
pub fn apply_threshold_child_inclusion(
inactive_ids: &[IndexId],
active_ids: &mut Vec<IndexId>,
weight_map: &mut HashMap<IndexId, f32>,
hierarchy: &IndexHierarchy,
) {
for child_id in inactive_ids {
let Some(parent_id) = hierarchy.parent_of.get(child_id) else {
continue;
};
if weight_map.contains_key(parent_id) {
if !weight_map.contains_key(child_id) {
tracing::debug!(
"hierarchy: including child index '{}' (parent '{}' is active)",
child_id,
parent_id,
);
weight_map.insert(child_id.clone(), 1.0);
active_ids.push(child_id.clone());
}
}
}
}
#[derive(Debug, serde::Serialize)]
pub struct IndexTreeEntry {
pub id: String,
pub root_path: PathBuf,
#[serde(skip_serializing_if = "Option::is_none")]
pub parent_id: Option<String>,
pub children: Vec<String>,
pub priority_boost: f32,
pub is_sub_index: bool,
}
pub fn build_tree_entries(
registry: &IndexRegistry,
handles: &[Arc<IndexHandle>],
) -> Vec<IndexTreeEntry> {
let all_ids: Vec<IndexId> = handles.iter().map(|h| h.id.clone()).collect();
let hierarchy = IndexHierarchy::from_registry(registry, &all_ids);
handles
.iter()
.map(|h| {
let parent_id = hierarchy.parent_of.get(&h.id).map(|p| p.0.clone());
let children = hierarchy
.children(&h.id)
.iter()
.map(|c| c.0.clone())
.collect();
let is_sub = hierarchy.is_child(&h.id);
let boost = if is_sub { DEFAULT_SUB_INDEX_BOOST } else { 1.0 };
IndexTreeEntry {
id: h.id.0.clone(),
root_path: h.root_path.clone(),
parent_id,
children,
priority_boost: boost,
is_sub_index: is_sub,
}
})
.collect()
}
fn is_strict_sub_path(child: &Path, parent: &Path) -> bool {
child != parent && child.starts_with(parent)
}
pub fn canonicalize_best_effort(path: &Path) -> PathBuf {
std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf())
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
fn id(s: &str) -> IndexId {
IndexId::new(s)
}
fn pb(s: &str) -> PathBuf {
PathBuf::from(s)
}
#[test]
fn is_strict_sub_path_child_is_sub() {
assert!(is_strict_sub_path(
&pb("/repos/project/services/billing"),
&pb("/repos/project")
));
}
#[test]
fn is_strict_sub_path_same_path_is_not_sub() {
assert!(!is_strict_sub_path(
&pb("/repos/project"),
&pb("/repos/project")
));
}
#[test]
fn is_strict_sub_path_prefix_without_separator_is_not_sub() {
assert!(!is_strict_sub_path(
&pb("/repos/projectX/src"),
&pb("/repos/project")
));
}
#[test]
fn is_strict_sub_path_parent_does_not_start_with_child() {
assert!(!is_strict_sub_path(
&pb("/repos/project"),
&pb("/repos/project/services/billing")
));
}
#[test]
fn hierarchy_flat_peers_no_edges() {
let entries = vec![
(id("a"), pb("/repos/project-a")),
(id("b"), pb("/repos/project-b")),
];
let h = IndexHierarchy::from_root_paths(&entries);
assert!(h.parent_of.is_empty());
assert!(h.children_of.is_empty());
assert!(!h.is_child(&id("a")));
assert!(!h.is_child(&id("b")));
}
#[test]
fn hierarchy_two_indexes_nested() {
let entries = vec![
(id("root"), pb("/repos/project")),
(id("billing"), pb("/repos/project/services/billing")),
];
let h = IndexHierarchy::from_root_paths(&entries);
assert_eq!(h.parent_of.get(&id("billing")), Some(&id("root")));
assert!(h.children(&id("root")).contains(&id("billing")));
assert!(h.is_child(&id("billing")));
assert!(!h.is_child(&id("root")));
}
#[test]
fn hierarchy_deep_nesting_picks_direct_parent() {
let entries = vec![
(id("grandparent"), pb("/a")),
(id("parent"), pb("/a/b")),
(id("child"), pb("/a/b/c")),
];
let h = IndexHierarchy::from_root_paths(&entries);
assert_eq!(h.parent_of.get(&id("child")), Some(&id("parent")));
assert_eq!(h.parent_of.get(&id("parent")), Some(&id("grandparent")));
assert_eq!(h.parent_of.len(), 2);
assert!(h.children(&id("parent")).contains(&id("child")));
assert!(h.children(&id("grandparent")).contains(&id("parent")));
assert!(!h.children(&id("grandparent")).contains(&id("child")));
}
#[test]
fn hierarchy_sibling_sub_indexes_have_independent_parents() {
let entries = vec![
(id("root"), pb("/repos/mono")),
(id("svc-a"), pb("/repos/mono/services/a")),
(id("svc-b"), pb("/repos/mono/services/b")),
];
let h = IndexHierarchy::from_root_paths(&entries);
assert_eq!(h.parent_of.get(&id("svc-a")), Some(&id("root")));
assert_eq!(h.parent_of.get(&id("svc-b")), Some(&id("root")));
let children = h.children(&id("root"));
assert!(children.contains(&id("svc-a")));
assert!(children.contains(&id("svc-b")));
assert!(!h.parent_of.contains_key(&id("root")));
}
#[test]
fn sub_index_boost_applied_in_effective_weight() {
let entries = vec![
(id("root"), pb("/repos/project")),
(id("billing"), pb("/repos/project/services/billing")),
];
let h = IndexHierarchy::from_root_paths(&entries);
let w_root = effective_weight_for_index(&id("root"), 0.8, &h);
assert!(
(w_root - 1.0).abs() < 1e-4,
"root weight {w_root} should be 1.0 (clamped)"
);
let w_child = effective_weight_for_index(&id("billing"), 0.8, &h);
assert!(
(w_child - 1.2).abs() < 1e-4,
"child weight {w_child} should be 1.2"
);
let w_child_max = effective_weight_for_index(&id("billing"), 1.0, &h);
assert!(
(w_child_max - 1.5).abs() < 1e-4,
"child weight {w_child_max} should be 1.5"
);
}
#[test]
fn effective_weight_clamped_to_max() {
let entries = vec![(id("parent"), pb("/a")), (id("child"), pb("/a/b"))];
let h = IndexHierarchy::from_root_paths(&entries);
let w = effective_weight_for_index(&id("child"), 4.0, &h);
assert!(
(w - 4.0).abs() < 1e-4,
"weight {w} should be clamped to 4.0"
);
}
#[test]
fn threshold_child_inclusion_adds_child_when_parent_active() {
let entries = vec![
(id("root"), pb("/repos/mono")),
(id("svc"), pb("/repos/mono/svc")),
];
let h = IndexHierarchy::from_root_paths(&entries);
let mut active = vec![id("root")];
let mut weight_map: HashMap<IndexId, f32> = [(id("root"), 0.9)].into_iter().collect();
let inactive = vec![id("svc")];
apply_threshold_child_inclusion(&inactive, &mut active, &mut weight_map, &h);
assert!(
active.contains(&id("svc")),
"child should be added to active list"
);
assert_eq!(
weight_map.get(&id("svc")),
Some(&1.0),
"child weight should be 1.0"
);
}
#[test]
fn threshold_child_inclusion_does_not_add_when_parent_inactive() {
let entries = vec![
(id("root"), pb("/repos/mono")),
(id("svc"), pb("/repos/mono/svc")),
];
let h = IndexHierarchy::from_root_paths(&entries);
let mut active: Vec<IndexId> = vec![];
let mut weight_map: HashMap<IndexId, f32> = HashMap::new();
let inactive = vec![id("root"), id("svc")];
apply_threshold_child_inclusion(&inactive, &mut active, &mut weight_map, &h);
assert!(
active.is_empty(),
"nothing should be added when parent is inactive"
);
}
#[test]
fn canonicalize_best_effort_returns_input_on_missing_path() {
let missing = PathBuf::from("/this/path/does/not/exist/abc123");
let result = canonicalize_best_effort(&missing);
assert_eq!(
result, missing,
"should fall back to the raw path on failure"
);
}
#[test]
fn canonicalize_best_effort_resolves_existing_path() {
let result = canonicalize_best_effort(&PathBuf::from("/tmp"));
assert!(result.is_absolute());
}
}