use crate::core::content_chunk::ContentChunk;
/// Per-chunk saliency breakdown produced by `compute_ecs_scores`.
///
/// The individual components are kept next to the combined score so callers
/// can see how the total was reached.
#[derive(Debug, Clone)]
pub struct SaliencyScore {
/// Position of the scored chunk in the slice handed to `compute_ecs_scores`.
pub chunk_idx: usize,
/// Weighted sum of the three components below (weights come from `EcsWeights`).
pub ecs_score: f64,
/// Fraction of task keywords found in the chunk; 0.5 when no keywords were given.
pub task_relevance: f64,
/// The chunk's edge count divided by the largest supplied edge count (at least 1).
pub graph_centrality: f64,
/// Ratio of distinct to total whitespace-separated tokens in the chunk content;
/// 0.0 when the chunk reports a token count of zero.
pub info_density: f64,
/// Initialised to `ecs_score` by `compute_ecs_scores`.
/// NOTE(review): presumably adjusted by a later ranking stage — not visible in this file.
pub final_score: f64,
}
/// Multipliers applied to the three saliency components when
/// `compute_ecs_scores` folds them into a single ECS score.
#[derive(Debug, Clone)]
pub struct EcsWeights {
    /// Multiplier for the task-relevance component.
    pub w_task: f64,
    /// Multiplier for the graph-centrality component.
    pub w_graph: f64,
    /// Multiplier for the information-density component.
    pub w_density: f64,
}

impl Default for EcsWeights {
    /// Returns the stock weighting: 0.5 task, 0.3 graph, 0.2 density.
    fn default() -> Self {
        let (w_task, w_graph, w_density) = (0.5, 0.3, 0.2);
        EcsWeights {
            w_task,
            w_graph,
            w_density,
        }
    }
}
/// Scores every chunk by blending task relevance, graph centrality and
/// information density.
///
/// * `chunks` – chunks to score; output order and `chunk_idx` follow this slice.
/// * `task_keywords` – keywords forwarded to `compute_task_relevance`.
/// * `graph_edge_counts` – edge count per chunk, matched by position; a chunk
///   without an entry is treated as having zero edges.
/// * `weights` – multipliers for the three components.
///
/// Returns one `SaliencyScore` per chunk, with `final_score` equal to
/// `ecs_score`.
pub fn compute_ecs_scores(
    chunks: &[ContentChunk],
    task_keywords: &[String],
    graph_edge_counts: &[usize],
    weights: &EcsWeights,
) -> Vec<SaliencyScore> {
    // Normaliser for centrality. It is floored at 1 so the division below is
    // well defined even when the slice is empty or every count is zero.
    let edge_ceiling = graph_edge_counts
        .iter()
        .copied()
        .max()
        .map_or(1, |peak| peak.max(1)) as f64;

    let mut results = Vec::with_capacity(chunks.len());
    for (chunk_idx, chunk) in chunks.iter().enumerate() {
        let edges = graph_edge_counts.get(chunk_idx).map_or(0, |&count| count);

        let task_relevance = compute_task_relevance(chunk, task_keywords);
        let graph_centrality = edges as f64 / edge_ceiling;
        let info_density = compute_info_density(chunk);

        let task_term = weights.w_task * task_relevance;
        let graph_term = weights.w_graph * graph_centrality;
        let density_term = weights.w_density * info_density;
        let ecs_score = task_term + graph_term + density_term;

        results.push(SaliencyScore {
            chunk_idx,
            ecs_score,
            task_relevance,
            graph_centrality,
            info_density,
            final_score: ecs_score,
        });
    }
    results
}
/// Measures how many of the task keywords occur in a chunk.
///
/// Matching is a case-insensitive substring search over the chunk's content
/// followed by a single space and its symbol name. Returns the matched
/// fraction in `[0.0, 1.0]`, or the neutral value `0.5` when `task_keywords`
/// is empty.
fn compute_task_relevance(chunk: &ContentChunk, task_keywords: &[String]) -> f64 {
    if task_keywords.is_empty() {
        return 0.5;
    }

    // Search text: lower-cased content, one space, lower-cased symbol name.
    let mut haystack = chunk.content.to_lowercase();
    haystack.push(' ');
    haystack.push_str(&chunk.symbol_name.to_lowercase());

    let mut hits = 0usize;
    for keyword in task_keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            hits += 1;
        }
    }
    hits as f64 / task_keywords.len() as f64
}
/// Estimates lexical diversity as distinct tokens divided by total tokens.
///
/// Tokens are the whitespace-separated words of the chunk content. Returns a
/// value in `[0.0, 1.0]`; a chunk whose `token_count` is zero, or whose content
/// holds no words, scores `0.0`.
fn compute_info_density(chunk: &ContentChunk) -> f64 {
    if chunk.token_count == 0 {
        return 0.0;
    }

    // One pass over the words gathers both the distinct set and the total.
    let mut distinct = std::collections::HashSet::new();
    let mut total = 0usize;
    for word in chunk.content.split_whitespace() {
        distinct.insert(word);
        total += 1;
    }

    // Flooring the divisor at 1 avoids 0/0 for content without any words.
    let ratio = distinct.len() as f64 / total.max(1) as f64;
    ratio.min(1.0)
}
/// Greedily selects up to `top_k` chunks that are both high-scoring and
/// mutually dissimilar.
///
/// The chunk with the highest `ecs_score` is taken first. Every following pick
/// maximises `(1 - lambda) * ecs_score - lambda * redundancy`, where
/// `redundancy` is the candidate's highest similarity to any chunk already
/// selected. Ties go to the higher-scored, then lower-indexed, candidate.
/// Chunks whose score is NaN are ranked last.
///
/// * `scores` – one entry per chunk; position `i` is taken to describe
///   `chunks[i]`.
/// * `chunks` – chunk contents used for the similarity comparison.
/// * `top_k` – maximum number of positions to return; any value is accepted,
///   including ones larger than `scores.len()`.
/// * `lambda` – trade-off between relevance (`0.0`) and diversity (`1.0`).
///
/// Returns positions into `scores` in selection order; empty when `scores` is
/// empty or `top_k` is zero.
///
/// # Panics
///
/// May panic when more than one pick is evaluated and `chunks` has fewer
/// entries than `scores`, because chunk contents are indexed by score position.
pub fn mig_select(
    scores: &[SaliencyScore],
    chunks: &[ContentChunk],
    top_k: usize,
    lambda: f64,
) -> Vec<usize> {
    if scores.is_empty() || top_k == 0 {
        return Vec::new();
    }

    // NaN has no defined order. Ranking it as negative infinity gives the sort
    // the consistent total order `sort_by` requires and puts NaN-scored chunks
    // at the end instead of at an arbitrary position.
    let rank_key = |idx: usize| {
        let score = scores[idx].ecs_score;
        if score.is_nan() {
            f64::NEG_INFINITY
        } else {
            score
        }
    };

    let mut available: Vec<usize> = (0..scores.len()).collect();
    // Stable descending sort: equal scores keep ascending index order.
    available.sort_by(|&a, &b| {
        rank_key(b)
            .partial_cmp(&rank_key(a))
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // `top_k` may be far larger than the input (e.g. `usize::MAX` for "all"),
    // so never reserve more than can actually be selected.
    let mut selected: Vec<usize> = Vec::with_capacity(top_k.min(scores.len()));

    // Seed with the best-scored chunk; redundancy is irrelevant while nothing
    // has been selected. `available` is non-empty because `scores` is.
    selected.push(available.remove(0));

    while selected.len() < top_k && !available.is_empty() {
        let mut best_pos = 0;
        let mut best_mig = f64::NEG_INFINITY;
        for (pos, &candidate) in available.iter().enumerate() {
            let relevance = scores[candidate].ecs_score;
            let redundancy = max_similarity_to_selected(candidate, &selected, chunks);
            let mig = (1.0 - lambda) * relevance - lambda * redundancy;
            // Strict comparison keeps the earliest candidate on ties.
            if mig > best_mig {
                best_mig = mig;
                best_pos = pos;
            }
        }
        // `remove` keeps the remaining candidates in ranked order.
        selected.push(available.remove(best_pos));
    }
    selected
}
/// Jaccard similarity of the whitespace-separated token sets of two chunks.
///
/// Returns the size of the shared token set divided by the size of the
/// combined token set, a value in `[0.0, 1.0]`. Two chunks that both contain
/// no tokens are treated as identical (`1.0`).
fn chunk_similarity(a: &ContentChunk, b: &ContentChunk) -> f64 {
    use std::collections::HashSet;

    let left: HashSet<&str> = a.content.split_whitespace().collect();
    let right: HashSet<&str> = b.content.split_whitespace().collect();

    if left.is_empty() && right.is_empty() {
        return 1.0;
    }

    let shared = left.iter().filter(|tok| right.contains(*tok)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = left.len() + right.len() - shared;
    shared as f64 / combined.max(1) as f64
}
/// Returns the highest similarity between `chunks[candidate]` and any chunk
/// whose index is listed in `selected`.
///
/// The result is `0.0` when `selected` is empty; in that case `chunks` is not
/// indexed at all.
fn max_similarity_to_selected(
    candidate: usize,
    selected: &[usize],
    chunks: &[ContentChunk],
) -> f64 {
    let mut highest = 0.0_f64;
    for &picked in selected {
        // Index inside the loop so an empty selection never touches `chunks`.
        let similarity = chunk_similarity(&chunks[candidate], &chunks[picked]);
        highest = highest.max(similarity);
    }
    highest
}
// Unit tests for ECS scoring, information density, chunk similarity and the
// greedy diverse selection in `mig_select`.
#[cfg(test)]
mod tests {
use super::*;
use crate::core::bm25_index::ChunkKind;
// Builds a minimal issue chunk through `ContentChunk::from_provider`, passing
// `title` for two of its arguments and `content` as the body; the remaining
// arguments are fixed placeholders.
// NOTE(review): the meaning of each `from_provider` parameter is not visible
// in this file — confirm against its definition.
fn make_chunk(title: &str, content: &str) -> ContentChunk {
ContentChunk::from_provider(
"test",
"issues",
title,
title,
ChunkKind::Issue,
content.into(),
vec![],
None,
)
}
// A chunk containing both task keywords must beat one containing neither,
// both on task relevance and on the combined score.
#[test]
fn ecs_score_higher_for_relevant_chunk() {
let chunks = vec![
make_chunk("auth-bug", "authentication token expiry broken"),
make_chunk("css-issue", "sidebar layout broken on mobile"),
];
let keywords = vec!["authentication".into(), "token".into()];
let edge_counts = vec![0, 0];
let scores = compute_ecs_scores(&chunks, &keywords, &edge_counts, &EcsWeights::default());
assert!(scores[0].ecs_score > scores[1].ecs_score);
assert!(scores[0].task_relevance > scores[1].task_relevance);
}
// With edge counts 10 and 1, the first chunk must get the higher centrality.
#[test]
fn ecs_score_boosts_high_graph_centrality() {
let chunks = vec![
make_chunk("hub-file", "important module"),
make_chunk("leaf-file", "minor utility"),
];
let keywords: Vec<String> = vec![];
let edge_counts = vec![10, 1];
let scores = compute_ecs_scores(&chunks, &keywords, &edge_counts, &EcsWeights::default());
assert!(scores[0].graph_centrality > scores[1].graph_centrality);
}
// Content made of distinct words must score a higher density than content
// repeating a single word.
#[test]
fn info_density_higher_for_diverse_content() {
let diverse = make_chunk(
"diverse",
"authentication token validation expiry check refresh",
);
let repetitive = make_chunk("repetitive", "token token token token token token token");
let d_density = compute_info_density(&diverse);
let r_density = compute_info_density(&repetitive);
assert!(d_density > r_density);
}
// Two near-duplicate auth chunks plus one unrelated database chunk: selecting
// two must yield the first auth chunk and the database chunk, not both auth
// chunks.
#[test]
fn mig_select_picks_diverse_chunks() {
let chunks = vec![
make_chunk("auth-1", "authentication token expiry validation"),
make_chunk("auth-2", "authentication token expiry check"),
make_chunk("db-issue", "database connection pool exhausted timeout"),
];
let keywords = vec!["authentication".into(), "database".into()];
let edge_counts = vec![0, 0, 0];
let scores = compute_ecs_scores(&chunks, &keywords, &edge_counts, &EcsWeights::default());
let selected = mig_select(&scores, &chunks, 2, 0.6);
assert_eq!(selected.len(), 2);
assert!(selected.contains(&0));
assert!(selected.contains(&2));
}
// `top_k` caps the result size; asking for more than exist returns them all.
#[test]
fn mig_select_respects_top_k() {
let chunks = vec![
make_chunk("a", "content a"),
make_chunk("b", "content b"),
make_chunk("c", "content c"),
];
let scores = compute_ecs_scores(&chunks, &[], &[0, 0, 0], &EcsWeights::default());
let selected = mig_select(&scores, &chunks, 1, 0.6);
assert_eq!(selected.len(), 1);
let selected = mig_select(&scores, &chunks, 10, 0.6);
assert_eq!(selected.len(), 3);
}
// Empty inputs yield an empty selection.
#[test]
fn mig_select_empty_input() {
let selected = mig_select(&[], &[], 5, 0.6);
assert!(selected.is_empty());
}
// A chunk compared with itself has similarity 1.0.
#[test]
fn chunk_similarity_identical() {
let a = make_chunk("a", "same content here");
assert!((chunk_similarity(&a, &a) - 1.0).abs() < f64::EPSILON);
}
// Chunks sharing no words must have a low similarity (below 0.2).
#[test]
fn chunk_similarity_disjoint() {
let a = make_chunk("a", "authentication token validation");
let b = make_chunk("b", "database connection pool exhausted");
let sim = chunk_similarity(&a, &b);
assert!(sim < 0.2);
}
// The default weights are expected to add up to 1.0.
#[test]
fn default_weights_sum_to_one() {
let w = EcsWeights::default();
assert!((w.w_task + w.w_graph + w.w_density - 1.0).abs() < f64::EPSILON);
}
// Without task keywords, relevance falls back to the neutral value 0.5.
#[test]
fn no_task_keywords_gives_neutral_relevance() {
let chunk = make_chunk("test", "some content");
let relevance = compute_task_relevance(&chunk, &[]);
assert!((relevance - 0.5).abs() < f64::EPSILON);
}
}