lean_ctx/core/
context_packing.rs1use std::collections::HashSet;
20
21#[derive(Debug, Clone)]
24pub struct CoverageItem {
25 pub terms: HashSet<String>,
27 pub cost: usize,
29}
30
31pub fn greedy_max_coverage<F>(items: &[CoverageItem], budget: usize, term_weight: F) -> Vec<usize>
41where
42 F: Fn(&str) -> f64,
43{
44 let mut covered: HashSet<&str> = HashSet::new();
45 let mut selected: Vec<usize> = Vec::new();
46 let mut remaining: Vec<usize> = (0..items.len()).collect();
47 let mut spent = 0usize;
48
49 while !remaining.is_empty() && spent < budget {
50 let mut best: Option<(usize, usize, f64)> = None; for (pos, &idx) in remaining.iter().enumerate() {
53 let item = &items[idx];
54 if item.cost == 0 || spent + item.cost > budget {
55 continue;
56 }
57 let gain: f64 = item
59 .terms
60 .iter()
61 .filter(|t| !covered.contains(t.as_str()))
62 .map(|t| term_weight(t))
63 .sum();
64 if gain <= 0.0 {
65 continue;
66 }
67 let gain_per_cost = gain / item.cost as f64;
68 let take = match best {
69 Some((_, _, best_gpc)) => gain_per_cost > best_gpc,
70 None => true,
71 };
72 if take {
73 best = Some((pos, idx, gain_per_cost));
74 }
75 }
76
77 match best {
78 Some((pos, idx, _)) => {
79 for t in &items[idx].terms {
80 covered.insert(t.as_str());
81 }
82 spent += items[idx].cost;
83 selected.push(idx);
84 remaining.swap_remove(pos);
85 }
86 None => break,
88 }
89 }
90
91 selected
92}
93
94#[cfg(test)]
95mod tests {
96 use super::*;
97
98 fn item(terms: &[&str], cost: usize) -> CoverageItem {
99 CoverageItem {
100 terms: terms.iter().map(|s| (*s).to_string()).collect(),
101 cost,
102 }
103 }
104
105 #[test]
106 fn picks_complementary_items_over_redundant_ones() {
107 let items = vec![
109 item(&["x", "y"], 1), item(&["z", "w"], 1), item(&["x", "y"], 1), ];
113 let sel = greedy_max_coverage(&items, 2, |_| 1.0);
114 assert_eq!(sel.len(), 2);
115 assert!(sel.contains(&0) || sel.contains(&2)); assert!(sel.contains(&1)); assert!(
118 !(sel.contains(&0) && sel.contains(&2)),
119 "must not pick both redundant items"
120 );
121 }
122
123 #[test]
124 fn respects_budget() {
125 let items = vec![item(&["a"], 1), item(&["b"], 1), item(&["c"], 1)];
126 let sel = greedy_max_coverage(&items, 2, |_| 1.0);
127 assert_eq!(sel.len(), 2);
128 }
129
130 #[test]
131 fn skips_items_that_add_no_new_coverage() {
132 let items = vec![item(&["a", "b"], 1), item(&["a"], 1)];
133 let sel = greedy_max_coverage(&items, 5, |_| 1.0);
134 assert_eq!(sel, vec![0], "second item is fully subsumed → skipped");
135 }
136
137 #[test]
138 fn term_weight_prioritises_rare_terms() {
139 let items = vec![item(&["common"], 1), item(&["rare"], 1)];
141 let weight = |t: &str| if t == "rare" { 10.0 } else { 1.0 };
142 let sel = greedy_max_coverage(&items, 1, weight);
143 assert_eq!(sel, vec![1], "rare/high-weight term wins the single slot");
144 }
145
146 #[test]
147 fn empty_input_yields_empty_selection() {
148 let sel = greedy_max_coverage(&[], 5, |_| 1.0);
149 assert!(sel.is_empty());
150 }
151
152 #[test]
153 fn zero_cost_items_are_skipped() {
154 let items = vec![item(&["a"], 0), item(&["b"], 1)];
155 let sel = greedy_max_coverage(&items, 5, |_| 1.0);
156 assert_eq!(sel, vec![1]);
157 }
158}