Skip to main content

lean_ctx/core/
context_compiler.rs

1//! Context Compiler -- builds minimal context packages under budget constraints.
2//!
3//! Physical metaphor: Free Energy minimization.
4//! F = E - TS, where E = token cost, T = budget pressure, S = information (Phi).
5//!
6//! Algorithm:
7//!   1. LOAD    ledger items + active overlays -> candidates
8//!   2. SCORE   Phi(i,t) for each candidate (Context Field)
9//!   3. SELECT  greedy knapsack with view selection
10//!   4. DEDUP   redundancy removal via Jaccard
11//!   5. ORDER   Lost-in-the-Middle reorder (LiTM profile)
12//!   6. RENDER  output in the requested mode
13//!   7. PROVE   record provenance in evidence ledger
14
15use serde::Serialize;
16
17use super::context_field::{
18    efficiency, ContextItemId, ContextKind, ContextState, TokenBudget, ViewCosts, ViewKind,
19};
20use super::entropy::jaccard_similarity;
21
/// Output mode requested for a compilation run.
///
/// The string forms accepted by [`CompileMode::parse`] and produced by
/// [`CompileMode::as_str`] are listed on each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileMode {
    /// Rendered as `"handle_manifest"`; also the fallback for unrecognised input.
    HandleManifest,
    /// Rendered as `"compressed"`.
    Compressed,
    /// Rendered as `"full_prompt"`; parsed from `"full"` or `"full_prompt"`.
    FullPrompt,
}

impl CompileMode {
    /// Parses a mode name, ignoring surrounding whitespace and letter case.
    ///
    /// This never fails: any name that is not recognised yields
    /// [`CompileMode::HandleManifest`].
    pub fn parse(s: &str) -> Self {
        let name = s.trim().to_lowercase();
        if name == "compressed" {
            return Self::Compressed;
        }
        if name == "full" || name == "full_prompt" {
            return Self::FullPrompt;
        }
        Self::HandleManifest
    }

    /// Returns the canonical snake_case name of this mode.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::FullPrompt => "full_prompt",
            Self::Compressed => "compressed",
            Self::HandleManifest => "handle_manifest",
        }
    }
}
47
48/// A candidate item ready for selection.
49#[derive(Debug, Clone)]
50pub struct CompileCandidate {
51    pub id: ContextItemId,
52    pub kind: ContextKind,
53    pub path: String,
54    pub state: ContextState,
55    pub phi: f64,
56    pub view_costs: ViewCosts,
57    pub selected_view: ViewKind,
58    pub selected_tokens: usize,
59    pub pinned: bool,
60}
61
62/// Result of a compilation run.
63#[derive(Debug, Clone, Serialize)]
64pub struct CompileResult {
65    pub run_id: String,
66    pub mode: String,
67    pub budget_total: usize,
68    pub budget_used: usize,
69    pub items_considered: usize,
70    pub items_selected: usize,
71    pub items_excluded: usize,
72    pub items_pinned: usize,
73    pub selected: Vec<SelectedItem>,
74    pub excluded_reasons: Vec<ExcludedItem>,
75    pub warnings: Vec<String>,
76}
77
78#[derive(Debug, Clone, Serialize)]
79pub struct SelectedItem {
80    pub id: String,
81    pub path: String,
82    pub view: String,
83    pub tokens: usize,
84    pub phi: f64,
85    pub pinned: bool,
86}
87
88#[derive(Debug, Clone, Serialize)]
89pub struct ExcludedItem {
90    pub id: String,
91    pub path: String,
92    pub reason: String,
93}
94
95/// Compile a minimal context package from candidates under budget constraints.
96///
97/// This implements a greedy knapsack: pinned items first, then by efficiency
98/// (Phi/token), with automatic view downgrade under budget pressure.
99pub fn compile(
100    candidates: &[CompileCandidate],
101    budget: TokenBudget,
102    mode: CompileMode,
103) -> CompileResult {
104    let run_id = format!(
105        "run_{}_{}",
106        chrono::Utc::now().format("%Y%m%d_%H%M%S"),
107        std::process::id() % 1000
108    );
109
110    let mut selected: Vec<SelectedItem> = Vec::new();
111    let mut excluded: Vec<ExcludedItem> = Vec::new();
112    let mut warnings: Vec<String> = Vec::new();
113    let mut tokens_used: usize = 0;
114    let remaining = budget.remaining();
115
116    let (pinned, unpinned): (Vec<_>, Vec<_>) = candidates
117        .iter()
118        .partition(|c| c.pinned || c.state == ContextState::Pinned);
119
120    for c in &pinned {
121        if c.state == ContextState::Excluded {
122            excluded.push(ExcludedItem {
123                id: c.id.to_string(),
124                path: c.path.clone(),
125                reason: "excluded by overlay".to_string(),
126            });
127            continue;
128        }
129        let (view, tokens) =
130            best_affordable_view(&c.view_costs, remaining.saturating_sub(tokens_used));
131        tokens_used = tokens_used.saturating_add(tokens);
132        selected.push(SelectedItem {
133            id: c.id.to_string(),
134            path: c.path.clone(),
135            view: view.as_str().to_string(),
136            tokens,
137            phi: c.phi,
138            pinned: true,
139        });
140    }
141
142    let mut scored: Vec<(usize, f64)> = unpinned
143        .iter()
144        .enumerate()
145        .filter(|(_, c)| c.state != ContextState::Excluded)
146        .map(|(i, c)| {
147            let best_tokens = c
148                .view_costs
149                .cheapest_content_view()
150                .map_or(c.selected_tokens, |(_, t)| t);
151            (i, efficiency(c.phi, best_tokens.max(1)))
152        })
153        .collect();
154
155    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
156
157    for (idx, _eff) in &scored {
158        let c = &unpinned[*idx];
159        let budget_left = remaining.saturating_sub(tokens_used);
160        if budget_left == 0 {
161            excluded.push(ExcludedItem {
162                id: c.id.to_string(),
163                path: c.path.clone(),
164                reason: "budget exhausted".to_string(),
165            });
166            continue;
167        }
168
169        let (view, tokens) = best_affordable_view(&c.view_costs, budget_left);
170        if tokens == 0 || tokens > budget_left {
171            excluded.push(ExcludedItem {
172                id: c.id.to_string(),
173                path: c.path.clone(),
174                reason: format!("too expensive ({tokens}t > {budget_left}t remaining)"),
175            });
176            continue;
177        }
178
179        tokens_used = tokens_used.saturating_add(tokens);
180        selected.push(SelectedItem {
181            id: c.id.to_string(),
182            path: c.path.clone(),
183            view: view.as_str().to_string(),
184            tokens,
185            phi: c.phi,
186            pinned: false,
187        });
188    }
189
190    for c in candidates
191        .iter()
192        .filter(|c| c.state == ContextState::Excluded)
193    {
194        if !excluded.iter().any(|e| e.id == c.id.to_string()) {
195            excluded.push(ExcludedItem {
196                id: c.id.to_string(),
197                path: c.path.clone(),
198                reason: "excluded by overlay/policy".to_string(),
199            });
200        }
201    }
202
203    // Step 4: DEDUP — remove redundant items via Jaccard similarity.
204    // Items with >70% word overlap with a higher-Phi selected item are dropped.
205    let contents: Vec<Option<String>> = selected
206        .iter()
207        .map(|s| {
208            candidates
209                .iter()
210                .find(|c| c.id.to_string() == s.id)
211                .map(|c| c.path.clone())
212        })
213        .collect();
214
215    let mut deduped: Vec<SelectedItem> = Vec::with_capacity(selected.len());
216    let mut dedup_tokens = 0usize;
217    for (i, item) in selected.iter().enumerate() {
218        let dominated = deduped.iter().enumerate().any(|(j, existing)| {
219            let path_a = contents.get(j).and_then(|p| p.as_deref()).unwrap_or("");
220            let path_b = contents.get(i).and_then(|p| p.as_deref()).unwrap_or("");
221            if path_a.is_empty() || path_b.is_empty() {
222                return false;
223            }
224            jaccard_similarity(path_a, path_b) > 0.7 && existing.phi >= item.phi
225        });
226        if dominated {
227            excluded.push(ExcludedItem {
228                id: item.id.clone(),
229                path: item.path.clone(),
230                reason: "dedup: >70% Jaccard overlap with higher-Phi item".to_string(),
231            });
232        } else {
233            dedup_tokens += item.tokens;
234            deduped.push(item.clone());
235        }
236    }
237    selected = deduped;
238    tokens_used = dedup_tokens;
239
240    // Step 5: ORDER — Lost-in-the-Middle (LiTM) reorder.
241    // High-Phi items at the beginning and end; medium-Phi in the middle.
242    if selected.len() >= 3 {
243        selected.sort_by(|a, b| {
244            b.phi
245                .partial_cmp(&a.phi)
246                .unwrap_or(std::cmp::Ordering::Equal)
247        });
248        let n = selected.len();
249        let mut reordered = Vec::with_capacity(n);
250        let mut left = Vec::new();
251        let mut right = Vec::new();
252        for (i, item) in selected.into_iter().enumerate() {
253            if i % 2 == 0 {
254                left.push(item);
255            } else {
256                right.push(item);
257            }
258        }
259        right.reverse();
260        reordered.extend(left);
261        reordered.extend(right);
262        selected = reordered;
263    }
264
265    if tokens_used as f64 / budget.total.max(1) as f64 > 0.9 {
266        warnings.push(format!(
267            "Context budget >90% utilized ({tokens_used}/{} tokens)",
268            budget.total
269        ));
270    }
271
272    CompileResult {
273        run_id,
274        mode: mode.as_str().to_string(),
275        budget_total: budget.total,
276        budget_used: tokens_used,
277        items_considered: candidates.len(),
278        items_selected: selected.len(),
279        items_excluded: excluded.len(),
280        items_pinned: pinned.len(),
281        selected,
282        excluded_reasons: excluded,
283        warnings,
284    }
285}
286
287/// Select the best view that fits within the budget, preferring denser views.
288fn best_affordable_view(costs: &ViewCosts, budget_left: usize) -> (ViewKind, usize) {
289    let mut options: Vec<(ViewKind, usize)> = costs
290        .estimates
291        .iter()
292        .map(|(&v, &t)| (v, t))
293        .filter(|(_, t)| *t <= budget_left && *t > 0)
294        .collect();
295
296    options.sort_by_key(|(v, _)| v.density_rank());
297
298    options
299        .first()
300        .copied()
301        .unwrap_or((ViewKind::Handle, 25.min(budget_left)))
302}
303
304/// Format the compilation result for display.
305pub fn format_compile_result(result: &CompileResult) -> String {
306    let mut out = String::new();
307    out.push_str(&format!(
308        "[compiled] {} mode, {}/{} tokens\n",
309        result.mode, result.budget_used, result.budget_total
310    ));
311    out.push_str(&format!(
312        "Selected: {} items, Excluded: {}, Pinned: {}\n\n",
313        result.items_selected, result.items_excluded, result.items_pinned
314    ));
315
316    if !result.selected.is_empty() {
317        out.push_str("Included:\n");
318        for item in &result.selected {
319            let pin_tag = if item.pinned { " [pinned]" } else { "" };
320            out.push_str(&format!(
321                "  {} {} {}t phi={:.2}{}\n",
322                item.path, item.view, item.tokens, item.phi, pin_tag
323            ));
324        }
325    }
326
327    if !result.excluded_reasons.is_empty() {
328        out.push('\n');
329        out.push_str("Excluded:\n");
330        for item in &result.excluded_reasons {
331            out.push_str(&format!("  {} — {}\n", item.path, item.reason));
332        }
333    }
334
335    if !result.warnings.is_empty() {
336        out.push('\n');
337        for w in &result.warnings {
338            out.push_str(&format!("WARNING: {w}\n"));
339        }
340    }
341
342    out
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a file candidate whose view costs are derived from `full_tokens`.
    /// A pinned candidate gets both the `pinned` flag and the `Pinned` state.
    fn make_candidate(path: &str, phi: f64, full_tokens: usize, pinned: bool) -> CompileCandidate {
        let state = if pinned {
            ContextState::Pinned
        } else {
            ContextState::Included
        };
        CompileCandidate {
            id: ContextItemId::from_file(path),
            kind: ContextKind::File,
            path: path.to_string(),
            state,
            phi,
            view_costs: ViewCosts::from_full_tokens(full_tokens),
            selected_view: ViewKind::Full,
            selected_tokens: full_tokens,
            pinned,
        }
    }

    /// A budget of `total` tokens with nothing used yet.
    fn untouched_budget(total: usize) -> TokenBudget {
        TokenBudget { total, used: 0 }
    }

    /// With room for everything, the more efficient item is listed first.
    #[test]
    fn compile_selects_highest_efficiency_first() {
        let items = [
            make_candidate("low_eff.rs", 0.1, 5000, false),
            make_candidate("high_eff.rs", 0.9, 200, false),
        ];
        let outcome = compile(&items, untouched_budget(10000), CompileMode::HandleManifest);
        assert_eq!(outcome.items_selected, 2);
        assert_eq!(outcome.selected[0].path, "high_eff.rs");
    }

    /// The tokens charged for the selection never exceed the budget.
    #[test]
    fn compile_respects_budget() {
        let items = [
            make_candidate("big.rs", 0.5, 8000, false),
            make_candidate("small.rs", 0.5, 200, false),
        ];
        let outcome = compile(&items, untouched_budget(2000), CompileMode::Compressed);
        let total_tokens: usize = outcome.selected.iter().map(|s| s.tokens).sum();
        assert!(
            total_tokens <= 2000,
            "selected tokens {total_tokens} should fit in budget 2000"
        );
    }

    /// A pinned item precedes an unpinned one even when its Phi is lower.
    #[test]
    fn compile_includes_pinned_first() {
        let items = [
            make_candidate("normal.rs", 0.9, 200, false),
            make_candidate("pinned.rs", 0.1, 300, true),
        ];
        let outcome = compile(&items, untouched_budget(10000), CompileMode::HandleManifest);
        assert!(outcome.selected[0].pinned, "pinned item should come first");
    }

    /// A candidate in the `Excluded` state is reported, not selected.
    #[test]
    fn compile_excludes_excluded_state() {
        let items = [CompileCandidate {
            state: ContextState::Excluded,
            ..make_candidate("excluded.rs", 0.9, 100, false)
        }];
        let outcome = compile(&items, untouched_budget(10000), CompileMode::HandleManifest);
        assert_eq!(outcome.items_selected, 0);
        assert_eq!(outcome.items_excluded, 1);
    }

    /// An item too large for the budget is admitted at a cheaper view, if at all.
    #[test]
    fn compile_downgrades_view_when_budget_tight() {
        let items = [make_candidate("big.rs", 0.9, 5000, false)];
        let outcome = compile(&items, untouched_budget(800), CompileMode::Compressed);
        if let Some(item) = outcome.selected.first() {
            assert_ne!(item.view, "full", "should downgrade from full under budget");
            assert!(item.tokens <= 800);
        }
    }

    /// Using more than 90% of the budget produces a warning.
    #[test]
    fn compile_warns_at_high_utilization() {
        let items = [make_candidate("a.rs", 0.9, 950, false)];
        let outcome = compile(&items, untouched_budget(1000), CompileMode::HandleManifest);
        assert!(
            !outcome.warnings.is_empty(),
            "should warn when >90% utilized"
        );
    }

    /// The text report names every selected item and tags pinned ones.
    #[test]
    fn format_compile_result_includes_key_info() {
        let items = [
            make_candidate("a.rs", 0.8, 500, false),
            make_candidate("b.rs", 0.3, 200, true),
        ];
        let outcome = compile(&items, untouched_budget(10000), CompileMode::HandleManifest);
        let text = format_compile_result(&outcome);
        assert!(text.contains("a.rs"));
        assert!(text.contains("b.rs"));
        assert!(text.contains("[pinned]"));
    }
}