Skip to main content

lean_ctx/core/
context_field.rs

//! Context Field Theory (CFT) -- unified potential function for context items.
//!
//! Combines information-theoretic, graph-based, and history signals into a
//! single scalar potential Phi(i,t) per context item, enabling principled
//! budget allocation and view selection.
//!
//! Scientific basis:
//!   Phi(i,t) = w_R*R + w_S*S + w_G*G + w_H*H - w_C*C - w_D*D
//! where R = task relevance (heat diffusion + PageRank),
//!       S = surprise (cross-entropy with Zipfian prior),
//!       G = graph proximity (weighted BFS distance),
//!       H = history signal (bandit feedback),
//!       C = token cost for the active view,
//!       D = redundancy with already-selected items (Jaccard).
15
16use std::collections::HashMap;
17use std::fmt;
18
19use serde::{Deserialize, Serialize};
20
21// ---------------------------------------------------------------------------
22// Shared types used across CFT modules (Ledger, Overlay, Handles, Compiler)
23// ---------------------------------------------------------------------------
24
25/// Stable, content-addressed identifier for a context item.
26/// Derived from `kind + source_path` so the same file always maps to the
27/// same ID within a session, regardless of content changes.
28#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
29pub struct ContextItemId(pub String);
30
31impl ContextItemId {
32    pub fn from_file(path: &str) -> Self {
33        Self(format!("file:{path}"))
34    }
35    pub fn from_shell(command: &str) -> Self {
36        let hash = crate::core::project_hash::hash_project_root(command);
37        Self(format!("shell:{hash}"))
38    }
39    pub fn from_knowledge(category: &str, key: &str) -> Self {
40        Self(format!("knowledge:{category}:{key}"))
41    }
42    pub fn from_memory(key: &str) -> Self {
43        Self(format!("memory:{key}"))
44    }
45    pub fn from_provider(provider: &str, key: &str) -> Self {
46        Self(format!("provider:{provider}:{key}"))
47    }
48    pub fn as_str(&self) -> &str {
49        &self.0
50    }
51}
52
53impl fmt::Display for ContextItemId {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        f.write_str(&self.0)
56    }
57}
58
/// Kind tag for a context item.
///
/// The variant names mirror the prefixes produced by the
/// `ContextItemId::from_*` constructors. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextKind {
    File,
    Shell,
    Knowledge,
    Memory,
    Provider,
}
68
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70#[serde(rename_all = "snake_case")]
71#[derive(Default)]
72pub enum ContextState {
73    #[default]
74    Candidate,
75    Included,
76    Excluded,
77    Pinned,
78    Stale,
79    Shadowed,
80}
81
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
83#[serde(rename_all = "snake_case")]
84pub enum ViewKind {
85    Full,
86    Signatures,
87    Map,
88    Diff,
89    Aggressive,
90    Entropy,
91    Lines,
92    Reference,
93    Handle,
94}
95
96impl ViewKind {
97    pub fn as_str(&self) -> &'static str {
98        match self {
99            Self::Full => "full",
100            Self::Signatures => "signatures",
101            Self::Map => "map",
102            Self::Diff => "diff",
103            Self::Aggressive => "aggressive",
104            Self::Entropy => "entropy",
105            Self::Lines => "lines",
106            Self::Reference => "reference",
107            Self::Handle => "handle",
108        }
109    }
110
111    pub fn parse(s: &str) -> Self {
112        match s.trim().to_lowercase().as_str() {
113            "signatures" => Self::Signatures,
114            "map" => Self::Map,
115            "diff" => Self::Diff,
116            "aggressive" => Self::Aggressive,
117            "entropy" => Self::Entropy,
118            "lines" => Self::Lines,
119            "reference" => Self::Reference,
120            "handle" => Self::Handle,
121            _ => Self::Full,
122        }
123    }
124
125    /// Phase-transition ordering: lower index = denser (more tokens).
126    pub fn density_rank(&self) -> u8 {
127        match self {
128            Self::Full => 0,
129            Self::Aggressive => 1,
130            Self::Diff => 2,
131            Self::Lines => 3,
132            Self::Entropy => 4,
133            Self::Signatures => 5,
134            Self::Map => 6,
135            Self::Reference => 7,
136            Self::Handle => 8,
137        }
138    }
139}
140
141/// Token-cost estimates for each available view of a context item.
142#[derive(Debug, Clone, Default, Serialize, Deserialize)]
143pub struct ViewCosts {
144    pub estimates: HashMap<ViewKind, usize>,
145}
146
147impl ViewCosts {
148    pub fn new() -> Self {
149        Self::default()
150    }
151
152    pub fn set(&mut self, view: ViewKind, tokens: usize) {
153        self.estimates.insert(view, tokens);
154    }
155
156    pub fn get(&self, view: &ViewKind) -> usize {
157        self.estimates.get(view).copied().unwrap_or(0)
158    }
159
160    /// Cheapest view that still provides content (excludes Handle).
161    pub fn cheapest_content_view(&self) -> Option<(ViewKind, usize)> {
162        self.estimates
163            .iter()
164            .filter(|(v, _)| **v != ViewKind::Handle)
165            .min_by_key(|(_, &tokens)| tokens)
166            .map(|(&v, &t)| (v, t))
167    }
168
169    pub fn from_full_tokens(full_tokens: usize) -> Self {
170        let mut vc = Self::new();
171        vc.set(ViewKind::Full, full_tokens);
172        vc.set(ViewKind::Signatures, full_tokens / 5);
173        vc.set(ViewKind::Map, full_tokens / 8);
174        vc.set(ViewKind::Reference, full_tokens / 20);
175        vc.set(ViewKind::Handle, 25);
176        vc
177    }
178}
179
/// Optional origin metadata for a context item.
///
/// Every field may be absent; the default value has all fields set to `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Provenance {
    // NOTE(review): presumably the tool that produced the item -- confirm against callers.
    pub tool: Option<String>,
    // NOTE(review): presumably the agent that requested the item -- confirm against callers.
    pub agent_id: Option<String>,
    pub client_name: Option<String>,
    // Stored as free-form text; the expected format is not established in this file.
    pub timestamp: Option<String>,
}
187
188// ---------------------------------------------------------------------------
189// Context Potential Function
190// ---------------------------------------------------------------------------
191
/// Weights for the potential function components.
/// Adapted via Thompson Sampling (bandit.rs) over time.
///
/// Each weight multiplies the matching field of `FieldSignals` in
/// `ContextField::compute_phi`; the cost and redundancy weights are applied
/// as penalties (their terms are subtracted).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldWeights {
    /// Weight of the task-relevance signal (added).
    pub w_relevance: f64,
    /// Weight of the surprise signal (added).
    pub w_surprise: f64,
    /// Weight of the graph-proximity signal (added).
    pub w_graph: f64,
    /// Weight of the history signal (added).
    pub w_history: f64,
    /// Weight of the normalized token cost (subtracted).
    pub w_cost: f64,
    /// Weight of the redundancy signal (subtracted).
    pub w_redundancy: f64,
}

impl Default for FieldWeights {
    /// Default weights: the four additive weights sum to 0.80 and the two
    /// penalty weights sum to 0.20.
    fn default() -> Self {
        Self {
            w_relevance: 0.35,
            w_surprise: 0.15,
            w_graph: 0.20,
            w_history: 0.10,
            w_cost: 0.10,
            w_redundancy: 0.10,
        }
    }
}
216
/// Raw signal components for a single context item before combination.
///
/// `ContextField::compute_phi` expects every field to be normalized to
/// [0, 1]. The default value has all signals at 0.0.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FieldSignals {
    /// Task relevance (added to Phi).
    pub relevance: f64,
    /// Surprise (added to Phi).
    pub surprise: f64,
    /// Graph proximity (added to Phi).
    pub graph_proximity: f64,
    /// History signal (added to Phi).
    pub history_signal: f64,
    /// Normalized token cost (subtracted from Phi).
    pub token_cost_norm: f64,
    /// Redundancy (subtracted from Phi).
    pub redundancy: f64,
}
227
/// Combined potential for a context item.
///
/// Produced per item by `ContextField::compute_batch`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldPotential {
    /// The input signals the potential was computed from.
    pub signals: FieldSignals,
    /// The combined potential, as returned by `ContextField::compute_phi`.
    pub phi: f64,
    /// Token-cost estimates for the item's views.
    pub view_costs: ViewCosts,
    /// The view chosen by `ContextField::select_view` for the batch's temperature.
    pub best_view: ViewKind,
}
236
/// Token budget for a compilation pass: the overall allowance and the part
/// of it that has already been spent.
#[derive(Debug, Clone, Copy)]
pub struct TokenBudget {
    /// Total number of tokens available.
    pub total: usize,
    /// Number of tokens already consumed.
    pub used: usize,
}

impl TokenBudget {
    /// Tokens still available; 0 when `used` is at or above `total`.
    pub fn remaining(&self) -> usize {
        let TokenBudget { total, used } = *self;
        total.saturating_sub(used)
    }

    /// Fraction of the budget consumed (`used / total`).
    ///
    /// A zero-sized budget counts as fully utilized (1.0). The value exceeds
    /// 1.0 when `used` is larger than `total`.
    pub fn utilization(&self) -> f64 {
        match self.total {
            0 => 1.0,
            total => self.used as f64 / total as f64,
        }
    }

    /// Temperature derived from budget pressure: high pressure = high T.
    ///
    /// Maps utilization linearly onto [0.1, 2.0] and clamps to that range.
    /// At T=0.1 (low pressure) dense views are preferred; at T=2.0 (high
    /// pressure) sparse views are preferred.
    pub fn temperature(&self) -> f64 {
        const T_MIN: f64 = 0.1;
        const T_MAX: f64 = 2.0;
        const T_SPAN: f64 = 1.9;

        let pressure = self.utilization();
        (T_MIN + pressure * T_SPAN).clamp(T_MIN, T_MAX)
    }
}
262
263/// The Context Field: computes Phi for a set of items given a task context.
264pub struct ContextField {
265    weights: FieldWeights,
266}
267
268impl Default for ContextField {
269    fn default() -> Self {
270        Self::new()
271    }
272}
273
274impl ContextField {
275    pub fn new() -> Self {
276        Self {
277            weights: FieldWeights::default(),
278        }
279    }
280
281    pub fn with_weights(weights: FieldWeights) -> Self {
282        Self { weights }
283    }
284
285    /// Compute the unified potential Phi(i,t) for a context item.
286    ///
287    /// All input signals should be normalized to [0, 1] before calling.
288    /// The cost and redundancy terms are subtracted (penalty).
289    pub fn compute_phi(&self, signals: &FieldSignals) -> f64 {
290        let w = &self.weights;
291        let phi = w.w_relevance * signals.relevance
292            + w.w_surprise * signals.surprise
293            + w.w_graph * signals.graph_proximity
294            + w.w_history * signals.history_signal
295            - w.w_cost * signals.token_cost_norm
296            - w.w_redundancy * signals.redundancy;
297        phi.clamp(0.0, 1.0)
298    }
299
300    /// Select the best view for an item given the temperature (budget pressure).
301    ///
302    /// Uses Boltzmann-weighted view selection:
303    ///   P(view_v | item_i, T) = exp(-C(v) / T) / Z(i, T)
304    ///
305    /// At low temperature (relaxed budget), denser views are preferred.
306    /// At high temperature (tight budget), sparser views are preferred.
307    pub fn select_view(&self, costs: &ViewCosts, temperature: f64) -> ViewKind {
308        if costs.estimates.is_empty() {
309            return ViewKind::Full;
310        }
311
312        let t = temperature.max(0.01);
313        let max_cost = costs.estimates.values().copied().max().unwrap_or(1).max(1) as f64;
314
315        let mut best_view = ViewKind::Full;
316        let mut best_score = f64::NEG_INFINITY;
317
318        for (&view, &tokens) in &costs.estimates {
319            let normalized_cost = tokens as f64 / max_cost;
320            let density_bonus = 1.0 - (view.density_rank() as f64 / 8.0);
321            // At low T, density_bonus dominates (prefer dense/full views).
322            // At high T, the cost penalty dominates (prefer cheap/sparse views).
323            let score = density_bonus * (2.0 - t) - normalized_cost * t;
324            if score > best_score {
325                best_score = score;
326                best_view = view;
327            }
328        }
329
330        best_view
331    }
332
333    /// Compute potentials for a batch of items.
334    pub fn compute_batch(
335        &self,
336        items: &[(ContextItemId, FieldSignals, ViewCosts)],
337        budget: TokenBudget,
338    ) -> HashMap<ContextItemId, FieldPotential> {
339        let temperature = budget.temperature();
340        let mut result = HashMap::new();
341
342        for (id, signals, costs) in items {
343            let phi = self.compute_phi(signals);
344            let best_view = self.select_view(costs, temperature);
345            result.insert(
346                id.clone(),
347                FieldPotential {
348                    signals: signals.clone(),
349                    phi,
350                    view_costs: costs.clone(),
351                    best_view,
352                },
353            );
354        }
355
356        result
357    }
358}
359
360// ---------------------------------------------------------------------------
361// Signal extraction helpers (bridge to existing modules)
362// ---------------------------------------------------------------------------
363
/// Normalize a relevance score from task_relevance.rs to [0, 1].
///
/// Returns `score / max_score` clamped to [0, 1]. Returns 0.0 when
/// `max_score` is not positive, and also when the ratio is not a number
/// (NaN input, or infinity divided by infinity), so the result never leaves
/// the documented range.
pub fn normalize_relevance(score: f64, max_score: f64) -> f64 {
    if max_score <= 0.0 {
        return 0.0;
    }
    let ratio = score / max_score;
    // `f64::clamp` passes NaN through, so it has to be handled explicitly.
    if ratio.is_nan() {
        0.0
    } else {
        ratio.clamp(0.0, 1.0)
    }
}
371
/// Normalize a surprise score from surprise.rs to [0, 1].
/// Surprise range is typically 5.0 (common) to 17.0+ (rare).
///
/// Maps 5.0 to 0.0 and 17.0 to 1.0 linearly and clamps everything outside
/// that range. A NaN input yields 0.0 so the result never leaves [0, 1].
pub fn normalize_surprise(surprise: f64) -> f64 {
    /// Surprise value mapped to 0.0.
    const SURPRISE_FLOOR: f64 = 5.0;
    /// Width of the mapped range (17.0 - 5.0).
    const SURPRISE_SPAN: f64 = 12.0;

    // `f64::clamp` passes NaN through, so it has to be handled explicitly.
    if surprise.is_nan() {
        return 0.0;
    }
    ((surprise - SURPRISE_FLOOR) / SURPRISE_SPAN).clamp(0.0, 1.0)
}
377
/// Convert a graph distance into a proximity score in (0, 1].
///
/// Proximity is the reciprocal of `1 + distance`: distance 0 (same file)
/// gives 1.0, distance 1 gives 0.5, and the score keeps shrinking as the
/// distance grows.
pub fn normalize_graph_proximity(distance: usize) -> f64 {
    let hops = distance as f64;
    (1.0 + hops).recip()
}
383
/// Express a token count as a fraction of the total budget, capped at 1.0.
///
/// A zero budget is treated as fully consumed, so the result is 1.0
/// regardless of `tokens`.
pub fn normalize_token_cost(tokens: usize, budget_total: usize) -> f64 {
    match budget_total {
        0 => 1.0,
        // Both operands are non-negative, so only the upper bound can be hit.
        total => (tokens as f64 / total as f64).min(1.0),
    }
}
391
/// Efficiency ratio of an item: its potential Phi divided by its token cost.
/// Used by the greedy knapsack in the compiler.
///
/// An item that costs zero tokens yields `phi` itself rather than dividing
/// by zero.
pub fn efficiency(phi: f64, tokens: usize) -> f64 {
    match tokens {
        0 => phi,
        cost => phi / cost as f64,
    }
}
400
401/// Compute real signals for a file path using existing scoring modules.
402/// Bridges CFT with the information-theoretic, graph-based, and history
403/// subsystems already in lean-ctx.
404pub fn compute_signals_for_path(
405    path: &str,
406    task: Option<&str>,
407    file_content: Option<&str>,
408    budget_total: usize,
409    full_tokens: usize,
410) -> (FieldSignals, ViewCosts) {
411    let mut signals = FieldSignals::default();
412
413    let heatmap = super::heatmap::HeatMap::load();
414    let heat_entry = heatmap.entries.get(path);
415
416    // R(i,t): Task relevance via keyword overlap + heatmap frequency
417    if let Some(task_desc) = task {
418        let (_, keywords) = super::task_relevance::parse_task_hints(task_desc);
419        let path_lower = path.to_lowercase();
420        let keyword_hits = keywords
421            .iter()
422            .filter(|kw| path_lower.contains(&kw.to_lowercase()))
423            .count();
424        let keyword_score = (keyword_hits as f64 * 0.3).min(1.0);
425        let freq_score = heat_entry.map_or(0.0, |e| (e.access_count as f64 / 10.0).min(1.0));
426        signals.relevance = normalize_relevance(keyword_score + freq_score, 2.0);
427    } else {
428        let freq = heat_entry.map_or(0.0, |e| e.access_count as f64);
429        signals.relevance = normalize_relevance(freq, 10.0);
430    }
431
432    // S(i): Surprise from cross-entropy with Zipfian prior
433    if let Some(content) = file_content {
434        let surprise_val = super::surprise::line_surprise(content);
435        signals.surprise = normalize_surprise(surprise_val);
436    }
437
438    // G(i,t): Graph proximity heuristic from path depth
439    // (property graph queries require a Connection not available here)
440    let depth = path.matches('/').count();
441    signals.graph_proximity = normalize_graph_proximity(depth);
442
443    // H(i): History signal from heatmap access count
444    let access_count = heat_entry.map_or(0, |e| e.access_count);
445    signals.history_signal = (access_count as f64 / 20.0).min(1.0);
446
447    // C(i,v): Normalized token cost relative to budget
448    signals.token_cost_norm = normalize_token_cost(full_tokens, budget_total);
449
450    // D(i): Redundancy — initialized at 0, refined during compilation pass
451    signals.redundancy = 0.0;
452
453    let view_costs = ViewCosts::from_full_tokens(full_tokens);
454    (signals, view_costs)
455}
456
457// ---------------------------------------------------------------------------
458// Tests
459// ---------------------------------------------------------------------------
460
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn phi_increases_with_relevance() {
        let field = ContextField::new();
        let low = field.compute_phi(&FieldSignals {
            relevance: 0.2,
            ..Default::default()
        });
        let high = field.compute_phi(&FieldSignals {
            relevance: 0.9,
            ..Default::default()
        });
        assert!(high > low, "higher relevance should yield higher phi");
    }

    #[test]
    fn phi_decreases_with_cost() {
        let field = ContextField::new();
        let cheap = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            token_cost_norm: 0.1,
            ..Default::default()
        });
        let expensive = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            token_cost_norm: 0.9,
            ..Default::default()
        });
        assert!(cheap > expensive, "higher cost should reduce phi");
    }

    #[test]
    fn phi_decreases_with_redundancy() {
        let field = ContextField::new();
        let unique = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            redundancy: 0.0,
            ..Default::default()
        });
        let redundant = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            redundancy: 0.9,
            ..Default::default()
        });
        assert!(unique > redundant, "redundancy should reduce phi");
    }

    // With the default weights the additive terms sum to 0.8, so this case
    // stays inside the interval without touching either clamp bound; the two
    // tests that follow exercise the bounds themselves.
    #[test]
    fn phi_is_clamped_to_unit_interval() {
        let field = ContextField::new();
        let phi = field.compute_phi(&FieldSignals {
            relevance: 1.0,
            surprise: 1.0,
            graph_proximity: 1.0,
            history_signal: 1.0,
            token_cost_norm: 0.0,
            redundancy: 0.0,
        });
        assert!(phi <= 1.0);
        assert!(phi >= 0.0);
        assert!(
            (phi - 0.8).abs() < 1e-9,
            "default additive weights sum to 0.8"
        );
    }

    #[test]
    fn phi_clamps_negative_sum_to_zero() {
        let field = ContextField::new();
        let phi = field.compute_phi(&FieldSignals {
            token_cost_norm: 1.0,
            redundancy: 1.0,
            ..Default::default()
        });
        assert_eq!(phi, 0.0, "pure penalties must clamp to the lower bound");
    }

    #[test]
    fn phi_clamps_oversized_sum_to_one() {
        let field = ContextField::with_weights(FieldWeights {
            w_relevance: 2.0,
            ..FieldWeights::default()
        });
        let phi = field.compute_phi(&FieldSignals {
            relevance: 1.0,
            ..Default::default()
        });
        assert_eq!(phi, 1.0, "oversized weights must clamp to the upper bound");
    }

    #[test]
    fn view_selection_prefers_dense_at_low_temperature() {
        let field = ContextField::new();
        let costs = ViewCosts::from_full_tokens(5000);
        let view = field.select_view(&costs, 0.1);
        assert_eq!(
            view,
            ViewKind::Full,
            "low temperature (relaxed budget) should prefer full view"
        );
    }

    #[test]
    fn view_selection_prefers_sparse_at_high_temperature() {
        let field = ContextField::new();
        let costs = ViewCosts::from_full_tokens(5000);
        let view = field.select_view(&costs, 2.0);
        assert_ne!(
            view,
            ViewKind::Full,
            "high temperature (tight budget) should prefer sparser view"
        );
    }

    #[test]
    fn view_selection_defaults_to_full_without_estimates() {
        let field = ContextField::new();
        assert_eq!(field.select_view(&ViewCosts::new(), 1.0), ViewKind::Full);
    }

    #[test]
    fn view_kind_round_trips_through_its_name() {
        let all = [
            ViewKind::Full,
            ViewKind::Signatures,
            ViewKind::Map,
            ViewKind::Diff,
            ViewKind::Aggressive,
            ViewKind::Entropy,
            ViewKind::Lines,
            ViewKind::Reference,
            ViewKind::Handle,
        ];
        for view in all {
            assert_eq!(ViewKind::parse(view.as_str()), view);
        }
        assert_eq!(ViewKind::parse("  MAP "), ViewKind::Map);
        assert_eq!(ViewKind::parse("no-such-view"), ViewKind::Full);
    }

    #[test]
    fn budget_temperature_scales_with_utilization() {
        let low = TokenBudget {
            total: 10000,
            used: 1000,
        };
        let high = TokenBudget {
            total: 10000,
            used: 9000,
        };
        assert!(
            high.temperature() > low.temperature(),
            "higher utilization should increase temperature"
        );
    }

    #[test]
    fn zero_total_budget_is_fully_utilized() {
        let budget = TokenBudget { total: 0, used: 0 };
        assert_eq!(budget.remaining(), 0);
        assert_eq!(budget.utilization(), 1.0);
        assert!((budget.temperature() - 2.0).abs() < 1e-9);
    }

    #[test]
    fn normalize_surprise_maps_range() {
        assert!((normalize_surprise(5.0) - 0.0).abs() < 0.01);
        assert!((normalize_surprise(17.0) - 1.0).abs() < 0.01);
        assert!((normalize_surprise(11.0) - 0.5).abs() < 0.01);
    }

    #[test]
    fn normalize_graph_proximity_inverse_distance() {
        assert!((normalize_graph_proximity(0) - 1.0).abs() < f64::EPSILON);
        assert!((normalize_graph_proximity(1) - 0.5).abs() < f64::EPSILON);
        assert!(normalize_graph_proximity(10) < 0.15);
    }

    #[test]
    fn efficiency_ratio_is_phi_per_token() {
        let e = efficiency(0.8, 400);
        assert!((e - 0.002).abs() < 0.0001);
    }

    #[test]
    fn context_item_id_stable() {
        let a = ContextItemId::from_file("src/main.rs");
        let b = ContextItemId::from_file("src/main.rs");
        assert_eq!(a, b);
    }

    #[test]
    fn view_costs_from_full() {
        let vc = ViewCosts::from_full_tokens(5000);
        assert_eq!(vc.get(&ViewKind::Full), 5000);
        assert_eq!(vc.get(&ViewKind::Signatures), 1000);
        assert_eq!(vc.get(&ViewKind::Map), 625);
        assert_eq!(vc.get(&ViewKind::Handle), 25);
    }

    #[test]
    fn batch_compute_produces_results_for_all_items() {
        let field = ContextField::new();
        let items = vec![
            (
                ContextItemId::from_file("a.rs"),
                FieldSignals {
                    relevance: 0.8,
                    ..Default::default()
                },
                ViewCosts::from_full_tokens(2000),
            ),
            (
                ContextItemId::from_file("b.rs"),
                FieldSignals {
                    relevance: 0.3,
                    ..Default::default()
                },
                ViewCosts::from_full_tokens(500),
            ),
        ];
        let budget = TokenBudget {
            total: 10000,
            used: 2000,
        };
        let results = field.compute_batch(&items, budget);
        assert_eq!(results.len(), 2);
        assert!(results.contains_key(&ContextItemId::from_file("a.rs")));
        assert!(results.contains_key(&ContextItemId::from_file("b.rs")));
    }
}