lean-ctx 3.5.20

Context Runtime for AI Agents with CCP. 63 MCP tools, 10 read modes, 95+ compression patterns, cross-session memory (CCP), persistent AI knowledge with temporal facts + contradiction detection, multi-agent context sharing + diaries, LITM-aware positioning, AAAK compact format, adaptive compression with Thompson Sampling bandits. Supports 24 AI tools. Reduces LLM token consumption by up to 99%.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
//! Context Field Theory (CFT) -- unified potential function for context items.
//!
//! Combines information-theoretic, graph-based, and history signals into a
//! single scalar potential Phi(i,t) per context item, enabling principled
//! budget allocation and view selection.
//!
//! Scientific basis:
//!   Phi(i,t) = w_R*R + w_S*S + w_G*G + w_H*H - w_C*C - w_D*D
//! where R = task relevance (heat diffusion + PageRank),
//!       S = surprise (cross-entropy with Zipfian prior),
//!       G = graph proximity (weighted BFS distance),
//!       H = history signal (bandit feedback),
//!       C = token cost for the active view,
//!       D = redundancy with already-selected items (Jaccard).

use std::collections::HashMap;
use std::fmt;

use serde::{Deserialize, Serialize};

// ---------------------------------------------------------------------------
// Shared types used across CFT modules (Ledger, Overlay, Handles, Compiler)
// ---------------------------------------------------------------------------

/// Stable identifier for a context item.
///
/// The wrapped string is a kind prefix followed by a source key, as produced
/// by the `from_*` constructors (for example `file:<path>` or
/// `memory:<key>`). The ID is built from the item's kind and source key, not
/// from its contents, so the same file always maps to the same ID regardless
/// of content changes.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContextItemId(pub String);

impl ContextItemId {
    /// ID for a file item: `file:<path>`. The path is used verbatim.
    pub fn from_file(path: &str) -> Self {
        Self(["file:", path].concat())
    }

    /// ID for a shell item: `shell:<hash>`, where the hash is whatever
    /// `project_hash::hash_project_root` returns for the command string.
    pub fn from_shell(command: &str) -> Self {
        Self(format!(
            "shell:{}",
            crate::core::project_hash::hash_project_root(command)
        ))
    }

    /// ID for a knowledge item: `knowledge:<category>:<key>`.
    pub fn from_knowledge(category: &str, key: &str) -> Self {
        Self(["knowledge:", category, ":", key].concat())
    }

    /// ID for a memory item: `memory:<key>`.
    pub fn from_memory(key: &str) -> Self {
        Self(["memory:", key].concat())
    }

    /// ID for a provider item: `provider:<provider>:<key>`.
    pub fn from_provider(provider: &str, key: &str) -> Self {
        Self(["provider:", provider, ":", key].concat())
    }

    /// Borrows the full ID string, prefix included.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl fmt::Display for ContextItemId {
    /// Writes the raw ID string; width/padding flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

/// Category of a context item.
///
/// Serialized in `snake_case`. The variant names mirror the
/// `ContextItemId::from_*` constructors (`from_file`, `from_shell`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextKind {
    File,
    Shell,
    Knowledge,
    Memory,
    Provider,
}

/// Lifecycle state of a context item.
///
/// Serialized in `snake_case`; a freshly defaulted state is `Candidate`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextState {
    /// Default state.
    #[default]
    Candidate,
    Included,
    Excluded,
    Pinned,
    Stale,
    Shadowed,
}

/// The representation ("view") in which a context item can be rendered.
///
/// Serialized in `snake_case`. `ViewKind::as_str` yields the same lowercase
/// names, and `ViewKind::density_rank` orders the views from densest
/// (`Full`) to sparsest (`Handle`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ViewKind {
    Full,
    Signatures,
    Map,
    Diff,
    Aggressive,
    Entropy,
    Lines,
    Reference,
    Handle,
}

impl ViewKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Full => "full",
            Self::Signatures => "signatures",
            Self::Map => "map",
            Self::Diff => "diff",
            Self::Aggressive => "aggressive",
            Self::Entropy => "entropy",
            Self::Lines => "lines",
            Self::Reference => "reference",
            Self::Handle => "handle",
        }
    }

    pub fn parse(s: &str) -> Self {
        match s.trim().to_lowercase().as_str() {
            "signatures" => Self::Signatures,
            "map" => Self::Map,
            "diff" => Self::Diff,
            "aggressive" => Self::Aggressive,
            "entropy" => Self::Entropy,
            "lines" => Self::Lines,
            "reference" => Self::Reference,
            "handle" => Self::Handle,
            _ => Self::Full,
        }
    }

    /// Phase-transition ordering: lower index = denser (more tokens).
    pub fn density_rank(&self) -> u8 {
        match self {
            Self::Full => 0,
            Self::Aggressive => 1,
            Self::Diff => 2,
            Self::Lines => 3,
            Self::Entropy => 4,
            Self::Signatures => 5,
            Self::Map => 6,
            Self::Reference => 7,
            Self::Handle => 8,
        }
    }
}

/// Token-cost estimates for each available view of a context item.
///
/// A view that has no entry in `estimates` is reported as costing 0 tokens
/// by `ViewCosts::get`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ViewCosts {
    /// Estimated token count per view.
    pub estimates: HashMap<ViewKind, usize>,
}

impl ViewCosts {
    /// Creates an empty set of estimates.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records (or overwrites) the token estimate for `view`.
    pub fn set(&mut self, view: ViewKind, tokens: usize) {
        self.estimates.insert(view, tokens);
    }

    /// Token estimate for `view`, or 0 when no estimate has been recorded.
    pub fn get(&self, view: &ViewKind) -> usize {
        self.estimates.get(view).copied().unwrap_or(0)
    }

    /// Cheapest view that still provides content (excludes Handle).
    ///
    /// Returns `None` when no view other than `Handle` has an estimate.
    /// When several views share the lowest cost, the densest one (lowest
    /// `density_rank`) is returned. Without this tie-break the winner would
    /// depend on `HashMap` iteration order and could differ between runs.
    pub fn cheapest_content_view(&self) -> Option<(ViewKind, usize)> {
        self.estimates
            .iter()
            .filter(|(view, _)| **view != ViewKind::Handle)
            .map(|(&view, &tokens)| (view, tokens))
            .min_by_key(|&(view, tokens)| (tokens, view.density_rank()))
    }

    /// Derives rough estimates from the token count of the full view:
    /// signatures = 1/5, map = 1/8, reference = 1/20 (integer division, so
    /// small inputs can round down to 0), and a fixed 25 tokens for a handle.
    pub fn from_full_tokens(full_tokens: usize) -> Self {
        let mut vc = Self::new();
        vc.set(ViewKind::Full, full_tokens);
        vc.set(ViewKind::Signatures, full_tokens / 5);
        vc.set(ViewKind::Map, full_tokens / 8);
        vc.set(ViewKind::Reference, full_tokens / 20);
        vc.set(ViewKind::Handle, 25);
        vc
    }
}

/// Optional origin metadata for a context item.
///
/// Every field may be absent; `Provenance::default()` leaves them all `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Provenance {
    pub tool: Option<String>,
    pub agent_id: Option<String>,
    pub client_name: Option<String>,
    // NOTE(review): stored as free-form text; the timestamp format is not
    // fixed anywhere in this module — confirm against the writers.
    pub timestamp: Option<String>,
}

// ---------------------------------------------------------------------------
// Context Potential Function
// ---------------------------------------------------------------------------

/// Weights for the potential function components.
/// Adapted via Thompson Sampling (bandit.rs) over time.
///
/// In `ContextField::compute_phi` the first four weights scale terms that
/// are added to Phi; `w_cost` and `w_redundancy` scale terms that are
/// subtracted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldWeights {
    /// Weight of the task-relevance signal (added).
    pub w_relevance: f64,
    /// Weight of the surprise signal (added).
    pub w_surprise: f64,
    /// Weight of the graph-proximity signal (added).
    pub w_graph: f64,
    /// Weight of the history signal (added).
    pub w_history: f64,
    /// Weight of the normalized token cost (subtracted).
    pub w_cost: f64,
    /// Weight of the redundancy signal (subtracted).
    pub w_redundancy: f64,
}

impl Default for FieldWeights {
    /// Starting weights. The four additive weights sum to 0.80 and the two
    /// penalty weights to 0.20.
    fn default() -> Self {
        // Terms added to Phi.
        let (w_relevance, w_surprise, w_graph, w_history) = (0.35, 0.15, 0.20, 0.10);
        // Terms subtracted from Phi.
        let (w_cost, w_redundancy) = (0.10, 0.10);

        Self {
            w_relevance,
            w_surprise,
            w_graph,
            w_history,
            w_cost,
            w_redundancy,
        }
    }
}

/// Raw signal components for a single context item before combination.
///
/// `ContextField::compute_phi` expects every field to be normalized to
/// [0, 1]. All fields default to 0.0.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FieldSignals {
    /// R: task relevance (added to Phi).
    pub relevance: f64,
    /// S: surprise (added to Phi).
    pub surprise: f64,
    /// G: graph proximity (added to Phi).
    pub graph_proximity: f64,
    /// H: history signal (added to Phi).
    pub history_signal: f64,
    /// C: normalized token cost (subtracted from Phi).
    pub token_cost_norm: f64,
    /// D: redundancy with already-selected items (subtracted from Phi).
    pub redundancy: f64,
}

/// Combined potential for a context item.
///
/// Produced per item by `ContextField::compute_batch`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldPotential {
    /// The input signals Phi was computed from.
    pub signals: FieldSignals,
    /// Combined potential as returned by `ContextField::compute_phi`.
    pub phi: f64,
    /// Per-view token estimates for the item.
    pub view_costs: ViewCosts,
    /// View chosen by `ContextField::select_view` for the batch temperature.
    pub best_view: ViewKind,
}

/// Token budget parameters for compilation.
#[derive(Debug, Clone, Copy)]
pub struct TokenBudget {
    /// Total number of tokens available.
    pub total: usize,
    /// Number of tokens already consumed.
    pub used: usize,
}

impl TokenBudget {
    /// Tokens still available; 0 once `used` reaches or exceeds `total`.
    pub fn remaining(&self) -> usize {
        self.total.checked_sub(self.used).unwrap_or(0)
    }

    /// Fraction of the budget consumed (`used / total`). An empty budget
    /// (`total == 0`) counts as fully used and reports 1.0. The value can
    /// exceed 1.0 when `used > total`.
    pub fn utilization(&self) -> f64 {
        match self.total {
            0 => 1.0,
            total => self.used as f64 / total as f64,
        }
    }

    /// Temperature derived from budget pressure: high pressure = high T.
    /// T in [0.1, 2.0]. At T=0.1 (low pressure), prefer dense views.
    /// At T=2.0 (high pressure), prefer sparse views.
    pub fn temperature(&self) -> f64 {
        const T_MIN: f64 = 0.1;
        const T_MAX: f64 = 2.0;
        // Slope that maps utilization 0..=1 onto T_MIN..=T_MAX.
        const T_SPAN: f64 = 1.9;

        let pressure = self.utilization();
        f64::clamp(T_MIN + pressure * T_SPAN, T_MIN, T_MAX)
    }
}

/// The Context Field: computes Phi for a set of items given a task context.
///
/// Holds only the component weights; the per-item signals and the token
/// budget are passed to the individual methods.
pub struct ContextField {
    // Weights applied to each signal in `compute_phi`.
    weights: FieldWeights,
}

impl Default for ContextField {
    fn default() -> Self {
        Self::new()
    }
}

impl ContextField {
    /// Creates a field that uses `FieldWeights::default()`.
    pub fn new() -> Self {
        Self {
            weights: FieldWeights::default(),
        }
    }

    /// Creates a field with caller-supplied weights.
    pub fn with_weights(weights: FieldWeights) -> Self {
        Self { weights }
    }

    /// Compute the unified potential Phi(i,t) for a context item.
    ///
    /// All input signals should be normalized to [0, 1] before calling.
    /// The cost and redundancy terms are subtracted (penalty).
    ///
    /// The weighted sum is clamped to [0, 1]. A NaN signal is not caught:
    /// `f64::clamp` passes NaN through, so Phi is NaN in that case.
    pub fn compute_phi(&self, signals: &FieldSignals) -> f64 {
        let w = &self.weights;
        let phi = w.w_relevance * signals.relevance
            + w.w_surprise * signals.surprise
            + w.w_graph * signals.graph_proximity
            + w.w_history * signals.history_signal
            - w.w_cost * signals.token_cost_norm
            - w.w_redundancy * signals.redundancy;
        phi.clamp(0.0, 1.0)
    }

    /// Select the best view for an item given the temperature (budget pressure).
    ///
    /// Deterministic argmax over the views present in `costs` of
    ///
    ///   score(v) = density_bonus(v) * (2 - T) - normalized_cost(v) * T
    ///
    /// where `density_bonus = 1 - density_rank / 8` and `normalized_cost` is
    /// the view's tokens divided by the most expensive view's tokens.
    /// (This is a linear trade-off, not a sampled Boltzmann distribution.)
    ///
    /// At low temperature (relaxed budget), denser views are preferred.
    /// At high temperature (tight budget), sparser views are preferred.
    ///
    /// `temperature` is floored at 0.01. It is expected to come from
    /// `TokenBudget::temperature` (range [0.1, 2.0]); above 2.0 the density
    /// term changes sign and penalizes dense views.
    ///
    /// Returns `ViewKind::Full` when `costs` is empty or when no view
    /// achieves a score above negative infinity. Equal scores are resolved in
    /// favour of the denser view, so the result does not depend on `HashMap`
    /// iteration order.
    pub fn select_view(&self, costs: &ViewCosts, temperature: f64) -> ViewKind {
        // Largest value `ViewKind::density_rank` returns (the rank of `Handle`).
        const MAX_DENSITY_RANK: f64 = 8.0;

        if costs.estimates.is_empty() {
            return ViewKind::Full;
        }

        let t = temperature.max(0.01);
        // `.max(1)` avoids dividing by zero when every estimate is 0.
        let max_cost = costs.estimates.values().copied().max().unwrap_or(1).max(1) as f64;

        let mut best_view = ViewKind::Full;
        let mut best_score = f64::NEG_INFINITY;

        for (&view, &tokens) in &costs.estimates {
            let normalized_cost = tokens as f64 / max_cost;
            let density_bonus = 1.0 - (f64::from(view.density_rank()) / MAX_DENSITY_RANK);
            // At low T, density_bonus dominates (prefer dense/full views).
            // At high T, the cost penalty dominates (prefer cheap/sparse views).
            let score = density_bonus * (2.0 - t) - normalized_cost * t;

            // Break exact ties towards the denser view so the outcome is
            // reproducible regardless of map iteration order.
            let wins_tie =
                score == best_score && view.density_rank() < best_view.density_rank();
            if score > best_score || wins_tie {
                best_score = score;
                best_view = view;
            }
        }

        best_view
    }

    /// Compute potentials for a batch of items.
    ///
    /// One temperature is derived from `budget` and shared by every item.
    /// If the same ID appears more than once, the last occurrence wins.
    pub fn compute_batch(
        &self,
        items: &[(ContextItemId, FieldSignals, ViewCosts)],
        budget: TokenBudget,
    ) -> HashMap<ContextItemId, FieldPotential> {
        let temperature = budget.temperature();

        items
            .iter()
            .map(|(id, signals, costs)| {
                let potential = FieldPotential {
                    signals: signals.clone(),
                    phi: self.compute_phi(signals),
                    view_costs: costs.clone(),
                    best_view: self.select_view(costs, temperature),
                };
                (id.clone(), potential)
            })
            .collect()
    }
}

// ---------------------------------------------------------------------------
// Signal extraction helpers (bridge to existing modules)
// ---------------------------------------------------------------------------

/// Normalize a relevance score from task_relevance.rs to [0, 1].
///
/// Returns `score / max_score` clamped to [0, 1]. Returns 0.0 when
/// `max_score` is zero, negative or NaN, and when the ratio itself is NaN
/// (a NaN `score`, or infinity divided by infinity). `f64::clamp` would
/// otherwise pass NaN through and break the [0, 1] guarantee.
pub fn normalize_relevance(score: f64, max_score: f64) -> f64 {
    if max_score.is_nan() || max_score <= 0.0 {
        return 0.0;
    }
    let ratio = score / max_score;
    if ratio.is_nan() {
        return 0.0;
    }
    ratio.clamp(0.0, 1.0)
}

/// Normalize a surprise score from surprise.rs to [0, 1].
/// Surprise range is typically 5.0 (common) to 17.0+ (rare).
///
/// Maps 5.0 to 0.0 and 17.0 to 1.0 linearly and clamps everything outside
/// that range. A NaN input yields 0.0; `f64::clamp` would otherwise pass it
/// through and break the [0, 1] guarantee.
pub fn normalize_surprise(surprise: f64) -> f64 {
    // Lower end of the typical surprise range.
    const SURPRISE_FLOOR: f64 = 5.0;
    // Width of the typical range (17.0 - 5.0).
    const SURPRISE_SPAN: f64 = 12.0;

    if surprise.is_nan() {
        return 0.0;
    }
    ((surprise - SURPRISE_FLOOR) / SURPRISE_SPAN).clamp(0.0, 1.0)
}

/// Turns a graph distance into a proximity score in (0, 1].
///
/// Distance 0 (same file) gives 1.0; distance N gives 1 / (1 + N).
pub fn normalize_graph_proximity(distance: usize) -> f64 {
    let hops = distance as f64;
    (1.0 + hops).recip()
}

/// Expresses `tokens` as a fraction of `budget_total`, clamped to [0, 1].
///
/// A zero budget counts as maximal cost and returns 1.0.
pub fn normalize_token_cost(tokens: usize, budget_total: usize) -> f64 {
    match budget_total {
        0 => 1.0,
        total => {
            let share = tokens as f64 / total as f64;
            share.clamp(0.0, 1.0)
        }
    }
}

/// Efficiency ratio: Phi per token.
/// Used by the greedy knapsack in the compiler.
///
/// A zero-token item returns `phi` unchanged, which avoids dividing by zero.
pub fn efficiency(phi: f64, tokens: usize) -> f64 {
    match tokens {
        0 => phi,
        count => phi / count as f64,
    }
}

/// Compute real signals for a file path using existing scoring modules.
/// Bridges CFT with the information-theoretic, graph-based, and history
/// subsystems already in lean-ctx.
pub fn compute_signals_for_path(
    path: &str,
    task: Option<&str>,
    file_content: Option<&str>,
    budget_total: usize,
    full_tokens: usize,
) -> (FieldSignals, ViewCosts) {
    let mut signals = FieldSignals::default();

    let heatmap = super::heatmap::HeatMap::load();
    let heat_entry = heatmap.entries.get(path);

    // R(i,t): Task relevance via keyword overlap + heatmap frequency
    if let Some(task_desc) = task {
        let (_, keywords) = super::task_relevance::parse_task_hints(task_desc);
        let path_lower = path.to_lowercase();
        let keyword_hits = keywords
            .iter()
            .filter(|kw| path_lower.contains(&kw.to_lowercase()))
            .count();
        let keyword_score = (keyword_hits as f64 * 0.3).min(1.0);
        let freq_score = heat_entry.map_or(0.0, |e| (e.access_count as f64 / 10.0).min(1.0));
        signals.relevance = normalize_relevance(keyword_score + freq_score, 2.0);
    } else {
        let freq = heat_entry.map_or(0.0, |e| e.access_count as f64);
        signals.relevance = normalize_relevance(freq, 10.0);
    }

    // S(i): Surprise from cross-entropy with Zipfian prior
    if let Some(content) = file_content {
        let surprise_val = super::surprise::line_surprise(content);
        signals.surprise = normalize_surprise(surprise_val);
    }

    // G(i,t): Graph proximity heuristic from path depth
    // (property graph queries require a Connection not available here)
    let depth = path.matches('/').count();
    signals.graph_proximity = normalize_graph_proximity(depth);

    // H(i): History signal from heatmap access count
    let access_count = heat_entry.map_or(0, |e| e.access_count);
    signals.history_signal = (access_count as f64 / 20.0).min(1.0);

    // C(i,v): Normalized token cost relative to budget
    signals.token_cost_norm = normalize_token_cost(full_tokens, budget_total);

    // D(i): Redundancy — initialized at 0, refined during compilation pass
    signals.redundancy = 0.0;

    let view_costs = ViewCosts::from_full_tokens(full_tokens);
    (signals, view_costs)
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    //! Unit tests for the potential function, view selection, budget
    //! temperature and the normalization helpers.

    use super::*;

    #[test]
    fn phi_increases_with_relevance() {
        let field = ContextField::new();
        let low = field.compute_phi(&FieldSignals {
            relevance: 0.2,
            ..Default::default()
        });
        let high = field.compute_phi(&FieldSignals {
            relevance: 0.9,
            ..Default::default()
        });
        assert!(high > low, "higher relevance should yield higher phi");
    }

    #[test]
    fn phi_decreases_with_cost() {
        let field = ContextField::new();
        let cheap = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            token_cost_norm: 0.1,
            ..Default::default()
        });
        let expensive = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            token_cost_norm: 0.9,
            ..Default::default()
        });
        assert!(cheap > expensive, "higher cost should reduce phi");
    }

    #[test]
    fn phi_decreases_with_redundancy() {
        let field = ContextField::new();
        let unique = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            redundancy: 0.0,
            ..Default::default()
        });
        let redundant = field.compute_phi(&FieldSignals {
            relevance: 0.5,
            redundancy: 0.9,
            ..Default::default()
        });
        assert!(unique > redundant, "redundancy should reduce phi");
    }

    #[test]
    fn phi_is_clamped_to_unit_interval() {
        let field = ContextField::new();
        let phi = field.compute_phi(&FieldSignals {
            relevance: 1.0,
            surprise: 1.0,
            graph_proximity: 1.0,
            history_signal: 1.0,
            token_cost_norm: 0.0,
            redundancy: 0.0,
        });
        assert!(phi <= 1.0);
        assert!(phi >= 0.0);

        // Default reward weights sum to 0.80, so the case above never reaches
        // either bound. The two cases below actually exercise the clamp.

        // Penalties only: the raw sum is negative and must be raised to 0.
        let floor = field.compute_phi(&FieldSignals {
            token_cost_norm: 1.0,
            redundancy: 1.0,
            ..Default::default()
        });
        assert_eq!(floor, 0.0, "negative raw phi should clamp to 0");

        // Oversized weight: the raw sum exceeds 1 and must be lowered to 1.
        let heavy = ContextField::with_weights(FieldWeights {
            w_relevance: 5.0,
            ..FieldWeights::default()
        });
        let ceiling = heavy.compute_phi(&FieldSignals {
            relevance: 1.0,
            ..Default::default()
        });
        assert_eq!(ceiling, 1.0, "raw phi above 1 should clamp to 1");
    }

    #[test]
    fn view_selection_prefers_dense_at_low_temperature() {
        let field = ContextField::new();
        let costs = ViewCosts::from_full_tokens(5000);
        let view = field.select_view(&costs, 0.1);
        assert_eq!(
            view,
            ViewKind::Full,
            "low temperature (relaxed budget) should prefer full view"
        );
    }

    #[test]
    fn view_selection_prefers_sparse_at_high_temperature() {
        let field = ContextField::new();
        let costs = ViewCosts::from_full_tokens(5000);
        let view = field.select_view(&costs, 2.0);
        assert_ne!(
            view,
            ViewKind::Full,
            "high temperature (tight budget) should prefer sparser view"
        );
    }

    #[test]
    fn budget_temperature_scales_with_utilization() {
        let low = TokenBudget {
            total: 10000,
            used: 1000,
        };
        let high = TokenBudget {
            total: 10000,
            used: 9000,
        };
        assert!(
            high.temperature() > low.temperature(),
            "higher utilization should increase temperature"
        );
    }

    #[test]
    fn normalize_surprise_maps_range() {
        assert!((normalize_surprise(5.0) - 0.0).abs() < 0.01);
        assert!((normalize_surprise(17.0) - 1.0).abs() < 0.01);
        assert!((normalize_surprise(11.0) - 0.5).abs() < 0.01);
    }

    #[test]
    fn normalize_graph_proximity_inverse_distance() {
        assert!((normalize_graph_proximity(0) - 1.0).abs() < f64::EPSILON);
        assert!((normalize_graph_proximity(1) - 0.5).abs() < f64::EPSILON);
        assert!(normalize_graph_proximity(10) < 0.15);
    }

    #[test]
    fn efficiency_ratio_is_phi_per_token() {
        let e = efficiency(0.8, 400);
        assert!((e - 0.002).abs() < 0.0001);
    }

    #[test]
    fn context_item_id_stable() {
        let a = ContextItemId::from_file("src/main.rs");
        let b = ContextItemId::from_file("src/main.rs");
        assert_eq!(a, b);
    }

    #[test]
    fn view_costs_from_full() {
        let vc = ViewCosts::from_full_tokens(5000);
        assert_eq!(vc.get(&ViewKind::Full), 5000);
        assert_eq!(vc.get(&ViewKind::Signatures), 1000);
        assert_eq!(vc.get(&ViewKind::Map), 625);
        assert_eq!(vc.get(&ViewKind::Handle), 25);
    }

    #[test]
    fn batch_compute_produces_results_for_all_items() {
        let field = ContextField::new();
        let items = vec![
            (
                ContextItemId::from_file("a.rs"),
                FieldSignals {
                    relevance: 0.8,
                    ..Default::default()
                },
                ViewCosts::from_full_tokens(2000),
            ),
            (
                ContextItemId::from_file("b.rs"),
                FieldSignals {
                    relevance: 0.3,
                    ..Default::default()
                },
                ViewCosts::from_full_tokens(500),
            ),
        ];
        let budget = TokenBudget {
            total: 10000,
            used: 2000,
        };
        let results = field.compute_batch(&items, budget);
        assert_eq!(results.len(), 2);
        assert!(results.contains_key(&ContextItemId::from_file("a.rs")));
        assert!(results.contains_key(&ContextItemId::from_file("b.rs")));
    }
}