lean-ctx 3.7.1

Context Runtime for AI Agents with CCP. 63 MCP tools, 10 read modes, 60+ compression patterns, cross-session memory (CCP), persistent AI knowledge with temporal facts + contradiction detection, multi-agent context sharing, LITM-aware positioning, AAAK compact format, adaptive compression with Thompson Sampling bandits. Supports 24+ AI tools. Reduces LLM token consumption by up to 99%.
Documentation
//! Hebbian file co-access graph — "files that fire together, wire together".
//!
//! ## The idea (neuroscience → retrieval)
//!
//! Hebbian theory: synapses between co-active neurons strengthen (long-term
//! potentiation, LTP), while unused ones weaken (long-term depression / the
//! Ebbinghaus forgetting curve). We apply the same rule to files: whenever a
//! task surfaces a set of files *together*, we strengthen the association
//! between every pair; on each update all weights decay slightly, so stale
//! associations fade. Over time the graph learns the project's real working
//! paths — which the static import/AST graph cannot capture.
//!
//! The learned association is an additive retrieval signal: given a file the
//! agent is looking at, [`related`] returns the files history says tend to be
//! touched alongside it.
//!
//! The store is a small per-project JSON file; reads/writes are whole-file and
//! cheap because the graph is pruned aggressively (decay + min-weight + caps).

use std::collections::HashMap;
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

/// Multiplicative decay applied to every edge on each `record` — the forgetting
/// curve. 0.98 ⇒ an association roughly halves after ~34 un-reinforced updates.
const DECAY: f64 = 0.98;
/// Edges weaker than this are pruned (kept the graph small + relevant).
const MIN_WEIGHT: f64 = 0.08;
/// Potentiation increment for a co-access (LTP step).
const LTP_INCREMENT: f64 = 1.0;
/// Cap on neighbours kept per file (strongest retained) to bound memory.
const MAX_NEIGHBORS: usize = 32;
/// Cap on total tracked files; beyond it new files are still recorded but the
/// weakest-degree files are evicted to stay bounded.
const MAX_FILES: usize = 5_000;
/// A single record never associates more than this many files (avoids O(n²)
/// blow-ups when a tool surfaces a huge file set).
const MAX_RECORD_FILES: usize = 24;

/// Persistent, decaying co-access graph for one project.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CoAccessGraph {
    /// `file → (neighbour → weight)`. Symmetric by construction.
    edges: HashMap<String, HashMap<String, f64>>,
}

impl CoAccessGraph {
    /// Reinforce the mutual association of every pair in `files` (LTP) after
    /// decaying the whole graph one step (global forgetting). Self-pairs and
    /// duplicates are ignored. Bounded work: at most `MAX_RECORD_FILES²` pairs.
    pub fn record(&mut self, files: &[String]) {
        // Distinct, capped input.
        let mut uniq: Vec<&String> = Vec::new();
        for f in files {
            if !f.is_empty() && !uniq.contains(&f) {
                uniq.push(f);
                if uniq.len() >= MAX_RECORD_FILES {
                    break;
                }
            }
        }
        if uniq.len() < 2 {
            return; // nothing to associate
        }

        self.decay_all();

        for i in 0..uniq.len() {
            for j in (i + 1)..uniq.len() {
                self.bump(uniq[i], uniq[j]);
                self.bump(uniq[j], uniq[i]);
            }
        }

        self.prune();
    }

    /// Files most strongly associated with `file`, strongest first.
    pub fn related(&self, file: &str, top_k: usize) -> Vec<(String, f64)> {
        let Some(neighbours) = self.edges.get(file) else {
            return Vec::new();
        };
        let mut v: Vec<(String, f64)> = neighbours.iter().map(|(k, &w)| (k.clone(), w)).collect();
        v.sort_by(|a, b| b.1.total_cmp(&a.1));
        v.truncate(top_k);
        v
    }

    fn bump(&mut self, from: &str, to: &str) {
        let entry = self.edges.entry(from.to_string()).or_default();
        *entry.entry(to.to_string()).or_insert(0.0) += LTP_INCREMENT;
    }

    fn decay_all(&mut self) {
        for neighbours in self.edges.values_mut() {
            for w in neighbours.values_mut() {
                *w *= DECAY;
            }
        }
    }

    fn prune(&mut self) {
        for neighbours in self.edges.values_mut() {
            neighbours.retain(|_, &mut w| w >= MIN_WEIGHT);
            if neighbours.len() > MAX_NEIGHBORS {
                let mut kept: Vec<(String, f64)> =
                    neighbours.iter().map(|(k, &w)| (k.clone(), w)).collect();
                kept.sort_by(|a, b| b.1.total_cmp(&a.1));
                kept.truncate(MAX_NEIGHBORS);
                *neighbours = kept.into_iter().collect();
            }
        }
        self.edges.retain(|_, neighbours| !neighbours.is_empty());

        if self.edges.len() > MAX_FILES {
            // Evict the lowest-degree files (least-connected memories).
            let mut by_degree: Vec<(String, usize)> = self
                .edges
                .iter()
                .map(|(k, n)| (k.clone(), n.len()))
                .collect();
            by_degree.sort_by_key(|(_, d)| *d);
            let evict = self.edges.len() - MAX_FILES;
            for (file, _) in by_degree.into_iter().take(evict) {
                self.edges.remove(&file);
            }
        }
    }
}

// ── Persistence (one small JSON file per project) ──────────────────────────

fn store_path(project_root: &str) -> Option<PathBuf> {
    let normalized = crate::core::graph_index::normalize_project_root(project_root);
    let hash = crate::core::project_hash::hash_project_root(&normalized);
    crate::core::data_dir::lean_ctx_data_dir()
        .ok()
        .map(|d| d.join("cooccurrence").join(format!("{hash}.json")))
}

/// Load the co-access graph for `project_root` (empty if none / unreadable).
pub fn load(project_root: &str) -> CoAccessGraph {
    let Some(path) = store_path(project_root) else {
        return CoAccessGraph::default();
    };
    std::fs::read_to_string(&path)
        .ok()
        .and_then(|s| serde_json::from_str(&s).ok())
        .unwrap_or_default()
}

fn save(project_root: &str, graph: &CoAccessGraph) {
    let Some(path) = store_path(project_root) else {
        return;
    };
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    if let Ok(json) = serde_json::to_string(graph) {
        let _ = std::fs::write(&path, json);
    }
}

/// Record that `files` were accessed together for one task, persisting the
/// reinforced graph. No-op for fewer than two distinct files.
pub fn record_access(project_root: &str, files: &[String]) {
    if files.len() < 2 {
        return;
    }
    let mut graph = load(project_root);
    graph.record(files);
    save(project_root, &graph);
}

/// Files historically co-accessed with `file`, strongest association first.
pub fn related(project_root: &str, file: &str, top_k: usize) -> Vec<(String, f64)> {
    load(project_root).related(file, top_k)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn co_access_strengthens_association() {
        let mut g = CoAccessGraph::default();
        g.record(&["a.rs".into(), "b.rs".into()]);
        let rel = g.related("a.rs", 5);
        assert_eq!(rel.len(), 1);
        assert_eq!(rel[0].0, "b.rs");
        assert!(rel[0].1 > 0.0);
    }

    #[test]
    fn repeated_co_access_outweighs_single() {
        let mut g = CoAccessGraph::default();
        for _ in 0..5 {
            g.record(&["x.rs".into(), "y.rs".into()]);
        }
        g.record(&["x.rs".into(), "z.rs".into()]);
        let rel = g.related("x.rs", 5);
        // y was reinforced 5×, z once → y must rank first.
        assert_eq!(rel[0].0, "y.rs");
        assert!(rel.iter().any(|(f, _)| f == "z.rs"));
        assert!(rel[0].1 > rel.iter().find(|(f, _)| f == "z.rs").unwrap().1);
    }

    #[test]
    fn weak_associations_are_pruned_by_decay() {
        let mut g = CoAccessGraph::default();
        g.record(&["a.rs".into(), "b.rs".into()]);
        // Hammer an unrelated pair so the a–b edge decays below MIN_WEIGHT.
        for _ in 0..400 {
            g.record(&["c.rs".into(), "d.rs".into()]);
        }
        assert!(
            g.related("a.rs", 5).is_empty(),
            "decayed association should be pruned"
        );
        assert!(!g.related("c.rs", 5).is_empty());
    }

    #[test]
    fn single_file_record_is_noop() {
        let mut g = CoAccessGraph::default();
        g.record(&["lonely.rs".into()]);
        assert!(g.related("lonely.rs", 5).is_empty());
    }

    #[test]
    fn association_is_symmetric() {
        let mut g = CoAccessGraph::default();
        g.record(&["one.rs".into(), "two.rs".into()]);
        assert_eq!(g.related("one.rs", 5)[0].0, "two.rs");
        assert_eq!(g.related("two.rs", 5)[0].0, "one.rs");
    }

    #[test]
    fn serializes_round_trip() {
        // Deterministic: exercises the persistence *format* (the on-disk path
        // uses this same serde round-trip) without touching the process-global
        // data-dir env var, which other tests mutate concurrently.
        let mut g = CoAccessGraph::default();
        g.record(&["alpha.rs".into(), "beta.rs".into()]);
        let json = serde_json::to_string(&g).unwrap();
        let restored: CoAccessGraph = serde_json::from_str(&json).unwrap();
        let rel = restored.related("alpha.rs", 5);
        assert_eq!(rel.len(), 1);
        assert_eq!(rel[0].0, "beta.rs");
    }

    #[test]
    fn neighbours_are_capped() {
        let mut g = CoAccessGraph::default();
        // Pair one hub file with many distinct others across separate records
        // so its neighbour set exceeds the cap before pruning.
        for i in 0..(MAX_NEIGHBORS + 20) {
            g.record(&["hub.rs".into(), format!("f{i}.rs")]);
        }
        assert!(g.related("hub.rs", 1000).len() <= MAX_NEIGHBORS);
    }
}