1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
//! Hebbian file co-access graph — "files that fire together, wire together".
//!
//! ## The idea (neuroscience → retrieval)
//!
//! Hebbian theory: synapses between co-active neurons strengthen (long-term
//! potentiation, LTP), while unused ones weaken (long-term depression / the
//! Ebbinghaus forgetting curve). We apply the same rule to files: whenever a
//! task surfaces a set of files *together*, we strengthen the association
//! between every pair; on each update all weights decay slightly, so stale
//! associations fade. Over time the graph learns the project's real working
//! paths — which the static import/AST graph cannot capture.
//!
//! The learned association is an additive retrieval signal: given a file the
//! agent is looking at, [`related`] returns the files history says tend to be
//! touched alongside it.
//!
//! The store is a small per-project JSON file; reads/writes are whole-file and
//! cheap because the graph is pruned aggressively (decay + min-weight + caps).
use std::collections::HashMap;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
/// Multiplicative decay applied to every edge on each `record` — the forgetting
/// curve. 0.98 ⇒ an association roughly halves after ~34 un-reinforced updates.
const DECAY: f64 = 0.98;
/// Edges weaker than this are pruned (kept the graph small + relevant).
const MIN_WEIGHT: f64 = 0.08;
/// Potentiation increment for a co-access (LTP step).
const LTP_INCREMENT: f64 = 1.0;
/// Cap on neighbours kept per file (strongest retained) to bound memory.
const MAX_NEIGHBORS: usize = 32;
/// Cap on total tracked files; beyond it new files are still recorded but the
/// weakest-degree files are evicted to stay bounded.
const MAX_FILES: usize = 5_000;
/// A single record never associates more than this many files (avoids O(n²)
/// blow-ups when a tool surfaces a huge file set).
const MAX_RECORD_FILES: usize = 24;
/// Persistent, decaying co-access graph for one project.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CoAccessGraph {
/// `file → (neighbour → weight)`. Symmetric by construction.
edges: HashMap<String, HashMap<String, f64>>,
}
impl CoAccessGraph {
/// Reinforce the mutual association of every pair in `files` (LTP) after
/// decaying the whole graph one step (global forgetting). Self-pairs and
/// duplicates are ignored. Bounded work: at most `MAX_RECORD_FILES²` pairs.
pub fn record(&mut self, files: &[String]) {
// Distinct, capped input.
let mut uniq: Vec<&String> = Vec::new();
for f in files {
if !f.is_empty() && !uniq.contains(&f) {
uniq.push(f);
if uniq.len() >= MAX_RECORD_FILES {
break;
}
}
}
if uniq.len() < 2 {
return; // nothing to associate
}
self.decay_all();
for i in 0..uniq.len() {
for j in (i + 1)..uniq.len() {
self.bump(uniq[i], uniq[j]);
self.bump(uniq[j], uniq[i]);
}
}
self.prune();
}
/// Files most strongly associated with `file`, strongest first.
pub fn related(&self, file: &str, top_k: usize) -> Vec<(String, f64)> {
let Some(neighbours) = self.edges.get(file) else {
return Vec::new();
};
let mut v: Vec<(String, f64)> = neighbours.iter().map(|(k, &w)| (k.clone(), w)).collect();
v.sort_by(|a, b| b.1.total_cmp(&a.1));
v.truncate(top_k);
v
}
fn bump(&mut self, from: &str, to: &str) {
let entry = self.edges.entry(from.to_string()).or_default();
*entry.entry(to.to_string()).or_insert(0.0) += LTP_INCREMENT;
}
fn decay_all(&mut self) {
for neighbours in self.edges.values_mut() {
for w in neighbours.values_mut() {
*w *= DECAY;
}
}
}
fn prune(&mut self) {
for neighbours in self.edges.values_mut() {
neighbours.retain(|_, &mut w| w >= MIN_WEIGHT);
if neighbours.len() > MAX_NEIGHBORS {
let mut kept: Vec<(String, f64)> =
neighbours.iter().map(|(k, &w)| (k.clone(), w)).collect();
kept.sort_by(|a, b| b.1.total_cmp(&a.1));
kept.truncate(MAX_NEIGHBORS);
*neighbours = kept.into_iter().collect();
}
}
self.edges.retain(|_, neighbours| !neighbours.is_empty());
if self.edges.len() > MAX_FILES {
// Evict the lowest-degree files (least-connected memories).
let mut by_degree: Vec<(String, usize)> = self
.edges
.iter()
.map(|(k, n)| (k.clone(), n.len()))
.collect();
by_degree.sort_by_key(|(_, d)| *d);
let evict = self.edges.len() - MAX_FILES;
for (file, _) in by_degree.into_iter().take(evict) {
self.edges.remove(&file);
}
}
}
}
// ── Persistence (one small JSON file per project) ──────────────────────────
fn store_path(project_root: &str) -> Option<PathBuf> {
let normalized = crate::core::graph_index::normalize_project_root(project_root);
let hash = crate::core::project_hash::hash_project_root(&normalized);
crate::core::data_dir::lean_ctx_data_dir()
.ok()
.map(|d| d.join("cooccurrence").join(format!("{hash}.json")))
}
/// Load the co-access graph for `project_root` (empty if none / unreadable).
pub fn load(project_root: &str) -> CoAccessGraph {
let Some(path) = store_path(project_root) else {
return CoAccessGraph::default();
};
std::fs::read_to_string(&path)
.ok()
.and_then(|s| serde_json::from_str(&s).ok())
.unwrap_or_default()
}
fn save(project_root: &str, graph: &CoAccessGraph) {
let Some(path) = store_path(project_root) else {
return;
};
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
if let Ok(json) = serde_json::to_string(graph) {
let _ = std::fs::write(&path, json);
}
}
/// Record that `files` were accessed together for one task, persisting the
/// reinforced graph. No-op for fewer than two distinct files.
pub fn record_access(project_root: &str, files: &[String]) {
if files.len() < 2 {
return;
}
let mut graph = load(project_root);
graph.record(files);
save(project_root, &graph);
}
/// Files historically co-accessed with `file`, strongest association first.
pub fn related(project_root: &str, file: &str, top_k: usize) -> Vec<(String, f64)> {
load(project_root).related(file, top_k)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn co_access_strengthens_association() {
let mut g = CoAccessGraph::default();
g.record(&["a.rs".into(), "b.rs".into()]);
let rel = g.related("a.rs", 5);
assert_eq!(rel.len(), 1);
assert_eq!(rel[0].0, "b.rs");
assert!(rel[0].1 > 0.0);
}
#[test]
fn repeated_co_access_outweighs_single() {
let mut g = CoAccessGraph::default();
for _ in 0..5 {
g.record(&["x.rs".into(), "y.rs".into()]);
}
g.record(&["x.rs".into(), "z.rs".into()]);
let rel = g.related("x.rs", 5);
// y was reinforced 5×, z once → y must rank first.
assert_eq!(rel[0].0, "y.rs");
assert!(rel.iter().any(|(f, _)| f == "z.rs"));
assert!(rel[0].1 > rel.iter().find(|(f, _)| f == "z.rs").unwrap().1);
}
#[test]
fn weak_associations_are_pruned_by_decay() {
let mut g = CoAccessGraph::default();
g.record(&["a.rs".into(), "b.rs".into()]);
// Hammer an unrelated pair so the a–b edge decays below MIN_WEIGHT.
for _ in 0..400 {
g.record(&["c.rs".into(), "d.rs".into()]);
}
assert!(
g.related("a.rs", 5).is_empty(),
"decayed association should be pruned"
);
assert!(!g.related("c.rs", 5).is_empty());
}
#[test]
fn single_file_record_is_noop() {
let mut g = CoAccessGraph::default();
g.record(&["lonely.rs".into()]);
assert!(g.related("lonely.rs", 5).is_empty());
}
#[test]
fn association_is_symmetric() {
let mut g = CoAccessGraph::default();
g.record(&["one.rs".into(), "two.rs".into()]);
assert_eq!(g.related("one.rs", 5)[0].0, "two.rs");
assert_eq!(g.related("two.rs", 5)[0].0, "one.rs");
}
#[test]
fn serializes_round_trip() {
// Deterministic: exercises the persistence *format* (the on-disk path
// uses this same serde round-trip) without touching the process-global
// data-dir env var, which other tests mutate concurrently.
let mut g = CoAccessGraph::default();
g.record(&["alpha.rs".into(), "beta.rs".into()]);
let json = serde_json::to_string(&g).unwrap();
let restored: CoAccessGraph = serde_json::from_str(&json).unwrap();
let rel = restored.related("alpha.rs", 5);
assert_eq!(rel.len(), 1);
assert_eq!(rel[0].0, "beta.rs");
}
#[test]
fn neighbours_are_capped() {
let mut g = CoAccessGraph::default();
// Pair one hub file with many distinct others across separate records
// so its neighbour set exceeds the cap before pruning.
for i in 0..(MAX_NEIGHBORS + 20) {
g.record(&["hub.rs".into(), format!("f{i}.rs")]);
}
assert!(g.related("hub.rs", 1000).len() <= MAX_NEIGHBORS);
}
}