1use serde::{Deserialize, Serialize};
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct MemoryMapEntry {
40 pub id: String,
42 pub tier: String,
44 pub mem_type: String,
46 pub content_preview: String,
48 pub created_at: String,
50 pub access_count: u32,
52 pub coords_2d: (f32, f32),
54 pub top_neighbors: Vec<MemoryNeighbor>,
56}
57
58#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
60pub struct MemoryNeighbor {
61 pub id: String,
63 pub similarity: f32,
65}
66
67pub fn compute_pca_2d(embeddings: &[Vec<f32>]) -> Vec<(f32, f32)> {
85 let n = embeddings.len();
86 if n == 0 {
87 return Vec::new();
88 }
89 if n == 1 {
90 return vec![(0.0, 0.0)];
91 }
92
93 let sparse = build_sparse(embeddings);
96 if sparse.nnz == 0 {
97 return vec![(0.0, 0.0); n];
98 }
99 let d = sparse.d;
100
101 let means = column_means(&sparse, n, d);
104 let centered_sparse = Sparse::centered(&sparse, &means);
105
106 const POWER_ITERATIONS: usize = 20;
111
112 let v1 = power_iteration_sparse(¢ered_sparse, POWER_ITERATIONS);
113 let p1 = project_sparse(¢ered_sparse, &v1);
114 let v2 = power_iteration_deflated(¢ered_sparse, &p1, &v1, POWER_ITERATIONS);
116 let p2 = project_deflated(¢ered_sparse, &p1, &v1, &v2);
117
118 let coords: Vec<(f32, f32)> = p1
120 .iter()
121 .zip(p2.iter())
122 .map(|(a, b)| (*a as f32, *b as f32))
123 .collect();
124 normalize_to_unit_square(&coords)
125}
126
127pub fn compute_top_neighbors(
133 embeddings: &[Vec<f32>],
134 ids: &[String],
135 top_k: usize,
136 threshold: f32,
137) -> Vec<Vec<MemoryNeighbor>> {
138 debug_assert_eq!(embeddings.len(), ids.len());
139 let n = embeddings.len();
140 if n == 0 {
141 return Vec::new();
142 }
143 if n == 1 {
144 return vec![Vec::new()];
145 }
146
147 let s = build_sparse(embeddings);
148 if s.nnz == 0 {
149 return vec![Vec::new(); n];
150 }
151 let norms: Vec<f64> = s
153 .rows
154 .iter()
155 .map(|row| row.iter().map(|(_, v)| *v * *v).sum::<f64>().sqrt())
156 .collect();
157
158 let mut sorted_rows: Vec<Vec<(usize, f64)>> = s.rows.clone();
161 for r in &mut sorted_rows {
162 r.sort_by_key(|(j, _)| *j);
163 }
164
165 let mut out = Vec::with_capacity(n);
166 for i in 0..n {
167 let mut sims: Vec<(usize, f64)> = (0..n)
168 .filter(|&j| j != i)
169 .map(|j| {
170 let sim = if norms[i] == 0.0 || norms[j] == 0.0 {
171 0.0
172 } else {
173 sparse_dot(&sorted_rows[i], &sorted_rows[j]) / (norms[i] * norms[j])
174 };
175 (j, sim)
176 })
177 .filter(|(_, s)| *s >= threshold as f64)
178 .collect();
179 sims.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
180 sims.truncate(top_k);
181 out.push(
182 sims.into_iter()
183 .map(|(j, s)| MemoryNeighbor {
184 id: ids[j].clone(),
185 similarity: s as f32,
186 })
187 .collect(),
188 );
189 }
190 out
191}
192
193fn sparse_dot(a: &[(usize, f64)], b: &[(usize, f64)]) -> f64 {
196 let (mut i, mut j) = (0usize, 0usize);
197 let mut acc = 0.0_f64;
198 while i < a.len() && j < b.len() {
199 match a[i].0.cmp(&b[j].0) {
200 std::cmp::Ordering::Equal => {
201 acc += a[i].1 * b[j].1;
202 i += 1;
203 j += 1;
204 }
205 std::cmp::Ordering::Less => i += 1,
206 std::cmp::Ordering::Greater => j += 1,
207 }
208 }
209 acc
210}
211
212struct Sparse {
223 n: usize,
224 d: usize,
225 nnz: usize,
226 rows: Vec<Vec<(usize, f64)>>,
228 cols: Vec<Vec<(usize, f64)>>,
230}
231
232impl Sparse {
233 fn centered(&self, means: &[f64]) -> Self {
236 let rows: Vec<Vec<(usize, f64)>> = self
237 .rows
238 .iter()
239 .map(|row| row.iter().map(|&(j, v)| (j, v - means[j])).collect())
240 .collect();
241 let cols: Vec<Vec<(usize, f64)>> = self
242 .cols
243 .iter()
244 .enumerate()
245 .map(|(j, col)| col.iter().map(|&(i, v)| (i, v - means[j])).collect())
246 .collect();
247 Self {
248 n: self.n,
249 d: self.d,
250 nnz: self.nnz,
251 rows,
252 cols,
253 }
254 }
255}
256
257fn build_sparse(embeddings: &[Vec<f32>]) -> Sparse {
262 let n = embeddings.len();
263 let mut max_dim: usize = 0;
264 for emb in embeddings {
265 if emb.len() > max_dim {
266 max_dim = emb.len();
267 }
268 }
269 let d = max_dim;
270 let mut rows: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
271 let mut cols: Vec<Vec<(usize, f64)>> = vec![Vec::new(); d];
272 let mut nnz = 0usize;
273 for (i, emb) in embeddings.iter().enumerate() {
274 for (j, v) in emb.iter().enumerate() {
275 let val = *v as f64;
276 if val != 0.0 {
277 rows[i].push((j, val));
278 cols[j].push((i, val));
279 nnz += 1;
280 }
281 }
282 }
283 Sparse {
284 n,
285 d,
286 nnz,
287 rows,
288 cols,
289 }
290}
291
292fn column_means(s: &Sparse, n: usize, d: usize) -> Vec<f64> {
294 if n == 0 || d == 0 {
295 return vec![0.0_f64; d];
296 }
297 let mut means = vec![0.0_f64; d];
298 for (j, col) in s.cols.iter().enumerate() {
299 let mut s = 0.0_f64;
300 for &(_, v) in col {
301 s += v;
302 }
303 means[j] = s / n as f64;
304 }
305 means
306}
307
308fn matvec_rows(s: &Sparse, v: &[f64]) -> Vec<f64> {
310 let mut out = vec![0.0_f64; s.n];
311 for (i, row) in s.rows.iter().enumerate() {
312 let mut acc = 0.0_f64;
313 for &(j, val) in row {
314 acc += val * v[j];
315 }
316 out[i] = acc;
317 }
318 out
319}
320
321fn matvec_cols(s: &Sparse, u: &[f64]) -> Vec<f64> {
323 let mut out = vec![0.0_f64; s.d];
324 for (j, col) in s.cols.iter().enumerate() {
325 let mut acc = 0.0_f64;
326 for &(i, val) in col {
327 acc += val * u[i];
328 }
329 out[j] = acc;
330 }
331 out
332}
333
334fn power_iteration_sparse(s: &Sparse, iterations: usize) -> Vec<f64> {
337 if s.d == 0 {
338 return Vec::new();
339 }
340 let mut v: Vec<f64> = (0..s.d)
342 .map(|i| ((i + 1) as f64 * 0.137).sin() + 1.0)
343 .collect();
344 normalize(&mut v);
345
346 for _ in 0..iterations {
347 let av = matvec_rows(s, &v);
348 let ata_v = matvec_cols(s, &av);
349 let mut ata_v = ata_v;
350 normalize(&mut ata_v);
351 v = ata_v;
352 }
353 v
354}
355
356fn power_iteration_deflated(s: &Sparse, p: &[f64], v: &[f64], iterations: usize) -> Vec<f64> {
361 if s.d == 0 {
362 return Vec::new();
363 }
364 let mut w: Vec<f64> = (0..s.d)
365 .map(|i| ((i + 1) as f64 * 0.137).sin() + 1.0)
366 .collect();
367 normalize(&mut w);
368
369 for _ in 0..iterations {
370 let aw = matvec_rows(s, &w);
376 let vt_w: f64 = v.iter().zip(w.iter()).map(|(a, b)| *a * *b).sum();
377 let deflated_w: Vec<f64> = aw
378 .iter()
379 .zip(p.iter())
380 .map(|(a, b)| *a - *b * vt_w)
381 .collect();
382 let at_deflated = matvec_cols(s, &deflated_w);
383 let pt_deflated: f64 = p.iter().zip(deflated_w.iter()).map(|(a, b)| *a * *b).sum();
384 let mut new_w: Vec<f64> = at_deflated
385 .iter()
386 .zip(v.iter())
387 .map(|(a, b)| *a - *b * pt_deflated)
388 .collect();
389 normalize(&mut new_w);
390 w = new_w;
391 }
392 w
393}
394
395fn project_sparse(s: &Sparse, v: &[f64]) -> Vec<f64> {
397 matvec_rows(s, v)
398}
399
400fn project_deflated(s: &Sparse, p: &[f64], v1: &[f64], v2: &[f64]) -> Vec<f64> {
404 let av2 = matvec_rows(s, v2);
405 let v1t_v2: f64 = v1.iter().zip(v2.iter()).map(|(a, b)| *a * *b).sum();
406 av2.iter()
407 .zip(p.iter())
408 .map(|(a, b)| *a - *b * v1t_v2)
409 .collect()
410}
411
412fn normalize(v: &mut [f64]) {
413 let norm = v.iter().map(|x| *x * *x).sum::<f64>().sqrt();
414 if norm > 0.0 {
415 for x in v.iter_mut() {
416 *x /= norm;
417 }
418 }
419}
420
421fn normalize_to_unit_square(coords: &[(f32, f32)]) -> Vec<(f32, f32)> {
423 if coords.is_empty() {
424 return Vec::new();
425 }
426 let mut min_x = f32::INFINITY;
427 let mut max_x = f32::NEG_INFINITY;
428 let mut min_y = f32::INFINITY;
429 let mut max_y = f32::NEG_INFINITY;
430 for &(x, y) in coords {
431 if x < min_x {
432 min_x = x;
433 }
434 if x > max_x {
435 max_x = x;
436 }
437 if y < min_y {
438 min_y = y;
439 }
440 if y > max_y {
441 max_y = y;
442 }
443 }
444 let span_x = (max_x - min_x).max(f32::MIN_POSITIVE);
445 let span_y = (max_y - min_y).max(f32::MIN_POSITIVE);
446 let span = span_x.max(span_y);
447 if span <= 0.0 {
448 return coords.to_vec();
449 }
450 let cx = (min_x + max_x) / 2.0;
451 let cy = (min_y + max_y) / 2.0;
452 coords
453 .iter()
454 .map(|(x, y)| (((x - cx) / span), ((y - cy) / span)))
455 .collect()
456}
457
458#[cfg(test)]
463mod tests {
464 use super::*;
465
466 fn v(values: &[f32]) -> Vec<f32> {
468 values.to_vec()
469 }
470
471 #[test]
472 fn pca_two_clear_clusters_along_axes() {
473 let mut embs = Vec::new();
477 for i in 0..5 {
478 embs.push(v(&[10.0 + i as f32, 0.0, 0.0]));
479 }
480 for i in 0..5 {
481 embs.push(v(&[0.0, 10.0 + i as f32, 0.0]));
482 }
483 let coords = compute_pca_2d(&embs);
484 assert_eq!(coords.len(), 10);
485
486 let (cx1, cy1) = centroid(&coords[0..5]);
488 let (cx2, cy2) = centroid(&coords[5..10]);
489 let dist = ((cx1 - cx2).powi(2) + (cy1 - cy2).powi(2)).sqrt();
490 assert!(
491 dist > 0.5,
492 "clusters should be visually separated, got {dist}"
493 );
494 }
495
496 #[test]
497 fn pca_empty_input() {
498 let coords = compute_pca_2d(&[]);
499 assert!(coords.is_empty());
500 }
501
502 #[test]
503 fn pca_single_input_returns_origin() {
504 let coords = compute_pca_2d(&[v(&[1.0, 2.0, 3.0])]);
505 assert_eq!(coords, vec![(0.0, 0.0)]);
506 }
507
508 #[test]
509 fn pca_deterministic() {
510 let embs = vec![
512 v(&[1.0, 0.0, 0.0, 0.0]),
513 v(&[0.0, 1.0, 0.0, 0.0]),
514 v(&[0.0, 0.0, 1.0, 0.0]),
515 v(&[0.0, 0.0, 0.0, 1.0]),
516 v(&[1.0, 1.0, 0.0, 0.0]),
517 v(&[0.0, 0.0, 1.0, 1.0]),
518 ];
519 let a = compute_pca_2d(&embs);
520 let b = compute_pca_2d(&embs);
521 for (pa, pb) in a.iter().zip(b.iter()) {
522 assert!((pa.0 - pb.0).abs() < 1e-5);
523 assert!((pa.1 - pb.1).abs() < 1e-5);
524 }
525 }
526
527 #[test]
528 fn pca_handles_zero_vectors() {
529 let embs = vec![
531 v(&[0.0, 0.0, 0.0]),
532 v(&[1.0, 0.0, 0.0]),
533 v(&[0.0, 1.0, 0.0]),
534 ];
535 let coords = compute_pca_2d(&embs);
536 assert_eq!(coords.len(), 3);
537 for c in &coords {
538 assert!(c.0.is_finite() && c.1.is_finite());
539 }
540 }
541
542 #[test]
543 fn pca_handles_sparse_input() {
544 let embs = vec![
546 v(&[1.0, 0.0, 0.0, 0.0, 0.0]),
547 v(&[1.0, 0.0, 0.0, 0.0, 0.0]),
548 v(&[0.0, 0.0, 0.0, 0.0, 1.0]),
549 v(&[0.0, 0.0, 0.0, 0.0, 1.0]),
550 ];
551 let coords = compute_pca_2d(&embs);
552 assert_eq!(coords.len(), 4);
553 let d1 = dist(coords[0], coords[1]);
555 let d2 = dist(coords[2], coords[3]);
556 assert!(d1 < 1e-3, "identical pair must coincide, got {d1}");
557 assert!(d2 < 1e-3, "identical pair must coincide, got {d2}");
558 }
559
560 #[test]
561 fn neighbors_finds_nearest() {
562 let embs = vec![
563 v(&[1.0, 0.0, 0.0]), v(&[1.0, 0.0, 0.0]), v(&[0.0, 1.0, 0.0]), v(&[0.0, 0.0, 1.0]), ];
568 let ids: Vec<String> = (0..4).map(|i| format!("id{i}")).collect();
569 let nbrs = compute_top_neighbors(&embs, &ids, 2, 0.0);
570 assert_eq!(nbrs.len(), 4);
571
572 let top0 = &nbrs[0];
574 assert!(!top0.is_empty());
575 assert_eq!(top0[0].id, "id1");
576 assert!((top0[0].similarity - 1.0).abs() < 1e-4);
577 assert!(top0.iter().all(|n| n.id != "id0"));
579 }
580
581 #[test]
582 fn neighbors_threshold_filters() {
583 let embs = vec![
584 v(&[1.0, 0.0]), v(&[1.0, 0.0]), v(&[0.0, 1.0]), ];
588 let ids: Vec<String> = (0..3).map(|i| format!("id{i}")).collect();
589 let nbrs = compute_top_neighbors(&embs, &ids, 5, 0.5);
591 assert_eq!(nbrs[0].len(), 1);
592 assert_eq!(nbrs[0][0].id, "id1");
593 }
594
595 #[test]
596 fn neighbors_empty_input() {
597 let nbrs = compute_top_neighbors(&[], &[], 5, 0.0);
598 assert!(nbrs.is_empty());
599 }
600
601 #[test]
602 fn neighbors_single_input() {
603 let embs = vec![v(&[1.0, 0.0])];
604 let ids = vec!["only".to_string()];
605 let nbrs = compute_top_neighbors(&embs, &ids, 5, 0.0);
606 assert_eq!(nbrs, vec![Vec::<MemoryNeighbor>::new()]);
607 }
608
609 #[test]
610 fn map_entry_serializes_json() {
611 let entry = MemoryMapEntry {
612 id: "abc".into(),
613 tier: "hot".into(),
614 mem_type: "fact".into(),
615 content_preview: "hello world".into(),
616 created_at: "2026-06-04T00:00:00Z".into(),
617 access_count: 3,
618 coords_2d: (0.5, -0.5),
619 top_neighbors: vec![MemoryNeighbor {
620 id: "def".into(),
621 similarity: 0.87,
622 }],
623 };
624 let json = serde_json::to_string(&entry).unwrap();
625 assert!(json.contains("\"id\":\"abc\""));
626 assert!(json.contains("\"coords_2d\":[0.5,-0.5]"));
627 assert!(json.contains("\"similarity\":0.87"));
628 }
629
630 fn centroid(coords: &[(f32, f32)]) -> (f32, f32) {
633 let sx: f32 = coords.iter().map(|c| c.0).sum();
634 let sy: f32 = coords.iter().map(|c| c.1).sum();
635 (sx / coords.len() as f32, sy / coords.len() as f32)
636 }
637
638 fn dist(a: (f32, f32), b: (f32, f32)) -> f32 {
639 ((a.0 - b.0).powi(2) + (a.1 - b.1).powi(2)).sqrt()
640 }
641}