1use chrono::{DateTime, Utc};
6use petgraph::stable_graph::{NodeIndex, StableGraph};
7use petgraph::visit::EdgeRef;
8use petgraph::Direction;
9use std::collections::{HashMap, HashSet, VecDeque};
10
11pub type Layer = u8;
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
20pub enum Partition {
21 Project,
23 Foreign { source_repo: String, commit: String },
26}
27
28impl Partition {
29 pub fn is_project(&self) -> bool {
30 matches!(self, Partition::Project)
31 }
32 pub fn is_foreign(&self) -> bool {
33 matches!(self, Partition::Foreign { .. })
34 }
35}
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
38pub enum CodeLanguage {
39 Rust,
40 Python,
41 JavaScript,
42 TypeScript,
43 Go,
44 Java,
45 Cpp,
46 Shell,
47 Sql,
48 Unknown,
49}
50
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
52pub enum ContentType {
53 #[default]
54 NaturalLanguage,
55 Code(CodeLanguage),
56 StructuredData,
57}
58
59impl ContentType {
60 pub fn is_code(&self) -> bool {
62 matches!(self, ContentType::Code(_))
63 }
64
65 pub fn language(&self) -> Option<CodeLanguage> {
67 match self {
68 ContentType::Code(lang) => Some(*lang),
69 _ => None,
70 }
71 }
72
73 pub fn from_label(s: &str) -> Self {
76 match s {
77 "code" => ContentType::Code(CodeLanguage::Unknown),
78 "structured_data" => ContentType::StructuredData,
79 _ => ContentType::NaturalLanguage,
80 }
81 }
82
83 pub fn as_label(&self) -> &'static str {
85 match self {
86 ContentType::NaturalLanguage => "natural_language",
87 ContentType::Code(_) => "code",
88 ContentType::StructuredData => "structured_data",
89 }
90 }
91}
92
93pub fn detect_content_type(text: &str) -> ContentType {
96 let trimmed = text.trim();
97
98 if trimmed.starts_with('{') || trimmed.starts_with('[') {
100 if trimmed.ends_with('}') || trimmed.ends_with(']') {
101 return ContentType::StructuredData;
102 }
103 }
104 if trimmed.starts_with("---") {
105 return ContentType::StructuredData;
106 }
107 let lines: Vec<&str> = trimmed.lines().collect();
109 if lines.len() >= 2 {
110 let kv_count = lines
111 .iter()
112 .filter(|l| {
113 let l = l.trim();
114 if l.is_empty() || l.starts_with('#') || l.starts_with("//") {
115 return false;
116 }
117 if let Some(pos) = l.find(" = ") {
118 return pos < 30 && !l[..pos].contains(' ');
119 }
120 if let Some(pos) = l.find(": ") {
121 return pos < 30 && !l[..pos].contains(' ');
122 }
123 false
124 })
125 .count();
126 if kv_count as f32 / lines.len() as f32 > 0.6 {
127 return ContentType::StructuredData;
128 }
129 }
130
131 if let Some(lang) = detect_code_language(text) {
133 return ContentType::Code(lang);
134 }
135
136 ContentType::NaturalLanguage
137}
138
139fn detect_code_language(text: &str) -> Option<CodeLanguage> {
142 let rust_markers: &[&str] = &[
144 "fn ", "pub fn", "impl ", "trait ", "mod ", "#[", "let mut ", "::",
145 ];
146 let python_markers: &[&str] = &[
147 "def ",
148 "async def",
149 "elif ",
150 "self.",
151 "import ",
152 "__init__",
153 "None",
154 "True",
155 "False",
156 ];
157 let js_markers: &[&str] = &[
158 "function ",
159 "const ",
160 "let ",
161 "var ",
162 "=> ",
163 "===",
164 "!==",
165 "require(",
166 "console.",
167 ];
168 let ts_markers: &[&str] = &[
169 "interface ",
170 ": string",
171 ": number",
172 ": boolean",
173 "readonly ",
174 "as const",
175 ];
176 let go_markers: &[&str] = &["func ", "package ", "go func", ":= ", "fmt."];
177 let java_markers: &[&str] = &[
178 "public class",
179 "private ",
180 "protected ",
181 "System.out",
182 "@Override",
183 ];
184 let cpp_markers: &[&str] = &["#include", "std::", "cout", "nullptr", "template<"];
185 let shell_markers: &[&str] = &["#!/bin", "echo ", "fi\n", "done\n", "esac", "$(", "export "];
186 let sql_markers: &[&str] = &[
187 "SELECT ",
188 "INSERT ",
189 "UPDATE ",
190 "DELETE ",
191 "CREATE TABLE",
192 "ALTER TABLE",
193 "JOIN ",
194 ];
195
196 let count = |markers: &[&str]| markers.iter().filter(|m| text.contains(**m)).count();
197
198 let rust = count(rust_markers);
199 let python = count(python_markers);
200 let js = count(js_markers);
201 let ts = count(ts_markers);
202 let go = count(go_markers);
203 let java = count(java_markers);
204 let cpp = count(cpp_markers);
205 let shell = count(shell_markers);
206 let sql = count(sql_markers);
207
208 let scores = [
210 (rust, CodeLanguage::Rust),
211 (python, CodeLanguage::Python),
212 (js, CodeLanguage::JavaScript),
213 (ts, CodeLanguage::TypeScript),
214 (go, CodeLanguage::Go),
215 (java, CodeLanguage::Java),
216 (cpp, CodeLanguage::Cpp),
217 (shell, CodeLanguage::Shell),
218 (sql, CodeLanguage::Sql),
219 ];
220
221 let (best_score, best_lang) = scores
222 .iter()
223 .max_by_key(|(score, _)| *score)
224 .copied()
225 .unwrap();
226
227 if best_score >= 2 {
228 return Some(best_lang);
229 }
230
231 let has_braces = text.contains('{') && text.contains('}');
233 let has_parens_semi = text.contains('(') && text.contains(';');
234 let structural = has_braces as usize + has_parens_semi as usize;
235
236 let generic_strong: &[&str] = &["fn ", "def ", "impl ", "struct ", "enum ", "=>", "->"];
238 let generic_weak: &[&str] = &[
239 "function ",
240 "class ",
241 "import ",
242 "from ",
243 "const ",
244 "let ",
245 "var ",
246 "export ",
247 "pub ",
248 "use ",
249 ];
250 let strong = count(generic_strong);
251 let weak = count(generic_weak);
252 let total = strong + weak;
253
254 let is_code = (strong >= 2)
255 || (strong >= 1 && (structural > 0 || weak >= 1))
256 || (weak >= 2 && structural > 0)
257 || (total >= 3 && structural > 0);
258
259 if is_code {
260 if best_score >= 1 {
262 Some(best_lang)
263 } else {
264 Some(CodeLanguage::Unknown)
265 }
266 } else {
267 None
268 }
269}
270
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
272pub enum MemKind {
273 Identity,
274 Fact,
275 FactSuperseded,
276 Conclusion,
279 ConclusionInvalidated,
281 Skill,
282 SkillDeprecated,
283 Conversation,
284 ConversationSummary,
286 Environment,
287 Model,
289 CodeSymbol,
292}
293
294#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
295pub enum EdgeKind {
296 Supersedes, DependsOn, RelatedTo, Triggers, TemporalNext, Calls, DefinedIn, Imports, CitesPremise, }
306
307#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
309pub struct CodeSymbolMeta {
310 pub name: String,
311 pub kind: String, pub signature: String,
313 pub file_path: String,
314 pub start_line: u32,
315 pub end_line: u32,
316 pub doc_comment: Option<String>,
317 pub parent: Option<String>,
318}
319
320impl CodeSymbolMeta {
321 pub fn from_node(node: &MemNode) -> Option<Self> {
322 if node.kind != MemKind::CodeSymbol {
323 return None;
324 }
325 serde_json::from_str(&node.value).ok()
326 }
327 pub fn encode(&self) -> String {
328 serde_json::to_string(self).expect("CodeSymbolMeta serializable")
329 }
330}
331
332#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
337#[serde(rename_all = "snake_case")]
338pub enum SkillScope {
339 Global,
341 Domain(String),
343}
344
345impl Default for SkillScope {
346 fn default() -> Self {
347 SkillScope::Global
348 }
349}
350
351#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default, PartialEq)]
372pub struct SkillTrigger {
373 #[serde(default)]
374 pub persona: String,
375 #[serde(default)]
376 pub url_pattern: String,
377 #[serde(default)]
378 pub task_keywords: Vec<String>,
379 #[serde(default, skip_serializing_if = "Option::is_none")]
385 pub structured: Option<StructuredTrigger>,
386}
387
388#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
390pub struct StructuredTrigger {
391 pub kind: String,
392 pub signature: serde_json::Value,
393}
394
395#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default, PartialEq)]
396pub struct SkillStats {
397 pub success_count: u64,
398 pub fail_count: u64,
399 pub degraded: bool,
400 pub broken_for_repair: bool,
401 #[serde(default, skip_serializing_if = "Option::is_none")]
406 pub completion_rate: Option<f64>,
407 #[serde(default, skip_serializing_if = "Option::is_none")]
411 pub complaint_count: Option<u64>,
412 #[serde(default, skip_serializing_if = "Option::is_none")]
415 pub last_used_at: Option<DateTime<Utc>>,
416}
417
418impl SkillStats {
419 pub fn should_degrade(&self, threshold: u64) -> bool {
420 self.fail_count > self.success_count + threshold
421 }
422 pub fn success_ratio(&self) -> f64 {
423 let total = self.success_count + self.fail_count;
424 if total == 0 {
425 return 0.5;
426 }
427 self.success_count as f64 / total as f64
428 }
429}
430
431#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
432#[serde(rename_all = "snake_case")]
433pub enum SkillOutcome {
434 Success,
435 Fail,
436}
437
438#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
439pub struct SkillMeta {
440 pub name: String,
441 pub code: String, pub platform: String, pub description: String,
444 pub trigger: SkillTrigger,
445 #[serde(default)]
447 pub scope: SkillScope,
448 #[serde(default)]
450 pub when_to_apply: String,
451 #[serde(default)]
452 pub stats: SkillStats,
453 #[serde(default = "default_skill_version")]
454 pub version: u64,
455 #[serde(default)]
464 pub tenant_id: Option<String>,
465}
466
467fn default_skill_version() -> u64 {
468 1
469}
470
471impl SkillMeta {
472 pub fn from_node(node: &MemNode) -> Option<Self> {
473 if !matches!(node.kind, MemKind::Skill | MemKind::SkillDeprecated) {
474 return None;
475 }
476 serde_json::from_str(&node.value).ok()
477 }
478 pub fn encode(&self) -> String {
479 serde_json::to_string(self).expect("SkillMeta serializable")
480 }
481}
482
483#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
485pub struct Provenance {
486 pub source: String, pub reference: String, #[serde(default)]
489 pub date: Option<DateTime<Utc>>,
490}
491
492#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
495pub struct FactMetadata {
496 #[serde(default)]
498 pub confidence: String,
499 #[serde(default)]
501 pub provenance: Vec<Provenance>,
502 #[serde(default)]
504 pub affected_files: Vec<String>,
505 #[serde(default)]
507 pub tags: Vec<String>,
508 #[serde(default)]
510 pub category: String,
511 #[serde(default)]
513 pub usage_count: u64,
514 #[serde(default)]
516 pub helpful_count: u64,
517 #[serde(default)]
519 pub outdated_reports: u64,
520 #[serde(default)]
531 pub tenant_id: Option<String>,
532}
533
534impl FactMetadata {
535 pub fn is_empty(&self) -> bool {
536 self.confidence.is_empty()
537 && self.provenance.is_empty()
538 && self.affected_files.is_empty()
539 && self.tags.is_empty()
540 && self.category.is_empty()
541 }
542
543 pub fn staleness_ratio(&self) -> f64 {
545 if self.usage_count == 0 {
546 return 0.0;
547 }
548 self.outdated_reports as f64 / self.usage_count as f64
549 }
550
551 pub fn helpfulness_ratio(&self) -> f64 {
553 if self.usage_count == 0 {
554 return 0.5;
555 } self.helpful_count as f64 / self.usage_count as f64
557 }
558}
559
560#[derive(Debug, Clone)]
561pub struct MemNode {
562 pub kind: MemKind,
563 pub layer: Layer,
564 pub key: String,
565 pub value: String,
566 pub fact_id: Option<String>,
567 pub scope: String,
568 pub authority: String,
569 pub is_constraint: bool,
570 pub created_at: DateTime<Utc>,
571 pub expires_at: Option<DateTime<Utc>>,
572 pub content_type: ContentType,
573 pub metadata: FactMetadata,
575}
576
577impl MemNode {
578 pub fn token_estimate(&self) -> usize {
579 std::cmp::max(1, self.value.len() / 4)
580 }
581
582 pub fn is_valid(&self) -> bool {
583 !matches!(
584 self.kind,
585 MemKind::FactSuperseded | MemKind::SkillDeprecated | MemKind::ConclusionInvalidated
586 )
587 }
588}
589
590#[derive(Debug, Clone)]
591pub struct MemEdge {
592 pub kind: EdgeKind,
593 pub weight: f32,
594 pub created_at: DateTime<Utc>,
595}
596
597#[derive(Debug, Clone)]
598pub struct RetrievalHit {
599 pub node_ix: NodeIndex,
600 pub activation: f32,
601 pub hops: usize,
602}
603
604pub struct MemoryGraph {
606 pub inner: StableGraph<MemNode, MemEdge>,
607 by_fact_id: HashMap<String, NodeIndex>,
608 by_key: HashMap<String, Vec<NodeIndex>>,
609 by_layer: [Vec<NodeIndex>; 4],
610 last_conversation: Option<NodeIndex>,
611 partitions: HashMap<NodeIndex, Partition>,
614 foreign_by_fact_id: HashMap<String, NodeIndex>,
621}
622
623impl MemoryGraph {
624 pub fn new() -> Self {
625 Self {
626 inner: StableGraph::new(),
627 by_fact_id: HashMap::new(),
628 by_key: HashMap::new(),
629 by_layer: [Vec::new(), Vec::new(), Vec::new(), Vec::new()],
630 last_conversation: None,
631 partitions: HashMap::new(),
632 foreign_by_fact_id: HashMap::new(),
633 }
634 }
635
636 pub fn partition_of(&self, nix: NodeIndex) -> Partition {
639 self.partitions
640 .get(&nix)
641 .cloned()
642 .unwrap_or(Partition::Project)
643 }
644
645 pub fn insert_partitioned(&mut self, node: MemNode, partition: Partition) -> NodeIndex {
650 if let Partition::Foreign { .. } = &partition {
651 if let Some(fid) = node.fact_id.as_deref() {
652 if let Some(&existing) = self.foreign_by_fact_id.get(fid) {
653 return existing;
654 }
655 }
656 let fid = node.fact_id.clone();
657 let nix = self.insert(node);
658 if let Some(fid) = fid {
659 self.foreign_by_fact_id.insert(fid, nix);
660 }
661 self.partitions.insert(nix, partition);
662 return nix;
663 }
664 self.insert(node)
665 }
666
667 pub fn insert_foreign(
669 &mut self,
670 source_repo: impl Into<String>,
671 commit: impl Into<String>,
672 node: MemNode,
673 ) -> NodeIndex {
674 self.insert_partitioned(
675 node,
676 Partition::Foreign {
677 source_repo: source_repo.into(),
678 commit: commit.into(),
679 },
680 )
681 }
682
683 pub fn insert(&mut self, node: MemNode) -> NodeIndex {
685 let layer_idx = (node.layer as usize).saturating_sub(1).min(3);
686 let fact_id = node.fact_id.clone();
687 let key = node.key.clone();
688 let is_conv = matches!(
689 node.kind,
690 MemKind::Conversation | MemKind::ConversationSummary
691 );
692
693 let nix = self.inner.add_node(node);
694
695 if let Some(fid) = fact_id {
696 self.by_fact_id.insert(fid, nix);
697 }
698 self.by_key.entry(key).or_default().push(nix);
699 self.by_layer[layer_idx].push(nix);
700
701 if is_conv {
703 if let Some(prev) = self.last_conversation {
704 self.inner.add_edge(
705 prev,
706 nix,
707 MemEdge {
708 kind: EdgeKind::TemporalNext,
709 weight: 1.0,
710 created_at: Utc::now(),
711 },
712 );
713 }
714 self.last_conversation = Some(nix);
715 }
716
717 nix
718 }
719
720 pub fn link(&mut self, from: NodeIndex, to: NodeIndex, kind: EdgeKind, weight: f32) {
722 self.inner.add_edge(
723 from,
724 to,
725 MemEdge {
726 kind,
727 weight,
728 created_at: Utc::now(),
729 },
730 );
731 }
732
733 pub fn supersede(&mut self, new_nix: NodeIndex, old_fact_id: &str) -> HashSet<NodeIndex> {
736 let mut invalidated = HashSet::new();
737
738 if let Some(&old_nix) = self.by_fact_id.get(old_fact_id) {
739 if let Some(old_node) = self.inner.node_weight_mut(old_nix) {
741 old_node.kind = MemKind::FactSuperseded;
742 }
743 self.link(new_nix, old_nix, EdgeKind::Supersedes, 1.0);
745 invalidated.insert(old_nix);
746
747 let dependents: Vec<NodeIndex> = self
749 .inner
750 .neighbors_directed(old_nix, Direction::Incoming)
751 .filter(|&n| {
752 self.inner
753 .edges_connecting(n, old_nix)
754 .any(|e| e.weight().kind == EdgeKind::DependsOn)
755 })
756 .collect();
757
758 for dep in dependents {
759 invalidated.insert(dep);
760 }
761
762 let citing_conclusions: Vec<NodeIndex> = self
764 .inner
765 .neighbors_directed(old_nix, Direction::Incoming)
766 .filter(|&n| {
767 self.inner
768 .edges_connecting(n, old_nix)
769 .any(|e| e.weight().kind == EdgeKind::CitesPremise)
770 })
771 .collect();
772
773 for conc_nix in citing_conclusions {
774 if let Some(conc_node) = self.inner.node_weight_mut(conc_nix) {
775 if conc_node.kind == MemKind::Conclusion {
776 conc_node.kind = MemKind::ConclusionInvalidated;
777 invalidated.insert(conc_nix);
778 }
779 }
780 }
781 }
782
783 invalidated
784 }
785
786 pub fn get_by_fact_id(&self, fact_id: &str) -> Option<(NodeIndex, &MemNode)> {
788 self.by_fact_id
789 .get(fact_id)
790 .and_then(|&nix| self.inner.node_weight(nix).map(|n| (nix, n)))
791 }
792
793 pub fn valid_facts(&self) -> Vec<(NodeIndex, &MemNode)> {
795 self.by_layer[1]
796 .iter() .filter_map(|&nix| {
798 self.inner
799 .node_weight(nix)
800 .filter(|n| n.kind == MemKind::Fact)
801 .map(|n| (nix, n))
802 })
803 .collect()
804 }
805
806 pub fn constraints(&self) -> Vec<(NodeIndex, &MemNode)> {
808 self.valid_facts()
809 .into_iter()
810 .filter(|(_, n)| n.is_constraint)
811 .collect()
812 }
813
814 pub fn nodes_by_layer(&self, layer: Layer) -> Vec<(NodeIndex, &MemNode)> {
816 let idx = (layer as usize).saturating_sub(1).min(3);
817 self.by_layer[idx]
818 .iter()
819 .filter_map(|&nix| self.inner.node_weight(nix).map(|n| (nix, n)))
820 .collect()
821 }
822
823 pub fn retrieve_ppr(
833 &self,
834 seeds: &[NodeIndex],
835 seed_weights: Option<&[f32]>,
836 damping: f32,
837 max_results: usize,
838 ) -> Vec<RetrievalHit> {
839 self.retrieve_ppr_inner(seeds, seed_weights, damping, max_results, true)
840 }
841
842 pub fn retrieve_ppr_cross_partition(
845 &self,
846 seeds: &[NodeIndex],
847 seed_weights: Option<&[f32]>,
848 damping: f32,
849 max_results: usize,
850 ) -> Vec<RetrievalHit> {
851 self.retrieve_ppr_inner(seeds, seed_weights, damping, max_results, false)
852 }
853
854 fn retrieve_ppr_inner(
855 &self,
856 seeds: &[NodeIndex],
857 seed_weights: Option<&[f32]>,
858 damping: f32,
859 max_results: usize,
860 apply_partition_filter: bool,
861 ) -> Vec<RetrievalHit> {
862 if seeds.is_empty() {
863 return Vec::new();
864 }
865
866 let all_nodes: Vec<NodeIndex> = self.inner.node_indices().collect();
867 let n = all_nodes.len();
868 if n == 0 {
869 return Vec::new();
870 }
871
872 let pos: HashMap<NodeIndex, usize> = all_nodes
874 .iter()
875 .enumerate()
876 .map(|(i, &nix)| (nix, i))
877 .collect();
878
879 let mut reset = vec![0.0f32; n];
881 let mut total_weight = 0.0f32;
882 for (i, &seed) in seeds.iter().enumerate() {
883 if let Some(&p) = pos.get(&seed) {
884 let w = seed_weights
885 .and_then(|sw| sw.get(i).copied())
886 .unwrap_or(1.0);
887 reset[p] = w;
888 total_weight += w;
889 }
890 }
891 if total_weight > 0.0 {
892 for v in &mut reset {
893 *v /= total_weight;
894 }
895 }
896
897 let cap = 0.4f32;
899 let mut needs_renorm = false;
900 for v in &mut reset {
901 if *v > cap {
902 *v = cap;
903 needs_renorm = true;
904 }
905 }
906 if needs_renorm {
907 let sum: f32 = reset.iter().sum();
908 if sum > 0.0 {
909 for v in &mut reset {
910 *v /= sum;
911 }
912 }
913 }
914
915 let mut scores = reset.clone();
917
918 let edge_mult = |kind: EdgeKind| -> f32 {
920 match kind {
921 EdgeKind::Supersedes => 0.3,
922 EdgeKind::DependsOn => 0.8,
923 EdgeKind::RelatedTo => 0.6,
924 EdgeKind::Triggers => 0.9,
925 EdgeKind::TemporalNext => 0.5,
926 EdgeKind::Calls => 0.85,
927 EdgeKind::DefinedIn => 0.9,
928 EdgeKind::Imports => 0.4,
929 EdgeKind::CitesPremise => 0.85,
930 }
931 };
932
933 let mut out_weights: Vec<f32> = vec![0.0; n];
935 for (i, &nix) in all_nodes.iter().enumerate() {
936 let mut total = 0.0f32;
937 for e in self.inner.edges(nix) {
938 total += e.weight().weight * edge_mult(e.weight().kind);
939 }
940 for e in self.inner.edges_directed(nix, Direction::Incoming) {
942 let k = e.weight().kind;
943 if matches!(
944 k,
945 EdgeKind::RelatedTo
946 | EdgeKind::DependsOn
947 | EdgeKind::Triggers
948 | EdgeKind::Calls
949 | EdgeKind::DefinedIn
950 | EdgeKind::Imports
951 ) {
952 total += e.weight().weight * edge_mult(k);
953 }
954 }
955 out_weights[i] = total;
956 }
957
958 let max_iters = 30;
960 let epsilon = 1e-6;
961
962 let mut new_scores = vec![0.0f32; n];
963
964 for _iter in 0..max_iters {
965 new_scores.fill(0.0);
966
967 for i in 0..n {
969 new_scores[i] += (1.0 - damping) * reset[i];
970 }
971
972 for (i, &nix) in all_nodes.iter().enumerate() {
974 if scores[i] < epsilon {
975 continue;
976 }
977 let out_w = out_weights[i];
978 if out_w == 0.0 {
979 continue;
980 }
981
982 for e in self.inner.edges(nix) {
984 if let Some(&j) = pos.get(&e.target()) {
985 let w = e.weight().weight * edge_mult(e.weight().kind);
986 new_scores[j] += damping * scores[i] * w / out_w;
987 }
988 }
989 for e in self.inner.edges_directed(nix, Direction::Incoming) {
991 let k = e.weight().kind;
992 if !matches!(
993 k,
994 EdgeKind::RelatedTo
995 | EdgeKind::DependsOn
996 | EdgeKind::Triggers
997 | EdgeKind::Calls
998 | EdgeKind::DefinedIn
999 | EdgeKind::Imports
1000 ) {
1001 continue;
1002 }
1003 if let Some(&j) = pos.get(&e.source()) {
1004 let w = e.weight().weight * edge_mult(k);
1005 new_scores[j] += damping * scores[i] * w / out_w;
1006 }
1007 }
1008 }
1009
1010 let delta: f32 = scores
1012 .iter()
1013 .zip(new_scores.iter())
1014 .map(|(a, b)| (a - b).abs())
1015 .sum();
1016 std::mem::swap(&mut scores, &mut new_scores);
1017 if delta < epsilon {
1018 break;
1019 }
1020 }
1021
1022 let mut hits: Vec<RetrievalHit> = all_nodes
1024 .iter()
1025 .enumerate()
1026 .filter_map(|(i, &nix)| {
1027 let node = self.inner.node_weight(nix)?;
1028 if !node.is_valid() {
1029 return None;
1030 }
1031 if scores[i] < epsilon {
1032 return None;
1033 }
1034 let hops = 0;
1037 Some(RetrievalHit {
1038 node_ix: nix,
1039 activation: scores[i],
1040 hops,
1041 })
1042 })
1043 .collect();
1044
1045 hits.sort_by(|a, b| {
1046 b.activation
1047 .partial_cmp(&a.activation)
1048 .unwrap_or(std::cmp::Ordering::Equal)
1049 });
1050 if apply_partition_filter {
1051 self.filter_same_partition(&mut hits, seeds);
1052 }
1053 hits.truncate(max_results);
1054 hits
1055 }
1056
1057 fn filter_same_partition(&self, hits: &mut Vec<RetrievalHit>, seeds: &[NodeIndex]) {
1060 if seeds.is_empty() {
1061 return;
1062 }
1063 if self.partitions.is_empty() {
1065 return;
1066 }
1067 let seed_parts: HashSet<Partition> = seeds.iter().map(|&n| self.partition_of(n)).collect();
1068 hits.retain(|h| seed_parts.contains(&self.partition_of(h.node_ix)));
1069 }
1070
1071 pub fn retrieve(
1076 &self,
1077 seeds: &[NodeIndex],
1078 max_hops: usize,
1079 max_results: usize,
1080 decay: f32,
1081 min_activation: f32,
1082 ) -> Vec<RetrievalHit> {
1083 self.retrieve_inner(seeds, max_hops, max_results, decay, min_activation, true)
1084 }
1085
1086 pub fn retrieve_cross_partition(
1088 &self,
1089 seeds: &[NodeIndex],
1090 max_hops: usize,
1091 max_results: usize,
1092 decay: f32,
1093 min_activation: f32,
1094 ) -> Vec<RetrievalHit> {
1095 self.retrieve_inner(seeds, max_hops, max_results, decay, min_activation, false)
1096 }
1097
1098 fn retrieve_inner(
1099 &self,
1100 seeds: &[NodeIndex],
1101 max_hops: usize,
1102 max_results: usize,
1103 decay: f32,
1104 min_activation: f32,
1105 apply_partition_filter: bool,
1106 ) -> Vec<RetrievalHit> {
1107 let mut activations: HashMap<NodeIndex, f32> = HashMap::new();
1108 let mut hops_map: HashMap<NodeIndex, usize> = HashMap::new();
1109 let mut queue: VecDeque<(NodeIndex, f32, usize)> = VecDeque::new();
1110
1111 for &seed in seeds {
1112 activations.insert(seed, 1.0);
1113 hops_map.insert(seed, 0);
1114 queue.push_back((seed, 1.0, 0));
1115 }
1116
1117 while let Some((node, activation, hops)) = queue.pop_front() {
1118 if hops >= max_hops {
1119 continue;
1120 }
1121
1122 for edge_ref in self.inner.edges(node) {
1124 let neighbor = edge_ref.target();
1125 let edge = edge_ref.weight();
1126
1127 let edge_mult = match edge.kind {
1128 EdgeKind::Supersedes => 0.3,
1129 EdgeKind::DependsOn => 0.8,
1130 EdgeKind::RelatedTo => 0.6,
1131 EdgeKind::Triggers => 0.9,
1132 EdgeKind::TemporalNext => 0.5,
1133 EdgeKind::Calls => 0.85,
1134 EdgeKind::DefinedIn => 0.9,
1135 EdgeKind::Imports => 0.4,
1136 EdgeKind::CitesPremise => 0.85,
1137 };
1138
1139 let new_activation = activation * decay * edge.weight * edge_mult;
1140 if new_activation < min_activation {
1141 continue;
1142 }
1143
1144 let existing = activations.get(&neighbor).copied().unwrap_or(0.0);
1145 if new_activation > existing {
1146 activations.insert(neighbor, new_activation);
1147 hops_map.insert(neighbor, hops + 1);
1148 queue.push_back((neighbor, new_activation, hops + 1));
1149 }
1150 }
1151
1152 for edge_ref in self.inner.edges_directed(node, Direction::Incoming) {
1154 let neighbor = edge_ref.source();
1155 let edge = edge_ref.weight();
1156
1157 let edge_mult = match edge.kind {
1159 EdgeKind::RelatedTo => 0.6,
1160 EdgeKind::DependsOn => 0.5,
1161 EdgeKind::Triggers => 0.85, EdgeKind::Calls => 0.7, EdgeKind::DefinedIn => 0.8, EdgeKind::Imports => 0.3,
1165 _ => continue,
1166 };
1167
1168 let new_activation = activation * decay * edge.weight * edge_mult;
1169 if new_activation < min_activation {
1170 continue;
1171 }
1172
1173 let existing = activations.get(&neighbor).copied().unwrap_or(0.0);
1174 if new_activation > existing {
1175 activations.insert(neighbor, new_activation);
1176 hops_map.insert(neighbor, hops + 1);
1177 queue.push_back((neighbor, new_activation, hops + 1));
1178 }
1179 }
1180 }
1181
1182 let mut hits: Vec<RetrievalHit> = activations
1183 .into_iter()
1184 .filter_map(|(nix, act)| {
1185 let node = self.inner.node_weight(nix)?;
1186 if !node.is_valid() {
1187 return None;
1188 } Some(RetrievalHit {
1190 node_ix: nix,
1191 activation: act,
1192 hops: *hops_map.get(&nix).unwrap_or(&0),
1193 })
1194 })
1195 .collect();
1196
1197 hits.sort_by(|a, b| {
1198 b.activation
1199 .partial_cmp(&a.activation)
1200 .unwrap_or(std::cmp::Ordering::Equal)
1201 });
1202 if apply_partition_filter {
1203 self.filter_same_partition(&mut hits, seeds);
1204 }
1205 hits.truncate(max_results);
1206 hits
1207 }
1208
1209 pub fn find_seeds_weighted(&self, query: &str, max_seeds: usize) -> (Vec<NodeIndex>, Vec<f32>) {
1215 let q_tokens = tokenize(query);
1216 if q_tokens.is_empty() {
1217 return (Vec::new(), Vec::new());
1218 }
1219
1220 let corpus_idf = self.compute_corpus_idf();
1222
1223 let mut scored: Vec<(f32, NodeIndex)> = Vec::new();
1224
1225 for nix in self.inner.node_indices() {
1226 let node = match self.inner.node_weight(nix) {
1227 Some(n) => n,
1228 None => continue,
1229 };
1230 if !node.is_valid() {
1231 continue;
1232 }
1233
1234 let text = format!("{} {}", node.key, node.value).to_lowercase();
1235 let n_tokens = tokenize_for_content(&text, node.content_type);
1236 let n_stems: HashSet<String> = n_tokens.iter().map(|t| stem(t)).collect();
1237
1238 let mut weighted_hits = 0.0f32;
1240 let mut weighted_total = 0.0f32;
1241
1242 for qt in &q_tokens {
1243 let term_idf = corpus_idf.get(qt).copied().unwrap_or(1.0);
1244 weighted_total += term_idf * 3.0; if n_tokens.contains(qt) {
1248 weighted_hits += term_idf * 3.0;
1249 continue;
1250 }
1251 let qt_stem = stem(qt);
1253 if n_stems.contains(&qt_stem) {
1254 weighted_hits += term_idf * 2.0;
1255 continue;
1256 }
1257 let qt_syns = synonym_expand(qt);
1259 if !qt_syns.is_empty()
1260 && qt_syns
1261 .iter()
1262 .any(|s| n_tokens.contains(s) || n_stems.contains(&stem(s)))
1263 {
1264 weighted_hits += term_idf * 1.5;
1265 continue;
1266 }
1267 if text.contains(qt.as_str()) {
1269 weighted_hits += term_idf;
1270 }
1271 }
1272
1273 if weighted_total > 0.0 {
1274 let match_score = weighted_hits / weighted_total;
1275 if match_score > 0.0 {
1276 scored.push((match_score, nix));
1277 }
1278 }
1279 }
1280
1281 scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
1282 scored.truncate(max_seeds);
1283 let seeds: Vec<NodeIndex> = scored.iter().map(|(_, nix)| *nix).collect();
1284 let weights: Vec<f32> = scored.iter().map(|(w, _)| *w).collect();
1285 (seeds, weights)
1286 }
1287
1288 fn compute_corpus_idf(&self) -> HashMap<String, f32> {
1292 let total = self.inner.node_count().max(1) as f32;
1293 let mut doc_freq: HashMap<String, usize> = HashMap::new();
1294
1295 for nix in self.inner.node_indices() {
1296 let node = match self.inner.node_weight(nix) {
1297 Some(n) if n.is_valid() => n,
1298 _ => continue,
1299 };
1300 let text = format!("{} {}", node.key, node.value).to_lowercase();
1301 let tokens = tokenize_for_content(&text, node.content_type);
1302 let mut seen = HashSet::new();
1303 for token in tokens {
1304 if seen.insert(token.clone()) {
1305 *doc_freq.entry(token).or_insert(0) += 1;
1306 }
1307 }
1308 }
1309
1310 doc_freq
1311 .into_iter()
1312 .map(|(term, df)| (term, (total / (1.0 + df as f32)).ln().max(0.1)))
1313 .collect()
1314 }
1315
1316 pub fn find_seeds(&self, query: &str, max_seeds: usize) -> Vec<NodeIndex> {
1318 self.find_seeds_weighted(query, max_seeds).0
1319 }
1320
1321 pub fn total_tokens(&self) -> usize {
1323 self.inner
1324 .node_indices()
1325 .filter_map(|nix| self.inner.node_weight(nix))
1326 .filter(|n| n.is_valid())
1327 .map(|n| n.token_estimate())
1328 .sum()
1329 }
1330
1331 pub fn valid_fact_count(&self) -> usize {
1333 self.valid_facts().len()
1334 }
1335
1336 pub fn node_count(&self) -> usize {
1337 self.inner.node_count()
1338 }
1339
1340 pub fn edge_count(&self) -> usize {
1341 self.inner.edge_count()
1342 }
1343
1344 pub fn gc_superseded(&mut self, max_depth: usize) -> usize {
1350 let stale: Vec<NodeIndex> = self
1352 .inner
1353 .node_indices()
1354 .filter(|&nix| {
1355 self.inner
1356 .node_weight(nix)
1357 .map(|n| {
1358 matches!(
1359 n.kind,
1360 MemKind::FactSuperseded
1361 | MemKind::SkillDeprecated
1362 | MemKind::ConclusionInvalidated
1363 )
1364 })
1365 .unwrap_or(false)
1366 })
1367 .collect();
1368
1369 let mut to_remove = Vec::new();
1372 for nix in &stale {
1373 let mut depth = 0;
1374 let mut current = *nix;
1375 let mut reachable = false;
1376
1377 loop {
1379 let parent: Option<NodeIndex> = self
1380 .inner
1381 .neighbors_directed(current, Direction::Incoming)
1382 .find(|&neighbor| {
1383 self.inner
1384 .edges_connecting(neighbor, current)
1385 .any(|e| e.weight().kind == EdgeKind::Supersedes)
1386 });
1387
1388 match parent {
1389 Some(p) => {
1390 depth += 1;
1391 if let Some(node) = self.inner.node_weight(p) {
1392 if node.is_valid() {
1393 reachable = true;
1394 break;
1395 }
1396 }
1397 if depth > max_depth {
1398 break;
1399 }
1400 current = p;
1401 }
1402 None => break, }
1404 }
1405
1406 if !reachable || depth > max_depth {
1408 to_remove.push(*nix);
1409 }
1410 }
1411
1412 let count = to_remove.len();
1413 for nix in to_remove {
1414 let (key, layer, fact_id) = match self.inner.node_weight(nix) {
1416 Some(node) => (node.key.clone(), node.layer, node.fact_id.clone()),
1417 None => continue,
1418 };
1419 self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
1420 self.inner.remove_node(nix);
1421 }
1422 count
1423 }
1424
1425 pub fn prune_expired(&mut self, now: DateTime<Utc>) {
1427 let expired: Vec<(NodeIndex, String, Layer, Option<String>)> = self
1428 .inner
1429 .node_indices()
1430 .filter_map(|nix| {
1431 let node = self.inner.node_weight(nix)?;
1432 if node.expires_at.map(|e| e <= now).unwrap_or(false) {
1433 Some((nix, node.key.clone(), node.layer, node.fact_id.clone()))
1434 } else {
1435 None
1436 }
1437 })
1438 .collect();
1439 for (nix, key, layer, fact_id) in expired {
1440 self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
1441 self.inner.remove_node(nix);
1442 }
1443 }
1444
1445 pub fn remove_conversation_nodes(&mut self, to_remove: &[NodeIndex]) {
1448 for &nix in to_remove {
1450 let (key, layer, fact_id) = match self.inner.node_weight(nix) {
1451 Some(node) => (node.key.clone(), node.layer, node.fact_id.clone()),
1452 None => continue,
1453 };
1454 self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
1455 self.inner.remove_node(nix);
1456 }
1457
1458 let mut survivors: Vec<(NodeIndex, chrono::DateTime<Utc>)> = self
1460 .inner
1461 .node_indices()
1462 .filter_map(|nix| {
1463 let n = self.inner.node_weight(nix)?;
1464 if matches!(n.kind, MemKind::Conversation | MemKind::ConversationSummary) {
1465 Some((nix, n.created_at))
1466 } else {
1467 None
1468 }
1469 })
1470 .collect();
1471 survivors.sort_by_key(|&(_, ts)| ts);
1472
1473 let temporal_edges: Vec<petgraph::graph::EdgeIndex> = self
1475 .inner
1476 .edge_indices()
1477 .filter(|&eix| {
1478 self.inner
1479 .edge_weight(eix)
1480 .map(|e| e.kind == EdgeKind::TemporalNext)
1481 .unwrap_or(false)
1482 })
1483 .collect();
1484 for eix in temporal_edges {
1485 self.inner.remove_edge(eix);
1486 }
1487
1488 for window in survivors.windows(2) {
1490 let (from_nix, _) = window[0];
1491 let (to_nix, _) = window[1];
1492 self.inner.add_edge(
1493 from_nix,
1494 to_nix,
1495 MemEdge {
1496 kind: EdgeKind::TemporalNext,
1497 weight: 1.0,
1498 created_at: Utc::now(),
1499 },
1500 );
1501 }
1502
1503 self.last_conversation = survivors.last().map(|&(nix, _)| nix);
1505 }
1506
1507 fn remove_from_indexes(
1509 &mut self,
1510 nix: NodeIndex,
1511 key: &str,
1512 layer: Layer,
1513 fact_id: Option<&str>,
1514 ) {
1515 if let Some(fid) = fact_id {
1516 self.by_fact_id.remove(fid);
1517 }
1518 if let Some(entries) = self.by_key.get_mut(key) {
1519 entries.retain(|&n| n != nix);
1520 if entries.is_empty() {
1521 self.by_key.remove(key);
1522 }
1523 }
1524 let layer_idx = (layer as usize).saturating_sub(1).min(3);
1525 self.by_layer[layer_idx].retain(|&n| n != nix);
1526 }
1527
1528 pub fn clear(&mut self) {
1529 self.inner.clear();
1530 self.by_fact_id.clear();
1531 self.by_key.clear();
1532 for v in &mut self.by_layer {
1533 v.clear();
1534 }
1535 self.last_conversation = None;
1536 }
1537}
1538
1539impl Default for MemoryGraph {
1540 fn default() -> Self {
1541 Self::new()
1542 }
1543}
1544
1545fn tokenize(text: &str) -> HashSet<String> {
1547 text.split(|c: char| {
1548 c.is_whitespace() || c == '/' || c == ':' || c == '-' || c == '_' || c == '.' || c == ','
1549 })
1550 .map(|s| s.trim().to_lowercase())
1551 .filter(|s| !s.is_empty())
1552 .collect()
1553}
1554
1555const CODE_NOISE_COMMON: &[&str] = &[
1557 "return", "if", "else", "for", "while", "true", "false", "new", "this", "static", "void",
1558 "null", "break", "continue", "try", "catch", "throw",
1559];
1560
1561fn noise_for_language(lang: CodeLanguage) -> &'static [&'static str] {
1563 match lang {
1564 CodeLanguage::Rust => &[
1565 "fn", "let", "const", "pub", "mut", "use", "mod", "crate", "super", "struct", "enum",
1566 "impl", "trait", "async", "await", "where", "type", "match", "ref", "move", "dyn",
1567 "unsafe",
1568 ],
1569 CodeLanguage::Python => &[
1570 "def", "class", "import", "from", "pass", "none", "self", "elif", "except", "finally",
1571 "yield", "lambda", "nonlocal", "global", "assert", "with", "as", "in", "is", "not",
1572 "and", "or",
1573 ],
1574 CodeLanguage::JavaScript | CodeLanguage::TypeScript => &[
1575 "function",
1576 "const",
1577 "let",
1578 "var",
1579 "export",
1580 "default",
1581 "async",
1582 "await",
1583 "typeof",
1584 "instanceof",
1585 "undefined",
1586 "require",
1587 ],
1588 CodeLanguage::Go => &[
1589 "func",
1590 "package",
1591 "import",
1592 "defer",
1593 "go",
1594 "chan",
1595 "select",
1596 "range",
1597 "switch",
1598 "case",
1599 "fallthrough",
1600 ],
1601 CodeLanguage::Java => &[
1602 "public",
1603 "private",
1604 "protected",
1605 "class",
1606 "interface",
1607 "extends",
1608 "implements",
1609 "final",
1610 "abstract",
1611 "synchronized",
1612 "volatile",
1613 "import",
1614 "package",
1615 "throws",
1616 ],
1617 CodeLanguage::Cpp => &[
1618 "include",
1619 "namespace",
1620 "using",
1621 "template",
1622 "typename",
1623 "virtual",
1624 "override",
1625 "const",
1626 "auto",
1627 "inline",
1628 "extern",
1629 "typedef",
1630 "class",
1631 "struct",
1632 "enum",
1633 ],
1634 CodeLanguage::Shell => &[
1635 "echo", "fi", "do", "done", "then", "esac", "elif", "local", "export", "readonly",
1636 "set",
1637 ],
1638 CodeLanguage::Sql => &[
1639 "select", "from", "where", "and", "or", "not", "insert", "into", "update", "set",
1640 "delete", "create", "alter", "drop", "table", "join", "on", "as", "order", "by",
1641 "group", "having", "limit",
1642 ],
1643 CodeLanguage::Unknown => &[
1644 "fn", "let", "const", "var", "pub", "mut", "return", "def", "class", "import", "from",
1645 "function", "export", "default",
1646 ],
1647 }
1648}
1649
1650fn split_camel_case(s: &str) -> Vec<String> {
1654 let mut parts = Vec::new();
1655 let mut current = String::new();
1656 let chars: Vec<char> = s.chars().collect();
1657
1658 for i in 0..chars.len() {
1659 let c = chars[i];
1660 if !c.is_alphanumeric() {
1661 if !current.is_empty() {
1662 parts.push(std::mem::take(&mut current).to_lowercase());
1663 }
1664 continue;
1665 }
1666 if c.is_uppercase() {
1667 let prev_lower = i > 0 && chars[i - 1].is_lowercase();
1671 let acronym_end = i > 0
1672 && chars[i - 1].is_uppercase()
1673 && i + 1 < chars.len()
1674 && chars[i + 1].is_lowercase();
1675 if prev_lower || acronym_end {
1676 if !current.is_empty() {
1677 parts.push(std::mem::take(&mut current).to_lowercase());
1678 }
1679 }
1680 }
1681 current.push(c);
1682 }
1683 if !current.is_empty() {
1684 parts.push(current.to_lowercase());
1685 }
1686 parts
1687}
1688
1689fn tokenize_code(text: &str, lang: CodeLanguage) -> HashSet<String> {
1691 let mut noise: HashSet<&str> = CODE_NOISE_COMMON.iter().copied().collect();
1692 noise.extend(noise_for_language(lang));
1693
1694 let raw_tokens: Vec<String> = text
1695 .split(|c: char| {
1696 c.is_whitespace()
1697 || c == '/'
1698 || c == ':'
1699 || c == '-'
1700 || c == '_'
1701 || c == '.'
1702 || c == ','
1703 || c == '('
1704 || c == ')'
1705 || c == '{'
1706 || c == '}'
1707 || c == '['
1708 || c == ']'
1709 || c == ';'
1710 || c == '"'
1711 || c == '\''
1712 || c == '<'
1713 || c == '>'
1714 || c == '='
1715 || c == '&'
1716 || c == '|'
1717 || c == '!'
1718 || c == '#'
1719 || c == '*'
1720 || c == '+'
1721 })
1722 .map(|s| s.trim().to_string())
1723 .filter(|s| !s.is_empty())
1724 .collect();
1725
1726 let mut tokens = HashSet::new();
1727 for raw in &raw_tokens {
1728 let parts = split_camel_case(raw);
1729 for part in parts {
1730 if part.len() >= 2 && !noise.contains(part.as_str()) {
1731 tokens.insert(part);
1732 }
1733 }
1734 }
1735 tokens
1736}
1737
1738pub fn tokenize_for_content(text: &str, content_type: ContentType) -> HashSet<String> {
1740 match content_type {
1741 ContentType::Code(lang) => tokenize_code(text, lang),
1742 _ => tokenize(text),
1743 }
1744}
1745
1746pub fn stem(word: &str) -> String {
1749 let w = word.to_lowercase();
1750 if w.len() < 4 {
1751 return w;
1752 }
1753
1754 let suffixes = &[
1756 "ation", "tion", "ment", "ness", "able", "ible", "ence", "ance", "ing", "ful", "ous",
1757 "ive", "ize", "ise", "ify", "ate", "ed", "er", "ly", "al", "es",
1758 ];
1759 for suffix in suffixes {
1760 if let Some(stem) = w.strip_suffix(suffix) {
1761 if stem.len() >= 3 {
1762 return stem.to_string();
1763 }
1764 }
1765 }
1766 if w.ends_with('s') && !w.ends_with("ss") && w.len() > 3 {
1768 return w[..w.len() - 1].to_string();
1769 }
1770 w
1771}
1772
1773const SYNONYM_GROUPS: &[&[&str]] = &[
1776 &[
1777 "auth",
1778 "authenticate",
1779 "authorization",
1780 "credential",
1781 "login",
1782 "jwt",
1783 "token",
1784 "verify",
1785 ],
1786 &[
1787 "db", "database", "sql", "query", "postgres", "sqlite", "mysql",
1788 ],
1789 &["err", "error", "exception", "panic", "fail", "failure"],
1790 &["config", "configuration", "setting", "preference", "option"],
1791 &["msg", "message", "notification", "alert", "event"],
1792 &["req", "request", "http", "api", "endpoint", "route"],
1793 &["resp", "response", "reply", "result", "output"],
1794 &["mem", "memory", "cache", "buffer", "storage"],
1795 &["exec", "execute", "run", "invoke", "call", "dispatch"],
1796 &["parse", "deserialize", "decode", "unmarshal", "extract"],
1797 &["serial", "serialize", "encode", "marshal", "format"],
1798 &["nav", "navigate", "redirect", "goto"],
1799];
1800
1801pub fn synonym_expand(token: &str) -> HashSet<String> {
1803 let mut result = HashSet::new();
1804 let lower = token.to_lowercase();
1805 let stemmed = stem(&lower);
1806 for group in SYNONYM_GROUPS {
1807 let matches = group.iter().any(|&t| t == lower || stem(t) == stemmed);
1808 if matches {
1809 for &t in *group {
1810 result.insert(t.to_string());
1811 }
1812 }
1813 }
1814 result
1815}
1816
1817#[cfg(test)]
1818mod tests {
1819 use super::*;
1820
1821 fn make_fact(key: &str, value: &str, fact_id: &str) -> MemNode {
1822 MemNode {
1823 kind: MemKind::Fact,
1824 layer: 2,
1825 key: key.to_string(),
1826 value: value.to_string(),
1827 fact_id: Some(fact_id.to_string()),
1828 scope: "global".to_string(),
1829 authority: "peer".to_string(),
1830 is_constraint: false,
1831 created_at: Utc::now(),
1832 expires_at: None,
1833 content_type: ContentType::default(),
1834 metadata: FactMetadata::default(),
1835 }
1836 }
1837
1838 #[test]
1839 fn insert_and_lookup() {
1840 let mut g = MemoryGraph::new();
1841 let nix = g.insert(make_fact("language", "Rust", "f1"));
1842 let (found_nix, node) = g.get_by_fact_id("f1").unwrap();
1843 assert_eq!(found_nix, nix);
1844 assert_eq!(node.value, "Rust");
1845 }
1846
1847 #[test]
1848 fn supersession() {
1849 let mut g = MemoryGraph::new();
1850 g.insert(make_fact("db", "PostgreSQL", "f1"));
1851 let new_nix = g.insert(make_fact("db", "SQLite", "f2"));
1852 let invalidated = g.supersede(new_nix, "f1");
1853
1854 assert!(!invalidated.is_empty());
1855 let (_, old) = g.get_by_fact_id("f1").unwrap();
1856 assert_eq!(old.kind, MemKind::FactSuperseded);
1857 assert_eq!(g.valid_fact_count(), 1);
1858 }
1859
1860 #[test]
1861 fn dependency_cascade() {
1862 let mut g = MemoryGraph::new();
1863 let base = g.insert(make_fact("base", "100", "f1"));
1864 let derived = g.insert(make_fact("derived", "200", "f2"));
1865 g.link(derived, base, EdgeKind::DependsOn, 1.0);
1866
1867 let new_base = g.insert(make_fact("base", "150", "f3"));
1868 let invalidated = g.supersede(new_base, "f1");
1869
1870 assert!(invalidated.contains(&derived));
1872 }
1873
1874 #[test]
1875 fn seed_finding() {
1876 let mut g = MemoryGraph::new();
1877 g.insert(make_fact("navigate /ops/dashboard", "Page loads", "f1"));
1878 g.insert(make_fact("click button", "Button clicked", "f2"));
1879
1880 let seeds = g.find_seeds("dashboard", 5);
1881 assert!(!seeds.is_empty()); }
1883
1884 #[test]
1885 fn spreading_activation() {
1886 let mut g = MemoryGraph::new();
1887 let a = g.insert(make_fact("project", "CAR runtime", "f1"));
1888 let b = g.insert(make_fact("language", "Rust", "f2"));
1889 let c = g.insert(make_fact("testing", "uses proptest", "f3"));
1890 g.link(a, b, EdgeKind::RelatedTo, 0.8);
1891 g.link(b, c, EdgeKind::RelatedTo, 0.7);
1892
1893 let hits = g.retrieve(&[a], 3, 10, 0.7, 0.05);
1894 assert!(hits.len() >= 2);
1896 assert!(hits[0].activation > hits[1].activation); }
1898
1899 #[test]
1900 fn conversation_temporal_links() {
1901 let mut g = MemoryGraph::new();
1902 let c1 = g.insert(MemNode {
1903 kind: MemKind::Conversation,
1904 layer: 3,
1905 key: "user".to_string(),
1906 value: "How's the project?".to_string(),
1907 fact_id: None,
1908 scope: "global".to_string(),
1909 authority: "peer".to_string(),
1910 is_constraint: false,
1911 created_at: Utc::now(),
1912 expires_at: None,
1913 content_type: ContentType::NaturalLanguage,
1914 metadata: FactMetadata::default(),
1915 });
1916 let c2 = g.insert(MemNode {
1917 kind: MemKind::Conversation,
1918 layer: 3,
1919 key: "assistant".to_string(),
1920 value: "Going well".to_string(),
1921 fact_id: None,
1922 scope: "global".to_string(),
1923 authority: "peer".to_string(),
1924 is_constraint: false,
1925 created_at: Utc::now(),
1926 expires_at: None,
1927 content_type: ContentType::NaturalLanguage,
1928 metadata: FactMetadata::default(),
1929 });
1930
1931 assert_eq!(g.edge_count(), 1);
1933 let edge = g.inner.edges(c1).next().unwrap();
1934 assert_eq!(edge.weight().kind, EdgeKind::TemporalNext);
1935 assert_eq!(edge.target(), c2);
1936 }
1937
1938 #[test]
1939 fn constraints_tracked() {
1940 let mut g = MemoryGraph::new();
1941 let mut node = make_fact("budget", "Max $500K", "c1");
1942 node.is_constraint = true;
1943 g.insert(node);
1944 g.insert(make_fact("language", "Rust", "f1"));
1945
1946 assert_eq!(g.constraints().len(), 1);
1947 assert_eq!(g.valid_fact_count(), 2);
1948 }
1949
1950 #[test]
1951 fn gc_superseded_removes_deep_chains() {
1952 let mut g = MemoryGraph::new();
1954 let _n1 = g.insert(make_fact("db", "v1", "f1"));
1955 let n2 = g.insert(make_fact("db", "v2", "f2"));
1956 g.supersede(n2, "f1"); let n3 = g.insert(make_fact("db", "v3", "f3"));
1958 g.supersede(n3, "f2"); let n4 = g.insert(make_fact("db", "v4", "f4"));
1960 g.supersede(n4, "f3"); assert_eq!(g.node_count(), 4);
1964
1965 let removed = g.gc_superseded(1);
1967 assert_eq!(removed, 2);
1968 assert_eq!(g.node_count(), 2);
1969
1970 assert!(g.get_by_fact_id("f4").is_some());
1972 assert!(g.get_by_fact_id("f1").is_none());
1974 assert!(g.get_by_fact_id("f2").is_none());
1975 }
1976
1977 #[test]
1978 fn gc_superseded_retains_shallow_chain() {
1979 let mut g = MemoryGraph::new();
1981 let _n1 = g.insert(make_fact("db", "old", "f1"));
1982 let n2 = g.insert(make_fact("db", "new", "f2"));
1983 g.supersede(n2, "f1");
1984
1985 let removed = g.gc_superseded(1);
1986 assert_eq!(removed, 0);
1987 assert_eq!(g.node_count(), 2);
1988 }
1989
1990 #[test]
1991 fn gc_superseded_removes_orphaned() {
1992 let mut g = MemoryGraph::new();
1994 let mut orphan = make_fact("db", "orphan", "f1");
1995 orphan.kind = MemKind::FactSuperseded;
1996 g.insert(orphan);
1997
1998 let removed = g.gc_superseded(1);
1999 assert_eq!(removed, 1);
2000 assert_eq!(g.node_count(), 0);
2001 }
2002
2003 #[test]
2004 fn gc_superseded_noop_on_clean_graph() {
2005 let mut g = MemoryGraph::new();
2006 g.insert(make_fact("a", "1", "f1"));
2007 g.insert(make_fact("b", "2", "f2"));
2008
2009 let removed = g.gc_superseded(1);
2010 assert_eq!(removed, 0);
2011 assert_eq!(g.node_count(), 2);
2012 }
2013
2014 #[test]
2015 fn prune_expired_removes_ttl_nodes() {
2016 let mut g = MemoryGraph::new();
2017 let now = Utc::now();
2018 let mut expired = make_fact("temp", "gone", "e1");
2019 expired.kind = MemKind::Environment;
2020 expired.layer = 4;
2021 expired.expires_at = Some(now - chrono::Duration::hours(1));
2022 g.insert(expired);
2023
2024 let mut fresh = make_fact("temp", "here", "e2");
2025 fresh.kind = MemKind::Environment;
2026 fresh.layer = 4;
2027 fresh.expires_at = Some(now + chrono::Duration::hours(1));
2028 g.insert(fresh);
2029
2030 g.prune_expired(now);
2031 assert_eq!(g.node_count(), 1);
2032 }
2033
2034 #[test]
2035 fn prune_expired_cleans_by_key_and_by_layer() {
2036 let mut g = MemoryGraph::new();
2037 let now = Utc::now();
2038
2039 let mut expired = make_fact("env_temp", "gone", "e1");
2041 expired.kind = MemKind::Environment;
2042 expired.layer = 4;
2043 expired.expires_at = Some(now - chrono::Duration::hours(1));
2044 g.insert(expired);
2045
2046 assert!(g.by_key.contains_key("env_temp"));
2048 let layer_idx = (4usize).saturating_sub(1).min(3); assert_eq!(g.by_layer[layer_idx].len(), 1);
2050
2051 g.prune_expired(now);
2052
2053 assert!(!g.by_key.contains_key("env_temp"));
2055 assert_eq!(g.by_layer[layer_idx].len(), 0);
2057 assert!(g.get_by_fact_id("e1").is_none());
2059 }
2060
2061 #[test]
2062 fn gc_superseded_cleans_by_key_and_by_layer() {
2063 let mut g = MemoryGraph::new();
2065 let _n1 = g.insert(make_fact("db", "v1", "f1"));
2066 let n2 = g.insert(make_fact("db", "v2", "f2"));
2067 g.supersede(n2, "f1");
2068 let n3 = g.insert(make_fact("db", "v3", "f3"));
2069 g.supersede(n3, "f2");
2070
2071 assert_eq!(g.by_key.get("db").map(|v| v.len()).unwrap_or(0), 3);
2073 let layer_idx = (2usize).saturating_sub(1).min(3); let initial_layer_count = g.by_layer[layer_idx].len();
2075 assert_eq!(initial_layer_count, 3);
2076
2077 let removed = g.gc_superseded(0);
2079 assert_eq!(removed, 2);
2080
2081 assert_eq!(g.by_key.get("db").map(|v| v.len()).unwrap_or(0), 1);
2083 assert_eq!(g.by_layer[layer_idx].len(), 1);
2085 }
2086
2087 #[test]
2088 fn remove_conversation_nodes_cleans_by_key_and_by_layer() {
2089 let mut g = MemoryGraph::new();
2090 let c1 = g.insert(MemNode {
2091 kind: MemKind::Conversation,
2092 layer: 3,
2093 key: "user".to_string(),
2094 value: "Hello".to_string(),
2095 fact_id: None,
2096 scope: "global".to_string(),
2097 authority: "peer".to_string(),
2098 is_constraint: false,
2099 created_at: Utc::now(),
2100 expires_at: None,
2101 content_type: ContentType::NaturalLanguage,
2102 metadata: FactMetadata::default(),
2103 });
2104 let c2 = g.insert(MemNode {
2105 kind: MemKind::Conversation,
2106 layer: 3,
2107 key: "assistant".to_string(),
2108 value: "Hi there".to_string(),
2109 fact_id: None,
2110 scope: "global".to_string(),
2111 authority: "peer".to_string(),
2112 is_constraint: false,
2113 created_at: Utc::now(),
2114 expires_at: None,
2115 content_type: ContentType::NaturalLanguage,
2116 metadata: FactMetadata::default(),
2117 });
2118
2119 assert!(g.by_key.contains_key("user"));
2121 assert!(g.by_key.contains_key("assistant"));
2122 let layer_idx = (3usize).saturating_sub(1).min(3); assert_eq!(g.by_layer[layer_idx].len(), 2);
2124
2125 g.remove_conversation_nodes(&[c1, c2]);
2127
2128 assert!(!g.by_key.contains_key("user"));
2130 assert!(!g.by_key.contains_key("assistant"));
2131 assert_eq!(g.by_layer[layer_idx].len(), 0);
2133 assert_eq!(g.node_count(), 0);
2135 }
2136}