Skip to main content

car_memgine/
graph.rs

1//! The memory graph — nodes are memory entries, edges are relationships.
2//!
3//! Replaces ImmutableStore + LayerState + 6 HashMaps with a single graph.
4
5use chrono::{DateTime, Utc};
6use petgraph::stable_graph::{NodeIndex, StableGraph};
7use petgraph::visit::EdgeRef;
8use petgraph::Direction;
9use std::collections::{HashMap, HashSet, VecDeque};
10
/// 1-based memory layer number. On insert, layers are clamped into four
/// buckets via `(layer - 1).min(3)` (see `MemoryGraph::insert`); layer 2
/// holds `Fact` nodes (see `valid_facts`). Full layer semantics are
/// defined by callers — not visible in this chunk.
pub type Layer = u8;
12
13/// Namespace for memory nodes. Keeps external/foreign code knowledge from
14/// polluting the project's own memory graph during spreading activation.
15///
16/// Stored in a side-table keyed by `NodeIndex` so adding partitions does not
17/// require rewriting every `MemNode { .. }` literal. Absence from the side
18/// table means `Project`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Partition {
    /// The host project's own memory. Default for all legacy call sites.
    Project,
    /// Knowledge harvested from an external repository at a specific commit.
    /// Keyed by (source_repo, commit) for idempotent re-ingestion.
    /// NOTE(review): dedupe is actually enforced via the node's `fact_id`
    /// (see `MemoryGraph::foreign_by_fact_id`), not by this pair alone.
    Foreign { source_repo: String, commit: String },
}
27
28impl Partition {
29    pub fn is_project(&self) -> bool {
30        matches!(self, Partition::Project)
31    }
32    pub fn is_foreign(&self) -> bool {
33        matches!(self, Partition::Foreign { .. })
34    }
35}
36
/// Programming language identified by `detect_code_language`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CodeLanguage {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Go,
    Java,
    Cpp,
    Shell,
    Sql,
    /// Text that looks like code, but whose language could not be
    /// determined from marker counts.
    Unknown,
}
50
/// Broad classification of a node's text content (see `detect_content_type`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ContentType {
    /// Plain human text — the default when nothing else matches.
    #[default]
    NaturalLanguage,
    /// Source code with a best-effort language identification.
    Code(CodeLanguage),
    /// JSON/YAML/TOML-like data rather than prose or code.
    StructuredData,
}
58
59impl ContentType {
60    /// Check if this is any code variant regardless of language.
61    pub fn is_code(&self) -> bool {
62        matches!(self, ContentType::Code(_))
63    }
64
65    /// Get the language if this is a code type.
66    pub fn language(&self) -> Option<CodeLanguage> {
67        match self {
68            ContentType::Code(lang) => Some(*lang),
69            _ => None,
70        }
71    }
72
73    /// Parse from a string label (as used in JSON serialization).
74    /// Unknown strings default to `NaturalLanguage`.
75    pub fn from_label(s: &str) -> Self {
76        match s {
77            "code" => ContentType::Code(CodeLanguage::Unknown),
78            "structured_data" => ContentType::StructuredData,
79            _ => ContentType::NaturalLanguage,
80        }
81    }
82
83    /// Convert to a string label for JSON serialization.
84    pub fn as_label(&self) -> &'static str {
85        match self {
86            ContentType::NaturalLanguage => "natural_language",
87            ContentType::Code(_) => "code",
88            ContentType::StructuredData => "structured_data",
89        }
90    }
91}
92
93/// Detect content type from text using heuristics.
94/// Identifies language for code content via language-specific markers.
95pub fn detect_content_type(text: &str) -> ContentType {
96    let trimmed = text.trim();
97
98    // Structured data: starts with JSON/YAML/TOML markers
99    if trimmed.starts_with('{') || trimmed.starts_with('[') {
100        if trimmed.ends_with('}') || trimmed.ends_with(']') {
101            return ContentType::StructuredData;
102        }
103    }
104    if trimmed.starts_with("---") {
105        return ContentType::StructuredData;
106    }
107    // TOML/INI-style: majority of lines are key = value or key: value
108    let lines: Vec<&str> = trimmed.lines().collect();
109    if lines.len() >= 2 {
110        let kv_count = lines
111            .iter()
112            .filter(|l| {
113                let l = l.trim();
114                if l.is_empty() || l.starts_with('#') || l.starts_with("//") {
115                    return false;
116                }
117                if let Some(pos) = l.find(" = ") {
118                    return pos < 30 && !l[..pos].contains(' ');
119                }
120                if let Some(pos) = l.find(": ") {
121                    return pos < 30 && !l[..pos].contains(' ');
122                }
123                false
124            })
125            .count();
126        if kv_count as f32 / lines.len() as f32 > 0.6 {
127            return ContentType::StructuredData;
128        }
129    }
130
131    // Code detection with language identification
132    if let Some(lang) = detect_code_language(text) {
133        return ContentType::Code(lang);
134    }
135
136    ContentType::NaturalLanguage
137}
138
/// Detect whether text is code and identify the language.
/// Returns None if the text is not code.
///
/// Two-stage heuristic:
/// 1. Count language-specific marker substrings; a best score >= 2 is a
///    confident identification.
/// 2. Otherwise fall back to generic code markers plus structural cues
///    (braces; `(` together with `;`). If those say "code" but no language
///    scored at least 1, return `CodeLanguage::Unknown`.
fn detect_code_language(text: &str) -> Option<CodeLanguage> {
    // Language-specific strong markers (unique to one language).
    // NOTE(review): several are weaker in practice — "None"/"True"/"False"
    // also occur in prose about Python, and "::" occurs in C++ too.
    let rust_markers: &[&str] = &[
        "fn ", "pub fn", "impl ", "trait ", "mod ", "#[", "let mut ", "::",
    ];
    let python_markers: &[&str] = &[
        "def ",
        "async def",
        "elif ",
        "self.",
        "import ",
        "__init__",
        "None",
        "True",
        "False",
    ];
    let js_markers: &[&str] = &[
        "function ",
        "const ",
        "let ",
        "var ",
        "=> ",
        "===",
        "!==",
        "require(",
        "console.",
    ];
    let ts_markers: &[&str] = &[
        "interface ",
        ": string",
        ": number",
        ": boolean",
        "readonly ",
        "as const",
    ];
    let go_markers: &[&str] = &["func ", "package ", "go func", ":= ", "fmt."];
    let java_markers: &[&str] = &[
        "public class",
        "private ",
        "protected ",
        "System.out",
        "@Override",
    ];
    let cpp_markers: &[&str] = &["#include", "std::", "cout", "nullptr", "template<"];
    // "fi\n"/"done\n" require a trailing newline, so a script ENDING in
    // `fi`/`done` without one misses that marker.
    let shell_markers: &[&str] = &["#!/bin", "echo ", "fi\n", "done\n", "esac", "$(", "export "];
    // Case-sensitive: only uppercase SQL keywords match.
    let sql_markers: &[&str] = &[
        "SELECT ",
        "INSERT ",
        "UPDATE ",
        "DELETE ",
        "CREATE TABLE",
        "ALTER TABLE",
        "JOIN ",
    ];

    // Count how many distinct markers appear at least once (substring
    // containment — repeated occurrences don't add to the score).
    let count = |markers: &[&str]| markers.iter().filter(|m| text.contains(**m)).count();

    let rust = count(rust_markers);
    let python = count(python_markers);
    let js = count(js_markers);
    let ts = count(ts_markers);
    let go = count(go_markers);
    let java = count(java_markers);
    let cpp = count(cpp_markers);
    let shell = count(shell_markers);
    let sql = count(sql_markers);

    // Find the best match.
    // NOTE: `max_by_key` returns the LAST maximal element on ties, so later
    // entries in this array (e.g. Sql) win ties over earlier ones (e.g. Rust).
    let scores = [
        (rust, CodeLanguage::Rust),
        (python, CodeLanguage::Python),
        (js, CodeLanguage::JavaScript),
        (ts, CodeLanguage::TypeScript),
        (go, CodeLanguage::Go),
        (java, CodeLanguage::Java),
        (cpp, CodeLanguage::Cpp),
        (shell, CodeLanguage::Shell),
        (sql, CodeLanguage::Sql),
    ];

    let (best_score, best_lang) = scores
        .iter()
        .max_by_key(|(score, _)| *score)
        .copied()
        .unwrap();

    // Two distinct markers of one language is considered conclusive.
    if best_score >= 2 {
        return Some(best_lang);
    }

    // Fallback: check generic code markers (language-agnostic)
    let has_braces = text.contains('{') && text.contains('}');
    let has_parens_semi = text.contains('(') && text.contains(';');
    let structural = has_braces as usize + has_parens_semi as usize;

    // Generic strong markers
    let generic_strong: &[&str] = &["fn ", "def ", "impl ", "struct ", "enum ", "=>", "->"];
    let generic_weak: &[&str] = &[
        "function ",
        "class ",
        "import ",
        "from ",
        "const ",
        "let ",
        "var ",
        "export ",
        "pub ",
        "use ",
    ];
    let strong = count(generic_strong);
    let weak = count(generic_weak);
    let total = strong + weak;

    // "Is code at all" decision: strong markers alone, or weaker evidence
    // corroborated by structural cues.
    let is_code = (strong >= 2)
        || (strong >= 1 && (structural > 0 || weak >= 1))
        || (weak >= 2 && structural > 0)
        || (total >= 3 && structural > 0);

    if is_code {
        // We know it's code but not which language — pick best guess or Unknown
        if best_score >= 1 {
            Some(best_lang)
        } else {
            Some(CodeLanguage::Unknown)
        }
    } else {
        None
    }
}
270
/// What a memory node represents. The `…Superseded` / `…Deprecated` /
/// `…Invalidated` variants mark nodes kept for history but no longer valid
/// (see `MemNode::is_valid`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MemKind {
    /// Identity entry. NOTE(review): semantics defined by callers — not
    /// visible in this chunk.
    Identity,
    /// Active knowledge entry (layer 2 — see `MemoryGraph::valid_facts`).
    Fact,
    /// A Fact replaced via `MemoryGraph::supersede`; retained for history.
    FactSuperseded,
    /// Derived knowledge citing premise Facts. Automatically invalidated
    /// when any cited premise is superseded (cascade invalidation).
    Conclusion,
    /// A Conclusion whose premises changed — needs recalculation.
    ConclusionInvalidated,
    /// Stored skill; the node value is JSON-serialized `SkillMeta`.
    Skill,
    /// A retired skill; still decodable via `SkillMeta::from_node`.
    SkillDeprecated,
    /// A conversation turn; auto-chained with `TemporalNext` edges on insert.
    Conversation,
    /// Compacted summary of older conversation turns.
    ConversationSummary,
    /// Environment entry. NOTE(review): semantics defined by callers — not
    /// visible in this chunk.
    Environment,
    /// Model performance profile — tracks observed quality/latency per model.
    Model,
    /// Parsed code symbol — function, struct, class, etc. from tree-sitter AST.
    /// Value is JSON-serialized `CodeSymbolMeta`. Key is `file_path::symbol_name`.
    CodeSymbol,
}
293
/// Typed relationship between memory nodes. Each kind carries a distinct
/// weight multiplier during PPR retrieval (see `retrieve_ppr_inner`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EdgeKind {
    Supersedes,   // new → old ("I replace you")
    DependsOn,    // dependent → dependency ("I need you")
    RelatedTo,    // semantic similarity (bidirectional by convention)
    Triggers,     // skill → trigger context ("I fire when this matches")
    TemporalNext, // conversation ordering (auto-added by `insert`)
    Calls,        // CodeSymbol → CodeSymbol ("I call you")
    DefinedIn,    // CodeSymbol → CodeSymbol (method → impl/class)
    Imports,      // CodeSymbol → CodeSymbol (file → dependency)
    CitesPremise, // Conclusion → Fact ("I am derived from this premise")
}
306
/// Metadata for a CodeSymbol graph node.
/// Stored JSON-encoded in the node's `value` (see `MemKind::CodeSymbol`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CodeSymbolMeta {
    pub name: String,
    pub kind: String, // "Function", "Method", "Struct", "Class", etc.
    pub signature: String,
    pub file_path: String,
    pub start_line: u32,
    pub end_line: u32,
    /// Doc comment attached to the symbol in source, if any.
    pub doc_comment: Option<String>,
    /// Enclosing symbol (e.g. the impl/class of a method), if any.
    pub parent: Option<String>,
}
319
320impl CodeSymbolMeta {
321    pub fn from_node(node: &MemNode) -> Option<Self> {
322        if node.kind != MemKind::CodeSymbol {
323            return None;
324        }
325        serde_json::from_str(&node.value).ok()
326    }
327    pub fn encode(&self) -> String {
328        serde_json::to_string(self).expect("CodeSymbolMeta serializable")
329    }
330}
331
332// --- Skill types ---
333
/// Skill scope — general (applies everywhere) or domain-specific.
/// Follows SkillRL's SkillBank = S_g ∪ ∪S_k structure.
/// Defaults to `Global` (see the `Default` impl below).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum SkillScope {
    /// Universal strategy applicable across all tasks.
    Global,
    /// Task-specific skill scoped to a domain (persona, task type, etc).
    Domain(String),
}
344
345impl Default for SkillScope {
346    fn default() -> Self {
347        SkillScope::Global
348    }
349}
350
/// Trigger metadata for a stored skill (Parslee-ai/car#181).
///
/// The original shape (`persona`, `url_pattern`, `task_keywords`)
/// stays load-bearing for the web-task automation case — `find_skill`
/// keys on those fields today. The new optional `structured` field
/// opens the trigger to non-web cases (intent signatures, IDE
/// contexts, audio-classifier outputs, anything an agent wants to
/// match a stored skill on without keyword scoring).
///
/// Backward-compatible: existing serialized skills lack `structured`
/// and load with `structured = None`.
///
/// **Matcher dispatch is deferred.** Today `find_skill` keys on the
/// keyword-shaped fields only. A skill whose `structured` is `Some`
/// and whose web-task fields are empty WILL NOT be returned by
/// `find_skill` for a structured query — agents storing such
/// skills today should enumerate via `list_skills` and apply
/// their own matcher until the dispatch lands. Naming this
/// upfront so callers don't read `Some(structured)` as
/// "find_skill will match this."
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default, PartialEq)]
pub struct SkillTrigger {
    /// Persona the skill applies under (web-task matching field).
    #[serde(default)]
    pub persona: String,
    /// URL pattern the skill fires on (web-task matching field).
    #[serde(default)]
    pub url_pattern: String,
    /// Keywords scored against the task description (web-task matching field).
    #[serde(default)]
    pub task_keywords: Vec<String>,
    /// Structured trigger payload for non-web-task skills. The
    /// `kind` identifies the matcher (`"intent_signature"`,
    /// `"ide_context"`, etc.); `signature` is opaque JSON the
    /// matcher knows how to compare. Both fields are optional;
    /// `None` means "use web-task keyword matching."
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured: Option<StructuredTrigger>,
}
387
/// Discriminated structured trigger for matcher plugins.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct StructuredTrigger {
    /// Matcher discriminator, e.g. "intent_signature", "ide_context".
    pub kind: String,
    /// Opaque JSON payload the named matcher knows how to compare.
    pub signature: serde_json::Value,
}
394
/// Outcome counters and health flags for a stored skill.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default, PartialEq)]
pub struct SkillStats {
    /// Times the skill was applied successfully.
    pub success_count: u64,
    /// Times the skill was applied and failed.
    pub fail_count: u64,
    /// Degraded flag — see `should_degrade` for the triggering condition.
    pub degraded: bool,
    /// Flagged for repair. NOTE(review): set/read outside this chunk —
    /// semantics not visible here.
    pub broken_for_repair: bool,
    /// Continuous success metric for telemetry feeds where success/
    /// failure isn't binary (Parslee-ai/car#181). Range [0.0, 1.0]
    /// or `None` if the consumer hasn't reported one. Distinct from
    /// `success_ratio()` which is a derived aggregate.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub completion_rate: Option<f64>,
    /// User-reported complaint count — the next layer below
    /// `fail_count`. A correct-looking but user-disliked outcome
    /// increments this without flipping `fail_count`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complaint_count: Option<u64>,
    /// Last time this skill was applied (success or fail). Drives
    /// recency-aware decay in the matcher.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_used_at: Option<DateTime<Utc>>,
}
417
418impl SkillStats {
419    pub fn should_degrade(&self, threshold: u64) -> bool {
420        self.fail_count > self.success_count + threshold
421    }
422    pub fn success_ratio(&self) -> f64 {
423        let total = self.success_count + self.fail_count;
424        if total == 0 {
425            return 0.5;
426        }
427        self.success_count as f64 / total as f64
428    }
429}
430
/// Binary outcome reported when a skill is applied.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum SkillOutcome {
    Success,
    Fail,
}
437
/// Full skill record, JSON-encoded into a Skill node's `value`
/// (see `SkillMeta::from_node` / `encode`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct SkillMeta {
    pub name: String,
    pub code: String,     // opaque to Rust — Playwright body, shell script, etc.
    pub platform: String, // "playwright", "node", "shell", "python"
    pub description: String,
    /// Matching metadata — see `SkillTrigger` for web-task vs structured matching.
    pub trigger: SkillTrigger,
    /// Scope: Global (general strategy) or Domain(name) (task-specific).
    #[serde(default)]
    pub scope: SkillScope,
    /// When this skill should be applied — human-readable condition.
    #[serde(default)]
    pub when_to_apply: String,
    /// Outcome counters and health flags.
    #[serde(default)]
    pub stats: SkillStats,
    /// Skill revision; deserializes as 1 when absent (see `default_skill_version`).
    #[serde(default = "default_skill_version")]
    pub version: u64,
    /// Multi-tenant isolation tag (Parslee-ai/car#187 phase 3-D).
    /// When set, the skill belongs to a specific tenant and only
    /// surfaces in retrievals scoped to that tenant. `None` means
    /// the skill is unscoped — the legacy / single-tenant default.
    ///
    /// Important: scoped retrievals do NOT see unscoped skills.
    /// Strict isolation by design — see the `ScopedMemgineView` doc
    /// for the rationale and the rebootstrap-per-tenant pattern.
    #[serde(default)]
    pub tenant_id: Option<String>,
}
466
/// Serde default for `SkillMeta::version` — skills start at version 1.
/// `const` because it is a trivial constant-returning function; still a
/// plain `fn` path as far as `#[serde(default = "…")]` is concerned.
const fn default_skill_version() -> u64 {
    1
}
470
471impl SkillMeta {
472    pub fn from_node(node: &MemNode) -> Option<Self> {
473        if !matches!(node.kind, MemKind::Skill | MemKind::SkillDeprecated) {
474            return None;
475        }
476        serde_json::from_str(&node.value).ok()
477    }
478    pub fn encode(&self) -> String {
479        serde_json::to_string(self).expect("SkillMeta serializable")
480    }
481}
482
/// Provenance record — where a fact came from.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct Provenance {
    pub source: String, // "user", "system", "consolidation", "reflection", "compaction"
    pub reference: String, // file path, URL, conversation turn, etc.
    /// When the fact was recorded or sourced, if known.
    #[serde(default)]
    pub date: Option<DateTime<Utc>>,
}
491
/// Structured metadata for knowledge nodes (inspired by metaswarm's JSONL schema).
/// Tracks provenance, confidence, usage signals, and file affinity.
///
/// Note: `is_empty` checks only the descriptive fields; the usage counters
/// and `tenant_id` are not considered.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct FactMetadata {
    /// Confidence level: "high", "medium", "low", "derived".
    #[serde(default)]
    pub confidence: String,
    /// Where this fact came from (may have multiple sources).
    #[serde(default)]
    pub provenance: Vec<Provenance>,
    /// File paths this fact is relevant to (glob patterns allowed).
    #[serde(default)]
    pub affected_files: Vec<String>,
    /// Free-form tags for filtering and retrieval.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Fact category: "fact", "gotcha", "anti_pattern", "decision", "pattern".
    #[serde(default)]
    pub category: String,
    /// How many times this fact was included in context.
    #[serde(default)]
    pub usage_count: u64,
    /// How many times usage of this fact correlated with a successful outcome.
    #[serde(default)]
    pub helpful_count: u64,
    /// How many times this fact has been flagged as potentially outdated.
    #[serde(default)]
    pub outdated_reports: u64,
    /// Multi-tenant isolation tag (Parslee-ai/car#187 phase 3-D).
    /// When set, the fact belongs to a specific tenant and only
    /// surfaces in retrievals scoped to that tenant. `None` means
    /// the fact is unscoped — the legacy / single-tenant default.
    ///
    /// Important: scoped retrievals do NOT see unscoped facts.
    /// Strict isolation by design — operators bootstrap each tenant's
    /// memgine with whatever seeded facts they want; there's no
    /// "global facts visible to everyone" fallthrough because that
    /// would let any tenant pollute the shared namespace.
    #[serde(default)]
    pub tenant_id: Option<String>,
}
533
534impl FactMetadata {
535    pub fn is_empty(&self) -> bool {
536        self.confidence.is_empty()
537            && self.provenance.is_empty()
538            && self.affected_files.is_empty()
539            && self.tags.is_empty()
540            && self.category.is_empty()
541    }
542
543    /// Staleness signal: facts with high outdated_reports relative to usage are suspect.
544    pub fn staleness_ratio(&self) -> f64 {
545        if self.usage_count == 0 {
546            return 0.0;
547        }
548        self.outdated_reports as f64 / self.usage_count as f64
549    }
550
551    /// Helpfulness signal: facts that correlate with success.
552    pub fn helpfulness_ratio(&self) -> f64 {
553        if self.usage_count == 0 {
554            return 0.5;
555        } // neutral prior
556        self.helpful_count as f64 / self.usage_count as f64
557    }
558}
559
/// A single memory entry — the node weight of `MemoryGraph`.
#[derive(Debug, Clone)]
pub struct MemNode {
    pub kind: MemKind,
    /// 1-based layer; clamped into four buckets on insert.
    pub layer: Layer,
    /// Non-unique lookup key (multiple nodes may share one — see `by_key`).
    pub key: String,
    /// Payload text; JSON-encoded metadata for Skill/CodeSymbol kinds.
    pub value: String,
    /// Globally unique identifier, when present; indexed by `by_fact_id`
    /// and used for supersession and foreign-node dedupe.
    pub fact_id: Option<String>,
    /// Free-form scope tag. NOTE(review): semantics defined by callers —
    /// not visible in this chunk.
    pub scope: String,
    /// Free-form authority tag. NOTE(review): semantics defined by callers —
    /// not visible in this chunk.
    pub authority: String,
    /// Marks a fact as a hard constraint (see `MemoryGraph::constraints`).
    pub is_constraint: bool,
    pub created_at: DateTime<Utc>,
    /// Optional expiry. NOTE(review): enforcement happens outside this
    /// chunk — confirm at call sites.
    pub expires_at: Option<DateTime<Utc>>,
    pub content_type: ContentType,
    /// Structured metadata for knowledge quality signals (optional, defaults empty).
    pub metadata: FactMetadata,
}
576
577impl MemNode {
578    pub fn token_estimate(&self) -> usize {
579        std::cmp::max(1, self.value.len() / 4)
580    }
581
582    pub fn is_valid(&self) -> bool {
583        !matches!(
584            self.kind,
585            MemKind::FactSuperseded | MemKind::SkillDeprecated | MemKind::ConclusionInvalidated
586        )
587    }
588}
589
/// Edge payload: relationship kind, traversal weight, and creation time.
#[derive(Debug, Clone)]
pub struct MemEdge {
    pub kind: EdgeKind,
    /// Base weight; scaled by a per-kind multiplier during PPR retrieval
    /// (see `retrieve_ppr_inner`).
    pub weight: f32,
    pub created_at: DateTime<Utc>,
}
596
/// One result from graph retrieval.
#[derive(Debug, Clone)]
pub struct RetrievalHit {
    pub node_ix: NodeIndex,
    /// Relevance score propagated from the seed nodes.
    pub activation: f32,
    /// Hop count from a seed. NOTE(review): computed in retrieval code not
    /// fully shown here — confirm exact semantics there.
    pub hops: usize,
}
603
/// The memory graph. Nodes are entries, edges are relationships.
pub struct MemoryGraph {
    /// Underlying petgraph storage; `StableGraph` keeps node indices valid
    /// across removals.
    pub inner: StableGraph<MemNode, MemEdge>,
    /// fact_id → node, populated on insert for nodes carrying a `fact_id`.
    by_fact_id: HashMap<String, NodeIndex>,
    /// key → all nodes sharing that key (keys are not unique).
    by_key: HashMap<String, Vec<NodeIndex>>,
    /// Nodes bucketed by layer: layer L maps to index (L-1), clamped to 0..=3.
    by_layer: [Vec<NodeIndex>; 4],
    /// Most recently inserted Conversation/ConversationSummary node, used
    /// to chain `TemporalNext` edges.
    last_conversation: Option<NodeIndex>,
    /// Partition assignments. Only non-`Project` entries are stored —
    /// absence means `Project`.
    partitions: HashMap<NodeIndex, Partition>,
    /// Idempotency index for foreign nodes, keyed by `fact_id`.
    /// `fact_id` is already globally unique (Indexer encodes
    /// `foreign::{repo}::{commit}::{path}::{symbol}` — distinguishes
    /// overloads and impl blocks that share a short `key`). A node without
    /// a `fact_id` is NOT idempotency-tracked and will duplicate on
    /// re-insert — callers that want dedupe must populate `fact_id`.
    foreign_by_fact_id: HashMap<String, NodeIndex>,
}
622
623impl MemoryGraph {
624    pub fn new() -> Self {
625        Self {
626            inner: StableGraph::new(),
627            by_fact_id: HashMap::new(),
628            by_key: HashMap::new(),
629            by_layer: [Vec::new(), Vec::new(), Vec::new(), Vec::new()],
630            last_conversation: None,
631            partitions: HashMap::new(),
632            foreign_by_fact_id: HashMap::new(),
633        }
634    }
635
636    /// Partition a node belongs to. Returns `Partition::Project` for any node
637    /// not explicitly assigned (the legacy default).
638    pub fn partition_of(&self, nix: NodeIndex) -> Partition {
639        self.partitions
640            .get(&nix)
641            .cloned()
642            .unwrap_or(Partition::Project)
643    }
644
    /// Insert a node into the given `Partition`. For `Project`, equivalent
    /// to `insert`. For `Foreign`, enforces idempotency by the node's
    /// `fact_id` (see `foreign_by_fact_id`): re-inserting a node whose
    /// `fact_id` is already present returns the existing `NodeIndex`
    /// rather than creating a duplicate. A foreign node without a
    /// `fact_id` is inserted but NOT deduplicated.
    pub fn insert_partitioned(&mut self, node: MemNode, partition: Partition) -> NodeIndex {
        if let Partition::Foreign { .. } = &partition {
            // Check the idempotency index before inserting.
            if let Some(fid) = node.fact_id.as_deref() {
                if let Some(&existing) = self.foreign_by_fact_id.get(fid) {
                    return existing;
                }
            }
            // Clone the id first — `insert` consumes the node.
            let fid = node.fact_id.clone();
            let nix = self.insert(node);
            if let Some(fid) = fid {
                self.foreign_by_fact_id.insert(fid, nix);
            }
            // Record the non-default partition in the side table.
            self.partitions.insert(nix, partition);
            return nix;
        }
        // Project is the implicit default — no side-table entry needed.
        self.insert(node)
    }
666
667    /// Convenience: insert a node under `Partition::Foreign { source_repo, commit }`.
668    pub fn insert_foreign(
669        &mut self,
670        source_repo: impl Into<String>,
671        commit: impl Into<String>,
672        node: MemNode,
673    ) -> NodeIndex {
674        self.insert_partitioned(
675            node,
676            Partition::Foreign {
677                source_repo: source_repo.into(),
678                commit: commit.into(),
679            },
680        )
681    }
682
683    /// Insert a node and index it.
684    pub fn insert(&mut self, node: MemNode) -> NodeIndex {
685        let layer_idx = (node.layer as usize).saturating_sub(1).min(3);
686        let fact_id = node.fact_id.clone();
687        let key = node.key.clone();
688        let is_conv = matches!(
689            node.kind,
690            MemKind::Conversation | MemKind::ConversationSummary
691        );
692
693        let nix = self.inner.add_node(node);
694
695        if let Some(fid) = fact_id {
696            self.by_fact_id.insert(fid, nix);
697        }
698        self.by_key.entry(key).or_default().push(nix);
699        self.by_layer[layer_idx].push(nix);
700
701        // Auto-link conversation nodes temporally
702        if is_conv {
703            if let Some(prev) = self.last_conversation {
704                self.inner.add_edge(
705                    prev,
706                    nix,
707                    MemEdge {
708                        kind: EdgeKind::TemporalNext,
709                        weight: 1.0,
710                        created_at: Utc::now(),
711                    },
712                );
713            }
714            self.last_conversation = Some(nix);
715        }
716
717        nix
718    }
719
720    /// Add a typed edge between two nodes.
721    pub fn link(&mut self, from: NodeIndex, to: NodeIndex, kind: EdgeKind, weight: f32) {
722        self.inner.add_edge(
723            from,
724            to,
725            MemEdge {
726                kind,
727                weight,
728                created_at: Utc::now(),
729            },
730        );
731    }
732
733    /// Supersede: mark old as FactSuperseded, add Supersedes edge,
734    /// and cascade invalidation to any Conclusions citing this premise.
735    pub fn supersede(&mut self, new_nix: NodeIndex, old_fact_id: &str) -> HashSet<NodeIndex> {
736        let mut invalidated = HashSet::new();
737
738        if let Some(&old_nix) = self.by_fact_id.get(old_fact_id) {
739            // Mark old as superseded
740            if let Some(old_node) = self.inner.node_weight_mut(old_nix) {
741                old_node.kind = MemKind::FactSuperseded;
742            }
743            // Add supersedes edge
744            self.link(new_nix, old_nix, EdgeKind::Supersedes, 1.0);
745            invalidated.insert(old_nix);
746
747            // Find dependents (nodes with DependsOn edges pointing to old_nix)
748            let dependents: Vec<NodeIndex> = self
749                .inner
750                .neighbors_directed(old_nix, Direction::Incoming)
751                .filter(|&n| {
752                    self.inner
753                        .edges_connecting(n, old_nix)
754                        .any(|e| e.weight().kind == EdgeKind::DependsOn)
755                })
756                .collect();
757
758            for dep in dependents {
759                invalidated.insert(dep);
760            }
761
762            // CASCADE: invalidate Conclusions that cite this premise
763            let citing_conclusions: Vec<NodeIndex> = self
764                .inner
765                .neighbors_directed(old_nix, Direction::Incoming)
766                .filter(|&n| {
767                    self.inner
768                        .edges_connecting(n, old_nix)
769                        .any(|e| e.weight().kind == EdgeKind::CitesPremise)
770                })
771                .collect();
772
773            for conc_nix in citing_conclusions {
774                if let Some(conc_node) = self.inner.node_weight_mut(conc_nix) {
775                    if conc_node.kind == MemKind::Conclusion {
776                        conc_node.kind = MemKind::ConclusionInvalidated;
777                        invalidated.insert(conc_nix);
778                    }
779                }
780            }
781        }
782
783        invalidated
784    }
785
786    /// Lookup by fact_id.
787    pub fn get_by_fact_id(&self, fact_id: &str) -> Option<(NodeIndex, &MemNode)> {
788        self.by_fact_id
789            .get(fact_id)
790            .and_then(|&nix| self.inner.node_weight(nix).map(|n| (nix, n)))
791    }
792
793    /// Get all valid fact nodes.
794    pub fn valid_facts(&self) -> Vec<(NodeIndex, &MemNode)> {
795        self.by_layer[1]
796            .iter() // layer 2 = index 1
797            .filter_map(|&nix| {
798                self.inner
799                    .node_weight(nix)
800                    .filter(|n| n.kind == MemKind::Fact)
801                    .map(|n| (nix, n))
802            })
803            .collect()
804    }
805
806    /// Get all constraint fact nodes.
807    pub fn constraints(&self) -> Vec<(NodeIndex, &MemNode)> {
808        self.valid_facts()
809            .into_iter()
810            .filter(|(_, n)| n.is_constraint)
811            .collect()
812    }
813
814    /// Get nodes by layer.
815    pub fn nodes_by_layer(&self, layer: Layer) -> Vec<(NodeIndex, &MemNode)> {
816        let idx = (layer as usize).saturating_sub(1).min(3);
817        self.by_layer[idx]
818            .iter()
819            .filter_map(|&nix| self.inner.node_weight(nix).map(|n| (nix, n)))
820            .collect()
821    }
822
    /// Personalized PageRank retrieval from seed nodes (HippoRAG-inspired).
    ///
    /// PPR propagates relevance from seed nodes through the graph with a damping
    /// factor that controls the balance between following edges and teleporting
    /// back to seeds. Converges to a stationary distribution in ~10-20 iterations.
    ///
    /// `seed_weights`, when given, is parallel to `seeds`; a missing entry
    /// defaults to 1.0. `max_results` caps the number of hits returned.
    ///
    /// Results are filtered to seeds' partitions by default. Use
    /// `retrieve_ppr_cross_partition` to traverse across the Project/Foreign
    /// boundary.
    pub fn retrieve_ppr(
        &self,
        seeds: &[NodeIndex],
        seed_weights: Option<&[f32]>,
        damping: f32,
        max_results: usize,
    ) -> Vec<RetrievalHit> {
        self.retrieve_ppr_inner(seeds, seed_weights, damping, max_results, true)
    }
841
    /// Like `retrieve_ppr`, but does NOT filter by partition — foreign nodes
    /// can be reached from project seeds and vice versa. Caller opts in.
    /// Parameters have the same meaning as on `retrieve_ppr`.
    pub fn retrieve_ppr_cross_partition(
        &self,
        seeds: &[NodeIndex],
        seed_weights: Option<&[f32]>,
        damping: f32,
        max_results: usize,
    ) -> Vec<RetrievalHit> {
        self.retrieve_ppr_inner(seeds, seed_weights, damping, max_results, false)
    }
853
    /// Shared implementation behind `retrieve_ppr` / `retrieve_ppr_cross_partition`.
    ///
    /// Power-iteration personalized PageRank over the whole graph: on each
    /// iteration a node's score is `(1 - damping)` of its teleport mass plus
    /// `damping` times the score flowing in from neighbors, where flow is
    /// proportional to edge weight × a per-kind multiplier, normalized by the
    /// node's total traversable out-weight. Iterates until the L1 delta drops
    /// below epsilon or 30 iterations, whichever comes first.
    fn retrieve_ppr_inner(
        &self,
        seeds: &[NodeIndex],
        seed_weights: Option<&[f32]>,
        damping: f32,
        max_results: usize,
        apply_partition_filter: bool,
    ) -> Vec<RetrievalHit> {
        if seeds.is_empty() {
            return Vec::new();
        }

        let all_nodes: Vec<NodeIndex> = self.inner.node_indices().collect();
        let n = all_nodes.len();
        if n == 0 {
            return Vec::new();
        }

        // Build node index → position map for O(1) lookup into the dense
        // score/reset vectors below.
        let pos: HashMap<NodeIndex, usize> = all_nodes
            .iter()
            .enumerate()
            .map(|(i, &nix)| (nix, i))
            .collect();

        // Build reset vector (teleportation distribution). Seeds not present
        // in the graph are silently ignored; a seed without an explicit
        // weight defaults to 1.0.
        let mut reset = vec![0.0f32; n];
        let mut total_weight = 0.0f32;
        for (i, &seed) in seeds.iter().enumerate() {
            if let Some(&p) = pos.get(&seed) {
                let w = seed_weights
                    .and_then(|sw| sw.get(i).copied())
                    .unwrap_or(1.0);
                reset[p] = w;
                total_weight += w;
            }
        }
        if total_weight > 0.0 {
            for v in &mut reset {
                *v /= total_weight;
            }
        }

        // Cap any single entry to 0.4 and re-normalize, so no one seed can
        // dominate the teleport distribution. (With a single seed the
        // renormalization restores it to 1.0, which is harmless.)
        let cap = 0.4f32;
        let mut needs_renorm = false;
        for v in &mut reset {
            if *v > cap {
                *v = cap;
                needs_renorm = true;
            }
        }
        if needs_renorm {
            let sum: f32 = reset.iter().sum();
            if sum > 0.0 {
                for v in &mut reset {
                    *v /= sum;
                }
            }
        }

        // Initialize scores to reset vector
        let mut scores = reset.clone();

        // Edge weight multipliers by kind — the same table used by the
        // legacy spreading-activation retrieval's forward direction.
        let edge_mult = |kind: EdgeKind| -> f32 {
            match kind {
                EdgeKind::Supersedes => 0.3,
                EdgeKind::DependsOn => 0.8,
                EdgeKind::RelatedTo => 0.6,
                EdgeKind::Triggers => 0.9,
                EdgeKind::TemporalNext => 0.5,
                EdgeKind::Calls => 0.85,
                EdgeKind::DefinedIn => 0.9,
                EdgeKind::Imports => 0.4,
                EdgeKind::CitesPremise => 0.85,
            }
        };

        // Precompute outgoing edge weights per node for normalization.
        // Must match exactly the set of edges the propagation loop traverses,
        // or mass would be lost/duplicated.
        let mut out_weights: Vec<f32> = vec![0.0; n];
        for (i, &nix) in all_nodes.iter().enumerate() {
            let mut total = 0.0f32;
            for e in self.inner.edges(nix) {
                total += e.weight().weight * edge_mult(e.weight().kind);
            }
            // Also count incoming edges we traverse (RelatedTo, DependsOn, Triggers)
            for e in self.inner.edges_directed(nix, Direction::Incoming) {
                let k = e.weight().kind;
                if matches!(
                    k,
                    EdgeKind::RelatedTo
                        | EdgeKind::DependsOn
                        | EdgeKind::Triggers
                        | EdgeKind::Calls
                        | EdgeKind::DefinedIn
                        | EdgeKind::Imports
                ) {
                    total += e.weight().weight * edge_mult(k);
                }
            }
            out_weights[i] = total;
        }

        // PPR iteration
        let max_iters = 30;
        // epsilon doubles as convergence threshold and "negligible score" cutoff.
        let epsilon = 1e-6;

        let mut new_scores = vec![0.0f32; n];

        for _iter in 0..max_iters {
            new_scores.fill(0.0);

            // Teleportation component
            for i in 0..n {
                new_scores[i] += (1.0 - damping) * reset[i];
            }

            // Propagation component: each node distributes its score to neighbors
            for (i, &nix) in all_nodes.iter().enumerate() {
                // Skip nodes with negligible mass — nothing meaningful to spread.
                if scores[i] < epsilon {
                    continue;
                }
                let out_w = out_weights[i];
                // Dangling node (no traversable edges): its mass is dropped
                // rather than redistributed.
                if out_w == 0.0 {
                    continue;
                }

                // Outgoing edges
                for e in self.inner.edges(nix) {
                    if let Some(&j) = pos.get(&e.target()) {
                        let w = e.weight().weight * edge_mult(e.weight().kind);
                        new_scores[j] += damping * scores[i] * w / out_w;
                    }
                }
                // Incoming edges (bidirectional traversal) — only the kinds
                // that are meaningful to follow backwards, mirroring the
                // out_weights precomputation above.
                for e in self.inner.edges_directed(nix, Direction::Incoming) {
                    let k = e.weight().kind;
                    if !matches!(
                        k,
                        EdgeKind::RelatedTo
                            | EdgeKind::DependsOn
                            | EdgeKind::Triggers
                            | EdgeKind::Calls
                            | EdgeKind::DefinedIn
                            | EdgeKind::Imports
                    ) {
                        continue;
                    }
                    if let Some(&j) = pos.get(&e.source()) {
                        let w = e.weight().weight * edge_mult(k);
                        new_scores[j] += damping * scores[i] * w / out_w;
                    }
                }
            }

            // Check convergence (L1 distance between successive iterations).
            let delta: f32 = scores
                .iter()
                .zip(new_scores.iter())
                .map(|(a, b)| (a - b).abs())
                .sum();
            std::mem::swap(&mut scores, &mut new_scores);
            if delta < epsilon {
                break;
            }
        }

        // Collect results: only valid (non-superseded) nodes with
        // non-negligible score become hits.
        let mut hits: Vec<RetrievalHit> = all_nodes
            .iter()
            .enumerate()
            .filter_map(|(i, &nix)| {
                let node = self.inner.node_weight(nix)?;
                if !node.is_valid() {
                    return None;
                }
                if scores[i] < epsilon {
                    return None;
                }
                // Compute approximate hops from nearest seed
                // PPR doesn't have a clean concept of "hops" — set to 0
                let hops = 0;
                Some(RetrievalHit {
                    node_ix: nix,
                    activation: scores[i],
                    hops,
                })
            })
            .collect();

        hits.sort_by(|a, b| {
            b.activation
                .partial_cmp(&a.activation)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        // Filter before truncating so partition filtering never reduces the
        // result count below max_results unnecessarily.
        if apply_partition_filter {
            self.filter_same_partition(&mut hits, seeds);
        }
        hits.truncate(max_results);
        hits
    }
1056
1057    /// Drop hits whose partition doesn't match any seed's partition.
1058    /// Private helper; `*_cross_partition` variants skip this step.
1059    fn filter_same_partition(&self, hits: &mut Vec<RetrievalHit>, seeds: &[NodeIndex]) {
1060        if seeds.is_empty() {
1061            return;
1062        }
1063        // If all seeds are Project and no Foreign partitions exist, fast-path.
1064        if self.partitions.is_empty() {
1065            return;
1066        }
1067        let seed_parts: HashSet<Partition> = seeds.iter().map(|&n| self.partition_of(n)).collect();
1068        hits.retain(|h| seed_parts.contains(&self.partition_of(h.node_ix)));
1069    }
1070
1071    /// Legacy spreading activation retrieval from seed nodes.
1072    ///
1073    /// Results are filtered to seeds' partitions by default. Use
1074    /// `retrieve_cross_partition` for opt-in cross-partition traversal.
1075    pub fn retrieve(
1076        &self,
1077        seeds: &[NodeIndex],
1078        max_hops: usize,
1079        max_results: usize,
1080        decay: f32,
1081        min_activation: f32,
1082    ) -> Vec<RetrievalHit> {
1083        self.retrieve_inner(seeds, max_hops, max_results, decay, min_activation, true)
1084    }
1085
1086    /// Cross-partition variant of `retrieve`.
1087    pub fn retrieve_cross_partition(
1088        &self,
1089        seeds: &[NodeIndex],
1090        max_hops: usize,
1091        max_results: usize,
1092        decay: f32,
1093        min_activation: f32,
1094    ) -> Vec<RetrievalHit> {
1095        self.retrieve_inner(seeds, max_hops, max_results, decay, min_activation, false)
1096    }
1097
    /// Shared implementation behind `retrieve` / `retrieve_cross_partition`.
    ///
    /// Breadth-first spreading activation: every seed starts at activation
    /// 1.0; each traversed edge multiplies the activation by
    /// `decay × edge.weight × per-kind multiplier`. A node keeps the highest
    /// activation reached on any path, and only an improvement re-enqueues
    /// it. Propagation stops at `max_hops` or when the value falls below
    /// `min_activation`.
    fn retrieve_inner(
        &self,
        seeds: &[NodeIndex],
        max_hops: usize,
        max_results: usize,
        decay: f32,
        min_activation: f32,
        apply_partition_filter: bool,
    ) -> Vec<RetrievalHit> {
        // activation per node (best seen so far) and hop count at which that
        // best activation was recorded.
        let mut activations: HashMap<NodeIndex, f32> = HashMap::new();
        let mut hops_map: HashMap<NodeIndex, usize> = HashMap::new();
        let mut queue: VecDeque<(NodeIndex, f32, usize)> = VecDeque::new();

        for &seed in seeds {
            activations.insert(seed, 1.0);
            hops_map.insert(seed, 0);
            queue.push_back((seed, 1.0, 0));
        }

        while let Some((node, activation, hops)) = queue.pop_front() {
            if hops >= max_hops {
                continue;
            }

            // Traverse outgoing edges
            for edge_ref in self.inner.edges(node) {
                let neighbor = edge_ref.target();
                let edge = edge_ref.weight();

                // Per-kind multiplier for the forward direction — same table
                // used by PPR retrieval.
                let edge_mult = match edge.kind {
                    EdgeKind::Supersedes => 0.3,
                    EdgeKind::DependsOn => 0.8,
                    EdgeKind::RelatedTo => 0.6,
                    EdgeKind::Triggers => 0.9,
                    EdgeKind::TemporalNext => 0.5,
                    EdgeKind::Calls => 0.85,
                    EdgeKind::DefinedIn => 0.9,
                    EdgeKind::Imports => 0.4,
                    EdgeKind::CitesPremise => 0.85,
                };

                let new_activation = activation * decay * edge.weight * edge_mult;
                if new_activation < min_activation {
                    continue;
                }

                // Only an improvement over the best-known activation updates
                // the node and re-enqueues it.
                let existing = activations.get(&neighbor).copied().unwrap_or(0.0);
                if new_activation > existing {
                    activations.insert(neighbor, new_activation);
                    hops_map.insert(neighbor, hops + 1);
                    queue.push_back((neighbor, new_activation, hops + 1));
                }
            }

            // Also traverse incoming edges (for bidirectional traversal)
            for edge_ref in self.inner.edges_directed(node, Direction::Incoming) {
                let neighbor = edge_ref.source();
                let edge = edge_ref.weight();

                // Follow RelatedTo, DependsOn, and Triggers backwards.
                // NOTE: backward multipliers are deliberately different from
                // the forward ones (e.g. DependsOn 0.5 vs 0.8); other kinds
                // (Supersedes, TemporalNext, CitesPremise) are not followed
                // backwards at all.
                let edge_mult = match edge.kind {
                    EdgeKind::RelatedTo => 0.6,
                    EdgeKind::DependsOn => 0.5,
                    EdgeKind::Triggers => 0.85, // "what skill fires for this trigger?"
                    EdgeKind::Calls => 0.7,     // "what calls me?"
                    EdgeKind::DefinedIn => 0.8, // "what methods does this type have?"
                    EdgeKind::Imports => 0.3,
                    _ => continue,
                };

                let new_activation = activation * decay * edge.weight * edge_mult;
                if new_activation < min_activation {
                    continue;
                }

                let existing = activations.get(&neighbor).copied().unwrap_or(0.0);
                if new_activation > existing {
                    activations.insert(neighbor, new_activation);
                    hops_map.insert(neighbor, hops + 1);
                    queue.push_back((neighbor, new_activation, hops + 1));
                }
            }
        }

        // Materialize hits, dropping superseded/invalid nodes.
        let mut hits: Vec<RetrievalHit> = activations
            .into_iter()
            .filter_map(|(nix, act)| {
                let node = self.inner.node_weight(nix)?;
                if !node.is_valid() {
                    return None;
                } // skip superseded
                Some(RetrievalHit {
                    node_ix: nix,
                    activation: act,
                    hops: *hops_map.get(&nix).unwrap_or(&0),
                })
            })
            .collect();

        hits.sort_by(|a, b| {
            b.activation
                .partial_cmp(&a.activation)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        // Filter before truncating so the partition filter doesn't starve
        // the result set below max_results.
        if apply_partition_filter {
            self.filter_same_partition(&mut hits, seeds);
        }
        hits.truncate(max_results);
        hits
    }
1208
    /// Find seed nodes by keyword matching with IDF-weighted scores.
    ///
    /// Returns (seeds, weights) where weights incorporate corpus IDF (term rarity),
    /// stemming, synonym expansion, and content-type-aware tokenization.
    /// Use the weights as `seed_weights` in `retrieve_ppr()`.
    ///
    /// Each query term contributes at most `3 × idf` to a node's score, via
    /// a tiered match (first tier that fires wins): exact token (3×),
    /// stem (2×), synonym (1.5×), raw substring (1×). The final node score
    /// is the hit mass normalized by the maximum possible mass, so it lies
    /// in [0, 1].
    pub fn find_seeds_weighted(&self, query: &str, max_seeds: usize) -> (Vec<NodeIndex>, Vec<f32>) {
        let q_tokens = tokenize(query);
        if q_tokens.is_empty() {
            return (Vec::new(), Vec::new());
        }

        // Compute corpus IDF: how rare each term is across all nodes
        let corpus_idf = self.compute_corpus_idf();

        let mut scored: Vec<(f32, NodeIndex)> = Vec::new();

        for nix in self.inner.node_indices() {
            let node = match self.inner.node_weight(nix) {
                Some(n) => n,
                None => continue,
            };
            // Superseded/invalid nodes never become seeds.
            if !node.is_valid() {
                continue;
            }

            // Score against the node's key and value together, tokenized
            // according to its content type (code-aware for code nodes).
            let text = format!("{} {}", node.key, node.value).to_lowercase();
            let n_tokens = tokenize_for_content(&text, node.content_type);
            let n_stems: HashSet<String> = n_tokens.iter().map(|t| stem(t)).collect();

            // Weighted scoring using corpus IDF
            let mut weighted_hits = 0.0f32;
            let mut weighted_total = 0.0f32;

            for qt in &q_tokens {
                // Terms absent from the corpus default to idf 1.0.
                let term_idf = corpus_idf.get(qt).copied().unwrap_or(1.0);
                weighted_total += term_idf * 3.0; // max score per term = 3 * idf

                // Exact token match (3x weight)
                if n_tokens.contains(qt) {
                    weighted_hits += term_idf * 3.0;
                    continue;
                }
                // Stem match (2x weight)
                let qt_stem = stem(qt);
                if n_stems.contains(&qt_stem) {
                    weighted_hits += term_idf * 2.0;
                    continue;
                }
                // Synonym match (1.5x weight)
                let qt_syns = synonym_expand(qt);
                if !qt_syns.is_empty()
                    && qt_syns
                        .iter()
                        .any(|s| n_tokens.contains(s) || n_stems.contains(&stem(s)))
                {
                    weighted_hits += term_idf * 1.5;
                    continue;
                }
                // Substring match (1x weight)
                if text.contains(qt.as_str()) {
                    weighted_hits += term_idf;
                }
            }

            // Normalize to [0, 1]; nodes with no hits at all are skipped.
            if weighted_total > 0.0 {
                let match_score = weighted_hits / weighted_total;
                if match_score > 0.0 {
                    scored.push((match_score, nix));
                }
            }
        }

        // Best matches first; keep only the top max_seeds.
        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        scored.truncate(max_seeds);
        let seeds: Vec<NodeIndex> = scored.iter().map(|(_, nix)| *nix).collect();
        let weights: Vec<f32> = scored.iter().map(|(w, _)| *w).collect();
        (seeds, weights)
    }
1287
1288    /// Compute corpus IDF: ln(N / (1 + df)) for each term across all valid nodes.
1289    /// Called once per find_seeds_weighted invocation. O(N × tokens_per_node),
1290    /// sub-millisecond for expected graph sizes (<10K nodes).
1291    fn compute_corpus_idf(&self) -> HashMap<String, f32> {
1292        let total = self.inner.node_count().max(1) as f32;
1293        let mut doc_freq: HashMap<String, usize> = HashMap::new();
1294
1295        for nix in self.inner.node_indices() {
1296            let node = match self.inner.node_weight(nix) {
1297                Some(n) if n.is_valid() => n,
1298                _ => continue,
1299            };
1300            let text = format!("{} {}", node.key, node.value).to_lowercase();
1301            let tokens = tokenize_for_content(&text, node.content_type);
1302            let mut seen = HashSet::new();
1303            for token in tokens {
1304                if seen.insert(token.clone()) {
1305                    *doc_freq.entry(token).or_insert(0) += 1;
1306                }
1307            }
1308        }
1309
1310        doc_freq
1311            .into_iter()
1312            .map(|(term, df)| (term, (total / (1.0 + df as f32)).ln().max(0.1)))
1313            .collect()
1314    }
1315
1316    /// Find seed nodes by keyword matching (unweighted, backward compat).
1317    pub fn find_seeds(&self, query: &str, max_seeds: usize) -> Vec<NodeIndex> {
1318        self.find_seeds_weighted(query, max_seeds).0
1319    }
1320
1321    /// Total token count across all valid nodes.
1322    pub fn total_tokens(&self) -> usize {
1323        self.inner
1324            .node_indices()
1325            .filter_map(|nix| self.inner.node_weight(nix))
1326            .filter(|n| n.is_valid())
1327            .map(|n| n.token_estimate())
1328            .sum()
1329    }
1330
    /// Count of valid fact nodes.
    ///
    /// Delegates to `valid_facts()` and takes the length, so this is O(n)
    /// and materializes the intermediate collection just for counting.
    pub fn valid_fact_count(&self) -> usize {
        self.valid_facts().len()
    }
1335
    /// Total number of nodes in the graph, including superseded/invalid ones.
    pub fn node_count(&self) -> usize {
        self.inner.node_count()
    }
1339
    /// Total number of edges in the graph, across all edge kinds.
    pub fn edge_count(&self) -> usize {
        self.inner.edge_count()
    }
1343
    /// Garbage-collect superseded/deprecated nodes beyond a retention depth.
    ///
    /// Walks Supersedes chains from each FactSuperseded/SkillDeprecated node.
    /// If the node is more than `max_depth` hops from any active (non-superseded)
    /// ancestor, it is removed. Returns the number of nodes removed.
    pub fn gc_superseded(&mut self, max_depth: usize) -> usize {
        // Find all superseded/deprecated nodes
        let stale: Vec<NodeIndex> = self
            .inner
            .node_indices()
            .filter(|&nix| {
                self.inner
                    .node_weight(nix)
                    .map(|n| {
                        matches!(
                            n.kind,
                            MemKind::FactSuperseded
                                | MemKind::SkillDeprecated
                                | MemKind::ConclusionInvalidated
                        )
                    })
                    .unwrap_or(false)
            })
            .collect();

        // For each stale node, walk Supersedes edges backward (incoming) to find
        // distance to nearest active node. If no active ancestor within max_depth, remove.
        let mut to_remove = Vec::new();
        for nix in &stale {
            let mut depth = 0;
            let mut current = *nix;
            let mut reachable = false;

            // Walk backward: who supersedes me? (incoming Supersedes edges)
            // NOTE(review): `find` picks an arbitrary superseding parent when
            // several exist — assumes Supersedes chains are linear; confirm.
            loop {
                let parent: Option<NodeIndex> = self
                    .inner
                    .neighbors_directed(current, Direction::Incoming)
                    .find(|&neighbor| {
                        self.inner
                            .edges_connecting(neighbor, current)
                            .any(|e| e.weight().kind == EdgeKind::Supersedes)
                    });

                match parent {
                    Some(p) => {
                        depth += 1;
                        // An active ancestor terminates the walk; whether the
                        // node is kept still depends on the depth check below.
                        if let Some(node) = self.inner.node_weight(p) {
                            if node.is_valid() {
                                reachable = true;
                                break;
                            }
                        }
                        // Depth bound also guarantees termination on cyclic
                        // Supersedes chains.
                        if depth > max_depth {
                            break;
                        }
                        current = p;
                    }
                    None => break, // orphaned superseded node
                }
            }

            // Remove if too deep or orphaned (no active ancestor at all).
            // An active ancestor found at depth max_depth + 1 still counts
            // as "too deep" and the node is removed.
            if !reachable || depth > max_depth {
                to_remove.push(*nix);
            }
        }

        let count = to_remove.len();
        for nix in to_remove {
            // Capture fields before removal for index cleanup
            let (key, layer, fact_id) = match self.inner.node_weight(nix) {
                Some(node) => (node.key.clone(), node.layer, node.fact_id.clone()),
                None => continue,
            };
            self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
            self.inner.remove_node(nix);
        }
        count
    }
1424
1425    /// Remove expired environment nodes.
1426    pub fn prune_expired(&mut self, now: DateTime<Utc>) {
1427        let expired: Vec<(NodeIndex, String, Layer, Option<String>)> = self
1428            .inner
1429            .node_indices()
1430            .filter_map(|nix| {
1431                let node = self.inner.node_weight(nix)?;
1432                if node.expires_at.map(|e| e <= now).unwrap_or(false) {
1433                    Some((nix, node.key.clone(), node.layer, node.fact_id.clone()))
1434                } else {
1435                    None
1436                }
1437            })
1438            .collect();
1439        for (nix, key, layer, fact_id) in expired {
1440            self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
1441            self.inner.remove_node(nix);
1442        }
1443    }
1444
    /// Remove a set of conversation/summary nodes and repair the temporal chain.
    /// After removal, re-links surviving conversation+summary nodes with TemporalNext edges.
    pub fn remove_conversation_nodes(&mut self, to_remove: &[NodeIndex]) {
        // Clean indexes and remove the nodes. Indices already absent from the
        // graph are silently skipped.
        for &nix in to_remove {
            let (key, layer, fact_id) = match self.inner.node_weight(nix) {
                Some(node) => (node.key.clone(), node.layer, node.fact_id.clone()),
                None => continue,
            };
            self.remove_from_indexes(nix, &key, layer, fact_id.as_deref());
            self.inner.remove_node(nix);
        }

        // Collect surviving conversation/summary nodes, sorted by created_at
        let mut survivors: Vec<(NodeIndex, chrono::DateTime<Utc>)> = self
            .inner
            .node_indices()
            .filter_map(|nix| {
                let n = self.inner.node_weight(nix)?;
                if matches!(n.kind, MemKind::Conversation | MemKind::ConversationSummary) {
                    Some((nix, n.created_at))
                } else {
                    None
                }
            })
            .collect();
        survivors.sort_by_key(|&(_, ts)| ts);

        // Remove all existing TemporalNext edges so the chain can be rebuilt.
        // NOTE(review): this removes EVERY TemporalNext edge in the graph,
        // not just those between conversation nodes — assumes TemporalNext
        // only ever links conversation/summary nodes; confirm against the
        // edge-insertion sites.
        let temporal_edges: Vec<petgraph::graph::EdgeIndex> = self
            .inner
            .edge_indices()
            .filter(|&eix| {
                self.inner
                    .edge_weight(eix)
                    .map(|e| e.kind == EdgeKind::TemporalNext)
                    .unwrap_or(false)
            })
            .collect();
        for eix in temporal_edges {
            self.inner.remove_edge(eix);
        }

        // Re-link surviving nodes in chronological order (each consecutive
        // pair gets a fresh full-weight TemporalNext edge).
        for window in survivors.windows(2) {
            let (from_nix, _) = window[0];
            let (to_nix, _) = window[1];
            self.inner.add_edge(
                from_nix,
                to_nix,
                MemEdge {
                    kind: EdgeKind::TemporalNext,
                    weight: 1.0,
                    created_at: Utc::now(),
                },
            );
        }

        // Update last_conversation to the chronologically newest survivor
        // (None if no conversation nodes remain).
        self.last_conversation = survivors.last().map(|&(nix, _)| nix);
    }
1506
1507    /// Remove a node's entries from all secondary indexes.
1508    fn remove_from_indexes(
1509        &mut self,
1510        nix: NodeIndex,
1511        key: &str,
1512        layer: Layer,
1513        fact_id: Option<&str>,
1514    ) {
1515        if let Some(fid) = fact_id {
1516            self.by_fact_id.remove(fid);
1517        }
1518        if let Some(entries) = self.by_key.get_mut(key) {
1519            entries.retain(|&n| n != nix);
1520            if entries.is_empty() {
1521                self.by_key.remove(key);
1522            }
1523        }
1524        let layer_idx = (layer as usize).saturating_sub(1).min(3);
1525        self.by_layer[layer_idx].retain(|&n| n != nix);
1526    }
1527
1528    pub fn clear(&mut self) {
1529        self.inner.clear();
1530        self.by_fact_id.clear();
1531        self.by_key.clear();
1532        for v in &mut self.by_layer {
1533            v.clear();
1534        }
1535        self.last_conversation = None;
1536    }
1537}
1538
/// `Default` simply mirrors `MemoryGraph::new()`, so the graph can be built
/// via `MemoryGraph::default()` or struct-update syntax.
impl Default for MemoryGraph {
    fn default() -> Self {
        Self::new()
    }
}
1544
/// Tokenize text on common delimiters for seed matching.
///
/// Splits on whitespace plus `/ : - _ . ,`, lowercases every piece, and
/// drops empties. The result is a set, so duplicate tokens collapse.
fn tokenize(text: &str) -> HashSet<String> {
    const DELIMS: &[char] = &['/', ':', '-', '_', '.', ','];
    text.split(|c: char| c.is_whitespace() || DELIMS.contains(&c))
        .map(|piece| piece.trim().to_lowercase())
        .filter(|piece| !piece.is_empty())
        .collect()
}
1554
/// Syntax noise tokens common across all languages.
/// Combined with the per-language list in `tokenize_code` so retrieval keys
/// on identifiers rather than keywords every snippet shares.
const CODE_NOISE_COMMON: &[&str] = &[
    "return", "if", "else", "for", "while", "true", "false", "new", "this", "static", "void",
    "null", "break", "continue", "try", "catch", "throw",
];
1560
1561/// Per-language noise tokens — keywords that carry no semantic meaning for retrieval.
1562fn noise_for_language(lang: CodeLanguage) -> &'static [&'static str] {
1563    match lang {
1564        CodeLanguage::Rust => &[
1565            "fn", "let", "const", "pub", "mut", "use", "mod", "crate", "super", "struct", "enum",
1566            "impl", "trait", "async", "await", "where", "type", "match", "ref", "move", "dyn",
1567            "unsafe",
1568        ],
1569        CodeLanguage::Python => &[
1570            "def", "class", "import", "from", "pass", "none", "self", "elif", "except", "finally",
1571            "yield", "lambda", "nonlocal", "global", "assert", "with", "as", "in", "is", "not",
1572            "and", "or",
1573        ],
1574        CodeLanguage::JavaScript | CodeLanguage::TypeScript => &[
1575            "function",
1576            "const",
1577            "let",
1578            "var",
1579            "export",
1580            "default",
1581            "async",
1582            "await",
1583            "typeof",
1584            "instanceof",
1585            "undefined",
1586            "require",
1587        ],
1588        CodeLanguage::Go => &[
1589            "func",
1590            "package",
1591            "import",
1592            "defer",
1593            "go",
1594            "chan",
1595            "select",
1596            "range",
1597            "switch",
1598            "case",
1599            "fallthrough",
1600        ],
1601        CodeLanguage::Java => &[
1602            "public",
1603            "private",
1604            "protected",
1605            "class",
1606            "interface",
1607            "extends",
1608            "implements",
1609            "final",
1610            "abstract",
1611            "synchronized",
1612            "volatile",
1613            "import",
1614            "package",
1615            "throws",
1616        ],
1617        CodeLanguage::Cpp => &[
1618            "include",
1619            "namespace",
1620            "using",
1621            "template",
1622            "typename",
1623            "virtual",
1624            "override",
1625            "const",
1626            "auto",
1627            "inline",
1628            "extern",
1629            "typedef",
1630            "class",
1631            "struct",
1632            "enum",
1633        ],
1634        CodeLanguage::Shell => &[
1635            "echo", "fi", "do", "done", "then", "esac", "elif", "local", "export", "readonly",
1636            "set",
1637        ],
1638        CodeLanguage::Sql => &[
1639            "select", "from", "where", "and", "or", "not", "insert", "into", "update", "set",
1640            "delete", "create", "alter", "drop", "table", "join", "on", "as", "order", "by",
1641            "group", "having", "limit",
1642        ],
1643        CodeLanguage::Unknown => &[
1644            "fn", "let", "const", "var", "pub", "mut", "return", "def", "class", "import", "from",
1645            "function", "export", "default",
1646        ],
1647    }
1648}
1649
/// Split camelCase and PascalCase identifiers into parts.
/// e.g. "verifyJwtToken" → ["verify", "jwt", "token"]
///      "HTMLParser" → ["html", "parser"]
fn split_camel_case(s: &str) -> Vec<String> {
    let chars: Vec<char> = s.chars().collect();
    let mut words = Vec::new();
    let mut word = String::new();

    for (i, &c) in chars.iter().enumerate() {
        // Non-alphanumeric characters end the current word outright.
        if !c.is_alphanumeric() {
            if !word.is_empty() {
                words.push(std::mem::take(&mut word).to_lowercase());
            }
            continue;
        }
        // An uppercase letter starts a new word in two cases:
        //   1. the previous char was lowercase (camelCase boundary), or
        //   2. the previous char was uppercase and the next one is lowercase
        //      (end of an acronym: HTMLParser → HTML | Parser).
        // A non-empty `word` implies i >= 1, so chars[i - 1] is safe.
        if c.is_uppercase() && !word.is_empty() {
            let after_lower = chars[i - 1].is_lowercase();
            let acronym_end = chars[i - 1].is_uppercase()
                && chars.get(i + 1).map_or(false, |next| next.is_lowercase());
            if after_lower || acronym_end {
                words.push(std::mem::take(&mut word).to_lowercase());
            }
        }
        word.push(c);
    }
    if !word.is_empty() {
        words.push(word.to_lowercase());
    }
    words
}
1688
1689/// Code-aware tokenizer: splits camelCase/PascalCase, filters language-specific noise.
1690fn tokenize_code(text: &str, lang: CodeLanguage) -> HashSet<String> {
1691    let mut noise: HashSet<&str> = CODE_NOISE_COMMON.iter().copied().collect();
1692    noise.extend(noise_for_language(lang));
1693
1694    let raw_tokens: Vec<String> = text
1695        .split(|c: char| {
1696            c.is_whitespace()
1697                || c == '/'
1698                || c == ':'
1699                || c == '-'
1700                || c == '_'
1701                || c == '.'
1702                || c == ','
1703                || c == '('
1704                || c == ')'
1705                || c == '{'
1706                || c == '}'
1707                || c == '['
1708                || c == ']'
1709                || c == ';'
1710                || c == '"'
1711                || c == '\''
1712                || c == '<'
1713                || c == '>'
1714                || c == '='
1715                || c == '&'
1716                || c == '|'
1717                || c == '!'
1718                || c == '#'
1719                || c == '*'
1720                || c == '+'
1721        })
1722        .map(|s| s.trim().to_string())
1723        .filter(|s| !s.is_empty())
1724        .collect();
1725
1726    let mut tokens = HashSet::new();
1727    for raw in &raw_tokens {
1728        let parts = split_camel_case(raw);
1729        for part in parts {
1730            if part.len() >= 2 && !noise.contains(part.as_str()) {
1731                tokens.insert(part);
1732            }
1733        }
1734    }
1735    tokens
1736}
1737
1738/// Dispatch tokenization based on content type.
1739pub fn tokenize_for_content(text: &str, content_type: ContentType) -> HashSet<String> {
1740    match content_type {
1741        ContentType::Code(lang) => tokenize_code(text, lang),
1742        _ => tokenize(text),
1743    }
1744}
1745
/// Minimal suffix-stripping stemmer.
///
/// Lowercases the word, then strips the first recognized English suffix
/// (longest candidates listed first) that leaves a stem of at least 3
/// characters; otherwise drops a trailing plural 's' (but not "ss").
/// Words shorter than 4 bytes are returned lowercased, unchanged.
pub fn stem(word: &str) -> String {
    let lower = word.to_lowercase();
    if lower.len() < 4 {
        return lower;
    }

    // Ordered longest-first so e.g. "ation" is tried before shorter tails.
    const SUFFIXES: &[&str] = &[
        "ation", "tion", "ment", "ness", "able", "ible", "ence", "ance", "ing", "ful", "ous",
        "ive", "ize", "ise", "ify", "ate", "ed", "er", "ly", "al", "es",
    ];
    let stripped = SUFFIXES
        .iter()
        .filter_map(|suffix| lower.strip_suffix(*suffix))
        .find(|root| root.len() >= 3);
    if let Some(root) = stripped {
        return root.to_string();
    }

    // Plural 's' — leave double-'s' endings ("class") alone.
    if lower.ends_with('s') && !lower.ends_with("ss") && lower.len() > 3 {
        let mut singular = lower;
        singular.pop();
        return singular;
    }
    lower
}
1772
/// Synonym groups for cross-modal matching.
/// Each group contains terms that should match each other.
///
/// `synonym_expand` matches a token against a group either literally or by
/// shared stem, so abbreviations ("db", "req") are listed explicitly next to
/// the full words they stand for.
/// NOTE(review): some members are very generic ("token", "verify", "result",
/// "event") and may over-expand unrelated queries — confirm against
/// retrieval quality before extending these groups.
const SYNONYM_GROUPS: &[&[&str]] = &[
    // Authentication / authorization
    &[
        "auth",
        "authenticate",
        "authorization",
        "credential",
        "login",
        "jwt",
        "token",
        "verify",
    ],
    // Databases
    &[
        "db", "database", "sql", "query", "postgres", "sqlite", "mysql",
    ],
    // Failures
    &["err", "error", "exception", "panic", "fail", "failure"],
    // Configuration
    &["config", "configuration", "setting", "preference", "option"],
    // Messaging
    &["msg", "message", "notification", "alert", "event"],
    // HTTP requests / routing
    &["req", "request", "http", "api", "endpoint", "route"],
    // Responses / results
    &["resp", "response", "reply", "result", "output"],
    // Memory / storage
    &["mem", "memory", "cache", "buffer", "storage"],
    // Invocation
    &["exec", "execute", "run", "invoke", "call", "dispatch"],
    // Deserialization
    &["parse", "deserialize", "decode", "unmarshal", "extract"],
    // Serialization
    &["serial", "serialize", "encode", "marshal", "format"],
    // Navigation
    &["nav", "navigate", "redirect", "goto"],
];
1800
1801/// Find synonym expansions for a token.
1802pub fn synonym_expand(token: &str) -> HashSet<String> {
1803    let mut result = HashSet::new();
1804    let lower = token.to_lowercase();
1805    let stemmed = stem(&lower);
1806    for group in SYNONYM_GROUPS {
1807        let matches = group.iter().any(|&t| t == lower || stem(t) == stemmed);
1808        if matches {
1809            for &t in *group {
1810                result.insert(t.to_string());
1811            }
1812        }
1813    }
1814    result
1815}
1816
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal Layer-2 fact node with the given key/value/fact_id.
    /// All other fields get neutral defaults: "global" scope, "peer"
    /// authority, no expiry, not a constraint.
    fn make_fact(key: &str, value: &str, fact_id: &str) -> MemNode {
        MemNode {
            kind: MemKind::Fact,
            layer: 2,
            key: key.to_string(),
            value: value.to_string(),
            fact_id: Some(fact_id.to_string()),
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::default(),
            metadata: FactMetadata::default(),
        }
    }

    /// Inserted facts are retrievable by fact_id, with a matching NodeIndex.
    #[test]
    fn insert_and_lookup() {
        let mut g = MemoryGraph::new();
        let nix = g.insert(make_fact("language", "Rust", "f1"));
        let (found_nix, node) = g.get_by_fact_id("f1").unwrap();
        assert_eq!(found_nix, nix);
        assert_eq!(node.value, "Rust");
    }

    /// Superseding flips the old fact to FactSuperseded and it stops
    /// counting toward the valid-fact total.
    #[test]
    fn supersession() {
        let mut g = MemoryGraph::new();
        g.insert(make_fact("db", "PostgreSQL", "f1"));
        let new_nix = g.insert(make_fact("db", "SQLite", "f2"));
        let invalidated = g.supersede(new_nix, "f1");

        assert!(!invalidated.is_empty());
        let (_, old) = g.get_by_fact_id("f1").unwrap();
        assert_eq!(old.kind, MemKind::FactSuperseded);
        assert_eq!(g.valid_fact_count(), 1);
    }

    /// Superseding a fact also invalidates nodes linked to it via DependsOn.
    #[test]
    fn dependency_cascade() {
        let mut g = MemoryGraph::new();
        let base = g.insert(make_fact("base", "100", "f1"));
        let derived = g.insert(make_fact("derived", "200", "f2"));
        g.link(derived, base, EdgeKind::DependsOn, 1.0);

        let new_base = g.insert(make_fact("base", "150", "f3"));
        let invalidated = g.supersede(new_base, "f1");

        // derived should be in invalidated set (it depends on f1)
        assert!(invalidated.contains(&derived));
    }

    /// find_seeds matches query terms against node keys.
    #[test]
    fn seed_finding() {
        let mut g = MemoryGraph::new();
        g.insert(make_fact("navigate /ops/dashboard", "Page loads", "f1"));
        g.insert(make_fact("click button", "Button clicked", "f2"));

        let seeds = g.find_seeds("dashboard", 5);
        assert!(!seeds.is_empty()); // should find f1 via substring match
    }

    /// retrieve spreads activation along edges; activation decays with
    /// distance from the seed, so nearer nodes rank higher.
    #[test]
    fn spreading_activation() {
        let mut g = MemoryGraph::new();
        let a = g.insert(make_fact("project", "CAR runtime", "f1"));
        let b = g.insert(make_fact("language", "Rust", "f2"));
        let c = g.insert(make_fact("testing", "uses proptest", "f3"));
        g.link(a, b, EdgeKind::RelatedTo, 0.8);
        g.link(b, c, EdgeKind::RelatedTo, 0.7);

        let hits = g.retrieve(&[a], 3, 10, 0.7, 0.05);
        // Should find b (1 hop) and c (2 hops) via RelatedTo edges
        assert!(hits.len() >= 2);
        assert!(hits[0].activation > hits[1].activation); // closer = higher
    }

    /// Inserting consecutive Conversation nodes auto-links them with a
    /// TemporalNext edge (first → second).
    #[test]
    fn conversation_temporal_links() {
        let mut g = MemoryGraph::new();
        let c1 = g.insert(MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: "user".to_string(),
            value: "How's the project?".to_string(),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        });
        let c2 = g.insert(MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: "assistant".to_string(),
            value: "Going well".to_string(),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        });

        // Should have TemporalNext edge automatically
        assert_eq!(g.edge_count(), 1);
        let edge = g.inner.edges(c1).next().unwrap();
        assert_eq!(edge.weight().kind, EdgeKind::TemporalNext);
        assert_eq!(edge.target(), c2);
    }

    /// is_constraint nodes show up in constraints() but still count as
    /// ordinary valid facts.
    #[test]
    fn constraints_tracked() {
        let mut g = MemoryGraph::new();
        let mut node = make_fact("budget", "Max $500K", "c1");
        node.is_constraint = true;
        g.insert(node);
        g.insert(make_fact("language", "Rust", "f1"));

        assert_eq!(g.constraints().len(), 1);
        assert_eq!(g.valid_fact_count(), 2);
    }

    /// GC keeps superseded nodes within max_depth hops of an active fact
    /// and removes the deeper tail of the chain.
    #[test]
    fn gc_superseded_removes_deep_chains() {
        // Build chain: f4 supersedes f3 supersedes f2 supersedes f1
        let mut g = MemoryGraph::new();
        let _n1 = g.insert(make_fact("db", "v1", "f1"));
        let n2 = g.insert(make_fact("db", "v2", "f2"));
        g.supersede(n2, "f1"); // f1 → FactSuperseded
        let n3 = g.insert(make_fact("db", "v3", "f3"));
        g.supersede(n3, "f2"); // f2 → FactSuperseded
        let n4 = g.insert(make_fact("db", "v4", "f4"));
        g.supersede(n4, "f3"); // f3 → FactSuperseded

        // Before GC: 4 nodes (1 active, 3 superseded)
        assert_eq!(g.node_count(), 4);

        // GC with max_depth=1: keep f3 (1 hop from active f4), remove f1 and f2
        let removed = g.gc_superseded(1);
        assert_eq!(removed, 2);
        assert_eq!(g.node_count(), 2);

        // f4 (active) and f3 (1 hop) should remain
        assert!(g.get_by_fact_id("f4").is_some());
        // f1 and f2 should be gone
        assert!(g.get_by_fact_id("f1").is_none());
        assert!(g.get_by_fact_id("f2").is_none());
    }

    /// A chain that is entirely within max_depth is left untouched.
    #[test]
    fn gc_superseded_retains_shallow_chain() {
        // f2 supersedes f1 — depth 1, should be retained with max_depth=1
        let mut g = MemoryGraph::new();
        let _n1 = g.insert(make_fact("db", "old", "f1"));
        let n2 = g.insert(make_fact("db", "new", "f2"));
        g.supersede(n2, "f1");

        let removed = g.gc_superseded(1);
        assert_eq!(removed, 0);
        assert_eq!(g.node_count(), 2);
    }

    /// A superseded node with no active ancestor is removed regardless of depth.
    #[test]
    fn gc_superseded_removes_orphaned() {
        // A superseded node with no active ancestor (orphaned chain)
        let mut g = MemoryGraph::new();
        let mut orphan = make_fact("db", "orphan", "f1");
        orphan.kind = MemKind::FactSuperseded;
        g.insert(orphan);

        let removed = g.gc_superseded(1);
        assert_eq!(removed, 1);
        assert_eq!(g.node_count(), 0);
    }

    /// GC on a graph with no superseded nodes removes nothing.
    #[test]
    fn gc_superseded_noop_on_clean_graph() {
        let mut g = MemoryGraph::new();
        g.insert(make_fact("a", "1", "f1"));
        g.insert(make_fact("b", "2", "f2"));

        let removed = g.gc_superseded(1);
        assert_eq!(removed, 0);
        assert_eq!(g.node_count(), 2);
    }

    /// prune_expired drops nodes whose expires_at is in the past and keeps
    /// the rest.
    #[test]
    fn prune_expired_removes_ttl_nodes() {
        let mut g = MemoryGraph::new();
        let now = Utc::now();
        let mut expired = make_fact("temp", "gone", "e1");
        expired.kind = MemKind::Environment;
        expired.layer = 4;
        expired.expires_at = Some(now - chrono::Duration::hours(1));
        g.insert(expired);

        let mut fresh = make_fact("temp", "here", "e2");
        fresh.kind = MemKind::Environment;
        fresh.layer = 4;
        fresh.expires_at = Some(now + chrono::Duration::hours(1));
        g.insert(fresh);

        g.prune_expired(now);
        assert_eq!(g.node_count(), 1);
    }

    /// prune_expired must also purge the by_key / by_layer / by_fact_id
    /// side indexes, not just the graph nodes.
    #[test]
    fn prune_expired_cleans_by_key_and_by_layer() {
        let mut g = MemoryGraph::new();
        let now = Utc::now();

        // Create an expired environment node
        let mut expired = make_fact("env_temp", "gone", "e1");
        expired.kind = MemKind::Environment;
        expired.layer = 4;
        expired.expires_at = Some(now - chrono::Duration::hours(1));
        g.insert(expired);

        // Verify indexes are populated before pruning
        assert!(g.by_key.contains_key("env_temp"));
        let layer_idx = (4usize).saturating_sub(1).min(3); // index 3
        assert_eq!(g.by_layer[layer_idx].len(), 1);

        g.prune_expired(now);

        // After pruning, by_key should not contain the key
        assert!(!g.by_key.contains_key("env_temp"));
        // by_layer should be clean
        assert_eq!(g.by_layer[layer_idx].len(), 0);
        // by_fact_id should be clean
        assert!(g.get_by_fact_id("e1").is_none());
    }

    /// gc_superseded must also purge removed nodes from the by_key and
    /// by_layer side indexes.
    #[test]
    fn gc_superseded_cleans_by_key_and_by_layer() {
        // Build chain: f3 supersedes f2 supersedes f1
        let mut g = MemoryGraph::new();
        let _n1 = g.insert(make_fact("db", "v1", "f1"));
        let n2 = g.insert(make_fact("db", "v2", "f2"));
        g.supersede(n2, "f1");
        let n3 = g.insert(make_fact("db", "v3", "f3"));
        g.supersede(n3, "f2");

        // Before GC: by_key["db"] has 3 entries, by_layer[1] has 3 entries
        assert_eq!(g.by_key.get("db").map(|v| v.len()).unwrap_or(0), 3);
        let layer_idx = (2usize).saturating_sub(1).min(3); // index 1
        let initial_layer_count = g.by_layer[layer_idx].len();
        assert_eq!(initial_layer_count, 3);

        // GC with max_depth=0: only f3 (active) kept, f1 and f2 removed
        let removed = g.gc_superseded(0);
        assert_eq!(removed, 2);

        // by_key["db"] should only have the active node (f3)
        assert_eq!(g.by_key.get("db").map(|v| v.len()).unwrap_or(0), 1);
        // by_layer should only have the active node
        assert_eq!(g.by_layer[layer_idx].len(), 1);
    }

    /// remove_conversation_nodes must also purge the by_key and by_layer
    /// side indexes for every removed node.
    #[test]
    fn remove_conversation_nodes_cleans_by_key_and_by_layer() {
        let mut g = MemoryGraph::new();
        let c1 = g.insert(MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: "user".to_string(),
            value: "Hello".to_string(),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        });
        let c2 = g.insert(MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: "assistant".to_string(),
            value: "Hi there".to_string(),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        });

        // Verify indexes populated
        assert!(g.by_key.contains_key("user"));
        assert!(g.by_key.contains_key("assistant"));
        let layer_idx = (3usize).saturating_sub(1).min(3); // index 2
        assert_eq!(g.by_layer[layer_idx].len(), 2);

        // Remove both conversation nodes
        g.remove_conversation_nodes(&[c1, c2]);

        // by_key should be clean
        assert!(!g.by_key.contains_key("user"));
        assert!(!g.by_key.contains_key("assistant"));
        // by_layer should be clean
        assert_eq!(g.by_layer[layer_idx].len(), 0);
        // Graph should be empty
        assert_eq!(g.node_count(), 0);
    }
}