Skip to main content

zagens_topic_memory/
retrieve.rs

1//! k-hop subgraph retrieval for token-efficient injection (B2.3).
2
3use std::collections::{HashMap, HashSet};
4
5use crate::extract::extract_topics;
6use crate::graph::{BlockedPoint, CognitiveTrail, GRAPH_SCHEMA_VERSION, PheromoneGraph};
7
8/// Default BFS depth from seed topics (UNDERLYING §2.3).
9pub const DEFAULT_RETRIEVE_K_HOPS: usize = 2;
10
11fn parse_edge_endpoints(key: &str) -> Option<(&str, &str)> {
12    let mut parts = key.split('→');
13    let a = parts.next()?;
14    let b = parts.next()?;
15    Some((a, b))
16}
17
18fn neighbors(graph: &PheromoneGraph, node: &str) -> Vec<String> {
19    let mut out = Vec::new();
20    for key in graph.edges.keys() {
21        if let Some((a, b)) = parse_edge_endpoints(key) {
22            if a == node && graph.nodes.contains_key(b) {
23                out.push(b.to_string());
24            } else if b == node && graph.nodes.contains_key(a) {
25                out.push(a.to_string());
26            }
27        }
28    }
29    out
30}
31
32/// Collect seed topics that exist in the graph; fall back to fuzzy overlap with node keys.
33fn resolve_seeds(graph: &PheromoneGraph, seeds: &[String]) -> HashSet<String> {
34    let mut resolved = HashSet::new();
35    for s in seeds {
36        if graph.nodes.contains_key(s) {
37            resolved.insert(s.clone());
38            continue;
39        }
40        for key in graph.nodes.keys() {
41            if key.contains(s.as_str()) || s.contains(key.as_str()) {
42                resolved.insert(key.clone());
43            }
44        }
45    }
46    resolved
47}
48
49/// BFS k-hop neighborhood around `seeds` (undirected over edge endpoints).
50#[must_use]
51pub fn retrieve_k_hop_subgraph(
52    graph: &PheromoneGraph,
53    seeds: &[String],
54    k: usize,
55) -> PheromoneGraph {
56    let mut visited = resolve_seeds(graph, seeds);
57    if visited.is_empty() {
58        return graph.clone();
59    }
60
61    let mut frontier: Vec<String> = visited.iter().cloned().collect();
62    for _ in 0..k {
63        let mut next = Vec::new();
64        for node in &frontier {
65            for nb in neighbors(graph, node) {
66                if visited.insert(nb.clone()) {
67                    next.push(nb);
68                }
69            }
70        }
71        if next.is_empty() {
72            break;
73        }
74        frontier = next;
75    }
76
77    induced_subgraph(graph, &visited)
78}
79
80/// Build a subgraph containing only `nodes` and edges between them.
81#[must_use]
82pub fn induced_subgraph(graph: &PheromoneGraph, nodes: &HashSet<String>) -> PheromoneGraph {
83    if nodes.is_empty() {
84        return PheromoneGraph {
85            version: graph.version.clone(),
86            last_decay: graph.last_decay.clone(),
87            nodes: HashMap::new(),
88            edges: HashMap::new(),
89            blocked_points: Vec::new(),
90            recent_emotions: graph.recent_emotions.clone(),
91            trails: None,
92        };
93    }
94
95    let nodes_map: HashMap<_, _> = graph
96        .nodes
97        .iter()
98        .filter(|(k, _)| nodes.contains(*k))
99        .map(|(k, v)| (k.clone(), v.clone()))
100        .collect();
101
102    let edges: HashMap<_, _> = graph
103        .edges
104        .iter()
105        .filter(|(key, _)| {
106            parse_edge_endpoints(key)
107                .map(|(a, b)| nodes.contains(a) && nodes.contains(b))
108                .unwrap_or(false)
109        })
110        .map(|(k, v)| (k.clone(), v.clone()))
111        .collect();
112
113    let blocked_points: Vec<BlockedPoint> = graph
114        .blocked_points
115        .iter()
116        .filter(|b| nodes.contains(&b.node))
117        .cloned()
118        .collect();
119
120    let trails: Option<Vec<CognitiveTrail>> = graph.trails.as_ref().map(|ts| {
121        ts.iter()
122            .filter(|t| nodes.contains(&t.entry) || nodes.contains(&t.exit))
123            .cloned()
124            .collect()
125    });
126
127    PheromoneGraph {
128        version: GRAPH_SCHEMA_VERSION.to_string(),
129        last_decay: graph.last_decay.clone(),
130        nodes: nodes_map,
131        edges,
132        blocked_points,
133        recent_emotions: graph.recent_emotions.clone(),
134        trails,
135    }
136}
137
138/// Extract seeds from query text and return a k-hop subgraph.
139#[must_use]
140pub fn retrieve_for_query(graph: &PheromoneGraph, query: &str, k: usize) -> PheromoneGraph {
141    let seeds = extract_topics(query);
142    if seeds.is_empty() {
143        return graph.clone();
144    }
145    retrieve_k_hop_subgraph(graph, &seeds, k)
146}
147
148#[cfg(test)]
149mod tests {
150    use super::*;
151    use crate::engine::{empty_graph, update_graph};
152
153    #[test]
154    fn k_hop_limits_nodes() {
155        let mut g = empty_graph();
156        for _ in 0..3 {
157            g = update_graph(&g, "讨论 Rust 异步", "async await 很重要");
158            g = update_graph(&g, "Tokio runtime 调度", "executor 与 reactor");
159            g = update_graph(&g, "内存安全与所有权", "borrow checker");
160        }
161        let full_count = g.nodes.len();
162        assert!(full_count >= 2);
163        let seeds = vec!["rust".to_string()];
164        let sub = retrieve_k_hop_subgraph(&g, &seeds, 1);
165        assert!(sub.nodes.len() <= full_count);
166        assert!(!sub.nodes.is_empty());
167    }
168}