Skip to main content

zeph_memory/
router.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0
3
/// Which memory backend(s) a query should be dispatched to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MemoryRoute {
    /// `SQLite` FTS5 full-text search on its own. Fast; suited to keyword and exact queries.
    Keyword,
    /// Qdrant vector search on its own. Suited to semantic / conceptual queries.
    Semantic,
    /// Query both backends and merge the results with reciprocal rank fusion.
    Hybrid,
    /// Graph retrieval using BFS traversal; suited to relationship queries.
    /// Callers treat this variant as `Hybrid` when the `graph-memory` feature is disabled.
    Graph,
}
17
18/// Decides which memory backend(s) to query for a given input.
19pub trait MemoryRouter: Send + Sync {
20    /// Route a query to the appropriate backend(s).
21    fn route(&self, query: &str) -> MemoryRoute;
22}
23
/// Heuristic-based memory router.
///
/// Stateless: routing looks only at the text of the query. The rules are evaluated
/// in this order and the first match wins:
///
/// 1. The query contains a relationship phrase (ASCII case-insensitive substring match
///    against `RELATIONSHIP_PATTERNS`) → `Graph`
/// 2. The first word is a question word (`QUESTION_WORDS`) → `Semantic`
/// 3. The query contains a structural code pattern (`/` or `::`) → `Keyword`
/// 4. The query has 6 or more whitespace-separated words → `Semantic`
/// 5. The query has 3 or fewer words (including the empty query) → `Keyword`
/// 6. A 4–5 word query containing a pure `snake_case` identifier → `Keyword`
/// 7. Anything else → `Hybrid`
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicRouter;
32
/// Leading words that mark a query as a natural-language question or a request
/// for an explanation. Compared ASCII case-insensitively.
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];

/// Simple substrings that signal a relationship query (checked via `str::contains`).
///
/// The router checks these unconditionally; whether the resulting `Graph` route is
/// honoured is decided by callers, which treat it as `Hybrid` when the `graph-memory`
/// feature is disabled.
const RELATIONSHIP_PATTERNS: &[&str] = &[
    "related to",
    "relates to",
    "connection between",
    "relationship",
    "opinion on",
    "thinks about",
    "preference for",
    "history of",
    "know about",
];

/// Returns `true` when the first entry of `words` is one of [`QUESTION_WORDS`].
///
/// Punctuation attached to either end of the word (`why?`, `"How`, `Explain:`) is
/// ignored so that short or quoted questions are still recognised. Underscores are
/// kept, so identifiers such as `what_if` are not mistaken for question words.
/// An empty slice yields `false`.
fn starts_with_question(words: &[&str]) -> bool {
    words.first().is_some_and(|first| {
        // Strip surrounding punctuation only; interior characters are left alone so
        // tokens like `how-to` or `what's` keep their original meaning.
        let bare = first.trim_matches(|c: char| !(c.is_alphanumeric() || c == '_'));
        QUESTION_WORDS
            .iter()
            .any(|candidate| bare.eq_ignore_ascii_case(candidate))
    })
}
56
/// Reports whether `word` is a pure `snake_case` identifier.
///
/// A word qualifies when every character is an ASCII lowercase letter, an ASCII
/// digit or `_`, it contains at least one `_`, and it contains at least one letter
/// (so `1_2` and `_` are rejected). The empty string is rejected.
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for ch in word.chars() {
        match ch {
            '_' => saw_underscore = true,
            'a'..='z' => saw_letter = true,
            '0'..='9' => {}
            // Uppercase, punctuation, whitespace or non-ASCII: not an identifier.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
71
72impl MemoryRouter for HeuristicRouter {
73    fn route(&self, query: &str) -> MemoryRoute {
74        let words: Vec<&str> = query.split_whitespace().collect();
75        let word_count = words.len();
76
77        // Relationship queries go to graph retrieval (feature-gated at call site)
78        {
79            let lower = query.to_ascii_lowercase();
80            let has_relationship = RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p));
81            if has_relationship {
82                return MemoryRoute::Graph;
83            }
84        }
85
86        // Code-like patterns that unambiguously indicate keyword search:
87        // file paths (contain '/'), Rust paths (contain '::')
88        let has_structural_code_pattern = query.contains('/') || query.contains("::");
89
90        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
91        // but only if the query does NOT start with a question word
92        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
93        let question = starts_with_question(&words);
94
95        if has_structural_code_pattern && !question {
96            return MemoryRoute::Keyword;
97        }
98
99        // Long NL queries → semantic, regardless of snake_case tokens
100        if question || word_count >= 6 {
101            return MemoryRoute::Semantic;
102        }
103
104        // Short queries without question words → keyword
105        if word_count <= 3 && !question {
106            return MemoryRoute::Keyword;
107        }
108
109        // Short code-like patterns → keyword
110        if has_snake_case {
111            return MemoryRoute::Keyword;
112        }
113
114        // Default
115        MemoryRoute::Hybrid
116    }
117}
118
#[cfg(test)]
mod tests {
    //! Unit tests for `HeuristicRouter` and its helper predicates.

    use super::*;

    /// Routes `q` through a fresh `HeuristicRouter`.
    fn route(q: &str) -> MemoryRoute {
        HeuristicRouter.route(q)
    }

    #[test]
    fn rust_path_routes_keyword() {
        assert_eq!(route("zeph_memory::recall"), MemoryRoute::Keyword);
    }

    #[test]
    fn file_path_routes_keyword() {
        assert_eq!(
            route("crates/zeph-core/src/agent/mod.rs"),
            MemoryRoute::Keyword
        );
    }

    #[test]
    fn pure_snake_case_routes_keyword() {
        assert_eq!(route("memory_limit"), MemoryRoute::Keyword);
        assert_eq!(route("error_handling"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_with_snake_case_routes_semantic() {
        // "what is the memory_limit setting" — question word overrides snake_case heuristic
        assert_eq!(
            route("what is the memory_limit setting"),
            MemoryRoute::Semantic
        );
        assert_eq!(route("how does error_handling work"), MemoryRoute::Semantic);
    }

    #[test]
    fn short_query_routes_keyword() {
        assert_eq!(route("context compaction"), MemoryRoute::Keyword);
        assert_eq!(route("qdrant"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_routes_semantic() {
        assert_eq!(
            route("what is the purpose of semantic memory"),
            MemoryRoute::Semantic
        );
        assert_eq!(route("how does the agent loop work"), MemoryRoute::Semantic);
        assert_eq!(route("why does compaction fail"), MemoryRoute::Semantic);
        assert_eq!(route("explain context compression"), MemoryRoute::Semantic);
    }

    #[test]
    fn long_natural_query_routes_semantic() {
        assert_eq!(
            route("the agent keeps running out of context during long conversations"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn medium_non_question_routes_hybrid() {
        // 4-5 words, no question word, no code pattern
        assert_eq!(route("context window token budget"), MemoryRoute::Hybrid);
    }

    #[test]
    fn medium_query_with_snake_case_routes_keyword() {
        // 4-5 words, no question word, no path: the snake_case token tips it to Keyword.
        assert_eq!(route("set memory_limit to 512"), MemoryRoute::Keyword);
        assert_eq!(
            route("tune error_handling retry budget now"),
            MemoryRoute::Keyword
        );
    }

    #[test]
    fn empty_query_routes_keyword() {
        // 0 words, no question → keyword (short path)
        assert_eq!(route(""), MemoryRoute::Keyword);
    }

    #[test]
    fn whitespace_only_query_routes_keyword() {
        // Splitting on whitespace yields zero words, same as the empty query.
        assert_eq!(route("  \t \n"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_word_only_routes_semantic() {
        // A lone question word has word_count = 1, but the question-word check runs
        // before the short-query check, so the result is Semantic rather than Keyword.
        assert_eq!(route("what"), MemoryRoute::Semantic);
    }

    #[test]
    fn question_with_path_routes_semantic() {
        // A leading question word overrides the structural code pattern.
        assert_eq!(
            route("what is crates/zeph-core/src/lib.rs"),
            MemoryRoute::Semantic
        );
        assert_eq!(route("explain zeph_memory::recall"), MemoryRoute::Semantic);
    }

    #[test]
    fn camel_case_does_not_route_keyword_without_pattern() {
        // CamelCase words without :: or / — 4-word query without question word → Hybrid
        // (4 words: no question, no snake_case, no structural code pattern → Hybrid)
        assert_eq!(
            route("SemanticMemory configuration and options"),
            MemoryRoute::Hybrid
        );
    }

    #[test]
    fn relationship_query_routes_graph() {
        assert_eq!(
            route("what is user's opinion on neovim"),
            MemoryRoute::Graph
        );
        assert_eq!(
            route("show the relationship between Alice and Bob"),
            MemoryRoute::Graph
        );
    }

    #[test]
    fn relationship_query_related_to_routes_graph() {
        assert_eq!(
            route("how is Rust related to this project"),
            MemoryRoute::Graph
        );
        assert_eq!(
            route("how does this relates to the config"),
            MemoryRoute::Graph
        );
    }

    #[test]
    fn relationship_know_about_routes_graph() {
        assert_eq!(route("what do I know about neovim"), MemoryRoute::Graph);
    }

    #[test]
    fn relationship_match_is_case_insensitive() {
        assert_eq!(
            route("Connection Between Alice and Bob"),
            MemoryRoute::Graph
        );
    }

    #[test]
    fn relationship_takes_priority_over_path() {
        // The relationship check runs before the structural code-pattern check.
        assert_eq!(route("history of src/main.rs"), MemoryRoute::Graph);
    }

    #[test]
    fn translate_does_not_route_graph() {
        // "translate" contains "relate" substring but is not in RELATIONSHIP_PATTERNS
        // (we removed bare "relate", keeping only "related to" and "relates to")
        assert_ne!(route("translate this code to Python"), MemoryRoute::Graph);
    }

    #[test]
    fn non_relationship_stays_semantic() {
        assert_eq!(
            route("find similar code patterns in the codebase"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn short_keyword_unchanged() {
        assert_eq!(route("qdrant"), MemoryRoute::Keyword);
    }

    // Regression tests for #1661: long NL queries with snake_case must go to Semantic
    #[test]
    fn long_nl_with_snake_case_routes_semantic() {
        assert_eq!(
            route("Use memory_search to find information about Rust ownership"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn short_snake_case_only_routes_keyword() {
        assert_eq!(route("memory_search"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_with_snake_case_short_routes_semantic() {
        assert_eq!(
            route("What does memory_search return?"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn is_pure_snake_case_accepts_only_lowercase_identifiers_with_underscore() {
        assert!(is_pure_snake_case("memory_limit"));
        assert!(is_pure_snake_case("v2_api"));
        assert!(!is_pure_snake_case(""));
        assert!(!is_pure_snake_case("memory"));
        assert!(!is_pure_snake_case("_"));
        assert!(!is_pure_snake_case("1_2"));
        assert!(!is_pure_snake_case("Memory_Limit"));
        assert!(!is_pure_snake_case("memory_search?"));
    }
}