Skip to main content

zeph_memory/
router.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
/// Selects the memory backend, or combination of backends, that a query is sent to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MemoryRoute {
    /// Query only the full-text index (`SQLite` FTS5). Fast; suited to keyword and
    /// exact-match lookups.
    Keyword,
    /// Query only the vector store (Qdrant). Suited to semantic and conceptual lookups.
    Semantic,
    /// Query both backends and merge their results by reciprocal rank fusion.
    Hybrid,
}
14
15/// Decides which memory backend(s) to query for a given input.
16pub trait MemoryRouter: Send + Sync {
17    /// Route a query to the appropriate backend(s).
18    fn route(&self, query: &str) -> MemoryRoute;
19}
20
/// Heuristic-based memory router.
///
/// The router is stateless: the decision depends only on the query text. Rules are
/// evaluated in order and the first match wins:
///
/// 1. The query does not start with a question word and contains a code-like pattern
///    (a `/`, a `::`, or a pure `snake_case` word) → [`MemoryRoute::Keyword`].
/// 2. The query does not start with a question word and has at most three words
///    (this includes the empty query) → [`MemoryRoute::Keyword`].
/// 3. The query starts with a question word, or has six or more words →
///    [`MemoryRoute::Semantic`].
/// 4. Anything else (four or five words, no question word, no code-like pattern) →
///    [`MemoryRoute::Hybrid`].
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicRouter;
29
/// Lowercase words that mark a query as a natural-language question when they appear
/// as its first word.
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];
33
34fn starts_with_question(words: &[&str]) -> bool {
35    words
36        .first()
37        .is_some_and(|w| QUESTION_WORDS.iter().any(|qw| w.eq_ignore_ascii_case(qw)))
38}
39
/// Checks whether `word` looks like a pure `snake_case` identifier.
///
/// A word qualifies when every character is an ASCII lowercase letter, an ASCII digit,
/// or `_`, it contains at least one `_`, and at least one character is a letter
/// (so `""`, `"_"` and `"1_2"` are all rejected).
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for byte in word.bytes() {
        match byte {
            b'_' => saw_underscore = true,
            b'a'..=b'z' => saw_letter = true,
            b'0'..=b'9' => {}
            // Uppercase letters, punctuation and any byte of a non-ASCII character
            // disqualify the word outright.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
54
55impl MemoryRouter for HeuristicRouter {
56    fn route(&self, query: &str) -> MemoryRoute {
57        let words: Vec<&str> = query.split_whitespace().collect();
58        let word_count = words.len();
59
60        // Code-like patterns that unambiguously indicate keyword search:
61        // file paths (contain '/'), Rust paths (contain '::')
62        let has_structural_code_pattern = query.contains('/') || query.contains("::");
63
64        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
65        // but only if the query does NOT start with a question word
66        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
67        let question = starts_with_question(&words);
68
69        if has_structural_code_pattern && !question {
70            return MemoryRoute::Keyword;
71        }
72
73        if has_snake_case && !question {
74            return MemoryRoute::Keyword;
75        }
76
77        // Short queries without question words → keyword
78        if word_count <= 3 && !question {
79            return MemoryRoute::Keyword;
80        }
81
82        // Natural language questions or long queries → semantic
83        if question || word_count >= 6 {
84            return MemoryRoute::Semantic;
85        }
86
87        // Default
88        MemoryRoute::Hybrid
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Routes `q` through a [`HeuristicRouter`].
    fn route(q: &str) -> MemoryRoute {
        HeuristicRouter.route(q)
    }

    #[test]
    fn rust_path_routes_keyword() {
        assert_eq!(route("zeph_memory::recall"), MemoryRoute::Keyword);
    }

    #[test]
    fn file_path_routes_keyword() {
        assert_eq!(
            route("crates/zeph-core/src/agent/mod.rs"),
            MemoryRoute::Keyword
        );
    }

    #[test]
    fn pure_snake_case_routes_keyword() {
        assert_eq!(route("memory_limit"), MemoryRoute::Keyword);
        assert_eq!(route("error_handling"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_with_snake_case_routes_semantic() {
        // A leading question word overrides the snake_case heuristic.
        assert_eq!(
            route("what is the memory_limit setting"),
            MemoryRoute::Semantic
        );
        assert_eq!(route("how does error_handling work"), MemoryRoute::Semantic);
    }

    #[test]
    fn question_with_structural_pattern_routes_semantic() {
        // A leading question word also overrides the `::` and `/` heuristics.
        assert_eq!(
            route("how does zeph_memory::recall work"),
            MemoryRoute::Semantic
        );
        assert_eq!(
            route("where is crates/zeph-core/src/agent/mod.rs used"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn question_word_match_is_case_insensitive() {
        assert_eq!(route("What is qdrant"), MemoryRoute::Semantic);
        assert_eq!(route("EXPLAIN context compression"), MemoryRoute::Semantic);
    }

    #[test]
    fn question_word_must_be_first() {
        // "what" is not the first word, so the short-query rule applies.
        assert_eq!(route("qdrant what"), MemoryRoute::Keyword);
    }

    #[test]
    fn short_query_routes_keyword() {
        assert_eq!(route("context compaction"), MemoryRoute::Keyword);
        assert_eq!(route("qdrant"), MemoryRoute::Keyword);
    }

    #[test]
    fn question_routes_semantic() {
        assert_eq!(
            route("what is the purpose of semantic memory"),
            MemoryRoute::Semantic
        );
        assert_eq!(route("how does the agent loop work"), MemoryRoute::Semantic);
        assert_eq!(route("why does compaction fail"), MemoryRoute::Semantic);
        assert_eq!(route("explain context compression"), MemoryRoute::Semantic);
    }

    #[test]
    fn long_natural_query_routes_semantic() {
        assert_eq!(
            route("the agent keeps running out of context during long conversations"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn long_query_with_snake_case_routes_keyword() {
        // The snake_case rule is checked before the length rule.
        assert_eq!(
            route("the memory_limit setting is ignored after restart of agent"),
            MemoryRoute::Keyword
        );
    }

    #[test]
    fn medium_non_question_routes_hybrid() {
        // 4-5 words, no question word, no code pattern
        assert_eq!(route("context window token budget"), MemoryRoute::Hybrid);
    }

    #[test]
    fn word_count_boundaries() {
        // No question word and no code pattern: only the word count decides.
        assert_eq!(route("context window budget"), MemoryRoute::Keyword);
        assert_eq!(route("context window token budget"), MemoryRoute::Hybrid);
        assert_eq!(
            route("context window token budget allocation"),
            MemoryRoute::Hybrid
        );
        assert_eq!(
            route("context window token budget allocation strategy"),
            MemoryRoute::Semantic
        );
    }

    #[test]
    fn empty_query_routes_keyword() {
        // 0 words, no question → keyword (short path)
        assert_eq!(route(""), MemoryRoute::Keyword);
    }

    #[test]
    fn whitespace_only_query_routes_keyword() {
        // Whitespace splits into zero words, so this behaves like the empty query.
        assert_eq!(route("  \t\n "), MemoryRoute::Keyword);
    }

    #[test]
    fn question_word_only_routes_semantic() {
        // One word, but it is a question word: the short-query rule requires a
        // non-question, so the query falls through to the semantic rule.
        assert_eq!(route("what"), MemoryRoute::Semantic);
    }

    #[test]
    fn camel_case_does_not_route_keyword_without_pattern() {
        // CamelCase alone is not a code pattern: four words, no question word,
        // no snake_case, no `::` or `/` → Hybrid.
        assert_eq!(
            route("SemanticMemory configuration and options"),
            MemoryRoute::Hybrid
        );
    }
}