zeph_memory/
router.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use chrono::{DateTime, Duration, Utc};
5
6use crate::graph::EdgeType;
7
/// Classification of which memory backend(s) to query.
///
/// This is the value produced by [`MemoryRouter::route`] and by [`parse_route_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryRoute {
    /// Full-text search only (`SQLite` FTS5). Fast, good for keyword/exact queries.
    Keyword,
    /// Vector search only (Qdrant). Good for semantic/conceptual queries.
    Semantic,
    /// Both backends, results merged by reciprocal rank fusion.
    Hybrid,
    /// Graph-based retrieval via BFS traversal. Good for relationship queries.
    /// When the `graph-memory` feature is disabled, callers treat this as `Hybrid`.
    Graph,
    /// FTS5 search with a timestamp-range filter. Used for temporal/episodic queries
    /// ("what did we discuss yesterday", "last week's conversation about Rust").
    ///
    /// Known trade-off (MVP): skips vector search entirely for speed. Semantically similar
    /// but lexically different messages may be missed. Use the `Hybrid` route when semantic
    /// precision matters more than temporal filtering.
    Episodic,
}
28
/// Routing decision with confidence and optional LLM reasoning.
///
/// Returned by [`MemoryRouter::route_with_confidence`] and by the async routers.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
    /// The backend(s) selected for the query.
    pub route: MemoryRoute,
    /// Confidence in `[0, 1]`. `1.0` = certain, `0.5` = ambiguous.
    pub confidence: f32,
    /// Only populated when an LLM classifier was used.
    pub reasoning: Option<String>,
}
38
39/// Decides which memory backend(s) to query for a given input.
40pub trait MemoryRouter: Send + Sync {
41    /// Route a query to the appropriate backend(s).
42    fn route(&self, query: &str) -> MemoryRoute;
43
44    /// Route with a confidence signal. Default implementation wraps `route()` with confidence 1.0.
45    ///
46    /// Override this in routers that can express ambiguity (e.g. `HeuristicRouter`)
47    /// so that `HybridRouter` can escalate uncertain decisions to LLM.
48    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
49        RoutingDecision {
50            route: self.route(query),
51            confidence: 1.0,
52            reasoning: None,
53        }
54    }
55}
56
/// Resolved datetime boundaries for a temporal query.
///
/// Both fields use `SQLite` datetime format (`YYYY-MM-DD HH:MM:SS`, UTC).
/// `None` means "no bound" on that side.
///
/// Note: All timestamps are UTC. The `created_at` column in the `messages` table
/// defaults to `datetime('now')` which is also UTC, so comparisons are consistent.
/// Users in non-UTC timezones may get slightly unexpected results for "yesterday"
/// queries (e.g. at 01:00 UTC+5 the user's local yesterday differs from UTC yesterday).
/// This is an accepted approximation for the heuristic-only MVP.
///
/// Values of this type are produced by [`resolve_temporal_range`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemporalRange {
    /// Exclusive lower bound: `created_at > after`.
    pub after: Option<String>,
    /// Exclusive upper bound: `created_at < before`.
    pub before: Option<String>,
}
74
/// Temporal patterns that indicate an episodic / time-scoped recall query.
///
/// Every entry in this list is matched as a plain, lowercase substring (see
/// `has_temporal_cue`). Multi-word patterns are preferred over single-word ones to reduce
/// false positives. Single-word tokens that can appear inside other words (e.g. "ago" in
/// "Chicago") do NOT belong here; they live in `WORD_BOUNDARY_TEMPORAL` and are checked
/// with `contains_word()` to enforce word-boundary semantics.
///
/// Omitted on purpose: "before", "after", "since", "during", "earlier", "recently"
/// — these are too ambiguous in technical contexts ("before the function returns",
/// "since you asked", "during compilation"). They are not in this list.
const TEMPORAL_PATTERNS: &[&str] = &[
    // relative day
    "yesterday",
    "today",
    "this morning",
    "tonight",
    "last night",
    // relative week
    "last week",
    "this week",
    "past week",
    // relative month
    "last month",
    "this month",
    "past month",
    // temporal questions
    "when did",
    "remember when",
    "last time",
    "how long ago",
    // multi-word relative phrases — matched by substring like every other entry here;
    // the bare token "ago" is handled separately via `WORD_BOUNDARY_TEMPORAL`
    "few days ago",
    "few hours ago",
    "earlier today",
];
110
/// Single-word temporal tokens that require word-boundary checking.
/// These are NOT in `TEMPORAL_PATTERNS` to avoid substring false positives
/// (e.g. "ago" inside "Chicago"); they are matched with `contains_word()` instead.
const WORD_BOUNDARY_TEMPORAL: &[&str] = &["ago"];
114
/// MAGMA causal edge markers.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`] to prevent
/// pattern-list drift between the two classifiers (critic suggestion).
///
/// NOTE(review): in the visible part of this file only [`classify_graph_subgraph`] reads
/// this list — confirm whether [`HeuristicRouter`] is still meant to share it.
///
/// Entries are matched as lowercase substrings, so a marker also fires inside longer
/// words (e.g. "cause" inside "because").
pub(crate) const CAUSAL_MARKERS: &[&str] = &[
    "why",
    "because",
    "caused",
    "cause",
    "reason",
    "result",
    "led to",
    "consequence",
    "trigger",
    "effect",
    "blame",
    "fault",
];
133
/// MAGMA temporal edge markers for subgraph classification.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`].
/// Note: these are distinct from `TEMPORAL_PATTERNS` (which drive `Episodic` routing).
/// `TEMPORAL_MARKERS` detect edges whose *semantics* are temporal (sequencing/ordering),
/// while `TEMPORAL_PATTERNS` detect queries that ask about *when* events occurred.
///
/// NOTE(review): in the visible part of this file only [`classify_graph_subgraph`] reads
/// this list. Entries are matched as lowercase substrings, so short markers such as
/// "then" or "prior" also fire inside longer words.
pub(crate) const TEMPORAL_MARKERS: &[&str] = &[
    "before", "after", "first", "then", "timeline", "sequence", "preceded", "followed", "started",
    "ended", "during", "prior",
];
144
/// MAGMA entity/structural markers.
///
/// Read by [`classify_graph_subgraph`] to decide whether `Entity` edges should be part
/// of the traversal. Entries are matched as lowercase substrings of the query.
pub(crate) const ENTITY_MARKERS: &[&str] = &[
    "is a",
    "type of",
    "kind of",
    "part of",
    "instance",
    "same as",
    "alias",
    "subtype",
    "subclass",
    "belongs to",
];
158
159/// Classify a query into the MAGMA edge types to use for subgraph-scoped BFS retrieval.
160///
161/// Pure heuristic, zero latency — no LLM call. Returns a prioritised list of [`EdgeType`]s.
162///
163/// Rules (checked in order):
164/// 1. Causal markers → include `Causal`
165/// 2. Temporal markers → include `Temporal`
166/// 3. Entity/structural markers → include `Entity`
167/// 4. `Semantic` is always included as fallback to guarantee recall >= current untyped BFS.
168///
169/// Multiple markers may match, producing a union of detected types.
170///
171/// # Example
172///
173/// ```
174/// # use zeph_memory::router::classify_graph_subgraph;
175/// # use zeph_memory::EdgeType;
176/// let types = classify_graph_subgraph("why did X happen");
177/// assert!(types.contains(&EdgeType::Causal));
178/// assert!(types.contains(&EdgeType::Semantic));
179/// ```
180#[must_use]
181pub fn classify_graph_subgraph(query: &str) -> Vec<EdgeType> {
182    let lower = query.to_ascii_lowercase();
183    let mut types: Vec<EdgeType> = Vec::new();
184
185    if CAUSAL_MARKERS.iter().any(|m| lower.contains(m)) {
186        types.push(EdgeType::Causal);
187    }
188    if TEMPORAL_MARKERS.iter().any(|m| lower.contains(m)) {
189        types.push(EdgeType::Temporal);
190    }
191    if ENTITY_MARKERS.iter().any(|m| lower.contains(m)) {
192        types.push(EdgeType::Entity);
193    }
194
195    // Semantic is always included as fallback — recall cannot be worse than untyped BFS.
196    if !types.contains(&EdgeType::Semantic) {
197        types.push(EdgeType::Semantic);
198    }
199
200    types
201}
202
/// Heuristic-based memory router.
///
/// Decision logic of `route()` (first matching rule wins):
/// 1. Temporal cue → `Episodic`
/// 2. Relationship pattern → `Graph`
/// 3. Code-like structure (`/` or `::`) and no leading question word → `Keyword`
/// 4. Leading question word, or six or more words → `Semantic`
/// 5. Three or fewer words → `Keyword`
/// 6. Any pure `snake_case` token → `Keyword`
/// 7. Otherwise → `Hybrid`
pub struct HeuristicRouter;
213
/// Words that mark a query as a natural-language question when they appear as its
/// first whitespace-separated token (compared ASCII-case-insensitively by
/// `starts_with_question`).
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];
217
/// Simple substrings that signal a relationship query (checked via `str::contains`
/// against the ASCII-lowercased query).
///
/// NOTE(review): `HeuristicRouter` consults this list unconditionally in this file. The
/// `graph-memory` feature gate is applied by callers, which treat `MemoryRoute::Graph`
/// as `Hybrid` when the feature is disabled — confirm against the call sites.
const RELATIONSHIP_PATTERNS: &[&str] = &[
    "related to",
    "relates to",
    "connection between",
    "relationship",
    "opinion on",
    "thinks about",
    "preference for",
    "history of",
    "know about",
];
231
/// Reports whether `word` occurs in `text` as a standalone word.
///
/// The comparison is ASCII-case-insensitive. An occurrence counts only when the byte
/// directly before it and the byte directly after it (where they exist) are neither an
/// ASCII alphanumeric character nor an underscore.
fn contains_word(text: &str, word: &str) -> bool {
    let haystack = text.as_bytes();
    let needle = word.as_bytes();

    // A needle longer than the haystack cannot occur at all.
    let Some(last_start) = haystack.len().checked_sub(needle.len()) else {
        return false;
    };
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    (0..=last_start).any(|start| {
        let end = start + needle.len();
        haystack[start..end].eq_ignore_ascii_case(needle)
            && (start == 0 || !is_word_byte(haystack[start - 1]))
            && haystack.get(end).map_or(true, |&next| !is_word_byte(next))
    })
}
256
257/// Returns true if the lowercased query contains any temporal cue that indicates
258/// an episodic / time-scoped recall request.
259fn has_temporal_cue(lower: &str) -> bool {
260    if TEMPORAL_PATTERNS.iter().any(|p| lower.contains(p)) {
261        return true;
262    }
263    WORD_BOUNDARY_TEMPORAL
264        .iter()
265        .any(|w| contains_word(lower, w))
266}
267
268/// Temporal patterns sorted longest-first for stripping. Initialized once via `LazyLock`
269/// to avoid allocating and sorting on every call to `strip_temporal_keywords`.
270static SORTED_TEMPORAL_PATTERNS: std::sync::LazyLock<Vec<&'static str>> =
271    std::sync::LazyLock::new(|| {
272        let mut v: Vec<&str> = TEMPORAL_PATTERNS.to_vec();
273        v.sort_by_key(|p| std::cmp::Reverse(p.len()));
274        v
275    });
276
277/// Strip matched temporal keywords from a query string before passing to FTS5.
278///
279/// Temporal keywords are routing metadata, not search terms. Passing them to FTS5
280/// causes BM25 score distortion — messages that literally mention "yesterday" get
281/// boosted regardless of actual content relevance.
282///
283/// All occurrences of each pattern are removed (not just the first), preventing
284/// score distortion from repeated temporal tokens in edge cases like
285/// "yesterday I mentioned yesterday's bug".
286///
287/// # Example
288/// ```
289/// # use zeph_memory::router::strip_temporal_keywords;
290/// let cleaned = strip_temporal_keywords("what did we discuss yesterday about Rust");
291/// assert_eq!(cleaned, "what did we discuss about Rust");
292/// ```
293#[must_use]
294pub fn strip_temporal_keywords(query: &str) -> String {
295    // Lowercase once for pattern matching; track removal positions in the original string.
296    // We operate on the lowercased copy for matching, then remove spans from `result`
297    // by rebuilding via byte indices (both strings have identical byte lengths because
298    // to_ascii_lowercase is a 1:1 byte mapping for ASCII).
299    let lower = query.to_ascii_lowercase();
300    // Collect all (start, end) spans to remove, then rebuild the string in one pass.
301    let mut remove: Vec<(usize, usize)> = Vec::new();
302
303    for pattern in SORTED_TEMPORAL_PATTERNS.iter() {
304        let plen = pattern.len();
305        let mut search_from = 0;
306        while let Some(pos) = lower[search_from..].find(pattern) {
307            let abs = search_from + pos;
308            remove.push((abs, abs + plen));
309            search_from = abs + plen;
310        }
311    }
312
313    // Strip word-boundary tokens (single-word, e.g. "ago") — all occurrences.
314    for word in WORD_BOUNDARY_TEMPORAL {
315        let wlen = word.len();
316        let lbytes = lower.as_bytes();
317        let mut i = 0;
318        while i + wlen <= lower.len() {
319            if lower[i..].starts_with(*word) {
320                let before_ok =
321                    i == 0 || !lbytes[i - 1].is_ascii_alphanumeric() && lbytes[i - 1] != b'_';
322                let after_ok = i + wlen == lower.len()
323                    || !lbytes[i + wlen].is_ascii_alphanumeric() && lbytes[i + wlen] != b'_';
324                if before_ok && after_ok {
325                    remove.push((i, i + wlen));
326                    i += wlen;
327                    continue;
328                }
329            }
330            i += 1;
331        }
332    }
333
334    if remove.is_empty() {
335        // Fast path: no patterns found — return the original string.
336        return query.split_whitespace().collect::<Vec<_>>().join(" ");
337    }
338
339    // Merge overlapping/adjacent spans and remove them from the original string.
340    remove.sort_unstable_by_key(|r| r.0);
341    let bytes = query.as_bytes();
342    let mut result = Vec::with_capacity(query.len());
343    let mut cursor = 0;
344    for (start, end) in remove {
345        if start > cursor {
346            result.extend_from_slice(&bytes[cursor..start]);
347        }
348        cursor = cursor.max(end);
349    }
350    if cursor < bytes.len() {
351        result.extend_from_slice(&bytes[cursor..]);
352    }
353
354    // Collapse multiple spaces and trim.
355    // SAFETY: We only removed ASCII byte spans; remaining bytes are still valid UTF-8.
356    let s = String::from_utf8(result).unwrap_or_default();
357    s.split_whitespace()
358        .filter(|t| !t.is_empty())
359        .collect::<Vec<_>>()
360        .join(" ")
361}
362
363/// Resolve temporal keywords in `query` to a `(after, before)` datetime boundary pair.
364///
365/// Returns `None` when no specific range can be computed (the episodic path then falls
366/// back to FTS5 without a time filter, relying on temporal decay for recency boosting).
367///
368/// The `now` parameter is injectable for deterministic unit testing. Production callers
369/// should pass `chrono::Utc::now()`.
370///
371/// All datetime strings are in `SQLite` format: `YYYY-MM-DD HH:MM:SS` (UTC).
372#[must_use]
373pub fn resolve_temporal_range(query: &str, now: DateTime<Utc>) -> Option<TemporalRange> {
374    let lower = query.to_ascii_lowercase();
375
376    // yesterday: the full calendar day before today (UTC)
377    if lower.contains("yesterday") {
378        let yesterday = now.date_naive() - Duration::days(1);
379        return Some(TemporalRange {
380            after: Some(format!("{yesterday} 00:00:00")),
381            before: Some(format!("{yesterday} 23:59:59")),
382        });
383    }
384
385    // last night: 18:00 yesterday to 06:00 today (UTC approximation)
386    if lower.contains("last night") {
387        let yesterday = now.date_naive() - Duration::days(1);
388        let today = now.date_naive();
389        return Some(TemporalRange {
390            after: Some(format!("{yesterday} 18:00:00")),
391            before: Some(format!("{today} 06:00:00")),
392        });
393    }
394
395    // tonight: 18:00 today onwards
396    if lower.contains("tonight") {
397        let today = now.date_naive();
398        return Some(TemporalRange {
399            after: Some(format!("{today} 18:00:00")),
400            before: None,
401        });
402    }
403
404    // this morning: midnight to noon today
405    if lower.contains("this morning") {
406        let today = now.date_naive();
407        return Some(TemporalRange {
408            after: Some(format!("{today} 00:00:00")),
409            before: Some(format!("{today} 12:00:00")),
410        });
411    }
412
413    // today / earlier today: midnight to now.
414    // Note: "earlier today" always contains "today", so a separate branch would be
415    // dead code — the "today" check subsumes it.
416    if lower.contains("today") {
417        let today = now.date_naive();
418        return Some(TemporalRange {
419            after: Some(format!("{today} 00:00:00")),
420            before: None,
421        });
422    }
423
424    // last week / past week / this week: 7-day lookback
425    if lower.contains("last week") || lower.contains("past week") || lower.contains("this week") {
426        let start = now - Duration::days(7);
427        return Some(TemporalRange {
428            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
429            before: None,
430        });
431    }
432
433    // last month / past month / this month: 30-day lookback (approximate)
434    if lower.contains("last month") || lower.contains("past month") || lower.contains("this month")
435    {
436        let start = now - Duration::days(30);
437        return Some(TemporalRange {
438            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
439            before: None,
440        });
441    }
442
443    // "few days ago" / "few hours ago": 3-day lookback
444    if lower.contains("few days ago") {
445        let start = now - Duration::days(3);
446        return Some(TemporalRange {
447            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
448            before: None,
449        });
450    }
451    if lower.contains("few hours ago") {
452        let start = now - Duration::hours(6);
453        return Some(TemporalRange {
454            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
455            before: None,
456        });
457    }
458
459    // "ago" (word-boundary): generic recent lookback (24h)
460    if contains_word(&lower, "ago") {
461        let start = now - Duration::hours(24);
462        return Some(TemporalRange {
463            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
464            before: None,
465        });
466    }
467
468    // Generic temporal cues without a specific range ("when did", "remember when",
469    // "last time", "how long ago") — fall back to FTS5-only with temporal decay.
470    None
471}
472
473fn starts_with_question(words: &[&str]) -> bool {
474    words
475        .first()
476        .is_some_and(|w| QUESTION_WORDS.iter().any(|qw| w.eq_ignore_ascii_case(qw)))
477}
478
/// Reports whether `word` is a pure `snake_case` identifier.
///
/// That means: it consists solely of ASCII lowercase letters, ASCII digits and
/// underscores, it contains at least one underscore, and it contains at least one
/// letter (so `123_456` or `_` alone do not qualify).
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for c in word.chars() {
        match c {
            '_' => saw_underscore = true,
            'a'..='z' => saw_letter = true,
            '0'..='9' => {}
            // Anything else (uppercase, punctuation, non-ASCII) disqualifies the word.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
493
494impl MemoryRouter for HeuristicRouter {
495    /// Returns a confidence signal based on pattern match count (W2.1 fix: gradual scale).
496    ///
497    /// - Exactly one route pattern matches → confidence `1.0` (clear signal)
498    /// - Zero patterns match → confidence `0.0` (pure default fallback)
499    /// - More than one pattern matches → confidence `1.0 / matched_count` (ambiguous, decreasing)
500    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
501        let lower = query.to_ascii_lowercase();
502        let mut matched: u32 = 0;
503        if has_temporal_cue(&lower) {
504            matched += 1;
505        }
506        if RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p)) {
507            matched += 1;
508        }
509        let words: Vec<&str> = query.split_whitespace().collect();
510        let word_count = words.len();
511        let has_structural = query.contains('/') || query.contains("::");
512        let question = starts_with_question(&words);
513        let has_snake = words.iter().any(|w| is_pure_snake_case(w));
514        if has_structural && !question {
515            matched += 1;
516        }
517        if question || word_count >= 6 {
518            matched += 1;
519        }
520        if word_count <= 3 && !question {
521            matched += 1;
522        }
523        if has_snake {
524            matched += 1;
525        }
526
527        #[allow(clippy::cast_precision_loss)]
528        let confidence = match matched {
529            0 => 0.0,
530            1 => 1.0,
531            n => 1.0 / n as f32,
532        };
533
534        RoutingDecision {
535            route: self.route(query),
536            confidence,
537            reasoning: None,
538        }
539    }
540
541    fn route(&self, query: &str) -> MemoryRoute {
542        let lower = query.to_ascii_lowercase();
543
544        // 1. Temporal queries take highest priority — must run before relationship check
545        //    to prevent "history of changes last week" from routing to Graph instead of Episodic.
546        if has_temporal_cue(&lower) {
547            return MemoryRoute::Episodic;
548        }
549
550        // 2. Relationship queries go to graph retrieval (feature-gated at call site)
551        let has_relationship = RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p));
552        if has_relationship {
553            return MemoryRoute::Graph;
554        }
555
556        let words: Vec<&str> = query.split_whitespace().collect();
557        let word_count = words.len();
558
559        // Code-like patterns that unambiguously indicate keyword search:
560        // file paths (contain '/'), Rust paths (contain '::')
561        let has_structural_code_pattern = query.contains('/') || query.contains("::");
562
563        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
564        // but only if the query does NOT start with a question word
565        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
566        let question = starts_with_question(&words);
567
568        if has_structural_code_pattern && !question {
569            return MemoryRoute::Keyword;
570        }
571
572        // Long NL queries → semantic, regardless of snake_case tokens
573        if question || word_count >= 6 {
574            return MemoryRoute::Semantic;
575        }
576
577        // Short queries without question words → keyword
578        if word_count <= 3 && !question {
579            return MemoryRoute::Keyword;
580        }
581
582        // Short code-like patterns → keyword
583        if has_snake_case {
584            return MemoryRoute::Keyword;
585        }
586
587        // Default
588        MemoryRoute::Hybrid
589    }
590}
591
/// LLM-based memory router.
///
/// Sends the query to the configured provider and parses a JSON response:
/// `{"route": "keyword|semantic|hybrid|graph|episodic", "confidence": 0.0-1.0}`.
///
/// On LLM failure, falls back to `HeuristicRouter`.
///
/// The synchronous [`MemoryRouter`] methods of this type never contact the LLM; they
/// delegate to `HeuristicRouter`. LLM classification is only reachable through the
/// async path.
pub struct LlmRouter {
    /// Provider that receives the classification prompt.
    provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
    /// Route used when the LLM answers with a route name that is not recognised.
    fallback_route: MemoryRoute,
}
602
603impl LlmRouter {
604    #[must_use]
605    pub fn new(
606        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
607        fallback_route: MemoryRoute,
608    ) -> Self {
609        Self {
610            provider,
611            fallback_route,
612        }
613    }
614
615    async fn classify_async(&self, query: &str) -> RoutingDecision {
616        use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
617
618        let system = "You are a memory store routing classifier. \
619            Given a user query, decide which memory backend is most appropriate. \
620            Respond with ONLY a JSON object: \
621            {\"route\": \"<route>\", \"confidence\": <0.0-1.0>, \"reasoning\": \"<brief>\"} \
622            where <route> is one of: keyword, semantic, hybrid, graph, episodic. \
623            Use 'keyword' for exact/code lookups, 'semantic' for conceptual questions, \
624            'hybrid' for mixed, 'graph' for relationship queries, 'episodic' for time-scoped queries.";
625
626        // Wrap query in delimiters to prevent injection (W2.2 fix).
627        let user = format!(
628            "<query>{}</query>",
629            query.chars().take(500).collect::<String>()
630        );
631
632        let messages = vec![
633            Message {
634                role: Role::System,
635                content: system.to_owned(),
636                parts: vec![],
637                metadata: MessageMetadata::default(),
638            },
639            Message {
640                role: Role::User,
641                content: user,
642                parts: vec![],
643                metadata: MessageMetadata::default(),
644            },
645        ];
646
647        let result = match tokio::time::timeout(
648            std::time::Duration::from_secs(5),
649            self.provider.chat(&messages),
650        )
651        .await
652        {
653            Ok(Ok(r)) => r,
654            Ok(Err(e)) => {
655                tracing::debug!(error = %e, "LlmRouter: LLM call failed, falling back to heuristic");
656                return Self::heuristic_fallback(query);
657            }
658            Err(_) => {
659                tracing::debug!("LlmRouter: LLM timed out, falling back to heuristic");
660                return Self::heuristic_fallback(query);
661            }
662        };
663
664        self.parse_llm_response(&result, query)
665    }
666
667    fn parse_llm_response(&self, raw: &str, query: &str) -> RoutingDecision {
668        // Extract JSON object from the response (may have surrounding text).
669        let json_str = raw
670            .find('{')
671            .and_then(|start| raw[start..].rfind('}').map(|end| &raw[start..=start + end]))
672            .unwrap_or("");
673
674        if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
675            let route_str = v.get("route").and_then(|r| r.as_str()).unwrap_or("hybrid");
676            #[allow(clippy::cast_possible_truncation)]
677            let confidence = v
678                .get("confidence")
679                .and_then(serde_json::Value::as_f64)
680                .map_or(0.5, |c| c.clamp(0.0, 1.0) as f32);
681            let reasoning = v
682                .get("reasoning")
683                .and_then(|r| r.as_str())
684                .map(str::to_owned);
685
686            let route = parse_route_str(route_str, self.fallback_route);
687
688            tracing::debug!(
689                query = &query[..query.len().min(60)],
690                ?route,
691                confidence,
692                "LlmRouter: classified"
693            );
694
695            return RoutingDecision {
696                route,
697                confidence,
698                reasoning,
699            };
700        }
701
702        tracing::debug!("LlmRouter: failed to parse JSON response, falling back to heuristic");
703        Self::heuristic_fallback(query)
704    }
705
706    fn heuristic_fallback(query: &str) -> RoutingDecision {
707        HeuristicRouter.route_with_confidence(query)
708    }
709}
710
711#[must_use]
712pub fn parse_route_str(s: &str, fallback: MemoryRoute) -> MemoryRoute {
713    match s {
714        "keyword" => MemoryRoute::Keyword,
715        "semantic" => MemoryRoute::Semantic,
716        "hybrid" => MemoryRoute::Hybrid,
717        "graph" => MemoryRoute::Graph,
718        "episodic" => MemoryRoute::Episodic,
719        _ => fallback,
720    }
721}
722
723impl MemoryRouter for LlmRouter {
724    fn route(&self, query: &str) -> MemoryRoute {
725        // Sync path: LLM is not available without an async executor.
726        // Falls back to heuristic — use route_async() for LLM-based classification.
727        HeuristicRouter.route(query)
728    }
729
730    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
731        // LlmRouter is designed for use in async contexts via classify_async.
732        // When called synchronously (e.g. in tests), fall back to heuristic.
733        HeuristicRouter.route_with_confidence(query)
734    }
735}
736
/// Async extension for LLM-capable routers.
///
/// Implemented in this file by `LlmRouter`, `HybridRouter` and `HeuristicRouter`.
pub trait AsyncMemoryRouter: MemoryRouter {
    /// Route `query` asynchronously.
    ///
    /// Returns a boxed, `Send` future that resolves to the [`RoutingDecision`]. The
    /// future borrows both the router and the query for `'a`.
    fn route_async<'a>(
        &'a self,
        query: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>>;
}
744
745impl AsyncMemoryRouter for LlmRouter {
746    fn route_async<'a>(
747        &'a self,
748        query: &'a str,
749    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
750        Box::pin(self.classify_async(query))
751    }
752}
753
/// Hybrid router: heuristic-first, escalates to LLM when confidence is low.
///
/// The `HybridRouter` runs `HeuristicRouter` first. If the heuristic confidence
/// is below `confidence_threshold`, it escalates to the LLM router.
/// LLM failures always fall back to the heuristic result.
///
/// The synchronous [`MemoryRouter`] methods of this type use the heuristic only.
pub struct HybridRouter {
    /// LLM router consulted when the heuristic is not confident enough.
    llm: LlmRouter,
    /// Heuristic decisions with a confidence at or above this value are returned as-is.
    confidence_threshold: f32,
}
763
764impl HybridRouter {
765    #[must_use]
766    pub fn new(
767        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
768        fallback_route: MemoryRoute,
769        confidence_threshold: f32,
770    ) -> Self {
771        Self {
772            llm: LlmRouter::new(provider, fallback_route),
773            confidence_threshold,
774        }
775    }
776
777    pub async fn classify_async(&self, query: &str) -> RoutingDecision {
778        let heuristic = HeuristicRouter.route_with_confidence(query);
779        if heuristic.confidence >= self.confidence_threshold {
780            tracing::debug!(
781                query = &query[..query.len().min(60)],
782                confidence = heuristic.confidence,
783                route = ?heuristic.route,
784                "HybridRouter: heuristic sufficient, skipping LLM"
785            );
786            return heuristic;
787        }
788
789        tracing::debug!(
790            query = &query[..query.len().min(60)],
791            confidence = heuristic.confidence,
792            threshold = self.confidence_threshold,
793            "HybridRouter: low confidence, escalating to LLM"
794        );
795
796        let llm_result = self.llm.classify_async(query).await;
797
798        // LLM failure path: classify_async returns a heuristic fallback on error.
799        // Always log the final decision.
800        tracing::debug!(
801            route = ?llm_result.route,
802            confidence = llm_result.confidence,
803            "HybridRouter: final route after LLM escalation"
804        );
805        llm_result
806    }
807}
808
809impl MemoryRouter for HybridRouter {
810    fn route(&self, query: &str) -> MemoryRoute {
811        HeuristicRouter.route(query)
812    }
813
814    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
815        // Synchronous path: can't call async LLM, use heuristic only.
816        HeuristicRouter.route_with_confidence(query)
817    }
818}
819
820impl AsyncMemoryRouter for HeuristicRouter {
821    fn route_async<'a>(
822        &'a self,
823        query: &'a str,
824    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
825        Box::pin(std::future::ready(self.route_with_confidence(query)))
826    }
827}
828
829impl AsyncMemoryRouter for HybridRouter {
830    fn route_async<'a>(
831        &'a self,
832        query: &'a str,
833    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
834        Box::pin(self.classify_async(query))
835    }
836}
837
#[cfg(test)]
mod tests {
    use chrono::TimeZone as _;

    use super::*;

    /// Shorthand: run `q` through the heuristic router.
    fn route(q: &str) -> MemoryRoute {
        HeuristicRouter.route(q)
    }

    /// Asserts that the heuristic router sends `query` to `expected`.
    #[track_caller]
    fn assert_route(query: &str, expected: MemoryRoute) {
        assert_eq!(route(query), expected);
    }

    /// Asserts that the heuristic router does not send `query` to `forbidden`.
    #[track_caller]
    fn assert_not_route(query: &str, forbidden: MemoryRoute) {
        assert_ne!(route(query), forbidden);
    }

    /// Fixed reference instant shared by all temporal tests: 2026-03-14 12:00:00 UTC.
    fn fixed_now() -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 3, 14, 12, 0, 0).unwrap()
    }

    // ── Heuristic routing tests ──────────────────────────────────────────────

    #[test]
    fn rust_path_routes_keyword() {
        assert_route("zeph_memory::recall", MemoryRoute::Keyword);
    }

    #[test]
    fn file_path_routes_keyword() {
        assert_route("crates/zeph-core/src/agent/mod.rs", MemoryRoute::Keyword);
    }

    #[test]
    fn pure_snake_case_routes_keyword() {
        for identifier in ["memory_limit", "error_handling"] {
            assert_route(identifier, MemoryRoute::Keyword);
        }
    }

    #[test]
    fn question_with_snake_case_routes_semantic() {
        // A leading question word takes precedence over the snake_case heuristic.
        for question in [
            "what is the memory_limit setting",
            "how does error_handling work",
        ] {
            assert_route(question, MemoryRoute::Semantic);
        }
    }

    #[test]
    fn short_query_routes_keyword() {
        for short in ["context compaction", "qdrant"] {
            assert_route(short, MemoryRoute::Keyword);
        }
    }

    #[test]
    fn question_routes_semantic() {
        for question in [
            "what is the purpose of semantic memory",
            "how does the agent loop work",
            "why does compaction fail",
            "explain context compression",
        ] {
            assert_route(question, MemoryRoute::Semantic);
        }
    }

    #[test]
    fn long_natural_query_routes_semantic() {
        assert_route(
            "the agent keeps running out of context during long conversations",
            MemoryRoute::Semantic,
        );
    }

    #[test]
    fn medium_non_question_routes_hybrid() {
        // Four words, no question word, no code pattern.
        assert_route("context window token budget", MemoryRoute::Hybrid);
    }

    #[test]
    fn empty_query_routes_keyword() {
        // Zero words and no question word: handled by the short-query path.
        assert_route("", MemoryRoute::Keyword);
    }

    #[test]
    fn question_word_only_routes_semantic() {
        // A lone question word is short, but the short-query (Keyword) path only applies
        // to non-questions, so the question rule wins and the route is Semantic.
        assert_route("what", MemoryRoute::Semantic);
    }

    #[test]
    fn camel_case_does_not_route_keyword_without_pattern() {
        // CamelCase alone (no `::` or `/`) is not a code pattern: a four-word
        // non-question without snake_case lands on Hybrid.
        assert_route(
            "SemanticMemory configuration and options",
            MemoryRoute::Hybrid,
        );
    }

    #[test]
    fn relationship_query_routes_graph() {
        for relationship in [
            "what is user's opinion on neovim",
            "show the relationship between Alice and Bob",
        ] {
            assert_route(relationship, MemoryRoute::Graph);
        }
    }

    #[test]
    fn relationship_query_related_to_routes_graph() {
        for relationship in [
            "how is Rust related to this project",
            "how does this relates to the config",
        ] {
            assert_route(relationship, MemoryRoute::Graph);
        }
    }

    #[test]
    fn relationship_know_about_routes_graph() {
        assert_route("what do I know about neovim", MemoryRoute::Graph);
    }

    #[test]
    fn translate_does_not_route_graph() {
        // "translate" contains the substring "relate", but only the phrases
        // "related to" / "relates to" are relationship patterns, not bare "relate".
        assert_not_route("translate this code to Python", MemoryRoute::Graph);
    }

    #[test]
    fn non_relationship_stays_semantic() {
        assert_route(
            "find similar code patterns in the codebase",
            MemoryRoute::Semantic,
        );
    }

    #[test]
    fn short_keyword_unchanged() {
        assert_route("qdrant", MemoryRoute::Keyword);
    }

    // Regression coverage for #1661: long natural-language queries that happen to
    // contain a snake_case token must still go to Semantic.
    #[test]
    fn long_nl_with_snake_case_routes_semantic() {
        assert_route(
            "Use memory_search to find information about Rust ownership",
            MemoryRoute::Semantic,
        );
    }

    #[test]
    fn short_snake_case_only_routes_keyword() {
        assert_route("memory_search", MemoryRoute::Keyword);
    }

    #[test]
    fn question_with_snake_case_short_routes_semantic() {
        assert_route("What does memory_search return?", MemoryRoute::Semantic);
    }

    // ── Temporal routing tests ────────────────────────────────────────────────

    #[test]
    fn temporal_yesterday_routes_episodic() {
        assert_route("what did we discuss yesterday", MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_week_routes_episodic() {
        assert_route("remember what happened last week", MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_when_did_routes_episodic() {
        assert_route("when did we last talk about Qdrant", MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_time_routes_episodic() {
        assert_route(
            "last time we discussed the scheduler",
            MemoryRoute::Episodic,
        );
    }

    #[test]
    fn temporal_today_routes_episodic() {
        assert_route(
            "what did I mention today about testing",
            MemoryRoute::Episodic,
        );
    }

    #[test]
    fn temporal_this_morning_routes_episodic() {
        assert_route("what did we say this morning", MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_month_routes_episodic() {
        assert_route(
            "find the config change from last month",
            MemoryRoute::Episodic,
        );
    }

    #[test]
    fn temporal_history_collision_routes_episodic() {
        // CRIT-01: "history of" is a relationship pattern, but the temporal check runs
        // first, so "last week" makes this Episodic rather than Graph.
        assert_route("history of changes last week", MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_ago_word_boundary_routes_episodic() {
        assert_route("we fixed this a day ago", MemoryRoute::Episodic);
    }

    #[test]
    fn ago_in_chicago_no_false_positive() {
        // MED-01: "Chicago" ends in "ago"; the word-boundary check must keep it
        // from being treated as a temporal cue.
        assert_not_route(
            "meeting in Chicago about the project",
            MemoryRoute::Episodic,
        );
    }

    #[test]
    fn non_temporal_unchanged() {
        assert_route("how does the agent loop work", MemoryRoute::Semantic);
    }

    #[test]
    fn code_query_unchanged() {
        assert_route("zeph_memory::recall", MemoryRoute::Keyword);
    }

    // ── resolve_temporal_range tests ─────────────────────────────────────────
    // Every expectation below is relative to `fixed_now()` (2026-03-14 12:00:00 UTC).

    #[test]
    fn resolve_yesterday_range() {
        let window = resolve_temporal_range("what did we discuss yesterday", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-13 00:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-13 23:59:59"));
    }

    #[test]
    fn resolve_last_week_range() {
        let window =
            resolve_temporal_range("remember last week's discussion", fixed_now()).unwrap();
        // Seven days before 2026-03-14 is 2026-03-07.
        let lower = window.after.as_deref().unwrap();
        assert!(lower.starts_with("2026-03-07"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_last_month_range() {
        let window = resolve_temporal_range("find the bug from last month", fixed_now()).unwrap();
        // Thirty days before 2026-03-14 is 2026-02-12.
        let lower = window.after.as_deref().unwrap();
        assert!(lower.starts_with("2026-02-12"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_today_range() {
        let window = resolve_temporal_range("what did we do today", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 00:00:00"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_this_morning_range() {
        let window = resolve_temporal_range("what did we say this morning", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 00:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-14 12:00:00"));
    }

    #[test]
    fn resolve_last_night_range() {
        let window = resolve_temporal_range("last night's conversation", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-13 18:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-14 06:00:00"));
    }

    #[test]
    fn resolve_tonight_range() {
        let window =
            resolve_temporal_range("remind me tonight what we agreed on", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 18:00:00"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_no_temporal_returns_none() {
        let resolved = resolve_temporal_range("what is the purpose of semantic memory", fixed_now());
        assert!(resolved.is_none());
    }

    #[test]
    fn resolve_generic_temporal_returns_none() {
        // Generic cues such as "when did" or "remember when" name no concrete
        // window, so no range is resolved for them.
        let now = fixed_now();
        for vague in [
            "when did we discuss this feature",
            "remember when we fixed that bug",
        ] {
            assert!(resolve_temporal_range(vague, now).is_none());
        }
    }

    // ── strip_temporal_keywords tests ────────────────────────────────────────

    #[test]
    fn strip_yesterday_from_query() {
        assert_eq!(
            strip_temporal_keywords("what did we discuss yesterday about Rust"),
            "what did we discuss about Rust"
        );
    }

    #[test]
    fn strip_last_week_from_query() {
        assert_eq!(
            strip_temporal_keywords("find the config change from last week"),
            "find the config change from"
        );
    }

    #[test]
    fn strip_does_not_alter_non_temporal() {
        let untouched = "what is the purpose of semantic memory";
        assert_eq!(strip_temporal_keywords(untouched), untouched);
    }

    #[test]
    fn strip_ago_word_boundary() {
        let stripped = strip_temporal_keywords("we fixed this a day ago in the scheduler");
        // The standalone "ago" goes away; the rest of the query survives.
        assert!(!stripped.contains("ago"));
        assert!(stripped.contains("scheduler"));
    }

    #[test]
    fn strip_does_not_touch_chicago() {
        let untouched = "meeting in Chicago about the project";
        assert_eq!(strip_temporal_keywords(untouched), untouched);
    }

    #[test]
    fn strip_empty_string_returns_empty() {
        assert_eq!(strip_temporal_keywords(""), "");
    }

    #[test]
    fn strip_only_temporal_keyword_returns_empty() {
        // A query consisting solely of a temporal keyword strips down to "".
        // Per the original note, recall_routed falls back to the unstripped query then.
        assert_eq!(strip_temporal_keywords("yesterday"), "");
    }

    #[test]
    fn strip_repeated_temporal_keyword_removes_all_occurrences() {
        // IMPL-02: every occurrence must be stripped, not only the first one.
        let cleaned = strip_temporal_keywords("yesterday I mentioned yesterday's bug");
        assert!(
            !cleaned.contains("yesterday"),
            "both occurrences must be removed: got '{cleaned}'"
        );
        assert!(cleaned.contains("mentioned"));
    }

    // ── route_with_confidence tests ───────────────────────────────────────────

    #[test]
    fn confidence_multiple_matches_is_less_than_one() {
        // Several heuristics are expected to fire together here (structural code
        // pattern, snake_case, short query), so confidence must drop below 1.0.
        let decision = HeuristicRouter.route_with_confidence("zeph_memory::recall");
        assert!(
            decision.confidence < 1.0,
            "ambiguous query should have confidence < 1.0, got {}",
            decision.confidence
        );
        assert_eq!(decision.route, MemoryRoute::Keyword);
    }

    #[test]
    fn confidence_long_question_with_snake_fires_multiple_signals() {
        // A long question containing snake_case triggers more than one signal,
        // which must show up as confidence below 1.0.
        let decision = HeuristicRouter
            .route_with_confidence("what is the purpose of memory_limit in the config system");
        assert!(
            decision.confidence < 1.0,
            "ambiguous query must have confidence < 1.0, got {}",
            decision.confidence
        );
    }

    #[test]
    fn confidence_empty_query_is_nonzero() {
        // An empty query is expected to still match the short-query signal,
        // so its confidence must be strictly positive.
        let decision = HeuristicRouter.route_with_confidence("");
        assert!(
            decision.confidence > 0.0,
            "empty query must match short-path signal"
        );
    }

    #[test]
    fn routing_decision_route_matches_route_fn() {
        // The route reported by route_with_confidence() must equal route()'s answer.
        let queries = [
            "qdrant",
            "what is the agent loop",
            "context window token budget",
            "what did we discuss yesterday",
        ];
        for q in queries {
            let with_confidence = HeuristicRouter.route_with_confidence(q).route;
            let plain = HeuristicRouter.route(q);
            assert_eq!(with_confidence, plain, "mismatch for query: {q}");
        }
    }
}
zeph_memory/router.rs

zeph_memory/
router.rs