zeph_memory/
router.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Memory routing — classify recall queries and dispatch to the right backend.
5//!
6//! The [`MemoryRouter`] trait is implemented by:
7//!
8//! | Type | Strategy |
9//! |------|----------|
10//! | [`HeuristicRouter`] | Fast regex/keyword pattern matching. No LLM call. |
11//! | [`LlmRouter`] | Uses an LLM classifier for high-accuracy routing. |
12//! | [`HybridRouter`] | Runs [`HeuristicRouter`] first; escalates to [`LlmRouter`] when confidence is low. |
//! | [`AsyncMemoryRouter`] | Extension trait (not a router type) that adds an async `route_async` entry point on top of `MemoryRouter`. |
14//!
15//! # Routing pipeline
16//!
17//! 1. `HeuristicRouter` classifies the query in < 1 ms using temporal-keyword detection
18//!    and graph-relationship pattern matching.
19//! 2. If confidence >= threshold, the route is used directly.
20//! 3. Otherwise, `HybridRouter` forwards the query to `LlmRouter` for a second opinion.
21
22use chrono::{DateTime, Duration, Utc};
23
24use crate::graph::EdgeType;
25
/// Classification of which memory backend(s) to query.
///
/// This is the value returned by [`MemoryRouter::route`] and carried in
/// [`RoutingDecision::route`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryRoute {
    /// Full-text search only (`SQLite` FTS5). Fast, good for keyword/exact queries.
    Keyword,
    /// Vector search only (Qdrant). Good for semantic/conceptual queries.
    Semantic,
    /// Both backends, results merged by reciprocal rank fusion.
    Hybrid,
    /// Graph-based retrieval via BFS traversal. Good for relationship queries.
    /// When the `graph-memory` feature is disabled, callers treat this as `Hybrid`.
    Graph,
    /// FTS5 search with a timestamp-range filter. Used for temporal/episodic queries
    /// ("what did we discuss yesterday", "last week's conversation about Rust").
    ///
    /// Known trade-off (MVP): skips vector search entirely for speed. Semantically similar
    /// but lexically different messages may be missed. Use `Hybrid` route when semantic
    /// precision matters more than temporal filtering.
    Episodic,
}
46
/// Routing decision with confidence and optional LLM reasoning.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
    /// Backend(s) selected for the query.
    pub route: MemoryRoute,
    /// Confidence in `[0, 1]`. `1.0` = certain, `0.5` = ambiguous.
    pub confidence: f32,
    /// Only populated when an LLM classifier was used.
    pub reasoning: Option<String>,
}
56
/// Decides which memory backend(s) to query for a given input.
///
/// Implementors must be `Send + Sync`.
pub trait MemoryRouter: Send + Sync {
    /// Route a query to the appropriate backend(s).
    fn route(&self, query: &str) -> MemoryRoute;

    /// Route with a confidence signal. Default implementation wraps `route()` with confidence 1.0.
    ///
    /// Override this in routers that can express ambiguity (e.g. `HeuristicRouter`)
    /// so that `HybridRouter` can escalate uncertain decisions to LLM.
    ///
    /// The default implementation never fills in `reasoning`.
    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
        RoutingDecision {
            route: self.route(query),
            confidence: 1.0,
            reasoning: None,
        }
    }
}
74
/// Resolved datetime boundaries for a temporal query.
///
/// Both fields use `SQLite` datetime format (`YYYY-MM-DD HH:MM:SS`, UTC).
/// `None` means "no bound" on that side.
///
/// Note: All timestamps are UTC. The `created_at` column in the `messages` table
/// defaults to `datetime('now')` which is also UTC, so comparisons are consistent.
/// Users in non-UTC timezones may get slightly unexpected results for "yesterday"
/// queries (e.g. at 01:00 UTC+5 the user's local yesterday differs from UTC yesterday).
/// This is an accepted approximation for the heuristic-only MVP.
///
/// Values of this type are produced by [`resolve_temporal_range`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemporalRange {
    /// Exclusive lower bound: `created_at > after`.
    pub after: Option<String>,
    /// Exclusive upper bound: `created_at < before`.
    pub before: Option<String>,
}
92
/// Temporal patterns that indicate an episodic / time-scoped recall query.
///
/// Every entry in this list is matched as a plain substring of the ASCII-lowercased
/// query (see `has_temporal_cue` and `strip_temporal_keywords`).
///
/// Multi-word patterns are preferred over single-word ones to reduce false positives.
/// Single-word patterns that can appear inside other words (e.g. "ago" in "Chicago")
/// must be checked with `contains_word()` to enforce word-boundary semantics; those
/// live in `WORD_BOUNDARY_TEMPORAL`, not here.
///
/// Omitted on purpose: "before", "after", "since", "during", "earlier", "recently"
/// — these are too ambiguous in technical contexts ("before the function returns",
/// "since you asked", "during compilation"). They are not in this list.
const TEMPORAL_PATTERNS: &[&str] = &[
    // relative day
    "yesterday",
    "today",
    "this morning",
    "tonight",
    "last night",
    // relative week
    "last week",
    "this week",
    "past week",
    // relative month
    "last month",
    "this month",
    "past month",
    // temporal questions
    "when did",
    "remember when",
    "last time",
    "how long ago",
    // multi-word relative phrases; matched by substring like the rest of this list
    // (the bare token "ago" is handled separately through `WORD_BOUNDARY_TEMPORAL`)
    "few days ago",
    "few hours ago",
    "earlier today",
];
128
/// Single-word temporal tokens that require word-boundary checking.
/// These are NOT in `TEMPORAL_PATTERNS` to avoid substring false positives
/// (e.g. "ago" inside "Chicago"); they are matched with `contains_word`.
const WORD_BOUNDARY_TEMPORAL: &[&str] = &["ago"];
132
/// MAGMA causal edge markers.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`] to prevent
/// pattern-list drift between the two classifiers.
///
/// [`classify_graph_subgraph`] matches these as substrings of the lowercased query,
/// so a marker also fires inside a longer word (e.g. "cause" inside "because").
pub(crate) const CAUSAL_MARKERS: &[&str] = &[
    "why",
    "because",
    "caused",
    "cause",
    "reason",
    "result",
    "led to",
    "consequence",
    "trigger",
    "effect",
    "blame",
    "fault",
];
151
/// MAGMA temporal edge markers for subgraph classification.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`].
/// Note: these are distinct from `TEMPORAL_PATTERNS` (which drive `Episodic` routing).
/// `TEMPORAL_MARKERS` detect edges whose *semantics* are temporal (sequencing/ordering),
/// while `TEMPORAL_PATTERNS` detect queries that ask about *when* events occurred.
///
/// [`classify_graph_subgraph`] matches these as substrings of the lowercased query.
pub(crate) const TEMPORAL_MARKERS: &[&str] = &[
    "before", "after", "first", "then", "timeline", "sequence", "preceded", "followed", "started",
    "ended", "during", "prior",
];
162
/// MAGMA entity/structural markers.
///
/// [`classify_graph_subgraph`] matches these as substrings of the lowercased query
/// to decide whether `Entity` edges should be traversed.
pub(crate) const ENTITY_MARKERS: &[&str] = &[
    "is a",
    "type of",
    "kind of",
    "part of",
    "instance",
    "same as",
    "alias",
    "subtype",
    "subclass",
    "belongs to",
];
176
177/// Classify a query into the MAGMA edge types to use for subgraph-scoped BFS retrieval.
178///
179/// Pure heuristic, zero latency — no LLM call. Returns a prioritised list of [`EdgeType`]s.
180///
181/// Rules (checked in order):
182/// 1. Causal markers → include `Causal`
183/// 2. Temporal markers → include `Temporal`
184/// 3. Entity/structural markers → include `Entity`
185/// 4. `Semantic` is always included as fallback to guarantee recall >= current untyped BFS.
186///
187/// Multiple markers may match, producing a union of detected types.
188///
189/// # Example
190///
191/// ```
192/// # use zeph_memory::router::classify_graph_subgraph;
193/// # use zeph_memory::EdgeType;
194/// let types = classify_graph_subgraph("why did X happen");
195/// assert!(types.contains(&EdgeType::Causal));
196/// assert!(types.contains(&EdgeType::Semantic));
197/// ```
198#[must_use]
199pub fn classify_graph_subgraph(query: &str) -> Vec<EdgeType> {
200    let lower = query.to_ascii_lowercase();
201    let mut types: Vec<EdgeType> = Vec::new();
202
203    if CAUSAL_MARKERS.iter().any(|m| lower.contains(m)) {
204        types.push(EdgeType::Causal);
205    }
206    if TEMPORAL_MARKERS.iter().any(|m| lower.contains(m)) {
207        types.push(EdgeType::Temporal);
208    }
209    if ENTITY_MARKERS.iter().any(|m| lower.contains(m)) {
210        types.push(EdgeType::Entity);
211    }
212
213    // Semantic is always included as fallback — recall cannot be worse than untyped BFS.
214    if !types.contains(&EdgeType::Semantic) {
215        types.push(EdgeType::Semantic);
216    }
217
218    types
219}
220
/// Heuristic-based memory router.
///
/// Decision logic (in priority order, as implemented by `route`):
/// 1. Temporal patterns → `Episodic`
/// 2. Relationship patterns → `Graph`
/// 3. Code-like patterns (paths, `::`) without a leading question word → `Keyword`
/// 4. Leading question word, or six or more words → `Semantic`
/// 5. Three words or fewer → `Keyword`
/// 6. Any pure `snake_case` token → `Keyword`
/// 7. Default → `Hybrid`
pub struct HeuristicRouter;
231
/// Words that mark a query as a natural-language question when they appear as its
/// first whitespace-separated token (compared ASCII-case-insensitively by
/// `starts_with_question`).
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];
235
/// Simple substrings that signal a relationship query (checked via `str::contains`
/// against the ASCII-lowercased query).
///
/// `HeuristicRouter` checks these unconditionally and returns `MemoryRoute::Graph` on a
/// match; per the `MemoryRoute::Graph` docs, callers treat that route as `Hybrid` when
/// the `graph-memory` feature is disabled.
const RELATIONSHIP_PATTERNS: &[&str] = &[
    "related to",
    "relates to",
    "connection between",
    "relationship",
    "opinion on",
    "thinks about",
    "preference for",
    "history of",
    "know about",
];
249
/// Reports whether `word` occurs in `text` as a standalone word.
///
/// Matching is byte-wise and ASCII-case-insensitive. An occurrence counts only when the
/// byte immediately before it and the byte immediately after it (where such a byte
/// exists) is neither an ASCII alphanumeric nor an underscore.
fn contains_word(text: &str, word: &str) -> bool {
    let haystack = text.as_bytes();
    let needle = word.as_bytes();
    // No candidate start positions when the needle is longer than the haystack.
    let Some(last_start) = haystack.len().checked_sub(needle.len()) else {
        return false;
    };
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    (0..=last_start).any(|start| {
        let end = start + needle.len();
        haystack[start..end].eq_ignore_ascii_case(needle)
            && !(start > 0 && is_word_byte(haystack[start - 1]))
            && !(end < haystack.len() && is_word_byte(haystack[end]))
    })
}
274
275/// Returns true if the lowercased query contains any temporal cue that indicates
276/// an episodic / time-scoped recall request.
277fn has_temporal_cue(lower: &str) -> bool {
278    if TEMPORAL_PATTERNS.iter().any(|p| lower.contains(p)) {
279        return true;
280    }
281    WORD_BOUNDARY_TEMPORAL
282        .iter()
283        .any(|w| contains_word(lower, w))
284}
285
/// Temporal patterns sorted longest-first for stripping. Initialized once via `LazyLock`
/// to avoid allocating and sorting on every call to `strip_temporal_keywords`.
///
/// The contents are exactly the entries of `TEMPORAL_PATTERNS`; only the order differs.
static SORTED_TEMPORAL_PATTERNS: std::sync::LazyLock<Vec<&'static str>> =
    std::sync::LazyLock::new(|| {
        let mut v: Vec<&str> = TEMPORAL_PATTERNS.to_vec();
        // `Reverse` turns the ascending sort into a descending-by-length order.
        v.sort_by_key(|p| std::cmp::Reverse(p.len()));
        v
    });
294
295/// Strip matched temporal keywords from a query string before passing to FTS5.
296///
297/// Temporal keywords are routing metadata, not search terms. Passing them to FTS5
298/// causes BM25 score distortion — messages that literally mention "yesterday" get
299/// boosted regardless of actual content relevance.
300///
301/// All occurrences of each pattern are removed (not just the first), preventing
302/// score distortion from repeated temporal tokens in edge cases like
303/// "yesterday I mentioned yesterday's bug".
304///
305/// # Example
306/// ```
307/// # use zeph_memory::router::strip_temporal_keywords;
308/// let cleaned = strip_temporal_keywords("what did we discuss yesterday about Rust");
309/// assert_eq!(cleaned, "what did we discuss about Rust");
310/// ```
311#[must_use]
312pub fn strip_temporal_keywords(query: &str) -> String {
313    // Lowercase once for pattern matching; track removal positions in the original string.
314    // We operate on the lowercased copy for matching, then remove spans from `result`
315    // by rebuilding via byte indices (both strings have identical byte lengths because
316    // to_ascii_lowercase is a 1:1 byte mapping for ASCII).
317    let lower = query.to_ascii_lowercase();
318    // Collect all (start, end) spans to remove, then rebuild the string in one pass.
319    let mut remove: Vec<(usize, usize)> = Vec::new();
320
321    for pattern in SORTED_TEMPORAL_PATTERNS.iter() {
322        let plen = pattern.len();
323        let mut search_from = 0;
324        while let Some(pos) = lower[search_from..].find(pattern) {
325            let abs = search_from + pos;
326            remove.push((abs, abs + plen));
327            search_from = abs + plen;
328        }
329    }
330
331    // Strip word-boundary tokens (single-word, e.g. "ago") — all occurrences.
332    for word in WORD_BOUNDARY_TEMPORAL {
333        let wlen = word.len();
334        let lbytes = lower.as_bytes();
335        let mut i = 0;
336        while i + wlen <= lower.len() {
337            if lower[i..].starts_with(*word) {
338                let before_ok =
339                    i == 0 || !lbytes[i - 1].is_ascii_alphanumeric() && lbytes[i - 1] != b'_';
340                let after_ok = i + wlen == lower.len()
341                    || !lbytes[i + wlen].is_ascii_alphanumeric() && lbytes[i + wlen] != b'_';
342                if before_ok && after_ok {
343                    remove.push((i, i + wlen));
344                    i += wlen;
345                    continue;
346                }
347            }
348            i += 1;
349        }
350    }
351
352    if remove.is_empty() {
353        // Fast path: no patterns found — return the original string.
354        return query.split_whitespace().collect::<Vec<_>>().join(" ");
355    }
356
357    // Merge overlapping/adjacent spans and remove them from the original string.
358    remove.sort_unstable_by_key(|r| r.0);
359    let bytes = query.as_bytes();
360    let mut result = Vec::with_capacity(query.len());
361    let mut cursor = 0;
362    for (start, end) in remove {
363        if start > cursor {
364            result.extend_from_slice(&bytes[cursor..start]);
365        }
366        cursor = cursor.max(end);
367    }
368    if cursor < bytes.len() {
369        result.extend_from_slice(&bytes[cursor..]);
370    }
371
372    // Collapse multiple spaces and trim.
373    // SAFETY: We only removed ASCII byte spans; remaining bytes are still valid UTF-8.
374    let s = String::from_utf8(result).unwrap_or_default();
375    s.split_whitespace()
376        .filter(|t| !t.is_empty())
377        .collect::<Vec<_>>()
378        .join(" ")
379}
380
381/// Resolve temporal keywords in `query` to a `(after, before)` datetime boundary pair.
382///
383/// Returns `None` when no specific range can be computed (the episodic path then falls
384/// back to FTS5 without a time filter, relying on temporal decay for recency boosting).
385///
386/// The `now` parameter is injectable for deterministic unit testing. Production callers
387/// should pass `chrono::Utc::now()`.
388///
389/// All datetime strings are in `SQLite` format: `YYYY-MM-DD HH:MM:SS` (UTC).
390#[must_use]
391pub fn resolve_temporal_range(query: &str, now: DateTime<Utc>) -> Option<TemporalRange> {
392    let lower = query.to_ascii_lowercase();
393
394    // yesterday: the full calendar day before today (UTC)
395    if lower.contains("yesterday") {
396        let yesterday = now.date_naive() - Duration::days(1);
397        return Some(TemporalRange {
398            after: Some(format!("{yesterday} 00:00:00")),
399            before: Some(format!("{yesterday} 23:59:59")),
400        });
401    }
402
403    // last night: 18:00 yesterday to 06:00 today (UTC approximation)
404    if lower.contains("last night") {
405        let yesterday = now.date_naive() - Duration::days(1);
406        let today = now.date_naive();
407        return Some(TemporalRange {
408            after: Some(format!("{yesterday} 18:00:00")),
409            before: Some(format!("{today} 06:00:00")),
410        });
411    }
412
413    // tonight: 18:00 today onwards
414    if lower.contains("tonight") {
415        let today = now.date_naive();
416        return Some(TemporalRange {
417            after: Some(format!("{today} 18:00:00")),
418            before: None,
419        });
420    }
421
422    // this morning: midnight to noon today
423    if lower.contains("this morning") {
424        let today = now.date_naive();
425        return Some(TemporalRange {
426            after: Some(format!("{today} 00:00:00")),
427            before: Some(format!("{today} 12:00:00")),
428        });
429    }
430
431    // today / earlier today: midnight to now.
432    // Note: "earlier today" always contains "today", so a separate branch would be
433    // dead code — the "today" check subsumes it.
434    if lower.contains("today") {
435        let today = now.date_naive();
436        return Some(TemporalRange {
437            after: Some(format!("{today} 00:00:00")),
438            before: None,
439        });
440    }
441
442    // last week / past week / this week: 7-day lookback
443    if lower.contains("last week") || lower.contains("past week") || lower.contains("this week") {
444        let start = now - Duration::days(7);
445        return Some(TemporalRange {
446            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
447            before: None,
448        });
449    }
450
451    // last month / past month / this month: 30-day lookback (approximate)
452    if lower.contains("last month") || lower.contains("past month") || lower.contains("this month")
453    {
454        let start = now - Duration::days(30);
455        return Some(TemporalRange {
456            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
457            before: None,
458        });
459    }
460
461    // "few days ago" / "few hours ago": 3-day lookback
462    if lower.contains("few days ago") {
463        let start = now - Duration::days(3);
464        return Some(TemporalRange {
465            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
466            before: None,
467        });
468    }
469    if lower.contains("few hours ago") {
470        let start = now - Duration::hours(6);
471        return Some(TemporalRange {
472            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
473            before: None,
474        });
475    }
476
477    // "ago" (word-boundary): generic recent lookback (24h)
478    if contains_word(&lower, "ago") {
479        let start = now - Duration::hours(24);
480        return Some(TemporalRange {
481            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
482            before: None,
483        });
484    }
485
486    // Generic temporal cues without a specific range ("when did", "remember when",
487    // "last time", "how long ago") — fall back to FTS5-only with temporal decay.
488    None
489}
490
491fn starts_with_question(words: &[&str]) -> bool {
492    words
493        .first()
494        .is_some_and(|w| QUESTION_WORDS.iter().any(|qw| w.eq_ignore_ascii_case(qw)))
495}
496
/// Reports whether `word` is a pure `snake_case` identifier.
///
/// That means: every character is an ASCII lowercase letter, an ASCII digit or `_`;
/// at least one `_` is present; and at least one lowercase letter is present (so
/// strings made only of digits and underscores do not qualify). The empty string
/// yields `false`.
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for byte in word.bytes() {
        match byte {
            b'_' => saw_underscore = true,
            b'a'..=b'z' => saw_letter = true,
            b'0'..=b'9' => {}
            // Uppercase, punctuation, or any byte of a non-ASCII character.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
511
512impl MemoryRouter for HeuristicRouter {
513    /// Returns a confidence signal based on pattern match count (W2.1 fix: gradual scale).
514    ///
515    /// - Exactly one route pattern matches → confidence `1.0` (clear signal)
516    /// - Zero patterns match → confidence `0.0` (pure default fallback)
517    /// - More than one pattern matches → confidence `1.0 / matched_count` (ambiguous, decreasing)
518    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
519        let lower = query.to_ascii_lowercase();
520        let mut matched: u32 = 0;
521        if has_temporal_cue(&lower) {
522            matched += 1;
523        }
524        if RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p)) {
525            matched += 1;
526        }
527        let words: Vec<&str> = query.split_whitespace().collect();
528        let word_count = words.len();
529        let has_structural = query.contains('/') || query.contains("::");
530        let question = starts_with_question(&words);
531        let has_snake = words.iter().any(|w| is_pure_snake_case(w));
532        if has_structural && !question {
533            matched += 1;
534        }
535        if question || word_count >= 6 {
536            matched += 1;
537        }
538        if word_count <= 3 && !question {
539            matched += 1;
540        }
541        if has_snake {
542            matched += 1;
543        }
544
545        #[allow(clippy::cast_precision_loss)]
546        let confidence = match matched {
547            0 => 0.0,
548            1 => 1.0,
549            n => 1.0 / n as f32,
550        };
551
552        RoutingDecision {
553            route: self.route(query),
554            confidence,
555            reasoning: None,
556        }
557    }
558
559    fn route(&self, query: &str) -> MemoryRoute {
560        let lower = query.to_ascii_lowercase();
561
562        // 1. Temporal queries take highest priority — must run before relationship check
563        //    to prevent "history of changes last week" from routing to Graph instead of Episodic.
564        if has_temporal_cue(&lower) {
565            return MemoryRoute::Episodic;
566        }
567
568        // 2. Relationship queries go to graph retrieval (feature-gated at call site)
569        let has_relationship = RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p));
570        if has_relationship {
571            return MemoryRoute::Graph;
572        }
573
574        let words: Vec<&str> = query.split_whitespace().collect();
575        let word_count = words.len();
576
577        // Code-like patterns that unambiguously indicate keyword search:
578        // file paths (contain '/'), Rust paths (contain '::')
579        let has_structural_code_pattern = query.contains('/') || query.contains("::");
580
581        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
582        // but only if the query does NOT start with a question word
583        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
584        let question = starts_with_question(&words);
585
586        if has_structural_code_pattern && !question {
587            return MemoryRoute::Keyword;
588        }
589
590        // Long NL queries → semantic, regardless of snake_case tokens
591        if question || word_count >= 6 {
592            return MemoryRoute::Semantic;
593        }
594
595        // Short queries without question words → keyword
596        if word_count <= 3 && !question {
597            return MemoryRoute::Keyword;
598        }
599
600        // Short code-like patterns → keyword
601        if has_snake_case {
602            return MemoryRoute::Keyword;
603        }
604
605        // Default
606        MemoryRoute::Hybrid
607    }
608}
609
/// LLM-based memory router.
///
/// Sends the query to the configured provider and parses a JSON response:
/// `{"route": "keyword|semantic|hybrid|graph|episodic", "confidence": 0.0-1.0}`.
///
/// On LLM failure, falls back to `HeuristicRouter`.
pub struct LlmRouter {
    /// Provider that receives the classification prompt.
    provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
    /// Route substituted when the LLM answers with a route name that
    /// `parse_route_str` does not recognise.
    fallback_route: MemoryRoute,
}
620
621impl LlmRouter {
622    /// Create a new `LlmRouter`.
623    ///
624    /// - `provider` — LLM provider used for classification.
625    /// - `fallback_route` — route used when the LLM call fails.
626    #[must_use]
627    pub fn new(
628        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
629        fallback_route: MemoryRoute,
630    ) -> Self {
631        Self {
632            provider,
633            fallback_route,
634        }
635    }
636
637    async fn classify_async(&self, query: &str) -> RoutingDecision {
638        use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
639
640        let system = "You are a memory store routing classifier. \
641            Given a user query, decide which memory backend is most appropriate. \
642            Respond with ONLY a JSON object: \
643            {\"route\": \"<route>\", \"confidence\": <0.0-1.0>, \"reasoning\": \"<brief>\"} \
644            where <route> is one of: keyword, semantic, hybrid, graph, episodic. \
645            Use 'keyword' for exact/code lookups, 'semantic' for conceptual questions, \
646            'hybrid' for mixed, 'graph' for relationship queries, 'episodic' for time-scoped queries.";
647
648        // Wrap query in delimiters to prevent injection (W2.2 fix).
649        let user = format!(
650            "<query>{}</query>",
651            query.chars().take(500).collect::<String>()
652        );
653
654        let messages = vec![
655            Message {
656                role: Role::System,
657                content: system.to_owned(),
658                parts: vec![],
659                metadata: MessageMetadata::default(),
660            },
661            Message {
662                role: Role::User,
663                content: user,
664                parts: vec![],
665                metadata: MessageMetadata::default(),
666            },
667        ];
668
669        let result = match tokio::time::timeout(
670            std::time::Duration::from_secs(5),
671            self.provider.chat(&messages),
672        )
673        .await
674        {
675            Ok(Ok(r)) => r,
676            Ok(Err(e)) => {
677                tracing::debug!(error = %e, "LlmRouter: LLM call failed, falling back to heuristic");
678                return Self::heuristic_fallback(query);
679            }
680            Err(_) => {
681                tracing::debug!("LlmRouter: LLM timed out, falling back to heuristic");
682                return Self::heuristic_fallback(query);
683            }
684        };
685
686        self.parse_llm_response(&result, query)
687    }
688
689    fn parse_llm_response(&self, raw: &str, query: &str) -> RoutingDecision {
690        // Extract JSON object from the response (may have surrounding text).
691        let json_str = raw
692            .find('{')
693            .and_then(|start| raw[start..].rfind('}').map(|end| &raw[start..=start + end]))
694            .unwrap_or("");
695
696        if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
697            let route_str = v.get("route").and_then(|r| r.as_str()).unwrap_or("hybrid");
698            #[allow(clippy::cast_possible_truncation)]
699            let confidence = v
700                .get("confidence")
701                .and_then(serde_json::Value::as_f64)
702                .map_or(0.5, |c| c.clamp(0.0, 1.0) as f32);
703            let reasoning = v
704                .get("reasoning")
705                .and_then(|r| r.as_str())
706                .map(str::to_owned);
707
708            let route = parse_route_str(route_str, self.fallback_route);
709
710            tracing::debug!(
711                query = &query[..query.len().min(60)],
712                ?route,
713                confidence,
714                "LlmRouter: classified"
715            );
716
717            return RoutingDecision {
718                route,
719                confidence,
720                reasoning,
721            };
722        }
723
724        tracing::debug!("LlmRouter: failed to parse JSON response, falling back to heuristic");
725        Self::heuristic_fallback(query)
726    }
727
728    fn heuristic_fallback(query: &str) -> RoutingDecision {
729        HeuristicRouter.route_with_confidence(query)
730    }
731}
732
733/// Parse a route name string into a [`MemoryRoute`], falling back to `fallback` on unknown values.
734///
735/// # Examples
736///
737/// ```
738/// use zeph_memory::router::{parse_route_str, MemoryRoute};
739///
740/// assert_eq!(parse_route_str("semantic", MemoryRoute::Hybrid), MemoryRoute::Semantic);
741/// assert_eq!(parse_route_str("unknown", MemoryRoute::Hybrid), MemoryRoute::Hybrid);
742/// ```
743#[must_use]
744pub fn parse_route_str(s: &str, fallback: MemoryRoute) -> MemoryRoute {
745    match s {
746        "keyword" => MemoryRoute::Keyword,
747        "semantic" => MemoryRoute::Semantic,
748        "hybrid" => MemoryRoute::Hybrid,
749        "graph" => MemoryRoute::Graph,
750        "episodic" => MemoryRoute::Episodic,
751        _ => fallback,
752    }
753}
754
755impl MemoryRouter for LlmRouter {
756    fn route(&self, query: &str) -> MemoryRoute {
757        // Sync path: LLM is not available without an async executor.
758        // Falls back to heuristic — use route_async() for LLM-based classification.
759        HeuristicRouter.route(query)
760    }
761
762    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
763        // LlmRouter is designed for use in async contexts via classify_async.
764        // When called synchronously (e.g. in tests), fall back to heuristic.
765        HeuristicRouter.route_with_confidence(query)
766    }
767}
768
/// Async extension for LLM-capable routers.
pub trait AsyncMemoryRouter: MemoryRouter {
    /// Route `query` asynchronously.
    ///
    /// Returns a boxed, pinned, `Send` future that resolves to a [`RoutingDecision`].
    /// The future borrows both `self` and `query` for the lifetime `'a`.
    fn route_async<'a>(
        &'a self,
        query: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>>;
}
776
777impl AsyncMemoryRouter for LlmRouter {
778    fn route_async<'a>(
779        &'a self,
780        query: &'a str,
781    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
782        Box::pin(self.classify_async(query))
783    }
784}
785
/// Hybrid router: heuristic-first, escalates to LLM when confidence is low.
///
/// The `HybridRouter` runs `HeuristicRouter` first. If the heuristic confidence
/// is below `confidence_threshold`, it escalates to the LLM router.
/// LLM failures always fall back to the heuristic result.
pub struct HybridRouter {
    /// LLM classifier consulted when the heuristic is not confident enough.
    llm: LlmRouter,
    /// Heuristic decisions with confidence at or above this value are used directly.
    confidence_threshold: f32,
}
795
796impl HybridRouter {
797    /// Create a new `HybridRouter`.
798    ///
799    /// - `confidence_threshold` — heuristic decisions with confidence below this value
800    ///   are escalated to the LLM classifier.  `0.7` is a good default.
801    #[must_use]
802    pub fn new(
803        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
804        fallback_route: MemoryRoute,
805        confidence_threshold: f32,
806    ) -> Self {
807        Self {
808            llm: LlmRouter::new(provider, fallback_route),
809            confidence_threshold,
810        }
811    }
812
813    pub async fn classify_async(&self, query: &str) -> RoutingDecision {
814        let heuristic = HeuristicRouter.route_with_confidence(query);
815        if heuristic.confidence >= self.confidence_threshold {
816            tracing::debug!(
817                query = &query[..query.len().min(60)],
818                confidence = heuristic.confidence,
819                route = ?heuristic.route,
820                "HybridRouter: heuristic sufficient, skipping LLM"
821            );
822            return heuristic;
823        }
824
825        tracing::debug!(
826            query = &query[..query.len().min(60)],
827            confidence = heuristic.confidence,
828            threshold = self.confidence_threshold,
829            "HybridRouter: low confidence, escalating to LLM"
830        );
831
832        let llm_result = self.llm.classify_async(query).await;
833
834        // LLM failure path: classify_async returns a heuristic fallback on error.
835        // Always log the final decision.
836        tracing::debug!(
837            route = ?llm_result.route,
838            confidence = llm_result.confidence,
839            "HybridRouter: final route after LLM escalation"
840        );
841        llm_result
842    }
843}
844
845impl MemoryRouter for HybridRouter {
846    fn route(&self, query: &str) -> MemoryRoute {
847        HeuristicRouter.route(query)
848    }
849
850    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
851        // Synchronous path: can't call async LLM, use heuristic only.
852        HeuristicRouter.route_with_confidence(query)
853    }
854}
855
856impl AsyncMemoryRouter for HeuristicRouter {
857    fn route_async<'a>(
858        &'a self,
859        query: &'a str,
860    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
861        Box::pin(std::future::ready(self.route_with_confidence(query)))
862    }
863}
864
865impl AsyncMemoryRouter for HybridRouter {
866    fn route_async<'a>(
867        &'a self,
868        query: &'a str,
869    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
870        Box::pin(self.classify_async(query))
871    }
872}
873
#[cfg(test)]
mod tests {
    use chrono::TimeZone as _;

    use super::*;

    /// Shorthand: classify `query` with the heuristic router.
    fn route(query: &str) -> MemoryRoute {
        let router = HeuristicRouter;
        router.route(query)
    }

    /// Fixed reference instant shared by all temporal tests:
    /// 2026-03-14 12:00:00 UTC.
    fn fixed_now() -> DateTime<Utc> {
        let reference = Utc.with_ymd_and_hms(2026, 3, 14, 12, 0, 0);
        reference.unwrap()
    }

    #[test]
    fn rust_path_routes_keyword() {
        let routed = route("zeph_memory::recall");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    #[test]
    fn file_path_routes_keyword() {
        let routed = route("crates/zeph-core/src/agent/mod.rs");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    #[test]
    fn pure_snake_case_routes_keyword() {
        for identifier in ["memory_limit", "error_handling"] {
            assert_eq!(route(identifier), MemoryRoute::Keyword);
        }
    }

    #[test]
    fn question_with_snake_case_routes_semantic() {
        // A leading question word takes precedence over the snake_case heuristic.
        for question in [
            "what is the memory_limit setting",
            "how does error_handling work",
        ] {
            assert_eq!(route(question), MemoryRoute::Semantic);
        }
    }

    #[test]
    fn short_query_routes_keyword() {
        for short in ["context compaction", "qdrant"] {
            assert_eq!(route(short), MemoryRoute::Keyword);
        }
    }

    #[test]
    fn question_routes_semantic() {
        for question in [
            "what is the purpose of semantic memory",
            "how does the agent loop work",
            "why does compaction fail",
            "explain context compression",
        ] {
            assert_eq!(route(question), MemoryRoute::Semantic);
        }
    }

    #[test]
    fn long_natural_query_routes_semantic() {
        let routed = route("the agent keeps running out of context during long conversations");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    #[test]
    fn medium_non_question_routes_hybrid() {
        // Four to five words, no question word, no code pattern.
        let routed = route("context window token budget");
        assert_eq!(routed, MemoryRoute::Hybrid);
    }

    #[test]
    fn empty_query_routes_keyword() {
        // Zero words and not a question → short-query keyword path.
        let routed = route("");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    #[test]
    fn question_word_only_routes_semantic() {
        // A lone question word has word_count = 1, but the short-query keyword
        // path (`<= 3 && !question`) is skipped because it is a question; the
        // `question || word_count >= 6` rule then selects Semantic.
        let routed = route("what");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    #[test]
    fn camel_case_does_not_route_keyword_without_pattern() {
        // CamelCase without `::` or `/` is not a structural code pattern. With
        // four words, no question word and no snake_case, the result is Hybrid.
        let routed = route("SemanticMemory configuration and options");
        assert_eq!(routed, MemoryRoute::Hybrid);
    }

    #[test]
    fn relationship_query_routes_graph() {
        for relational in [
            "what is user's opinion on neovim",
            "show the relationship between Alice and Bob",
        ] {
            assert_eq!(route(relational), MemoryRoute::Graph);
        }
    }

    #[test]
    fn relationship_query_related_to_routes_graph() {
        for relational in [
            "how is Rust related to this project",
            "how does this relates to the config",
        ] {
            assert_eq!(route(relational), MemoryRoute::Graph);
        }
    }

    #[test]
    fn relationship_know_about_routes_graph() {
        let routed = route("what do I know about neovim");
        assert_eq!(routed, MemoryRoute::Graph);
    }

    #[test]
    fn translate_does_not_route_graph() {
        // "translate" contains the substring "relate", but bare "relate" is not
        // in RELATIONSHIP_PATTERNS (only "related to" and "relates to" are).
        let routed = route("translate this code to Python");
        assert_ne!(routed, MemoryRoute::Graph);
    }

    #[test]
    fn non_relationship_stays_semantic() {
        let routed = route("find similar code patterns in the codebase");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    #[test]
    fn short_keyword_unchanged() {
        let routed = route("qdrant");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    // Regression tests for #1661: long NL queries with snake_case must go to Semantic
    #[test]
    fn long_nl_with_snake_case_routes_semantic() {
        let routed = route("Use memory_search to find information about Rust ownership");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    #[test]
    fn short_snake_case_only_routes_keyword() {
        let routed = route("memory_search");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    #[test]
    fn question_with_snake_case_short_routes_semantic() {
        let routed = route("What does memory_search return?");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    // ── Temporal routing tests ────────────────────────────────────────────────

    #[test]
    fn temporal_yesterday_routes_episodic() {
        let routed = route("what did we discuss yesterday");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_week_routes_episodic() {
        let routed = route("remember what happened last week");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_when_did_routes_episodic() {
        let routed = route("when did we last talk about Qdrant");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_time_routes_episodic() {
        let routed = route("last time we discussed the scheduler");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_today_routes_episodic() {
        let routed = route("what did I mention today about testing");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_this_morning_routes_episodic() {
        let routed = route("what did we say this morning");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_last_month_routes_episodic() {
        let routed = route("find the config change from last month");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_history_collision_routes_episodic() {
        // CRIT-01: "history of" is a relationship pattern, yet the temporal
        // check runs first, so "last week" yields Episodic rather than Graph.
        let routed = route("history of changes last week");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn temporal_ago_word_boundary_routes_episodic() {
        let routed = route("we fixed this a day ago");
        assert_eq!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn ago_in_chicago_no_false_positive() {
        // MED-01: "Chicago" contains "ago"; the word-boundary check must keep
        // it from being treated as a temporal keyword.
        let routed = route("meeting in Chicago about the project");
        assert_ne!(routed, MemoryRoute::Episodic);
    }

    #[test]
    fn non_temporal_unchanged() {
        let routed = route("how does the agent loop work");
        assert_eq!(routed, MemoryRoute::Semantic);
    }

    #[test]
    fn code_query_unchanged() {
        let routed = route("zeph_memory::recall");
        assert_eq!(routed, MemoryRoute::Keyword);
    }

    // ── resolve_temporal_range tests ─────────────────────────────────────────

    #[test]
    fn resolve_yesterday_range() {
        // Reference instant: 2026-03-14 12:00:00 UTC.
        let window = resolve_temporal_range("what did we discuss yesterday", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-13 00:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-13 23:59:59"));
    }

    #[test]
    fn resolve_last_week_range() {
        // Reference instant: 2026-03-14 12:00:00 UTC; seven days earlier is 2026-03-07.
        let window =
            resolve_temporal_range("remember last week's discussion", fixed_now()).unwrap();
        let lower_bound = window.after.as_deref().unwrap();
        assert!(lower_bound.starts_with("2026-03-07"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_last_month_range() {
        // Thirty days before 2026-03-14 is 2026-02-12.
        let window = resolve_temporal_range("find the bug from last month", fixed_now()).unwrap();
        let lower_bound = window.after.as_deref().unwrap();
        assert!(lower_bound.starts_with("2026-02-12"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_today_range() {
        let window = resolve_temporal_range("what did we do today", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 00:00:00"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_this_morning_range() {
        let window = resolve_temporal_range("what did we say this morning", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 00:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-14 12:00:00"));
    }

    #[test]
    fn resolve_last_night_range() {
        let window = resolve_temporal_range("last night's conversation", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-13 18:00:00"));
        assert_eq!(window.before.as_deref(), Some("2026-03-14 06:00:00"));
    }

    #[test]
    fn resolve_tonight_range() {
        let window =
            resolve_temporal_range("remind me tonight what we agreed on", fixed_now()).unwrap();
        assert_eq!(window.after.as_deref(), Some("2026-03-14 18:00:00"));
        assert!(window.before.is_none());
    }

    #[test]
    fn resolve_no_temporal_returns_none() {
        let resolved = resolve_temporal_range("what is the purpose of semantic memory", fixed_now());
        assert!(resolved.is_none());
    }

    #[test]
    fn resolve_generic_temporal_returns_none() {
        // Generic cues ("when did", "remember when", "last time", "how long ago")
        // carry no specific range.
        let reference = fixed_now();
        for generic in [
            "when did we discuss this feature",
            "remember when we fixed that bug",
        ] {
            assert!(resolve_temporal_range(generic, reference).is_none());
        }
    }

    // ── strip_temporal_keywords tests ────────────────────────────────────────

    #[test]
    fn strip_yesterday_from_query() {
        let stripped = strip_temporal_keywords("what did we discuss yesterday about Rust");
        assert_eq!(stripped, "what did we discuss about Rust");
    }

    #[test]
    fn strip_last_week_from_query() {
        let stripped = strip_temporal_keywords("find the config change from last week");
        assert_eq!(stripped, "find the config change from");
    }

    #[test]
    fn strip_does_not_alter_non_temporal() {
        let untouched = "what is the purpose of semantic memory";
        assert_eq!(strip_temporal_keywords(untouched), untouched);
    }

    #[test]
    fn strip_ago_word_boundary() {
        let stripped = strip_temporal_keywords("we fixed this a day ago in the scheduler");
        // The standalone "ago" goes away; the rest of the sentence survives.
        assert!(!stripped.contains("ago"));
        assert!(stripped.contains("scheduler"));
    }

    #[test]
    fn strip_does_not_touch_chicago() {
        let untouched = "meeting in Chicago about the project";
        assert_eq!(strip_temporal_keywords(untouched), untouched);
    }

    #[test]
    fn strip_empty_string_returns_empty() {
        let stripped = strip_temporal_keywords("");
        assert_eq!(stripped, "");
    }

    #[test]
    fn strip_only_temporal_keyword_returns_empty() {
        // A query that is nothing but a temporal keyword strips down to "".
        // recall_routed falls back to the original query in this case.
        let stripped = strip_temporal_keywords("yesterday");
        assert_eq!(stripped, "");
    }

    #[test]
    fn strip_repeated_temporal_keyword_removes_all_occurrences() {
        // IMPL-02: every occurrence has to go, not only the first one.
        let cleaned = strip_temporal_keywords("yesterday I mentioned yesterday's bug");
        let keyword_gone = !cleaned.contains("yesterday");
        assert!(
            keyword_gone,
            "both occurrences must be removed: got '{cleaned}'"
        );
        assert!(cleaned.contains("mentioned"));
    }

    // ── route_with_confidence tests ───────────────────────────────────────────

    #[test]
    fn confidence_multiple_matches_is_less_than_one() {
        // Three signals fire (structural code pattern, snake_case, short query),
        // so confidence = 1.0 / 3 < 1.0.
        let decision = HeuristicRouter.route_with_confidence("zeph_memory::recall");
        assert!(
            decision.confidence < 1.0,
            "ambiguous query should have confidence < 1.0, got {}",
            decision.confidence
        );
        assert_eq!(decision.route, MemoryRoute::Keyword);
    }

    #[test]
    fn confidence_long_question_with_snake_fires_multiple_signals() {
        // A long question containing snake_case trips several signals, which
        // must push confidence below 1.0.
        let ambiguous = "what is the purpose of memory_limit in the config system";
        let decision = HeuristicRouter.route_with_confidence(ambiguous);
        assert!(
            decision.confidence < 1.0,
            "ambiguous query must have confidence < 1.0, got {}",
            decision.confidence
        );
    }

    #[test]
    fn confidence_empty_query_is_nonzero() {
        // Empty string: word_count = 0, so the short path (<= 3 && !question)
        // fires → matched = 1 → confidence = 1.0.
        let decision = HeuristicRouter.route_with_confidence("");
        assert!(
            decision.confidence > 0.0,
            "empty query must match short-path signal"
        );
    }

    #[test]
    fn routing_decision_route_matches_route_fn() {
        // The route inside route_with_confidence() must equal route()'s answer.
        for q in [
            "qdrant",
            "what is the agent loop",
            "context window token budget",
            "what did we discuss yesterday",
        ] {
            let with_confidence = HeuristicRouter.route_with_confidence(q).route;
            let plain = HeuristicRouter.route(q);
            assert_eq!(with_confidence, plain, "mismatch for query: {q}");
        }
    }
}
zeph_memory/router.rs

zeph_memory/
router.rs