zeph_memory/
router.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use chrono::{DateTime, Duration, Utc};
5
6use crate::graph::EdgeType;
7
/// Classification of which memory backend(s) to query.
///
/// This is the value returned by `MemoryRouter::route` and carried in the `route`
/// field of `RoutingDecision`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryRoute {
    /// Full-text search only (`SQLite` FTS5). Fast, good for keyword/exact queries.
    Keyword,
    /// Vector search only (Qdrant). Good for semantic/conceptual queries.
    Semantic,
    /// Both backends, results merged by reciprocal rank fusion.
    Hybrid,
    /// Graph-based retrieval via BFS traversal. Good for relationship queries.
    /// When the `graph-memory` feature is disabled, callers treat this as `Hybrid`.
    Graph,
    /// FTS5 search with a timestamp-range filter. Used for temporal/episodic queries
    /// ("what did we discuss yesterday", "last week's conversation about Rust").
    ///
    /// Known trade-off (MVP): skips vector search entirely for speed. Semantically similar
    /// but lexically different messages may be missed. Use `Hybrid` route when semantic
    /// precision matters more than temporal filtering.
    Episodic,
}
28
/// Routing decision with confidence and optional LLM reasoning.
///
/// Returned by `MemoryRouter::route_with_confidence` and by the async LLM-backed
/// classification paths in this module.
#[derive(Debug, Clone)]
pub struct RoutingDecision {
    /// The backend selection this decision resolves to.
    pub route: MemoryRoute,
    /// Confidence in `[0, 1]`. `1.0` = certain, `0.5` = ambiguous.
    /// `HeuristicRouter` reports `0.0` when none of its patterns matched.
    pub confidence: f32,
    /// Only populated when an LLM classifier was used.
    pub reasoning: Option<String>,
}
38
39/// Decides which memory backend(s) to query for a given input.
40pub trait MemoryRouter: Send + Sync {
41    /// Route a query to the appropriate backend(s).
42    fn route(&self, query: &str) -> MemoryRoute;
43
44    /// Route with a confidence signal. Default implementation wraps `route()` with confidence 1.0.
45    ///
46    /// Override this in routers that can express ambiguity (e.g. `HeuristicRouter`)
47    /// so that `HybridRouter` can escalate uncertain decisions to LLM.
48    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
49        RoutingDecision {
50            route: self.route(query),
51            confidence: 1.0,
52            reasoning: None,
53        }
54    }
55}
56
/// Resolved datetime boundaries for a temporal query.
///
/// Produced by `resolve_temporal_range`.
///
/// Both fields use `SQLite` datetime format (`YYYY-MM-DD HH:MM:SS`, UTC).
/// `None` means "no bound" on that side.
///
/// Note: All timestamps are UTC. The `created_at` column in the `messages` table
/// defaults to `datetime('now')` which is also UTC, so comparisons are consistent.
/// Users in non-UTC timezones may get slightly unexpected results for "yesterday"
/// queries (e.g. at 01:00 UTC+5 the user's local yesterday differs from UTC yesterday).
/// This is an accepted approximation for the heuristic-only MVP.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemporalRange {
    /// Exclusive lower bound: `created_at > after`.
    pub after: Option<String>,
    /// Exclusive upper bound: `created_at < before`.
    pub before: Option<String>,
}
74
/// Temporal patterns that indicate an episodic / time-scoped recall query.
///
/// Every entry in this list is matched as a plain substring of the lowercased query
/// (see `has_temporal_cue`). Multi-word patterns are preferred over single-word ones
/// to reduce false positives. Single-word tokens that can appear inside other words
/// (e.g. "ago" in "Chicago") must NOT be added here: they belong in
/// `WORD_BOUNDARY_TEMPORAL`, which is checked with `contains_word()` to enforce
/// word-boundary semantics.
///
/// Omitted on purpose: "before", "after", "since", "during", "earlier", "recently"
/// — these are too ambiguous in technical contexts ("before the function returns",
/// "since you asked", "during compilation"). They are not in this list.
const TEMPORAL_PATTERNS: &[&str] = &[
    // relative day
    "yesterday",
    "today",
    "this morning",
    "tonight",
    "last night",
    // relative week
    "last week",
    "this week",
    "past week",
    // relative month
    "last month",
    "this month",
    "past month",
    // temporal questions
    "when did",
    "remember when",
    "last time",
    "how long ago",
    // relative multi-word phrases
    // (matched as plain substrings like every other entry; only the bare token "ago"
    // gets a word-boundary check, via `WORD_BOUNDARY_TEMPORAL` and `contains_word`)
    "few days ago",
    "few hours ago",
    "earlier today",
];
110
/// Single-word temporal tokens that require word-boundary checking.
/// These are NOT in `TEMPORAL_PATTERNS` to avoid substring false positives.
///
/// Read by `has_temporal_cue` (through `contains_word`) and by
/// `strip_temporal_keywords`, which removes whole-word occurrences only.
const WORD_BOUNDARY_TEMPORAL: &[&str] = &["ago"];
114
/// MAGMA causal edge markers.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`] to prevent
/// pattern-list drift between the two classifiers (critic suggestion).
///
/// NOTE(review): within this file only `classify_graph_subgraph` reads this list;
/// confirm whether `HeuristicRouter` (or another module) still uses it.
///
/// `classify_graph_subgraph` matches these as plain substrings of the lowercased
/// query, so "cause" also fires on "because" and "caused"; the longer entries are
/// kept for readability.
pub(crate) const CAUSAL_MARKERS: &[&str] = &[
    "why",
    "because",
    "caused",
    "cause",
    "reason",
    "result",
    "led to",
    "consequence",
    "trigger",
    "effect",
    "blame",
    "fault",
];
133
/// MAGMA temporal edge markers for subgraph classification.
///
/// Shared between [`HeuristicRouter`] and [`classify_graph_subgraph`].
/// NOTE(review): within this file only `classify_graph_subgraph` reads this list;
/// confirm the `HeuristicRouter` usage.
///
/// Note: these are distinct from `TEMPORAL_PATTERNS` (which drive `Episodic` routing).
/// `TEMPORAL_MARKERS` detect edges whose *semantics* are temporal (sequencing/ordering),
/// while `TEMPORAL_PATTERNS` detect queries that ask about *when* events occurred.
///
/// Matching is by plain substring, so short markers can fire inside longer words
/// (e.g. "then" inside "authentication").
pub(crate) const TEMPORAL_MARKERS: &[&str] = &[
    "before", "after", "first", "then", "timeline", "sequence", "preceded", "followed", "started",
    "ended", "during", "prior",
];
144
/// MAGMA entity/structural markers.
///
/// `classify_graph_subgraph` matches these as plain substrings of the lowercased
/// query; a hit adds `EdgeType::Entity` to the edge types it returns.
pub(crate) const ENTITY_MARKERS: &[&str] = &[
    "is a",
    "type of",
    "kind of",
    "part of",
    "instance",
    "same as",
    "alias",
    "subtype",
    "subclass",
    "belongs to",
];
158
159/// Classify a query into the MAGMA edge types to use for subgraph-scoped BFS retrieval.
160///
161/// Pure heuristic, zero latency — no LLM call. Returns a prioritised list of [`EdgeType`]s.
162///
163/// Rules (checked in order):
164/// 1. Causal markers → include `Causal`
165/// 2. Temporal markers → include `Temporal`
166/// 3. Entity/structural markers → include `Entity`
167/// 4. `Semantic` is always included as fallback to guarantee recall >= current untyped BFS.
168///
169/// Multiple markers may match, producing a union of detected types.
170///
171/// # Example
172///
173/// ```
174/// # use zeph_memory::router::classify_graph_subgraph;
175/// # use zeph_memory::EdgeType;
176/// let types = classify_graph_subgraph("why did X happen");
177/// assert!(types.contains(&EdgeType::Causal));
178/// assert!(types.contains(&EdgeType::Semantic));
179/// ```
180#[must_use]
181pub fn classify_graph_subgraph(query: &str) -> Vec<EdgeType> {
182    let lower = query.to_ascii_lowercase();
183    let mut types: Vec<EdgeType> = Vec::new();
184
185    if CAUSAL_MARKERS.iter().any(|m| lower.contains(m)) {
186        types.push(EdgeType::Causal);
187    }
188    if TEMPORAL_MARKERS.iter().any(|m| lower.contains(m)) {
189        types.push(EdgeType::Temporal);
190    }
191    if ENTITY_MARKERS.iter().any(|m| lower.contains(m)) {
192        types.push(EdgeType::Entity);
193    }
194
195    // Semantic is always included as fallback — recall cannot be worse than untyped BFS.
196    if !types.contains(&EdgeType::Semantic) {
197        types.push(EdgeType::Semantic);
198    }
199
200    types
201}
202
/// Heuristic-based memory router.
///
/// Decision logic (in priority order):
/// 1. Temporal patterns → `Episodic`
/// 2. Relationship patterns → `Graph`
/// 3. Code-like patterns (paths, `::`) without question word → `Keyword`
/// 4. Long NL query (6+ words) or leading question word → `Semantic`
/// 5. Short (3 words or fewer) non-question query → `Keyword`
/// 6. Remaining 4-5 word query containing a pure `snake_case` token → `Keyword`
/// 7. Default → `Hybrid`
pub struct HeuristicRouter;
213
/// Leading words that mark a query as a natural-language question.
///
/// `starts_with_question` compares only the FIRST whitespace-separated word of the
/// query against this list, ignoring ASCII case.
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];
217
/// Simple substrings that signal a relationship query (checked via `str::contains`
/// against the ASCII-lowercased query).
///
/// `HeuristicRouter` evaluates these patterns unconditionally and answers
/// `MemoryRoute::Graph` on a hit. Gating on the `graph-memory` feature happens at the
/// call site, where `Graph` is treated as `Hybrid` when the feature is disabled.
const RELATIONSHIP_PATTERNS: &[&str] = &[
    "related to",
    "relates to",
    "connection between",
    "relationship",
    "opinion on",
    "thinks about",
    "preference for",
    "history of",
    "know about",
];
231
/// Reports whether `word` occurs in `text` as a standalone word.
///
/// The comparison ignores ASCII case and works on raw bytes. An occurrence counts
/// only when the byte immediately before it and the byte immediately after it (where
/// such a byte exists) is neither an ASCII alphanumeric nor an underscore.
fn contains_word(text: &str, word: &str) -> bool {
    fn is_word_byte(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_'
    }

    let haystack = text.as_bytes();
    let needle = word.as_bytes();

    // A needle longer than the haystack can never match.
    let Some(last_start) = haystack.len().checked_sub(needle.len()) else {
        return false;
    };

    (0..=last_start).any(|start| {
        let end = start + needle.len();
        haystack[start..end].eq_ignore_ascii_case(needle)
            && (start == 0 || !is_word_byte(haystack[start - 1]))
            && (end == haystack.len() || !is_word_byte(haystack[end]))
    })
}
256
257/// Returns true if the lowercased query contains any temporal cue that indicates
258/// an episodic / time-scoped recall request.
259fn has_temporal_cue(lower: &str) -> bool {
260    if TEMPORAL_PATTERNS.iter().any(|p| lower.contains(p)) {
261        return true;
262    }
263    WORD_BOUNDARY_TEMPORAL
264        .iter()
265        .any(|w| contains_word(lower, w))
266}
267
268/// Temporal patterns sorted longest-first for stripping. Initialized once via `LazyLock`
269/// to avoid allocating and sorting on every call to `strip_temporal_keywords`.
270static SORTED_TEMPORAL_PATTERNS: std::sync::LazyLock<Vec<&'static str>> =
271    std::sync::LazyLock::new(|| {
272        let mut v: Vec<&str> = TEMPORAL_PATTERNS.to_vec();
273        v.sort_by_key(|p| std::cmp::Reverse(p.len()));
274        v
275    });
276
277/// Strip matched temporal keywords from a query string before passing to FTS5.
278///
279/// Temporal keywords are routing metadata, not search terms. Passing them to FTS5
280/// causes BM25 score distortion — messages that literally mention "yesterday" get
281/// boosted regardless of actual content relevance.
282///
283/// All occurrences of each pattern are removed (not just the first), preventing
284/// score distortion from repeated temporal tokens in edge cases like
285/// "yesterday I mentioned yesterday's bug".
286///
287/// # Example
288/// ```
289/// # use zeph_memory::router::strip_temporal_keywords;
290/// let cleaned = strip_temporal_keywords("what did we discuss yesterday about Rust");
291/// assert_eq!(cleaned, "what did we discuss about Rust");
292/// ```
293#[must_use]
294pub fn strip_temporal_keywords(query: &str) -> String {
295    // Lowercase once for pattern matching; track removal positions in the original string.
296    // We operate on the lowercased copy for matching, then remove spans from `result`
297    // by rebuilding via byte indices (both strings have identical byte lengths because
298    // to_ascii_lowercase is a 1:1 byte mapping for ASCII).
299    let lower = query.to_ascii_lowercase();
300    // Collect all (start, end) spans to remove, then rebuild the string in one pass.
301    let mut remove: Vec<(usize, usize)> = Vec::new();
302
303    for pattern in SORTED_TEMPORAL_PATTERNS.iter() {
304        let plen = pattern.len();
305        let mut search_from = 0;
306        while let Some(pos) = lower[search_from..].find(pattern) {
307            let abs = search_from + pos;
308            remove.push((abs, abs + plen));
309            search_from = abs + plen;
310        }
311    }
312
313    // Strip word-boundary tokens (single-word, e.g. "ago") — all occurrences.
314    for word in WORD_BOUNDARY_TEMPORAL {
315        let wlen = word.len();
316        let lbytes = lower.as_bytes();
317        let mut i = 0;
318        while i + wlen <= lower.len() {
319            if lower[i..].starts_with(*word) {
320                let before_ok =
321                    i == 0 || !lbytes[i - 1].is_ascii_alphanumeric() && lbytes[i - 1] != b'_';
322                let after_ok = i + wlen == lower.len()
323                    || !lbytes[i + wlen].is_ascii_alphanumeric() && lbytes[i + wlen] != b'_';
324                if before_ok && after_ok {
325                    remove.push((i, i + wlen));
326                    i += wlen;
327                    continue;
328                }
329            }
330            i += 1;
331        }
332    }
333
334    if remove.is_empty() {
335        // Fast path: no patterns found — return the original string.
336        return query.split_whitespace().collect::<Vec<_>>().join(" ");
337    }
338
339    // Merge overlapping/adjacent spans and remove them from the original string.
340    remove.sort_unstable_by_key(|r| r.0);
341    let bytes = query.as_bytes();
342    let mut result = Vec::with_capacity(query.len());
343    let mut cursor = 0;
344    for (start, end) in remove {
345        if start > cursor {
346            result.extend_from_slice(&bytes[cursor..start]);
347        }
348        cursor = cursor.max(end);
349    }
350    if cursor < bytes.len() {
351        result.extend_from_slice(&bytes[cursor..]);
352    }
353
354    // Collapse multiple spaces and trim.
355    // SAFETY: We only removed ASCII byte spans; remaining bytes are still valid UTF-8.
356    let s = String::from_utf8(result).unwrap_or_default();
357    s.split_whitespace()
358        .filter(|t| !t.is_empty())
359        .collect::<Vec<_>>()
360        .join(" ")
361}
362
363/// Resolve temporal keywords in `query` to a `(after, before)` datetime boundary pair.
364///
365/// Returns `None` when no specific range can be computed (the episodic path then falls
366/// back to FTS5 without a time filter, relying on temporal decay for recency boosting).
367///
368/// The `now` parameter is injectable for deterministic unit testing. Production callers
369/// should pass `chrono::Utc::now()`.
370///
371/// All datetime strings are in `SQLite` format: `YYYY-MM-DD HH:MM:SS` (UTC).
372#[must_use]
373pub fn resolve_temporal_range(query: &str, now: DateTime<Utc>) -> Option<TemporalRange> {
374    let lower = query.to_ascii_lowercase();
375
376    // yesterday: the full calendar day before today (UTC)
377    if lower.contains("yesterday") {
378        let yesterday = now.date_naive() - Duration::days(1);
379        return Some(TemporalRange {
380            after: Some(format!("{yesterday} 00:00:00")),
381            before: Some(format!("{yesterday} 23:59:59")),
382        });
383    }
384
385    // last night: 18:00 yesterday to 06:00 today (UTC approximation)
386    if lower.contains("last night") {
387        let yesterday = now.date_naive() - Duration::days(1);
388        let today = now.date_naive();
389        return Some(TemporalRange {
390            after: Some(format!("{yesterday} 18:00:00")),
391            before: Some(format!("{today} 06:00:00")),
392        });
393    }
394
395    // tonight: 18:00 today onwards
396    if lower.contains("tonight") {
397        let today = now.date_naive();
398        return Some(TemporalRange {
399            after: Some(format!("{today} 18:00:00")),
400            before: None,
401        });
402    }
403
404    // this morning: midnight to noon today
405    if lower.contains("this morning") {
406        let today = now.date_naive();
407        return Some(TemporalRange {
408            after: Some(format!("{today} 00:00:00")),
409            before: Some(format!("{today} 12:00:00")),
410        });
411    }
412
413    // today / earlier today: midnight to now.
414    // Note: "earlier today" always contains "today", so a separate branch would be
415    // dead code — the "today" check subsumes it.
416    if lower.contains("today") {
417        let today = now.date_naive();
418        return Some(TemporalRange {
419            after: Some(format!("{today} 00:00:00")),
420            before: None,
421        });
422    }
423
424    // last week / past week / this week: 7-day lookback
425    if lower.contains("last week") || lower.contains("past week") || lower.contains("this week") {
426        let start = now - Duration::days(7);
427        return Some(TemporalRange {
428            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
429            before: None,
430        });
431    }
432
433    // last month / past month / this month: 30-day lookback (approximate)
434    if lower.contains("last month") || lower.contains("past month") || lower.contains("this month")
435    {
436        let start = now - Duration::days(30);
437        return Some(TemporalRange {
438            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
439            before: None,
440        });
441    }
442
443    // "few days ago" / "few hours ago": 3-day lookback
444    if lower.contains("few days ago") {
445        let start = now - Duration::days(3);
446        return Some(TemporalRange {
447            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
448            before: None,
449        });
450    }
451    if lower.contains("few hours ago") {
452        let start = now - Duration::hours(6);
453        return Some(TemporalRange {
454            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
455            before: None,
456        });
457    }
458
459    // "ago" (word-boundary): generic recent lookback (24h)
460    if contains_word(&lower, "ago") {
461        let start = now - Duration::hours(24);
462        return Some(TemporalRange {
463            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
464            before: None,
465        });
466    }
467
468    // Generic temporal cues without a specific range ("when did", "remember when",
469    // "last time", "how long ago") — fall back to FTS5-only with temporal decay.
470    None
471}
472
473fn starts_with_question(words: &[&str]) -> bool {
474    words
475        .first()
476        .is_some_and(|w| QUESTION_WORDS.iter().any(|qw| w.eq_ignore_ascii_case(qw)))
477}
478
/// Reports whether `word` looks like a pure `snake_case` identifier.
///
/// That requires every character to be an ASCII lowercase letter, an ASCII digit or
/// an underscore, with at least one underscore and at least one letter present (so
/// strings made only of digits and underscores are rejected, as is the empty string).
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for byte in word.bytes() {
        match byte {
            b'_' => saw_underscore = true,
            b'a'..=b'z' => saw_letter = true,
            b'0'..=b'9' => {}
            // Uppercase, punctuation, whitespace or any non-ASCII byte disqualifies.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
493
494impl MemoryRouter for HeuristicRouter {
495    /// Returns a confidence signal based on pattern match count (W2.1 fix: gradual scale).
496    ///
497    /// - Exactly one route pattern matches → confidence `1.0` (clear signal)
498    /// - Zero patterns match → confidence `0.0` (pure default fallback)
499    /// - More than one pattern matches → confidence `1.0 / matched_count` (ambiguous, decreasing)
500    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
501        let lower = query.to_ascii_lowercase();
502        let mut matched: u32 = 0;
503        if has_temporal_cue(&lower) {
504            matched += 1;
505        }
506        if RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p)) {
507            matched += 1;
508        }
509        let words: Vec<&str> = query.split_whitespace().collect();
510        let word_count = words.len();
511        let has_structural = query.contains('/') || query.contains("::");
512        let question = starts_with_question(&words);
513        let has_snake = words.iter().any(|w| is_pure_snake_case(w));
514        if has_structural && !question {
515            matched += 1;
516        }
517        if question || word_count >= 6 {
518            matched += 1;
519        }
520        if word_count <= 3 && !question {
521            matched += 1;
522        }
523        if has_snake {
524            matched += 1;
525        }
526
527        #[allow(clippy::cast_precision_loss)]
528        let confidence = match matched {
529            0 => 0.0,
530            1 => 1.0,
531            n => 1.0 / n as f32,
532        };
533
534        RoutingDecision {
535            route: self.route(query),
536            confidence,
537            reasoning: None,
538        }
539    }
540
541    fn route(&self, query: &str) -> MemoryRoute {
542        let lower = query.to_ascii_lowercase();
543
544        // 1. Temporal queries take highest priority — must run before relationship check
545        //    to prevent "history of changes last week" from routing to Graph instead of Episodic.
546        if has_temporal_cue(&lower) {
547            return MemoryRoute::Episodic;
548        }
549
550        // 2. Relationship queries go to graph retrieval (feature-gated at call site)
551        let has_relationship = RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p));
552        if has_relationship {
553            return MemoryRoute::Graph;
554        }
555
556        let words: Vec<&str> = query.split_whitespace().collect();
557        let word_count = words.len();
558
559        // Code-like patterns that unambiguously indicate keyword search:
560        // file paths (contain '/'), Rust paths (contain '::')
561        let has_structural_code_pattern = query.contains('/') || query.contains("::");
562
563        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
564        // but only if the query does NOT start with a question word
565        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
566        let question = starts_with_question(&words);
567
568        if has_structural_code_pattern && !question {
569            return MemoryRoute::Keyword;
570        }
571
572        // Long NL queries → semantic, regardless of snake_case tokens
573        if question || word_count >= 6 {
574            return MemoryRoute::Semantic;
575        }
576
577        // Short queries without question words → keyword
578        if word_count <= 3 && !question {
579            return MemoryRoute::Keyword;
580        }
581
582        // Short code-like patterns → keyword
583        if has_snake_case {
584            return MemoryRoute::Keyword;
585        }
586
587        // Default
588        MemoryRoute::Hybrid
589    }
590}
591
/// LLM-based memory router.
///
/// Sends the query to the configured provider and parses a JSON response:
/// `{"route": "keyword|semantic|hybrid|graph|episodic", "confidence": 0.0-1.0,
/// "reasoning": "<brief>"}`.
///
/// On LLM failure, falls back to `HeuristicRouter`.
pub struct LlmRouter {
    /// Provider used for the classification chat call.
    provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
    /// Route used when the LLM's `route` value is not one of the known labels.
    fallback_route: MemoryRoute,
}
602
603impl LlmRouter {
604    #[must_use]
605    pub fn new(
606        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
607        fallback_route: MemoryRoute,
608    ) -> Self {
609        Self {
610            provider,
611            fallback_route,
612        }
613    }
614
615    async fn classify_async(&self, query: &str) -> RoutingDecision {
616        use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
617
618        let system = "You are a memory store routing classifier. \
619            Given a user query, decide which memory backend is most appropriate. \
620            Respond with ONLY a JSON object: \
621            {\"route\": \"<route>\", \"confidence\": <0.0-1.0>, \"reasoning\": \"<brief>\"} \
622            where <route> is one of: keyword, semantic, hybrid, graph, episodic. \
623            Use 'keyword' for exact/code lookups, 'semantic' for conceptual questions, \
624            'hybrid' for mixed, 'graph' for relationship queries, 'episodic' for time-scoped queries.";
625
626        // Wrap query in delimiters to prevent injection (W2.2 fix).
627        let user = format!(
628            "<query>{}</query>",
629            query.chars().take(500).collect::<String>()
630        );
631
632        let messages = vec![
633            Message {
634                role: Role::System,
635                content: system.to_owned(),
636                parts: vec![],
637                metadata: MessageMetadata::default(),
638            },
639            Message {
640                role: Role::User,
641                content: user,
642                parts: vec![],
643                metadata: MessageMetadata::default(),
644            },
645        ];
646
647        let result = match tokio::time::timeout(
648            std::time::Duration::from_secs(5),
649            self.provider.chat(&messages),
650        )
651        .await
652        {
653            Ok(Ok(r)) => r,
654            Ok(Err(e)) => {
655                tracing::debug!(error = %e, "LlmRouter: LLM call failed, falling back to heuristic");
656                return Self::heuristic_fallback(query);
657            }
658            Err(_) => {
659                tracing::debug!("LlmRouter: LLM timed out, falling back to heuristic");
660                return Self::heuristic_fallback(query);
661            }
662        };
663
664        self.parse_llm_response(&result, query)
665    }
666
667    fn parse_llm_response(&self, raw: &str, query: &str) -> RoutingDecision {
668        // Extract JSON object from the response (may have surrounding text).
669        let json_str = raw
670            .find('{')
671            .and_then(|start| raw[start..].rfind('}').map(|end| &raw[start..=start + end]))
672            .unwrap_or("");
673
674        if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
675            let route_str = v.get("route").and_then(|r| r.as_str()).unwrap_or("hybrid");
676            #[allow(clippy::cast_possible_truncation)]
677            let confidence = v
678                .get("confidence")
679                .and_then(serde_json::Value::as_f64)
680                .map_or(0.5, |c| c.clamp(0.0, 1.0) as f32);
681            let reasoning = v
682                .get("reasoning")
683                .and_then(|r| r.as_str())
684                .map(str::to_owned);
685
686            let route = parse_route_str(route_str, self.fallback_route);
687
688            tracing::debug!(
689                query = &query[..query.len().min(60)],
690                ?route,
691                confidence,
692                "LlmRouter: classified"
693            );
694
695            return RoutingDecision {
696                route,
697                confidence,
698                reasoning,
699            };
700        }
701
702        tracing::debug!("LlmRouter: failed to parse JSON response, falling back to heuristic");
703        Self::heuristic_fallback(query)
704    }
705
706    fn heuristic_fallback(query: &str) -> RoutingDecision {
707        HeuristicRouter.route_with_confidence(query)
708    }
709}
710
711fn parse_route_str(s: &str, fallback: MemoryRoute) -> MemoryRoute {
712    match s {
713        "keyword" => MemoryRoute::Keyword,
714        "semantic" => MemoryRoute::Semantic,
715        "hybrid" => MemoryRoute::Hybrid,
716        "graph" => MemoryRoute::Graph,
717        "episodic" => MemoryRoute::Episodic,
718        _ => fallback,
719    }
720}
721
722impl MemoryRouter for LlmRouter {
723    fn route(&self, query: &str) -> MemoryRoute {
724        // Sync path: LLM is not available without an async executor.
725        // Falls back to heuristic — use route_async() for LLM-based classification.
726        HeuristicRouter.route(query)
727    }
728
729    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
730        // LlmRouter is designed for use in async contexts via classify_async.
731        // When called synchronously (e.g. in tests), fall back to heuristic.
732        HeuristicRouter.route_with_confidence(query)
733    }
734}
735
/// Async extension for LLM-capable routers.
///
/// The method returns a boxed, pinned future rather than being an `async fn`; the
/// future is `Send` and borrows both the router and the query for `'a`.
pub trait AsyncMemoryRouter: MemoryRouter {
    /// Classify `query` asynchronously, resolving to a full [`RoutingDecision`].
    fn route_async<'a>(
        &'a self,
        query: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>>;
}
743
744impl AsyncMemoryRouter for LlmRouter {
745    fn route_async<'a>(
746        &'a self,
747        query: &'a str,
748    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
749        Box::pin(self.classify_async(query))
750    }
751}
752
/// Hybrid router: heuristic-first, escalates to LLM when confidence is low.
///
/// The `HybridRouter` runs `HeuristicRouter` first. If the heuristic confidence
/// is below `confidence_threshold`, it escalates to the LLM router.
/// LLM failures always fall back to the heuristic result.
///
/// Escalation only happens on the async path (`route_async`); the synchronous
/// `MemoryRouter` methods use the heuristic alone.
pub struct HybridRouter {
    /// LLM classifier consulted when the heuristic is not confident enough.
    llm: LlmRouter,
    /// Minimum heuristic confidence at which the LLM call is skipped.
    confidence_threshold: f32,
}
762
763impl HybridRouter {
764    #[must_use]
765    pub fn new(
766        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
767        fallback_route: MemoryRoute,
768        confidence_threshold: f32,
769    ) -> Self {
770        Self {
771            llm: LlmRouter::new(provider, fallback_route),
772            confidence_threshold,
773        }
774    }
775
776    pub async fn route_async(&self, query: &str) -> RoutingDecision {
777        let heuristic = HeuristicRouter.route_with_confidence(query);
778        if heuristic.confidence >= self.confidence_threshold {
779            tracing::debug!(
780                query = &query[..query.len().min(60)],
781                confidence = heuristic.confidence,
782                route = ?heuristic.route,
783                "HybridRouter: heuristic sufficient, skipping LLM"
784            );
785            return heuristic;
786        }
787
788        tracing::debug!(
789            query = &query[..query.len().min(60)],
790            confidence = heuristic.confidence,
791            threshold = self.confidence_threshold,
792            "HybridRouter: low confidence, escalating to LLM"
793        );
794
795        let llm_result = self.llm.classify_async(query).await;
796
797        // LLM failure path: classify_async returns a heuristic fallback on error.
798        // Always log the final decision.
799        tracing::debug!(
800            route = ?llm_result.route,
801            confidence = llm_result.confidence,
802            "HybridRouter: final route after LLM escalation"
803        );
804        llm_result
805    }
806}
807
808impl MemoryRouter for HybridRouter {
809    fn route(&self, query: &str) -> MemoryRoute {
810        HeuristicRouter.route(query)
811    }
812
813    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
814        // Synchronous path: can't call async LLM, use heuristic only.
815        HeuristicRouter.route_with_confidence(query)
816    }
817}
818
#[cfg(test)]
mod tests {
    use chrono::TimeZone as _;

    use super::*;

    /// Routes `q` through the heuristic router.
    fn route(q: &str) -> MemoryRoute {
        HeuristicRouter.route(q)
    }

    /// Fixed reference instant for all temporal tests: 2026-03-14 12:00:00 UTC.
    fn fixed_now() -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 3, 14, 12, 0, 0).unwrap()
    }

    /// Asserts that every query in `queries` is routed to `expected`.
    #[track_caller]
    fn assert_routes(expected: MemoryRoute, queries: &[&str]) {
        for &query in queries {
            assert_eq!(route(query), expected, "query: {query:?}");
        }
    }

    /// Asserts that `query` is NOT routed to `rejected`.
    #[track_caller]
    fn assert_not_routed(rejected: MemoryRoute, query: &str) {
        assert_ne!(route(query), rejected, "query: {query:?}");
    }

    /// Asserts that stripping temporal keywords from `input` yields `expected`.
    #[track_caller]
    fn assert_stripped(input: &str, expected: &str) {
        assert_eq!(strip_temporal_keywords(input), expected, "input: {input:?}");
    }

    #[test]
    fn rust_path_routes_keyword() {
        assert_routes(MemoryRoute::Keyword, &["zeph_memory::recall"]);
    }

    #[test]
    fn file_path_routes_keyword() {
        assert_routes(MemoryRoute::Keyword, &["crates/zeph-core/src/agent/mod.rs"]);
    }

    #[test]
    fn pure_snake_case_routes_keyword() {
        assert_routes(MemoryRoute::Keyword, &["memory_limit", "error_handling"]);
    }

    #[test]
    fn question_with_snake_case_routes_semantic() {
        // A leading question word takes precedence over the snake_case heuristic.
        assert_routes(
            MemoryRoute::Semantic,
            &[
                "what is the memory_limit setting",
                "how does error_handling work",
            ],
        );
    }

    #[test]
    fn short_query_routes_keyword() {
        assert_routes(MemoryRoute::Keyword, &["context compaction", "qdrant"]);
    }

    #[test]
    fn question_routes_semantic() {
        assert_routes(
            MemoryRoute::Semantic,
            &[
                "what is the purpose of semantic memory",
                "how does the agent loop work",
                "why does compaction fail",
                "explain context compression",
            ],
        );
    }

    #[test]
    fn long_natural_query_routes_semantic() {
        assert_routes(
            MemoryRoute::Semantic,
            &["the agent keeps running out of context during long conversations"],
        );
    }

    #[test]
    fn medium_non_question_routes_hybrid() {
        // Mid-length query: no question word and no code pattern.
        assert_routes(MemoryRoute::Hybrid, &["context window token budget"]);
    }

    #[test]
    fn empty_query_routes_keyword() {
        // Zero words and no question word: takes the short-query (keyword) path.
        assert_routes(MemoryRoute::Keyword, &[""]);
    }

    #[test]
    fn question_word_only_routes_semantic() {
        // One word, but it is a question word: the short-query keyword path
        // (`<= 3 words && !question`) is skipped and the
        // `question || word_count >= 6` branch yields Semantic.
        assert_routes(MemoryRoute::Semantic, &["what"]);
    }

    #[test]
    fn camel_case_does_not_route_keyword_without_pattern() {
        // CamelCase alone is not a code signal: four words with no `::` or `/`,
        // no snake_case and no question word are expected to land on Hybrid.
        assert_routes(
            MemoryRoute::Hybrid,
            &["SemanticMemory configuration and options"],
        );
    }

    #[test]
    fn relationship_query_routes_graph() {
        assert_routes(
            MemoryRoute::Graph,
            &[
                "what is user's opinion on neovim",
                "show the relationship between Alice and Bob",
            ],
        );
    }

    #[test]
    fn relationship_query_related_to_routes_graph() {
        assert_routes(
            MemoryRoute::Graph,
            &[
                "how is Rust related to this project",
                "how does this relates to the config",
            ],
        );
    }

    #[test]
    fn relationship_know_about_routes_graph() {
        assert_routes(MemoryRoute::Graph, &["what do I know about neovim"]);
    }

    #[test]
    fn translate_does_not_route_graph() {
        // "translate" contains the substring "relate", but only "related to" and
        // "relates to" are relationship patterns (bare "relate" was removed).
        assert_not_routed(MemoryRoute::Graph, "translate this code to Python");
    }

    #[test]
    fn non_relationship_stays_semantic() {
        assert_routes(
            MemoryRoute::Semantic,
            &["find similar code patterns in the codebase"],
        );
    }

    #[test]
    fn short_keyword_unchanged() {
        assert_routes(MemoryRoute::Keyword, &["qdrant"]);
    }

    // Regression tests for #1661: long NL queries with snake_case must go to Semantic
    #[test]
    fn long_nl_with_snake_case_routes_semantic() {
        assert_routes(
            MemoryRoute::Semantic,
            &["Use memory_search to find information about Rust ownership"],
        );
    }

    #[test]
    fn short_snake_case_only_routes_keyword() {
        assert_routes(MemoryRoute::Keyword, &["memory_search"]);
    }

    #[test]
    fn question_with_snake_case_short_routes_semantic() {
        assert_routes(MemoryRoute::Semantic, &["What does memory_search return?"]);
    }

    // ── Temporal routing tests ────────────────────────────────────────────────

    #[test]
    fn temporal_yesterday_routes_episodic() {
        assert_routes(MemoryRoute::Episodic, &["what did we discuss yesterday"]);
    }

    #[test]
    fn temporal_last_week_routes_episodic() {
        assert_routes(MemoryRoute::Episodic, &["remember what happened last week"]);
    }

    #[test]
    fn temporal_when_did_routes_episodic() {
        assert_routes(
            MemoryRoute::Episodic,
            &["when did we last talk about Qdrant"],
        );
    }

    #[test]
    fn temporal_last_time_routes_episodic() {
        assert_routes(
            MemoryRoute::Episodic,
            &["last time we discussed the scheduler"],
        );
    }

    #[test]
    fn temporal_today_routes_episodic() {
        assert_routes(
            MemoryRoute::Episodic,
            &["what did I mention today about testing"],
        );
    }

    #[test]
    fn temporal_this_morning_routes_episodic() {
        assert_routes(MemoryRoute::Episodic, &["what did we say this morning"]);
    }

    #[test]
    fn temporal_last_month_routes_episodic() {
        assert_routes(
            MemoryRoute::Episodic,
            &["find the config change from last month"],
        );
    }

    #[test]
    fn temporal_history_collision_routes_episodic() {
        // CRIT-01: "history of" is a relationship pattern, but the temporal check
        // runs first, so "last week" wins and the route is Episodic, not Graph.
        assert_routes(MemoryRoute::Episodic, &["history of changes last week"]);
    }

    #[test]
    fn temporal_ago_word_boundary_routes_episodic() {
        assert_routes(MemoryRoute::Episodic, &["we fixed this a day ago"]);
    }

    #[test]
    fn ago_in_chicago_no_false_positive() {
        // MED-01: "Chicago" contains "ago"; the word-boundary check must keep it
        // from being treated as a temporal keyword.
        assert_not_routed(
            MemoryRoute::Episodic,
            "meeting in Chicago about the project",
        );
    }

    #[test]
    fn non_temporal_unchanged() {
        assert_routes(MemoryRoute::Semantic, &["how does the agent loop work"]);
    }

    #[test]
    fn code_query_unchanged() {
        assert_routes(MemoryRoute::Keyword, &["zeph_memory::recall"]);
    }

    // ── resolve_temporal_range tests ─────────────────────────────────────────

    #[test]
    fn resolve_yesterday_range() {
        // Reference instant is 2026-03-14 12:00:00 UTC → the whole of 2026-03-13.
        let range =
            resolve_temporal_range("what did we discuss yesterday", fixed_now()).unwrap();
        assert_eq!(
            (range.after.as_deref(), range.before.as_deref()),
            (Some("2026-03-13 00:00:00"), Some("2026-03-13 23:59:59")),
        );
    }

    #[test]
    fn resolve_last_week_range() {
        let range =
            resolve_temporal_range("remember last week's discussion", fixed_now()).unwrap();
        // 7 days before 2026-03-14 = 2026-03-07; the range is open-ended.
        let lower = range.after.as_deref().unwrap();
        assert!(lower.starts_with("2026-03-07"));
        assert_eq!(range.before.as_deref(), None);
    }

    #[test]
    fn resolve_last_month_range() {
        let range = resolve_temporal_range("find the bug from last month", fixed_now()).unwrap();
        // 30 days before 2026-03-14 = 2026-02-12; the range is open-ended.
        let lower = range.after.as_deref().unwrap();
        assert!(lower.starts_with("2026-02-12"));
        assert_eq!(range.before.as_deref(), None);
    }

    #[test]
    fn resolve_today_range() {
        let range = resolve_temporal_range("what did we do today", fixed_now()).unwrap();
        assert_eq!(
            (range.after.as_deref(), range.before.as_deref()),
            (Some("2026-03-14 00:00:00"), None),
        );
    }

    #[test]
    fn resolve_this_morning_range() {
        let range = resolve_temporal_range("what did we say this morning", fixed_now()).unwrap();
        assert_eq!(
            (range.after.as_deref(), range.before.as_deref()),
            (Some("2026-03-14 00:00:00"), Some("2026-03-14 12:00:00")),
        );
    }

    #[test]
    fn resolve_last_night_range() {
        let range = resolve_temporal_range("last night's conversation", fixed_now()).unwrap();
        assert_eq!(
            (range.after.as_deref(), range.before.as_deref()),
            (Some("2026-03-13 18:00:00"), Some("2026-03-14 06:00:00")),
        );
    }

    #[test]
    fn resolve_tonight_range() {
        let range =
            resolve_temporal_range("remind me tonight what we agreed on", fixed_now()).unwrap();
        assert_eq!(
            (range.after.as_deref(), range.before.as_deref()),
            (Some("2026-03-14 18:00:00"), None),
        );
    }

    #[test]
    fn resolve_no_temporal_returns_none() {
        let resolved =
            resolve_temporal_range("what is the purpose of semantic memory", fixed_now());
        assert!(resolved.is_none());
    }

    #[test]
    fn resolve_generic_temporal_returns_none() {
        // Generic temporal phrasing ("when did", "remember when") names no
        // specific range, so nothing is resolved.
        let now = fixed_now();
        for query in [
            "when did we discuss this feature",
            "remember when we fixed that bug",
        ] {
            assert!(resolve_temporal_range(query, now).is_none());
        }
    }

    // ── strip_temporal_keywords tests ────────────────────────────────────────

    #[test]
    fn strip_yesterday_from_query() {
        assert_stripped(
            "what did we discuss yesterday about Rust",
            "what did we discuss about Rust",
        );
    }

    #[test]
    fn strip_last_week_from_query() {
        assert_stripped(
            "find the config change from last week",
            "find the config change from",
        );
    }

    #[test]
    fn strip_does_not_alter_non_temporal() {
        let q = "what is the purpose of semantic memory";
        assert_stripped(q, q);
    }

    #[test]
    fn strip_ago_word_boundary() {
        // "ago" is removed; the rest of the query survives.
        let cleaned = strip_temporal_keywords("we fixed this a day ago in the scheduler");
        let ago_gone = !cleaned.contains("ago");
        let scheduler_kept = cleaned.contains("scheduler");
        assert!(ago_gone);
        assert!(scheduler_kept);
    }

    #[test]
    fn strip_does_not_touch_chicago() {
        let q = "meeting in Chicago about the project";
        assert_stripped(q, q);
    }

    #[test]
    fn strip_empty_string_returns_empty() {
        assert_stripped("", "");
    }

    #[test]
    fn strip_only_temporal_keyword_returns_empty() {
        // A query consisting solely of a temporal keyword strips down to "".
        // recall_routed falls back to the original query in this case.
        assert_stripped("yesterday", "");
    }

    #[test]
    fn strip_repeated_temporal_keyword_removes_all_occurrences() {
        // IMPL-02: every occurrence must be removed, not only the first one.
        let cleaned = strip_temporal_keywords("yesterday I mentioned yesterday's bug");
        let keyword_left = cleaned.contains("yesterday");
        assert!(
            !keyword_left,
            "both occurrences must be removed: got '{cleaned}'"
        );
        assert!(cleaned.contains("mentioned"));
    }

    // ── route_with_confidence tests ───────────────────────────────────────────

    #[test]
    fn confidence_multiple_matches_is_less_than_one() {
        // Structural code pattern + snake_case + short query are expected to fire
        // three signals, giving a confidence of 1.0 / 3 < 1.0.
        let RoutingDecision {
            route, confidence, ..
        } = HeuristicRouter.route_with_confidence("zeph_memory::recall");
        assert!(
            confidence < 1.0,
            "ambiguous query should have confidence < 1.0, got {}",
            confidence
        );
        assert_eq!(route, MemoryRoute::Keyword);
    }

    #[test]
    fn confidence_long_question_with_snake_fires_multiple_signals() {
        // A long question containing snake_case should trip several signals at
        // once, which must lower the confidence below 1.0.
        let query = "what is the purpose of memory_limit in the config system";
        let RoutingDecision { confidence, .. } = HeuristicRouter.route_with_confidence(query);
        assert!(
            confidence < 1.0,
            "ambiguous query must have confidence < 1.0, got {}",
            confidence
        );
    }

    #[test]
    fn confidence_empty_query_is_nonzero() {
        // Empty string: word_count = 0, so the short path (`<= 3 && !question`)
        // fires and at least one signal matches.
        let RoutingDecision { confidence, .. } = HeuristicRouter.route_with_confidence("");
        assert!(
            confidence > 0.0,
            "empty query must match short-path signal"
        );
    }

    #[test]
    fn routing_decision_route_matches_route_fn() {
        // `route_with_confidence().route` must agree with `route()`.
        for q in [
            "qdrant",
            "what is the agent loop",
            "context window token budget",
            "what did we discuss yesterday",
        ] {
            let with_confidence = HeuristicRouter.route_with_confidence(q).route;
            let plain = HeuristicRouter.route(q);
            assert_eq!(with_confidence, plain, "mismatch for query: {q}");
        }
    }
}
zeph_memory/router.rs

zeph_memory/
router.rs