Skip to main content

zeph_memory/
router.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Memory routing — classify recall queries and dispatch to the right backend.
5//!
6//! The [`MemoryRouter`] trait is implemented by:
7//!
8//! | Type | Strategy |
9//! |------|----------|
10//! | [`HeuristicRouter`] | Fast regex/keyword pattern matching. No LLM call. |
11//! | [`LlmRouter`] | Uses an LLM classifier for high-accuracy routing. |
12//! | [`HybridRouter`] | Runs [`HeuristicRouter`] first; escalates to [`LlmRouter`] when confidence is low. |
//! | [`AsyncMemoryRouter`] | Async counterpart trait (`route_async`); each router above implements it alongside `MemoryRouter`. |
14//!
15//! # Routing pipeline
16//!
17//! 1. `HeuristicRouter` classifies the query in < 1 ms using temporal-keyword detection
18//!    and graph-relationship pattern matching.
19//! 2. If confidence >= threshold, the route is used directly.
20//! 3. Otherwise, `HybridRouter` forwards the query to `LlmRouter` for a second opinion.
21
22use chrono::{DateTime, Duration, Utc};
23
24pub use zeph_common::memory::{
25    AsyncMemoryRouter, CAUSAL_MARKERS, ENTITY_MARKERS, MemoryRoute, MemoryRouter, RoutingDecision,
26    TEMPORAL_MARKERS, WORD_BOUNDARY_TEMPORAL, classify_graph_subgraph, parse_route_str,
27};
28
/// Resolved datetime boundaries for a temporal query.
///
/// Both fields use `SQLite` datetime format (`YYYY-MM-DD HH:MM:SS`, UTC).
/// `None` means "no bound" on that side.
///
/// Both bounds are documented as exclusive, so a row whose `created_at` equals a
/// bound exactly is not selected.
/// NOTE(review): the SQL comparison itself is not in this file — confirm the operators
/// used by the query builder match the field docs below.
///
/// Note: All timestamps are UTC. The `created_at` column in the `messages` table
/// defaults to `datetime('now')` which is also UTC, so comparisons are consistent.
/// Users in non-UTC timezones may get slightly unexpected results for "yesterday"
/// queries (e.g. at 01:00 UTC+5 the user's local yesterday differs from UTC yesterday).
/// This is an accepted approximation for the heuristic-only MVP.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemporalRange {
    /// Exclusive lower bound: `created_at > after`.
    pub after: Option<String>,
    /// Exclusive upper bound: `created_at < before`.
    pub before: Option<String>,
}
46
/// Temporal patterns that indicate an episodic / time-scoped recall query.
///
/// Every entry in this list is matched as a plain substring of the lowercased query
/// (see `has_temporal_cue`), and is stripped the same way by `strip_temporal_keywords`.
///
/// Multi-word patterns are preferred over single-word ones to reduce false positives.
/// Single-word patterns that can appear inside other words (e.g. "ago" in "Chicago")
/// do not belong here: they must be checked with `contains_word()` to enforce
/// word-boundary semantics, which is what the `WORD_BOUNDARY_TEMPORAL` list is used for.
///
/// Omitted on purpose: "before", "after", "since", "during", "earlier", "recently"
/// — these are too ambiguous in technical contexts ("before the function returns",
/// "since you asked", "during compilation"). They are not in this list.
const TEMPORAL_PATTERNS: &[&str] = &[
    // relative day
    "yesterday",
    "today",
    "this morning",
    "tonight",
    "last night",
    // relative week
    "last week",
    "this week",
    "past week",
    // relative month
    "last month",
    "this month",
    "past month",
    // temporal questions
    "when did",
    "remember when",
    "last time",
    "how long ago",
    // multi-word relative phrases — plain substring matches like the rest of this list
    // (the bare word "ago" is handled separately with a word-boundary check)
    "few days ago",
    "few hours ago",
    "earlier today",
];
82
/// Heuristic-based memory router.
///
/// Decision logic (in priority order):
/// 1. Temporal patterns → `Episodic`
/// 2. Relationship patterns → `Graph`
/// 3. Code-like patterns (paths, `::`) without question word → `Keyword`
/// 4. Long NL query (6+ words) or question word → `Semantic`
/// 5. Short non-question query (at most 3 words) → `Keyword`
/// 6. Remaining 4–5 word query containing a pure `snake_case` token → `Keyword`
/// 7. Default → `Hybrid`
pub struct HeuristicRouter;
93
/// Leading words that mark a query as a natural-language question.
///
/// Only the first whitespace-separated token of the query is compared against this
/// list, ignoring ASCII case (see `starts_with_question`).
const QUESTION_WORDS: &[&str] = &[
    "what", "how", "why", "when", "where", "who", "which", "explain", "describe",
];
97
/// Simple substrings that signal a relationship query (checked via `str::contains`
/// against the lowercased query).
///
/// Matching is unconditional in this file; per the comment in `HeuristicRouter::route`,
/// feature gating for graph retrieval happens at the call site.
///
/// Entries are deliberately phrases rather than bare stems: the tests expect
/// "translate" not to route to `Graph`, which a bare "relate" pattern would break.
const RELATIONSHIP_PATTERNS: &[&str] = &[
    "related to",
    "relates to",
    "connection between",
    "relationship",
    "opinion on",
    "thinks about",
    "preference for",
    "history of",
    "know about",
];
111
/// Reports whether `word` occurs in `text` as a standalone word.
///
/// The comparison ignores ASCII case. An occurrence counts only when the byte on
/// each side of it (if there is one) is neither an ASCII alphanumeric nor `_`, so
/// "ago" matches in "two days ago" but not in "Chicago" or "ago_time".
fn contains_word(text: &str, word: &str) -> bool {
    let haystack = text.as_bytes();
    let needle = word.as_bytes();

    // A needle longer than the haystack can never match.
    let Some(last_start) = haystack.len().checked_sub(needle.len()) else {
        return false;
    };

    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    (0..=last_start).any(|begin| {
        let end = begin + needle.len();
        haystack[begin..end].eq_ignore_ascii_case(needle)
            && (begin == 0 || !is_word_byte(haystack[begin - 1]))
            && (end == haystack.len() || !is_word_byte(haystack[end]))
    })
}
136
137/// Returns true if the lowercased query contains any temporal cue that indicates
138/// an episodic / time-scoped recall request.
139fn has_temporal_cue(lower: &str) -> bool {
140    if TEMPORAL_PATTERNS.iter().any(|p| lower.contains(p)) {
141        return true;
142    }
143    WORD_BOUNDARY_TEMPORAL
144        .iter()
145        .any(|w| contains_word(lower, w))
146}
147
148/// Temporal patterns sorted longest-first for stripping. Initialized once via `LazyLock`
149/// to avoid allocating and sorting on every call to `strip_temporal_keywords`.
150static SORTED_TEMPORAL_PATTERNS: std::sync::LazyLock<Vec<&'static str>> =
151    std::sync::LazyLock::new(|| {
152        let mut v: Vec<&str> = TEMPORAL_PATTERNS.to_vec();
153        v.sort_by_key(|p| std::cmp::Reverse(p.len()));
154        v
155    });
156
157/// Strip matched temporal keywords from a query string before passing to FTS5.
158///
159/// Temporal keywords are routing metadata, not search terms. Passing them to FTS5
160/// causes BM25 score distortion — messages that literally mention "yesterday" get
161/// boosted regardless of actual content relevance.
162///
163/// All occurrences of each pattern are removed (not just the first), preventing
164/// score distortion from repeated temporal tokens in edge cases like
165/// "yesterday I mentioned yesterday's bug".
166///
167/// # Example
168/// ```
169/// # use zeph_memory::router::strip_temporal_keywords;
170/// let cleaned = strip_temporal_keywords("what did we discuss yesterday about Rust");
171/// assert_eq!(cleaned, "what did we discuss about Rust");
172/// ```
173#[must_use]
174pub fn strip_temporal_keywords(query: &str) -> String {
175    // Lowercase once for pattern matching; track removal positions in the original string.
176    // We operate on the lowercased copy for matching, then remove spans from `result`
177    // by rebuilding via byte indices (both strings have identical byte lengths because
178    // to_ascii_lowercase is a 1:1 byte mapping for ASCII).
179    let lower = query.to_ascii_lowercase();
180    // Collect all (start, end) spans to remove, then rebuild the string in one pass.
181    let mut remove: Vec<(usize, usize)> = Vec::new();
182
183    for pattern in SORTED_TEMPORAL_PATTERNS.iter() {
184        let plen = pattern.len();
185        let mut search_from = 0;
186        while let Some(pos) = lower[search_from..].find(pattern) {
187            let abs = search_from + pos;
188            remove.push((abs, abs + plen));
189            search_from = abs + plen;
190        }
191    }
192
193    // Strip word-boundary tokens (single-word, e.g. "ago") — all occurrences.
194    for word in WORD_BOUNDARY_TEMPORAL {
195        let wlen = word.len();
196        let lbytes = lower.as_bytes();
197        let mut i = 0;
198        while i + wlen <= lower.len() {
199            if lower[i..].starts_with(*word) {
200                let before_ok =
201                    i == 0 || !lbytes[i - 1].is_ascii_alphanumeric() && lbytes[i - 1] != b'_';
202                let after_ok = i + wlen == lower.len()
203                    || !lbytes[i + wlen].is_ascii_alphanumeric() && lbytes[i + wlen] != b'_';
204                if before_ok && after_ok {
205                    remove.push((i, i + wlen));
206                    i += wlen;
207                    continue;
208                }
209            }
210            i += 1;
211        }
212    }
213
214    if remove.is_empty() {
215        // Fast path: no patterns found — return the original string.
216        return query.split_whitespace().collect::<Vec<_>>().join(" ");
217    }
218
219    // Merge overlapping/adjacent spans and remove them from the original string.
220    remove.sort_unstable_by_key(|r| r.0);
221    let bytes = query.as_bytes();
222    let mut result = Vec::with_capacity(query.len());
223    let mut cursor = 0;
224    for (start, end) in remove {
225        if start > cursor {
226            result.extend_from_slice(&bytes[cursor..start]);
227        }
228        cursor = cursor.max(end);
229    }
230    if cursor < bytes.len() {
231        result.extend_from_slice(&bytes[cursor..]);
232    }
233
234    // Collapse multiple spaces and trim.
235    // SAFETY: We only removed ASCII byte spans; remaining bytes are still valid UTF-8.
236    let s = String::from_utf8(result).unwrap_or_default();
237    s.split_whitespace()
238        .filter(|t| !t.is_empty())
239        .collect::<Vec<_>>()
240        .join(" ")
241}
242
243/// Resolve temporal keywords in `query` to a `(after, before)` datetime boundary pair.
244///
245/// Returns `None` when no specific range can be computed (the episodic path then falls
246/// back to FTS5 without a time filter, relying on temporal decay for recency boosting).
247///
248/// The `now` parameter is injectable for deterministic unit testing. Production callers
249/// should pass `chrono::Utc::now()`.
250///
251/// All datetime strings are in `SQLite` format: `YYYY-MM-DD HH:MM:SS` (UTC).
252#[must_use]
253pub fn resolve_temporal_range(query: &str, now: DateTime<Utc>) -> Option<TemporalRange> {
254    let lower = query.to_ascii_lowercase();
255
256    // yesterday: the full calendar day before today (UTC)
257    if lower.contains("yesterday") {
258        let yesterday = now.date_naive() - Duration::days(1);
259        return Some(TemporalRange {
260            after: Some(format!("{yesterday} 00:00:00")),
261            before: Some(format!("{yesterday} 23:59:59")),
262        });
263    }
264
265    // last night: 18:00 yesterday to 06:00 today (UTC approximation)
266    if lower.contains("last night") {
267        let yesterday = now.date_naive() - Duration::days(1);
268        let today = now.date_naive();
269        return Some(TemporalRange {
270            after: Some(format!("{yesterday} 18:00:00")),
271            before: Some(format!("{today} 06:00:00")),
272        });
273    }
274
275    // tonight: 18:00 today onwards
276    if lower.contains("tonight") {
277        let today = now.date_naive();
278        return Some(TemporalRange {
279            after: Some(format!("{today} 18:00:00")),
280            before: None,
281        });
282    }
283
284    // this morning: midnight to noon today
285    if lower.contains("this morning") {
286        let today = now.date_naive();
287        return Some(TemporalRange {
288            after: Some(format!("{today} 00:00:00")),
289            before: Some(format!("{today} 12:00:00")),
290        });
291    }
292
293    // today / earlier today: midnight to now.
294    // Note: "earlier today" always contains "today", so a separate branch would be
295    // dead code — the "today" check subsumes it.
296    if lower.contains("today") {
297        let today = now.date_naive();
298        return Some(TemporalRange {
299            after: Some(format!("{today} 00:00:00")),
300            before: None,
301        });
302    }
303
304    // last week / past week / this week: 7-day lookback
305    if lower.contains("last week") || lower.contains("past week") || lower.contains("this week") {
306        let start = now - Duration::days(7);
307        return Some(TemporalRange {
308            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
309            before: None,
310        });
311    }
312
313    // last month / past month / this month: 30-day lookback (approximate)
314    if lower.contains("last month") || lower.contains("past month") || lower.contains("this month")
315    {
316        let start = now - Duration::days(30);
317        return Some(TemporalRange {
318            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
319            before: None,
320        });
321    }
322
323    // "few days ago" / "few hours ago": 3-day lookback
324    if lower.contains("few days ago") {
325        let start = now - Duration::days(3);
326        return Some(TemporalRange {
327            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
328            before: None,
329        });
330    }
331    if lower.contains("few hours ago") {
332        let start = now - Duration::hours(6);
333        return Some(TemporalRange {
334            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
335            before: None,
336        });
337    }
338
339    // "ago" (word-boundary): generic recent lookback (24h)
340    if contains_word(&lower, "ago") {
341        let start = now - Duration::hours(24);
342        return Some(TemporalRange {
343            after: Some(start.format("%Y-%m-%d %H:%M:%S").to_string()),
344            before: None,
345        });
346    }
347
348    // Generic temporal cues without a specific range ("when did", "remember when",
349    // "last time", "how long ago") — fall back to FTS5-only with temporal decay.
350    None
351}
352
353fn starts_with_question(words: &[&str]) -> bool {
354    words
355        .first()
356        .is_some_and(|w| QUESTION_WORDS.iter().any(|qw| w.eq_ignore_ascii_case(qw)))
357}
358
/// Reports whether `word` is a pure `snake_case` identifier.
///
/// That means: only ASCII lowercase letters, digits and underscores; at least one
/// underscore; and at least one letter (so `1_2` and `___` do not qualify).
fn is_pure_snake_case(word: &str) -> bool {
    let mut saw_underscore = false;
    let mut saw_letter = false;

    for byte in word.bytes() {
        match byte {
            b'_' => saw_underscore = true,
            b'a'..=b'z' => saw_letter = true,
            b'0'..=b'9' => {}
            // Uppercase, punctuation or any non-ASCII byte disqualifies the word.
            _ => return false,
        }
    }

    saw_underscore && saw_letter
}
373
374impl MemoryRouter for HeuristicRouter {
375    /// Returns a confidence signal based on pattern match count (W2.1 fix: gradual scale).
376    ///
377    /// - Exactly one route pattern matches → confidence `1.0` (clear signal)
378    /// - Zero patterns match → confidence `0.0` (pure default fallback)
379    /// - More than one pattern matches → confidence `1.0 / matched_count` (ambiguous, decreasing)
380    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
381        let lower = query.to_ascii_lowercase();
382        let mut matched: u32 = 0;
383        if has_temporal_cue(&lower) {
384            matched += 1;
385        }
386        if RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p)) {
387            matched += 1;
388        }
389        let words: Vec<&str> = query.split_whitespace().collect();
390        let word_count = words.len();
391        let has_structural = query.contains('/') || query.contains("::");
392        let question = starts_with_question(&words);
393        let has_snake = words.iter().any(|w| is_pure_snake_case(w));
394        if has_structural && !question {
395            matched += 1;
396        }
397        if question || word_count >= 6 {
398            matched += 1;
399        }
400        if word_count <= 3 && !question {
401            matched += 1;
402        }
403        if has_snake {
404            matched += 1;
405        }
406
407        #[allow(clippy::cast_precision_loss)]
408        let confidence = match matched {
409            0 => 0.0,
410            1 => 1.0,
411            n => 1.0 / n as f32,
412        };
413
414        RoutingDecision {
415            route: self.route(query),
416            confidence,
417            reasoning: None,
418        }
419    }
420
421    fn route(&self, query: &str) -> MemoryRoute {
422        let lower = query.to_ascii_lowercase();
423
424        // 1. Temporal queries take highest priority — must run before relationship check
425        //    to prevent "history of changes last week" from routing to Graph instead of Episodic.
426        if has_temporal_cue(&lower) {
427            return MemoryRoute::Episodic;
428        }
429
430        // 2. Relationship queries go to graph retrieval (feature-gated at call site)
431        let has_relationship = RELATIONSHIP_PATTERNS.iter().any(|p| lower.contains(p));
432        if has_relationship {
433            return MemoryRoute::Graph;
434        }
435
436        let words: Vec<&str> = query.split_whitespace().collect();
437        let word_count = words.len();
438
439        // Code-like patterns that unambiguously indicate keyword search:
440        // file paths (contain '/'), Rust paths (contain '::')
441        let has_structural_code_pattern = query.contains('/') || query.contains("::");
442
443        // Pure snake_case identifiers (e.g. "memory_limit", "error_handling")
444        // but only if the query does NOT start with a question word
445        let has_snake_case = words.iter().any(|w| is_pure_snake_case(w));
446        let question = starts_with_question(&words);
447
448        if has_structural_code_pattern && !question {
449            return MemoryRoute::Keyword;
450        }
451
452        // Long NL queries → semantic, regardless of snake_case tokens
453        if question || word_count >= 6 {
454            return MemoryRoute::Semantic;
455        }
456
457        // Short queries without question words → keyword
458        if word_count <= 3 && !question {
459            return MemoryRoute::Keyword;
460        }
461
462        // Short code-like patterns → keyword
463        if has_snake_case {
464            return MemoryRoute::Keyword;
465        }
466
467        // Default
468        MemoryRoute::Hybrid
469    }
470}
471
/// LLM-based memory router.
///
/// Sends the query to the configured provider and parses a JSON response:
/// `{"route": "keyword|semantic|hybrid|graph|episodic", "confidence": 0.0-1.0}`.
/// An optional `"reasoning"` string in the same object is carried through as well.
///
/// On LLM failure, timeout, or an unparseable reply, falls back to `HeuristicRouter`.
pub struct LlmRouter {
    /// Provider that answers the classification prompt.
    provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
    /// Default passed to `parse_route_str` together with the LLM's `route` string.
    /// NOTE(review): presumably returned when that string is not a known route — confirm
    /// against `parse_route_str`.
    fallback_route: MemoryRoute,
}
482
483impl LlmRouter {
484    /// Create a new `LlmRouter`.
485    ///
486    /// - `provider` — LLM provider used for classification.
487    /// - `fallback_route` — route used when the LLM call fails.
488    #[must_use]
489    pub fn new(
490        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
491        fallback_route: MemoryRoute,
492    ) -> Self {
493        Self {
494            provider,
495            fallback_route,
496        }
497    }
498
499    async fn classify_async(&self, query: &str) -> RoutingDecision {
500        use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
501
502        let system = "You are a memory store routing classifier. \
503            Given a user query, decide which memory backend is most appropriate. \
504            Respond with ONLY a JSON object: \
505            {\"route\": \"<route>\", \"confidence\": <0.0-1.0>, \"reasoning\": \"<brief>\"} \
506            where <route> is one of: keyword, semantic, hybrid, graph, episodic. \
507            Use 'keyword' for exact/code lookups, 'semantic' for conceptual questions, \
508            'hybrid' for mixed, 'graph' for relationship queries, 'episodic' for time-scoped queries.";
509
510        // Wrap query in delimiters to prevent injection (W2.2 fix).
511        let user = format!(
512            "<query>{}</query>",
513            query.chars().take(500).collect::<String>()
514        );
515
516        let messages = vec![
517            Message {
518                role: Role::System,
519                content: system.to_owned(),
520                parts: vec![],
521                metadata: MessageMetadata::default(),
522            },
523            Message {
524                role: Role::User,
525                content: user,
526                parts: vec![],
527                metadata: MessageMetadata::default(),
528            },
529        ];
530
531        let result = match tokio::time::timeout(
532            std::time::Duration::from_secs(5),
533            self.provider.chat(&messages),
534        )
535        .await
536        {
537            Ok(Ok(r)) => r,
538            Ok(Err(e)) => {
539                tracing::debug!(error = %e, "LlmRouter: LLM call failed, falling back to heuristic");
540                return Self::heuristic_fallback(query);
541            }
542            Err(_) => {
543                tracing::debug!("LlmRouter: LLM timed out, falling back to heuristic");
544                return Self::heuristic_fallback(query);
545            }
546        };
547
548        self.parse_llm_response(&result, query)
549    }
550
551    fn parse_llm_response(&self, raw: &str, query: &str) -> RoutingDecision {
552        // Extract JSON object from the response (may have surrounding text).
553        let json_str = raw
554            .find('{')
555            .and_then(|start| raw[start..].rfind('}').map(|end| &raw[start..=start + end]))
556            .unwrap_or("");
557
558        if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
559            let route_str = v.get("route").and_then(|r| r.as_str()).unwrap_or("hybrid");
560            #[allow(clippy::cast_possible_truncation)]
561            let confidence = v
562                .get("confidence")
563                .and_then(serde_json::Value::as_f64)
564                .map_or(0.5, |c| c.clamp(0.0, 1.0) as f32);
565            let reasoning = v
566                .get("reasoning")
567                .and_then(|r| r.as_str())
568                .map(str::to_owned);
569
570            let route = parse_route_str(route_str, self.fallback_route);
571
572            tracing::debug!(
573                query = &query[..query.len().min(60)],
574                ?route,
575                confidence,
576                "LlmRouter: classified"
577            );
578
579            return RoutingDecision {
580                route,
581                confidence,
582                reasoning,
583            };
584        }
585
586        tracing::debug!("LlmRouter: failed to parse JSON response, falling back to heuristic");
587        Self::heuristic_fallback(query)
588    }
589
590    fn heuristic_fallback(query: &str) -> RoutingDecision {
591        HeuristicRouter.route_with_confidence(query)
592    }
593}
594
595impl MemoryRouter for LlmRouter {
596    fn route(&self, query: &str) -> MemoryRoute {
597        // Sync path: LLM is not available without an async executor.
598        // Falls back to heuristic — use route_async() for LLM-based classification.
599        HeuristicRouter.route(query)
600    }
601
602    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
603        // LlmRouter is designed for use in async contexts via classify_async.
604        // When called synchronously (e.g. in tests), fall back to heuristic.
605        HeuristicRouter.route_with_confidence(query)
606    }
607}
608
609impl AsyncMemoryRouter for LlmRouter {
610    fn route_async<'a>(
611        &'a self,
612        query: &'a str,
613    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
614        Box::pin(self.classify_async(query))
615    }
616}
617
/// Hybrid router: heuristic-first, escalates to LLM when confidence is low.
///
/// The `HybridRouter` runs `HeuristicRouter` first. If the heuristic confidence
/// is below `confidence_threshold`, it escalates to the LLM router; a confidence
/// equal to the threshold is accepted without escalation.
/// LLM failures always fall back to the heuristic result.
pub struct HybridRouter {
    /// LLM classifier consulted when the heuristic is not confident enough.
    llm: LlmRouter,
    /// Minimum heuristic confidence that is accepted without asking the LLM.
    confidence_threshold: f32,
}
627
628impl HybridRouter {
629    /// Create a new `HybridRouter`.
630    ///
631    /// - `confidence_threshold` — heuristic decisions with confidence below this value
632    ///   are escalated to the LLM classifier.  `0.7` is a good default.
633    #[must_use]
634    pub fn new(
635        provider: std::sync::Arc<zeph_llm::any::AnyProvider>,
636        fallback_route: MemoryRoute,
637        confidence_threshold: f32,
638    ) -> Self {
639        Self {
640            llm: LlmRouter::new(provider, fallback_route),
641            confidence_threshold,
642        }
643    }
644
645    pub async fn classify_async(&self, query: &str) -> RoutingDecision {
646        let heuristic = HeuristicRouter.route_with_confidence(query);
647        if heuristic.confidence >= self.confidence_threshold {
648            tracing::debug!(
649                query = &query[..query.len().min(60)],
650                confidence = heuristic.confidence,
651                route = ?heuristic.route,
652                "HybridRouter: heuristic sufficient, skipping LLM"
653            );
654            return heuristic;
655        }
656
657        tracing::debug!(
658            query = &query[..query.len().min(60)],
659            confidence = heuristic.confidence,
660            threshold = self.confidence_threshold,
661            "HybridRouter: low confidence, escalating to LLM"
662        );
663
664        let llm_result = self.llm.classify_async(query).await;
665
666        // LLM failure path: classify_async returns a heuristic fallback on error.
667        // Always log the final decision.
668        tracing::debug!(
669            route = ?llm_result.route,
670            confidence = llm_result.confidence,
671            "HybridRouter: final route after LLM escalation"
672        );
673        llm_result
674    }
675}
676
677impl MemoryRouter for HybridRouter {
678    fn route(&self, query: &str) -> MemoryRoute {
679        HeuristicRouter.route(query)
680    }
681
682    fn route_with_confidence(&self, query: &str) -> RoutingDecision {
683        // Synchronous path: can't call async LLM, use heuristic only.
684        HeuristicRouter.route_with_confidence(query)
685    }
686}
687
688impl AsyncMemoryRouter for HeuristicRouter {
689    fn route_async<'a>(
690        &'a self,
691        query: &'a str,
692    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
693        Box::pin(std::future::ready(self.route_with_confidence(query)))
694    }
695}
696
697impl AsyncMemoryRouter for HybridRouter {
698    fn route_async<'a>(
699        &'a self,
700        query: &'a str,
701    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = RoutingDecision> + Send + 'a>> {
702        Box::pin(self.classify_async(query))
703    }
704}
705
706#[cfg(test)]
707mod tests {
708    use chrono::TimeZone as _;
709
710    use super::*;
711
712    fn route(q: &str) -> MemoryRoute {
713        HeuristicRouter.route(q)
714    }
715
716    fn fixed_now() -> DateTime<Utc> {
717        // 2026-03-14 12:00:00 UTC — fixed reference point for all temporal tests
718        Utc.with_ymd_and_hms(2026, 3, 14, 12, 0, 0).unwrap()
719    }
720
721    #[test]
722    fn rust_path_routes_keyword() {
723        assert_eq!(route("zeph_memory::recall"), MemoryRoute::Keyword);
724    }
725
726    #[test]
727    fn file_path_routes_keyword() {
728        assert_eq!(
729            route("crates/zeph-core/src/agent/mod.rs"),
730            MemoryRoute::Keyword
731        );
732    }
733
734    #[test]
735    fn pure_snake_case_routes_keyword() {
736        assert_eq!(route("memory_limit"), MemoryRoute::Keyword);
737        assert_eq!(route("error_handling"), MemoryRoute::Keyword);
738    }
739
740    #[test]
741    fn question_with_snake_case_routes_semantic() {
742        // "what is the memory_limit setting" — question word overrides snake_case heuristic
743        assert_eq!(
744            route("what is the memory_limit setting"),
745            MemoryRoute::Semantic
746        );
747        assert_eq!(route("how does error_handling work"), MemoryRoute::Semantic);
748    }
749
750    #[test]
751    fn short_query_routes_keyword() {
752        assert_eq!(route("context compaction"), MemoryRoute::Keyword);
753        assert_eq!(route("qdrant"), MemoryRoute::Keyword);
754    }
755
756    #[test]
757    fn question_routes_semantic() {
758        assert_eq!(
759            route("what is the purpose of semantic memory"),
760            MemoryRoute::Semantic
761        );
762        assert_eq!(route("how does the agent loop work"), MemoryRoute::Semantic);
763        assert_eq!(route("why does compaction fail"), MemoryRoute::Semantic);
764        assert_eq!(route("explain context compression"), MemoryRoute::Semantic);
765    }
766
767    #[test]
768    fn long_natural_query_routes_semantic() {
769        assert_eq!(
770            route("the agent keeps running out of context during long conversations"),
771            MemoryRoute::Semantic
772        );
773    }
774
775    #[test]
776    fn medium_non_question_routes_hybrid() {
777        // 4-5 words, no question word, no code pattern
778        assert_eq!(route("context window token budget"), MemoryRoute::Hybrid);
779    }
780
781    #[test]
782    fn empty_query_routes_keyword() {
783        // 0 words, no question → keyword (short path)
784        assert_eq!(route(""), MemoryRoute::Keyword);
785    }
786
787    #[test]
788    fn question_word_only_routes_semantic() {
789        // single question word → word_count = 1, but starts_with_question = true
790        // short query with question: the question check happens first in semantic branch
791        // Actually with word_count=1 and question=true: short path `<= 3 && !question` is false,
792        // then `question || word_count >= 6` is true → Semantic
793        assert_eq!(route("what"), MemoryRoute::Semantic);
794    }
795
796    #[test]
797    fn camel_case_does_not_route_keyword_without_pattern() {
798        // CamelCase words without :: or / — 4-word query without question word → Hybrid
799        // (4 words: no question, no snake_case, no structural code pattern → Hybrid)
800        assert_eq!(
801            route("SemanticMemory configuration and options"),
802            MemoryRoute::Hybrid
803        );
804    }
805
806    #[test]
807    fn relationship_query_routes_graph() {
808        assert_eq!(
809            route("what is user's opinion on neovim"),
810            MemoryRoute::Graph
811        );
812        assert_eq!(
813            route("show the relationship between Alice and Bob"),
814            MemoryRoute::Graph
815        );
816    }
817
818    #[test]
819    fn relationship_query_related_to_routes_graph() {
820        assert_eq!(
821            route("how is Rust related to this project"),
822            MemoryRoute::Graph
823        );
824        assert_eq!(
825            route("how does this relates to the config"),
826            MemoryRoute::Graph
827        );
828    }
829
830    #[test]
831    fn relationship_know_about_routes_graph() {
832        assert_eq!(route("what do I know about neovim"), MemoryRoute::Graph);
833    }
834
835    #[test]
836    fn translate_does_not_route_graph() {
837        // "translate" contains "relate" substring but is not in RELATIONSHIP_PATTERNS
838        // (we removed bare "relate", keeping only "related to" and "relates to")
839        assert_ne!(route("translate this code to Python"), MemoryRoute::Graph);
840    }
841
842    #[test]
843    fn non_relationship_stays_semantic() {
844        assert_eq!(
845            route("find similar code patterns in the codebase"),
846            MemoryRoute::Semantic
847        );
848    }
849
850    #[test]
851    fn short_keyword_unchanged() {
852        assert_eq!(route("qdrant"), MemoryRoute::Keyword);
853    }
854
855    // Regression tests for #1661: long NL queries with snake_case must go to Semantic
856    #[test]
857    fn long_nl_with_snake_case_routes_semantic() {
858        assert_eq!(
859            route("Use memory_search to find information about Rust ownership"),
860            MemoryRoute::Semantic
861        );
862    }
863
864    #[test]
865    fn short_snake_case_only_routes_keyword() {
866        assert_eq!(route("memory_search"), MemoryRoute::Keyword);
867    }
868
869    #[test]
870    fn question_with_snake_case_short_routes_semantic() {
871        assert_eq!(
872            route("What does memory_search return?"),
873            MemoryRoute::Semantic
874        );
875    }
876
877    // ── Temporal routing tests ────────────────────────────────────────────────
878
879    #[test]
880    fn temporal_yesterday_routes_episodic() {
881        assert_eq!(
882            route("what did we discuss yesterday"),
883            MemoryRoute::Episodic
884        );
885    }
886
887    #[test]
888    fn temporal_last_week_routes_episodic() {
889        assert_eq!(
890            route("remember what happened last week"),
891            MemoryRoute::Episodic
892        );
893    }
894
895    #[test]
896    fn temporal_when_did_routes_episodic() {
897        assert_eq!(
898            route("when did we last talk about Qdrant"),
899            MemoryRoute::Episodic
900        );
901    }
902
903    #[test]
904    fn temporal_last_time_routes_episodic() {
905        assert_eq!(
906            route("last time we discussed the scheduler"),
907            MemoryRoute::Episodic
908        );
909    }
910
911    #[test]
912    fn temporal_today_routes_episodic() {
913        assert_eq!(
914            route("what did I mention today about testing"),
915            MemoryRoute::Episodic
916        );
917    }
918
919    #[test]
920    fn temporal_this_morning_routes_episodic() {
921        assert_eq!(route("what did we say this morning"), MemoryRoute::Episodic);
922    }
923
924    #[test]
925    fn temporal_last_month_routes_episodic() {
926        assert_eq!(
927            route("find the config change from last month"),
928            MemoryRoute::Episodic
929        );
930    }
931
932    #[test]
933    fn temporal_history_collision_routes_episodic() {
934        // CRIT-01: "history of" is a relationship pattern, but temporal wins when both match.
935        // Temporal check is first — "last week" causes Episodic, not Graph.
936        assert_eq!(route("history of changes last week"), MemoryRoute::Episodic);
937    }
938
939    #[test]
940    fn temporal_ago_word_boundary_routes_episodic() {
941        assert_eq!(route("we fixed this a day ago"), MemoryRoute::Episodic);
942    }
943
944    #[test]
945    fn ago_in_chicago_no_false_positive() {
946        // MED-01: "Chicago" contains "ago" but must NOT route to Episodic.
947        // word-boundary check prevents this false positive.
948        assert_ne!(
949            route("meeting in Chicago about the project"),
950            MemoryRoute::Episodic
951        );
952    }
953
954    #[test]
955    fn non_temporal_unchanged() {
956        assert_eq!(route("how does the agent loop work"), MemoryRoute::Semantic);
957    }
958
959    #[test]
960    fn code_query_unchanged() {
961        assert_eq!(route("zeph_memory::recall"), MemoryRoute::Keyword);
962    }
963
964    // ── resolve_temporal_range tests ─────────────────────────────────────────
965
966    #[test]
967    fn resolve_yesterday_range() {
968        let now = fixed_now(); // 2026-03-14 12:00:00 UTC
969        let range = resolve_temporal_range("what did we discuss yesterday", now).unwrap();
970        assert_eq!(range.after.as_deref(), Some("2026-03-13 00:00:00"));
971        assert_eq!(range.before.as_deref(), Some("2026-03-13 23:59:59"));
972    }
973
974    #[test]
975    fn resolve_last_week_range() {
976        let now = fixed_now(); // 2026-03-14 12:00:00 UTC
977        let range = resolve_temporal_range("remember last week's discussion", now).unwrap();
978        // 7 days before 2026-03-14 = 2026-03-07
979        assert!(range.after.as_deref().unwrap().starts_with("2026-03-07"));
980        assert!(range.before.is_none());
981    }
982
983    #[test]
984    fn resolve_last_month_range() {
985        let now = fixed_now();
986        let range = resolve_temporal_range("find the bug from last month", now).unwrap();
987        // 30 days before 2026-03-14 = 2026-02-12
988        assert!(range.after.as_deref().unwrap().starts_with("2026-02-12"));
989        assert!(range.before.is_none());
990    }
991
992    #[test]
993    fn resolve_today_range() {
994        let now = fixed_now();
995        let range = resolve_temporal_range("what did we do today", now).unwrap();
996        assert_eq!(range.after.as_deref(), Some("2026-03-14 00:00:00"));
997        assert!(range.before.is_none());
998    }
999
1000    #[test]
1001    fn resolve_this_morning_range() {
1002        let now = fixed_now();
1003        let range = resolve_temporal_range("what did we say this morning", now).unwrap();
1004        assert_eq!(range.after.as_deref(), Some("2026-03-14 00:00:00"));
1005        assert_eq!(range.before.as_deref(), Some("2026-03-14 12:00:00"));
1006    }
1007
1008    #[test]
1009    fn resolve_last_night_range() {
1010        let now = fixed_now();
1011        let range = resolve_temporal_range("last night's conversation", now).unwrap();
1012        assert_eq!(range.after.as_deref(), Some("2026-03-13 18:00:00"));
1013        assert_eq!(range.before.as_deref(), Some("2026-03-14 06:00:00"));
1014    }
1015
1016    #[test]
1017    fn resolve_tonight_range() {
1018        let now = fixed_now();
1019        let range = resolve_temporal_range("remind me tonight what we agreed on", now).unwrap();
1020        assert_eq!(range.after.as_deref(), Some("2026-03-14 18:00:00"));
1021        assert!(range.before.is_none());
1022    }
1023
1024    #[test]
1025    fn resolve_no_temporal_returns_none() {
1026        let now = fixed_now();
1027        assert!(resolve_temporal_range("what is the purpose of semantic memory", now).is_none());
1028    }
1029
1030    #[test]
1031    fn resolve_generic_temporal_returns_none() {
1032        // "when did", "remember when", "last time", "how long ago" — no specific range
1033        let now = fixed_now();
1034        assert!(resolve_temporal_range("when did we discuss this feature", now).is_none());
1035        assert!(resolve_temporal_range("remember when we fixed that bug", now).is_none());
1036    }
1037
1038    // ── strip_temporal_keywords tests ────────────────────────────────────────
1039
1040    #[test]
1041    fn strip_yesterday_from_query() {
1042        let cleaned = strip_temporal_keywords("what did we discuss yesterday about Rust");
1043        assert_eq!(cleaned, "what did we discuss about Rust");
1044    }
1045
1046    #[test]
1047    fn strip_last_week_from_query() {
1048        let cleaned = strip_temporal_keywords("find the config change from last week");
1049        assert_eq!(cleaned, "find the config change from");
1050    }
1051
1052    #[test]
1053    fn strip_does_not_alter_non_temporal() {
1054        let q = "what is the purpose of semantic memory";
1055        assert_eq!(strip_temporal_keywords(q), q);
1056    }
1057
1058    #[test]
1059    fn strip_ago_word_boundary() {
1060        let cleaned = strip_temporal_keywords("we fixed this a day ago in the scheduler");
1061        // "ago" removed, rest preserved
1062        assert!(!cleaned.contains("ago"));
1063        assert!(cleaned.contains("scheduler"));
1064    }
1065
1066    #[test]
1067    fn strip_does_not_touch_chicago() {
1068        let q = "meeting in Chicago about the project";
1069        assert_eq!(strip_temporal_keywords(q), q);
1070    }
1071
1072    #[test]
1073    fn strip_empty_string_returns_empty() {
1074        assert_eq!(strip_temporal_keywords(""), "");
1075    }
1076
1077    #[test]
1078    fn strip_only_temporal_keyword_returns_empty() {
1079        // When the entire query is a temporal keyword, stripping leaves an empty string.
1080        // recall_routed falls back to the original query in this case.
1081        assert_eq!(strip_temporal_keywords("yesterday"), "");
1082    }
1083
1084    #[test]
1085    fn strip_repeated_temporal_keyword_removes_all_occurrences() {
1086        // IMPL-02: all occurrences must be removed, not just the first.
1087        let cleaned = strip_temporal_keywords("yesterday I mentioned yesterday's bug");
1088        assert!(
1089            !cleaned.contains("yesterday"),
1090            "both occurrences must be removed: got '{cleaned}'"
1091        );
1092        assert!(cleaned.contains("mentioned"));
1093    }
1094
1095    // ── route_with_confidence tests ───────────────────────────────────────────
1096
1097    #[test]
1098    fn confidence_multiple_matches_is_less_than_one() {
1099        // Structural code pattern + snake_case + short query fire 3 signals →
1100        // confidence = 1.0 / 3 < 1.0
1101        let d = HeuristicRouter.route_with_confidence("zeph_memory::recall");
1102        assert!(
1103            d.confidence < 1.0,
1104            "ambiguous query should have confidence < 1.0, got {}",
1105            d.confidence
1106        );
1107        assert_eq!(d.route, MemoryRoute::Keyword);
1108    }
1109
1110    #[test]
1111    fn confidence_long_question_with_snake_fires_multiple_signals() {
1112        // Long question with snake_case fires multiple signals → confidence < 1.0
1113        let d = HeuristicRouter
1114            .route_with_confidence("what is the purpose of memory_limit in the config system");
1115        assert!(
1116            d.confidence < 1.0,
1117            "ambiguous query must have confidence < 1.0, got {}",
1118            d.confidence
1119        );
1120    }
1121
1122    #[test]
1123    fn confidence_empty_query_is_nonzero() {
1124        // Empty string: word_count=0 → short path fires (<=3 && !question) → matched=1 → confidence=1.0
1125        let d = HeuristicRouter.route_with_confidence("");
1126        assert!(
1127            d.confidence > 0.0,
1128            "empty query must match short-path signal"
1129        );
1130    }
1131
1132    #[test]
1133    fn routing_decision_route_matches_route_fn() {
1134        // route_with_confidence().route must agree with route()
1135        let queries = [
1136            "qdrant",
1137            "what is the agent loop",
1138            "context window token budget",
1139            "what did we discuss yesterday",
1140        ];
1141        for q in queries {
1142            let decision = HeuristicRouter.route_with_confidence(q);
1143            assert_eq!(
1144                decision.route,
1145                HeuristicRouter.route(q),
1146                "mismatch for query: {q}"
1147            );
1148        }
1149    }
1150}