dakera-engine 0.11.20

//! Semantic Routing Engine for Dakera AI Agent Memory Platform.
//!
//! Agents query Dakera without knowing which namespace holds the answer.
//! Dakera figures it out by comparing the query embedding against cached
//! namespace centroids (averaged embeddings sampled from each namespace).
//!
//! The centroid cache is refreshed periodically in the background.

use std::collections::HashMap;
use std::sync::Arc;

use parking_lot::RwLock;
use storage::VectorStorage;

use crate::distance::calculate_distance;
use common::DistanceMetric;

/// A route result: which namespace matched and how strongly.
///
/// Produced by [`SemanticRouter::route`], one per cached namespace whose score
/// passes the caller's `min_similarity` threshold.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RouteMatch {
    /// Name of the namespace whose cached centroid was compared to the query.
    pub namespace: String,
    /// Score returned by `calculate_distance` with `DistanceMetric::Cosine`
    /// for the query embedding vs. the namespace centroid; results are sorted
    /// by this value in descending order.
    pub similarity: f32,
    /// Vector count recorded for the namespace in the centroid cache (the
    /// number of vectors the namespace returned at the last refresh).
    pub memory_count: usize,
}

/// Configuration for the semantic router.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticRouterConfig {
    /// Maximum number of memories to sample per namespace for centroid calculation
    pub sample_size: usize,
    /// How often to refresh centroids (seconds)
    pub refresh_interval_secs: u64,
}

impl Default for SemanticRouterConfig {
    /// Sample 20 memories per namespace and refresh every 30 minutes.
    fn default() -> Self {
        Self {
            sample_size: 20,
            refresh_interval_secs: 1800, // 30 minutes
        }
    }
}

impl SemanticRouterConfig {
    /// Build a configuration from the process environment.
    ///
    /// * `DAKERA_ROUTE_SAMPLE_SIZE` → [`sample_size`](Self::sample_size)
    /// * `DAKERA_ROUTE_REFRESH_SECS` → [`refresh_interval_secs`](Self::refresh_interval_secs)
    ///
    /// A variable that is unset, not valid Unicode, or not parseable as an
    /// unsigned integer (surrounding whitespace is ignored) falls back to the
    /// corresponding [`Default`] value.  A refresh interval of `0` is also
    /// replaced by the default: the background refresh task feeds this value
    /// to `tokio::time::interval`, which panics on a zero period.
    pub fn from_env() -> Self {
        // Single source of truth for fallbacks — keeps this in sync with `Default`.
        let defaults = Self::default();

        let sample_size =
            Self::env_number::<usize>("DAKERA_ROUTE_SAMPLE_SIZE").unwrap_or(defaults.sample_size);

        let refresh_interval_secs = Self::env_number::<u64>("DAKERA_ROUTE_REFRESH_SECS")
            .filter(|&secs| secs > 0)
            .unwrap_or(defaults.refresh_interval_secs);

        Self {
            sample_size,
            refresh_interval_secs,
        }
    }

    /// Read `key` from the environment and parse it as `T`, ignoring
    /// surrounding whitespace.  Returns `None` when unset or unparseable.
    fn env_number<T: std::str::FromStr>(key: &str) -> Option<T> {
        std::env::var(key).ok()?.trim().parse().ok()
    }
}

/// Cached centroid for a namespace: average embedding + vector count.
#[derive(Clone)]
struct CentroidEntry {
    /// Averaged embedding of the sampled vectors.  `refresh_centroids` scales
    /// it to unit length unless its norm is at most `1e-8`.
    centroid: Vec<f32>,
    /// Total number of vectors the namespace returned when the entry was
    /// built (not just the sampled ones).
    count: usize,
}

/// Semantic router that maintains a centroid cache per namespace.
///
/// `route` reads the cache; `refresh_centroids` rebuilds it from storage and
/// swaps the new map in under the write lock.
pub struct SemanticRouter {
    /// Sampling size and refresh-interval settings.
    config: SemanticRouterConfig,
    /// Namespace → averaged centroid embedding + count
    cache: RwLock<HashMap<String, CentroidEntry>>,
}

impl SemanticRouter {
    /// Create a router with the given configuration and an empty centroid cache.
    ///
    /// Until [`refresh_centroids`](Self::refresh_centroids) has populated the
    /// cache, [`route`](Self::route) returns no matches.
    pub fn new(config: SemanticRouterConfig) -> Self {
        Self {
            config,
            cache: RwLock::new(HashMap::new()),
        }
    }

    /// Route a query embedding to the most relevant namespaces.
    ///
    /// Every cached centroid with the same dimension as `query` is scored with
    /// `calculate_distance(.., DistanceMetric::Cosine)`; entries scoring below
    /// `min_similarity` are dropped.
    ///
    /// Returns at most `top_k` namespaces sorted by similarity (descending).
    /// Centroids whose dimension differs from `query` are skipped.
    pub fn route(&self, query: &[f32], top_k: usize, min_similarity: f32) -> Vec<RouteMatch> {
        // Score under the read lock, then release it before sorting so a
        // concurrent cache refresh is not blocked longer than necessary.
        let mut matches: Vec<RouteMatch> = {
            let cache = self.cache.read();
            cache
                .iter()
                // Dimension mismatch: the vectors are not comparable, skip.
                .filter(|(_, entry)| entry.centroid.len() == query.len())
                .filter_map(|(ns, entry)| {
                    let sim = calculate_distance(query, &entry.centroid, DistanceMetric::Cosine);
                    // A NaN score never satisfies `>=`, so it is dropped here
                    // and cannot reach the sort below.
                    (sim >= min_similarity).then(|| RouteMatch {
                        namespace: ns.clone(),
                        similarity: sim,
                        memory_count: entry.count,
                    })
                })
                .collect()
        };

        matches.sort_by(|a, b| {
            b.similarity
                .partial_cmp(&a.similarity)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        matches.truncate(top_k);
        matches
    }

    /// Refresh the centroid cache by sampling memories from each agent namespace.
    ///
    /// For each `_dakera_agent_*` namespace, sample up to `sample_size` vectors,
    /// average their embeddings into a single centroid.
    ///
    /// The new cache replaces the old one wholesale.  If listing namespaces
    /// fails, the existing cache is left untouched.  If loading a single
    /// namespace fails, a warning is logged and that namespace is absent from
    /// the cache until a later refresh succeeds.
    pub async fn refresh_centroids(&self, storage: &Arc<dyn VectorStorage>) {
        let namespaces = match storage.list_namespaces().await {
            Ok(ns) => ns,
            Err(e) => {
                tracing::warn!(error = %e, "Failed to list namespaces for centroid refresh");
                return;
            }
        };

        let mut new_cache: HashMap<String, CentroidEntry> = HashMap::new();

        for namespace in &namespaces {
            if !namespace.starts_with("_dakera_agent_") {
                continue;
            }

            let vectors = match storage.get_all(namespace).await {
                Ok(v) => v,
                Err(e) => {
                    // Surface the failure instead of silently dropping the
                    // namespace from routing.
                    tracing::warn!(
                        namespace = %namespace,
                        error = %e,
                        "Failed to load vectors for centroid refresh; skipping namespace"
                    );
                    continue;
                }
            };

            if vectors.is_empty() {
                continue;
            }

            // Reported count covers every stored vector, not just the sample.
            let count = vectors.len();

            // Sample up to sample_size vectors (take first N — they're stored in insertion order)
            let sample: Vec<&[f32]> = vectors
                .iter()
                .filter(|v| !v.values.is_empty())
                .take(self.config.sample_size)
                .map(|v| v.values.as_slice())
                .collect();

            if let Some(centroid) = Self::unit_centroid(&sample) {
                new_cache.insert(namespace.clone(), CentroidEntry { centroid, count });
            }
        }

        let refreshed_count = new_cache.len();
        // The write guard is a temporary: taken and released in this statement,
        // never held across an `.await`.
        *self.cache.write() = new_cache;

        tracing::info!(
            namespaces_cached = refreshed_count,
            "Semantic router centroid cache refreshed"
        );
    }

    /// Average `sample` into a single centroid and scale it to unit length.
    ///
    /// The dimension is taken from the first embedding; embeddings of any other
    /// dimension are ignored.  Returns `None` for an empty sample.  A centroid
    /// whose norm is at most `1e-8` is returned un-normalized.
    fn unit_centroid(sample: &[&[f32]]) -> Option<Vec<f32>> {
        let dim = sample.first()?.len();
        let mut centroid = vec![0.0f32; dim];
        let mut valid = 0usize;

        for embedding in sample.iter().filter(|e| e.len() == dim) {
            for (acc, val) in centroid.iter_mut().zip(embedding.iter()) {
                *acc += *val;
            }
            valid += 1;
        }

        // `valid` is at least 1: the first embedding always matches `dim`.
        let divisor = valid as f32;
        for val in &mut centroid {
            *val /= divisor;
        }

        // Normalize centroid for cosine comparison
        let norm: f32 = centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm > 1e-8 {
            for val in &mut centroid {
                *val /= norm;
            }
        }

        Some(centroid)
    }

    /// Spawn the centroid refresh as a background tokio task.
    ///
    /// The task waits 5 seconds, refreshes once, and then refreshes every
    /// `refresh_interval_secs` seconds until the returned handle is aborted or
    /// the runtime shuts down.  An interval of `0` is treated as 1 second.
    pub fn spawn_refresh(
        router: Arc<SemanticRouter>,
        storage: Arc<dyn VectorStorage>,
    ) -> tokio::task::JoinHandle<()> {
        // `tokio::time::interval` panics on a zero period, which would kill the
        // task and silently stop all refreshes — clamp to one second.
        let period = std::time::Duration::from_secs(router.config.refresh_interval_secs.max(1));
        tokio::spawn(async move {
            // Small delay to let storage warm up before the initial refresh.
            tokio::time::sleep(std::time::Duration::from_secs(5)).await;

            let mut interval = tokio::time::interval(period);
            // If a refresh overruns the period, wait a full period afterwards
            // instead of firing catch-up refreshes back-to-back.
            interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
            loop {
                // The first tick completes immediately, so it doubles as the
                // initial refresh; a separate refresh before the loop would run
                // the whole scan twice in a row at startup.
                interval.tick().await;
                router.refresh_centroids(&storage).await;
            }
        })
    }
}

// ============================================================================
// CE-12a: Query Classifier for smart routing
// ============================================================================

/// Inferred query kind used for smart routing decisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryKind {
    /// Short / keyword-based query → prefer BM25 full-text search
    Keyword,
    /// Long / natural-language query → prefer vector similarity search
    Semantic,
    /// Mixed signal → hybrid (vector + BM25)
    Hybrid,
    /// Temporal question (when/what year/what date/how long ago) → pure BM25.
    ///
    /// CE-19: LoCoMo CE-18 benchmark showed Hybrid(80/20) scored 40.6% vs
    /// pure BM25 43.8% (−3.2pp). Date-prefixed memories have near-zero cosine
    /// similarity to temporal queries; ANY vector weight contaminates the score.
    /// Pure BM25 finds the correct date-prefixed memory at rank-1 with no noise.
    Temporal,
    /// Multi-hop sequential reasoning query → Hybrid tilted toward BM25
    /// (vector weight 0.40 instead of the balanced 0.50).
    ///
    /// CE-34 v2: Only fires on high-precision structural patterns where a
    /// sequential marker is immediately followed by a pronoun, possessive, or
    /// article — or on unambiguous causative phrases.  Bare markers ("after",
    /// "before", "since", "once") are intentionally excluded: they appear in
    /// >60% of LoCoMo queries and caused a Cat2 collapse to 2.0% in CE-34 v1.
    ///
    /// The tilt favours BM25 because multi-hop bridging memories are found via
    /// entity co-occurrence (exact-match BM25) rather than semantic embedding.
    MultiHop,
}

/// Heuristic classifier that determines the best retrieval strategy for a
/// free-text query without any model inference.
pub struct QueryClassifier;

impl QueryClassifier {
    /// Classify a raw query string into a [`QueryKind`].
    ///
    /// The query is trimmed and matched case-insensitively.  Heuristics, in
    /// priority order (first match wins):
    /// 1. Temporal question (starts with `when `, or contains what year / what date /
    ///    how long ago / how many years / …) → [`QueryKind::Temporal`].
    ///    Routed to pure BM25 (CE-19) because date-prefixed memories rank BM25 rank-1
    ///    but score near-zero in vector space. Any vector weight degrades recall.
    /// 2. Sequential / causative structure ("after she …", "following the …",
    ///    "as a result of …") → [`QueryKind::MultiHop`] (CE-34 v2).
    /// 3. Natural language question (contains `?` or starts with a question word)
    ///    → [`QueryKind::Hybrid`].
    ///    BM25 finds exact names/dates while vector captures semantic intent.
    ///    This covers all LoCoMo benchmark queries which are long NL questions.
    /// 4. ≥ 8 words **or** contains a `.` anywhere (prose, no question)
    ///    → [`QueryKind::Semantic`].
    /// 5. ≤ 3 words (including the empty query) → [`QueryKind::Keyword`].
    /// 6. Everything else → [`QueryKind::Hybrid`].
    pub fn classify(query: &str) -> QueryKind {
        // Temporal queries need BM25 to dominate: date-prefixed memories rank
        // BM25 rank-1 but score near-zero in vector space.  Checked before the
        // general question test because "when …" would otherwise fall through
        // to Hybrid with a balanced 0.50/0.50 weight.
        //
        // CE-35: Extended with duration/timing patterns that were previously
        // falling through to MultiHop (CE-34 v2). "How long after she moved?"
        // contains "after she " (MultiHop trigger) but is unambiguously temporal
        // — it needs pure BM25 to find date-prefixed memories, not vector search.
        // The temporal check fires BEFORE MultiHop, so listing these patterns
        // here prevents vector_weight=0.40 contamination from the MultiHop path.
        //
        // CE-36: Narrowed "how long" and "how soon" to require an explicit
        // sequential marker (after/before/since).  Bare "how long" matched
        // Cat1 queries ("How long have they been friends?") and Cat2 queries
        // ("How long did he stay before they moved?") that perform better on
        // the Hybrid path.  v0.11.19 bench showed -2.8pp Cat1 and -1.6pp Cat2
        // from this over-capture.  "How long after/before/since" still fires
        // for the motivating Cat3 cases ("How long after she moved?").
        //
        // "when did" / "when was" / "when were" / "when is" are not listed
        // separately: the `starts_with("when ")` test below already covers them.
        const TEMPORAL_PHRASES: &[&str] = &[
            "what year",
            "what date",
            "what time did",
            "what time was",
            "how long ago",
            "how long after ",  // CE-36: scoped — "how long after she moved"
            "how long before ", // CE-36: scoped — "how long before the wedding"
            "how long since ",  // CE-36: scoped — "how long since they married"
            "how soon after ",  // CE-36: scoped — "how soon after she started"
            "how soon before ", // CE-36: scoped — "how soon before the event"
            "how many years",
            "how many months",
            "how many weeks", // CE-35: time unit not previously covered
            "how many days",
            "how many hours",   // CE-35: time unit not previously covered
            "how many minutes", // CE-35: time unit not previously covered
            "since when",
            "at what age",
            "how old was",
            "how old were",
        ];

        // CE-34 v2: Multi-hop sequential reasoning — only fire on high-precision
        // structural patterns.  Bare sequential markers ("after", "before", "since",
        // "once") were intentionally removed: they matched >60% of all LoCoMo queries
        // in CE-34 v1, routing them to MultiHop with vector_weight=0.40 instead of
        // 0.50 and causing a Cat2 collapse from 69.5% → 2.0%.
        //
        // Safe patterns require a pronoun, possessive, or article immediately after
        // the sequential marker (structural context), or an unambiguous causative
        // phrase.  The trailing space in each marker pattern is deliberate.
        const MULTI_HOP_PHRASES: &[&str] = &[
            // Causative phrases ("as a consequence" also covers "as a consequence of").
            "as a result of",
            "as a consequence",
            // "after [pronoun/possessive]" — requires structural context
            "after she ",
            "after he ",
            "after they ",
            "after it ",
            "after we ",
            "after her ",
            "after his ",
            "after their ",
            // "once [pronoun]" — sequential event trigger
            "once she ",
            "once he ",
            "once they ",
            // "following [article/possessive]" — requires structural context
            "following the ",
            "following her ",
            "following his ",
            "following their ",
            "following a ",
        ];

        // Leading words that mark a natural-language question or request.
        // "when " is absent on purpose: such queries have already returned
        // `Temporal` by the time this table is consulted.
        const QUESTION_PREFIXES: &[&str] = &[
            "what ", "how ", "why ", "where ", "who ", "tell me", "explain", "describe",
        ];

        let trimmed = query.trim();
        let lower = trimmed.to_lowercase();
        let contains_any = |phrases: &[&str]| phrases.iter().any(|p| lower.contains(*p));

        if lower.starts_with("when ") || contains_any(TEMPORAL_PHRASES) {
            return QueryKind::Temporal;
        }

        if contains_any(MULTI_HOP_PHRASES) {
            return QueryKind::MultiHop;
        }

        // Natural language questions benefit from both BM25 (named entities, dates)
        // and vector search (semantic meaning).
        let is_question =
            trimmed.contains('?') || QUESTION_PREFIXES.iter().any(|p| lower.starts_with(*p));
        if is_question {
            return QueryKind::Hybrid;
        }

        let word_count = trimmed.split_whitespace().count();
        if word_count >= 8 || trimmed.contains('.') {
            QueryKind::Semantic
        } else if word_count <= 3 {
            QueryKind::Keyword
        } else {
            QueryKind::Hybrid
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a router whose cache is pre-seeded with `(namespace, centroid, count)` entries.
    fn router_with<N: Into<String>>(entries: Vec<(N, Vec<f32>, usize)>) -> SemanticRouter {
        let router = SemanticRouter::new(SemanticRouterConfig::default());
        {
            let mut cache = router.cache.write();
            for (namespace, centroid, count) in entries {
                cache.insert(namespace.into(), CentroidEntry { centroid, count });
            }
        }
        router
    }

    /// Assert that `query` classifies as `expected`, naming the query on failure.
    #[track_caller]
    fn assert_kind(query: &str, expected: QueryKind) {
        assert_eq!(
            QueryClassifier::classify(query),
            expected,
            "unexpected classification for query {:?}",
            query
        );
    }

    // --- SemanticRouter tests ---

    #[test]
    fn test_route_empty_cache() {
        let router = SemanticRouter::new(SemanticRouterConfig::default());
        let results = router.route(&[1.0, 0.0, 0.0], 3, 0.5);
        assert!(results.is_empty());
    }

    #[test]
    fn test_route_with_cached_centroids() {
        let router = router_with(vec![
            ("_dakera_agent_dev", vec![1.0, 0.0, 0.0], 100),
            ("_dakera_agent_ops", vec![0.0, 1.0, 0.0], 50),
            ("_dakera_agent_sec", vec![0.707, 0.707, 0.0], 30),
        ]);

        // Query aligned with "dev" namespace
        let results = router.route(&[1.0, 0.0, 0.0], 3, 0.0);
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].namespace, "_dakera_agent_dev");
        assert!(results[0].similarity > results[1].similarity);
    }

    #[test]
    fn test_route_min_similarity_filter() {
        let router = router_with(vec![
            ("_dakera_agent_a", vec![1.0, 0.0, 0.0], 10),
            ("_dakera_agent_b", vec![0.0, 1.0, 0.0], 10),
        ]);

        // High min_similarity should filter out the orthogonal namespace
        let results = router.route(&[1.0, 0.0, 0.0], 5, 0.9);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].namespace, "_dakera_agent_a");
    }

    #[test]
    fn test_route_top_k_truncation() {
        // Ten unit-length centroids fanning out from the x axis toward the y axis.
        let entries: Vec<(String, Vec<f32>, usize)> = (0..10)
            .map(|i| {
                let x = 1.0 - (i as f32 * 0.05);
                let y = i as f32 * 0.05;
                let norm = (x * x + y * y).sqrt();
                (
                    format!("_dakera_agent_{}", i),
                    vec![x / norm, y / norm, 0.0],
                    10,
                )
            })
            .collect();
        let router = router_with(entries);

        let results = router.route(&[1.0, 0.0, 0.0], 3, 0.0);
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn test_route_dimension_mismatch_skipped() {
        let router = router_with(vec![
            ("_dakera_agent_3d", vec![1.0, 0.0, 0.0], 10),
            ("_dakera_agent_5d", vec![1.0, 0.0, 0.0, 0.0, 0.0], 10),
        ]);

        // Query is 3D, should only match the 3D centroid
        let results = router.route(&[1.0, 0.0, 0.0], 5, 0.0);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].namespace, "_dakera_agent_3d");
    }

    #[test]
    fn test_config_defaults() {
        let config = SemanticRouterConfig::default();
        assert_eq!(config.sample_size, 20);
        assert_eq!(config.refresh_interval_secs, 1800);
    }

    // --- QueryClassifier tests ---

    #[test]
    fn test_classify_keyword_short() {
        assert_kind("rust async", QueryKind::Keyword);
        assert_kind("HNSW", QueryKind::Keyword);
        assert_kind("memory importance", QueryKind::Keyword);
    }

    #[test]
    fn test_classify_question_routes_hybrid() {
        // Natural language questions → Hybrid (BM25 + vector, covers LoCoMo benchmark queries)
        assert_kind(
            "what is the best way to store long term memories in an AI system",
            QueryKind::Hybrid,
        );
        assert_kind(
            "tell me about the agent memory architecture",
            QueryKind::Hybrid,
        );
        assert_kind("how does HNSW work?", QueryKind::Hybrid);
        assert_kind(
            "What sport did Sarah's brother play in high school?",
            QueryKind::Hybrid,
        );
    }

    #[test]
    fn test_classify_semantic_long_prose() {
        // Long prose without question structure → Semantic
        assert_kind(
            "the agent memory platform stores embeddings with adaptive decay weighting",
            QueryKind::Semantic,
        );
    }

    #[test]
    fn test_classify_hybrid_middle() {
        assert_kind("vector search memory agent", QueryKind::Hybrid);
    }

    // --- CE-15: Temporal classifier tests ---

    #[test]
    fn test_classify_temporal_when_prefix() {
        // "when " prefix → Temporal (BM25-heavy, not balanced Hybrid)
        assert_kind("when did Caroline go to the store?", QueryKind::Temporal);
        assert_kind("When was the last time they spoke?", QueryKind::Temporal);
        assert_kind("When were the siblings born?", QueryKind::Temporal);
    }

    #[test]
    fn test_classify_temporal_date_year_patterns() {
        assert_kind("What year did they get married?", QueryKind::Temporal);
        assert_kind(
            "what date did the conference take place?",
            QueryKind::Temporal,
        );
        assert_kind("What time did the meeting start?", QueryKind::Temporal);
        assert_kind("How long ago did this happen?", QueryKind::Temporal);
        assert_kind(
            "How many years have they been friends?",
            QueryKind::Temporal,
        );
        assert_kind("How old was Sarah when she graduated?", QueryKind::Temporal);
    }

    #[test]
    fn test_classify_temporal_does_not_capture_non_temporal_what() {
        // "what sport" / "what color" / "what is" should NOT route to Temporal
        assert_kind(
            "What sport did Sarah's brother play in high school?",
            QueryKind::Hybrid,
        );
        assert_kind(
            "what is the best way to find old memories",
            QueryKind::Hybrid,
        );
    }

    // --- CE-34 v2: MultiHop classifier tests ---

    #[test]
    fn test_classify_multihop_pronoun_after_marker() {
        // "after [pronoun]" patterns — high-precision sequential event chains
        assert_kind(
            "What did Sarah do after she got married?",
            QueryKind::MultiHop,
        );
        assert_kind(
            "Where did they move after they sold the house?",
            QueryKind::MultiHop,
        );
        assert_kind(
            "What happened after he graduated from college?",
            QueryKind::MultiHop,
        );
        assert_kind(
            "What did Alice do once she moved to the new city?",
            QueryKind::MultiHop,
        );
    }

    #[test]
    fn test_classify_multihop_causative_phrases() {
        // Unambiguous causative phrases — always multi-hop
        assert_kind(
            "What changed as a result of their decision?",
            QueryKind::MultiHop,
        );
        assert_kind(
            "What happened as a consequence of the accident?",
            QueryKind::MultiHop,
        );
    }

    #[test]
    fn test_classify_multihop_following_structural() {
        // "following [article/possessive]" — structural context required
        assert_kind(
            "What did Bob do following the promotion?",
            QueryKind::MultiHop,
        );
        assert_kind(
            "Where did they live following her diagnosis?",
            QueryKind::MultiHop,
        );
    }

    #[test]
    fn test_classify_multihop_bare_markers_do_not_fire() {
        // CE-34 v1 regression: bare "after"/"before"/"since"/"once" without structural
        // context must NOT trigger MultiHop — they appear in >60% of LoCoMo queries.

        // bare "after" without pronoun → Hybrid, not MultiHop
        assert_kind("What did Sarah do after school?", QueryKind::Hybrid);
        // bare "before" → Hybrid
        assert_kind("What happened before the wedding?", QueryKind::Hybrid);
        // "since when" → Temporal (existing rule)
        assert_kind("Since when did they live there?", QueryKind::Temporal);
        // bare "once" without pronoun → Hybrid
        assert_kind("How did they feel once settled?", QueryKind::Hybrid);
    }

    #[test]
    fn test_classify_multihop_does_not_interfere_with_temporal() {
        // Temporal check fires BEFORE MultiHop — temporal questions must not be misrouted.
        // "when " prefix wins over "after he "
        assert_kind("when did she move after he graduated?", QueryKind::Temporal);
    }

    // --- CE-35: Extended temporal classifier tests ---

    #[test]
    fn test_classify_temporal_how_long_patterns() {
        // CE-36: "how long after/before/since [context]" → Temporal.
        // Only the sequentially-scoped forms are routed to pure BM25.  Bare "how long"
        // is NOT temporal — it over-captured Cat1/Cat2 queries in CE-35 (-2.8pp Cat1,
        // -1.6pp Cat2 in v0.11.19 bench) and those queries perform better on Hybrid.

        // "how long after " wins over "after she " (MultiHop trigger)
        assert_kind(
            "How long after she moved did they get married?",
            QueryKind::Temporal,
        );
        // "how long after " wins over "after he " (MultiHop trigger)
        assert_kind(
            "How long after he graduated did she find a job?",
            QueryKind::Temporal,
        );
        // "how long before " → sequential temporal
        assert_kind(
            "How long before the wedding did they meet?",
            QueryKind::Temporal,
        );
        // "how long since " → elapsed-time temporal
        assert_kind(
            "How long since they got married has she been working?",
            QueryKind::Temporal,
        );

        // CE-36: bare "how long" without sequential marker → Hybrid (Cat1 non-regression).
        // generic duration, no sequential marker — Hybrid performs better
        assert_kind("How long did the relationship last?", QueryKind::Hybrid);
        // Cat1 single-hop — "how " prefix → Hybrid, not BM25-only
        assert_kind("How long have they been friends?", QueryKind::Hybrid);
        // Cat1 single-hop — restored to Hybrid in CE-36
        assert_kind(
            "How long has she been working at the company?",
            QueryKind::Hybrid,
        );
    }

    #[test]
    fn test_classify_temporal_how_soon_patterns() {
        // CE-36: "how soon after/before [context]" → Temporal (scoped).

        // "how soon after " wins over "after she " (MultiHop trigger)
        assert_kind(
            "How soon after she started the new job did they move?",
            QueryKind::Temporal,
        );
        // "how soon before " → sequential temporal
        assert_kind(
            "How soon before the trip did they pack?",
            QueryKind::Temporal,
        );
        // CE-36: bare "how soon" without sequential marker → Hybrid (Cat1/Cat2 non-regression).
        // no sequential marker — Hybrid performs better for this form
        assert_kind("How soon did they get back together?", QueryKind::Hybrid);
    }

    #[test]
    fn test_classify_temporal_new_time_units() {
        // Time units not previously covered — CE-35 addition.

        // "how many weeks" — time unit duration
        assert_kind(
            "How many weeks after the move did they settle in?",
            QueryKind::Temporal,
        );
        assert_kind(
            "How many hours did the procedure take?",
            QueryKind::Temporal,
        );
        assert_kind(
            "How many minutes before the event did she arrive?",
            QueryKind::Temporal,
        );
    }

    #[test]
    fn test_classify_temporal_how_long_beats_multihop() {
        // Regression guard: "how long after [pronoun]" MUST be Temporal, never MultiHop.
        // CE-34 v2's MultiHop patterns include "after she/he/they/..." — without the
        // "how long after " temporal guard these would misclassify as MultiHop
        // (vector_weight=0.40) instead of Temporal (vector_weight=0.0), contaminating
        // date-prefixed memory recall.
        //
        // Asserting the exact kind (rather than merely "not MultiHop") also catches a
        // regression that would send these queries to Hybrid.
        assert_kind(
            "How long after she started did he propose?",
            QueryKind::Temporal,
        );
        assert_kind(
            "How long after they moved following the promotion did he get promoted again?",
            QueryKind::Temporal,
        );
        // CE-36: "how soon after [pronoun]" must also be Temporal, not MultiHop.
        assert_kind(
            "How soon after she started the job did they move?",
            QueryKind::Temporal,
        );
    }

    // --- CE-36: Cat1/Cat2 non-regression guard ---

    #[test]
    fn test_classify_ce36_cat1_cat2_not_over_captured_by_temporal() {
        // CE-36 regression guard: bare "how long"/"how soon" without a sequential marker
        // (after/before/since) must NOT route to Temporal.  CE-35's broad capture caused
        // -2.8pp Cat1 and -1.6pp Cat2 in the v0.11.19 full bench (1540Q run #24683940116).
        // These queries are answered better by Hybrid (BM25+vector) than pure BM25+PRF.

        // Cat1 single-hop duration queries — Hybrid
        assert_kind("How long have they been dating?", QueryKind::Hybrid);
        assert_kind("How long did he live in New York?", QueryKind::Hybrid);
        assert_kind(
            "How long was the trip they took together?",
            QueryKind::Hybrid,
        );
        // Cat2 multi-hop with bare "how long" — should fall through to MultiHop/Hybrid,
        // not be forced to Temporal by a broad pattern.
        // "after she " still fires for the multi-hop part
        assert_kind(
            "How long did they stay after she got the promotion?",
            QueryKind::MultiHop,
        );
        // bare "how soon" without sequential marker — Hybrid
        assert_kind("How soon will they be ready?", QueryKind::Hybrid);
    }
}