memvid_cli/commands/
search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored_json::ToColoredJson;
15use blake3::hash;
16use clap::{ArgAction, Args, ValueEnum};
17#[cfg(feature = "temporal_track")]
18use memvid_core::{
19    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
20    TemporalResolution, TemporalResolutionValue,
21};
22use memvid_core::{
23    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
24    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
25    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
26};
27#[cfg(feature = "temporal_track")]
28use serde::Serialize;
29use serde_json::json;
30#[cfg(feature = "temporal_track")]
31use time::format_description::well_known::Rfc3339;
32use time::{Date, PrimitiveDateTime, Time};
33#[cfg(feature = "temporal_track")]
34use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
35use tracing::{info, warn};
36
37use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
38
39use memvid_ask_model::{
40    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
41    ModelInference,
42};
43
44// frame_to_json and print_frame_summary available from commands but not used in this module
45use crate::config::{
46    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
47    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
48};
49use crate::utils::{
50    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51    parse_date_boundary, parse_vector, read_embedding,
52};
53
// Cap on characters of retrieved context echoed in command output.
// NOTE(review): use sites are outside this chunk — confirm.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone used when `--tz` is not supplied to temporal commands
// (see `build_temporal_filter`).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
58fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
59    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
60    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
61    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
62        message.push_str(&format!(
63            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
64            model.name(),
65            model.name()
66        ));
67        if model.is_openai() {
68            message.push_str(" (and set `OPENAI_API_KEY`).");
69        } else {
70            message.push('.');
71        }
72        message.push_str(&format!(
73            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
74            model.name()
75        ));
76        message.push_str(&format!(
77            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
78        ));
79        message.push_str("\nOr use `--mode lex` to disable semantic search.");
80    }
81    message
82}
83
/// Arguments for the `timeline` subcommand
// New notes use plain `//` comments so clap's generated help text (built from
// `///` doc comments) is unchanged.
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the timeline ordering.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (Unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (Unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase resolved by the temporal normalizer.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone used when resolving --on (defaults to DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor instant ("now" when omitted) used when resolving --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding in minutes applied around the resolved --on window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
118
/// Arguments for the `when` subcommand
// New notes use plain `//` comments so clap's generated help text is unchanged.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required for `when`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone used when resolving --on (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor instant ("now" when omitted) used when resolving --on.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding in minutes applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (Unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (Unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the listing order.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
}
144
/// Arguments for the `ask` subcommand
// New notes use plain `//` comments so clap's generated help text is unchanged.
#[derive(Args)]
pub struct AskArgs {
    // Positional targets (memory files and/or question text).
    // NOTE(review): exact interpretation is resolved by the handler — confirm.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (alternative to positional targets).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to a single URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve (fixed top-k when adaptive retrieval is off).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor returned by a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lex, sem, or hybrid (default).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
    // Print only the retrieved context, skipping answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Date-range filters; presumably parsed via `parse_date_boundary` — confirm.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
232
/// Ask mode argument
// Plain `//` comments below: `///` on variants would change clap's help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval only (see the `--mode lex` hint in the dimension-mismatch help).
    Lex,
    // Semantic (embedding-based) retrieval only.
    Sem,
    // Combined lexical + semantic retrieval (CLI default).
    Hybrid,
}
240
241impl From<AskModeArg> for AskMode {
242    fn from(value: AskModeArg) -> Self {
243        match value {
244            AskModeArg::Lex => AskMode::Lex,
245            AskModeArg::Sem => AskMode::Sem,
246            AskModeArg::Hybrid => AskMode::Hybrid,
247        }
248    }
249}
250
/// Arguments for the `find` subcommand
// New notes use plain `//` comments so clap's generated help text is unchanged.
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict results to a single URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict results to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return (fixed top-k when adaptive retrieval is off).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor returned by a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
    // Emit the older JSON shape; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode: auto (default), lex, sem, or clip (feature-gated).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,
}
312
/// Search mode argument
// Plain `//` comments on undocumented variants: `///` would change clap's help.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Choose the search strategy automatically (presumably lex/sem blend —
    // confirm in the handler).
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
323
/// Adaptive retrieval strategy
// Selected via --adaptive-strategy on `find`/`ask`; presumably mapped onto the
// core `CutoffStrategy` by the handlers — confirm at use sites.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
338
/// Arguments for the `vec-search` subcommand
// New notes use plain `//` comments so clap's generated help text is unchanged.
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as inline comma-separated floats; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file holding the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
}
353
/// Arguments for the `audit` subcommand
// New notes use plain `//` comments so clap's generated help text is unchanged.
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit against.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
390
/// Audit output format
// Variant `///` docs double as the clap help text for `--format`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
401
402// ============================================================================
403// Search & Retrieval command handlers
404// ============================================================================
405
/// Handle the `timeline` subcommand.
///
/// Lists frames via `Memvid::timeline` with optional limit/since/until
/// filters, an optional temporal phrase filter (`--on`, feature
/// `temporal_track`), and replay cutoffs (`--as-of-frame`/`--as-of-ts`)
/// applied client-side after the query.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of the --on phrase; reject them alone.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the temporal phrase (if any) into a filter plus a printable summary.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // When a temporal phrase was resolved, wrap entries in an envelope that
        // also carries the resolution summary; otherwise emit entries bare.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        // Human-readable listing: optional temporal summary header, then one
        // block per entry (id, timestamp, one-line preview, URI, children).
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
511
/// Handle the `when` subcommand: resolve a temporal phrase (`--on`) into a
/// time window and list the frames that fall inside it.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase into UTC bounds plus a printable summary.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON mode: emit the resolution summary alongside the entry views.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Best-effort ISO rendering of the timestamp; empty when unformattable.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
579
/// JSON envelope for `timeline --json` when a temporal phrase was supplied.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// How the `--on` phrase was resolved; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// The matching timeline entries, borrowed from the query result.
    entries: &'a [TimelineEntry],
}
587
/// JSON envelope for `when --json`: resolution summary plus entry views.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    /// How the `--on` phrase was resolved.
    summary: TemporalSummaryOutput,
    /// Entries that fell inside the resolved window.
    entries: Vec<WhenEntry>,
}
594
/// JSON view of a single timeline entry as emitted by `when --json`.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    /// Frame identifier.
    frame_id: FrameId,
    /// Frame timestamp (Unix seconds).
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    /// Short text preview of the frame content.
    preview: String,
    /// Source URI, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    /// Child frame IDs; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Temporal anchor/mention metadata, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
610
/// Serializable form of `TemporalSummary` used in JSON output.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The phrase that was resolved (e.g. the `--on` argument).
    phrase: String,
    /// IANA timezone the phrase was resolved in.
    timezone: String,
    /// Anchor instant as a Unix timestamp.
    anchor_utc: i64,
    /// Anchor instant as RFC3339 (falls back to the raw timestamp string).
    anchor_iso: String,
    /// Resolver confidence score.
    confidence: u16,
    /// Resolver flags; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// Which resolution variant was produced (see `resolution_kind`).
    resolution_kind: &'static str,
    /// Resolved window bounds, as Unix timestamps and ISO strings.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Requested `--window` padding in minutes; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
629
/// Internal record of how a temporal phrase was resolved; rendered as text by
/// `print_temporal_summary` or as JSON via `summary_to_output`.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    // The original phrase, e.g. from `--on`.
    phrase: String,
    // IANA timezone the phrase was resolved in.
    tz: String,
    // Anchor instant used for relative resolution.
    anchor: OffsetDateTime,
    // Resolved window bounds as Unix timestamps (either may be absent).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    // Full resolver output (confidence, flags, value variant).
    resolution: TemporalResolution,
    // Requested `--window` padding in minutes, if any.
    window_minutes: Option<u64>,
}
640
/// Resolve a natural-language temporal phrase into a `TemporalFilter` (UTC
/// start/end bounds) plus a `TemporalSummary` for display.
///
/// `tz_override` defaults to `DEFAULT_TEMPORAL_TZ`; `anchor_override`
/// (RFC3339) defaults to "now"; `window_minutes` pads the resolved window
/// symmetrically on both sides.
///
/// # Errors
/// - `E-TEMP-003` for an empty timezone
/// - `E-TEMP-002` for a non-RFC3339 anchor
/// - `E-TEMP-001` when the phrase cannot be resolved
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            // Fix: the original `if s == e { … } else { … }` branches were
            // byte-identical — the padding is the same whether or not the
            // window is a single point, so apply it once.
            // Clamp before converting so an absurd --window value cannot
            // overflow `TimeDuration::minutes` (which takes an i64 and would
            // panic on i64::MIN/MAX minute counts).
            let capped = minutes.min((i64::MAX / 60) as u64) as i64;
            let delta = TimeDuration::minutes(capped);
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta.whole_seconds()));
                end = Some(e.saturating_add(delta.whole_seconds()));
            }
        }
    }

    // The filter carries only the numeric bounds; phrase/tz live in the summary.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
704
/// Convert the internal `TemporalSummary` into its JSON-serializable form.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    let anchor_utc = summary.anchor.unix_timestamp();
    // RFC3339 rendering falls back to the raw timestamp if formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
730
/// Project a `TimelineEntry` into the JSON view used by `when --json`.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    // Best-effort ISO rendering; stays `None` when formatting fails.
    let timestamp_iso = format_timestamp(entry.timestamp);
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso,
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
743
/// Print the human-readable header describing how a temporal phrase resolved.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Anchor rendered as RFC3339, falling back to the raw Unix timestamp.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect::<Vec<&'static str>>();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    // Only report padding when it was requested and non-zero.
    match summary.window_minutes {
        Some(window) if window > 0 => println!("Window padding: {window} minute(s)"),
        _ => {}
    }
    println!();
}
781
/// Print the per-entry temporal details (anchor line plus mention lines).
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then formatted ts, then the raw ts.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", iso, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let iso = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut details = format!(
            "    - {} ({:?}, confidence {})",
            iso, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            details.push_str(&format!(" — \"{}\"", text));
        }
        println!("{details}");
    }
}
814
/// Convert a resolved temporal value into inclusive `(start, end)` Unix
/// timestamp bounds. Point values (a single date or datetime) yield
/// `start == end`; months span from the 1st to the last day.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;
    let (start, end) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(start), Some(end)))
}
845
#[cfg(feature = "temporal_track")]
/// Stable string tag describing the shape of a temporal resolution value
/// (suitable as a machine-readable discriminant in output).
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue::*;
    match resolution.value {
        Date(_) => "date",
        DateTime(_) => "datetime",
        DateRange { .. } => "date_range",
        DateTimeRange { .. } => "datetime_range",
        Month { .. } => "month",
    }
}
856
#[cfg(feature = "temporal_track")]
/// Unix timestamp for midnight UTC at the start of `date`.
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
863
#[cfg(feature = "temporal_track")]
/// Last calendar day of `month` in `year`, found by stepping forward from the
/// first of the month until the month rolls over.
///
/// # Errors
/// Returns an error when `(year, month, 1)` is not a valid calendar date.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match last.next_day() {
            Some(next) if next.month() == month => last = next,
            _ => break,
        }
    }
    Ok(last)
}
877
878#[cfg(feature = "temporal_track")]
879
880fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
881    if fragments.is_empty() {
882        return;
883    }
884
885    response.context_fragments = fragments
886        .into_iter()
887        .map(|fragment| AskContextFragment {
888            rank: fragment.rank,
889            frame_id: fragment.frame_id,
890            uri: fragment.uri,
891            title: fragment.title,
892            score: fragment.score,
893            matches: fragment.matches,
894            range: Some(fragment.range),
895            chunk_range: fragment.chunk_range,
896            text: fragment.text,
897            kind: Some(match fragment.kind {
898                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
899                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
900            }),
901            #[cfg(feature = "temporal_track")]
902            temporal: None,
903        })
904        .collect();
905}
906
/// Handle `ask`: retrieve context for a natural-language question from an MV2
/// memory and optionally synthesize an answer with a local model.
///
/// Pipeline (in order):
/// 1. Resolve the question and memory file from positional args / flags.
/// 2. Open the MV2 file and pick an embedding runtime matching its stored
///    vector dimension (lex mode needs none).
/// 3. Run retrieval via `Memvid::ask` with an optional adaptive score cutoff.
/// 4. Post-process: cross-encoder rerank, memory-card and entity context
///    injection, optional PII masking.
/// 5. Optionally run model inference, then emit JSON or pretty output.
///
/// # Errors
/// Fails on a missing/empty question, invalid date window, mixed embedding
/// models in semantic modes, unavailable embedding runtime for sem/hybrid,
/// or any underlying retrieval error (dimension mismatches get a friendly
/// help message).
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory file
    // becomes the file path; everything else is treated as question words.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // --question takes precedence over positional words; whitespace-only
    // questions are rejected below.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Parse and validate the optional --start/--end date window.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    let ask_mode: AskMode = args.mode.into();
    // Infer which embedding model produced the stored vectors so the query
    // can be embedded with the same model. Mixed models make semantic
    // similarity meaningless, so that case is a hard error.
    let inferred_model_override = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // Explicit --query-embedding-model beats the inferred one.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    // Translate dimension mismatches into an actionable help message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
        other => anyhow!(other),
    })?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failure is non-fatal: keep the original order and warn.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Model synthesis: skipped in --context-only mode; inference failures fall
    // back to the default (extractive) summary rather than aborting.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref model_answer) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                model_answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
            );
        }
    }

    // Output: JSON (model-specific envelope when a model ran) or pretty text.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1229
1230/// Handle graph-aware find with --graph or --hybrid flags
1231fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1232    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1233    use memvid_core::types::{GraphPattern, QueryPlan, TriplePattern, PatternTerm};
1234
1235    let planner = QueryPlanner::new();
1236
1237    // Create query plan based on mode
1238    let plan = if args.graph {
1239        // Pure graph mode - let planner detect patterns
1240        let plan = planner.plan(&args.query, args.top_k);
1241        // If it's a hybrid plan from auto-detection, convert to graph-only
1242        match plan {
1243            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1244                QueryPlan::graph_only(graph_filter, args.top_k)
1245            }
1246            _ => plan,
1247        }
1248    } else {
1249        // Hybrid mode - use the auto-detected plan
1250        planner.plan(&args.query, args.top_k)
1251    };
1252
1253    // Execute the search
1254    let hits = hybrid_search(mem, &plan)?;
1255
1256    if args.json {
1257        // JSON output
1258        let output = serde_json::json!({
1259            "query": args.query,
1260            "mode": if args.graph { "graph" } else { "hybrid" },
1261            "plan": format!("{:?}", plan),
1262            "hits": hits.iter().map(|h| {
1263                serde_json::json!({
1264                    "frame_id": h.frame_id,
1265                    "score": h.score,
1266                    "graph_score": h.graph_score,
1267                    "vector_score": h.vector_score,
1268                    "matched_entity": h.matched_entity,
1269                    "preview": h.preview,
1270                })
1271            }).collect::<Vec<_>>(),
1272        });
1273        println!("{}", serde_json::to_string_pretty(&output)?);
1274    } else {
1275        // Human-readable output
1276        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1277        println!("{} search for: \"{}\"", mode_str, args.query);
1278        println!("Plan: {:?}", plan);
1279        println!();
1280
1281        if hits.is_empty() {
1282            println!("No results found.");
1283        } else {
1284            println!("Results ({} hits):", hits.len());
1285            for (i, hit) in hits.iter().enumerate() {
1286                println!();
1287                println!(
1288                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1289                    i + 1,
1290                    hit.frame_id,
1291                    hit.score,
1292                    hit.graph_score,
1293                    hit.vector_score
1294                );
1295                if let Some(entity) = &hit.matched_entity {
1296                    println!("   Matched entity: {}", entity);
1297                }
1298                if let Some(preview) = &hit.preview {
1299                    let truncated = if preview.len() > 200 {
1300                        format!("{}...", &preview[..200])
1301                    } else {
1302                        preview.clone()
1303                    };
1304                    println!("   {}", truncated.replace('\n', " "));
1305                }
1306            }
1307        }
1308    }
1309
1310    Ok(())
1311}
1312
1313pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1314    let mut mem = open_read_only_mem(&args.file)?;
1315
1316    // Load active replay session if one exists
1317    #[cfg(feature = "replay")]
1318    let _ = mem.load_active_session();
1319
1320    // Handle graph-aware and hybrid search modes
1321    if args.graph || args.hybrid {
1322        return handle_graph_find(&mut mem, &args);
1323    }
1324
1325    if args.uri.is_some() && args.scope.is_some() {
1326        warn!("--scope ignored because --uri is provided");
1327    }
1328
1329    // Get vector dimension from MV2 for auto-detection
1330    let mv2_dimension = mem.effective_vec_index_dimension()?;
1331    let identity_summary = match args.mode {
1332        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1333        #[cfg(feature = "clip")]
1334        SearchMode::Clip => None,
1335        SearchMode::Lex => None,
1336    };
1337
1338    let mut semantic_allowed = true;
1339    let inferred_model_override = match identity_summary.as_ref() {
1340        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1341            identity.model.as_deref().map(|value| value.to_string())
1342        }
1343        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1344            let models: Vec<_> = identities
1345                .iter()
1346                .filter_map(|entry| entry.identity.model.as_deref())
1347                .collect();
1348            if args.mode == SearchMode::Sem {
1349                anyhow::bail!(
1350                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1351                    Detected models: {:?}\n\n\
1352                    Suggested fix: split into separate memories per embedding model.",
1353                    models
1354                );
1355            }
1356            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
1357            semantic_allowed = false;
1358            None
1359        }
1360        _ => None,
1361    };
1362
1363    let emb_model_override = args
1364        .query_embedding_model
1365        .as_deref()
1366        .or(inferred_model_override.as_deref());
1367
1368    let (mode_label, runtime_option) = match args.mode {
1369        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1370        SearchMode::Sem => {
1371            let runtime =
1372                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1373            ("Semantic (vector search)".to_string(), Some(runtime))
1374        }
1375        SearchMode::Auto => {
1376            if !semantic_allowed {
1377                ("Lexical (semantic unsafe)".to_string(), None)
1378            } else if let Some(runtime) =
1379                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1380            {
1381                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1382            } else {
1383                ("Lexical (semantic unavailable)".to_string(), None)
1384            }
1385        }
1386        #[cfg(feature = "clip")]
1387        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1388    };
1389
1390    let mode_key = match args.mode {
1391        SearchMode::Sem => "semantic",
1392        SearchMode::Lex => "text",
1393        SearchMode::Auto => {
1394            if runtime_option.is_some() {
1395                "hybrid"
1396            } else {
1397                "text"
1398            }
1399        }
1400        #[cfg(feature = "clip")]
1401        SearchMode::Clip => "clip",
1402    };
1403
1404    // For CLIP mode, use CLIP visual search
1405    #[cfg(feature = "clip")]
1406    if args.mode == SearchMode::Clip {
1407        use memvid_core::clip::{ClipConfig, ClipModel};
1408
1409        // Initialize CLIP model
1410        let config = ClipConfig::default();
1411        let clip = ClipModel::new(config).map_err(|e| {
1412            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1413        })?;
1414
1415        // Encode query text
1416        let query_embedding = clip
1417            .encode_text(&args.query)
1418            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1419
1420        // Search CLIP index
1421        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1422
1423        // Debug distances before filtering
1424        for hit in &hits {
1425            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1426                tracing::debug!(
1427                    frame_id = hit.frame_id,
1428                    title = %frame.title.unwrap_or_default(),
1429                    page = hit.page,
1430                    distance = hit.distance,
1431                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1432                    "CLIP raw hit"
1433                );
1434            } else {
1435                tracing::debug!(
1436                    frame_id = hit.frame_id,
1437                    page = hit.page,
1438                    distance = hit.distance,
1439                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1440                    "CLIP raw hit (missing frame)"
1441                );
1442            }
1443        }
1444
1445        // CLIP distance threshold for filtering poor matches
1446        // CLIP uses L2 distance on normalized embeddings:
1447        //   - distance² = 2(1 - cosine_similarity)
1448        //   - distance = 0 → identical (cosine_sim = 1)
1449        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1450        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1451        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1452        //   - distance = 2.0 → opposite (cosine_sim = -1)
1453        //
1454        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1455        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1456        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1457        const CLIP_MAX_DISTANCE: f32 = 1.26;
1458
1459        // Convert CLIP hits to SearchResponse format, filtering by threshold
1460        let search_hits: Vec<SearchHit> = hits
1461            .into_iter()
1462            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1463            .enumerate()
1464            .filter_map(|(rank, hit)| {
1465                // Convert L2 distance to cosine similarity for display
1466                // cos_sim = 1 - (distance² / 2)
1467                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1468
1469                // Get frame preview for snippet
1470                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1471                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1472                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1473                let title = match (base_title, hit.page) {
1474                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1475                    (Some(t), None) => Some(t),
1476                    (None, Some(p)) => Some(format!("Page {p}")),
1477                    _ => None,
1478                };
1479                Some(SearchHit {
1480                    rank: rank + 1,
1481                    frame_id: hit.frame_id,
1482                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1483                    title,
1484                    text: preview.clone(),
1485                    chunk_text: Some(preview),
1486                    range: (0, 0),
1487                    chunk_range: None,
1488                    matches: 0,
1489                    score: Some(cosine_similarity),
1490                    metadata: None,
1491                })
1492            })
1493            .collect();
1494
1495        let response = SearchResponse {
1496            query: args.query.clone(),
1497            hits: search_hits.clone(),
1498            total_hits: search_hits.len(),
1499            params: memvid_core::SearchParams {
1500                top_k: args.top_k,
1501                snippet_chars: args.snippet_chars,
1502                cursor: args.cursor.clone(),
1503            },
1504            elapsed_ms: 0,
1505            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1506            next_cursor: None,
1507            context: String::new(),
1508        };
1509
1510        if args.json_legacy {
1511            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1512            emit_legacy_search_json(&response)?;
1513        } else if args.json {
1514            emit_search_json(&response, mode_key)?;
1515        } else {
1516            println!(
1517                "mode: {}   k={}   time: {} ms",
1518                mode_label, response.params.top_k, response.elapsed_ms
1519            );
1520            println!("engine: clip (MobileCLIP-S2)");
1521            println!(
1522                "hits: {} (showing {})",
1523                response.total_hits,
1524                response.hits.len()
1525            );
1526            emit_search_table(&response);
1527        }
1528        return Ok(());
1529    }
1530
1531    // For semantic mode, use pure vector search.
1532    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1533        let runtime = runtime_option
1534            .as_ref()
1535            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1536
1537        // Embed the query
1538        let query_embedding = runtime.embed_query(&args.query)?;
1539
1540        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1541        let scope = args.scope.as_deref().or(args.uri.as_deref());
1542
1543        if !args.no_adaptive {
1544            // Build adaptive config from CLI args
1545            let strategy = match args.adaptive_strategy {
1546                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1547                    min_ratio: args.min_relevancy,
1548                },
1549                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1550                    min_score: args.min_relevancy,
1551                },
1552                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1553                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1554                },
1555                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1556                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1557                    relative_threshold: args.min_relevancy,
1558                    max_drop_ratio: 0.35,
1559                    absolute_min: 0.3,
1560                },
1561            };
1562
1563            let config = AdaptiveConfig {
1564                enabled: true,
1565                max_results: args.max_k,
1566                min_results: 1,
1567                strategy,
1568                normalize_scores: true,
1569            };
1570
1571            match mem.search_adaptive(
1572                &args.query,
1573                &query_embedding,
1574                config,
1575                args.snippet_chars,
1576                scope,
1577            ) {
1578                Ok(result) => {
1579                    let mut resp = SearchResponse {
1580                        query: args.query.clone(),
1581                        hits: result.results,
1582                        total_hits: result.stats.returned,
1583                        params: memvid_core::SearchParams {
1584                            top_k: result.stats.returned,
1585                            snippet_chars: args.snippet_chars,
1586                            cursor: args.cursor.clone(),
1587                        },
1588                        elapsed_ms: 0,
1589                        engine: SearchEngineKind::Hybrid,
1590                        next_cursor: None,
1591                        context: String::new(),
1592                    };
1593                    apply_preference_rerank(&mut resp);
1594                    (
1595                        resp,
1596                        "semantic (adaptive vector search)".to_string(),
1597                        Some(result.stats),
1598                    )
1599                }
1600                Err(e) => {
1601                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1602                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1603                    }
1604
1605                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1606                    match mem.vec_search_with_embedding(
1607                        &args.query,
1608                        &query_embedding,
1609                        args.top_k,
1610                        args.snippet_chars,
1611                        scope,
1612                    ) {
1613                        Ok(mut resp) => {
1614                            apply_preference_rerank(&mut resp);
1615                            (resp, "semantic (vector search fallback)".to_string(), None)
1616                        }
1617                        Err(e2) => {
1618                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1619                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1620                            }
1621                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
1622                        }
1623                    }
1624                }
1625            }
1626        } else {
1627            // Standard fixed-k vector search
1628            match mem.vec_search_with_embedding(
1629                &args.query,
1630                &query_embedding,
1631                args.top_k,
1632                args.snippet_chars,
1633                scope,
1634            ) {
1635                Ok(mut resp) => {
1636                    // Apply preference boost to rerank results for preference-seeking queries
1637                    apply_preference_rerank(&mut resp);
1638                    (resp, "semantic (vector search)".to_string(), None)
1639                }
1640                Err(e) => {
1641                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1642                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1643                    }
1644
1645                    // Fall back to lexical search + rerank if vector search fails
1646                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1647                    let request = SearchRequest {
1648                        query: args.query.clone(),
1649                        top_k: args.top_k,
1650                        snippet_chars: args.snippet_chars,
1651                        uri: args.uri.clone(),
1652                        scope: args.scope.clone(),
1653                        cursor: args.cursor.clone(),
1654                        #[cfg(feature = "temporal_track")]
1655                        temporal: None,
1656                        as_of_frame: args.as_of_frame,
1657                        as_of_ts: args.as_of_ts,
1658                    };
1659                    let mut resp = mem.search(request)?;
1660                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1661                    (resp, "semantic (fallback rerank)".to_string(), None)
1662                }
1663            }
1664        }
1665    } else {
1666        // For lexical and auto modes, use existing behavior
1667        let request = SearchRequest {
1668            query: args.query.clone(),
1669            top_k: args.top_k,
1670            snippet_chars: args.snippet_chars,
1671            uri: args.uri.clone(),
1672            scope: args.scope.clone(),
1673            cursor: args.cursor.clone(),
1674            #[cfg(feature = "temporal_track")]
1675            temporal: None,
1676            as_of_frame: args.as_of_frame,
1677            as_of_ts: args.as_of_ts,
1678        };
1679
1680        let mut resp = mem.search(request)?;
1681
1682        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1683            warn!("Search index unavailable; returning basic text results");
1684        }
1685
1686        let mut engine_label = match resp.engine {
1687            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1688            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1689            SearchEngineKind::Hybrid => "hybrid".to_string(),
1690        };
1691
1692        if runtime_option.is_some() {
1693            engine_label = format!("hybrid ({engine_label} + semantic)");
1694        }
1695
1696        if let Some(ref runtime) = runtime_option {
1697            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1698        }
1699
1700        (resp, engine_label, None)
1701    };
1702
1703    if args.json_legacy {
1704        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1705        emit_legacy_search_json(&response)?;
1706    } else if args.json {
1707        emit_search_json(&response, mode_key)?;
1708    } else {
1709        println!(
1710            "mode: {}   k={}   time: {} ms",
1711            mode_label, response.params.top_k, response.elapsed_ms
1712        );
1713        println!("engine: {}", engine_label);
1714
1715        // Show adaptive retrieval stats if enabled
1716        if let Some(ref stats) = adaptive_stats {
1717            println!(
1718                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1719                stats.total_considered,
1720                stats.returned,
1721                stats.triggered_by,
1722                stats.top_score.unwrap_or(0.0),
1723                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1724            );
1725        }
1726
1727        println!(
1728            "hits: {} (showing {})",
1729            response.total_hits,
1730            response.hits.len()
1731        );
1732        emit_search_table(&response);
1733    }
1734
1735    // Save active replay session if one exists
1736    #[cfg(feature = "replay")]
1737    let _ = mem.save_active_session();
1738
1739    Ok(())
1740}
1741
1742pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1743    let mut mem = open_read_only_mem(&args.file)?;
1744    let vector = if let Some(path) = args.embedding.as_deref() {
1745        read_embedding(path)?
1746    } else if let Some(vector_string) = &args.vector {
1747        parse_vector(vector_string)?
1748    } else {
1749        anyhow::bail!("provide --vector or --embedding for search input");
1750    };
1751
1752    let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1753        MemvidError::VecDimensionMismatch { expected, actual } => {
1754            anyhow!(vec_dimension_mismatch_help(expected, actual))
1755        }
1756        other => anyhow!(other),
1757    })?;
1758    let mut enriched = Vec::with_capacity(hits.len());
1759    for hit in hits {
1760        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1761        enriched.push((hit.frame_id, hit.distance, preview));
1762    }
1763
1764    if args.json {
1765        let json_hits: Vec<_> = enriched
1766            .iter()
1767            .map(|(frame_id, distance, preview)| {
1768                json!({
1769                    "frame_id": frame_id,
1770                    "distance": distance,
1771                    "preview": preview,
1772                })
1773            })
1774            .collect();
1775        let json_str = serde_json::to_string_pretty(&json_hits)?;
1776        println!("{}", json_str.to_colored_json_auto()?);
1777    } else if enriched.is_empty() {
1778        println!("No vector matches found");
1779    } else {
1780        for (frame_id, distance, preview) in enriched {
1781            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1782        }
1783    }
1784    Ok(())
1785}
1786
1787pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1788    use memvid_core::AuditOptions;
1789    use std::fs::File;
1790    use std::io::Write;
1791
1792    let mut mem = Memvid::open(&args.file)?;
1793
1794    // Parse date boundaries
1795    let start = parse_date_boundary(args.start.as_ref(), false)?;
1796    let end = parse_date_boundary(args.end.as_ref(), true)?;
1797    if let (Some(start_ts), Some(end_ts)) = (start, end) {
1798        if end_ts < start_ts {
1799            anyhow::bail!("--end must not be earlier than --start");
1800        }
1801    }
1802
1803    // Set up embedding runtime if needed
1804    let ask_mode: AskMode = args.mode.into();
1805    let runtime = match args.mode {
1806        AskModeArg::Lex => None,
1807        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1808        AskModeArg::Hybrid => try_load_embedding_runtime(config),
1809    };
1810    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1811
1812    // Build audit options
1813    let options = AuditOptions {
1814        top_k: Some(args.top_k),
1815        snippet_chars: Some(args.snippet_chars),
1816        mode: Some(ask_mode),
1817        scope: args.scope,
1818        start,
1819        end,
1820        include_snippets: true,
1821    };
1822
1823    // Run the audit
1824    let mut report = mem.audit(&args.question, Some(options), embedder)?;
1825
1826    // If --use-model is provided, run model inference to synthesize the answer
1827    if let Some(model_name) = args.use_model.as_deref() {
1828        // Build context from sources for model inference
1829        let context = report
1830            .sources
1831            .iter()
1832            .filter_map(|s| s.snippet.clone())
1833            .collect::<Vec<_>>()
1834            .join("\n\n");
1835
1836        match run_model_inference(
1837            model_name,
1838            &report.question,
1839            &context,
1840            &[], // No hits needed for audit
1841            None,
1842            None,
1843            None, // No system prompt override for audit
1844        ) {
1845            Ok(inference) => {
1846                report.answer = Some(inference.answer.answer);
1847                report.notes.push(format!(
1848                    "Answer synthesized by model: {}",
1849                    inference.answer.model
1850                ));
1851            }
1852            Err(err) => {
1853                warn!(
1854                    "model inference unavailable for '{}': {err}. Using default answer.",
1855                    model_name
1856                );
1857            }
1858        }
1859    }
1860
1861    // Format the output
1862    let output = match args.format {
1863        AuditFormat::Text => report.to_text(),
1864        AuditFormat::Markdown => report.to_markdown(),
1865        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1866    };
1867
1868    // Write output
1869    if let Some(out_path) = args.out {
1870        let mut file = File::create(&out_path)?;
1871        file.write_all(output.as_bytes())?;
1872        println!("Audit report written to: {}", out_path.display());
1873    } else {
1874        println!("{}", output);
1875    }
1876
1877    Ok(())
1878}
1879
1880fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1881    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1882
1883    let mut additional_params = serde_json::Map::new();
1884    if let Some(cursor) = &response.params.cursor {
1885        additional_params.insert("cursor".into(), json!(cursor));
1886    }
1887
1888    let mut params = serde_json::Map::new();
1889    params.insert("top_k".into(), json!(response.params.top_k));
1890    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1891    params.insert("mode".into(), json!(mode));
1892    params.insert(
1893        "additional_params".into(),
1894        serde_json::Value::Object(additional_params),
1895    );
1896
1897    let mut metadata_json = serde_json::Map::new();
1898    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1899    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1900    metadata_json.insert(
1901        "next_cursor".into(),
1902        match &response.next_cursor {
1903            Some(cursor) => json!(cursor),
1904            None => serde_json::Value::Null,
1905        },
1906    );
1907    metadata_json.insert("engine".into(), json!(response.engine));
1908    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1909
1910    let body = json!({
1911        "version": "mv2.result.v2",
1912        "query": response.query,
1913        "metadata": metadata_json,
1914        "hits": hits,
1915        "context": response.context,
1916    });
1917    let json_str = serde_json::to_string_pretty(&body)?;
1918    println!("{}", json_str.to_colored_json_auto()?);
1919    Ok(())
1920}
1921
1922fn emit_ask_json(
1923    response: &AskResponse,
1924    requested_mode: AskModeArg,
1925    model: Option<&ModelAnswer>,
1926    include_sources: bool,
1927    mem: &mut Memvid,
1928) -> Result<()> {
1929    let hits: Vec<_> = response
1930        .retrieval
1931        .hits
1932        .iter()
1933        .map(search_hit_to_json)
1934        .collect();
1935
1936    let citations: Vec<_> = response
1937        .citations
1938        .iter()
1939        .map(|citation| {
1940            let mut map = serde_json::Map::new();
1941            map.insert("index".into(), json!(citation.index));
1942            map.insert("frame_id".into(), json!(citation.frame_id));
1943            map.insert("uri".into(), json!(citation.uri));
1944            if let Some(range) = citation.chunk_range {
1945                map.insert("chunk_range".into(), json!([range.0, range.1]));
1946            }
1947            if let Some(score) = citation.score {
1948                map.insert("score".into(), json!(score));
1949            }
1950            serde_json::Value::Object(map)
1951        })
1952        .collect();
1953
1954    let mut body = json!({
1955        "version": "mv2.ask.v1",
1956        "question": response.question,
1957        "answer": response.answer,
1958        "context_only": response.context_only,
1959        "mode": ask_mode_display(requested_mode),
1960        "retriever": ask_retriever_display(response.retriever),
1961        "top_k": response.retrieval.params.top_k,
1962        "results": hits,
1963        "citations": citations,
1964        "stats": {
1965            "retrieval_ms": response.stats.retrieval_ms,
1966            "synthesis_ms": response.stats.synthesis_ms,
1967            "latency_ms": response.stats.latency_ms,
1968        },
1969        "engine": search_engine_label(&response.retrieval.engine),
1970        "total_hits": response.retrieval.total_hits,
1971        "next_cursor": response.retrieval.next_cursor,
1972        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1973    });
1974
1975    if let Some(model) = model {
1976        if let serde_json::Value::Object(ref mut map) = body {
1977            map.insert("model".into(), json!(model.requested));
1978            if model.model != model.requested {
1979                map.insert("model_used".into(), json!(model.model));
1980            }
1981        }
1982    }
1983
1984    // Add detailed sources if requested
1985    if include_sources {
1986        if let serde_json::Value::Object(ref mut map) = body {
1987            let sources = build_sources_json(response, mem);
1988            map.insert("sources".into(), json!(sources));
1989        }
1990    }
1991
1992    println!("{}", serde_json::to_string_pretty(&body)?);
1993    Ok(())
1994}
1995
1996fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
1997    response
1998        .citations
1999        .iter()
2000        .enumerate()
2001        .map(|(idx, citation)| {
2002            let mut source = serde_json::Map::new();
2003            source.insert("index".into(), json!(idx + 1));
2004            source.insert("frame_id".into(), json!(citation.frame_id));
2005            source.insert("uri".into(), json!(citation.uri));
2006
2007            if let Some(range) = citation.chunk_range {
2008                source.insert("chunk_range".into(), json!([range.0, range.1]));
2009            }
2010            if let Some(score) = citation.score {
2011                source.insert("score".into(), json!(score));
2012            }
2013
2014            // Get frame metadata for rich source information
2015            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2016                if let Some(title) = frame.title {
2017                    source.insert("title".into(), json!(title));
2018                }
2019                if !frame.tags.is_empty() {
2020                    source.insert("tags".into(), json!(frame.tags));
2021                }
2022                if !frame.labels.is_empty() {
2023                    source.insert("labels".into(), json!(frame.labels));
2024                }
2025                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2026                if !frame.content_dates.is_empty() {
2027                    source.insert("content_dates".into(), json!(frame.content_dates));
2028                }
2029            }
2030
2031            // Get snippet from hit
2032            if let Some(hit) = response
2033                .retrieval
2034                .hits
2035                .iter()
2036                .find(|h| h.frame_id == citation.frame_id)
2037            {
2038                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2039                source.insert("snippet".into(), json!(snippet));
2040            }
2041
2042            serde_json::Value::Object(source)
2043        })
2044        .collect()
2045}
2046
2047fn emit_model_json(
2048    response: &AskResponse,
2049    requested_model: &str,
2050    model: Option<&ModelAnswer>,
2051    include_sources: bool,
2052    mem: &mut Memvid,
2053) -> Result<()> {
2054    let answer = response.answer.clone().unwrap_or_default();
2055    let requested_label = model
2056        .map(|m| m.requested.clone())
2057        .unwrap_or_else(|| requested_model.to_string());
2058    let used_label = model
2059        .map(|m| m.model.clone())
2060        .unwrap_or_else(|| requested_model.to_string());
2061
2062    let mut body = json!({
2063        "question": response.question,
2064        "model": requested_label,
2065        "model_used": used_label,
2066        "answer": answer,
2067        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2068    });
2069
2070    // Add detailed sources if requested
2071    if include_sources {
2072        if let serde_json::Value::Object(ref mut map) = body {
2073            let sources = build_sources_json(response, mem);
2074            map.insert("sources".into(), json!(sources));
2075        }
2076    }
2077
2078    // Use colored JSON output
2079    let json_str = serde_json::to_string_pretty(&body)?;
2080    println!("{}", json_str.to_colored_json_auto()?);
2081    Ok(())
2082}
2083
2084fn emit_ask_pretty(
2085    response: &AskResponse,
2086    requested_mode: AskModeArg,
2087    model: Option<&ModelAnswer>,
2088    include_sources: bool,
2089    mem: &mut Memvid,
2090) {
2091    println!(
2092        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2093        ask_mode_pretty(requested_mode),
2094        ask_retriever_pretty(response.retriever),
2095        response.retrieval.params.top_k,
2096        response.stats.latency_ms,
2097        response.stats.retrieval_ms
2098    );
2099    if let Some(model) = model {
2100        if model.requested.trim() == model.model {
2101            println!("model: {}", model.model);
2102        } else {
2103            println!(
2104                "model requested: {}   model used: {}",
2105                model.requested, model.model
2106            );
2107        }
2108    }
2109    println!(
2110        "engine: {}",
2111        search_engine_label(&response.retrieval.engine)
2112    );
2113    println!(
2114        "hits: {} (showing {})",
2115        response.retrieval.total_hits,
2116        response.retrieval.hits.len()
2117    );
2118
2119    if response.context_only {
2120        println!();
2121        println!("Context-only mode: synthesis disabled.");
2122        println!();
2123    } else if let Some(answer) = &response.answer {
2124        println!();
2125        println!("Answer:\n{answer}");
2126        println!();
2127    }
2128
2129    if !response.citations.is_empty() {
2130        println!("Citations:");
2131        for citation in &response.citations {
2132            match citation.score {
2133                Some(score) => println!(
2134                    "[{}] {} (frame {}, score {:.3})",
2135                    citation.index, citation.uri, citation.frame_id, score
2136                ),
2137                None => println!(
2138                    "[{}] {} (frame {})",
2139                    citation.index, citation.uri, citation.frame_id
2140                ),
2141            }
2142        }
2143        println!();
2144    }
2145
2146    // Print detailed sources if requested
2147    if include_sources && !response.citations.is_empty() {
2148        println!("=== SOURCES ===");
2149        println!();
2150        for citation in &response.citations {
2151            println!("[{}] {}", citation.index, citation.uri);
2152
2153            // Get frame metadata
2154            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2155                if let Some(title) = &frame.title {
2156                    println!("    Title: {}", title);
2157                }
2158                println!("    Frame ID: {}", citation.frame_id);
2159                if let Some(score) = citation.score {
2160                    println!("    Score: {:.4}", score);
2161                }
2162                if let Some((start, end)) = citation.chunk_range {
2163                    println!("    Range: [{}..{})", start, end);
2164                }
2165                if !frame.tags.is_empty() {
2166                    println!("    Tags: {}", frame.tags.join(", "));
2167                }
2168                if !frame.labels.is_empty() {
2169                    println!("    Labels: {}", frame.labels.join(", "));
2170                }
2171                println!("    Timestamp: {}", frame.timestamp);
2172                if !frame.content_dates.is_empty() {
2173                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2174                }
2175            }
2176
2177            // Get snippet from hit
2178            if let Some(hit) = response
2179                .retrieval
2180                .hits
2181                .iter()
2182                .find(|h| h.frame_id == citation.frame_id)
2183            {
2184                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2185                let truncated = if snippet.len() > 200 {
2186                    format!("{}...", &snippet[..200])
2187                } else {
2188                    snippet.clone()
2189                };
2190                println!("    Snippet: {}", truncated.replace('\n', " "));
2191            }
2192            println!();
2193        }
2194    }
2195
2196    if !include_sources {
2197        println!();
2198        emit_search_table(&response.retrieval);
2199    }
2200}
2201
2202fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2203    let hits: Vec<_> = response
2204        .hits
2205        .iter()
2206        .map(|hit| {
2207            json!({
2208                "frame_id": hit.frame_id,
2209                "matches": hit.matches,
2210                "snippets": [hit.text.clone()],
2211            })
2212        })
2213        .collect();
2214    println!("{}", serde_json::to_string_pretty(&hits)?);
2215    Ok(())
2216}
2217
2218fn emit_search_table(response: &SearchResponse) {
2219    if response.hits.is_empty() {
2220        println!("No results for '{}'.", response.query);
2221        return;
2222    }
2223    for hit in &response.hits {
2224        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
2225        if let Some(title) = &hit.title {
2226            println!("  Title: {title}");
2227        }
2228        if let Some(score) = hit.score {
2229            println!("  Score: {score:.3}");
2230        }
2231        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
2232        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
2233            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
2234        }
2235        if let Some(chunk_text) = &hit.chunk_text {
2236            println!("  Chunk Text: {}", chunk_text.trim());
2237        }
2238        if let Some(metadata) = &hit.metadata {
2239            if let Some(track) = &metadata.track {
2240                println!("  Track: {track}");
2241            }
2242            if !metadata.tags.is_empty() {
2243                println!("  Tags: {}", metadata.tags.join(", "));
2244            }
2245            if !metadata.labels.is_empty() {
2246                println!("  Labels: {}", metadata.labels.join(", "));
2247            }
2248            if let Some(created_at) = &metadata.created_at {
2249                println!("  Created: {created_at}");
2250            }
2251            if !metadata.content_dates.is_empty() {
2252                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2253            }
2254            if !metadata.entities.is_empty() {
2255                let entity_strs: Vec<String> = metadata
2256                    .entities
2257                    .iter()
2258                    .map(|e| format!("{} ({})", e.name, e.kind))
2259                    .collect();
2260                println!("  Entities: {}", entity_strs.join(", "));
2261            }
2262        }
2263        println!("  Snippet: {}", hit.text.trim());
2264        println!();
2265    }
2266    if let Some(cursor) = &response.next_cursor {
2267        println!("Next cursor: {cursor}");
2268    }
2269}
2270
2271fn ask_mode_display(mode: AskModeArg) -> &'static str {
2272    match mode {
2273        AskModeArg::Lex => "lex",
2274        AskModeArg::Sem => "sem",
2275        AskModeArg::Hybrid => "hybrid",
2276    }
2277}
2278
2279fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2280    match mode {
2281        AskModeArg::Lex => "Lexical",
2282        AskModeArg::Sem => "Semantic",
2283        AskModeArg::Hybrid => "Hybrid",
2284    }
2285}
2286
2287fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2288    match retriever {
2289        AskRetriever::Lex => "lex",
2290        AskRetriever::Semantic => "semantic",
2291        AskRetriever::Hybrid => "hybrid",
2292        AskRetriever::LexFallback => "lex_fallback",
2293        AskRetriever::TimelineFallback => "timeline_fallback",
2294    }
2295}
2296
2297fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2298    match retriever {
2299        AskRetriever::Lex => "Lexical",
2300        AskRetriever::Semantic => "Semantic",
2301        AskRetriever::Hybrid => "Hybrid",
2302        AskRetriever::LexFallback => "Lexical (fallback)",
2303        AskRetriever::TimelineFallback => "Timeline (fallback)",
2304    }
2305}
2306
2307fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2308    match engine {
2309        SearchEngineKind::Tantivy => "text (tantivy)",
2310        SearchEngineKind::LexFallback => "text (fallback)",
2311        SearchEngineKind::Hybrid => "hybrid",
2312    }
2313}
2314
2315fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2316    let digest = hash(uri.as_bytes()).to_hex().to_string();
2317    let prefix_len = digest.len().min(12);
2318    let prefix = &digest[..prefix_len];
2319    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2320}
2321
/// Return `text` unchanged when it has at most `limit` chars; otherwise the
/// first `limit` chars followed by a literal "..." suffix.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` is None exactly when the text has <= limit chars; when it
    // is Some, its byte index marks the cut point after `limit` full chars,
    // so the slice is always on a UTF-8 boundary.
    match text.char_indices().nth(limit) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2330
2331fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
2332    let mut hit_json = serde_json::Map::new();
2333    hit_json.insert("rank".into(), json!(hit.rank));
2334    if let Some(score) = hit.score {
2335        hit_json.insert("score".into(), json!(score));
2336    }
2337    hit_json.insert(
2338        "id".into(),
2339        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
2340    );
2341    hit_json.insert("frame_id".into(), json!(hit.frame_id));
2342    hit_json.insert("uri".into(), json!(hit.uri));
2343    if let Some(title) = &hit.title {
2344        hit_json.insert("title".into(), json!(title));
2345    }
2346    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
2347    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
2348    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
2349    hit_json.insert("text".into(), json!(hit.text));
2350
2351    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
2352        matches: hit.matches,
2353        ..SearchHitMetadata::default()
2354    });
2355    let mut meta_json = serde_json::Map::new();
2356    meta_json.insert("matches".into(), json!(metadata.matches));
2357    if !metadata.tags.is_empty() {
2358        meta_json.insert("tags".into(), json!(metadata.tags));
2359    }
2360    if !metadata.labels.is_empty() {
2361        meta_json.insert("labels".into(), json!(metadata.labels));
2362    }
2363    if let Some(track) = metadata.track {
2364        meta_json.insert("track".into(), json!(track));
2365    }
2366    if let Some(created_at) = metadata.created_at {
2367        meta_json.insert("created_at".into(), json!(created_at));
2368    }
2369    if !metadata.content_dates.is_empty() {
2370        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
2371    }
2372    if !metadata.entities.is_empty() {
2373        let entities_json: Vec<serde_json::Value> = metadata
2374            .entities
2375            .iter()
2376            .map(|e| {
2377                let mut ent = serde_json::Map::new();
2378                ent.insert("name".into(), json!(e.name));
2379                ent.insert("kind".into(), json!(e.kind));
2380                if let Some(conf) = e.confidence {
2381                    ent.insert("confidence".into(), json!(conf));
2382                }
2383                serde_json::Value::Object(ent)
2384            })
2385            .collect();
2386        meta_json.insert("entities".into(), json!(entities_json));
2387    }
2388    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
2389    serde_json::Value::Object(hit_json)
2390}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Hits without a stored embedding (or whose embedding dimension does not
/// match the runtime's) contribute no semantic term; if no hit has a usable
/// embedding the response is left untouched. After fusion, `hit.rank` is
/// rewritten to the new 1-based position.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Embed the query once, then score each hit that has a stored embedding
    // of the expected dimensionality against it.
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // No usable embeddings at all: keep the lexical ordering as-is.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    // NaN-safe descending sort: incomparable pairs are treated as equal.
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    // frame_id -> 1-based semantic rank.
    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    // NOTE(review): this marker list is duplicated in apply_preference_rerank —
    // keep the two in sync.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // (original index, fused score, lexical rank) per hit.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank (0.0 when the hit had no usable embedding)
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Descending fused score; break ties by ascending lexical rank so
    // equal-scoring hits keep their original relative order.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order, rewriting ranks to 1..=len.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
2496
2497/// Rerank search results by boosting hits that contain user preference signals.
2498/// Only applies when the query appears to be seeking recommendations or preferences.
2499fn apply_preference_rerank(response: &mut SearchResponse) {
2500    if response.hits.is_empty() {
2501        return;
2502    }
2503
2504    // Check if query is preference-seeking
2505    let query_lower = response.query.to_lowercase();
2506    let is_preference_query = query_lower.contains("suggest")
2507        || query_lower.contains("recommend")
2508        || query_lower.contains("should i")
2509        || query_lower.contains("what should")
2510        || query_lower.contains("prefer")
2511        || query_lower.contains("favorite")
2512        || query_lower.contains("best for me");
2513
2514    if !is_preference_query {
2515        return;
2516    }
2517
2518    // Compute boost scores for each hit
2519    let mut scored: Vec<(usize, f32, f32)> = response
2520        .hits
2521        .iter()
2522        .enumerate()
2523        .map(|(idx, hit)| {
2524            let original_score = hit.score.unwrap_or(0.0);
2525            let preference_boost = compute_preference_boost(&hit.text);
2526            let boosted_score = original_score + preference_boost;
2527            (idx, boosted_score, original_score)
2528        })
2529        .collect();
2530
2531    // Sort by boosted score (descending)
2532    scored.sort_by(|a, b| {
2533        b.1.partial_cmp(&a.1)
2534            .unwrap_or(Ordering::Equal)
2535            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
2536    });
2537
2538    // Reorder hits
2539    let mut reordered = Vec::with_capacity(response.hits.len());
2540    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
2541        let mut hit = response.hits[idx].clone();
2542        hit.rank = rank_idx + 1;
2543        reordered.push(hit);
2544    }
2545
2546    response.hits = reordered;
2547}
2548
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
fn compute_preference_boost(text: &str) -> f32 {
    // Strong signals: Past/present user experiences and possessions.
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY.
    const ESTABLISHED_CONTEXT: &[&str] = &[
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];

    // Moderate signals: General first-person statements.
    const FIRST_PERSON: &[&str] = &[" i ", " my ", " me "];

    // Weak signals: Requests/intentions (not yet established preferences).
    // These indicate the user wants something, but don't describe established context.
    const REQUEST_PATTERNS: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    let lowered = text.to_lowercase();

    // Accumulate per-pattern weights: 0.15 per established-context match,
    // 0.02 per first-person or request match.
    let mut boost = ESTABLISHED_CONTEXT
        .iter()
        .filter(|p| lowered.contains(*p))
        .fold(0.0f32, |acc, _| acc + 0.15);
    boost = FIRST_PERSON
        .iter()
        .filter(|p| lowered.contains(*p))
        .fold(boost, |acc, _| acc + 0.02);
    boost = REQUEST_PATTERNS
        .iter()
        .filter(|p| lowered.contains(*p))
        .fold(boost, |acc, _| acc + 0.02);

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
2641
/// Cosine similarity of two vectors, accumulated in a single pass.
/// Returns 0.0 when either vector has (near-)zero magnitude; if the slices
/// differ in length, the extra tail of the longer one is ignored.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let (dot, norm_a, norm_b) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, na, nb), (x, y)| (dot + x * y, na + x * x, nb + y * y),
    );

    if norm_a <= f32::EPSILON || norm_b <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a.sqrt() * norm_b.sqrt())
}
2658
2659/// Apply cross-encoder reranking to search results.
2660///
2661/// Cross-encoders directly score query-document pairs and can understand
2662/// more nuanced relevance than bi-encoders (embeddings). This is especially
2663/// useful for personalization queries where semantic similarity != relevance.
2664///
2665/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2666fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2667    if response.hits.is_empty() || response.hits.len() < 2 {
2668        return Ok(());
2669    }
2670
2671    // Only rerank if we have enough candidates
2672    let candidates_to_rerank = response.hits.len().min(50);
2673
2674    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2675    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2676    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2677        .with_show_download_progress(true);
2678
2679    let mut reranker = match TextRerank::try_new(options) {
2680        Ok(r) => r,
2681        Err(e) => {
2682            warn!("Failed to initialize cross-encoder reranker: {e}");
2683            return Ok(());
2684        }
2685    };
2686
2687    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2688    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2689        .iter()
2690        .map(|hit| hit.text.clone())
2691        .collect();
2692
2693    // Rerank using cross-encoder
2694    info!("Cross-encoder reranking {} candidates", documents.len());
2695    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2696        Ok(results) => results,
2697        Err(e) => {
2698            warn!("Cross-encoder reranking failed: {e}");
2699            return Ok(());
2700        }
2701    };
2702
2703    // Reorder hits based on cross-encoder scores
2704    let mut reordered = Vec::with_capacity(response.hits.len());
2705    for (new_rank, result) in rerank_results.iter().enumerate() {
2706        let original_idx = result.index;
2707        let mut hit = response.hits[original_idx].clone();
2708        hit.rank = new_rank + 1;
2709        // Store cross-encoder score in the hit score for reference
2710        hit.score = Some(result.score);
2711        reordered.push(hit);
2712    }
2713
2714    // Add any remaining hits that weren't reranked (beyond top-50)
2715    for hit in response.hits.iter().skip(candidates_to_rerank) {
2716        let mut h = hit.clone();
2717        h.rank = reordered.len() + 1;
2718        reordered.push(h);
2719    }
2720
2721    response.hits = reordered;
2722    info!("Cross-encoder reranking complete");
2723    Ok(())
2724}
2725
2726/// Build a context string from memory cards stored in the MV2 file.
2727/// Groups facts by entity for better LLM comprehension.
2728fn build_memory_context(mem: &Memvid) -> String {
2729    let entities = mem.memory_entities();
2730    if entities.is_empty() {
2731        return String::new();
2732    }
2733
2734    let mut sections = Vec::new();
2735    for entity in entities {
2736        let cards = mem.get_entity_memories(&entity);
2737        if cards.is_empty() {
2738            continue;
2739        }
2740
2741        let mut entity_lines = Vec::new();
2742        for card in cards {
2743            // Format: "slot: value" with optional polarity indicator
2744            let polarity_marker = card
2745                .polarity
2746                .as_ref()
2747                .map(|p| match p.to_string().as_str() {
2748                    "Positive" => " (+)",
2749                    "Negative" => " (-)",
2750                    _ => "",
2751                })
2752                .unwrap_or("");
2753            entity_lines.push(format!(
2754                "  - {}: {}{}",
2755                card.slot, card.value, polarity_marker
2756            ));
2757        }
2758
2759        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2760    }
2761
2762    sections.join("\n\n")
2763}
2764
2765/// Build a context string from entities found in search hits.
2766/// Groups entities by type for better LLM comprehension.
2767fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
2768    use std::collections::HashMap;
2769
2770    // Collect unique entities by kind
2771    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
2772
2773    for hit in hits {
2774        if let Some(metadata) = &hit.metadata {
2775            for entity in &metadata.entities {
2776                entities_by_kind
2777                    .entry(entity.kind.clone())
2778                    .or_default()
2779                    .push(entity.name.clone());
2780            }
2781        }
2782    }
2783
2784    if entities_by_kind.is_empty() {
2785        return String::new();
2786    }
2787
2788    // Deduplicate and format
2789    let mut sections = Vec::new();
2790    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
2791    sorted_kinds.sort();
2792
2793    for kind in sorted_kinds {
2794        let names = entities_by_kind.get(kind).unwrap();
2795        let mut unique_names: Vec<_> = names.iter().collect();
2796        unique_names.sort();
2797        unique_names.dedup();
2798
2799        let names_str = unique_names
2800            .iter()
2801            .take(10) // Limit to 10 entities per kind
2802            .map(|s| s.as_str())
2803            .collect::<Vec<_>>()
2804            .join(", ");
2805
2806        sections.push(format!("{}: {}", kind, names_str));
2807    }
2808
2809    sections.join("\n")
2810}