memvid_cli/commands/
search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored::Colorize;
15use colored_json::ToColoredJson;
16use blake3::hash;
17use clap::{ArgAction, Args, ValueEnum};
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21    TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
25    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
26    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
27};
28#[cfg(feature = "temporal_track")]
29use serde::Serialize;
30use serde_json::json;
31#[cfg(feature = "temporal_track")]
32use time::format_description::well_known::Rfc3339;
33use time::{Date, PrimitiveDateTime, Time};
34#[cfg(feature = "temporal_track")]
35use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
36use tracing::{info, warn};
37
38#[cfg(feature = "local-embeddings")]
39use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
40
41use memvid_ask_model::{
42    run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
43};
44
45// frame_to_json and print_frame_summary available from commands but not used in this module
46use crate::config::{
47    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
48    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
49};
50use crate::utils::{
51    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
52    parse_date_boundary, parse_vector, read_embedding,
53};
54
55const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
56#[cfg(feature = "temporal_track")]
57const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
58
59fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
60    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
61    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
62    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
63        message.push_str(&format!(
64            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
65            model.name(),
66            model.name()
67        ));
68        if model.is_openai() {
69            message.push_str(" (and set `OPENAI_API_KEY`).");
70        } else {
71            message.push('.');
72        }
73        message.push_str(&format!(
74            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
75            model.name()
76        ));
77        message.push_str(&format!(
78            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
79        ));
80        message.push_str("\nOr use `--mode lex` to disable semantic search.");
81    }
82    message
83}
84
/// Arguments for the `timeline` subcommand
// Field notes use `//` (not `///`) so clap's generated --help text is unchanged.
#[derive(Args)]
pub struct TimelineArgs {
    // Path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the listing order.
    // NOTE(review): exact ordering semantics live in TimelineQueryBuilder — confirm there.
    #[arg(long)]
    pub reverse: bool,
    // Cap the number of entries returned (must be >= 1).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (unix seconds, presumably UTC — confirm against core API).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase, resolved via build_temporal_filter.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone used to resolve --on (defaults to DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases; defaults to now (UTC).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, applied around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
119
/// Arguments for the `when` subcommand
// Field notes use `//` (not `///`) so clap's generated --help text is unchanged.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Path to the memory file to query.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for resolving the phrase (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases; defaults to now (UTC).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum entries to return (must be >= 1).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the listing order.
    #[arg(long)]
    pub reverse: bool,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
145
/// Arguments for the `ask` subcommand
// Notes on undocumented fields use `//` (not `///`) so clap help is unchanged.
#[derive(Args)]
pub struct AskArgs {
    // Positional arguments: memory file(s) and/or question words.
    // NOTE(review): how TARGETs split into files vs question text is decided
    // by the handler — confirm there.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (alternative to supplying it positionally).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Exact-URI filter for retrieval.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // URI-prefix filter for retrieval.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Fixed number of hits to retrieve (see --no-adaptive below).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode (lex | sem | hybrid).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Print the retrieved context only, without answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Inclusive date-range filters; presumably parsed by `parse_date_boundary`
    // (imported above) — confirm in the handler.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// Return verbatim evidence without LLM synthesis.
    /// Shows the most relevant passages with citations, no paraphrasing or summarization.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
238
/// Ask mode argument
// CLI-facing mirror of memvid_core::AskMode; converted by the From impl below.
// Variant notes use `//` so clap's value help text is unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval (maps to AskMode::Lex).
    Lex,
    // Semantic retrieval (maps to AskMode::Sem).
    Sem,
    // Combined retrieval (maps to AskMode::Hybrid); the `ask` default.
    Hybrid,
}
246
247impl From<AskModeArg> for AskMode {
248    fn from(value: AskModeArg) -> Self {
249        match value {
250            AskModeArg::Lex => AskMode::Lex,
251            AskModeArg::Sem => AskMode::Sem,
252            AskModeArg::Hybrid => AskMode::Hybrid,
253        }
254    }
255}
256
/// Arguments for the `find` subcommand
// Notes on undocumented fields use `//` (not `///`) so clap help is unchanged.
#[derive(Args)]
pub struct FindArgs {
    // Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Exact-URI filter.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // URI-prefix filter.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Fixed number of hits to return (see --no-adaptive below).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Older JSON shape, kept for backward compatibility; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode (auto | lex | sem, plus clip when built with that feature).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// Disable sketch pre-filtering (for benchmarking/debugging).
    /// By default, sketches are used for fast candidate generation if available.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
323
/// Search mode argument
// Variant notes use `//` so clap's value help text is unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Pick the engine automatically.
    // NOTE(review): the selection heuristic lives in the handler/core — confirm.
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
334
/// Adaptive retrieval strategy
// NOTE(review): presumably mapped onto memvid_core's `CutoffStrategy`
// (imported above) when building the search/ask request — confirm the
// mapping site before relying on 1:1 variant correspondence.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
349
/// Arguments for the `vec-search` subcommand
// Field notes use `//` (not `///`) so clap help is unchanged.
#[derive(Args)]
pub struct VecSearchArgs {
    // Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as comma-separated values; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
364
/// Arguments for the `audit` subcommand
// Retrieval knobs (top-k, snippet-chars, mode, scope, start/end, use-model)
// mirror the `ask` subcommand; output goes to stdout unless --out is given.
#[derive(Args)]
pub struct AuditArgs {
    // Path to the memory file to audit against.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
401
/// Audit output format
// Selected via `--format` on the `audit` subcommand (default: text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
412
413// ============================================================================
414// Search & Retrieval command handlers
415// ============================================================================
416
/// Handle the `timeline` subcommand: list frames in order, optionally
/// filtered by limit/since/until, a natural-language temporal phrase
/// (`temporal_track` feature), and Replay (`--as-of-*`) cutoffs.
///
/// Output is either pretty JSON (`--json`) or a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only make sense alongside --on; reject early
    // with a deterministic error code instead of silently ignoring them.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the --on phrase into a temporal filter; keep the summary so it
    // can be echoed in both JSON and human output below.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    // (done client-side, after the core query returns).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // JSON output: wrap entries with the temporal summary when a phrase
        // was resolved; otherwise print the bare entries array.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        // Human-readable listing: one header line per frame plus optional
        // URI / child-frame / temporal detail lines.
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
522
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve a temporal phrase (`--on`) to a
/// concrete window and list the frames that fall inside it, in JSON or
/// human-readable form.
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase first so a bad phrase/anchor fails before any query.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON mode: emit the resolved summary plus entry views, then stop.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    // Human-readable listing: one header line per frame plus optional
    // URI / child-frame / temporal detail lines.
    for entry in &entries {
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
590
#[cfg(feature = "temporal_track")]
/// JSON envelope for `timeline --json` when a `--on` phrase was supplied:
/// pairs the resolved temporal summary with the matched entries.
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Present only when a temporal phrase was resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Borrowed slice of the entries returned by the timeline query.
    entries: &'a [TimelineEntry],
}
598
#[cfg(feature = "temporal_track")]
/// Top-level JSON envelope for `when --json`: the resolved temporal summary
/// followed by the matching entries.
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
605
#[cfg(feature = "temporal_track")]
/// JSON view of a single timeline entry in `when --json` output.
/// Built from a core `TimelineEntry` by `entry_to_when_entry`.
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Frame timestamp in unix seconds.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    /// Frame preview text.
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-hit temporal details (anchor/mentions), when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
621
#[cfg(feature = "temporal_track")]
/// Serializable summary of a resolved temporal phrase, emitted in
/// `timeline --json` / `when --json` output (see `summary_to_output`).
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The original `--on` phrase.
    phrase: String,
    /// IANA timezone the phrase was resolved in.
    timezone: String,
    /// Anchor instant as unix seconds.
    anchor_utc: i64,
    /// Anchor as RFC3339, falling back to the unix timestamp as a string.
    anchor_iso: String,
    /// Resolver confidence score.
    confidence: u16,
    /// Resolver flags; omitted from JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// Resolution kind label (e.g. "date", "datetime", "date_range").
    resolution_kind: &'static str,
    /// Window bounds as unix seconds / ISO strings; None when unresolved.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// `--window` padding in minutes, when supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
640
#[cfg(feature = "temporal_track")]
/// Internal (non-serialized) record of a resolved temporal phrase.
/// Converted to `TemporalSummaryOutput` for JSON via `summary_to_output`.
struct TemporalSummary {
    /// The original phrase.
    phrase: String,
    /// Timezone the phrase was resolved in.
    tz: String,
    /// Anchor instant used for relative phrases.
    anchor: OffsetDateTime,
    /// Resolved window bounds in unix seconds, if any.
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    /// Raw resolver output (value, confidence, flags).
    resolution: TemporalResolution,
    /// `--window` padding in minutes, as supplied by the user.
    window_minutes: Option<u64>,
}
651
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal phrase into a concrete
/// `TemporalFilter` plus a `TemporalSummary` for display/JSON.
///
/// # Errors
/// - `E-TEMP-003` when the timezone override is blank.
/// - `E-TEMP-002` when the anchor override is not RFC3339.
/// - `E-TEMP-001` when the phrase cannot be resolved.
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    // Anchor defaults to "now" unless the caller pins it (useful for tests
    // and reproducible queries).
    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    // Optionally pad the resolved window symmetrically by --window minutes.
    // (The previous code had byte-identical then/else arms on `s == e`;
    // padding applies uniformly to instants and ranges, so one arm suffices.)
    let (mut start, mut end) = resolution_bounds(&resolution)?;
    if let Some(minutes) = window_minutes.filter(|&m| m > 0) {
        if let (Some(s), Some(e)) = (start, end) {
            let pad = TimeDuration::minutes(minutes as i64).whole_seconds();
            start = Some(s.saturating_sub(pad));
            end = Some(e.saturating_add(pad));
        }
    }

    // The filter carries only the concrete bounds; phrase/tz are echoed to
    // the user via the summary instead.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
715
#[cfg(feature = "temporal_track")]
/// Convert an internal `TemporalSummary` into its serializable output form,
/// rendering timestamps as RFC3339/ISO strings with numeric fallbacks.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        // Fall back to the raw unix timestamp if RFC3339 formatting fails.
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
741
#[cfg(feature = "temporal_track")]
/// Project a core `TimelineEntry` into the JSON-friendly `WhenEntry` view,
/// attaching an ISO-8601 rendering of the timestamp when available.
fn entry_to_when_entry(src: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        timestamp_iso: format_timestamp(src.timestamp),
        timestamp: src.timestamp,
        frame_id: src.frame_id,
        uri: src.uri.clone(),
        preview: src.preview.clone(),
        temporal: src.temporal.clone(),
        child_frames: src.child_frames.clone(),
    }
}
754
#[cfg(feature = "temporal_track")]
/// Pretty-print the resolved temporal window for human consumption.
/// Output order: phrase, timezone, anchor, window, confidence, flags,
/// padding, then a trailing blank line.
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Anchor falls back to the raw unix timestamp if RFC3339 formatting fails.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    // Render the window differently depending on which bounds resolved.
    let bounds = (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    );
    match bounds {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    match summary.window_minutes {
        Some(window) if window > 0 => println!("Window padding: {window} minute(s)"),
        _ => {}
    }
    println!();
}
792
#[cfg(feature = "temporal_track")]
/// Print per-entry temporal details (anchor + mentions) as indented lines
/// under a timeline/when entry.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string; fall back to formatting the UTC
        // timestamp, then (below) to the raw integer.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            "  Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!("  Mentions:");
        for mention in &temporal.mentions {
            // Same fallback chain as the anchor: ISO → formatted ts → raw ts.
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                "    - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            // Quote the original mention text when it was captured.
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
825
/// Convert a temporal resolution into inclusive `(start, end)` unix-second
/// bounds. Point values (a single date or datetime) collapse to an equal
/// start/end pair; a month expands to its first and last calendar day.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;

    let (lo, hi) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(lo), Some(hi)))
}
856
/// Stable string tag describing the shape of a temporal resolution,
/// suitable for machine-readable (JSON) output.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as V;

    match resolution.value {
        V::Date(_) => "date",
        V::DateTime(_) => "datetime",
        V::DateRange { .. } => "date_range",
        V::DateTimeRange { .. } => "datetime_range",
        V::Month { .. } => "month",
    }
}
867
/// Interpret a calendar date as midnight UTC and return unix seconds.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    // `midnight()` + `assume_utc()` is equivalent to pairing the date with
    // `Time::MIDNIGHT` and assuming `UtcOffset::UTC`.
    date.midnight().assume_utc().unix_timestamp()
}
874
/// Find the final calendar day of `month` in `year` by walking forward from
/// the first of the month one day at a time.
///
/// Returns an error only when `(year, month)` does not form a valid date.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    // Advance while the successor day still belongs to the same month;
    // `next_day()` returning `None` (calendar maximum) also stops the walk.
    while let Some(next) = last.next_day().filter(|day| day.month() == month) {
        last = next;
    }
    Ok(last)
}
888
889#[cfg(feature = "temporal_track")]
890
891fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
892    if fragments.is_empty() {
893        return;
894    }
895
896    response.context_fragments = fragments
897        .into_iter()
898        .map(|fragment| AskContextFragment {
899            rank: fragment.rank,
900            frame_id: fragment.frame_id,
901            uri: fragment.uri,
902            title: fragment.title,
903            score: fragment.score,
904            matches: fragment.matches,
905            range: Some(fragment.range),
906            chunk_range: fragment.chunk_range,
907            text: fragment.text,
908            kind: Some(match fragment.kind {
909                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
910                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
911            }),
912            #[cfg(feature = "temporal_track")]
913            temporal: None,
914        })
915        .collect();
916}
917
/// Run the `ask` command end to end: retrieve context relevant to a
/// natural-language question from an MV2 memory and optionally synthesize
/// an answer with an external model.
///
/// High-level flow:
/// 1. plan/quota enforcement;
/// 2. question assembly from `--question` and positional tokens;
/// 3. memory open and mode selection (falls back to lexical when the memory
///    holds no embeddings);
/// 4. embedding-runtime resolution for sem/hybrid modes;
/// 5. retrieval via `Memvid::ask`, then optional cross-encoder rerank,
///    memory-card / entity context injection, and PII masking;
/// 6. optional LLM synthesis (`--use-model`) and JSON or pretty output.
///
/// # Errors
/// Fails on plan/quota rejection, missing question, invalid date bounds,
/// unopenable memory file, mixed embedding models in sem/hybrid mode, or an
/// unavailable embedding runtime when one is required.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    // Track query usage against plan quota
    crate::api::track_query_usage(config, 1)?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory
    // file becomes the path; everything else joins into the question.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // `--question` wins over positionals; empty/whitespace-only is rejected.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Expand query for better retrieval using LLM (expands abbreviations, adds synonyms)
    // This happens when --use-model is set or we have an API key
    let (original_question, search_query) = {
        // For query expansion, we use the fastest available model
        // Priority: OpenAI > Groq > Anthropic > XAI > Mistral
        let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
            if let Ok(key) = std::env::var("OPENAI_API_KEY") {
                // OpenAI available - use gpt-4o-mini (fastest, cheapest)
                (Some("gpt-4o-mini"), Some(key))
            } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
                // Groq available - use llama-3.1-8b-instant (very fast)
                (Some("llama-3.1-8b-instant"), Some(key))
            } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
                // Anthropic available - use haiku
                (Some("claude-haiku-4-5"), Some(key))
            } else if let Ok(key) = std::env::var("XAI_API_KEY") {
                // XAI available - use grok-4-fast
                (Some("grok-4-fast"), Some(key))
            } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
                // Mistral available - use mistral-small
                (Some("mistral-small-latest"), Some(key))
            } else {
                // No fast model available for expansion
                (None, None)
            };

        // DISABLED: Query expansion for ask command
        // The ask command has sophisticated retrieval with fallbacks, aggregation detection,
        // temporal boosting, and diverse retrieval strategies. Query expansion often strips
        // out important semantic context (temporal markers, aggregation signals, analytical
        // keywords) that these strategies depend on. The original question is preserved
        // to ensure all downstream detection and ranking works correctly.
        //
        // Query expansion may be appropriate for simple keyword searches, but for complex
        // natural language questions it causes more problems than it solves.
        let _ = (model_for_expansion, api_key_for_expansion); // suppress unused warnings
        (question.clone(), question.clone())
    };

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Optional time-window bounds; end is inclusive (end-of-day semantics
    // are handled by parse_date_boundary's second argument).
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Check if memory has any vectors - if not, force lexical mode
    let stats = mem.stats()?;
    let has_vectors = stats.vector_count > 0;
    let effective_mode = if !has_vectors && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        tracing::info!(
            "Memory has no embeddings (vector_count=0); falling back to lexical mode"
        );
        AskModeArg::Lex
    } else {
        args.mode.clone()
    };

    let ask_mode: AskMode = effective_mode.clone().into();
    // Infer which embedding model produced the stored vectors so the query
    // embedding matches; mixed models make similarity scores incomparable,
    // hence the hard error below.
    let inferred_model_override = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // Explicit --query-embedding-model beats the model inferred from the file.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            effective_mode
        );
    }

    // Borrow the runtime as a trait object; `None` means lexical-only retrieval.
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question: search_query, // Use expanded query for retrieval
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    // Dimension mismatches get a dedicated, actionable error message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
        other => anyhow!(other),
    })?;

    // Restore original question for display and LLM synthesis
    // (search_query was used for retrieval but original_question is shown to user)
    response.question = original_question;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    // Skip for temporal/recency queries - cross-encoder doesn't understand temporal context
    // and would override the recency boost from lexical search
    let is_temporal_query = {
        let q_lower = response.question.to_lowercase();
        q_lower.contains("current") || q_lower.contains("latest") || q_lower.contains("recent")
            || q_lower.contains("now") || q_lower.contains("today") || q_lower.contains("updated")
            || q_lower.contains("new ") || q_lower.contains("newest")
    };
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
        && !is_temporal_query
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failure is non-fatal: keep the original retrieval order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    let mut model_result: Option<ModelInference> = None;
    if args.no_llm {
        // --no-llm: return verbatim evidence without LLM synthesis
        if args.use_model.is_some() {
            warn!("--use-model ignored because --no-llm disables LLM synthesis");
        }
        if args.json {
            emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
        } else {
            emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
        }

        // Save active replay session if one exists
        #[cfg(feature = "replay")]
        let _ = mem.save_active_session();

        return Ok(());
    } else if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                response.answer = Some(inference.answer.answer.clone());
                response.retrieval.context = inference.context_body.clone();
                apply_model_context_fragments(&mut response, inference.context_fragments.clone());
                model_result = Some(inference);
            }
            Err(err) => {
                // Inference failure is non-fatal: fall back to default output.
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref inference) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            // Extract frame IDs from retrieval hits for replay audit
            let retrieved_frames: Vec<u64> = response
                .retrieval
                .hits
                .iter()
                .map(|hit| hit.frame_id)
                .collect();

            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                inference.answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
                retrieved_frames,
            );
        }
    }

    // Emit results: model-specific JSON, generic ask JSON, or pretty text.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                effective_mode.clone(),
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            effective_mode.clone(),
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1329
1330/// Handle graph-aware find with --graph or --hybrid flags
1331fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1332    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1333    use memvid_core::types::QueryPlan;
1334
1335    let planner = QueryPlanner::new();
1336
1337    // Create query plan based on mode
1338    let plan = if args.graph {
1339        // Pure graph mode - let planner detect patterns
1340        let plan = planner.plan(&args.query, args.top_k);
1341        // If it's a hybrid plan from auto-detection, convert to graph-only
1342        match plan {
1343            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1344                QueryPlan::graph_only(graph_filter, args.top_k)
1345            }
1346            _ => plan,
1347        }
1348    } else {
1349        // Hybrid mode - use the auto-detected plan
1350        planner.plan(&args.query, args.top_k)
1351    };
1352
1353    // Execute the search
1354    let hits = hybrid_search(mem, &plan)?;
1355
1356    if args.json {
1357        // JSON output
1358        let output = serde_json::json!({
1359            "query": args.query,
1360            "mode": if args.graph { "graph" } else { "hybrid" },
1361            "plan": format!("{:?}", plan),
1362            "hits": hits.iter().map(|h| {
1363                serde_json::json!({
1364                    "frame_id": h.frame_id,
1365                    "score": h.score,
1366                    "graph_score": h.graph_score,
1367                    "vector_score": h.vector_score,
1368                    "matched_entity": h.matched_entity,
1369                    "preview": h.preview,
1370                })
1371            }).collect::<Vec<_>>(),
1372        });
1373        println!("{}", serde_json::to_string_pretty(&output)?);
1374    } else {
1375        // Human-readable output
1376        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1377        println!("{} search for: \"{}\"", mode_str, args.query);
1378        println!("Plan: {:?}", plan);
1379        println!();
1380
1381        if hits.is_empty() {
1382            println!("No results found.");
1383        } else {
1384            println!("Results ({} hits):", hits.len());
1385            for (i, hit) in hits.iter().enumerate() {
1386                println!();
1387                println!(
1388                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1389                    i + 1,
1390                    hit.frame_id,
1391                    hit.score,
1392                    hit.graph_score,
1393                    hit.vector_score
1394                );
1395                if let Some(entity) = &hit.matched_entity {
1396                    println!("   Matched entity: {}", entity);
1397                }
1398                if let Some(preview) = &hit.preview {
1399                    let truncated = if preview.len() > 200 {
1400                        format!("{}...", &preview[..200])
1401                    } else {
1402                        preview.clone()
1403                    };
1404                    println!("   {}", truncated.replace('\n', " "));
1405                }
1406            }
1407        }
1408    }
1409
1410    Ok(())
1411}
1412
1413pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1414    // Check if plan allows query operations (blocks expired subscriptions)
1415    crate::utils::require_active_plan(config, "find")?;
1416
1417    // Track query usage against plan quota
1418    crate::api::track_query_usage(config, 1)?;
1419
1420    let mut mem = open_read_only_mem(&args.file)?;
1421
1422    // Load active replay session if one exists
1423    #[cfg(feature = "replay")]
1424    let _ = mem.load_active_session();
1425
1426    // Handle graph-aware and hybrid search modes
1427    if args.graph || args.hybrid {
1428        return handle_graph_find(&mut mem, &args);
1429    }
1430
1431    if args.uri.is_some() && args.scope.is_some() {
1432        warn!("--scope ignored because --uri is provided");
1433    }
1434
1435    // Get vector dimension from MV2 for auto-detection
1436    let mv2_dimension = mem.effective_vec_index_dimension()?;
1437    let identity_summary = match args.mode {
1438        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1439        #[cfg(feature = "clip")]
1440        SearchMode::Clip => None,
1441        SearchMode::Lex => None,
1442    };
1443
1444    let mut semantic_allowed = true;
1445    let inferred_model_override = match identity_summary.as_ref() {
1446        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1447            identity.model.as_deref().map(|value| value.to_string())
1448        }
1449        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1450            let models: Vec<_> = identities
1451                .iter()
1452                .filter_map(|entry| entry.identity.model.as_deref())
1453                .collect();
1454            if args.mode == SearchMode::Sem {
1455                anyhow::bail!(
1456                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1457                    Detected models: {:?}\n\n\
1458                    Suggested fix: split into separate memories per embedding model.",
1459                    models
1460                );
1461            }
1462            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
1463            semantic_allowed = false;
1464            None
1465        }
1466        _ => None,
1467    };
1468
1469    let emb_model_override = args
1470        .query_embedding_model
1471        .as_deref()
1472        .or(inferred_model_override.as_deref());
1473
1474    let (mode_label, runtime_option) = match args.mode {
1475        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1476        SearchMode::Sem => {
1477            let runtime =
1478                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1479            ("Semantic (vector search)".to_string(), Some(runtime))
1480        }
1481        SearchMode::Auto => {
1482            if !semantic_allowed {
1483                ("Lexical (semantic unsafe)".to_string(), None)
1484            } else if let Some(runtime) =
1485                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1486            {
1487                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1488            } else {
1489                ("Lexical (semantic unavailable)".to_string(), None)
1490            }
1491        }
1492        #[cfg(feature = "clip")]
1493        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1494    };
1495
1496    let mode_key = match args.mode {
1497        SearchMode::Sem => "semantic",
1498        SearchMode::Lex => "text",
1499        SearchMode::Auto => {
1500            if runtime_option.is_some() {
1501                "hybrid"
1502            } else {
1503                "text"
1504            }
1505        }
1506        #[cfg(feature = "clip")]
1507        SearchMode::Clip => "clip",
1508    };
1509
1510    // For CLIP mode, use CLIP visual search
1511    #[cfg(feature = "clip")]
1512    if args.mode == SearchMode::Clip {
1513        use memvid_core::clip::{ClipConfig, ClipModel};
1514
1515        // Initialize CLIP model
1516        let config = ClipConfig::default();
1517        let clip = ClipModel::new(config).map_err(|e| {
1518            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1519        })?;
1520
1521        // Encode query text
1522        let query_embedding = clip
1523            .encode_text(&args.query)
1524            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1525
1526        // Search CLIP index
1527        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1528
1529        // Debug distances before filtering
1530        for hit in &hits {
1531            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1532                tracing::debug!(
1533                    frame_id = hit.frame_id,
1534                    title = %frame.title.unwrap_or_default(),
1535                    page = hit.page,
1536                    distance = hit.distance,
1537                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1538                    "CLIP raw hit"
1539                );
1540            } else {
1541                tracing::debug!(
1542                    frame_id = hit.frame_id,
1543                    page = hit.page,
1544                    distance = hit.distance,
1545                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1546                    "CLIP raw hit (missing frame)"
1547                );
1548            }
1549        }
1550
1551        // CLIP distance threshold for filtering poor matches
1552        // CLIP uses L2 distance on normalized embeddings:
1553        //   - distance² = 2(1 - cosine_similarity)
1554        //   - distance = 0 → identical (cosine_sim = 1)
1555        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1556        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1557        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1558        //   - distance = 2.0 → opposite (cosine_sim = -1)
1559        //
1560        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1561        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1562        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1563        const CLIP_MAX_DISTANCE: f32 = 1.26;
1564
1565        // Convert CLIP hits to SearchResponse format, filtering by threshold
1566        let search_hits: Vec<SearchHit> = hits
1567            .into_iter()
1568            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1569            .enumerate()
1570            .filter_map(|(rank, hit)| {
1571                // Convert L2 distance to cosine similarity for display
1572                // cos_sim = 1 - (distance² / 2)
1573                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1574
1575                // Get frame preview for snippet
1576                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1577                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1578                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1579                let title = match (base_title, hit.page) {
1580                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1581                    (Some(t), None) => Some(t),
1582                    (None, Some(p)) => Some(format!("Page {p}")),
1583                    _ => None,
1584                };
1585                Some(SearchHit {
1586                    rank: rank + 1,
1587                    frame_id: hit.frame_id,
1588                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1589                    title,
1590                    text: preview.clone(),
1591                    chunk_text: Some(preview),
1592                    range: (0, 0),
1593                    chunk_range: None,
1594                    matches: 0,
1595                    score: Some(cosine_similarity),
1596                    metadata: None,
1597                })
1598            })
1599            .collect();
1600
1601        let response = SearchResponse {
1602            query: args.query.clone(),
1603            hits: search_hits.clone(),
1604            total_hits: search_hits.len(),
1605            params: memvid_core::SearchParams {
1606                top_k: args.top_k,
1607                snippet_chars: args.snippet_chars,
1608                cursor: args.cursor.clone(),
1609            },
1610            elapsed_ms: 0,
1611            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1612            next_cursor: None,
1613            context: String::new(),
1614        };
1615
1616        if args.json_legacy {
1617            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1618            emit_legacy_search_json(&response)?;
1619        } else if args.json {
1620            emit_search_json(&response, mode_key)?;
1621        } else {
1622            println!(
1623                "mode: {}   k={}   time: {} ms",
1624                mode_label, response.params.top_k, response.elapsed_ms
1625            );
1626            println!("engine: clip (MobileCLIP-S2)");
1627            println!(
1628                "hits: {} (showing {})",
1629                response.total_hits,
1630                response.hits.len()
1631            );
1632            emit_search_table(&response);
1633        }
1634        return Ok(());
1635    }
1636
1637    // For semantic mode, use pure vector search.
1638    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1639        let runtime = runtime_option
1640            .as_ref()
1641            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1642
1643        // Embed the query
1644        let query_embedding = runtime.embed_query(&args.query)?;
1645
1646        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1647        let scope = args.scope.as_deref().or(args.uri.as_deref());
1648
1649        if !args.no_adaptive {
1650            // Build adaptive config from CLI args
1651            let strategy = match args.adaptive_strategy {
1652                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1653                    min_ratio: args.min_relevancy,
1654                },
1655                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1656                    min_score: args.min_relevancy,
1657                },
1658                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1659                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1660                },
1661                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1662                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1663                    relative_threshold: args.min_relevancy,
1664                    max_drop_ratio: 0.35,
1665                    absolute_min: 0.3,
1666                },
1667            };
1668
1669            let config = AdaptiveConfig {
1670                enabled: true,
1671                max_results: args.max_k,
1672                min_results: 1,
1673                strategy,
1674                normalize_scores: true,
1675            };
1676
1677            match mem.search_adaptive(
1678                &args.query,
1679                &query_embedding,
1680                config,
1681                args.snippet_chars,
1682                scope,
1683            ) {
1684                Ok(result) => {
1685                    let mut resp = SearchResponse {
1686                        query: args.query.clone(),
1687                        hits: result.results,
1688                        total_hits: result.stats.returned,
1689                        params: memvid_core::SearchParams {
1690                            top_k: result.stats.returned,
1691                            snippet_chars: args.snippet_chars,
1692                            cursor: args.cursor.clone(),
1693                        },
1694                        elapsed_ms: 0,
1695                        engine: SearchEngineKind::Hybrid,
1696                        next_cursor: None,
1697                        context: String::new(),
1698                    };
1699                    apply_preference_rerank(&mut resp);
1700                    (
1701                        resp,
1702                        "semantic (adaptive vector search)".to_string(),
1703                        Some(result.stats),
1704                    )
1705                }
1706                Err(e) => {
1707                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1708                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1709                    }
1710
1711                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1712                    match mem.vec_search_with_embedding(
1713                        &args.query,
1714                        &query_embedding,
1715                        args.top_k,
1716                        args.snippet_chars,
1717                        scope,
1718                    ) {
1719                        Ok(mut resp) => {
1720                            apply_preference_rerank(&mut resp);
1721                            (resp, "semantic (vector search fallback)".to_string(), None)
1722                        }
1723                        Err(e2) => {
1724                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1725                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1726                            }
1727                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
1728                        }
1729                    }
1730                }
1731            }
1732        } else {
1733            // Standard fixed-k vector search
1734            match mem.vec_search_with_embedding(
1735                &args.query,
1736                &query_embedding,
1737                args.top_k,
1738                args.snippet_chars,
1739                scope,
1740            ) {
1741                Ok(mut resp) => {
1742                    // Apply preference boost to rerank results for preference-seeking queries
1743                    apply_preference_rerank(&mut resp);
1744                    (resp, "semantic (vector search)".to_string(), None)
1745                }
1746                Err(e) => {
1747                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1748                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1749                    }
1750
1751                    // Fall back to lexical search + rerank if vector search fails
1752                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1753                    let request = SearchRequest {
1754                        query: args.query.clone(),
1755                        top_k: args.top_k,
1756                        snippet_chars: args.snippet_chars,
1757                        uri: args.uri.clone(),
1758                        scope: args.scope.clone(),
1759                        cursor: args.cursor.clone(),
1760                        #[cfg(feature = "temporal_track")]
1761                        temporal: None,
1762                        as_of_frame: args.as_of_frame,
1763                        as_of_ts: args.as_of_ts,
1764                        no_sketch: args.no_sketch,
1765                    };
1766                    let mut resp = mem.search(request)?;
1767                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1768                    (resp, "semantic (fallback rerank)".to_string(), None)
1769                }
1770            }
1771        }
1772    } else {
1773        // For lexical and auto modes, use existing behavior
1774        let request = SearchRequest {
1775            query: args.query.clone(),
1776            top_k: args.top_k,
1777            snippet_chars: args.snippet_chars,
1778            uri: args.uri.clone(),
1779            scope: args.scope.clone(),
1780            cursor: args.cursor.clone(),
1781            #[cfg(feature = "temporal_track")]
1782            temporal: None,
1783            as_of_frame: args.as_of_frame,
1784            as_of_ts: args.as_of_ts,
1785            no_sketch: args.no_sketch,
1786        };
1787
1788        let mut resp = mem.search(request)?;
1789
1790        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1791            warn!("Search index unavailable; returning basic text results");
1792        }
1793
1794        let mut engine_label = match resp.engine {
1795            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1796            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1797            SearchEngineKind::Hybrid => "hybrid".to_string(),
1798        };
1799
1800        if runtime_option.is_some() {
1801            engine_label = format!("hybrid ({engine_label} + semantic)");
1802        }
1803
1804        if let Some(ref runtime) = runtime_option {
1805            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1806        }
1807
1808        (resp, engine_label, None)
1809    };
1810
1811    if args.json_legacy {
1812        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1813        emit_legacy_search_json(&response)?;
1814    } else if args.json {
1815        emit_search_json(&response, mode_key)?;
1816    } else {
1817        println!(
1818            "mode: {}   k={}   time: {} ms",
1819            mode_label, response.params.top_k, response.elapsed_ms
1820        );
1821        println!("engine: {}", engine_label);
1822
1823        // Show adaptive retrieval stats if enabled
1824        if let Some(ref stats) = adaptive_stats {
1825            println!(
1826                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1827                stats.total_considered,
1828                stats.returned,
1829                stats.triggered_by,
1830                stats.top_score.unwrap_or(0.0),
1831                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1832            );
1833        }
1834
1835        println!(
1836            "hits: {} (showing {})",
1837            response.total_hits,
1838            response.hits.len()
1839        );
1840        emit_search_table(&response);
1841    }
1842
1843    // Save active replay session if one exists
1844    #[cfg(feature = "replay")]
1845    let _ = mem.save_active_session();
1846
1847    Ok(())
1848}
1849
1850pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1851    // Track query usage against plan quota
1852    crate::api::track_query_usage(config, 1)?;
1853
1854    let mut mem = open_read_only_mem(&args.file)?;
1855    let vector = if let Some(path) = args.embedding.as_deref() {
1856        read_embedding(path)?
1857    } else if let Some(vector_string) = &args.vector {
1858        parse_vector(vector_string)?
1859    } else {
1860        anyhow::bail!("provide --vector or --embedding for search input");
1861    };
1862
1863    let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1864        MemvidError::VecDimensionMismatch { expected, actual } => {
1865            anyhow!(vec_dimension_mismatch_help(expected, actual))
1866        }
1867        other => anyhow!(other),
1868    })?;
1869    let mut enriched = Vec::with_capacity(hits.len());
1870    for hit in hits {
1871        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1872        enriched.push((hit.frame_id, hit.distance, preview));
1873    }
1874
1875    if args.json {
1876        let json_hits: Vec<_> = enriched
1877            .iter()
1878            .map(|(frame_id, distance, preview)| {
1879                json!({
1880                    "frame_id": frame_id,
1881                    "distance": distance,
1882                    "preview": preview,
1883                })
1884            })
1885            .collect();
1886        let json_str = serde_json::to_string_pretty(&json_hits)?;
1887        println!("{}", json_str.to_colored_json_auto()?);
1888    } else if enriched.is_empty() {
1889        println!("No vector matches found");
1890    } else {
1891        for (frame_id, distance, preview) in enriched {
1892            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1893        }
1894    }
1895    Ok(())
1896}
1897
1898pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1899    use memvid_core::AuditOptions;
1900    use std::fs::File;
1901    use std::io::Write;
1902
1903    let mut mem = Memvid::open(&args.file)?;
1904
1905    // Parse date boundaries
1906    let start = parse_date_boundary(args.start.as_ref(), false)?;
1907    let end = parse_date_boundary(args.end.as_ref(), true)?;
1908    if let (Some(start_ts), Some(end_ts)) = (start, end) {
1909        if end_ts < start_ts {
1910            anyhow::bail!("--end must not be earlier than --start");
1911        }
1912    }
1913
1914    // Set up embedding runtime if needed
1915    let ask_mode: AskMode = args.mode.into();
1916    let runtime = match args.mode {
1917        AskModeArg::Lex => None,
1918        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1919        AskModeArg::Hybrid => try_load_embedding_runtime(config),
1920    };
1921    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1922
1923    // Build audit options
1924    let options = AuditOptions {
1925        top_k: Some(args.top_k),
1926        snippet_chars: Some(args.snippet_chars),
1927        mode: Some(ask_mode),
1928        scope: args.scope,
1929        start,
1930        end,
1931        include_snippets: true,
1932    };
1933
1934    // Run the audit
1935    let mut report = mem.audit(&args.question, Some(options), embedder)?;
1936
1937    // If --use-model is provided, run model inference to synthesize the answer
1938    if let Some(model_name) = args.use_model.as_deref() {
1939        // Build context from sources for model inference
1940        let context = report
1941            .sources
1942            .iter()
1943            .filter_map(|s| s.snippet.clone())
1944            .collect::<Vec<_>>()
1945            .join("\n\n");
1946
1947        match run_model_inference(
1948            model_name,
1949            &report.question,
1950            &context,
1951            &[], // No hits needed for audit
1952            None,
1953            None,
1954            None, // No system prompt override for audit
1955        ) {
1956            Ok(inference) => {
1957                report.answer = Some(inference.answer.answer);
1958                report.notes.push(format!(
1959                    "Answer synthesized by model: {}",
1960                    inference.answer.model
1961                ));
1962            }
1963            Err(err) => {
1964                warn!(
1965                    "model inference unavailable for '{}': {err}. Using default answer.",
1966                    model_name
1967                );
1968            }
1969        }
1970    }
1971
1972    // Format the output
1973    let output = match args.format {
1974        AuditFormat::Text => report.to_text(),
1975        AuditFormat::Markdown => report.to_markdown(),
1976        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1977    };
1978
1979    // Write output
1980    if let Some(out_path) = args.out {
1981        let mut file = File::create(&out_path)?;
1982        file.write_all(output.as_bytes())?;
1983        println!("Audit report written to: {}", out_path.display());
1984    } else {
1985        println!("{}", output);
1986    }
1987
1988    Ok(())
1989}
1990
1991fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1992    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1993
1994    let mut additional_params = serde_json::Map::new();
1995    if let Some(cursor) = &response.params.cursor {
1996        additional_params.insert("cursor".into(), json!(cursor));
1997    }
1998
1999    let mut params = serde_json::Map::new();
2000    params.insert("top_k".into(), json!(response.params.top_k));
2001    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2002    params.insert("mode".into(), json!(mode));
2003    params.insert(
2004        "additional_params".into(),
2005        serde_json::Value::Object(additional_params),
2006    );
2007
2008    let mut metadata_json = serde_json::Map::new();
2009    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2010    metadata_json.insert("total_hits".into(), json!(response.total_hits));
2011    metadata_json.insert(
2012        "next_cursor".into(),
2013        match &response.next_cursor {
2014            Some(cursor) => json!(cursor),
2015            None => serde_json::Value::Null,
2016        },
2017    );
2018    metadata_json.insert("engine".into(), json!(response.engine));
2019    metadata_json.insert("params".into(), serde_json::Value::Object(params));
2020
2021    let body = json!({
2022        "version": "mv2.result.v2",
2023        "query": response.query,
2024        "metadata": metadata_json,
2025        "hits": hits,
2026        "context": response.context,
2027    });
2028    let json_str = serde_json::to_string_pretty(&body)?;
2029    println!("{}", json_str.to_colored_json_auto()?);
2030    Ok(())
2031}
2032
2033fn emit_ask_json(
2034    response: &AskResponse,
2035    requested_mode: AskModeArg,
2036    inference: Option<&ModelInference>,
2037    include_sources: bool,
2038    mem: &mut Memvid,
2039) -> Result<()> {
2040    let hits: Vec<_> = response
2041        .retrieval
2042        .hits
2043        .iter()
2044        .map(search_hit_to_json)
2045        .collect();
2046
2047    let citations: Vec<_> = response
2048        .citations
2049        .iter()
2050        .map(|citation| {
2051            let mut map = serde_json::Map::new();
2052            map.insert("index".into(), json!(citation.index));
2053            map.insert("frame_id".into(), json!(citation.frame_id));
2054            map.insert("uri".into(), json!(citation.uri));
2055            if let Some(range) = citation.chunk_range {
2056                map.insert("chunk_range".into(), json!([range.0, range.1]));
2057            }
2058            if let Some(score) = citation.score {
2059                map.insert("score".into(), json!(score));
2060            }
2061            serde_json::Value::Object(map)
2062        })
2063        .collect();
2064
2065    let mut body = json!({
2066        "version": "mv2.ask.v1",
2067        "question": response.question,
2068        "answer": response.answer,
2069        "context_only": response.context_only,
2070        "mode": ask_mode_display(requested_mode),
2071        "retriever": ask_retriever_display(response.retriever),
2072        "top_k": response.retrieval.params.top_k,
2073        "results": hits,
2074        "citations": citations,
2075        "stats": {
2076            "retrieval_ms": response.stats.retrieval_ms,
2077            "synthesis_ms": response.stats.synthesis_ms,
2078            "latency_ms": response.stats.latency_ms,
2079        },
2080        "engine": search_engine_label(&response.retrieval.engine),
2081        "total_hits": response.retrieval.total_hits,
2082        "next_cursor": response.retrieval.next_cursor,
2083        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2084    });
2085
2086    if let Some(inf) = inference {
2087        let model = &inf.answer;
2088        if let serde_json::Value::Object(ref mut map) = body {
2089            map.insert("model".into(), json!(model.requested));
2090            if model.model != model.requested {
2091                map.insert("model_used".into(), json!(model.model));
2092            }
2093            map.insert("cached".into(), json!(inf.cached));
2094            // Add usage and cost if available
2095            if let Some(usage) = &inf.usage {
2096                map.insert("usage".into(), json!({
2097                    "input_tokens": usage.input_tokens,
2098                    "output_tokens": usage.output_tokens,
2099                    "total_tokens": usage.total_tokens,
2100                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2101                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2102                }));
2103            }
2104            // Add grounding/hallucination score if available
2105            if let Some(grounding) = &inf.grounding {
2106                map.insert("grounding".into(), json!({
2107                    "score": grounding.score,
2108                    "label": grounding.label(),
2109                    "sentence_count": grounding.sentence_count,
2110                    "grounded_sentences": grounding.grounded_sentences,
2111                    "has_warning": grounding.has_warning,
2112                    "warning_reason": grounding.warning_reason,
2113                }));
2114            }
2115        }
2116    }
2117
2118    // Add detailed sources if requested
2119    if include_sources {
2120        if let serde_json::Value::Object(ref mut map) = body {
2121            let sources = build_sources_json(response, mem);
2122            map.insert("sources".into(), json!(sources));
2123        }
2124    }
2125
2126    // Add follow-up suggestions if confidence is low
2127    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2128        if let serde_json::Value::Object(ref mut map) = body {
2129            map.insert("follow_up".into(), follow_up);
2130        }
2131    }
2132
2133    println!("{}", serde_json::to_string_pretty(&body)?);
2134    Ok(())
2135}
2136
2137fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2138    response
2139        .citations
2140        .iter()
2141        .enumerate()
2142        .map(|(idx, citation)| {
2143            let mut source = serde_json::Map::new();
2144            source.insert("index".into(), json!(idx + 1));
2145            source.insert("frame_id".into(), json!(citation.frame_id));
2146            source.insert("uri".into(), json!(citation.uri));
2147
2148            if let Some(range) = citation.chunk_range {
2149                source.insert("chunk_range".into(), json!([range.0, range.1]));
2150            }
2151            if let Some(score) = citation.score {
2152                source.insert("score".into(), json!(score));
2153            }
2154
2155            // Get frame metadata for rich source information
2156            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2157                if let Some(title) = frame.title {
2158                    source.insert("title".into(), json!(title));
2159                }
2160                if !frame.tags.is_empty() {
2161                    source.insert("tags".into(), json!(frame.tags));
2162                }
2163                if !frame.labels.is_empty() {
2164                    source.insert("labels".into(), json!(frame.labels));
2165                }
2166                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2167                if !frame.content_dates.is_empty() {
2168                    source.insert("content_dates".into(), json!(frame.content_dates));
2169                }
2170            }
2171
2172            // Get snippet from hit
2173            if let Some(hit) = response
2174                .retrieval
2175                .hits
2176                .iter()
2177                .find(|h| h.frame_id == citation.frame_id)
2178            {
2179                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2180                source.insert("snippet".into(), json!(snippet));
2181            }
2182
2183            serde_json::Value::Object(source)
2184        })
2185        .collect()
2186}
2187
2188/// Build follow-up suggestions when the answer has low grounding/confidence.
2189/// Helps users understand what the memory contains and suggests relevant questions.
2190fn build_follow_up_suggestions(
2191    response: &AskResponse,
2192    inference: Option<&ModelInference>,
2193    mem: &mut Memvid,
2194) -> Option<serde_json::Value> {
2195    // Check if we need follow-up suggestions
2196    let needs_followup = inference
2197        .and_then(|inf| inf.grounding.as_ref())
2198        .map(|g| g.score < 0.3 || g.has_warning)
2199        .unwrap_or(false);
2200
2201    // Also trigger if retrieval hits have very low scores or no hits
2202    let low_retrieval = response.retrieval.hits.first()
2203        .and_then(|h| h.score)
2204        .map(|score| score < -2.0)
2205        .unwrap_or(true);
2206
2207    if !needs_followup && !low_retrieval {
2208        return None;
2209    }
2210
2211    // Get available topics from the memory by sampling timeline entries
2212    let limit = std::num::NonZeroU64::new(20).unwrap();
2213    let timeline_query = TimelineQueryBuilder::default()
2214        .limit(limit)
2215        .build();
2216
2217    let available_topics: Vec<String> = mem
2218        .timeline(timeline_query)
2219        .ok()
2220        .map(|entries| {
2221            entries
2222                .iter()
2223                .filter_map(|e| {
2224                    // Extract meaningful preview/title
2225                    let preview = e.preview.trim();
2226                    if preview.is_empty() || preview.len() < 5 {
2227                        return None;
2228                    }
2229                    // Get first line or truncate
2230                    let first_line = preview.lines().next().unwrap_or(preview);
2231                    if first_line.len() > 60 {
2232                        Some(format!("{}...", &first_line[..57]))
2233                    } else {
2234                        Some(first_line.to_string())
2235                    }
2236                })
2237                .collect::<std::collections::HashSet<_>>()
2238                .into_iter()
2239                .take(5)
2240                .collect()
2241        })
2242        .unwrap_or_default();
2243
2244    // Determine the reason for low confidence
2245    let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2246        "No relevant information found in memory"
2247    } else if inference.and_then(|i| i.grounding.as_ref()).map(|g| g.has_warning).unwrap_or(false) {
2248        "Answer may not be well-supported by the available context"
2249    } else {
2250        "Low confidence in the answer"
2251    };
2252
2253    // Generate suggestion questions based on available topics
2254    let suggestions: Vec<String> = if available_topics.is_empty() {
2255        vec![
2256            "What information is stored in this memory?".to_string(),
2257            "Can you list the main topics covered?".to_string(),
2258        ]
2259    } else {
2260        available_topics
2261            .iter()
2262            .take(3)
2263            .map(|topic| format!("Tell me about {}", topic))
2264            .chain(std::iter::once("What topics are in this memory?".to_string()))
2265            .collect()
2266    };
2267
2268    Some(json!({
2269        "needed": true,
2270        "reason": reason,
2271        "hint": if available_topics.is_empty() {
2272            "This memory may not contain information about your query."
2273        } else {
2274            "This memory contains information about different topics. Try asking about those instead."
2275        },
2276        "available_topics": available_topics,
2277        "suggestions": suggestions
2278    }))
2279}
2280
2281fn emit_model_json(
2282    response: &AskResponse,
2283    requested_model: &str,
2284    inference: Option<&ModelInference>,
2285    include_sources: bool,
2286    mem: &mut Memvid,
2287) -> Result<()> {
2288    let answer = response.answer.clone().unwrap_or_default();
2289    let requested_label = inference
2290        .map(|m| m.answer.requested.clone())
2291        .unwrap_or_else(|| requested_model.to_string());
2292    let used_label = inference
2293        .map(|m| m.answer.model.clone())
2294        .unwrap_or_else(|| requested_model.to_string());
2295
2296    let mut body = json!({
2297        "question": response.question,
2298        "model": requested_label,
2299        "model_used": used_label,
2300        "answer": answer,
2301        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2302    });
2303
2304    // Add usage and cost if available
2305    if let Some(inf) = inference {
2306        if let serde_json::Value::Object(ref mut map) = body {
2307            map.insert("cached".into(), json!(inf.cached));
2308            if let Some(usage) = &inf.usage {
2309                map.insert("usage".into(), json!({
2310                    "input_tokens": usage.input_tokens,
2311                    "output_tokens": usage.output_tokens,
2312                    "total_tokens": usage.total_tokens,
2313                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2314                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2315                }));
2316            }
2317            if let Some(grounding) = &inf.grounding {
2318                map.insert("grounding".into(), json!({
2319                    "score": grounding.score,
2320                    "label": grounding.label(),
2321                    "sentence_count": grounding.sentence_count,
2322                    "grounded_sentences": grounding.grounded_sentences,
2323                    "has_warning": grounding.has_warning,
2324                    "warning_reason": grounding.warning_reason,
2325                }));
2326            }
2327        }
2328    }
2329
2330    // Add detailed sources if requested
2331    if include_sources {
2332        if let serde_json::Value::Object(ref mut map) = body {
2333            let sources = build_sources_json(response, mem);
2334            map.insert("sources".into(), json!(sources));
2335        }
2336    }
2337
2338    // Add follow-up suggestions if confidence is low
2339    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2340        if let serde_json::Value::Object(ref mut map) = body {
2341            map.insert("follow_up".into(), follow_up);
2342        }
2343    }
2344
2345    // Use colored JSON output
2346    let json_str = serde_json::to_string_pretty(&body)?;
2347    println!("{}", json_str.to_colored_json_auto()?);
2348    Ok(())
2349}
2350
2351fn emit_ask_pretty(
2352    response: &AskResponse,
2353    requested_mode: AskModeArg,
2354    inference: Option<&ModelInference>,
2355    include_sources: bool,
2356    mem: &mut Memvid,
2357) {
2358    println!(
2359        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2360        ask_mode_pretty(requested_mode),
2361        ask_retriever_pretty(response.retriever),
2362        response.retrieval.params.top_k,
2363        response.stats.latency_ms,
2364        response.stats.retrieval_ms
2365    );
2366    if let Some(inference) = inference {
2367        let model = &inference.answer;
2368        let cached_label = if inference.cached { " [CACHED]" } else { "" };
2369        if model.requested.trim() == model.model {
2370            println!("model: {}{}", model.model, cached_label);
2371        } else {
2372            println!(
2373                "model requested: {}   model used: {}{}",
2374                model.requested, model.model, cached_label
2375            );
2376        }
2377        // Display usage and cost if available
2378        if let Some(usage) = &inference.usage {
2379            let cost_label = if inference.cached {
2380                format!("$0.00 (saved ${:.6})", usage.cost_usd)
2381            } else {
2382                format!("${:.6}", usage.cost_usd)
2383            };
2384            println!(
2385                "tokens: {} input + {} output = {}   cost: {}",
2386                usage.input_tokens,
2387                usage.output_tokens,
2388                usage.total_tokens,
2389                cost_label
2390            );
2391        }
2392        // Display grounding/hallucination score
2393        if let Some(grounding) = &inference.grounding {
2394            let warning = if grounding.has_warning {
2395                format!(" [WARNING: {}]", grounding.warning_reason.as_deref().unwrap_or("potential hallucination"))
2396            } else {
2397                String::new()
2398            };
2399            println!(
2400                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2401                grounding.score * 100.0,
2402                grounding.label(),
2403                grounding.grounded_sentences,
2404                grounding.sentence_count,
2405                warning
2406            );
2407        }
2408    }
2409    println!(
2410        "engine: {}",
2411        search_engine_label(&response.retrieval.engine)
2412    );
2413    println!(
2414        "hits: {} (showing {})",
2415        response.retrieval.total_hits,
2416        response.retrieval.hits.len()
2417    );
2418
2419    if response.context_only {
2420        println!();
2421        println!("Context-only mode: synthesis disabled.");
2422        println!();
2423    } else if let Some(answer) = &response.answer {
2424        println!();
2425        println!("Answer:\n{answer}");
2426        println!();
2427    }
2428
2429    if !response.citations.is_empty() {
2430        println!("Citations:");
2431        for citation in &response.citations {
2432            match citation.score {
2433                Some(score) => println!(
2434                    "[{}] {} (frame {}, score {:.3})",
2435                    citation.index, citation.uri, citation.frame_id, score
2436                ),
2437                None => println!(
2438                    "[{}] {} (frame {})",
2439                    citation.index, citation.uri, citation.frame_id
2440                ),
2441            }
2442        }
2443        println!();
2444    }
2445
2446    // Print detailed sources if requested
2447    if include_sources && !response.citations.is_empty() {
2448        println!("=== SOURCES ===");
2449        println!();
2450        for citation in &response.citations {
2451            println!("[{}] {}", citation.index, citation.uri);
2452
2453            // Get frame metadata
2454            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2455                if let Some(title) = &frame.title {
2456                    println!("    Title: {}", title);
2457                }
2458                println!("    Frame ID: {}", citation.frame_id);
2459                if let Some(score) = citation.score {
2460                    println!("    Score: {:.4}", score);
2461                }
2462                if let Some((start, end)) = citation.chunk_range {
2463                    println!("    Range: [{}..{})", start, end);
2464                }
2465                if !frame.tags.is_empty() {
2466                    println!("    Tags: {}", frame.tags.join(", "));
2467                }
2468                if !frame.labels.is_empty() {
2469                    println!("    Labels: {}", frame.labels.join(", "));
2470                }
2471                println!("    Timestamp: {}", frame.timestamp);
2472                if !frame.content_dates.is_empty() {
2473                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2474                }
2475            }
2476
2477            // Get snippet from hit
2478            if let Some(hit) = response
2479                .retrieval
2480                .hits
2481                .iter()
2482                .find(|h| h.frame_id == citation.frame_id)
2483            {
2484                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2485                let truncated = if snippet.len() > 200 {
2486                    format!("{}...", &snippet[..200])
2487                } else {
2488                    snippet.clone()
2489                };
2490                println!("    Snippet: {}", truncated.replace('\n', " "));
2491            }
2492            println!();
2493        }
2494    }
2495
2496    if !include_sources {
2497        println!();
2498        emit_search_table(&response.retrieval);
2499    }
2500
2501    // Display follow-up suggestions if confidence is low
2502    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2503        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2504            if needed {
2505                println!();
2506                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2507                println!("💡 FOLLOW-UP SUGGESTIONS");
2508                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2509
2510                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2511                    println!("Reason: {}", reason);
2512                }
2513
2514                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2515                    println!("Hint: {}", hint);
2516                }
2517
2518                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2519                    if !topics.is_empty() {
2520                        println!();
2521                        println!("Available topics in this memory:");
2522                        for topic in topics.iter().filter_map(|t| t.as_str()) {
2523                            println!("  • {}", topic);
2524                        }
2525                    }
2526                }
2527
2528                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2529                    if !suggestions.is_empty() {
2530                        println!();
2531                        println!("Try asking:");
2532                        for (i, suggestion) in suggestions.iter().filter_map(|s| s.as_str()).enumerate() {
2533                            println!("  {}. \"{}\"", i + 1, suggestion);
2534                        }
2535                    }
2536                }
2537                println!();
2538            }
2539        }
2540    }
2541}
2542
2543/// Emit verbatim evidence as JSON without LLM synthesis.
2544/// Format: {evidence: [{source, text, score}], question, hits, stats}
2545fn emit_verbatim_evidence_json(
2546    response: &AskResponse,
2547    include_sources: bool,
2548    mem: &mut Memvid,
2549) -> Result<()> {
2550    // Build evidence array from hits - verbatim excerpts with citations
2551    let evidence: Vec<_> = response
2552        .retrieval
2553        .hits
2554        .iter()
2555        .enumerate()
2556        .map(|(idx, hit)| {
2557            let mut entry = serde_json::Map::new();
2558            entry.insert("index".into(), json!(idx + 1));
2559            entry.insert("frame_id".into(), json!(hit.frame_id));
2560            entry.insert("uri".into(), json!(&hit.uri));
2561            if let Some(title) = &hit.title {
2562                entry.insert("title".into(), json!(title));
2563            }
2564            // Use chunk_text if available (more specific), otherwise full text
2565            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2566            entry.insert("text".into(), json!(verbatim));
2567            if let Some(score) = hit.score {
2568                entry.insert("score".into(), json!(score));
2569            }
2570            serde_json::Value::Object(entry)
2571        })
2572        .collect();
2573
2574    // Build sources array if requested
2575    let sources: Option<Vec<_>> = if include_sources {
2576        Some(
2577            response
2578                .retrieval
2579                .hits
2580                .iter()
2581                .filter_map(|hit| {
2582                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
2583                        let mut source = serde_json::Map::new();
2584                        source.insert("frame_id".into(), json!(frame.id));
2585                        source.insert("uri".into(), json!(frame.uri.as_deref().unwrap_or("(unknown)")));
2586                        if let Some(title) = &frame.title {
2587                            source.insert("title".into(), json!(title));
2588                        }
2589                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
2590                        if !frame.tags.is_empty() {
2591                            source.insert("tags".into(), json!(frame.tags));
2592                        }
2593                        if !frame.labels.is_empty() {
2594                            source.insert("labels".into(), json!(frame.labels));
2595                        }
2596                        serde_json::Value::Object(source)
2597                    })
2598                })
2599                .collect(),
2600        )
2601    } else {
2602        None
2603    };
2604
2605    let mut body = json!({
2606        "version": "mv2.evidence.v1",
2607        "mode": "verbatim",
2608        "question": response.question,
2609        "evidence": evidence,
2610        "evidence_count": evidence.len(),
2611        "total_hits": response.retrieval.total_hits,
2612        "stats": {
2613            "retrieval_ms": response.stats.retrieval_ms,
2614            "latency_ms": response.stats.latency_ms,
2615        },
2616        "engine": search_engine_label(&response.retrieval.engine),
2617    });
2618
2619    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
2620        map.insert("sources".into(), json!(sources));
2621    }
2622
2623    let json_str = serde_json::to_string_pretty(&body)?;
2624    println!("{}", json_str.to_colored_json_auto()?);
2625    Ok(())
2626}
2627
2628/// Emit verbatim evidence in human-readable format without LLM synthesis.
2629fn emit_verbatim_evidence_pretty(
2630    response: &AskResponse,
2631    include_sources: bool,
2632    mem: &mut Memvid,
2633) {
2634    println!(
2635        "mode: {}   latency: {} ms (retrieval {} ms)",
2636        "verbatim evidence".cyan(),
2637        response.stats.latency_ms,
2638        response.stats.retrieval_ms
2639    );
2640    println!(
2641        "engine: {}",
2642        search_engine_label(&response.retrieval.engine)
2643    );
2644    println!(
2645        "hits: {} (showing {})",
2646        response.retrieval.total_hits,
2647        response.retrieval.hits.len()
2648    );
2649    println!();
2650
2651    // Header
2652    println!("{}", "━".repeat(60));
2653    println!(
2654        "{}",
2655        format!(
2656            "VERBATIM EVIDENCE for: \"{}\"",
2657            truncate_with_ellipsis(&response.question, 40)
2658        )
2659        .bold()
2660    );
2661    println!("{}", "━".repeat(60));
2662    println!();
2663
2664    if response.retrieval.hits.is_empty() {
2665        println!("No evidence found.");
2666        return;
2667    }
2668
2669    // Calculate score range for normalization (BM25 scores can be negative)
2670    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
2671    let (min_score, max_score) = score_range(&scores);
2672
2673    // Display each piece of evidence with citation
2674    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
2675        let uri = &hit.uri;
2676        let title = hit.title.as_deref().unwrap_or("Untitled");
2677        let score_str = hit
2678            .score
2679            .map(|s| {
2680                let normalized = normalize_bm25_for_display(s, min_score, max_score);
2681                format!(" (relevance: {:.0}%)", normalized)
2682            })
2683            .unwrap_or_default();
2684
2685        println!(
2686            "{}",
2687            format!("[{}] {}{}", idx + 1, title, score_str).green().bold()
2688        );
2689        println!("    Source: {} (frame {})", uri, hit.frame_id);
2690        println!();
2691
2692        // Show verbatim text - prefer chunk_text if available
2693        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2694        // Indent each line for readability
2695        for line in verbatim.lines() {
2696            if !line.trim().is_empty() {
2697                println!("    │ {}", line);
2698            }
2699        }
2700        println!();
2701    }
2702
2703    // Print detailed sources if requested
2704    if include_sources {
2705        println!("{}", "━".repeat(60));
2706        println!("{}", "SOURCE DETAILS".bold());
2707        println!("{}", "━".repeat(60));
2708        println!();
2709
2710        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
2711            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
2712                println!("{}", format!("[{}] {}", idx + 1, frame.uri.as_deref().unwrap_or("(unknown)")).cyan());
2713                if let Some(title) = &frame.title {
2714                    println!("    Title: {}", title);
2715                }
2716                println!("    Frame ID: {}", frame.id);
2717                println!("    Timestamp: {}", frame.timestamp);
2718                if !frame.tags.is_empty() {
2719                    println!("    Tags: {}", frame.tags.join(", "));
2720                }
2721                if !frame.labels.is_empty() {
2722                    println!("    Labels: {}", frame.labels.join(", "));
2723                }
2724                if !frame.content_dates.is_empty() {
2725                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2726                }
2727                println!();
2728            }
2729        }
2730    }
2731
2732    // Note about no LLM synthesis
2733    println!("{}", "─".repeat(60));
2734    println!(
2735        "{}",
2736        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
2737    );
2738    println!(
2739        "{}",
2740        "Use --use-model to get an AI-synthesized answer.".dimmed()
2741    );
2742}
2743
2744fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2745    let hits: Vec<_> = response
2746        .hits
2747        .iter()
2748        .map(|hit| {
2749            json!({
2750                "frame_id": hit.frame_id,
2751                "matches": hit.matches,
2752                "snippets": [hit.text.clone()],
2753            })
2754        })
2755        .collect();
2756    println!("{}", serde_json::to_string_pretty(&hits)?);
2757    Ok(())
2758}
2759
2760fn emit_search_table(response: &SearchResponse) {
2761    if response.hits.is_empty() {
2762        println!("No results for '{}'.", response.query);
2763        return;
2764    }
2765
2766    // Calculate score range for normalization (BM25 scores can be negative)
2767    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
2768    let (min_score, max_score) = score_range(&scores);
2769
2770    for hit in &response.hits {
2771        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
2772        if let Some(title) = &hit.title {
2773            println!("  Title: {title}");
2774        }
2775        if let Some(score) = hit.score {
2776            let normalized = normalize_bm25_for_display(score, min_score, max_score);
2777            println!("  Relevance: {:.0}%", normalized);
2778        }
2779        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
2780        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
2781            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
2782        }
2783        if let Some(chunk_text) = &hit.chunk_text {
2784            println!("  Chunk Text: {}", chunk_text.trim());
2785        }
2786        if let Some(metadata) = &hit.metadata {
2787            if let Some(track) = &metadata.track {
2788                println!("  Track: {track}");
2789            }
2790            if !metadata.tags.is_empty() {
2791                println!("  Tags: {}", metadata.tags.join(", "));
2792            }
2793            if !metadata.labels.is_empty() {
2794                println!("  Labels: {}", metadata.labels.join(", "));
2795            }
2796            if let Some(created_at) = &metadata.created_at {
2797                println!("  Created: {created_at}");
2798            }
2799            if !metadata.content_dates.is_empty() {
2800                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2801            }
2802            if !metadata.entities.is_empty() {
2803                let entity_strs: Vec<String> = metadata
2804                    .entities
2805                    .iter()
2806                    .map(|e| format!("{} ({})", e.name, e.kind))
2807                    .collect();
2808                println!("  Entities: {}", entity_strs.join(", "));
2809            }
2810        }
2811        println!("  Snippet: {}", hit.text.trim());
2812        println!();
2813    }
2814    if let Some(cursor) = &response.next_cursor {
2815        println!("Next cursor: {cursor}");
2816    }
2817}
2818
2819fn ask_mode_display(mode: AskModeArg) -> &'static str {
2820    match mode {
2821        AskModeArg::Lex => "lex",
2822        AskModeArg::Sem => "sem",
2823        AskModeArg::Hybrid => "hybrid",
2824    }
2825}
2826
2827fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2828    match mode {
2829        AskModeArg::Lex => "Lexical",
2830        AskModeArg::Sem => "Semantic",
2831        AskModeArg::Hybrid => "Hybrid",
2832    }
2833}
2834
2835fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2836    match retriever {
2837        AskRetriever::Lex => "lex",
2838        AskRetriever::Semantic => "semantic",
2839        AskRetriever::Hybrid => "hybrid",
2840        AskRetriever::LexFallback => "lex_fallback",
2841        AskRetriever::TimelineFallback => "timeline_fallback",
2842    }
2843}
2844
2845fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2846    match retriever {
2847        AskRetriever::Lex => "Lexical",
2848        AskRetriever::Semantic => "Semantic",
2849        AskRetriever::Hybrid => "Hybrid",
2850        AskRetriever::LexFallback => "Lexical (fallback)",
2851        AskRetriever::TimelineFallback => "Timeline (fallback)",
2852    }
2853}
2854
2855fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2856    match engine {
2857        SearchEngineKind::Tantivy => "text (tantivy)",
2858        SearchEngineKind::LexFallback => "text (fallback)",
2859        SearchEngineKind::Hybrid => "hybrid",
2860    }
2861}
2862
2863fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2864    let digest = hash(uri.as_bytes()).to_hex().to_string();
2865    let prefix_len = digest.len().min(12);
2866    let prefix = &digest[..prefix_len];
2867    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2868}
2869
/// Return `text` unchanged when it has at most `limit` chars; otherwise
/// keep the first `limit` chars and append "...".
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` yields the byte offset of char #limit, i.e. the first
    // char that does NOT fit — None means the whole string fits.
    match text.char_indices().nth(limit) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2878
/// Map a raw BM25 score onto a 0-100 scale for user-friendly display.
///
/// Tantivy's log-based term frequency can produce negative BM25 values,
/// so the score is positioned relative to the `[min_score, max_score]`
/// range observed in the current result set.
///
/// - When the range is degenerate (min == max) every score shows as 100.
/// - Otherwise the result is linearly interpolated and clamped to 0-100.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let span = max_score - min_score;
    if span.abs() < f32::EPSILON {
        // Uniform scores: everything is equally relevant.
        return 100.0;
    }
    ((score - min_score) / span * 100.0).clamp(0.0, 100.0)
}
2895
/// Extract min and max scores from a slice of optional scores.
///
/// Returns `(0.0, 0.0)` when no score is present. Runs in a single pass
/// without intermediate allocation (the previous version collected the
/// valid scores into a `Vec` and then scanned it twice); the `f32::min`/
/// `f32::max` fold order is unchanged, so NaN handling is identical.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut any = false;
    let mut min = f32::INFINITY;
    let mut max = f32::NEG_INFINITY;
    for &score in scores.iter().flatten() {
        any = true;
        min = min.min(score);
        max = max.max(score);
    }
    if any { (min, max) } else { (0.0, 0.0) }
}
2906
2907fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
2908    let mut hit_json = serde_json::Map::new();
2909    hit_json.insert("rank".into(), json!(hit.rank));
2910    if let Some(score) = hit.score {
2911        hit_json.insert("score".into(), json!(score));
2912    }
2913    hit_json.insert(
2914        "id".into(),
2915        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
2916    );
2917    hit_json.insert("frame_id".into(), json!(hit.frame_id));
2918    hit_json.insert("uri".into(), json!(hit.uri));
2919    if let Some(title) = &hit.title {
2920        hit_json.insert("title".into(), json!(title));
2921    }
2922    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
2923    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
2924    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
2925    hit_json.insert("text".into(), json!(hit.text));
2926
2927    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
2928        matches: hit.matches,
2929        ..SearchHitMetadata::default()
2930    });
2931    let mut meta_json = serde_json::Map::new();
2932    meta_json.insert("matches".into(), json!(metadata.matches));
2933    if !metadata.tags.is_empty() {
2934        meta_json.insert("tags".into(), json!(metadata.tags));
2935    }
2936    if !metadata.labels.is_empty() {
2937        meta_json.insert("labels".into(), json!(metadata.labels));
2938    }
2939    if let Some(track) = metadata.track {
2940        meta_json.insert("track".into(), json!(track));
2941    }
2942    if let Some(created_at) = metadata.created_at {
2943        meta_json.insert("created_at".into(), json!(created_at));
2944    }
2945    if !metadata.content_dates.is_empty() {
2946        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
2947    }
2948    if !metadata.entities.is_empty() {
2949        let entities_json: Vec<serde_json::Value> = metadata
2950            .entities
2951            .iter()
2952            .map(|e| {
2953                let mut ent = serde_json::Map::new();
2954                ent.insert("name".into(), json!(e.name));
2955                ent.insert("kind".into(), json!(e.kind));
2956                if let Some(conf) = e.confidence {
2957                    ent.insert("confidence".into(), json!(conf));
2958                }
2959                serde_json::Value::Object(ent)
2960            })
2961            .collect();
2962        meta_json.insert("entities".into(), json!(entities_json));
2963    }
2964    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
2965    serde_json::Value::Object(hit_json)
2966}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Reorders `response.hits` in place and rewrites each hit's `rank` to
/// the fused 1-based position. No-op when there are no hits or when no
/// usable frame embedding was found.
///
/// # Errors
/// Propagates failures from embedding the query (`embed_query`) or from
/// loading per-frame embeddings (`frame_embedding`).
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Score each hit's stored embedding against the query embedding.
    // Frames with no embedding, or whose embedding dimension does not
    // match the active runtime model, are simply left out of the
    // semantic ranking (they still keep their lexical rank below).
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // Nothing to fuse — keep the lexical ordering untouched.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    // 1-based rank per frame in the semantic ordering.
    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // (original index, fused RRF score, lexical rank) per hit.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank
            // (0.0 when this frame had no usable embedding above).
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Fused score descending; ties broken by lexical rank ascending so
    // the result is deterministic.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3072
3073/// Rerank search results by boosting hits that contain user preference signals.
3074/// Only applies when the query appears to be seeking recommendations or preferences.
3075fn apply_preference_rerank(response: &mut SearchResponse) {
3076    if response.hits.is_empty() {
3077        return;
3078    }
3079
3080    // Check if query is preference-seeking
3081    let query_lower = response.query.to_lowercase();
3082    let is_preference_query = query_lower.contains("suggest")
3083        || query_lower.contains("recommend")
3084        || query_lower.contains("should i")
3085        || query_lower.contains("what should")
3086        || query_lower.contains("prefer")
3087        || query_lower.contains("favorite")
3088        || query_lower.contains("best for me");
3089
3090    if !is_preference_query {
3091        return;
3092    }
3093
3094    // Compute boost scores for each hit
3095    let mut scored: Vec<(usize, f32, f32)> = response
3096        .hits
3097        .iter()
3098        .enumerate()
3099        .map(|(idx, hit)| {
3100            let original_score = hit.score.unwrap_or(0.0);
3101            let preference_boost = compute_preference_boost(&hit.text);
3102            let boosted_score = original_score + preference_boost;
3103            (idx, boosted_score, original_score)
3104        })
3105        .collect();
3106
3107    // Sort by boosted score (descending)
3108    scored.sort_by(|a, b| {
3109        b.1.partial_cmp(&a.1)
3110            .unwrap_or(Ordering::Equal)
3111            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3112    });
3113
3114    // Reorder hits
3115    let mut reordered = Vec::with_capacity(response.hits.len());
3116    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3117        let mut hit = response.hits[idx].clone();
3118        hit.rank = rank_idx + 1;
3119        reordered.push(hit);
3120    }
3121
3122    response.hits = reordered;
3123}
3124
/// Compute a boost score for hits that contain user preference signals.
///
/// The goal is to surface content where the user describes their
/// ESTABLISHED situation — possessions, habits, past experience — over
/// content where the user merely makes a REQUEST. Both are first-person
/// language, but established context is far more useful for
/// personalization, so it earns a much larger per-pattern bonus
/// (0.15 vs 0.02). The total is capped at 0.5 so preference signals can
/// never completely dominate relevance.
fn compute_preference_boost(text: &str) -> f32 {
    let haystack = text.to_lowercase();
    let mut score = 0.0f32;

    // Strong signals (+0.15 each): what the user HAS DONE, OWNS, or DOES
    // REGULARLY — actual experience, possessions, habits.
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for marker in &established_context {
        if haystack.contains(marker) {
            score += 0.15;
        }
    }

    // Moderate signals (+0.02 each): bare first-person tokens. The
    // surrounding spaces mean these only match mid-string occurrences.
    for marker in &[" i ", " my ", " me "] {
        if haystack.contains(*marker) {
            score += 0.02;
        }
    }

    // Weak signals (+0.02 each): requests/intentions — the user wants
    // something but is not describing established context.
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for marker in &request_patterns {
        if haystack.contains(marker) {
            score += 0.02;
        }
    }

    // Cap so the boost can't overwhelm the underlying relevance score.
    score.min(0.5)
}
3216}
3217
/// Cosine similarity between two vectors (pairs beyond the shorter slice's
/// length are ignored). Returns 0.0 when either vector has (near-)zero
/// magnitude, instead of dividing by zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared magnitudes in a single pass.
    let (dot, mag_a_sq, mag_b_sq) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(d, ma, mb), (x, y)| {
            (d + x * y, ma + x * x, mb + y * y)
        });

    // Guard clause: degenerate (empty or all-zero) vectors similarity is 0.
    if mag_a_sq <= f32::EPSILON || mag_b_sq <= f32::EPSILON {
        return 0.0;
    }

    dot / (mag_a_sq.sqrt() * mag_b_sq.sqrt())
}
3234
/// Apply cross-encoder reranking to search results.
///
/// Cross-encoders directly score query-document pairs and can understand
/// more nuanced relevance than bi-encoders (embeddings). This is especially
/// useful for personalization queries where semantic similarity != relevance.
///
/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
///
/// Failures to initialize or run the reranker are non-fatal: the response is
/// left in its original order and a warning is logged.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to rerank with fewer than two hits (this also covers empty).
    if response.hits.len() < 2 {
        return Ok(());
    }

    // Only rerank the top candidates to bound latency.
    let candidates_to_rerank = response.hits.len().min(50);

    // Initialize the reranker (model will be downloaded on first use, ~86MB)
    // Using JINA Turbo - faster than BGE while maintaining good accuracy
    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            // Best-effort: keep the original ordering if the model is unavailable.
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    // Rerank using cross-encoder
    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    // Blend cross-encoder scores with original scores to preserve temporal boosting.
    // The original score includes recency boost; purely replacing it loses temporal relevance.
    // We collect (blended_score, original_idx) pairs and sort by blended score.
    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Find score range for normalization (original scores can be negative for BM25).
    // If no candidate carries a score at all, fall back to a unit range so the
    // normalized value stays finite (folding an empty list would otherwise give
    // min = +INF / max = -INF and push every blended score to -inf).
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let (orig_min, orig_max) = if original_scores.is_empty() {
        (0.0f32, 1.0f32)
    } else {
        (
            original_scores.iter().cloned().fold(f32::INFINITY, f32::min),
            original_scores.iter().cloned().fold(f32::NEG_INFINITY, f32::max),
        )
    };
    let orig_range = (orig_max - orig_min).max(0.001); // Avoid division by zero

    for result in rerank_results.iter() {
        let original_idx = result.index;
        let cross_encoder_score = result.score; // Already normalized 0-1

        // Normalize original score to 0-1 range
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // Blend: 20% cross-encoder (relevance) + 80% original (includes temporal boost)
        // Very heavy weight on original score to preserve temporal ranking
        // The original score already incorporates BM25 + recency boost
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    // Sort by blended score (descending)
    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Build reordered hits with new ranks
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        // Store blended score for reference
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Add any remaining hits that weren't reranked (beyond top-50)
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3334
/// Stub for cross-encoder reranking when the `local-embeddings` feature is
/// disabled. Leaves the response untouched and always returns `Ok(())`, so
/// call sites don't need to special-case the feature flag.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3341
3342/// Build a context string from memory cards stored in the MV2 file.
3343/// Groups facts by entity for better LLM comprehension.
3344fn build_memory_context(mem: &Memvid) -> String {
3345    let entities = mem.memory_entities();
3346    if entities.is_empty() {
3347        return String::new();
3348    }
3349
3350    let mut sections = Vec::new();
3351    for entity in entities {
3352        let cards = mem.get_entity_memories(&entity);
3353        if cards.is_empty() {
3354            continue;
3355        }
3356
3357        let mut entity_lines = Vec::new();
3358        for card in cards {
3359            // Format: "slot: value" with optional polarity indicator
3360            let polarity_marker = card
3361                .polarity
3362                .as_ref()
3363                .map(|p| match p.to_string().as_str() {
3364                    "Positive" => " (+)",
3365                    "Negative" => " (-)",
3366                    _ => "",
3367                })
3368                .unwrap_or("");
3369            entity_lines.push(format!(
3370                "  - {}: {}{}",
3371                card.slot, card.value, polarity_marker
3372            ));
3373        }
3374
3375        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3376    }
3377
3378    sections.join("\n\n")
3379}
3380
3381/// Build a context string from entities found in search hits.
3382/// Groups entities by type for better LLM comprehension.
3383fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3384    use std::collections::HashMap;
3385
3386    // Collect unique entities by kind
3387    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3388
3389    for hit in hits {
3390        if let Some(metadata) = &hit.metadata {
3391            for entity in &metadata.entities {
3392                entities_by_kind
3393                    .entry(entity.kind.clone())
3394                    .or_default()
3395                    .push(entity.name.clone());
3396            }
3397        }
3398    }
3399
3400    if entities_by_kind.is_empty() {
3401        return String::new();
3402    }
3403
3404    // Deduplicate and format
3405    let mut sections = Vec::new();
3406    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3407    sorted_kinds.sort();
3408
3409    for kind in sorted_kinds {
3410        let names = entities_by_kind.get(kind).unwrap();
3411        let mut unique_names: Vec<_> = names.iter().collect();
3412        unique_names.sort();
3413        unique_names.dedup();
3414
3415        let names_str = unique_names
3416            .iter()
3417            .take(10) // Limit to 10 entities per kind
3418            .map(|s| s.as_str())
3419            .collect::<Vec<_>>()
3420            .join(", ");
3421
3422        sections.push(format!("{}: {}", kind, names_str));
3423    }
3424
3425    sections.join("\n")
3426}