memvid_cli/commands/
search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored::Colorize;
15use colored_json::ToColoredJson;
16use blake3::hash;
17use clap::{ArgAction, Args, ValueEnum};
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21    TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
25    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
26    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
27};
28#[cfg(feature = "temporal_track")]
29use serde::Serialize;
30use serde_json::json;
31#[cfg(feature = "temporal_track")]
32use time::format_description::well_known::Rfc3339;
33use time::{Date, PrimitiveDateTime, Time};
34#[cfg(feature = "temporal_track")]
35use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
36use tracing::{info, warn};
37
38use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
39
40use memvid_ask_model::{
41    run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
42};
43
44// frame_to_json and print_frame_summary available from commands but not used in this module
45use crate::config::{
46    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
47    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
48};
49use crate::utils::{
50    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51    parse_date_boundary, parse_vector, read_embedding,
52};
53
// Cap on how many characters of retrieval context get echoed in output.
// NOTE(review): not referenced in this visible chunk — presumably used by the
// ask/find handlers further down the file; confirm before removing.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone for temporal phrase resolution when no --tz is given.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
58fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
59    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
60    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
61    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
62        message.push_str(&format!(
63            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
64            model.name(),
65            model.name()
66        ));
67        if model.is_openai() {
68            message.push_str(" (and set `OPENAI_API_KEY`).");
69        } else {
70            message.push('.');
71        }
72        message.push_str(&format!(
73            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
74            model.name()
75        ));
76        message.push_str(&format!(
77            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
78        ));
79        message.push_str("\nOr use `--mode lex` to disable semantic search.");
80    }
81    message
82}
83
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    // Path to the memory file to read (positional).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the timeline as pretty-printed JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the iteration order (semantics defined by TimelineQueryBuilder in core).
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return; must be non-zero.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (presumably unix seconds — confirm against core).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. "yesterday"); --tz/--anchor/--window
    // are only valid together with this flag (enforced in handle_timeline).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone used to resolve the phrase (defaults to DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases (defaults to "now").
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Extra padding, in minutes, applied to both sides of the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
118
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Path to the memory file to query (positional).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required, unlike timeline's --on).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone used to resolve the phrase (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases (defaults to "now").
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Extra padding, in minutes, applied to both sides of the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return; must be non-zero.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound, applied in addition to the resolved window.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound, applied in addition to the resolved window.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the iteration order.
    #[arg(long)]
    pub reverse: bool,
    // Emit results as JSON (summary + entries) instead of the human listing.
    #[arg(long)]
    pub json: bool,
}
144
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    // Positional targets (zero or more). NOTE(review): presumably memory file
    // path(s) and/or free-form question words — confirm against the ask
    // handler, which is not visible in this chunk.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text; alternative to passing it positionally.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to an exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of passages to retrieve (`--limit` is an accepted alias).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid (default).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON instead of human output.
    #[arg(long)]
    pub json: bool,
    // Print only the retrieved context, skipping answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Start-date filter (parsed elsewhere; see parse_date_boundary import).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End-date filter.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// Return verbatim evidence without LLM synthesis.
    /// Shows the most relevant passages with citations, no paraphrasing or summarization.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
237
/// Ask mode argument
// CLI-facing mirror of `memvid_core::AskMode`; converted via the `From` impl
// below. Variant comments use `//` deliberately: `///` on ValueEnum variants
// would change clap's generated help output.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval only.
    Lex,
    // Semantic (embedding-based) retrieval only.
    Sem,
    // Combined lexical + semantic retrieval (the CLI default).
    Hybrid,
}
245
246impl From<AskModeArg> for AskMode {
247    fn from(value: AskModeArg) -> Self {
248        match value {
249            AskModeArg::Lex => AskMode::Lex,
250            AskModeArg::Sem => AskMode::Sem,
251            AskModeArg::Hybrid => AskMode::Hybrid,
252        }
253    }
254}
255
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    // Path to the memory file to search (positional).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict search to an exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict search to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return (`--limit` is an accepted alias).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit results in the current JSON schema.
    #[arg(long)]
    pub json: bool,
    // Emit results in the legacy JSON schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode: auto (default), lex, sem, or clip (feature-gated).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// Disable sketch pre-filtering (for benchmarking/debugging).
    /// By default, sketches are used for fast candidate generation if available.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
322
/// Search mode argument
// `//` comments on variants are deliberate: `///` would alter clap's generated
// possible-values help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the engine choose (presumably based on index availability — confirm
    // against the find handler, not visible in this chunk).
    Auto,
    // Lexical search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
333
/// Adaptive retrieval strategy
// NOTE(review): presumably converted to `memvid_core::CutoffStrategy`
// (imported above) by the search/ask handlers — confirm where the mapping
// lives; it is not visible in this chunk.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
348
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    // Path to the memory file to search (positional).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Raw query vector as comma-separated values; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file holding the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit results as JSON.
    #[arg(long)]
    pub json: bool,
}
363
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Path to the memory file to audit (positional).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
400
/// Audit output format
// NOTE(review): the rendering for each format is presumably implemented by the
// audit handler elsewhere in this file — confirm all three are handled.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
411
412// ============================================================================
413// Search & Retrieval command handlers
414// ============================================================================
415
/// Handle the `timeline` subcommand: list frames chronologically, optionally
/// constrained by limit/since/until, a temporal phrase (`--on`, feature-gated),
/// and Replay (`--as-of-frame` / `--as-of-ts`) time-travel filters.
///
/// Output is either pretty-printed JSON (`--json`) or a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only make sense together with --on; reject early.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the temporal phrase (if any) into a filter, keeping the summary
    // around so it can be echoed alongside the results.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a resolved temporal phrase, wrap entries in an envelope that
        // also carries the window summary; otherwise emit the bare entry list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            // One line per frame, with preview newlines flattened to spaces.
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
521
/// Handle the `when` subcommand: resolve a natural-language temporal phrase
/// into a concrete window and list the frames falling inside it, as JSON or
/// a human-readable listing.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase (honoring tz/anchor/window overrides) into a timeline
    // filter plus a printable summary of what the phrase resolved to.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON mode: summary + per-entry views, then return early.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Best-effort ISO rendering; falls back to an empty string on failure.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
589
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// the resolved-window summary plus the (borrowed) entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Resolved temporal window; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Timeline entries, serialized with their core schema.
    entries: &'a [TimelineEntry],
}
597
/// JSON envelope for `when --json`: the resolved-window summary plus
/// per-entry views shaped for this command.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
604
/// One timeline entry as rendered in `when --json` output.
/// Optional/empty fields are omitted from the serialized JSON.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Raw unix-seconds timestamp of the frame.
    timestamp: i64,
    /// ISO rendering of `timestamp`; None when formatting failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-frame temporal annotations (anchor + mentions), when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
620
/// Serializable description of how a temporal phrase was resolved:
/// the input phrase, timezone, anchor instant, the resolver's confidence and
/// flags, and the resulting UTC window (both unix seconds and ISO forms).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    anchor_utc: i64,
    /// RFC3339 rendering of the anchor (falls back to raw unix seconds).
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// One of: date / datetime / date_range / datetime_range / … (see resolution_kind).
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Extra padding (minutes) that was requested via --window, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
639
/// Internal (non-serialized) record of a resolved temporal phrase, built by
/// `build_temporal_filter`; converted to `TemporalSummaryOutput` for JSON and
/// printed by `print_temporal_summary` for humans.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    // Inclusive UTC window bounds in unix seconds; None when unresolved.
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    // The --window padding that was requested (already applied to the bounds).
    window_minutes: Option<u64>,
}
650
/// Resolve a natural-language temporal `phrase` into a [`TemporalFilter`] for
/// timeline queries plus a [`TemporalSummary`] for display.
///
/// `tz_override` defaults to `DEFAULT_TEMPORAL_TZ`; `anchor_override`
/// (RFC3339) defaults to "now"; `window_minutes`, when positive, pads the
/// resolved window symmetrically on both sides.
///
/// # Errors
/// - `E-TEMP-001` — the phrase could not be resolved.
/// - `E-TEMP-002` — the anchor is not valid RFC3339.
/// - `E-TEMP-003` — the timezone is empty after trimming.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad the resolved window by +/- `minutes` on each side, saturating at the
    // i64 boundaries. (The original had identical `if s == e` / `else`
    // branches here; they are collapsed into one.)
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let delta_secs = TimeDuration::minutes(minutes as i64).whole_seconds();
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta_secs));
                end = Some(e.saturating_add(delta_secs));
            }
        }
    }

    // phrase/tz are carried in the summary for display; the core filter only
    // needs the concrete UTC bounds.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
714
/// Project an internal `TemporalSummary` into its serializable JSON shape.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    let anchor_utc = summary.anchor.unix_timestamp();
    // Fall back to the raw unix timestamp when RFC3339 formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
740
/// Convert a core [`TimelineEntry`] into the `when --json` row shape,
/// adding a best-effort ISO rendering of the timestamp.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        // None when the timestamp cannot be formatted; serde then omits it.
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
753
/// Print the resolved temporal window in human-readable form, ending with a
/// blank separator line before the entry listing.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Fall back to the raw unix timestamp when RFC3339 formatting fails.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    // Only mention padding when it was actually requested and non-zero.
    match summary.window_minutes {
        Some(window) if window > 0 => println!("Window padding: {window} minute(s)"),
        _ => {}
    }
    println!();
}
791
/// Print per-entry temporal annotations (anchor and mentions), indented two
/// spaces to sit underneath the entry line.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO form, then a formatted timestamp, then raw seconds.
        let anchor_text = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", anchor_text, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
824
/// Convert a temporal resolution into inclusive `(start, end)` unix-second
/// bounds. Point values (date/datetime) collapse to an equal start and end;
/// a `Month` expands to its first and last calendar day.
///
/// # Errors
/// Returns an error when a `Month` resolution cannot be materialized as a
/// valid calendar date (e.g. year out of range).
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;

    let (lo, hi) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            // `last_day_in_month` already reports "invalid month resolution".
            let last = last_day_in_month(*year, *month)?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(lo), Some(hi)))
}
855
/// Map a temporal resolution variant to its stable, machine-readable label
/// (used in JSON output).
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue::*;

    match &resolution.value {
        Date(_) => "date",
        DateTime(_) => "datetime",
        DateRange { .. } => "date_range",
        DateTimeRange { .. } => "datetime_range",
        Month { .. } => "month",
    }
}
866
/// Unix timestamp (seconds) of the given calendar date at midnight, UTC.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    // `midnight()` + `assume_utc()` is the time-crate shorthand for pairing
    // the date with 00:00:00 and attaching the UTC offset.
    date.midnight().assume_utc().unix_timestamp()
}
873
/// Return the final calendar day of `month` in `year`.
///
/// # Errors
/// Returns "invalid month resolution" when no valid date exists for the
/// given year/month (e.g. the year is outside the supported range).
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    // Every month has between 28 and 31 days, so the largest valid day in
    // that range is the month's last day.
    (28..=31)
        .rev()
        .find_map(|day| Date::from_calendar_date(year, month, day).ok())
        .ok_or_else(|| anyhow!("invalid month resolution"))
}
887
888#[cfg(feature = "temporal_track")]
889
890fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
891    if fragments.is_empty() {
892        return;
893    }
894
895    response.context_fragments = fragments
896        .into_iter()
897        .map(|fragment| AskContextFragment {
898            rank: fragment.rank,
899            frame_id: fragment.frame_id,
900            uri: fragment.uri,
901            title: fragment.title,
902            score: fragment.score,
903            matches: fragment.matches,
904            range: Some(fragment.range),
905            chunk_range: fragment.chunk_range,
906            text: fragment.text,
907            kind: Some(match fragment.kind {
908                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
909                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
910            }),
911            #[cfg(feature = "temporal_track")]
912            temporal: None,
913        })
914        .collect();
915}
916
/// Handle the `ask` command: retrieve relevant context from a memory file
/// and (optionally) synthesize an answer with an LLM.
///
/// High-level flow: plan/quota checks → question parsing → memory open →
/// mode and embedding-runtime resolution → retrieval via `Memvid::ask` →
/// optional cross-encoder rerank and context injection (memory cards,
/// entities, PII masking) → optional model inference → JSON or pretty output.
///
/// # Errors
/// Fails on quota/plan rejection, missing question, invalid date bounds,
/// memory-open failures, mixed embedding models in semantic modes, or when
/// a semantic mode is requested but no embedding runtime can be loaded.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    // Track query usage against plan quota
    crate::api::track_query_usage(config, 1)?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory
    // file becomes the path; everything else joins into the question.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // Explicit --question wins over positional words; blank input is
    // treated as missing.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Expand query for better retrieval using LLM (expands abbreviations, adds synonyms)
    // This happens when --use-model is set or we have an API key
    let (original_question, search_query) = {
        // For query expansion, we use the fastest available model
        // Priority: OpenAI > Groq > Anthropic > XAI > Mistral
        let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
            if let Ok(key) = std::env::var("OPENAI_API_KEY") {
                // OpenAI available - use gpt-4o-mini (fastest, cheapest)
                (Some("gpt-4o-mini"), Some(key))
            } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
                // Groq available - use llama-3.1-8b-instant (very fast)
                (Some("llama-3.1-8b-instant"), Some(key))
            } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
                // Anthropic available - use haiku
                (Some("claude-haiku-4-5"), Some(key))
            } else if let Ok(key) = std::env::var("XAI_API_KEY") {
                // XAI available - use grok-4-fast
                (Some("grok-4-fast"), Some(key))
            } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
                // Mistral available - use mistral-small
                (Some("mistral-small-latest"), Some(key))
            } else {
                // No fast model available for expansion
                (None, None)
            };

        // DISABLED: Query expansion for ask command
        // The ask command has sophisticated retrieval with fallbacks, aggregation detection,
        // temporal boosting, and diverse retrieval strategies. Query expansion often strips
        // out important semantic context (temporal markers, aggregation signals, analytical
        // keywords) that these strategies depend on. The original question is preserved
        // to ensure all downstream detection and ranking works correctly.
        //
        // Query expansion may be appropriate for simple keyword searches, but for complex
        // natural language questions it causes more problems than it solves.
        let _ = (model_for_expansion, api_key_for_expansion); // suppress unused warnings
        (question.clone(), question.clone())
    };

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Parse --start/--end (end is inclusive per the `true` flag) and reject
    // inverted ranges early.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    // (best-effort: failures are deliberately ignored via `let _ =`)
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Check if memory has any vectors - if not, force lexical mode
    let stats = mem.stats()?;
    let has_vectors = stats.vector_count > 0;
    let effective_mode = if !has_vectors && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        tracing::info!(
            "Memory has no embeddings (vector_count=0); falling back to lexical mode"
        );
        AskModeArg::Lex
    } else {
        args.mode.clone()
    };

    let ask_mode: AskMode = effective_mode.clone().into();
    // Infer the query embedding model from the memory's own identity so the
    // query is embedded with the same model as the stored vectors.
    // NOTE(review): 10_000 is presumably a frame-sampling cap for identity
    // detection — confirm against embedding_identity_summary's contract.
    let inferred_model_override = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                // Mixed models make cosine distances incomparable; refuse
                // semantic retrieval outright rather than return garbage.
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // Explicit --query-embedding-model takes precedence over the inferred one.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            effective_mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question: search_query, // Use expanded query for retrieval
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    // Translate a dimension mismatch into an actionable CLI message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
        other => anyhow!(other),
    })?;

    // Restore original question for display and LLM synthesis
    // (search_query was used for retrieval but original_question is shown to user)
    response.question = original_question;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    // Skip for temporal/recency queries - cross-encoder doesn't understand temporal context
    // and would override the recency boost from lexical search
    let is_temporal_query = {
        let q_lower = response.question.to_lowercase();
        q_lower.contains("current") || q_lower.contains("latest") || q_lower.contains("recent")
            || q_lower.contains("now") || q_lower.contains("today") || q_lower.contains("updated")
            || q_lower.contains("new ") || q_lower.contains("newest")
    };
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
        && !is_temporal_query
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failures are non-fatal: keep the original ordering.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Synthesis stage: --no-llm short-circuits with verbatim evidence;
    // --context-only skips synthesis; otherwise --use-model runs inference.
    let mut model_result: Option<ModelInference> = None;
    if args.no_llm {
        // --no-llm: return verbatim evidence without LLM synthesis
        if args.use_model.is_some() {
            warn!("--use-model ignored because --no-llm disables LLM synthesis");
        }
        if args.json {
            emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
        } else {
            emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
        }

        // Save active replay session if one exists
        #[cfg(feature = "replay")]
        let _ = mem.save_active_session();

        return Ok(());
    } else if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                response.answer = Some(inference.answer.answer.clone());
                response.retrieval.context = inference.context_body.clone();
                apply_model_context_fragments(&mut response, inference.context_fragments.clone());
                model_result = Some(inference);
            }
            Err(err) => {
                // Inference failure is non-fatal; the default summary path runs.
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref inference) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            // Extract frame IDs from retrieval hits for replay audit
            let retrieved_frames: Vec<u64> = response
                .retrieval
                .hits
                .iter()
                .map(|hit| hit.frame_id)
                .collect();

            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                inference.answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
                retrieved_frames,
            );
        }
    }

    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                effective_mode.clone(),
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            effective_mode.clone(),
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1328
1329/// Handle graph-aware find with --graph or --hybrid flags
1330fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1331    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1332    use memvid_core::types::QueryPlan;
1333
1334    let planner = QueryPlanner::new();
1335
1336    // Create query plan based on mode
1337    let plan = if args.graph {
1338        // Pure graph mode - let planner detect patterns
1339        let plan = planner.plan(&args.query, args.top_k);
1340        // If it's a hybrid plan from auto-detection, convert to graph-only
1341        match plan {
1342            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1343                QueryPlan::graph_only(graph_filter, args.top_k)
1344            }
1345            _ => plan,
1346        }
1347    } else {
1348        // Hybrid mode - use the auto-detected plan
1349        planner.plan(&args.query, args.top_k)
1350    };
1351
1352    // Execute the search
1353    let hits = hybrid_search(mem, &plan)?;
1354
1355    if args.json {
1356        // JSON output
1357        let output = serde_json::json!({
1358            "query": args.query,
1359            "mode": if args.graph { "graph" } else { "hybrid" },
1360            "plan": format!("{:?}", plan),
1361            "hits": hits.iter().map(|h| {
1362                serde_json::json!({
1363                    "frame_id": h.frame_id,
1364                    "score": h.score,
1365                    "graph_score": h.graph_score,
1366                    "vector_score": h.vector_score,
1367                    "matched_entity": h.matched_entity,
1368                    "preview": h.preview,
1369                })
1370            }).collect::<Vec<_>>(),
1371        });
1372        println!("{}", serde_json::to_string_pretty(&output)?);
1373    } else {
1374        // Human-readable output
1375        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1376        println!("{} search for: \"{}\"", mode_str, args.query);
1377        println!("Plan: {:?}", plan);
1378        println!();
1379
1380        if hits.is_empty() {
1381            println!("No results found.");
1382        } else {
1383            println!("Results ({} hits):", hits.len());
1384            for (i, hit) in hits.iter().enumerate() {
1385                println!();
1386                println!(
1387                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1388                    i + 1,
1389                    hit.frame_id,
1390                    hit.score,
1391                    hit.graph_score,
1392                    hit.vector_score
1393                );
1394                if let Some(entity) = &hit.matched_entity {
1395                    println!("   Matched entity: {}", entity);
1396                }
1397                if let Some(preview) = &hit.preview {
1398                    let truncated = if preview.len() > 200 {
1399                        format!("{}...", &preview[..200])
1400                    } else {
1401                        preview.clone()
1402                    };
1403                    println!("   {}", truncated.replace('\n', " "));
1404                }
1405            }
1406        }
1407    }
1408
1409    Ok(())
1410}
1411
1412pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1413    // Check if plan allows query operations (blocks expired subscriptions)
1414    crate::utils::require_active_plan(config, "find")?;
1415
1416    // Track query usage against plan quota
1417    crate::api::track_query_usage(config, 1)?;
1418
1419    let mut mem = open_read_only_mem(&args.file)?;
1420
1421    // Load active replay session if one exists
1422    #[cfg(feature = "replay")]
1423    let _ = mem.load_active_session();
1424
1425    // Handle graph-aware and hybrid search modes
1426    if args.graph || args.hybrid {
1427        return handle_graph_find(&mut mem, &args);
1428    }
1429
1430    if args.uri.is_some() && args.scope.is_some() {
1431        warn!("--scope ignored because --uri is provided");
1432    }
1433
1434    // Get vector dimension from MV2 for auto-detection
1435    let mv2_dimension = mem.effective_vec_index_dimension()?;
1436    let identity_summary = match args.mode {
1437        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1438        #[cfg(feature = "clip")]
1439        SearchMode::Clip => None,
1440        SearchMode::Lex => None,
1441    };
1442
1443    let mut semantic_allowed = true;
1444    let inferred_model_override = match identity_summary.as_ref() {
1445        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1446            identity.model.as_deref().map(|value| value.to_string())
1447        }
1448        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1449            let models: Vec<_> = identities
1450                .iter()
1451                .filter_map(|entry| entry.identity.model.as_deref())
1452                .collect();
1453            if args.mode == SearchMode::Sem {
1454                anyhow::bail!(
1455                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1456                    Detected models: {:?}\n\n\
1457                    Suggested fix: split into separate memories per embedding model.",
1458                    models
1459                );
1460            }
1461            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
1462            semantic_allowed = false;
1463            None
1464        }
1465        _ => None,
1466    };
1467
1468    let emb_model_override = args
1469        .query_embedding_model
1470        .as_deref()
1471        .or(inferred_model_override.as_deref());
1472
1473    let (mode_label, runtime_option) = match args.mode {
1474        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1475        SearchMode::Sem => {
1476            let runtime =
1477                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1478            ("Semantic (vector search)".to_string(), Some(runtime))
1479        }
1480        SearchMode::Auto => {
1481            if !semantic_allowed {
1482                ("Lexical (semantic unsafe)".to_string(), None)
1483            } else if let Some(runtime) =
1484                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1485            {
1486                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1487            } else {
1488                ("Lexical (semantic unavailable)".to_string(), None)
1489            }
1490        }
1491        #[cfg(feature = "clip")]
1492        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1493    };
1494
1495    let mode_key = match args.mode {
1496        SearchMode::Sem => "semantic",
1497        SearchMode::Lex => "text",
1498        SearchMode::Auto => {
1499            if runtime_option.is_some() {
1500                "hybrid"
1501            } else {
1502                "text"
1503            }
1504        }
1505        #[cfg(feature = "clip")]
1506        SearchMode::Clip => "clip",
1507    };
1508
1509    // For CLIP mode, use CLIP visual search
1510    #[cfg(feature = "clip")]
1511    if args.mode == SearchMode::Clip {
1512        use memvid_core::clip::{ClipConfig, ClipModel};
1513
1514        // Initialize CLIP model
1515        let config = ClipConfig::default();
1516        let clip = ClipModel::new(config).map_err(|e| {
1517            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1518        })?;
1519
1520        // Encode query text
1521        let query_embedding = clip
1522            .encode_text(&args.query)
1523            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1524
1525        // Search CLIP index
1526        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1527
1528        // Debug distances before filtering
1529        for hit in &hits {
1530            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1531                tracing::debug!(
1532                    frame_id = hit.frame_id,
1533                    title = %frame.title.unwrap_or_default(),
1534                    page = hit.page,
1535                    distance = hit.distance,
1536                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1537                    "CLIP raw hit"
1538                );
1539            } else {
1540                tracing::debug!(
1541                    frame_id = hit.frame_id,
1542                    page = hit.page,
1543                    distance = hit.distance,
1544                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1545                    "CLIP raw hit (missing frame)"
1546                );
1547            }
1548        }
1549
1550        // CLIP distance threshold for filtering poor matches
1551        // CLIP uses L2 distance on normalized embeddings:
1552        //   - distance² = 2(1 - cosine_similarity)
1553        //   - distance = 0 → identical (cosine_sim = 1)
1554        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1555        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1556        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1557        //   - distance = 2.0 → opposite (cosine_sim = -1)
1558        //
1559        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1560        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1561        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1562        const CLIP_MAX_DISTANCE: f32 = 1.26;
1563
1564        // Convert CLIP hits to SearchResponse format, filtering by threshold
1565        let search_hits: Vec<SearchHit> = hits
1566            .into_iter()
1567            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1568            .enumerate()
1569            .filter_map(|(rank, hit)| {
1570                // Convert L2 distance to cosine similarity for display
1571                // cos_sim = 1 - (distance² / 2)
1572                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1573
1574                // Get frame preview for snippet
1575                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1576                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1577                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1578                let title = match (base_title, hit.page) {
1579                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1580                    (Some(t), None) => Some(t),
1581                    (None, Some(p)) => Some(format!("Page {p}")),
1582                    _ => None,
1583                };
1584                Some(SearchHit {
1585                    rank: rank + 1,
1586                    frame_id: hit.frame_id,
1587                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1588                    title,
1589                    text: preview.clone(),
1590                    chunk_text: Some(preview),
1591                    range: (0, 0),
1592                    chunk_range: None,
1593                    matches: 0,
1594                    score: Some(cosine_similarity),
1595                    metadata: None,
1596                })
1597            })
1598            .collect();
1599
1600        let response = SearchResponse {
1601            query: args.query.clone(),
1602            hits: search_hits.clone(),
1603            total_hits: search_hits.len(),
1604            params: memvid_core::SearchParams {
1605                top_k: args.top_k,
1606                snippet_chars: args.snippet_chars,
1607                cursor: args.cursor.clone(),
1608            },
1609            elapsed_ms: 0,
1610            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1611            next_cursor: None,
1612            context: String::new(),
1613        };
1614
1615        if args.json_legacy {
1616            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1617            emit_legacy_search_json(&response)?;
1618        } else if args.json {
1619            emit_search_json(&response, mode_key)?;
1620        } else {
1621            println!(
1622                "mode: {}   k={}   time: {} ms",
1623                mode_label, response.params.top_k, response.elapsed_ms
1624            );
1625            println!("engine: clip (MobileCLIP-S2)");
1626            println!(
1627                "hits: {} (showing {})",
1628                response.total_hits,
1629                response.hits.len()
1630            );
1631            emit_search_table(&response);
1632        }
1633        return Ok(());
1634    }
1635
1636    // For semantic mode, use pure vector search.
1637    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1638        let runtime = runtime_option
1639            .as_ref()
1640            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1641
1642        // Embed the query
1643        let query_embedding = runtime.embed_query(&args.query)?;
1644
1645        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1646        let scope = args.scope.as_deref().or(args.uri.as_deref());
1647
1648        if !args.no_adaptive {
1649            // Build adaptive config from CLI args
1650            let strategy = match args.adaptive_strategy {
1651                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1652                    min_ratio: args.min_relevancy,
1653                },
1654                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1655                    min_score: args.min_relevancy,
1656                },
1657                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1658                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1659                },
1660                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1661                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1662                    relative_threshold: args.min_relevancy,
1663                    max_drop_ratio: 0.35,
1664                    absolute_min: 0.3,
1665                },
1666            };
1667
1668            let config = AdaptiveConfig {
1669                enabled: true,
1670                max_results: args.max_k,
1671                min_results: 1,
1672                strategy,
1673                normalize_scores: true,
1674            };
1675
1676            match mem.search_adaptive(
1677                &args.query,
1678                &query_embedding,
1679                config,
1680                args.snippet_chars,
1681                scope,
1682            ) {
1683                Ok(result) => {
1684                    let mut resp = SearchResponse {
1685                        query: args.query.clone(),
1686                        hits: result.results,
1687                        total_hits: result.stats.returned,
1688                        params: memvid_core::SearchParams {
1689                            top_k: result.stats.returned,
1690                            snippet_chars: args.snippet_chars,
1691                            cursor: args.cursor.clone(),
1692                        },
1693                        elapsed_ms: 0,
1694                        engine: SearchEngineKind::Hybrid,
1695                        next_cursor: None,
1696                        context: String::new(),
1697                    };
1698                    apply_preference_rerank(&mut resp);
1699                    (
1700                        resp,
1701                        "semantic (adaptive vector search)".to_string(),
1702                        Some(result.stats),
1703                    )
1704                }
1705                Err(e) => {
1706                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1707                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1708                    }
1709
1710                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1711                    match mem.vec_search_with_embedding(
1712                        &args.query,
1713                        &query_embedding,
1714                        args.top_k,
1715                        args.snippet_chars,
1716                        scope,
1717                    ) {
1718                        Ok(mut resp) => {
1719                            apply_preference_rerank(&mut resp);
1720                            (resp, "semantic (vector search fallback)".to_string(), None)
1721                        }
1722                        Err(e2) => {
1723                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1724                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1725                            }
1726                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
1727                        }
1728                    }
1729                }
1730            }
1731        } else {
1732            // Standard fixed-k vector search
1733            match mem.vec_search_with_embedding(
1734                &args.query,
1735                &query_embedding,
1736                args.top_k,
1737                args.snippet_chars,
1738                scope,
1739            ) {
1740                Ok(mut resp) => {
1741                    // Apply preference boost to rerank results for preference-seeking queries
1742                    apply_preference_rerank(&mut resp);
1743                    (resp, "semantic (vector search)".to_string(), None)
1744                }
1745                Err(e) => {
1746                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1747                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1748                    }
1749
1750                    // Fall back to lexical search + rerank if vector search fails
1751                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1752                    let request = SearchRequest {
1753                        query: args.query.clone(),
1754                        top_k: args.top_k,
1755                        snippet_chars: args.snippet_chars,
1756                        uri: args.uri.clone(),
1757                        scope: args.scope.clone(),
1758                        cursor: args.cursor.clone(),
1759                        #[cfg(feature = "temporal_track")]
1760                        temporal: None,
1761                        as_of_frame: args.as_of_frame,
1762                        as_of_ts: args.as_of_ts,
1763                        no_sketch: args.no_sketch,
1764                    };
1765                    let mut resp = mem.search(request)?;
1766                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1767                    (resp, "semantic (fallback rerank)".to_string(), None)
1768                }
1769            }
1770        }
1771    } else {
1772        // For lexical and auto modes, use existing behavior
1773        let request = SearchRequest {
1774            query: args.query.clone(),
1775            top_k: args.top_k,
1776            snippet_chars: args.snippet_chars,
1777            uri: args.uri.clone(),
1778            scope: args.scope.clone(),
1779            cursor: args.cursor.clone(),
1780            #[cfg(feature = "temporal_track")]
1781            temporal: None,
1782            as_of_frame: args.as_of_frame,
1783            as_of_ts: args.as_of_ts,
1784            no_sketch: args.no_sketch,
1785        };
1786
1787        let mut resp = mem.search(request)?;
1788
1789        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1790            warn!("Search index unavailable; returning basic text results");
1791        }
1792
1793        let mut engine_label = match resp.engine {
1794            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1795            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1796            SearchEngineKind::Hybrid => "hybrid".to_string(),
1797        };
1798
1799        if runtime_option.is_some() {
1800            engine_label = format!("hybrid ({engine_label} + semantic)");
1801        }
1802
1803        if let Some(ref runtime) = runtime_option {
1804            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1805        }
1806
1807        (resp, engine_label, None)
1808    };
1809
1810    if args.json_legacy {
1811        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1812        emit_legacy_search_json(&response)?;
1813    } else if args.json {
1814        emit_search_json(&response, mode_key)?;
1815    } else {
1816        println!(
1817            "mode: {}   k={}   time: {} ms",
1818            mode_label, response.params.top_k, response.elapsed_ms
1819        );
1820        println!("engine: {}", engine_label);
1821
1822        // Show adaptive retrieval stats if enabled
1823        if let Some(ref stats) = adaptive_stats {
1824            println!(
1825                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1826                stats.total_considered,
1827                stats.returned,
1828                stats.triggered_by,
1829                stats.top_score.unwrap_or(0.0),
1830                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1831            );
1832        }
1833
1834        println!(
1835            "hits: {} (showing {})",
1836            response.total_hits,
1837            response.hits.len()
1838        );
1839        emit_search_table(&response);
1840    }
1841
1842    // Save active replay session if one exists
1843    #[cfg(feature = "replay")]
1844    let _ = mem.save_active_session();
1845
1846    Ok(())
1847}
1848
1849pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1850    // Track query usage against plan quota
1851    crate::api::track_query_usage(config, 1)?;
1852
1853    let mut mem = open_read_only_mem(&args.file)?;
1854    let vector = if let Some(path) = args.embedding.as_deref() {
1855        read_embedding(path)?
1856    } else if let Some(vector_string) = &args.vector {
1857        parse_vector(vector_string)?
1858    } else {
1859        anyhow::bail!("provide --vector or --embedding for search input");
1860    };
1861
1862    let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1863        MemvidError::VecDimensionMismatch { expected, actual } => {
1864            anyhow!(vec_dimension_mismatch_help(expected, actual))
1865        }
1866        other => anyhow!(other),
1867    })?;
1868    let mut enriched = Vec::with_capacity(hits.len());
1869    for hit in hits {
1870        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1871        enriched.push((hit.frame_id, hit.distance, preview));
1872    }
1873
1874    if args.json {
1875        let json_hits: Vec<_> = enriched
1876            .iter()
1877            .map(|(frame_id, distance, preview)| {
1878                json!({
1879                    "frame_id": frame_id,
1880                    "distance": distance,
1881                    "preview": preview,
1882                })
1883            })
1884            .collect();
1885        let json_str = serde_json::to_string_pretty(&json_hits)?;
1886        println!("{}", json_str.to_colored_json_auto()?);
1887    } else if enriched.is_empty() {
1888        println!("No vector matches found");
1889    } else {
1890        for (frame_id, distance, preview) in enriched {
1891            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1892        }
1893    }
1894    Ok(())
1895}
1896
1897pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1898    use memvid_core::AuditOptions;
1899    use std::fs::File;
1900    use std::io::Write;
1901
1902    let mut mem = Memvid::open(&args.file)?;
1903
1904    // Parse date boundaries
1905    let start = parse_date_boundary(args.start.as_ref(), false)?;
1906    let end = parse_date_boundary(args.end.as_ref(), true)?;
1907    if let (Some(start_ts), Some(end_ts)) = (start, end) {
1908        if end_ts < start_ts {
1909            anyhow::bail!("--end must not be earlier than --start");
1910        }
1911    }
1912
1913    // Set up embedding runtime if needed
1914    let ask_mode: AskMode = args.mode.into();
1915    let runtime = match args.mode {
1916        AskModeArg::Lex => None,
1917        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1918        AskModeArg::Hybrid => try_load_embedding_runtime(config),
1919    };
1920    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1921
1922    // Build audit options
1923    let options = AuditOptions {
1924        top_k: Some(args.top_k),
1925        snippet_chars: Some(args.snippet_chars),
1926        mode: Some(ask_mode),
1927        scope: args.scope,
1928        start,
1929        end,
1930        include_snippets: true,
1931    };
1932
1933    // Run the audit
1934    let mut report = mem.audit(&args.question, Some(options), embedder)?;
1935
1936    // If --use-model is provided, run model inference to synthesize the answer
1937    if let Some(model_name) = args.use_model.as_deref() {
1938        // Build context from sources for model inference
1939        let context = report
1940            .sources
1941            .iter()
1942            .filter_map(|s| s.snippet.clone())
1943            .collect::<Vec<_>>()
1944            .join("\n\n");
1945
1946        match run_model_inference(
1947            model_name,
1948            &report.question,
1949            &context,
1950            &[], // No hits needed for audit
1951            None,
1952            None,
1953            None, // No system prompt override for audit
1954        ) {
1955            Ok(inference) => {
1956                report.answer = Some(inference.answer.answer);
1957                report.notes.push(format!(
1958                    "Answer synthesized by model: {}",
1959                    inference.answer.model
1960                ));
1961            }
1962            Err(err) => {
1963                warn!(
1964                    "model inference unavailable for '{}': {err}. Using default answer.",
1965                    model_name
1966                );
1967            }
1968        }
1969    }
1970
1971    // Format the output
1972    let output = match args.format {
1973        AuditFormat::Text => report.to_text(),
1974        AuditFormat::Markdown => report.to_markdown(),
1975        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1976    };
1977
1978    // Write output
1979    if let Some(out_path) = args.out {
1980        let mut file = File::create(&out_path)?;
1981        file.write_all(output.as_bytes())?;
1982        println!("Audit report written to: {}", out_path.display());
1983    } else {
1984        println!("{}", output);
1985    }
1986
1987    Ok(())
1988}
1989
1990fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1991    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1992
1993    let mut additional_params = serde_json::Map::new();
1994    if let Some(cursor) = &response.params.cursor {
1995        additional_params.insert("cursor".into(), json!(cursor));
1996    }
1997
1998    let mut params = serde_json::Map::new();
1999    params.insert("top_k".into(), json!(response.params.top_k));
2000    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2001    params.insert("mode".into(), json!(mode));
2002    params.insert(
2003        "additional_params".into(),
2004        serde_json::Value::Object(additional_params),
2005    );
2006
2007    let mut metadata_json = serde_json::Map::new();
2008    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2009    metadata_json.insert("total_hits".into(), json!(response.total_hits));
2010    metadata_json.insert(
2011        "next_cursor".into(),
2012        match &response.next_cursor {
2013            Some(cursor) => json!(cursor),
2014            None => serde_json::Value::Null,
2015        },
2016    );
2017    metadata_json.insert("engine".into(), json!(response.engine));
2018    metadata_json.insert("params".into(), serde_json::Value::Object(params));
2019
2020    let body = json!({
2021        "version": "mv2.result.v2",
2022        "query": response.query,
2023        "metadata": metadata_json,
2024        "hits": hits,
2025        "context": response.context,
2026    });
2027    let json_str = serde_json::to_string_pretty(&body)?;
2028    println!("{}", json_str.to_colored_json_auto()?);
2029    Ok(())
2030}
2031
/// Emit the full `ask` response as `mv2.ask.v1` JSON on stdout.
///
/// The base payload carries the question/answer pair, serialized retrieval
/// hits, citations, timing stats, and a length-capped retrieval context.
/// Optional sections are layered on top of the base object:
/// - model name(s), cache flag, token usage/cost, and grounding score, when
///   `inference` is present;
/// - a rich `sources` array (frame metadata + snippets), when
///   `include_sources` is set;
/// - `follow_up` suggestions, when confidence/grounding is low.
///
/// `mem` is taken mutably because source and follow-up enrichment read
/// frames and timeline entries from the store.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // Hits are serialized via the same converter used for search output.
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations: `chunk_range` and `score` are optional and omitted when absent.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; retrieval context is truncated so very large contexts
    // do not flood stdout.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // Only surface `model_used` when it differs from the requested
            // model (e.g. a fallback model was substituted).
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            // Add usage and cost if available. Cache hits report zero cost
            // and the would-have-been cost under `saved_cost_usd`.
            if let Some(usage) = &inf.usage {
                map.insert("usage".into(), json!({
                    "input_tokens": usage.input_tokens,
                    "output_tokens": usage.output_tokens,
                    "total_tokens": usage.total_tokens,
                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                }));
            }
            // Add grounding/hallucination score if available
            if let Some(grounding) = &inf.grounding {
                map.insert("grounding".into(), json!({
                    "score": grounding.score,
                    "label": grounding.label(),
                    "sentence_count": grounding.sentence_count,
                    "grounded_sentences": grounding.grounded_sentences,
                    "has_warning": grounding.has_warning,
                    "warning_reason": grounding.warning_reason,
                }));
            }
        }
    }

    // Add detailed sources if requested
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Add follow-up suggestions if confidence is low
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2135
2136fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2137    response
2138        .citations
2139        .iter()
2140        .enumerate()
2141        .map(|(idx, citation)| {
2142            let mut source = serde_json::Map::new();
2143            source.insert("index".into(), json!(idx + 1));
2144            source.insert("frame_id".into(), json!(citation.frame_id));
2145            source.insert("uri".into(), json!(citation.uri));
2146
2147            if let Some(range) = citation.chunk_range {
2148                source.insert("chunk_range".into(), json!([range.0, range.1]));
2149            }
2150            if let Some(score) = citation.score {
2151                source.insert("score".into(), json!(score));
2152            }
2153
2154            // Get frame metadata for rich source information
2155            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2156                if let Some(title) = frame.title {
2157                    source.insert("title".into(), json!(title));
2158                }
2159                if !frame.tags.is_empty() {
2160                    source.insert("tags".into(), json!(frame.tags));
2161                }
2162                if !frame.labels.is_empty() {
2163                    source.insert("labels".into(), json!(frame.labels));
2164                }
2165                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2166                if !frame.content_dates.is_empty() {
2167                    source.insert("content_dates".into(), json!(frame.content_dates));
2168                }
2169            }
2170
2171            // Get snippet from hit
2172            if let Some(hit) = response
2173                .retrieval
2174                .hits
2175                .iter()
2176                .find(|h| h.frame_id == citation.frame_id)
2177            {
2178                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2179                source.insert("snippet".into(), json!(snippet));
2180            }
2181
2182            serde_json::Value::Object(source)
2183        })
2184        .collect()
2185}
2186
2187/// Build follow-up suggestions when the answer has low grounding/confidence.
2188/// Helps users understand what the memory contains and suggests relevant questions.
2189fn build_follow_up_suggestions(
2190    response: &AskResponse,
2191    inference: Option<&ModelInference>,
2192    mem: &mut Memvid,
2193) -> Option<serde_json::Value> {
2194    // Check if we need follow-up suggestions
2195    let needs_followup = inference
2196        .and_then(|inf| inf.grounding.as_ref())
2197        .map(|g| g.score < 0.3 || g.has_warning)
2198        .unwrap_or(false);
2199
2200    // Also trigger if retrieval hits have very low scores or no hits
2201    let low_retrieval = response.retrieval.hits.first()
2202        .and_then(|h| h.score)
2203        .map(|score| score < -2.0)
2204        .unwrap_or(true);
2205
2206    if !needs_followup && !low_retrieval {
2207        return None;
2208    }
2209
2210    // Get available topics from the memory by sampling timeline entries
2211    let limit = std::num::NonZeroU64::new(20).unwrap();
2212    let timeline_query = TimelineQueryBuilder::default()
2213        .limit(limit)
2214        .build();
2215
2216    let available_topics: Vec<String> = mem
2217        .timeline(timeline_query)
2218        .ok()
2219        .map(|entries| {
2220            entries
2221                .iter()
2222                .filter_map(|e| {
2223                    // Extract meaningful preview/title
2224                    let preview = e.preview.trim();
2225                    if preview.is_empty() || preview.len() < 5 {
2226                        return None;
2227                    }
2228                    // Get first line or truncate
2229                    let first_line = preview.lines().next().unwrap_or(preview);
2230                    if first_line.len() > 60 {
2231                        Some(format!("{}...", &first_line[..57]))
2232                    } else {
2233                        Some(first_line.to_string())
2234                    }
2235                })
2236                .collect::<std::collections::HashSet<_>>()
2237                .into_iter()
2238                .take(5)
2239                .collect()
2240        })
2241        .unwrap_or_default();
2242
2243    // Determine the reason for low confidence
2244    let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2245        "No relevant information found in memory"
2246    } else if inference.and_then(|i| i.grounding.as_ref()).map(|g| g.has_warning).unwrap_or(false) {
2247        "Answer may not be well-supported by the available context"
2248    } else {
2249        "Low confidence in the answer"
2250    };
2251
2252    // Generate suggestion questions based on available topics
2253    let suggestions: Vec<String> = if available_topics.is_empty() {
2254        vec![
2255            "What information is stored in this memory?".to_string(),
2256            "Can you list the main topics covered?".to_string(),
2257        ]
2258    } else {
2259        available_topics
2260            .iter()
2261            .take(3)
2262            .map(|topic| format!("Tell me about {}", topic))
2263            .chain(std::iter::once("What topics are in this memory?".to_string()))
2264            .collect()
2265    };
2266
2267    Some(json!({
2268        "needed": true,
2269        "reason": reason,
2270        "hint": if available_topics.is_empty() {
2271            "This memory may not contain information about your query."
2272        } else {
2273            "This memory contains information about different topics. Try asking about those instead."
2274        },
2275        "available_topics": available_topics,
2276        "suggestions": suggestions
2277    }))
2278}
2279
2280fn emit_model_json(
2281    response: &AskResponse,
2282    requested_model: &str,
2283    inference: Option<&ModelInference>,
2284    include_sources: bool,
2285    mem: &mut Memvid,
2286) -> Result<()> {
2287    let answer = response.answer.clone().unwrap_or_default();
2288    let requested_label = inference
2289        .map(|m| m.answer.requested.clone())
2290        .unwrap_or_else(|| requested_model.to_string());
2291    let used_label = inference
2292        .map(|m| m.answer.model.clone())
2293        .unwrap_or_else(|| requested_model.to_string());
2294
2295    let mut body = json!({
2296        "question": response.question,
2297        "model": requested_label,
2298        "model_used": used_label,
2299        "answer": answer,
2300        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2301    });
2302
2303    // Add usage and cost if available
2304    if let Some(inf) = inference {
2305        if let serde_json::Value::Object(ref mut map) = body {
2306            map.insert("cached".into(), json!(inf.cached));
2307            if let Some(usage) = &inf.usage {
2308                map.insert("usage".into(), json!({
2309                    "input_tokens": usage.input_tokens,
2310                    "output_tokens": usage.output_tokens,
2311                    "total_tokens": usage.total_tokens,
2312                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2313                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2314                }));
2315            }
2316            if let Some(grounding) = &inf.grounding {
2317                map.insert("grounding".into(), json!({
2318                    "score": grounding.score,
2319                    "label": grounding.label(),
2320                    "sentence_count": grounding.sentence_count,
2321                    "grounded_sentences": grounding.grounded_sentences,
2322                    "has_warning": grounding.has_warning,
2323                    "warning_reason": grounding.warning_reason,
2324                }));
2325            }
2326        }
2327    }
2328
2329    // Add detailed sources if requested
2330    if include_sources {
2331        if let serde_json::Value::Object(ref mut map) = body {
2332            let sources = build_sources_json(response, mem);
2333            map.insert("sources".into(), json!(sources));
2334        }
2335    }
2336
2337    // Add follow-up suggestions if confidence is low
2338    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2339        if let serde_json::Value::Object(ref mut map) = body {
2340            map.insert("follow_up".into(), follow_up);
2341        }
2342    }
2343
2344    // Use colored JSON output
2345    let json_str = serde_json::to_string_pretty(&body)?;
2346    println!("{}", json_str.to_colored_json_auto()?);
2347    Ok(())
2348}
2349
2350fn emit_ask_pretty(
2351    response: &AskResponse,
2352    requested_mode: AskModeArg,
2353    inference: Option<&ModelInference>,
2354    include_sources: bool,
2355    mem: &mut Memvid,
2356) {
2357    println!(
2358        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2359        ask_mode_pretty(requested_mode),
2360        ask_retriever_pretty(response.retriever),
2361        response.retrieval.params.top_k,
2362        response.stats.latency_ms,
2363        response.stats.retrieval_ms
2364    );
2365    if let Some(inference) = inference {
2366        let model = &inference.answer;
2367        let cached_label = if inference.cached { " [CACHED]" } else { "" };
2368        if model.requested.trim() == model.model {
2369            println!("model: {}{}", model.model, cached_label);
2370        } else {
2371            println!(
2372                "model requested: {}   model used: {}{}",
2373                model.requested, model.model, cached_label
2374            );
2375        }
2376        // Display usage and cost if available
2377        if let Some(usage) = &inference.usage {
2378            let cost_label = if inference.cached {
2379                format!("$0.00 (saved ${:.6})", usage.cost_usd)
2380            } else {
2381                format!("${:.6}", usage.cost_usd)
2382            };
2383            println!(
2384                "tokens: {} input + {} output = {}   cost: {}",
2385                usage.input_tokens,
2386                usage.output_tokens,
2387                usage.total_tokens,
2388                cost_label
2389            );
2390        }
2391        // Display grounding/hallucination score
2392        if let Some(grounding) = &inference.grounding {
2393            let warning = if grounding.has_warning {
2394                format!(" [WARNING: {}]", grounding.warning_reason.as_deref().unwrap_or("potential hallucination"))
2395            } else {
2396                String::new()
2397            };
2398            println!(
2399                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2400                grounding.score * 100.0,
2401                grounding.label(),
2402                grounding.grounded_sentences,
2403                grounding.sentence_count,
2404                warning
2405            );
2406        }
2407    }
2408    println!(
2409        "engine: {}",
2410        search_engine_label(&response.retrieval.engine)
2411    );
2412    println!(
2413        "hits: {} (showing {})",
2414        response.retrieval.total_hits,
2415        response.retrieval.hits.len()
2416    );
2417
2418    if response.context_only {
2419        println!();
2420        println!("Context-only mode: synthesis disabled.");
2421        println!();
2422    } else if let Some(answer) = &response.answer {
2423        println!();
2424        println!("Answer:\n{answer}");
2425        println!();
2426    }
2427
2428    if !response.citations.is_empty() {
2429        println!("Citations:");
2430        for citation in &response.citations {
2431            match citation.score {
2432                Some(score) => println!(
2433                    "[{}] {} (frame {}, score {:.3})",
2434                    citation.index, citation.uri, citation.frame_id, score
2435                ),
2436                None => println!(
2437                    "[{}] {} (frame {})",
2438                    citation.index, citation.uri, citation.frame_id
2439                ),
2440            }
2441        }
2442        println!();
2443    }
2444
2445    // Print detailed sources if requested
2446    if include_sources && !response.citations.is_empty() {
2447        println!("=== SOURCES ===");
2448        println!();
2449        for citation in &response.citations {
2450            println!("[{}] {}", citation.index, citation.uri);
2451
2452            // Get frame metadata
2453            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2454                if let Some(title) = &frame.title {
2455                    println!("    Title: {}", title);
2456                }
2457                println!("    Frame ID: {}", citation.frame_id);
2458                if let Some(score) = citation.score {
2459                    println!("    Score: {:.4}", score);
2460                }
2461                if let Some((start, end)) = citation.chunk_range {
2462                    println!("    Range: [{}..{})", start, end);
2463                }
2464                if !frame.tags.is_empty() {
2465                    println!("    Tags: {}", frame.tags.join(", "));
2466                }
2467                if !frame.labels.is_empty() {
2468                    println!("    Labels: {}", frame.labels.join(", "));
2469                }
2470                println!("    Timestamp: {}", frame.timestamp);
2471                if !frame.content_dates.is_empty() {
2472                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2473                }
2474            }
2475
2476            // Get snippet from hit
2477            if let Some(hit) = response
2478                .retrieval
2479                .hits
2480                .iter()
2481                .find(|h| h.frame_id == citation.frame_id)
2482            {
2483                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2484                let truncated = if snippet.len() > 200 {
2485                    format!("{}...", &snippet[..200])
2486                } else {
2487                    snippet.clone()
2488                };
2489                println!("    Snippet: {}", truncated.replace('\n', " "));
2490            }
2491            println!();
2492        }
2493    }
2494
2495    if !include_sources {
2496        println!();
2497        emit_search_table(&response.retrieval);
2498    }
2499
2500    // Display follow-up suggestions if confidence is low
2501    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2502        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2503            if needed {
2504                println!();
2505                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2506                println!("💡 FOLLOW-UP SUGGESTIONS");
2507                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2508
2509                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2510                    println!("Reason: {}", reason);
2511                }
2512
2513                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2514                    println!("Hint: {}", hint);
2515                }
2516
2517                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2518                    if !topics.is_empty() {
2519                        println!();
2520                        println!("Available topics in this memory:");
2521                        for topic in topics.iter().filter_map(|t| t.as_str()) {
2522                            println!("  • {}", topic);
2523                        }
2524                    }
2525                }
2526
2527                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2528                    if !suggestions.is_empty() {
2529                        println!();
2530                        println!("Try asking:");
2531                        for (i, suggestion) in suggestions.iter().filter_map(|s| s.as_str()).enumerate() {
2532                            println!("  {}. \"{}\"", i + 1, suggestion);
2533                        }
2534                    }
2535                }
2536                println!();
2537            }
2538        }
2539    }
2540}
2541
2542/// Emit verbatim evidence as JSON without LLM synthesis.
2543/// Format: {evidence: [{source, text, score}], question, hits, stats}
2544fn emit_verbatim_evidence_json(
2545    response: &AskResponse,
2546    include_sources: bool,
2547    mem: &mut Memvid,
2548) -> Result<()> {
2549    // Build evidence array from hits - verbatim excerpts with citations
2550    let evidence: Vec<_> = response
2551        .retrieval
2552        .hits
2553        .iter()
2554        .enumerate()
2555        .map(|(idx, hit)| {
2556            let mut entry = serde_json::Map::new();
2557            entry.insert("index".into(), json!(idx + 1));
2558            entry.insert("frame_id".into(), json!(hit.frame_id));
2559            entry.insert("uri".into(), json!(&hit.uri));
2560            if let Some(title) = &hit.title {
2561                entry.insert("title".into(), json!(title));
2562            }
2563            // Use chunk_text if available (more specific), otherwise full text
2564            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2565            entry.insert("text".into(), json!(verbatim));
2566            if let Some(score) = hit.score {
2567                entry.insert("score".into(), json!(score));
2568            }
2569            serde_json::Value::Object(entry)
2570        })
2571        .collect();
2572
2573    // Build sources array if requested
2574    let sources: Option<Vec<_>> = if include_sources {
2575        Some(
2576            response
2577                .retrieval
2578                .hits
2579                .iter()
2580                .filter_map(|hit| {
2581                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
2582                        let mut source = serde_json::Map::new();
2583                        source.insert("frame_id".into(), json!(frame.id));
2584                        source.insert("uri".into(), json!(frame.uri.as_deref().unwrap_or("(unknown)")));
2585                        if let Some(title) = &frame.title {
2586                            source.insert("title".into(), json!(title));
2587                        }
2588                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
2589                        if !frame.tags.is_empty() {
2590                            source.insert("tags".into(), json!(frame.tags));
2591                        }
2592                        if !frame.labels.is_empty() {
2593                            source.insert("labels".into(), json!(frame.labels));
2594                        }
2595                        serde_json::Value::Object(source)
2596                    })
2597                })
2598                .collect(),
2599        )
2600    } else {
2601        None
2602    };
2603
2604    let mut body = json!({
2605        "version": "mv2.evidence.v1",
2606        "mode": "verbatim",
2607        "question": response.question,
2608        "evidence": evidence,
2609        "evidence_count": evidence.len(),
2610        "total_hits": response.retrieval.total_hits,
2611        "stats": {
2612            "retrieval_ms": response.stats.retrieval_ms,
2613            "latency_ms": response.stats.latency_ms,
2614        },
2615        "engine": search_engine_label(&response.retrieval.engine),
2616    });
2617
2618    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
2619        map.insert("sources".into(), json!(sources));
2620    }
2621
2622    let json_str = serde_json::to_string_pretty(&body)?;
2623    println!("{}", json_str.to_colored_json_auto()?);
2624    Ok(())
2625}
2626
/// Emit verbatim evidence in human-readable format without LLM synthesis.
///
/// Layout: stats header, a banner with the (truncated) question, one block
/// per hit showing a normalized relevance percentage and the verbatim
/// excerpt, optional per-frame source details, and a closing note pointing
/// the user at `--use-model`.
fn emit_verbatim_evidence_pretty(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) {
    println!(
        "mode: {}   latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    // Header
    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    // Display each piece of evidence with citation
    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        // Relevance is shown as 0-100% relative to this result set's range.
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str).green().bold()
        );
        println!("    Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Show verbatim text - prefer chunk_text if available
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        // Indent each line for readability
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!("    │ {}", line);
            }
        }
        println!();
    }

    // Print detailed sources if requested
    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            // Frames that fail to load are silently skipped here.
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!("{}", format!("[{}] {}", idx + 1, frame.uri.as_deref().unwrap_or("(unknown)")).cyan());
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", frame.id);
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    // Note about no LLM synthesis
    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2742
2743fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2744    let hits: Vec<_> = response
2745        .hits
2746        .iter()
2747        .map(|hit| {
2748            json!({
2749                "frame_id": hit.frame_id,
2750                "matches": hit.matches,
2751                "snippets": [hit.text.clone()],
2752            })
2753        })
2754        .collect();
2755    println!("{}", serde_json::to_string_pretty(&hits)?);
2756    Ok(())
2757}
2758
/// Print search hits as a human-readable list: rank/uri/match count first,
/// then any optional fields (title, normalized relevance, byte ranges, chunk
/// text, metadata) and the snippet, followed by the pagination cursor when
/// more results are available.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!("  Title: {title}");
        }
        if let Some(score) = hit.score {
            // Shown as 0-100% relative to the best/worst hit in this set.
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!("  Relevance: {:.0}%", normalized);
        }
        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!("  Chunk Text: {}", chunk_text.trim());
        }
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!("  Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!("  Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!("  Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!("  Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!("  Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                // Entities rendered as "name (kind)" pairs.
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!("  Entities: {}", entity_strs.join(", "));
            }
        }
        println!("  Snippet: {}", hit.text.trim());
        println!();
    }
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2817
2818fn ask_mode_display(mode: AskModeArg) -> &'static str {
2819    match mode {
2820        AskModeArg::Lex => "lex",
2821        AskModeArg::Sem => "sem",
2822        AskModeArg::Hybrid => "hybrid",
2823    }
2824}
2825
2826fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2827    match mode {
2828        AskModeArg::Lex => "Lexical",
2829        AskModeArg::Sem => "Semantic",
2830        AskModeArg::Hybrid => "Hybrid",
2831    }
2832}
2833
2834fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2835    match retriever {
2836        AskRetriever::Lex => "lex",
2837        AskRetriever::Semantic => "semantic",
2838        AskRetriever::Hybrid => "hybrid",
2839        AskRetriever::LexFallback => "lex_fallback",
2840        AskRetriever::TimelineFallback => "timeline_fallback",
2841    }
2842}
2843
2844fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2845    match retriever {
2846        AskRetriever::Lex => "Lexical",
2847        AskRetriever::Semantic => "Semantic",
2848        AskRetriever::Hybrid => "Hybrid",
2849        AskRetriever::LexFallback => "Lexical (fallback)",
2850        AskRetriever::TimelineFallback => "Timeline (fallback)",
2851    }
2852}
2853
2854fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2855    match engine {
2856        SearchEngineKind::Tantivy => "text (tantivy)",
2857        SearchEngineKind::LexFallback => "text (fallback)",
2858        SearchEngineKind::Hybrid => "hybrid",
2859    }
2860}
2861
2862fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2863    let digest = hash(uri.as_bytes()).to_hex().to_string();
2864    let prefix_len = digest.len().min(12);
2865    let prefix = &digest[..prefix_len];
2866    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2867}
2868
/// Return `text` unchanged when it has at most `limit` chars; otherwise the
/// first `limit` chars followed by `...`. Counts Unicode scalar values, so a
/// multi-byte character is never split.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    let mut chars = text.chars();
    let head: String = chars.by_ref().take(limit).collect();
    // Anything left after taking `limit` chars means we truncated.
    if chars.next().is_none() {
        text.to_string()
    } else {
        format!("{head}...")
    }
}
2877
/// Normalize a BM25 score to 0-100 range for user-friendly display.
///
/// BM25 scores can be negative (Tantivy uses log-based TF which can go
/// negative for very common terms), so raw values confuse users. This maps a
/// score to its position within the result set's `[min_score, max_score]`
/// span, clamped to 0-100.
///
/// - Returns `100.0` when the span is degenerate (all scores equal)
/// - Otherwise returns the clamped 0-100 position inside the span
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let spread = max_score - min_score;
    // Degenerate span: every hit scored the same, so show full relevance.
    if spread.abs() < f32::EPSILON {
        return 100.0;
    }
    ((score - min_score) / spread * 100.0).clamp(0.0, 100.0)
}
2894
/// Extract min and max over the `Some` scores in the slice; returns
/// `(0.0, 0.0)` when no score is present.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let present: Vec<f32> = scores.iter().copied().flatten().collect();
    if present.is_empty() {
        return (0.0, 0.0);
    }
    let lo = present.iter().copied().fold(f32::INFINITY, f32::min);
    let hi = present.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    (lo, hi)
}
2905
/// Serialize a single `SearchHit` into the JSON object emitted by the `find`
/// command: rank, optional score, a stable derived id, frame/uri/title,
/// ranges, text, and a nested `metadata` object (empty collections omitted).
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Deterministic id derived from (uri, frame_id, range start).
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // Fall back to the full hit range when no chunk-level range exists,
    // so `chunk_range` is always present in the output.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata (match count only) when the hit carries none.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                // Confidence is optional on extracted entities.
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Reorders `response.hits` in place and rewrites each hit's `rank` to its
/// new 1-based position. Hits without a usable embedding simply contribute
/// no semantic RRF term.
///
/// # Errors
/// Propagates failures from query embedding or frame-embedding lookups.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Cosine similarity of the query embedding against each hit's stored
    // frame embedding (frame_id -> similarity).
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Skip embeddings whose dimension doesn't match the runtime's
            // model (likely produced by a different embedder).
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // No usable embeddings: leave the lexical ordering untouched.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    // Ranks are 1-based to match the lexical `hit.rank` convention.
    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // (original index, combined RRF score, lexical rank) per hit.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Descending combined score; ties fall back to the better (lower) lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order, rewriting ranks to new positions.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3071
3072/// Rerank search results by boosting hits that contain user preference signals.
3073/// Only applies when the query appears to be seeking recommendations or preferences.
3074fn apply_preference_rerank(response: &mut SearchResponse) {
3075    if response.hits.is_empty() {
3076        return;
3077    }
3078
3079    // Check if query is preference-seeking
3080    let query_lower = response.query.to_lowercase();
3081    let is_preference_query = query_lower.contains("suggest")
3082        || query_lower.contains("recommend")
3083        || query_lower.contains("should i")
3084        || query_lower.contains("what should")
3085        || query_lower.contains("prefer")
3086        || query_lower.contains("favorite")
3087        || query_lower.contains("best for me");
3088
3089    if !is_preference_query {
3090        return;
3091    }
3092
3093    // Compute boost scores for each hit
3094    let mut scored: Vec<(usize, f32, f32)> = response
3095        .hits
3096        .iter()
3097        .enumerate()
3098        .map(|(idx, hit)| {
3099            let original_score = hit.score.unwrap_or(0.0);
3100            let preference_boost = compute_preference_boost(&hit.text);
3101            let boosted_score = original_score + preference_boost;
3102            (idx, boosted_score, original_score)
3103        })
3104        .collect();
3105
3106    // Sort by boosted score (descending)
3107    scored.sort_by(|a, b| {
3108        b.1.partial_cmp(&a.1)
3109            .unwrap_or(Ordering::Equal)
3110            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3111    });
3112
3113    // Reorder hits
3114    let mut reordered = Vec::with_capacity(response.hits.len());
3115    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3116        let mut hit = response.hits[idx].clone();
3117        hit.rank = rank_idx + 1;
3118        reordered.push(hit);
3119    }
3120
3121    response.hits = reordered;
3122}
3123
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
///
/// Returns a boost in `[0.0, 0.5]`: +0.15 per established-context phrase,
/// +0.02 per generic first-person word, +0.02 per request phrase, capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    let text_lower = text.to_lowercase();
    // Pad with one space on each side so the space-delimited first-person
    // patterns below (" i ", " my ", " me ") also match when the word sits at
    // the very start or end of the text (e.g. "i prefer tea", "works for me").
    let padded = format!(" {} ", text_lower);
    let mut boost = 0.0f32;

    // Strong signals: Past/present user experiences and possessions
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for pattern in established_context {
        if text_lower.contains(pattern) {
            boost += 0.15;
        }
    }

    // Moderate signals: General first-person statements.
    // Checked against the padded text so start/end occurrences count too.
    let first_person = [" i ", " my ", " me "];
    for pattern in first_person {
        if padded.contains(pattern) {
            boost += 0.02;
        }
    }

    // Weak signals: Requests/intentions (not yet established preferences)
    // These indicate the user wants something, but don't describe established context
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in request_patterns {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
3216
/// Cosine similarity of two vectors, accumulated over the shorter length.
/// Returns 0.0 when either vector has (near-)zero magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Single pass: dot product plus both squared norms.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (x, y)| (d + x * y, na + x * x, nb + y * y),
    );

    // Guard against division by (near-)zero magnitudes.
    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }

    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3233
3234/// Apply cross-encoder reranking to search results.
3235///
3236/// Cross-encoders directly score query-document pairs and can understand
3237/// more nuanced relevance than bi-encoders (embeddings). This is especially
3238/// useful for personalization queries where semantic similarity != relevance.
3239///
3240/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
3241fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
3242    if response.hits.is_empty() || response.hits.len() < 2 {
3243        return Ok(());
3244    }
3245
3246    // Only rerank if we have enough candidates
3247    let candidates_to_rerank = response.hits.len().min(50);
3248
3249    // Initialize the reranker (model will be downloaded on first use, ~86MB)
3250    // Using JINA Turbo - faster than BGE while maintaining good accuracy
3251    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
3252        .with_show_download_progress(true);
3253
3254    let mut reranker = match TextRerank::try_new(options) {
3255        Ok(r) => r,
3256        Err(e) => {
3257            warn!("Failed to initialize cross-encoder reranker: {e}");
3258            return Ok(());
3259        }
3260    };
3261
3262    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
3263    let documents: Vec<String> = response.hits[..candidates_to_rerank]
3264        .iter()
3265        .map(|hit| hit.text.clone())
3266        .collect();
3267
3268    // Rerank using cross-encoder
3269    info!("Cross-encoder reranking {} candidates", documents.len());
3270    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
3271        Ok(results) => results,
3272        Err(e) => {
3273            warn!("Cross-encoder reranking failed: {e}");
3274            return Ok(());
3275        }
3276    };
3277
3278    // Blend cross-encoder scores with original scores to preserve temporal boosting.
3279    // The original score includes recency boost; purely replacing it loses temporal relevance.
3280    // We collect (blended_score, original_idx) pairs and sort by blended score.
3281    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());
3282
3283    // Find score range for normalization (original scores can be negative for BM25)
3284    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
3285        .iter()
3286        .filter_map(|h| h.score)
3287        .collect();
3288    let orig_min = original_scores.iter().cloned().fold(f32::INFINITY, f32::min);
3289    let orig_max = original_scores.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
3290    let orig_range = (orig_max - orig_min).max(0.001); // Avoid division by zero
3291
3292    for result in rerank_results.iter() {
3293        let original_idx = result.index;
3294        let cross_encoder_score = result.score; // Already normalized 0-1
3295
3296        // Normalize original score to 0-1 range
3297        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
3298        let normalized_original = (original_score - orig_min) / orig_range;
3299
3300        // Blend: 20% cross-encoder (relevance) + 80% original (includes temporal boost)
3301        // Very heavy weight on original score to preserve temporal ranking
3302        // The original score already incorporates BM25 + recency boost
3303        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;
3304
3305        scored_hits.push((blended, original_idx));
3306    }
3307
3308    // Sort by blended score (descending)
3309    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
3310
3311    // Build reordered hits with new ranks
3312    let mut reordered = Vec::with_capacity(response.hits.len());
3313    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
3314        let mut hit = response.hits[original_idx].clone();
3315        hit.rank = new_rank + 1;
3316        // Store blended score for reference
3317        hit.score = Some(blended_score);
3318        reordered.push(hit);
3319    }
3320
3321    // Add any remaining hits that weren't reranked (beyond top-50)
3322    for hit in response.hits.iter().skip(candidates_to_rerank) {
3323        let mut h = hit.clone();
3324        h.rank = reordered.len() + 1;
3325        reordered.push(h);
3326    }
3327
3328    response.hits = reordered;
3329    info!("Cross-encoder reranking complete");
3330    Ok(())
3331}
3332
3333/// Build a context string from memory cards stored in the MV2 file.
3334/// Groups facts by entity for better LLM comprehension.
3335fn build_memory_context(mem: &Memvid) -> String {
3336    let entities = mem.memory_entities();
3337    if entities.is_empty() {
3338        return String::new();
3339    }
3340
3341    let mut sections = Vec::new();
3342    for entity in entities {
3343        let cards = mem.get_entity_memories(&entity);
3344        if cards.is_empty() {
3345            continue;
3346        }
3347
3348        let mut entity_lines = Vec::new();
3349        for card in cards {
3350            // Format: "slot: value" with optional polarity indicator
3351            let polarity_marker = card
3352                .polarity
3353                .as_ref()
3354                .map(|p| match p.to_string().as_str() {
3355                    "Positive" => " (+)",
3356                    "Negative" => " (-)",
3357                    _ => "",
3358                })
3359                .unwrap_or("");
3360            entity_lines.push(format!(
3361                "  - {}: {}{}",
3362                card.slot, card.value, polarity_marker
3363            ));
3364        }
3365
3366        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3367    }
3368
3369    sections.join("\n\n")
3370}
3371
3372/// Build a context string from entities found in search hits.
3373/// Groups entities by type for better LLM comprehension.
3374fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3375    use std::collections::HashMap;
3376
3377    // Collect unique entities by kind
3378    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3379
3380    for hit in hits {
3381        if let Some(metadata) = &hit.metadata {
3382            for entity in &metadata.entities {
3383                entities_by_kind
3384                    .entry(entity.kind.clone())
3385                    .or_default()
3386                    .push(entity.name.clone());
3387            }
3388        }
3389    }
3390
3391    if entities_by_kind.is_empty() {
3392        return String::new();
3393    }
3394
3395    // Deduplicate and format
3396    let mut sections = Vec::new();
3397    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3398    sorted_kinds.sort();
3399
3400    for kind in sorted_kinds {
3401        let names = entities_by_kind.get(kind).unwrap();
3402        let mut unique_names: Vec<_> = names.iter().collect();
3403        unique_names.sort();
3404        unique_names.dedup();
3405
3406        let names_str = unique_names
3407            .iter()
3408            .take(10) // Limit to 10 entities per kind
3409            .map(|s| s.as_str())
3410            .collect::<Vec<_>>()
3411            .join(", ");
3412
3413        sections.push(format!("{}: {}", kind, names_str));
3414    }
3415
3416    sections.join("\n")
3417}