Skip to main content

memvid_cli/commands/
search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21    TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24    types::{
25        AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
26        SearchHitMetadata,
27    },
28    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
29    SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45    run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
46};
47
48// frame_to_json and print_frame_summary available from commands but not used in this module
49use crate::config::{
50    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
51    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig,
52    EmbeddingModelChoice, EmbeddingRuntime,
53};
54use crate::utils::{
55    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56    parse_date_boundary, parse_vector, read_embedding,
57};
58
// Cap (in characters) on retrieval context rendered in output — presumably
// enforced by the ask/find handlers elsewhere in this module; confirm at use site.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone used by temporal commands when `--tz` is not supplied.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67        message.push_str(&format!(
68            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69            model.name(),
70            model.name()
71        ));
72        if model.is_openai() {
73            message.push_str(" (and set `OPENAI_API_KEY`).");
74        } else {
75            message.push('.');
76        }
77        message.push_str(&format!(
78            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79            model.name()
80        ));
81        message.push_str(&format!(
82            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83        ));
84        message.push_str("\nOr use `--mode lex` to disable semantic search.");
85    }
86    message
87}
88
/// Arguments for the `timeline` subcommand
// NOTE: fields use `//` comments (not `///`) so clap-generated help text is
// unchanged; `///` on a clap field becomes its help string.
#[derive(Args)]
pub struct TimelineArgs {
    // Path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the timeline ordering — presumably newest-first; confirm in core.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be >= 1 by construction).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<i64>.map(|_| ()).is_none(),
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
/// Arguments for the `when` subcommand
// NOTE: fields use `//` comments (not `///`) so clap-generated help text is
// unchanged; `///` on a clap field becomes its help string.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required, unlike `timeline`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for phrase resolution; defaults to DEFAULT_TEMPORAL_TZ.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // Pin the "now" anchor for reproducible resolution (RFC3339).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be >= 1).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound — presumably unix seconds; confirm in core.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound — presumably unix seconds; confirm in core.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the ordering of results.
    #[arg(long)]
    pub reverse: bool,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
}
149
/// Arguments for the `ask` subcommand
// NOTE: undocumented fields use `//` comments (not `///`) so clap-generated
// help text is unchanged; `///` on a clap field becomes its help string.
#[derive(Args)]
pub struct AskArgs {
    // Positional targets — presumably memory file(s) and/or question words;
    // exact interpretation happens in the handler (not visible here).
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text, as an alternative to positional input.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to a single document URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve; `--limit` accepted as an alias.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid (default).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Print only the retrieved context, skipping answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Start date filter — format handled by `parse_date_boundary` in utils.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End date filter — format handled by `parse_date_boundary` in utils.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// Return verbatim evidence without LLM synthesis.
    /// Shows the most relevant passages with citations, no paraphrasing or summarization.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
/// Ask mode argument
// CLI-facing mirror of `memvid_core::AskMode`; clap derives kebab-case value
// names (`lex`, `sem`, `hybrid`). Variants use `//` comments so the
// ValueEnum help output stays unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical (keyword) retrieval only.
    Lex,
    // Semantic (embedding) retrieval only.
    Sem,
    // Combined lexical + semantic retrieval (the `ask` default).
    Hybrid,
}
250
251impl From<AskModeArg> for AskMode {
252    fn from(value: AskModeArg) -> Self {
253        match value {
254            AskModeArg::Lex => AskMode::Lex,
255            AskModeArg::Sem => AskMode::Sem,
256            AskModeArg::Hybrid => AskMode::Hybrid,
257        }
258    }
259}
260
/// Arguments for the `find` subcommand
// NOTE: undocumented fields use `//` comments (not `///`) so clap-generated
// help text is unchanged; `///` on a clap field becomes its help string.
#[derive(Args)]
pub struct FindArgs {
    // Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict results to a single document URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict results to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve; `--limit` accepted as an alias.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit the legacy JSON schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode; `auto` picks between lexical/semantic — confirm in handler.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// Disable sketch pre-filtering (for benchmarking/debugging).
    /// By default, sketches are used for fast candidate generation if available.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
/// Search mode argument
// Variants use `//` comments (except the pre-existing Clip doc) so the
// ValueEnum help output stays unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the engine choose — presumably lexical vs semantic based on the
    // memory's capabilities; confirm in the `find` handler.
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
338
/// Adaptive retrieval strategy
// CLI value names are the kebab-cased variant names (e.g. `combined`), which
// is the default for `find`/`ask` via `--adaptive-strategy`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
353
/// Arguments for the `vec-search` subcommand
// NOTE: fields use `//` comments (not `///`) so clap-generated help text is
// unchanged; `///` on a clap field becomes its help string.
#[derive(Args)]
pub struct VecSearchArgs {
    // Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline query vector as comma-separated floats; exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding; exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of nearest neighbours to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
368
/// Arguments for the `audit` subcommand
// Fields already carry `///` help strings where intended; the file path uses
// a `//` comment to avoid changing clap's generated help.
#[derive(Args)]
pub struct AuditArgs {
    // Path to the memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
/// Audit output format
// Selected via `audit --format`; defaults to `text`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
416
417// ============================================================================
418// Search & Retrieval command handlers
419// ============================================================================
420
/// Handle the `timeline` subcommand: list frames chronologically from a
/// memory file, optionally constrained by a limit, timestamp bounds, a
/// resolved temporal phrase (`--on`, temporal_track builds only) and the
/// replay ("as of") cutoffs. Output is JSON (`--json`) or a human listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only modify phrase resolution, so reject them
    // when no --on phrase was supplied.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the temporal phrase (if any) into a filter; keep the summary
    // around so it can be printed/serialized alongside the entries.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter: drop frames newer than the cutoff ID.
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter: drop frames newer than the cutoff time.
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With temporal_track, wrap entries in an envelope that carries the
        // resolved temporal window; otherwise serialize entries directly.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve a required temporal phrase into a
/// concrete window, query the timeline within it, and print the resolution
/// summary plus matching entries (JSON with `--json`).
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase first so an invalid phrase fails before querying.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON output bundles the resolution summary with projected entries.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Human listing shows both the raw timestamp and its ISO rendering.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
/// JSON envelope for `timeline --json` when temporal_track is enabled.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Resolved temporal window; omitted when no `--on` phrase was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Timeline entries, borrowed and serialized as-is from core.
    entries: &'a [TimelineEntry],
}
602
/// JSON envelope for `when --json`: the phrase resolution plus matches.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    /// How the temporal phrase was resolved (window, confidence, flags).
    summary: TemporalSummaryOutput,
    /// Entries falling inside the resolved window.
    entries: Vec<WhenEntry>,
}
609
/// Serializable projection of a core `TimelineEntry` for `when --json`.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    // Raw timestamp as stored (presumably unix seconds — see format_timestamp).
    timestamp: i64,
    // ISO-8601 rendering of `timestamp`; absent if formatting failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    // Per-frame temporal metadata (anchor/mentions), when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
/// Serializable view of a temporal phrase resolution (see `summary_to_output`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    // Anchor instant in unix seconds and its RFC3339 rendering.
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    // One of the kinds produced by `resolution_kind` (e.g. "date", "datetime").
    resolution_kind: &'static str,
    // Resolved window bounds in unix seconds, plus ISO renderings when available.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    // Symmetric padding (from `--window`) that was applied, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
644
/// Internal record of a resolved temporal phrase, kept alongside the filter
/// so it can be printed or converted to `TemporalSummaryOutput` for JSON.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    // The "now" used during resolution (defaults to wall clock; see --anchor).
    anchor: OffsetDateTime,
    // Final window bounds in unix seconds, after optional --window padding.
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    window_minutes: Option<u64>,
}
655
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal phrase into a concrete
/// `TemporalFilter` plus a `TemporalSummary` for display/serialization.
///
/// `tz_override` replaces `DEFAULT_TEMPORAL_TZ`; `anchor_override` pins the
/// "now" used during resolution (RFC3339, useful for reproducibility); a
/// positive `window_minutes` pads the resolved bounds symmetrically.
///
/// Errors use the `E-TEMP-*` codes: E-TEMP-001 (phrase did not resolve),
/// E-TEMP-002 (anchor not RFC3339), E-TEMP-003 (empty timezone).
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    // Default the anchor to "now" unless explicitly pinned by the caller.
    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both bounds symmetrically when a positive window was requested.
    // (The original code special-cased start == end with a branch whose body
    // was identical to the general case; both collapse to this.)
    if let (Some(minutes), Some(s), Some(e)) =
        (window_minutes.filter(|m| *m > 0), start, end)
    {
        let pad = TimeDuration::minutes(minutes as i64).whole_seconds();
        start = Some(s.saturating_sub(pad));
        end = Some(e.saturating_add(pad));
    }

    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
#[cfg(feature = "temporal_track")]
/// Convert an internal `TemporalSummary` into its serializable JSON view.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    // Fall back to the raw unix timestamp when RFC3339 formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
#[cfg(feature = "temporal_track")]
/// Project a core `TimelineEntry` into the `when` command's JSON shape,
/// adding an ISO-8601 rendering of the timestamp when one can be produced.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    let ts = entry.timestamp;
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: ts,
        timestamp_iso: format_timestamp(ts),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
#[cfg(feature = "temporal_track")]
/// Print the resolved temporal window in human-readable form, followed by a
/// blank separator line.
fn print_temporal_summary(summary: &TemporalSummary) {
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!("Anchor: {anchor_text}");

    // A point-in-time resolution (start == end) prints differently from a span.
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        (None, None) => println!("Window: (not resolved)"),
    }

    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|f| f.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    if let Some(window) = summary.window_minutes.filter(|w| *w > 0) {
        println!("Window padding: {window} minute(s)");
    }
    println!();
}
796
#[cfg(feature = "temporal_track")]
/// Print per-entry temporal metadata (anchor plus extracted mentions),
/// indented beneath the owning timeline row.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, then raw.
        let display = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", display, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
829
/// Convert a temporal resolution into inclusive `(start, end)` unix-timestamp
/// bounds. Point values (a single date or datetime) collapse to identical
/// start/end timestamps; every variant yields both bounds.
///
/// # Errors
/// Returns an error when a `Month` value names an impossible calendar month.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;
    let (lo, hi) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(lo), Some(hi)))
}
860
/// Short machine-readable label for the variant of a temporal resolution.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as V;
    match &resolution.value {
        V::Date(_) => "date",
        V::DateTime(_) => "datetime",
        V::DateRange { .. } => "date_range",
        V::DateTimeRange { .. } => "datetime_range",
        V::Month { .. } => "month",
    }
}
871
/// Unix timestamp for midnight (00:00:00) UTC on the given calendar date.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    // `midnight()` + `assume_utc()` is equivalent to pairing the date with
    // `Time::MIDNIGHT` and assuming `UtcOffset::UTC`.
    date.midnight().assume_utc().unix_timestamp()
}
878
/// Last calendar day of `month` in `year` (handles 28/29/30/31-day months,
/// including leap-year February).
///
/// # Errors
/// Returns an error when the year/month pair does not form a valid date.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let first = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    // Walk forward a day at a time until the successor would leave the month.
    // The chain always yields at least `first`, so `last()` cannot be `None`;
    // `unwrap_or` just avoids an `unwrap`.
    let last = std::iter::successors(Some(first), |day| {
        day.next_day().filter(|next| next.month() == month)
    })
    .last()
    .unwrap_or(first);
    Ok(last)
}
892
893#[cfg(feature = "temporal_track")]
894
895fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
896    if fragments.is_empty() {
897        return;
898    }
899
900    response.context_fragments = fragments
901        .into_iter()
902        .map(|fragment| AskContextFragment {
903            rank: fragment.rank,
904            frame_id: fragment.frame_id,
905            uri: fragment.uri,
906            title: fragment.title,
907            score: fragment.score,
908            matches: fragment.matches,
909            range: Some(fragment.range),
910            chunk_range: fragment.chunk_range,
911            text: fragment.text,
912            kind: Some(match fragment.kind {
913                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
914                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
915            }),
916            #[cfg(feature = "temporal_track")]
917            temporal: None,
918        })
919        .collect();
920}
921
/// Handle the `ask` CLI command: retrieve relevant context from a memory
/// file and (optionally) synthesize an answer with an LLM.
///
/// High-level flow:
/// 1. Plan/quota checks; assemble the question from positional args or
///    `--question`; pick the memory file (positional or auto-detected).
/// 2. Open the memory, choose an effective mode (falling back to lexical when
///    the memory has no embeddings), and load an embedding runtime matching
///    the MV2 vector dimension for semantic/hybrid modes.
/// 3. Run retrieval via `Memvid::ask`, then post-process: optional
///    cross-encoder rerank (skipped for temporal/recency queries), memory-card
///    context injection (`--memories`), entity context from hits, and PII
///    masking (`--mask-pii`).
/// 4. Emit verbatim evidence (`--no-llm`), skip synthesis (`--context-only`),
///    or run model inference (`--use-model`), in JSON or pretty form.
///
/// # Errors
/// Fails on plan/quota violations, a missing question, `--end` before
/// `--start`, mixed embedding models in semantic/hybrid modes, or when no
/// embedding runtime is available for a semantic/hybrid query.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    // Track query usage against plan quota
    crate::api::track_query_usage(config, 1)?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional args: the first token that looks like a memory file is
    // the target; everything else is treated as question text.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // --question wins over positional text; empty/whitespace-only is rejected.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Expand query for better retrieval using LLM (expands abbreviations, adds synonyms)
    // This happens when --use-model is set or we have an API key
    let (original_question, search_query) = {
        // For query expansion, we use the fastest available model
        // Priority: OpenAI > Groq > Anthropic > XAI > Mistral
        let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
            if let Ok(key) = std::env::var("OPENAI_API_KEY") {
                // OpenAI available - use gpt-4o-mini (fastest, cheapest)
                (Some("gpt-4o-mini"), Some(key))
            } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
                // Groq available - use llama-3.1-8b-instant (very fast)
                (Some("llama-3.1-8b-instant"), Some(key))
            } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
                // Anthropic available - use haiku
                (Some("claude-haiku-4-5"), Some(key))
            } else if let Ok(key) = std::env::var("XAI_API_KEY") {
                // XAI available - use grok-4-fast
                (Some("grok-4-fast"), Some(key))
            } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
                // Mistral available - use mistral-small
                (Some("mistral-small-latest"), Some(key))
            } else {
                // No fast model available for expansion
                (None, None)
            };

        // DISABLED: Query expansion for ask command
        // The ask command has sophisticated retrieval with fallbacks, aggregation detection,
        // temporal boosting, and diverse retrieval strategies. Query expansion often strips
        // out important semantic context (temporal markers, aggregation signals, analytical
        // keywords) that these strategies depend on. The original question is preserved
        // to ensure all downstream detection and ranking works correctly.
        //
        // Query expansion may be appropriate for simple keyword searches, but for complex
        // natural language questions it causes more problems than it solves.
        let _ = (model_for_expansion, api_key_for_expansion); // suppress unused warnings
        (question.clone(), question.clone())
    };

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Validate the optional date window before touching the memory file.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Check if memory has any vectors - if not, force lexical mode
    let stats = mem.stats()?;
    let has_vectors = stats.vector_count > 0;
    let effective_mode = if !has_vectors
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
        AskModeArg::Lex
    } else {
        args.mode.clone()
    };

    let ask_mode: AskMode = effective_mode.clone().into();
    // Infer the embedding model recorded in the memory so queries embed with
    // the same model; mixed models make semantic scores incomparable.
    let inferred_model_override = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => {
                identity.model.map(String::from)
            }
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // Explicit --query-embedding-model beats the inferred one.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            effective_mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.3,
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question: search_query, // Use expanded query for retrieval
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => {
            anyhow!(vec_dimension_mismatch_help(expected, actual))
        }
        other => anyhow!(other),
    })?;

    // Restore original question for display and LLM synthesis
    // (search_query was used for retrieval but original_question is shown to user)
    response.question = original_question;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    // Skip for temporal/recency queries - cross-encoder doesn't understand temporal context
    // and would override the recency boost from lexical search
    let is_temporal_query = {
        let q_lower = response.question.to_lowercase();
        q_lower.contains("current")
            || q_lower.contains("latest")
            || q_lower.contains("recent")
            || q_lower.contains("now")
            || q_lower.contains("today")
            || q_lower.contains("updated")
            || q_lower.contains("new ")
            || q_lower.contains("newest")
    };
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
        && !is_temporal_query
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    let mut model_result: Option<ModelInference> = None;
    if args.no_llm {
        // --no-llm: return verbatim evidence without LLM synthesis
        if args.use_model.is_some() {
            warn!("--use-model ignored because --no-llm disables LLM synthesis");
        }
        if args.json {
            emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
        } else {
            emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
        }

        // Save active replay session if one exists
        #[cfg(feature = "replay")]
        let _ = mem.save_active_session();

        return Ok(());
    } else if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        // Model inference failure is non-fatal: fall back to default summary.
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                response.answer = Some(inference.answer.answer.clone());
                response.retrieval.context = inference.context_body.clone();
                apply_model_context_fragments(&mut response, inference.context_fragments.clone());
                model_result = Some(inference);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref inference) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            // Extract frame IDs from retrieval hits for replay audit
            let retrieved_frames: Vec<u64> = response
                .retrieval
                .hits
                .iter()
                .map(|hit| hit.frame_id)
                .collect();

            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                inference.answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
                retrieved_frames,
            );
        }
    }

    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                effective_mode.clone(),
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            effective_mode.clone(),
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1344
1345/// Handle graph-aware find with --graph or --hybrid flags
1346fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1347    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1348    use memvid_core::types::QueryPlan;
1349
1350    let planner = QueryPlanner::new();
1351
1352    // Create query plan based on mode
1353    let plan = if args.graph {
1354        // Pure graph mode - let planner detect patterns
1355        let plan = planner.plan(&args.query, args.top_k);
1356        // If it's a hybrid plan from auto-detection, convert to graph-only
1357        match plan {
1358            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1359                QueryPlan::graph_only(graph_filter, args.top_k)
1360            }
1361            _ => plan,
1362        }
1363    } else {
1364        // Hybrid mode - use the auto-detected plan
1365        planner.plan(&args.query, args.top_k)
1366    };
1367
1368    // Execute the search
1369    let hits = hybrid_search(mem, &plan)?;
1370
1371    if args.json {
1372        // JSON output
1373        let output = serde_json::json!({
1374            "query": args.query,
1375            "mode": if args.graph { "graph" } else { "hybrid" },
1376            "plan": format!("{:?}", plan),
1377            "hits": hits.iter().map(|h| {
1378                serde_json::json!({
1379                    "frame_id": h.frame_id,
1380                    "score": h.score,
1381                    "graph_score": h.graph_score,
1382                    "vector_score": h.vector_score,
1383                    "matched_entity": h.matched_entity,
1384                    "preview": h.preview,
1385                })
1386            }).collect::<Vec<_>>(),
1387        });
1388        println!("{}", serde_json::to_string_pretty(&output)?);
1389    } else {
1390        // Human-readable output
1391        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1392        println!("{} search for: \"{}\"", mode_str, args.query);
1393        println!("Plan: {:?}", plan);
1394        println!();
1395
1396        if hits.is_empty() {
1397            println!("No results found.");
1398        } else {
1399            println!("Results ({} hits):", hits.len());
1400            for (i, hit) in hits.iter().enumerate() {
1401                println!();
1402                println!(
1403                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1404                    i + 1,
1405                    hit.frame_id,
1406                    hit.score,
1407                    hit.graph_score,
1408                    hit.vector_score
1409                );
1410                if let Some(entity) = &hit.matched_entity {
1411                    println!("   Matched entity: {}", entity);
1412                }
1413                if let Some(preview) = &hit.preview {
1414                    let truncated = if preview.len() > 200 {
1415                        format!("{}...", &preview[..200])
1416                    } else {
1417                        preview.clone()
1418                    };
1419                    println!("   {}", truncated.replace('\n', " "));
1420                }
1421            }
1422        }
1423    }
1424
1425    Ok(())
1426}
1427
1428pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1429    // Check if plan allows query operations (blocks expired subscriptions)
1430    crate::utils::require_active_plan(config, "find")?;
1431
1432    // Track query usage against plan quota
1433    crate::api::track_query_usage(config, 1)?;
1434
1435    let mut mem = open_read_only_mem(&args.file)?;
1436
1437    // Load active replay session if one exists
1438    #[cfg(feature = "replay")]
1439    let _ = mem.load_active_session();
1440
1441    // Handle graph-aware and hybrid search modes
1442    if args.graph || args.hybrid {
1443        return handle_graph_find(&mut mem, &args);
1444    }
1445
1446    if args.uri.is_some() && args.scope.is_some() {
1447        warn!("--scope ignored because --uri is provided");
1448    }
1449
1450    // Get vector dimension from MV2 for auto-detection
1451    let mv2_dimension = mem.effective_vec_index_dimension()?;
1452    let identity_summary = match args.mode {
1453        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1454        #[cfg(feature = "clip")]
1455        SearchMode::Clip => None,
1456        SearchMode::Lex => None,
1457    };
1458
1459    let mut semantic_allowed = true;
1460    let inferred_model_override = match identity_summary.as_ref() {
1461        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1462            identity.model.as_deref().map(|value| value.to_string())
1463        }
1464        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1465            let models: Vec<_> = identities
1466                .iter()
1467                .filter_map(|entry| entry.identity.model.as_deref())
1468                .collect();
1469            if args.mode == SearchMode::Sem {
1470                anyhow::bail!(
1471                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1472                    Detected models: {:?}\n\n\
1473                    Suggested fix: split into separate memories per embedding model.",
1474                    models
1475                );
1476            }
1477            warn!(
1478                "semantic search disabled: mixed embedding models detected: {:?}",
1479                models
1480            );
1481            semantic_allowed = false;
1482            None
1483        }
1484        _ => None,
1485    };
1486
1487    let emb_model_override = args
1488        .query_embedding_model
1489        .as_deref()
1490        .or(inferred_model_override.as_deref());
1491
1492    let (mode_label, runtime_option) = match args.mode {
1493        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1494        SearchMode::Sem => {
1495            let runtime =
1496                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1497            ("Semantic (vector search)".to_string(), Some(runtime))
1498        }
1499        SearchMode::Auto => {
1500            if !semantic_allowed {
1501                ("Lexical (semantic unsafe)".to_string(), None)
1502            } else if let Some(runtime) =
1503                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1504            {
1505                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1506            } else {
1507                ("Lexical (semantic unavailable)".to_string(), None)
1508            }
1509        }
1510        #[cfg(feature = "clip")]
1511        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1512    };
1513
1514    let mode_key = match args.mode {
1515        SearchMode::Sem => "semantic",
1516        SearchMode::Lex => "text",
1517        SearchMode::Auto => {
1518            if runtime_option.is_some() {
1519                "hybrid"
1520            } else {
1521                "text"
1522            }
1523        }
1524        #[cfg(feature = "clip")]
1525        SearchMode::Clip => "clip",
1526    };
1527
1528    // For CLIP mode, use CLIP visual search
1529    #[cfg(feature = "clip")]
1530    if args.mode == SearchMode::Clip {
1531        use memvid_core::clip::{ClipConfig, ClipModel};
1532
1533        // Initialize CLIP model
1534        let config = ClipConfig::default();
1535        let clip = ClipModel::new(config).map_err(|e| {
1536            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1537        })?;
1538
1539        // Encode query text
1540        let query_embedding = clip
1541            .encode_text(&args.query)
1542            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1543
1544        // Search CLIP index
1545        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1546
1547        // Debug distances before filtering
1548        for hit in &hits {
1549            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1550                tracing::debug!(
1551                    frame_id = hit.frame_id,
1552                    title = %frame.title.unwrap_or_default(),
1553                    page = hit.page,
1554                    distance = hit.distance,
1555                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1556                    "CLIP raw hit"
1557                );
1558            } else {
1559                tracing::debug!(
1560                    frame_id = hit.frame_id,
1561                    page = hit.page,
1562                    distance = hit.distance,
1563                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1564                    "CLIP raw hit (missing frame)"
1565                );
1566            }
1567        }
1568
1569        // CLIP distance threshold for filtering poor matches
1570        // CLIP uses L2 distance on normalized embeddings:
1571        //   - distance² = 2(1 - cosine_similarity)
1572        //   - distance = 0 → identical (cosine_sim = 1)
1573        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1574        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1575        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1576        //   - distance = 2.0 → opposite (cosine_sim = -1)
1577        //
1578        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1579        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1580        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1581        const CLIP_MAX_DISTANCE: f32 = 1.26;
1582
1583        // Convert CLIP hits to SearchResponse format, filtering by threshold
1584        let search_hits: Vec<SearchHit> = hits
1585            .into_iter()
1586            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1587            .enumerate()
1588            .filter_map(|(rank, hit)| {
1589                // Convert L2 distance to cosine similarity for display
1590                // cos_sim = 1 - (distance² / 2)
1591                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1592
1593                // Get frame preview for snippet
1594                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1595                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1596                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1597                let title = match (base_title, hit.page) {
1598                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1599                    (Some(t), None) => Some(t),
1600                    (None, Some(p)) => Some(format!("Page {p}")),
1601                    _ => None,
1602                };
1603                Some(SearchHit {
1604                    rank: rank + 1,
1605                    frame_id: hit.frame_id,
1606                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1607                    title,
1608                    text: preview.clone(),
1609                    chunk_text: Some(preview),
1610                    range: (0, 0),
1611                    chunk_range: None,
1612                    matches: 0,
1613                    score: Some(cosine_similarity),
1614                    metadata: None,
1615                })
1616            })
1617            .collect();
1618
1619        let response = SearchResponse {
1620            query: args.query.clone(),
1621            hits: search_hits.clone(),
1622            total_hits: search_hits.len(),
1623            params: memvid_core::SearchParams {
1624                top_k: args.top_k,
1625                snippet_chars: args.snippet_chars,
1626                cursor: args.cursor.clone(),
1627            },
1628            elapsed_ms: 0,
1629            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1630            next_cursor: None,
1631            context: String::new(),
1632        };
1633
1634        if args.json_legacy {
1635            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1636            emit_legacy_search_json(&response)?;
1637        } else if args.json {
1638            emit_search_json(&response, mode_key)?;
1639        } else {
1640            println!(
1641                "mode: {}   k={}   time: {} ms",
1642                mode_label, response.params.top_k, response.elapsed_ms
1643            );
1644            println!("engine: clip (MobileCLIP-S2)");
1645            println!(
1646                "hits: {} (showing {})",
1647                response.total_hits,
1648                response.hits.len()
1649            );
1650            emit_search_table(&response);
1651        }
1652        return Ok(());
1653    }
1654
1655    // For semantic mode, use pure vector search.
1656    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1657        let runtime = runtime_option
1658            .as_ref()
1659            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1660
1661        // Embed the query
1662        let query_embedding = runtime.embed_query(&args.query)?;
1663
1664        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1665        let scope = args.scope.as_deref().or(args.uri.as_deref());
1666
1667        if !args.no_adaptive {
1668            // Build adaptive config from CLI args
1669            let strategy = match args.adaptive_strategy {
1670                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1671                    min_ratio: args.min_relevancy,
1672                },
1673                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1674                    min_score: args.min_relevancy,
1675                },
1676                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1677                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1678                },
1679                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1680                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1681                    relative_threshold: args.min_relevancy,
1682                    max_drop_ratio: 0.35,
1683                    absolute_min: 0.3,
1684                },
1685            };
1686
1687            let config = AdaptiveConfig {
1688                enabled: true,
1689                max_results: args.max_k,
1690                min_results: 1,
1691                strategy,
1692                normalize_scores: true,
1693            };
1694
1695            match mem.search_adaptive(
1696                &args.query,
1697                &query_embedding,
1698                config,
1699                args.snippet_chars,
1700                scope,
1701            ) {
1702                Ok(result) => {
1703                    let mut resp = SearchResponse {
1704                        query: args.query.clone(),
1705                        hits: result.results,
1706                        total_hits: result.stats.returned,
1707                        params: memvid_core::SearchParams {
1708                            top_k: result.stats.returned,
1709                            snippet_chars: args.snippet_chars,
1710                            cursor: args.cursor.clone(),
1711                        },
1712                        elapsed_ms: 0,
1713                        engine: SearchEngineKind::Hybrid,
1714                        next_cursor: None,
1715                        context: String::new(),
1716                    };
1717                    apply_preference_rerank(&mut resp);
1718                    (
1719                        resp,
1720                        "semantic (adaptive vector search)".to_string(),
1721                        Some(result.stats),
1722                    )
1723                }
1724                Err(e) => {
1725                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1726                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1727                    }
1728
1729                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1730                    match mem.vec_search_with_embedding(
1731                        &args.query,
1732                        &query_embedding,
1733                        args.top_k,
1734                        args.snippet_chars,
1735                        scope,
1736                    ) {
1737                        Ok(mut resp) => {
1738                            apply_preference_rerank(&mut resp);
1739                            (resp, "semantic (vector search fallback)".to_string(), None)
1740                        }
1741                        Err(e2) => {
1742                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1743                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1744                            }
1745                            return Err(anyhow!(
1746                                "Both adaptive and fixed-k search failed: {e}, {e2}"
1747                            ));
1748                        }
1749                    }
1750                }
1751            }
1752        } else {
1753            // Standard fixed-k vector search
1754            match mem.vec_search_with_embedding(
1755                &args.query,
1756                &query_embedding,
1757                args.top_k,
1758                args.snippet_chars,
1759                scope,
1760            ) {
1761                Ok(mut resp) => {
1762                    // Apply preference boost to rerank results for preference-seeking queries
1763                    apply_preference_rerank(&mut resp);
1764                    (resp, "semantic (vector search)".to_string(), None)
1765                }
1766                Err(e) => {
1767                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1768                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1769                    }
1770
1771                    // Fall back to lexical search + rerank if vector search fails
1772                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1773                    let request = SearchRequest {
1774                        query: args.query.clone(),
1775                        top_k: args.top_k,
1776                        snippet_chars: args.snippet_chars,
1777                        uri: args.uri.clone(),
1778                        scope: args.scope.clone(),
1779                        cursor: args.cursor.clone(),
1780                        #[cfg(feature = "temporal_track")]
1781                        temporal: None,
1782                        as_of_frame: args.as_of_frame,
1783                        as_of_ts: args.as_of_ts,
1784                        no_sketch: args.no_sketch,
1785                    };
1786                    let mut resp = mem.search(request)?;
1787                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1788                    (resp, "semantic (fallback rerank)".to_string(), None)
1789                }
1790            }
1791        }
1792    } else {
1793        // For lexical and auto modes, use existing behavior
1794        let request = SearchRequest {
1795            query: args.query.clone(),
1796            top_k: args.top_k,
1797            snippet_chars: args.snippet_chars,
1798            uri: args.uri.clone(),
1799            scope: args.scope.clone(),
1800            cursor: args.cursor.clone(),
1801            #[cfg(feature = "temporal_track")]
1802            temporal: None,
1803            as_of_frame: args.as_of_frame,
1804            as_of_ts: args.as_of_ts,
1805            no_sketch: args.no_sketch,
1806        };
1807
1808        let mut resp = mem.search(request)?;
1809
1810        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1811            warn!("Search index unavailable; returning basic text results");
1812        }
1813
1814        let mut engine_label = match resp.engine {
1815            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1816            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1817            SearchEngineKind::Hybrid => "hybrid".to_string(),
1818        };
1819
1820        if runtime_option.is_some() {
1821            engine_label = format!("hybrid ({engine_label} + semantic)");
1822        }
1823
1824        if let Some(ref runtime) = runtime_option {
1825            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1826        }
1827
1828        (resp, engine_label, None)
1829    };
1830
1831    if args.json_legacy {
1832        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1833        emit_legacy_search_json(&response)?;
1834    } else if args.json {
1835        emit_search_json(&response, mode_key)?;
1836    } else {
1837        println!(
1838            "mode: {}   k={}   time: {} ms",
1839            mode_label, response.params.top_k, response.elapsed_ms
1840        );
1841        println!("engine: {}", engine_label);
1842
1843        // Show adaptive retrieval stats if enabled
1844        if let Some(ref stats) = adaptive_stats {
1845            println!(
1846                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1847                stats.total_considered,
1848                stats.returned,
1849                stats.triggered_by,
1850                stats.top_score.unwrap_or(0.0),
1851                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1852            );
1853        }
1854
1855        println!(
1856            "hits: {} (showing {})",
1857            response.total_hits,
1858            response.hits.len()
1859        );
1860        emit_search_table(&response);
1861    }
1862
1863    // Save active replay session if one exists
1864    #[cfg(feature = "replay")]
1865    let _ = mem.save_active_session();
1866
1867    Ok(())
1868}
1869
1870pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1871    // Track query usage against plan quota
1872    crate::api::track_query_usage(config, 1)?;
1873
1874    let mut mem = open_read_only_mem(&args.file)?;
1875    let vector = if let Some(path) = args.embedding.as_deref() {
1876        read_embedding(path)?
1877    } else if let Some(vector_string) = &args.vector {
1878        parse_vector(vector_string)?
1879    } else {
1880        anyhow::bail!("provide --vector or --embedding for search input");
1881    };
1882
1883    let hits = mem
1884        .search_vec(&vector, args.limit)
1885        .map_err(|err| match err {
1886            MemvidError::VecDimensionMismatch { expected, actual } => {
1887                anyhow!(vec_dimension_mismatch_help(expected, actual))
1888            }
1889            other => anyhow!(other),
1890        })?;
1891    let mut enriched = Vec::with_capacity(hits.len());
1892    for hit in hits {
1893        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1894        enriched.push((hit.frame_id, hit.distance, preview));
1895    }
1896
1897    if args.json {
1898        let json_hits: Vec<_> = enriched
1899            .iter()
1900            .map(|(frame_id, distance, preview)| {
1901                json!({
1902                    "frame_id": frame_id,
1903                    "distance": distance,
1904                    "preview": preview,
1905                })
1906            })
1907            .collect();
1908        let json_str = serde_json::to_string_pretty(&json_hits)?;
1909        println!("{}", json_str.to_colored_json_auto()?);
1910    } else if enriched.is_empty() {
1911        println!("No vector matches found");
1912    } else {
1913        for (frame_id, distance, preview) in enriched {
1914            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1915        }
1916    }
1917    Ok(())
1918}
1919
1920pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1921    use memvid_core::AuditOptions;
1922    use std::fs::File;
1923    use std::io::Write;
1924
1925    let mut mem = Memvid::open(&args.file)?;
1926
1927    // Parse date boundaries
1928    let start = parse_date_boundary(args.start.as_ref(), false)?;
1929    let end = parse_date_boundary(args.end.as_ref(), true)?;
1930    if let (Some(start_ts), Some(end_ts)) = (start, end) {
1931        if end_ts < start_ts {
1932            anyhow::bail!("--end must not be earlier than --start");
1933        }
1934    }
1935
1936    // Set up embedding runtime if needed
1937    let ask_mode: AskMode = args.mode.into();
1938    let runtime = match args.mode {
1939        AskModeArg::Lex => None,
1940        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1941        AskModeArg::Hybrid => try_load_embedding_runtime(config),
1942    };
1943    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1944
1945    // Build audit options
1946    let options = AuditOptions {
1947        top_k: Some(args.top_k),
1948        snippet_chars: Some(args.snippet_chars),
1949        mode: Some(ask_mode),
1950        scope: args.scope,
1951        start,
1952        end,
1953        include_snippets: true,
1954    };
1955
1956    // Run the audit
1957    let mut report = mem.audit(&args.question, Some(options), embedder)?;
1958
1959    // If --use-model is provided, run model inference to synthesize the answer
1960    if let Some(model_name) = args.use_model.as_deref() {
1961        // Build context from sources for model inference
1962        let context = report
1963            .sources
1964            .iter()
1965            .filter_map(|s| s.snippet.clone())
1966            .collect::<Vec<_>>()
1967            .join("\n\n");
1968
1969        match run_model_inference(
1970            model_name,
1971            &report.question,
1972            &context,
1973            &[], // No hits needed for audit
1974            None,
1975            None,
1976            None, // No system prompt override for audit
1977        ) {
1978            Ok(inference) => {
1979                report.answer = Some(inference.answer.answer);
1980                report.notes.push(format!(
1981                    "Answer synthesized by model: {}",
1982                    inference.answer.model
1983                ));
1984            }
1985            Err(err) => {
1986                warn!(
1987                    "model inference unavailable for '{}': {err}. Using default answer.",
1988                    model_name
1989                );
1990            }
1991        }
1992    }
1993
1994    // Format the output
1995    let output = match args.format {
1996        AuditFormat::Text => report.to_text(),
1997        AuditFormat::Markdown => report.to_markdown(),
1998        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1999    };
2000
2001    // Write output
2002    if let Some(out_path) = args.out {
2003        let mut file = File::create(&out_path)?;
2004        file.write_all(output.as_bytes())?;
2005        println!("Audit report written to: {}", out_path.display());
2006    } else {
2007        println!("{}", output);
2008    }
2009
2010    Ok(())
2011}
2012
2013fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2014    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2015
2016    let mut additional_params = serde_json::Map::new();
2017    if let Some(cursor) = &response.params.cursor {
2018        additional_params.insert("cursor".into(), json!(cursor));
2019    }
2020
2021    let mut params = serde_json::Map::new();
2022    params.insert("top_k".into(), json!(response.params.top_k));
2023    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2024    params.insert("mode".into(), json!(mode));
2025    params.insert(
2026        "additional_params".into(),
2027        serde_json::Value::Object(additional_params),
2028    );
2029
2030    let mut metadata_json = serde_json::Map::new();
2031    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2032    metadata_json.insert("total_hits".into(), json!(response.total_hits));
2033    metadata_json.insert(
2034        "next_cursor".into(),
2035        match &response.next_cursor {
2036            Some(cursor) => json!(cursor),
2037            None => serde_json::Value::Null,
2038        },
2039    );
2040    metadata_json.insert("engine".into(), json!(response.engine));
2041    metadata_json.insert("params".into(), serde_json::Value::Object(params));
2042
2043    let body = json!({
2044        "version": "mv2.result.v2",
2045        "query": response.query,
2046        "metadata": metadata_json,
2047        "hits": hits,
2048        "context": response.context,
2049    });
2050    let json_str = serde_json::to_string_pretty(&body)?;
2051    println!("{}", json_str.to_colored_json_auto()?);
2052    Ok(())
2053}
2054
2055fn emit_ask_json(
2056    response: &AskResponse,
2057    requested_mode: AskModeArg,
2058    inference: Option<&ModelInference>,
2059    include_sources: bool,
2060    mem: &mut Memvid,
2061) -> Result<()> {
2062    let hits: Vec<_> = response
2063        .retrieval
2064        .hits
2065        .iter()
2066        .map(search_hit_to_json)
2067        .collect();
2068
2069    let citations: Vec<_> = response
2070        .citations
2071        .iter()
2072        .map(|citation| {
2073            let mut map = serde_json::Map::new();
2074            map.insert("index".into(), json!(citation.index));
2075            map.insert("frame_id".into(), json!(citation.frame_id));
2076            map.insert("uri".into(), json!(citation.uri));
2077            if let Some(range) = citation.chunk_range {
2078                map.insert("chunk_range".into(), json!([range.0, range.1]));
2079            }
2080            if let Some(score) = citation.score {
2081                map.insert("score".into(), json!(score));
2082            }
2083            serde_json::Value::Object(map)
2084        })
2085        .collect();
2086
2087    let mut body = json!({
2088        "version": "mv2.ask.v1",
2089        "question": response.question,
2090        "answer": response.answer,
2091        "context_only": response.context_only,
2092        "mode": ask_mode_display(requested_mode),
2093        "retriever": ask_retriever_display(response.retriever),
2094        "top_k": response.retrieval.params.top_k,
2095        "results": hits,
2096        "citations": citations,
2097        "stats": {
2098            "retrieval_ms": response.stats.retrieval_ms,
2099            "synthesis_ms": response.stats.synthesis_ms,
2100            "latency_ms": response.stats.latency_ms,
2101        },
2102        "engine": search_engine_label(&response.retrieval.engine),
2103        "total_hits": response.retrieval.total_hits,
2104        "next_cursor": response.retrieval.next_cursor,
2105        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2106    });
2107
2108    if let Some(inf) = inference {
2109        let model = &inf.answer;
2110        if let serde_json::Value::Object(ref mut map) = body {
2111            map.insert("model".into(), json!(model.requested));
2112            if model.model != model.requested {
2113                map.insert("model_used".into(), json!(model.model));
2114            }
2115            map.insert("cached".into(), json!(inf.cached));
2116            // Add usage and cost if available
2117            if let Some(usage) = &inf.usage {
2118                map.insert(
2119                    "usage".into(),
2120                    json!({
2121                        "input_tokens": usage.input_tokens,
2122                        "output_tokens": usage.output_tokens,
2123                        "total_tokens": usage.total_tokens,
2124                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2125                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2126                    }),
2127                );
2128            }
2129            // Add grounding/hallucination score if available
2130            if let Some(grounding) = &inf.grounding {
2131                map.insert(
2132                    "grounding".into(),
2133                    json!({
2134                        "score": grounding.score,
2135                        "label": grounding.label(),
2136                        "sentence_count": grounding.sentence_count,
2137                        "grounded_sentences": grounding.grounded_sentences,
2138                        "has_warning": grounding.has_warning,
2139                        "warning_reason": grounding.warning_reason,
2140                    }),
2141                );
2142            }
2143        }
2144    }
2145
2146    // Add detailed sources if requested
2147    if include_sources {
2148        if let serde_json::Value::Object(ref mut map) = body {
2149            let sources = build_sources_json(response, mem);
2150            map.insert("sources".into(), json!(sources));
2151        }
2152    }
2153
2154    // Add follow-up suggestions if confidence is low
2155    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2156        if let serde_json::Value::Object(ref mut map) = body {
2157            map.insert("follow_up".into(), follow_up);
2158        }
2159    }
2160
2161    println!("{}", serde_json::to_string_pretty(&body)?);
2162    Ok(())
2163}
2164
2165fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2166    response
2167        .citations
2168        .iter()
2169        .enumerate()
2170        .map(|(idx, citation)| {
2171            let mut source = serde_json::Map::new();
2172            source.insert("index".into(), json!(idx + 1));
2173            source.insert("frame_id".into(), json!(citation.frame_id));
2174            source.insert("uri".into(), json!(citation.uri));
2175
2176            if let Some(range) = citation.chunk_range {
2177                source.insert("chunk_range".into(), json!([range.0, range.1]));
2178            }
2179            if let Some(score) = citation.score {
2180                source.insert("score".into(), json!(score));
2181            }
2182
2183            // Get frame metadata for rich source information
2184            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2185                if let Some(title) = frame.title {
2186                    source.insert("title".into(), json!(title));
2187                }
2188                if !frame.tags.is_empty() {
2189                    source.insert("tags".into(), json!(frame.tags));
2190                }
2191                if !frame.labels.is_empty() {
2192                    source.insert("labels".into(), json!(frame.labels));
2193                }
2194                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2195                if !frame.content_dates.is_empty() {
2196                    source.insert("content_dates".into(), json!(frame.content_dates));
2197                }
2198            }
2199
2200            // Get snippet from hit
2201            if let Some(hit) = response
2202                .retrieval
2203                .hits
2204                .iter()
2205                .find(|h| h.frame_id == citation.frame_id)
2206            {
2207                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2208                source.insert("snippet".into(), json!(snippet));
2209            }
2210
2211            serde_json::Value::Object(source)
2212        })
2213        .collect()
2214}
2215
2216/// Build follow-up suggestions when the answer has low grounding/confidence.
2217/// Helps users understand what the memory contains and suggests relevant questions.
2218fn build_follow_up_suggestions(
2219    response: &AskResponse,
2220    inference: Option<&ModelInference>,
2221    mem: &mut Memvid,
2222) -> Option<serde_json::Value> {
2223    // Check if we need follow-up suggestions
2224    let needs_followup = inference
2225        .and_then(|inf| inf.grounding.as_ref())
2226        .map(|g| g.score < 0.3 || g.has_warning)
2227        .unwrap_or(false);
2228
2229    // Also trigger if retrieval hits have very low scores or no hits
2230    let low_retrieval = response
2231        .retrieval
2232        .hits
2233        .first()
2234        .and_then(|h| h.score)
2235        .map(|score| score < -2.0)
2236        .unwrap_or(true);
2237
2238    if !needs_followup && !low_retrieval {
2239        return None;
2240    }
2241
2242    // Get available topics from the memory by sampling timeline entries
2243    let limit = std::num::NonZeroU64::new(20).unwrap();
2244    let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2245
2246    let available_topics: Vec<String> = mem
2247        .timeline(timeline_query)
2248        .ok()
2249        .map(|entries| {
2250            entries
2251                .iter()
2252                .filter_map(|e| {
2253                    // Extract meaningful preview/title
2254                    let preview = e.preview.trim();
2255                    if preview.is_empty() || preview.len() < 5 {
2256                        return None;
2257                    }
2258                    // Get first line or truncate
2259                    let first_line = preview.lines().next().unwrap_or(preview);
2260                    if first_line.len() > 60 {
2261                        Some(format!("{}...", &first_line[..57]))
2262                    } else {
2263                        Some(first_line.to_string())
2264                    }
2265                })
2266                .collect::<std::collections::HashSet<_>>()
2267                .into_iter()
2268                .take(5)
2269                .collect()
2270        })
2271        .unwrap_or_default();
2272
2273    // Determine the reason for low confidence
2274    let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2275        "No relevant information found in memory"
2276    } else if inference
2277        .and_then(|i| i.grounding.as_ref())
2278        .map(|g| g.has_warning)
2279        .unwrap_or(false)
2280    {
2281        "Answer may not be well-supported by the available context"
2282    } else {
2283        "Low confidence in the answer"
2284    };
2285
2286    // Generate suggestion questions based on available topics
2287    let suggestions: Vec<String> = if available_topics.is_empty() {
2288        vec![
2289            "What information is stored in this memory?".to_string(),
2290            "Can you list the main topics covered?".to_string(),
2291        ]
2292    } else {
2293        available_topics
2294            .iter()
2295            .take(3)
2296            .map(|topic| format!("Tell me about {}", topic))
2297            .chain(std::iter::once(
2298                "What topics are in this memory?".to_string(),
2299            ))
2300            .collect()
2301    };
2302
2303    Some(json!({
2304        "needed": true,
2305        "reason": reason,
2306        "hint": if available_topics.is_empty() {
2307            "This memory may not contain information about your query."
2308        } else {
2309            "This memory contains information about different topics. Try asking about those instead."
2310        },
2311        "available_topics": available_topics,
2312        "suggestions": suggestions
2313    }))
2314}
2315
2316fn emit_model_json(
2317    response: &AskResponse,
2318    requested_model: &str,
2319    inference: Option<&ModelInference>,
2320    include_sources: bool,
2321    mem: &mut Memvid,
2322) -> Result<()> {
2323    let answer = response.answer.clone().unwrap_or_default();
2324    let requested_label = inference
2325        .map(|m| m.answer.requested.clone())
2326        .unwrap_or_else(|| requested_model.to_string());
2327    let used_label = inference
2328        .map(|m| m.answer.model.clone())
2329        .unwrap_or_else(|| requested_model.to_string());
2330
2331    let mut body = json!({
2332        "question": response.question,
2333        "model": requested_label,
2334        "model_used": used_label,
2335        "answer": answer,
2336        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2337    });
2338
2339    // Add usage and cost if available
2340    if let Some(inf) = inference {
2341        if let serde_json::Value::Object(ref mut map) = body {
2342            map.insert("cached".into(), json!(inf.cached));
2343            if let Some(usage) = &inf.usage {
2344                map.insert(
2345                    "usage".into(),
2346                    json!({
2347                        "input_tokens": usage.input_tokens,
2348                        "output_tokens": usage.output_tokens,
2349                        "total_tokens": usage.total_tokens,
2350                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2351                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2352                    }),
2353                );
2354            }
2355            if let Some(grounding) = &inf.grounding {
2356                map.insert(
2357                    "grounding".into(),
2358                    json!({
2359                        "score": grounding.score,
2360                        "label": grounding.label(),
2361                        "sentence_count": grounding.sentence_count,
2362                        "grounded_sentences": grounding.grounded_sentences,
2363                        "has_warning": grounding.has_warning,
2364                        "warning_reason": grounding.warning_reason,
2365                    }),
2366                );
2367            }
2368        }
2369    }
2370
2371    // Add detailed sources if requested
2372    if include_sources {
2373        if let serde_json::Value::Object(ref mut map) = body {
2374            let sources = build_sources_json(response, mem);
2375            map.insert("sources".into(), json!(sources));
2376        }
2377    }
2378
2379    // Add follow-up suggestions if confidence is low
2380    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2381        if let serde_json::Value::Object(ref mut map) = body {
2382            map.insert("follow_up".into(), follow_up);
2383        }
2384    }
2385
2386    // Use colored JSON output
2387    let json_str = serde_json::to_string_pretty(&body)?;
2388    println!("{}", json_str.to_colored_json_auto()?);
2389    Ok(())
2390}
2391
2392fn emit_ask_pretty(
2393    response: &AskResponse,
2394    requested_mode: AskModeArg,
2395    inference: Option<&ModelInference>,
2396    include_sources: bool,
2397    mem: &mut Memvid,
2398) {
2399    println!(
2400        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2401        ask_mode_pretty(requested_mode),
2402        ask_retriever_pretty(response.retriever),
2403        response.retrieval.params.top_k,
2404        response.stats.latency_ms,
2405        response.stats.retrieval_ms
2406    );
2407    if let Some(inference) = inference {
2408        let model = &inference.answer;
2409        let cached_label = if inference.cached { " [CACHED]" } else { "" };
2410        if model.requested.trim() == model.model {
2411            println!("model: {}{}", model.model, cached_label);
2412        } else {
2413            println!(
2414                "model requested: {}   model used: {}{}",
2415                model.requested, model.model, cached_label
2416            );
2417        }
2418        // Display usage and cost if available
2419        if let Some(usage) = &inference.usage {
2420            let cost_label = if inference.cached {
2421                format!("$0.00 (saved ${:.6})", usage.cost_usd)
2422            } else {
2423                format!("${:.6}", usage.cost_usd)
2424            };
2425            println!(
2426                "tokens: {} input + {} output = {}   cost: {}",
2427                usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
2428            );
2429        }
2430        // Display grounding/hallucination score
2431        if let Some(grounding) = &inference.grounding {
2432            let warning = if grounding.has_warning {
2433                format!(
2434                    " [WARNING: {}]",
2435                    grounding
2436                        .warning_reason
2437                        .as_deref()
2438                        .unwrap_or("potential hallucination")
2439                )
2440            } else {
2441                String::new()
2442            };
2443            println!(
2444                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2445                grounding.score * 100.0,
2446                grounding.label(),
2447                grounding.grounded_sentences,
2448                grounding.sentence_count,
2449                warning
2450            );
2451        }
2452    }
2453    println!(
2454        "engine: {}",
2455        search_engine_label(&response.retrieval.engine)
2456    );
2457    println!(
2458        "hits: {} (showing {})",
2459        response.retrieval.total_hits,
2460        response.retrieval.hits.len()
2461    );
2462
2463    if response.context_only {
2464        println!();
2465        println!("Context-only mode: synthesis disabled.");
2466        println!();
2467    } else if let Some(answer) = &response.answer {
2468        println!();
2469        println!("Answer:\n{answer}");
2470        println!();
2471    }
2472
2473    if !response.citations.is_empty() {
2474        println!("Citations:");
2475        for citation in &response.citations {
2476            match citation.score {
2477                Some(score) => println!(
2478                    "[{}] {} (frame {}, score {:.3})",
2479                    citation.index, citation.uri, citation.frame_id, score
2480                ),
2481                None => println!(
2482                    "[{}] {} (frame {})",
2483                    citation.index, citation.uri, citation.frame_id
2484                ),
2485            }
2486        }
2487        println!();
2488    }
2489
2490    // Print detailed sources if requested
2491    if include_sources && !response.citations.is_empty() {
2492        println!("=== SOURCES ===");
2493        println!();
2494        for citation in &response.citations {
2495            println!("[{}] {}", citation.index, citation.uri);
2496
2497            // Get frame metadata
2498            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2499                if let Some(title) = &frame.title {
2500                    println!("    Title: {}", title);
2501                }
2502                println!("    Frame ID: {}", citation.frame_id);
2503                if let Some(score) = citation.score {
2504                    println!("    Score: {:.4}", score);
2505                }
2506                if let Some((start, end)) = citation.chunk_range {
2507                    println!("    Range: [{}..{})", start, end);
2508                }
2509                if !frame.tags.is_empty() {
2510                    println!("    Tags: {}", frame.tags.join(", "));
2511                }
2512                if !frame.labels.is_empty() {
2513                    println!("    Labels: {}", frame.labels.join(", "));
2514                }
2515                println!("    Timestamp: {}", frame.timestamp);
2516                if !frame.content_dates.is_empty() {
2517                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2518                }
2519            }
2520
2521            // Get snippet from hit
2522            if let Some(hit) = response
2523                .retrieval
2524                .hits
2525                .iter()
2526                .find(|h| h.frame_id == citation.frame_id)
2527            {
2528                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2529                let truncated = if snippet.len() > 200 {
2530                    format!("{}...", &snippet[..200])
2531                } else {
2532                    snippet.clone()
2533                };
2534                println!("    Snippet: {}", truncated.replace('\n', " "));
2535            }
2536            println!();
2537        }
2538    }
2539
2540    if !include_sources {
2541        println!();
2542        emit_search_table(&response.retrieval);
2543    }
2544
2545    // Display follow-up suggestions if confidence is low
2546    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2547        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2548            if needed {
2549                println!();
2550                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2551                println!("💡 FOLLOW-UP SUGGESTIONS");
2552                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2553
2554                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2555                    println!("Reason: {}", reason);
2556                }
2557
2558                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2559                    println!("Hint: {}", hint);
2560                }
2561
2562                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2563                    if !topics.is_empty() {
2564                        println!();
2565                        println!("Available topics in this memory:");
2566                        for topic in topics.iter().filter_map(|t| t.as_str()) {
2567                            println!("  • {}", topic);
2568                        }
2569                    }
2570                }
2571
2572                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2573                    if !suggestions.is_empty() {
2574                        println!();
2575                        println!("Try asking:");
2576                        for (i, suggestion) in
2577                            suggestions.iter().filter_map(|s| s.as_str()).enumerate()
2578                        {
2579                            println!("  {}. \"{}\"", i + 1, suggestion);
2580                        }
2581                    }
2582                }
2583                println!();
2584            }
2585        }
2586    }
2587}
2588
2589/// Emit verbatim evidence as JSON without LLM synthesis.
2590/// Format: {evidence: [{source, text, score}], question, hits, stats}
2591fn emit_verbatim_evidence_json(
2592    response: &AskResponse,
2593    include_sources: bool,
2594    mem: &mut Memvid,
2595) -> Result<()> {
2596    // Build evidence array from hits - verbatim excerpts with citations
2597    let evidence: Vec<_> = response
2598        .retrieval
2599        .hits
2600        .iter()
2601        .enumerate()
2602        .map(|(idx, hit)| {
2603            let mut entry = serde_json::Map::new();
2604            entry.insert("index".into(), json!(idx + 1));
2605            entry.insert("frame_id".into(), json!(hit.frame_id));
2606            entry.insert("uri".into(), json!(&hit.uri));
2607            if let Some(title) = &hit.title {
2608                entry.insert("title".into(), json!(title));
2609            }
2610            // Use chunk_text if available (more specific), otherwise full text
2611            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2612            entry.insert("text".into(), json!(verbatim));
2613            if let Some(score) = hit.score {
2614                entry.insert("score".into(), json!(score));
2615            }
2616            serde_json::Value::Object(entry)
2617        })
2618        .collect();
2619
2620    // Build sources array if requested
2621    let sources: Option<Vec<_>> = if include_sources {
2622        Some(
2623            response
2624                .retrieval
2625                .hits
2626                .iter()
2627                .filter_map(|hit| {
2628                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
2629                        let mut source = serde_json::Map::new();
2630                        source.insert("frame_id".into(), json!(frame.id));
2631                        source.insert(
2632                            "uri".into(),
2633                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
2634                        );
2635                        if let Some(title) = &frame.title {
2636                            source.insert("title".into(), json!(title));
2637                        }
2638                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
2639                        if !frame.tags.is_empty() {
2640                            source.insert("tags".into(), json!(frame.tags));
2641                        }
2642                        if !frame.labels.is_empty() {
2643                            source.insert("labels".into(), json!(frame.labels));
2644                        }
2645                        serde_json::Value::Object(source)
2646                    })
2647                })
2648                .collect(),
2649        )
2650    } else {
2651        None
2652    };
2653
2654    let mut body = json!({
2655        "version": "mv2.evidence.v1",
2656        "mode": "verbatim",
2657        "question": response.question,
2658        "evidence": evidence,
2659        "evidence_count": evidence.len(),
2660        "total_hits": response.retrieval.total_hits,
2661        "stats": {
2662            "retrieval_ms": response.stats.retrieval_ms,
2663            "latency_ms": response.stats.latency_ms,
2664        },
2665        "engine": search_engine_label(&response.retrieval.engine),
2666    });
2667
2668    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
2669        map.insert("sources".into(), json!(sources));
2670    }
2671
2672    let json_str = serde_json::to_string_pretty(&body)?;
2673    println!("{}", json_str.to_colored_json_auto()?);
2674    Ok(())
2675}
2676
/// Emit verbatim evidence in human-readable format without LLM synthesis.
///
/// Prints a stats header, a banner with the (truncated) question, one
/// citation-numbered stanza per hit with its verbatim text indented under a
/// `│` gutter, optional frame-level source details, and a closing note that
/// no model synthesis was performed. Hit relevance is shown as a 0-100%
/// value normalized over this result set's score range.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {}   latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    // Banner header; the question is truncated to 40 chars to keep it on
    // one line.
    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative).
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    // Display each piece of evidence with a citation index.
    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        // Relevance suffix is omitted entirely when the hit has no score.
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!("    Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Show verbatim text - prefer chunk_text if available.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        // Indent each line for readability; blank lines are dropped.
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!("    │ {}", line);
            }
        }
        println!();
    }

    // Print detailed sources if requested. Frames that fail to load are
    // skipped silently (best-effort enrichment).
    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", frame.id);
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    // Closing note: remind the user no LLM synthesis was performed.
    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2798
2799fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2800    let hits: Vec<_> = response
2801        .hits
2802        .iter()
2803        .map(|hit| {
2804            json!({
2805                "frame_id": hit.frame_id,
2806                "matches": hit.matches,
2807                "snippets": [hit.text.clone()],
2808            })
2809        })
2810        .collect();
2811    println!("{}", serde_json::to_string_pretty(&hits)?);
2812    Ok(())
2813}
2814
2815fn emit_search_table(response: &SearchResponse) {
2816    if response.hits.is_empty() {
2817        println!("No results for '{}'.", response.query);
2818        return;
2819    }
2820
2821    // Calculate score range for normalization (BM25 scores can be negative)
2822    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
2823    let (min_score, max_score) = score_range(&scores);
2824
2825    for hit in &response.hits {
2826        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
2827        if let Some(title) = &hit.title {
2828            println!("  Title: {title}");
2829        }
2830        if let Some(score) = hit.score {
2831            let normalized = normalize_bm25_for_display(score, min_score, max_score);
2832            println!("  Relevance: {:.0}%", normalized);
2833        }
2834        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
2835        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
2836            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
2837        }
2838        if let Some(chunk_text) = &hit.chunk_text {
2839            println!("  Chunk Text: {}", chunk_text.trim());
2840        }
2841        if let Some(metadata) = &hit.metadata {
2842            if let Some(track) = &metadata.track {
2843                println!("  Track: {track}");
2844            }
2845            if !metadata.tags.is_empty() {
2846                println!("  Tags: {}", metadata.tags.join(", "));
2847            }
2848            if !metadata.labels.is_empty() {
2849                println!("  Labels: {}", metadata.labels.join(", "));
2850            }
2851            if let Some(created_at) = &metadata.created_at {
2852                println!("  Created: {created_at}");
2853            }
2854            if !metadata.content_dates.is_empty() {
2855                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2856            }
2857            if !metadata.entities.is_empty() {
2858                let entity_strs: Vec<String> = metadata
2859                    .entities
2860                    .iter()
2861                    .map(|e| format!("{} ({})", e.name, e.kind))
2862                    .collect();
2863                println!("  Entities: {}", entity_strs.join(", "));
2864            }
2865        }
2866        println!("  Snippet: {}", hit.text.trim());
2867        println!();
2868    }
2869    if let Some(cursor) = &response.next_cursor {
2870        println!("Next cursor: {cursor}");
2871    }
2872}
2873
2874fn ask_mode_display(mode: AskModeArg) -> &'static str {
2875    match mode {
2876        AskModeArg::Lex => "lex",
2877        AskModeArg::Sem => "sem",
2878        AskModeArg::Hybrid => "hybrid",
2879    }
2880}
2881
2882fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2883    match mode {
2884        AskModeArg::Lex => "Lexical",
2885        AskModeArg::Sem => "Semantic",
2886        AskModeArg::Hybrid => "Hybrid",
2887    }
2888}
2889
2890fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2891    match retriever {
2892        AskRetriever::Lex => "lex",
2893        AskRetriever::Semantic => "semantic",
2894        AskRetriever::Hybrid => "hybrid",
2895        AskRetriever::LexFallback => "lex_fallback",
2896        AskRetriever::TimelineFallback => "timeline_fallback",
2897    }
2898}
2899
2900fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2901    match retriever {
2902        AskRetriever::Lex => "Lexical",
2903        AskRetriever::Semantic => "Semantic",
2904        AskRetriever::Hybrid => "Hybrid",
2905        AskRetriever::LexFallback => "Lexical (fallback)",
2906        AskRetriever::TimelineFallback => "Timeline (fallback)",
2907    }
2908}
2909
2910fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2911    match engine {
2912        SearchEngineKind::Tantivy => "text (tantivy)",
2913        SearchEngineKind::LexFallback => "text (fallback)",
2914        SearchEngineKind::Hybrid => "hybrid",
2915    }
2916}
2917
2918fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2919    let digest = hash(uri.as_bytes()).to_hex().to_string();
2920    let prefix_len = digest.len().min(12);
2921    let prefix = &digest[..prefix_len];
2922    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2923}
2924
/// Return `text` unchanged when it has at most `limit` chars; otherwise the
/// first `limit` chars followed by `"..."`. Operates on chars, so it is safe
/// for multi-byte UTF-8 input.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    match text.char_indices().nth(limit) {
        // Fewer than limit+1 chars: nothing to cut.
        None => text.to_string(),
        // `cut` is the byte offset of the first char past the limit, so the
        // slice below always lands on a char boundary.
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2933
/// Normalize a BM25 score to 0-100 range for user-friendly display.
///
/// BM25 scores can be negative (Tantivy uses log-based TF which can go
/// negative for very common terms), so we rescale relative to the min/max of
/// the result set.
///
/// - Returns 100.0 when min == max (all scores equal).
/// - Otherwise returns the score's position in [min, max], clamped to 0-100.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let span = max_score - min_score;
    if span.abs() < f32::EPSILON {
        // Degenerate range: every hit scored identically.
        100.0
    } else {
        (100.0 * (score - min_score) / span).clamp(0.0, 100.0)
    }
}
2950
/// Extract the (min, max) of the `Some` scores in a slice; `(0.0, 0.0)` when
/// no score is present.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut bounds: Option<(f32, f32)> = None;
    for score in scores.iter().copied().flatten() {
        bounds = Some(match bounds {
            None => (score, score),
            Some((lo, hi)) => (lo.min(score), hi.max(score)),
        });
    }
    bounds.unwrap_or((0.0, 0.0))
}
2964
2965fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
2966    let mut hit_json = serde_json::Map::new();
2967    hit_json.insert("rank".into(), json!(hit.rank));
2968    if let Some(score) = hit.score {
2969        hit_json.insert("score".into(), json!(score));
2970    }
2971    hit_json.insert(
2972        "id".into(),
2973        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
2974    );
2975    hit_json.insert("frame_id".into(), json!(hit.frame_id));
2976    hit_json.insert("uri".into(), json!(hit.uri));
2977    if let Some(title) = &hit.title {
2978        hit_json.insert("title".into(), json!(title));
2979    }
2980    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
2981    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
2982    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
2983    hit_json.insert("text".into(), json!(hit.text));
2984
2985    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
2986        matches: hit.matches,
2987        ..SearchHitMetadata::default()
2988    });
2989    let mut meta_json = serde_json::Map::new();
2990    meta_json.insert("matches".into(), json!(metadata.matches));
2991    if !metadata.tags.is_empty() {
2992        meta_json.insert("tags".into(), json!(metadata.tags));
2993    }
2994    if !metadata.labels.is_empty() {
2995        meta_json.insert("labels".into(), json!(metadata.labels));
2996    }
2997    if let Some(track) = metadata.track {
2998        meta_json.insert("track".into(), json!(track));
2999    }
3000    if let Some(created_at) = metadata.created_at {
3001        meta_json.insert("created_at".into(), json!(created_at));
3002    }
3003    if !metadata.content_dates.is_empty() {
3004        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
3005    }
3006    if !metadata.entities.is_empty() {
3007        let entities_json: Vec<serde_json::Value> = metadata
3008            .entities
3009            .iter()
3010            .map(|e| {
3011                let mut ent = serde_json::Map::new();
3012                ent.insert("name".into(), json!(e.name));
3013                ent.insert("kind".into(), json!(e.kind));
3014                if let Some(conf) = e.confidence {
3015                    ent.insert("confidence".into(), json!(conf));
3016                }
3017                serde_json::Value::Object(ent)
3018            })
3019            .collect();
3020        meta_json.insert("entities".into(), json!(entities_json));
3021    }
3022    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
3023    serde_json::Value::Object(hit_json)
3024}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Hits are reordered in place and their `rank` fields rewritten 1..n.
/// Frames with no usable embedding contribute no semantic component; ties in
/// the fused score are broken by the original lexical rank, keeping the
/// result deterministic.
///
/// # Errors
/// Propagates failures from embedding the query (`embed_query`) or loading
/// frame embeddings (`frame_embedding`).
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    // Nothing to rerank.
    if response.hits.is_empty() {
        return Ok(());
    }

    // Score each hit's stored frame embedding against the query embedding.
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Skip embeddings whose dimension doesn't match the current
            // runtime model — comparing them would be meaningless.
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // No comparable embeddings at all: leave the lexical order untouched.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks (1-based, best first).
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    // — keep this keyword list in sync with apply_preference_rerank.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores.
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match.
    const RRF_K: f32 = 60.0;

    // Build (original index, fused score, lexical rank) triples.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank (0.0 when the frame had no
            // comparable embedding)
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals.
            // This is a small bonus for content with first-person preference indicators.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Fused score descending; ties broken by lexical rank ascending.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3130
/// Rerank search results by boosting hits that contain user preference signals.
/// Only applies when the query appears to be seeking recommendations or
/// preferences; otherwise the hit order is left untouched.
///
/// Unlike `apply_semantic_rerank` (rank-based RRF), this adds the boost
/// directly onto the raw hit score, then reorders in place and rewrites the
/// `rank` fields 1..n. The stored `score` values themselves are not changed —
/// the boost only affects ordering.
fn apply_preference_rerank(response: &mut SearchResponse) {
    // Nothing to rerank.
    if response.hits.is_empty() {
        return;
    }

    // Check if query is preference-seeking — keep this keyword list in sync
    // with apply_semantic_rerank.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    if !is_preference_query {
        return;
    }

    // Compute (original index, boosted score, original score) for each hit.
    // Hits without a score are treated as 0.0 before boosting.
    let mut scored: Vec<(usize, f32, f32)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let original_score = hit.score.unwrap_or(0.0);
            let preference_boost = compute_preference_boost(&hit.text);
            let boosted_score = original_score + preference_boost;
            (idx, boosted_score, original_score)
        })
        .collect();

    // Sort by boosted score (descending); ties fall back to the original
    // score (descending).
    scored.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
    });

    // Rebuild the hit list in boosted order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
}
3182
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
///
/// The returned boost is additive (applied on top of the original score)
/// and capped at 0.5 so it can never dominate the base relevance score.
fn compute_preference_boost(text: &str) -> f32 {
    let text_lower = text.to_lowercase();
    let mut boost = 0.0f32;

    // Strong signals: Past/present user experiences and possessions
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for pattern in established_context {
        if text_lower.contains(pattern) {
            boost += 0.15;
        }
    }

    // Moderate signals: General first-person statements.
    // Each word is matched surrounded by spaces, but ALSO at the start and
    // end of the text — a bare space-padded `contains` probe would miss
    // "I prefer ..." (no leading space) and "... given to me" (no trailing
    // space), which are the most common placements.
    let first_person = [(" i ", "i ", " i"), (" my ", "my ", " my"), (" me ", "me ", " me")];
    for (middle, head, tail) in first_person {
        if text_lower.contains(middle)
            || text_lower.starts_with(head)
            || text_lower.ends_with(tail)
        {
            boost += 0.02;
        }
    }

    // Weak signals: Requests/intentions (not yet established preferences)
    // These indicate the user wants something, but don't describe established context
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in request_patterns {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
3275
/// Cosine similarity of two vectors; 0.0 when either vector is (near-)zero.
/// Extra elements of the longer slice are ignored (pairwise `zip`).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in a single pass.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (&x, &y)| (d + x * y, na + x * x, nb + y * y),
    );

    // Degenerate vectors have no direction; report zero similarity.
    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }

    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3292
/// Apply cross-encoder reranking to search results.
///
/// Cross-encoders directly score query-document pairs and can understand
/// more nuanced relevance than bi-encoders (embeddings). This is especially
/// useful for personalization queries where semantic similarity != relevance.
///
/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
///
/// Failure to initialize or run the reranker is non-fatal: a warning is
/// logged and the original ordering is kept.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits (covers empty too).
    if response.hits.len() < 2 {
        return Ok(());
    }

    // Only rerank the top candidates; anything beyond is appended unchanged.
    let candidates_to_rerank = response.hits.len().min(50);

    // Initialize the reranker (model will be downloaded on first use, ~86MB)
    // Using JINA Turbo - faster than BGE while maintaining good accuracy
    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    // Rerank using cross-encoder
    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    // Blend cross-encoder scores with original scores to preserve temporal boosting.
    // The original score includes recency boost; purely replacing it loses temporal relevance.
    // We collect (blended_score, original_idx) pairs and sort by blended score.
    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Find score range for normalization (original scores can be negative for BM25)
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    // Guard the degenerate case where no candidate carries a score: the folds
    // below would leave the bounds at +/-INFINITY and every normalized score
    // would become NaN/-inf. Fall back to a neutral 0..1 range instead, so
    // missing scores normalize to 0.
    let (orig_min, orig_range) = if original_scores.is_empty() {
        (0.0f32, 1.0f32)
    } else {
        let min = original_scores
            .iter()
            .cloned()
            .fold(f32::INFINITY, f32::min);
        let max = original_scores
            .iter()
            .cloned()
            .fold(f32::NEG_INFINITY, f32::max);
        // Avoid division by zero when all scores are identical.
        (min, (max - min).max(0.001))
    };

    for result in rerank_results.iter() {
        let original_idx = result.index;
        let cross_encoder_score = result.score; // Already normalized 0-1

        // Normalize original score to 0-1 range
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // Blend: 20% cross-encoder (relevance) + 80% original (includes temporal boost)
        // Very heavy weight on original score to preserve temporal ranking
        // The original score already incorporates BM25 + recency boost
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    // Sort by blended score (descending)
    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Build reordered hits with new ranks
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        // Store blended score for reference
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Add any remaining hits that weren't reranked (beyond top-50)
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3398
/// Stub for cross-encoder reranking when local-embeddings is disabled.
/// Does nothing - reranking is skipped silently.
///
/// NOTE: the signature must stay in sync with the `local-embeddings`
/// variant of this function so call sites compile under either feature set.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    // Hits keep their original order; no error is surfaced to the caller.
    Ok(())
}
3405
3406/// Build a context string from memory cards stored in the MV2 file.
3407/// Groups facts by entity for better LLM comprehension.
3408fn build_memory_context(mem: &Memvid) -> String {
3409    let entities = mem.memory_entities();
3410    if entities.is_empty() {
3411        return String::new();
3412    }
3413
3414    let mut sections = Vec::new();
3415    for entity in entities {
3416        let cards = mem.get_entity_memories(&entity);
3417        if cards.is_empty() {
3418            continue;
3419        }
3420
3421        let mut entity_lines = Vec::new();
3422        for card in cards {
3423            // Format: "slot: value" with optional polarity indicator
3424            let polarity_marker = card
3425                .polarity
3426                .as_ref()
3427                .map(|p| match p.to_string().as_str() {
3428                    "Positive" => " (+)",
3429                    "Negative" => " (-)",
3430                    _ => "",
3431                })
3432                .unwrap_or("");
3433            entity_lines.push(format!(
3434                "  - {}: {}{}",
3435                card.slot, card.value, polarity_marker
3436            ));
3437        }
3438
3439        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3440    }
3441
3442    sections.join("\n\n")
3443}
3444
3445/// Build a context string from entities found in search hits.
3446/// Groups entities by type for better LLM comprehension.
3447fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3448    use std::collections::HashMap;
3449
3450    // Collect unique entities by kind
3451    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3452
3453    for hit in hits {
3454        if let Some(metadata) = &hit.metadata {
3455            for entity in &metadata.entities {
3456                entities_by_kind
3457                    .entry(entity.kind.clone())
3458                    .or_default()
3459                    .push(entity.name.clone());
3460            }
3461        }
3462    }
3463
3464    if entities_by_kind.is_empty() {
3465        return String::new();
3466    }
3467
3468    // Deduplicate and format
3469    let mut sections = Vec::new();
3470    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3471    sorted_kinds.sort();
3472
3473    for kind in sorted_kinds {
3474        let names = entities_by_kind.get(kind).unwrap();
3475        let mut unique_names: Vec<_> = names.iter().collect();
3476        unique_names.sort();
3477        unique_names.dedup();
3478
3479        let names_str = unique_names
3480            .iter()
3481            .take(10) // Limit to 10 entities per kind
3482            .map(|s| s.as_str())
3483            .collect::<Vec<_>>()
3484            .join(", ");
3485
3486        sections.push(format!("{}: {}", kind, names_str));
3487    }
3488
3489    sections.join("\n")
3490}