// memvid_cli/commands/search.rs
1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{Result, anyhow, bail};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18use memvid_core::{
19    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
20    SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
21    types::{
22        AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
23        SearchHitMetadata,
24    },
25};
26#[cfg(feature = "temporal_track")]
27use memvid_core::{
28    TemporalContext, TemporalFilter, TemporalNormalizer, TemporalResolution,
29    TemporalResolutionValue, types::SearchHitTemporal,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45    ModelContextFragment, ModelContextFragmentKind, ModelInference, run_model_inference,
46};
47
48// frame_to_json and print_frame_summary available from commands but not used in this module
49use crate::config::{
50    CliConfig, EmbeddingModelChoice, EmbeddingRuntime, load_embedding_runtime,
51    load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
52    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2,
53};
54use crate::utils::{
55    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56    parse_date_boundary, parse_vector, read_embedding,
57};
58
// Cap on context characters rendered in output; consumers of this constant
// are outside this chunk — NOTE(review): confirm exact usage at call sites.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone used when the user does not pass `--tz`.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67        message.push_str(&format!(
68            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69            model.name(),
70            model.name()
71        ));
72        if model.is_openai() {
73            message.push_str(" (and set `OPENAI_API_KEY`).");
74        } else {
75            message.push('.');
76        }
77        message.push_str(&format!(
78            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79            model.name()
80        ));
81        message.push_str(&format!(
82            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83        ));
84        message.push_str("\nOr use `--mode lex` to disable semantic search.");
85    }
86    message
87}
88
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the chronological order of entries.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Inclusive lower bound, unix seconds.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Inclusive upper bound, unix seconds.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. passed to the normalizer).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone used when resolving the phrase; only valid with --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor ("now") for relative phrases; only valid with --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window; only with --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required here, unlike
    // `timeline` where it is optional).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone used when resolving the phrase.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor ("now") for relative phrases.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Inclusive lower bound, unix seconds.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Inclusive upper bound, unix seconds.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the chronological order of entries.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
}
149
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    // Positional targets (memory files and/or question words; parsing of the
    // mix happens in the handler, which is outside this chunk).
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (alternative to positional words).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to a single exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of results to retrieve (fixed k when adaptive is disabled).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
    // Print retrieved context only; skip answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Date-range filters; parsed by the handler (see parse_date_boundary).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// Return verbatim evidence without LLM synthesis.
    /// Shows the most relevant passages with citations, no paraphrasing or summarization.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
/// Ask mode argument
///
/// CLI-facing mirror of the core `AskMode`; see the `From` impl in this file.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical (keyword) retrieval only.
    Lex,
    // Semantic (embedding) retrieval only.
    Sem,
    // Combined lexical + semantic retrieval.
    Hybrid,
}
250
impl From<AskModeArg> for AskMode {
    /// Map the CLI enum onto the core library's `AskMode`, one-to-one.
    fn from(value: AskModeArg) -> Self {
        match value {
            AskModeArg::Lex => AskMode::Lex,
            AskModeArg::Sem => AskMode::Sem,
            AskModeArg::Hybrid => AskMode::Hybrid,
        }
    }
}
260
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required for find, unlike ask's positional targets).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict search to a single exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict search to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of results to retrieve (fixed k when adaptive is disabled).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit JSON (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit JSON in the legacy schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode: auto-select, lexical, semantic (or clip when enabled).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// Disable sketch pre-filtering (for benchmarking/debugging).
    /// By default, sketches are used for fast candidate generation if available.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
/// Search mode argument
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the engine pick lexical vs semantic based on what the file supports.
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
338
/// Adaptive retrieval strategy
///
/// Controls where the result list is cut off when adaptive retrieval is
/// enabled (the default for `find`/`ask`).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
353
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as inline comma-separated floats; exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding; exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Number of nearest neighbours to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
}
368
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
/// Audit output format
///
/// Selected with `audit --format`; default is plain text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
416
417// ============================================================================
418// Search & Retrieval command handlers
419// ============================================================================
420
/// Handle the `timeline` subcommand: list frames chronologically, optionally
/// constrained by limit/since/until, a temporal phrase (`--on`), and replay
/// (`--as-of-frame` / `--as-of-ts`) cutoffs. Output is JSON or stanzas.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of --on; reject them without it.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve --on into a temporal filter; keep the summary for display.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested: drop entries newer than the
    // requested frame-id and/or timestamp cutoffs (time-travel view).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // JSON output: wrap entries with the temporal summary when one exists;
        // without the feature the bare entries array is printed instead.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        // Human output: optional temporal summary header, then one stanza per
        // entry (header line, URI, child frames, temporal details).
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve a temporal phrase to a UTC window
/// and list the frames that fall inside it (JSON or human-readable output).
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase into a concrete filter plus a display summary.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    // JSON mode: emit the summary plus per-entry views, then return early.
    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    // Human-readable mode: summary header first, then one stanza per entry.
    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Best-effort ISO rendering; empty string when formatting fails.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
#[cfg(feature = "temporal_track")]
/// JSON envelope for `timeline --json` when a temporal phrase was resolved:
/// the optional resolution summary plus the (borrowed) entry list.
#[derive(Serialize)]
struct TimelineOutput<'a> {
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
602
#[cfg(feature = "temporal_track")]
/// JSON envelope for `when --json`: the resolution summary plus entry views.
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
609
#[cfg(feature = "temporal_track")]
/// Serializable view of one timeline entry for `when --json`.
/// Optional fields are omitted from the JSON when absent/empty.
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    timestamp: i64,       // unix seconds
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>, // best-effort ISO rendering of `timestamp`
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
#[cfg(feature = "temporal_track")]
/// Serializable form of a temporal-phrase resolution, shared by the
/// `timeline` and `when` JSON outputs. Each `*_utc` unix timestamp is paired
/// with a best-effort `*_iso` rendering.
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,       // original user phrase
    timezone: String,     // effective IANA zone
    anchor_utc: i64,      // "now" used for relative resolution, unix seconds
    anchor_iso: String,
    confidence: u16,      // resolver's confidence score
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    resolution_kind: &'static str, // e.g. "date", "datetime", "date_range"
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>, // user-requested padding
}
644
#[cfg(feature = "temporal_track")]
/// Internal record of how a temporal phrase was resolved, kept for display
/// and converted to `TemporalSummaryOutput` for JSON.
struct TemporalSummary {
    phrase: String,          // original user phrase from `--on`
    tz: String,              // effective IANA timezone
    anchor: OffsetDateTime,  // "now" used for relative resolution
    start_utc: Option<i64>,  // resolved window start, unix seconds
    end_utc: Option<i64>,    // resolved window end, unix seconds
    resolution: TemporalResolution,
    window_minutes: Option<u64>, // user-requested padding, minutes
}
655
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal phrase into a `TemporalFilter` plus a
/// `TemporalSummary` used for display/JSON output.
///
/// - `tz_override`: IANA zone; defaults to [`DEFAULT_TEMPORAL_TZ`].
/// - `anchor_override`: RFC3339 "now" for relative phrases; defaults to the
///   current UTC time.
/// - `window_minutes`: optional symmetric padding around the resolved window.
///
/// Errors carry stable `E-TEMP-xxx` codes so callers/tests can match them.
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad the resolved window symmetrically. Point-in-time results (s == e)
    // need no special case: the same subtraction/addition applies.
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            if let (Some(s), Some(e)) = (start, end) {
                let pad = TimeDuration::minutes(minutes as i64).whole_seconds();
                start = Some(s.saturating_sub(pad));
                end = Some(e.saturating_add(pad));
            }
        }
    }

    // Filter carries only the resolved bounds; phrase/tz live in the summary.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
#[cfg(feature = "temporal_track")]
/// Convert the internal resolution summary into its serializable JSON form.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    // Anchor rendered as RFC3339 when possible, raw unix seconds otherwise.
    let anchor_utc = summary.anchor.unix_timestamp();
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
#[cfg(feature = "temporal_track")]
/// Clone one timeline entry into the serializable `WhenEntry` view,
/// adding a best-effort ISO rendering of its timestamp.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
#[cfg(feature = "temporal_track")]
/// Print a human-readable description of how a temporal phrase was resolved:
/// phrase, timezone, anchor, window, confidence, flags, and padding.
fn print_temporal_summary(summary: &TemporalSummary) {
    // Anchor as RFC3339 when possible, raw unix timestamp otherwise.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!("Anchor: {anchor_text}");
    // A window whose endpoints coincide collapses to a single "Resolved to".
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect::<Vec<&'static str>>();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    // Only report padding when it was actually requested and non-zero.
    if let Some(window) = summary.window_minutes.filter(|&minutes| minutes > 0) {
        println!("Window padding: {window} minute(s)");
    }
    println!();
}
796
#[cfg(feature = "temporal_track")]
/// Print the temporal annotations attached to one entry: the anchor (if any)
/// followed by each temporal mention, indented for stanza output.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then formatted timestamp, then raw.
        let anchor_text = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", anchor_text, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
829
#[cfg(feature = "temporal_track")]
/// Convert a temporal resolution into inclusive `(start, end)` unix-timestamp
/// bounds. Point values (a single date or datetime) collapse to identical
/// start and end; a `Month` spans the first through the last day of that
/// month. Errors only when a month resolution does not name a valid date.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    let (start_ts, end_ts) = match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        TemporalResolutionValue::DateRange { start, end } => {
            (date_to_timestamp(*start), date_to_timestamp(*end))
        }
        TemporalResolutionValue::DateTimeRange { start, end } => {
            (start.unix_timestamp(), end.unix_timestamp())
        }
        TemporalResolutionValue::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(start_ts), Some(end_ts)))
}
860
#[cfg(feature = "temporal_track")]
/// Stable string tag naming which variant a temporal resolution holds,
/// suitable for machine-readable (JSON) output.
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match &resolution.value {
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
871
#[cfg(feature = "temporal_track")]
/// Unix timestamp (seconds) of midnight UTC at the start of `date`.
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
878
#[cfg(feature = "temporal_track")]
/// Return the last calendar day of the given month, by walking forward from
/// the 1st until the next day would roll into a different month.
/// Errors when `(year, month, 1)` is not a representable date.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let first = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    let mut last = first;
    while let Some(candidate) = last.next_day() {
        if candidate.month() != month {
            break;
        }
        last = candidate;
    }
    Ok(last)
}
892
893#[cfg(feature = "temporal_track")]
894
895fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
896    if fragments.is_empty() {
897        return;
898    }
899
900    response.context_fragments = fragments
901        .into_iter()
902        .map(|fragment| AskContextFragment {
903            rank: fragment.rank,
904            frame_id: fragment.frame_id,
905            uri: fragment.uri,
906            title: fragment.title,
907            score: fragment.score,
908            matches: fragment.matches,
909            range: Some(fragment.range),
910            chunk_range: fragment.chunk_range,
911            text: fragment.text,
912            kind: Some(match fragment.kind {
913                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
914                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
915            }),
916            #[cfg(feature = "temporal_track")]
917            temporal: None,
918        })
919        .collect();
920}
921
/// Handle the `ask` command: retrieve relevant context from a memory file and
/// optionally synthesize an answer with an LLM.
///
/// High-level flow:
/// 1. Enforce plan/quota, then resolve the question (positional words or
///    `--question`) and the target memory file (positional path or autodetect).
/// 2. Open the memory, pick an effective ask mode — falling back to lexical
///    when the file holds no vectors — and load an embedding runtime when the
///    mode needs one.
/// 3. Run retrieval via `Memvid::ask`, then post-process: optional
///    cross-encoder rerank, memory-card (`--memories`) and entity context
///    injection, optional PII masking (`--mask-pii`).
/// 4. Emit verbatim evidence (`--no-llm`), or run model inference
///    (`--use-model`) and print JSON or human-readable output.
///
/// Errors on missing/blank question, inverted `--start`/`--end`, mixed
/// embedding models for semantic modes, and unavailable embeddings for
/// sem/hybrid modes.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    // Track query usage against plan quota
    crate::api::track_query_usage(config, 1)?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split the positional targets: the first token that looks like a memory
    // file becomes the path, every other token is treated as a question word.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // `--question` wins over positional words; whitespace-only input counts
    // as no question at all.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Expand query for better retrieval using LLM (expands abbreviations, adds synonyms)
    // This happens when --use-model is set or we have an API key
    let (original_question, search_query) = {
        // For query expansion, we use the fastest available model
        // Priority: OpenAI > Groq > Anthropic > XAI > Mistral
        let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
            if let Ok(key) = std::env::var("OPENAI_API_KEY") {
                // OpenAI available - use gpt-4o-mini (fastest, cheapest)
                (Some("gpt-4o-mini"), Some(key))
            } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
                // Groq available - use llama-3.1-8b-instant (very fast)
                (Some("llama-3.1-8b-instant"), Some(key))
            } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
                // Anthropic available - use haiku
                (Some("claude-haiku-4-5"), Some(key))
            } else if let Ok(key) = std::env::var("XAI_API_KEY") {
                // XAI available - use grok-4-fast
                (Some("grok-4-fast"), Some(key))
            } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
                // Mistral available - use mistral-small
                (Some("mistral-small-latest"), Some(key))
            } else {
                // No fast model available for expansion
                (None, None)
            };

        // DISABLED: Query expansion for ask command
        // The ask command has sophisticated retrieval with fallbacks, aggregation detection,
        // temporal boosting, and diverse retrieval strategies. Query expansion often strips
        // out important semantic context (temporal markers, aggregation signals, analytical
        // keywords) that these strategies depend on. The original question is preserved
        // to ensure all downstream detection and ranking works correctly.
        //
        // Query expansion may be appropriate for simple keyword searches, but for complex
        // natural language questions it causes more problems than it solves.
        let _ = (model_for_expansion, api_key_for_expansion); // suppress unused warnings
        (question.clone(), question.clone())
    };

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Parse optional --start/--end date boundaries; end is inclusive-end
    // (second argument `true`), and an inverted window is rejected up front.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Check if memory has any vectors - if not, force lexical mode
    let stats = mem.stats()?;
    let has_vectors = stats.vector_count > 0;
    let effective_mode = if !has_vectors
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
        AskModeArg::Lex
    } else {
        args.mode.clone()
    };

    // For semantic/hybrid modes, infer which embedding model produced the
    // stored vectors so the query embedder can match it. Mixed models make
    // semantic scoring meaningless, so that case is a hard error.
    let ask_mode: AskMode = effective_mode.clone().into();
    let inferred_model_override = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => {
                identity.model.map(String::from)
            }
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // An explicit --query-embedding-model beats the inferred model.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            effective_mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.3,
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question: search_query, // Use expanded query for retrieval
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
        acl_context: None,
        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
    };
    // Dimension mismatches get a dedicated, actionable error message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => {
            anyhow!(vec_dimension_mismatch_help(expected, actual))
        }
        other => anyhow!(other),
    })?;

    // Restore original question for display and LLM synthesis
    // (search_query was used for retrieval but original_question is shown to user)
    response.question = original_question;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    // Skip for temporal/recency queries - cross-encoder doesn't understand temporal context
    // and would override the recency boost from lexical search
    let is_temporal_query = {
        let q_lower = response.question.to_lowercase();
        q_lower.contains("current")
            || q_lower.contains("latest")
            || q_lower.contains("recent")
            || q_lower.contains("now")
            || q_lower.contains("today")
            || q_lower.contains("updated")
            || q_lower.contains("new ")
            || q_lower.contains("newest")
    };
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
        && !is_temporal_query
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Reranking is best-effort: a failure only logs a warning and the
        // original retrieval order is kept.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    let mut model_result: Option<ModelInference> = None;
    if args.no_llm {
        // --no-llm: return verbatim evidence without LLM synthesis
        if args.use_model.is_some() {
            warn!("--use-model ignored because --no-llm disables LLM synthesis");
        }
        if args.json {
            emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
        } else {
            emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
        }

        // Save active replay session if one exists
        #[cfg(feature = "replay")]
        let _ = mem.save_active_session();

        return Ok(());
    } else if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        // Model inference is best-effort: on failure we warn and fall back to
        // the default summary rather than aborting the command.
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                response.answer = Some(inference.answer.answer.clone());
                response.retrieval.context = inference.context_body.clone();
                apply_model_context_fragments(&mut response, inference.context_fragments.clone());
                model_result = Some(inference);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref inference) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            // Extract frame IDs from retrieval hits for replay audit
            let retrieved_frames: Vec<u64> = response
                .retrieval
                .hits
                .iter()
                .map(|hit| hit.frame_id)
                .collect();

            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                inference.answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
                retrieved_frames,
            );
        }
    }

    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                effective_mode.clone(),
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            effective_mode.clone(),
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1346
1347/// Handle graph-aware find with --graph or --hybrid flags
1348fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1349    use memvid_core::graph_search::{QueryPlanner, hybrid_search};
1350    use memvid_core::types::QueryPlan;
1351
1352    let planner = QueryPlanner::new();
1353
1354    // Create query plan based on mode
1355    let plan = if args.graph {
1356        // Pure graph mode - let planner detect patterns
1357        let plan = planner.plan(&args.query, args.top_k);
1358        // If it's a hybrid plan from auto-detection, convert to graph-only
1359        match plan {
1360            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1361                QueryPlan::graph_only(graph_filter, args.top_k)
1362            }
1363            _ => plan,
1364        }
1365    } else {
1366        // Hybrid mode - use the auto-detected plan
1367        planner.plan(&args.query, args.top_k)
1368    };
1369
1370    // Execute the search
1371    let hits = hybrid_search(mem, &plan)?;
1372
1373    if args.json {
1374        // JSON output
1375        let output = serde_json::json!({
1376            "query": args.query,
1377            "mode": if args.graph { "graph" } else { "hybrid" },
1378            "plan": format!("{:?}", plan),
1379            "hits": hits.iter().map(|h| {
1380                serde_json::json!({
1381                    "frame_id": h.frame_id,
1382                    "score": h.score,
1383                    "graph_score": h.graph_score,
1384                    "vector_score": h.vector_score,
1385                    "matched_entity": h.matched_entity,
1386                    "preview": h.preview,
1387                })
1388            }).collect::<Vec<_>>(),
1389        });
1390        println!("{}", serde_json::to_string_pretty(&output)?);
1391    } else {
1392        // Human-readable output
1393        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1394        println!("{} search for: \"{}\"", mode_str, args.query);
1395        println!("Plan: {:?}", plan);
1396        println!();
1397
1398        if hits.is_empty() {
1399            println!("No results found.");
1400        } else {
1401            println!("Results ({} hits):", hits.len());
1402            for (i, hit) in hits.iter().enumerate() {
1403                println!();
1404                println!(
1405                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1406                    i + 1,
1407                    hit.frame_id,
1408                    hit.score,
1409                    hit.graph_score,
1410                    hit.vector_score
1411                );
1412                if let Some(entity) = &hit.matched_entity {
1413                    println!("   Matched entity: {}", entity);
1414                }
1415                if let Some(preview) = &hit.preview {
1416                    let truncated = if preview.len() > 200 {
1417                        format!("{}...", &preview[..200])
1418                    } else {
1419                        preview.clone()
1420                    };
1421                    println!("   {}", truncated.replace('\n', " "));
1422                }
1423            }
1424        }
1425    }
1426
1427    Ok(())
1428}
1429
1430pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1431    // Check if plan allows query operations (blocks expired subscriptions)
1432    crate::utils::require_active_plan(config, "find")?;
1433
1434    // Track query usage against plan quota
1435    crate::api::track_query_usage(config, 1)?;
1436
1437    let mut mem = open_read_only_mem(&args.file)?;
1438
1439    // Load active replay session if one exists
1440    #[cfg(feature = "replay")]
1441    let _ = mem.load_active_session();
1442
1443    // Handle graph-aware and hybrid search modes
1444    if args.graph || args.hybrid {
1445        return handle_graph_find(&mut mem, &args);
1446    }
1447
1448    if args.uri.is_some() && args.scope.is_some() {
1449        warn!("--scope ignored because --uri is provided");
1450    }
1451
1452    // Get vector dimension from MV2 for auto-detection
1453    let mv2_dimension = mem.effective_vec_index_dimension()?;
1454    let identity_summary = match args.mode {
1455        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1456        #[cfg(feature = "clip")]
1457        SearchMode::Clip => None,
1458        SearchMode::Lex => None,
1459    };
1460
1461    let mut semantic_allowed = true;
1462    let inferred_model_override = match identity_summary.as_ref() {
1463        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1464            identity.model.as_deref().map(|value| value.to_string())
1465        }
1466        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1467            let models: Vec<_> = identities
1468                .iter()
1469                .filter_map(|entry| entry.identity.model.as_deref())
1470                .collect();
1471            if args.mode == SearchMode::Sem {
1472                anyhow::bail!(
1473                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1474                    Detected models: {:?}\n\n\
1475                    Suggested fix: split into separate memories per embedding model.",
1476                    models
1477                );
1478            }
1479            warn!(
1480                "semantic search disabled: mixed embedding models detected: {:?}",
1481                models
1482            );
1483            semantic_allowed = false;
1484            None
1485        }
1486        _ => None,
1487    };
1488
1489    let emb_model_override = args
1490        .query_embedding_model
1491        .as_deref()
1492        .or(inferred_model_override.as_deref());
1493
1494    let (mode_label, runtime_option) = match args.mode {
1495        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1496        SearchMode::Sem => {
1497            let runtime =
1498                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1499            ("Semantic (vector search)".to_string(), Some(runtime))
1500        }
1501        SearchMode::Auto => {
1502            if !semantic_allowed {
1503                ("Lexical (semantic unsafe)".to_string(), None)
1504            } else if let Some(runtime) =
1505                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1506            {
1507                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1508            } else {
1509                ("Lexical (semantic unavailable)".to_string(), None)
1510            }
1511        }
1512        #[cfg(feature = "clip")]
1513        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1514    };
1515
1516    let mode_key = match args.mode {
1517        SearchMode::Sem => "semantic",
1518        SearchMode::Lex => "text",
1519        SearchMode::Auto => {
1520            if runtime_option.is_some() {
1521                "hybrid"
1522            } else {
1523                "text"
1524            }
1525        }
1526        #[cfg(feature = "clip")]
1527        SearchMode::Clip => "clip",
1528    };
1529
1530    // For CLIP mode, use CLIP visual search
1531    #[cfg(feature = "clip")]
1532    if args.mode == SearchMode::Clip {
1533        use memvid_core::clip::{ClipConfig, ClipModel};
1534
1535        // Initialize CLIP model
1536        let config = ClipConfig::default();
1537        let clip = ClipModel::new(config).map_err(|e| {
1538            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1539        })?;
1540
1541        // Encode query text
1542        let query_embedding = clip
1543            .encode_text(&args.query)
1544            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1545
1546        // Search CLIP index
1547        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1548
1549        // Debug distances before filtering
1550        for hit in &hits {
1551            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1552                tracing::debug!(
1553                    frame_id = hit.frame_id,
1554                    title = %frame.title.unwrap_or_default(),
1555                    page = hit.page,
1556                    distance = hit.distance,
1557                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1558                    "CLIP raw hit"
1559                );
1560            } else {
1561                tracing::debug!(
1562                    frame_id = hit.frame_id,
1563                    page = hit.page,
1564                    distance = hit.distance,
1565                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1566                    "CLIP raw hit (missing frame)"
1567                );
1568            }
1569        }
1570
1571        // CLIP distance threshold for filtering poor matches
1572        // CLIP uses L2 distance on normalized embeddings:
1573        //   - distance² = 2(1 - cosine_similarity)
1574        //   - distance = 0 → identical (cosine_sim = 1)
1575        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1576        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1577        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1578        //   - distance = 2.0 → opposite (cosine_sim = -1)
1579        //
1580        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1581        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1582        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1583        const CLIP_MAX_DISTANCE: f32 = 1.26;
1584
1585        // Convert CLIP hits to SearchResponse format, filtering by threshold
1586        let search_hits: Vec<SearchHit> = hits
1587            .into_iter()
1588            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1589            .enumerate()
1590            .filter_map(|(rank, hit)| {
1591                // Convert L2 distance to cosine similarity for display
1592                // cos_sim = 1 - (distance² / 2)
1593                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1594
1595                // Get frame preview for snippet
1596                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1597                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1598                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1599                let title = match (base_title, hit.page) {
1600                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1601                    (Some(t), None) => Some(t),
1602                    (None, Some(p)) => Some(format!("Page {p}")),
1603                    _ => None,
1604                };
1605                Some(SearchHit {
1606                    rank: rank + 1,
1607                    frame_id: hit.frame_id,
1608                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1609                    title,
1610                    text: preview.clone(),
1611                    chunk_text: Some(preview),
1612                    range: (0, 0),
1613                    chunk_range: None,
1614                    matches: 0,
1615                    score: Some(cosine_similarity),
1616                    metadata: None,
1617                })
1618            })
1619            .collect();
1620
1621        let response = SearchResponse {
1622            query: args.query.clone(),
1623            hits: search_hits.clone(),
1624            total_hits: search_hits.len(),
1625            params: memvid_core::SearchParams {
1626                top_k: args.top_k,
1627                snippet_chars: args.snippet_chars,
1628                cursor: args.cursor.clone(),
1629            },
1630            elapsed_ms: 0,
1631            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1632            next_cursor: None,
1633            context: String::new(),
1634        };
1635
1636        if args.json_legacy {
1637            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1638            emit_legacy_search_json(&response)?;
1639        } else if args.json {
1640            emit_search_json(&response, mode_key)?;
1641        } else {
1642            println!(
1643                "mode: {}   k={}   time: {} ms",
1644                mode_label, response.params.top_k, response.elapsed_ms
1645            );
1646            println!("engine: clip (MobileCLIP-S2)");
1647            println!(
1648                "hits: {} (showing {})",
1649                response.total_hits,
1650                response.hits.len()
1651            );
1652            emit_search_table(&response);
1653        }
1654        return Ok(());
1655    }
1656
1657    // For semantic mode, use pure vector search.
1658    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1659        let runtime = runtime_option
1660            .as_ref()
1661            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1662
1663        // Embed the query
1664        let query_embedding = runtime.embed_query(&args.query)?;
1665
1666        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1667        let scope = args.scope.as_deref().or(args.uri.as_deref());
1668
1669        if !args.no_adaptive {
1670            // Build adaptive config from CLI args
1671            let strategy = match args.adaptive_strategy {
1672                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1673                    min_ratio: args.min_relevancy,
1674                },
1675                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1676                    min_score: args.min_relevancy,
1677                },
1678                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1679                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1680                },
1681                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1682                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1683                    relative_threshold: args.min_relevancy,
1684                    max_drop_ratio: 0.35,
1685                    absolute_min: 0.3,
1686                },
1687            };
1688
1689            let config = AdaptiveConfig {
1690                enabled: true,
1691                max_results: args.max_k,
1692                min_results: 1,
1693                strategy,
1694                normalize_scores: true,
1695            };
1696
1697            match mem.search_adaptive(
1698                &args.query,
1699                &query_embedding,
1700                config,
1701                args.snippet_chars,
1702                scope,
1703            ) {
1704                Ok(result) => {
1705                    let mut resp = SearchResponse {
1706                        query: args.query.clone(),
1707                        hits: result.results,
1708                        total_hits: result.stats.returned,
1709                        params: memvid_core::SearchParams {
1710                            top_k: result.stats.returned,
1711                            snippet_chars: args.snippet_chars,
1712                            cursor: args.cursor.clone(),
1713                        },
1714                        elapsed_ms: 0,
1715                        engine: SearchEngineKind::Hybrid,
1716                        next_cursor: None,
1717                        context: String::new(),
1718                    };
1719                    apply_preference_rerank(&mut resp);
1720                    (
1721                        resp,
1722                        "semantic (adaptive vector search)".to_string(),
1723                        Some(result.stats),
1724                    )
1725                }
1726                Err(e) => {
1727                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1728                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1729                    }
1730
1731                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1732                    match mem.vec_search_with_embedding(
1733                        &args.query,
1734                        &query_embedding,
1735                        args.top_k,
1736                        args.snippet_chars,
1737                        scope,
1738                    ) {
1739                        Ok(mut resp) => {
1740                            apply_preference_rerank(&mut resp);
1741                            (resp, "semantic (vector search fallback)".to_string(), None)
1742                        }
1743                        Err(e2) => {
1744                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1745                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1746                            }
1747                            return Err(anyhow!(
1748                                "Both adaptive and fixed-k search failed: {e}, {e2}"
1749                            ));
1750                        }
1751                    }
1752                }
1753            }
1754        } else {
1755            // Standard fixed-k vector search
1756            match mem.vec_search_with_embedding(
1757                &args.query,
1758                &query_embedding,
1759                args.top_k,
1760                args.snippet_chars,
1761                scope,
1762            ) {
1763                Ok(mut resp) => {
1764                    // Apply preference boost to rerank results for preference-seeking queries
1765                    apply_preference_rerank(&mut resp);
1766                    (resp, "semantic (vector search)".to_string(), None)
1767                }
1768                Err(e) => {
1769                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1770                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1771                    }
1772
1773                    // Fall back to lexical search + rerank if vector search fails
1774                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1775                    let request = SearchRequest {
1776                        query: args.query.clone(),
1777                        top_k: args.top_k,
1778                        snippet_chars: args.snippet_chars,
1779                        uri: args.uri.clone(),
1780                        scope: args.scope.clone(),
1781                        cursor: args.cursor.clone(),
1782                        #[cfg(feature = "temporal_track")]
1783                        temporal: None,
1784                        as_of_frame: args.as_of_frame,
1785                        as_of_ts: args.as_of_ts,
1786                        no_sketch: args.no_sketch,
1787                        acl_context: None,
1788                        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1789                    };
1790                    let mut resp = mem.search(request)?;
1791                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1792                    (resp, "semantic (fallback rerank)".to_string(), None)
1793                }
1794            }
1795        }
1796    } else {
1797        // For lexical and auto modes, use existing behavior
1798        let request = SearchRequest {
1799            query: args.query.clone(),
1800            top_k: args.top_k,
1801            snippet_chars: args.snippet_chars,
1802            uri: args.uri.clone(),
1803            scope: args.scope.clone(),
1804            cursor: args.cursor.clone(),
1805            #[cfg(feature = "temporal_track")]
1806            temporal: None,
1807            as_of_frame: args.as_of_frame,
1808            as_of_ts: args.as_of_ts,
1809            no_sketch: args.no_sketch,
1810            acl_context: None,
1811            acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1812        };
1813
1814        let mut resp = mem.search(request)?;
1815
1816        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1817            warn!("Search index unavailable; returning basic text results");
1818        }
1819
1820        let mut engine_label = match resp.engine {
1821            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1822            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1823            SearchEngineKind::Hybrid => "hybrid".to_string(),
1824        };
1825
1826        if runtime_option.is_some() {
1827            engine_label = format!("hybrid ({engine_label} + semantic)");
1828        }
1829
1830        if let Some(ref runtime) = runtime_option {
1831            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1832        }
1833
1834        (resp, engine_label, None)
1835    };
1836
1837    if args.json_legacy {
1838        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1839        emit_legacy_search_json(&response)?;
1840    } else if args.json {
1841        emit_search_json(&response, mode_key)?;
1842    } else {
1843        println!(
1844            "mode: {}   k={}   time: {} ms",
1845            mode_label, response.params.top_k, response.elapsed_ms
1846        );
1847        println!("engine: {}", engine_label);
1848
1849        // Show adaptive retrieval stats if enabled
1850        if let Some(ref stats) = adaptive_stats {
1851            println!(
1852                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1853                stats.total_considered,
1854                stats.returned,
1855                stats.triggered_by,
1856                stats.top_score.unwrap_or(0.0),
1857                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1858            );
1859        }
1860
1861        println!(
1862            "hits: {} (showing {})",
1863            response.total_hits,
1864            response.hits.len()
1865        );
1866        emit_search_table(&response);
1867    }
1868
1869    // Save active replay session if one exists
1870    #[cfg(feature = "replay")]
1871    let _ = mem.save_active_session();
1872
1873    Ok(())
1874}
1875
1876pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1877    // Track query usage against plan quota
1878    crate::api::track_query_usage(config, 1)?;
1879
1880    let mut mem = open_read_only_mem(&args.file)?;
1881    let vector = if let Some(path) = args.embedding.as_deref() {
1882        read_embedding(path)?
1883    } else if let Some(vector_string) = &args.vector {
1884        parse_vector(vector_string)?
1885    } else {
1886        anyhow::bail!("provide --vector or --embedding for search input");
1887    };
1888
1889    let hits = mem
1890        .search_vec(&vector, args.limit)
1891        .map_err(|err| match err {
1892            MemvidError::VecDimensionMismatch { expected, actual } => {
1893                anyhow!(vec_dimension_mismatch_help(expected, actual))
1894            }
1895            other => anyhow!(other),
1896        })?;
1897    let mut enriched = Vec::with_capacity(hits.len());
1898    for hit in hits {
1899        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1900        enriched.push((hit.frame_id, hit.distance, preview));
1901    }
1902
1903    if args.json {
1904        let json_hits: Vec<_> = enriched
1905            .iter()
1906            .map(|(frame_id, distance, preview)| {
1907                json!({
1908                    "frame_id": frame_id,
1909                    "distance": distance,
1910                    "preview": preview,
1911                })
1912            })
1913            .collect();
1914        let json_str = serde_json::to_string_pretty(&json_hits)?;
1915        println!("{}", json_str.to_colored_json_auto()?);
1916    } else if enriched.is_empty() {
1917        println!("No vector matches found");
1918    } else {
1919        for (frame_id, distance, preview) in enriched {
1920            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1921        }
1922    }
1923    Ok(())
1924}
1925
/// Handle `audit`: produce a citation-backed report answering `args.question`
/// over an optional date window and scope.
///
/// Flow: open the memory, validate `--start`/`--end`, pick an embedding
/// runtime based on the ask mode, run the core audit, optionally synthesize
/// the final answer with `--use-model`, then render text/markdown/JSON to
/// stdout or `--out`.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries and reject inverted ranges early.
    // NOTE(review): the `false`/`true` flags presumably select start-of-day
    // vs end-of-day rounding in parse_date_boundary — confirm against its
    // definition before relying on this.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed: lexical mode never uses one,
    // semantic mode requires one (hard error on failure), hybrid mode uses
    // one opportunistically (try_load returns None when unavailable).
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    // Borrow the runtime as a trait object for the core audit API.
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options from the CLI arguments; snippets are always
    // included so the report (and any model synthesis) has source text.
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit against the memory.
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    // from the collected source snippets; on failure we keep the default
    // answer and only warn (best-effort enhancement, not a hard dependency).
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference: concatenate every
        // available snippet, separated by blank lines.
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                // Replace the default answer and record which model produced it.
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output in the requested representation.
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write to --out when given, otherwise print the report to stdout.
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
2018
2019fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2020    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2021
2022    let mut additional_params = serde_json::Map::new();
2023    if let Some(cursor) = &response.params.cursor {
2024        additional_params.insert("cursor".into(), json!(cursor));
2025    }
2026
2027    let mut params = serde_json::Map::new();
2028    params.insert("top_k".into(), json!(response.params.top_k));
2029    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2030    params.insert("mode".into(), json!(mode));
2031    params.insert(
2032        "additional_params".into(),
2033        serde_json::Value::Object(additional_params),
2034    );
2035
2036    let mut metadata_json = serde_json::Map::new();
2037    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2038    metadata_json.insert("total_hits".into(), json!(response.total_hits));
2039    metadata_json.insert(
2040        "next_cursor".into(),
2041        match &response.next_cursor {
2042            Some(cursor) => json!(cursor),
2043            None => serde_json::Value::Null,
2044        },
2045    );
2046    metadata_json.insert("engine".into(), json!(response.engine));
2047    metadata_json.insert("params".into(), serde_json::Value::Object(params));
2048
2049    let body = json!({
2050        "version": "mv2.result.v2",
2051        "query": response.query,
2052        "metadata": metadata_json,
2053        "hits": hits,
2054        "context": response.context,
2055    });
2056    let json_str = serde_json::to_string_pretty(&body)?;
2057    println!("{}", json_str.to_colored_json_auto()?);
2058    Ok(())
2059}
2060
2061fn emit_ask_json(
2062    response: &AskResponse,
2063    requested_mode: AskModeArg,
2064    inference: Option<&ModelInference>,
2065    include_sources: bool,
2066    mem: &mut Memvid,
2067) -> Result<()> {
2068    let hits: Vec<_> = response
2069        .retrieval
2070        .hits
2071        .iter()
2072        .map(search_hit_to_json)
2073        .collect();
2074
2075    let citations: Vec<_> = response
2076        .citations
2077        .iter()
2078        .map(|citation| {
2079            let mut map = serde_json::Map::new();
2080            map.insert("index".into(), json!(citation.index));
2081            map.insert("frame_id".into(), json!(citation.frame_id));
2082            map.insert("uri".into(), json!(citation.uri));
2083            if let Some(range) = citation.chunk_range {
2084                map.insert("chunk_range".into(), json!([range.0, range.1]));
2085            }
2086            if let Some(score) = citation.score {
2087                map.insert("score".into(), json!(score));
2088            }
2089            serde_json::Value::Object(map)
2090        })
2091        .collect();
2092
2093    let mut body = json!({
2094        "version": "mv2.ask.v1",
2095        "question": response.question,
2096        "answer": response.answer,
2097        "context_only": response.context_only,
2098        "mode": ask_mode_display(requested_mode),
2099        "retriever": ask_retriever_display(response.retriever),
2100        "top_k": response.retrieval.params.top_k,
2101        "results": hits,
2102        "citations": citations,
2103        "stats": {
2104            "retrieval_ms": response.stats.retrieval_ms,
2105            "synthesis_ms": response.stats.synthesis_ms,
2106            "latency_ms": response.stats.latency_ms,
2107        },
2108        "engine": search_engine_label(&response.retrieval.engine),
2109        "total_hits": response.retrieval.total_hits,
2110        "next_cursor": response.retrieval.next_cursor,
2111        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2112    });
2113
2114    if let Some(inf) = inference {
2115        let model = &inf.answer;
2116        if let serde_json::Value::Object(ref mut map) = body {
2117            map.insert("model".into(), json!(model.requested));
2118            if model.model != model.requested {
2119                map.insert("model_used".into(), json!(model.model));
2120            }
2121            map.insert("cached".into(), json!(inf.cached));
2122            // Add usage and cost if available
2123            if let Some(usage) = &inf.usage {
2124                map.insert(
2125                    "usage".into(),
2126                    json!({
2127                        "input_tokens": usage.input_tokens,
2128                        "output_tokens": usage.output_tokens,
2129                        "total_tokens": usage.total_tokens,
2130                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2131                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2132                    }),
2133                );
2134            }
2135            // Add grounding/hallucination score if available
2136            if let Some(grounding) = &inf.grounding {
2137                map.insert(
2138                    "grounding".into(),
2139                    json!({
2140                        "score": grounding.score,
2141                        "label": grounding.label(),
2142                        "sentence_count": grounding.sentence_count,
2143                        "grounded_sentences": grounding.grounded_sentences,
2144                        "has_warning": grounding.has_warning,
2145                        "warning_reason": grounding.warning_reason,
2146                    }),
2147                );
2148            }
2149        }
2150    }
2151
2152    // Add detailed sources if requested
2153    if include_sources {
2154        if let serde_json::Value::Object(ref mut map) = body {
2155            let sources = build_sources_json(response, mem);
2156            map.insert("sources".into(), json!(sources));
2157        }
2158    }
2159
2160    // Add follow-up suggestions if confidence is low
2161    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2162        if let serde_json::Value::Object(ref mut map) = body {
2163            map.insert("follow_up".into(), follow_up);
2164        }
2165    }
2166
2167    println!("{}", serde_json::to_string_pretty(&body)?);
2168    Ok(())
2169}
2170
2171fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2172    response
2173        .citations
2174        .iter()
2175        .enumerate()
2176        .map(|(idx, citation)| {
2177            let mut source = serde_json::Map::new();
2178            source.insert("index".into(), json!(idx + 1));
2179            source.insert("frame_id".into(), json!(citation.frame_id));
2180            source.insert("uri".into(), json!(citation.uri));
2181
2182            if let Some(range) = citation.chunk_range {
2183                source.insert("chunk_range".into(), json!([range.0, range.1]));
2184            }
2185            if let Some(score) = citation.score {
2186                source.insert("score".into(), json!(score));
2187            }
2188
2189            // Get frame metadata for rich source information
2190            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2191                if let Some(title) = frame.title {
2192                    source.insert("title".into(), json!(title));
2193                }
2194                if !frame.tags.is_empty() {
2195                    source.insert("tags".into(), json!(frame.tags));
2196                }
2197                if !frame.labels.is_empty() {
2198                    source.insert("labels".into(), json!(frame.labels));
2199                }
2200                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2201                if !frame.content_dates.is_empty() {
2202                    source.insert("content_dates".into(), json!(frame.content_dates));
2203                }
2204            }
2205
2206            // Get snippet from hit
2207            if let Some(hit) = response
2208                .retrieval
2209                .hits
2210                .iter()
2211                .find(|h| h.frame_id == citation.frame_id)
2212            {
2213                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2214                source.insert("snippet".into(), json!(snippet));
2215            }
2216
2217            serde_json::Value::Object(source)
2218        })
2219        .collect()
2220}
2221
2222/// Build follow-up suggestions when the answer has low grounding/confidence.
2223/// Helps users understand what the memory contains and suggests relevant questions.
2224fn build_follow_up_suggestions(
2225    response: &AskResponse,
2226    inference: Option<&ModelInference>,
2227    mem: &mut Memvid,
2228) -> Option<serde_json::Value> {
2229    // Check if we need follow-up suggestions
2230    let needs_followup = inference
2231        .and_then(|inf| inf.grounding.as_ref())
2232        .map(|g| g.score < 0.3 || g.has_warning)
2233        .unwrap_or(false);
2234
2235    // Also trigger if retrieval hits have very low scores or no hits
2236    let low_retrieval = response
2237        .retrieval
2238        .hits
2239        .first()
2240        .and_then(|h| h.score)
2241        .map(|score| score < -2.0)
2242        .unwrap_or(true);
2243
2244    if !needs_followup && !low_retrieval {
2245        return None;
2246    }
2247
2248    // Get available topics from the memory by sampling timeline entries
2249    let limit = std::num::NonZeroU64::new(20).unwrap();
2250    let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2251
2252    let available_topics: Vec<String> = mem
2253        .timeline(timeline_query)
2254        .ok()
2255        .map(|entries| {
2256            entries
2257                .iter()
2258                .filter_map(|e| {
2259                    // Extract meaningful preview/title
2260                    let preview = e.preview.trim();
2261                    if preview.is_empty() || preview.len() < 5 {
2262                        return None;
2263                    }
2264                    // Get first line or truncate
2265                    let first_line = preview.lines().next().unwrap_or(preview);
2266                    if first_line.len() > 60 {
2267                        Some(format!("{}...", &first_line[..57]))
2268                    } else {
2269                        Some(first_line.to_string())
2270                    }
2271                })
2272                .collect::<std::collections::HashSet<_>>()
2273                .into_iter()
2274                .take(5)
2275                .collect()
2276        })
2277        .unwrap_or_default();
2278
2279    // Determine the reason for low confidence
2280    let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2281        "No relevant information found in memory"
2282    } else if inference
2283        .and_then(|i| i.grounding.as_ref())
2284        .map(|g| g.has_warning)
2285        .unwrap_or(false)
2286    {
2287        "Answer may not be well-supported by the available context"
2288    } else {
2289        "Low confidence in the answer"
2290    };
2291
2292    // Generate suggestion questions based on available topics
2293    let suggestions: Vec<String> = if available_topics.is_empty() {
2294        vec![
2295            "What information is stored in this memory?".to_string(),
2296            "Can you list the main topics covered?".to_string(),
2297        ]
2298    } else {
2299        available_topics
2300            .iter()
2301            .take(3)
2302            .map(|topic| format!("Tell me about {}", topic))
2303            .chain(std::iter::once(
2304                "What topics are in this memory?".to_string(),
2305            ))
2306            .collect()
2307    };
2308
2309    Some(json!({
2310        "needed": true,
2311        "reason": reason,
2312        "hint": if available_topics.is_empty() {
2313            "This memory may not contain information about your query."
2314        } else {
2315            "This memory contains information about different topics. Try asking about those instead."
2316        },
2317        "available_topics": available_topics,
2318        "suggestions": suggestions
2319    }))
2320}
2321
2322fn emit_model_json(
2323    response: &AskResponse,
2324    requested_model: &str,
2325    inference: Option<&ModelInference>,
2326    include_sources: bool,
2327    mem: &mut Memvid,
2328) -> Result<()> {
2329    let answer = response.answer.clone().unwrap_or_default();
2330    let requested_label = inference
2331        .map(|m| m.answer.requested.clone())
2332        .unwrap_or_else(|| requested_model.to_string());
2333    let used_label = inference
2334        .map(|m| m.answer.model.clone())
2335        .unwrap_or_else(|| requested_model.to_string());
2336
2337    let mut body = json!({
2338        "question": response.question,
2339        "model": requested_label,
2340        "model_used": used_label,
2341        "answer": answer,
2342        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2343    });
2344
2345    // Add usage and cost if available
2346    if let Some(inf) = inference {
2347        if let serde_json::Value::Object(ref mut map) = body {
2348            map.insert("cached".into(), json!(inf.cached));
2349            if let Some(usage) = &inf.usage {
2350                map.insert(
2351                    "usage".into(),
2352                    json!({
2353                        "input_tokens": usage.input_tokens,
2354                        "output_tokens": usage.output_tokens,
2355                        "total_tokens": usage.total_tokens,
2356                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2357                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2358                    }),
2359                );
2360            }
2361            if let Some(grounding) = &inf.grounding {
2362                map.insert(
2363                    "grounding".into(),
2364                    json!({
2365                        "score": grounding.score,
2366                        "label": grounding.label(),
2367                        "sentence_count": grounding.sentence_count,
2368                        "grounded_sentences": grounding.grounded_sentences,
2369                        "has_warning": grounding.has_warning,
2370                        "warning_reason": grounding.warning_reason,
2371                    }),
2372                );
2373            }
2374        }
2375    }
2376
2377    // Add detailed sources if requested
2378    if include_sources {
2379        if let serde_json::Value::Object(ref mut map) = body {
2380            let sources = build_sources_json(response, mem);
2381            map.insert("sources".into(), json!(sources));
2382        }
2383    }
2384
2385    // Add follow-up suggestions if confidence is low
2386    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2387        if let serde_json::Value::Object(ref mut map) = body {
2388            map.insert("follow_up".into(), follow_up);
2389        }
2390    }
2391
2392    // Use colored JSON output
2393    let json_str = serde_json::to_string_pretty(&body)?;
2394    println!("{}", json_str.to_colored_json_auto()?);
2395    Ok(())
2396}
2397
2398fn emit_ask_pretty(
2399    response: &AskResponse,
2400    requested_mode: AskModeArg,
2401    inference: Option<&ModelInference>,
2402    include_sources: bool,
2403    mem: &mut Memvid,
2404) {
2405    println!(
2406        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2407        ask_mode_pretty(requested_mode),
2408        ask_retriever_pretty(response.retriever),
2409        response.retrieval.params.top_k,
2410        response.stats.latency_ms,
2411        response.stats.retrieval_ms
2412    );
2413    if let Some(inference) = inference {
2414        let model = &inference.answer;
2415        let cached_label = if inference.cached { " [CACHED]" } else { "" };
2416        if model.requested.trim() == model.model {
2417            println!("model: {}{}", model.model, cached_label);
2418        } else {
2419            println!(
2420                "model requested: {}   model used: {}{}",
2421                model.requested, model.model, cached_label
2422            );
2423        }
2424        // Display usage and cost if available
2425        if let Some(usage) = &inference.usage {
2426            let cost_label = if inference.cached {
2427                format!("$0.00 (saved ${:.6})", usage.cost_usd)
2428            } else {
2429                format!("${:.6}", usage.cost_usd)
2430            };
2431            println!(
2432                "tokens: {} input + {} output = {}   cost: {}",
2433                usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
2434            );
2435        }
2436        // Display grounding/hallucination score
2437        if let Some(grounding) = &inference.grounding {
2438            let warning = if grounding.has_warning {
2439                format!(
2440                    " [WARNING: {}]",
2441                    grounding
2442                        .warning_reason
2443                        .as_deref()
2444                        .unwrap_or("potential hallucination")
2445                )
2446            } else {
2447                String::new()
2448            };
2449            println!(
2450                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2451                grounding.score * 100.0,
2452                grounding.label(),
2453                grounding.grounded_sentences,
2454                grounding.sentence_count,
2455                warning
2456            );
2457        }
2458    }
2459    println!(
2460        "engine: {}",
2461        search_engine_label(&response.retrieval.engine)
2462    );
2463    println!(
2464        "hits: {} (showing {})",
2465        response.retrieval.total_hits,
2466        response.retrieval.hits.len()
2467    );
2468
2469    if response.context_only {
2470        println!();
2471        println!("Context-only mode: synthesis disabled.");
2472        println!();
2473    } else if let Some(answer) = &response.answer {
2474        println!();
2475        println!("Answer:\n{answer}");
2476        println!();
2477    }
2478
2479    if !response.citations.is_empty() {
2480        println!("Citations:");
2481        for citation in &response.citations {
2482            match citation.score {
2483                Some(score) => println!(
2484                    "[{}] {} (frame {}, score {:.3})",
2485                    citation.index, citation.uri, citation.frame_id, score
2486                ),
2487                None => println!(
2488                    "[{}] {} (frame {})",
2489                    citation.index, citation.uri, citation.frame_id
2490                ),
2491            }
2492        }
2493        println!();
2494    }
2495
2496    // Print detailed sources if requested
2497    if include_sources && !response.citations.is_empty() {
2498        println!("=== SOURCES ===");
2499        println!();
2500        for citation in &response.citations {
2501            println!("[{}] {}", citation.index, citation.uri);
2502
2503            // Get frame metadata
2504            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2505                if let Some(title) = &frame.title {
2506                    println!("    Title: {}", title);
2507                }
2508                println!("    Frame ID: {}", citation.frame_id);
2509                if let Some(score) = citation.score {
2510                    println!("    Score: {:.4}", score);
2511                }
2512                if let Some((start, end)) = citation.chunk_range {
2513                    println!("    Range: [{}..{})", start, end);
2514                }
2515                if !frame.tags.is_empty() {
2516                    println!("    Tags: {}", frame.tags.join(", "));
2517                }
2518                if !frame.labels.is_empty() {
2519                    println!("    Labels: {}", frame.labels.join(", "));
2520                }
2521                println!("    Timestamp: {}", frame.timestamp);
2522                if !frame.content_dates.is_empty() {
2523                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2524                }
2525            }
2526
2527            // Get snippet from hit
2528            if let Some(hit) = response
2529                .retrieval
2530                .hits
2531                .iter()
2532                .find(|h| h.frame_id == citation.frame_id)
2533            {
2534                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2535                let truncated = if snippet.len() > 200 {
2536                    format!("{}...", &snippet[..200])
2537                } else {
2538                    snippet.clone()
2539                };
2540                println!("    Snippet: {}", truncated.replace('\n', " "));
2541            }
2542            println!();
2543        }
2544    }
2545
2546    if !include_sources {
2547        println!();
2548        emit_search_table(&response.retrieval);
2549    }
2550
2551    // Display follow-up suggestions if confidence is low
2552    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2553        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2554            if needed {
2555                println!();
2556                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2557                println!("💡 FOLLOW-UP SUGGESTIONS");
2558                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2559
2560                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2561                    println!("Reason: {}", reason);
2562                }
2563
2564                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2565                    println!("Hint: {}", hint);
2566                }
2567
2568                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2569                    if !topics.is_empty() {
2570                        println!();
2571                        println!("Available topics in this memory:");
2572                        for topic in topics.iter().filter_map(|t| t.as_str()) {
2573                            println!("  • {}", topic);
2574                        }
2575                    }
2576                }
2577
2578                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2579                    if !suggestions.is_empty() {
2580                        println!();
2581                        println!("Try asking:");
2582                        for (i, suggestion) in
2583                            suggestions.iter().filter_map(|s| s.as_str()).enumerate()
2584                        {
2585                            println!("  {}. \"{}\"", i + 1, suggestion);
2586                        }
2587                    }
2588                }
2589                println!();
2590            }
2591        }
2592    }
2593}
2594
2595/// Emit verbatim evidence as JSON without LLM synthesis.
2596/// Format: {evidence: [{source, text, score}], question, hits, stats}
2597fn emit_verbatim_evidence_json(
2598    response: &AskResponse,
2599    include_sources: bool,
2600    mem: &mut Memvid,
2601) -> Result<()> {
2602    // Build evidence array from hits - verbatim excerpts with citations
2603    let evidence: Vec<_> = response
2604        .retrieval
2605        .hits
2606        .iter()
2607        .enumerate()
2608        .map(|(idx, hit)| {
2609            let mut entry = serde_json::Map::new();
2610            entry.insert("index".into(), json!(idx + 1));
2611            entry.insert("frame_id".into(), json!(hit.frame_id));
2612            entry.insert("uri".into(), json!(&hit.uri));
2613            if let Some(title) = &hit.title {
2614                entry.insert("title".into(), json!(title));
2615            }
2616            // Use chunk_text if available (more specific), otherwise full text
2617            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2618            entry.insert("text".into(), json!(verbatim));
2619            if let Some(score) = hit.score {
2620                entry.insert("score".into(), json!(score));
2621            }
2622            serde_json::Value::Object(entry)
2623        })
2624        .collect();
2625
2626    // Build sources array if requested
2627    let sources: Option<Vec<_>> = if include_sources {
2628        Some(
2629            response
2630                .retrieval
2631                .hits
2632                .iter()
2633                .filter_map(|hit| {
2634                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
2635                        let mut source = serde_json::Map::new();
2636                        source.insert("frame_id".into(), json!(frame.id));
2637                        source.insert(
2638                            "uri".into(),
2639                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
2640                        );
2641                        if let Some(title) = &frame.title {
2642                            source.insert("title".into(), json!(title));
2643                        }
2644                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
2645                        if !frame.tags.is_empty() {
2646                            source.insert("tags".into(), json!(frame.tags));
2647                        }
2648                        if !frame.labels.is_empty() {
2649                            source.insert("labels".into(), json!(frame.labels));
2650                        }
2651                        serde_json::Value::Object(source)
2652                    })
2653                })
2654                .collect(),
2655        )
2656    } else {
2657        None
2658    };
2659
2660    let mut body = json!({
2661        "version": "mv2.evidence.v1",
2662        "mode": "verbatim",
2663        "question": response.question,
2664        "evidence": evidence,
2665        "evidence_count": evidence.len(),
2666        "total_hits": response.retrieval.total_hits,
2667        "stats": {
2668            "retrieval_ms": response.stats.retrieval_ms,
2669            "latency_ms": response.stats.latency_ms,
2670        },
2671        "engine": search_engine_label(&response.retrieval.engine),
2672    });
2673
2674    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
2675        map.insert("sources".into(), json!(sources));
2676    }
2677
2678    let json_str = serde_json::to_string_pretty(&body)?;
2679    println!("{}", json_str.to_colored_json_auto()?);
2680    Ok(())
2681}
2682
/// Emit verbatim evidence in human-readable format without LLM synthesis.
///
/// Prints a header with timing/engine/hit stats, then each hit's verbatim
/// text (preferring `chunk_text` over the full frame text) with a normalized
/// relevance percentage, optional per-frame source details, and a trailing
/// note explaining that no model synthesis was performed.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {}   latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    // Header (question is char-safe truncated to keep the banner on one line)
    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    // Display each piece of evidence with citation
    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        // Hits without a score simply omit the relevance suffix.
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!("    Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Show verbatim text - prefer chunk_text if available
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        // Indent each line for readability; blank lines are dropped.
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!("    │ {}", line);
            }
        }
        println!();
    }

    // Print detailed sources if requested; frames that fail to load are
    // silently skipped.
    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", frame.id);
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    // Note about no LLM synthesis
    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2804
2805fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2806    let hits: Vec<_> = response
2807        .hits
2808        .iter()
2809        .map(|hit| {
2810            json!({
2811                "frame_id": hit.frame_id,
2812                "matches": hit.matches,
2813                "snippets": [hit.text.clone()],
2814            })
2815        })
2816        .collect();
2817    println!("{}", serde_json::to_string_pretty(&hits)?);
2818    Ok(())
2819}
2820
/// Print search hits as a human-readable list: rank/uri/match count, then
/// title, normalized relevance, byte ranges, chunk text, any attached
/// metadata (track, tags, labels, timestamps, entities), and the snippet.
/// Ends with the pagination cursor when one is available.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!("  Title: {title}");
        }
        // Show 0-100% relevance rather than raw BM25 scores.
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!("  Relevance: {:.0}%", normalized);
        }
        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!("  Chunk Text: {}", chunk_text.trim());
        }
        // Optional metadata lines are printed only when present/non-empty.
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!("  Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!("  Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!("  Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!("  Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!("  Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!("  Entities: {}", entity_strs.join(", "));
            }
        }
        println!("  Snippet: {}", hit.text.trim());
        println!();
    }
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2879
/// Short lowercase identifier for a requested ask mode (lex/sem/hybrid).
fn ask_mode_display(mode: AskModeArg) -> &'static str {
    match mode {
        AskModeArg::Lex => "lex",
        AskModeArg::Sem => "sem",
        AskModeArg::Hybrid => "hybrid",
    }
}
2887
/// Capitalized human-readable label for a requested ask mode; used by
/// `emit_ask_pretty` for its header line.
fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
    match mode {
        AskModeArg::Lex => "Lexical",
        AskModeArg::Sem => "Semantic",
        AskModeArg::Hybrid => "Hybrid",
    }
}
2895
/// Snake_case identifier for the retriever that actually served the ask,
/// including the two fallback variants.
fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
    match retriever {
        AskRetriever::Lex => "lex",
        AskRetriever::Semantic => "semantic",
        AskRetriever::Hybrid => "hybrid",
        AskRetriever::LexFallback => "lex_fallback",
        AskRetriever::TimelineFallback => "timeline_fallback",
    }
}
2905
/// Human-readable label for the retriever that actually served the ask;
/// used by `emit_ask_pretty`. Fallback variants are marked explicitly.
fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
    match retriever {
        AskRetriever::Lex => "Lexical",
        AskRetriever::Semantic => "Semantic",
        AskRetriever::Hybrid => "Hybrid",
        AskRetriever::LexFallback => "Lexical (fallback)",
        AskRetriever::TimelineFallback => "Timeline (fallback)",
    }
}
2915
/// Human-readable label for the search engine backend that produced the
/// retrieval results; shown in both pretty and JSON output paths.
fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
    match engine {
        SearchEngineKind::Tantivy => "text (tantivy)",
        SearchEngineKind::LexFallback => "text (fallback)",
        SearchEngineKind::Hybrid => "hybrid",
    }
}
2923
2924fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2925    let digest = hash(uri.as_bytes()).to_hex().to_string();
2926    let prefix_len = digest.len().min(12);
2927    let prefix = &digest[..prefix_len];
2928    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2929}
2930
/// Truncate `text` to at most `limit` characters, appending "..." when
/// anything was cut. Operates on chars (not bytes), so it never splits a
/// multi-byte UTF-8 sequence.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` yields the (limit+1)-th char, i.e. it is Some exactly when
    // the text is longer than `limit` chars; its byte index is the cut point.
    match text.char_indices().nth(limit) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
2939
/// Normalize a BM25 score to the 0-100 range for user-friendly display.
///
/// BM25 scores can be negative (Tantivy uses log-based TF which can go
/// negative for very common terms), so the score is positioned relative to
/// the [min, max] of the current result set.
///
/// Returns 100.0 when min == max (all scores equal); otherwise a value
/// clamped to [0, 100].
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let span = max_score - min_score;
    if span.abs() < f32::EPSILON {
        // Degenerate range: every hit scored the same, so show full relevance.
        100.0
    } else {
        (((score - min_score) / span) * 100.0).clamp(0.0, 100.0)
    }
}
2956
/// Extract the (min, max) scores from a slice of optional scores.
///
/// `None` entries are ignored; returns `(0.0, 0.0)` when no score is present.
/// Uses `f32::min`/`f32::max`, which skip NaN operands, matching the previous
/// fold-based behavior.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    // Single pass over the scores instead of collecting into an intermediate
    // Vec and folding it twice (avoids an allocation and a second traversal).
    let mut min = f32::INFINITY;
    let mut max = f32::NEG_INFINITY;
    let mut found = false;
    for score in scores.iter().copied().flatten() {
        found = true;
        min = min.min(score);
        max = max.max(score);
    }
    if found { (min, max) } else { (0.0, 0.0) }
}
2970
/// Serialize a single `SearchHit` into the JSON object emitted for search
/// results: rank, optional score, a stable hit id, frame id, uri, optional
/// title, chunk/frame byte ranges, snippet text, and a nested `metadata`
/// object (matches, tags, labels, track, created_at, content_dates, entities).
/// Optional fields are omitted entirely rather than emitted as null.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Deterministic id derived from (uri, frame_id, range start).
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // Fall back to the full hit range when no chunk-level range is present.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // When the hit carries no metadata, synthesize a default one that still
    // reports the match count so the JSON shape stays stable.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        // Entities as {name, kind, confidence?} objects.
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
3031/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
3032///
3033/// RRF is mathematically superior to raw score combination because:
3034/// - BM25 scores are unbounded (0 to infinity)
3035/// - Cosine similarity is bounded (-1 to 1)
3036/// - RRF normalizes by using only RANKS, not raw scores
3037///
3038/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
3039fn apply_semantic_rerank(
3040    runtime: &EmbeddingRuntime,
3041    mem: &mut Memvid,
3042    response: &mut SearchResponse,
3043) -> Result<()> {
3044    if response.hits.is_empty() {
3045        return Ok(());
3046    }
3047
3048    let query_embedding = runtime.embed_query(&response.query)?;
3049    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
3050    for hit in &response.hits {
3051        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
3052            if embedding.len() == runtime.dimension() {
3053                let score = cosine_similarity(&query_embedding, &embedding);
3054                semantic_scores.insert(hit.frame_id, score);
3055            }
3056        }
3057    }
3058
3059    if semantic_scores.is_empty() {
3060        return Ok(());
3061    }
3062
3063    // Sort by semantic score to get semantic ranks
3064    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
3065        .iter()
3066        .map(|(frame_id, score)| (*frame_id, *score))
3067        .collect();
3068    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
3069
3070    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
3071    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
3072        semantic_rank.insert(*frame_id, idx + 1);
3073    }
3074
3075    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
3076    let query_lower = response.query.to_lowercase();
3077    let is_preference_query = query_lower.contains("suggest")
3078        || query_lower.contains("recommend")
3079        || query_lower.contains("should i")
3080        || query_lower.contains("what should")
3081        || query_lower.contains("prefer")
3082        || query_lower.contains("favorite")
3083        || query_lower.contains("best for me");
3084
3085    // Pure RRF: Use ONLY ranks, NOT raw scores
3086    // This prevents a "confidently wrong" high-scoring vector from burying
3087    // a "precisely correct" keyword match
3088    const RRF_K: f32 = 60.0;
3089
3090    let mut ordering: Vec<(usize, f32, usize)> = response
3091        .hits
3092        .iter()
3093        .enumerate()
3094        .map(|(idx, hit)| {
3095            let lexical_rank = hit.rank;
3096
3097            // RRF score for lexical rank
3098            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);
3099
3100            // RRF score for semantic rank
3101            let semantic_rrf = semantic_rank
3102                .get(&hit.frame_id)
3103                .map(|rank| 1.0 / (RRF_K + *rank as f32))
3104                .unwrap_or(0.0);
3105
3106            // Apply preference boost for hits containing user preference signals
3107            // This is a small bonus for content with first-person preference indicators
3108            let preference_boost = if is_preference_query {
3109                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
3110            } else {
3111                0.0
3112            };
3113
3114            // Pure RRF: Only rank-based scores, no raw similarity scores
3115            let combined = lexical_rrf + semantic_rrf + preference_boost;
3116            (idx, combined, lexical_rank)
3117        })
3118        .collect();
3119
3120    ordering.sort_by(|a, b| {
3121        b.1.partial_cmp(&a.1)
3122            .unwrap_or(Ordering::Equal)
3123            .then(a.2.cmp(&b.2))
3124    });
3125
3126    let mut reordered = Vec::with_capacity(response.hits.len());
3127    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
3128        let mut hit = response.hits[idx].clone();
3129        hit.rank = rank_idx + 1;
3130        reordered.push(hit);
3131    }
3132
3133    response.hits = reordered;
3134    Ok(())
3135}
3136
3137/// Rerank search results by boosting hits that contain user preference signals.
3138/// Only applies when the query appears to be seeking recommendations or preferences.
3139fn apply_preference_rerank(response: &mut SearchResponse) {
3140    if response.hits.is_empty() {
3141        return;
3142    }
3143
3144    // Check if query is preference-seeking
3145    let query_lower = response.query.to_lowercase();
3146    let is_preference_query = query_lower.contains("suggest")
3147        || query_lower.contains("recommend")
3148        || query_lower.contains("should i")
3149        || query_lower.contains("what should")
3150        || query_lower.contains("prefer")
3151        || query_lower.contains("favorite")
3152        || query_lower.contains("best for me");
3153
3154    if !is_preference_query {
3155        return;
3156    }
3157
3158    // Compute boost scores for each hit
3159    let mut scored: Vec<(usize, f32, f32)> = response
3160        .hits
3161        .iter()
3162        .enumerate()
3163        .map(|(idx, hit)| {
3164            let original_score = hit.score.unwrap_or(0.0);
3165            let preference_boost = compute_preference_boost(&hit.text);
3166            let boosted_score = original_score + preference_boost;
3167            (idx, boosted_score, original_score)
3168        })
3169        .collect();
3170
3171    // Sort by boosted score (descending)
3172    scored.sort_by(|a, b| {
3173        b.1.partial_cmp(&a.1)
3174            .unwrap_or(Ordering::Equal)
3175            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3176    });
3177
3178    // Reorder hits
3179    let mut reordered = Vec::with_capacity(response.hits.len());
3180    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3181        let mut hit = response.hits[idx].clone();
3182        hit.rank = rank_idx + 1;
3183        reordered.push(hit);
3184    }
3185
3186    response.hits = reordered;
3187}
3188
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
fn compute_preference_boost(text: &str) -> f32 {
    // Strong signals (0.15 each): what the user HAS DONE, HAS, or DOES REGULARLY.
    const ESTABLISHED_CONTEXT: &[&str] = &[
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];

    // Moderate signals (0.02 each): general first-person statements.
    const FIRST_PERSON: &[&str] = &[" i ", " my ", " me "];

    // Weak signals (0.02 each): requests/intentions — the user wants something
    // but is not describing established context.
    const REQUEST_PATTERNS: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    let lowered = text.to_lowercase();
    let mut boost = 0.0f32;

    // Accumulate per-group weights in a fixed order (matches the weights above).
    for (patterns, weight) in [
        (ESTABLISHED_CONTEXT, 0.15f32),
        (FIRST_PERSON, 0.02),
        (REQUEST_PATTERNS, 0.02),
    ] {
        for pattern in patterns {
            if lowered.contains(pattern) {
                boost += weight;
            }
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
3281
/// Cosine similarity between two vectors.
///
/// Returns 0.0 when either vector has a (near-)zero squared norm, avoiding a
/// 0/0 NaN. Vectors of unequal length are compared over their common prefix
/// (the `zip` stops at the shorter one).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // One pass accumulating dot product and both squared norms.
    let (dot, norm_a_sq, norm_b_sq) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(d, na, nb), (x, y)| {
            (d + x * y, na + x * x, nb + y * y)
        });

    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3298
/// Apply cross-encoder reranking to search results.
///
/// Cross-encoders directly score query-document pairs and can understand
/// more nuanced relevance than bi-encoders (embeddings). This is especially
/// useful for personalization queries where semantic similarity != relevance.
///
/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
///
/// Best-effort: reranker initialization or inference failures are logged and
/// the original ordering is kept; this never fails the overall search.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits (this also covers empty).
    if response.hits.len() < 2 {
        return Ok(());
    }

    // Only the top candidates are cross-encoded; the rest keep their order.
    let candidates_to_rerank = response.hits.len().min(50);

    // Initialize the reranker (model will be downloaded on first use, ~86MB).
    // Using JINA Turbo - faster than BGE while maintaining good accuracy.
    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    // Prepare documents for reranking (owned Strings to avoid lifetime issues).
    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    // Rerank using cross-encoder
    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    // Blend cross-encoder scores with original scores to preserve temporal boosting.
    // The original score includes recency boost; purely replacing it loses temporal relevance.
    // We collect (blended_score, original_idx) pairs and sort by blended score.
    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Min-max normalize original scores (raw BM25 scores can be negative).
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores
        .iter()
        .cloned()
        .fold(f32::INFINITY, f32::min);
    let orig_max = original_scores
        .iter()
        .cloned()
        .fold(f32::NEG_INFINITY, f32::max);
    let orig_range = (orig_max - orig_min).max(0.001); // Avoid division by zero

    for result in rerank_results.iter() {
        // `result.index` points back into `response.hits[..candidates_to_rerank]`
        // because `documents` was built from that slice in order.
        let original_idx = result.index;
        let cross_encoder_score = result.score; // Already normalized 0-1

        // Normalize original score to 0-1 range
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // Blend: 20% cross-encoder (relevance) + 80% original (includes temporal boost)
        // Very heavy weight on original score to preserve temporal ranking
        // The original score already incorporates BM25 + recency boost
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    // Sort by blended score (descending); `Ordering` is imported at file top.
    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal));

    // Build reordered hits with new ranks
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        // Store blended score for reference
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Append hits beyond the rerank window unchanged. NOTE(review): these keep
    // their raw scores, which are on a different scale than the blended 0-1
    // scores above — confirm consumers only rely on `rank` for ordering.
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3404
/// Stub for cross-encoder reranking when local-embeddings is disabled.
/// Does nothing - reranking is skipped silently.
///
/// Keeps call sites feature-agnostic: callers invoke the same function name
/// regardless of whether the `local-embeddings` feature is compiled in.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    // Intentionally a no-op; always succeeds so the search pipeline continues.
    Ok(())
}
3411
3412/// Build a context string from memory cards stored in the MV2 file.
3413/// Groups facts by entity for better LLM comprehension.
3414fn build_memory_context(mem: &Memvid) -> String {
3415    let entities = mem.memory_entities();
3416    if entities.is_empty() {
3417        return String::new();
3418    }
3419
3420    let mut sections = Vec::new();
3421    for entity in entities {
3422        let cards = mem.get_entity_memories(&entity);
3423        if cards.is_empty() {
3424            continue;
3425        }
3426
3427        let mut entity_lines = Vec::new();
3428        for card in cards {
3429            // Format: "slot: value" with optional polarity indicator
3430            let polarity_marker = card
3431                .polarity
3432                .as_ref()
3433                .map(|p| match p.to_string().as_str() {
3434                    "Positive" => " (+)",
3435                    "Negative" => " (-)",
3436                    _ => "",
3437                })
3438                .unwrap_or("");
3439            entity_lines.push(format!(
3440                "  - {}: {}{}",
3441                card.slot, card.value, polarity_marker
3442            ));
3443        }
3444
3445        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3446    }
3447
3448    sections.join("\n\n")
3449}
3450
3451/// Build a context string from entities found in search hits.
3452/// Groups entities by type for better LLM comprehension.
3453fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3454    use std::collections::HashMap;
3455
3456    // Collect unique entities by kind
3457    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3458
3459    for hit in hits {
3460        if let Some(metadata) = &hit.metadata {
3461            for entity in &metadata.entities {
3462                entities_by_kind
3463                    .entry(entity.kind.clone())
3464                    .or_default()
3465                    .push(entity.name.clone());
3466            }
3467        }
3468    }
3469
3470    if entities_by_kind.is_empty() {
3471        return String::new();
3472    }
3473
3474    // Deduplicate and format
3475    let mut sections = Vec::new();
3476    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3477    sorted_kinds.sort();
3478
3479    for kind in sorted_kinds {
3480        let names = entities_by_kind.get(kind).unwrap();
3481        let mut unique_names: Vec<_> = names.iter().collect();
3482        unique_names.sort();
3483        unique_names.dedup();
3484
3485        let names_str = unique_names
3486            .iter()
3487            .take(10) // Limit to 10 entities per kind
3488            .map(|s| s.as_str())
3489            .collect::<Vec<_>>()
3490            .join(", ");
3491
3492        sections.push(format!("{}: {}", kind, names_str));
3493    }
3494
3495    sections.join("\n")
3496}