// memvid_cli/commands/search.rs
1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21    TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24    types::{
25        AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
26        SearchHitMetadata,
27    },
28    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
29    SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45    run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
46};
47
48// frame_to_json and print_frame_summary available from commands but not used in this module
49use crate::config::{
50    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
51    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig,
52    EmbeddingModelChoice, EmbeddingRuntime,
53};
54use crate::utils::{
55    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56    parse_date_boundary, parse_vector, read_embedding,
57};
58
// Cap on context characters emitted in command output.
// NOTE(review): the use site is outside this chunk — confirm which outputs it bounds.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone for temporal phrase resolution when `--tz` is absent
// (see `build_temporal_filter`).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67        message.push_str(&format!(
68            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69            model.name(),
70            model.name()
71        ));
72        if model.is_openai() {
73            message.push_str(" (and set `OPENAI_API_KEY`).");
74        } else {
75            message.push('.');
76        }
77        message.push_str(&format!(
78            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79            model.name()
80        ));
81        message.push_str(&format!(
82            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83        ));
84        message.push_str("\nOr use `--mode lex` to disable semantic search.");
85    }
86    message
87}
88
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    // Positional path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit machine-readable JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    // Walk the timeline in reverse (forwarded to TimelineQueryBuilder::reverse).
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return; NonZeroU64 rejects `--limit 0` at parse time.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower bound (unix timestamp), forwarded to TimelineQueryBuilder::since.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper bound (unix timestamp), forwarded to TimelineQueryBuilder::until.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase, resolved via build_temporal_filter.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone override for phrase resolution (defaults to DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 timestamp overriding "now" as the phrase-resolution anchor.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved temporal window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Positional path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Required natural-language temporal phrase, resolved via build_temporal_filter.
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone override for phrase resolution (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 timestamp overriding "now" as the phrase-resolution anchor.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved temporal window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return; NonZeroU64 rejects `--limit 0` at parse time.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower bound (unix timestamp), forwarded to TimelineQueryBuilder::since.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper bound (unix timestamp), forwarded to TimelineQueryBuilder::until.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Walk results in reverse (forwarded to TimelineQueryBuilder::reverse).
    #[arg(long)]
    pub reverse: bool,
    // Emit machine-readable JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
}
149
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    // Positional targets (zero or more). NOTE(review): how these are resolved
    // to memory files/questions happens in the handler, outside this chunk.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text via --question.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Optional exact-URI filter.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Optional URI-prefix filter.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve; `--limit` is accepted as an alias.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode (lex/sem/hybrid); defaults to hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Emit retrieved context only (no synthesis step in the handler).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Start-date filter (string; parsing happens in the handler).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End-date filter (string; parsing happens in the handler).
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// Return verbatim evidence without LLM synthesis.
    /// Shows the most relevant passages with citations, no paraphrasing or summarization.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
/// Ask mode argument
// CLI-facing mirror of memvid_core::AskMode; converted via the From impl below.
// Plain `//` comments are used on variants so clap's ValueEnum help output is
// not altered (clap turns `///` variant docs into possible-value help).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval only.
    Lex,
    // Semantic retrieval only.
    Sem,
    // Combined lexical + semantic retrieval (the CLI default).
    Hybrid,
}
250
251impl From<AskModeArg> for AskMode {
252    fn from(value: AskModeArg) -> Self {
253        match value {
254            AskModeArg::Lex => AskMode::Lex,
255            AskModeArg::Sem => AskMode::Sem,
256            AskModeArg::Hybrid => AskMode::Hybrid,
257        }
258    }
259}
260
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    // Positional path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Required query text.
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Optional exact-URI filter.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Optional URI-prefix filter.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve; `--limit` is accepted as an alias.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Older JSON output shape; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode (auto/lex/sem[/clip]); defaults to auto.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// Disable sketch pre-filtering (for benchmarking/debugging).
    /// By default, sketches are used for fast candidate generation if available.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
/// Search mode argument
// Plain `//` comments on variants keep clap's possible-value help unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the handler choose the mode. NOTE(review): the selection logic is
    // outside this chunk — confirm what signals drive the choice.
    Auto,
    // Lexical retrieval only.
    Lex,
    // Semantic retrieval only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
338
/// Adaptive retrieval strategy
// CLI-facing cutoff selection shared by `find` and `ask` (--adaptive-strategy).
// NOTE(review): presumably converted to memvid_core's CutoffStrategy (imported
// above); the conversion is outside this chunk — confirm.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
353
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    // Positional path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline query vector as comma-separated floats; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
368
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Positional path to the memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
/// Audit output format
// Selected via `--format` on the `audit` subcommand (defaults to text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
416
417// ============================================================================
418// Search & Retrieval command handlers
419// ============================================================================
420
/// Handle the `timeline` subcommand: list frames with optional
/// limit/since/until bounds, an optional temporal phrase (`--on`,
/// `temporal_track` builds only), and optional replay cutoffs
/// (`--as-of-frame` / `--as-of-ts`).  Prints JSON or a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only make sense as modifiers of an `--on` phrase;
    // reject them up front when no phrase was supplied.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the phrase into a temporal filter, keeping the summary so it can
    // be echoed back to the user alongside the entries.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested: drop entries newer than either
    // cutoff (both cutoffs are applied when both are given).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // When a temporal phrase was used, wrap the entries together with the
        // resolved window so JSON consumers can see how it was interpreted.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        // Human-readable path: optional resolution summary, then one block per
        // entry (id, timestamp, flattened preview, URI, children, temporal).
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
/// Handle the `when` subcommand: resolve a natural-language temporal phrase to
/// a UTC window, query the timeline restricted to that window, and print the
/// resolution summary plus matching entries (JSON or human-readable).
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase first so bad tz/anchor/phrase errors surface before
    // any timeline work.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Show both the raw unix timestamp and its ISO form (blank if the
        // timestamp cannot be formatted).
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// pairs the resolved window with the matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Resolved temporal window; omitted from JSON when no `--on` phrase was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Timeline entries, borrowed to avoid cloning just for serialization.
    entries: &'a [TimelineEntry],
}
602
/// JSON envelope for `when --json`: resolution summary plus matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    /// How the temporal phrase was resolved (window, confidence, flags, …).
    summary: TemporalSummaryOutput,
    /// Timeline entries within the resolved window.
    entries: Vec<WhenEntry>,
}
609
/// Serializable view of one timeline entry for `when --json`
/// (built by `entry_to_when_entry`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Unix timestamp of the frame.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    /// Child frame ids; omitted from JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-entry temporal metadata (anchor/mentions), when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
/// Serializable form of a temporal-phrase resolution, shared by
/// `timeline --json` and `when --json` (built by `summary_to_output`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The original natural-language phrase.
    phrase: String,
    /// Timezone the phrase was resolved in.
    timezone: String,
    /// Resolution anchor as a unix timestamp.
    anchor_utc: i64,
    /// Anchor as RFC3339 (falls back to the raw timestamp string).
    anchor_iso: String,
    /// Resolver confidence score.
    confidence: u16,
    /// Resolver flags; omitted from JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// Kind of resolved value: date/datetime/ranges/month (see `resolution_kind`).
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Requested `--window` padding in minutes, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
644
/// Internal (non-serialized) record of how a temporal phrase was resolved;
/// converted to `TemporalSummaryOutput` for JSON and printed by
/// `print_temporal_summary` for humans.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    // Original phrase text.
    phrase: String,
    // Timezone used for resolution.
    tz: String,
    // Anchor ("now" or --anchor override) the phrase was resolved against.
    anchor: OffsetDateTime,
    // Resolved window bounds as unix timestamps (None when unbounded).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    // Full resolver output (value, confidence, flags).
    resolution: TemporalResolution,
    // Requested --window padding in minutes, if any.
    window_minutes: Option<u64>,
}
655
/// Resolve a natural-language temporal `phrase` into a concrete UTC window.
///
/// `tz_override` falls back to [`DEFAULT_TEMPORAL_TZ`]; `anchor_override`
/// (RFC3339) defaults to the current time; `window_minutes`, when non-zero and
/// both bounds are known, pads the window symmetrically.  Returns both the
/// engine-facing [`TemporalFilter`] and a [`TemporalSummary`] for user-facing
/// reporting.
///
/// # Errors
/// - `E-TEMP-003` when the timezone is blank.
/// - `E-TEMP-002` when the anchor is not valid RFC3339.
/// - `E-TEMP-001` when the phrase cannot be resolved.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both bounds symmetrically when a non-zero window was requested and
    // the phrase resolved to a bounded window.  (The original code had an
    // `s == e` special case whose two branches were byte-identical; collapsed.)
    if let Some(minutes) = window_minutes.filter(|&m| m > 0) {
        if let (Some(s), Some(e)) = (start, end) {
            // Clamp before converting: `minutes as i64` could wrap for huge
            // values, and `TimeDuration::minutes` panics past i64::MAX seconds.
            let clamped = minutes.min((i64::MAX / 60) as u64) as i64;
            let delta = TimeDuration::minutes(clamped);
            start = Some(s.saturating_sub(delta.whole_seconds()));
            end = Some(e.saturating_add(delta.whole_seconds()));
        }
    }

    // phrase/tz are reported via the summary, not re-sent to the engine.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
/// Convert the internal resolution summary into its serializable JSON form,
/// rendering timestamps as RFC3339/ISO strings and falling back to the raw
/// unix timestamp when formatting fails.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        // and_then: format_timestamp returns Option, so failures stay None.
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
/// Build the serializable `WhenEntry` view of a timeline entry for
/// `when --json` output (clones the entry's owned fields).
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        // None (and thus omitted from JSON) when the timestamp can't be formatted.
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
/// Print a human-readable description of a resolved temporal phrase: phrase,
/// timezone, anchor, resolved window, confidence, flags, and any requested
/// window padding, followed by a blank separator line.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Fall back to the raw unix timestamp if RFC3339 formatting fails.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    let bounds = (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    );
    match bounds {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    // Only mention padding when it was requested and non-zero.
    if let Some(padding) = summary.window_minutes.filter(|&m| m > 0) {
        println!("Window padding: {padding} minute(s)");
    }
    println!();
}
796
/// Print indented anchor/mention details for one entry's temporal metadata
/// (shared by the human-readable `timeline` and `when` outputs).
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO form, then a formatted timestamp, and finally
        // the raw integer if formatting fails.
        let display = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", display, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
829
#[cfg(feature = "temporal_track")]
/// Collapse a temporal resolution into an inclusive `(start, end)` pair of
/// unix timestamps. Point values (a single date or datetime) yield identical
/// bounds; month resolutions span the first through the last day.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    let (start_ts, end_ts) = match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        TemporalResolutionValue::DateRange { start, end } => {
            (date_to_timestamp(*start), date_to_timestamp(*end))
        }
        TemporalResolutionValue::DateTimeRange { start, end } => {
            (start.unix_timestamp(), end.unix_timestamp())
        }
        TemporalResolutionValue::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(start_ts), Some(end_ts)))
}
860
#[cfg(feature = "temporal_track")]
/// Return a static tag naming which variant the resolution carries.
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as V;
    match resolution.value {
        V::Date(_) => "date",
        V::DateTime(_) => "datetime",
        V::DateRange { .. } => "date_range",
        V::DateTimeRange { .. } => "datetime_range",
        V::Month { .. } => "month",
    }
}
871
#[cfg(feature = "temporal_track")]
/// Interpret a calendar date as midnight UTC and convert it to a unix timestamp.
fn date_to_timestamp(date: Date) -> i64 {
    date.midnight().assume_utc().unix_timestamp()
}
878
#[cfg(feature = "temporal_track")]
/// Return the last calendar day of the given month.
///
/// Uses the `time` crate's day count (leap-year aware) instead of walking
/// `next_day()` up to 30 times, and errors if the month cannot form a valid
/// date (which should not happen for a well-formed `Month`).
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let last_day = time::util::days_in_year_month(year, month);
    Date::from_calendar_date(year, month, last_day)
        .map_err(|_| anyhow!("invalid month resolution"))
}
892
893#[cfg(feature = "temporal_track")]
894
895fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
896    if fragments.is_empty() {
897        return;
898    }
899
900    response.context_fragments = fragments
901        .into_iter()
902        .map(|fragment| AskContextFragment {
903            rank: fragment.rank,
904            frame_id: fragment.frame_id,
905            uri: fragment.uri,
906            title: fragment.title,
907            score: fragment.score,
908            matches: fragment.matches,
909            range: Some(fragment.range),
910            chunk_range: fragment.chunk_range,
911            text: fragment.text,
912            kind: Some(match fragment.kind {
913                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
914                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
915            }),
916            #[cfg(feature = "temporal_track")]
917            temporal: None,
918        })
919        .collect();
920}
921
/// Handle the `ask` command: retrieve relevant context from a memory file and
/// (optionally) synthesize an answer with an LLM.
///
/// Flow: plan/quota checks → question assembly from positional args or
/// `--question` → memory-file autodetection → lexical fallback when the
/// memory has no vectors → retrieval via `Memvid::ask` → optional
/// cross-encoder rerank, memory-card / entity-context injection, and PII
/// masking → optional model inference → JSON or pretty output, with replay
/// session load/save hooks around the whole run.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    // Track query usage against plan quota
    crate::api::track_query_usage(config, 1)?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional tokens: the first token that looks like a memory file
    // becomes the path; everything else joins into the question text.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // --question wins over positional words; whitespace-only input is treated
    // as absent.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Expand query for better retrieval using LLM (expands abbreviations, adds synonyms)
    // This happens when --use-model is set or we have an API key
    let (original_question, search_query) = {
        // For query expansion, we use the fastest available model
        // Priority: OpenAI > Groq > Anthropic > XAI > Mistral
        let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
            if let Ok(key) = std::env::var("OPENAI_API_KEY") {
                // OpenAI available - use gpt-4o-mini (fastest, cheapest)
                (Some("gpt-4o-mini"), Some(key))
            } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
                // Groq available - use llama-3.1-8b-instant (very fast)
                (Some("llama-3.1-8b-instant"), Some(key))
            } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
                // Anthropic available - use haiku
                (Some("claude-haiku-4-5"), Some(key))
            } else if let Ok(key) = std::env::var("XAI_API_KEY") {
                // XAI available - use grok-4-fast
                (Some("grok-4-fast"), Some(key))
            } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
                // Mistral available - use mistral-small
                (Some("mistral-small-latest"), Some(key))
            } else {
                // No fast model available for expansion
                (None, None)
            };

        // DISABLED: Query expansion for ask command
        // The ask command has sophisticated retrieval with fallbacks, aggregation detection,
        // temporal boosting, and diverse retrieval strategies. Query expansion often strips
        // out important semantic context (temporal markers, aggregation signals, analytical
        // keywords) that these strategies depend on. The original question is preserved
        // to ensure all downstream detection and ranking works correctly.
        //
        // Query expansion may be appropriate for simple keyword searches, but for complex
        // natural language questions it causes more problems than it solves.
        let _ = (model_for_expansion, api_key_for_expansion); // suppress unused warnings
        (question.clone(), question.clone())
    };

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Validate the optional --start/--end window before opening the memory.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Check if memory has any vectors - if not, force lexical mode
    let stats = mem.stats()?;
    let has_vectors = stats.vector_count > 0;
    let effective_mode = if !has_vectors
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
        AskModeArg::Lex
    } else {
        args.mode.clone()
    };

    // Semantic/hybrid queries infer the embedding model from the memory's
    // stored identity; mixed-model memories are rejected as unsafe.
    let ask_mode: AskMode = effective_mode.clone().into();
    let inferred_model_override = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => {
                identity.model.map(String::from)
            }
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // An explicit --query-embedding-model overrides the inferred one.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match effective_mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            effective_mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.3,
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question: search_query, // Use expanded query for retrieval
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
        acl_context: None,
        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
    };
    // Dimension mismatches get a friendly, actionable message instead of the
    // raw core error.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => {
            anyhow!(vec_dimension_mismatch_help(expected, actual))
        }
        other => anyhow!(other),
    })?;

    // Restore original question for display and LLM synthesis
    // (search_query was used for retrieval but original_question is shown to user)
    response.question = original_question;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    // Skip for temporal/recency queries - cross-encoder doesn't understand temporal context
    // and would override the recency boost from lexical search
    // NOTE(review): these substring checks are coarse — e.g. "now" also matches
    // "know"/"snowfall" and "new " matches "renew " — confirm whether
    // word-boundary matching is intended here.
    let is_temporal_query = {
        let q_lower = response.question.to_lowercase();
        q_lower.contains("current")
            || q_lower.contains("latest")
            || q_lower.contains("recent")
            || q_lower.contains("now")
            || q_lower.contains("today")
            || q_lower.contains("updated")
            || q_lower.contains("new ")
            || q_lower.contains("newest")
    };
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
        && !is_temporal_query
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
            stale_index_skips: 0,
        };

        // Reranking is best-effort: on failure we keep the original ordering.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Synthesis stage: --no-llm returns verbatim evidence, --context-only skips
    // synthesis, otherwise --use-model triggers model inference (best-effort).
    let mut model_result: Option<ModelInference> = None;
    if args.no_llm {
        // --no-llm: return verbatim evidence without LLM synthesis
        if args.use_model.is_some() {
            warn!("--use-model ignored because --no-llm disables LLM synthesis");
        }
        if args.json {
            emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
        } else {
            emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
        }

        // Save active replay session if one exists
        #[cfg(feature = "replay")]
        let _ = mem.save_active_session();

        return Ok(());
    } else if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                response.answer = Some(inference.answer.answer.clone());
                response.retrieval.context = inference.context_body.clone();
                apply_model_context_fragments(&mut response, inference.context_fragments.clone());
                model_result = Some(inference);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref inference) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            // Extract frame IDs from retrieval hits for replay audit
            let retrieved_frames: Vec<u64> = response
                .retrieval
                .hits
                .iter()
                .map(|hit| hit.frame_id)
                .collect();

            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                inference.answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
                retrieved_frames,
            );
        }
    }

    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                effective_mode.clone(),
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            effective_mode.clone(),
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1347
1348/// Handle graph-aware find with --graph or --hybrid flags
1349fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1350    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1351    use memvid_core::types::QueryPlan;
1352
1353    let planner = QueryPlanner::new();
1354
1355    // Create query plan based on mode
1356    let plan = if args.graph {
1357        // Pure graph mode - let planner detect patterns
1358        let plan = planner.plan(&args.query, args.top_k);
1359        // If it's a hybrid plan from auto-detection, convert to graph-only
1360        match plan {
1361            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1362                QueryPlan::graph_only(graph_filter, args.top_k)
1363            }
1364            _ => plan,
1365        }
1366    } else {
1367        // Hybrid mode - use the auto-detected plan
1368        planner.plan(&args.query, args.top_k)
1369    };
1370
1371    // Execute the search
1372    let hits = hybrid_search(mem, &plan)?;
1373
1374    if args.json {
1375        // JSON output
1376        let output = serde_json::json!({
1377            "query": args.query,
1378            "mode": if args.graph { "graph" } else { "hybrid" },
1379            "plan": format!("{:?}", plan),
1380            "hits": hits.iter().map(|h| {
1381                serde_json::json!({
1382                    "frame_id": h.frame_id,
1383                    "score": h.score,
1384                    "graph_score": h.graph_score,
1385                    "vector_score": h.vector_score,
1386                    "matched_entity": h.matched_entity,
1387                    "preview": h.preview,
1388                })
1389            }).collect::<Vec<_>>(),
1390        });
1391        println!("{}", serde_json::to_string_pretty(&output)?);
1392    } else {
1393        // Human-readable output
1394        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1395        println!("{} search for: \"{}\"", mode_str, args.query);
1396        println!("Plan: {:?}", plan);
1397        println!();
1398
1399        if hits.is_empty() {
1400            println!("No results found.");
1401        } else {
1402            println!("Results ({} hits):", hits.len());
1403            for (i, hit) in hits.iter().enumerate() {
1404                println!();
1405                println!(
1406                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1407                    i + 1,
1408                    hit.frame_id,
1409                    hit.score,
1410                    hit.graph_score,
1411                    hit.vector_score
1412                );
1413                if let Some(entity) = &hit.matched_entity {
1414                    println!("   Matched entity: {}", entity);
1415                }
1416                if let Some(preview) = &hit.preview {
1417                    let truncated = if preview.len() > 200 {
1418                        format!("{}...", &preview[..200])
1419                    } else {
1420                        preview.clone()
1421                    };
1422                    println!("   {}", truncated.replace('\n', " "));
1423                }
1424            }
1425        }
1426    }
1427
1428    Ok(())
1429}
1430
1431pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1432    // Check if plan allows query operations (blocks expired subscriptions)
1433    crate::utils::require_active_plan(config, "find")?;
1434
1435    // Track query usage against plan quota
1436    crate::api::track_query_usage(config, 1)?;
1437
1438    let mut mem = open_read_only_mem(&args.file)?;
1439
1440    // Load active replay session if one exists
1441    #[cfg(feature = "replay")]
1442    let _ = mem.load_active_session();
1443
1444    // Handle graph-aware and hybrid search modes
1445    if args.graph || args.hybrid {
1446        return handle_graph_find(&mut mem, &args);
1447    }
1448
1449    if args.uri.is_some() && args.scope.is_some() {
1450        warn!("--scope ignored because --uri is provided");
1451    }
1452
1453    // Get vector dimension from MV2 for auto-detection
1454    let mv2_dimension = mem.effective_vec_index_dimension()?;
1455    let identity_summary = match args.mode {
1456        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1457        #[cfg(feature = "clip")]
1458        SearchMode::Clip => None,
1459        SearchMode::Lex => None,
1460    };
1461
1462    let mut semantic_allowed = true;
1463    let inferred_model_override = match identity_summary.as_ref() {
1464        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1465            identity.model.as_deref().map(|value| value.to_string())
1466        }
1467        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1468            let models: Vec<_> = identities
1469                .iter()
1470                .filter_map(|entry| entry.identity.model.as_deref())
1471                .collect();
1472            if args.mode == SearchMode::Sem {
1473                anyhow::bail!(
1474                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1475                    Detected models: {:?}\n\n\
1476                    Suggested fix: split into separate memories per embedding model.",
1477                    models
1478                );
1479            }
1480            warn!(
1481                "semantic search disabled: mixed embedding models detected: {:?}",
1482                models
1483            );
1484            semantic_allowed = false;
1485            None
1486        }
1487        _ => None,
1488    };
1489
1490    let emb_model_override = args
1491        .query_embedding_model
1492        .as_deref()
1493        .or(inferred_model_override.as_deref());
1494
1495    let (mode_label, runtime_option) = match args.mode {
1496        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1497        SearchMode::Sem => {
1498            let runtime =
1499                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1500            ("Semantic (vector search)".to_string(), Some(runtime))
1501        }
1502        SearchMode::Auto => {
1503            if !semantic_allowed {
1504                ("Lexical (semantic unsafe)".to_string(), None)
1505            } else if let Some(runtime) =
1506                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1507            {
1508                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1509            } else {
1510                ("Lexical (semantic unavailable)".to_string(), None)
1511            }
1512        }
1513        #[cfg(feature = "clip")]
1514        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1515    };
1516
1517    let mode_key = match args.mode {
1518        SearchMode::Sem => "semantic",
1519        SearchMode::Lex => "text",
1520        SearchMode::Auto => {
1521            if runtime_option.is_some() {
1522                "hybrid"
1523            } else {
1524                "text"
1525            }
1526        }
1527        #[cfg(feature = "clip")]
1528        SearchMode::Clip => "clip",
1529    };
1530
1531    // For CLIP mode, use CLIP visual search
1532    #[cfg(feature = "clip")]
1533    if args.mode == SearchMode::Clip {
1534        use memvid_core::clip::{ClipConfig, ClipModel};
1535
1536        // Initialize CLIP model
1537        let config = ClipConfig::default();
1538        let clip = ClipModel::new(config).map_err(|e| {
1539            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1540        })?;
1541
1542        // Encode query text
1543        let query_embedding = clip
1544            .encode_text(&args.query)
1545            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1546
1547        // Search CLIP index
1548        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1549
1550        // Debug distances before filtering
1551        for hit in &hits {
1552            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1553                tracing::debug!(
1554                    frame_id = hit.frame_id,
1555                    title = %frame.title.unwrap_or_default(),
1556                    page = hit.page,
1557                    distance = hit.distance,
1558                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1559                    "CLIP raw hit"
1560                );
1561            } else {
1562                tracing::debug!(
1563                    frame_id = hit.frame_id,
1564                    page = hit.page,
1565                    distance = hit.distance,
1566                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1567                    "CLIP raw hit (missing frame)"
1568                );
1569            }
1570        }
1571
1572        // CLIP distance threshold for filtering poor matches
1573        // CLIP uses L2 distance on normalized embeddings:
1574        //   - distance² = 2(1 - cosine_similarity)
1575        //   - distance = 0 → identical (cosine_sim = 1)
1576        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1577        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1578        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1579        //   - distance = 2.0 → opposite (cosine_sim = -1)
1580        //
1581        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1582        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1583        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1584        const CLIP_MAX_DISTANCE: f32 = 1.26;
1585
1586        // Convert CLIP hits to SearchResponse format, filtering by threshold
1587        let search_hits: Vec<SearchHit> = hits
1588            .into_iter()
1589            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1590            .enumerate()
1591            .filter_map(|(rank, hit)| {
1592                // Convert L2 distance to cosine similarity for display
1593                // cos_sim = 1 - (distance² / 2)
1594                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1595
1596                // Get frame preview for snippet
1597                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1598                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1599                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1600                let title = match (base_title, hit.page) {
1601                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1602                    (Some(t), None) => Some(t),
1603                    (None, Some(p)) => Some(format!("Page {p}")),
1604                    _ => None,
1605                };
1606                Some(SearchHit {
1607                    rank: rank + 1,
1608                    frame_id: hit.frame_id,
1609                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1610                    title,
1611                    text: preview.clone(),
1612                    chunk_text: Some(preview),
1613                    range: (0, 0),
1614                    chunk_range: None,
1615                    matches: 0,
1616                    score: Some(cosine_similarity),
1617                    metadata: None,
1618                })
1619            })
1620            .collect();
1621
1622        let response = SearchResponse {
1623            query: args.query.clone(),
1624            hits: search_hits.clone(),
1625            total_hits: search_hits.len(),
1626            params: memvid_core::SearchParams {
1627                top_k: args.top_k,
1628                snippet_chars: args.snippet_chars,
1629                cursor: args.cursor.clone(),
1630            },
1631            elapsed_ms: 0,
1632            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1633            next_cursor: None,
1634            context: String::new(),
1635            stale_index_skips: 0,
1636        };
1637
1638        if args.json_legacy {
1639            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1640            emit_legacy_search_json(&response)?;
1641        } else if args.json {
1642            emit_search_json(&response, mode_key)?;
1643        } else {
1644            println!(
1645                "mode: {}   k={}   time: {} ms",
1646                mode_label, response.params.top_k, response.elapsed_ms
1647            );
1648            println!("engine: clip (MobileCLIP-S2)");
1649            println!(
1650                "hits: {} (showing {})",
1651                response.total_hits,
1652                response.hits.len()
1653            );
1654            emit_search_table(&response);
1655        }
1656        return Ok(());
1657    }
1658
1659    // For semantic mode, use pure vector search.
1660    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1661        let runtime = runtime_option
1662            .as_ref()
1663            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1664
1665        // Embed the query
1666        let query_embedding = runtime.embed_query(&args.query)?;
1667
1668        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1669        let scope = args.scope.as_deref().or(args.uri.as_deref());
1670
1671        if !args.no_adaptive {
1672            // Build adaptive config from CLI args
1673            let strategy = match args.adaptive_strategy {
1674                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1675                    min_ratio: args.min_relevancy,
1676                },
1677                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1678                    min_score: args.min_relevancy,
1679                },
1680                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1681                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1682                },
1683                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1684                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1685                    relative_threshold: args.min_relevancy,
1686                    max_drop_ratio: 0.35,
1687                    absolute_min: 0.3,
1688                },
1689            };
1690
1691            let config = AdaptiveConfig {
1692                enabled: true,
1693                max_results: args.max_k,
1694                min_results: 1,
1695                strategy,
1696                normalize_scores: true,
1697            };
1698
1699            match mem.search_adaptive(
1700                &args.query,
1701                &query_embedding,
1702                config,
1703                args.snippet_chars,
1704                scope,
1705            ) {
1706                Ok(result) => {
1707                    let mut resp = SearchResponse {
1708                        query: args.query.clone(),
1709                        hits: result.results,
1710                        total_hits: result.stats.returned,
1711                        params: memvid_core::SearchParams {
1712                            top_k: result.stats.returned,
1713                            snippet_chars: args.snippet_chars,
1714                            cursor: args.cursor.clone(),
1715                        },
1716                        elapsed_ms: 0,
1717                        engine: SearchEngineKind::Hybrid,
1718                        next_cursor: None,
1719                        context: String::new(),
1720                        stale_index_skips: 0,
1721                    };
1722                    apply_preference_rerank(&mut resp);
1723                    (
1724                        resp,
1725                        "semantic (adaptive vector search)".to_string(),
1726                        Some(result.stats),
1727                    )
1728                }
1729                Err(e) => {
1730                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1731                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1732                    }
1733
1734                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1735                    match mem.vec_search_with_embedding(
1736                        &args.query,
1737                        &query_embedding,
1738                        args.top_k,
1739                        args.snippet_chars,
1740                        scope,
1741                    ) {
1742                        Ok(mut resp) => {
1743                            apply_preference_rerank(&mut resp);
1744                            (resp, "semantic (vector search fallback)".to_string(), None)
1745                        }
1746                        Err(e2) => {
1747                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1748                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1749                            }
1750                            return Err(anyhow!(
1751                                "Both adaptive and fixed-k search failed: {e}, {e2}"
1752                            ));
1753                        }
1754                    }
1755                }
1756            }
1757        } else {
1758            // Standard fixed-k vector search
1759            match mem.vec_search_with_embedding(
1760                &args.query,
1761                &query_embedding,
1762                args.top_k,
1763                args.snippet_chars,
1764                scope,
1765            ) {
1766                Ok(mut resp) => {
1767                    // Apply preference boost to rerank results for preference-seeking queries
1768                    apply_preference_rerank(&mut resp);
1769                    (resp, "semantic (vector search)".to_string(), None)
1770                }
1771                Err(e) => {
1772                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1773                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1774                    }
1775
1776                    // Fall back to lexical search + rerank if vector search fails
1777                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1778                    let request = SearchRequest {
1779                        query: args.query.clone(),
1780                        top_k: args.top_k,
1781                        snippet_chars: args.snippet_chars,
1782                        uri: args.uri.clone(),
1783                        scope: args.scope.clone(),
1784                        cursor: args.cursor.clone(),
1785                        #[cfg(feature = "temporal_track")]
1786                        temporal: None,
1787                        as_of_frame: args.as_of_frame,
1788                        as_of_ts: args.as_of_ts,
1789                        no_sketch: args.no_sketch,
1790                        acl_context: None,
1791                        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1792                    };
1793                    let mut resp = mem.search(request)?;
1794                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1795                    (resp, "semantic (fallback rerank)".to_string(), None)
1796                }
1797            }
1798        }
1799    } else {
1800        // For lexical and auto modes, use existing behavior
1801        let request = SearchRequest {
1802            query: args.query.clone(),
1803            top_k: args.top_k,
1804            snippet_chars: args.snippet_chars,
1805            uri: args.uri.clone(),
1806            scope: args.scope.clone(),
1807            cursor: args.cursor.clone(),
1808            #[cfg(feature = "temporal_track")]
1809            temporal: None,
1810            as_of_frame: args.as_of_frame,
1811            as_of_ts: args.as_of_ts,
1812            no_sketch: args.no_sketch,
1813            acl_context: None,
1814            acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1815        };
1816
1817        let mut resp = mem.search(request)?;
1818
1819        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1820            warn!("Search index unavailable; returning basic text results");
1821        }
1822
1823        let mut engine_label = match resp.engine {
1824            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1825            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1826            SearchEngineKind::Hybrid => "hybrid".to_string(),
1827        };
1828
1829        if runtime_option.is_some() {
1830            engine_label = format!("hybrid ({engine_label} + semantic)");
1831        }
1832
1833        if let Some(ref runtime) = runtime_option {
1834            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1835        }
1836
1837        (resp, engine_label, None)
1838    };
1839
1840    if args.json_legacy {
1841        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1842        emit_legacy_search_json(&response)?;
1843    } else if args.json {
1844        emit_search_json(&response, mode_key)?;
1845    } else {
1846        println!(
1847            "mode: {}   k={}   time: {} ms",
1848            mode_label, response.params.top_k, response.elapsed_ms
1849        );
1850        println!("engine: {}", engine_label);
1851
1852        // Show adaptive retrieval stats if enabled
1853        if let Some(ref stats) = adaptive_stats {
1854            println!(
1855                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1856                stats.total_considered,
1857                stats.returned,
1858                stats.triggered_by,
1859                stats.top_score.unwrap_or(0.0),
1860                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1861            );
1862        }
1863
1864        println!(
1865            "hits: {} (showing {})",
1866            response.total_hits,
1867            response.hits.len()
1868        );
1869        emit_search_table(&response);
1870    }
1871
1872    // Save active replay session if one exists
1873    #[cfg(feature = "replay")]
1874    let _ = mem.save_active_session();
1875
1876    Ok(())
1877}
1878
1879pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1880    // Track query usage against plan quota
1881    crate::api::track_query_usage(config, 1)?;
1882
1883    let mut mem = open_read_only_mem(&args.file)?;
1884    let vector = if let Some(path) = args.embedding.as_deref() {
1885        read_embedding(path)?
1886    } else if let Some(vector_string) = &args.vector {
1887        parse_vector(vector_string)?
1888    } else {
1889        anyhow::bail!("provide --vector or --embedding for search input");
1890    };
1891
1892    let hits = mem
1893        .search_vec(&vector, args.limit)
1894        .map_err(|err| match err {
1895            MemvidError::VecDimensionMismatch { expected, actual } => {
1896                anyhow!(vec_dimension_mismatch_help(expected, actual))
1897            }
1898            other => anyhow!(other),
1899        })?;
1900    let mut enriched = Vec::with_capacity(hits.len());
1901    for hit in hits {
1902        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1903        enriched.push((hit.frame_id, hit.distance, preview));
1904    }
1905
1906    if args.json {
1907        let json_hits: Vec<_> = enriched
1908            .iter()
1909            .map(|(frame_id, distance, preview)| {
1910                json!({
1911                    "frame_id": frame_id,
1912                    "distance": distance,
1913                    "preview": preview,
1914                })
1915            })
1916            .collect();
1917        let json_str = serde_json::to_string_pretty(&json_hits)?;
1918        println!("{}", json_str.to_colored_json_auto()?);
1919    } else if enriched.is_empty() {
1920        println!("No vector matches found");
1921    } else {
1922        for (frame_id, distance, preview) in enriched {
1923            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1924        }
1925    }
1926    Ok(())
1927}
1928
/// Handle the `audit` command: gather supporting evidence for a question,
/// optionally restricted to a `--start`/`--end` date window, and optionally
/// synthesize an answer with a model (`--use-model`).
///
/// The report is rendered per `--format` (text/markdown/JSON) and written to
/// `--out` when given, otherwise printed to stdout.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries
    // NOTE(review): the boolean appears to select which side of the range is
    // being parsed (false = start, true = end) — confirm in parse_date_boundary.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    // Reject an inverted range early with a friendly CLI error (only checkable
    // when both boundaries were supplied).
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed:
    // - Lex never embeds;
    // - Sem hard-fails when a runtime cannot be loaded (note the `?`);
    // - Hybrid uses the `try_` loader, so `runtime` may be None.
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    // Coerce the concrete runtime to the trait object the core audit API takes.
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options. Snippets are always requested so the report (and
    // the optional model synthesis below) has raw text to work from.
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference: snippets joined by
        // blank lines; sources without a snippet are skipped.
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                // Replace the report's answer and record which model produced it.
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                // Best-effort: keep the report's default answer and warn
                // instead of failing the whole audit.
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write output: with --out, write the file and print its path; otherwise
    // dump the report to stdout.
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
2021
2022fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2023    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2024
2025    let mut additional_params = serde_json::Map::new();
2026    if let Some(cursor) = &response.params.cursor {
2027        additional_params.insert("cursor".into(), json!(cursor));
2028    }
2029
2030    let mut params = serde_json::Map::new();
2031    params.insert("top_k".into(), json!(response.params.top_k));
2032    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2033    params.insert("mode".into(), json!(mode));
2034    params.insert(
2035        "additional_params".into(),
2036        serde_json::Value::Object(additional_params),
2037    );
2038
2039    let mut metadata_json = serde_json::Map::new();
2040    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2041    metadata_json.insert("total_hits".into(), json!(response.total_hits));
2042    metadata_json.insert(
2043        "next_cursor".into(),
2044        match &response.next_cursor {
2045            Some(cursor) => json!(cursor),
2046            None => serde_json::Value::Null,
2047        },
2048    );
2049    metadata_json.insert("engine".into(), json!(response.engine));
2050    metadata_json.insert("params".into(), serde_json::Value::Object(params));
2051
2052    let body = json!({
2053        "version": "mv2.result.v2",
2054        "query": response.query,
2055        "metadata": metadata_json,
2056        "hits": hits,
2057        "context": response.context,
2058    });
2059    let json_str = serde_json::to_string_pretty(&body)?;
2060    println!("{}", json_str.to_colored_json_auto()?);
2061    Ok(())
2062}
2063
2064fn emit_ask_json(
2065    response: &AskResponse,
2066    requested_mode: AskModeArg,
2067    inference: Option<&ModelInference>,
2068    include_sources: bool,
2069    mem: &mut Memvid,
2070) -> Result<()> {
2071    let hits: Vec<_> = response
2072        .retrieval
2073        .hits
2074        .iter()
2075        .map(search_hit_to_json)
2076        .collect();
2077
2078    let citations: Vec<_> = response
2079        .citations
2080        .iter()
2081        .map(|citation| {
2082            let mut map = serde_json::Map::new();
2083            map.insert("index".into(), json!(citation.index));
2084            map.insert("frame_id".into(), json!(citation.frame_id));
2085            map.insert("uri".into(), json!(citation.uri));
2086            if let Some(range) = citation.chunk_range {
2087                map.insert("chunk_range".into(), json!([range.0, range.1]));
2088            }
2089            if let Some(score) = citation.score {
2090                map.insert("score".into(), json!(score));
2091            }
2092            serde_json::Value::Object(map)
2093        })
2094        .collect();
2095
2096    let mut body = json!({
2097        "version": "mv2.ask.v1",
2098        "question": response.question,
2099        "answer": response.answer,
2100        "context_only": response.context_only,
2101        "mode": ask_mode_display(requested_mode),
2102        "retriever": ask_retriever_display(response.retriever),
2103        "top_k": response.retrieval.params.top_k,
2104        "results": hits,
2105        "citations": citations,
2106        "stats": {
2107            "retrieval_ms": response.stats.retrieval_ms,
2108            "synthesis_ms": response.stats.synthesis_ms,
2109            "latency_ms": response.stats.latency_ms,
2110        },
2111        "engine": search_engine_label(&response.retrieval.engine),
2112        "total_hits": response.retrieval.total_hits,
2113        "next_cursor": response.retrieval.next_cursor,
2114        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2115    });
2116
2117    if let Some(inf) = inference {
2118        let model = &inf.answer;
2119        if let serde_json::Value::Object(ref mut map) = body {
2120            map.insert("model".into(), json!(model.requested));
2121            if model.model != model.requested {
2122                map.insert("model_used".into(), json!(model.model));
2123            }
2124            map.insert("cached".into(), json!(inf.cached));
2125            // Add usage and cost if available
2126            if let Some(usage) = &inf.usage {
2127                map.insert(
2128                    "usage".into(),
2129                    json!({
2130                        "input_tokens": usage.input_tokens,
2131                        "output_tokens": usage.output_tokens,
2132                        "total_tokens": usage.total_tokens,
2133                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2134                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2135                    }),
2136                );
2137            }
2138            // Add grounding/hallucination score if available
2139            if let Some(grounding) = &inf.grounding {
2140                map.insert(
2141                    "grounding".into(),
2142                    json!({
2143                        "score": grounding.score,
2144                        "label": grounding.label(),
2145                        "sentence_count": grounding.sentence_count,
2146                        "grounded_sentences": grounding.grounded_sentences,
2147                        "has_warning": grounding.has_warning,
2148                        "warning_reason": grounding.warning_reason,
2149                    }),
2150                );
2151            }
2152        }
2153    }
2154
2155    // Add detailed sources if requested
2156    if include_sources {
2157        if let serde_json::Value::Object(ref mut map) = body {
2158            let sources = build_sources_json(response, mem);
2159            map.insert("sources".into(), json!(sources));
2160        }
2161    }
2162
2163    // Add follow-up suggestions if confidence is low
2164    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2165        if let serde_json::Value::Object(ref mut map) = body {
2166            map.insert("follow_up".into(), follow_up);
2167        }
2168    }
2169
2170    println!("{}", serde_json::to_string_pretty(&body)?);
2171    Ok(())
2172}
2173
2174fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2175    response
2176        .citations
2177        .iter()
2178        .enumerate()
2179        .map(|(idx, citation)| {
2180            let mut source = serde_json::Map::new();
2181            source.insert("index".into(), json!(idx + 1));
2182            source.insert("frame_id".into(), json!(citation.frame_id));
2183            source.insert("uri".into(), json!(citation.uri));
2184
2185            if let Some(range) = citation.chunk_range {
2186                source.insert("chunk_range".into(), json!([range.0, range.1]));
2187            }
2188            if let Some(score) = citation.score {
2189                source.insert("score".into(), json!(score));
2190            }
2191
2192            // Get frame metadata for rich source information
2193            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2194                if let Some(title) = frame.title {
2195                    source.insert("title".into(), json!(title));
2196                }
2197                if !frame.tags.is_empty() {
2198                    source.insert("tags".into(), json!(frame.tags));
2199                }
2200                if !frame.labels.is_empty() {
2201                    source.insert("labels".into(), json!(frame.labels));
2202                }
2203                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2204                if !frame.content_dates.is_empty() {
2205                    source.insert("content_dates".into(), json!(frame.content_dates));
2206                }
2207            }
2208
2209            // Get snippet from hit
2210            if let Some(hit) = response
2211                .retrieval
2212                .hits
2213                .iter()
2214                .find(|h| h.frame_id == citation.frame_id)
2215            {
2216                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2217                source.insert("snippet".into(), json!(snippet));
2218            }
2219
2220            serde_json::Value::Object(source)
2221        })
2222        .collect()
2223}
2224
2225/// Build follow-up suggestions when the answer has low grounding/confidence.
2226/// Helps users understand what the memory contains and suggests relevant questions.
2227fn build_follow_up_suggestions(
2228    response: &AskResponse,
2229    inference: Option<&ModelInference>,
2230    mem: &mut Memvid,
2231) -> Option<serde_json::Value> {
2232    // Check if we need follow-up suggestions
2233    let needs_followup = inference
2234        .and_then(|inf| inf.grounding.as_ref())
2235        .map(|g| g.score < 0.3 || g.has_warning)
2236        .unwrap_or(false);
2237
2238    // Also trigger if retrieval hits have very low scores or no hits
2239    let low_retrieval = response
2240        .retrieval
2241        .hits
2242        .first()
2243        .and_then(|h| h.score)
2244        .map(|score| score < -2.0)
2245        .unwrap_or(true);
2246
2247    if !needs_followup && !low_retrieval {
2248        return None;
2249    }
2250
2251    // Get available topics from the memory by sampling timeline entries
2252    let limit = std::num::NonZeroU64::new(20).unwrap();
2253    let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2254
2255    let available_topics: Vec<String> = mem
2256        .timeline(timeline_query)
2257        .ok()
2258        .map(|entries| {
2259            entries
2260                .iter()
2261                .filter_map(|e| {
2262                    // Extract meaningful preview/title
2263                    let preview = e.preview.trim();
2264                    if preview.is_empty() || preview.len() < 5 {
2265                        return None;
2266                    }
2267                    // Get first line or truncate
2268                    let first_line = preview.lines().next().unwrap_or(preview);
2269                    if first_line.len() > 60 {
2270                        Some(format!("{}...", &first_line[..57]))
2271                    } else {
2272                        Some(first_line.to_string())
2273                    }
2274                })
2275                .collect::<std::collections::HashSet<_>>()
2276                .into_iter()
2277                .take(5)
2278                .collect()
2279        })
2280        .unwrap_or_default();
2281
2282    // Determine the reason for low confidence
2283    let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2284        "No relevant information found in memory"
2285    } else if inference
2286        .and_then(|i| i.grounding.as_ref())
2287        .map(|g| g.has_warning)
2288        .unwrap_or(false)
2289    {
2290        "Answer may not be well-supported by the available context"
2291    } else {
2292        "Low confidence in the answer"
2293    };
2294
2295    // Generate suggestion questions based on available topics
2296    let suggestions: Vec<String> = if available_topics.is_empty() {
2297        vec![
2298            "What information is stored in this memory?".to_string(),
2299            "Can you list the main topics covered?".to_string(),
2300        ]
2301    } else {
2302        available_topics
2303            .iter()
2304            .take(3)
2305            .map(|topic| format!("Tell me about {}", topic))
2306            .chain(std::iter::once(
2307                "What topics are in this memory?".to_string(),
2308            ))
2309            .collect()
2310    };
2311
2312    Some(json!({
2313        "needed": true,
2314        "reason": reason,
2315        "hint": if available_topics.is_empty() {
2316            "This memory may not contain information about your query."
2317        } else {
2318            "This memory contains information about different topics. Try asking about those instead."
2319        },
2320        "available_topics": available_topics,
2321        "suggestions": suggestions
2322    }))
2323}
2324
/// Emit a model-synthesized `ask` answer as pretty-printed, colorized JSON.
///
/// The payload always carries the question, the requested vs. actually-used
/// model labels, the answer text, and the (truncated) retrieval context.
/// When `inference` metadata is present, cache status, token usage/cost, and
/// grounding metrics are added. `include_sources` appends per-citation source
/// details, which needs `mem` for frame-metadata lookups.
fn emit_model_json(
    response: &AskResponse,
    requested_model: &str,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let answer = response.answer.clone().unwrap_or_default();
    // Prefer the labels recorded by the inference run; fall back to the CLI
    // argument when no inference metadata is available.
    let requested_label = inference
        .map(|m| m.answer.requested.clone())
        .unwrap_or_else(|| requested_model.to_string());
    let used_label = inference
        .map(|m| m.answer.model.clone())
        .unwrap_or_else(|| requested_model.to_string());

    let mut body = json!({
        "question": response.question,
        "model": requested_label,
        "model_used": used_label,
        "answer": answer,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // Add usage and cost if available
    if let Some(inf) = inference {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        // A cache hit costs nothing; report the avoided spend
                        // under saved_cost_usd instead.
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    // Add detailed sources if requested
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Add follow-up suggestions if confidence is low
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    // Use colored JSON output
    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2400
2401fn emit_ask_pretty(
2402    response: &AskResponse,
2403    requested_mode: AskModeArg,
2404    inference: Option<&ModelInference>,
2405    include_sources: bool,
2406    mem: &mut Memvid,
2407) {
2408    println!(
2409        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2410        ask_mode_pretty(requested_mode),
2411        ask_retriever_pretty(response.retriever),
2412        response.retrieval.params.top_k,
2413        response.stats.latency_ms,
2414        response.stats.retrieval_ms
2415    );
2416    if let Some(inference) = inference {
2417        let model = &inference.answer;
2418        let cached_label = if inference.cached { " [CACHED]" } else { "" };
2419        if model.requested.trim() == model.model {
2420            println!("model: {}{}", model.model, cached_label);
2421        } else {
2422            println!(
2423                "model requested: {}   model used: {}{}",
2424                model.requested, model.model, cached_label
2425            );
2426        }
2427        // Display usage and cost if available
2428        if let Some(usage) = &inference.usage {
2429            let cost_label = if inference.cached {
2430                format!("$0.00 (saved ${:.6})", usage.cost_usd)
2431            } else {
2432                format!("${:.6}", usage.cost_usd)
2433            };
2434            println!(
2435                "tokens: {} input + {} output = {}   cost: {}",
2436                usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
2437            );
2438        }
2439        // Display grounding/hallucination score
2440        if let Some(grounding) = &inference.grounding {
2441            let warning = if grounding.has_warning {
2442                format!(
2443                    " [WARNING: {}]",
2444                    grounding
2445                        .warning_reason
2446                        .as_deref()
2447                        .unwrap_or("potential hallucination")
2448                )
2449            } else {
2450                String::new()
2451            };
2452            println!(
2453                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2454                grounding.score * 100.0,
2455                grounding.label(),
2456                grounding.grounded_sentences,
2457                grounding.sentence_count,
2458                warning
2459            );
2460        }
2461    }
2462    println!(
2463        "engine: {}",
2464        search_engine_label(&response.retrieval.engine)
2465    );
2466    println!(
2467        "hits: {} (showing {})",
2468        response.retrieval.total_hits,
2469        response.retrieval.hits.len()
2470    );
2471
2472    if response.context_only {
2473        println!();
2474        println!("Context-only mode: synthesis disabled.");
2475        println!();
2476    } else if let Some(answer) = &response.answer {
2477        println!();
2478        println!("Answer:\n{answer}");
2479        println!();
2480    }
2481
2482    if !response.citations.is_empty() {
2483        println!("Citations:");
2484        for citation in &response.citations {
2485            match citation.score {
2486                Some(score) => println!(
2487                    "[{}] {} (frame {}, score {:.3})",
2488                    citation.index, citation.uri, citation.frame_id, score
2489                ),
2490                None => println!(
2491                    "[{}] {} (frame {})",
2492                    citation.index, citation.uri, citation.frame_id
2493                ),
2494            }
2495        }
2496        println!();
2497    }
2498
2499    // Print detailed sources if requested
2500    if include_sources && !response.citations.is_empty() {
2501        println!("=== SOURCES ===");
2502        println!();
2503        for citation in &response.citations {
2504            println!("[{}] {}", citation.index, citation.uri);
2505
2506            // Get frame metadata
2507            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2508                if let Some(title) = &frame.title {
2509                    println!("    Title: {}", title);
2510                }
2511                println!("    Frame ID: {}", citation.frame_id);
2512                if let Some(score) = citation.score {
2513                    println!("    Score: {:.4}", score);
2514                }
2515                if let Some((start, end)) = citation.chunk_range {
2516                    println!("    Range: [{}..{})", start, end);
2517                }
2518                if !frame.tags.is_empty() {
2519                    println!("    Tags: {}", frame.tags.join(", "));
2520                }
2521                if !frame.labels.is_empty() {
2522                    println!("    Labels: {}", frame.labels.join(", "));
2523                }
2524                println!("    Timestamp: {}", frame.timestamp);
2525                if !frame.content_dates.is_empty() {
2526                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2527                }
2528            }
2529
2530            // Get snippet from hit
2531            if let Some(hit) = response
2532                .retrieval
2533                .hits
2534                .iter()
2535                .find(|h| h.frame_id == citation.frame_id)
2536            {
2537                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2538                let truncated = if snippet.len() > 200 {
2539                    format!("{}...", &snippet[..200])
2540                } else {
2541                    snippet.clone()
2542                };
2543                println!("    Snippet: {}", truncated.replace('\n', " "));
2544            }
2545            println!();
2546        }
2547    }
2548
2549    if !include_sources {
2550        println!();
2551        emit_search_table(&response.retrieval);
2552    }
2553
2554    // Display follow-up suggestions if confidence is low
2555    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2556        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2557            if needed {
2558                println!();
2559                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2560                println!("💡 FOLLOW-UP SUGGESTIONS");
2561                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2562
2563                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2564                    println!("Reason: {}", reason);
2565                }
2566
2567                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2568                    println!("Hint: {}", hint);
2569                }
2570
2571                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2572                    if !topics.is_empty() {
2573                        println!();
2574                        println!("Available topics in this memory:");
2575                        for topic in topics.iter().filter_map(|t| t.as_str()) {
2576                            println!("  • {}", topic);
2577                        }
2578                    }
2579                }
2580
2581                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2582                    if !suggestions.is_empty() {
2583                        println!();
2584                        println!("Try asking:");
2585                        for (i, suggestion) in
2586                            suggestions.iter().filter_map(|s| s.as_str()).enumerate()
2587                        {
2588                            println!("  {}. \"{}\"", i + 1, suggestion);
2589                        }
2590                    }
2591                }
2592                println!();
2593            }
2594        }
2595    }
2596}
2597
/// Emit verbatim evidence as JSON without LLM synthesis.
/// Format: {evidence: [{source, text, score}], question, hits, stats}
///
/// Each evidence entry carries the verbatim excerpt (chunk text when
/// available, otherwise the full frame text) plus frame id, URI, optional
/// title, and optional score. With `include_sources`, a parallel `sources`
/// array of frame metadata (looked up via `mem`) is appended to the body.
fn emit_verbatim_evidence_json(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // Build evidence array from hits - verbatim excerpts with citations
    let evidence: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let mut entry = serde_json::Map::new();
            // 1-based index so it matches citation numbering in other outputs.
            entry.insert("index".into(), json!(idx + 1));
            entry.insert("frame_id".into(), json!(hit.frame_id));
            entry.insert("uri".into(), json!(&hit.uri));
            if let Some(title) = &hit.title {
                entry.insert("title".into(), json!(title));
            }
            // Use chunk_text if available (more specific), otherwise full text
            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
            entry.insert("text".into(), json!(verbatim));
            if let Some(score) = hit.score {
                entry.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(entry)
        })
        .collect();

    // Build sources array if requested
    let sources: Option<Vec<_>> = if include_sources {
        Some(
            response
                .retrieval
                .hits
                .iter()
                .filter_map(|hit| {
                    // Hits whose frame lookup fails are silently skipped.
                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
                        let mut source = serde_json::Map::new();
                        source.insert("frame_id".into(), json!(frame.id));
                        source.insert(
                            "uri".into(),
                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
                        );
                        if let Some(title) = &frame.title {
                            source.insert("title".into(), json!(title));
                        }
                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
                        // Empty collections are omitted to keep the payload compact.
                        if !frame.tags.is_empty() {
                            source.insert("tags".into(), json!(frame.tags));
                        }
                        if !frame.labels.is_empty() {
                            source.insert("labels".into(), json!(frame.labels));
                        }
                        serde_json::Value::Object(source)
                    })
                })
                .collect(),
        )
    } else {
        None
    };

    let mut body = json!({
        "version": "mv2.evidence.v1",
        "mode": "verbatim",
        "question": response.question,
        "evidence": evidence,
        "evidence_count": evidence.len(),
        "total_hits": response.retrieval.total_hits,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
    });

    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
        map.insert("sources".into(), json!(sources));
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2685
/// Emit verbatim evidence in human-readable format without LLM synthesis.
///
/// Prints header stats, a banner with the (truncated) question, then each hit
/// as a numbered, indented verbatim excerpt with a normalized relevance
/// percentage. With `include_sources`, a "SOURCE DETAILS" section of frame
/// metadata (looked up via `mem`) follows, and a closing note reminds the
/// user that no model synthesis was performed.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {}   latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    // Header
    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    // Display each piece of evidence with citation
    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                // Raw BM25 scores are unintuitive; show a 0-100% relative scale.
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!("    Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Show verbatim text - prefer chunk_text if available
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        // Indent each line for readability
        for line in verbatim.lines() {
            // Blank lines are dropped to keep the quoted block tight.
            if !line.trim().is_empty() {
                println!("    │ {}", line);
            }
        }
        println!();
    }

    // Print detailed sources if requested
    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            // Frames that fail to resolve are silently skipped.
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", frame.id);
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    // Note about no LLM synthesis
    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2807
2808fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2809    let hits: Vec<_> = response
2810        .hits
2811        .iter()
2812        .map(|hit| {
2813            json!({
2814                "frame_id": hit.frame_id,
2815                "matches": hit.matches,
2816                "snippets": [hit.text.clone()],
2817            })
2818        })
2819        .collect();
2820    println!("{}", serde_json::to_string_pretty(&hits)?);
2821    Ok(())
2822}
2823
/// Print search hits as a human-readable list: one stanza per hit with URI,
/// optional title, normalized relevance, byte ranges, optional chunk text,
/// and any attached metadata (track, tags, labels, dates, entities), followed
/// by the pagination cursor when more results exist.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Calculate score range for normalization (BM25 scores can be negative)
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!("  Title: {title}");
        }
        if let Some(score) = hit.score {
            // Shown as 0-100% relative to this result set, not a raw BM25 value.
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!("  Relevance: {:.0}%", normalized);
        }
        // Half-open byte range of the match within the frame.
        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!("  Chunk Text: {}", chunk_text.trim());
        }
        if let Some(metadata) = &hit.metadata {
            // Only print metadata fields that are actually populated.
            if let Some(track) = &metadata.track {
                println!("  Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!("  Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!("  Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!("  Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!("  Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!("  Entities: {}", entity_strs.join(", "));
            }
        }
        println!("  Snippet: {}", hit.text.trim());
        println!();
    }
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2882
2883fn ask_mode_display(mode: AskModeArg) -> &'static str {
2884    match mode {
2885        AskModeArg::Lex => "lex",
2886        AskModeArg::Sem => "sem",
2887        AskModeArg::Hybrid => "hybrid",
2888    }
2889}
2890
2891fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2892    match mode {
2893        AskModeArg::Lex => "Lexical",
2894        AskModeArg::Sem => "Semantic",
2895        AskModeArg::Hybrid => "Hybrid",
2896    }
2897}
2898
2899fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2900    match retriever {
2901        AskRetriever::Lex => "lex",
2902        AskRetriever::Semantic => "semantic",
2903        AskRetriever::Hybrid => "hybrid",
2904        AskRetriever::LexFallback => "lex_fallback",
2905        AskRetriever::TimelineFallback => "timeline_fallback",
2906    }
2907}
2908
2909fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2910    match retriever {
2911        AskRetriever::Lex => "Lexical",
2912        AskRetriever::Semantic => "Semantic",
2913        AskRetriever::Hybrid => "Hybrid",
2914        AskRetriever::LexFallback => "Lexical (fallback)",
2915        AskRetriever::TimelineFallback => "Timeline (fallback)",
2916    }
2917}
2918
2919fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2920    match engine {
2921        SearchEngineKind::Tantivy => "text (tantivy)",
2922        SearchEngineKind::LexFallback => "text (fallback)",
2923        SearchEngineKind::Hybrid => "hybrid",
2924    }
2925}
2926
2927fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2928    let digest = hash(uri.as_bytes()).to_hex().to_string();
2929    let prefix_len = digest.len().min(12);
2930    let prefix = &digest[..prefix_len];
2931    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2932}
2933
/// Shorten `text` to at most `limit` characters, appending "..." when
/// anything was cut. Counts `char`s (not bytes), so multi-byte UTF-8 input
/// never splits mid-character.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` is Some only when the text has more than `limit` chars;
    // its byte offset is then a valid boundary for slicing the kept prefix.
    match text.char_indices().nth(limit) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
2942
/// Normalize a BM25 score to a 0-100 value for user-friendly display.
///
/// Tantivy's log-based term frequency lets BM25 go negative for very common
/// terms, so raw scores confuse users. Instead, each score is positioned
/// linearly within the result set's `[min_score, max_score]` range:
///
/// - all scores equal (`min == max`): every hit reports 100.0
/// - otherwise: `(score - min) / (max - min) * 100`, clamped to `[0, 100]`
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let span = max_score - min_score;
    // Degenerate case: every hit scored identically.
    if span.abs() < f32::EPSILON {
        return 100.0;
    }
    ((score - min_score) / span * 100.0).clamp(0.0, 100.0)
}
2959
/// Extract min and max scores from a slice of optional scores.
///
/// `None` entries are skipped. Returns `(0.0, 0.0)` when no score is present.
/// Runs in a single pass with no intermediate allocation; NaN handling
/// follows `f32::min`/`f32::max` (a NaN argument yields the other value).
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut found = false;
    let mut min = f32::INFINITY;
    let mut max = f32::NEG_INFINITY;
    for score in scores.iter().filter_map(|s| *s) {
        found = true;
        min = min.min(score);
        max = max.max(score);
    }
    if found {
        (min, max)
    } else {
        // No valid scores at all: sentinel range instead of (INF, -INF).
        (0.0, 0.0)
    }
}
2973
/// Serialize a single search hit to the JSON shape used by machine output:
/// rank/score, a stable derived hit id, frame identity, byte ranges, text,
/// and a nested `metadata` object. Empty collections and absent optionals
/// are omitted to keep the payload compact.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Stable id derived from URI hash + frame id + match start offset.
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // When no explicit chunk range exists, mirror the match range so
    // consumers can always rely on the field being present.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Hits without metadata still report a match count via a default shell.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Hits whose frames lack an embedding — or whose embedding dimension does
/// not match the runtime's — contribute no semantic term; if NO hit has a
/// usable embedding the response is left untouched. Embedding lookup errors
/// propagate to the caller.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Score each hit's frame embedding against the query embedding.
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Skip embeddings produced by a different model/dimension.
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    // Map frame id -> 1-based semantic rank.
    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // (original index, combined RRF score, lexical rank for tie-breaking)
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Highest combined score first; ties fall back to the lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order and reassign 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3139
3140/// Rerank search results by boosting hits that contain user preference signals.
3141/// Only applies when the query appears to be seeking recommendations or preferences.
3142fn apply_preference_rerank(response: &mut SearchResponse) {
3143    if response.hits.is_empty() {
3144        return;
3145    }
3146
3147    // Check if query is preference-seeking
3148    let query_lower = response.query.to_lowercase();
3149    let is_preference_query = query_lower.contains("suggest")
3150        || query_lower.contains("recommend")
3151        || query_lower.contains("should i")
3152        || query_lower.contains("what should")
3153        || query_lower.contains("prefer")
3154        || query_lower.contains("favorite")
3155        || query_lower.contains("best for me");
3156
3157    if !is_preference_query {
3158        return;
3159    }
3160
3161    // Compute boost scores for each hit
3162    let mut scored: Vec<(usize, f32, f32)> = response
3163        .hits
3164        .iter()
3165        .enumerate()
3166        .map(|(idx, hit)| {
3167            let original_score = hit.score.unwrap_or(0.0);
3168            let preference_boost = compute_preference_boost(&hit.text);
3169            let boosted_score = original_score + preference_boost;
3170            (idx, boosted_score, original_score)
3171        })
3172        .collect();
3173
3174    // Sort by boosted score (descending)
3175    scored.sort_by(|a, b| {
3176        b.1.partial_cmp(&a.1)
3177            .unwrap_or(Ordering::Equal)
3178            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3179    });
3180
3181    // Reorder hits
3182    let mut reordered = Vec::with_capacity(response.hits.len());
3183    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3184        let mut hit = response.hits[idx].clone();
3185        hit.rank = rank_idx + 1;
3186        reordered.push(hit);
3187    }
3188
3189    response.hits = reordered;
3190}
3191
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
///
/// Returns a value in `[0.0, 0.5]`: +0.15 per established-context pattern,
/// +0.02 per general first-person word and per request pattern, capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    let text_lower = text.to_lowercase();
    // Pad with spaces so the word-boundary patterns below (" i ", " my ",
    // " me ") also match when the word sits at the very start or end of the
    // text — previously "My dog..." or "...with me" earned no boost.
    let padded = format!(" {} ", text_lower);
    let mut boost = 0.0f32;

    // Strong signals: Past/present user experiences and possessions
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for pattern in established_context {
        if text_lower.contains(pattern) {
            boost += 0.15;
        }
    }

    // Moderate signals: General first-person statements.
    // Checked against the padded text so boundary occurrences count too.
    let first_person = [" i ", " my ", " me "];
    for pattern in first_person {
        if padded.contains(pattern) {
            boost += 0.02;
        }
    }

    // Weak signals: Requests/intentions (not yet established preferences)
    // These indicate the user wants something, but don't describe established context
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in request_patterns {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
3284
/// Cosine similarity of two vectors, computed over their common prefix.
/// Returns 0.0 when either vector has (near-)zero magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in a single pass.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (&x, &y)| (d + x * y, na + x * x, nb + y * y),
    );

    // Guard against division by (effectively) zero magnitudes.
    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3301
/// Apply cross-encoder reranking to search results.
///
/// Cross-encoders directly score query-document pairs and can understand
/// more nuanced relevance than bi-encoders (embeddings). This is especially
/// useful for personalization queries where semantic similarity != relevance.
///
/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
///
/// Only the top (at most) 50 hits are reranked; any remaining hits keep their
/// relative order and are appended after the reranked prefix. Ranks are
/// rewritten to be consecutive starting at 1, and reranked hits have `score`
/// replaced by the blended score.
///
/// # Errors
///
/// Currently always returns `Ok(())`: reranker initialization or inference
/// failures are logged via `warn!` and the response is left unchanged.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits.
    // NOTE(review): `is_empty()` is subsumed by `len() < 2`; kept for clarity.
    if response.hits.is_empty() || response.hits.len() < 2 {
        return Ok(());
    }

    // Only rerank if we have enough candidates
    let candidates_to_rerank = response.hits.len().min(50);

    // Initialize the reranker (model will be downloaded on first use, ~86MB)
    // Using JINA Turbo - faster than BGE while maintaining good accuracy
    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    // Initialization failure (e.g. model download unavailable) is non-fatal:
    // fall back to the existing ordering.
    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    // Rerank using cross-encoder
    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    // Blend cross-encoder scores with original scores to preserve temporal boosting.
    // The original score includes recency boost; purely replacing it loses temporal relevance.
    // We collect (blended_score, original_idx) pairs and sort by blended score.
    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Find score range for normalization (original scores can be negative for BM25)
    // NOTE(review): if every hit has `score == None`, `orig_min`/`orig_max`
    // stay at ±infinity and `normalized_original` below becomes non-finite,
    // making the sort order arbitrary — confirm hits always carry scores here.
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores
        .iter()
        .cloned()
        .fold(f32::INFINITY, f32::min);
    let orig_max = original_scores
        .iter()
        .cloned()
        .fold(f32::NEG_INFINITY, f32::max);
    let orig_range = (orig_max - orig_min).max(0.001); // Avoid division by zero

    for result in rerank_results.iter() {
        // Presumably `result.index` points back into `documents` (and hence the
        // first `candidates_to_rerank` hits) — verify against the fastembed API.
        let original_idx = result.index;
        let cross_encoder_score = result.score; // Already normalized 0-1

        // Normalize original score to 0-1 range
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // Blend: 20% cross-encoder (relevance) + 80% original (includes temporal boost)
        // Very heavy weight on original score to preserve temporal ranking
        // The original score already incorporates BM25 + recency boost
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    // Sort by blended score (descending)
    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Build reordered hits with new ranks
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        // Store blended score for reference
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Add any remaining hits that weren't reranked (beyond top-50)
    // Ranks continue consecutively because `reordered.len()` grows per push.
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3407
/// Stub for cross-encoder reranking when `local-embeddings` is disabled.
/// Leaves the response untouched and reports success, so callers need no
/// feature-specific branching; reranking is skipped silently.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3414
3415/// Build a context string from memory cards stored in the MV2 file.
3416/// Groups facts by entity for better LLM comprehension.
3417fn build_memory_context(mem: &Memvid) -> String {
3418    let entities = mem.memory_entities();
3419    if entities.is_empty() {
3420        return String::new();
3421    }
3422
3423    let mut sections = Vec::new();
3424    for entity in entities {
3425        let cards = mem.get_entity_memories(&entity);
3426        if cards.is_empty() {
3427            continue;
3428        }
3429
3430        let mut entity_lines = Vec::new();
3431        for card in cards {
3432            // Format: "slot: value" with optional polarity indicator
3433            let polarity_marker = card
3434                .polarity
3435                .as_ref()
3436                .map(|p| match p.to_string().as_str() {
3437                    "Positive" => " (+)",
3438                    "Negative" => " (-)",
3439                    _ => "",
3440                })
3441                .unwrap_or("");
3442            entity_lines.push(format!(
3443                "  - {}: {}{}",
3444                card.slot, card.value, polarity_marker
3445            ));
3446        }
3447
3448        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3449    }
3450
3451    sections.join("\n\n")
3452}
3453
3454/// Build a context string from entities found in search hits.
3455/// Groups entities by type for better LLM comprehension.
3456fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3457    use std::collections::HashMap;
3458
3459    // Collect unique entities by kind
3460    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3461
3462    for hit in hits {
3463        if let Some(metadata) = &hit.metadata {
3464            for entity in &metadata.entities {
3465                entities_by_kind
3466                    .entry(entity.kind.clone())
3467                    .or_default()
3468                    .push(entity.name.clone());
3469            }
3470        }
3471    }
3472
3473    if entities_by_kind.is_empty() {
3474        return String::new();
3475    }
3476
3477    // Deduplicate and format
3478    let mut sections = Vec::new();
3479    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3480    sorted_kinds.sort();
3481
3482    for kind in sorted_kinds {
3483        let names = entities_by_kind.get(kind).unwrap();
3484        let mut unique_names: Vec<_> = names.iter().collect();
3485        unique_names.sort();
3486        unique_names.dedup();
3487
3488        let names_str = unique_names
3489            .iter()
3490            .take(10) // Limit to 10 entities per kind
3491            .map(|s| s.as_str())
3492            .collect::<Vec<_>>()
3493            .join(", ");
3494
3495        sections.push(format!("{}: {}", kind, names_str));
3496    }
3497
3498    sections.join("\n")
3499}