// memvid_cli/commands/search.rs

//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
//!
//! Responsibilities:
//! - Parse CLI arguments for search/RAG/timeline.
//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).

8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored_json::ToColoredJson;
15use blake3::hash;
16use clap::{ArgAction, Args, ValueEnum};
17#[cfg(feature = "temporal_track")]
18use memvid_core::{
19    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
20    TemporalResolution, TemporalResolutionValue,
21};
22use memvid_core::{
23    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
24    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
25    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
26};
27#[cfg(feature = "temporal_track")]
28use serde::Serialize;
29use serde_json::json;
30#[cfg(feature = "temporal_track")]
31use time::format_description::well_known::Rfc3339;
32use time::{Date, PrimitiveDateTime, Time};
33#[cfg(feature = "temporal_track")]
34use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
35use tracing::{info, warn};
36
37use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
38
39use memvid_ask_model::{
40    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
41    ModelInference,
42};
43
44// frame_to_json and print_frame_summary available from commands but not used in this module
45use crate::config::{
46    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
47    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
48};
49use crate::utils::{
50    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51    parse_date_boundary, parse_vector, read_embedding,
52};
53
// Cap (in characters) on context echoed to output.
// NOTE(review): not referenced in this chunk — confirm its use elsewhere in the file.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone used by `build_temporal_filter` when `--tz` is omitted.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
58fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
59    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
60    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
61    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
62        message.push_str(&format!(
63            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
64            model.name(),
65            model.name()
66        ));
67        if model.is_openai() {
68            message.push_str(" (and set `OPENAI_API_KEY`).");
69        } else {
70            message.push('.');
71        }
72        message.push_str(&format!(
73            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
74            model.name()
75        ));
76        message.push_str(&format!(
77            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
78        ));
79        message.push_str("\nOr use `--mode lex` to disable semantic search.");
80    }
81    message
82}
83
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit entries as pretty-printed JSON instead of the human-readable form.
    #[arg(long)]
    pub json: bool,
    // Forwarded to `TimelineQueryBuilder::reverse` to flip entry order.
    #[arg(long)]
    pub reverse: bool,
    // Cap on the number of entries returned (zero is rejected by the type).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound forwarded to `TimelineQueryBuilder::since`.
    // NOTE(review): assumed Unix seconds — confirm against memvid-core.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound forwarded to `TimelineQueryBuilder::until`.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase; resolved via `build_temporal_filter`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone override for phrase resolution; only valid with `--on`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 instant the phrase is resolved relative to; only valid with `--on`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // ± padding in minutes applied to the resolved window; only valid with `--on`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
118
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read (opened read-only by the handler).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required for `when`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone override used when resolving the phrase.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 instant the phrase is resolved relative to (defaults to now UTC).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // ± padding in minutes applied to the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Cap on the number of entries returned.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound for the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound for the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Forwarded to `TimelineQueryBuilder::reverse` to flip entry order.
    #[arg(long)]
    pub reverse: bool,
    // Emit results as pretty-printed JSON.
    #[arg(long)]
    pub json: bool,
}
144
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    // Positional values; zero or more accepted. NOTE(review): how these split
    // into memory files vs. question words is decided by the handler, which is
    // outside this module's view — confirm before relying on a description.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text, as an alternative to positional input.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to a single document URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to documents whose URI starts with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve (fixed top-k unless adaptive retrieval is on).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode; maps onto `memvid_core::AskMode` (see the From impl).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
    // Print only the retrieved context; skip answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Start-date filter string; parsing happens in the handler.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End-date filter string; parsing happens in the handler.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
232
/// Ask mode argument
// Variants map 1:1 onto `memvid_core::AskMode` (see the `From` impl below).
// Clap's ValueEnum derives the CLI values ("lex", "sem", "hybrid") from the
// variant names, so renaming a variant would change the CLI surface.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    Lex,
    Sem,
    Hybrid,
}
240
241impl From<AskModeArg> for AskMode {
242    fn from(value: AskModeArg) -> Self {
243        match value {
244            AskModeArg::Lex => AskMode::Lex,
245            AskModeArg::Sem => AskMode::Sem,
246            AskModeArg::Hybrid => AskMode::Hybrid,
247        }
248    }
249}
250
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict results to a single document URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict results to documents whose URI starts with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve (fixed top-k unless adaptive retrieval is on).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
    // Emit the older JSON shape; mutually exclusive with --json.
    // NOTE(review): the legacy shape is produced by the handler — confirm.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode (auto/lex/sem, plus clip when that feature is enabled).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
302
/// Search mode argument
// CLI values derive from the variant names via ValueEnum ("auto", "lex", ...).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the handler choose the engine. NOTE(review): exact auto behavior
    // lives outside this chunk — confirm before documenting further.
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (vector) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
313
/// Adaptive retrieval strategy
// Accepted values for `--adaptive-strategy`; variant names become the CLI
// values via ValueEnum. Presumably mapped onto `CutoffStrategy` from
// memvid-core (imported above) by the handler — confirm.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
328
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline query vector as comma-separated floats; mutually exclusive
    // with --embedding (presumably parsed by `parse_vector` — confirm).
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file holding the query embedding; mutually exclusive
    // with --vector (presumably loaded by `read_embedding` — confirm).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of results to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
}
343
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
380
/// Audit output format
// Accepted values for `--format` ("text", "markdown", "json") derive from the
// variant names via ValueEnum.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
391
392// ============================================================================
393// Search & Retrieval command handlers
394// ============================================================================
395
/// Handle the `timeline` subcommand: list frames chronologically, optionally
/// restricted by limit/since/until, a resolved temporal phrase (`--on`,
/// feature-gated), and Replay (`--as-of-*`) cutoffs. Output is either
/// pretty-printed JSON or one human-readable record per entry.
///
/// Errors if the memory cannot be opened, the phrase fails to resolve, or the
/// timeline query itself fails.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are phrase-resolution knobs; reject them without --on.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve `--on` into a concrete filter; keep the summary so it can be
    // echoed alongside the results (both in JSON and human output).
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested (post-filter on the fetched entries).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a resolved phrase, wrap the entries in an envelope that also
        // carries the temporal summary; otherwise emit the bare entry list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
501
/// Handle the `when` subcommand: resolve a natural-language temporal phrase
/// (`--on`) to a UTC window, then list the timeline entries inside it. Always
/// prints the resolution summary (in JSON or human form) so users can see how
/// their phrase was interpreted.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    // JSON output: summary + serializable entry views, then done.
    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Human output includes an ISO rendering next to the raw timestamp.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
569
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// the resolved-phrase summary (omitted when absent) plus the entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
577
/// JSON envelope for `when --json`: the resolved-phrase summary plus the
/// matching entries as serializable views.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
584
/// Serializable view of a `TimelineEntry` for `when --json` output.
/// Optional/empty fields are skipped to keep the JSON compact.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    // Raw timestamp copied from the entry; rendered below when formattable.
    timestamp: i64,
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
600
/// Serializable form of `TemporalSummary`, shared by `timeline` and `when`
/// JSON output (built by `summary_to_output`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    // Anchor instant as Unix seconds plus an RFC3339 rendering (the rendering
    // falls back to the numeric timestamp when formatting fails).
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    resolution_kind: &'static str,
    // Resolved window bounds; both the raw seconds and ISO renderings.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
619
/// Internal record of how a temporal phrase was resolved: the inputs
/// (phrase, timezone, anchor), the resolved UTC bounds, the raw resolution,
/// and any requested window padding.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    window_minutes: Option<u64>,
}
630
/// Resolve a natural-language temporal `phrase` into a `TemporalFilter` plus a
/// `TemporalSummary` describing how it was resolved.
///
/// * `tz_override` — IANA timezone; defaults to `DEFAULT_TEMPORAL_TZ`.
/// * `anchor_override` — RFC3339 instant the phrase is resolved relative to;
///   defaults to "now" (UTC).
/// * `window_minutes` — optional ± padding applied to the resolved bounds.
///
/// Errors carry stable `E-TEMP-*` codes so they stay greppable for users.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both bounds by ±window_minutes when both are known. (The previous
    // version branched on `s == e` with byte-identical then/else bodies; the
    // duplicate branches are collapsed here — behavior is unchanged.)
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let delta_secs = TimeDuration::minutes(minutes as i64).whole_seconds();
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta_secs));
                end = Some(e.saturating_add(delta_secs));
            }
        }
    }

    // The filter carries only the concrete bounds; phrase/tz live in the summary.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
694
/// Convert an internal `TemporalSummary` into its serializable JSON form,
/// rendering the anchor and window bounds to ISO strings where possible.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    let anchor_utc = summary.anchor.unix_timestamp();
    // Fall back to the raw Unix timestamp if RFC3339 formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags = summary.resolution.flags.iter().map(|f| f.as_str()).collect();

    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
720
/// Project a `TimelineEntry` into the `when` command's serializable view,
/// attaching an ISO rendering of the timestamp when formatting succeeds.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    let timestamp = entry.timestamp;
    let timestamp_iso = format_timestamp(timestamp);
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp,
        timestamp_iso,
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
733
/// Print a human-readable description of a resolved temporal phrase:
/// phrase, timezone, anchor, resolved window, confidence, flags, and any
/// window padding — followed by a blank separator line.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    // Fall back to the raw Unix timestamp if RFC3339 formatting fails.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!("Anchor: {}", anchor_text);

    // A single instant collapses to one "Resolved to" line.
    let bounds = (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    );
    match bounds {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        (None, None) => println!("Window: (not resolved)"),
    }

    println!("Confidence: {}", summary.resolution.confidence);
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|f| f.as_str())
        .collect();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    if let Some(window) = summary.window_minutes.filter(|w| *w > 0) {
        println!("Window padding: {window} minute(s)");
    }
    println!();
}
771
/// Print the indented anchor and mention details attached to a timeline
/// entry, preferring ISO renderings and falling back to raw timestamps.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        let shown = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", shown, anchor.source);
    }

    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let shown = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            shown, mention.kind, mention.confidence
        );
        // Append the quoted source text when the mention carries one.
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
804
/// Convert a temporal resolution into inclusive `(start, end)` UTC-second
/// bounds. Single instants yield identical start/end; `Month` expands to the
/// first and last calendar day of that month (midnight UTC).
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;
    let (lo, hi) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(lo), Some(hi)))
}
835
#[cfg(feature = "temporal_track")]
/// Map a temporal resolution variant to its stable string tag (used in output).
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as V;
    match resolution.value {
        V::Date(_) => "date",
        V::DateTime(_) => "datetime",
        V::DateRange { .. } => "date_range",
        V::DateTimeRange { .. } => "datetime_range",
        V::Month { .. } => "month",
    }
}
846
#[cfg(feature = "temporal_track")]
/// Convert a calendar `Date` to the Unix timestamp of midnight UTC on that day.
fn date_to_timestamp(date: Date) -> i64 {
    // `Date::midnight()` + `assume_utc()` is the direct `time`-crate idiom for
    // the previous manual `PrimitiveDateTime::new(date, Time::MIDNIGHT)
    // .assume_offset(UtcOffset::UTC)` construction — identical result.
    date.midnight().assume_utc().unix_timestamp()
}
853
#[cfg(feature = "temporal_track")]
/// Return the last calendar day of `month` in `year` (leap years handled by
/// the `time` crate).
///
/// # Errors
/// Returns "invalid month resolution" when `year`/`month` cannot form a valid
/// calendar date, matching the error produced by `resolution_bounds`.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    // `time::util::days_in_month` gives the month length directly, so we can
    // construct the final day in one step instead of walking day-by-day with
    // `Date::next_day` as before.
    Date::from_calendar_date(year, month, time::util::days_in_month(month, year))
        .map_err(|_| anyhow!("invalid month resolution"))
}
867
868#[cfg(feature = "temporal_track")]
869
870fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
871    if fragments.is_empty() {
872        return;
873    }
874
875    response.context_fragments = fragments
876        .into_iter()
877        .map(|fragment| AskContextFragment {
878            rank: fragment.rank,
879            frame_id: fragment.frame_id,
880            uri: fragment.uri,
881            title: fragment.title,
882            score: fragment.score,
883            matches: fragment.matches,
884            range: Some(fragment.range),
885            chunk_range: fragment.chunk_range,
886            text: fragment.text,
887            kind: Some(match fragment.kind {
888                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
889                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
890            }),
891            #[cfg(feature = "temporal_track")]
892            temporal: None,
893        })
894        .collect();
895}
896
/// Handle the `ask` command: retrieve relevant context for a question from an
/// MV2 memory and, unless `--context-only`, optionally synthesize an answer
/// with a local model via `run_model_inference`.
///
/// Flow: parse positional targets into a memory path plus question tokens,
/// resolve optional date bounds, open the memory, load an embedding runtime
/// matching the ask mode, run retrieval, apply post-processing (cross-encoder
/// rerank, memory cards, entity context, PII masking), then emit JSON or
/// pretty output.
///
/// # Errors
/// Fails when no question is provided, the date range is inverted, the memory
/// contains mixed embedding models (semantic modes), or the required
/// embedding runtime cannot be loaded.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory file
    // becomes the path; all remaining tokens are treated as question words.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // `--question` wins over the positional words; blank strings count as absent.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // `end` is parsed as an end-of-range boundary (second argument `true`).
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    let ask_mode: AskMode = args.mode.into();
    // For semantic modes, infer the embedding model recorded in the memory so
    // the query embedding matches. A memory with mixed models is rejected
    // outright — comparing vectors from different models is meaningless.
    let inferred_model_override = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // Explicit --query-embedding-model takes precedence over the inferred one.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    // Sem requires a runtime (hard error on failure); Hybrid tries the
    // non-failing loader first and falls back to the strict loader.
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    // Translate dimension mismatches into a friendly, actionable message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
        other => anyhow!(other),
    })?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Reranking failure is non-fatal: keep the original hit order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    // NOTE(review): masking runs after all context injection above, so the
    // memory-card/entity sections are masked too.
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Optional model synthesis: --context-only suppresses it entirely, and an
    // inference failure degrades gracefully to the default summary.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref model_answer) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                model_answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
            );
        }
    }

    // Emit output: model-specific JSON, plain ask JSON, or pretty text.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1219
1220pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1221    let mut mem = open_read_only_mem(&args.file)?;
1222
1223    // Load active replay session if one exists
1224    #[cfg(feature = "replay")]
1225    let _ = mem.load_active_session();
1226
1227    if args.uri.is_some() && args.scope.is_some() {
1228        warn!("--scope ignored because --uri is provided");
1229    }
1230
1231    // Get vector dimension from MV2 for auto-detection
1232    let mv2_dimension = mem.effective_vec_index_dimension()?;
1233    let identity_summary = match args.mode {
1234        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1235        #[cfg(feature = "clip")]
1236        SearchMode::Clip => None,
1237        SearchMode::Lex => None,
1238    };
1239
1240    let mut semantic_allowed = true;
1241    let inferred_model_override = match identity_summary.as_ref() {
1242        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1243            identity.model.as_deref().map(|value| value.to_string())
1244        }
1245        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1246            let models: Vec<_> = identities
1247                .iter()
1248                .filter_map(|entry| entry.identity.model.as_deref())
1249                .collect();
1250            if args.mode == SearchMode::Sem {
1251                anyhow::bail!(
1252                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1253                    Detected models: {:?}\n\n\
1254                    Suggested fix: split into separate memories per embedding model.",
1255                    models
1256                );
1257            }
1258            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
1259            semantic_allowed = false;
1260            None
1261        }
1262        _ => None,
1263    };
1264
1265    let emb_model_override = args
1266        .query_embedding_model
1267        .as_deref()
1268        .or(inferred_model_override.as_deref());
1269
1270    let (mode_label, runtime_option) = match args.mode {
1271        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1272        SearchMode::Sem => {
1273            let runtime =
1274                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1275            ("Semantic (vector search)".to_string(), Some(runtime))
1276        }
1277        SearchMode::Auto => {
1278            if !semantic_allowed {
1279                ("Lexical (semantic unsafe)".to_string(), None)
1280            } else if let Some(runtime) =
1281                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1282            {
1283                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1284            } else {
1285                ("Lexical (semantic unavailable)".to_string(), None)
1286            }
1287        }
1288        #[cfg(feature = "clip")]
1289        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1290    };
1291
1292    let mode_key = match args.mode {
1293        SearchMode::Sem => "semantic",
1294        SearchMode::Lex => "text",
1295        SearchMode::Auto => {
1296            if runtime_option.is_some() {
1297                "hybrid"
1298            } else {
1299                "text"
1300            }
1301        }
1302        #[cfg(feature = "clip")]
1303        SearchMode::Clip => "clip",
1304    };
1305
1306    // For CLIP mode, use CLIP visual search
1307    #[cfg(feature = "clip")]
1308    if args.mode == SearchMode::Clip {
1309        use memvid_core::clip::{ClipConfig, ClipModel};
1310
1311        // Initialize CLIP model
1312        let config = ClipConfig::default();
1313        let clip = ClipModel::new(config).map_err(|e| {
1314            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1315        })?;
1316
1317        // Encode query text
1318        let query_embedding = clip
1319            .encode_text(&args.query)
1320            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1321
1322        // Search CLIP index
1323        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1324
1325        // Debug distances before filtering
1326        for hit in &hits {
1327            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1328                tracing::debug!(
1329                    frame_id = hit.frame_id,
1330                    title = %frame.title.unwrap_or_default(),
1331                    page = hit.page,
1332                    distance = hit.distance,
1333                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1334                    "CLIP raw hit"
1335                );
1336            } else {
1337                tracing::debug!(
1338                    frame_id = hit.frame_id,
1339                    page = hit.page,
1340                    distance = hit.distance,
1341                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1342                    "CLIP raw hit (missing frame)"
1343                );
1344            }
1345        }
1346
1347        // CLIP distance threshold for filtering poor matches
1348        // CLIP uses L2 distance on normalized embeddings:
1349        //   - distance² = 2(1 - cosine_similarity)
1350        //   - distance = 0 → identical (cosine_sim = 1)
1351        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1352        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1353        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1354        //   - distance = 2.0 → opposite (cosine_sim = -1)
1355        //
1356        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1357        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1358        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1359        const CLIP_MAX_DISTANCE: f32 = 1.26;
1360
1361        // Convert CLIP hits to SearchResponse format, filtering by threshold
1362        let search_hits: Vec<SearchHit> = hits
1363            .into_iter()
1364            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1365            .enumerate()
1366            .filter_map(|(rank, hit)| {
1367                // Convert L2 distance to cosine similarity for display
1368                // cos_sim = 1 - (distance² / 2)
1369                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1370
1371                // Get frame preview for snippet
1372                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1373                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1374                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1375                let title = match (base_title, hit.page) {
1376                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1377                    (Some(t), None) => Some(t),
1378                    (None, Some(p)) => Some(format!("Page {p}")),
1379                    _ => None,
1380                };
1381                Some(SearchHit {
1382                    rank: rank + 1,
1383                    frame_id: hit.frame_id,
1384                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1385                    title,
1386                    text: preview.clone(),
1387                    chunk_text: Some(preview),
1388                    range: (0, 0),
1389                    chunk_range: None,
1390                    matches: 0,
1391                    score: Some(cosine_similarity),
1392                    metadata: None,
1393                })
1394            })
1395            .collect();
1396
1397        let response = SearchResponse {
1398            query: args.query.clone(),
1399            hits: search_hits.clone(),
1400            total_hits: search_hits.len(),
1401            params: memvid_core::SearchParams {
1402                top_k: args.top_k,
1403                snippet_chars: args.snippet_chars,
1404                cursor: args.cursor.clone(),
1405            },
1406            elapsed_ms: 0,
1407            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1408            next_cursor: None,
1409            context: String::new(),
1410        };
1411
1412        if args.json_legacy {
1413            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1414            emit_legacy_search_json(&response)?;
1415        } else if args.json {
1416            emit_search_json(&response, mode_key)?;
1417        } else {
1418            println!(
1419                "mode: {}   k={}   time: {} ms",
1420                mode_label, response.params.top_k, response.elapsed_ms
1421            );
1422            println!("engine: clip (MobileCLIP-S2)");
1423            println!(
1424                "hits: {} (showing {})",
1425                response.total_hits,
1426                response.hits.len()
1427            );
1428            emit_search_table(&response);
1429        }
1430        return Ok(());
1431    }
1432
1433    // For semantic mode, use pure vector search.
1434    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1435        let runtime = runtime_option
1436            .as_ref()
1437            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1438
1439        // Embed the query
1440        let query_embedding = runtime.embed_query(&args.query)?;
1441
1442        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1443        let scope = args.scope.as_deref().or(args.uri.as_deref());
1444
1445        if !args.no_adaptive {
1446            // Build adaptive config from CLI args
1447            let strategy = match args.adaptive_strategy {
1448                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1449                    min_ratio: args.min_relevancy,
1450                },
1451                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1452                    min_score: args.min_relevancy,
1453                },
1454                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1455                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1456                },
1457                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1458                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1459                    relative_threshold: args.min_relevancy,
1460                    max_drop_ratio: 0.35,
1461                    absolute_min: 0.3,
1462                },
1463            };
1464
1465            let config = AdaptiveConfig {
1466                enabled: true,
1467                max_results: args.max_k,
1468                min_results: 1,
1469                strategy,
1470                normalize_scores: true,
1471            };
1472
1473            match mem.search_adaptive(
1474                &args.query,
1475                &query_embedding,
1476                config,
1477                args.snippet_chars,
1478                scope,
1479            ) {
1480                Ok(result) => {
1481                    let mut resp = SearchResponse {
1482                        query: args.query.clone(),
1483                        hits: result.results,
1484                        total_hits: result.stats.returned,
1485                        params: memvid_core::SearchParams {
1486                            top_k: result.stats.returned,
1487                            snippet_chars: args.snippet_chars,
1488                            cursor: args.cursor.clone(),
1489                        },
1490                        elapsed_ms: 0,
1491                        engine: SearchEngineKind::Hybrid,
1492                        next_cursor: None,
1493                        context: String::new(),
1494                    };
1495                    apply_preference_rerank(&mut resp);
1496                    (
1497                        resp,
1498                        "semantic (adaptive vector search)".to_string(),
1499                        Some(result.stats),
1500                    )
1501                }
1502                Err(e) => {
1503                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1504                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1505                    }
1506
1507                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1508                    match mem.vec_search_with_embedding(
1509                        &args.query,
1510                        &query_embedding,
1511                        args.top_k,
1512                        args.snippet_chars,
1513                        scope,
1514                    ) {
1515                        Ok(mut resp) => {
1516                            apply_preference_rerank(&mut resp);
1517                            (resp, "semantic (vector search fallback)".to_string(), None)
1518                        }
1519                        Err(e2) => {
1520                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1521                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1522                            }
1523                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
1524                        }
1525                    }
1526                }
1527            }
1528        } else {
1529            // Standard fixed-k vector search
1530            match mem.vec_search_with_embedding(
1531                &args.query,
1532                &query_embedding,
1533                args.top_k,
1534                args.snippet_chars,
1535                scope,
1536            ) {
1537                Ok(mut resp) => {
1538                    // Apply preference boost to rerank results for preference-seeking queries
1539                    apply_preference_rerank(&mut resp);
1540                    (resp, "semantic (vector search)".to_string(), None)
1541                }
1542                Err(e) => {
1543                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1544                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1545                    }
1546
1547                    // Fall back to lexical search + rerank if vector search fails
1548                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1549                    let request = SearchRequest {
1550                        query: args.query.clone(),
1551                        top_k: args.top_k,
1552                        snippet_chars: args.snippet_chars,
1553                        uri: args.uri.clone(),
1554                        scope: args.scope.clone(),
1555                        cursor: args.cursor.clone(),
1556                        #[cfg(feature = "temporal_track")]
1557                        temporal: None,
1558                        as_of_frame: args.as_of_frame,
1559                        as_of_ts: args.as_of_ts,
1560                    };
1561                    let mut resp = mem.search(request)?;
1562                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1563                    (resp, "semantic (fallback rerank)".to_string(), None)
1564                }
1565            }
1566        }
1567    } else {
1568        // For lexical and auto modes, use existing behavior
1569        let request = SearchRequest {
1570            query: args.query.clone(),
1571            top_k: args.top_k,
1572            snippet_chars: args.snippet_chars,
1573            uri: args.uri.clone(),
1574            scope: args.scope.clone(),
1575            cursor: args.cursor.clone(),
1576            #[cfg(feature = "temporal_track")]
1577            temporal: None,
1578            as_of_frame: args.as_of_frame,
1579            as_of_ts: args.as_of_ts,
1580        };
1581
1582        let mut resp = mem.search(request)?;
1583
1584        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1585            warn!("Search index unavailable; returning basic text results");
1586        }
1587
1588        let mut engine_label = match resp.engine {
1589            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1590            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1591            SearchEngineKind::Hybrid => "hybrid".to_string(),
1592        };
1593
1594        if runtime_option.is_some() {
1595            engine_label = format!("hybrid ({engine_label} + semantic)");
1596        }
1597
1598        if let Some(ref runtime) = runtime_option {
1599            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1600        }
1601
1602        (resp, engine_label, None)
1603    };
1604
1605    if args.json_legacy {
1606        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1607        emit_legacy_search_json(&response)?;
1608    } else if args.json {
1609        emit_search_json(&response, mode_key)?;
1610    } else {
1611        println!(
1612            "mode: {}   k={}   time: {} ms",
1613            mode_label, response.params.top_k, response.elapsed_ms
1614        );
1615        println!("engine: {}", engine_label);
1616
1617        // Show adaptive retrieval stats if enabled
1618        if let Some(ref stats) = adaptive_stats {
1619            println!(
1620                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1621                stats.total_considered,
1622                stats.returned,
1623                stats.triggered_by,
1624                stats.top_score.unwrap_or(0.0),
1625                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1626            );
1627        }
1628
1629        println!(
1630            "hits: {} (showing {})",
1631            response.total_hits,
1632            response.hits.len()
1633        );
1634        emit_search_table(&response);
1635    }
1636
1637    // Save active replay session if one exists
1638    #[cfg(feature = "replay")]
1639    let _ = mem.save_active_session();
1640
1641    Ok(())
1642}
1643
1644pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1645    let mut mem = open_read_only_mem(&args.file)?;
1646    let vector = if let Some(path) = args.embedding.as_deref() {
1647        read_embedding(path)?
1648    } else if let Some(vector_string) = &args.vector {
1649        parse_vector(vector_string)?
1650    } else {
1651        anyhow::bail!("provide --vector or --embedding for search input");
1652    };
1653
1654    let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1655        MemvidError::VecDimensionMismatch { expected, actual } => {
1656            anyhow!(vec_dimension_mismatch_help(expected, actual))
1657        }
1658        other => anyhow!(other),
1659    })?;
1660    let mut enriched = Vec::with_capacity(hits.len());
1661    for hit in hits {
1662        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1663        enriched.push((hit.frame_id, hit.distance, preview));
1664    }
1665
1666    if args.json {
1667        let json_hits: Vec<_> = enriched
1668            .iter()
1669            .map(|(frame_id, distance, preview)| {
1670                json!({
1671                    "frame_id": frame_id,
1672                    "distance": distance,
1673                    "preview": preview,
1674                })
1675            })
1676            .collect();
1677        let json_str = serde_json::to_string_pretty(&json_hits)?;
1678        println!("{}", json_str.to_colored_json_auto()?);
1679    } else if enriched.is_empty() {
1680        println!("No vector matches found");
1681    } else {
1682        for (frame_id, distance, preview) in enriched {
1683            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1684        }
1685    }
1686    Ok(())
1687}
1688
/// Handle the `audit` subcommand: gather evidence for a question (optionally
/// bounded by a date window), produce an audit report, and write it to
/// `--out` or stdout in the requested format.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries. The boolean presumably selects start-of-day vs
    // end-of-day semantics for the boundary — confirm in parse_date_boundary.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed: Sem requires one (hard error on
    // failure), Hybrid uses one opportunistically, Lex never loads it.
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference (snippets only;
        // sources without a snippet are skipped).
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                // Best-effort: keep the report's default answer and log the
                // failure instead of aborting the audit.
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write output: to --out when given, otherwise stdout.
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1781
1782fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1783    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1784
1785    let mut additional_params = serde_json::Map::new();
1786    if let Some(cursor) = &response.params.cursor {
1787        additional_params.insert("cursor".into(), json!(cursor));
1788    }
1789
1790    let mut params = serde_json::Map::new();
1791    params.insert("top_k".into(), json!(response.params.top_k));
1792    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1793    params.insert("mode".into(), json!(mode));
1794    params.insert(
1795        "additional_params".into(),
1796        serde_json::Value::Object(additional_params),
1797    );
1798
1799    let mut metadata_json = serde_json::Map::new();
1800    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1801    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1802    metadata_json.insert(
1803        "next_cursor".into(),
1804        match &response.next_cursor {
1805            Some(cursor) => json!(cursor),
1806            None => serde_json::Value::Null,
1807        },
1808    );
1809    metadata_json.insert("engine".into(), json!(response.engine));
1810    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1811
1812    let body = json!({
1813        "version": "mv2.result.v2",
1814        "query": response.query,
1815        "metadata": metadata_json,
1816        "hits": hits,
1817        "context": response.context,
1818    });
1819    let json_str = serde_json::to_string_pretty(&body)?;
1820    println!("{}", json_str.to_colored_json_auto()?);
1821    Ok(())
1822}
1823
/// Emit an ask response as pretty-printed `mv2.ask.v1` JSON on stdout.
///
/// Unlike the search emitters, this prints plain (uncolored) JSON. The
/// requested mode is echoed back as `mode`; the retriever that actually ran
/// is reported separately as `retriever`. When `model` is present, `model`
/// carries the requested name and `model_used` is added only if a different
/// model answered. `include_sources` appends a detailed `sources` array.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    model: Option<&ModelAnswer>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations include `chunk_range` and `score` only when present.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(model) = model {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // Only surface `model_used` when it differs from the request.
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
        }
    }

    // Add detailed sources if requested
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
1897
1898fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
1899    response
1900        .citations
1901        .iter()
1902        .enumerate()
1903        .map(|(idx, citation)| {
1904            let mut source = serde_json::Map::new();
1905            source.insert("index".into(), json!(idx + 1));
1906            source.insert("frame_id".into(), json!(citation.frame_id));
1907            source.insert("uri".into(), json!(citation.uri));
1908
1909            if let Some(range) = citation.chunk_range {
1910                source.insert("chunk_range".into(), json!([range.0, range.1]));
1911            }
1912            if let Some(score) = citation.score {
1913                source.insert("score".into(), json!(score));
1914            }
1915
1916            // Get frame metadata for rich source information
1917            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1918                if let Some(title) = frame.title {
1919                    source.insert("title".into(), json!(title));
1920                }
1921                if !frame.tags.is_empty() {
1922                    source.insert("tags".into(), json!(frame.tags));
1923                }
1924                if !frame.labels.is_empty() {
1925                    source.insert("labels".into(), json!(frame.labels));
1926                }
1927                source.insert("frame_timestamp".into(), json!(frame.timestamp));
1928                if !frame.content_dates.is_empty() {
1929                    source.insert("content_dates".into(), json!(frame.content_dates));
1930                }
1931            }
1932
1933            // Get snippet from hit
1934            if let Some(hit) = response
1935                .retrieval
1936                .hits
1937                .iter()
1938                .find(|h| h.frame_id == citation.frame_id)
1939            {
1940                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
1941                source.insert("snippet".into(), json!(snippet));
1942            }
1943
1944            serde_json::Value::Object(source)
1945        })
1946        .collect()
1947}
1948
1949fn emit_model_json(
1950    response: &AskResponse,
1951    requested_model: &str,
1952    model: Option<&ModelAnswer>,
1953    include_sources: bool,
1954    mem: &mut Memvid,
1955) -> Result<()> {
1956    let answer = response.answer.clone().unwrap_or_default();
1957    let requested_label = model
1958        .map(|m| m.requested.clone())
1959        .unwrap_or_else(|| requested_model.to_string());
1960    let used_label = model
1961        .map(|m| m.model.clone())
1962        .unwrap_or_else(|| requested_model.to_string());
1963
1964    let mut body = json!({
1965        "question": response.question,
1966        "model": requested_label,
1967        "model_used": used_label,
1968        "answer": answer,
1969        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1970    });
1971
1972    // Add detailed sources if requested
1973    if include_sources {
1974        if let serde_json::Value::Object(ref mut map) = body {
1975            let sources = build_sources_json(response, mem);
1976            map.insert("sources".into(), json!(sources));
1977        }
1978    }
1979
1980    // Use colored JSON output
1981    let json_str = serde_json::to_string_pretty(&body)?;
1982    println!("{}", json_str.to_colored_json_auto()?);
1983    Ok(())
1984}
1985
1986fn emit_ask_pretty(
1987    response: &AskResponse,
1988    requested_mode: AskModeArg,
1989    model: Option<&ModelAnswer>,
1990    include_sources: bool,
1991    mem: &mut Memvid,
1992) {
1993    println!(
1994        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
1995        ask_mode_pretty(requested_mode),
1996        ask_retriever_pretty(response.retriever),
1997        response.retrieval.params.top_k,
1998        response.stats.latency_ms,
1999        response.stats.retrieval_ms
2000    );
2001    if let Some(model) = model {
2002        if model.requested.trim() == model.model {
2003            println!("model: {}", model.model);
2004        } else {
2005            println!(
2006                "model requested: {}   model used: {}",
2007                model.requested, model.model
2008            );
2009        }
2010    }
2011    println!(
2012        "engine: {}",
2013        search_engine_label(&response.retrieval.engine)
2014    );
2015    println!(
2016        "hits: {} (showing {})",
2017        response.retrieval.total_hits,
2018        response.retrieval.hits.len()
2019    );
2020
2021    if response.context_only {
2022        println!();
2023        println!("Context-only mode: synthesis disabled.");
2024        println!();
2025    } else if let Some(answer) = &response.answer {
2026        println!();
2027        println!("Answer:\n{answer}");
2028        println!();
2029    }
2030
2031    if !response.citations.is_empty() {
2032        println!("Citations:");
2033        for citation in &response.citations {
2034            match citation.score {
2035                Some(score) => println!(
2036                    "[{}] {} (frame {}, score {:.3})",
2037                    citation.index, citation.uri, citation.frame_id, score
2038                ),
2039                None => println!(
2040                    "[{}] {} (frame {})",
2041                    citation.index, citation.uri, citation.frame_id
2042                ),
2043            }
2044        }
2045        println!();
2046    }
2047
2048    // Print detailed sources if requested
2049    if include_sources && !response.citations.is_empty() {
2050        println!("=== SOURCES ===");
2051        println!();
2052        for citation in &response.citations {
2053            println!("[{}] {}", citation.index, citation.uri);
2054
2055            // Get frame metadata
2056            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2057                if let Some(title) = &frame.title {
2058                    println!("    Title: {}", title);
2059                }
2060                println!("    Frame ID: {}", citation.frame_id);
2061                if let Some(score) = citation.score {
2062                    println!("    Score: {:.4}", score);
2063                }
2064                if let Some((start, end)) = citation.chunk_range {
2065                    println!("    Range: [{}..{})", start, end);
2066                }
2067                if !frame.tags.is_empty() {
2068                    println!("    Tags: {}", frame.tags.join(", "));
2069                }
2070                if !frame.labels.is_empty() {
2071                    println!("    Labels: {}", frame.labels.join(", "));
2072                }
2073                println!("    Timestamp: {}", frame.timestamp);
2074                if !frame.content_dates.is_empty() {
2075                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2076                }
2077            }
2078
2079            // Get snippet from hit
2080            if let Some(hit) = response
2081                .retrieval
2082                .hits
2083                .iter()
2084                .find(|h| h.frame_id == citation.frame_id)
2085            {
2086                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2087                let truncated = if snippet.len() > 200 {
2088                    format!("{}...", &snippet[..200])
2089                } else {
2090                    snippet.clone()
2091                };
2092                println!("    Snippet: {}", truncated.replace('\n', " "));
2093            }
2094            println!();
2095        }
2096    }
2097
2098    if !include_sources {
2099        println!();
2100        emit_search_table(&response.retrieval);
2101    }
2102}
2103
2104fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2105    let hits: Vec<_> = response
2106        .hits
2107        .iter()
2108        .map(|hit| {
2109            json!({
2110                "frame_id": hit.frame_id,
2111                "matches": hit.matches,
2112                "snippets": [hit.text.clone()],
2113            })
2114        })
2115        .collect();
2116    println!("{}", serde_json::to_string_pretty(&hits)?);
2117    Ok(())
2118}
2119
2120fn emit_search_table(response: &SearchResponse) {
2121    if response.hits.is_empty() {
2122        println!("No results for '{}'.", response.query);
2123        return;
2124    }
2125    for hit in &response.hits {
2126        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
2127        if let Some(title) = &hit.title {
2128            println!("  Title: {title}");
2129        }
2130        if let Some(score) = hit.score {
2131            println!("  Score: {score:.3}");
2132        }
2133        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
2134        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
2135            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
2136        }
2137        if let Some(chunk_text) = &hit.chunk_text {
2138            println!("  Chunk Text: {}", chunk_text.trim());
2139        }
2140        if let Some(metadata) = &hit.metadata {
2141            if let Some(track) = &metadata.track {
2142                println!("  Track: {track}");
2143            }
2144            if !metadata.tags.is_empty() {
2145                println!("  Tags: {}", metadata.tags.join(", "));
2146            }
2147            if !metadata.labels.is_empty() {
2148                println!("  Labels: {}", metadata.labels.join(", "));
2149            }
2150            if let Some(created_at) = &metadata.created_at {
2151                println!("  Created: {created_at}");
2152            }
2153            if !metadata.content_dates.is_empty() {
2154                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2155            }
2156            if !metadata.entities.is_empty() {
2157                let entity_strs: Vec<String> = metadata
2158                    .entities
2159                    .iter()
2160                    .map(|e| format!("{} ({})", e.name, e.kind))
2161                    .collect();
2162                println!("  Entities: {}", entity_strs.join(", "));
2163            }
2164        }
2165        println!("  Snippet: {}", hit.text.trim());
2166        println!();
2167    }
2168    if let Some(cursor) = &response.next_cursor {
2169        println!("Next cursor: {cursor}");
2170    }
2171}
2172
2173fn ask_mode_display(mode: AskModeArg) -> &'static str {
2174    match mode {
2175        AskModeArg::Lex => "lex",
2176        AskModeArg::Sem => "sem",
2177        AskModeArg::Hybrid => "hybrid",
2178    }
2179}
2180
2181fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2182    match mode {
2183        AskModeArg::Lex => "Lexical",
2184        AskModeArg::Sem => "Semantic",
2185        AskModeArg::Hybrid => "Hybrid",
2186    }
2187}
2188
2189fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2190    match retriever {
2191        AskRetriever::Lex => "lex",
2192        AskRetriever::Semantic => "semantic",
2193        AskRetriever::Hybrid => "hybrid",
2194        AskRetriever::LexFallback => "lex_fallback",
2195        AskRetriever::TimelineFallback => "timeline_fallback",
2196    }
2197}
2198
2199fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2200    match retriever {
2201        AskRetriever::Lex => "Lexical",
2202        AskRetriever::Semantic => "Semantic",
2203        AskRetriever::Hybrid => "Hybrid",
2204        AskRetriever::LexFallback => "Lexical (fallback)",
2205        AskRetriever::TimelineFallback => "Timeline (fallback)",
2206    }
2207}
2208
2209fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2210    match engine {
2211        SearchEngineKind::Tantivy => "text (tantivy)",
2212        SearchEngineKind::LexFallback => "text (fallback)",
2213        SearchEngineKind::Hybrid => "hybrid",
2214    }
2215}
2216
2217fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2218    let digest = hash(uri.as_bytes()).to_hex().to_string();
2219    let prefix_len = digest.len().min(12);
2220    let prefix = &digest[..prefix_len];
2221    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2222}
2223
/// Return `text` unchanged when it has at most `limit` chars; otherwise keep
/// the first `limit` chars and append `"..."`.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` yields the char at index `limit` — the first one that
    // would be cut. `None` means the text already fits within the limit.
    match text.char_indices().nth(limit) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2232
/// Serialize one `SearchHit` into the JSON object shape used by the search
/// emitters.
///
/// Optional fields (`score`, `title`) appear only when set. The `id` is
/// derived from the URI digest plus frame id and start offset, and the
/// `metadata` object is always present, falling back to just the match count
/// when the hit carries no metadata of its own.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // When no chunk range is recorded, mirror the full hit range.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata (match count only) when the hit has none.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        // Entities keep `confidence` only when it was recorded.
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
2293/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
2294///
2295/// RRF is mathematically superior to raw score combination because:
2296/// - BM25 scores are unbounded (0 to infinity)
2297/// - Cosine similarity is bounded (-1 to 1)
2298/// - RRF normalizes by using only RANKS, not raw scores
2299///
2300/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
2301fn apply_semantic_rerank(
2302    runtime: &EmbeddingRuntime,
2303    mem: &mut Memvid,
2304    response: &mut SearchResponse,
2305) -> Result<()> {
2306    if response.hits.is_empty() {
2307        return Ok(());
2308    }
2309
2310    let query_embedding = runtime.embed_query(&response.query)?;
2311    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
2312    for hit in &response.hits {
2313        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
2314            if embedding.len() == runtime.dimension() {
2315                let score = cosine_similarity(&query_embedding, &embedding);
2316                semantic_scores.insert(hit.frame_id, score);
2317            }
2318        }
2319    }
2320
2321    if semantic_scores.is_empty() {
2322        return Ok(());
2323    }
2324
2325    // Sort by semantic score to get semantic ranks
2326    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
2327        .iter()
2328        .map(|(frame_id, score)| (*frame_id, *score))
2329        .collect();
2330    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
2331
2332    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
2333    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
2334        semantic_rank.insert(*frame_id, idx + 1);
2335    }
2336
2337    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
2338    let query_lower = response.query.to_lowercase();
2339    let is_preference_query = query_lower.contains("suggest")
2340        || query_lower.contains("recommend")
2341        || query_lower.contains("should i")
2342        || query_lower.contains("what should")
2343        || query_lower.contains("prefer")
2344        || query_lower.contains("favorite")
2345        || query_lower.contains("best for me");
2346
2347    // Pure RRF: Use ONLY ranks, NOT raw scores
2348    // This prevents a "confidently wrong" high-scoring vector from burying
2349    // a "precisely correct" keyword match
2350    const RRF_K: f32 = 60.0;
2351
2352    let mut ordering: Vec<(usize, f32, usize)> = response
2353        .hits
2354        .iter()
2355        .enumerate()
2356        .map(|(idx, hit)| {
2357            let lexical_rank = hit.rank;
2358
2359            // RRF score for lexical rank
2360            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);
2361
2362            // RRF score for semantic rank
2363            let semantic_rrf = semantic_rank
2364                .get(&hit.frame_id)
2365                .map(|rank| 1.0 / (RRF_K + *rank as f32))
2366                .unwrap_or(0.0);
2367
2368            // Apply preference boost for hits containing user preference signals
2369            // This is a small bonus for content with first-person preference indicators
2370            let preference_boost = if is_preference_query {
2371                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
2372            } else {
2373                0.0
2374            };
2375
2376            // Pure RRF: Only rank-based scores, no raw similarity scores
2377            let combined = lexical_rrf + semantic_rrf + preference_boost;
2378            (idx, combined, lexical_rank)
2379        })
2380        .collect();
2381
2382    ordering.sort_by(|a, b| {
2383        b.1.partial_cmp(&a.1)
2384            .unwrap_or(Ordering::Equal)
2385            .then(a.2.cmp(&b.2))
2386    });
2387
2388    let mut reordered = Vec::with_capacity(response.hits.len());
2389    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
2390        let mut hit = response.hits[idx].clone();
2391        hit.rank = rank_idx + 1;
2392        reordered.push(hit);
2393    }
2394
2395    response.hits = reordered;
2396    Ok(())
2397}
2398
2399/// Rerank search results by boosting hits that contain user preference signals.
2400/// Only applies when the query appears to be seeking recommendations or preferences.
2401fn apply_preference_rerank(response: &mut SearchResponse) {
2402    if response.hits.is_empty() {
2403        return;
2404    }
2405
2406    // Check if query is preference-seeking
2407    let query_lower = response.query.to_lowercase();
2408    let is_preference_query = query_lower.contains("suggest")
2409        || query_lower.contains("recommend")
2410        || query_lower.contains("should i")
2411        || query_lower.contains("what should")
2412        || query_lower.contains("prefer")
2413        || query_lower.contains("favorite")
2414        || query_lower.contains("best for me");
2415
2416    if !is_preference_query {
2417        return;
2418    }
2419
2420    // Compute boost scores for each hit
2421    let mut scored: Vec<(usize, f32, f32)> = response
2422        .hits
2423        .iter()
2424        .enumerate()
2425        .map(|(idx, hit)| {
2426            let original_score = hit.score.unwrap_or(0.0);
2427            let preference_boost = compute_preference_boost(&hit.text);
2428            let boosted_score = original_score + preference_boost;
2429            (idx, boosted_score, original_score)
2430        })
2431        .collect();
2432
2433    // Sort by boosted score (descending)
2434    scored.sort_by(|a, b| {
2435        b.1.partial_cmp(&a.1)
2436            .unwrap_or(Ordering::Equal)
2437            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
2438    });
2439
2440    // Reorder hits
2441    let mut reordered = Vec::with_capacity(response.hits.len());
2442    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
2443        let mut hit = response.hits[idx].clone();
2444        hit.rank = rank_idx + 1;
2445        reordered.push(hit);
2446    }
2447
2448    response.hits = reordered;
2449}
2450
2451/// Compute a boost score for hits that contain user preference signals.
2452/// This helps surface context where users express their preferences,
2453/// habits, or personal information that's relevant to recommendation queries.
2454///
2455/// Key insight: We want to distinguish content where the user describes
2456/// their ESTABLISHED situation/preferences (high boost) from content where
2457/// the user is making a REQUEST (low boost). Both use first-person language,
2458/// but they serve different purposes for personalization.
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
///
/// Scoring: +0.15 per established-context phrase, +0.02 per bare first-person
/// pronoun, +0.02 per request phrase; the total is capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    // Pad with a single space on each side so the word-boundary pronoun
    // patterns below (" i ", " my ", " me ") also match at the very start or
    // end of the text (e.g. "i prefer tea"). Without the padding those
    // sentence-initial/final occurrences were silently missed.
    // NOTE(review): punctuation-adjacent occurrences ("...for me.") are still
    // missed; acceptable for a coarse heuristic.
    let text_lower = format!(" {} ", text.to_lowercase());
    let mut boost = 0.0f32;

    // Strong signals: Past/present user experiences and possessions
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for pattern in established_context {
        if text_lower.contains(pattern) {
            boost += 0.15;
        }
    }

    // Moderate signals: General first-person statements
    let first_person = [" i ", " my ", " me "];
    for pattern in first_person {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Weak signals: Requests/intentions (not yet established preferences)
    // These indicate the user wants something, but don't describe established context
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in request_patterns {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
2543
/// Cosine similarity between two f32 vectors.
///
/// Returns 0.0 when either vector has (near-)zero magnitude. If the slices
/// differ in length, the extra elements of the longer one are ignored.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in one pass.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (x, y)| (d + x * y, na + x * x, nb + y * y),
    );

    // Guard against division by (effectively) zero magnitudes.
    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
2560
2561/// Apply cross-encoder reranking to search results.
2562///
2563/// Cross-encoders directly score query-document pairs and can understand
2564/// more nuanced relevance than bi-encoders (embeddings). This is especially
2565/// useful for personalization queries where semantic similarity != relevance.
2566///
2567/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2568fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2569    if response.hits.is_empty() || response.hits.len() < 2 {
2570        return Ok(());
2571    }
2572
2573    // Only rerank if we have enough candidates
2574    let candidates_to_rerank = response.hits.len().min(50);
2575
2576    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2577    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2578    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2579        .with_show_download_progress(true);
2580
2581    let mut reranker = match TextRerank::try_new(options) {
2582        Ok(r) => r,
2583        Err(e) => {
2584            warn!("Failed to initialize cross-encoder reranker: {e}");
2585            return Ok(());
2586        }
2587    };
2588
2589    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2590    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2591        .iter()
2592        .map(|hit| hit.text.clone())
2593        .collect();
2594
2595    // Rerank using cross-encoder
2596    info!("Cross-encoder reranking {} candidates", documents.len());
2597    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2598        Ok(results) => results,
2599        Err(e) => {
2600            warn!("Cross-encoder reranking failed: {e}");
2601            return Ok(());
2602        }
2603    };
2604
2605    // Reorder hits based on cross-encoder scores
2606    let mut reordered = Vec::with_capacity(response.hits.len());
2607    for (new_rank, result) in rerank_results.iter().enumerate() {
2608        let original_idx = result.index;
2609        let mut hit = response.hits[original_idx].clone();
2610        hit.rank = new_rank + 1;
2611        // Store cross-encoder score in the hit score for reference
2612        hit.score = Some(result.score);
2613        reordered.push(hit);
2614    }
2615
2616    // Add any remaining hits that weren't reranked (beyond top-50)
2617    for hit in response.hits.iter().skip(candidates_to_rerank) {
2618        let mut h = hit.clone();
2619        h.rank = reordered.len() + 1;
2620        reordered.push(h);
2621    }
2622
2623    response.hits = reordered;
2624    info!("Cross-encoder reranking complete");
2625    Ok(())
2626}
2627
2628/// Build a context string from memory cards stored in the MV2 file.
2629/// Groups facts by entity for better LLM comprehension.
2630fn build_memory_context(mem: &Memvid) -> String {
2631    let entities = mem.memory_entities();
2632    if entities.is_empty() {
2633        return String::new();
2634    }
2635
2636    let mut sections = Vec::new();
2637    for entity in entities {
2638        let cards = mem.get_entity_memories(&entity);
2639        if cards.is_empty() {
2640            continue;
2641        }
2642
2643        let mut entity_lines = Vec::new();
2644        for card in cards {
2645            // Format: "slot: value" with optional polarity indicator
2646            let polarity_marker = card
2647                .polarity
2648                .as_ref()
2649                .map(|p| match p.to_string().as_str() {
2650                    "Positive" => " (+)",
2651                    "Negative" => " (-)",
2652                    _ => "",
2653                })
2654                .unwrap_or("");
2655            entity_lines.push(format!(
2656                "  - {}: {}{}",
2657                card.slot, card.value, polarity_marker
2658            ));
2659        }
2660
2661        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2662    }
2663
2664    sections.join("\n\n")
2665}
2666
2667/// Build a context string from entities found in search hits.
2668/// Groups entities by type for better LLM comprehension.
2669fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
2670    use std::collections::HashMap;
2671
2672    // Collect unique entities by kind
2673    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
2674
2675    for hit in hits {
2676        if let Some(metadata) = &hit.metadata {
2677            for entity in &metadata.entities {
2678                entities_by_kind
2679                    .entry(entity.kind.clone())
2680                    .or_default()
2681                    .push(entity.name.clone());
2682            }
2683        }
2684    }
2685
2686    if entities_by_kind.is_empty() {
2687        return String::new();
2688    }
2689
2690    // Deduplicate and format
2691    let mut sections = Vec::new();
2692    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
2693    sorted_kinds.sort();
2694
2695    for kind in sorted_kinds {
2696        let names = entities_by_kind.get(kind).unwrap();
2697        let mut unique_names: Vec<_> = names.iter().collect();
2698        unique_names.sort();
2699        unique_names.dedup();
2700
2701        let names_str = unique_names
2702            .iter()
2703            .take(10) // Limit to 10 entities per kind
2704            .map(|s| s.as_str())
2705            .collect::<Vec<_>>()
2706            .join(", ");
2707
2708        sections.push(format!("{}: {}", kind, names_str));
2709    }
2710
2711    sections.join("\n")
2712}