// memvid_cli/commands/search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16#[cfg(feature = "temporal_track")]
17use memvid_core::{
18    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
19    TemporalResolution, TemporalResolutionValue,
20};
21use memvid_core::{
22    types::{AskContextFragment, AskContextFragmentKind, SearchHitMetadata},
23    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, SearchEngineKind, SearchHit,
24    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
25};
26#[cfg(feature = "temporal_track")]
27use serde::Serialize;
28use serde_json::json;
29#[cfg(feature = "temporal_track")]
30use time::format_description::well_known::Rfc3339;
31use time::{Date, PrimitiveDateTime, Time};
32#[cfg(feature = "temporal_track")]
33use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
34use tracing::{info, warn};
35
36use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
37
38use memvid_ask_model::{
39    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
40    ModelInference,
41};
42
43// frame_to_json and print_frame_summary available from commands but not used in this module
44use crate::config::{
45    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
46    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingRuntime,
47};
48use crate::utils::{
49    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
50    parse_date_boundary, parse_vector, read_embedding,
51};
52
/// Upper bound on context characters included in rendered output.
/// NOTE(review): not referenced in this chunk — used further down the file; confirm.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
/// Fallback IANA timezone for temporal phrase resolution when `--tz` is absent.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
56
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    /// Memory file to read the timeline from (opened read-only).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    /// Reverse the timeline ordering.
    #[arg(long)]
    pub reverse: bool,
    /// Maximum number of timeline entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    /// Lower-bound Unix timestamp filter.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    /// Upper-bound Unix timestamp filter.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    /// Natural-language temporal phrase, resolved via the temporal normalizer.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    /// IANA timezone for resolving `--on` (defaults to America/Chicago).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    /// RFC3339 anchor instant for relative phrases (defaults to now, UTC).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    /// Minutes of padding applied around the resolved temporal window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
91
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    /// Memory file to query (opened read-only).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Natural-language temporal phrase to resolve (required).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    /// IANA timezone for resolving the phrase (defaults to America/Chicago).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    /// RFC3339 anchor instant for relative phrases (defaults to now, UTC).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    /// Minutes of padding applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    /// Lower-bound Unix timestamp filter.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    /// Upper-bound Unix timestamp filter.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    /// Reverse the entry ordering.
    #[arg(long)]
    pub reverse: bool,
    /// Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
}
117
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    /// Positional tokens: an optional memory-file path plus question words
    /// (the handler picks out the first token that looks like a memory file).
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    /// Explicit question text; takes precedence over the positional question.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    /// Restrict retrieval to an exact URI (causes --scope to be ignored).
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    /// Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Number of results/fragments to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    /// Maximum characters per retrieved snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    /// Pagination cursor token — presumably from a previous response; confirm.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    /// Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
    /// Return retrieved context only (flag forwarded to the core ask request).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    /// Start-date filter (parsed as a date boundary).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End-date filter; must not precede --start.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Model used to synthesize an answer; a bare flag defaults to "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,
}
181
/// Ask mode argument
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    /// Lexical (keyword) retrieval; no embedding runtime is loaded.
    Lex,
    /// Semantic retrieval; requires an embedding runtime.
    Sem,
    /// Combined lexical + semantic retrieval (the default).
    Hybrid,
}
189
190impl From<AskModeArg> for AskMode {
191    fn from(value: AskModeArg) -> Self {
192        match value {
193            AskModeArg::Lex => AskMode::Lex,
194            AskModeArg::Sem => AskMode::Sem,
195            AskModeArg::Hybrid => AskMode::Hybrid,
196        }
197    }
198}
199
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    /// Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    /// Restrict results to an exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    /// Restrict results to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Number of hits to return.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    /// Maximum characters per snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    /// Pagination cursor token — presumably from a previous response; confirm.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    /// Emit JSON output.
    #[arg(long)]
    pub json: bool,
    /// Emit the legacy JSON shape (mutually exclusive with --json).
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    /// Search mode selection (auto picks for you).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
}
234
/// Search mode argument
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    /// Automatic mode selection (resolution handled by the search handler).
    Auto,
    /// Lexical (keyword) search only.
    Lex,
    /// Semantic (embedding) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
245
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    /// Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Query vector as comma-separated values (mutually exclusive with --embedding).
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    /// File containing the query embedding (mutually exclusive with --vector).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    /// Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    /// Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
}
260
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    /// Memory file to audit against.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
297
/// Audit output format
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report (the CLI default)
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
308
309// ============================================================================
310// Search & Retrieval command handlers
311// ============================================================================
312
/// Handle `timeline`: list entries from a memory file with optional
/// limit/since/until bounds, feature-gated temporal-phrase filtering, and
/// replay ("as of") filtering; prints JSON or a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers for --on; reject them on their own.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the --on phrase (if any) into a query filter; keep the summary
    // around so both the JSON and human outputs can display it.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    // NOTE(review): this runs after the query (including any --limit), so the
    // final result may hold fewer than `limit` entries — confirm intended.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // JSON path: wrap the entries with the temporal summary when present.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        // Human path: optional phrase summary, then one stanza per entry.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
418
/// Handle `when`: resolve a natural-language temporal phrase to a concrete
/// window and list the timeline entries that fall inside it, as JSON or
/// human-readable text.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase up front so a bad phrase fails before any query work.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    // Human path: phrase-resolution summary first, then one stanza per entry.
    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Empty string when the timestamp cannot be ISO-formatted.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
486
/// JSON envelope for `timeline --json` when a temporal phrase was supplied.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// How the temporal phrase was resolved; omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// The matching timeline entries, borrowed from the query result.
    entries: &'a [TimelineEntry],
}
494
/// JSON envelope for `when --json`: resolution summary plus matched entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    /// Summary of how the phrase resolved to a window.
    summary: TemporalSummaryOutput,
    /// Entries that fell inside the resolved window.
    entries: Vec<WhenEntry>,
}
501
/// JSON-friendly projection of a core `TimelineEntry` for `when --json`.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    // Frame identifier of the matched entry.
    frame_id: FrameId,
    // Entry timestamp in Unix seconds.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted if formatting failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    // Text preview of the frame content.
    preview: String,
    /// Source URI; omitted when the frame has none.
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    /// Child frame ids; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-entry temporal metadata, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
517
/// Serializable report of how a temporal phrase was resolved
/// (shared by the `timeline` and `when` JSON outputs).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    // Original user-supplied phrase.
    phrase: String,
    // IANA timezone the phrase was resolved in.
    timezone: String,
    // Anchor instant, Unix seconds.
    anchor_utc: i64,
    // Anchor as RFC3339 (falls back to the raw timestamp as a string).
    anchor_iso: String,
    // Normalizer's confidence score for the resolution.
    confidence: u16,
    /// Normalizer flags; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    // Shape of the resolution ("date", "month", ...); see `resolution_kind`.
    resolution_kind: &'static str,
    // Resolved window bounds in Unix seconds plus ISO renderings.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Requested padding in minutes; omitted when not supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
536
/// In-memory record of a temporal phrase resolution, produced by
/// `build_temporal_filter` and consumed by printing/serialization helpers.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    // Original user-supplied phrase.
    phrase: String,
    // Timezone the phrase was resolved in.
    tz: String,
    // Anchor instant relative phrases were resolved against.
    anchor: OffsetDateTime,
    // Resolved window bounds in Unix seconds (after any padding).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    // Raw resolution from the temporal normalizer.
    resolution: TemporalResolution,
    // Requested padding in minutes, if any.
    window_minutes: Option<u64>,
}
547
/// Resolve a natural-language temporal phrase into a concrete UTC window.
///
/// `tz_override` falls back to [`DEFAULT_TEMPORAL_TZ`]; `anchor_override` must
/// be RFC3339 and defaults to "now" (UTC); `window_minutes`, when positive,
/// pads both ends of the resolved window symmetrically.
///
/// Returns the core-level [`TemporalFilter`] for the query plus a
/// [`TemporalSummary`] used for user-facing reporting.
///
/// # Errors
/// - `E-TEMP-003` for an empty timezone
/// - `E-TEMP-002` for a malformed anchor
/// - `E-TEMP-001` when the phrase cannot be resolved
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both ends of the resolved window. The previous code special-cased
    // `start == end`, but both branches were identical, so the point-in-time
    // and range cases collapse to a single path.
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            if let (Some(s), Some(e)) = (start, end) {
                let pad = TimeDuration::minutes(minutes as i64).whole_seconds();
                start = Some(s.saturating_sub(pad));
                end = Some(e.saturating_add(pad));
            }
        }
    }

    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
611
/// Build the serializable summary view from the in-memory temporal summary,
/// rendering anchor/bounds as RFC3339/ISO strings where formatting succeeds.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        // Prefer RFC3339; fall back to the bare Unix timestamp as a string.
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
637
/// Project a core `TimelineEntry` into the JSON-friendly `WhenEntry` view.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        // Best-effort ISO rendering; None when the timestamp can't be formatted.
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
650
/// Print the human-readable header describing how a temporal phrase resolved:
/// phrase, timezone, anchor, window bounds, confidence, flags, and padding.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // RFC3339 when possible, raw Unix timestamp otherwise.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {}", anchor_text);
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect::<Vec<&'static str>>();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    if matches!(summary.window_minutes, Some(window) if window > 0) {
        let window = summary.window_minutes.unwrap_or_default();
        println!("Window padding: {window} minute(s)");
    }
    // Blank separator line before the entry listing.
    println!();
}
688
/// Print the per-entry temporal details: the anchor line (when present) and
/// an indented list of temporal mentions with kind/confidence and source text.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, then raw.
        let display = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", display, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
721
/// Convert a resolved temporal value into `(start, end)` Unix-second bounds.
///
/// Point values (a single date or datetime) yield `start == end`; ranges use
/// their endpoints; a month resolution spans its first through last day.
/// Dates are anchored at UTC midnight via `date_to_timestamp`.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
752
/// Stable string tag naming the shape of a temporal resolution, used in the
/// JSON summary output.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as Value;
    match &resolution.value {
        Value::Date(_) => "date",
        Value::DateTime(_) => "datetime",
        Value::DateRange { .. } => "date_range",
        Value::DateTimeRange { .. } => "datetime_range",
        Value::Month { .. } => "month",
    }
}
763
/// Interpret a calendar date as midnight UTC and return its Unix timestamp.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
770
/// Return the final calendar day of the given month/year by walking forward
/// from the 1st until the month rolls over.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut day = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match day.next_day() {
            // Still inside the same month: keep advancing.
            Some(next) if next.month() == month => day = next,
            // Month rolled over (or calendar exhausted): `day` is the last day.
            _ => break,
        }
    }
    Ok(day)
}
784
785#[cfg(feature = "temporal_track")]
786
787fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
788    if fragments.is_empty() {
789        return;
790    }
791
792    response.context_fragments = fragments
793        .into_iter()
794        .map(|fragment| AskContextFragment {
795            rank: fragment.rank,
796            frame_id: fragment.frame_id,
797            uri: fragment.uri,
798            title: fragment.title,
799            score: fragment.score,
800            matches: fragment.matches,
801            range: Some(fragment.range),
802            chunk_range: fragment.chunk_range,
803            text: fragment.text,
804            kind: Some(match fragment.kind {
805                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
806                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
807            }),
808            #[cfg(feature = "temporal_track")]
809            temporal: None,
810        })
811        .collect();
812}
813
/// `memvid ask`: retrieval-augmented question answering over a memory file.
///
/// Pipeline: parse question/path from positional args → open the MV2 file →
/// load an embedding runtime per `--mode` → run core `ask` retrieval →
/// optional cross-encoder rerank → optional memory-card / entity context
/// injection → optional PII masking → optional local-model synthesis →
/// JSON or pretty output.
///
/// Errors are returned (never panics) for: missing/empty question, invalid
/// date window, unavailable embeddings in `sem`/`hybrid` mode, or I/O
/// failures opening the memory file.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // --uri takes precedence; warn rather than error so scripts keep working.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory file
    // becomes the file path; every other token is treated as a question word.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // Explicit --question wins over positional words; blank input is rejected.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // No explicit path: fall back to auto-detection (e.g. in the working dir).
    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Optional date window; `false`/`true` select day-start vs day-end
    // boundary semantics (see parse_date_boundary).
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.vec_index_dimension();

    let ask_mode: AskMode = args.mode.into();
    let emb_model_override = args.query_embedding_model.as_deref();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            // NOTE(review): when try_load fails, the forced load below will
            // likely fail too and its error is swallowed by `.ok()`; the
            // generic bail! after this match then hides the root cause —
            // consider surfacing the load error instead.
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    // Semantic and hybrid modes hard-require an embedding runtime here.
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build the core ask request straight from CLI flags.
    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
    };
    let mut response = mem.ask(request, embedder)?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failure is non-fatal: keep the original retrieval order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Optional synthesis: run a local model over the retrieved context.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            // Model failure is non-fatal: fall back to the default summary.
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Output: model-flavored JSON, plain ask JSON, or pretty text.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    Ok(())
}
1061
1062pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1063    let mut mem = open_read_only_mem(&args.file)?;
1064    if args.uri.is_some() && args.scope.is_some() {
1065        warn!("--scope ignored because --uri is provided");
1066    }
1067
1068    // Get vector dimension from MV2 for auto-detection
1069    let mv2_dimension = mem.vec_index_dimension();
1070    let emb_model_override = args.query_embedding_model.as_deref();
1071
1072    let (mode_label, runtime_option) = match args.mode {
1073        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1074        SearchMode::Sem => {
1075            let runtime =
1076                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1077            ("Semantic (vector search)".to_string(), Some(runtime))
1078        }
1079        SearchMode::Auto => {
1080            if let Some(runtime) =
1081                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1082            {
1083                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1084            } else {
1085                ("Lexical (semantic unavailable)".to_string(), None)
1086            }
1087        }
1088        #[cfg(feature = "clip")]
1089        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1090    };
1091
1092    let mode_key = match args.mode {
1093        SearchMode::Sem => "semantic",
1094        SearchMode::Lex => "text",
1095        SearchMode::Auto => {
1096            if runtime_option.is_some() {
1097                "hybrid"
1098            } else {
1099                "text"
1100            }
1101        }
1102        #[cfg(feature = "clip")]
1103        SearchMode::Clip => "clip",
1104    };
1105
1106    // For CLIP mode, use CLIP visual search
1107    #[cfg(feature = "clip")]
1108    if args.mode == SearchMode::Clip {
1109        use memvid_core::clip::{ClipConfig, ClipModel};
1110
1111        // Initialize CLIP model
1112        let config = ClipConfig::default();
1113        let clip = ClipModel::new(config).map_err(|e| {
1114            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1115        })?;
1116
1117        // Encode query text
1118        let query_embedding = clip
1119            .encode_text(&args.query)
1120            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1121
1122        // Search CLIP index
1123        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1124
1125        // Debug distances before filtering
1126        for hit in &hits {
1127            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1128                tracing::debug!(
1129                    frame_id = hit.frame_id,
1130                    title = %frame.title.unwrap_or_default(),
1131                    page = hit.page,
1132                    distance = hit.distance,
1133                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1134                    "CLIP raw hit"
1135                );
1136            } else {
1137                tracing::debug!(
1138                    frame_id = hit.frame_id,
1139                    page = hit.page,
1140                    distance = hit.distance,
1141                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1142                    "CLIP raw hit (missing frame)"
1143                );
1144            }
1145        }
1146
1147        // CLIP distance threshold for filtering poor matches
1148        // CLIP uses L2 distance on normalized embeddings:
1149        //   - distance² = 2(1 - cosine_similarity)
1150        //   - distance = 0 → identical (cosine_sim = 1)
1151        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1152        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1153        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1154        //   - distance = 2.0 → opposite (cosine_sim = -1)
1155        //
1156        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1157        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1158        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1159        const CLIP_MAX_DISTANCE: f32 = 1.26;
1160
1161        // Convert CLIP hits to SearchResponse format, filtering by threshold
1162        let search_hits: Vec<SearchHit> = hits
1163            .into_iter()
1164            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1165            .enumerate()
1166            .filter_map(|(rank, hit)| {
1167                // Convert L2 distance to cosine similarity for display
1168                // cos_sim = 1 - (distance² / 2)
1169                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1170
1171                // Get frame preview for snippet
1172                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1173                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1174                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1175                let title = match (base_title, hit.page) {
1176                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1177                    (Some(t), None) => Some(t),
1178                    (None, Some(p)) => Some(format!("Page {p}")),
1179                    _ => None,
1180                };
1181                Some(SearchHit {
1182                    rank: rank + 1,
1183                    frame_id: hit.frame_id,
1184                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1185                    title,
1186                    text: preview.clone(),
1187                    chunk_text: Some(preview),
1188                    range: (0, 0),
1189                    chunk_range: None,
1190                    matches: 0,
1191                    score: Some(cosine_similarity),
1192                    metadata: None,
1193                })
1194            })
1195            .collect();
1196
1197        let response = SearchResponse {
1198            query: args.query.clone(),
1199            hits: search_hits.clone(),
1200            total_hits: search_hits.len(),
1201            params: memvid_core::SearchParams {
1202                top_k: args.top_k,
1203                snippet_chars: args.snippet_chars,
1204                cursor: args.cursor.clone(),
1205            },
1206            elapsed_ms: 0,
1207            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1208            next_cursor: None,
1209            context: String::new(),
1210        };
1211
1212        if args.json_legacy {
1213            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1214            emit_legacy_search_json(&response)?;
1215        } else if args.json {
1216            emit_search_json(&response, mode_key)?;
1217        } else {
1218            println!(
1219                "mode: {}   k={}   time: {} ms",
1220                mode_label, response.params.top_k, response.elapsed_ms
1221            );
1222            println!("engine: clip (MobileCLIP-S2)");
1223            println!(
1224                "hits: {} (showing {})",
1225                response.total_hits,
1226                response.hits.len()
1227            );
1228            emit_search_table(&response);
1229        }
1230        return Ok(());
1231    }
1232
1233    // For semantic mode, use pure vector search via HNSW index
1234    let (response, engine_label) = if args.mode == SearchMode::Sem {
1235        let runtime = runtime_option
1236            .as_ref()
1237            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1238
1239        // Embed the query
1240        let query_embedding = runtime.embed(&args.query)?;
1241
1242        // Use pure vector search
1243        let scope = args.scope.as_deref().or(args.uri.as_deref());
1244        match mem.vec_search_with_embedding(
1245            &args.query,
1246            &query_embedding,
1247            args.top_k,
1248            args.snippet_chars,
1249            scope,
1250        ) {
1251            Ok(mut resp) => {
1252                // Apply preference boost to rerank results for preference-seeking queries
1253                apply_preference_rerank(&mut resp);
1254                (resp, "semantic (HNSW vector index)".to_string())
1255            }
1256            Err(e) => {
1257                // Fall back to lexical search + rerank if vector search fails
1258                warn!("Vector search failed ({e}), falling back to lexical + rerank");
1259                let request = SearchRequest {
1260                    query: args.query.clone(),
1261                    top_k: args.top_k,
1262                    snippet_chars: args.snippet_chars,
1263                    uri: args.uri.clone(),
1264                    scope: args.scope.clone(),
1265                    cursor: args.cursor.clone(),
1266                    #[cfg(feature = "temporal_track")]
1267                    temporal: None,
1268                    as_of_frame: args.as_of_frame,
1269                    as_of_ts: args.as_of_ts,
1270                };
1271                let mut resp = mem.search(request)?;
1272                apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1273                (resp, "semantic (fallback rerank)".to_string())
1274            }
1275        }
1276    } else {
1277        // For lexical and auto modes, use existing behavior
1278        let request = SearchRequest {
1279            query: args.query.clone(),
1280            top_k: args.top_k,
1281            snippet_chars: args.snippet_chars,
1282            uri: args.uri.clone(),
1283            scope: args.scope.clone(),
1284            cursor: args.cursor.clone(),
1285            #[cfg(feature = "temporal_track")]
1286            temporal: None,
1287            as_of_frame: args.as_of_frame,
1288            as_of_ts: args.as_of_ts,
1289        };
1290
1291        let mut resp = mem.search(request)?;
1292
1293        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1294            warn!("Search index unavailable; returning basic text results");
1295        }
1296
1297        let mut engine_label = match resp.engine {
1298            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1299            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1300            SearchEngineKind::Hybrid => "hybrid".to_string(),
1301        };
1302
1303        if runtime_option.is_some() {
1304            engine_label = format!("hybrid ({engine_label} + semantic)");
1305        }
1306
1307        if let Some(ref runtime) = runtime_option {
1308            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1309        }
1310
1311        (resp, engine_label)
1312    };
1313
1314    if args.json_legacy {
1315        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1316        emit_legacy_search_json(&response)?;
1317    } else if args.json {
1318        emit_search_json(&response, mode_key)?;
1319    } else {
1320        println!(
1321            "mode: {}   k={}   time: {} ms",
1322            mode_label, response.params.top_k, response.elapsed_ms
1323        );
1324        println!("engine: {}", engine_label);
1325        println!(
1326            "hits: {} (showing {})",
1327            response.total_hits,
1328            response.hits.len()
1329        );
1330        emit_search_table(&response);
1331    }
1332    Ok(())
1333}
1334
1335pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1336    let mut mem = open_read_only_mem(&args.file)?;
1337    let vector = if let Some(path) = args.embedding.as_deref() {
1338        read_embedding(path)?
1339    } else if let Some(vector_string) = &args.vector {
1340        parse_vector(vector_string)?
1341    } else {
1342        anyhow::bail!("provide --vector or --embedding for search input");
1343    };
1344
1345    let hits = mem.search_vec(&vector, args.limit)?;
1346    let mut enriched = Vec::with_capacity(hits.len());
1347    for hit in hits {
1348        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1349        enriched.push((hit.frame_id, hit.distance, preview));
1350    }
1351
1352    if args.json {
1353        let json_hits: Vec<_> = enriched
1354            .iter()
1355            .map(|(frame_id, distance, preview)| {
1356                json!({
1357                    "frame_id": frame_id,
1358                    "distance": distance,
1359                    "preview": preview,
1360                })
1361            })
1362            .collect();
1363        println!("{}", serde_json::to_string_pretty(&json_hits)?);
1364    } else if enriched.is_empty() {
1365        println!("No vector matches found");
1366    } else {
1367        for (frame_id, distance, preview) in enriched {
1368            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1369        }
1370    }
1371    Ok(())
1372}
1373
1374pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1375    use memvid_core::AuditOptions;
1376    use std::fs::File;
1377    use std::io::Write;
1378
1379    let mut mem = Memvid::open(&args.file)?;
1380
1381    // Parse date boundaries
1382    let start = parse_date_boundary(args.start.as_ref(), false)?;
1383    let end = parse_date_boundary(args.end.as_ref(), true)?;
1384    if let (Some(start_ts), Some(end_ts)) = (start, end) {
1385        if end_ts < start_ts {
1386            anyhow::bail!("--end must not be earlier than --start");
1387        }
1388    }
1389
1390    // Set up embedding runtime if needed
1391    let ask_mode: AskMode = args.mode.into();
1392    let runtime = match args.mode {
1393        AskModeArg::Lex => None,
1394        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1395        AskModeArg::Hybrid => try_load_embedding_runtime(config),
1396    };
1397    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1398
1399    // Build audit options
1400    let options = AuditOptions {
1401        top_k: Some(args.top_k),
1402        snippet_chars: Some(args.snippet_chars),
1403        mode: Some(ask_mode),
1404        scope: args.scope,
1405        start,
1406        end,
1407        include_snippets: true,
1408    };
1409
1410    // Run the audit
1411    let mut report = mem.audit(&args.question, Some(options), embedder)?;
1412
1413    // If --use-model is provided, run model inference to synthesize the answer
1414    if let Some(model_name) = args.use_model.as_deref() {
1415        // Build context from sources for model inference
1416        let context = report
1417            .sources
1418            .iter()
1419            .filter_map(|s| s.snippet.clone())
1420            .collect::<Vec<_>>()
1421            .join("\n\n");
1422
1423        match run_model_inference(
1424            model_name,
1425            &report.question,
1426            &context,
1427            &[], // No hits needed for audit
1428            None,
1429            None,
1430            None, // No system prompt override for audit
1431        ) {
1432            Ok(inference) => {
1433                report.answer = Some(inference.answer.answer);
1434                report.notes.push(format!(
1435                    "Answer synthesized by model: {}",
1436                    inference.answer.model
1437                ));
1438            }
1439            Err(err) => {
1440                warn!(
1441                    "model inference unavailable for '{}': {err}. Using default answer.",
1442                    model_name
1443                );
1444            }
1445        }
1446    }
1447
1448    // Format the output
1449    let output = match args.format {
1450        AuditFormat::Text => report.to_text(),
1451        AuditFormat::Markdown => report.to_markdown(),
1452        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1453    };
1454
1455    // Write output
1456    if let Some(out_path) = args.out {
1457        let mut file = File::create(&out_path)?;
1458        file.write_all(output.as_bytes())?;
1459        println!("Audit report written to: {}", out_path.display());
1460    } else {
1461        println!("{}", output);
1462    }
1463
1464    Ok(())
1465}
1466
1467fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1468    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1469
1470    let mut additional_params = serde_json::Map::new();
1471    if let Some(cursor) = &response.params.cursor {
1472        additional_params.insert("cursor".into(), json!(cursor));
1473    }
1474
1475    let mut params = serde_json::Map::new();
1476    params.insert("top_k".into(), json!(response.params.top_k));
1477    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1478    params.insert("mode".into(), json!(mode));
1479    params.insert(
1480        "additional_params".into(),
1481        serde_json::Value::Object(additional_params),
1482    );
1483
1484    let mut metadata_json = serde_json::Map::new();
1485    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1486    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1487    metadata_json.insert(
1488        "next_cursor".into(),
1489        match &response.next_cursor {
1490            Some(cursor) => json!(cursor),
1491            None => serde_json::Value::Null,
1492        },
1493    );
1494    metadata_json.insert("engine".into(), json!(response.engine));
1495    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1496
1497    let body = json!({
1498        "version": "mv2.result.v2",
1499        "query": response.query,
1500        "metadata": metadata_json,
1501        "hits": hits,
1502        "context": response.context,
1503    });
1504    println!("{}", serde_json::to_string_pretty(&body)?);
1505    Ok(())
1506}
1507
1508fn emit_ask_json(
1509    response: &AskResponse,
1510    requested_mode: AskModeArg,
1511    model: Option<&ModelAnswer>,
1512    include_sources: bool,
1513    mem: &mut Memvid,
1514) -> Result<()> {
1515    let hits: Vec<_> = response
1516        .retrieval
1517        .hits
1518        .iter()
1519        .map(search_hit_to_json)
1520        .collect();
1521
1522    let citations: Vec<_> = response
1523        .citations
1524        .iter()
1525        .map(|citation| {
1526            let mut map = serde_json::Map::new();
1527            map.insert("index".into(), json!(citation.index));
1528            map.insert("frame_id".into(), json!(citation.frame_id));
1529            map.insert("uri".into(), json!(citation.uri));
1530            if let Some(range) = citation.chunk_range {
1531                map.insert("chunk_range".into(), json!([range.0, range.1]));
1532            }
1533            if let Some(score) = citation.score {
1534                map.insert("score".into(), json!(score));
1535            }
1536            serde_json::Value::Object(map)
1537        })
1538        .collect();
1539
1540    let mut body = json!({
1541        "version": "mv2.ask.v1",
1542        "question": response.question,
1543        "answer": response.answer,
1544        "context_only": response.context_only,
1545        "mode": ask_mode_display(requested_mode),
1546        "retriever": ask_retriever_display(response.retriever),
1547        "top_k": response.retrieval.params.top_k,
1548        "results": hits,
1549        "citations": citations,
1550        "stats": {
1551            "retrieval_ms": response.stats.retrieval_ms,
1552            "synthesis_ms": response.stats.synthesis_ms,
1553            "latency_ms": response.stats.latency_ms,
1554        },
1555        "engine": search_engine_label(&response.retrieval.engine),
1556        "total_hits": response.retrieval.total_hits,
1557        "next_cursor": response.retrieval.next_cursor,
1558        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1559    });
1560
1561    if let Some(model) = model {
1562        if let serde_json::Value::Object(ref mut map) = body {
1563            map.insert("model".into(), json!(model.requested));
1564            if model.model != model.requested {
1565                map.insert("model_used".into(), json!(model.model));
1566            }
1567        }
1568    }
1569
1570    // Add detailed sources if requested
1571    if include_sources {
1572        if let serde_json::Value::Object(ref mut map) = body {
1573            let sources = build_sources_json(response, mem);
1574            map.insert("sources".into(), json!(sources));
1575        }
1576    }
1577
1578    println!("{}", serde_json::to_string_pretty(&body)?);
1579    Ok(())
1580}
1581
1582fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
1583    response
1584        .citations
1585        .iter()
1586        .enumerate()
1587        .map(|(idx, citation)| {
1588            let mut source = serde_json::Map::new();
1589            source.insert("index".into(), json!(idx + 1));
1590            source.insert("frame_id".into(), json!(citation.frame_id));
1591            source.insert("uri".into(), json!(citation.uri));
1592
1593            if let Some(range) = citation.chunk_range {
1594                source.insert("chunk_range".into(), json!([range.0, range.1]));
1595            }
1596            if let Some(score) = citation.score {
1597                source.insert("score".into(), json!(score));
1598            }
1599
1600            // Get frame metadata for rich source information
1601            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1602                if let Some(title) = frame.title {
1603                    source.insert("title".into(), json!(title));
1604                }
1605                if !frame.tags.is_empty() {
1606                    source.insert("tags".into(), json!(frame.tags));
1607                }
1608                if !frame.labels.is_empty() {
1609                    source.insert("labels".into(), json!(frame.labels));
1610                }
1611                source.insert("frame_timestamp".into(), json!(frame.timestamp));
1612                if !frame.content_dates.is_empty() {
1613                    source.insert("content_dates".into(), json!(frame.content_dates));
1614                }
1615            }
1616
1617            // Get snippet from hit
1618            if let Some(hit) = response
1619                .retrieval
1620                .hits
1621                .iter()
1622                .find(|h| h.frame_id == citation.frame_id)
1623            {
1624                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
1625                source.insert("snippet".into(), json!(snippet));
1626            }
1627
1628            serde_json::Value::Object(source)
1629        })
1630        .collect()
1631}
1632
1633fn emit_model_json(
1634    response: &AskResponse,
1635    requested_model: &str,
1636    model: Option<&ModelAnswer>,
1637    include_sources: bool,
1638    mem: &mut Memvid,
1639) -> Result<()> {
1640    let answer = response.answer.clone().unwrap_or_default();
1641    let requested_label = model
1642        .map(|m| m.requested.clone())
1643        .unwrap_or_else(|| requested_model.to_string());
1644    let used_label = model
1645        .map(|m| m.model.clone())
1646        .unwrap_or_else(|| requested_model.to_string());
1647
1648    let mut body = json!({
1649        "question": response.question,
1650        "model": requested_label,
1651        "model_used": used_label,
1652        "answer": answer,
1653        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1654    });
1655
1656    // Add detailed sources if requested
1657    if include_sources {
1658        if let serde_json::Value::Object(ref mut map) = body {
1659            let sources = build_sources_json(response, mem);
1660            map.insert("sources".into(), json!(sources));
1661        }
1662    }
1663
1664    println!("{}", serde_json::to_string_pretty(&body)?);
1665    Ok(())
1666}
1667
1668fn emit_ask_pretty(
1669    response: &AskResponse,
1670    requested_mode: AskModeArg,
1671    model: Option<&ModelAnswer>,
1672    include_sources: bool,
1673    mem: &mut Memvid,
1674) {
1675    println!(
1676        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
1677        ask_mode_pretty(requested_mode),
1678        ask_retriever_pretty(response.retriever),
1679        response.retrieval.params.top_k,
1680        response.stats.latency_ms,
1681        response.stats.retrieval_ms
1682    );
1683    if let Some(model) = model {
1684        if model.requested.trim() == model.model {
1685            println!("model: {}", model.model);
1686        } else {
1687            println!(
1688                "model requested: {}   model used: {}",
1689                model.requested, model.model
1690            );
1691        }
1692    }
1693    println!(
1694        "engine: {}",
1695        search_engine_label(&response.retrieval.engine)
1696    );
1697    println!(
1698        "hits: {} (showing {})",
1699        response.retrieval.total_hits,
1700        response.retrieval.hits.len()
1701    );
1702
1703    if response.context_only {
1704        println!();
1705        println!("Context-only mode: synthesis disabled.");
1706        println!();
1707    } else if let Some(answer) = &response.answer {
1708        println!();
1709        println!("Answer:\n{answer}");
1710        println!();
1711    }
1712
1713    if !response.citations.is_empty() {
1714        println!("Citations:");
1715        for citation in &response.citations {
1716            match citation.score {
1717                Some(score) => println!(
1718                    "[{}] {} (frame {}, score {:.3})",
1719                    citation.index, citation.uri, citation.frame_id, score
1720                ),
1721                None => println!(
1722                    "[{}] {} (frame {})",
1723                    citation.index, citation.uri, citation.frame_id
1724                ),
1725            }
1726        }
1727        println!();
1728    }
1729
1730    // Print detailed sources if requested
1731    if include_sources && !response.citations.is_empty() {
1732        println!("=== SOURCES ===");
1733        println!();
1734        for citation in &response.citations {
1735            println!("[{}] {}", citation.index, citation.uri);
1736
1737            // Get frame metadata
1738            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1739                if let Some(title) = &frame.title {
1740                    println!("    Title: {}", title);
1741                }
1742                println!("    Frame ID: {}", citation.frame_id);
1743                if let Some(score) = citation.score {
1744                    println!("    Score: {:.4}", score);
1745                }
1746                if let Some((start, end)) = citation.chunk_range {
1747                    println!("    Range: [{}..{})", start, end);
1748                }
1749                if !frame.tags.is_empty() {
1750                    println!("    Tags: {}", frame.tags.join(", "));
1751                }
1752                if !frame.labels.is_empty() {
1753                    println!("    Labels: {}", frame.labels.join(", "));
1754                }
1755                println!("    Timestamp: {}", frame.timestamp);
1756                if !frame.content_dates.is_empty() {
1757                    println!("    Content Dates: {}", frame.content_dates.join(", "));
1758                }
1759            }
1760
1761            // Get snippet from hit
1762            if let Some(hit) = response
1763                .retrieval
1764                .hits
1765                .iter()
1766                .find(|h| h.frame_id == citation.frame_id)
1767            {
1768                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
1769                let truncated = if snippet.len() > 200 {
1770                    format!("{}...", &snippet[..200])
1771                } else {
1772                    snippet.clone()
1773                };
1774                println!("    Snippet: {}", truncated.replace('\n', " "));
1775            }
1776            println!();
1777        }
1778    }
1779
1780    if !include_sources {
1781        println!();
1782        emit_search_table(&response.retrieval);
1783    }
1784}
1785
1786fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
1787    let hits: Vec<_> = response
1788        .hits
1789        .iter()
1790        .map(|hit| {
1791            json!({
1792                "frame_id": hit.frame_id,
1793                "matches": hit.matches,
1794                "snippets": [hit.text.clone()],
1795            })
1796        })
1797        .collect();
1798    println!("{}", serde_json::to_string_pretty(&hits)?);
1799    Ok(())
1800}
1801
1802fn emit_search_table(response: &SearchResponse) {
1803    if response.hits.is_empty() {
1804        println!("No results for '{}'.", response.query);
1805        return;
1806    }
1807    for hit in &response.hits {
1808        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
1809        if let Some(title) = &hit.title {
1810            println!("  Title: {title}");
1811        }
1812        if let Some(score) = hit.score {
1813            println!("  Score: {score:.3}");
1814        }
1815        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
1816        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
1817            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
1818        }
1819        if let Some(chunk_text) = &hit.chunk_text {
1820            println!("  Chunk Text: {}", chunk_text.trim());
1821        }
1822        if let Some(metadata) = &hit.metadata {
1823            if let Some(track) = &metadata.track {
1824                println!("  Track: {track}");
1825            }
1826            if !metadata.tags.is_empty() {
1827                println!("  Tags: {}", metadata.tags.join(", "));
1828            }
1829            if !metadata.labels.is_empty() {
1830                println!("  Labels: {}", metadata.labels.join(", "));
1831            }
1832            if let Some(created_at) = &metadata.created_at {
1833                println!("  Created: {created_at}");
1834            }
1835            if !metadata.content_dates.is_empty() {
1836                println!("  Content Dates: {}", metadata.content_dates.join(", "));
1837            }
1838            if !metadata.entities.is_empty() {
1839                let entity_strs: Vec<String> = metadata
1840                    .entities
1841                    .iter()
1842                    .map(|e| format!("{} ({})", e.name, e.kind))
1843                    .collect();
1844                println!("  Entities: {}", entity_strs.join(", "));
1845            }
1846        }
1847        println!("  Snippet: {}", hit.text.trim());
1848        println!();
1849    }
1850    if let Some(cursor) = &response.next_cursor {
1851        println!("Next cursor: {cursor}");
1852    }
1853}
1854
1855fn ask_mode_display(mode: AskModeArg) -> &'static str {
1856    match mode {
1857        AskModeArg::Lex => "lex",
1858        AskModeArg::Sem => "sem",
1859        AskModeArg::Hybrid => "hybrid",
1860    }
1861}
1862
1863fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
1864    match mode {
1865        AskModeArg::Lex => "Lexical",
1866        AskModeArg::Sem => "Semantic",
1867        AskModeArg::Hybrid => "Hybrid",
1868    }
1869}
1870
1871fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
1872    match retriever {
1873        AskRetriever::Lex => "lex",
1874        AskRetriever::Semantic => "semantic",
1875        AskRetriever::Hybrid => "hybrid",
1876        AskRetriever::LexFallback => "lex_fallback",
1877        AskRetriever::TimelineFallback => "timeline_fallback",
1878    }
1879}
1880
1881fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
1882    match retriever {
1883        AskRetriever::Lex => "Lexical",
1884        AskRetriever::Semantic => "Semantic",
1885        AskRetriever::Hybrid => "Hybrid",
1886        AskRetriever::LexFallback => "Lexical (fallback)",
1887        AskRetriever::TimelineFallback => "Timeline (fallback)",
1888    }
1889}
1890
1891fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
1892    match engine {
1893        SearchEngineKind::Tantivy => "text (tantivy)",
1894        SearchEngineKind::LexFallback => "text (fallback)",
1895        SearchEngineKind::Hybrid => "hybrid",
1896    }
1897}
1898
1899fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
1900    let digest = hash(uri.as_bytes()).to_hex().to_string();
1901    let prefix_len = digest.len().min(12);
1902    let prefix = &digest[..prefix_len];
1903    format!("mv2-hit-{prefix}-{frame_id}-{start}")
1904}
1905
/// Truncate `text` to at most `limit` characters, appending `"..."` when
/// anything was removed.
///
/// Operates on `char` boundaries, so it is safe for multi-byte UTF-8, and
/// returns the input unchanged (as an owned `String`) when it already fits.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // Single pass instead of count-then-collect: `nth(limit)` yields the
    // byte offset of the (limit+1)-th char, i.e. None means the text has at
    // most `limit` chars and needs no truncation.
    match text.char_indices().nth(limit) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
1914
1915fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
1916    let mut hit_json = serde_json::Map::new();
1917    hit_json.insert("rank".into(), json!(hit.rank));
1918    if let Some(score) = hit.score {
1919        hit_json.insert("score".into(), json!(score));
1920    }
1921    hit_json.insert(
1922        "id".into(),
1923        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
1924    );
1925    hit_json.insert("frame_id".into(), json!(hit.frame_id));
1926    hit_json.insert("uri".into(), json!(hit.uri));
1927    if let Some(title) = &hit.title {
1928        hit_json.insert("title".into(), json!(title));
1929    }
1930    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
1931    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
1932    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
1933    hit_json.insert("text".into(), json!(hit.text));
1934
1935    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
1936        matches: hit.matches,
1937        ..SearchHitMetadata::default()
1938    });
1939    let mut meta_json = serde_json::Map::new();
1940    meta_json.insert("matches".into(), json!(metadata.matches));
1941    if !metadata.tags.is_empty() {
1942        meta_json.insert("tags".into(), json!(metadata.tags));
1943    }
1944    if !metadata.labels.is_empty() {
1945        meta_json.insert("labels".into(), json!(metadata.labels));
1946    }
1947    if let Some(track) = metadata.track {
1948        meta_json.insert("track".into(), json!(track));
1949    }
1950    if let Some(created_at) = metadata.created_at {
1951        meta_json.insert("created_at".into(), json!(created_at));
1952    }
1953    if !metadata.content_dates.is_empty() {
1954        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
1955    }
1956    if !metadata.entities.is_empty() {
1957        let entities_json: Vec<serde_json::Value> = metadata
1958            .entities
1959            .iter()
1960            .map(|e| {
1961                let mut ent = serde_json::Map::new();
1962                ent.insert("name".into(), json!(e.name));
1963                ent.insert("kind".into(), json!(e.kind));
1964                if let Some(conf) = e.confidence {
1965                    ent.insert("confidence".into(), json!(conf));
1966                }
1967                serde_json::Value::Object(ent)
1968            })
1969            .collect();
1970        meta_json.insert("entities".into(), json!(entities_json));
1971    }
1972    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
1973    serde_json::Value::Object(hit_json)
1974}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Embeds the query via `runtime`, cosine-scores each hit's frame embedding
/// from `mem`, and reorders `response.hits` in place with fresh 1-based
/// ranks. Hits whose frames have no embedding (or a mismatched dimension)
/// contribute no semantic term. Returns without touching the response when
/// there are no hits or no usable embeddings at all.
///
/// # Errors
/// Propagates failures from embedding the query text or from reading a
/// frame embedding out of `mem`.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    let query_embedding = runtime.embed(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Skip embeddings produced by a different model (wrong dimension).
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to get semantic ranks
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    // Ranks are 1-based, matching the lexical `hit.rank` convention.
    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // Each entry is (original index, fused score, lexical rank); the lexical
    // rank doubles as a deterministic tie-breaker below.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Higher fused score first; ties fall back to the better (lower) lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
2080
2081/// Rerank search results by boosting hits that contain user preference signals.
2082/// Only applies when the query appears to be seeking recommendations or preferences.
2083fn apply_preference_rerank(response: &mut SearchResponse) {
2084    if response.hits.is_empty() {
2085        return;
2086    }
2087
2088    // Check if query is preference-seeking
2089    let query_lower = response.query.to_lowercase();
2090    let is_preference_query = query_lower.contains("suggest")
2091        || query_lower.contains("recommend")
2092        || query_lower.contains("should i")
2093        || query_lower.contains("what should")
2094        || query_lower.contains("prefer")
2095        || query_lower.contains("favorite")
2096        || query_lower.contains("best for me");
2097
2098    if !is_preference_query {
2099        return;
2100    }
2101
2102    // Compute boost scores for each hit
2103    let mut scored: Vec<(usize, f32, f32)> = response
2104        .hits
2105        .iter()
2106        .enumerate()
2107        .map(|(idx, hit)| {
2108            let original_score = hit.score.unwrap_or(0.0);
2109            let preference_boost = compute_preference_boost(&hit.text);
2110            let boosted_score = original_score + preference_boost;
2111            (idx, boosted_score, original_score)
2112        })
2113        .collect();
2114
2115    // Sort by boosted score (descending)
2116    scored.sort_by(|a, b| {
2117        b.1.partial_cmp(&a.1)
2118            .unwrap_or(Ordering::Equal)
2119            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
2120    });
2121
2122    // Reorder hits
2123    let mut reordered = Vec::with_capacity(response.hits.len());
2124    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
2125        let mut hit = response.hits[idx].clone();
2126        hit.rank = rank_idx + 1;
2127        reordered.push(hit);
2128    }
2129
2130    response.hits = reordered;
2131}
2132
/// Score how strongly `text` reads as established first-person context.
///
/// Distinguishes content where the user describes their ESTABLISHED
/// situation — experiences, possessions, habits ("i've been", "my garden",
/// "i prefer", ...) — from content where the user is merely making a
/// REQUEST ("i want to", "looking for", ...). Established markers add 0.15
/// each; generic first-person tokens and request phrasing add 0.02 each.
/// Matching is case-insensitive substring search; the total is capped at
/// 0.5 so the boost never dominates genuine relevance.
fn compute_preference_boost(text: &str) -> f32 {
    let lowered = text.to_lowercase();

    // Strong markers: what the user HAS DONE, HAS, or DOES REGULARLY.
    let established: &[&str] = &[
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    // Moderate markers: generic first-person tokens.
    let first_person: &[&str] = &[" i ", " my ", " me "];
    // Weak markers: requests/intentions, not yet established preferences.
    let requests: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    // Accumulate weights in the same order as the pattern groups above so
    // the f32 sum is deterministic.
    let weighted = established
        .iter()
        .map(|p| (*p, 0.15f32))
        .chain(first_person.iter().map(|p| (*p, 0.02)))
        .chain(requests.iter().map(|p| (*p, 0.02)));
    let mut boost = 0.0f32;
    for (pattern, weight) in weighted {
        if lowered.contains(pattern) {
            boost += weight;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
2225
/// Cosine similarity of two vectors, iterating over the shorter length.
///
/// Returns 0.0 when either vector has (near-)zero squared magnitude so
/// callers never see a NaN from dividing by zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let (mut dot, mut norm_a, mut norm_b) = (0.0f32, 0.0f32, 0.0f32);
    for (&x, &y) in a.iter().zip(b) {
        dot += x * y;
        norm_a += x * x;
        norm_b += y * y;
    }

    if norm_a <= f32::EPSILON || norm_b <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a.sqrt() * norm_b.sqrt())
}
2242
2243/// Apply cross-encoder reranking to search results.
2244///
2245/// Cross-encoders directly score query-document pairs and can understand
2246/// more nuanced relevance than bi-encoders (embeddings). This is especially
2247/// useful for personalization queries where semantic similarity != relevance.
2248///
2249/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2250fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2251    if response.hits.is_empty() || response.hits.len() < 2 {
2252        return Ok(());
2253    }
2254
2255    // Only rerank if we have enough candidates
2256    let candidates_to_rerank = response.hits.len().min(50);
2257
2258    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2259    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2260    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2261        .with_show_download_progress(true);
2262
2263    let mut reranker = match TextRerank::try_new(options) {
2264        Ok(r) => r,
2265        Err(e) => {
2266            warn!("Failed to initialize cross-encoder reranker: {e}");
2267            return Ok(());
2268        }
2269    };
2270
2271    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2272    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2273        .iter()
2274        .map(|hit| hit.text.clone())
2275        .collect();
2276
2277    // Rerank using cross-encoder
2278    info!("Cross-encoder reranking {} candidates", documents.len());
2279    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2280        Ok(results) => results,
2281        Err(e) => {
2282            warn!("Cross-encoder reranking failed: {e}");
2283            return Ok(());
2284        }
2285    };
2286
2287    // Reorder hits based on cross-encoder scores
2288    let mut reordered = Vec::with_capacity(response.hits.len());
2289    for (new_rank, result) in rerank_results.iter().enumerate() {
2290        let original_idx = result.index;
2291        let mut hit = response.hits[original_idx].clone();
2292        hit.rank = new_rank + 1;
2293        // Store cross-encoder score in the hit score for reference
2294        hit.score = Some(result.score);
2295        reordered.push(hit);
2296    }
2297
2298    // Add any remaining hits that weren't reranked (beyond top-50)
2299    for hit in response.hits.iter().skip(candidates_to_rerank) {
2300        let mut h = hit.clone();
2301        h.rank = reordered.len() + 1;
2302        reordered.push(h);
2303    }
2304
2305    response.hits = reordered;
2306    info!("Cross-encoder reranking complete");
2307    Ok(())
2308}
2309
2310/// Build a context string from memory cards stored in the MV2 file.
2311/// Groups facts by entity for better LLM comprehension.
2312fn build_memory_context(mem: &Memvid) -> String {
2313    let entities = mem.memory_entities();
2314    if entities.is_empty() {
2315        return String::new();
2316    }
2317
2318    let mut sections = Vec::new();
2319    for entity in entities {
2320        let cards = mem.get_entity_memories(&entity);
2321        if cards.is_empty() {
2322            continue;
2323        }
2324
2325        let mut entity_lines = Vec::new();
2326        for card in cards {
2327            // Format: "slot: value" with optional polarity indicator
2328            let polarity_marker = card
2329                .polarity
2330                .as_ref()
2331                .map(|p| match p.to_string().as_str() {
2332                    "Positive" => " (+)",
2333                    "Negative" => " (-)",
2334                    _ => "",
2335                })
2336                .unwrap_or("");
2337            entity_lines.push(format!(
2338                "  - {}: {}{}",
2339                card.slot, card.value, polarity_marker
2340            ));
2341        }
2342
2343        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2344    }
2345
2346    sections.join("\n\n")
2347}
2348
2349/// Build a context string from entities found in search hits.
2350/// Groups entities by type for better LLM comprehension.
2351fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
2352    use std::collections::HashMap;
2353
2354    // Collect unique entities by kind
2355    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
2356
2357    for hit in hits {
2358        if let Some(metadata) = &hit.metadata {
2359            for entity in &metadata.entities {
2360                entities_by_kind
2361                    .entry(entity.kind.clone())
2362                    .or_default()
2363                    .push(entity.name.clone());
2364            }
2365        }
2366    }
2367
2368    if entities_by_kind.is_empty() {
2369        return String::new();
2370    }
2371
2372    // Deduplicate and format
2373    let mut sections = Vec::new();
2374    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
2375    sorted_kinds.sort();
2376
2377    for kind in sorted_kinds {
2378        let names = entities_by_kind.get(kind).unwrap();
2379        let mut unique_names: Vec<_> = names.iter().collect();
2380        unique_names.sort();
2381        unique_names.dedup();
2382
2383        let names_str = unique_names
2384            .iter()
2385            .take(10) // Limit to 10 entities per kind
2386            .map(|s| s.as_str())
2387            .collect::<Vec<_>>()
2388            .join(", ");
2389
2390        sections.push(format!("{}: {}", kind, names_str));
2391    }
2392
2393    sections.join("\n")
2394}