// memvid_cli/commands/search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16#[cfg(feature = "temporal_track")]
17use memvid_core::{
18    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
19    TemporalResolution, TemporalResolutionValue,
20};
21use memvid_core::{
22    types::{AskContextFragment, AskContextFragmentKind, SearchHitMetadata},
23    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, SearchEngineKind, SearchHit,
24    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
25};
26#[cfg(feature = "temporal_track")]
27use serde::Serialize;
28use serde_json::json;
29#[cfg(feature = "temporal_track")]
30use time::format_description::well_known::Rfc3339;
31use time::{Date, PrimitiveDateTime, Time};
32#[cfg(feature = "temporal_track")]
33use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
34use tracing::{info, warn};
35
36use fastembed::{RerankerModel, RerankInitOptions, TextRerank};
37
38use memvid_ask_model::{
39    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
40    ModelInference,
41};
42
43// frame_to_json and print_frame_summary available from commands but not used in this module
44use crate::config::{
45    load_embedding_runtime, load_embedding_runtime_for_mv2,
46    resolve_llm_context_budget_override, try_load_embedding_runtime,
47    try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingRuntime,
48};
49use crate::utils::{
50    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51    parse_date_boundary, parse_vector, read_embedding,
52};
53
// Cap on context text echoed in command output.
// NOTE(review): the consumer is outside this chunk — presumably truncates
// `ask` context fragments before printing; confirm at the use site.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone when --tz is not supplied (see build_temporal_filter).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
/// Arguments for the `timeline` subcommand
// Field comments use `//` (not `///`) so the clap-derived help text is unchanged.
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read (opened read-only by handle_timeline).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    // Reverse-ordering flag, forwarded to TimelineQueryBuilder::reverse.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (unix), forwarded to the query builder.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (unix), forwarded to the query builder.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. --on "last tuesday"),
    // resolved via build_temporal_filter.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone for --on resolution (defaults to DEFAULT_TEMPORAL_TZ).
    // Rejected without --on (E-TEMP-005).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 "now" override used when resolving relative phrases.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, applied around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
92
/// Arguments for the `when` subcommand
// Field comments use `//` (not `///`) so the clap-derived help text is unchanged.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read (opened read-only by handle_when).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for phrase resolution (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 "now" override used when resolving relative phrases.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (unix), forwarded to the query builder.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (unix), forwarded to the query builder.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse-ordering flag, forwarded to the query builder.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON (WhenOutput) instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
118
/// Arguments for the `ask` subcommand
// Field comments use `//` (not `///`) so the clap-derived help text is unchanged.
#[derive(Args)]
pub struct AskArgs {
    // Free-form positional tokens: handle_ask treats the first token that
    // looks like a memory file as the target; the rest are joined into the
    // question text.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text; takes precedence over positional tokens.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Single-URI filter; combining with --scope emits a warning.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // URI-prefix filter forwarded to the ask request.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of results to retrieve (alias: --limit).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token forwarded to the ask request.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode; determines whether an embedding runtime is loaded.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON output.
    #[arg(long)]
    pub json: bool,
    // Return retrieval context only (forwarded to AskRequest.context_only).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Date filters parsed by parse_date_boundary; --end may not precede --start.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Optional local-model spec; a bare --use-model defaults to "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,
}
182
/// Ask mode argument
// Variant comments use `//` (not `///`) so the clap-derived help is unchanged.
// Maps 1:1 onto memvid_core::AskMode via the From impl below.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval only (no embedding runtime loaded).
    Lex,
    // Semantic retrieval only (embedding runtime required).
    Sem,
    // Lexical + semantic combined (default; embedding runtime best-effort).
    Hybrid,
}
190
191impl From<AskModeArg> for AskMode {
192    fn from(value: AskModeArg) -> Self {
193        match value {
194            AskModeArg::Lex => AskMode::Lex,
195            AskModeArg::Sem => AskMode::Sem,
196            AskModeArg::Hybrid => AskMode::Hybrid,
197        }
198    }
199}
200
/// Arguments for the `find` subcommand
// Field comments use `//` (not `///`) so the clap-derived help text is unchanged.
// NOTE(review): the find handler is outside this chunk; filter semantics
// below mirror the equivalent AskArgs fields — confirm at the handler.
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Search query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Single-URI filter.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // URI-prefix filter.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of results to return (alias: --limit).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit the current JSON output format.
    #[arg(long)]
    pub json: bool,
    // Emit the legacy JSON format (mutually exclusive with --json).
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Engine selection: auto, lexical-only, or semantic-only.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
}
235
/// Search mode argument
// Variant comments use `//` (not `///`) so the clap-derived help is unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Let the handler pick the engine — presumably hybrid/best-available;
    // confirm in the find handler (outside this chunk).
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (vector) search only.
    Sem,
}
243
/// Arguments for the `vec-search` subcommand
// Field comments use `//` (not `///`) so the clap-derived help text is unchanged.
// Exactly one of --vector / --embedding supplies the query vector.
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector given inline as comma-separated values
    // (mutually exclusive with --embedding).
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding
    // (mutually exclusive with --vector).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
}
258
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
295
/// Audit output format
// Note: the `///` variant docs below double as clap help text (ValueEnum) —
// keep them user-facing.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
306
307// ============================================================================
308// Search & Retrieval command handlers
309// ============================================================================
310
/// Handle the `timeline` subcommand: list frames with optional
/// limit/since/until bounds, an optional natural-language window
/// (`--on`, feature "temporal_track"), and optional replay cutoffs.
///
/// Output is pretty-printed JSON with `--json` (wrapped in `TimelineOutput`
/// when a temporal summary exists), otherwise a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of --on; reject them on their own.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve --on into a temporal filter; keep the summary for display/JSON.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    // (post-query: drop entries past the frame-id/timestamp cutoffs).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // When a temporal summary exists, wrap entries so the JSON carries
        // the resolved window; otherwise print the bare entry list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            // One header line per frame; newlines in the preview are flattened.
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
416
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve the required `--on` phrase into a
/// temporal window, query the timeline with it, and print either a JSON
/// `WhenOutput` or a human-readable summary plus matching entries.
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    // Same builder pattern as handle_timeline, but the temporal filter is
    // always applied.
    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Human listing shows both the raw unix timestamp and its ISO form.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
484
#[cfg(feature = "temporal_track")]
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// the resolved-window summary plus the (borrowed) entry list.
#[derive(Serialize)]
struct TimelineOutput<'a> {
    // Omitted from JSON when no temporal summary was produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
492
#[cfg(feature = "temporal_track")]
/// JSON envelope for `when --json`: the resolved-window summary followed by
/// the matching timeline entries.
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
499
#[cfg(feature = "temporal_track")]
/// Serializable view of a `TimelineEntry` for `when --json`
/// (built by `entry_to_when_entry`).
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    // Raw unix timestamp; `timestamp_iso` is its formatted twin when available.
    timestamp: i64,
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
515
#[cfg(feature = "temporal_track")]
/// Serialized form of a resolved temporal phrase (built by
/// `summary_to_output`) shared by `when --json` and `timeline --json`.
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    // Anchor ("now") as unix seconds plus an RFC3339 rendering.
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    // One of: date / datetime / date_range / datetime_range / month.
    resolution_kind: &'static str,
    // Resolved window bounds; each bound also gets an ISO rendering when
    // formatting succeeds.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
534
#[cfg(feature = "temporal_track")]
/// Internal (non-serialized) record of how a temporal phrase resolved;
/// produced by `build_temporal_filter`, consumed by the printers and
/// `summary_to_output`.
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    // Window bounds in unix seconds, already padded by `window_minutes`.
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    window_minutes: Option<u64>,
}
545
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal phrase into a `TemporalFilter` plus a
/// `TemporalSummary` used for display/JSON output.
///
/// * `tz_override` — IANA zone; falls back to `DEFAULT_TEMPORAL_TZ`.
/// * `anchor_override` — RFC3339 "now" for relative phrases; defaults to the
///   current UTC time.
/// * `window_minutes` — optional symmetric padding applied around the
///   resolved window.
///
/// # Errors
/// Returns `E-TEMP-003` for an empty timezone, `E-TEMP-002` for a malformed
/// anchor, and `E-TEMP-001` when the normalizer cannot resolve the phrase.
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad the resolved window symmetrically. The previous version branched on
    // `s == e` with two byte-identical arms; the arithmetic is the same for
    // point and range windows, so the branch is collapsed here.
    if let Some(minutes) = window_minutes.filter(|m| *m > 0) {
        // CLI minute counts comfortably fit i64; the cast cannot truncate in practice.
        let pad_secs = TimeDuration::minutes(minutes as i64).whole_seconds();
        if let (Some(s), Some(e)) = (start, end) {
            start = Some(s.saturating_sub(pad_secs));
            end = Some(e.saturating_add(pad_secs));
        }
    }

    // phrase/tz are carried in the summary instead; the filter only needs bounds.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
609
#[cfg(feature = "temporal_track")]
/// Convert the internal `TemporalSummary` into its serializable JSON form,
/// rendering timestamps as RFC3339 where possible.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        // RFC3339 render; fall back to the bare unix value on formatting error.
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        // ISO twins are None when the bound is absent or cannot be formatted.
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
635
#[cfg(feature = "temporal_track")]
/// Build the serializable `when --json` view of a timeline entry, attaching
/// an ISO-8601 rendering of the timestamp when one can be produced.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    let timestamp_iso = format_timestamp(entry.timestamp);
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso,
        uri: entry.uri.clone(),
        preview: entry.preview.clone(),
        temporal: entry.temporal.clone(),
        child_frames: entry.child_frames.clone(),
    }
}
648
#[cfg(feature = "temporal_track")]
/// Print the resolved temporal window in human-readable form; shared by the
/// `when` and `timeline --on` paths.
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);

    // RFC3339 render of the anchor, falling back to the raw unix value.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");

    let bounds = (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    );
    match bounds {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        (None, None) => println!("Window: (not resolved)"),
    }

    println!("Confidence: {}", summary.resolution.confidence);

    let flags = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect::<Vec<&'static str>>();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }

    // Only report padding when it was actually requested and non-zero.
    if let Some(window) = summary.window_minutes.filter(|m| *m > 0) {
        println!("Window padding: {window} minute(s)");
    }
    println!();
}
686
#[cfg(feature = "temporal_track")]
/// Print a frame's temporal metadata (anchor + mentions) as indented lines
/// beneath its listing row.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, and
        // finally the bare unix value.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            "  Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!("  Mentions:");
        for mention in &temporal.mentions {
            // Same ISO/formatted/raw fallback chain as for the anchor.
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                "    - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            // Append the original mention text when the extractor kept it.
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
719
#[cfg(feature = "temporal_track")]
/// Map a temporal resolution to `(start, end)` unix-timestamp bounds.
///
/// Point resolutions (single date/datetime) collapse to `start == end`;
/// calendar dates are anchored at midnight UTC via `date_to_timestamp`;
/// `Month` spans from the 1st through the last day of the month.
///
/// # Errors
/// Fails only when a `Month` value does not form a valid calendar date.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
750
#[cfg(feature = "temporal_track")]
/// Stable string tag for the resolution variant, emitted in JSON output
/// (`TemporalSummaryOutput::resolution_kind`).
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match resolution.value {
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
761
#[cfg(feature = "temporal_track")]
/// Unix timestamp for midnight (UTC) at the start of the given calendar day.
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
768
#[cfg(feature = "temporal_track")]
/// Last calendar day of `month` in `year`, found by walking forward from the
/// 1st until the following day rolls into the next month (or the calendar ends).
///
/// # Errors
/// Fails when `(year, month, 1)` is not a valid calendar date.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match last.next_day() {
            Some(next) if next.month() == month => last = next,
            _ => break,
        }
    }
    Ok(last)
}
782
783#[cfg(feature = "temporal_track")]
784
785fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
786    if fragments.is_empty() {
787        return;
788    }
789
790    response.context_fragments = fragments
791        .into_iter()
792        .map(|fragment| AskContextFragment {
793            rank: fragment.rank,
794            frame_id: fragment.frame_id,
795            uri: fragment.uri,
796            title: fragment.title,
797            score: fragment.score,
798            matches: fragment.matches,
799            range: Some(fragment.range),
800            chunk_range: fragment.chunk_range,
801            text: fragment.text,
802            kind: Some(match fragment.kind {
803                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
804                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
805            }),
806            #[cfg(feature = "temporal_track")]
807            temporal: None,
808        })
809        .collect();
810}
811
/// Handle the `ask` command: retrieve context for a question and optionally
/// synthesize an answer with a local model.
///
/// Pipeline (order matters — each stage mutates `response` in place):
/// 1. Parse positional tokens into memory-file path + question.
/// 2. Open the MV2 file and pick an embedding runtime based on `--mode`.
/// 3. Run core `ask` retrieval.
/// 4. Optionally cross-encoder rerank, inject memory cards, mask PII.
/// 5. Optionally run model inference, then emit JSON or pretty output.
///
/// Errors if no question is provided, dates are inverted, the memory file
/// cannot be opened, or a required embedding runtime is unavailable.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // --uri narrows to a single document and supersedes --scope; warn instead
    // of erroring so existing scripts keep working.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Positional targets may interleave a memory-file path with question words.
    // The first token that looks like a memory file becomes the path; all
    // remaining tokens are joined into the question.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // Explicit --question wins over positional words; blank questions are
    // treated as missing.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Date filters: start is inclusive-from-midnight, end extends to end-of-day.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.vec_index_dimension();

    let ask_mode: AskMode = args.mode.into();
    let emb_model_override = args.query_embedding_model.as_deref();
    // Lex needs no embeddings; Sem requires them (hard error on failure);
    // Hybrid tries to load them but can proceed lexically only via the bail below.
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
    };
    let mut response = mem.ask(request, embedder)?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank && !response.retrieval.hits.is_empty() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failures are non-fatal: keep the original retrieval order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Model synthesis is skipped entirely in context-only mode; inference
    // failures fall back to the default summary rather than aborting.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Output: model-specific JSON when a model was requested, ask JSON
    // otherwise, or human-readable text without --json.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    Ok(())
}
1046
/// Handle the `find` command: lexical, semantic, or auto (hybrid) search.
///
/// Mode selection:
/// - `Lex`: text search only, no embedding runtime.
/// - `Sem`: requires an embedding runtime; uses the HNSW vector index,
///   falling back to lexical search + semantic rerank if vector search fails.
/// - `Auto`: hybrid (lexical + semantic rerank) when a runtime loads,
///   otherwise plain lexical.
///
/// Output is legacy JSON (`--json-legacy`, deprecated), `mv2.result.v2`
/// JSON (`--json`), or a human-readable table.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    // --uri supersedes --scope; warn rather than error.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Get vector dimension from MV2 for auto-detection
    let mv2_dimension = mem.vec_index_dimension();
    let emb_model_override = args.query_embedding_model.as_deref();

    // Human-facing mode label plus the (optional) embedding runtime.
    // Sem hard-fails if the runtime cannot load; Auto degrades to lexical.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime = load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if let Some(runtime) = try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension) {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
    };

    // Machine-facing mode key used in JSON output.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
    };

    // For semantic mode, use pure vector search via HNSW index
    let (response, engine_label) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option.as_ref().ok_or_else(|| {
            anyhow!("Semantic search requires an embedding runtime")
        })?;

        // Embed the query
        let query_embedding = runtime.embed(&args.query)?;

        // Use pure vector search
        let scope = args.scope.as_deref().or(args.uri.as_deref());
        match mem.vec_search_with_embedding(
            &args.query,
            &query_embedding,
            args.top_k,
            args.snippet_chars,
            scope,
        ) {
            Ok(mut resp) => {
                // Apply preference boost to rerank results for preference-seeking queries
                apply_preference_rerank(&mut resp);
                (resp, "semantic (HNSW vector index)".to_string())
            }
            Err(e) => {
                // Fall back to lexical search + rerank if vector search fails
                warn!("Vector search failed ({e}), falling back to lexical + rerank");
                let request = SearchRequest {
                    query: args.query.clone(),
                    top_k: args.top_k,
                    snippet_chars: args.snippet_chars,
                    uri: args.uri.clone(),
                    scope: args.scope.clone(),
                    cursor: args.cursor.clone(),
                    #[cfg(feature = "temporal_track")]
                    temporal: None,
                    as_of_frame: args.as_of_frame,
                    as_of_ts: args.as_of_ts,
                };
                let mut resp = mem.search(request)?;
                apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                (resp, "semantic (fallback rerank)".to_string())
            }
        }
    } else {
        // For lexical and auto modes, use existing behavior
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
        };

        let mut resp = mem.search(request)?;

        // Only warn about the degraded engine when the user did not
        // explicitly request lexical mode.
        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        // Semantic rerank on top of lexical results when a runtime is available.
        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label)
    };

    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {}   k={}   time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);
        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }
    Ok(())
}
1185
1186pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1187    let mut mem = open_read_only_mem(&args.file)?;
1188    let vector = if let Some(path) = args.embedding.as_deref() {
1189        read_embedding(path)?
1190    } else if let Some(vector_string) = &args.vector {
1191        parse_vector(vector_string)?
1192    } else {
1193        anyhow::bail!("provide --vector or --embedding for search input");
1194    };
1195
1196    let hits = mem.search_vec(&vector, args.limit)?;
1197    let mut enriched = Vec::with_capacity(hits.len());
1198    for hit in hits {
1199        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1200        enriched.push((hit.frame_id, hit.distance, preview));
1201    }
1202
1203    if args.json {
1204        let json_hits: Vec<_> = enriched
1205            .iter()
1206            .map(|(frame_id, distance, preview)| {
1207                json!({
1208                    "frame_id": frame_id,
1209                    "distance": distance,
1210                    "preview": preview,
1211                })
1212            })
1213            .collect();
1214        println!("{}", serde_json::to_string_pretty(&json_hits)?);
1215    } else if enriched.is_empty() {
1216        println!("No vector matches found");
1217    } else {
1218        for (frame_id, distance, preview) in enriched {
1219            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1220        }
1221    }
1222    Ok(())
1223}
1224
/// Handle the `audit` command: run a question through the core audit API and
/// emit a provenance report (text, markdown, or JSON).
///
/// Optionally synthesizes the answer with a local model (`--use-model`);
/// model failures degrade to the default answer rather than aborting.
/// The report goes to `--out` if given, otherwise to stdout.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed
    // Sem hard-fails without a runtime; Hybrid degrades to lexical retrieval.
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                // Replace the default answer and record which model produced it.
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write output
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1317
1318fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1319    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1320
1321    let mut additional_params = serde_json::Map::new();
1322    if let Some(cursor) = &response.params.cursor {
1323        additional_params.insert("cursor".into(), json!(cursor));
1324    }
1325
1326    let mut params = serde_json::Map::new();
1327    params.insert("top_k".into(), json!(response.params.top_k));
1328    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1329    params.insert("mode".into(), json!(mode));
1330    params.insert(
1331        "additional_params".into(),
1332        serde_json::Value::Object(additional_params),
1333    );
1334
1335    let mut metadata_json = serde_json::Map::new();
1336    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1337    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1338    metadata_json.insert(
1339        "next_cursor".into(),
1340        match &response.next_cursor {
1341            Some(cursor) => json!(cursor),
1342            None => serde_json::Value::Null,
1343        },
1344    );
1345    metadata_json.insert("engine".into(), json!(response.engine));
1346    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1347
1348    let body = json!({
1349        "version": "mv2.result.v2",
1350        "query": response.query,
1351        "metadata": metadata_json,
1352        "hits": hits,
1353        "context": response.context,
1354    });
1355    println!("{}", serde_json::to_string_pretty(&body)?);
1356    Ok(())
1357}
1358
/// Print an ask response as pretty `mv2.ask.v1` JSON on stdout.
///
/// `requested_mode` is the mode the user asked for (the actually-used
/// retriever is reported separately under `retriever`). When `model` is
/// present, `model` / `model_used` keys are added; with `include_sources`,
/// a detailed `sources` array (built via `build_sources_json`) is appended.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    model: Option<&ModelAnswer>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations carry optional chunk_range/score keys, so they are built
    // as maps rather than a fixed json! literal.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // model_used is only emitted when it differs from the requested model.
    if let Some(model) = model {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
        }
    }

    // Add detailed sources if requested
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
1432
1433fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
1434    response
1435        .citations
1436        .iter()
1437        .enumerate()
1438        .map(|(idx, citation)| {
1439            let mut source = serde_json::Map::new();
1440            source.insert("index".into(), json!(idx + 1));
1441            source.insert("frame_id".into(), json!(citation.frame_id));
1442            source.insert("uri".into(), json!(citation.uri));
1443
1444            if let Some(range) = citation.chunk_range {
1445                source.insert("chunk_range".into(), json!([range.0, range.1]));
1446            }
1447            if let Some(score) = citation.score {
1448                source.insert("score".into(), json!(score));
1449            }
1450
1451            // Get frame metadata for rich source information
1452            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1453                if let Some(title) = frame.title {
1454                    source.insert("title".into(), json!(title));
1455                }
1456                if !frame.tags.is_empty() {
1457                    source.insert("tags".into(), json!(frame.tags));
1458                }
1459                if !frame.labels.is_empty() {
1460                    source.insert("labels".into(), json!(frame.labels));
1461                }
1462                source.insert("frame_timestamp".into(), json!(frame.timestamp));
1463                if !frame.content_dates.is_empty() {
1464                    source.insert("content_dates".into(), json!(frame.content_dates));
1465                }
1466            }
1467
1468            // Get snippet from hit
1469            if let Some(hit) = response
1470                .retrieval
1471                .hits
1472                .iter()
1473                .find(|h| h.frame_id == citation.frame_id)
1474            {
1475                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
1476                source.insert("snippet".into(), json!(snippet));
1477            }
1478
1479            serde_json::Value::Object(source)
1480        })
1481        .collect()
1482}
1483
1484fn emit_model_json(
1485    response: &AskResponse,
1486    requested_model: &str,
1487    model: Option<&ModelAnswer>,
1488    include_sources: bool,
1489    mem: &mut Memvid,
1490) -> Result<()> {
1491    let answer = response.answer.clone().unwrap_or_default();
1492    let requested_label = model
1493        .map(|m| m.requested.clone())
1494        .unwrap_or_else(|| requested_model.to_string());
1495    let used_label = model
1496        .map(|m| m.model.clone())
1497        .unwrap_or_else(|| requested_model.to_string());
1498
1499    let mut body = json!({
1500        "question": response.question,
1501        "model": requested_label,
1502        "model_used": used_label,
1503        "answer": answer,
1504        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1505    });
1506
1507    // Add detailed sources if requested
1508    if include_sources {
1509        if let serde_json::Value::Object(ref mut map) = body {
1510            let sources = build_sources_json(response, mem);
1511            map.insert("sources".into(), json!(sources));
1512        }
1513    }
1514
1515    println!("{}", serde_json::to_string_pretty(&body)?);
1516    Ok(())
1517}
1518
1519fn emit_ask_pretty(
1520    response: &AskResponse,
1521    requested_mode: AskModeArg,
1522    model: Option<&ModelAnswer>,
1523    include_sources: bool,
1524    mem: &mut Memvid,
1525) {
1526    println!(
1527        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
1528        ask_mode_pretty(requested_mode),
1529        ask_retriever_pretty(response.retriever),
1530        response.retrieval.params.top_k,
1531        response.stats.latency_ms,
1532        response.stats.retrieval_ms
1533    );
1534    if let Some(model) = model {
1535        if model.requested.trim() == model.model {
1536            println!("model: {}", model.model);
1537        } else {
1538            println!(
1539                "model requested: {}   model used: {}",
1540                model.requested, model.model
1541            );
1542        }
1543    }
1544    println!(
1545        "engine: {}",
1546        search_engine_label(&response.retrieval.engine)
1547    );
1548    println!(
1549        "hits: {} (showing {})",
1550        response.retrieval.total_hits,
1551        response.retrieval.hits.len()
1552    );
1553
1554    if response.context_only {
1555        println!();
1556        println!("Context-only mode: synthesis disabled.");
1557        println!();
1558    } else if let Some(answer) = &response.answer {
1559        println!();
1560        println!("Answer:\n{answer}");
1561        println!();
1562    }
1563
1564    if !response.citations.is_empty() {
1565        println!("Citations:");
1566        for citation in &response.citations {
1567            match citation.score {
1568                Some(score) => println!(
1569                    "[{}] {} (frame {}, score {:.3})",
1570                    citation.index, citation.uri, citation.frame_id, score
1571                ),
1572                None => println!(
1573                    "[{}] {} (frame {})",
1574                    citation.index, citation.uri, citation.frame_id
1575                ),
1576            }
1577        }
1578        println!();
1579    }
1580
1581    // Print detailed sources if requested
1582    if include_sources && !response.citations.is_empty() {
1583        println!("=== SOURCES ===");
1584        println!();
1585        for citation in &response.citations {
1586            println!("[{}] {}", citation.index, citation.uri);
1587
1588            // Get frame metadata
1589            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1590                if let Some(title) = &frame.title {
1591                    println!("    Title: {}", title);
1592                }
1593                println!("    Frame ID: {}", citation.frame_id);
1594                if let Some(score) = citation.score {
1595                    println!("    Score: {:.4}", score);
1596                }
1597                if let Some((start, end)) = citation.chunk_range {
1598                    println!("    Range: [{}..{})", start, end);
1599                }
1600                if !frame.tags.is_empty() {
1601                    println!("    Tags: {}", frame.tags.join(", "));
1602                }
1603                if !frame.labels.is_empty() {
1604                    println!("    Labels: {}", frame.labels.join(", "));
1605                }
1606                println!("    Timestamp: {}", frame.timestamp);
1607                if !frame.content_dates.is_empty() {
1608                    println!("    Content Dates: {}", frame.content_dates.join(", "));
1609                }
1610            }
1611
1612            // Get snippet from hit
1613            if let Some(hit) = response
1614                .retrieval
1615                .hits
1616                .iter()
1617                .find(|h| h.frame_id == citation.frame_id)
1618            {
1619                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
1620                let truncated = if snippet.len() > 200 {
1621                    format!("{}...", &snippet[..200])
1622                } else {
1623                    snippet.clone()
1624                };
1625                println!("    Snippet: {}", truncated.replace('\n', " "));
1626            }
1627            println!();
1628        }
1629    }
1630
1631    if !include_sources {
1632        println!();
1633        emit_search_table(&response.retrieval);
1634    }
1635}
1636
1637fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
1638    let hits: Vec<_> = response
1639        .hits
1640        .iter()
1641        .map(|hit| {
1642            json!({
1643                "frame_id": hit.frame_id,
1644                "matches": hit.matches,
1645                "snippets": [hit.text.clone()],
1646            })
1647        })
1648        .collect();
1649    println!("{}", serde_json::to_string_pretty(&hits)?);
1650    Ok(())
1651}
1652
1653fn emit_search_table(response: &SearchResponse) {
1654    if response.hits.is_empty() {
1655        println!("No results for '{}'.", response.query);
1656        return;
1657    }
1658    for hit in &response.hits {
1659        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
1660        if let Some(title) = &hit.title {
1661            println!("  Title: {title}");
1662        }
1663        if let Some(score) = hit.score {
1664            println!("  Score: {score:.3}");
1665        }
1666        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
1667        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
1668            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
1669        }
1670        if let Some(chunk_text) = &hit.chunk_text {
1671            println!("  Chunk Text: {}", chunk_text.trim());
1672        }
1673        if let Some(metadata) = &hit.metadata {
1674            if let Some(track) = &metadata.track {
1675                println!("  Track: {track}");
1676            }
1677            if !metadata.tags.is_empty() {
1678                println!("  Tags: {}", metadata.tags.join(", "));
1679            }
1680            if !metadata.labels.is_empty() {
1681                println!("  Labels: {}", metadata.labels.join(", "));
1682            }
1683            if let Some(created_at) = &metadata.created_at {
1684                println!("  Created: {created_at}");
1685            }
1686            if !metadata.content_dates.is_empty() {
1687                println!("  Content Dates: {}", metadata.content_dates.join(", "));
1688            }
1689        }
1690        println!("  Snippet: {}", hit.text.trim());
1691        println!();
1692    }
1693    if let Some(cursor) = &response.next_cursor {
1694        println!("Next cursor: {cursor}");
1695    }
1696}
1697
1698fn ask_mode_display(mode: AskModeArg) -> &'static str {
1699    match mode {
1700        AskModeArg::Lex => "lex",
1701        AskModeArg::Sem => "sem",
1702        AskModeArg::Hybrid => "hybrid",
1703    }
1704}
1705
1706fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
1707    match mode {
1708        AskModeArg::Lex => "Lexical",
1709        AskModeArg::Sem => "Semantic",
1710        AskModeArg::Hybrid => "Hybrid",
1711    }
1712}
1713
1714fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
1715    match retriever {
1716        AskRetriever::Lex => "lex",
1717        AskRetriever::Semantic => "semantic",
1718        AskRetriever::Hybrid => "hybrid",
1719        AskRetriever::LexFallback => "lex_fallback",
1720        AskRetriever::TimelineFallback => "timeline_fallback",
1721    }
1722}
1723
1724fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
1725    match retriever {
1726        AskRetriever::Lex => "Lexical",
1727        AskRetriever::Semantic => "Semantic",
1728        AskRetriever::Hybrid => "Hybrid",
1729        AskRetriever::LexFallback => "Lexical (fallback)",
1730        AskRetriever::TimelineFallback => "Timeline (fallback)",
1731    }
1732}
1733
1734fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
1735    match engine {
1736        SearchEngineKind::Tantivy => "text (tantivy)",
1737        SearchEngineKind::LexFallback => "text (fallback)",
1738        SearchEngineKind::Hybrid => "hybrid",
1739    }
1740}
1741
1742fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
1743    let digest = hash(uri.as_bytes()).to_hex().to_string();
1744    let prefix_len = digest.len().min(12);
1745    let prefix = &digest[..prefix_len];
1746    format!("mv2-hit-{prefix}-{frame_id}-{start}")
1747}
1748
/// Truncate `text` to at most `limit` characters, appending `"..."` when
/// anything was cut off. Works on `char` boundaries so multi-byte UTF-8
/// sequences are never split.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // `nth(limit)` locates the byte offset of the first character past the
    // limit in a single (partial) scan; `None` means the text already fits.
    // This replaces the previous two-pass approach (`chars().count()` over
    // the whole string followed by a `take(limit).collect()`).
    match text.char_indices().nth(limit) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
1757
1758fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
1759    let mut hit_json = serde_json::Map::new();
1760    hit_json.insert("rank".into(), json!(hit.rank));
1761    if let Some(score) = hit.score {
1762        hit_json.insert("score".into(), json!(score));
1763    }
1764    hit_json.insert(
1765        "id".into(),
1766        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
1767    );
1768    hit_json.insert("frame_id".into(), json!(hit.frame_id));
1769    hit_json.insert("uri".into(), json!(hit.uri));
1770    if let Some(title) = &hit.title {
1771        hit_json.insert("title".into(), json!(title));
1772    }
1773    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
1774    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
1775    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
1776    hit_json.insert("text".into(), json!(hit.text));
1777
1778    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
1779        matches: hit.matches,
1780        ..SearchHitMetadata::default()
1781    });
1782    let mut meta_json = serde_json::Map::new();
1783    meta_json.insert("matches".into(), json!(metadata.matches));
1784    if !metadata.tags.is_empty() {
1785        meta_json.insert("tags".into(), json!(metadata.tags));
1786    }
1787    if !metadata.labels.is_empty() {
1788        meta_json.insert("labels".into(), json!(metadata.labels));
1789    }
1790    if let Some(track) = metadata.track {
1791        meta_json.insert("track".into(), json!(track));
1792    }
1793    if let Some(created_at) = metadata.created_at {
1794        meta_json.insert("created_at".into(), json!(created_at));
1795    }
1796    if !metadata.content_dates.is_empty() {
1797        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
1798    }
1799    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
1800    serde_json::Value::Object(hit_json)
1801}
1802/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
1803///
1804/// RRF is mathematically superior to raw score combination because:
1805/// - BM25 scores are unbounded (0 to infinity)
1806/// - Cosine similarity is bounded (-1 to 1)
1807/// - RRF normalizes by using only RANKS, not raw scores
1808///
1809/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
1810fn apply_semantic_rerank(
1811    runtime: &EmbeddingRuntime,
1812    mem: &mut Memvid,
1813    response: &mut SearchResponse,
1814) -> Result<()> {
1815    if response.hits.is_empty() {
1816        return Ok(());
1817    }
1818
1819    let query_embedding = runtime.embed(&response.query)?;
1820    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
1821    for hit in &response.hits {
1822        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
1823            if embedding.len() == runtime.dimension() {
1824                let score = cosine_similarity(&query_embedding, &embedding);
1825                semantic_scores.insert(hit.frame_id, score);
1826            }
1827        }
1828    }
1829
1830    if semantic_scores.is_empty() {
1831        return Ok(());
1832    }
1833
1834    // Sort by semantic score to get semantic ranks
1835    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
1836        .iter()
1837        .map(|(frame_id, score)| (*frame_id, *score))
1838        .collect();
1839    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
1840
1841    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
1842    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
1843        semantic_rank.insert(*frame_id, idx + 1);
1844    }
1845
1846    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
1847    let query_lower = response.query.to_lowercase();
1848    let is_preference_query = query_lower.contains("suggest")
1849        || query_lower.contains("recommend")
1850        || query_lower.contains("should i")
1851        || query_lower.contains("what should")
1852        || query_lower.contains("prefer")
1853        || query_lower.contains("favorite")
1854        || query_lower.contains("best for me");
1855
1856    // Pure RRF: Use ONLY ranks, NOT raw scores
1857    // This prevents a "confidently wrong" high-scoring vector from burying
1858    // a "precisely correct" keyword match
1859    const RRF_K: f32 = 60.0;
1860
1861    let mut ordering: Vec<(usize, f32, usize)> = response
1862        .hits
1863        .iter()
1864        .enumerate()
1865        .map(|(idx, hit)| {
1866            let lexical_rank = hit.rank;
1867
1868            // RRF score for lexical rank
1869            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);
1870
1871            // RRF score for semantic rank
1872            let semantic_rrf = semantic_rank
1873                .get(&hit.frame_id)
1874                .map(|rank| 1.0 / (RRF_K + *rank as f32))
1875                .unwrap_or(0.0);
1876
1877            // Apply preference boost for hits containing user preference signals
1878            // This is a small bonus for content with first-person preference indicators
1879            let preference_boost = if is_preference_query {
1880                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
1881            } else {
1882                0.0
1883            };
1884
1885            // Pure RRF: Only rank-based scores, no raw similarity scores
1886            let combined = lexical_rrf + semantic_rrf + preference_boost;
1887            (idx, combined, lexical_rank)
1888        })
1889        .collect();
1890
1891    ordering.sort_by(|a, b| {
1892        b.1.partial_cmp(&a.1)
1893            .unwrap_or(Ordering::Equal)
1894            .then(a.2.cmp(&b.2))
1895    });
1896
1897    let mut reordered = Vec::with_capacity(response.hits.len());
1898    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
1899        let mut hit = response.hits[idx].clone();
1900        hit.rank = rank_idx + 1;
1901        reordered.push(hit);
1902    }
1903
1904    response.hits = reordered;
1905    Ok(())
1906}
1907
1908/// Rerank search results by boosting hits that contain user preference signals.
1909/// Only applies when the query appears to be seeking recommendations or preferences.
1910fn apply_preference_rerank(response: &mut SearchResponse) {
1911    if response.hits.is_empty() {
1912        return;
1913    }
1914
1915    // Check if query is preference-seeking
1916    let query_lower = response.query.to_lowercase();
1917    let is_preference_query = query_lower.contains("suggest")
1918        || query_lower.contains("recommend")
1919        || query_lower.contains("should i")
1920        || query_lower.contains("what should")
1921        || query_lower.contains("prefer")
1922        || query_lower.contains("favorite")
1923        || query_lower.contains("best for me");
1924
1925    if !is_preference_query {
1926        return;
1927    }
1928
1929    // Compute boost scores for each hit
1930    let mut scored: Vec<(usize, f32, f32)> = response
1931        .hits
1932        .iter()
1933        .enumerate()
1934        .map(|(idx, hit)| {
1935            let original_score = hit.score.unwrap_or(0.0);
1936            let preference_boost = compute_preference_boost(&hit.text);
1937            let boosted_score = original_score + preference_boost;
1938            (idx, boosted_score, original_score)
1939        })
1940        .collect();
1941
1942    // Sort by boosted score (descending)
1943    scored.sort_by(|a, b| {
1944        b.1.partial_cmp(&a.1)
1945            .unwrap_or(Ordering::Equal)
1946            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
1947    });
1948
1949    // Reorder hits
1950    let mut reordered = Vec::with_capacity(response.hits.len());
1951    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
1952        let mut hit = response.hits[idx].clone();
1953        hit.rank = rank_idx + 1;
1954        reordered.push(hit);
1955    }
1956
1957    response.hits = reordered;
1958}
1959
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information relevant to recommendation queries.
///
/// Key insight: content where the user describes their ESTABLISHED
/// situation/preferences earns a strong boost (0.15 per pattern), while
/// content where the user is merely making a REQUEST earns a weak one
/// (0.02 per pattern). Both use first-person language, but they serve
/// different purposes for personalization. The total is capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    let haystack = text.to_lowercase();

    // Strong signals: past/present experiences, possessions, and habits —
    // things the user HAS DONE, HAS, or DOES REGULARLY.
    const ESTABLISHED: &[&str] = &[
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];

    // Moderate signals: general first-person statements.
    const FIRST_PERSON: &[&str] = &[" i ", " my ", " me "];

    // Weak signals: requests/intentions — the user wants something, but
    // these don't describe established context.
    const REQUESTS: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    // Groups are scanned in the same order (and with the same per-match
    // weights) as the original hand-rolled loops, so the f32 accumulation
    // is bit-for-bit identical.
    let mut score = 0.0f32;
    for &(patterns, weight) in &[
        (ESTABLISHED, 0.15f32),
        (FIRST_PERSON, 0.02),
        (REQUESTS, 0.02),
    ] {
        for pattern in patterns {
            if haystack.contains(pattern) {
                score += weight;
            }
        }
    }

    // Cap the boost to avoid over-weighting preference signals.
    score.min(0.5)
}
2052
/// Cosine similarity between two embedding vectors.
///
/// Iterates over the common prefix of `a` and `b` (extra elements in the
/// longer slice are ignored, as `zip` stops at the shorter one). Returns 0.0
/// when either vector has (near-)zero magnitude to avoid dividing by zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Single pass accumulating the dot product and both squared norms,
    // in the same element order as a plain loop.
    let (dot, norm_a, norm_b) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(dot, na, nb), (&x, &y)| {
            (dot + x * y, na + x * x, nb + y * y)
        });

    if norm_a <= f32::EPSILON || norm_b <= f32::EPSILON {
        0.0
    } else {
        dot / (norm_a.sqrt() * norm_b.sqrt())
    }
}
2069
2070/// Apply cross-encoder reranking to search results.
2071///
2072/// Cross-encoders directly score query-document pairs and can understand
2073/// more nuanced relevance than bi-encoders (embeddings). This is especially
2074/// useful for personalization queries where semantic similarity != relevance.
2075///
2076/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2077fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2078    if response.hits.is_empty() || response.hits.len() < 2 {
2079        return Ok(());
2080    }
2081
2082    // Only rerank if we have enough candidates
2083    let candidates_to_rerank = response.hits.len().min(50);
2084
2085    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2086    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2087    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2088        .with_show_download_progress(true);
2089
2090    let mut reranker = match TextRerank::try_new(options) {
2091        Ok(r) => r,
2092        Err(e) => {
2093            warn!("Failed to initialize cross-encoder reranker: {e}");
2094            return Ok(());
2095        }
2096    };
2097
2098    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2099    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2100        .iter()
2101        .map(|hit| hit.text.clone())
2102        .collect();
2103
2104    // Rerank using cross-encoder
2105    info!("Cross-encoder reranking {} candidates", documents.len());
2106    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2107        Ok(results) => results,
2108        Err(e) => {
2109            warn!("Cross-encoder reranking failed: {e}");
2110            return Ok(());
2111        }
2112    };
2113
2114    // Reorder hits based on cross-encoder scores
2115    let mut reordered = Vec::with_capacity(response.hits.len());
2116    for (new_rank, result) in rerank_results.iter().enumerate() {
2117        let original_idx = result.index;
2118        let mut hit = response.hits[original_idx].clone();
2119        hit.rank = new_rank + 1;
2120        // Store cross-encoder score in the hit score for reference
2121        hit.score = Some(result.score);
2122        reordered.push(hit);
2123    }
2124
2125    // Add any remaining hits that weren't reranked (beyond top-50)
2126    for hit in response.hits.iter().skip(candidates_to_rerank) {
2127        let mut h = hit.clone();
2128        h.rank = reordered.len() + 1;
2129        reordered.push(h);
2130    }
2131
2132    response.hits = reordered;
2133    info!("Cross-encoder reranking complete");
2134    Ok(())
2135}
2136
2137/// Build a context string from memory cards stored in the MV2 file.
2138/// Groups facts by entity for better LLM comprehension.
2139fn build_memory_context(mem: &Memvid) -> String {
2140    let entities = mem.memory_entities();
2141    if entities.is_empty() {
2142        return String::new();
2143    }
2144
2145    let mut sections = Vec::new();
2146    for entity in entities {
2147        let cards = mem.get_entity_memories(&entity);
2148        if cards.is_empty() {
2149            continue;
2150        }
2151
2152        let mut entity_lines = Vec::new();
2153        for card in cards {
2154            // Format: "slot: value" with optional polarity indicator
2155            let polarity_marker = card
2156                .polarity
2157                .as_ref()
2158                .map(|p| match p.to_string().as_str() {
2159                    "Positive" => " (+)",
2160                    "Negative" => " (-)",
2161                    _ => "",
2162                })
2163                .unwrap_or("");
2164            entity_lines.push(format!(
2165                "  - {}: {}{}",
2166                card.slot, card.value, polarity_marker
2167            ));
2168        }
2169
2170        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2171    }
2172
2173    sections.join("\n\n")
2174}