// memvid_cli/commands/search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored_json::ToColoredJson;
15use blake3::hash;
16use clap::{ArgAction, Args, ValueEnum};
17#[cfg(feature = "temporal_track")]
18use memvid_core::{
19    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
20    TemporalResolution, TemporalResolutionValue,
21};
22use memvid_core::{
23    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
24    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
25    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
26};
27#[cfg(feature = "temporal_track")]
28use serde::Serialize;
29use serde_json::json;
30#[cfg(feature = "temporal_track")]
31use time::format_description::well_known::Rfc3339;
32use time::{Date, PrimitiveDateTime, Time};
33#[cfg(feature = "temporal_track")]
34use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
35use tracing::{info, warn};
36
37use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
38
39use memvid_ask_model::{
40    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
41    ModelInference,
42};
43
44// frame_to_json and print_frame_summary available from commands but not used in this module
45use crate::config::{
46    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
47    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
48};
49use crate::utils::{
50    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51    parse_date_boundary, parse_vector, read_embedding,
52};
53
/// Maximum characters of context echoed in command output.
/// NOTE(review): not referenced within this chunk; presumably caps `ask`
/// context rendering elsewhere in the file — confirm at use sites.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
/// Fallback IANA timezone for temporal phrase resolution when `--tz` is omitted.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
58fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
59    let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
60    message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
61    if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
62        message.push_str(&format!(
63            "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
64            model.name(),
65            model.name()
66        ));
67        if model.is_openai() {
68            message.push_str(" (and set `OPENAI_API_KEY`).");
69        } else {
70            message.push('.');
71        }
72        message.push_str(&format!(
73            "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
74            model.name()
75        ));
76        message.push_str(&format!(
77            "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
78        ));
79        message.push_str("\nOr use `--mode lex` to disable semantic search.");
80    }
81    message
82}
83
/// Arguments for the `timeline` subcommand
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read (opened read-only in handle_timeline).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of human-readable lines.
    #[arg(long)]
    pub json: bool,
    // Reverse the entry ordering.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. `--on "last tuesday"`).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // Timezone for phrase resolution; only valid together with `--on` (E-TEMP-005).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor instant for relative phrases; only valid with `--on`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window; only valid with `--on`.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
118
/// Arguments for the `when` subcommand
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read (opened read-only in handle_when).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Required natural-language temporal phrase to resolve.
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // Timezone for phrase resolution (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 anchor instant for relative phrases (defaults to now).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the entry ordering.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of human-readable lines.
    #[arg(long)]
    pub json: bool,
}
144
/// Arguments for the `ask` subcommand
#[derive(Args)]
pub struct AskArgs {
    // Positional targets: memory file path(s) and/or question words
    // (presumably disambiguated by the handler — confirm at call site).
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (alternative to positional words).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to an exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Fixed number of hits to retrieve (see adaptive options below).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid (default).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON instead of human-oriented text.
    #[arg(long)]
    pub json: bool,
    // Print retrieved context only; skip answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Inclusive date filters — presumably parsed via utils::parse_date_boundary
    // (imported above); confirm format at the handler.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Synthesize an answer with an LLM (defaults to tinyllama when provided without a value).
    ///
    /// Examples:
    /// - `--use-model` (local TinyLlama)
    /// - `--use-model openai` (defaults to gpt-4o-mini; requires OPENAI_API_KEY)
    /// - `--use-model nvidia` (defaults to meta/llama3-8b-instruct; requires NVIDIA_API_KEY)
    /// - `--use-model nvidia:meta/llama3-70b-instruct`
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
232
/// Ask mode argument
// Plain `//` comments here on purpose: `///` on ValueEnum variants would
// become CLI help text and change user-visible output.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical (keyword) retrieval only.
    Lex,
    // Semantic (vector) retrieval only.
    Sem,
    // Blend of lexical and semantic retrieval (CLI default).
    Hybrid,
}
240
/// One-to-one mapping from the CLI-facing `--mode` flag onto the core engine's `AskMode`.
impl From<AskModeArg> for AskMode {
    fn from(value: AskModeArg) -> Self {
        match value {
            AskModeArg::Lex => AskMode::Lex,
            AskModeArg::Sem => AskMode::Sem,
            AskModeArg::Hybrid => AskMode::Hybrid,
        }
    }
}
250
/// Arguments for the `find` subcommand
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Required query text.
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict retrieval to an exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Fixed number of hits to retrieve (see adaptive options below).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit JSON output (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit the legacy JSON schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode; `auto` lets the engine choose.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options (enabled by default for best results)
    /// Disable adaptive retrieval and use fixed top-k instead.
    /// By default, adaptive retrieval is enabled with the 'combined' strategy.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: combined (default), relative, absolute, cliff, or elbow
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// Enable graph-aware search: filter by entity relationships before ranking.
    /// Uses MemoryCards to find entities matching patterns like "who lives in X".
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// Enable hybrid search: combine graph filtering with text search.
    /// Automatically detects relational patterns in the query.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,
}
312
/// Search mode argument
// Plain `//` comments on the undocumented variants on purpose: `///` would
// become CLI help text and change user-visible output.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Engine decides between lexical and semantic.
    // NOTE(review): the exact auto heuristic lives in memvid-core — confirm there.
    Auto,
    // Lexical (keyword) search only.
    Lex,
    // Semantic (vector) search only.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
323
/// Adaptive retrieval strategy
// NOTE(review): presumably translated to memvid_core's `CutoffStrategy`
// (imported above) by the handlers — confirm the mapping at call sites.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
338
/// Arguments for the `vec-search` subcommand
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as comma-separated floats; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file holding the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of hits to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON instead of human-readable output.
    #[arg(long)]
    pub json: bool,
}
353
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
390
/// Audit output format
// Selected via `--format` on AuditArgs (default: text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
401
402// ============================================================================
403// Search & Retrieval command handlers
404// ============================================================================
405
/// Handle the `timeline` subcommand: list frames chronologically, optionally
/// constrained by `--limit`/`--since`/`--until`, a temporal phrase (`--on`,
/// temporal_track only), and replay cutoffs (`--as-of-frame`/`--as-of-ts`).
///
/// Output is pretty JSON when `--json` is set, otherwise one human-readable
/// line per entry (plus URI/child-frame/temporal details where present).
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only make sense together with an `--on` phrase.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the temporal phrase (if any) into a filter, keeping the summary
    // around so it can be echoed alongside the results.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With temporal_track, wrap entries with the resolved-window summary
        // when a phrase was supplied; otherwise emit the bare entry list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
511
/// Handle the `when` subcommand: resolve a natural-language temporal phrase
/// into a UTC window, then list the frames that fall inside it.
///
/// JSON mode serializes the resolution summary plus per-entry views;
/// otherwise the summary and entries are printed in human-readable form.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    // JSON mode: emit summary + entry views and return early.
    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Show both the raw Unix timestamp and its ISO rendering (when representable).
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
579
/// JSON envelope for `timeline --json` under temporal_track: the resolved
/// temporal window (when `--on` was supplied) plus the matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Present only when a temporal phrase was resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Borrowed to avoid cloning the entry list just for serialization.
    entries: &'a [TimelineEntry],
}
587
/// JSON envelope for `when --json`: the phrase-resolution summary plus
/// the per-entry views of the matching frames.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
594
/// Serializable view of a [`TimelineEntry`] for `when --json`, adding an
/// ISO-8601 rendering of the timestamp. Built by `entry_to_when_entry`.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Raw Unix timestamp (seconds).
    timestamp: i64,
    /// ISO-8601 form of `timestamp`, omitted when not representable.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
610
/// Serializable description of how a temporal phrase was resolved:
/// the phrase, timezone, anchor instant, confidence/flags, and the
/// resulting UTC window in both epoch-seconds and ISO-8601 forms.
/// Built from [`TemporalSummary`] by `summary_to_output`.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Padding requested via `--window`, echoed back verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
629
/// Internal record of a phrase resolution, kept so the CLI can report it
/// (printed by `print_temporal_summary`, serialized via `summary_to_output`).
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    /// Final UTC window bounds, after any `--window` padding was applied.
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    /// Padding requested via `--window`, if any.
    window_minutes: Option<u64>,
}
640
/// Resolve a natural-language temporal phrase into a concrete UTC window.
///
/// `tz_override` defaults to [`DEFAULT_TEMPORAL_TZ`]; `anchor_override`
/// (RFC3339) defaults to "now"; `window_minutes` symmetrically pads the
/// resolved bounds. Returns the core [`TemporalFilter`] plus a
/// [`TemporalSummary`] for user-facing reporting. Errors carry stable
/// `E-TEMP-*` codes.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    // Anchor defaults to the current instant; an explicit anchor must parse as RFC3339.
    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Optional symmetric padding around the resolved window. The previous
    // version branched on `s == e` with two byte-identical arms; both cases
    // pad the same way, so the dead conditional is collapsed. The minute
    // count is clamped so the seconds conversion cannot wrap a signed cast.
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let delta = TimeDuration::minutes(minutes.min(i64::MAX as u64 / 60) as i64);
            let pad = delta.whole_seconds();
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(pad));
                end = Some(e.saturating_add(pad));
            }
        }
    }

    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
704
/// Convert the internal [`TemporalSummary`] into its serializable output form,
/// rendering the anchor and window bounds as ISO-8601 where representable.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    let anchor_utc = summary.anchor.unix_timestamp();
    // Fall back to the raw epoch seconds if RFC3339 formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
730
/// Clone a core [`TimelineEntry`] into the JSON-friendly [`WhenEntry`] view,
/// adding an ISO-8601 rendering of the timestamp when representable.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
743
/// Print a human-readable description of how a temporal phrase was resolved:
/// phrase, timezone, anchor, resulting window, confidence, flags, and padding.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Fall back to raw epoch seconds if the anchor cannot be RFC3339-formatted.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        (None, None) => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    match summary.window_minutes {
        Some(window) if window > 0 => println!("Window padding: {window} minute(s)"),
        _ => {}
    }
    // Blank line separates the summary from the entry listing.
    println!();
}
781
/// Print the per-entry temporal details (anchor and mentions) as an
/// indented continuation of a timeline/when entry line.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, then raw seconds.
        let shown = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", shown, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            "    - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
814
/// Map a resolved temporal value onto inclusive `(start, end)` Unix bounds.
///
/// Point values (Date/DateTime) yield identical start and end. Month values
/// span the first through the last day of the month.
/// NOTE(review): range ends derived from a `Date` resolve to midnight at the
/// *start* of that day, so events later in the final day may fall outside the
/// window — confirm against `TemporalFilter` comparison semantics.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
845
/// Stable string tag naming the shape of a temporal resolution value
/// (used as a machine-readable discriminator in output).
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    // Variants are disjoint, so arm order is irrelevant.
    match resolution.value {
        TemporalResolutionValue::Month { .. } => "month",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::Date(_) => "date",
    }
}
856
/// Unix timestamp (seconds) of midnight UTC on the given calendar date.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
863
/// Last calendar day of the given month, found by walking forward from the
/// first of the month until the month changes.
///
/// # Errors
/// Returns an error if `(year, month, 1)` is not a valid calendar date.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut current = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match current.next_day() {
            // Still inside the target month: keep walking.
            Some(next) if next.month() == month => current = next,
            // Month rolled over (or calendar exhausted): `current` is the last day.
            _ => return Ok(current),
        }
    }
}
877
878#[cfg(feature = "temporal_track")]
879
880fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
881    if fragments.is_empty() {
882        return;
883    }
884
885    response.context_fragments = fragments
886        .into_iter()
887        .map(|fragment| AskContextFragment {
888            rank: fragment.rank,
889            frame_id: fragment.frame_id,
890            uri: fragment.uri,
891            title: fragment.title,
892            score: fragment.score,
893            matches: fragment.matches,
894            range: Some(fragment.range),
895            chunk_range: fragment.chunk_range,
896            text: fragment.text,
897            kind: Some(match fragment.kind {
898                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
899                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
900            }),
901            #[cfg(feature = "temporal_track")]
902            temporal: None,
903        })
904        .collect();
905}
906
/// `memvid ask`: retrieve context for a question and optionally synthesize an
/// answer with a local model.
///
/// Pipeline: resolve the memory file and question from positional targets and
/// flags, open the MV2 file, load an embedding runtime matched to the stored
/// vector dimension (semantic/hybrid modes only), run retrieval via
/// `Memvid::ask` (with adaptive cutoff unless `--no-adaptive`), then
/// post-process the retrieved context (cross-encoder rerank, memory cards,
/// entity context, PII masking) before optional model inference and
/// JSON/pretty output.
///
/// # Errors
/// Fails on: inactive plan, missing/empty question, invalid `--start`/`--end`
/// window, unopenable memory file, mixed embedding models in semantic modes,
/// unavailable embedding runtime for sem/hybrid, or retrieval errors
/// (vector-dimension mismatches get a dedicated help message).
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    // Check if plan allows query operations (blocks expired subscriptions)
    crate::utils::require_active_plan(config, "ask")?;

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory file
    // becomes the target path; every other token contributes to the question.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // Explicit --question wins over the positional question; whitespace-only
    // input is treated as absent.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    // Fall back to auto-detection when no memory file was given positionally.
    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Validate the optional time window before touching the memory.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Load active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.effective_vec_index_dimension()?;

    let ask_mode: AskMode = args.mode.into();
    // For semantic modes, infer the embedding model recorded in the memory so
    // query embeddings match the stored vectors. Mixed models make vector
    // scores incomparable, so bail out with a remediation hint.
    let inferred_model_override = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
            memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
                let models: Vec<_> = identities
                    .iter()
                    .filter_map(|entry| entry.identity.model.as_deref())
                    .collect();
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            memvid_core::EmbeddingIdentitySummary::Unknown => None,
        },
    };
    // CLI override takes precedence over the model inferred from the memory.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config (enabled by default, use --no-adaptive to disable)
    let adaptive = if !args.no_adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    // Dimension mismatches get a dedicated, actionable error message.
    let mut response = mem.ask(request, embedder).map_err(|err| match err {
        MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
        other => anyhow!(other),
    })?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Reranking is best-effort: on failure, keep the original order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Synthesis step: run the chosen local model over the retrieved context.
    // Inference failure is non-fatal — we warn and fall back to the default
    // summary path so the user still gets retrieval output.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Record the ask action if a replay session is active
    #[cfg(feature = "replay")]
    if let Some(ref model_answer) = model_result {
        if let Some(model_name) = args.use_model.as_deref() {
            mem.record_ask_action(
                &response.question,
                model_name, // provider
                model_name, // model
                model_answer.answer.as_bytes(),
                0, // duration_ms not tracked at this level
            );
        }
    }

    // Output: model-specific JSON when a model ran, generic ask JSON otherwise,
    // or the human-readable pretty form.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    // Save active replay session if one exists
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1232
1233/// Handle graph-aware find with --graph or --hybrid flags
1234fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1235    use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1236    use memvid_core::types::QueryPlan;
1237
1238    let planner = QueryPlanner::new();
1239
1240    // Create query plan based on mode
1241    let plan = if args.graph {
1242        // Pure graph mode - let planner detect patterns
1243        let plan = planner.plan(&args.query, args.top_k);
1244        // If it's a hybrid plan from auto-detection, convert to graph-only
1245        match plan {
1246            QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1247                QueryPlan::graph_only(graph_filter, args.top_k)
1248            }
1249            _ => plan,
1250        }
1251    } else {
1252        // Hybrid mode - use the auto-detected plan
1253        planner.plan(&args.query, args.top_k)
1254    };
1255
1256    // Execute the search
1257    let hits = hybrid_search(mem, &plan)?;
1258
1259    if args.json {
1260        // JSON output
1261        let output = serde_json::json!({
1262            "query": args.query,
1263            "mode": if args.graph { "graph" } else { "hybrid" },
1264            "plan": format!("{:?}", plan),
1265            "hits": hits.iter().map(|h| {
1266                serde_json::json!({
1267                    "frame_id": h.frame_id,
1268                    "score": h.score,
1269                    "graph_score": h.graph_score,
1270                    "vector_score": h.vector_score,
1271                    "matched_entity": h.matched_entity,
1272                    "preview": h.preview,
1273                })
1274            }).collect::<Vec<_>>(),
1275        });
1276        println!("{}", serde_json::to_string_pretty(&output)?);
1277    } else {
1278        // Human-readable output
1279        let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1280        println!("{} search for: \"{}\"", mode_str, args.query);
1281        println!("Plan: {:?}", plan);
1282        println!();
1283
1284        if hits.is_empty() {
1285            println!("No results found.");
1286        } else {
1287            println!("Results ({} hits):", hits.len());
1288            for (i, hit) in hits.iter().enumerate() {
1289                println!();
1290                println!(
1291                    "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1292                    i + 1,
1293                    hit.frame_id,
1294                    hit.score,
1295                    hit.graph_score,
1296                    hit.vector_score
1297                );
1298                if let Some(entity) = &hit.matched_entity {
1299                    println!("   Matched entity: {}", entity);
1300                }
1301                if let Some(preview) = &hit.preview {
1302                    let truncated = if preview.len() > 200 {
1303                        format!("{}...", &preview[..200])
1304                    } else {
1305                        preview.clone()
1306                    };
1307                    println!("   {}", truncated.replace('\n', " "));
1308                }
1309            }
1310        }
1311    }
1312
1313    Ok(())
1314}
1315
1316pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
1317    // Check if plan allows query operations (blocks expired subscriptions)
1318    crate::utils::require_active_plan(config, "find")?;
1319
1320    let mut mem = open_read_only_mem(&args.file)?;
1321
1322    // Load active replay session if one exists
1323    #[cfg(feature = "replay")]
1324    let _ = mem.load_active_session();
1325
1326    // Handle graph-aware and hybrid search modes
1327    if args.graph || args.hybrid {
1328        return handle_graph_find(&mut mem, &args);
1329    }
1330
1331    if args.uri.is_some() && args.scope.is_some() {
1332        warn!("--scope ignored because --uri is provided");
1333    }
1334
1335    // Get vector dimension from MV2 for auto-detection
1336    let mv2_dimension = mem.effective_vec_index_dimension()?;
1337    let identity_summary = match args.mode {
1338        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
1339        #[cfg(feature = "clip")]
1340        SearchMode::Clip => None,
1341        SearchMode::Lex => None,
1342    };
1343
1344    let mut semantic_allowed = true;
1345    let inferred_model_override = match identity_summary.as_ref() {
1346        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
1347            identity.model.as_deref().map(|value| value.to_string())
1348        }
1349        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
1350            let models: Vec<_> = identities
1351                .iter()
1352                .filter_map(|entry| entry.identity.model.as_deref())
1353                .collect();
1354            if args.mode == SearchMode::Sem {
1355                anyhow::bail!(
1356                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1357                    Detected models: {:?}\n\n\
1358                    Suggested fix: split into separate memories per embedding model.",
1359                    models
1360                );
1361            }
1362            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
1363            semantic_allowed = false;
1364            None
1365        }
1366        _ => None,
1367    };
1368
1369    let emb_model_override = args
1370        .query_embedding_model
1371        .as_deref()
1372        .or(inferred_model_override.as_deref());
1373
1374    let (mode_label, runtime_option) = match args.mode {
1375        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
1376        SearchMode::Sem => {
1377            let runtime =
1378                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
1379            ("Semantic (vector search)".to_string(), Some(runtime))
1380        }
1381        SearchMode::Auto => {
1382            if !semantic_allowed {
1383                ("Lexical (semantic unsafe)".to_string(), None)
1384            } else if let Some(runtime) =
1385                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1386            {
1387                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
1388            } else {
1389                ("Lexical (semantic unavailable)".to_string(), None)
1390            }
1391        }
1392        #[cfg(feature = "clip")]
1393        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
1394    };
1395
1396    let mode_key = match args.mode {
1397        SearchMode::Sem => "semantic",
1398        SearchMode::Lex => "text",
1399        SearchMode::Auto => {
1400            if runtime_option.is_some() {
1401                "hybrid"
1402            } else {
1403                "text"
1404            }
1405        }
1406        #[cfg(feature = "clip")]
1407        SearchMode::Clip => "clip",
1408    };
1409
1410    // For CLIP mode, use CLIP visual search
1411    #[cfg(feature = "clip")]
1412    if args.mode == SearchMode::Clip {
1413        use memvid_core::clip::{ClipConfig, ClipModel};
1414
1415        // Initialize CLIP model
1416        let config = ClipConfig::default();
1417        let clip = ClipModel::new(config).map_err(|e| {
1418            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
1419        })?;
1420
1421        // Encode query text
1422        let query_embedding = clip
1423            .encode_text(&args.query)
1424            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;
1425
1426        // Search CLIP index
1427        let hits = mem.search_clip(&query_embedding, args.top_k)?;
1428
1429        // Debug distances before filtering
1430        for hit in &hits {
1431            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
1432                tracing::debug!(
1433                    frame_id = hit.frame_id,
1434                    title = %frame.title.unwrap_or_default(),
1435                    page = hit.page,
1436                    distance = hit.distance,
1437                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1438                    "CLIP raw hit"
1439                );
1440            } else {
1441                tracing::debug!(
1442                    frame_id = hit.frame_id,
1443                    page = hit.page,
1444                    distance = hit.distance,
1445                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
1446                    "CLIP raw hit (missing frame)"
1447                );
1448            }
1449        }
1450
1451        // CLIP distance threshold for filtering poor matches
1452        // CLIP uses L2 distance on normalized embeddings:
1453        //   - distance² = 2(1 - cosine_similarity)
1454        //   - distance = 0 → identical (cosine_sim = 1)
1455        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
1456        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
1457        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
1458        //   - distance = 2.0 → opposite (cosine_sim = -1)
1459        //
1460        // MobileCLIP text-to-image matching typically produces lower scores than expected.
1461        // Good matches are usually in the 0.20-0.35 cosine similarity range.
1462        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
1463        const CLIP_MAX_DISTANCE: f32 = 1.26;
1464
1465        // Convert CLIP hits to SearchResponse format, filtering by threshold
1466        let search_hits: Vec<SearchHit> = hits
1467            .into_iter()
1468            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
1469            .enumerate()
1470            .filter_map(|(rank, hit)| {
1471                // Convert L2 distance to cosine similarity for display
1472                // cos_sim = 1 - (distance² / 2)
1473                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);
1474
1475                // Get frame preview for snippet
1476                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
1477                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
1478                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
1479                let title = match (base_title, hit.page) {
1480                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
1481                    (Some(t), None) => Some(t),
1482                    (None, Some(p)) => Some(format!("Page {p}")),
1483                    _ => None,
1484                };
1485                Some(SearchHit {
1486                    rank: rank + 1,
1487                    frame_id: hit.frame_id,
1488                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
1489                    title,
1490                    text: preview.clone(),
1491                    chunk_text: Some(preview),
1492                    range: (0, 0),
1493                    chunk_range: None,
1494                    matches: 0,
1495                    score: Some(cosine_similarity),
1496                    metadata: None,
1497                })
1498            })
1499            .collect();
1500
1501        let response = SearchResponse {
1502            query: args.query.clone(),
1503            hits: search_hits.clone(),
1504            total_hits: search_hits.len(),
1505            params: memvid_core::SearchParams {
1506                top_k: args.top_k,
1507                snippet_chars: args.snippet_chars,
1508                cursor: args.cursor.clone(),
1509            },
1510            elapsed_ms: 0,
1511            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
1512            next_cursor: None,
1513            context: String::new(),
1514        };
1515
1516        if args.json_legacy {
1517            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1518            emit_legacy_search_json(&response)?;
1519        } else if args.json {
1520            emit_search_json(&response, mode_key)?;
1521        } else {
1522            println!(
1523                "mode: {}   k={}   time: {} ms",
1524                mode_label, response.params.top_k, response.elapsed_ms
1525            );
1526            println!("engine: clip (MobileCLIP-S2)");
1527            println!(
1528                "hits: {} (showing {})",
1529                response.total_hits,
1530                response.hits.len()
1531            );
1532            emit_search_table(&response);
1533        }
1534        return Ok(());
1535    }
1536
1537    // For semantic mode, use pure vector search.
1538    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
1539        let runtime = runtime_option
1540            .as_ref()
1541            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;
1542
1543        // Embed the query
1544        let query_embedding = runtime.embed_query(&args.query)?;
1545
1546        // Use pure vector search (adaptive by default, use --no-adaptive to disable)
1547        let scope = args.scope.as_deref().or(args.uri.as_deref());
1548
1549        if !args.no_adaptive {
1550            // Build adaptive config from CLI args
1551            let strategy = match args.adaptive_strategy {
1552                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1553                    min_ratio: args.min_relevancy,
1554                },
1555                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1556                    min_score: args.min_relevancy,
1557                },
1558                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1559                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
1560                },
1561                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1562                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1563                    relative_threshold: args.min_relevancy,
1564                    max_drop_ratio: 0.35,
1565                    absolute_min: 0.3,
1566                },
1567            };
1568
1569            let config = AdaptiveConfig {
1570                enabled: true,
1571                max_results: args.max_k,
1572                min_results: 1,
1573                strategy,
1574                normalize_scores: true,
1575            };
1576
1577            match mem.search_adaptive(
1578                &args.query,
1579                &query_embedding,
1580                config,
1581                args.snippet_chars,
1582                scope,
1583            ) {
1584                Ok(result) => {
1585                    let mut resp = SearchResponse {
1586                        query: args.query.clone(),
1587                        hits: result.results,
1588                        total_hits: result.stats.returned,
1589                        params: memvid_core::SearchParams {
1590                            top_k: result.stats.returned,
1591                            snippet_chars: args.snippet_chars,
1592                            cursor: args.cursor.clone(),
1593                        },
1594                        elapsed_ms: 0,
1595                        engine: SearchEngineKind::Hybrid,
1596                        next_cursor: None,
1597                        context: String::new(),
1598                    };
1599                    apply_preference_rerank(&mut resp);
1600                    (
1601                        resp,
1602                        "semantic (adaptive vector search)".to_string(),
1603                        Some(result.stats),
1604                    )
1605                }
1606                Err(e) => {
1607                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1608                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1609                    }
1610
1611                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
1612                    match mem.vec_search_with_embedding(
1613                        &args.query,
1614                        &query_embedding,
1615                        args.top_k,
1616                        args.snippet_chars,
1617                        scope,
1618                    ) {
1619                        Ok(mut resp) => {
1620                            apply_preference_rerank(&mut resp);
1621                            (resp, "semantic (vector search fallback)".to_string(), None)
1622                        }
1623                        Err(e2) => {
1624                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
1625                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1626                            }
1627                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
1628                        }
1629                    }
1630                }
1631            }
1632        } else {
1633            // Standard fixed-k vector search
1634            match mem.vec_search_with_embedding(
1635                &args.query,
1636                &query_embedding,
1637                args.top_k,
1638                args.snippet_chars,
1639                scope,
1640            ) {
1641                Ok(mut resp) => {
1642                    // Apply preference boost to rerank results for preference-seeking queries
1643                    apply_preference_rerank(&mut resp);
1644                    (resp, "semantic (vector search)".to_string(), None)
1645                }
1646                Err(e) => {
1647                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
1648                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
1649                    }
1650
1651                    // Fall back to lexical search + rerank if vector search fails
1652                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
1653                    let request = SearchRequest {
1654                        query: args.query.clone(),
1655                        top_k: args.top_k,
1656                        snippet_chars: args.snippet_chars,
1657                        uri: args.uri.clone(),
1658                        scope: args.scope.clone(),
1659                        cursor: args.cursor.clone(),
1660                        #[cfg(feature = "temporal_track")]
1661                        temporal: None,
1662                        as_of_frame: args.as_of_frame,
1663                        as_of_ts: args.as_of_ts,
1664                    };
1665                    let mut resp = mem.search(request)?;
1666                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1667                    (resp, "semantic (fallback rerank)".to_string(), None)
1668                }
1669            }
1670        }
1671    } else {
1672        // For lexical and auto modes, use existing behavior
1673        let request = SearchRequest {
1674            query: args.query.clone(),
1675            top_k: args.top_k,
1676            snippet_chars: args.snippet_chars,
1677            uri: args.uri.clone(),
1678            scope: args.scope.clone(),
1679            cursor: args.cursor.clone(),
1680            #[cfg(feature = "temporal_track")]
1681            temporal: None,
1682            as_of_frame: args.as_of_frame,
1683            as_of_ts: args.as_of_ts,
1684        };
1685
1686        let mut resp = mem.search(request)?;
1687
1688        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
1689            warn!("Search index unavailable; returning basic text results");
1690        }
1691
1692        let mut engine_label = match resp.engine {
1693            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
1694            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
1695            SearchEngineKind::Hybrid => "hybrid".to_string(),
1696        };
1697
1698        if runtime_option.is_some() {
1699            engine_label = format!("hybrid ({engine_label} + semantic)");
1700        }
1701
1702        if let Some(ref runtime) = runtime_option {
1703            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
1704        }
1705
1706        (resp, engine_label, None)
1707    };
1708
1709    if args.json_legacy {
1710        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
1711        emit_legacy_search_json(&response)?;
1712    } else if args.json {
1713        emit_search_json(&response, mode_key)?;
1714    } else {
1715        println!(
1716            "mode: {}   k={}   time: {} ms",
1717            mode_label, response.params.top_k, response.elapsed_ms
1718        );
1719        println!("engine: {}", engine_label);
1720
1721        // Show adaptive retrieval stats if enabled
1722        if let Some(ref stats) = adaptive_stats {
1723            println!(
1724                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
1725                stats.total_considered,
1726                stats.returned,
1727                stats.triggered_by,
1728                stats.top_score.unwrap_or(0.0),
1729                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
1730            );
1731        }
1732
1733        println!(
1734            "hits: {} (showing {})",
1735            response.total_hits,
1736            response.hits.len()
1737        );
1738        emit_search_table(&response);
1739    }
1740
1741    // Save active replay session if one exists
1742    #[cfg(feature = "replay")]
1743    let _ = mem.save_active_session();
1744
1745    Ok(())
1746}
1747
1748pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1749    let mut mem = open_read_only_mem(&args.file)?;
1750    let vector = if let Some(path) = args.embedding.as_deref() {
1751        read_embedding(path)?
1752    } else if let Some(vector_string) = &args.vector {
1753        parse_vector(vector_string)?
1754    } else {
1755        anyhow::bail!("provide --vector or --embedding for search input");
1756    };
1757
1758    let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1759        MemvidError::VecDimensionMismatch { expected, actual } => {
1760            anyhow!(vec_dimension_mismatch_help(expected, actual))
1761        }
1762        other => anyhow!(other),
1763    })?;
1764    let mut enriched = Vec::with_capacity(hits.len());
1765    for hit in hits {
1766        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1767        enriched.push((hit.frame_id, hit.distance, preview));
1768    }
1769
1770    if args.json {
1771        let json_hits: Vec<_> = enriched
1772            .iter()
1773            .map(|(frame_id, distance, preview)| {
1774                json!({
1775                    "frame_id": frame_id,
1776                    "distance": distance,
1777                    "preview": preview,
1778                })
1779            })
1780            .collect();
1781        let json_str = serde_json::to_string_pretty(&json_hits)?;
1782        println!("{}", json_str.to_colored_json_auto()?);
1783    } else if enriched.is_empty() {
1784        println!("No vector matches found");
1785    } else {
1786        for (frame_id, distance, preview) in enriched {
1787            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1788        }
1789    }
1790    Ok(())
1791}
1792
/// Handle the `audit` command: gather evidence for a question and emit a report.
///
/// Flow:
/// 1. Open the archive and validate the optional `--start`/`--end` window.
/// 2. Load an embedding runtime according to the mode (`sem` requires one,
///    `hybrid` loads best-effort, `lex` skips it entirely).
/// 3. Run `Memvid::audit` to collect sources and a default answer.
/// 4. Optionally synthesize the answer via `--use-model`; inference failure is
///    non-fatal (warn and keep the default answer).
/// 5. Render text/markdown/JSON and write to `--out` or stdout.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries; the bool flag presumably selects end-of-range
    // rounding for --end — confirm against parse_date_boundary.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    // Reject an inverted window only when both boundaries are given.
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed. `Sem` hard-fails when the runtime
    // cannot load; `Hybrid` degrades gracefully via the try_ variant.
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options from the CLI flags; snippets are always requested
    // because both the report and model synthesis below consume them.
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference: concatenate every
        // available snippet, blank-line separated.
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                // Replace the default answer and record provenance in notes.
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                // Best-effort: keep the audit's default answer on failure.
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write output to --out when given, otherwise to stdout.
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1885
1886fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1887    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1888
1889    let mut additional_params = serde_json::Map::new();
1890    if let Some(cursor) = &response.params.cursor {
1891        additional_params.insert("cursor".into(), json!(cursor));
1892    }
1893
1894    let mut params = serde_json::Map::new();
1895    params.insert("top_k".into(), json!(response.params.top_k));
1896    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1897    params.insert("mode".into(), json!(mode));
1898    params.insert(
1899        "additional_params".into(),
1900        serde_json::Value::Object(additional_params),
1901    );
1902
1903    let mut metadata_json = serde_json::Map::new();
1904    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1905    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1906    metadata_json.insert(
1907        "next_cursor".into(),
1908        match &response.next_cursor {
1909            Some(cursor) => json!(cursor),
1910            None => serde_json::Value::Null,
1911        },
1912    );
1913    metadata_json.insert("engine".into(), json!(response.engine));
1914    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1915
1916    let body = json!({
1917        "version": "mv2.result.v2",
1918        "query": response.query,
1919        "metadata": metadata_json,
1920        "hits": hits,
1921        "context": response.context,
1922    });
1923    let json_str = serde_json::to_string_pretty(&body)?;
1924    println!("{}", json_str.to_colored_json_auto()?);
1925    Ok(())
1926}
1927
1928fn emit_ask_json(
1929    response: &AskResponse,
1930    requested_mode: AskModeArg,
1931    model: Option<&ModelAnswer>,
1932    include_sources: bool,
1933    mem: &mut Memvid,
1934) -> Result<()> {
1935    let hits: Vec<_> = response
1936        .retrieval
1937        .hits
1938        .iter()
1939        .map(search_hit_to_json)
1940        .collect();
1941
1942    let citations: Vec<_> = response
1943        .citations
1944        .iter()
1945        .map(|citation| {
1946            let mut map = serde_json::Map::new();
1947            map.insert("index".into(), json!(citation.index));
1948            map.insert("frame_id".into(), json!(citation.frame_id));
1949            map.insert("uri".into(), json!(citation.uri));
1950            if let Some(range) = citation.chunk_range {
1951                map.insert("chunk_range".into(), json!([range.0, range.1]));
1952            }
1953            if let Some(score) = citation.score {
1954                map.insert("score".into(), json!(score));
1955            }
1956            serde_json::Value::Object(map)
1957        })
1958        .collect();
1959
1960    let mut body = json!({
1961        "version": "mv2.ask.v1",
1962        "question": response.question,
1963        "answer": response.answer,
1964        "context_only": response.context_only,
1965        "mode": ask_mode_display(requested_mode),
1966        "retriever": ask_retriever_display(response.retriever),
1967        "top_k": response.retrieval.params.top_k,
1968        "results": hits,
1969        "citations": citations,
1970        "stats": {
1971            "retrieval_ms": response.stats.retrieval_ms,
1972            "synthesis_ms": response.stats.synthesis_ms,
1973            "latency_ms": response.stats.latency_ms,
1974        },
1975        "engine": search_engine_label(&response.retrieval.engine),
1976        "total_hits": response.retrieval.total_hits,
1977        "next_cursor": response.retrieval.next_cursor,
1978        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1979    });
1980
1981    if let Some(model) = model {
1982        if let serde_json::Value::Object(ref mut map) = body {
1983            map.insert("model".into(), json!(model.requested));
1984            if model.model != model.requested {
1985                map.insert("model_used".into(), json!(model.model));
1986            }
1987        }
1988    }
1989
1990    // Add detailed sources if requested
1991    if include_sources {
1992        if let serde_json::Value::Object(ref mut map) = body {
1993            let sources = build_sources_json(response, mem);
1994            map.insert("sources".into(), json!(sources));
1995        }
1996    }
1997
1998    println!("{}", serde_json::to_string_pretty(&body)?);
1999    Ok(())
2000}
2001
2002fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2003    response
2004        .citations
2005        .iter()
2006        .enumerate()
2007        .map(|(idx, citation)| {
2008            let mut source = serde_json::Map::new();
2009            source.insert("index".into(), json!(idx + 1));
2010            source.insert("frame_id".into(), json!(citation.frame_id));
2011            source.insert("uri".into(), json!(citation.uri));
2012
2013            if let Some(range) = citation.chunk_range {
2014                source.insert("chunk_range".into(), json!([range.0, range.1]));
2015            }
2016            if let Some(score) = citation.score {
2017                source.insert("score".into(), json!(score));
2018            }
2019
2020            // Get frame metadata for rich source information
2021            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2022                if let Some(title) = frame.title {
2023                    source.insert("title".into(), json!(title));
2024                }
2025                if !frame.tags.is_empty() {
2026                    source.insert("tags".into(), json!(frame.tags));
2027                }
2028                if !frame.labels.is_empty() {
2029                    source.insert("labels".into(), json!(frame.labels));
2030                }
2031                source.insert("frame_timestamp".into(), json!(frame.timestamp));
2032                if !frame.content_dates.is_empty() {
2033                    source.insert("content_dates".into(), json!(frame.content_dates));
2034                }
2035            }
2036
2037            // Get snippet from hit
2038            if let Some(hit) = response
2039                .retrieval
2040                .hits
2041                .iter()
2042                .find(|h| h.frame_id == citation.frame_id)
2043            {
2044                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2045                source.insert("snippet".into(), json!(snippet));
2046            }
2047
2048            serde_json::Value::Object(source)
2049        })
2050        .collect()
2051}
2052
2053fn emit_model_json(
2054    response: &AskResponse,
2055    requested_model: &str,
2056    model: Option<&ModelAnswer>,
2057    include_sources: bool,
2058    mem: &mut Memvid,
2059) -> Result<()> {
2060    let answer = response.answer.clone().unwrap_or_default();
2061    let requested_label = model
2062        .map(|m| m.requested.clone())
2063        .unwrap_or_else(|| requested_model.to_string());
2064    let used_label = model
2065        .map(|m| m.model.clone())
2066        .unwrap_or_else(|| requested_model.to_string());
2067
2068    let mut body = json!({
2069        "question": response.question,
2070        "model": requested_label,
2071        "model_used": used_label,
2072        "answer": answer,
2073        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2074    });
2075
2076    // Add detailed sources if requested
2077    if include_sources {
2078        if let serde_json::Value::Object(ref mut map) = body {
2079            let sources = build_sources_json(response, mem);
2080            map.insert("sources".into(), json!(sources));
2081        }
2082    }
2083
2084    // Use colored JSON output
2085    let json_str = serde_json::to_string_pretty(&body)?;
2086    println!("{}", json_str.to_colored_json_auto()?);
2087    Ok(())
2088}
2089
2090fn emit_ask_pretty(
2091    response: &AskResponse,
2092    requested_mode: AskModeArg,
2093    model: Option<&ModelAnswer>,
2094    include_sources: bool,
2095    mem: &mut Memvid,
2096) {
2097    println!(
2098        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
2099        ask_mode_pretty(requested_mode),
2100        ask_retriever_pretty(response.retriever),
2101        response.retrieval.params.top_k,
2102        response.stats.latency_ms,
2103        response.stats.retrieval_ms
2104    );
2105    if let Some(model) = model {
2106        if model.requested.trim() == model.model {
2107            println!("model: {}", model.model);
2108        } else {
2109            println!(
2110                "model requested: {}   model used: {}",
2111                model.requested, model.model
2112            );
2113        }
2114    }
2115    println!(
2116        "engine: {}",
2117        search_engine_label(&response.retrieval.engine)
2118    );
2119    println!(
2120        "hits: {} (showing {})",
2121        response.retrieval.total_hits,
2122        response.retrieval.hits.len()
2123    );
2124
2125    if response.context_only {
2126        println!();
2127        println!("Context-only mode: synthesis disabled.");
2128        println!();
2129    } else if let Some(answer) = &response.answer {
2130        println!();
2131        println!("Answer:\n{answer}");
2132        println!();
2133    }
2134
2135    if !response.citations.is_empty() {
2136        println!("Citations:");
2137        for citation in &response.citations {
2138            match citation.score {
2139                Some(score) => println!(
2140                    "[{}] {} (frame {}, score {:.3})",
2141                    citation.index, citation.uri, citation.frame_id, score
2142                ),
2143                None => println!(
2144                    "[{}] {} (frame {})",
2145                    citation.index, citation.uri, citation.frame_id
2146                ),
2147            }
2148        }
2149        println!();
2150    }
2151
2152    // Print detailed sources if requested
2153    if include_sources && !response.citations.is_empty() {
2154        println!("=== SOURCES ===");
2155        println!();
2156        for citation in &response.citations {
2157            println!("[{}] {}", citation.index, citation.uri);
2158
2159            // Get frame metadata
2160            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2161                if let Some(title) = &frame.title {
2162                    println!("    Title: {}", title);
2163                }
2164                println!("    Frame ID: {}", citation.frame_id);
2165                if let Some(score) = citation.score {
2166                    println!("    Score: {:.4}", score);
2167                }
2168                if let Some((start, end)) = citation.chunk_range {
2169                    println!("    Range: [{}..{})", start, end);
2170                }
2171                if !frame.tags.is_empty() {
2172                    println!("    Tags: {}", frame.tags.join(", "));
2173                }
2174                if !frame.labels.is_empty() {
2175                    println!("    Labels: {}", frame.labels.join(", "));
2176                }
2177                println!("    Timestamp: {}", frame.timestamp);
2178                if !frame.content_dates.is_empty() {
2179                    println!("    Content Dates: {}", frame.content_dates.join(", "));
2180                }
2181            }
2182
2183            // Get snippet from hit
2184            if let Some(hit) = response
2185                .retrieval
2186                .hits
2187                .iter()
2188                .find(|h| h.frame_id == citation.frame_id)
2189            {
2190                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2191                let truncated = if snippet.len() > 200 {
2192                    format!("{}...", &snippet[..200])
2193                } else {
2194                    snippet.clone()
2195                };
2196                println!("    Snippet: {}", truncated.replace('\n', " "));
2197            }
2198            println!();
2199        }
2200    }
2201
2202    if !include_sources {
2203        println!();
2204        emit_search_table(&response.retrieval);
2205    }
2206}
2207
2208fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2209    let hits: Vec<_> = response
2210        .hits
2211        .iter()
2212        .map(|hit| {
2213            json!({
2214                "frame_id": hit.frame_id,
2215                "matches": hit.matches,
2216                "snippets": [hit.text.clone()],
2217            })
2218        })
2219        .collect();
2220    println!("{}", serde_json::to_string_pretty(&hits)?);
2221    Ok(())
2222}
2223
2224fn emit_search_table(response: &SearchResponse) {
2225    if response.hits.is_empty() {
2226        println!("No results for '{}'.", response.query);
2227        return;
2228    }
2229    for hit in &response.hits {
2230        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
2231        if let Some(title) = &hit.title {
2232            println!("  Title: {title}");
2233        }
2234        if let Some(score) = hit.score {
2235            println!("  Score: {score:.3}");
2236        }
2237        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
2238        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
2239            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
2240        }
2241        if let Some(chunk_text) = &hit.chunk_text {
2242            println!("  Chunk Text: {}", chunk_text.trim());
2243        }
2244        if let Some(metadata) = &hit.metadata {
2245            if let Some(track) = &metadata.track {
2246                println!("  Track: {track}");
2247            }
2248            if !metadata.tags.is_empty() {
2249                println!("  Tags: {}", metadata.tags.join(", "));
2250            }
2251            if !metadata.labels.is_empty() {
2252                println!("  Labels: {}", metadata.labels.join(", "));
2253            }
2254            if let Some(created_at) = &metadata.created_at {
2255                println!("  Created: {created_at}");
2256            }
2257            if !metadata.content_dates.is_empty() {
2258                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2259            }
2260            if !metadata.entities.is_empty() {
2261                let entity_strs: Vec<String> = metadata
2262                    .entities
2263                    .iter()
2264                    .map(|e| format!("{} ({})", e.name, e.kind))
2265                    .collect();
2266                println!("  Entities: {}", entity_strs.join(", "));
2267            }
2268        }
2269        println!("  Snippet: {}", hit.text.trim());
2270        println!();
2271    }
2272    if let Some(cursor) = &response.next_cursor {
2273        println!("Next cursor: {cursor}");
2274    }
2275}
2276
2277fn ask_mode_display(mode: AskModeArg) -> &'static str {
2278    match mode {
2279        AskModeArg::Lex => "lex",
2280        AskModeArg::Sem => "sem",
2281        AskModeArg::Hybrid => "hybrid",
2282    }
2283}
2284
2285fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2286    match mode {
2287        AskModeArg::Lex => "Lexical",
2288        AskModeArg::Sem => "Semantic",
2289        AskModeArg::Hybrid => "Hybrid",
2290    }
2291}
2292
2293fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2294    match retriever {
2295        AskRetriever::Lex => "lex",
2296        AskRetriever::Semantic => "semantic",
2297        AskRetriever::Hybrid => "hybrid",
2298        AskRetriever::LexFallback => "lex_fallback",
2299        AskRetriever::TimelineFallback => "timeline_fallback",
2300    }
2301}
2302
2303fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2304    match retriever {
2305        AskRetriever::Lex => "Lexical",
2306        AskRetriever::Semantic => "Semantic",
2307        AskRetriever::Hybrid => "Hybrid",
2308        AskRetriever::LexFallback => "Lexical (fallback)",
2309        AskRetriever::TimelineFallback => "Timeline (fallback)",
2310    }
2311}
2312
2313fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2314    match engine {
2315        SearchEngineKind::Tantivy => "text (tantivy)",
2316        SearchEngineKind::LexFallback => "text (fallback)",
2317        SearchEngineKind::Hybrid => "hybrid",
2318    }
2319}
2320
2321fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2322    let digest = hash(uri.as_bytes()).to_hex().to_string();
2323    let prefix_len = digest.len().min(12);
2324    let prefix = &digest[..prefix_len];
2325    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2326}
2327
/// Return `text` unchanged when it has at most `limit` chars; otherwise keep
/// the first `limit` chars and append a literal `...`.
///
/// Counts Unicode scalar values (chars), not bytes, so the cut point is
/// always a valid char boundary.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    match text.char_indices().nth(limit) {
        // Fewer than or exactly `limit` chars: nothing to cut.
        None => text.to_string(),
        // `cut` is the byte offset of char #`limit` (0-based), i.e. the first
        // char past the kept prefix — guaranteed to be a char boundary.
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2336
/// Convert a single `SearchHit` into the JSON object used by `mv2.result.v2`.
///
/// Emits rank, optional score, a derived stable `id`, frame/uri fields, both
/// ranges, the snippet text, and a `metadata` sub-object. Empty metadata
/// collections are omitted rather than serialized as `[]`/`null`.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Stable identifier derived from (uri digest, frame id, start offset).
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // When no chunk range exists, mirror the full range so the key is always
    // present for consumers.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata from the hit itself when none was attached,
    // so `metadata.matches` is always populated.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        // Entities carry an optional confidence; omit the key when absent.
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
2397/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
2398///
2399/// RRF is mathematically superior to raw score combination because:
2400/// - BM25 scores are unbounded (0 to infinity)
2401/// - Cosine similarity is bounded (-1 to 1)
2402/// - RRF normalizes by using only RANKS, not raw scores
2403///
2404/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
2405fn apply_semantic_rerank(
2406    runtime: &EmbeddingRuntime,
2407    mem: &mut Memvid,
2408    response: &mut SearchResponse,
2409) -> Result<()> {
2410    if response.hits.is_empty() {
2411        return Ok(());
2412    }
2413
2414    let query_embedding = runtime.embed_query(&response.query)?;
2415    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
2416    for hit in &response.hits {
2417        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
2418            if embedding.len() == runtime.dimension() {
2419                let score = cosine_similarity(&query_embedding, &embedding);
2420                semantic_scores.insert(hit.frame_id, score);
2421            }
2422        }
2423    }
2424
2425    if semantic_scores.is_empty() {
2426        return Ok(());
2427    }
2428
2429    // Sort by semantic score to get semantic ranks
2430    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
2431        .iter()
2432        .map(|(frame_id, score)| (*frame_id, *score))
2433        .collect();
2434    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
2435
2436    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
2437    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
2438        semantic_rank.insert(*frame_id, idx + 1);
2439    }
2440
2441    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
2442    let query_lower = response.query.to_lowercase();
2443    let is_preference_query = query_lower.contains("suggest")
2444        || query_lower.contains("recommend")
2445        || query_lower.contains("should i")
2446        || query_lower.contains("what should")
2447        || query_lower.contains("prefer")
2448        || query_lower.contains("favorite")
2449        || query_lower.contains("best for me");
2450
2451    // Pure RRF: Use ONLY ranks, NOT raw scores
2452    // This prevents a "confidently wrong" high-scoring vector from burying
2453    // a "precisely correct" keyword match
2454    const RRF_K: f32 = 60.0;
2455
2456    let mut ordering: Vec<(usize, f32, usize)> = response
2457        .hits
2458        .iter()
2459        .enumerate()
2460        .map(|(idx, hit)| {
2461            let lexical_rank = hit.rank;
2462
2463            // RRF score for lexical rank
2464            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);
2465
2466            // RRF score for semantic rank
2467            let semantic_rrf = semantic_rank
2468                .get(&hit.frame_id)
2469                .map(|rank| 1.0 / (RRF_K + *rank as f32))
2470                .unwrap_or(0.0);
2471
2472            // Apply preference boost for hits containing user preference signals
2473            // This is a small bonus for content with first-person preference indicators
2474            let preference_boost = if is_preference_query {
2475                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
2476            } else {
2477                0.0
2478            };
2479
2480            // Pure RRF: Only rank-based scores, no raw similarity scores
2481            let combined = lexical_rrf + semantic_rrf + preference_boost;
2482            (idx, combined, lexical_rank)
2483        })
2484        .collect();
2485
2486    ordering.sort_by(|a, b| {
2487        b.1.partial_cmp(&a.1)
2488            .unwrap_or(Ordering::Equal)
2489            .then(a.2.cmp(&b.2))
2490    });
2491
2492    let mut reordered = Vec::with_capacity(response.hits.len());
2493    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
2494        let mut hit = response.hits[idx].clone();
2495        hit.rank = rank_idx + 1;
2496        reordered.push(hit);
2497    }
2498
2499    response.hits = reordered;
2500    Ok(())
2501}
2502
2503/// Rerank search results by boosting hits that contain user preference signals.
2504/// Only applies when the query appears to be seeking recommendations or preferences.
2505fn apply_preference_rerank(response: &mut SearchResponse) {
2506    if response.hits.is_empty() {
2507        return;
2508    }
2509
2510    // Check if query is preference-seeking
2511    let query_lower = response.query.to_lowercase();
2512    let is_preference_query = query_lower.contains("suggest")
2513        || query_lower.contains("recommend")
2514        || query_lower.contains("should i")
2515        || query_lower.contains("what should")
2516        || query_lower.contains("prefer")
2517        || query_lower.contains("favorite")
2518        || query_lower.contains("best for me");
2519
2520    if !is_preference_query {
2521        return;
2522    }
2523
2524    // Compute boost scores for each hit
2525    let mut scored: Vec<(usize, f32, f32)> = response
2526        .hits
2527        .iter()
2528        .enumerate()
2529        .map(|(idx, hit)| {
2530            let original_score = hit.score.unwrap_or(0.0);
2531            let preference_boost = compute_preference_boost(&hit.text);
2532            let boosted_score = original_score + preference_boost;
2533            (idx, boosted_score, original_score)
2534        })
2535        .collect();
2536
2537    // Sort by boosted score (descending)
2538    scored.sort_by(|a, b| {
2539        b.1.partial_cmp(&a.1)
2540            .unwrap_or(Ordering::Equal)
2541            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
2542    });
2543
2544    // Reorder hits
2545    let mut reordered = Vec::with_capacity(response.hits.len());
2546    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
2547        let mut hit = response.hits[idx].clone();
2548        hit.rank = rank_idx + 1;
2549        reordered.push(hit);
2550    }
2551
2552    response.hits = reordered;
2553}
2554
/// Compute a boost score for hits that contain user preference signals.
/// This helps surface context where users express their preferences,
/// habits, or personal information that's relevant to recommendation queries.
///
/// Key insight: We want to distinguish content where the user describes
/// their ESTABLISHED situation/preferences (high boost) from content where
/// the user is making a REQUEST (low boost). Both use first-person language,
/// but they serve different purposes for personalization.
///
/// Returns a value in `[0.0, 0.5]` (capped to avoid over-weighting).
fn compute_preference_boost(text: &str) -> f32 {
    // Pad with a space on each side so the word-boundary patterns below
    // (" i ", " my ", " me ") also match when the word sits at the very start
    // or end of the text — e.g. "I like hiking" previously received no
    // first-person credit because there is no space before the leading "i".
    let text_lower = format!(" {} ", text.to_lowercase());
    let mut boost = 0.0f32;

    // Strong signals: Past/present user experiences and possessions
    // These describe what the user HAS DONE, HAS, or DOES REGULARLY
    let established_context = [
        // Past tense - indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (indicates established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (indicates ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for pattern in established_context {
        if text_lower.contains(pattern) {
            boost += 0.15;
        }
    }

    // Moderate signals: General first-person statements (word-boundary match;
    // relies on the space padding above to catch start/end of text)
    let first_person = [" i ", " my ", " me "];
    for pattern in first_person {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Weak signals: Requests/intentions (not yet established preferences)
    // These indicate the user wants something, but don't describe established context
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in request_patterns {
        if text_lower.contains(pattern) {
            boost += 0.02;
        }
    }

    // Cap the boost to avoid over-weighting
    boost.min(0.5)
}
2647
/// Cosine similarity between two vectors.
///
/// Returns 0.0 when either vector has (near-)zero magnitude; if the slices
/// differ in length, the extra elements of the longer one are ignored.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in a single pass.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, na, nb), (x, y)| (dot + x * y, na + x * x, nb + y * y),
    );

    // Guard against division by (near-)zero magnitudes.
    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
2664
2665/// Apply cross-encoder reranking to search results.
2666///
2667/// Cross-encoders directly score query-document pairs and can understand
2668/// more nuanced relevance than bi-encoders (embeddings). This is especially
2669/// useful for personalization queries where semantic similarity != relevance.
2670///
2671/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2672fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2673    if response.hits.is_empty() || response.hits.len() < 2 {
2674        return Ok(());
2675    }
2676
2677    // Only rerank if we have enough candidates
2678    let candidates_to_rerank = response.hits.len().min(50);
2679
2680    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2681    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2682    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2683        .with_show_download_progress(true);
2684
2685    let mut reranker = match TextRerank::try_new(options) {
2686        Ok(r) => r,
2687        Err(e) => {
2688            warn!("Failed to initialize cross-encoder reranker: {e}");
2689            return Ok(());
2690        }
2691    };
2692
2693    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2694    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2695        .iter()
2696        .map(|hit| hit.text.clone())
2697        .collect();
2698
2699    // Rerank using cross-encoder
2700    info!("Cross-encoder reranking {} candidates", documents.len());
2701    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2702        Ok(results) => results,
2703        Err(e) => {
2704            warn!("Cross-encoder reranking failed: {e}");
2705            return Ok(());
2706        }
2707    };
2708
2709    // Reorder hits based on cross-encoder scores
2710    let mut reordered = Vec::with_capacity(response.hits.len());
2711    for (new_rank, result) in rerank_results.iter().enumerate() {
2712        let original_idx = result.index;
2713        let mut hit = response.hits[original_idx].clone();
2714        hit.rank = new_rank + 1;
2715        // Store cross-encoder score in the hit score for reference
2716        hit.score = Some(result.score);
2717        reordered.push(hit);
2718    }
2719
2720    // Add any remaining hits that weren't reranked (beyond top-50)
2721    for hit in response.hits.iter().skip(candidates_to_rerank) {
2722        let mut h = hit.clone();
2723        h.rank = reordered.len() + 1;
2724        reordered.push(h);
2725    }
2726
2727    response.hits = reordered;
2728    info!("Cross-encoder reranking complete");
2729    Ok(())
2730}
2731
2732/// Build a context string from memory cards stored in the MV2 file.
2733/// Groups facts by entity for better LLM comprehension.
2734fn build_memory_context(mem: &Memvid) -> String {
2735    let entities = mem.memory_entities();
2736    if entities.is_empty() {
2737        return String::new();
2738    }
2739
2740    let mut sections = Vec::new();
2741    for entity in entities {
2742        let cards = mem.get_entity_memories(&entity);
2743        if cards.is_empty() {
2744            continue;
2745        }
2746
2747        let mut entity_lines = Vec::new();
2748        for card in cards {
2749            // Format: "slot: value" with optional polarity indicator
2750            let polarity_marker = card
2751                .polarity
2752                .as_ref()
2753                .map(|p| match p.to_string().as_str() {
2754                    "Positive" => " (+)",
2755                    "Negative" => " (-)",
2756                    _ => "",
2757                })
2758                .unwrap_or("");
2759            entity_lines.push(format!(
2760                "  - {}: {}{}",
2761                card.slot, card.value, polarity_marker
2762            ));
2763        }
2764
2765        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2766    }
2767
2768    sections.join("\n\n")
2769}
2770
2771/// Build a context string from entities found in search hits.
2772/// Groups entities by type for better LLM comprehension.
2773fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
2774    use std::collections::HashMap;
2775
2776    // Collect unique entities by kind
2777    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
2778
2779    for hit in hits {
2780        if let Some(metadata) = &hit.metadata {
2781            for entity in &metadata.entities {
2782                entities_by_kind
2783                    .entry(entity.kind.clone())
2784                    .or_default()
2785                    .push(entity.name.clone());
2786            }
2787        }
2788    }
2789
2790    if entities_by_kind.is_empty() {
2791        return String::new();
2792    }
2793
2794    // Deduplicate and format
2795    let mut sections = Vec::new();
2796    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
2797    sorted_kinds.sort();
2798
2799    for kind in sorted_kinds {
2800        let names = entities_by_kind.get(kind).unwrap();
2801        let mut unique_names: Vec<_> = names.iter().collect();
2802        unique_names.sort();
2803        unique_names.dedup();
2804
2805        let names_str = unique_names
2806            .iter()
2807            .take(10) // Limit to 10 entities per kind
2808            .map(|s| s.as_str())
2809            .collect::<Vec<_>>()
2810            .join(", ");
2811
2812        sections.push(format!("{}: {}", kind, names_str));
2813    }
2814
2815    sections.join("\n")
2816}