// memvid_cli/commands/search.rs

1//! Search & retrieval command handlers (find, vec-search, ask, timeline, when).
2//!
3//! Responsibilities:
4//! - Parse CLI arguments for search/RAG/timeline.
5//! - Call into memvid-core search/ask APIs and present results in JSON or human form.
6//! - Keep user-facing errors friendly and deterministic (no panics on malformed flags).
7
8use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16#[cfg(feature = "temporal_track")]
17use memvid_core::{
18    types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
19    TemporalResolution, TemporalResolutionValue,
20};
21use memvid_core::{
22    types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
23    AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, SearchEngineKind, SearchHit,
24    SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
25};
26#[cfg(feature = "temporal_track")]
27use serde::Serialize;
28use serde_json::json;
29#[cfg(feature = "temporal_track")]
30use time::format_description::well_known::Rfc3339;
31use time::{Date, PrimitiveDateTime, Time};
32#[cfg(feature = "temporal_track")]
33use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
34use tracing::{info, warn};
35
36use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
37
38use memvid_ask_model::{
39    run_model_inference, ModelAnswer, ModelContextFragment, ModelContextFragmentKind,
40    ModelInference,
41};
42
43// frame_to_json and print_frame_summary available from commands but not used in this module
44use crate::config::{
45    load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
46    try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingRuntime,
47};
48use crate::utils::{
49    autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
50    parse_date_boundary, parse_vector, read_embedding,
51};
52
// Upper bound on context characters echoed in command output; its consumers
// are later in this file (outside this chunk's view).
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone applied when --tz is not supplied
// (see build_temporal_filter).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
56
/// Arguments for the `timeline` subcommand
///
/// Field comments below use `//` (not `///`) so clap's derived --help output
/// stays unchanged.
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file whose timeline is listed.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
    // Walk the timeline in reverse order.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be > 0).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase resolved via build_temporal_filter.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone for resolving --on (defaults to DEFAULT_TEMPORAL_TZ).
    // Only valid together with --on (enforced in handle_timeline).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 "now" override used as the anchor for relative phrases.
    // Only valid together with --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Minutes of symmetric padding around the resolved window.
    // Only valid together with --on.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Replay: Show timeline for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Show timeline for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
91
/// Arguments for the `when` subcommand
///
/// Field comments use `//` (not `///`) so clap's derived --help output stays
/// unchanged.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to query.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase to resolve (required).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for resolving the phrase (defaults to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 "now" override used as the anchor for relative phrases.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Minutes of symmetric padding around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be > 0).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Extra lower timestamp bound (unix timestamp), combined with the phrase.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Extra upper timestamp bound (unix timestamp), combined with the phrase.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Walk the timeline in reverse order.
    #[arg(long)]
    pub reverse: bool,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
}
117
/// Arguments for the `ask` subcommand
///
/// Positional TARGET tokens are split by `handle_ask`: the first token that
/// looks like a memory file becomes the file path; remaining tokens are
/// collected as question words. When both --uri and --scope are given,
/// --scope is ignored (a warning is logged). Field comments use `//` so
/// clap's derived --help output stays unchanged.
#[derive(Args)]
pub struct AskArgs {
    // Free-form positional tokens: memory file and/or question words.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (alternative to positional tokens).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to one exact URI; wins over --scope.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of results to retrieve (fixed top-k unless --adaptive).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per context snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid (see AskModeArg).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Return retrieved context only — presumably skips answer synthesis;
    // confirm in handle_ask (below this chunk's view).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// Show detailed source information for each citation
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// Mask PII (emails, SSNs, phone numbers, etc.) in context before sending to LLM
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Include structured memory cards in the context (facts, preferences, etc.)
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Maximum characters of retrieval context to send to remote LLMs (overrides MEMVID_LLM_CONTEXT_BUDGET)
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Start-date filter (parsed later; see parse_date_boundary import).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End-date filter (parsed later; see parse_date_boundary import).
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model used to synthesize the answer; bare --use-model selects
    // "tinyllama" via default_missing_value.
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Override the default system prompt (useful for providing date context like "Today is March 27, 2023")
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip cross-encoder reranking (useful in gated environments where model downloads are blocked)
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Adaptive retrieval options
    /// Enable adaptive retrieval: dynamically determine how many results to return based on
    /// relevancy score distribution. Instead of fixed top-k, pulls all results above a
    /// relevancy threshold. Use this when answers may span multiple chunks.
    #[arg(long = "adaptive", action = ArgAction::SetTrue)]
    pub adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    /// Only used when --adaptive is enabled.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    /// Only used when --adaptive is enabled.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: relative (default), absolute, cliff, elbow, or combined
    #[arg(long = "adaptive-strategy", value_enum, default_value = "relative")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
201
/// Ask mode argument
///
/// Variant comments use `//` so clap's derived value help stays unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical-only retrieval; maps to AskMode::Lex.
    Lex,
    // Semantic-only retrieval; maps to AskMode::Sem.
    Sem,
    // Combined retrieval; maps to AskMode::Hybrid.
    Hybrid,
}
209
210impl From<AskModeArg> for AskMode {
211    fn from(value: AskModeArg) -> Self {
212        match value {
213            AskModeArg::Lex => AskMode::Lex,
214            AskModeArg::Sem => AskMode::Sem,
215            AskModeArg::Hybrid => AskMode::Hybrid,
216        }
217    }
218}
219
/// Arguments for the `find` subcommand
///
/// Field comments use `//` (not `///`) so clap's derived --help output stays
/// unchanged.
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict the search to one exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict the search to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return (fixed top-k unless --adaptive is enabled).
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per result snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Opaque pagination token from a previous invocation.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit the legacy JSON schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search strategy (see SearchMode).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Replay: Filter to frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Replay: Filter to frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for query (must match the model used during ingestion)
    /// Options: bge-small, bge-base, nomic, gte-large, openai, openai-small, openai-ada
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Adaptive retrieval options
    /// Enable adaptive retrieval: dynamically determine how many results to return based on
    /// relevancy score distribution. Instead of fixed top-k, pulls all results above a
    /// relevancy threshold. Use this when answers may span multiple chunks.
    #[arg(long = "adaptive", action = ArgAction::SetTrue)]
    pub adaptive: bool,
    /// Minimum relevancy ratio vs top score (0.0-1.0). Results below this threshold are excluded.
    /// Example: 0.5 means only include results with score >= 50% of the top result's score.
    /// Only used when --adaptive is enabled.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Maximum results to consider for adaptive retrieval (over-retrieval limit).
    /// Set high enough to capture all potentially relevant results.
    /// Only used when --adaptive is enabled.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Adaptive cutoff strategy: relative (default), absolute, cliff, elbow, or combined
    #[arg(long = "adaptive-strategy", value_enum, default_value = "relative")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
274
/// Search mode argument
///
/// Variant comments use `//` so clap's derived value help stays unchanged.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Automatic strategy selection (dispatch lives outside this chunk).
    Auto,
    // Lexical (keyword) search.
    Lex,
    // Semantic (embedding-based) search.
    Sem,
    /// CLIP visual search using text-to-image embeddings
    #[cfg(feature = "clip")]
    Clip,
}
285
/// Adaptive retrieval strategy
///
/// Selected via `--adaptive-strategy`; only consulted when `--adaptive` is
/// enabled (see the flag docs on `FindArgs`/`AskArgs`).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// Stop when score drops below X% of top score (default)
    Relative,
    /// Stop when score drops below fixed threshold
    Absolute,
    /// Stop when score drops sharply from previous result
    Cliff,
    /// Automatically detect "elbow" in score curve
    Elbow,
    /// Combine relative + cliff + absolute (recommended)
    Combined,
}
300
/// Arguments for the `vec-search` subcommand
///
/// Raw vector-similarity search: exactly one of --vector / --embedding
/// supplies the query (clap enforces the mutual exclusion). Field comments
/// use `//` so clap's derived --help output stays unchanged.
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline query vector as comma-separated values.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding (see read_embedding).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Maximum number of neighbours to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
315
/// Arguments for the `audit` subcommand
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit (no CLI help text; `//` keeps --help unchanged).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// The question or topic to audit
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output file path (stdout if not provided)
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Output format
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of sources to retrieve
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per snippet
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Optional scope filter (URI prefix)
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date filter
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date filter
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Use a model to synthesize the answer (e.g., "ollama:qwen2.5:1.5b")
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
352
/// Audit output format
///
/// Selected via the `audit` subcommand's `--format` flag (default: text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain text report
    Text,
    /// Markdown report
    Markdown,
    /// JSON report
    Json,
}
363
364// ============================================================================
365// Search & Retrieval command handlers
366// ============================================================================
367
/// Handle the `timeline` subcommand.
///
/// Builds a timeline query from the CLI flags, optionally narrows it with a
/// resolved temporal phrase (--on), applies Replay cutoffs client-side, and
/// prints the entries as JSON or human-readable text.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of --on; reject them without it so
    // the user gets a deterministic error instead of silent ignoring.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the temporal phrase (if any) into a filter, keeping the summary
    // so it can be echoed in both JSON and human output below.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Apply Replay filtering if requested (done client-side on the fetched
    // entries; both cutoffs are inclusive).
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            // Check as_of_frame filter
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            // Check as_of_ts filter
            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a temporal phrase, wrap the entries in an envelope that also
        // carries the resolved-window summary; otherwise emit the bare array.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        // Human-readable form: optional resolved-window summary, then one
        // block per entry (URI, children, and temporal details when present).
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
473
/// Handle the `when` subcommand.
///
/// Resolves the required --on phrase into a temporal window, queries the
/// timeline with that filter (plus any --since/--until/--limit constraints),
/// and prints the resolved-window summary followed by the matching entries
/// in JSON or human-readable form.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase up front so an unparseable phrase fails before any
    // timeline query runs.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    // Human-readable form: summary first, then one block per entry.
    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
541
/// JSON envelope for `timeline --json` when a temporal phrase (--on) was
/// supplied: the resolved-window summary plus the matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Resolved temporal window; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Borrowed so entries serialize without cloning.
    entries: &'a [TimelineEntry],
}
549
/// JSON envelope for `when --json`: the resolved-window summary plus the
/// matching timeline entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
556
/// JSON-facing view of a single timeline entry for `when --json`
/// (see `entry_to_when_entry`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Unix timestamp of the frame.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-entry temporal annotations, when present on the source entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
572
/// Serializable form of `TemporalSummary`, embedded in `timeline --json`
/// and `when --json` output (see `summary_to_output`).
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The phrase as supplied via --on.
    phrase: String,
    /// Timezone the phrase was resolved in.
    timezone: String,
    /// Anchor ("now") as a unix timestamp and as RFC3339.
    anchor_utc: i64,
    anchor_iso: String,
    /// Normalizer confidence score for the resolution.
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// One of: date / datetime / date_range / datetime_range / month
    /// (see `resolution_kind`).
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Padding (in minutes) that was requested via --window, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
591
/// Internal (non-serialized) record of a resolved temporal phrase, shared by
/// the JSON (`summary_to_output`) and human (`print_temporal_summary`) paths.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    // Original --on phrase.
    phrase: String,
    // Timezone used for resolution.
    tz: String,
    // Anchor ("now") the phrase was resolved against.
    anchor: OffsetDateTime,
    // Resolved window bounds as unix timestamps (None when open-ended).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    // Full normalizer resolution (confidence, flags, value kind).
    resolution: TemporalResolution,
    // --window padding in minutes, if requested.
    window_minutes: Option<u64>,
}
602
/// Resolve a natural-language temporal `phrase` into a concrete
/// `TemporalFilter` plus a `TemporalSummary` used for display/JSON output.
///
/// * `tz_override` — IANA zone; defaults to `DEFAULT_TEMPORAL_TZ`.
/// * `anchor_override` — RFC3339 "now" override; defaults to the real now.
/// * `window_minutes` — symmetric padding applied to the resolved window;
///   only applied when both bounds resolved (open-ended windows are kept
///   as-is).
///
/// Errors use stable E-TEMP-* codes so callers/users get deterministic
/// messages: E-TEMP-001 unresolvable phrase, E-TEMP-002 bad anchor,
/// E-TEMP-003 empty timezone.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both bounds symmetrically; point windows (start == end) and ranges
    // are handled identically, with saturating arithmetic to avoid overflow.
    if let (Some(minutes), Some(s), Some(e)) = (window_minutes, start, end) {
        if minutes > 0 {
            let delta = TimeDuration::minutes(minutes as i64).whole_seconds();
            start = Some(s.saturating_sub(delta));
            end = Some(e.saturating_add(delta));
        }
    }

    // phrase/tz are carried in the summary, not the filter, so the core
    // engine only sees concrete bounds.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
666
/// Project an internal `TemporalSummary` into its serializable JSON form,
/// formatting the anchor as RFC3339 (falling back to the raw unix timestamp
/// if formatting fails) and the window bounds via `format_timestamp`.
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
692
/// Build the JSON-facing `WhenEntry` view of a core `TimelineEntry`,
/// deriving the ISO timestamp rendering alongside the raw value.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    let timestamp_iso = format_timestamp(entry.timestamp);
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso,
        preview: entry.preview.to_owned(),
        uri: entry.uri.to_owned(),
        child_frames: entry.child_frames.to_vec(),
        temporal: entry.temporal.clone(),
    }
}
705
/// Print the resolved temporal window in human-readable form, ending with a
/// blank separator line before the entry listing.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {anchor_text}");
    // Describe the window; a degenerate window (start == end) is reported as
    // a single point in time.
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(s), Some(e)) if s == e => println!("Resolved to: {s}"),
        (Some(s), Some(e)) => println!("Window: {s} → {e}"),
        (Some(s), None) => println!("Window start: {s}"),
        (None, Some(e)) => println!("Window end: {e}"),
        (None, None) => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flag_names: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flag_names.is_empty() {
        println!("Flags: {}", flag_names.join(", "));
    }
    // Zero padding is treated the same as no padding: nothing is printed.
    match summary.window_minutes {
        Some(minutes) if minutes > 0 => println!("Window padding: {minutes} minute(s)"),
        _ => {}
    }
    println!();
}
743
/// Print the per-entry temporal annotations (resolved anchor plus any in-text
/// temporal mentions), indented beneath the timeline entry line.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = &temporal.anchor {
        // Prefer the stored ISO string, then a formatted timestamp, then the
        // raw unix value.
        let display = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!("  Anchor: {} (source: {:?})", display, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!("  Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        // Append the quoted source text only when the mention carries one.
        match mention.text.as_deref() {
            Some(text) => println!(
                "    - {} ({:?}, confidence {}) — \"{}\"",
                when, mention.kind, mention.confidence, text
            ),
            None => println!(
                "    - {} ({:?}, confidence {})",
                when, mention.kind, mention.confidence
            ),
        }
    }
}
776
/// Convert a temporal resolution into `(start_utc, end_utc)` unix-timestamp
/// bounds. Point values (Date/DateTime) yield start == end; a Month spans the
/// 1st through the last day. Dates resolve to 00:00:00 UTC.
///
/// NOTE(review): range/month ends resolve to *midnight at the start of* the
/// final day — if the downstream filter treats end_utc as an instant, events
/// later on that final day fall outside the window; confirm this is intended.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
807
/// Stable machine-readable tag for the resolution's value kind, used in the
/// `resolution_kind` field of the JSON summary output.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue as V;
    match &resolution.value {
        V::Date(_) => "date",
        V::DateTime(_) => "datetime",
        V::DateRange { .. } => "date_range",
        V::DateTimeRange { .. } => "datetime_range",
        V::Month { .. } => "month",
    }
}
818
/// Unix timestamp of midnight (00:00:00) UTC on `date`.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    PrimitiveDateTime::new(date, Time::MIDNIGHT)
        .assume_offset(UtcOffset::UTC)
        .unix_timestamp()
}
825
/// Last calendar day of `month` in `year`.
///
/// Walks forward from the 1st via `Date::next_day` until the month changes,
/// which lets the `time` crate account for month lengths and leap years.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut date = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    while let Some(next) = date.next_day() {
        if next.month() == month {
            date = next;
        } else {
            break;
        }
    }
    Ok(date)
}
839
840#[cfg(feature = "temporal_track")]
841
842fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
843    if fragments.is_empty() {
844        return;
845    }
846
847    response.context_fragments = fragments
848        .into_iter()
849        .map(|fragment| AskContextFragment {
850            rank: fragment.rank,
851            frame_id: fragment.frame_id,
852            uri: fragment.uri,
853            title: fragment.title,
854            score: fragment.score,
855            matches: fragment.matches,
856            range: Some(fragment.range),
857            chunk_range: fragment.chunk_range,
858            text: fragment.text,
859            kind: Some(match fragment.kind {
860                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
861                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
862            }),
863            #[cfg(feature = "temporal_track")]
864            temporal: None,
865        })
866        .collect();
867}
868
/// Handle the `ask` command: retrieve relevant context from an MV2 memory
/// file and (unless `--context-only`) synthesize an answer, optionally via a
/// local model (`--use-model`).
///
/// Pipeline order matters: retrieval → cross-encoder rerank → memory-card
/// injection → entity-context injection → PII masking → model inference →
/// output (JSON or pretty).
///
/// # Errors
/// Fails when no question is supplied, when date boundaries are inverted,
/// when the memory file cannot be opened, or when an embedding runtime is
/// required (sem/hybrid) but unavailable.
pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Split positional targets: the first token that looks like a memory file
    // becomes the file path; everything else is treated as question text.
    let mut question_tokens = Vec::new();
    let mut file_path: Option<PathBuf> = None;
    for token in &args.targets {
        if file_path.is_none() && looks_like_memory(token) {
            file_path = Some(PathBuf::from(token));
        } else {
            question_tokens.push(token.clone());
        }
    }

    let positional_question = if question_tokens.is_empty() {
        None
    } else {
        Some(question_tokens.join(" "))
    };

    // --question wins over positional text; blank/whitespace-only questions
    // are rejected after trimming.
    let question = args
        .question
        .or(positional_question)
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty());

    let question = question
        .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;

    let memory_path = match file_path {
        Some(path) => path,
        None => autodetect_memory_file()?,
    };

    // Parse --start/--end (end is inclusive, hence the `true` flag) and
    // reject inverted ranges early with a friendly error.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Open MV2 file first to get vector dimension for auto-detection
    let mut mem = Memvid::open(&memory_path)?;

    // Get the vector dimension from the MV2 file for auto-detection
    let mv2_dimension = mem.vec_index_dimension();

    // Load an embedding runtime according to mode: lex needs none, sem
    // requires one (hard error if missing), hybrid tries but tolerates
    // absence until the bail below.
    let ask_mode: AskMode = args.mode.into();
    let emb_model_override = args.query_embedding_model.as_deref();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
            config,
            emb_model_override,
            mv2_dimension,
        )?),
        AskModeArg::Hybrid => {
            // For hybrid, use auto-detection from MV2 dimension
            try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
                || {
                    // Force a load; if it fails we error below.
                    load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
                        .ok()
                        .map(|rt| {
                            tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
                            rt
                        })
                },
            )
        }
    };
    if runtime.is_none() && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
        anyhow::bail!(
            "semantic embeddings unavailable; install/cached model required for {:?} mode",
            args.mode
        );
    }

    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build adaptive config if --adaptive flag is set
    let adaptive = if args.adaptive {
        Some(AdaptiveConfig {
            enabled: true,
            max_results: args.max_k,
            min_results: 1,
            normalize_scores: true,
            strategy: match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.3,
                    absolute_min: 0.3,
                },
            },
        })
    } else {
        None
    };

    let request = AskRequest {
        question,
        top_k: args.top_k,
        snippet_chars: args.snippet_chars,
        uri: args.uri.clone(),
        scope: args.scope.clone(),
        cursor: args.cursor.clone(),
        start,
        end,
        #[cfg(feature = "temporal_track")]
        temporal: None,
        context_only: args.context_only,
        mode: ask_mode,
        as_of_frame: args.as_of_frame,
        as_of_ts: args.as_of_ts,
        adaptive,
    };
    let mut response = mem.ask(request, embedder)?;

    // Apply cross-encoder reranking for better precision on preference/personalization queries
    // This is especially important for questions like "What should I..." where semantic
    // similarity doesn't capture personal relevance well.
    // Skip if --no-rerank is set (useful in gated environments where model downloads are blocked)
    if !args.no_rerank
        && !response.retrieval.hits.is_empty()
        && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
    {
        // Create a temporary SearchResponse for reranking
        let mut search_response = SearchResponse {
            query: response.question.clone(),
            hits: response.retrieval.hits.clone(),
            total_hits: response.retrieval.hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: None,
            },
            elapsed_ms: 0,
            engine: memvid_core::SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        // Rerank failure is non-fatal: keep the original retrieval order.
        if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
            warn!("Cross-encoder reranking failed: {e}");
        } else {
            // Update the response hits with reranked order
            response.retrieval.hits = search_response.hits;
            // Rebuild context from reranked hits
            response.retrieval.context = response
                .retrieval
                .hits
                .iter()
                .take(10) // Use top-10 for context
                .map(|hit| hit.text.as_str())
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
        }
    }

    // Inject memory cards into context if --memories flag is set
    if args.memories {
        let memory_context = build_memory_context(&mem);
        if !memory_context.is_empty() {
            // Prepend memory context to retrieval context
            response.retrieval.context = format!(
                "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
                memory_context, response.retrieval.context
            );
        }
    }

    // Inject entity context from Logic-Mesh if entities were found in search hits
    let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
    if !entity_context.is_empty() {
        // Prepend entity context to retrieval context
        response.retrieval.context = format!(
            "=== ENTITIES MENTIONED ===\n{}\n\n{}",
            entity_context, response.retrieval.context
        );
    }

    // Apply PII masking if requested. Runs after all context injection so the
    // aggregated context and every hit (including chunk text) get masked.
    if args.mask_pii {
        use memvid_core::pii::mask_pii;

        // Mask the aggregated context
        response.retrieval.context = mask_pii(&response.retrieval.context);

        // Mask text in each hit
        for hit in &mut response.retrieval.hits {
            hit.text = mask_pii(&hit.text);
            if let Some(chunk_text) = &hit.chunk_text {
                hit.chunk_text = Some(mask_pii(chunk_text));
            }
        }
    }

    let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;

    // Optional model synthesis: --context-only disables it; inference failure
    // falls back to the default summary with a warning rather than erroring.
    let mut model_result: Option<ModelAnswer> = None;
    if response.context_only {
        if args.use_model.is_some() {
            warn!("--use-model ignored because --context-only disables synthesis");
        }
    } else if let Some(model_name) = args.use_model.as_deref() {
        match run_model_inference(
            model_name,
            &response.question,
            &response.retrieval.context,
            &response.retrieval.hits,
            llm_context_override,
            None,
            args.system_prompt.as_deref(),
        ) {
            Ok(inference) => {
                let ModelInference {
                    answer,
                    context_body,
                    context_fragments,
                    ..
                } = inference;
                response.answer = Some(answer.answer.clone());
                response.retrieval.context = context_body;
                apply_model_context_fragments(&mut response, context_fragments);
                model_result = Some(answer);
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Falling back to default summary.",
                    model_name
                );
            }
        }
    }

    // Emit results: model-specific JSON, generic ask JSON, or pretty output.
    if args.json {
        if let Some(model_name) = args.use_model.as_deref() {
            emit_model_json(
                &response,
                model_name,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        } else {
            emit_ask_json(
                &response,
                args.mode,
                model_result.as_ref(),
                args.sources,
                &mut mem,
            )?;
        }
    } else {
        emit_ask_pretty(
            &response,
            args.mode,
            model_result.as_ref(),
            args.sources,
            &mut mem,
        );
    }

    Ok(())
}
1144
/// Handle the `find` command: run lexical, semantic, CLIP (feature-gated), or
/// auto (hybrid when embeddings are available) search over a memory file and
/// print results as a table or JSON.
///
/// Semantic mode prefers the HNSW vector index (optionally with adaptive
/// cutoff) and degrades gracefully: adaptive → fixed-k → lexical + rerank.
///
/// # Errors
/// Fails when the file cannot be opened, when sem mode has no embedding
/// runtime, or when every search fallback fails.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    // Get vector dimension from MV2 for auto-detection
    let mv2_dimension = mem.vec_index_dimension();
    let emb_model_override = args.query_embedding_model.as_deref();

    // Resolve the human-readable mode label and (optionally) an embedding
    // runtime. Sem mode hard-requires a runtime; auto degrades to lexical.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Machine-readable mode key used in JSON output.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // For CLIP mode, use CLIP visual search
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        // Initialize CLIP model
        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        // Encode query text
        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        // Search CLIP index
        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Debug distances before filtering
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // CLIP distance threshold for filtering poor matches
        // CLIP uses L2 distance on normalized embeddings:
        //   - distance² = 2(1 - cosine_similarity)
        //   - distance = 0 → identical (cosine_sim = 1)
        //   - distance = 1.0 → cosine_sim = 0.5 (50% match)
        //   - distance = 1.26 → cosine_sim = 0.20 (20% match - our threshold)
        //   - distance = √2 ≈ 1.41 → orthogonal (cosine_sim = 0)
        //   - distance = 2.0 → opposite (cosine_sim = -1)
        //
        // MobileCLIP text-to-image matching typically produces lower scores than expected.
        // Good matches are usually in the 0.20-0.35 cosine similarity range.
        // We filter at distance > 1.26 (cosine_sim < 0.20) to remove clearly irrelevant results.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        // Convert CLIP hits to SearchResponse format, filtering by threshold
        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                // Convert L2 distance to cosine similarity for display
                // cos_sim = 1 - (distance² / 2)
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                // Get frame preview for snippet; hits whose frame preview is
                // missing are silently dropped (filter_map + ok()?).
                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid, // Use Hybrid as placeholder
            next_cursor: None,
            context: String::new(),
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {}   k={}   time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // For semantic mode, use pure vector search via HNSW index
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        // Embed the query
        let query_embedding = runtime.embed(&args.query)?;

        // Use pure vector search (with optional adaptive retrieval)
        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if args.adaptive {
            // Build adaptive config from CLI args
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35, // 35% drop triggers cutoff
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive HNSW)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Adaptive failure degrades to fixed-k; only error out
                    // when both attempts fail.
                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (HNSW fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
                        }
                    }
                }
            }
        } else {
            // Standard fixed-k vector search
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    // Apply preference boost to rerank results for preference-seeking queries
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (HNSW vector index)".to_string(), None)
                }
                Err(e) => {
                    // Fall back to lexical search + rerank if vector search fails
                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // For lexical and auto modes, use existing behavior
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
        };

        let mut resp = mem.search(request)?;

        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        // Auto mode with an available runtime upgrades to hybrid by reranking
        // the lexical hits semantically.
        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    // Emit results: deprecated legacy JSON, mv2.search.v1 JSON, or a table.
    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {}   k={}   time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        // Show adaptive retrieval stats if enabled
        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }
    Ok(())
}
1510
1511pub fn handle_vec_search(_config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1512    let mut mem = open_read_only_mem(&args.file)?;
1513    let vector = if let Some(path) = args.embedding.as_deref() {
1514        read_embedding(path)?
1515    } else if let Some(vector_string) = &args.vector {
1516        parse_vector(vector_string)?
1517    } else {
1518        anyhow::bail!("provide --vector or --embedding for search input");
1519    };
1520
1521    let hits = mem.search_vec(&vector, args.limit)?;
1522    let mut enriched = Vec::with_capacity(hits.len());
1523    for hit in hits {
1524        let preview = mem.frame_preview_by_id(hit.frame_id)?;
1525        enriched.push((hit.frame_id, hit.distance, preview));
1526    }
1527
1528    if args.json {
1529        let json_hits: Vec<_> = enriched
1530            .iter()
1531            .map(|(frame_id, distance, preview)| {
1532                json!({
1533                    "frame_id": frame_id,
1534                    "distance": distance,
1535                    "preview": preview,
1536                })
1537            })
1538            .collect();
1539        println!("{}", serde_json::to_string_pretty(&json_hits)?);
1540    } else if enriched.is_empty() {
1541        println!("No vector matches found");
1542    } else {
1543        for (frame_id, distance, preview) in enriched {
1544            println!("frame {frame_id} (distance {distance:.6}): {preview}");
1545        }
1546    }
1547    Ok(())
1548}
1549
/// Handle the `audit` command: run a provenance-oriented query over a memory
/// file, optionally synthesize the answer with a local model, and write the
/// report in text, markdown, or JSON form to stdout or a file.
///
/// # Errors
/// Fails on unopenable files, inverted --start/--end ranges, a missing
/// embedding runtime in sem mode, or output-file write errors. Model
/// inference failure is non-fatal (falls back to the default answer).
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse date boundaries
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    // Set up embedding runtime if needed: lex needs none, sem requires one,
    // hybrid tolerates absence (try_load returns None on failure).
    let ask_mode: AskMode = args.mode.into();
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    // Build audit options
    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    // Run the audit
    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // If --use-model is provided, run model inference to synthesize the answer
    if let Some(model_name) = args.use_model.as_deref() {
        // Build context from sources for model inference
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], // No hits needed for audit
            None,
            None,
            None, // No system prompt override for audit
        ) {
            Ok(inference) => {
                // Record which model produced the answer in the report notes.
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Format the output
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write output
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1642
1643fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1644    let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1645
1646    let mut additional_params = serde_json::Map::new();
1647    if let Some(cursor) = &response.params.cursor {
1648        additional_params.insert("cursor".into(), json!(cursor));
1649    }
1650
1651    let mut params = serde_json::Map::new();
1652    params.insert("top_k".into(), json!(response.params.top_k));
1653    params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
1654    params.insert("mode".into(), json!(mode));
1655    params.insert(
1656        "additional_params".into(),
1657        serde_json::Value::Object(additional_params),
1658    );
1659
1660    let mut metadata_json = serde_json::Map::new();
1661    metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
1662    metadata_json.insert("total_hits".into(), json!(response.total_hits));
1663    metadata_json.insert(
1664        "next_cursor".into(),
1665        match &response.next_cursor {
1666            Some(cursor) => json!(cursor),
1667            None => serde_json::Value::Null,
1668        },
1669    );
1670    metadata_json.insert("engine".into(), json!(response.engine));
1671    metadata_json.insert("params".into(), serde_json::Value::Object(params));
1672
1673    let body = json!({
1674        "version": "mv2.result.v2",
1675        "query": response.query,
1676        "metadata": metadata_json,
1677        "hits": hits,
1678        "context": response.context,
1679    });
1680    println!("{}", serde_json::to_string_pretty(&body)?);
1681    Ok(())
1682}
1683
1684fn emit_ask_json(
1685    response: &AskResponse,
1686    requested_mode: AskModeArg,
1687    model: Option<&ModelAnswer>,
1688    include_sources: bool,
1689    mem: &mut Memvid,
1690) -> Result<()> {
1691    let hits: Vec<_> = response
1692        .retrieval
1693        .hits
1694        .iter()
1695        .map(search_hit_to_json)
1696        .collect();
1697
1698    let citations: Vec<_> = response
1699        .citations
1700        .iter()
1701        .map(|citation| {
1702            let mut map = serde_json::Map::new();
1703            map.insert("index".into(), json!(citation.index));
1704            map.insert("frame_id".into(), json!(citation.frame_id));
1705            map.insert("uri".into(), json!(citation.uri));
1706            if let Some(range) = citation.chunk_range {
1707                map.insert("chunk_range".into(), json!([range.0, range.1]));
1708            }
1709            if let Some(score) = citation.score {
1710                map.insert("score".into(), json!(score));
1711            }
1712            serde_json::Value::Object(map)
1713        })
1714        .collect();
1715
1716    let mut body = json!({
1717        "version": "mv2.ask.v1",
1718        "question": response.question,
1719        "answer": response.answer,
1720        "context_only": response.context_only,
1721        "mode": ask_mode_display(requested_mode),
1722        "retriever": ask_retriever_display(response.retriever),
1723        "top_k": response.retrieval.params.top_k,
1724        "results": hits,
1725        "citations": citations,
1726        "stats": {
1727            "retrieval_ms": response.stats.retrieval_ms,
1728            "synthesis_ms": response.stats.synthesis_ms,
1729            "latency_ms": response.stats.latency_ms,
1730        },
1731        "engine": search_engine_label(&response.retrieval.engine),
1732        "total_hits": response.retrieval.total_hits,
1733        "next_cursor": response.retrieval.next_cursor,
1734        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1735    });
1736
1737    if let Some(model) = model {
1738        if let serde_json::Value::Object(ref mut map) = body {
1739            map.insert("model".into(), json!(model.requested));
1740            if model.model != model.requested {
1741                map.insert("model_used".into(), json!(model.model));
1742            }
1743        }
1744    }
1745
1746    // Add detailed sources if requested
1747    if include_sources {
1748        if let serde_json::Value::Object(ref mut map) = body {
1749            let sources = build_sources_json(response, mem);
1750            map.insert("sources".into(), json!(sources));
1751        }
1752    }
1753
1754    println!("{}", serde_json::to_string_pretty(&body)?);
1755    Ok(())
1756}
1757
1758fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
1759    response
1760        .citations
1761        .iter()
1762        .enumerate()
1763        .map(|(idx, citation)| {
1764            let mut source = serde_json::Map::new();
1765            source.insert("index".into(), json!(idx + 1));
1766            source.insert("frame_id".into(), json!(citation.frame_id));
1767            source.insert("uri".into(), json!(citation.uri));
1768
1769            if let Some(range) = citation.chunk_range {
1770                source.insert("chunk_range".into(), json!([range.0, range.1]));
1771            }
1772            if let Some(score) = citation.score {
1773                source.insert("score".into(), json!(score));
1774            }
1775
1776            // Get frame metadata for rich source information
1777            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1778                if let Some(title) = frame.title {
1779                    source.insert("title".into(), json!(title));
1780                }
1781                if !frame.tags.is_empty() {
1782                    source.insert("tags".into(), json!(frame.tags));
1783                }
1784                if !frame.labels.is_empty() {
1785                    source.insert("labels".into(), json!(frame.labels));
1786                }
1787                source.insert("frame_timestamp".into(), json!(frame.timestamp));
1788                if !frame.content_dates.is_empty() {
1789                    source.insert("content_dates".into(), json!(frame.content_dates));
1790                }
1791            }
1792
1793            // Get snippet from hit
1794            if let Some(hit) = response
1795                .retrieval
1796                .hits
1797                .iter()
1798                .find(|h| h.frame_id == citation.frame_id)
1799            {
1800                let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
1801                source.insert("snippet".into(), json!(snippet));
1802            }
1803
1804            serde_json::Value::Object(source)
1805        })
1806        .collect()
1807}
1808
1809fn emit_model_json(
1810    response: &AskResponse,
1811    requested_model: &str,
1812    model: Option<&ModelAnswer>,
1813    include_sources: bool,
1814    mem: &mut Memvid,
1815) -> Result<()> {
1816    let answer = response.answer.clone().unwrap_or_default();
1817    let requested_label = model
1818        .map(|m| m.requested.clone())
1819        .unwrap_or_else(|| requested_model.to_string());
1820    let used_label = model
1821        .map(|m| m.model.clone())
1822        .unwrap_or_else(|| requested_model.to_string());
1823
1824    let mut body = json!({
1825        "question": response.question,
1826        "model": requested_label,
1827        "model_used": used_label,
1828        "answer": answer,
1829        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
1830    });
1831
1832    // Add detailed sources if requested
1833    if include_sources {
1834        if let serde_json::Value::Object(ref mut map) = body {
1835            let sources = build_sources_json(response, mem);
1836            map.insert("sources".into(), json!(sources));
1837        }
1838    }
1839
1840    println!("{}", serde_json::to_string_pretty(&body)?);
1841    Ok(())
1842}
1843
1844fn emit_ask_pretty(
1845    response: &AskResponse,
1846    requested_mode: AskModeArg,
1847    model: Option<&ModelAnswer>,
1848    include_sources: bool,
1849    mem: &mut Memvid,
1850) {
1851    println!(
1852        "mode: {}   retriever: {}   k={}   latency: {} ms (retrieval {} ms)",
1853        ask_mode_pretty(requested_mode),
1854        ask_retriever_pretty(response.retriever),
1855        response.retrieval.params.top_k,
1856        response.stats.latency_ms,
1857        response.stats.retrieval_ms
1858    );
1859    if let Some(model) = model {
1860        if model.requested.trim() == model.model {
1861            println!("model: {}", model.model);
1862        } else {
1863            println!(
1864                "model requested: {}   model used: {}",
1865                model.requested, model.model
1866            );
1867        }
1868    }
1869    println!(
1870        "engine: {}",
1871        search_engine_label(&response.retrieval.engine)
1872    );
1873    println!(
1874        "hits: {} (showing {})",
1875        response.retrieval.total_hits,
1876        response.retrieval.hits.len()
1877    );
1878
1879    if response.context_only {
1880        println!();
1881        println!("Context-only mode: synthesis disabled.");
1882        println!();
1883    } else if let Some(answer) = &response.answer {
1884        println!();
1885        println!("Answer:\n{answer}");
1886        println!();
1887    }
1888
1889    if !response.citations.is_empty() {
1890        println!("Citations:");
1891        for citation in &response.citations {
1892            match citation.score {
1893                Some(score) => println!(
1894                    "[{}] {} (frame {}, score {:.3})",
1895                    citation.index, citation.uri, citation.frame_id, score
1896                ),
1897                None => println!(
1898                    "[{}] {} (frame {})",
1899                    citation.index, citation.uri, citation.frame_id
1900                ),
1901            }
1902        }
1903        println!();
1904    }
1905
1906    // Print detailed sources if requested
1907    if include_sources && !response.citations.is_empty() {
1908        println!("=== SOURCES ===");
1909        println!();
1910        for citation in &response.citations {
1911            println!("[{}] {}", citation.index, citation.uri);
1912
1913            // Get frame metadata
1914            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
1915                if let Some(title) = &frame.title {
1916                    println!("    Title: {}", title);
1917                }
1918                println!("    Frame ID: {}", citation.frame_id);
1919                if let Some(score) = citation.score {
1920                    println!("    Score: {:.4}", score);
1921                }
1922                if let Some((start, end)) = citation.chunk_range {
1923                    println!("    Range: [{}..{})", start, end);
1924                }
1925                if !frame.tags.is_empty() {
1926                    println!("    Tags: {}", frame.tags.join(", "));
1927                }
1928                if !frame.labels.is_empty() {
1929                    println!("    Labels: {}", frame.labels.join(", "));
1930                }
1931                println!("    Timestamp: {}", frame.timestamp);
1932                if !frame.content_dates.is_empty() {
1933                    println!("    Content Dates: {}", frame.content_dates.join(", "));
1934                }
1935            }
1936
1937            // Get snippet from hit
1938            if let Some(hit) = response
1939                .retrieval
1940                .hits
1941                .iter()
1942                .find(|h| h.frame_id == citation.frame_id)
1943            {
1944                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
1945                let truncated = if snippet.len() > 200 {
1946                    format!("{}...", &snippet[..200])
1947                } else {
1948                    snippet.clone()
1949                };
1950                println!("    Snippet: {}", truncated.replace('\n', " "));
1951            }
1952            println!();
1953        }
1954    }
1955
1956    if !include_sources {
1957        println!();
1958        emit_search_table(&response.retrieval);
1959    }
1960}
1961
1962fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
1963    let hits: Vec<_> = response
1964        .hits
1965        .iter()
1966        .map(|hit| {
1967            json!({
1968                "frame_id": hit.frame_id,
1969                "matches": hit.matches,
1970                "snippets": [hit.text.clone()],
1971            })
1972        })
1973        .collect();
1974    println!("{}", serde_json::to_string_pretty(&hits)?);
1975    Ok(())
1976}
1977
1978fn emit_search_table(response: &SearchResponse) {
1979    if response.hits.is_empty() {
1980        println!("No results for '{}'.", response.query);
1981        return;
1982    }
1983    for hit in &response.hits {
1984        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
1985        if let Some(title) = &hit.title {
1986            println!("  Title: {title}");
1987        }
1988        if let Some(score) = hit.score {
1989            println!("  Score: {score:.3}");
1990        }
1991        println!("  Range: [{}..{})", hit.range.0, hit.range.1);
1992        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
1993            println!("  Chunk: [{}..{})", chunk_start, chunk_end);
1994        }
1995        if let Some(chunk_text) = &hit.chunk_text {
1996            println!("  Chunk Text: {}", chunk_text.trim());
1997        }
1998        if let Some(metadata) = &hit.metadata {
1999            if let Some(track) = &metadata.track {
2000                println!("  Track: {track}");
2001            }
2002            if !metadata.tags.is_empty() {
2003                println!("  Tags: {}", metadata.tags.join(", "));
2004            }
2005            if !metadata.labels.is_empty() {
2006                println!("  Labels: {}", metadata.labels.join(", "));
2007            }
2008            if let Some(created_at) = &metadata.created_at {
2009                println!("  Created: {created_at}");
2010            }
2011            if !metadata.content_dates.is_empty() {
2012                println!("  Content Dates: {}", metadata.content_dates.join(", "));
2013            }
2014            if !metadata.entities.is_empty() {
2015                let entity_strs: Vec<String> = metadata
2016                    .entities
2017                    .iter()
2018                    .map(|e| format!("{} ({})", e.name, e.kind))
2019                    .collect();
2020                println!("  Entities: {}", entity_strs.join(", "));
2021            }
2022        }
2023        println!("  Snippet: {}", hit.text.trim());
2024        println!();
2025    }
2026    if let Some(cursor) = &response.next_cursor {
2027        println!("Next cursor: {cursor}");
2028    }
2029}
2030
2031fn ask_mode_display(mode: AskModeArg) -> &'static str {
2032    match mode {
2033        AskModeArg::Lex => "lex",
2034        AskModeArg::Sem => "sem",
2035        AskModeArg::Hybrid => "hybrid",
2036    }
2037}
2038
2039fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2040    match mode {
2041        AskModeArg::Lex => "Lexical",
2042        AskModeArg::Sem => "Semantic",
2043        AskModeArg::Hybrid => "Hybrid",
2044    }
2045}
2046
2047fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2048    match retriever {
2049        AskRetriever::Lex => "lex",
2050        AskRetriever::Semantic => "semantic",
2051        AskRetriever::Hybrid => "hybrid",
2052        AskRetriever::LexFallback => "lex_fallback",
2053        AskRetriever::TimelineFallback => "timeline_fallback",
2054    }
2055}
2056
2057fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2058    match retriever {
2059        AskRetriever::Lex => "Lexical",
2060        AskRetriever::Semantic => "Semantic",
2061        AskRetriever::Hybrid => "Hybrid",
2062        AskRetriever::LexFallback => "Lexical (fallback)",
2063        AskRetriever::TimelineFallback => "Timeline (fallback)",
2064    }
2065}
2066
2067fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2068    match engine {
2069        SearchEngineKind::Tantivy => "text (tantivy)",
2070        SearchEngineKind::LexFallback => "text (fallback)",
2071        SearchEngineKind::Hybrid => "hybrid",
2072    }
2073}
2074
2075fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2076    let digest = hash(uri.as_bytes()).to_hex().to_string();
2077    let prefix_len = digest.len().min(12);
2078    let prefix = &digest[..prefix_len];
2079    format!("mv2-hit-{prefix}-{frame_id}-{start}")
2080}
2081
/// Return `text` unchanged when it has at most `limit` characters; otherwise
/// return the first `limit` characters followed by "...". Operates on Unicode
/// scalar values, so multi-byte characters are never split. Single pass via
/// `char_indices` instead of count-then-take.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    match text.char_indices().nth(limit) {
        // Fewer than `limit + 1` chars: nothing to trim.
        None => text.to_string(),
        // `byte_idx` is the byte offset of the first char past the limit,
        // i.e. a valid boundary after exactly `limit` characters.
        Some((byte_idx, _)) => format!("{}...", &text[..byte_idx]),
    }
}
2090
2091fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
2092    let mut hit_json = serde_json::Map::new();
2093    hit_json.insert("rank".into(), json!(hit.rank));
2094    if let Some(score) = hit.score {
2095        hit_json.insert("score".into(), json!(score));
2096    }
2097    hit_json.insert(
2098        "id".into(),
2099        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
2100    );
2101    hit_json.insert("frame_id".into(), json!(hit.frame_id));
2102    hit_json.insert("uri".into(), json!(hit.uri));
2103    if let Some(title) = &hit.title {
2104        hit_json.insert("title".into(), json!(title));
2105    }
2106    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
2107    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
2108    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
2109    hit_json.insert("text".into(), json!(hit.text));
2110
2111    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
2112        matches: hit.matches,
2113        ..SearchHitMetadata::default()
2114    });
2115    let mut meta_json = serde_json::Map::new();
2116    meta_json.insert("matches".into(), json!(metadata.matches));
2117    if !metadata.tags.is_empty() {
2118        meta_json.insert("tags".into(), json!(metadata.tags));
2119    }
2120    if !metadata.labels.is_empty() {
2121        meta_json.insert("labels".into(), json!(metadata.labels));
2122    }
2123    if let Some(track) = metadata.track {
2124        meta_json.insert("track".into(), json!(track));
2125    }
2126    if let Some(created_at) = metadata.created_at {
2127        meta_json.insert("created_at".into(), json!(created_at));
2128    }
2129    if !metadata.content_dates.is_empty() {
2130        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
2131    }
2132    if !metadata.entities.is_empty() {
2133        let entities_json: Vec<serde_json::Value> = metadata
2134            .entities
2135            .iter()
2136            .map(|e| {
2137                let mut ent = serde_json::Map::new();
2138                ent.insert("name".into(), json!(e.name));
2139                ent.insert("kind".into(), json!(e.kind));
2140                if let Some(conf) = e.confidence {
2141                    ent.insert("confidence".into(), json!(conf));
2142                }
2143                serde_json::Value::Object(ent)
2144            })
2145            .collect();
2146        meta_json.insert("entities".into(), json!(entities_json));
2147    }
2148    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
2149    serde_json::Value::Object(hit_json)
2150}
/// Apply Reciprocal Rank Fusion (RRF) to combine lexical and semantic rankings.
///
/// RRF is mathematically superior to raw score combination because:
/// - BM25 scores are unbounded (0 to infinity)
/// - Cosine similarity is bounded (-1 to 1)
/// - RRF normalizes by using only RANKS, not raw scores
///
/// Formula: Score(d) = sum(1 / (k + rank(d))) where k=60 is standard
///
/// Reorders `response.hits` in place and rewrites each hit's `rank` to its
/// new 1-based position. No-op when there are no hits or when no frame
/// embedding with a dimension matching the runtime can be fetched.
///
/// # Errors
/// Propagates failures from embedding the query or looking up frame embeddings.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Score each hit's stored frame embedding against the query embedding.
    // Embeddings whose dimension does not match the current runtime (e.g.
    // produced by a different model) are skipped rather than compared.
    let query_embedding = runtime.embed(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort by semantic score to derive 1-based semantic ranks (best first).
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Check if query is preference-seeking (suggests, recommend, should I, etc.)
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Pure RRF: Use ONLY ranks, NOT raw scores
    // This prevents a "confidently wrong" high-scoring vector from burying
    // a "precisely correct" keyword match
    const RRF_K: f32 = 60.0;

    // Tuples of (original index, combined RRF score, lexical rank); the
    // lexical rank doubles as a deterministic tie-breaker below.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            // RRF score for lexical rank
            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // RRF score for semantic rank; 0.0 when the frame had no usable
            // embedding, so such hits contribute only their lexical rank.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Apply preference boost for hits containing user preference signals
            // This is a small bonus for content with first-person preference indicators
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 // Scale down to RRF magnitude
            } else {
                0.0
            };

            // Pure RRF: Only rank-based scores, no raw similarity scores
            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Best combined score first; ties broken by the better (lower) lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list in fused order, reassigning 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
2256
2257/// Rerank search results by boosting hits that contain user preference signals.
2258/// Only applies when the query appears to be seeking recommendations or preferences.
2259fn apply_preference_rerank(response: &mut SearchResponse) {
2260    if response.hits.is_empty() {
2261        return;
2262    }
2263
2264    // Check if query is preference-seeking
2265    let query_lower = response.query.to_lowercase();
2266    let is_preference_query = query_lower.contains("suggest")
2267        || query_lower.contains("recommend")
2268        || query_lower.contains("should i")
2269        || query_lower.contains("what should")
2270        || query_lower.contains("prefer")
2271        || query_lower.contains("favorite")
2272        || query_lower.contains("best for me");
2273
2274    if !is_preference_query {
2275        return;
2276    }
2277
2278    // Compute boost scores for each hit
2279    let mut scored: Vec<(usize, f32, f32)> = response
2280        .hits
2281        .iter()
2282        .enumerate()
2283        .map(|(idx, hit)| {
2284            let original_score = hit.score.unwrap_or(0.0);
2285            let preference_boost = compute_preference_boost(&hit.text);
2286            let boosted_score = original_score + preference_boost;
2287            (idx, boosted_score, original_score)
2288        })
2289        .collect();
2290
2291    // Sort by boosted score (descending)
2292    scored.sort_by(|a, b| {
2293        b.1.partial_cmp(&a.1)
2294            .unwrap_or(Ordering::Equal)
2295            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
2296    });
2297
2298    // Reorder hits
2299    let mut reordered = Vec::with_capacity(response.hits.len());
2300    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
2301        let mut hit = response.hits[idx].clone();
2302        hit.rank = rank_idx + 1;
2303        reordered.push(hit);
2304    }
2305
2306    response.hits = reordered;
2307}
2308
/// Compute a boost score for hits that contain user preference signals.
/// Surfaces context where users express preferences, habits, or personal
/// details relevant to recommendation queries.
///
/// Key distinction: content describing the user's ESTABLISHED situation or
/// preferences earns a strong boost (+0.15 per matched pattern), while
/// first-person mentions and REQUEST phrasing earn only a weak one (+0.02
/// per matched pattern). Both use first-person language, but they serve
/// different purposes for personalization. The total is capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    let haystack = text.to_lowercase();

    // Strong signals: past/present user experiences, possessions, and
    // established habits — what the user HAS DONE, HAS, or DOES REGULARLY.
    let established: &[&str] = &[
        // Past tense — indicates actual experience
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        // Current possessions/ownership (established context)
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        // Established habits/preferences
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        // Regular activities (ongoing behavior)
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];

    // Moderate signals: generic first-person mentions.
    let first_person: &[&str] = &[" i ", " my ", " me "];

    // Weak signals: requests/intentions — the user wants something but the
    // text does not describe established context.
    let requests: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    // Sequential f32 additions (via fold) keep rounding behavior identical
    // to a plain accumulation loop.
    let mut boost = 0.0f32;
    boost = established
        .iter()
        .filter(|pat| haystack.contains(**pat))
        .fold(boost, |acc, _| acc + 0.15);
    boost = first_person
        .iter()
        .filter(|pat| haystack.contains(**pat))
        .fold(boost, |acc, _| acc + 0.02);
    boost = requests
        .iter()
        .filter(|pat| haystack.contains(**pat))
        .fold(boost, |acc, _| acc + 0.02);

    // Cap the boost to avoid over-weighting preference signals.
    boost.min(0.5)
}
2401
/// Cosine similarity of two vectors, computed over the overlapping prefix
/// when lengths differ. Returns 0.0 when either vector has (near-)zero norm
/// to avoid division by zero. The fold accumulates in the same element order
/// as a plain loop, so f32 rounding is unchanged.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let (dot, norm_a, norm_b) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, na, nb), (x, y)| (d + x * y, na + x * x, nb + y * y),
    );

    if norm_a <= f32::EPSILON || norm_b <= f32::EPSILON {
        0.0
    } else {
        dot / (norm_a.sqrt() * norm_b.sqrt())
    }
}
2418
2419/// Apply cross-encoder reranking to search results.
2420///
2421/// Cross-encoders directly score query-document pairs and can understand
2422/// more nuanced relevance than bi-encoders (embeddings). This is especially
2423/// useful for personalization queries where semantic similarity != relevance.
2424///
2425/// Uses JINA-reranker-v1-turbo-en (~86MB model) for fast, high-quality reranking.
2426fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
2427    if response.hits.is_empty() || response.hits.len() < 2 {
2428        return Ok(());
2429    }
2430
2431    // Only rerank if we have enough candidates
2432    let candidates_to_rerank = response.hits.len().min(50);
2433
2434    // Initialize the reranker (model will be downloaded on first use, ~86MB)
2435    // Using JINA Turbo - faster than BGE while maintaining good accuracy
2436    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
2437        .with_show_download_progress(true);
2438
2439    let mut reranker = match TextRerank::try_new(options) {
2440        Ok(r) => r,
2441        Err(e) => {
2442            warn!("Failed to initialize cross-encoder reranker: {e}");
2443            return Ok(());
2444        }
2445    };
2446
2447    // Prepare documents for reranking (owned Strings to avoid lifetime issues)
2448    let documents: Vec<String> = response.hits[..candidates_to_rerank]
2449        .iter()
2450        .map(|hit| hit.text.clone())
2451        .collect();
2452
2453    // Rerank using cross-encoder
2454    info!("Cross-encoder reranking {} candidates", documents.len());
2455    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
2456        Ok(results) => results,
2457        Err(e) => {
2458            warn!("Cross-encoder reranking failed: {e}");
2459            return Ok(());
2460        }
2461    };
2462
2463    // Reorder hits based on cross-encoder scores
2464    let mut reordered = Vec::with_capacity(response.hits.len());
2465    for (new_rank, result) in rerank_results.iter().enumerate() {
2466        let original_idx = result.index;
2467        let mut hit = response.hits[original_idx].clone();
2468        hit.rank = new_rank + 1;
2469        // Store cross-encoder score in the hit score for reference
2470        hit.score = Some(result.score);
2471        reordered.push(hit);
2472    }
2473
2474    // Add any remaining hits that weren't reranked (beyond top-50)
2475    for hit in response.hits.iter().skip(candidates_to_rerank) {
2476        let mut h = hit.clone();
2477        h.rank = reordered.len() + 1;
2478        reordered.push(h);
2479    }
2480
2481    response.hits = reordered;
2482    info!("Cross-encoder reranking complete");
2483    Ok(())
2484}
2485
2486/// Build a context string from memory cards stored in the MV2 file.
2487/// Groups facts by entity for better LLM comprehension.
2488fn build_memory_context(mem: &Memvid) -> String {
2489    let entities = mem.memory_entities();
2490    if entities.is_empty() {
2491        return String::new();
2492    }
2493
2494    let mut sections = Vec::new();
2495    for entity in entities {
2496        let cards = mem.get_entity_memories(&entity);
2497        if cards.is_empty() {
2498            continue;
2499        }
2500
2501        let mut entity_lines = Vec::new();
2502        for card in cards {
2503            // Format: "slot: value" with optional polarity indicator
2504            let polarity_marker = card
2505                .polarity
2506                .as_ref()
2507                .map(|p| match p.to_string().as_str() {
2508                    "Positive" => " (+)",
2509                    "Negative" => " (-)",
2510                    _ => "",
2511                })
2512                .unwrap_or("");
2513            entity_lines.push(format!(
2514                "  - {}: {}{}",
2515                card.slot, card.value, polarity_marker
2516            ));
2517        }
2518
2519        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
2520    }
2521
2522    sections.join("\n\n")
2523}
2524
2525/// Build a context string from entities found in search hits.
2526/// Groups entities by type for better LLM comprehension.
2527fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
2528    use std::collections::HashMap;
2529
2530    // Collect unique entities by kind
2531    let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
2532
2533    for hit in hits {
2534        if let Some(metadata) = &hit.metadata {
2535            for entity in &metadata.entities {
2536                entities_by_kind
2537                    .entry(entity.kind.clone())
2538                    .or_default()
2539                    .push(entity.name.clone());
2540            }
2541        }
2542    }
2543
2544    if entities_by_kind.is_empty() {
2545        return String::new();
2546    }
2547
2548    // Deduplicate and format
2549    let mut sections = Vec::new();
2550    let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
2551    sorted_kinds.sort();
2552
2553    for kind in sorted_kinds {
2554        let names = entities_by_kind.get(kind).unwrap();
2555        let mut unique_names: Vec<_> = names.iter().collect();
2556        unique_names.sort();
2557        unique_names.dedup();
2558
2559        let names_str = unique_names
2560            .iter()
2561            .take(10) // Limit to 10 entities per kind
2562            .map(|s| s.as_str())
2563            .collect::<Vec<_>>()
2564            .join(", ");
2565
2566        sections.push(format!("{}: {}", kind, names_str));
2567    }
2568
2569    sections.join("\n")
2570}