1use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{Result, anyhow, bail};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18use memvid_core::{
19 AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
20 SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
21 types::{
22 AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
23 SearchHitMetadata,
24 },
25};
26#[cfg(feature = "temporal_track")]
27use memvid_core::{
28 TemporalContext, TemporalFilter, TemporalNormalizer, TemporalResolution,
29 TemporalResolutionValue, types::SearchHitTemporal,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45 ModelContextFragment, ModelContextFragmentKind, ModelInference, run_model_inference,
46};
47
48use crate::config::{
50 CliConfig, EmbeddingModelChoice, EmbeddingRuntime, load_embedding_runtime,
51 load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
52 try_load_embedding_runtime, try_load_embedding_runtime_for_mv2,
53};
54use crate::utils::{
55 autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56 parse_date_boundary, parse_vector, read_embedding,
57};
58
// Cap (in characters) on retrieved context echoed in command output.
// NOTE(review): the consuming code is outside this chunk — confirm exact use.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone for resolving temporal phrases when no `--tz` is
// supplied (see `build_temporal_filter`).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64 let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65 message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66 if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67 message.push_str(&format!(
68 "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69 model.name(),
70 model.name()
71 ));
72 if model.is_openai() {
73 message.push_str(" (and set `OPENAI_API_KEY`).");
74 } else {
75 message.push('.');
76 }
77 message.push_str(&format!(
78 "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79 model.name()
80 ));
81 message.push_str(&format!(
82 "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83 ));
84 message.push_str("\nOr use `--mode lex` to disable semantic search.");
85 }
86 message
87}
88
// Arguments for the `timeline` command: lists frames in chronological order,
// optionally filtered by timestamp bounds or (with the `temporal_track`
// feature) a natural-language phrase. Plain `//` comments are used throughout
// because `///` doc comments would change the clap-generated help text.
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
    // Walk the timeline newest-first instead of oldest-first.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be > 0).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower Unix-timestamp bound for entries.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper Unix-timestamp bound for entries.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. "last tuesday"); required before
    // --tz/--anchor/--window are accepted (see handle_timeline).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone used to resolve --on (default: DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 instant treated as "now" when resolving relative phrases.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (in minutes) applied around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Time-travel view: drop entries whose frame id exceeds this.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel view: drop entries whose timestamp exceeds this.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
// Arguments for the `when` command (temporal_track feature only): resolve the
// required `--on` phrase into a concrete time window and list the frames that
// fall inside it. `//` comments avoid altering clap-generated help text.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to query.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Natural-language temporal phrase (required), e.g. "yesterday".
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for phrase resolution (default: DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 instant treated as "now" for relative phrases.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be > 0).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Additional lower Unix-timestamp bound.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Additional upper Unix-timestamp bound.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Walk newest-first.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
149
// Arguments for the `ask` command: natural-language question answering over a
// memory file. The memory path and the question may both be supplied
// positionally — handle_ask splits TARGET tokens into at most one
// file-looking token plus question words. `//` comments are used because
// `///` doc comments would change the clap-generated help text.
#[derive(Args)]
pub struct AskArgs {
    // Positional tokens: optional memory-file path mixed with question words.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text (takes precedence over the positional question).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to a single document URI (wins over --scope).
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix (ignored when --uri is set).
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lex / sem / hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
    // Return retrieved context only (no answer synthesis).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    // Include sources in output. NOTE(review): consumed outside this chunk.
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    // Mask PII in output. NOTE(review): consumed outside this chunk.
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    // Prepend stored "known facts" memory context to the retrieved context
    // (see the --memories branch in handle_ask).
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    // Override for the LLM context budget, in characters.
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Date-range filters parsed by parse_date_boundary; --end must not be
    // earlier than --start.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model for answer synthesis; a bare `--use-model` selects "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    // Embedding model used for the query side only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    // Time-travel: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Custom system prompt for the model.
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    // Disable cross-encoder reranking of retrieval hits.
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Skip the LLM entirely. NOTE(review): consumed outside this chunk.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Disable the adaptive result cutoff (AdaptiveConfig in handle_ask).
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Relevancy threshold fed into the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Cutoff strategy used when adaptive mode is active.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
// Retrieval mode flag for `ask`/`audit` (clap value enum; `//` comments avoid
// changing the generated help text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical retrieval only — works without embeddings.
    Lex,
    // Semantic retrieval — requires an embedding runtime (see handle_ask).
    Sem,
    // Combined lexical + semantic retrieval (the default).
    Hybrid,
}
250
251impl From<AskModeArg> for AskMode {
252 fn from(value: AskModeArg) -> Self {
253 match value {
254 AskModeArg::Lex => AskMode::Lex,
255 AskModeArg::Sem => AskMode::Sem,
256 AskModeArg::Hybrid => AskMode::Hybrid,
257 }
258 }
259}
260
// Arguments for the `find` command: retrieval-only search over a memory file
// (no answer synthesis, unlike `ask`). `//` comments are used because `///`
// doc comments would change the clap-generated help text.
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text.
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict to a single document URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Maximum characters per snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit JSON output.
    #[arg(long)]
    pub json: bool,
    // Emit the legacy JSON shape (mutually exclusive with --json).
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search engine selection: auto / lex / sem (/ clip with the feature).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    // Time-travel: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Embedding model used for the query side only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Disable the adaptive result cutoff.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Relevancy threshold fed into the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Cutoff strategy used when adaptive mode is active.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    // Graph-augmented search. NOTE(review): consumed outside this chunk.
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    // Force hybrid lexical+semantic search. NOTE(review): consumed outside
    // this chunk.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    // Disable sketch acceleration. NOTE(review): consumed outside this chunk.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
// Search engine selection for `find` (clap value enum; `//` comments avoid
// changing the generated help text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Automatic engine selection. NOTE(review): resolution logic is outside
    // this chunk — confirm behavior.
    Auto,
    // Lexical search only.
    Lex,
    // Semantic (embedding) search only.
    Sem,
    // CLIP-based search; only compiled with the `clip` feature.
    #[cfg(feature = "clip")]
    Clip,
}
338
// Adaptive result-cutoff strategy flag; maps onto `CutoffStrategy` where the
// request is built (see handle_ask). `//` comments avoid changing help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    // CutoffStrategy::RelativeThreshold { min_ratio: --min-relevancy }.
    Relative,
    // CutoffStrategy::AbsoluteThreshold { min_score: --min-relevancy }.
    Absolute,
    // CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 }.
    Cliff,
    // CutoffStrategy::Elbow { sensitivity: 1.0 }.
    Elbow,
    // CutoffStrategy::Combined (relative threshold + cliff + absolute floor);
    // the default.
    Combined,
}
353
// Arguments for `vec-search`: raw vector similarity search, taking the query
// embedding either inline as CSV floats or from a file. `//` comments avoid
// changing the clap-generated help text.
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as comma-separated floats (exclusive with --embedding).
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file holding the query embedding (exclusive with --vector).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Number of nearest results to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
368
// Arguments for `audit`: answer QUESTION against FILE and emit a report in
// the chosen format. `//` comments avoid changing clap-generated help text.
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit against.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Question to investigate.
    #[arg(value_name = "QUESTION")]
    pub question: String,
    // Write the report here instead of stdout. NOTE(review): handling lives
    // outside this chunk — confirm.
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    // Report format: text / markdown / json.
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    // Maximum characters per snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    // Retrieval mode: lex / sem / hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Date-range filters.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model used for answer synthesis.
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
// Output format for `audit` reports (clap value enum; `//` comments avoid
// changing the generated help text).
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    // Plain text (the default).
    Text,
    // Markdown.
    Markdown,
    // Machine-readable JSON.
    Json,
}
416
/// Handles the `timeline` command: opens the memory read-only, applies
/// limit/since/until/reverse filters (plus, with `temporal_track`, an optional
/// natural-language window from `--on`), then prints entries as JSON or text.
///
/// # Errors
/// Fails on unreadable memory files, invalid temporal modifier combinations
/// (`E-TEMP-005`), phrase-resolution failures, and JSON serialization errors.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of --on and are rejected without it.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the --on phrase into a concrete window; keep the summary so it
    // can be echoed alongside the results.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Time-travel post-filter: drop anything newer than the requested frame id
    // and/or timestamp.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a resolved temporal window, wrap entries in TimelineOutput so
        // the window metadata is included; otherwise emit the bare list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            // One line per entry; newlines in the preview are flattened.
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!(" URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!(" Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
#[cfg(feature = "temporal_track")]
/// Handles the `when` command: resolves the mandatory `--on` phrase into a
/// temporal window, queries the timeline restricted to that window, and prints
/// the resolution summary plus matching frames (text or JSON).
///
/// # Errors
/// Fails on unreadable memory files, phrase/anchor/timezone resolution errors
/// (`E-TEMP-00x`), and JSON serialization errors.
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase up front so the summary can be reported even when no
    // frames match.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Best-effort ISO rendering; empty string when formatting fails.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!(" URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!(" Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
#[cfg(feature = "temporal_track")]
/// JSON envelope for `timeline --json` when an `--on` phrase was supplied:
/// the resolved temporal window plus the matching entries (borrowed).
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Omitted from JSON entirely when no temporal phrase was used.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
602
#[cfg(feature = "temporal_track")]
/// JSON envelope for `when --json`: the phrase-resolution summary plus the
/// matching timeline entries.
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
609
#[cfg(feature = "temporal_track")]
/// JSON-friendly projection of a `TimelineEntry` used by `when --json`
/// (built by `entry_to_when_entry`).
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Raw Unix timestamp.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-entry temporal metadata (anchor + mentions), when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
#[cfg(feature = "temporal_track")]
/// JSON shape of a resolved temporal phrase, shared by `timeline --json`
/// (inside `TimelineOutput`) and `when --json` (inside `WhenOutput`).
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The original `--on` phrase.
    phrase: String,
    /// IANA timezone the phrase was resolved in.
    timezone: String,
    /// Anchor ("now") instant as a Unix timestamp.
    anchor_utc: i64,
    /// Anchor as RFC3339, falling back to the raw timestamp rendered as text.
    anchor_iso: String,
    /// Normalizer confidence score.
    confidence: u16,
    /// Normalizer flags; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// Stable tag from `resolution_kind` ("date", "month", ...).
    resolution_kind: &'static str,
    /// Resolved window bounds (Unix timestamps with ISO renderings).
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// `--window` padding in minutes; omitted when not supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
644
#[cfg(feature = "temporal_track")]
/// Internal record of how a temporal phrase was resolved. Converted to
/// `TemporalSummaryOutput` for JSON or printed via `print_temporal_summary`.
struct TemporalSummary {
    /// The original `--on` phrase.
    phrase: String,
    /// IANA timezone used for resolution.
    tz: String,
    /// Instant treated as "now" during resolution.
    anchor: OffsetDateTime,
    /// Resolved window bounds as Unix timestamps (already padded by --window).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    /// Raw normalizer result (confidence, flags, value kind).
    resolution: TemporalResolution,
    /// `--window` padding in minutes, if supplied.
    window_minutes: Option<u64>,
}
655
#[cfg(feature = "temporal_track")]
/// Resolves a natural-language temporal `phrase` (e.g. "last tuesday") into a
/// concrete `TemporalFilter` plus a `TemporalSummary` describing how the
/// resolution happened.
///
/// * `tz_override` — IANA zone name; falls back to `DEFAULT_TEMPORAL_TZ`.
/// * `anchor_override` — RFC3339 instant treated as "now"; defaults to the
///   current UTC time.
/// * `window_minutes` — optional symmetric padding applied to both ends of
///   the resolved window.
///
/// # Errors
/// `E-TEMP-003` for an empty timezone, `E-TEMP-002` for a malformed anchor,
/// `E-TEMP-001` when the phrase cannot be resolved.
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad the resolved window symmetrically by `--window` minutes. The
    // original code branched on `s == e` (point vs. range) but both branches
    // were byte-identical, so the conditional is collapsed. The minutes value
    // is clamped before the u64→i64 cast so a pathological `--window` cannot
    // overflow the minutes→seconds conversion inside `Duration::minutes`.
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let clamped = minutes.min((i64::MAX / 60) as u64) as i64;
            let delta = TimeDuration::minutes(clamped);
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta.whole_seconds()));
                end = Some(e.saturating_add(delta.whole_seconds()));
            }
        }
    }

    // The phrase/tz are reported through the summary, not the filter itself.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
#[cfg(feature = "temporal_track")]
// Converts the internal `TemporalSummary` into its serializable JSON shape,
// rendering timestamps as RFC3339/ISO strings where formatting succeeds.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    // Fall back to the raw Unix timestamp when RFC3339 formatting fails.
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
#[cfg(feature = "temporal_track")]
// Projects a core `TimelineEntry` into the JSON-friendly `WhenEntry` view,
// attaching an ISO rendering of the timestamp when one can be produced.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    // `None` when the timestamp cannot be formatted; serde then omits it.
    let timestamp_iso = format_timestamp(entry.timestamp);
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso,
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
#[cfg(feature = "temporal_track")]
/// Prints a human-readable description of how a temporal phrase was resolved:
/// phrase, timezone, anchor, window, confidence, flags, and padding. Ends with
/// a blank line separating the summary from the entry listing.
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Fall back to the raw Unix timestamp when RFC3339 formatting fails.
    println!(
        "Anchor: {}",
        summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string())
    );
    let start_iso = summary.start_utc.and_then(format_timestamp);
    let end_iso = summary.end_utc.and_then(format_timestamp);
    // A degenerate window (start == end) is a single resolved instant.
    match (start_iso, end_iso) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    if let Some(window) = summary.window_minutes {
        if window > 0 {
            println!("Window padding: {window} minute(s)");
        }
    }
    println!();
}
796
#[cfg(feature = "temporal_track")]
// Prints per-entry temporal metadata — the anchor instant (if any) and the
// list of extracted temporal mentions — indented beneath its timeline entry.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, then the
        // raw Unix value.
        let label = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc))
            .unwrap_or_else(|| anchor.ts_utc.to_string());
        println!(" Anchor: {} (source: {:?})", label, anchor.source);
    }
    if temporal.mentions.is_empty() {
        return;
    }
    println!(" Mentions:");
    for mention in &temporal.mentions {
        let when = mention
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(mention.ts_utc))
            .unwrap_or_else(|| mention.ts_utc.to_string());
        let mut line = format!(
            " - {} ({:?}, confidence {})",
            when, mention.kind, mention.confidence
        );
        if let Some(text) = mention.text.as_deref() {
            line.push_str(&format!(" — \"{}\"", text));
        }
        println!("{line}");
    }
}
829
#[cfg(feature = "temporal_track")]
/// Converts a resolved temporal value into start/end Unix-timestamp bounds.
/// Point values (a single date or datetime) yield identical start and end.
///
/// NOTE(review): date-based bounds are taken at 00:00:00 UTC (see
/// `date_to_timestamp`), so a date range/month "ends" at midnight of its last
/// day rather than at the end of that day — confirm this is intended.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        // Single day → one timestamp (midnight UTC) for both bounds.
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        // Exact instant → identical bounds.
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        // Whole month → first day through last day of that month.
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
860
#[cfg(feature = "temporal_track")]
/// Maps a temporal resolution to the stable string tag reported in JSON output
/// (`TemporalSummaryOutput::resolution_kind`).
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match resolution.value {
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
871
#[cfg(feature = "temporal_track")]
/// Unix timestamp of the given calendar date at 00:00:00, interpreted as UTC.
fn date_to_timestamp(date: Date) -> i64 {
    PrimitiveDateTime::new(date, Time::MIDNIGHT)
        .assume_offset(UtcOffset::UTC)
        .unix_timestamp()
}
878
#[cfg(feature = "temporal_track")]
// Returns the last calendar day of `month` in `year` by walking forward from
// the 1st until the next day rolls into a different month. Errors only when
// the (year, month, 1) combination is itself invalid.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match last.next_day() {
            // Still inside the target month: advance.
            Some(next) if next.month() == month => last = next,
            // Month rolled over, or we hit the calendar's maximum date.
            _ => break,
        }
    }
    Ok(last)
}
892
893#[cfg(feature = "temporal_track")]
894
895fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
896 if fragments.is_empty() {
897 return;
898 }
899
900 response.context_fragments = fragments
901 .into_iter()
902 .map(|fragment| AskContextFragment {
903 rank: fragment.rank,
904 frame_id: fragment.frame_id,
905 uri: fragment.uri,
906 title: fragment.title,
907 score: fragment.score,
908 matches: fragment.matches,
909 range: Some(fragment.range),
910 chunk_range: fragment.chunk_range,
911 text: fragment.text,
912 kind: Some(match fragment.kind {
913 ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
914 ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
915 }),
916 #[cfg(feature = "temporal_track")]
917 temporal: None,
918 })
919 .collect();
920}
921
922pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
923 crate::utils::require_active_plan(config, "ask")?;
925
926 crate::api::track_query_usage(config, 1)?;
928
929 if args.uri.is_some() && args.scope.is_some() {
930 warn!("--scope ignored because --uri is provided");
931 }
932
933 let mut question_tokens = Vec::new();
934 let mut file_path: Option<PathBuf> = None;
935 for token in &args.targets {
936 if file_path.is_none() && looks_like_memory(token) {
937 file_path = Some(PathBuf::from(token));
938 } else {
939 question_tokens.push(token.clone());
940 }
941 }
942
943 let positional_question = if question_tokens.is_empty() {
944 None
945 } else {
946 Some(question_tokens.join(" "))
947 };
948
949 let question = args
950 .question
951 .or(positional_question)
952 .map(|value| value.trim().to_string())
953 .filter(|value| !value.is_empty());
954
955 let question = question
956 .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;
957
958 let (original_question, search_query) = {
961 let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
964 if let Ok(key) = std::env::var("OPENAI_API_KEY") {
965 (Some("gpt-4o-mini"), Some(key))
967 } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
968 (Some("llama-3.1-8b-instant"), Some(key))
970 } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
971 (Some("claude-haiku-4-5"), Some(key))
973 } else if let Ok(key) = std::env::var("XAI_API_KEY") {
974 (Some("grok-4-fast"), Some(key))
976 } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
977 (Some("mistral-small-latest"), Some(key))
979 } else {
980 (None, None)
982 };
983
984 let _ = (model_for_expansion, api_key_for_expansion); (question.clone(), question.clone())
995 };
996
997 let memory_path = match file_path {
998 Some(path) => path,
999 None => autodetect_memory_file()?,
1000 };
1001
1002 let start = parse_date_boundary(args.start.as_ref(), false)?;
1003 let end = parse_date_boundary(args.end.as_ref(), true)?;
1004 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1005 if end_ts < start_ts {
1006 anyhow::bail!("--end must not be earlier than --start");
1007 }
1008 }
1009
1010 let mut mem = Memvid::open(&memory_path)?;
1012
1013 #[cfg(feature = "replay")]
1015 let _ = mem.load_active_session();
1016
1017 let mv2_dimension = mem.effective_vec_index_dimension()?;
1019
1020 let stats = mem.stats()?;
1022 let has_vectors = stats.vector_count > 0;
1023 let effective_mode = if !has_vectors
1024 && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
1025 {
1026 tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
1027 AskModeArg::Lex
1028 } else {
1029 args.mode.clone()
1030 };
1031
1032 let ask_mode: AskMode = effective_mode.clone().into();
1033 let inferred_model_override = match effective_mode {
1034 AskModeArg::Lex => None,
1035 AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
1036 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
1037 identity.model.map(String::from)
1038 }
1039 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
1040 let models: Vec<_> = identities
1041 .iter()
1042 .filter_map(|entry| entry.identity.model.as_deref())
1043 .collect();
1044 anyhow::bail!(
1045 "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1046 Detected models: {:?}\n\n\
1047 Suggested fix: split into separate memories per embedding model.",
1048 models
1049 );
1050 }
1051 memvid_core::EmbeddingIdentitySummary::Unknown => None,
1052 },
1053 };
1054 let emb_model_override = args
1055 .query_embedding_model
1056 .as_deref()
1057 .or(inferred_model_override.as_deref());
1058 let runtime = match effective_mode {
1059 AskModeArg::Lex => None,
1060 AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
1061 config,
1062 emb_model_override,
1063 mv2_dimension,
1064 )?),
1065 AskModeArg::Hybrid => {
1066 try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
1068 || {
1069 load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1071 .ok()
1072 .map(|rt| {
1073 tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
1074 rt
1075 })
1076 },
1077 )
1078 }
1079 };
1080 if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1081 anyhow::bail!(
1082 "semantic embeddings unavailable; install/cached model required for {:?} mode",
1083 effective_mode
1084 );
1085 }
1086
1087 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1088
1089 let adaptive = if !args.no_adaptive {
1091 Some(AdaptiveConfig {
1092 enabled: true,
1093 max_results: args.max_k,
1094 min_results: 1,
1095 normalize_scores: true,
1096 strategy: match args.adaptive_strategy {
1097 AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1098 min_ratio: args.min_relevancy,
1099 },
1100 AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1101 min_score: args.min_relevancy,
1102 },
1103 AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1104 max_drop_ratio: 0.3,
1105 },
1106 AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1107 AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1108 relative_threshold: args.min_relevancy,
1109 max_drop_ratio: 0.3,
1110 absolute_min: 0.3,
1111 },
1112 },
1113 })
1114 } else {
1115 None
1116 };
1117
1118 let request = AskRequest {
1119 question: search_query, top_k: args.top_k,
1121 snippet_chars: args.snippet_chars,
1122 uri: args.uri.clone(),
1123 scope: args.scope.clone(),
1124 cursor: args.cursor.clone(),
1125 start,
1126 end,
1127 #[cfg(feature = "temporal_track")]
1128 temporal: None,
1129 context_only: args.context_only,
1130 mode: ask_mode,
1131 as_of_frame: args.as_of_frame,
1132 as_of_ts: args.as_of_ts,
1133 adaptive,
1134 acl_context: None,
1135 acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1136 };
1137 let mut response = mem.ask(request, embedder).map_err(|err| match err {
1138 MemvidError::VecDimensionMismatch { expected, actual } => {
1139 anyhow!(vec_dimension_mismatch_help(expected, actual))
1140 }
1141 other => anyhow!(other),
1142 })?;
1143
1144 response.question = original_question;
1147
1148 let is_temporal_query = {
1155 let q_lower = response.question.to_lowercase();
1156 q_lower.contains("current")
1157 || q_lower.contains("latest")
1158 || q_lower.contains("recent")
1159 || q_lower.contains("now")
1160 || q_lower.contains("today")
1161 || q_lower.contains("updated")
1162 || q_lower.contains("new ")
1163 || q_lower.contains("newest")
1164 };
1165 if !args.no_rerank
1166 && !response.retrieval.hits.is_empty()
1167 && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
1168 && !is_temporal_query
1169 {
1170 let mut search_response = SearchResponse {
1172 query: response.question.clone(),
1173 hits: response.retrieval.hits.clone(),
1174 total_hits: response.retrieval.hits.len(),
1175 params: memvid_core::SearchParams {
1176 top_k: args.top_k,
1177 snippet_chars: args.snippet_chars,
1178 cursor: None,
1179 },
1180 elapsed_ms: 0,
1181 engine: memvid_core::SearchEngineKind::Hybrid,
1182 next_cursor: None,
1183 context: String::new(),
1184 };
1185
1186 if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
1187 warn!("Cross-encoder reranking failed: {e}");
1188 } else {
1189 response.retrieval.hits = search_response.hits;
1191 response.retrieval.context = response
1193 .retrieval
1194 .hits
1195 .iter()
1196 .take(10) .map(|hit| hit.text.as_str())
1198 .collect::<Vec<_>>()
1199 .join("\n\n---\n\n");
1200 }
1201 }
1202
1203 if args.memories {
1205 let memory_context = build_memory_context(&mem);
1206 if !memory_context.is_empty() {
1207 response.retrieval.context = format!(
1209 "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
1210 memory_context, response.retrieval.context
1211 );
1212 }
1213 }
1214
1215 let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
1217 if !entity_context.is_empty() {
1218 response.retrieval.context = format!(
1220 "=== ENTITIES MENTIONED ===\n{}\n\n{}",
1221 entity_context, response.retrieval.context
1222 );
1223 }
1224
1225 if args.mask_pii {
1227 use memvid_core::pii::mask_pii;
1228
1229 response.retrieval.context = mask_pii(&response.retrieval.context);
1231
1232 for hit in &mut response.retrieval.hits {
1234 hit.text = mask_pii(&hit.text);
1235 if let Some(chunk_text) = &hit.chunk_text {
1236 hit.chunk_text = Some(mask_pii(chunk_text));
1237 }
1238 }
1239 }
1240
1241 let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;
1242
1243 let mut model_result: Option<ModelInference> = None;
1244 if args.no_llm {
1245 if args.use_model.is_some() {
1247 warn!("--use-model ignored because --no-llm disables LLM synthesis");
1248 }
1249 if args.json {
1250 emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
1251 } else {
1252 emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
1253 }
1254
1255 #[cfg(feature = "replay")]
1257 let _ = mem.save_active_session();
1258
1259 return Ok(());
1260 } else if response.context_only {
1261 if args.use_model.is_some() {
1262 warn!("--use-model ignored because --context-only disables synthesis");
1263 }
1264 } else if let Some(model_name) = args.use_model.as_deref() {
1265 match run_model_inference(
1266 model_name,
1267 &response.question,
1268 &response.retrieval.context,
1269 &response.retrieval.hits,
1270 llm_context_override,
1271 None,
1272 args.system_prompt.as_deref(),
1273 ) {
1274 Ok(inference) => {
1275 response.answer = Some(inference.answer.answer.clone());
1276 response.retrieval.context = inference.context_body.clone();
1277 apply_model_context_fragments(&mut response, inference.context_fragments.clone());
1278 model_result = Some(inference);
1279 }
1280 Err(err) => {
1281 warn!(
1282 "model inference unavailable for '{}': {err}. Falling back to default summary.",
1283 model_name
1284 );
1285 }
1286 }
1287 }
1288
1289 #[cfg(feature = "replay")]
1291 if let Some(ref inference) = model_result {
1292 if let Some(model_name) = args.use_model.as_deref() {
1293 let retrieved_frames: Vec<u64> = response
1295 .retrieval
1296 .hits
1297 .iter()
1298 .map(|hit| hit.frame_id)
1299 .collect();
1300
1301 mem.record_ask_action(
1302 &response.question,
1303 model_name, model_name, inference.answer.answer.as_bytes(),
1306 0, retrieved_frames,
1308 );
1309 }
1310 }
1311
1312 if args.json {
1313 if let Some(model_name) = args.use_model.as_deref() {
1314 emit_model_json(
1315 &response,
1316 model_name,
1317 model_result.as_ref(),
1318 args.sources,
1319 &mut mem,
1320 )?;
1321 } else {
1322 emit_ask_json(
1323 &response,
1324 effective_mode.clone(),
1325 model_result.as_ref(),
1326 args.sources,
1327 &mut mem,
1328 )?;
1329 }
1330 } else {
1331 emit_ask_pretty(
1332 &response,
1333 effective_mode.clone(),
1334 model_result.as_ref(),
1335 args.sources,
1336 &mut mem,
1337 );
1338 }
1339
1340 #[cfg(feature = "replay")]
1342 let _ = mem.save_active_session();
1343
1344 Ok(())
1345}
1346
1347fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1349 use memvid_core::graph_search::{QueryPlanner, hybrid_search};
1350 use memvid_core::types::QueryPlan;
1351
1352 let planner = QueryPlanner::new();
1353
1354 let plan = if args.graph {
1356 let plan = planner.plan(&args.query, args.top_k);
1358 match plan {
1360 QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1361 QueryPlan::graph_only(graph_filter, args.top_k)
1362 }
1363 _ => plan,
1364 }
1365 } else {
1366 planner.plan(&args.query, args.top_k)
1368 };
1369
1370 let hits = hybrid_search(mem, &plan)?;
1372
1373 if args.json {
1374 let output = serde_json::json!({
1376 "query": args.query,
1377 "mode": if args.graph { "graph" } else { "hybrid" },
1378 "plan": format!("{:?}", plan),
1379 "hits": hits.iter().map(|h| {
1380 serde_json::json!({
1381 "frame_id": h.frame_id,
1382 "score": h.score,
1383 "graph_score": h.graph_score,
1384 "vector_score": h.vector_score,
1385 "matched_entity": h.matched_entity,
1386 "preview": h.preview,
1387 })
1388 }).collect::<Vec<_>>(),
1389 });
1390 println!("{}", serde_json::to_string_pretty(&output)?);
1391 } else {
1392 let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1394 println!("{} search for: \"{}\"", mode_str, args.query);
1395 println!("Plan: {:?}", plan);
1396 println!();
1397
1398 if hits.is_empty() {
1399 println!("No results found.");
1400 } else {
1401 println!("Results ({} hits):", hits.len());
1402 for (i, hit) in hits.iter().enumerate() {
1403 println!();
1404 println!(
1405 "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1406 i + 1,
1407 hit.frame_id,
1408 hit.score,
1409 hit.graph_score,
1410 hit.vector_score
1411 );
1412 if let Some(entity) = &hit.matched_entity {
1413 println!(" Matched entity: {}", entity);
1414 }
1415 if let Some(preview) = &hit.preview {
1416 let truncated = if preview.len() > 200 {
1417 format!("{}...", &preview[..200])
1418 } else {
1419 preview.clone()
1420 };
1421 println!(" {}", truncated.replace('\n', " "));
1422 }
1423 }
1424 }
1425 }
1426
1427 Ok(())
1428}
1429
/// Handle the `find` command: lexical, semantic, hybrid, or CLIP search.
///
/// Mode resolution:
/// - `--graph` / `--hybrid` delegate to `handle_graph_find` immediately.
/// - `Sem` requires an embedding runtime and errors out if one cannot load.
/// - `Auto` upgrades to hybrid when a runtime loads and the stored embedding
///   identities are consistent; otherwise it degrades to lexical.
/// - `Clip` (feature `clip`) encodes the query with the CLIP model and
///   searches the visual index.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    crate::utils::require_active_plan(config, "find")?;

    crate::api::track_query_usage(config, 1)?;

    let mut mem = open_read_only_mem(&args.file)?;

    // Best-effort session restore; failures are intentionally ignored.
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    if args.graph || args.hybrid {
        return handle_graph_find(&mut mem, &args);
    }

    // --uri is the more specific filter, so it wins over --scope.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    let mv2_dimension = mem.effective_vec_index_dimension()?;
    // Embedding identity only matters when a semantic query might run.
    let identity_summary = match args.mode {
        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
        #[cfg(feature = "clip")]
        SearchMode::Clip => None,
        SearchMode::Lex => None,
    };

    let mut semantic_allowed = true;
    // Infer the model recorded in the memory so the query-side runtime
    // matches it. Mixed models make vector scores incomparable: hard error
    // for forced semantic mode, soft downgrade for auto mode.
    let inferred_model_override = match identity_summary.as_ref() {
        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
            identity.model.as_deref().map(|value| value.to_string())
        }
        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
            let models: Vec<_> = identities
                .iter()
                .filter_map(|entry| entry.identity.model.as_deref())
                .collect();
            if args.mode == SearchMode::Sem {
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            warn!(
                "semantic search disabled: mixed embedding models detected: {:?}",
                models
            );
            semantic_allowed = false;
            None
        }
        _ => None,
    };

    // Explicit CLI override takes precedence over the inferred model.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());

    // Resolve the user-facing mode label and (optionally) the runtime.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if !semantic_allowed {
                ("Lexical (semantic unsafe)".to_string(), None)
            } else if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Short machine-readable mode key used in JSON output.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // CLIP path: self-contained — encodes, searches, prints, and returns.
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Trace-level dump of every raw CLIP hit for debugging.
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // Hits with a distance at or beyond this threshold are dropped as
        // irrelevant. NOTE(review): 1.26 looks empirically tuned — confirm.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                // 1 - d^2/2 converts L2 distance to cosine similarity,
                // assuming unit-normalized embeddings — TODO confirm.
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                // Hits whose frame preview cannot be loaded are skipped.
                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {} k={} time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // Semantic-only path: adaptive cutoff first, then fixed-k vector search,
    // and finally lexical search + rerank as a last resort.
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        let query_embedding = runtime.embed_query(&args.query)?;

        // --uri doubles as a scope filter when --scope is absent.
        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if !args.no_adaptive {
            // Map the CLI strategy flag onto a core cutoff strategy.
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35,
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive vector search)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Dimension mismatch is unrecoverable; surface the
                    // detailed help instead of retrying.
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (vector search fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                            }
                            return Err(anyhow!(
                                "Both adaptive and fixed-k search failed: {e}, {e2}"
                            ));
                        }
                    }
                }
            }
        } else {
            // Adaptive cutoff disabled: plain fixed-k vector search.
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (vector search)".to_string(), None)
                }
                Err(e) => {
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    // Last resort: lexical retrieval reranked semantically.
                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                        no_sketch: args.no_sketch,
                        acl_context: None,
                        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // Lexical (or auto-hybrid) path.
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
            no_sketch: args.no_sketch,
            acl_context: None,
            acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
        };

        let mut resp = mem.search(request)?;

        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        // In auto mode with a runtime available, rerank lexical hits
        // semantically to get hybrid behavior.
        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {} k={} time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        // Only printed when the adaptive cutoff actually ran.
        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }

    // Best-effort session persistence; failures are ignored.
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1875
1876pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1877 crate::api::track_query_usage(config, 1)?;
1879
1880 let mut mem = open_read_only_mem(&args.file)?;
1881 let vector = if let Some(path) = args.embedding.as_deref() {
1882 read_embedding(path)?
1883 } else if let Some(vector_string) = &args.vector {
1884 parse_vector(vector_string)?
1885 } else {
1886 anyhow::bail!("provide --vector or --embedding for search input");
1887 };
1888
1889 let hits = mem
1890 .search_vec(&vector, args.limit)
1891 .map_err(|err| match err {
1892 MemvidError::VecDimensionMismatch { expected, actual } => {
1893 anyhow!(vec_dimension_mismatch_help(expected, actual))
1894 }
1895 other => anyhow!(other),
1896 })?;
1897 let mut enriched = Vec::with_capacity(hits.len());
1898 for hit in hits {
1899 let preview = mem.frame_preview_by_id(hit.frame_id)?;
1900 enriched.push((hit.frame_id, hit.distance, preview));
1901 }
1902
1903 if args.json {
1904 let json_hits: Vec<_> = enriched
1905 .iter()
1906 .map(|(frame_id, distance, preview)| {
1907 json!({
1908 "frame_id": frame_id,
1909 "distance": distance,
1910 "preview": preview,
1911 })
1912 })
1913 .collect();
1914 let json_str = serde_json::to_string_pretty(&json_hits)?;
1915 println!("{}", json_str.to_colored_json_auto()?);
1916 } else if enriched.is_empty() {
1917 println!("No vector matches found");
1918 } else {
1919 for (frame_id, distance, preview) in enriched {
1920 println!("frame {frame_id} (distance {distance:.6}): {preview}");
1921 }
1922 }
1923 Ok(())
1924}
1925
1926pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1927 use memvid_core::AuditOptions;
1928 use std::fs::File;
1929 use std::io::Write;
1930
1931 let mut mem = Memvid::open(&args.file)?;
1932
1933 let start = parse_date_boundary(args.start.as_ref(), false)?;
1935 let end = parse_date_boundary(args.end.as_ref(), true)?;
1936 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1937 if end_ts < start_ts {
1938 anyhow::bail!("--end must not be earlier than --start");
1939 }
1940 }
1941
1942 let ask_mode: AskMode = args.mode.into();
1944 let runtime = match args.mode {
1945 AskModeArg::Lex => None,
1946 AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1947 AskModeArg::Hybrid => try_load_embedding_runtime(config),
1948 };
1949 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1950
1951 let options = AuditOptions {
1953 top_k: Some(args.top_k),
1954 snippet_chars: Some(args.snippet_chars),
1955 mode: Some(ask_mode),
1956 scope: args.scope,
1957 start,
1958 end,
1959 include_snippets: true,
1960 };
1961
1962 let mut report = mem.audit(&args.question, Some(options), embedder)?;
1964
1965 if let Some(model_name) = args.use_model.as_deref() {
1967 let context = report
1969 .sources
1970 .iter()
1971 .filter_map(|s| s.snippet.clone())
1972 .collect::<Vec<_>>()
1973 .join("\n\n");
1974
1975 match run_model_inference(
1976 model_name,
1977 &report.question,
1978 &context,
1979 &[], None,
1981 None,
1982 None, ) {
1984 Ok(inference) => {
1985 report.answer = Some(inference.answer.answer);
1986 report.notes.push(format!(
1987 "Answer synthesized by model: {}",
1988 inference.answer.model
1989 ));
1990 }
1991 Err(err) => {
1992 warn!(
1993 "model inference unavailable for '{}': {err}. Using default answer.",
1994 model_name
1995 );
1996 }
1997 }
1998 }
1999
2000 let output = match args.format {
2002 AuditFormat::Text => report.to_text(),
2003 AuditFormat::Markdown => report.to_markdown(),
2004 AuditFormat::Json => serde_json::to_string_pretty(&report)?,
2005 };
2006
2007 if let Some(out_path) = args.out {
2009 let mut file = File::create(&out_path)?;
2010 file.write_all(output.as_bytes())?;
2011 println!("Audit report written to: {}", out_path.display());
2012 } else {
2013 println!("{}", output);
2014 }
2015
2016 Ok(())
2017}
2018
2019fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2020 let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2021
2022 let mut additional_params = serde_json::Map::new();
2023 if let Some(cursor) = &response.params.cursor {
2024 additional_params.insert("cursor".into(), json!(cursor));
2025 }
2026
2027 let mut params = serde_json::Map::new();
2028 params.insert("top_k".into(), json!(response.params.top_k));
2029 params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2030 params.insert("mode".into(), json!(mode));
2031 params.insert(
2032 "additional_params".into(),
2033 serde_json::Value::Object(additional_params),
2034 );
2035
2036 let mut metadata_json = serde_json::Map::new();
2037 metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2038 metadata_json.insert("total_hits".into(), json!(response.total_hits));
2039 metadata_json.insert(
2040 "next_cursor".into(),
2041 match &response.next_cursor {
2042 Some(cursor) => json!(cursor),
2043 None => serde_json::Value::Null,
2044 },
2045 );
2046 metadata_json.insert("engine".into(), json!(response.engine));
2047 metadata_json.insert("params".into(), serde_json::Value::Object(params));
2048
2049 let body = json!({
2050 "version": "mv2.result.v2",
2051 "query": response.query,
2052 "metadata": metadata_json,
2053 "hits": hits,
2054 "context": response.context,
2055 });
2056 let json_str = serde_json::to_string_pretty(&body)?;
2057 println!("{}", json_str.to_colored_json_auto()?);
2058 Ok(())
2059}
2060
/// Serialize an ask response as the `mv2.ask.v1` JSON envelope and print it.
///
/// Optional sections (`model`/`model_used`/`usage`/`grounding`, `sources`,
/// `follow_up`) are only inserted when the corresponding data exists, so
/// consumers must treat them as optional schema fields.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations as JSON objects; chunk_range/score only when present.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; the context is truncated to keep output bounded.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // Model metadata is attached only when inference actually ran.
    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // `model_used` is only surfaced when a fallback model answered.
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                // A cache hit costs nothing now; report the avoided spend
                // under `saved_cost_usd` instead.
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Follow-up suggestions appear only for weakly-grounded answers.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2170
2171fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2172 response
2173 .citations
2174 .iter()
2175 .enumerate()
2176 .map(|(idx, citation)| {
2177 let mut source = serde_json::Map::new();
2178 source.insert("index".into(), json!(idx + 1));
2179 source.insert("frame_id".into(), json!(citation.frame_id));
2180 source.insert("uri".into(), json!(citation.uri));
2181
2182 if let Some(range) = citation.chunk_range {
2183 source.insert("chunk_range".into(), json!([range.0, range.1]));
2184 }
2185 if let Some(score) = citation.score {
2186 source.insert("score".into(), json!(score));
2187 }
2188
2189 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2191 if let Some(title) = frame.title {
2192 source.insert("title".into(), json!(title));
2193 }
2194 if !frame.tags.is_empty() {
2195 source.insert("tags".into(), json!(frame.tags));
2196 }
2197 if !frame.labels.is_empty() {
2198 source.insert("labels".into(), json!(frame.labels));
2199 }
2200 source.insert("frame_timestamp".into(), json!(frame.timestamp));
2201 if !frame.content_dates.is_empty() {
2202 source.insert("content_dates".into(), json!(frame.content_dates));
2203 }
2204 }
2205
2206 if let Some(hit) = response
2208 .retrieval
2209 .hits
2210 .iter()
2211 .find(|h| h.frame_id == citation.frame_id)
2212 {
2213 let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2214 source.insert("snippet".into(), json!(snippet));
2215 }
2216
2217 serde_json::Value::Object(source)
2218 })
2219 .collect()
2220}
2221
2222fn build_follow_up_suggestions(
2225 response: &AskResponse,
2226 inference: Option<&ModelInference>,
2227 mem: &mut Memvid,
2228) -> Option<serde_json::Value> {
2229 let needs_followup = inference
2231 .and_then(|inf| inf.grounding.as_ref())
2232 .map(|g| g.score < 0.3 || g.has_warning)
2233 .unwrap_or(false);
2234
2235 let low_retrieval = response
2237 .retrieval
2238 .hits
2239 .first()
2240 .and_then(|h| h.score)
2241 .map(|score| score < -2.0)
2242 .unwrap_or(true);
2243
2244 if !needs_followup && !low_retrieval {
2245 return None;
2246 }
2247
2248 let limit = std::num::NonZeroU64::new(20).unwrap();
2250 let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2251
2252 let available_topics: Vec<String> = mem
2253 .timeline(timeline_query)
2254 .ok()
2255 .map(|entries| {
2256 entries
2257 .iter()
2258 .filter_map(|e| {
2259 let preview = e.preview.trim();
2261 if preview.is_empty() || preview.len() < 5 {
2262 return None;
2263 }
2264 let first_line = preview.lines().next().unwrap_or(preview);
2266 if first_line.len() > 60 {
2267 Some(format!("{}...", &first_line[..57]))
2268 } else {
2269 Some(first_line.to_string())
2270 }
2271 })
2272 .collect::<std::collections::HashSet<_>>()
2273 .into_iter()
2274 .take(5)
2275 .collect()
2276 })
2277 .unwrap_or_default();
2278
2279 let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2281 "No relevant information found in memory"
2282 } else if inference
2283 .and_then(|i| i.grounding.as_ref())
2284 .map(|g| g.has_warning)
2285 .unwrap_or(false)
2286 {
2287 "Answer may not be well-supported by the available context"
2288 } else {
2289 "Low confidence in the answer"
2290 };
2291
2292 let suggestions: Vec<String> = if available_topics.is_empty() {
2294 vec![
2295 "What information is stored in this memory?".to_string(),
2296 "Can you list the main topics covered?".to_string(),
2297 ]
2298 } else {
2299 available_topics
2300 .iter()
2301 .take(3)
2302 .map(|topic| format!("Tell me about {}", topic))
2303 .chain(std::iter::once(
2304 "What topics are in this memory?".to_string(),
2305 ))
2306 .collect()
2307 };
2308
2309 Some(json!({
2310 "needed": true,
2311 "reason": reason,
2312 "hint": if available_topics.is_empty() {
2313 "This memory may not contain information about your query."
2314 } else {
2315 "This memory contains information about different topics. Try asking about those instead."
2316 },
2317 "available_topics": available_topics,
2318 "suggestions": suggestions
2319 }))
2320}
2321
/// Emit the ask response as colored JSON for model-assisted runs.
///
/// The payload always carries question/model/answer/context; when `inference`
/// is present it is enriched with cache status, token usage (cost is zeroed
/// for cache hits), and grounding stats. `include_sources` adds per-citation
/// source details, and follow-up suggestions are appended when warranted.
fn emit_model_json(
    response: &AskResponse,
    requested_model: &str,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let answer = response.answer.clone().unwrap_or_default();
    // Prefer the labels recorded on the inference; fall back to the CLI arg.
    let requested_label = inference
        .map(|m| m.answer.requested.clone())
        .unwrap_or_else(|| requested_model.to_string());
    let used_label = inference
        .map(|m| m.answer.model.clone())
        .unwrap_or_else(|| requested_model.to_string());

    let mut body = json!({
        "question": response.question,
        "model": requested_label,
        "model_used": used_label,
        "answer": answer,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                map.insert(
                    "usage".into(),
                    // A cache hit costs nothing; report the avoided spend
                    // as savings instead.
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Attach follow-up suggestions only when the heuristics deem them needed.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2397
/// Pretty-print an ask response to stdout for human consumption.
///
/// Prints, in order: a header line (mode/retriever/latency), optional model,
/// usage, and grounding details, engine and hit counts, the answer (or a
/// context-only notice), citations, either full source details
/// (`include_sources`) or the search-hit table, and finally any follow-up
/// suggestions produced by `build_follow_up_suggestions`.
fn emit_ask_pretty(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) {
    println!(
        "mode: {} retriever: {} k={} latency: {} ms (retrieval {} ms)",
        ask_mode_pretty(requested_mode),
        ask_retriever_pretty(response.retriever),
        response.retrieval.params.top_k,
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    if let Some(inference) = inference {
        let model = &inference.answer;
        let cached_label = if inference.cached { " [CACHED]" } else { "" };
        // Show both labels only when a fallback swapped the model out.
        if model.requested.trim() == model.model {
            println!("model: {}{}", model.model, cached_label);
        } else {
            println!(
                "model requested: {} model used: {}{}",
                model.requested, model.model, cached_label
            );
        }
        if let Some(usage) = &inference.usage {
            // Cache hits cost nothing; surface the avoided spend instead.
            let cost_label = if inference.cached {
                format!("$0.00 (saved ${:.6})", usage.cost_usd)
            } else {
                format!("${:.6}", usage.cost_usd)
            };
            println!(
                "tokens: {} input + {} output = {} cost: {}",
                usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
            );
        }
        if let Some(grounding) = &inference.grounding {
            let warning = if grounding.has_warning {
                format!(
                    " [WARNING: {}]",
                    grounding
                        .warning_reason
                        .as_deref()
                        .unwrap_or("potential hallucination")
                )
            } else {
                String::new()
            };
            println!(
                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
                grounding.score * 100.0,
                grounding.label(),
                grounding.grounded_sentences,
                grounding.sentence_count,
                warning
            );
        }
    }
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );

    if response.context_only {
        println!();
        println!("Context-only mode: synthesis disabled.");
        println!();
    } else if let Some(answer) = &response.answer {
        println!();
        println!("Answer:\n{answer}");
        println!();
    }

    if !response.citations.is_empty() {
        println!("Citations:");
        for citation in &response.citations {
            match citation.score {
                Some(score) => println!(
                    "[{}] {} (frame {}, score {:.3})",
                    citation.index, citation.uri, citation.frame_id, score
                ),
                None => println!(
                    "[{}] {} (frame {})",
                    citation.index, citation.uri, citation.frame_id
                ),
            }
        }
        println!();
    }

    if include_sources && !response.citations.is_empty() {
        println!("=== SOURCES ===");
        println!();
        for citation in &response.citations {
            println!("[{}] {}", citation.index, citation.uri);

            // Enrich each citation with frame metadata when the frame loads;
            // unreadable frames are silently skipped (best-effort output).
            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", citation.frame_id);
                if let Some(score) = citation.score {
                    println!("    Score: {:.4}", score);
                }
                if let Some((start, end)) = citation.chunk_range {
                    println!("    Range: [{}..{})", start, end);
                }
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
            }

            // Show the matching retrieval hit's text, flattened to one line.
            if let Some(hit) = response
                .retrieval
                .hits
                .iter()
                .find(|h| h.frame_id == citation.frame_id)
            {
                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
                // NOTE(review): byte-indexed slice; `&snippet[..200]` panics
                // if byte 200 is not a UTF-8 char boundary — consider a
                // char-based truncation here.
                let truncated = if snippet.len() > 200 {
                    format!("{}...", &snippet[..200])
                } else {
                    snippet.clone()
                };
                println!("    Snippet: {}", truncated.replace('\n', " "));
            }
            println!();
        }
    }

    if !include_sources {
        println!();
        emit_search_table(&response.retrieval);
    }

    // Follow-up block: only rendered when the heuristics flag low confidence.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
            if needed {
                println!();
                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
                println!("💡 FOLLOW-UP SUGGESTIONS");
                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
                    println!("Reason: {}", reason);
                }

                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
                    println!("Hint: {}", hint);
                }

                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
                    if !topics.is_empty() {
                        println!();
                        println!("Available topics in this memory:");
                        for topic in topics.iter().filter_map(|t| t.as_str()) {
                            println!("  • {}", topic);
                        }
                    }
                }

                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
                    if !suggestions.is_empty() {
                        println!();
                        println!("Try asking:");
                        for (i, suggestion) in
                            suggestions.iter().filter_map(|s| s.as_str()).enumerate()
                        {
                            println!("  {}. \"{}\"", i + 1, suggestion);
                        }
                    }
                }
                println!();
            }
        }
    }
}
2594
/// Emit retrieval hits as a verbatim-evidence JSON document (no synthesis).
///
/// Each hit becomes an `evidence` entry carrying its chunk text (falling
/// back to the full frame text); `include_sources` adds frame-level metadata
/// for every hit whose frame can be loaded. Output is pretty-printed,
/// colored JSON with a versioned envelope (`mv2.evidence.v1`).
fn emit_verbatim_evidence_json(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let evidence: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let mut entry = serde_json::Map::new();
            // 1-based index to match the pretty output's citation numbering.
            entry.insert("index".into(), json!(idx + 1));
            entry.insert("frame_id".into(), json!(hit.frame_id));
            entry.insert("uri".into(), json!(&hit.uri));
            if let Some(title) = &hit.title {
                entry.insert("title".into(), json!(title));
            }
            // Prefer the focused chunk; fall back to the whole hit text.
            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
            entry.insert("text".into(), json!(verbatim));
            if let Some(score) = hit.score {
                entry.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(entry)
        })
        .collect();

    // Frame metadata per hit; hits whose frames fail to load are dropped.
    let sources: Option<Vec<_>> = if include_sources {
        Some(
            response
                .retrieval
                .hits
                .iter()
                .filter_map(|hit| {
                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
                        let mut source = serde_json::Map::new();
                        source.insert("frame_id".into(), json!(frame.id));
                        source.insert(
                            "uri".into(),
                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
                        );
                        if let Some(title) = &frame.title {
                            source.insert("title".into(), json!(title));
                        }
                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
                        if !frame.tags.is_empty() {
                            source.insert("tags".into(), json!(frame.tags));
                        }
                        if !frame.labels.is_empty() {
                            source.insert("labels".into(), json!(frame.labels));
                        }
                        serde_json::Value::Object(source)
                    })
                })
                .collect(),
        )
    } else {
        None
    };

    let mut body = json!({
        "version": "mv2.evidence.v1",
        "mode": "verbatim",
        "question": response.question,
        "evidence": evidence,
        "evidence_count": evidence.len(),
        "total_hits": response.retrieval.total_hits,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
    });

    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
        map.insert("sources".into(), json!(sources));
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2682
/// Pretty-print retrieval hits verbatim (no LLM synthesis) to stdout.
///
/// Renders a stats header, a banner with the (truncated) question, each hit
/// as a quoted block with a normalized relevance percentage, optional
/// per-frame source details (`include_sources`), and a trailing note that
/// `--use-model` enables synthesis.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {} latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Min/max over all hit scores, used to normalize BM25 scores to 0-100%.
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!("    Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Prefer the focused chunk text; skip blank lines in the quote block.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!("    │ {}", line);
            }
        }
        println!();
    }

    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        // Best-effort frame metadata; unloadable frames are skipped silently.
        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!("    Title: {}", title);
                }
                println!("    Frame ID: {}", frame.id);
                println!("    Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!("    Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!("    Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!("    Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2804
2805fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2806 let hits: Vec<_> = response
2807 .hits
2808 .iter()
2809 .map(|hit| {
2810 json!({
2811 "frame_id": hit.frame_id,
2812 "matches": hit.matches,
2813 "snippets": [hit.text.clone()],
2814 })
2815 })
2816 .collect();
2817 println!("{}", serde_json::to_string_pretty(&hits)?);
2818 Ok(())
2819}
2820
/// Print search hits as a human-readable table-like listing.
///
/// Each hit shows rank/uri/match count plus any optional fields that are
/// present (title, normalized relevance, ranges, chunk text, metadata,
/// snippet). A pagination cursor is printed at the end when available.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Min/max over all scores, used to normalize BM25 values to 0-100%.
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!("   Title: {title}");
        }
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!("   Relevance: {:.0}%", normalized);
        }
        println!("   Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!("   Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!("   Chunk Text: {}", chunk_text.trim());
        }
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!("   Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!("   Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!("   Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!("   Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!("   Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                // Render entities as "name (kind)" pairs.
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!("   Entities: {}", entity_strs.join(", "));
            }
        }
        println!("   Snippet: {}", hit.text.trim());
        println!();
    }
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2879
2880fn ask_mode_display(mode: AskModeArg) -> &'static str {
2881 match mode {
2882 AskModeArg::Lex => "lex",
2883 AskModeArg::Sem => "sem",
2884 AskModeArg::Hybrid => "hybrid",
2885 }
2886}
2887
2888fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2889 match mode {
2890 AskModeArg::Lex => "Lexical",
2891 AskModeArg::Sem => "Semantic",
2892 AskModeArg::Hybrid => "Hybrid",
2893 }
2894}
2895
2896fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2897 match retriever {
2898 AskRetriever::Lex => "lex",
2899 AskRetriever::Semantic => "semantic",
2900 AskRetriever::Hybrid => "hybrid",
2901 AskRetriever::LexFallback => "lex_fallback",
2902 AskRetriever::TimelineFallback => "timeline_fallback",
2903 }
2904}
2905
2906fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2907 match retriever {
2908 AskRetriever::Lex => "Lexical",
2909 AskRetriever::Semantic => "Semantic",
2910 AskRetriever::Hybrid => "Hybrid",
2911 AskRetriever::LexFallback => "Lexical (fallback)",
2912 AskRetriever::TimelineFallback => "Timeline (fallback)",
2913 }
2914}
2915
2916fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2917 match engine {
2918 SearchEngineKind::Tantivy => "text (tantivy)",
2919 SearchEngineKind::LexFallback => "text (fallback)",
2920 SearchEngineKind::Hybrid => "hybrid",
2921 }
2922}
2923
2924fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2925 let digest = hash(uri.as_bytes()).to_hex().to_string();
2926 let prefix_len = digest.len().min(12);
2927 let prefix = &digest[..prefix_len];
2928 format!("mv2-hit-{prefix}-{frame_id}-{start}")
2929}
2930
/// Truncate `text` to at most `limit` characters, appending "..." when cut.
///
/// Operates on `char` boundaries, so it is safe for multi-byte UTF-8 input.
/// Returns the input unchanged when it already fits within `limit` chars.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    // Single pass: `nth(limit)` yields the char at index `limit` (0-based),
    // which exists only when the text is strictly longer than `limit` chars.
    match text.char_indices().nth(limit) {
        // Fits within the limit: nothing to cut.
        None => text.to_string(),
        // `idx` is the byte offset where char number `limit` begins, so
        // `&text[..idx]` holds exactly the first `limit` chars.
        Some((idx, _)) => format!("{}...", &text[..idx]),
    }
}
2939
/// Map a raw BM25 score onto a 0-100% scale via min-max normalization over
/// the current result set. A degenerate range (all scores equal) maps to
/// 100% so a lone hit never reads as irrelevant.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let range = max_score - min_score;
    if range.abs() < f32::EPSILON {
        100.0
    } else {
        ((score - min_score) / range * 100.0).clamp(0.0, 100.0)
    }
}
2956
/// Compute the (min, max) over the present scores in `scores`.
///
/// Entries that are `None` are ignored; when no score is present at all the
/// function returns `(0.0, 0.0)`.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    scores
        .iter()
        .copied()
        .flatten()
        .fold(None, |bounds, s| match bounds {
            None => Some((s, s)),
            Some((lo, hi)) => Some((lo.min(s), hi.max(s))),
        })
        .unwrap_or((0.0, 0.0))
}
2970
/// Serialize one search hit into the JSON object shape used by search output.
///
/// Always includes rank/id/frame_id/uri/range/chunk_range/text; optional
/// fields (score, title, metadata details) are emitted only when present.
/// When the hit carries no metadata, a minimal record holding just the match
/// count is synthesized so `metadata.matches` is always available.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // Fall back to the full hit range when no chunk-level range exists.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata (match count only) when none is attached.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        // Entities serialize as {name, kind[, confidence]} objects.
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Rerank lexical search hits by fusing them with semantic similarity.
///
/// Embeds the query, scores each hit's frame embedding by cosine similarity
/// (embeddings with a mismatched dimension are skipped), and combines the
/// lexical and semantic rankings with Reciprocal Rank Fusion (RRF).
/// Preference-style queries ("suggest", "recommend", ...) additionally get a
/// small boost for first-person/preference-laden hit text. Ranks are
/// rewritten to the new order. No-ops when there are no hits or no usable
/// frame embeddings; errors from embedding lookups are propagated.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Only compare embeddings produced at the runtime's dimension.
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Derive a 1-based semantic rank per frame from the similarity scores.
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Heuristic: queries phrased as requests for suggestions favor hits that
    // describe the user's own preferences/possessions.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Standard RRF damping constant: contribution of rank r is 1/(60 + r).
    const RRF_K: f32 = 60.0;

    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // Hits with no semantic score contribute nothing on that side.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Scaled down (x0.01) so the boost only breaks near-ties between
            // RRF contributions rather than dominating them.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01
            } else {
                0.0
            };

            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Sort by fused score descending; tie-break on original lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3136
/// Rerank hits for preference-style queries by boosting first-person text.
///
/// Only activates when the query looks like a request for suggestions or
/// recommendations; otherwise the response is left untouched. Each hit's
/// BM25 score (0.0 when absent) is augmented with `compute_preference_boost`
/// and hits are re-sorted by the boosted score, tie-breaking on the original
/// score. Ranks are rewritten to the new order.
fn apply_preference_rerank(response: &mut SearchResponse) {
    if response.hits.is_empty() {
        return;
    }

    // Same preference-query heuristic as apply_semantic_rerank.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    if !is_preference_query {
        return;
    }

    // (hit index, boosted score, original score) per hit.
    let mut scored: Vec<(usize, f32, f32)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let original_score = hit.score.unwrap_or(0.0);
            let preference_boost = compute_preference_boost(&hit.text);
            let boosted_score = original_score + preference_boost;
            (idx, boosted_score, original_score)
        })
        .collect();

    // Boosted score descending; fall back to the original score on ties.
    scored.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
    });

    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
}
3188
3189fn compute_preference_boost(text: &str) -> f32 {
3198 let text_lower = text.to_lowercase();
3199 let mut boost = 0.0f32;
3200
3201 let established_context = [
3204 "i've been",
3206 "i've had",
3207 "i've used",
3208 "i've tried",
3209 "i recently",
3210 "i just",
3211 "lately",
3212 "i started",
3213 "i bought",
3214 "i harvested",
3215 "i grew",
3216 "my garden",
3218 "my home",
3219 "my house",
3220 "my setup",
3221 "my equipment",
3222 "my camera",
3223 "my car",
3224 "my phone",
3225 "i have a",
3226 "i own",
3227 "i got a",
3228 "i prefer",
3230 "i like to",
3231 "i love to",
3232 "i enjoy",
3233 "i usually",
3234 "i always",
3235 "i typically",
3236 "my favorite",
3237 "i tend to",
3238 "i often",
3239 "i use",
3241 "i grow",
3242 "i cook",
3243 "i make",
3244 "i work on",
3245 "i'm into",
3246 "i collect",
3247 ];
3248 for pattern in established_context {
3249 if text_lower.contains(pattern) {
3250 boost += 0.15;
3251 }
3252 }
3253
3254 let first_person = [" i ", " my ", " me "];
3256 for pattern in first_person {
3257 if text_lower.contains(pattern) {
3258 boost += 0.02;
3259 }
3260 }
3261
3262 let request_patterns = [
3265 "i'm trying to",
3266 "i want to",
3267 "i need to",
3268 "looking for",
3269 "can you suggest",
3270 "can you help",
3271 ];
3272 for pattern in request_patterns {
3273 if text_lower.contains(pattern) {
3274 boost += 0.02;
3275 }
3276 }
3277
3278 boost.min(0.5)
3280}
3281
/// Cosine similarity of two vectors, compared over their common length.
///
/// Returns 0.0 when either vector has (near-)zero magnitude to avoid a
/// division by zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // One pass accumulating the dot product and both squared magnitudes.
    let (dot, mag_a_sq, mag_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(d, ma, mb), (x, y)| (d + x * y, ma + x * x, mb + y * y),
    );

    if mag_a_sq <= f32::EPSILON || mag_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (mag_a_sq.sqrt() * mag_b_sq.sqrt())
}
3298
/// Rerank the top search hits with a local cross-encoder (JINA reranker).
///
/// Blends the cross-encoder relevance score (20%) with the min-max
/// normalized original score (80%) for up to the first 50 hits; the
/// remaining hits keep their relative order and are appended after the
/// reranked head. Reranker initialization or inference failures are logged
/// as warnings and leave the response untouched. Only compiled when the
/// `local-embeddings` feature is enabled.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits.
    if response.hits.is_empty() || response.hits.len() < 2 {
        return Ok(());
    }

    // Cross-encoders score (query, doc) pairs individually; cap the work.
    let candidates_to_rerank = response.hits.len().min(50);

    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    // Model download/initialization is best-effort: warn and bail cleanly.
    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Min-max bounds of the original scores, for normalizing them onto a
    // comparable scale before blending with the cross-encoder scores.
    // NOTE(review): if no candidate carries a score this yields infinite
    // bounds and the blend degenerates — confirm scores are always present
    // on the reranked head.
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores
        .iter()
        .cloned()
        .fold(f32::INFINITY, f32::min);
    let orig_max = original_scores
        .iter()
        .cloned()
        .fold(f32::NEG_INFINITY, f32::max);
    // Floor the range to avoid division by zero when all scores are equal.
    let orig_range = (orig_max - orig_min).max(0.001);
    for result in rerank_results.iter() {
        let original_idx = result.index;
        let cross_encoder_score = result.score;
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // 20% cross-encoder / 80% normalized original score.
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        // Expose the blended score so downstream display reflects the rerank.
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Append the un-reranked tail, continuing the rank numbering.
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3404
/// No-op fallback used when the `local-embeddings` feature is disabled:
/// the response keeps its original ordering.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3411
/// Render all entity memory cards into a plain-text context block.
///
/// Produces one section per entity that has at least one card, each card
/// rendered as "  - slot: value" with a trailing (+)/(-) marker for
/// positive/negative polarity. Sections are joined by blank lines. Returns
/// an empty string when no entities are stored.
fn build_memory_context(mem: &Memvid) -> String {
    let entities = mem.memory_entities();
    if entities.is_empty() {
        return String::new();
    }

    let mut sections = Vec::new();
    for entity in entities {
        let cards = mem.get_entity_memories(&entity);
        // Entities without cards are omitted entirely.
        if cards.is_empty() {
            continue;
        }

        let mut entity_lines = Vec::new();
        for card in cards {
            // NOTE(review): matches on the polarity's Display output
            // ("Positive"/"Negative"); verify this stays in sync with the
            // polarity type's Display impl.
            let polarity_marker = card
                .polarity
                .as_ref()
                .map(|p| match p.to_string().as_str() {
                    "Positive" => " (+)",
                    "Negative" => " (-)",
                    _ => "",
                })
                .unwrap_or("");
            entity_lines.push(format!(
                "  - {}: {}{}",
                card.slot, card.value, polarity_marker
            ));
        }

        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
    }

    sections.join("\n\n")
}
3450
3451fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3454 use std::collections::HashMap;
3455
3456 let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3458
3459 for hit in hits {
3460 if let Some(metadata) = &hit.metadata {
3461 for entity in &metadata.entities {
3462 entities_by_kind
3463 .entry(entity.kind.clone())
3464 .or_default()
3465 .push(entity.name.clone());
3466 }
3467 }
3468 }
3469
3470 if entities_by_kind.is_empty() {
3471 return String::new();
3472 }
3473
3474 let mut sections = Vec::new();
3476 let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3477 sorted_kinds.sort();
3478
3479 for kind in sorted_kinds {
3480 let names = entities_by_kind.get(kind).unwrap();
3481 let mut unique_names: Vec<_> = names.iter().collect();
3482 unique_names.sort();
3483 unique_names.dedup();
3484
3485 let names_str = unique_names
3486 .iter()
3487 .take(10) .map(|s| s.as_str())
3489 .collect::<Vec<_>>()
3490 .join(", ");
3491
3492 sections.push(format!("{}: {}", kind, names_str));
3493 }
3494
3495 sections.join("\n")
3496}