1use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored::Colorize;
15use colored_json::ToColoredJson;
16use blake3::hash;
17use clap::{ArgAction, Args, ValueEnum};
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20 types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21 TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24 types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
25 AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
26 SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
27};
28#[cfg(feature = "temporal_track")]
29use serde::Serialize;
30use serde_json::json;
31#[cfg(feature = "temporal_track")]
32use time::format_description::well_known::Rfc3339;
33use time::{Date, PrimitiveDateTime, Time};
34#[cfg(feature = "temporal_track")]
35use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
36use tracing::{info, warn};
37
38#[cfg(feature = "local-embeddings")]
39use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
40
41use memvid_ask_model::{
42 run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
43};
44
45use crate::config::{
47 load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
48 try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
49};
50use crate::utils::{
51 autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
52 parse_date_boundary, parse_vector, read_embedding,
53};
54
/// Cap (in characters) applied to context echoed in output — consumer is later in this file.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
/// Fallback IANA timezone used when no `--tz` override is supplied.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
58
59fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
60 let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
61 message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
62 if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
63 message.push_str(&format!(
64 "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
65 model.name(),
66 model.name()
67 ));
68 if model.is_openai() {
69 message.push_str(" (and set `OPENAI_API_KEY`).");
70 } else {
71 message.push('.');
72 }
73 message.push_str(&format!(
74 "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
75 model.name()
76 ));
77 message.push_str(&format!(
78 "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
79 ));
80 message.push_str("\nOr use `--mode lex` to disable semantic search.");
81 }
82 message
83}
84
/// Arguments for the `timeline` subcommand: chronological frame listing with
/// optional timestamp bounds, a temporal phrase filter, and as-of cutoffs.
#[derive(Args)]
pub struct TimelineArgs {
    /// Path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Emit machine-readable JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    /// Walk the timeline in reverse order.
    #[arg(long)]
    pub reverse: bool,
    /// Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    /// Lower Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    /// Upper Unix-timestamp bound for the query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    /// Natural-language temporal phrase resolved into a filter window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    /// IANA timezone override for phrase resolution (only valid with `--on`).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    /// RFC3339 anchor the phrase is resolved relative to (only valid with `--on`).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    /// Symmetric padding in minutes around the resolved window (only valid with `--on`).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Time-travel cutoff: drop entries whose frame id is greater than this.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Time-travel cutoff: drop entries whose timestamp is greater than this.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
119
#[cfg(feature = "temporal_track")]
/// Arguments for the `when` subcommand: resolve a temporal phrase into a
/// window and list the frames that fall inside it.
#[derive(Args)]
pub struct WhenArgs {
    /// Path to the memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Natural-language temporal phrase to resolve (required).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    /// IANA timezone override for phrase resolution.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    /// RFC3339 anchor the phrase is resolved relative to (defaults to now).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    /// Symmetric padding in minutes around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    /// Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    /// Lower Unix-timestamp bound for the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    /// Upper Unix-timestamp bound for the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    /// Walk the timeline in reverse order.
    #[arg(long)]
    pub reverse: bool,
    /// Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
145
/// Arguments for the `ask` subcommand: retrieve relevant context from a memory
/// file and optionally synthesize an answer with a model.
#[derive(Args)]
pub struct AskArgs {
    /// Free-form positional tokens: the first token that looks like a memory
    /// file is taken as the target file, the rest form the question.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    /// Explicit question text; takes precedence over the positional question.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    /// Restrict retrieval to a single exact URI (takes precedence over `--scope`).
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    /// Restrict retrieval to URIs under this prefix (ignored when `--uri` is set).
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    /// Maximum characters per hit snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    /// Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    /// Retrieval mode (lexical, semantic, or hybrid).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    /// Return retrieved context only (no answer synthesis).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    /// `--sources` flag; presumably adds source attribution — handled downstream of this chunk.
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    /// `--mask-pii` flag; presumably masks PII in output — handled downstream of this chunk.
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    /// Prepend a "KNOWN FACTS" memory-context block to the retrieved context.
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    /// Override for the LLM context budget, in characters.
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    /// Start date bound for retrieval (parsed by `parse_date_boundary`).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date bound for retrieval; must not precede `--start`.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Model for answer synthesis; a bare `--use-model` defaults to "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    /// Embedding model to use for the query side only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    /// Time-travel cutoff: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Time-travel cutoff: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Custom system prompt; presumably passed to the model — handled downstream of this chunk.
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    /// Skip the cross-encoder reranking pass over the retrieved hits.
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    /// `--no-llm` flag; presumably skips LLM synthesis — handled downstream of this chunk.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    /// Disable adaptive result cutoff (return up to `top_k` hits unfiltered).
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Relevancy threshold fed into the adaptive cutoff strategies.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Strategy used to decide where the adaptive cutoff falls.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
238
/// Retrieval mode for `ask`/`audit`; converted into `memvid_core::AskMode`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    /// Lexical (keyword) retrieval only.
    Lex,
    /// Semantic (embedding) retrieval only.
    Sem,
    /// Combined lexical + semantic retrieval.
    Hybrid,
}
246
247impl From<AskModeArg> for AskMode {
248 fn from(value: AskModeArg) -> Self {
249 match value {
250 AskModeArg::Lex => AskMode::Lex,
251 AskModeArg::Sem => AskMode::Sem,
252 AskModeArg::Hybrid => AskMode::Hybrid,
253 }
254 }
255}
256
/// Arguments for the `find` subcommand: search a single memory file.
#[derive(Args)]
pub struct FindArgs {
    /// Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Query text.
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    /// Restrict retrieval to a single exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    /// Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Number of hits to return.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    /// Maximum characters per hit snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    /// Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    /// Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    /// Emit the legacy JSON shape (mutually exclusive with `--json`).
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    /// Search engine selection.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    /// Time-travel cutoff: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    /// Time-travel cutoff: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    /// Embedding model to use for the query side only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    /// Disable adaptive result cutoff (return up to `top_k` hits unfiltered).
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    /// Relevancy threshold fed into the adaptive cutoff strategies.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    /// Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    /// Strategy used to decide where the adaptive cutoff falls.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    /// `--graph` flag; presumably graph-augmented search — handled downstream of this chunk.
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    /// `--hybrid` flag; presumably forces hybrid search — handled downstream of this chunk.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    /// `--no-sketch` flag; presumably disables sketch acceleration — handled downstream of this chunk.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
323
/// Search engine selection for `find`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    /// Automatic selection (resolution happens downstream of this chunk).
    Auto,
    /// Lexical (keyword) search only.
    Lex,
    /// Semantic (vector) search only.
    Sem,
    /// CLIP-based search; only available with the `clip` feature.
    #[cfg(feature = "clip")]
    Clip,
}
334
/// Cutoff strategy for adaptive result trimming; mapped onto
/// `memvid_core::CutoffStrategy` when building `AdaptiveConfig`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    /// `RelativeThreshold` with `min_ratio` = `--min-relevancy`.
    Relative,
    /// `AbsoluteThreshold` with `min_score` = `--min-relevancy`.
    Absolute,
    /// `ScoreCliff`: cut at a large drop between consecutive scores.
    Cliff,
    /// `Elbow`: cut at the elbow of the score curve.
    Elbow,
    /// `Combined`: relative threshold + drop ratio + absolute minimum.
    Combined,
}
349
/// Arguments for `vec-search`: nearest-neighbor lookup from a raw query vector.
#[derive(Args)]
pub struct VecSearchArgs {
    /// Path to the memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Query vector as comma-separated floats (mutually exclusive with `--embedding`).
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    /// Path to a file holding the query embedding (mutually exclusive with `--vector`).
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    /// Number of nearest neighbors to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    /// Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
364
/// Arguments for the `audit` subcommand: answer a question against a memory
/// and emit a report.
#[derive(Args)]
pub struct AuditArgs {
    /// Path to the memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    /// Question to answer in the audit.
    #[arg(value_name = "QUESTION")]
    pub question: String,
    /// Output path for the report (consumed downstream of this chunk).
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    /// Report format.
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    /// Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    /// Maximum characters per hit snippet.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    /// Retrieval mode (lexical, semantic, or hybrid).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    /// Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    /// Start date bound for retrieval.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    /// End date bound for retrieval.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    /// Model to use for answer synthesis.
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
401
/// Output format for `audit` reports.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    /// Plain-text report.
    Text,
    /// Markdown report.
    Markdown,
    /// JSON report.
    Json,
}
412
/// Handle the `timeline` subcommand: list frames chronologically, applying
/// optional timestamp bounds, a temporal-phrase filter, and as-of cutoffs.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only make sense together with an `--on` phrase.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the natural-language phrase into a concrete filter; the summary
    // is kept around for display in both JSON and text output.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // As-of filtering: drop entries newer than the requested frame/timestamp cutoff.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a temporal filter, wrap the entries together with its summary.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            // One line per frame: id, raw timestamp, single-line preview.
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!("  URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!("  Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
522
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve `--on PHRASE` into a temporal window,
/// then print (or serialize) the timeline entries that fall inside it.
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    // Resolve the phrase into a filter plus a printable summary.
    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON output: filter summary plus serializable entry views.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Show both the raw timestamp and its ISO rendering (empty on failure).
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!("  URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!("  Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
590
#[cfg(feature = "temporal_track")]
/// JSON envelope for `timeline --json` when a temporal filter was applied.
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// Summary of the resolved temporal filter; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Borrowed timeline entries (serialized in place).
    entries: &'a [TimelineEntry],
}
598
#[cfg(feature = "temporal_track")]
/// JSON envelope for `when --json`: filter summary plus matching entries.
#[derive(Serialize)]
struct WhenOutput {
    /// Description of how the temporal phrase was resolved.
    summary: TemporalSummaryOutput,
    /// Entries that fell inside the resolved window.
    entries: Vec<WhenEntry>,
}
605
#[cfg(feature = "temporal_track")]
/// JSON-serializable view of one timeline entry for `when --json`.
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Unix timestamp of the frame.
    timestamp: i64,
    /// ISO rendering of `timestamp`; omitted when formatting fails.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Per-entry temporal details (anchor, mentions); omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
621
#[cfg(feature = "temporal_track")]
/// JSON-serializable summary of a resolved temporal phrase.
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// The original natural-language phrase.
    phrase: String,
    /// IANA timezone the phrase was resolved in.
    timezone: String,
    /// Resolution anchor as a Unix timestamp.
    anchor_utc: i64,
    /// Anchor as RFC3339 (falls back to the raw timestamp string).
    anchor_iso: String,
    /// Normalizer confidence score.
    confidence: u16,
    /// Normalizer flags; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// One of "date", "datetime", "date_range", "datetime_range", "month".
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Requested `--window` padding in minutes; omitted when not supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
640
#[cfg(feature = "temporal_track")]
/// Internal (non-serialized) record of how a temporal phrase was resolved;
/// feeds both the text output path and `summary_to_output`.
struct TemporalSummary {
    /// The original natural-language phrase.
    phrase: String,
    /// IANA timezone used for resolution.
    tz: String,
    /// Anchor the phrase was resolved relative to.
    anchor: OffsetDateTime,
    /// Resolved window start (Unix timestamp), if bounded.
    start_utc: Option<i64>,
    /// Resolved window end (Unix timestamp), if bounded.
    end_utc: Option<i64>,
    /// Raw normalizer resolution (value, confidence, flags).
    resolution: TemporalResolution,
    /// Requested `--window` padding in minutes, if any.
    window_minutes: Option<u64>,
}
651
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal `phrase` into a `TemporalFilter` plus a
/// displayable `TemporalSummary`.
///
/// `tz_override` falls back to `DEFAULT_TEMPORAL_TZ`; `anchor_override` must be
/// RFC3339 and defaults to now; `window_minutes` pads the resolved bounds
/// symmetrically on both sides.
///
/// # Errors
/// - `E-TEMP-003` when the timezone is empty
/// - `E-TEMP-002` when the anchor is not RFC3339
/// - `E-TEMP-001` when the phrase cannot be resolved
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both bounds by the requested window. The previous version special-cased
    // `start == end` with a branch whose body was identical to the general case;
    // the duplicate branches are collapsed here (behavior unchanged).
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            if let (Some(s), Some(e)) = (start, end) {
                let delta = TimeDuration::minutes(minutes as i64).whole_seconds();
                start = Some(s.saturating_sub(delta));
                end = Some(e.saturating_add(delta));
            }
        }
    }

    // The filter carries only the concrete bounds; phrase/tz stay in the summary.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
715
#[cfg(feature = "temporal_track")]
/// Convert the internal `TemporalSummary` into its JSON-serializable form.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        // Fall back to the raw Unix timestamp if RFC3339 formatting fails.
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
741
#[cfg(feature = "temporal_track")]
/// Map a core `TimelineEntry` onto the serializable `WhenEntry` view,
/// adding an ISO rendering of the timestamp.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
754
#[cfg(feature = "temporal_track")]
/// Print a human-readable description of a resolved temporal filter.
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!(
        "Anchor: {}",
        summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string())
    );
    let start_iso = summary.start_utc.and_then(format_timestamp);
    let end_iso = summary.end_utc.and_then(format_timestamp);
    // A zero-width window collapses to a single "Resolved to" line.
    match (start_iso, end_iso) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    if let Some(window) = summary.window_minutes {
        if window > 0 {
            println!("Window padding: {window} minute(s)");
        }
    }
    // Blank separator line before the entry listing that follows.
    println!();
}
792
#[cfg(feature = "temporal_track")]
/// Print per-entry temporal details: the anchor plus any extracted time mentions.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string; fall back to formatting the UTC timestamp.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            "  Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!("  Mentions:");
        for mention in &temporal.mentions {
            // Same ISO-first fallback chain as the anchor above.
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                "    - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            // Append the source text when the mention carried one.
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
825
#[cfg(feature = "temporal_track")]
/// Convert a normalizer resolution into inclusive `(start, end)` UTC bounds.
///
/// Point values (a single date or datetime) yield equal bounds; ranges map to
/// their endpoints; a month spans its first through last calendar day.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    use TemporalResolutionValue as V;
    let (lo, hi) = match &resolution.value {
        V::Date(date) => {
            let ts = date_to_timestamp(*date);
            (ts, ts)
        }
        V::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            (ts, ts)
        }
        V::DateRange { start, end } => (date_to_timestamp(*start), date_to_timestamp(*end)),
        V::DateTimeRange { start, end } => (start.unix_timestamp(), end.unix_timestamp()),
        V::Month { year, month } => {
            let first = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let last = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            (date_to_timestamp(first), date_to_timestamp(last))
        }
    };
    Ok((Some(lo), Some(hi)))
}
856
#[cfg(feature = "temporal_track")]
/// Static tag naming the kind of a temporal resolution, used in JSON output.
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    use TemporalResolutionValue::*;
    match &resolution.value {
        Date(_) => "date",
        DateTime(_) => "datetime",
        DateRange { .. } => "date_range",
        DateTimeRange { .. } => "datetime_range",
        Month { .. } => "month",
    }
}
867
#[cfg(feature = "temporal_track")]
/// Unix timestamp of UTC midnight at the start of `date`.
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
874
#[cfg(feature = "temporal_track")]
/// Last calendar day of `month` in `year`.
///
/// Walks forward from the 1st until the next day falls in a different month;
/// errors only if the year/month pair does not form a valid calendar date.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match last.next_day() {
            Some(next) if next.month() == month => last = next,
            _ => break,
        }
    }
    Ok(last)
}
888
// NOTE(review): the cfg gate below — followed by a stray blank line — compiles this
// helper only under `temporal_track`, even though the body already handles the
// feature internally via the cfg'd `temporal` field. Confirm the gate is intentional;
// otherwise non-feature builds that call this helper will fail to compile.
#[cfg(feature = "temporal_track")]

/// Replace the response's context fragments with fragments produced by the
/// model layer, converting each into the core `AskContextFragment` shape.
/// Existing fragments are kept untouched when the model produced none.
fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
    if fragments.is_empty() {
        return;
    }

    response.context_fragments = fragments
        .into_iter()
        .map(|fragment| AskContextFragment {
            rank: fragment.rank,
            frame_id: fragment.frame_id,
            uri: fragment.uri,
            title: fragment.title,
            score: fragment.score,
            matches: fragment.matches,
            range: Some(fragment.range),
            chunk_range: fragment.chunk_range,
            text: fragment.text,
            // Translate the model-layer fragment kind into the core enum.
            kind: Some(match fragment.kind {
                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
            }),
            // Temporal data is not supplied by the model layer.
            #[cfg(feature = "temporal_track")]
            temporal: None,
        })
        .collect();
}
917
918pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
919 crate::utils::require_active_plan(config, "ask")?;
921
922 crate::api::track_query_usage(config, 1)?;
924
925 if args.uri.is_some() && args.scope.is_some() {
926 warn!("--scope ignored because --uri is provided");
927 }
928
929 let mut question_tokens = Vec::new();
930 let mut file_path: Option<PathBuf> = None;
931 for token in &args.targets {
932 if file_path.is_none() && looks_like_memory(token) {
933 file_path = Some(PathBuf::from(token));
934 } else {
935 question_tokens.push(token.clone());
936 }
937 }
938
939 let positional_question = if question_tokens.is_empty() {
940 None
941 } else {
942 Some(question_tokens.join(" "))
943 };
944
945 let question = args
946 .question
947 .or(positional_question)
948 .map(|value| value.trim().to_string())
949 .filter(|value| !value.is_empty());
950
951 let question = question
952 .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;
953
954 let (original_question, search_query) = {
957 let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
960 if let Ok(key) = std::env::var("OPENAI_API_KEY") {
961 (Some("gpt-4o-mini"), Some(key))
963 } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
964 (Some("llama-3.1-8b-instant"), Some(key))
966 } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
967 (Some("claude-haiku-4-5"), Some(key))
969 } else if let Ok(key) = std::env::var("XAI_API_KEY") {
970 (Some("grok-4-fast"), Some(key))
972 } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
973 (Some("mistral-small-latest"), Some(key))
975 } else {
976 (None, None)
978 };
979
980 let _ = (model_for_expansion, api_key_for_expansion); (question.clone(), question.clone())
991 };
992
993 let memory_path = match file_path {
994 Some(path) => path,
995 None => autodetect_memory_file()?,
996 };
997
998 let start = parse_date_boundary(args.start.as_ref(), false)?;
999 let end = parse_date_boundary(args.end.as_ref(), true)?;
1000 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1001 if end_ts < start_ts {
1002 anyhow::bail!("--end must not be earlier than --start");
1003 }
1004 }
1005
1006 let mut mem = Memvid::open(&memory_path)?;
1008
1009 #[cfg(feature = "replay")]
1011 let _ = mem.load_active_session();
1012
1013 let mv2_dimension = mem.effective_vec_index_dimension()?;
1015
1016 let stats = mem.stats()?;
1018 let has_vectors = stats.vector_count > 0;
1019 let effective_mode = if !has_vectors && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1020 tracing::info!(
1021 "Memory has no embeddings (vector_count=0); falling back to lexical mode"
1022 );
1023 AskModeArg::Lex
1024 } else {
1025 args.mode.clone()
1026 };
1027
1028 let ask_mode: AskMode = effective_mode.clone().into();
1029 let inferred_model_override = match effective_mode {
1030 AskModeArg::Lex => None,
1031 AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
1032 memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
1033 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
1034 let models: Vec<_> = identities
1035 .iter()
1036 .filter_map(|entry| entry.identity.model.as_deref())
1037 .collect();
1038 anyhow::bail!(
1039 "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1040 Detected models: {:?}\n\n\
1041 Suggested fix: split into separate memories per embedding model.",
1042 models
1043 );
1044 }
1045 memvid_core::EmbeddingIdentitySummary::Unknown => None,
1046 },
1047 };
1048 let emb_model_override = args
1049 .query_embedding_model
1050 .as_deref()
1051 .or(inferred_model_override.as_deref());
1052 let runtime = match effective_mode {
1053 AskModeArg::Lex => None,
1054 AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
1055 config,
1056 emb_model_override,
1057 mv2_dimension,
1058 )?),
1059 AskModeArg::Hybrid => {
1060 try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
1062 || {
1063 load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1065 .ok()
1066 .map(|rt| {
1067 tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
1068 rt
1069 })
1070 },
1071 )
1072 }
1073 };
1074 if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1075 anyhow::bail!(
1076 "semantic embeddings unavailable; install/cached model required for {:?} mode",
1077 effective_mode
1078 );
1079 }
1080
1081 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1082
1083 let adaptive = if !args.no_adaptive {
1085 Some(AdaptiveConfig {
1086 enabled: true,
1087 max_results: args.max_k,
1088 min_results: 1,
1089 normalize_scores: true,
1090 strategy: match args.adaptive_strategy {
1091 AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1092 min_ratio: args.min_relevancy,
1093 },
1094 AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1095 min_score: args.min_relevancy,
1096 },
1097 AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
1098 AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1099 AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1100 relative_threshold: args.min_relevancy,
1101 max_drop_ratio: 0.3,
1102 absolute_min: 0.3,
1103 },
1104 },
1105 })
1106 } else {
1107 None
1108 };
1109
1110 let request = AskRequest {
1111 question: search_query, top_k: args.top_k,
1113 snippet_chars: args.snippet_chars,
1114 uri: args.uri.clone(),
1115 scope: args.scope.clone(),
1116 cursor: args.cursor.clone(),
1117 start,
1118 end,
1119 #[cfg(feature = "temporal_track")]
1120 temporal: None,
1121 context_only: args.context_only,
1122 mode: ask_mode,
1123 as_of_frame: args.as_of_frame,
1124 as_of_ts: args.as_of_ts,
1125 adaptive,
1126 };
1127 let mut response = mem.ask(request, embedder).map_err(|err| match err {
1128 MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
1129 other => anyhow!(other),
1130 })?;
1131
1132 response.question = original_question;
1135
1136 let is_temporal_query = {
1143 let q_lower = response.question.to_lowercase();
1144 q_lower.contains("current") || q_lower.contains("latest") || q_lower.contains("recent")
1145 || q_lower.contains("now") || q_lower.contains("today") || q_lower.contains("updated")
1146 || q_lower.contains("new ") || q_lower.contains("newest")
1147 };
1148 if !args.no_rerank
1149 && !response.retrieval.hits.is_empty()
1150 && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
1151 && !is_temporal_query
1152 {
1153 let mut search_response = SearchResponse {
1155 query: response.question.clone(),
1156 hits: response.retrieval.hits.clone(),
1157 total_hits: response.retrieval.hits.len(),
1158 params: memvid_core::SearchParams {
1159 top_k: args.top_k,
1160 snippet_chars: args.snippet_chars,
1161 cursor: None,
1162 },
1163 elapsed_ms: 0,
1164 engine: memvid_core::SearchEngineKind::Hybrid,
1165 next_cursor: None,
1166 context: String::new(),
1167 };
1168
1169 if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
1170 warn!("Cross-encoder reranking failed: {e}");
1171 } else {
1172 response.retrieval.hits = search_response.hits;
1174 response.retrieval.context = response
1176 .retrieval
1177 .hits
1178 .iter()
1179 .take(10) .map(|hit| hit.text.as_str())
1181 .collect::<Vec<_>>()
1182 .join("\n\n---\n\n");
1183 }
1184 }
1185
1186 if args.memories {
1188 let memory_context = build_memory_context(&mem);
1189 if !memory_context.is_empty() {
1190 response.retrieval.context = format!(
1192 "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
1193 memory_context, response.retrieval.context
1194 );
1195 }
1196 }
1197
1198 let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
1200 if !entity_context.is_empty() {
1201 response.retrieval.context = format!(
1203 "=== ENTITIES MENTIONED ===\n{}\n\n{}",
1204 entity_context, response.retrieval.context
1205 );
1206 }
1207
1208 if args.mask_pii {
1210 use memvid_core::pii::mask_pii;
1211
1212 response.retrieval.context = mask_pii(&response.retrieval.context);
1214
1215 for hit in &mut response.retrieval.hits {
1217 hit.text = mask_pii(&hit.text);
1218 if let Some(chunk_text) = &hit.chunk_text {
1219 hit.chunk_text = Some(mask_pii(chunk_text));
1220 }
1221 }
1222 }
1223
1224 let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;
1225
1226 let mut model_result: Option<ModelInference> = None;
1227 if args.no_llm {
1228 if args.use_model.is_some() {
1230 warn!("--use-model ignored because --no-llm disables LLM synthesis");
1231 }
1232 if args.json {
1233 emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
1234 } else {
1235 emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
1236 }
1237
1238 #[cfg(feature = "replay")]
1240 let _ = mem.save_active_session();
1241
1242 return Ok(());
1243 } else if response.context_only {
1244 if args.use_model.is_some() {
1245 warn!("--use-model ignored because --context-only disables synthesis");
1246 }
1247 } else if let Some(model_name) = args.use_model.as_deref() {
1248 match run_model_inference(
1249 model_name,
1250 &response.question,
1251 &response.retrieval.context,
1252 &response.retrieval.hits,
1253 llm_context_override,
1254 None,
1255 args.system_prompt.as_deref(),
1256 ) {
1257 Ok(inference) => {
1258 response.answer = Some(inference.answer.answer.clone());
1259 response.retrieval.context = inference.context_body.clone();
1260 apply_model_context_fragments(&mut response, inference.context_fragments.clone());
1261 model_result = Some(inference);
1262 }
1263 Err(err) => {
1264 warn!(
1265 "model inference unavailable for '{}': {err}. Falling back to default summary.",
1266 model_name
1267 );
1268 }
1269 }
1270 }
1271
1272 #[cfg(feature = "replay")]
1274 if let Some(ref inference) = model_result {
1275 if let Some(model_name) = args.use_model.as_deref() {
1276 let retrieved_frames: Vec<u64> = response
1278 .retrieval
1279 .hits
1280 .iter()
1281 .map(|hit| hit.frame_id)
1282 .collect();
1283
1284 mem.record_ask_action(
1285 &response.question,
1286 model_name, model_name, inference.answer.answer.as_bytes(),
1289 0, retrieved_frames,
1291 );
1292 }
1293 }
1294
1295 if args.json {
1296 if let Some(model_name) = args.use_model.as_deref() {
1297 emit_model_json(
1298 &response,
1299 model_name,
1300 model_result.as_ref(),
1301 args.sources,
1302 &mut mem,
1303 )?;
1304 } else {
1305 emit_ask_json(
1306 &response,
1307 effective_mode.clone(),
1308 model_result.as_ref(),
1309 args.sources,
1310 &mut mem,
1311 )?;
1312 }
1313 } else {
1314 emit_ask_pretty(
1315 &response,
1316 effective_mode.clone(),
1317 model_result.as_ref(),
1318 args.sources,
1319 &mut mem,
1320 );
1321 }
1322
1323 #[cfg(feature = "replay")]
1325 let _ = mem.save_active_session();
1326
1327 Ok(())
1328}
1329
1330fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1332 use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1333 use memvid_core::types::QueryPlan;
1334
1335 let planner = QueryPlanner::new();
1336
1337 let plan = if args.graph {
1339 let plan = planner.plan(&args.query, args.top_k);
1341 match plan {
1343 QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1344 QueryPlan::graph_only(graph_filter, args.top_k)
1345 }
1346 _ => plan,
1347 }
1348 } else {
1349 planner.plan(&args.query, args.top_k)
1351 };
1352
1353 let hits = hybrid_search(mem, &plan)?;
1355
1356 if args.json {
1357 let output = serde_json::json!({
1359 "query": args.query,
1360 "mode": if args.graph { "graph" } else { "hybrid" },
1361 "plan": format!("{:?}", plan),
1362 "hits": hits.iter().map(|h| {
1363 serde_json::json!({
1364 "frame_id": h.frame_id,
1365 "score": h.score,
1366 "graph_score": h.graph_score,
1367 "vector_score": h.vector_score,
1368 "matched_entity": h.matched_entity,
1369 "preview": h.preview,
1370 })
1371 }).collect::<Vec<_>>(),
1372 });
1373 println!("{}", serde_json::to_string_pretty(&output)?);
1374 } else {
1375 let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1377 println!("{} search for: \"{}\"", mode_str, args.query);
1378 println!("Plan: {:?}", plan);
1379 println!();
1380
1381 if hits.is_empty() {
1382 println!("No results found.");
1383 } else {
1384 println!("Results ({} hits):", hits.len());
1385 for (i, hit) in hits.iter().enumerate() {
1386 println!();
1387 println!(
1388 "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1389 i + 1,
1390 hit.frame_id,
1391 hit.score,
1392 hit.graph_score,
1393 hit.vector_score
1394 );
1395 if let Some(entity) = &hit.matched_entity {
1396 println!(" Matched entity: {}", entity);
1397 }
1398 if let Some(preview) = &hit.preview {
1399 let truncated = if preview.len() > 200 {
1400 format!("{}...", &preview[..200])
1401 } else {
1402 preview.clone()
1403 };
1404 println!(" {}", truncated.replace('\n', " "));
1405 }
1406 }
1407 }
1408 }
1409
1410 Ok(())
1411}
1412
/// Entry point for the `find` command: plan-gated search over a memory file
/// in lexical, semantic, auto (hybrid), graph, or (feature-gated) CLIP mode.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    // Gate the command behind an active plan and record one query of usage.
    crate::utils::require_active_plan(config, "find")?;

    crate::api::track_query_usage(config, 1)?;

    let mut mem = open_read_only_mem(&args.file)?;

    // Best-effort: restore a replay session if the feature is compiled in.
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Graph and hybrid-graph searches take a separate code path entirely.
    if args.graph || args.hybrid {
        return handle_graph_find(&mut mem, &args);
    }

    // --uri takes precedence over --scope when both are supplied.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    let mv2_dimension = mem.effective_vec_index_dimension()?;

    // Embedding identity only matters for modes that may run semantic search.
    let identity_summary = match args.mode {
        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
        #[cfg(feature = "clip")]
        SearchMode::Clip => None,
        SearchMode::Lex => None,
    };

    // A single embedding model pins the query embedder to that model; mixed
    // models make semantic queries unsafe (forced Sem errors out, Auto
    // silently downgrades to lexical).
    let mut semantic_allowed = true;
    let inferred_model_override = match identity_summary.as_ref() {
        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
            identity.model.as_deref().map(|value| value.to_string())
        }
        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
            let models: Vec<_> = identities
                .iter()
                .filter_map(|entry| entry.identity.model.as_deref())
                .collect();
            if args.mode == SearchMode::Sem {
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                     Detected models: {:?}\n\n\
                     Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
            semantic_allowed = false;
            None
        }
        _ => None,
    };

    // Explicit CLI override beats the model inferred from the memory itself.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());

    // Resolve the display label and (optionally) the embedding runtime.
    // Sem requires a runtime (hard error); Auto degrades to lexical when one
    // cannot be loaded or semantic search was marked unsafe above.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if !semantic_allowed {
                ("Lexical (semantic unsafe)".to_string(), None)
            } else if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Machine-readable mode key used in --json output.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // CLIP visual search: encode the query text with MobileCLIP and search the
    // image-embedding index directly, bypassing the text pipeline below.
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Trace-level dump of raw CLIP hits. Cosine similarity is derived from
        // L2 distance as 1 - d^2/2 (valid for unit-normalized embeddings —
        // presumably what the CLIP index stores; confirm in memvid_core).
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // Hits beyond this L2 distance are discarded as irrelevant.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                // Drop hits whose frame/preview can no longer be resolved.
                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                // Fold the page number (if any) into the displayed title.
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid,
            next_cursor: None,
            context: String::new(),
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {} k={} time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // Text search paths. Forced Sem tries adaptive vector search, then
    // fixed-k, then lexical + semantic rerank; other modes run a lexical
    // search and rerank semantically when a runtime is available.
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        let query_embedding = runtime.embed_query(&args.query)?;

        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if !args.no_adaptive {
            // Adaptive search trims the result list with a cutoff strategy
            // instead of returning a fixed top-k.
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35,
                },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            // Report the adaptively-chosen k, not args.top_k.
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive vector search)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Dimension mismatches are user-fixable; surface the
                    // detailed help text instead of falling back.
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (vector search fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                            }
                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
                        }
                    }
                }
            }
        } else {
            // --no-adaptive: plain fixed-k vector search, with a lexical +
            // semantic-rerank fallback when the vector index is unusable.
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (vector search)".to_string(), None)
                }
                Err(e) => {
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                        no_sketch: args.no_sketch,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // Lexical / Auto path: standard search request, plus semantic rerank
        // when a runtime is available (this is what makes Auto "hybrid").
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
            no_sketch: args.no_sketch,
        };

        let mut resp = mem.search(request)?;

        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    // Emit in the requested format: legacy JSON, mv2 JSON, or a pretty table.
    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {} k={} time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }

    // Best-effort session persistence; errors are intentionally ignored.
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1849
1850pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1851 crate::api::track_query_usage(config, 1)?;
1853
1854 let mut mem = open_read_only_mem(&args.file)?;
1855 let vector = if let Some(path) = args.embedding.as_deref() {
1856 read_embedding(path)?
1857 } else if let Some(vector_string) = &args.vector {
1858 parse_vector(vector_string)?
1859 } else {
1860 anyhow::bail!("provide --vector or --embedding for search input");
1861 };
1862
1863 let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1864 MemvidError::VecDimensionMismatch { expected, actual } => {
1865 anyhow!(vec_dimension_mismatch_help(expected, actual))
1866 }
1867 other => anyhow!(other),
1868 })?;
1869 let mut enriched = Vec::with_capacity(hits.len());
1870 for hit in hits {
1871 let preview = mem.frame_preview_by_id(hit.frame_id)?;
1872 enriched.push((hit.frame_id, hit.distance, preview));
1873 }
1874
1875 if args.json {
1876 let json_hits: Vec<_> = enriched
1877 .iter()
1878 .map(|(frame_id, distance, preview)| {
1879 json!({
1880 "frame_id": frame_id,
1881 "distance": distance,
1882 "preview": preview,
1883 })
1884 })
1885 .collect();
1886 let json_str = serde_json::to_string_pretty(&json_hits)?;
1887 println!("{}", json_str.to_colored_json_auto()?);
1888 } else if enriched.is_empty() {
1889 println!("No vector matches found");
1890 } else {
1891 for (frame_id, distance, preview) in enriched {
1892 println!("frame {frame_id} (distance {distance:.6}): {preview}");
1893 }
1894 }
1895 Ok(())
1896}
1897
/// Run an audit query against a memory and render the report as text,
/// markdown, or JSON, optionally synthesizing the answer with an LLM.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    // NOTE(review): unlike `find`/`vec-search`, this opens read-write —
    // presumably `audit` needs mutable access; confirm against Memvid::open.
    let mut mem = Memvid::open(&args.file)?;

    // The boolean selects start- vs end-of-range boundary handling —
    // see parse_date_boundary for the exact snapping semantics.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    let ask_mode: AskMode = args.mode.into();
    // Sem requires a runtime (hard error); Hybrid degrades to None; Lex
    // never needs one.
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // Optional LLM synthesis: join the source snippets into one context blob
    // and let the model write the answer. Inference failure is non-fatal —
    // the report keeps its default answer.
    if let Some(model_name) = args.use_model.as_deref() {
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[],
            None,
            None,
            None,
        ) {
            Ok(inference) => {
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    // Render in the requested output format.
    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    // Write to --out when given, otherwise print to stdout.
    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1990
1991fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1992 let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1993
1994 let mut additional_params = serde_json::Map::new();
1995 if let Some(cursor) = &response.params.cursor {
1996 additional_params.insert("cursor".into(), json!(cursor));
1997 }
1998
1999 let mut params = serde_json::Map::new();
2000 params.insert("top_k".into(), json!(response.params.top_k));
2001 params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2002 params.insert("mode".into(), json!(mode));
2003 params.insert(
2004 "additional_params".into(),
2005 serde_json::Value::Object(additional_params),
2006 );
2007
2008 let mut metadata_json = serde_json::Map::new();
2009 metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2010 metadata_json.insert("total_hits".into(), json!(response.total_hits));
2011 metadata_json.insert(
2012 "next_cursor".into(),
2013 match &response.next_cursor {
2014 Some(cursor) => json!(cursor),
2015 None => serde_json::Value::Null,
2016 },
2017 );
2018 metadata_json.insert("engine".into(), json!(response.engine));
2019 metadata_json.insert("params".into(), serde_json::Value::Object(params));
2020
2021 let body = json!({
2022 "version": "mv2.result.v2",
2023 "query": response.query,
2024 "metadata": metadata_json,
2025 "hits": hits,
2026 "context": response.context,
2027 });
2028 let json_str = serde_json::to_string_pretty(&body)?;
2029 println!("{}", json_str.to_colored_json_auto()?);
2030 Ok(())
2031}
2032
/// Print an ask response as the `mv2.ask.v1` JSON envelope, enriched with
/// model/grounding metadata, sources, and follow-up suggestions when present.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations carry the minimal provenance needed to re-resolve a claim;
    // chunk_range and score are only emitted when known.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; the context is truncated to keep output bounded.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // When an LLM was used, attach model identity, cache state, token usage
    // (with cost zeroed on cache hits), and the grounding assessment.
    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // Only report `model_used` when it differs from the request
            // (e.g. an alias resolved to a concrete model).
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                map.insert("usage".into(), json!({
                    "input_tokens": usage.input_tokens,
                    "output_tokens": usage.output_tokens,
                    "total_tokens": usage.total_tokens,
                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                }));
            }
            if let Some(grounding) = &inf.grounding {
                map.insert("grounding".into(), json!({
                    "score": grounding.score,
                    "label": grounding.label(),
                    "sentence_count": grounding.sentence_count,
                    "grounded_sentences": grounding.grounded_sentences,
                    "has_warning": grounding.has_warning,
                    "warning_reason": grounding.warning_reason,
                }));
            }
        }
    }

    // Optional expanded sources (--sources flag).
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Attach follow-up suggestions only when the answer looks weak.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2136
2137fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2138 response
2139 .citations
2140 .iter()
2141 .enumerate()
2142 .map(|(idx, citation)| {
2143 let mut source = serde_json::Map::new();
2144 source.insert("index".into(), json!(idx + 1));
2145 source.insert("frame_id".into(), json!(citation.frame_id));
2146 source.insert("uri".into(), json!(citation.uri));
2147
2148 if let Some(range) = citation.chunk_range {
2149 source.insert("chunk_range".into(), json!([range.0, range.1]));
2150 }
2151 if let Some(score) = citation.score {
2152 source.insert("score".into(), json!(score));
2153 }
2154
2155 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2157 if let Some(title) = frame.title {
2158 source.insert("title".into(), json!(title));
2159 }
2160 if !frame.tags.is_empty() {
2161 source.insert("tags".into(), json!(frame.tags));
2162 }
2163 if !frame.labels.is_empty() {
2164 source.insert("labels".into(), json!(frame.labels));
2165 }
2166 source.insert("frame_timestamp".into(), json!(frame.timestamp));
2167 if !frame.content_dates.is_empty() {
2168 source.insert("content_dates".into(), json!(frame.content_dates));
2169 }
2170 }
2171
2172 if let Some(hit) = response
2174 .retrieval
2175 .hits
2176 .iter()
2177 .find(|h| h.frame_id == citation.frame_id)
2178 {
2179 let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2180 source.insert("snippet".into(), json!(snippet));
2181 }
2182
2183 serde_json::Value::Object(source)
2184 })
2185 .collect()
2186}
2187
2188fn build_follow_up_suggestions(
2191 response: &AskResponse,
2192 inference: Option<&ModelInference>,
2193 mem: &mut Memvid,
2194) -> Option<serde_json::Value> {
2195 let needs_followup = inference
2197 .and_then(|inf| inf.grounding.as_ref())
2198 .map(|g| g.score < 0.3 || g.has_warning)
2199 .unwrap_or(false);
2200
2201 let low_retrieval = response.retrieval.hits.first()
2203 .and_then(|h| h.score)
2204 .map(|score| score < -2.0)
2205 .unwrap_or(true);
2206
2207 if !needs_followup && !low_retrieval {
2208 return None;
2209 }
2210
2211 let limit = std::num::NonZeroU64::new(20).unwrap();
2213 let timeline_query = TimelineQueryBuilder::default()
2214 .limit(limit)
2215 .build();
2216
2217 let available_topics: Vec<String> = mem
2218 .timeline(timeline_query)
2219 .ok()
2220 .map(|entries| {
2221 entries
2222 .iter()
2223 .filter_map(|e| {
2224 let preview = e.preview.trim();
2226 if preview.is_empty() || preview.len() < 5 {
2227 return None;
2228 }
2229 let first_line = preview.lines().next().unwrap_or(preview);
2231 if first_line.len() > 60 {
2232 Some(format!("{}...", &first_line[..57]))
2233 } else {
2234 Some(first_line.to_string())
2235 }
2236 })
2237 .collect::<std::collections::HashSet<_>>()
2238 .into_iter()
2239 .take(5)
2240 .collect()
2241 })
2242 .unwrap_or_default();
2243
2244 let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2246 "No relevant information found in memory"
2247 } else if inference.and_then(|i| i.grounding.as_ref()).map(|g| g.has_warning).unwrap_or(false) {
2248 "Answer may not be well-supported by the available context"
2249 } else {
2250 "Low confidence in the answer"
2251 };
2252
2253 let suggestions: Vec<String> = if available_topics.is_empty() {
2255 vec![
2256 "What information is stored in this memory?".to_string(),
2257 "Can you list the main topics covered?".to_string(),
2258 ]
2259 } else {
2260 available_topics
2261 .iter()
2262 .take(3)
2263 .map(|topic| format!("Tell me about {}", topic))
2264 .chain(std::iter::once("What topics are in this memory?".to_string()))
2265 .collect()
2266 };
2267
2268 Some(json!({
2269 "needed": true,
2270 "reason": reason,
2271 "hint": if available_topics.is_empty() {
2272 "This memory may not contain information about your query."
2273 } else {
2274 "This memory contains information about different topics. Try asking about those instead."
2275 },
2276 "available_topics": available_topics,
2277 "suggestions": suggestions
2278 }))
2279}
2280
2281fn emit_model_json(
2282 response: &AskResponse,
2283 requested_model: &str,
2284 inference: Option<&ModelInference>,
2285 include_sources: bool,
2286 mem: &mut Memvid,
2287) -> Result<()> {
2288 let answer = response.answer.clone().unwrap_or_default();
2289 let requested_label = inference
2290 .map(|m| m.answer.requested.clone())
2291 .unwrap_or_else(|| requested_model.to_string());
2292 let used_label = inference
2293 .map(|m| m.answer.model.clone())
2294 .unwrap_or_else(|| requested_model.to_string());
2295
2296 let mut body = json!({
2297 "question": response.question,
2298 "model": requested_label,
2299 "model_used": used_label,
2300 "answer": answer,
2301 "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
2302 });
2303
2304 if let Some(inf) = inference {
2306 if let serde_json::Value::Object(ref mut map) = body {
2307 map.insert("cached".into(), json!(inf.cached));
2308 if let Some(usage) = &inf.usage {
2309 map.insert("usage".into(), json!({
2310 "input_tokens": usage.input_tokens,
2311 "output_tokens": usage.output_tokens,
2312 "total_tokens": usage.total_tokens,
2313 "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
2314 "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
2315 }));
2316 }
2317 if let Some(grounding) = &inf.grounding {
2318 map.insert("grounding".into(), json!({
2319 "score": grounding.score,
2320 "label": grounding.label(),
2321 "sentence_count": grounding.sentence_count,
2322 "grounded_sentences": grounding.grounded_sentences,
2323 "has_warning": grounding.has_warning,
2324 "warning_reason": grounding.warning_reason,
2325 }));
2326 }
2327 }
2328 }
2329
2330 if include_sources {
2332 if let serde_json::Value::Object(ref mut map) = body {
2333 let sources = build_sources_json(response, mem);
2334 map.insert("sources".into(), json!(sources));
2335 }
2336 }
2337
2338 if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2340 if let serde_json::Value::Object(ref mut map) = body {
2341 map.insert("follow_up".into(), follow_up);
2342 }
2343 }
2344
2345 let json_str = serde_json::to_string_pretty(&body)?;
2347 println!("{}", json_str.to_colored_json_auto()?);
2348 Ok(())
2349}
2350
2351fn emit_ask_pretty(
2352 response: &AskResponse,
2353 requested_mode: AskModeArg,
2354 inference: Option<&ModelInference>,
2355 include_sources: bool,
2356 mem: &mut Memvid,
2357) {
2358 println!(
2359 "mode: {} retriever: {} k={} latency: {} ms (retrieval {} ms)",
2360 ask_mode_pretty(requested_mode),
2361 ask_retriever_pretty(response.retriever),
2362 response.retrieval.params.top_k,
2363 response.stats.latency_ms,
2364 response.stats.retrieval_ms
2365 );
2366 if let Some(inference) = inference {
2367 let model = &inference.answer;
2368 let cached_label = if inference.cached { " [CACHED]" } else { "" };
2369 if model.requested.trim() == model.model {
2370 println!("model: {}{}", model.model, cached_label);
2371 } else {
2372 println!(
2373 "model requested: {} model used: {}{}",
2374 model.requested, model.model, cached_label
2375 );
2376 }
2377 if let Some(usage) = &inference.usage {
2379 let cost_label = if inference.cached {
2380 format!("$0.00 (saved ${:.6})", usage.cost_usd)
2381 } else {
2382 format!("${:.6}", usage.cost_usd)
2383 };
2384 println!(
2385 "tokens: {} input + {} output = {} cost: {}",
2386 usage.input_tokens,
2387 usage.output_tokens,
2388 usage.total_tokens,
2389 cost_label
2390 );
2391 }
2392 if let Some(grounding) = &inference.grounding {
2394 let warning = if grounding.has_warning {
2395 format!(" [WARNING: {}]", grounding.warning_reason.as_deref().unwrap_or("potential hallucination"))
2396 } else {
2397 String::new()
2398 };
2399 println!(
2400 "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2401 grounding.score * 100.0,
2402 grounding.label(),
2403 grounding.grounded_sentences,
2404 grounding.sentence_count,
2405 warning
2406 );
2407 }
2408 }
2409 println!(
2410 "engine: {}",
2411 search_engine_label(&response.retrieval.engine)
2412 );
2413 println!(
2414 "hits: {} (showing {})",
2415 response.retrieval.total_hits,
2416 response.retrieval.hits.len()
2417 );
2418
2419 if response.context_only {
2420 println!();
2421 println!("Context-only mode: synthesis disabled.");
2422 println!();
2423 } else if let Some(answer) = &response.answer {
2424 println!();
2425 println!("Answer:\n{answer}");
2426 println!();
2427 }
2428
2429 if !response.citations.is_empty() {
2430 println!("Citations:");
2431 for citation in &response.citations {
2432 match citation.score {
2433 Some(score) => println!(
2434 "[{}] {} (frame {}, score {:.3})",
2435 citation.index, citation.uri, citation.frame_id, score
2436 ),
2437 None => println!(
2438 "[{}] {} (frame {})",
2439 citation.index, citation.uri, citation.frame_id
2440 ),
2441 }
2442 }
2443 println!();
2444 }
2445
2446 if include_sources && !response.citations.is_empty() {
2448 println!("=== SOURCES ===");
2449 println!();
2450 for citation in &response.citations {
2451 println!("[{}] {}", citation.index, citation.uri);
2452
2453 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2455 if let Some(title) = &frame.title {
2456 println!(" Title: {}", title);
2457 }
2458 println!(" Frame ID: {}", citation.frame_id);
2459 if let Some(score) = citation.score {
2460 println!(" Score: {:.4}", score);
2461 }
2462 if let Some((start, end)) = citation.chunk_range {
2463 println!(" Range: [{}..{})", start, end);
2464 }
2465 if !frame.tags.is_empty() {
2466 println!(" Tags: {}", frame.tags.join(", "));
2467 }
2468 if !frame.labels.is_empty() {
2469 println!(" Labels: {}", frame.labels.join(", "));
2470 }
2471 println!(" Timestamp: {}", frame.timestamp);
2472 if !frame.content_dates.is_empty() {
2473 println!(" Content Dates: {}", frame.content_dates.join(", "));
2474 }
2475 }
2476
2477 if let Some(hit) = response
2479 .retrieval
2480 .hits
2481 .iter()
2482 .find(|h| h.frame_id == citation.frame_id)
2483 {
2484 let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2485 let truncated = if snippet.len() > 200 {
2486 format!("{}...", &snippet[..200])
2487 } else {
2488 snippet.clone()
2489 };
2490 println!(" Snippet: {}", truncated.replace('\n', " "));
2491 }
2492 println!();
2493 }
2494 }
2495
2496 if !include_sources {
2497 println!();
2498 emit_search_table(&response.retrieval);
2499 }
2500
2501 if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2503 if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2504 if needed {
2505 println!();
2506 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2507 println!("💡 FOLLOW-UP SUGGESTIONS");
2508 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2509
2510 if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2511 println!("Reason: {}", reason);
2512 }
2513
2514 if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2515 println!("Hint: {}", hint);
2516 }
2517
2518 if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2519 if !topics.is_empty() {
2520 println!();
2521 println!("Available topics in this memory:");
2522 for topic in topics.iter().filter_map(|t| t.as_str()) {
2523 println!(" • {}", topic);
2524 }
2525 }
2526 }
2527
2528 if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2529 if !suggestions.is_empty() {
2530 println!();
2531 println!("Try asking:");
2532 for (i, suggestion) in suggestions.iter().filter_map(|s| s.as_str()).enumerate() {
2533 println!(" {}. \"{}\"", i + 1, suggestion);
2534 }
2535 }
2536 }
2537 println!();
2538 }
2539 }
2540 }
2541}
2542
2543fn emit_verbatim_evidence_json(
2546 response: &AskResponse,
2547 include_sources: bool,
2548 mem: &mut Memvid,
2549) -> Result<()> {
2550 let evidence: Vec<_> = response
2552 .retrieval
2553 .hits
2554 .iter()
2555 .enumerate()
2556 .map(|(idx, hit)| {
2557 let mut entry = serde_json::Map::new();
2558 entry.insert("index".into(), json!(idx + 1));
2559 entry.insert("frame_id".into(), json!(hit.frame_id));
2560 entry.insert("uri".into(), json!(&hit.uri));
2561 if let Some(title) = &hit.title {
2562 entry.insert("title".into(), json!(title));
2563 }
2564 let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2566 entry.insert("text".into(), json!(verbatim));
2567 if let Some(score) = hit.score {
2568 entry.insert("score".into(), json!(score));
2569 }
2570 serde_json::Value::Object(entry)
2571 })
2572 .collect();
2573
2574 let sources: Option<Vec<_>> = if include_sources {
2576 Some(
2577 response
2578 .retrieval
2579 .hits
2580 .iter()
2581 .filter_map(|hit| {
2582 mem.frame_by_id(hit.frame_id).ok().map(|frame| {
2583 let mut source = serde_json::Map::new();
2584 source.insert("frame_id".into(), json!(frame.id));
2585 source.insert("uri".into(), json!(frame.uri.as_deref().unwrap_or("(unknown)")));
2586 if let Some(title) = &frame.title {
2587 source.insert("title".into(), json!(title));
2588 }
2589 source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
2590 if !frame.tags.is_empty() {
2591 source.insert("tags".into(), json!(frame.tags));
2592 }
2593 if !frame.labels.is_empty() {
2594 source.insert("labels".into(), json!(frame.labels));
2595 }
2596 serde_json::Value::Object(source)
2597 })
2598 })
2599 .collect(),
2600 )
2601 } else {
2602 None
2603 };
2604
2605 let mut body = json!({
2606 "version": "mv2.evidence.v1",
2607 "mode": "verbatim",
2608 "question": response.question,
2609 "evidence": evidence,
2610 "evidence_count": evidence.len(),
2611 "total_hits": response.retrieval.total_hits,
2612 "stats": {
2613 "retrieval_ms": response.stats.retrieval_ms,
2614 "latency_ms": response.stats.latency_ms,
2615 },
2616 "engine": search_engine_label(&response.retrieval.engine),
2617 });
2618
2619 if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
2620 map.insert("sources".into(), json!(sources));
2621 }
2622
2623 let json_str = serde_json::to_string_pretty(&body)?;
2624 println!("{}", json_str.to_colored_json_auto()?);
2625 Ok(())
2626}
2627
/// Print verbatim evidence (raw retrieved chunks, no LLM synthesis) in a
/// human-readable layout, optionally followed by per-frame source details.
fn emit_verbatim_evidence_pretty(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) {
    // Header: timings, engine and hit counts.
    println!(
        "mode: {} latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    // Banner with the (possibly truncated) question.
    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Min/max over all scores so each hit can show a 0-100% relevance figure.
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str).green().bold()
        );
        println!(" Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Prefer the exact matched chunk; fall back to the full hit text.
        // Blank lines are dropped; the rest is quoted with a gutter bar.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!(" │ {}", line);
            }
        }
        println!();
    }

    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        // Frame-level metadata per hit; unresolvable frames are skipped.
        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!("{}", format!("[{}] {}", idx + 1, frame.uri.as_deref().unwrap_or("(unknown)")).cyan());
                if let Some(title) = &frame.title {
                    println!(" Title: {}", title);
                }
                println!(" Frame ID: {}", frame.id);
                println!(" Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!(" Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!(" Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!(" Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    // Footer reminding the user how to get a synthesized answer.
    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2743
2744fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2745 let hits: Vec<_> = response
2746 .hits
2747 .iter()
2748 .map(|hit| {
2749 json!({
2750 "frame_id": hit.frame_id,
2751 "matches": hit.matches,
2752 "snippets": [hit.text.clone()],
2753 })
2754 })
2755 .collect();
2756 println!("{}", serde_json::to_string_pretty(&hits)?);
2757 Ok(())
2758}
2759
/// Render search hits as an indented plain-text listing, one block per hit,
/// ending with the pagination cursor when one is present.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Min/max over all available scores drives the 0-100% relevance figure.
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!(" Title: {title}");
        }
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!(" Relevance: {:.0}%", normalized);
        }
        println!(" Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!(" Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!(" Chunk Text: {}", chunk_text.trim());
        }
        // Optional metadata: only non-empty fields are printed.
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!(" Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!(" Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!(" Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!(" Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!(" Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                // Entities render as "name (kind)" pairs.
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!(" Entities: {}", entity_strs.join(", "));
            }
        }
        println!(" Snippet: {}", hit.text.trim());
        println!();
    }
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2818
2819fn ask_mode_display(mode: AskModeArg) -> &'static str {
2820 match mode {
2821 AskModeArg::Lex => "lex",
2822 AskModeArg::Sem => "sem",
2823 AskModeArg::Hybrid => "hybrid",
2824 }
2825}
2826
2827fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2828 match mode {
2829 AskModeArg::Lex => "Lexical",
2830 AskModeArg::Sem => "Semantic",
2831 AskModeArg::Hybrid => "Hybrid",
2832 }
2833}
2834
2835fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2836 match retriever {
2837 AskRetriever::Lex => "lex",
2838 AskRetriever::Semantic => "semantic",
2839 AskRetriever::Hybrid => "hybrid",
2840 AskRetriever::LexFallback => "lex_fallback",
2841 AskRetriever::TimelineFallback => "timeline_fallback",
2842 }
2843}
2844
2845fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2846 match retriever {
2847 AskRetriever::Lex => "Lexical",
2848 AskRetriever::Semantic => "Semantic",
2849 AskRetriever::Hybrid => "Hybrid",
2850 AskRetriever::LexFallback => "Lexical (fallback)",
2851 AskRetriever::TimelineFallback => "Timeline (fallback)",
2852 }
2853}
2854
2855fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2856 match engine {
2857 SearchEngineKind::Tantivy => "text (tantivy)",
2858 SearchEngineKind::LexFallback => "text (fallback)",
2859 SearchEngineKind::Hybrid => "hybrid",
2860 }
2861}
2862
2863fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2864 let digest = hash(uri.as_bytes()).to_hex().to_string();
2865 let prefix_len = digest.len().min(12);
2866 let prefix = &digest[..prefix_len];
2867 format!("mv2-hit-{prefix}-{frame_id}-{start}")
2868}
2869
/// Return `text` unchanged when it has at most `limit` characters; otherwise
/// keep the first `limit` characters and append "...".
///
/// Operates on `char`s, so it never splits a multi-byte UTF-8 sequence.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    match text.char_indices().nth(limit) {
        // Fewer than `limit + 1` chars: nothing to cut.
        None => text.to_string(),
        // `byte_idx` is the start of the first char past the limit.
        Some((byte_idx, _)) => format!("{}...", &text[..byte_idx]),
    }
}
2878
/// Min-max normalize a BM25 score into a 0-100 "relevance %" for display.
///
/// A degenerate range (all scores equal) maps everything to 100%.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let span = max_score - min_score;
    if span.abs() < f32::EPSILON {
        100.0
    } else {
        ((score - min_score) / span * 100.0).clamp(0.0, 100.0)
    }
}
2895
/// Return the (min, max) over all present scores, or (0.0, 0.0) when every
/// entry is `None`.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut any = false;
    // Start from the identity elements of min/max, exactly like a fold.
    let mut lo = f32::INFINITY;
    let mut hi = f32::NEG_INFINITY;
    for s in scores.iter().flatten() {
        any = true;
        lo = lo.min(*s);
        hi = hi.max(*s);
    }
    if any { (lo, hi) } else { (0.0, 0.0) }
}
2906
2907fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
2908 let mut hit_json = serde_json::Map::new();
2909 hit_json.insert("rank".into(), json!(hit.rank));
2910 if let Some(score) = hit.score {
2911 hit_json.insert("score".into(), json!(score));
2912 }
2913 hit_json.insert(
2914 "id".into(),
2915 json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
2916 );
2917 hit_json.insert("frame_id".into(), json!(hit.frame_id));
2918 hit_json.insert("uri".into(), json!(hit.uri));
2919 if let Some(title) = &hit.title {
2920 hit_json.insert("title".into(), json!(title));
2921 }
2922 let chunk_range = hit.chunk_range.unwrap_or(hit.range);
2923 hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
2924 hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
2925 hit_json.insert("text".into(), json!(hit.text));
2926
2927 let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
2928 matches: hit.matches,
2929 ..SearchHitMetadata::default()
2930 });
2931 let mut meta_json = serde_json::Map::new();
2932 meta_json.insert("matches".into(), json!(metadata.matches));
2933 if !metadata.tags.is_empty() {
2934 meta_json.insert("tags".into(), json!(metadata.tags));
2935 }
2936 if !metadata.labels.is_empty() {
2937 meta_json.insert("labels".into(), json!(metadata.labels));
2938 }
2939 if let Some(track) = metadata.track {
2940 meta_json.insert("track".into(), json!(track));
2941 }
2942 if let Some(created_at) = metadata.created_at {
2943 meta_json.insert("created_at".into(), json!(created_at));
2944 }
2945 if !metadata.content_dates.is_empty() {
2946 meta_json.insert("content_dates".into(), json!(metadata.content_dates));
2947 }
2948 if !metadata.entities.is_empty() {
2949 let entities_json: Vec<serde_json::Value> = metadata
2950 .entities
2951 .iter()
2952 .map(|e| {
2953 let mut ent = serde_json::Map::new();
2954 ent.insert("name".into(), json!(e.name));
2955 ent.insert("kind".into(), json!(e.kind));
2956 if let Some(conf) = e.confidence {
2957 ent.insert("confidence".into(), json!(conf));
2958 }
2959 serde_json::Value::Object(ent)
2960 })
2961 .collect();
2962 meta_json.insert("entities".into(), json!(entities_json));
2963 }
2964 hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
2965 serde_json::Value::Object(hit_json)
2966}
/// Re-rank lexical hits by fusing them with frame-embedding similarity via
/// Reciprocal Rank Fusion (RRF), plus a small boost for preference-style
/// queries. Leaves the order untouched when no semantic signal is available.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Cosine similarity between the query embedding and each hit's stored
    // frame embedding; frames with missing or dimension-mismatched
    // embeddings simply get no semantic score.
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // Nothing to fuse: keep the lexical order as-is.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Rank frames by semantic score (rank 1 = most similar) for RRF.
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Heuristic: advice-seeking queries activate the preference boost below.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Standard RRF damping constant.
    const RRF_K: f32 = 60.0;

    // (original index, fused score, lexical rank used as a tie-break).
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // Hits without a semantic score contribute nothing here.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Scaled down so the boost nudges rather than dominates RRF terms.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 } else {
                0.0
            };

            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Best fused score first; ties fall back to the original lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list with fresh 1-based ranks in the new order.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3072
3073fn apply_preference_rerank(response: &mut SearchResponse) {
3076 if response.hits.is_empty() {
3077 return;
3078 }
3079
3080 let query_lower = response.query.to_lowercase();
3082 let is_preference_query = query_lower.contains("suggest")
3083 || query_lower.contains("recommend")
3084 || query_lower.contains("should i")
3085 || query_lower.contains("what should")
3086 || query_lower.contains("prefer")
3087 || query_lower.contains("favorite")
3088 || query_lower.contains("best for me");
3089
3090 if !is_preference_query {
3091 return;
3092 }
3093
3094 let mut scored: Vec<(usize, f32, f32)> = response
3096 .hits
3097 .iter()
3098 .enumerate()
3099 .map(|(idx, hit)| {
3100 let original_score = hit.score.unwrap_or(0.0);
3101 let preference_boost = compute_preference_boost(&hit.text);
3102 let boosted_score = original_score + preference_boost;
3103 (idx, boosted_score, original_score)
3104 })
3105 .collect();
3106
3107 scored.sort_by(|a, b| {
3109 b.1.partial_cmp(&a.1)
3110 .unwrap_or(Ordering::Equal)
3111 .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3112 });
3113
3114 let mut reordered = Vec::with_capacity(response.hits.len());
3116 for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3117 let mut hit = response.hits[idx].clone();
3118 hit.rank = rank_idx + 1;
3119 reordered.push(hit);
3120 }
3121
3122 response.hits = reordered;
3123}
3124
/// Score how strongly `text` reads like a personal-preference statement.
///
/// Established-context phrases ("i prefer", "my favorite", ...) weigh 0.15
/// each; generic first-person markers and help-request phrasings weigh 0.02
/// each. The total is capped at 0.5.
fn compute_preference_boost(text: &str) -> f32 {
    let text_lower = text.to_lowercase();

    // Phrases indicating the author describes their own habits/possessions.
    let established_context = [
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    // Weak first-person markers (padded with spaces to avoid substrings).
    let first_person = [" i ", " my ", " me "];
    // Phrases indicating a request for help rather than a stated preference.
    let request_patterns = [
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    // Accumulate additively (same order/increments as a running += loop).
    let mut boost = established_context
        .iter()
        .filter(|pattern| text_lower.contains(*pattern))
        .fold(0.0f32, |acc, _| acc + 0.15);
    boost = first_person
        .iter()
        .filter(|pattern| text_lower.contains(*pattern))
        .fold(boost, |acc, _| acc + 0.02);
    boost = request_patterns
        .iter()
        .filter(|pattern| text_lower.contains(*pattern))
        .fold(boost, |acc, _| acc + 0.02);

    boost.min(0.5)
}
3217
/// Cosine similarity over the overlapping prefix of `a` and `b`.
///
/// Returns 0.0 when either vector has (near-)zero magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in a single pass.
    let (dot, norm_a, norm_b) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(d, na, nb), (x, y)| {
            (d + x * y, na + x * x, nb + y * y)
        });

    if norm_a <= f32::EPSILON || norm_b <= f32::EPSILON {
        0.0
    } else {
        dot / (norm_a.sqrt() * norm_b.sqrt())
    }
}
3234
#[cfg(feature = "local-embeddings")]
/// Re-rank the top hits with a local cross-encoder (JINA turbo reranker),
/// blending 20% cross-encoder score with 80% min-max-normalized original
/// score. Best-effort: any initialization or inference failure leaves the
/// original ordering untouched.
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits.
    if response.hits.is_empty() || response.hits.len() < 2 {
        return Ok(());
    }

    // Cross-encoding is expensive: cap the candidate set at 50.
    let rerank_count = response.hits.len().min(50);

    let init = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);
    let mut reranker = match TextRerank::try_new(init) {
        Ok(model) => model,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    let documents: Vec<String> = response.hits[..rerank_count]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    // Min-max bounds of the original scores within the candidate window,
    // with a floor on the range to avoid division by ~zero.
    let original_scores: Vec<f32> = response.hits[..rerank_count]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores.iter().cloned().fold(f32::INFINITY, f32::min);
    let orig_max = original_scores.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    let orig_range = (orig_max - orig_min).max(0.001);

    // (blended score, original index) per reranked candidate.
    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());
    for result in rerank_results.iter() {
        let original_idx = result.index;
        let normalized_original =
            (response.hits[original_idx].score.unwrap_or(0.0) - orig_min) / orig_range;
        // 20% cross-encoder, 80% normalized original score.
        scored_hits.push((result.score * 0.2 + normalized_original * 0.8, original_idx));
    }

    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Reranked candidates first, then the untouched tail, with fresh ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        hit.score = Some(blended_score);
        reordered.push(hit);
    }
    for hit in response.hits.iter().skip(rerank_count) {
        let mut tail = hit.clone();
        tail.rank = reordered.len() + 1;
        reordered.push(tail);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3334
/// No-op fallback used when the `local-embeddings` feature (and thus the
/// fastembed cross-encoder reranker) is not compiled in.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3341
3342fn build_memory_context(mem: &Memvid) -> String {
3345 let entities = mem.memory_entities();
3346 if entities.is_empty() {
3347 return String::new();
3348 }
3349
3350 let mut sections = Vec::new();
3351 for entity in entities {
3352 let cards = mem.get_entity_memories(&entity);
3353 if cards.is_empty() {
3354 continue;
3355 }
3356
3357 let mut entity_lines = Vec::new();
3358 for card in cards {
3359 let polarity_marker = card
3361 .polarity
3362 .as_ref()
3363 .map(|p| match p.to_string().as_str() {
3364 "Positive" => " (+)",
3365 "Negative" => " (-)",
3366 _ => "",
3367 })
3368 .unwrap_or("");
3369 entity_lines.push(format!(
3370 " - {}: {}{}",
3371 card.slot, card.value, polarity_marker
3372 ));
3373 }
3374
3375 sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3376 }
3377
3378 sections.join("\n\n")
3379}
3380
3381fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3384 use std::collections::HashMap;
3385
3386 let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3388
3389 for hit in hits {
3390 if let Some(metadata) = &hit.metadata {
3391 for entity in &metadata.entities {
3392 entities_by_kind
3393 .entry(entity.kind.clone())
3394 .or_default()
3395 .push(entity.name.clone());
3396 }
3397 }
3398 }
3399
3400 if entities_by_kind.is_empty() {
3401 return String::new();
3402 }
3403
3404 let mut sections = Vec::new();
3406 let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3407 sorted_kinds.sort();
3408
3409 for kind in sorted_kinds {
3410 let names = entities_by_kind.get(kind).unwrap();
3411 let mut unique_names: Vec<_> = names.iter().collect();
3412 unique_names.sort();
3413 unique_names.dedup();
3414
3415 let names_str = unique_names
3416 .iter()
3417 .take(10) .map(|s| s.as_str())
3419 .collect::<Vec<_>>()
3420 .join(", ");
3421
3422 sections.push(format!("{}: {}", kind, names_str));
3423 }
3424
3425 sections.join("\n")
3426}