1use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use colored::Colorize;
15use colored_json::ToColoredJson;
16use blake3::hash;
17use clap::{ArgAction, Args, ValueEnum};
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20 types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21 TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24 types::{AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy, SearchHitMetadata},
25 AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind, SearchHit,
26 SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
27};
28#[cfg(feature = "temporal_track")]
29use serde::Serialize;
30use serde_json::json;
31#[cfg(feature = "temporal_track")]
32use time::format_description::well_known::Rfc3339;
33use time::{Date, PrimitiveDateTime, Time};
34#[cfg(feature = "temporal_track")]
35use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
36use tracing::{info, warn};
37
38use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
39
40use memvid_ask_model::{
41 run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
42};
43
44use crate::config::{
46 load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
47 try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig, EmbeddingModelChoice, EmbeddingRuntime,
48};
49use crate::utils::{
50 autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
51 parse_date_boundary, parse_vector, read_embedding,
52};
53
// Cap (in characters) applied to context echoed in command output.
// NOTE(review): the consumer is further down this file — confirm where it is applied.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone when `--tz` is not supplied (see `build_temporal_filter`).
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
57
58fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
59 let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
60 message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
61 if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
62 message.push_str(&format!(
63 "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
64 model.name(),
65 model.name()
66 ));
67 if model.is_openai() {
68 message.push_str(" (and set `OPENAI_API_KEY`).");
69 } else {
70 message.push('.');
71 }
72 message.push_str(&format!(
73 "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
74 model.name()
75 ));
76 message.push_str(&format!(
77 "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
78 ));
79 message.push_str("\nOr use `--mode lex` to disable semantic search.");
80 }
81 message
82}
83
#[derive(Args)]
// CLI arguments for the `timeline` subcommand: list frames with optional
// timestamp bounds, ordering, and (under `temporal_track`) a natural-language
// time phrase. Plain `//` comments are used deliberately — `///` doc comments
// would be picked up by clap as help text and change CLI output.
pub struct TimelineArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
    // Reverse the timeline order.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Inclusive lower timestamp bound (Unix seconds, per core API).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (Unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase; required for --tz/--anchor/--window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone override (falls back to DEFAULT_TEMPORAL_TZ).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 timestamp used as "now" when resolving the phrase.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Time-travel cutoff: drop entries with a frame id above this value.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel cutoff: drop entries with a timestamp above this value.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
118
#[cfg(feature = "temporal_track")]
#[derive(Args)]
// CLI arguments for the `when` subcommand: resolve a temporal phrase into a
// concrete UTC window and list matching frames. `//` comments (not `///`)
// so clap does not treat them as help text.
pub struct WhenArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Temporal phrase to resolve (required; see `build_temporal_filter`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone override (falls back to DEFAULT_TEMPORAL_TZ).
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 timestamp used as "now" when resolving the phrase.
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return (must be non-zero).
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower timestamp bound (Unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper timestamp bound (Unix seconds).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Reverse the listing order.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of the human-readable listing.
    #[arg(long)]
    pub json: bool,
}
144
#[derive(Args)]
// CLI arguments for the `ask` subcommand (retrieval-augmented QA over a
// memory file). `//` comments are intentional — `///` would become clap help.
pub struct AskArgs {
    // Free-form positional tokens: at most one may name the memory file
    // (detected via `looks_like_memory`); the rest are joined into the
    // question. See `handle_ask`.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text; takes precedence over the positional question.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to one exact URI; when set, --scope is ignored.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit JSON output.
    #[arg(long)]
    pub json: bool,
    // Return retrieved context only (no answer synthesis).
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    // Include source listing in output — TODO confirm against later code.
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    // Mask PII in output — TODO confirm against later code.
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    // Prepend "known facts" memory context to retrieval context (see handle_ask).
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    // Override for the LLM context budget, in characters — presumably fed to
    // resolve_llm_context_budget_override; verify against later code.
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Inclusive start date filter (parsed by parse_date_boundary).
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End date filter; must not be earlier than --start.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Local model to run; bare `--use-model` defaults to "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    // Override the embedding model used for the query only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    // Time-travel cutoff by frame id (forwarded to AskRequest).
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel cutoff by Unix timestamp (forwarded to AskRequest).
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Custom system prompt for the model — TODO confirm against later code.
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    // Disable the cross-encoder reranking pass.
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Skip LLM answer synthesis — TODO confirm against later code.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Disable adaptive result cutoff (AdaptiveConfig).
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Threshold ratio/score used by the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Which adaptive cutoff strategy to apply.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
237
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
// CLI-facing mirror of core `AskMode` (see the From impl below).
// `//` comments so clap's ValueEnum does not surface them as help text.
pub enum AskModeArg {
    Lex,
    Sem,
    Hybrid,
}
245
246impl From<AskModeArg> for AskMode {
247 fn from(value: AskModeArg) -> Self {
248 match value {
249 AskModeArg::Lex => AskMode::Lex,
250 AskModeArg::Sem => AskMode::Sem,
251 AskModeArg::Hybrid => AskMode::Hybrid,
252 }
253 }
254}
255
#[derive(Args)]
// CLI arguments for the `find` subcommand (search without answer synthesis).
// `//` comments are intentional — `///` would become clap help text.
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict to one exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit (current-format) JSON.
    #[arg(long)]
    pub json: bool,
    // Emit legacy JSON format; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search engine selection (auto picks based on the memory).
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    // Time-travel cutoff by frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel cutoff by Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Override the embedding model used for the query only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Disable adaptive result cutoff.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Threshold ratio/score used by the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Which adaptive cutoff strategy to apply.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    // Enable graph-augmented search — TODO confirm against later code.
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    // Force hybrid search — TODO confirm against later code.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    // Disable sketch acceleration — TODO confirm against later code.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
322
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
// Engine selection for `find`; Clip only exists with the `clip` feature.
// `//` comments so clap's ValueEnum help output is unchanged.
pub enum SearchMode {
    Auto,
    Lex,
    Sem,
    #[cfg(feature = "clip")]
    Clip,
}
333
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
// CLI mirror of core `CutoffStrategy` variants (mapped in `handle_ask`):
// Relative -> RelativeThreshold, Absolute -> AbsoluteThreshold,
// Cliff -> ScoreCliff, Elbow -> Elbow, Combined -> Combined.
pub enum AdaptiveStrategyArg {
    Relative,
    Absolute,
    Cliff,
    Elbow,
    Combined,
}
348
#[derive(Args)]
// CLI arguments for the `vec-search` subcommand: nearest-neighbor search
// with a caller-supplied raw embedding vector.
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline comma-separated vector; exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a vector file (read via read_embedding); exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Number of nearest neighbors to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON output.
    #[arg(long)]
    pub json: bool,
}
363
#[derive(Args)]
// CLI arguments for the `audit` subcommand: answer a question and write an
// auditable report in text/markdown/JSON.
pub struct AuditArgs {
    // Memory file to query.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Question to audit (positional, required).
    #[arg(value_name = "QUESTION")]
    pub question: String,
    // Output path; defaults to stdout behavior when absent — TODO confirm.
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    // Report format.
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    // Retrieval mode (lex/sem/hybrid).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Inclusive start date filter.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // End date filter.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Local model to run for answer synthesis.
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
400
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
// Output format for `audit` reports.
pub enum AuditFormat {
    Text,
    Markdown,
    Json,
}
411
/// Handle the `timeline` subcommand: query timeline entries from a memory
/// file, apply optional time-travel cutoffs, and print them as JSON or
/// human-readable text.
///
/// # Errors
/// Fails if the memory cannot be opened, the temporal phrase cannot be
/// resolved (feature `temporal_track`), or `--tz/--anchor/--window` are
/// supplied without `--on` (`E-TEMP-005`).
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // The temporal modifier flags only make sense together with --on.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the phrase (if any) into a temporal filter; the summary is
    // kept for echoing back to the user alongside the entries.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Time-travel view: drop anything newer than the requested frame/timestamp.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // With a temporal summary, wrap entries so the resolved window is
        // part of the JSON payload; otherwise dump the entries bare.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!(" URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!(" Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
521
#[cfg(feature = "temporal_track")]
/// Handle the `when` subcommand: resolve the required temporal phrase into
/// a window, list the frames inside it, and print either JSON or text with
/// the resolution summary up front.
///
/// # Errors
/// Fails if the memory cannot be opened or the phrase/anchor/timezone are
/// invalid (see `build_temporal_filter` error codes).
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    // Text output: summary first, then one line (plus details) per entry.
    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!(" URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!(" Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
589
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
/// JSON envelope for `timeline --json` when a temporal phrase was resolved:
/// the resolution summary plus the (borrowed) matching entries.
struct TimelineOutput<'a> {
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
597
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
/// JSON envelope for `when --json`: resolution summary plus entry views.
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
604
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
/// JSON view of a single timeline entry for `when --json`
/// (built from a core `TimelineEntry` by `entry_to_when_entry`).
struct WhenEntry {
    frame_id: FrameId,
    // Unix seconds; `timestamp_iso` is the same instant rendered via
    // `format_timestamp` when that succeeds.
    timestamp: i64,
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
620
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
/// Serializable form of `TemporalSummary` (see `summary_to_output`):
/// describes how a phrase was resolved into a UTC window.
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    // Anchor ("now") as Unix seconds and as RFC3339 (falls back to the raw
    // timestamp string if formatting fails).
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    // One of the strings produced by `resolution_kind`.
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
639
#[cfg(feature = "temporal_track")]
/// Internal record of a phrase resolution, produced by
/// `build_temporal_filter` and rendered via `summary_to_output` /
/// `print_temporal_summary`.
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    // Resolved window bounds in Unix seconds (already padded by
    // `window_minutes` when that was requested).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    window_minutes: Option<u64>,
}
650
#[cfg(feature = "temporal_track")]
/// Resolve a natural-language temporal phrase into a concrete UTC window,
/// returning both the core `TemporalFilter` and a `TemporalSummary` used
/// for user-facing/JSON reporting.
///
/// # Errors
/// - `E-TEMP-003` if the timezone override trims to empty.
/// - `E-TEMP-002` if the anchor override is not valid RFC3339.
/// - `E-TEMP-001` if the normalizer cannot resolve the phrase.
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    // The anchor is "now" unless the caller pins it (useful for
    // reproducible queries and tests).
    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both ends of the resolved window by the requested number of
    // minutes. (The previous version branched on `s == e`, but both
    // branches were identical — point-in-time and range resolutions get
    // the same symmetric padding, so the branch is collapsed.)
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let delta_secs = TimeDuration::minutes(minutes as i64).whole_seconds();
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta_secs));
                end = Some(e.saturating_add(delta_secs));
            }
        }
    }

    // The phrase/timezone are reported through the summary, not the filter.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
714
#[cfg(feature = "temporal_track")]
/// Convert the internal resolution summary into its serializable form.
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    // Precompute the pieces that need fallbacks or iteration.
    let anchor_utc = summary.anchor.unix_timestamp();
    let anchor_iso = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| anchor_utc.to_string());
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();

    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc,
        anchor_iso,
        confidence: summary.resolution.confidence,
        flags,
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
740
#[cfg(feature = "temporal_track")]
/// Project a core `TimelineEntry` into the JSON view used by `when --json`.
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        // None when the timestamp cannot be formatted; field is then omitted.
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
753
#[cfg(feature = "temporal_track")]
/// Print the human-readable resolution summary (phrase, timezone, anchor,
/// resolved window, confidence, flags) followed by a blank separator line.
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!(
        "Anchor: {}",
        summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string())
    );
    let start_iso = summary.start_utc.and_then(format_timestamp);
    let end_iso = summary.end_utc.and_then(format_timestamp);
    // A degenerate window (start == end) reads as a single point in time.
    match (start_iso, end_iso) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    if let Some(window) = summary.window_minutes {
        if window > 0 {
            println!("Window padding: {window} minute(s)");
        }
    }
    println!();
}
791
#[cfg(feature = "temporal_track")]
/// Print per-entry temporal metadata: the anchor (if any) and each temporal
/// mention with its kind, confidence, and source text.
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string, then a formatted timestamp, then
        // the raw Unix value as a last resort.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            " Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!(" Mentions:");
        for mention in &temporal.mentions {
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                " - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
824
#[cfg(feature = "temporal_track")]
/// Map a temporal resolution to inclusive Unix-second window bounds.
///
/// Point resolutions (Date/DateTime) yield `start == end`; dates are taken
/// at midnight UTC via `date_to_timestamp`. Month expands to the midnights
/// of its first and last day.
///
/// # Errors
/// Returns "invalid month resolution" when the year/month pair does not
/// form a valid calendar date.
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
855
#[cfg(feature = "temporal_track")]
/// Stable string tag for a resolution variant, used in JSON output.
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match resolution.value {
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
866
#[cfg(feature = "temporal_track")]
/// Interpret a calendar date as midnight UTC and return its Unix timestamp.
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    let utc = midnight.assume_offset(UtcOffset::UTC);
    utc.unix_timestamp()
}
873
#[cfg(feature = "temporal_track")]
/// Find the last calendar day of `month` in `year` by walking forward from
/// the 1st until the month changes.
///
/// # Errors
/// Returns "invalid month resolution" when `year`/`month` cannot form a
/// valid first-of-month date.
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut last = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match last.next_day() {
            // Still inside the target month: keep advancing.
            Some(next) if next.month() == month => last = next,
            // Month rolled over (or calendar ended): `last` is the answer.
            _ => break,
        }
    }
    Ok(last)
}
887
#[cfg(feature = "temporal_track")]
// NOTE(review): the cfg attribute above gates this whole function on
// `temporal_track`, yet the struct literal below repeats the same cfg for
// just the `temporal` field — redundant if the outer gate is intentional.
// The blank line between the attribute and the function also suggests the
// outer attribute may be a stray leftover; confirm against callers before
// changing, since removing/keeping it decides whether this function exists
// in non-temporal builds.
/// Replace the response's context fragments with the model retriever's
/// selection, converting model-side fragment types to core ask types.
/// No-op when `fragments` is empty (keeps the core retrieval's fragments).
fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
    if fragments.is_empty() {
        return;
    }

    response.context_fragments = fragments
        .into_iter()
        .map(|fragment| AskContextFragment {
            rank: fragment.rank,
            frame_id: fragment.frame_id,
            uri: fragment.uri,
            title: fragment.title,
            score: fragment.score,
            matches: fragment.matches,
            range: Some(fragment.range),
            chunk_range: fragment.chunk_range,
            text: fragment.text,
            kind: Some(match fragment.kind {
                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
            }),
            // Model fragments carry no temporal metadata.
            #[cfg(feature = "temporal_track")]
            temporal: None,
        })
        .collect();
}
916
917pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
918 crate::utils::require_active_plan(config, "ask")?;
920
921 crate::api::track_query_usage(config, 1)?;
923
924 if args.uri.is_some() && args.scope.is_some() {
925 warn!("--scope ignored because --uri is provided");
926 }
927
928 let mut question_tokens = Vec::new();
929 let mut file_path: Option<PathBuf> = None;
930 for token in &args.targets {
931 if file_path.is_none() && looks_like_memory(token) {
932 file_path = Some(PathBuf::from(token));
933 } else {
934 question_tokens.push(token.clone());
935 }
936 }
937
938 let positional_question = if question_tokens.is_empty() {
939 None
940 } else {
941 Some(question_tokens.join(" "))
942 };
943
944 let question = args
945 .question
946 .or(positional_question)
947 .map(|value| value.trim().to_string())
948 .filter(|value| !value.is_empty());
949
950 let question = question
951 .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;
952
953 let (original_question, search_query) = {
956 let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
959 if let Ok(key) = std::env::var("OPENAI_API_KEY") {
960 (Some("gpt-4o-mini"), Some(key))
962 } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
963 (Some("llama-3.1-8b-instant"), Some(key))
965 } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
966 (Some("claude-haiku-4-5"), Some(key))
968 } else if let Ok(key) = std::env::var("XAI_API_KEY") {
969 (Some("grok-4-fast"), Some(key))
971 } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
972 (Some("mistral-small-latest"), Some(key))
974 } else {
975 (None, None)
977 };
978
979 let _ = (model_for_expansion, api_key_for_expansion); (question.clone(), question.clone())
990 };
991
992 let memory_path = match file_path {
993 Some(path) => path,
994 None => autodetect_memory_file()?,
995 };
996
997 let start = parse_date_boundary(args.start.as_ref(), false)?;
998 let end = parse_date_boundary(args.end.as_ref(), true)?;
999 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1000 if end_ts < start_ts {
1001 anyhow::bail!("--end must not be earlier than --start");
1002 }
1003 }
1004
1005 let mut mem = Memvid::open(&memory_path)?;
1007
1008 #[cfg(feature = "replay")]
1010 let _ = mem.load_active_session();
1011
1012 let mv2_dimension = mem.effective_vec_index_dimension()?;
1014
1015 let stats = mem.stats()?;
1017 let has_vectors = stats.vector_count > 0;
1018 let effective_mode = if !has_vectors && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1019 tracing::info!(
1020 "Memory has no embeddings (vector_count=0); falling back to lexical mode"
1021 );
1022 AskModeArg::Lex
1023 } else {
1024 args.mode.clone()
1025 };
1026
1027 let ask_mode: AskMode = effective_mode.clone().into();
1028 let inferred_model_override = match effective_mode {
1029 AskModeArg::Lex => None,
1030 AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
1031 memvid_core::EmbeddingIdentitySummary::Single(identity) => identity.model.map(String::from),
1032 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
1033 let models: Vec<_> = identities
1034 .iter()
1035 .filter_map(|entry| entry.identity.model.as_deref())
1036 .collect();
1037 anyhow::bail!(
1038 "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1039 Detected models: {:?}\n\n\
1040 Suggested fix: split into separate memories per embedding model.",
1041 models
1042 );
1043 }
1044 memvid_core::EmbeddingIdentitySummary::Unknown => None,
1045 },
1046 };
1047 let emb_model_override = args
1048 .query_embedding_model
1049 .as_deref()
1050 .or(inferred_model_override.as_deref());
1051 let runtime = match effective_mode {
1052 AskModeArg::Lex => None,
1053 AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
1054 config,
1055 emb_model_override,
1056 mv2_dimension,
1057 )?),
1058 AskModeArg::Hybrid => {
1059 try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
1061 || {
1062 load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1064 .ok()
1065 .map(|rt| {
1066 tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
1067 rt
1068 })
1069 },
1070 )
1071 }
1072 };
1073 if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1074 anyhow::bail!(
1075 "semantic embeddings unavailable; install/cached model required for {:?} mode",
1076 effective_mode
1077 );
1078 }
1079
1080 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1081
1082 let adaptive = if !args.no_adaptive {
1084 Some(AdaptiveConfig {
1085 enabled: true,
1086 max_results: args.max_k,
1087 min_results: 1,
1088 normalize_scores: true,
1089 strategy: match args.adaptive_strategy {
1090 AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1091 min_ratio: args.min_relevancy,
1092 },
1093 AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1094 min_score: args.min_relevancy,
1095 },
1096 AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff { max_drop_ratio: 0.3 },
1097 AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1098 AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1099 relative_threshold: args.min_relevancy,
1100 max_drop_ratio: 0.3,
1101 absolute_min: 0.3,
1102 },
1103 },
1104 })
1105 } else {
1106 None
1107 };
1108
1109 let request = AskRequest {
1110 question: search_query, top_k: args.top_k,
1112 snippet_chars: args.snippet_chars,
1113 uri: args.uri.clone(),
1114 scope: args.scope.clone(),
1115 cursor: args.cursor.clone(),
1116 start,
1117 end,
1118 #[cfg(feature = "temporal_track")]
1119 temporal: None,
1120 context_only: args.context_only,
1121 mode: ask_mode,
1122 as_of_frame: args.as_of_frame,
1123 as_of_ts: args.as_of_ts,
1124 adaptive,
1125 };
1126 let mut response = mem.ask(request, embedder).map_err(|err| match err {
1127 MemvidError::VecDimensionMismatch { expected, actual } => anyhow!(vec_dimension_mismatch_help(expected, actual)),
1128 other => anyhow!(other),
1129 })?;
1130
1131 response.question = original_question;
1134
1135 let is_temporal_query = {
1142 let q_lower = response.question.to_lowercase();
1143 q_lower.contains("current") || q_lower.contains("latest") || q_lower.contains("recent")
1144 || q_lower.contains("now") || q_lower.contains("today") || q_lower.contains("updated")
1145 || q_lower.contains("new ") || q_lower.contains("newest")
1146 };
1147 if !args.no_rerank
1148 && !response.retrieval.hits.is_empty()
1149 && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
1150 && !is_temporal_query
1151 {
1152 let mut search_response = SearchResponse {
1154 query: response.question.clone(),
1155 hits: response.retrieval.hits.clone(),
1156 total_hits: response.retrieval.hits.len(),
1157 params: memvid_core::SearchParams {
1158 top_k: args.top_k,
1159 snippet_chars: args.snippet_chars,
1160 cursor: None,
1161 },
1162 elapsed_ms: 0,
1163 engine: memvid_core::SearchEngineKind::Hybrid,
1164 next_cursor: None,
1165 context: String::new(),
1166 };
1167
1168 if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
1169 warn!("Cross-encoder reranking failed: {e}");
1170 } else {
1171 response.retrieval.hits = search_response.hits;
1173 response.retrieval.context = response
1175 .retrieval
1176 .hits
1177 .iter()
1178 .take(10) .map(|hit| hit.text.as_str())
1180 .collect::<Vec<_>>()
1181 .join("\n\n---\n\n");
1182 }
1183 }
1184
1185 if args.memories {
1187 let memory_context = build_memory_context(&mem);
1188 if !memory_context.is_empty() {
1189 response.retrieval.context = format!(
1191 "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
1192 memory_context, response.retrieval.context
1193 );
1194 }
1195 }
1196
1197 let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
1199 if !entity_context.is_empty() {
1200 response.retrieval.context = format!(
1202 "=== ENTITIES MENTIONED ===\n{}\n\n{}",
1203 entity_context, response.retrieval.context
1204 );
1205 }
1206
1207 if args.mask_pii {
1209 use memvid_core::pii::mask_pii;
1210
1211 response.retrieval.context = mask_pii(&response.retrieval.context);
1213
1214 for hit in &mut response.retrieval.hits {
1216 hit.text = mask_pii(&hit.text);
1217 if let Some(chunk_text) = &hit.chunk_text {
1218 hit.chunk_text = Some(mask_pii(chunk_text));
1219 }
1220 }
1221 }
1222
1223 let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;
1224
1225 let mut model_result: Option<ModelInference> = None;
1226 if args.no_llm {
1227 if args.use_model.is_some() {
1229 warn!("--use-model ignored because --no-llm disables LLM synthesis");
1230 }
1231 if args.json {
1232 emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
1233 } else {
1234 emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
1235 }
1236
1237 #[cfg(feature = "replay")]
1239 let _ = mem.save_active_session();
1240
1241 return Ok(());
1242 } else if response.context_only {
1243 if args.use_model.is_some() {
1244 warn!("--use-model ignored because --context-only disables synthesis");
1245 }
1246 } else if let Some(model_name) = args.use_model.as_deref() {
1247 match run_model_inference(
1248 model_name,
1249 &response.question,
1250 &response.retrieval.context,
1251 &response.retrieval.hits,
1252 llm_context_override,
1253 None,
1254 args.system_prompt.as_deref(),
1255 ) {
1256 Ok(inference) => {
1257 response.answer = Some(inference.answer.answer.clone());
1258 response.retrieval.context = inference.context_body.clone();
1259 apply_model_context_fragments(&mut response, inference.context_fragments.clone());
1260 model_result = Some(inference);
1261 }
1262 Err(err) => {
1263 warn!(
1264 "model inference unavailable for '{}': {err}. Falling back to default summary.",
1265 model_name
1266 );
1267 }
1268 }
1269 }
1270
1271 #[cfg(feature = "replay")]
1273 if let Some(ref inference) = model_result {
1274 if let Some(model_name) = args.use_model.as_deref() {
1275 let retrieved_frames: Vec<u64> = response
1277 .retrieval
1278 .hits
1279 .iter()
1280 .map(|hit| hit.frame_id)
1281 .collect();
1282
1283 mem.record_ask_action(
1284 &response.question,
1285 model_name, model_name, inference.answer.answer.as_bytes(),
1288 0, retrieved_frames,
1290 );
1291 }
1292 }
1293
1294 if args.json {
1295 if let Some(model_name) = args.use_model.as_deref() {
1296 emit_model_json(
1297 &response,
1298 model_name,
1299 model_result.as_ref(),
1300 args.sources,
1301 &mut mem,
1302 )?;
1303 } else {
1304 emit_ask_json(
1305 &response,
1306 effective_mode.clone(),
1307 model_result.as_ref(),
1308 args.sources,
1309 &mut mem,
1310 )?;
1311 }
1312 } else {
1313 emit_ask_pretty(
1314 &response,
1315 effective_mode.clone(),
1316 model_result.as_ref(),
1317 args.sources,
1318 &mut mem,
1319 );
1320 }
1321
1322 #[cfg(feature = "replay")]
1324 let _ = mem.save_active_session();
1325
1326 Ok(())
1327}
1328
1329fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1331 use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1332 use memvid_core::types::QueryPlan;
1333
1334 let planner = QueryPlanner::new();
1335
1336 let plan = if args.graph {
1338 let plan = planner.plan(&args.query, args.top_k);
1340 match plan {
1342 QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1343 QueryPlan::graph_only(graph_filter, args.top_k)
1344 }
1345 _ => plan,
1346 }
1347 } else {
1348 planner.plan(&args.query, args.top_k)
1350 };
1351
1352 let hits = hybrid_search(mem, &plan)?;
1354
1355 if args.json {
1356 let output = serde_json::json!({
1358 "query": args.query,
1359 "mode": if args.graph { "graph" } else { "hybrid" },
1360 "plan": format!("{:?}", plan),
1361 "hits": hits.iter().map(|h| {
1362 serde_json::json!({
1363 "frame_id": h.frame_id,
1364 "score": h.score,
1365 "graph_score": h.graph_score,
1366 "vector_score": h.vector_score,
1367 "matched_entity": h.matched_entity,
1368 "preview": h.preview,
1369 })
1370 }).collect::<Vec<_>>(),
1371 });
1372 println!("{}", serde_json::to_string_pretty(&output)?);
1373 } else {
1374 let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1376 println!("{} search for: \"{}\"", mode_str, args.query);
1377 println!("Plan: {:?}", plan);
1378 println!();
1379
1380 if hits.is_empty() {
1381 println!("No results found.");
1382 } else {
1383 println!("Results ({} hits):", hits.len());
1384 for (i, hit) in hits.iter().enumerate() {
1385 println!();
1386 println!(
1387 "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1388 i + 1,
1389 hit.frame_id,
1390 hit.score,
1391 hit.graph_score,
1392 hit.vector_score
1393 );
1394 if let Some(entity) = &hit.matched_entity {
1395 println!(" Matched entity: {}", entity);
1396 }
1397 if let Some(preview) = &hit.preview {
1398 let truncated = if preview.len() > 200 {
1399 format!("{}...", &preview[..200])
1400 } else {
1401 preview.clone()
1402 };
1403 println!(" {}", truncated.replace('\n', " "));
1404 }
1405 }
1406 }
1407 }
1408
1409 Ok(())
1410}
1411
/// Entry point for the `find` command: lexical, semantic, hybrid, graph, or
/// (feature-gated) CLIP search over a memory file.
///
/// Resolves the effective search mode from `args.mode` and embedding-runtime
/// availability, executes the search with adaptive-cutoff and fallback paths,
/// and prints either JSON (`--json` / deprecated `--json-legacy`) or a table.
///
/// # Errors
/// Fails on plan/quota checks, file open, embedding-runtime load in forced
/// semantic mode, mixed embedding models in forced semantic mode, or search
/// execution errors that cannot be recovered by a fallback.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    // Gate on an active subscription plan and meter this query.
    crate::utils::require_active_plan(config, "find")?;

    crate::api::track_query_usage(config, 1)?;

    let mut mem = open_read_only_mem(&args.file)?;

    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Graph and hybrid-graph search have their own dedicated pipeline.
    if args.graph || args.hybrid {
        return handle_graph_find(&mut mem, &args);
    }

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    let mv2_dimension = mem.effective_vec_index_dimension()?;
    // Embedding identity is only needed when a semantic path may run.
    let identity_summary = match args.mode {
        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
        #[cfg(feature = "clip")]
        SearchMode::Clip => None,
        SearchMode::Lex => None,
    };

    // Mixed embedding models make vector scores incomparable: hard error in
    // forced semantic mode, downgrade to lexical in auto mode.
    let mut semantic_allowed = true;
    let inferred_model_override = match identity_summary.as_ref() {
        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
            identity.model.as_deref().map(|value| value.to_string())
        }
        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
            let models: Vec<_> = identities
                .iter()
                .filter_map(|entry| entry.identity.model.as_deref())
                .collect();
            if args.mode == SearchMode::Sem {
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            warn!("semantic search disabled: mixed embedding models detected: {:?}", models);
            semantic_allowed = false;
            None
        }
        _ => None,
    };

    // Explicit CLI override wins over the model recorded in the memory.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());

    // Human-readable mode label plus the embedding runtime (when available).
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if !semantic_allowed {
                ("Lexical (semantic unsafe)".to_string(), None)
            } else if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Short machine-readable mode key used in the JSON output envelope.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // CLIP visual search: encode the query text, search the CLIP index, and
    // convert raw distance hits into the common SearchHit shape.
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Trace-level dump of raw CLIP hits before filtering.
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // Distance ceiling used to drop weak visual matches.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                // For unit vectors, cosine = 1 - d^2/2 with d the L2 distance.
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid, next_cursor: None,
            context: String::new(),
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {} k={} time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // Main retrieval: forced semantic (with adaptive cutoff and fixed-k /
    // lexical-rerank fallbacks) vs lexical/hybrid.
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        let query_embedding = runtime.embed_query(&args.query)?;

        // --uri doubles as a scope filter when --scope is absent.
        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if !args.no_adaptive {
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35, },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive vector search)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Dimension mismatch is unrecoverable; surface the help text.
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    // Fallback 1: fixed-k vector search with the same embedding.
                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (vector search fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                            }
                            return Err(anyhow!("Both adaptive and fixed-k search failed: {e}, {e2}"));
                        }
                    }
                }
            }
        } else {
            // --no-adaptive: go straight to fixed-k vector search.
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (vector search)".to_string(), None)
                }
                Err(e) => {
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    // Fallback: lexical search, then rerank with the embedder.
                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                        no_sketch: args.no_sketch,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // Lexical (or auto/hybrid) path: plain search, optionally semantically
        // reranked when an embedding runtime is available.
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
            no_sketch: args.no_sketch,
        };

        let mut resp = mem.search(request)?;

        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    // Output: JSON envelopes or the human-readable table.
    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {} k={} time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }

    // Best-effort session persistence; failure is non-fatal.
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1848
1849pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1850 crate::api::track_query_usage(config, 1)?;
1852
1853 let mut mem = open_read_only_mem(&args.file)?;
1854 let vector = if let Some(path) = args.embedding.as_deref() {
1855 read_embedding(path)?
1856 } else if let Some(vector_string) = &args.vector {
1857 parse_vector(vector_string)?
1858 } else {
1859 anyhow::bail!("provide --vector or --embedding for search input");
1860 };
1861
1862 let hits = mem.search_vec(&vector, args.limit).map_err(|err| match err {
1863 MemvidError::VecDimensionMismatch { expected, actual } => {
1864 anyhow!(vec_dimension_mismatch_help(expected, actual))
1865 }
1866 other => anyhow!(other),
1867 })?;
1868 let mut enriched = Vec::with_capacity(hits.len());
1869 for hit in hits {
1870 let preview = mem.frame_preview_by_id(hit.frame_id)?;
1871 enriched.push((hit.frame_id, hit.distance, preview));
1872 }
1873
1874 if args.json {
1875 let json_hits: Vec<_> = enriched
1876 .iter()
1877 .map(|(frame_id, distance, preview)| {
1878 json!({
1879 "frame_id": frame_id,
1880 "distance": distance,
1881 "preview": preview,
1882 })
1883 })
1884 .collect();
1885 let json_str = serde_json::to_string_pretty(&json_hits)?;
1886 println!("{}", json_str.to_colored_json_auto()?);
1887 } else if enriched.is_empty() {
1888 println!("No vector matches found");
1889 } else {
1890 for (frame_id, distance, preview) in enriched {
1891 println!("frame {frame_id} (distance {distance:.6}): {preview}");
1892 }
1893 }
1894 Ok(())
1895}
1896
/// Entry point for the `audit` command: runs a provenance audit for a question
/// over a memory file and writes the report as text, markdown, or JSON, to a
/// file (`--out`) or stdout. Optionally synthesizes the answer with an LLM.
///
/// # Errors
/// Fails on file open, invalid date bounds, embedding-runtime load in forced
/// semantic mode, audit execution, serialization, or output-file I/O.
pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
    use memvid_core::AuditOptions;
    use std::fs::File;
    use std::io::Write;

    let mut mem = Memvid::open(&args.file)?;

    // Parse the date window; `true` makes --end an inclusive end-of-day bound.
    let start = parse_date_boundary(args.start.as_ref(), false)?;
    let end = parse_date_boundary(args.end.as_ref(), true)?;
    if let (Some(start_ts), Some(end_ts)) = (start, end) {
        if end_ts < start_ts {
            anyhow::bail!("--end must not be earlier than --start");
        }
    }

    let ask_mode: AskMode = args.mode.into();
    // Sem requires a runtime (hard error); Hybrid degrades gracefully when absent.
    let runtime = match args.mode {
        AskModeArg::Lex => None,
        AskModeArg::Sem => Some(load_embedding_runtime(config)?),
        AskModeArg::Hybrid => try_load_embedding_runtime(config),
    };
    let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);

    let options = AuditOptions {
        top_k: Some(args.top_k),
        snippet_chars: Some(args.snippet_chars),
        mode: Some(ask_mode),
        scope: args.scope,
        start,
        end,
        include_snippets: true,
    };

    let mut report = mem.audit(&args.question, Some(options), embedder)?;

    // Optional LLM synthesis: build a context from the audit's source snippets
    // and, on success, replace the report answer and note the model used.
    if let Some(model_name) = args.use_model.as_deref() {
        let context = report
            .sources
            .iter()
            .filter_map(|s| s.snippet.clone())
            .collect::<Vec<_>>()
            .join("\n\n");

        match run_model_inference(
            model_name,
            &report.question,
            &context,
            &[], None,
            None,
            None, ) {
            Ok(inference) => {
                report.answer = Some(inference.answer.answer);
                report.notes.push(format!(
                    "Answer synthesized by model: {}",
                    inference.answer.model
                ));
            }
            Err(err) => {
                // Best-effort: keep the default audit answer on model failure.
                warn!(
                    "model inference unavailable for '{}': {err}. Using default answer.",
                    model_name
                );
            }
        }
    }

    let output = match args.format {
        AuditFormat::Text => report.to_text(),
        AuditFormat::Markdown => report.to_markdown(),
        AuditFormat::Json => serde_json::to_string_pretty(&report)?,
    };

    if let Some(out_path) = args.out {
        let mut file = File::create(&out_path)?;
        file.write_all(output.as_bytes())?;
        println!("Audit report written to: {}", out_path.display());
    } else {
        println!("{}", output);
    }

    Ok(())
}
1989
1990fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
1991 let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
1992
1993 let mut additional_params = serde_json::Map::new();
1994 if let Some(cursor) = &response.params.cursor {
1995 additional_params.insert("cursor".into(), json!(cursor));
1996 }
1997
1998 let mut params = serde_json::Map::new();
1999 params.insert("top_k".into(), json!(response.params.top_k));
2000 params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2001 params.insert("mode".into(), json!(mode));
2002 params.insert(
2003 "additional_params".into(),
2004 serde_json::Value::Object(additional_params),
2005 );
2006
2007 let mut metadata_json = serde_json::Map::new();
2008 metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2009 metadata_json.insert("total_hits".into(), json!(response.total_hits));
2010 metadata_json.insert(
2011 "next_cursor".into(),
2012 match &response.next_cursor {
2013 Some(cursor) => json!(cursor),
2014 None => serde_json::Value::Null,
2015 },
2016 );
2017 metadata_json.insert("engine".into(), json!(response.engine));
2018 metadata_json.insert("params".into(), serde_json::Value::Object(params));
2019
2020 let body = json!({
2021 "version": "mv2.result.v2",
2022 "query": response.query,
2023 "metadata": metadata_json,
2024 "hits": hits,
2025 "context": response.context,
2026 });
2027 let json_str = serde_json::to_string_pretty(&body)?;
2028 println!("{}", json_str.to_colored_json_auto()?);
2029 Ok(())
2030}
2031
/// Print an ask response to stdout as pretty-printed JSON in the `mv2.ask.v1`
/// envelope: question/answer, retrieval results, citations, timing stats, and
/// — when available — model usage, grounding, sources, and follow-up hints.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations only carry optional fields when present (chunk_range, score).
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; model/sources/follow_up are merged in below when present.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // Only report "model_used" when a different model actually served.
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                // Cached responses cost nothing; report the avoided spend instead.
                map.insert("usage".into(), json!({
                    "input_tokens": usage.input_tokens,
                    "output_tokens": usage.output_tokens,
                    "total_tokens": usage.total_tokens,
                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                }));
            }
            if let Some(grounding) = &inf.grounding {
                map.insert("grounding".into(), json!({
                    "score": grounding.score,
                    "label": grounding.label(),
                    "sentence_count": grounding.sentence_count,
                    "grounded_sentences": grounding.grounded_sentences,
                    "has_warning": grounding.has_warning,
                    "warning_reason": grounding.warning_reason,
                }));
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2135
2136fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2137 response
2138 .citations
2139 .iter()
2140 .enumerate()
2141 .map(|(idx, citation)| {
2142 let mut source = serde_json::Map::new();
2143 source.insert("index".into(), json!(idx + 1));
2144 source.insert("frame_id".into(), json!(citation.frame_id));
2145 source.insert("uri".into(), json!(citation.uri));
2146
2147 if let Some(range) = citation.chunk_range {
2148 source.insert("chunk_range".into(), json!([range.0, range.1]));
2149 }
2150 if let Some(score) = citation.score {
2151 source.insert("score".into(), json!(score));
2152 }
2153
2154 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2156 if let Some(title) = frame.title {
2157 source.insert("title".into(), json!(title));
2158 }
2159 if !frame.tags.is_empty() {
2160 source.insert("tags".into(), json!(frame.tags));
2161 }
2162 if !frame.labels.is_empty() {
2163 source.insert("labels".into(), json!(frame.labels));
2164 }
2165 source.insert("frame_timestamp".into(), json!(frame.timestamp));
2166 if !frame.content_dates.is_empty() {
2167 source.insert("content_dates".into(), json!(frame.content_dates));
2168 }
2169 }
2170
2171 if let Some(hit) = response
2173 .retrieval
2174 .hits
2175 .iter()
2176 .find(|h| h.frame_id == citation.frame_id)
2177 {
2178 let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2179 source.insert("snippet".into(), json!(snippet));
2180 }
2181
2182 serde_json::Value::Object(source)
2183 })
2184 .collect()
2185}
2186
2187fn build_follow_up_suggestions(
2190 response: &AskResponse,
2191 inference: Option<&ModelInference>,
2192 mem: &mut Memvid,
2193) -> Option<serde_json::Value> {
2194 let needs_followup = inference
2196 .and_then(|inf| inf.grounding.as_ref())
2197 .map(|g| g.score < 0.3 || g.has_warning)
2198 .unwrap_or(false);
2199
2200 let low_retrieval = response.retrieval.hits.first()
2202 .and_then(|h| h.score)
2203 .map(|score| score < -2.0)
2204 .unwrap_or(true);
2205
2206 if !needs_followup && !low_retrieval {
2207 return None;
2208 }
2209
2210 let limit = std::num::NonZeroU64::new(20).unwrap();
2212 let timeline_query = TimelineQueryBuilder::default()
2213 .limit(limit)
2214 .build();
2215
2216 let available_topics: Vec<String> = mem
2217 .timeline(timeline_query)
2218 .ok()
2219 .map(|entries| {
2220 entries
2221 .iter()
2222 .filter_map(|e| {
2223 let preview = e.preview.trim();
2225 if preview.is_empty() || preview.len() < 5 {
2226 return None;
2227 }
2228 let first_line = preview.lines().next().unwrap_or(preview);
2230 if first_line.len() > 60 {
2231 Some(format!("{}...", &first_line[..57]))
2232 } else {
2233 Some(first_line.to_string())
2234 }
2235 })
2236 .collect::<std::collections::HashSet<_>>()
2237 .into_iter()
2238 .take(5)
2239 .collect()
2240 })
2241 .unwrap_or_default();
2242
2243 let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2245 "No relevant information found in memory"
2246 } else if inference.and_then(|i| i.grounding.as_ref()).map(|g| g.has_warning).unwrap_or(false) {
2247 "Answer may not be well-supported by the available context"
2248 } else {
2249 "Low confidence in the answer"
2250 };
2251
2252 let suggestions: Vec<String> = if available_topics.is_empty() {
2254 vec![
2255 "What information is stored in this memory?".to_string(),
2256 "Can you list the main topics covered?".to_string(),
2257 ]
2258 } else {
2259 available_topics
2260 .iter()
2261 .take(3)
2262 .map(|topic| format!("Tell me about {}", topic))
2263 .chain(std::iter::once("What topics are in this memory?".to_string()))
2264 .collect()
2265 };
2266
2267 Some(json!({
2268 "needed": true,
2269 "reason": reason,
2270 "hint": if available_topics.is_empty() {
2271 "This memory may not contain information about your query."
2272 } else {
2273 "This memory contains information about different topics. Try asking about those instead."
2274 },
2275 "available_topics": available_topics,
2276 "suggestions": suggestions
2277 }))
2278}
2279
/// Print the model-synthesized `ask` answer as colored, pretty-printed JSON.
///
/// The body always carries `question`/`model`/`model_used`/`answer`/`context`;
/// `cached`, `usage`, `grounding`, `sources`, and `follow_up` are attached
/// only when the corresponding data is available. `mem` is needed mutably by
/// the source/follow-up helpers.
fn emit_model_json(
    response: &AskResponse,
    requested_model: &str,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let answer = response.answer.clone().unwrap_or_default();
    // Prefer labels recorded by the inference run; fall back to the CLI's
    // requested model name when no inference metadata exists.
    let requested_label = inference
        .map(|m| m.answer.requested.clone())
        .unwrap_or_else(|| requested_model.to_string());
    let used_label = inference
        .map(|m| m.answer.model.clone())
        .unwrap_or_else(|| requested_model.to_string());

    let mut body = json!({
        "question": response.question,
        "model": requested_label,
        "model_used": used_label,
        "answer": answer,
        // Retrieval context is capped so the JSON stays terminal-friendly.
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                // Cached answers cost nothing; the would-be cost is reported
                // as savings instead of spend.
                map.insert("usage".into(), json!({
                    "input_tokens": usage.input_tokens,
                    "output_tokens": usage.output_tokens,
                    "total_tokens": usage.total_tokens,
                    "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                    "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                }));
            }
            if let Some(grounding) = &inf.grounding {
                map.insert("grounding".into(), json!({
                    "score": grounding.score,
                    "label": grounding.label(),
                    "sentence_count": grounding.sentence_count,
                    "grounded_sentences": grounding.grounded_sentences,
                    "has_warning": grounding.has_warning,
                    "warning_reason": grounding.warning_reason,
                }));
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Only attached when the helper judges the answer weak (empty retrieval,
    // grounding warning, or low confidence).
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2349
/// Human-readable `ask` output: a header with retrieval stats, optional
/// model/usage/grounding lines, the answer (or a context-only notice),
/// citations, optional expanded sources, a search-hit table, and a follow-up
/// suggestion panel when retrieval looked weak.
fn emit_ask_pretty(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) {
    println!(
        "mode: {} retriever: {} k={} latency: {} ms (retrieval {} ms)",
        ask_mode_pretty(requested_mode),
        ask_retriever_pretty(response.retriever),
        response.retrieval.params.top_k,
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    if let Some(inference) = inference {
        let model = &inference.answer;
        let cached_label = if inference.cached { " [CACHED]" } else { "" };
        // Show both labels only when the provider substituted a different
        // model than the one requested.
        if model.requested.trim() == model.model {
            println!("model: {}{}", model.model, cached_label);
        } else {
            println!(
                "model requested: {} model used: {}{}",
                model.requested, model.model, cached_label
            );
        }
        if let Some(usage) = &inference.usage {
            // Cached answers are free; surface the avoided spend instead.
            let cost_label = if inference.cached {
                format!("$0.00 (saved ${:.6})", usage.cost_usd)
            } else {
                format!("${:.6}", usage.cost_usd)
            };
            println!(
                "tokens: {} input + {} output = {} cost: {}",
                usage.input_tokens,
                usage.output_tokens,
                usage.total_tokens,
                cost_label
            );
        }
        if let Some(grounding) = &inference.grounding {
            let warning = if grounding.has_warning {
                format!(" [WARNING: {}]", grounding.warning_reason.as_deref().unwrap_or("potential hallucination"))
            } else {
                String::new()
            };
            println!(
                "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
                grounding.score * 100.0,
                grounding.label(),
                grounding.grounded_sentences,
                grounding.sentence_count,
                warning
            );
        }
    }
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );

    if response.context_only {
        println!();
        println!("Context-only mode: synthesis disabled.");
        println!();
    } else if let Some(answer) = &response.answer {
        println!();
        println!("Answer:\n{answer}");
        println!();
    }

    if !response.citations.is_empty() {
        println!("Citations:");
        for citation in &response.citations {
            // Score is optional (e.g. fallback retrievers may not score).
            match citation.score {
                Some(score) => println!(
                    "[{}] {} (frame {}, score {:.3})",
                    citation.index, citation.uri, citation.frame_id, score
                ),
                None => println!(
                    "[{}] {} (frame {})",
                    citation.index, citation.uri, citation.frame_id
                ),
            }
        }
        println!();
    }

    // Expanded per-citation details (frame metadata + a one-line snippet).
    if include_sources && !response.citations.is_empty() {
        println!("=== SOURCES ===");
        println!();
        for citation in &response.citations {
            println!("[{}] {}", citation.index, citation.uri);

            // Best-effort frame lookup; on failure we still print the URI line.
            if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
                if let Some(title) = &frame.title {
                    println!(" Title: {}", title);
                }
                println!(" Frame ID: {}", citation.frame_id);
                if let Some(score) = citation.score {
                    println!(" Score: {:.4}", score);
                }
                if let Some((start, end)) = citation.chunk_range {
                    println!(" Range: [{}..{})", start, end);
                }
                if !frame.tags.is_empty() {
                    println!(" Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!(" Labels: {}", frame.labels.join(", "));
                }
                println!(" Timestamp: {}", frame.timestamp);
                if !frame.content_dates.is_empty() {
                    println!(" Content Dates: {}", frame.content_dates.join(", "));
                }
            }

            // Pull the matching retrieval hit for a flattened snippet preview,
            // preferring the precise chunk text over the broader frame text.
            if let Some(hit) = response
                .retrieval
                .hits
                .iter()
                .find(|h| h.frame_id == citation.frame_id)
            {
                let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
                // NOTE(review): byte-index truncation — could split a UTF-8
                // char and panic on multibyte text; confirm snippets are ASCII.
                let truncated = if snippet.len() > 200 {
                    format!("{}...", &snippet[..200])
                } else {
                    snippet.clone()
                };
                println!(" Snippet: {}", truncated.replace('\n', " "));
            }
            println!();
        }
    }

    // Without --sources, fall back to the generic search-hit table.
    if !include_sources {
        println!();
        emit_search_table(&response.retrieval);
    }

    // Render follow-up suggestions only when the helper marked them needed.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
            if needed {
                println!();
                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
                println!("💡 FOLLOW-UP SUGGESTIONS");
                println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

                if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
                    println!("Reason: {}", reason);
                }

                if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
                    println!("Hint: {}", hint);
                }

                if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
                    if !topics.is_empty() {
                        println!();
                        println!("Available topics in this memory:");
                        for topic in topics.iter().filter_map(|t| t.as_str()) {
                            println!(" • {}", topic);
                        }
                    }
                }

                if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
                    if !suggestions.is_empty() {
                        println!();
                        println!("Try asking:");
                        for (i, suggestion) in suggestions.iter().filter_map(|s| s.as_str()).enumerate() {
                            println!(" {}. \"{}\"", i + 1, suggestion);
                        }
                    }
                }
                println!();
            }
        }
    }
}
2541
/// Machine-readable output for verbatim (no-synthesis) ask mode: emits the
/// raw retrieval snippets as a versioned JSON envelope (`mv2.evidence.v1`).
///
/// With `include_sources`, frame-level metadata is attached for every hit
/// whose frame can still be resolved; unresolvable frames are skipped.
fn emit_verbatim_evidence_json(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // One evidence entry per hit, 1-based index, preferring the precise
    // chunk text over the broader frame text when present.
    let evidence: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let mut entry = serde_json::Map::new();
            entry.insert("index".into(), json!(idx + 1));
            entry.insert("frame_id".into(), json!(hit.frame_id));
            entry.insert("uri".into(), json!(&hit.uri));
            if let Some(title) = &hit.title {
                entry.insert("title".into(), json!(title));
            }
            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
            entry.insert("text".into(), json!(verbatim));
            if let Some(score) = hit.score {
                entry.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(entry)
        })
        .collect();

    // Optional frame metadata; `filter_map` silently drops frames that can
    // no longer be looked up (best-effort, not an error).
    let sources: Option<Vec<_>> = if include_sources {
        Some(
            response
                .retrieval
                .hits
                .iter()
                .filter_map(|hit| {
                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
                        let mut source = serde_json::Map::new();
                        source.insert("frame_id".into(), json!(frame.id));
                        source.insert("uri".into(), json!(frame.uri.as_deref().unwrap_or("(unknown)")));
                        if let Some(title) = &frame.title {
                            source.insert("title".into(), json!(title));
                        }
                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
                        if !frame.tags.is_empty() {
                            source.insert("tags".into(), json!(frame.tags));
                        }
                        if !frame.labels.is_empty() {
                            source.insert("labels".into(), json!(frame.labels));
                        }
                        serde_json::Value::Object(source)
                    })
                })
                .collect(),
        )
    } else {
        None
    };

    let mut body = json!({
        "version": "mv2.evidence.v1",
        "mode": "verbatim",
        "question": response.question,
        "evidence": evidence,
        "evidence_count": evidence.len(),
        "total_hits": response.retrieval.total_hits,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
    });

    // Attach sources only when both requested and the body is an object.
    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
        map.insert("sources".into(), json!(sources));
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2626
/// Terminal rendering for verbatim (no-synthesis) ask mode: header stats, a
/// banner with the (truncated) question, each hit's raw text in a quoted
/// gutter, optional frame metadata, and a closing hint about `--use-model`.
fn emit_verbatim_evidence_pretty(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) {
    println!(
        "mode: {} latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Normalize raw BM25 scores against this batch's range so they can be
    // shown as a 0–100% relevance figure.
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str).green().bold()
        );
        println!(" Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Prefer the precise chunk text; blank lines are dropped from the
        // quoted gutter for compactness.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!(" │ {}", line);
            }
        }
        println!();
    }

    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        // Best-effort frame lookup; hits whose frame is gone are skipped.
        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!("{}", format!("[{}] {}", idx + 1, frame.uri.as_deref().unwrap_or("(unknown)")).cyan());
                if let Some(title) = &frame.title {
                    println!(" Title: {}", title);
                }
                println!(" Frame ID: {}", frame.id);
                println!(" Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!(" Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!(" Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!(" Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2742
2743fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2744 let hits: Vec<_> = response
2745 .hits
2746 .iter()
2747 .map(|hit| {
2748 json!({
2749 "frame_id": hit.frame_id,
2750 "matches": hit.matches,
2751 "snippets": [hit.text.clone()],
2752 })
2753 })
2754 .collect();
2755 println!("{}", serde_json::to_string_pretty(&hits)?);
2756 Ok(())
2757}
2758
/// Print search hits as a flat, human-readable table; one stanza per hit
/// with optional title, normalized relevance, ranges, and metadata fields.
/// Prints a "no results" line (and nothing else) for an empty response.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Batch-relative score range, used to map BM25 scores onto 0–100%.
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!(" Title: {title}");
        }
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!(" Relevance: {:.0}%", normalized);
        }
        // Half-open [start..end) ranges: frame-level first, chunk-level if set.
        println!(" Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!(" Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!(" Chunk Text: {}", chunk_text.trim());
        }
        // Every metadata field is optional; print only what is populated.
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!(" Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!(" Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!(" Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!(" Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!(" Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!(" Entities: {}", entity_strs.join(", "));
            }
        }
        println!(" Snippet: {}", hit.text.trim());
        println!();
    }
    // Pagination cursor for fetching the next page, when the engine set one.
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2817
2818fn ask_mode_display(mode: AskModeArg) -> &'static str {
2819 match mode {
2820 AskModeArg::Lex => "lex",
2821 AskModeArg::Sem => "sem",
2822 AskModeArg::Hybrid => "hybrid",
2823 }
2824}
2825
2826fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2827 match mode {
2828 AskModeArg::Lex => "Lexical",
2829 AskModeArg::Sem => "Semantic",
2830 AskModeArg::Hybrid => "Hybrid",
2831 }
2832}
2833
2834fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2835 match retriever {
2836 AskRetriever::Lex => "lex",
2837 AskRetriever::Semantic => "semantic",
2838 AskRetriever::Hybrid => "hybrid",
2839 AskRetriever::LexFallback => "lex_fallback",
2840 AskRetriever::TimelineFallback => "timeline_fallback",
2841 }
2842}
2843
2844fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2845 match retriever {
2846 AskRetriever::Lex => "Lexical",
2847 AskRetriever::Semantic => "Semantic",
2848 AskRetriever::Hybrid => "Hybrid",
2849 AskRetriever::LexFallback => "Lexical (fallback)",
2850 AskRetriever::TimelineFallback => "Timeline (fallback)",
2851 }
2852}
2853
2854fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2855 match engine {
2856 SearchEngineKind::Tantivy => "text (tantivy)",
2857 SearchEngineKind::LexFallback => "text (fallback)",
2858 SearchEngineKind::Hybrid => "hybrid",
2859 }
2860}
2861
2862fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2863 let digest = hash(uri.as_bytes()).to_hex().to_string();
2864 let prefix_len = digest.len().min(12);
2865 let prefix = &digest[..prefix_len];
2866 format!("mv2-hit-{prefix}-{frame_id}-{start}")
2867}
2868
/// Truncate `text` to at most `limit` characters (Unicode scalar values),
/// appending `...` when anything was cut; returns the text unchanged when it
/// already fits.
///
/// Uses `char_indices().nth(limit)` to find the byte offset of the cut point
/// in a single pass instead of counting every char and then re-collecting
/// the prefix; the slice is char-boundary safe by construction.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    match text.char_indices().nth(limit) {
        // Fewer than `limit + 1` chars: nothing to cut.
        None => text.to_string(),
        // `cut` is the byte index of the first char past the limit, so the
        // slice below it contains exactly `limit` chars.
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
2877
/// Map a raw BM25 score onto a 0–100 "relevance" percentage relative to the
/// batch's observed score range. A degenerate range (all scores equal, or a
/// single hit) renders as 100%.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let range = max_score - min_score;
    if range.abs() < f32::EPSILON {
        // Every hit scored identically; treat them all as fully relevant.
        return 100.0;
    }
    ((score - min_score) / range * 100.0).clamp(0.0, 100.0)
}
2894
/// Minimum and maximum over the present (`Some`) scores, ignoring `None`
/// entries. Returns `(0.0, 0.0)` when no score is present at all.
///
/// Single pass with running min/max — avoids the intermediate `Vec` and the
/// two extra folds of the previous implementation. `f32::min`/`f32::max`
/// propagate the non-NaN operand, so stray NaN scores are effectively skipped.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut min = f32::INFINITY;
    let mut max = f32::NEG_INFINITY;
    let mut seen = false;
    for score in scores.iter().filter_map(|s| *s) {
        min = min.min(score);
        max = max.max(score);
        seen = true;
    }
    if seen { (min, max) } else { (0.0, 0.0) }
}
2905
/// Convert a search hit into the CLI's canonical JSON object: rank, optional
/// score, a stable synthetic id, frame/uri/title, ranges, text, and a nested
/// `metadata` object (always present, with `matches` at minimum).
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Stable id derived from uri + frame + start offset (see build_hit_id).
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // chunk_range falls back to the frame-level range so consumers always
    // get both fields, even for engines that do not chunk.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata (just the match count) when the hit
    // carries none, so the "metadata" key is always present.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    // Optional fields are emitted only when populated, keeping output lean.
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Re-rank lexical hits in place by fusing lexical and semantic rankings
/// with Reciprocal Rank Fusion (RRF).
///
/// For each hit with a stored frame embedding of the expected dimension, a
/// cosine similarity against the query embedding produces a semantic rank;
/// lexical and semantic ranks are then combined as `1/(k+rank)` terms. For
/// preference-style queries a small text-based boost is added as a
/// tie-breaker. No-op when there are no hits or no usable embeddings.
/// Errors propagate from query embedding or frame-embedding reads.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    let query_embedding = runtime.embed_query(&response.query)?;
    // frame_id -> cosine similarity, only for frames whose stored embedding
    // matches the runtime's dimension (mismatched models are skipped).
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // Nothing to fuse — keep the lexical order untouched.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Sort frames by similarity (descending) to assign 1-based semantic ranks.
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Keyword heuristic for "advise me" style questions; kept in sync with
    // the identical check in `apply_preference_rerank`.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // Standard RRF damping constant (Cormack et al.); larger k flattens the
    // difference between adjacent ranks.
    const RRF_K: f32 = 60.0;

    // (original index, fused score, lexical rank) per hit. The lexical rank
    // is carried along as a deterministic tie-breaker.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // Hits without a usable embedding contribute no semantic term.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Scaled down (×0.01) so the boost (≤ 0.5) stays comparable to
            // RRF term magnitudes (~1/60) and acts as a nudge, not a veto.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01
            } else {
                0.0
            };

            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Fused score descending, then lexical rank ascending on ties.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild hits in fused order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3071
/// Re-rank hits in place for preference-style queries ("suggest",
/// "recommend", ...) by adding a first-person-context boost to each hit's
/// score for ordering purposes. No-op for other queries or empty results.
/// The stored `hit.score` values are left unchanged — only order and rank move.
fn apply_preference_rerank(response: &mut SearchResponse) {
    if response.hits.is_empty() {
        return;
    }

    // Keyword heuristic for "advise me" style questions; kept in sync with
    // the identical check in `apply_semantic_rerank`.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    if !is_preference_query {
        return;
    }

    // (index, boosted score, original score); missing scores count as 0.0.
    let mut scored: Vec<(usize, f32, f32)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let original_score = hit.score.unwrap_or(0.0);
            let preference_boost = compute_preference_boost(&hit.text);
            let boosted_score = original_score + preference_boost;
            (idx, boosted_score, original_score)
        })
        .collect();

    // Boosted score descending; the original score breaks ties so equally
    // boosted hits keep their lexical ordering.
    scored.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
    });

    // Rebuild hits in boosted order with fresh 1-based ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
}
3123
/// Heuristic score boost for text that reads like a first-person statement of
/// the author's situation, possessions, or standing preferences.
///
/// Each matched phrase contributes a fixed increment (strong ownership /
/// preference phrases: +0.15; generic first-person pronouns and help-request
/// phrases: +0.02 each) and the total is capped at 0.5 so the boost can
/// reorder results but never dominate the underlying relevance score.
fn compute_preference_boost(text: &str) -> f32 {
    let lowered = text.to_lowercase();
    let mut total = 0.0f32;

    // Strong signals: the author describes their own context or preferences.
    const ESTABLISHED_CONTEXT: &[&str] = &[
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];
    for phrase in ESTABLISHED_CONTEXT.iter().copied() {
        if lowered.contains(phrase) {
            total += 0.15;
        }
    }

    // Weak signal: bare first-person pronouns (space-delimited, so phrases at
    // the very start or end of the text do not match).
    const FIRST_PERSON: &[&str] = &[" i ", " my ", " me "];
    for pronoun in FIRST_PERSON.iter().copied() {
        if lowered.contains(pronoun) {
            total += 0.02;
        }
    }

    // Weak signal: the text itself asks for help or states a goal.
    const REQUEST_PATTERNS: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];
    for pattern in REQUEST_PATTERNS.iter().copied() {
        if lowered.contains(pattern) {
            total += 0.02;
        }
    }

    // Cap so heavily first-person text cannot swamp the base relevance score.
    total.min(0.5)
}
3216
/// Cosine similarity of two embedding vectors. Extra elements of the longer
/// slice are ignored (`zip` truncates). Returns 0.0 when either vector has
/// (near-)zero magnitude, avoiding a division by zero.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Accumulate dot product and both squared norms in one pass.
    let (dot, norm_a_sq, norm_b_sq) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(dot, na, nb), (x, y)| {
            (dot + x * y, na + x * x, nb + y * y)
        });

    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3233
/// Re-rank the top hits in place with a cross-encoder (fastembed's JINA
/// turbo reranker), blending the cross-encoder score (20%) with the
/// range-normalized original score (80%).
///
/// Only the first `min(len, 50)` hits are reranked; the remainder keep their
/// relative order and are appended after the reranked block. All reranker
/// failures (init or inference) are logged and degrade to a no-op so search
/// output is never lost. Requires at least 2 hits to do anything.
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    if response.hits.is_empty() || response.hits.len() < 2 {
        return Ok(());
    }

    // Cap the reranked window — cross-encoding is O(query·doc) per pair.
    let candidates_to_rerank = response.hits.len().min(50);

    // NOTE(review): may download model weights on first use (progress shown).
    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    // Init failure is non-fatal: keep the original ordering.
    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    info!("Cross-encoder reranking {} candidates", documents.len());
    // Inference failure is likewise non-fatal.
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Min/max over the original scores of the reranked window, used to
    // normalize them into ~[0, 1] before blending.
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores.iter().cloned().fold(f32::INFINITY, f32::min);
    let orig_max = original_scores.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    // Floor the range to avoid dividing by ~0 when all scores are equal.
    // NOTE(review): if NO hit in the window has a score, min/max stay at
    // ±infinity and normalized values degenerate — confirm scores are always
    // present on engines that reach this path.
    let orig_range = (orig_max - orig_min).max(0.001);
    for result in rerank_results.iter() {
        // NOTE(review): assumes `result.index` indexes into the submitted
        // `documents` slice (and hence `response.hits`) — confirm with the
        // fastembed rerank API.
        let original_idx = result.index;
        let cross_encoder_score = result.score;
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // 20/80 blend: the cross-encoder nudges rather than overrides.
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Rebuild the reranked window with fresh ranks and blended scores...
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // ...then append the untouched tail, continuing the rank sequence
    // (reordered.len() grows each push, so ranks stay consecutive).
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3332
/// Render all entity memory cards as a plain-text context block:
/// one section per entity ("Entity:\n - slot: value (+)"), sections separated
/// by blank lines. Returns an empty string when no entities exist; entities
/// without cards are skipped.
fn build_memory_context(mem: &Memvid) -> String {
    let entities = mem.memory_entities();
    if entities.is_empty() {
        return String::new();
    }

    let mut sections = Vec::new();
    for entity in entities {
        let cards = mem.get_entity_memories(&entity);
        if cards.is_empty() {
            continue;
        }

        let mut entity_lines = Vec::new();
        for card in cards {
            // Marker derived from the polarity's Display output.
            // NOTE(review): matches on the strings "Positive"/"Negative" —
            // assumes the polarity type's Display renders variant names
            // verbatim; confirm against its impl.
            let polarity_marker = card
                .polarity
                .as_ref()
                .map(|p| match p.to_string().as_str() {
                    "Positive" => " (+)",
                    "Negative" => " (-)",
                    _ => "",
                })
                .unwrap_or("");
            entity_lines.push(format!(
                " - {}: {}{}",
                card.slot, card.value, polarity_marker
            ));
        }

        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
    }

    sections.join("\n\n")
}
3371
3372fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3375 use std::collections::HashMap;
3376
3377 let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3379
3380 for hit in hits {
3381 if let Some(metadata) = &hit.metadata {
3382 for entity in &metadata.entities {
3383 entities_by_kind
3384 .entry(entity.kind.clone())
3385 .or_default()
3386 .push(entity.name.clone());
3387 }
3388 }
3389 }
3390
3391 if entities_by_kind.is_empty() {
3392 return String::new();
3393 }
3394
3395 let mut sections = Vec::new();
3397 let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3398 sorted_kinds.sort();
3399
3400 for kind in sorted_kinds {
3401 let names = entities_by_kind.get(kind).unwrap();
3402 let mut unique_names: Vec<_> = names.iter().collect();
3403 unique_names.sort();
3404 unique_names.dedup();
3405
3406 let names_str = unique_names
3407 .iter()
3408 .take(10) .map(|s| s.as_str())
3410 .collect::<Vec<_>>()
3411 .join(", ");
3412
3413 sections.push(format!("{}: {}", kind, names_str));
3414 }
3415
3416 sections.join("\n")
3417}