1use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20 types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21 TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24 types::{
25 AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
26 SearchHitMetadata,
27 },
28 AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
29 SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45 run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
46};
47
48use crate::config::{
50 load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
51 try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig,
52 EmbeddingModelChoice, EmbeddingRuntime,
53};
54use crate::utils::{
55 autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56 parse_date_boundary, parse_vector, read_embedding,
57};
58
// Character cap applied to context text in command output — usage is downstream
// of this chunk; presumably truncates what gets echoed/fed to the model. TODO confirm.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
// Fallback IANA timezone used by `build_temporal_filter` when `--tz` is absent.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64 let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65 message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66 if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67 message.push_str(&format!(
68 "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69 model.name(),
70 model.name()
71 ));
72 if model.is_openai() {
73 message.push_str(" (and set `OPENAI_API_KEY`).");
74 } else {
75 message.push('.');
76 }
77 message.push_str(&format!(
78 "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79 model.name()
80 ));
81 message.push_str(&format!(
82 "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83 ));
84 message.push_str("\nOr use `--mode lex` to disable semantic search.");
85 }
86 message
87}
88
// CLI arguments for `timeline`. Regular `//` comments are used deliberately:
// clap derive turns `///` doc comments into help text, which would change CLI output.
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
    // Walk the timeline in reverse (newest-first) order.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower bound (Unix timestamp) passed to the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper bound (Unix timestamp) passed to the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase, resolved by build_temporal_filter.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // IANA timezone for interpreting the phrase; defaults to DEFAULT_TEMPORAL_TZ.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases; defaults to now (UTC).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, applied around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Post-query filter: drop entries whose frame id exceeds this value.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Post-query filter: drop entries newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
// CLI arguments for `when`: resolve a temporal phrase to a concrete UTC window
// and list the frames inside it. `//` comments on purpose — clap derive would
// surface `///` docs as help text.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Temporal phrase to resolve (required here, unlike `timeline --on`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // IANA timezone for interpreting the phrase; defaults to DEFAULT_TEMPORAL_TZ.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // RFC3339 reference instant for relative phrases; defaults to now (UTC).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding, in minutes, applied around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Lower bound (Unix timestamp) passed to the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Upper bound (Unix timestamp) passed to the timeline query.
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Walk the timeline in reverse (newest-first) order.
    #[arg(long)]
    pub reverse: bool,
    // Emit machine-readable JSON instead of the human listing.
    #[arg(long)]
    pub json: bool,
}
149
// CLI arguments for `ask`: retrieval plus optional model answering over a
// memory file. `//` comments on purpose — clap derive would surface `///`
// docs as help text.
#[derive(Args)]
pub struct AskArgs {
    // Free-form positionals; handle_ask treats the first memory-looking token
    // as the file path and joins the rest into the question.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question text; takes precedence over positional words.
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to one exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs under this prefix; ignored when --uri is set.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
    // Return retrieved context only, skipping answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    // Consumed downstream of this chunk; presumably includes source attributions.
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    // Consumed downstream of this chunk; presumably masks PII in output.
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    // Prepend "known facts" memory context to the retrieved context (handle_ask).
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    // Override for the LLM context character budget (consumed downstream).
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Date-range filter; parsed by parse_date_boundary, end must not precede start.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model for answer synthesis; bare `--use-model` selects "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    // Embedding model used only for embedding the query (not re-indexing).
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    // Time-travel: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Custom system prompt for model answering (consumed downstream).
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    // Skip the cross-encoder reranking pass over retrieved hits.
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Skip LLM answer synthesis entirely (consumed downstream).
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Disable the adaptive result-count cutoff (AdaptiveConfig in handle_ask).
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Relevancy threshold fed to the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Strategy for deciding where to cut off ranked results.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
// Retrieval mode selector shared by `ask` and `audit`. `//` comments on
// purpose — clap turns `///` on ValueEnum variants into help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical (keyword) retrieval only.
    Lex,
    // Semantic (vector) retrieval only.
    Sem,
    // Lexical + semantic retrieval combined.
    Hybrid,
}
250
// CLI flag → core engine mode; the variants map 1:1 by name.
impl From<AskModeArg> for AskMode {
    fn from(value: AskModeArg) -> Self {
        match value {
            AskModeArg::Lex => AskMode::Lex,
            AskModeArg::Sem => AskMode::Sem,
            AskModeArg::Hybrid => AskMode::Hybrid,
        }
    }
}
260
// CLI arguments for `find`: search over a memory file. `//` comments on
// purpose — clap derive would surface `///` docs as help text.
#[derive(Args)]
pub struct FindArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Search query text.
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict search to one exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict search to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit machine-readable JSON (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit the older JSON schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search engine selection; "auto" defers the choice downstream.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    // Time-travel: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Time-travel: ignore frames newer than this Unix timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Embedding model used only for embedding the query (not re-indexing).
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Disable the adaptive result-count cutoff.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Relevancy threshold fed to the adaptive cutoff strategy.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Upper bound on results considered by the adaptive cutoff.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Strategy for deciding where to cut off ranked results.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    // Consumed downstream of this chunk; presumably enables graph expansion.
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    // Consumed downstream of this chunk; presumably forces hybrid scoring.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    // Consumed downstream of this chunk; presumably disables sketch/summary output.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
// Engine selector for `find`. `//` comments on purpose — clap turns `///`
// on ValueEnum variants into help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Defer engine choice (resolved downstream of this chunk).
    Auto,
    // Lexical (keyword) search.
    Lex,
    // Semantic (vector) search.
    Sem,
    // CLIP-based search; only compiled with the `clip` feature.
    #[cfg(feature = "clip")]
    Clip,
}
338
// CLI names for adaptive cutoff strategies; mapped onto CutoffStrategy when
// building AdaptiveConfig (see handle_ask). `//` comments on purpose — clap
// turns `///` on ValueEnum variants into help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    // CutoffStrategy::RelativeThreshold { min_ratio: --min-relevancy }.
    Relative,
    // CutoffStrategy::AbsoluteThreshold { min_score: --min-relevancy }.
    Absolute,
    // CutoffStrategy::ScoreCliff with a fixed max_drop_ratio of 0.3.
    Cliff,
    // CutoffStrategy::Elbow with a fixed sensitivity of 1.0.
    Elbow,
    // CutoffStrategy::Combined (relative threshold + cliff + absolute floor).
    Combined,
}
353
// CLI arguments for `vec-search`: nearest-neighbour lookup from a raw query
// vector. `//` comments on purpose — clap derive would surface `///` docs
// as help text.
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Query vector as comma-separated floats; mutually exclusive with --embedding.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // File containing the query embedding; mutually exclusive with --vector.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Number of neighbours to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit machine-readable JSON.
    #[arg(long)]
    pub json: bool,
}
368
// CLI arguments for `audit`: run a question and emit an auditable report.
// `//` comments on purpose — clap derive would surface `///` docs as help text.
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Question to audit (positional, required).
    #[arg(value_name = "QUESTION")]
    pub question: String,
    // Optional output path (consumed downstream of this chunk).
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    // Report format: text, markdown, or json.
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    // Snippet length per hit, in characters.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    // Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Restrict retrieval to URIs under this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Date-range filter boundaries.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model used for answer synthesis (consumed downstream of this chunk).
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
// Output format for `audit` reports. `//` comments on purpose — clap turns
// `///` on ValueEnum variants into help text.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    // Plain text report.
    Text,
    // Markdown report.
    Markdown,
    // JSON report.
    Json,
}
416
/// Handles the `timeline` subcommand: queries the memory's frame timeline with
/// optional limit/range/temporal filters, applies "as of" time-travel cutoffs,
/// then prints entries as JSON or a human-readable listing.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window are modifiers of --on; reject them alone.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the natural-language phrase (if any) into a temporal filter,
    // keeping the summary so it can be displayed alongside the results.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // "As of" time travel: after the query runs, drop entries beyond the
    // requested frame-id and/or timestamp cutoffs.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // When a temporal filter was applied, wrap entries with the resolved
        // window so JSON consumers can see how the phrase was interpreted.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            // One line per frame; newlines in the preview are flattened.
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!(" URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!(" Child frames: {child_list}");
            }
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
/// Handles the `when` subcommand: resolves the required `--on` phrase into a
/// UTC window, queries the timeline restricted to that window, and prints the
/// resolution summary followed by the matching frames (JSON with `--json`).
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // Raw epoch plus an ISO rendering (empty string when unformattable).
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!(" URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!(" Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// pairs the resolved-window summary with the matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    /// How the `--on` phrase was resolved; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    /// Timeline entries, borrowed from the query result.
    entries: &'a [TimelineEntry],
}
602
/// JSON envelope for `when --json`: resolved-phrase summary plus the frames
/// that fell inside the resolved window.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
609
/// Serializable view of a `TimelineEntry` for `when --json`, with an ISO-8601
/// rendering of the timestamp added for readability.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    /// Frame timestamp in epoch seconds.
    timestamp: i64,
    /// RFC3339 rendering of `timestamp`, when formatting succeeds.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    /// Content preview text.
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    /// Temporal anchor/mentions extracted for the frame, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
/// Serialized form of `TemporalSummary` embedded in JSON outputs.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    /// Original natural-language phrase.
    phrase: String,
    /// IANA timezone the phrase was interpreted in.
    timezone: String,
    /// Anchor instant as epoch seconds and RFC3339 (falls back to epoch text).
    anchor_utc: i64,
    anchor_iso: String,
    /// Resolver confidence score.
    confidence: u16,
    /// Resolver flags, omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    /// One of: "date", "datetime", "date_range", "datetime_range", "month".
    resolution_kind: &'static str,
    /// Resolved window bounds: epoch seconds plus optional ISO renderings.
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    /// Padding requested via `--window`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
644
/// Internal record of how a temporal phrase was resolved; rendered as text by
/// `print_temporal_summary` or as JSON via `summary_to_output`.
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    /// Instant that relative phrases were resolved against.
    anchor: OffsetDateTime,
    /// Resolved window bounds in epoch seconds (UTC).
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    /// Raw resolver output (value, confidence, flags).
    resolution: TemporalResolution,
    /// Padding requested via `--window`, if any.
    window_minutes: Option<u64>,
}
655
/// Resolves a natural-language temporal `phrase` into a `TemporalFilter`
/// (UTC start/end bounds) plus a `TemporalSummary` for display.
///
/// * `tz_override` — IANA zone; falls back to `DEFAULT_TEMPORAL_TZ`.
/// * `anchor_override` — RFC3339 instant relative phrases resolve against;
///   defaults to the current UTC time.
/// * `window_minutes` — optional symmetric padding around the resolved window.
///
/// # Errors
/// `E-TEMP-003` for an empty timezone, `E-TEMP-002` for a malformed anchor,
/// `E-TEMP-001` when the phrase cannot be resolved.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad both ends of the window symmetrically. (The previous version had an
    // `if s == e` branch whose two arms were byte-identical — collapsed here.)
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            // `try_from` guards the u64→i64 cast; the clamp keeps
            // `Duration::minutes` (which multiplies by 60) from overflowing.
            let delta = TimeDuration::minutes(i64::try_from(minutes).unwrap_or(i64::MAX / 60));
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta.whole_seconds()));
                end = Some(e.saturating_add(delta.whole_seconds()));
            }
        }
    }

    // phrase/tz are reported through the summary, not the filter itself.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
/// Projects a `TemporalSummary` into its serializable JSON form, rendering
/// timestamps as RFC3339 where possible (falling back to raw epoch seconds).
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        // RFC3339 when formattable; otherwise the bare epoch-seconds string.
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
/// Clones a `TimelineEntry` into the `when --json` view, adding an ISO-8601
/// rendering of its timestamp.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
/// Prints a human-readable description of how a temporal phrase was resolved:
/// phrase, timezone, anchor, window, confidence, flags, and any padding. Ends
/// with a blank line separating the summary from the entry listing.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    println!(
        "Anchor: {}",
        summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string())
    );
    let start_iso = summary.start_utc.and_then(format_timestamp);
    let end_iso = summary.end_utc.and_then(format_timestamp);
    // A point resolution (start == end) reads better as a single instant.
    match (start_iso, end_iso) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    let flags: Vec<&'static str> = summary
        .resolution
        .flags
        .iter()
        .map(|flag| flag.as_str())
        .collect();
    if !flags.is_empty() {
        println!("Flags: {}", flags.join(", "));
    }
    if let Some(window) = summary.window_minutes {
        if window > 0 {
            println!("Window padding: {window} minute(s)");
        }
    }
    println!();
}
796
/// Prints per-entry temporal details (anchor and extracted mentions) beneath a
/// timeline/when listing line.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string; fall back to formatting the epoch value.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            " Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!(" Mentions:");
        for mention in &temporal.mentions {
            // Same fallback chain: ISO string → formatted epoch → raw epoch.
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                " - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
829
/// Maps a resolver value to `(start, end)` epoch-second bounds. Point values
/// (Date/DateTime) collapse to `start == end`; months span the first through
/// the last day; bare dates are anchored at UTC midnight via `date_to_timestamp`.
///
/// # Errors
/// Fails only when a `Month` resolution names an impossible calendar date.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
860
/// Stable string tag for a resolution's variant, surfaced in JSON output as
/// `resolution_kind`.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match resolution.value {
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
871
/// Unix timestamp of `date`'s midnight, interpreted as UTC.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    // Anchor the calendar date at 00:00:00, stamp it with the UTC offset,
    // then extract epoch seconds.
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    let instant = midnight.assume_offset(UtcOffset::UTC);
    instant.unix_timestamp()
}
878
/// Returns the last calendar day of `month` in `year`, found by walking
/// forward from the 1st until the successor day rolls into the next month.
///
/// # Errors
/// Fails when `(year, month, 1)` is not a valid calendar date.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let mut day = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    loop {
        match day.next_day() {
            // Still inside the target month: keep advancing.
            Some(next) if next.month() == month => day = next,
            // Month boundary crossed (or end of calendar): `day` is the last day.
            _ => return Ok(day),
        }
    }
}
892
#[cfg(feature = "temporal_track")]
// NOTE(review): the `cfg` above gates this entire function on `temporal_track`,
// yet its body is plain model-fragment mapping (only the `temporal` field init
// below needs the feature, and it carries its own `cfg`). Confirm every call
// site is gated identically — otherwise builds without the feature will fail
// to resolve this symbol.
/// Replaces `response.context_fragments` with the fragments selected by the
/// model-inference pass, converting each `ModelContextFragment` into the core
/// `AskContextFragment`. No-op when `fragments` is empty, preserving whatever
/// the retrieval pass already produced.
fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
    if fragments.is_empty() {
        return;
    }

    response.context_fragments = fragments
        .into_iter()
        .map(|fragment| AskContextFragment {
            rank: fragment.rank,
            frame_id: fragment.frame_id,
            uri: fragment.uri,
            title: fragment.title,
            score: fragment.score,
            matches: fragment.matches,
            range: Some(fragment.range),
            chunk_range: fragment.chunk_range,
            text: fragment.text,
            // Model fragments carry only Full/Summary; the mapping is 1:1.
            kind: Some(match fragment.kind {
                ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
                ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
            }),
            // Model inference does not produce temporal metadata.
            #[cfg(feature = "temporal_track")]
            temporal: None,
        })
        .collect();
}
921
922pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
923 crate::utils::require_active_plan(config, "ask")?;
925
926 crate::api::track_query_usage(config, 1)?;
928
929 if args.uri.is_some() && args.scope.is_some() {
930 warn!("--scope ignored because --uri is provided");
931 }
932
933 let mut question_tokens = Vec::new();
934 let mut file_path: Option<PathBuf> = None;
935 for token in &args.targets {
936 if file_path.is_none() && looks_like_memory(token) {
937 file_path = Some(PathBuf::from(token));
938 } else {
939 question_tokens.push(token.clone());
940 }
941 }
942
943 let positional_question = if question_tokens.is_empty() {
944 None
945 } else {
946 Some(question_tokens.join(" "))
947 };
948
949 let question = args
950 .question
951 .or(positional_question)
952 .map(|value| value.trim().to_string())
953 .filter(|value| !value.is_empty());
954
955 let question = question
956 .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;
957
958 let (original_question, search_query) = {
961 let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
964 if let Ok(key) = std::env::var("OPENAI_API_KEY") {
965 (Some("gpt-4o-mini"), Some(key))
967 } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
968 (Some("llama-3.1-8b-instant"), Some(key))
970 } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
971 (Some("claude-haiku-4-5"), Some(key))
973 } else if let Ok(key) = std::env::var("XAI_API_KEY") {
974 (Some("grok-4-fast"), Some(key))
976 } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
977 (Some("mistral-small-latest"), Some(key))
979 } else {
980 (None, None)
982 };
983
984 let _ = (model_for_expansion, api_key_for_expansion); (question.clone(), question.clone())
995 };
996
997 let memory_path = match file_path {
998 Some(path) => path,
999 None => autodetect_memory_file()?,
1000 };
1001
1002 let start = parse_date_boundary(args.start.as_ref(), false)?;
1003 let end = parse_date_boundary(args.end.as_ref(), true)?;
1004 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1005 if end_ts < start_ts {
1006 anyhow::bail!("--end must not be earlier than --start");
1007 }
1008 }
1009
1010 let mut mem = Memvid::open(&memory_path)?;
1012
1013 #[cfg(feature = "replay")]
1015 let _ = mem.load_active_session();
1016
1017 let mv2_dimension = mem.effective_vec_index_dimension()?;
1019
1020 let stats = mem.stats()?;
1022 let has_vectors = stats.vector_count > 0;
1023 let effective_mode = if !has_vectors
1024 && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
1025 {
1026 tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
1027 AskModeArg::Lex
1028 } else {
1029 args.mode.clone()
1030 };
1031
1032 let ask_mode: AskMode = effective_mode.clone().into();
1033 let inferred_model_override = match effective_mode {
1034 AskModeArg::Lex => None,
1035 AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
1036 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
1037 identity.model.map(String::from)
1038 }
1039 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
1040 let models: Vec<_> = identities
1041 .iter()
1042 .filter_map(|entry| entry.identity.model.as_deref())
1043 .collect();
1044 anyhow::bail!(
1045 "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1046 Detected models: {:?}\n\n\
1047 Suggested fix: split into separate memories per embedding model.",
1048 models
1049 );
1050 }
1051 memvid_core::EmbeddingIdentitySummary::Unknown => None,
1052 },
1053 };
1054 let emb_model_override = args
1055 .query_embedding_model
1056 .as_deref()
1057 .or(inferred_model_override.as_deref());
1058 let runtime = match effective_mode {
1059 AskModeArg::Lex => None,
1060 AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
1061 config,
1062 emb_model_override,
1063 mv2_dimension,
1064 )?),
1065 AskModeArg::Hybrid => {
1066 try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
1068 || {
1069 load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1071 .ok()
1072 .map(|rt| {
1073 tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
1074 rt
1075 })
1076 },
1077 )
1078 }
1079 };
1080 if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1081 anyhow::bail!(
1082 "semantic embeddings unavailable; install/cached model required for {:?} mode",
1083 effective_mode
1084 );
1085 }
1086
1087 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1088
1089 let adaptive = if !args.no_adaptive {
1091 Some(AdaptiveConfig {
1092 enabled: true,
1093 max_results: args.max_k,
1094 min_results: 1,
1095 normalize_scores: true,
1096 strategy: match args.adaptive_strategy {
1097 AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1098 min_ratio: args.min_relevancy,
1099 },
1100 AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1101 min_score: args.min_relevancy,
1102 },
1103 AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1104 max_drop_ratio: 0.3,
1105 },
1106 AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1107 AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1108 relative_threshold: args.min_relevancy,
1109 max_drop_ratio: 0.3,
1110 absolute_min: 0.3,
1111 },
1112 },
1113 })
1114 } else {
1115 None
1116 };
1117
1118 let request = AskRequest {
1119 question: search_query, top_k: args.top_k,
1121 snippet_chars: args.snippet_chars,
1122 uri: args.uri.clone(),
1123 scope: args.scope.clone(),
1124 cursor: args.cursor.clone(),
1125 start,
1126 end,
1127 #[cfg(feature = "temporal_track")]
1128 temporal: None,
1129 context_only: args.context_only,
1130 mode: ask_mode,
1131 as_of_frame: args.as_of_frame,
1132 as_of_ts: args.as_of_ts,
1133 adaptive,
1134 acl_context: None,
1135 acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
1136 };
1137 let mut response = mem.ask(request, embedder).map_err(|err| match err {
1138 MemvidError::VecDimensionMismatch { expected, actual } => {
1139 anyhow!(vec_dimension_mismatch_help(expected, actual))
1140 }
1141 other => anyhow!(other),
1142 })?;
1143
1144 response.question = original_question;
1147
1148 let is_temporal_query = {
1155 let q_lower = response.question.to_lowercase();
1156 q_lower.contains("current")
1157 || q_lower.contains("latest")
1158 || q_lower.contains("recent")
1159 || q_lower.contains("now")
1160 || q_lower.contains("today")
1161 || q_lower.contains("updated")
1162 || q_lower.contains("new ")
1163 || q_lower.contains("newest")
1164 };
1165 if !args.no_rerank
1166 && !response.retrieval.hits.is_empty()
1167 && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
1168 && !is_temporal_query
1169 {
1170 let mut search_response = SearchResponse {
1172 query: response.question.clone(),
1173 hits: response.retrieval.hits.clone(),
1174 total_hits: response.retrieval.hits.len(),
1175 params: memvid_core::SearchParams {
1176 top_k: args.top_k,
1177 snippet_chars: args.snippet_chars,
1178 cursor: None,
1179 },
1180 elapsed_ms: 0,
1181 engine: memvid_core::SearchEngineKind::Hybrid,
1182 next_cursor: None,
1183 context: String::new(),
1184 stale_index_skips: 0,
1185 };
1186
1187 if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
1188 warn!("Cross-encoder reranking failed: {e}");
1189 } else {
1190 response.retrieval.hits = search_response.hits;
1192 response.retrieval.context = response
1194 .retrieval
1195 .hits
1196 .iter()
1197 .take(10) .map(|hit| hit.text.as_str())
1199 .collect::<Vec<_>>()
1200 .join("\n\n---\n\n");
1201 }
1202 }
1203
1204 if args.memories {
1206 let memory_context = build_memory_context(&mem);
1207 if !memory_context.is_empty() {
1208 response.retrieval.context = format!(
1210 "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
1211 memory_context, response.retrieval.context
1212 );
1213 }
1214 }
1215
1216 let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
1218 if !entity_context.is_empty() {
1219 response.retrieval.context = format!(
1221 "=== ENTITIES MENTIONED ===\n{}\n\n{}",
1222 entity_context, response.retrieval.context
1223 );
1224 }
1225
1226 if args.mask_pii {
1228 use memvid_core::pii::mask_pii;
1229
1230 response.retrieval.context = mask_pii(&response.retrieval.context);
1232
1233 for hit in &mut response.retrieval.hits {
1235 hit.text = mask_pii(&hit.text);
1236 if let Some(chunk_text) = &hit.chunk_text {
1237 hit.chunk_text = Some(mask_pii(chunk_text));
1238 }
1239 }
1240 }
1241
1242 let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;
1243
1244 let mut model_result: Option<ModelInference> = None;
1245 if args.no_llm {
1246 if args.use_model.is_some() {
1248 warn!("--use-model ignored because --no-llm disables LLM synthesis");
1249 }
1250 if args.json {
1251 emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
1252 } else {
1253 emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
1254 }
1255
1256 #[cfg(feature = "replay")]
1258 let _ = mem.save_active_session();
1259
1260 return Ok(());
1261 } else if response.context_only {
1262 if args.use_model.is_some() {
1263 warn!("--use-model ignored because --context-only disables synthesis");
1264 }
1265 } else if let Some(model_name) = args.use_model.as_deref() {
1266 match run_model_inference(
1267 model_name,
1268 &response.question,
1269 &response.retrieval.context,
1270 &response.retrieval.hits,
1271 llm_context_override,
1272 None,
1273 args.system_prompt.as_deref(),
1274 ) {
1275 Ok(inference) => {
1276 response.answer = Some(inference.answer.answer.clone());
1277 response.retrieval.context = inference.context_body.clone();
1278 apply_model_context_fragments(&mut response, inference.context_fragments.clone());
1279 model_result = Some(inference);
1280 }
1281 Err(err) => {
1282 warn!(
1283 "model inference unavailable for '{}': {err}. Falling back to default summary.",
1284 model_name
1285 );
1286 }
1287 }
1288 }
1289
1290 #[cfg(feature = "replay")]
1292 if let Some(ref inference) = model_result {
1293 if let Some(model_name) = args.use_model.as_deref() {
1294 let retrieved_frames: Vec<u64> = response
1296 .retrieval
1297 .hits
1298 .iter()
1299 .map(|hit| hit.frame_id)
1300 .collect();
1301
1302 mem.record_ask_action(
1303 &response.question,
1304 model_name, model_name, inference.answer.answer.as_bytes(),
1307 0, retrieved_frames,
1309 );
1310 }
1311 }
1312
1313 if args.json {
1314 if let Some(model_name) = args.use_model.as_deref() {
1315 emit_model_json(
1316 &response,
1317 model_name,
1318 model_result.as_ref(),
1319 args.sources,
1320 &mut mem,
1321 )?;
1322 } else {
1323 emit_ask_json(
1324 &response,
1325 effective_mode.clone(),
1326 model_result.as_ref(),
1327 args.sources,
1328 &mut mem,
1329 )?;
1330 }
1331 } else {
1332 emit_ask_pretty(
1333 &response,
1334 effective_mode.clone(),
1335 model_result.as_ref(),
1336 args.sources,
1337 &mut mem,
1338 );
1339 }
1340
1341 #[cfg(feature = "replay")]
1343 let _ = mem.save_active_session();
1344
1345 Ok(())
1346}
1347
1348fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1350 use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1351 use memvid_core::types::QueryPlan;
1352
1353 let planner = QueryPlanner::new();
1354
1355 let plan = if args.graph {
1357 let plan = planner.plan(&args.query, args.top_k);
1359 match plan {
1361 QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1362 QueryPlan::graph_only(graph_filter, args.top_k)
1363 }
1364 _ => plan,
1365 }
1366 } else {
1367 planner.plan(&args.query, args.top_k)
1369 };
1370
1371 let hits = hybrid_search(mem, &plan)?;
1373
1374 if args.json {
1375 let output = serde_json::json!({
1377 "query": args.query,
1378 "mode": if args.graph { "graph" } else { "hybrid" },
1379 "plan": format!("{:?}", plan),
1380 "hits": hits.iter().map(|h| {
1381 serde_json::json!({
1382 "frame_id": h.frame_id,
1383 "score": h.score,
1384 "graph_score": h.graph_score,
1385 "vector_score": h.vector_score,
1386 "matched_entity": h.matched_entity,
1387 "preview": h.preview,
1388 })
1389 }).collect::<Vec<_>>(),
1390 });
1391 println!("{}", serde_json::to_string_pretty(&output)?);
1392 } else {
1393 let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1395 println!("{} search for: \"{}\"", mode_str, args.query);
1396 println!("Plan: {:?}", plan);
1397 println!();
1398
1399 if hits.is_empty() {
1400 println!("No results found.");
1401 } else {
1402 println!("Results ({} hits):", hits.len());
1403 for (i, hit) in hits.iter().enumerate() {
1404 println!();
1405 println!(
1406 "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1407 i + 1,
1408 hit.frame_id,
1409 hit.score,
1410 hit.graph_score,
1411 hit.vector_score
1412 );
1413 if let Some(entity) = &hit.matched_entity {
1414 println!(" Matched entity: {}", entity);
1415 }
1416 if let Some(preview) = &hit.preview {
1417 let truncated = if preview.len() > 200 {
1418 format!("{}...", &preview[..200])
1419 } else {
1420 preview.clone()
1421 };
1422 println!(" {}", truncated.replace('\n', " "));
1423 }
1424 }
1425 }
1426 }
1427
1428 Ok(())
1429}
1430
/// Entry point for `find`: validates the plan/quota, then dispatches to
/// graph/hybrid search, CLIP visual search, semantic vector search (adaptive
/// or fixed-k with lexical fallback), or plain lexical search, and finally
/// renders the results as a table or JSON.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    crate::utils::require_active_plan(config, "find")?;

    crate::api::track_query_usage(config, 1)?;

    let mut mem = open_read_only_mem(&args.file)?;

    // Best-effort: restore replay session state when the feature is enabled.
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Graph/hybrid mode has its own dedicated handler.
    if args.graph || args.hybrid {
        return handle_graph_find(&mut mem, &args);
    }

    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    let mv2_dimension = mem.effective_vec_index_dimension()?;
    // Only semantic-capable modes need the embedding identity summary.
    let identity_summary = match args.mode {
        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
        #[cfg(feature = "clip")]
        SearchMode::Clip => None,
        SearchMode::Lex => None,
    };

    // Mixed embedding models make vector comparisons meaningless: hard error
    // for forced semantic mode, silent downgrade to lexical for auto mode.
    let mut semantic_allowed = true;
    let inferred_model_override = match identity_summary.as_ref() {
        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
            identity.model.as_deref().map(|value| value.to_string())
        }
        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
            let models: Vec<_> = identities
                .iter()
                .filter_map(|entry| entry.identity.model.as_deref())
                .collect();
            if args.mode == SearchMode::Sem {
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            warn!(
                "semantic search disabled: mixed embedding models detected: {:?}",
                models
            );
            semantic_allowed = false;
            None
        }
        _ => None,
    };

    // Explicit CLI override wins over the model inferred from the memory file.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());

    // Resolve the human-readable mode label and (optionally) the embedding
    // runtime. Auto degrades gracefully when no runtime is available.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if !semantic_allowed {
                ("Lexical (semantic unsafe)".to_string(), None)
            } else if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Machine-readable mode key used in the JSON output envelope.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // CLIP visual search: encode the query text, search the CLIP index, map
    // the raw distance hits onto SearchHit, and emit. Returns early.
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        // NOTE: shadows the CliConfig parameter within this branch.
        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Debug-log every raw hit with its cosine similarity derived from the
        // Euclidean distance (unit vectors: cos = 1 - d^2/2).
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // Distance cutoff beyond which CLIP matches are considered noise.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                // Hits whose frame can't be previewed are silently dropped.
                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid, next_cursor: None,
            context: String::new(),
            stale_index_skips: 0,
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {} k={} time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // Forced semantic mode: adaptive cutoff search first, then fixed-k vector
    // search, then lexical + rerank as the last resort. Any other mode runs
    // lexical search with an optional semantic rerank pass.
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        let query_embedding = runtime.embed_query(&args.query)?;

        // --uri doubles as a scope when no explicit --scope was given.
        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if !args.no_adaptive {
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35, },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                        stale_index_skips: 0,
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive vector search)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Dimension mismatches get a dedicated, actionable error.
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (vector search fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                            }
                            return Err(anyhow!(
                                "Both adaptive and fixed-k search failed: {e}, {e2}"
                            ));
                        }
                    }
                }
            }
        } else {
            // --no-adaptive: straight fixed-k vector search.
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (vector search)".to_string(), None)
                }
                Err(e) => {
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                        no_sketch: args.no_sketch,
                        acl_context: None,
                        acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // Lexical (or auto-downgraded) search path.
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
            no_sketch: args.no_sketch,
            acl_context: None,
            acl_enforcement_mode: memvid_core::types::AclEnforcementMode::Audit,
        };

        let mut resp = mem.search(request)?;

        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        // With an embedding runtime available, rerank lexical hits semantically.
        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    // Output: legacy JSON, current JSON envelope, or a pretty table.
    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {} k={} time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }

    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1878
1879pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1880 crate::api::track_query_usage(config, 1)?;
1882
1883 let mut mem = open_read_only_mem(&args.file)?;
1884 let vector = if let Some(path) = args.embedding.as_deref() {
1885 read_embedding(path)?
1886 } else if let Some(vector_string) = &args.vector {
1887 parse_vector(vector_string)?
1888 } else {
1889 anyhow::bail!("provide --vector or --embedding for search input");
1890 };
1891
1892 let hits = mem
1893 .search_vec(&vector, args.limit)
1894 .map_err(|err| match err {
1895 MemvidError::VecDimensionMismatch { expected, actual } => {
1896 anyhow!(vec_dimension_mismatch_help(expected, actual))
1897 }
1898 other => anyhow!(other),
1899 })?;
1900 let mut enriched = Vec::with_capacity(hits.len());
1901 for hit in hits {
1902 let preview = mem.frame_preview_by_id(hit.frame_id)?;
1903 enriched.push((hit.frame_id, hit.distance, preview));
1904 }
1905
1906 if args.json {
1907 let json_hits: Vec<_> = enriched
1908 .iter()
1909 .map(|(frame_id, distance, preview)| {
1910 json!({
1911 "frame_id": frame_id,
1912 "distance": distance,
1913 "preview": preview,
1914 })
1915 })
1916 .collect();
1917 let json_str = serde_json::to_string_pretty(&json_hits)?;
1918 println!("{}", json_str.to_colored_json_auto()?);
1919 } else if enriched.is_empty() {
1920 println!("No vector matches found");
1921 } else {
1922 for (frame_id, distance, preview) in enriched {
1923 println!("frame {frame_id} (distance {distance:.6}): {preview}");
1924 }
1925 }
1926 Ok(())
1927}
1928
1929pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1930 use memvid_core::AuditOptions;
1931 use std::fs::File;
1932 use std::io::Write;
1933
1934 let mut mem = Memvid::open(&args.file)?;
1935
1936 let start = parse_date_boundary(args.start.as_ref(), false)?;
1938 let end = parse_date_boundary(args.end.as_ref(), true)?;
1939 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1940 if end_ts < start_ts {
1941 anyhow::bail!("--end must not be earlier than --start");
1942 }
1943 }
1944
1945 let ask_mode: AskMode = args.mode.into();
1947 let runtime = match args.mode {
1948 AskModeArg::Lex => None,
1949 AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1950 AskModeArg::Hybrid => try_load_embedding_runtime(config),
1951 };
1952 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1953
1954 let options = AuditOptions {
1956 top_k: Some(args.top_k),
1957 snippet_chars: Some(args.snippet_chars),
1958 mode: Some(ask_mode),
1959 scope: args.scope,
1960 start,
1961 end,
1962 include_snippets: true,
1963 };
1964
1965 let mut report = mem.audit(&args.question, Some(options), embedder)?;
1967
1968 if let Some(model_name) = args.use_model.as_deref() {
1970 let context = report
1972 .sources
1973 .iter()
1974 .filter_map(|s| s.snippet.clone())
1975 .collect::<Vec<_>>()
1976 .join("\n\n");
1977
1978 match run_model_inference(
1979 model_name,
1980 &report.question,
1981 &context,
1982 &[], None,
1984 None,
1985 None, ) {
1987 Ok(inference) => {
1988 report.answer = Some(inference.answer.answer);
1989 report.notes.push(format!(
1990 "Answer synthesized by model: {}",
1991 inference.answer.model
1992 ));
1993 }
1994 Err(err) => {
1995 warn!(
1996 "model inference unavailable for '{}': {err}. Using default answer.",
1997 model_name
1998 );
1999 }
2000 }
2001 }
2002
2003 let output = match args.format {
2005 AuditFormat::Text => report.to_text(),
2006 AuditFormat::Markdown => report.to_markdown(),
2007 AuditFormat::Json => serde_json::to_string_pretty(&report)?,
2008 };
2009
2010 if let Some(out_path) = args.out {
2012 let mut file = File::create(&out_path)?;
2013 file.write_all(output.as_bytes())?;
2014 println!("Audit report written to: {}", out_path.display());
2015 } else {
2016 println!("{}", output);
2017 }
2018
2019 Ok(())
2020}
2021
2022fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2023 let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2024
2025 let mut additional_params = serde_json::Map::new();
2026 if let Some(cursor) = &response.params.cursor {
2027 additional_params.insert("cursor".into(), json!(cursor));
2028 }
2029
2030 let mut params = serde_json::Map::new();
2031 params.insert("top_k".into(), json!(response.params.top_k));
2032 params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2033 params.insert("mode".into(), json!(mode));
2034 params.insert(
2035 "additional_params".into(),
2036 serde_json::Value::Object(additional_params),
2037 );
2038
2039 let mut metadata_json = serde_json::Map::new();
2040 metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2041 metadata_json.insert("total_hits".into(), json!(response.total_hits));
2042 metadata_json.insert(
2043 "next_cursor".into(),
2044 match &response.next_cursor {
2045 Some(cursor) => json!(cursor),
2046 None => serde_json::Value::Null,
2047 },
2048 );
2049 metadata_json.insert("engine".into(), json!(response.engine));
2050 metadata_json.insert("params".into(), serde_json::Value::Object(params));
2051
2052 let body = json!({
2053 "version": "mv2.result.v2",
2054 "query": response.query,
2055 "metadata": metadata_json,
2056 "hits": hits,
2057 "context": response.context,
2058 });
2059 let json_str = serde_json::to_string_pretty(&body)?;
2060 println!("{}", json_str.to_colored_json_auto()?);
2061 Ok(())
2062}
2063
/// Emit the full `mv2.ask.v1` JSON document for an ask response: hits,
/// citations, timing stats, and — when present — the model/usage/grounding
/// section, per-citation sources, and follow-up suggestions.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // Serialize retrieval hits with the shared per-hit JSON shape.
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations: chunk_range and score are optional and omitted when absent.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; the context is capped to keep the output manageable.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // Model section: requested vs actually-used model, cache status, token
    // usage (cost zeroed when served from cache), and grounding metrics.
    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            // Only report `model_used` when it differs from what was requested.
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    // Optional enriched per-citation sources (frame metadata + snippets).
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Follow-up suggestions are attached only when the answer looks weak.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2173
2174fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2175 response
2176 .citations
2177 .iter()
2178 .enumerate()
2179 .map(|(idx, citation)| {
2180 let mut source = serde_json::Map::new();
2181 source.insert("index".into(), json!(idx + 1));
2182 source.insert("frame_id".into(), json!(citation.frame_id));
2183 source.insert("uri".into(), json!(citation.uri));
2184
2185 if let Some(range) = citation.chunk_range {
2186 source.insert("chunk_range".into(), json!([range.0, range.1]));
2187 }
2188 if let Some(score) = citation.score {
2189 source.insert("score".into(), json!(score));
2190 }
2191
2192 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2194 if let Some(title) = frame.title {
2195 source.insert("title".into(), json!(title));
2196 }
2197 if !frame.tags.is_empty() {
2198 source.insert("tags".into(), json!(frame.tags));
2199 }
2200 if !frame.labels.is_empty() {
2201 source.insert("labels".into(), json!(frame.labels));
2202 }
2203 source.insert("frame_timestamp".into(), json!(frame.timestamp));
2204 if !frame.content_dates.is_empty() {
2205 source.insert("content_dates".into(), json!(frame.content_dates));
2206 }
2207 }
2208
2209 if let Some(hit) = response
2211 .retrieval
2212 .hits
2213 .iter()
2214 .find(|h| h.frame_id == citation.frame_id)
2215 {
2216 let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2217 source.insert("snippet".into(), json!(snippet));
2218 }
2219
2220 serde_json::Value::Object(source)
2221 })
2222 .collect()
2223}
2224
2225fn build_follow_up_suggestions(
2228 response: &AskResponse,
2229 inference: Option<&ModelInference>,
2230 mem: &mut Memvid,
2231) -> Option<serde_json::Value> {
2232 let needs_followup = inference
2234 .and_then(|inf| inf.grounding.as_ref())
2235 .map(|g| g.score < 0.3 || g.has_warning)
2236 .unwrap_or(false);
2237
2238 let low_retrieval = response
2240 .retrieval
2241 .hits
2242 .first()
2243 .and_then(|h| h.score)
2244 .map(|score| score < -2.0)
2245 .unwrap_or(true);
2246
2247 if !needs_followup && !low_retrieval {
2248 return None;
2249 }
2250
2251 let limit = std::num::NonZeroU64::new(20).unwrap();
2253 let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2254
2255 let available_topics: Vec<String> = mem
2256 .timeline(timeline_query)
2257 .ok()
2258 .map(|entries| {
2259 entries
2260 .iter()
2261 .filter_map(|e| {
2262 let preview = e.preview.trim();
2264 if preview.is_empty() || preview.len() < 5 {
2265 return None;
2266 }
2267 let first_line = preview.lines().next().unwrap_or(preview);
2269 if first_line.len() > 60 {
2270 Some(format!("{}...", &first_line[..57]))
2271 } else {
2272 Some(first_line.to_string())
2273 }
2274 })
2275 .collect::<std::collections::HashSet<_>>()
2276 .into_iter()
2277 .take(5)
2278 .collect()
2279 })
2280 .unwrap_or_default();
2281
2282 let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2284 "No relevant information found in memory"
2285 } else if inference
2286 .and_then(|i| i.grounding.as_ref())
2287 .map(|g| g.has_warning)
2288 .unwrap_or(false)
2289 {
2290 "Answer may not be well-supported by the available context"
2291 } else {
2292 "Low confidence in the answer"
2293 };
2294
2295 let suggestions: Vec<String> = if available_topics.is_empty() {
2297 vec![
2298 "What information is stored in this memory?".to_string(),
2299 "Can you list the main topics covered?".to_string(),
2300 ]
2301 } else {
2302 available_topics
2303 .iter()
2304 .take(3)
2305 .map(|topic| format!("Tell me about {}", topic))
2306 .chain(std::iter::once(
2307 "What topics are in this memory?".to_string(),
2308 ))
2309 .collect()
2310 };
2311
2312 Some(json!({
2313 "needed": true,
2314 "reason": reason,
2315 "hint": if available_topics.is_empty() {
2316 "This memory may not contain information about your query."
2317 } else {
2318 "This memory contains information about different topics. Try asking about those instead."
2319 },
2320 "available_topics": available_topics,
2321 "suggestions": suggestions
2322 }))
2323}
2324
/// Emits the ask response as pretty-printed, colorized JSON when a model was
/// requested.
///
/// The body always carries question/model/answer/context; usage, grounding,
/// sources and follow-up suggestions are appended only when available.
/// Context is truncated to `OUTPUT_CONTEXT_MAX_LEN` characters.
fn emit_model_json(
    response: &AskResponse,
    requested_model: &str,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let answer = response.answer.clone().unwrap_or_default();
    // Prefer labels recorded on the inference (they reflect any model
    // substitution); fall back to the raw CLI-requested model string.
    let requested_label = inference
        .map(|m| m.answer.requested.clone())
        .unwrap_or_else(|| requested_model.to_string());
    let used_label = inference
        .map(|m| m.answer.model.clone())
        .unwrap_or_else(|| requested_model.to_string());

    let mut body = json!({
        "question": response.question,
        "model": requested_label,
        "model_used": used_label,
        "answer": answer,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                // A cached answer costs nothing now; report the original
                // cost as savings instead.
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Follow-up suggestions are present only when the answer looks weak.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2400
2401fn emit_ask_pretty(
2402 response: &AskResponse,
2403 requested_mode: AskModeArg,
2404 inference: Option<&ModelInference>,
2405 include_sources: bool,
2406 mem: &mut Memvid,
2407) {
2408 println!(
2409 "mode: {} retriever: {} k={} latency: {} ms (retrieval {} ms)",
2410 ask_mode_pretty(requested_mode),
2411 ask_retriever_pretty(response.retriever),
2412 response.retrieval.params.top_k,
2413 response.stats.latency_ms,
2414 response.stats.retrieval_ms
2415 );
2416 if let Some(inference) = inference {
2417 let model = &inference.answer;
2418 let cached_label = if inference.cached { " [CACHED]" } else { "" };
2419 if model.requested.trim() == model.model {
2420 println!("model: {}{}", model.model, cached_label);
2421 } else {
2422 println!(
2423 "model requested: {} model used: {}{}",
2424 model.requested, model.model, cached_label
2425 );
2426 }
2427 if let Some(usage) = &inference.usage {
2429 let cost_label = if inference.cached {
2430 format!("$0.00 (saved ${:.6})", usage.cost_usd)
2431 } else {
2432 format!("${:.6}", usage.cost_usd)
2433 };
2434 println!(
2435 "tokens: {} input + {} output = {} cost: {}",
2436 usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
2437 );
2438 }
2439 if let Some(grounding) = &inference.grounding {
2441 let warning = if grounding.has_warning {
2442 format!(
2443 " [WARNING: {}]",
2444 grounding
2445 .warning_reason
2446 .as_deref()
2447 .unwrap_or("potential hallucination")
2448 )
2449 } else {
2450 String::new()
2451 };
2452 println!(
2453 "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2454 grounding.score * 100.0,
2455 grounding.label(),
2456 grounding.grounded_sentences,
2457 grounding.sentence_count,
2458 warning
2459 );
2460 }
2461 }
2462 println!(
2463 "engine: {}",
2464 search_engine_label(&response.retrieval.engine)
2465 );
2466 println!(
2467 "hits: {} (showing {})",
2468 response.retrieval.total_hits,
2469 response.retrieval.hits.len()
2470 );
2471
2472 if response.context_only {
2473 println!();
2474 println!("Context-only mode: synthesis disabled.");
2475 println!();
2476 } else if let Some(answer) = &response.answer {
2477 println!();
2478 println!("Answer:\n{answer}");
2479 println!();
2480 }
2481
2482 if !response.citations.is_empty() {
2483 println!("Citations:");
2484 for citation in &response.citations {
2485 match citation.score {
2486 Some(score) => println!(
2487 "[{}] {} (frame {}, score {:.3})",
2488 citation.index, citation.uri, citation.frame_id, score
2489 ),
2490 None => println!(
2491 "[{}] {} (frame {})",
2492 citation.index, citation.uri, citation.frame_id
2493 ),
2494 }
2495 }
2496 println!();
2497 }
2498
2499 if include_sources && !response.citations.is_empty() {
2501 println!("=== SOURCES ===");
2502 println!();
2503 for citation in &response.citations {
2504 println!("[{}] {}", citation.index, citation.uri);
2505
2506 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2508 if let Some(title) = &frame.title {
2509 println!(" Title: {}", title);
2510 }
2511 println!(" Frame ID: {}", citation.frame_id);
2512 if let Some(score) = citation.score {
2513 println!(" Score: {:.4}", score);
2514 }
2515 if let Some((start, end)) = citation.chunk_range {
2516 println!(" Range: [{}..{})", start, end);
2517 }
2518 if !frame.tags.is_empty() {
2519 println!(" Tags: {}", frame.tags.join(", "));
2520 }
2521 if !frame.labels.is_empty() {
2522 println!(" Labels: {}", frame.labels.join(", "));
2523 }
2524 println!(" Timestamp: {}", frame.timestamp);
2525 if !frame.content_dates.is_empty() {
2526 println!(" Content Dates: {}", frame.content_dates.join(", "));
2527 }
2528 }
2529
2530 if let Some(hit) = response
2532 .retrieval
2533 .hits
2534 .iter()
2535 .find(|h| h.frame_id == citation.frame_id)
2536 {
2537 let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2538 let truncated = if snippet.len() > 200 {
2539 format!("{}...", &snippet[..200])
2540 } else {
2541 snippet.clone()
2542 };
2543 println!(" Snippet: {}", truncated.replace('\n', " "));
2544 }
2545 println!();
2546 }
2547 }
2548
2549 if !include_sources {
2550 println!();
2551 emit_search_table(&response.retrieval);
2552 }
2553
2554 if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2556 if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2557 if needed {
2558 println!();
2559 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2560 println!("💡 FOLLOW-UP SUGGESTIONS");
2561 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2562
2563 if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2564 println!("Reason: {}", reason);
2565 }
2566
2567 if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2568 println!("Hint: {}", hint);
2569 }
2570
2571 if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2572 if !topics.is_empty() {
2573 println!();
2574 println!("Available topics in this memory:");
2575 for topic in topics.iter().filter_map(|t| t.as_str()) {
2576 println!(" • {}", topic);
2577 }
2578 }
2579 }
2580
2581 if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2582 if !suggestions.is_empty() {
2583 println!();
2584 println!("Try asking:");
2585 for (i, suggestion) in
2586 suggestions.iter().filter_map(|s| s.as_str()).enumerate()
2587 {
2588 println!(" {}. \"{}\"", i + 1, suggestion);
2589 }
2590 }
2591 }
2592 println!();
2593 }
2594 }
2595 }
2596}
2597
/// Emits retrieval hits as a machine-readable "verbatim evidence" JSON
/// document (schema `mv2.evidence.v1`): raw chunk text, no LLM synthesis.
fn emit_verbatim_evidence_json(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // One evidence entry per hit, 1-indexed, preferring the exact chunk text
    // over the wider frame text when available.
    let evidence: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let mut entry = serde_json::Map::new();
            entry.insert("index".into(), json!(idx + 1));
            entry.insert("frame_id".into(), json!(hit.frame_id));
            entry.insert("uri".into(), json!(&hit.uri));
            if let Some(title) = &hit.title {
                entry.insert("title".into(), json!(title));
            }
            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
            entry.insert("text".into(), json!(verbatim));
            if let Some(score) = hit.score {
                entry.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(entry)
        })
        .collect();

    // Frame-level metadata looked up per hit; frames that fail to load are
    // silently skipped (best-effort enrichment).
    let sources: Option<Vec<_>> = if include_sources {
        Some(
            response
                .retrieval
                .hits
                .iter()
                .filter_map(|hit| {
                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
                        let mut source = serde_json::Map::new();
                        source.insert("frame_id".into(), json!(frame.id));
                        source.insert(
                            "uri".into(),
                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
                        );
                        if let Some(title) = &frame.title {
                            source.insert("title".into(), json!(title));
                        }
                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
                        if !frame.tags.is_empty() {
                            source.insert("tags".into(), json!(frame.tags));
                        }
                        if !frame.labels.is_empty() {
                            source.insert("labels".into(), json!(frame.labels));
                        }
                        serde_json::Value::Object(source)
                    })
                })
                .collect(),
        )
    } else {
        None
    };

    let mut body = json!({
        "version": "mv2.evidence.v1",
        "mode": "verbatim",
        "question": response.question,
        "evidence": evidence,
        "evidence_count": evidence.len(),
        "total_hits": response.retrieval.total_hits,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
    });

    // Attach sources only when requested (the Option is Some) and the body
    // is the expected JSON object.
    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
        map.insert("sources".into(), json!(sources));
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2685
/// Pretty-prints raw retrieval evidence (no LLM synthesis): run stats, a
/// banner, each hit's verbatim text, optional per-frame source details, and
/// a closing note pointing at `--use-model`.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {} latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Relevance percentages are normalized against this result set's score
    // spread, not absolute BM25 values.
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!(" Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Prefer the exact matched chunk; fall back to the frame text.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!(" │ {}", line);
            }
        }
        println!();
    }

    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        // Frame lookups are best-effort; missing frames are skipped.
        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!(" Title: {}", title);
                }
                println!(" Frame ID: {}", frame.id);
                println!(" Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!(" Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!(" Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!(" Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2807
2808fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2809 let hits: Vec<_> = response
2810 .hits
2811 .iter()
2812 .map(|hit| {
2813 json!({
2814 "frame_id": hit.frame_id,
2815 "matches": hit.matches,
2816 "snippets": [hit.text.clone()],
2817 })
2818 })
2819 .collect();
2820 println!("{}", serde_json::to_string_pretty(&hits)?);
2821 Ok(())
2822}
2823
/// Prints search hits as a human-readable list: rank/URI/match count plus
/// any optional fields (title, normalized relevance, ranges, chunk text,
/// metadata, snippet) that are present on each hit.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Relevance percentages are normalized against this result set's score
    // spread, not absolute BM25 values.
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!(" Title: {title}");
        }
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!(" Relevance: {:.0}%", normalized);
        }
        println!(" Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!(" Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!(" Chunk Text: {}", chunk_text.trim());
        }
        // Optional metadata block: only populated fields are printed.
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!(" Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!(" Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!(" Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!(" Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!(" Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!(" Entities: {}", entity_strs.join(", "));
            }
        }
        println!(" Snippet: {}", hit.text.trim());
        println!();
    }
    // Pagination cursor, when more results are available.
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2882
2883fn ask_mode_display(mode: AskModeArg) -> &'static str {
2884 match mode {
2885 AskModeArg::Lex => "lex",
2886 AskModeArg::Sem => "sem",
2887 AskModeArg::Hybrid => "hybrid",
2888 }
2889}
2890
2891fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
2892 match mode {
2893 AskModeArg::Lex => "Lexical",
2894 AskModeArg::Sem => "Semantic",
2895 AskModeArg::Hybrid => "Hybrid",
2896 }
2897}
2898
2899fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
2900 match retriever {
2901 AskRetriever::Lex => "lex",
2902 AskRetriever::Semantic => "semantic",
2903 AskRetriever::Hybrid => "hybrid",
2904 AskRetriever::LexFallback => "lex_fallback",
2905 AskRetriever::TimelineFallback => "timeline_fallback",
2906 }
2907}
2908
2909fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
2910 match retriever {
2911 AskRetriever::Lex => "Lexical",
2912 AskRetriever::Semantic => "Semantic",
2913 AskRetriever::Hybrid => "Hybrid",
2914 AskRetriever::LexFallback => "Lexical (fallback)",
2915 AskRetriever::TimelineFallback => "Timeline (fallback)",
2916 }
2917}
2918
2919fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
2920 match engine {
2921 SearchEngineKind::Tantivy => "text (tantivy)",
2922 SearchEngineKind::LexFallback => "text (fallback)",
2923 SearchEngineKind::Hybrid => "hybrid",
2924 }
2925}
2926
2927fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2928 let digest = hash(uri.as_bytes()).to_hex().to_string();
2929 let prefix_len = digest.len().min(12);
2930 let prefix = &digest[..prefix_len];
2931 format!("mv2-hit-{prefix}-{frame_id}-{start}")
2932}
2933
/// Returns `text` unchanged when it fits within `limit` characters;
/// otherwise keeps the first `limit` characters and appends "...".
/// Counts characters, not bytes, so it is safe on multi-byte UTF-8.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    let mut chars = text.chars();
    let head: String = chars.by_ref().take(limit).collect();
    // Anything left after taking `limit` chars means the text was too long.
    if chars.next().is_none() {
        text.to_string()
    } else {
        format!("{head}...")
    }
}
2942
/// Linearly rescales a raw BM25 score into a 0-100 "relevance" percentage
/// relative to the min/max scores of the current result set. A degenerate
/// range (all scores equal) maps everything to 100%.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let range = max_score - min_score;
    if range.abs() < f32::EPSILON {
        return 100.0;
    }
    ((score - min_score) / range * 100.0).clamp(0.0, 100.0)
}
2959
/// Returns the (min, max) over all `Some` scores, or (0.0, 0.0) when no
/// scores are present.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut any = false;
    let mut lo = f32::INFINITY;
    let mut hi = f32::NEG_INFINITY;
    for score in scores.iter().copied().flatten() {
        any = true;
        lo = lo.min(score);
        hi = hi.max(score);
    }
    if any {
        (lo, hi)
    } else {
        (0.0, 0.0)
    }
}
2973
/// Serializes a search hit into the JSON shape used by JSON output:
/// rank/score/id/frame/uri/title/ranges/text plus a nested `metadata`
/// object whose optional fields are emitted only when populated.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    // Stable synthetic id derived from the URI digest + frame + offset.
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // Fall back to the full hit range when no chunk-level range exists.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Metadata always carries `matches`; a hit without metadata gets a
    // default one seeded with the hit's match count.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Re-ranks lexical hits in place by fusing lexical and semantic rankings
/// with Reciprocal Rank Fusion (RRF), plus a small preference-signal boost
/// for suggestion/recommendation-style queries.
///
/// Semantic ranks come from cosine similarity between the query embedding
/// and each hit's stored frame embedding; hits with a missing or
/// dimension-mismatched embedding contribute no semantic score. When no
/// usable embeddings exist, the lexical order is left untouched.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            // Guard against embeddings produced by a different model.
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // No usable semantic signal: keep the lexical order as-is.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Turn similarity scores into a dense 1-based semantic ranking.
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Heuristic: queries phrased as requests for suggestions additionally
    // boost snippets that carry first-person preference signals.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // RRF damping constant; 60 is the commonly used default.
    const RRF_K: f32 = 60.0;

    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // Hits without a semantic rank contribute 0 on that side.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Scaled down (x0.01) to stay small relative to the RRF terms.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 } else {
                0.0
            };

            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Fused score descending; ties broken by lexical rank ascending.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3139
3140fn apply_preference_rerank(response: &mut SearchResponse) {
3143 if response.hits.is_empty() {
3144 return;
3145 }
3146
3147 let query_lower = response.query.to_lowercase();
3149 let is_preference_query = query_lower.contains("suggest")
3150 || query_lower.contains("recommend")
3151 || query_lower.contains("should i")
3152 || query_lower.contains("what should")
3153 || query_lower.contains("prefer")
3154 || query_lower.contains("favorite")
3155 || query_lower.contains("best for me");
3156
3157 if !is_preference_query {
3158 return;
3159 }
3160
3161 let mut scored: Vec<(usize, f32, f32)> = response
3163 .hits
3164 .iter()
3165 .enumerate()
3166 .map(|(idx, hit)| {
3167 let original_score = hit.score.unwrap_or(0.0);
3168 let preference_boost = compute_preference_boost(&hit.text);
3169 let boosted_score = original_score + preference_boost;
3170 (idx, boosted_score, original_score)
3171 })
3172 .collect();
3173
3174 scored.sort_by(|a, b| {
3176 b.1.partial_cmp(&a.1)
3177 .unwrap_or(Ordering::Equal)
3178 .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
3179 });
3180
3181 let mut reordered = Vec::with_capacity(response.hits.len());
3183 for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
3184 let mut hit = response.hits[idx].clone();
3185 hit.rank = rank_idx + 1;
3186 reordered.push(hit);
3187 }
3188
3189 response.hits = reordered;
3190}
3191
/// Scores how strongly a snippet reads as first-person, established user
/// context (ownership, habits, stated preferences). Returns a boost in
/// `0.0..=0.5` used by the preference-aware rerankers.
fn compute_preference_boost(text: &str) -> f32 {
    let lowered = text.to_lowercase();
    let mut boost = 0.0f32;

    // Adds `weight` once per matching pattern, in pattern order (order kept
    // identical to the original to preserve float accumulation).
    let mut tally = |patterns: &[&str], weight: f32| {
        for pattern in patterns {
            if lowered.contains(pattern) {
                boost += weight;
            }
        }
    };

    // Strong signal: the author describes things they own, do, or prefer.
    tally(
        &[
            "i've been",
            "i've had",
            "i've used",
            "i've tried",
            "i recently",
            "i just",
            "lately",
            "i started",
            "i bought",
            "i harvested",
            "i grew",
            "my garden",
            "my home",
            "my house",
            "my setup",
            "my equipment",
            "my camera",
            "my car",
            "my phone",
            "i have a",
            "i own",
            "i got a",
            "i prefer",
            "i like to",
            "i love to",
            "i enjoy",
            "i usually",
            "i always",
            "i typically",
            "my favorite",
            "i tend to",
            "i often",
            "i use",
            "i grow",
            "i cook",
            "i make",
            "i work on",
            "i'm into",
            "i collect",
        ],
        0.15,
    );

    // Weak signal: generic first-person pronouns.
    tally(&[" i ", " my ", " me "], 0.02);

    // Weak signal: help-seeking phrasing.
    tally(
        &[
            "i'm trying to",
            "i want to",
            "i need to",
            "looking for",
            "can you suggest",
            "can you help",
        ],
        0.02,
    );

    // Cap so one chatty snippet cannot dominate the ranking.
    boost.min(0.5)
}
3284
/// Cosine similarity of two vectors; extra elements in the longer slice are
/// ignored (`zip` truncates). Returns 0.0 when either vector's squared norm
/// is at or below `f32::EPSILON`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Single pass accumulating dot product and both squared norms, in the
    // same element order as a plain loop.
    let (dot, norm_a_sq, norm_b_sq) = a
        .iter()
        .zip(b.iter())
        .fold((0.0f32, 0.0f32, 0.0f32), |(d, na, nb), (x, y)| {
            (d + x * y, na + x * x, nb + y * y)
        });

    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        return 0.0;
    }
    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
3301
/// Re-scores the top candidates with a local cross-encoder (JINA reranker)
/// and blends that signal with the normalized original scores (20% / 80%).
///
/// Best-effort: initialization or inference failures are logged and leave
/// the original ordering untouched. At most the top 50 hits are re-ranked;
/// the remainder keep their relative order after them.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits.
    if response.hits.len() < 2 {
        return Ok(());
    }

    let rerank_count = response.hits.len().min(50);

    let init = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);
    let mut reranker = match TextRerank::try_new(init) {
        Ok(model) => model,
        Err(err) => {
            warn!("Failed to initialize cross-encoder reranker: {err}");
            return Ok(());
        }
    };

    let documents: Vec<String> = response
        .hits
        .iter()
        .take(rerank_count)
        .map(|hit| hit.text.clone())
        .collect();

    info!("Cross-encoder reranking {} candidates", documents.len());
    let results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(err) => {
            warn!("Cross-encoder reranking failed: {err}");
            return Ok(());
        }
    };

    // Normalize the original scores to ~[0, 1] over the reranked slice so
    // they can be blended with cross-encoder scores on a comparable scale;
    // the 0.001 floor avoids division by zero for a flat score range.
    let originals: Vec<f32> = response.hits[..rerank_count]
        .iter()
        .filter_map(|hit| hit.score)
        .collect();
    let orig_min = originals.iter().copied().fold(f32::INFINITY, f32::min);
    let orig_max = originals.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let orig_range = (orig_max - orig_min).max(0.001);

    let mut blended: Vec<(f32, usize)> = Vec::with_capacity(results.len());
    for result in results.iter() {
        let idx = result.index;
        let original = response.hits[idx].score.unwrap_or(0.0);
        let normalized = (original - orig_min) / orig_range;
        // Light cross-encoder weight: it refines, but should not overturn,
        // the existing ordering.
        blended.push((result.score * 0.2 + normalized * 0.8, idx));
    }

    blended.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (score, idx)) in blended.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = new_rank + 1;
        hit.score = Some(score);
        reordered.push(hit);
    }

    // Hits beyond the reranked window keep their order, ranks continued.
    for hit in response.hits.iter().skip(rerank_count) {
        let mut tail = hit.clone();
        tail.rank = reordered.len() + 1;
        reordered.push(tail);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3407
/// No-op stand-in used when the `local-embeddings` feature is disabled:
/// hits keep their existing order.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3414
3415fn build_memory_context(mem: &Memvid) -> String {
3418 let entities = mem.memory_entities();
3419 if entities.is_empty() {
3420 return String::new();
3421 }
3422
3423 let mut sections = Vec::new();
3424 for entity in entities {
3425 let cards = mem.get_entity_memories(&entity);
3426 if cards.is_empty() {
3427 continue;
3428 }
3429
3430 let mut entity_lines = Vec::new();
3431 for card in cards {
3432 let polarity_marker = card
3434 .polarity
3435 .as_ref()
3436 .map(|p| match p.to_string().as_str() {
3437 "Positive" => " (+)",
3438 "Negative" => " (-)",
3439 _ => "",
3440 })
3441 .unwrap_or("");
3442 entity_lines.push(format!(
3443 " - {}: {}{}",
3444 card.slot, card.value, polarity_marker
3445 ));
3446 }
3447
3448 sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
3449 }
3450
3451 sections.join("\n\n")
3452}
3453
3454fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3457 use std::collections::HashMap;
3458
3459 let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3461
3462 for hit in hits {
3463 if let Some(metadata) = &hit.metadata {
3464 for entity in &metadata.entities {
3465 entities_by_kind
3466 .entry(entity.kind.clone())
3467 .or_default()
3468 .push(entity.name.clone());
3469 }
3470 }
3471 }
3472
3473 if entities_by_kind.is_empty() {
3474 return String::new();
3475 }
3476
3477 let mut sections = Vec::new();
3479 let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3480 sorted_kinds.sort();
3481
3482 for kind in sorted_kinds {
3483 let names = entities_by_kind.get(kind).unwrap();
3484 let mut unique_names: Vec<_> = names.iter().collect();
3485 unique_names.sort();
3486 unique_names.dedup();
3487
3488 let names_str = unique_names
3489 .iter()
3490 .take(10) .map(|s| s.as_str())
3492 .collect::<Vec<_>>()
3493 .join(", ");
3494
3495 sections.push(format!("{}: {}", kind, names_str));
3496 }
3497
3498 sections.join("\n")
3499}