1use std::cmp::Ordering;
9use std::collections::HashMap;
10use std::num::NonZeroU64;
11use std::path::PathBuf;
12
13use anyhow::{anyhow, bail, Result};
14use blake3::hash;
15use clap::{ArgAction, Args, ValueEnum};
16use colored::Colorize;
17use colored_json::ToColoredJson;
18#[cfg(feature = "temporal_track")]
19use memvid_core::{
20 types::SearchHitTemporal, TemporalContext, TemporalFilter, TemporalNormalizer,
21 TemporalResolution, TemporalResolutionValue,
22};
23use memvid_core::{
24 types::{
25 AdaptiveConfig, AskContextFragment, AskContextFragmentKind, CutoffStrategy,
26 SearchHitMetadata,
27 },
28 AskMode, AskRequest, AskResponse, AskRetriever, FrameId, Memvid, MemvidError, SearchEngineKind,
29 SearchHit, SearchRequest, SearchResponse, TimelineEntry, TimelineQueryBuilder, VecEmbedder,
30};
31#[cfg(feature = "temporal_track")]
32use serde::Serialize;
33use serde_json::json;
34#[cfg(feature = "temporal_track")]
35use time::format_description::well_known::Rfc3339;
36use time::{Date, PrimitiveDateTime, Time};
37#[cfg(feature = "temporal_track")]
38use time::{Duration as TimeDuration, Month, OffsetDateTime, UtcOffset};
39use tracing::{info, warn};
40
41#[cfg(feature = "local-embeddings")]
42use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
43
44use memvid_ask_model::{
45 run_model_inference, ModelContextFragment, ModelContextFragmentKind, ModelInference,
46};
47
48use crate::config::{
50 load_embedding_runtime, load_embedding_runtime_for_mv2, resolve_llm_context_budget_override,
51 try_load_embedding_runtime, try_load_embedding_runtime_for_mv2, CliConfig,
52 EmbeddingModelChoice, EmbeddingRuntime,
53};
54use crate::utils::{
55 autodetect_memory_file, format_timestamp, looks_like_memory, open_read_only_mem,
56 parse_date_boundary, parse_vector, read_embedding,
57};
58
/// Upper bound on retrieved-context characters echoed to the user in output.
const OUTPUT_CONTEXT_MAX_LEN: usize = 4_000;
/// Fallback IANA timezone used for temporal phrase resolution when `--tz` is
/// not supplied.
#[cfg(feature = "temporal_track")]
const DEFAULT_TEMPORAL_TZ: &str = "America/Chicago";
62
63fn vec_dimension_mismatch_help(expected: u32, actual: usize) -> String {
64 let mut message = format!("Vector dimension mismatch (expected {expected}, got {actual}).");
65 message.push_str("\n\nThis usually means the memory was indexed with a different embedding model than the query embedding.");
66 if let Some(model) = EmbeddingModelChoice::from_dimension(expected) {
67 message.push_str(&format!(
68 "\n\nSuggested fix: re-run with `-m {}` (alias: `--embedding-model/--model {}`)",
69 model.name(),
70 model.name()
71 ));
72 if model.is_openai() {
73 message.push_str(" (and set `OPENAI_API_KEY`).");
74 } else {
75 message.push('.');
76 }
77 message.push_str(&format!(
78 "\nFor `ask`/`find` only: you can also use `--query-embedding-model {}`.",
79 model.name()
80 ));
81 message.push_str(&format!(
82 "\nIf you provided a raw vector (`vec-search --vector/--embedding`), it must have exactly {expected} floats."
83 ));
84 message.push_str("\nOr use `--mode lex` to disable semantic search.");
85 }
86 message
87}
88
// Arguments for the `timeline` subcommand. (Plain `//` comments are used so
// clap's derive does not pick them up as help text.)
#[derive(Args)]
pub struct TimelineArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
    // Newest-first ordering.
    #[arg(long)]
    pub reverse: bool,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Inclusive lower bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Inclusive upper bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Natural-language temporal phrase (e.g. "last tuesday").
    #[cfg(feature = "temporal_track")]
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: Option<String>,
    // Timezone used to resolve the `--on` phrase.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // Reference instant the phrase is resolved against (defaults to now).
    #[cfg(feature = "temporal_track")]
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window.
    #[cfg(feature = "temporal_track")]
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Point-in-time view: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Point-in-time view: ignore frames newer than this timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
123
// Arguments for the `when` subcommand: resolve a temporal phrase and list the
// frames falling inside the resolved window. Only built with `temporal_track`.
#[cfg(feature = "temporal_track")]
#[derive(Args)]
pub struct WhenArgs {
    // Memory file to read.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Temporal phrase to resolve (required, unlike `timeline --on`).
    #[arg(long = "on", value_name = "PHRASE")]
    pub phrase: String,
    // Timezone used during resolution.
    #[arg(long = "tz", value_name = "IANA_ZONE")]
    pub tz: Option<String>,
    // Reference instant (defaults to now).
    #[arg(long = "anchor", value_name = "RFC3339")]
    pub anchor: Option<String>,
    // Symmetric padding (minutes) around the resolved window.
    #[arg(long = "window", value_name = "MINUTES")]
    pub window: Option<u64>,
    // Maximum number of entries to return.
    #[arg(long, value_name = "LIMIT")]
    pub limit: Option<NonZeroU64>,
    // Additional inclusive lower bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub since: Option<i64>,
    // Additional inclusive upper bound (unix timestamp).
    #[arg(long, value_name = "TIMESTAMP")]
    pub until: Option<i64>,
    // Newest-first ordering.
    #[arg(long)]
    pub reverse: bool,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
149
// Arguments for the `ask` subcommand (question answering over a memory).
#[derive(Args)]
pub struct AskArgs {
    // Free-form positional tokens: a memory path and/or the question words.
    // Disambiguated at runtime via `looks_like_memory`.
    #[arg(value_name = "TARGET", num_args = 0..)]
    pub targets: Vec<String>,
    // Explicit question (overrides positional question tokens).
    #[arg(long = "question", value_name = "TEXT")]
    pub question: Option<String>,
    // Restrict retrieval to one exact URI (takes precedence over --scope).
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to retrieve.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet truncation length per hit.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Retrieval mode: lexical, semantic, or hybrid.
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
    // Return only the retrieved context, skipping answer synthesis.
    #[arg(long = "context-only", action = ArgAction::SetTrue)]
    pub context_only: bool,
    // Include source attributions in the output.
    #[arg(long = "sources", action = ArgAction::SetTrue)]
    pub sources: bool,
    // Mask personally identifiable information in the output.
    #[arg(long = "mask-pii", action = ArgAction::SetTrue)]
    pub mask_pii: bool,
    // Prepend known-facts memory context to the retrieved context.
    #[arg(long = "memories", action = ArgAction::SetTrue)]
    pub memories: bool,
    // Override for the LLM context budget, in characters.
    #[arg(long = "llm-context-depth", value_name = "CHARS")]
    pub llm_context_depth: Option<usize>,
    // Inclusive date lower bound.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // Inclusive date upper bound.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Run a local/remote model for answer synthesis; bare flag defaults to
    // "tinyllama".
    #[arg(
        long = "use-model",
        value_name = "MODEL",
        num_args = 0..=1,
        default_missing_value = "tinyllama"
    )]
    pub use_model: Option<String>,
    // Embedding model used for the query only (not the stored index).
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,
    // Point-in-time view: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Point-in-time view: ignore frames newer than this timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Custom system prompt for model inference.
    #[arg(long = "system-prompt", value_name = "TEXT")]
    pub system_prompt: Option<String>,
    // Disable cross-encoder reranking of retrieval hits.
    #[arg(long = "no-rerank", action = ArgAction::SetTrue)]
    pub no_rerank: bool,

    // Skip LLM answer synthesis entirely.
    #[arg(long = "no-llm", action = ArgAction::SetTrue)]
    pub no_llm: bool,

    // Disable adaptive result cutoff.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Minimum relevancy ratio/score used by the adaptive cutoff strategies.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Hard cap on adaptive result count.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Adaptive cutoff strategy selection.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,
}
242
// CLI-level retrieval mode for `ask`; mirrors `memvid_core::AskMode`.
// (`//` comments so clap does not surface them as value help.)
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AskModeArg {
    // Lexical (keyword) search only.
    Lex,
    // Semantic (vector) search only.
    Sem,
    // Combined lexical + semantic search.
    Hybrid,
}
250
/// Lossless conversion from the CLI enum to the core library's `AskMode`.
impl From<AskModeArg> for AskMode {
    fn from(value: AskModeArg) -> Self {
        match value {
            AskModeArg::Lex => AskMode::Lex,
            AskModeArg::Sem => AskMode::Sem,
            AskModeArg::Hybrid => AskMode::Hybrid,
        }
    }
}
260
// Arguments for the `find` subcommand (search without answer synthesis).
#[derive(Args)]
pub struct FindArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Search query text (required).
    #[arg(long = "query", value_name = "TEXT")]
    pub query: String,
    // Restrict to one exact URI.
    #[arg(long = "uri", value_name = "URI")]
    pub uri: Option<String>,
    // Restrict to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Number of hits to return.
    #[arg(long = "top-k", value_name = "K", default_value = "8", alias = "limit")]
    pub top_k: usize,
    // Snippet truncation length per hit.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "480")]
    pub snippet_chars: usize,
    // Pagination cursor from a previous response.
    #[arg(long = "cursor", value_name = "TOKEN")]
    pub cursor: Option<String>,
    // Emit JSON (current schema).
    #[arg(long)]
    pub json: bool,
    // Emit JSON in the legacy schema; mutually exclusive with --json.
    #[arg(long = "json-legacy", conflicts_with = "json")]
    pub json_legacy: bool,
    // Search mode; `auto` picks based on available indexes.
    #[arg(long = "mode", value_enum, default_value = "auto")]
    pub mode: SearchMode,
    // Point-in-time view: ignore frames newer than this frame id.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Point-in-time view: ignore frames newer than this timestamp.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
    // Embedding model used for the query only.
    #[arg(long = "query-embedding-model", value_name = "EMB_MODEL")]
    pub query_embedding_model: Option<String>,

    // Disable adaptive result cutoff.
    #[arg(long = "no-adaptive", action = ArgAction::SetTrue)]
    pub no_adaptive: bool,
    // Minimum relevancy ratio/score used by the adaptive cutoff strategies.
    #[arg(long = "min-relevancy", value_name = "RATIO", default_value = "0.5")]
    pub min_relevancy: f32,
    // Hard cap on adaptive result count.
    #[arg(long = "max-k", value_name = "K", default_value = "100")]
    pub max_k: usize,
    // Adaptive cutoff strategy selection.
    #[arg(long = "adaptive-strategy", value_enum, default_value = "combined")]
    pub adaptive_strategy: AdaptiveStrategyArg,

    // Enable graph-based result expansion.
    #[arg(long = "graph", action = ArgAction::SetTrue)]
    pub graph: bool,

    // Force hybrid (lexical + semantic) engine.
    #[arg(long = "hybrid", action = ArgAction::SetTrue)]
    pub hybrid: bool,

    // Disable sketch-based acceleration.
    #[arg(long = "no-sketch", action = ArgAction::SetTrue)]
    pub no_sketch: bool,
}
327
// Engine selection for `find`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum SearchMode {
    // Choose automatically based on what the memory supports.
    Auto,
    // Lexical (keyword) search.
    Lex,
    // Semantic (vector) search.
    Sem,
    // CLIP-based multimodal search (feature-gated).
    #[cfg(feature = "clip")]
    Clip,
}
338
// Cutoff strategy for adaptive result truncation; maps onto
// `memvid_core::CutoffStrategy` variants in the handlers.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AdaptiveStrategyArg {
    // Keep hits scoring at least `min_relevancy` of the top score.
    Relative,
    // Keep hits with absolute score >= `min_relevancy`.
    Absolute,
    // Cut at the first large score drop.
    Cliff,
    // Cut at the elbow of the score curve.
    Elbow,
    // Combination of relative threshold, cliff, and absolute floor.
    Combined,
}
353
// Arguments for the `vec-search` subcommand (raw vector nearest-neighbor).
#[derive(Args)]
pub struct VecSearchArgs {
    // Memory file to search.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Inline query vector as comma-separated floats.
    #[arg(long, conflicts_with = "embedding", value_name = "CSV")]
    pub vector: Option<String>,
    // Path to a file containing the query embedding.
    #[arg(long, conflicts_with = "vector", value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub embedding: Option<PathBuf>,
    // Number of nearest neighbors to return.
    #[arg(long, value_name = "K", default_value = "10")]
    pub limit: usize,
    // Emit JSON instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
368
// Arguments for the `audit` subcommand (question + evidence report).
#[derive(Args)]
pub struct AuditArgs {
    // Memory file to audit.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Question the audit report answers.
    #[arg(value_name = "QUESTION")]
    pub question: String,
    // Optional output path; defaults to stdout.
    #[arg(long = "out", short = 'o', value_name = "PATH", value_parser = clap::value_parser!(PathBuf))]
    pub out: Option<PathBuf>,
    // Report format: text, markdown, or json.
    #[arg(long = "format", value_enum, default_value = "text")]
    pub format: AuditFormat,
    // Number of hits to retrieve as evidence.
    #[arg(long = "top-k", value_name = "K", default_value = "10")]
    pub top_k: usize,
    // Snippet truncation length per hit.
    #[arg(long = "snippet-chars", value_name = "N", default_value = "500")]
    pub snippet_chars: usize,
    // Retrieval mode (shares the `ask` mode enum).
    #[arg(long = "mode", value_enum, default_value = "hybrid")]
    pub mode: AskModeArg,
    // Restrict retrieval to URIs with this prefix.
    #[arg(long = "scope", value_name = "URI_PREFIX")]
    pub scope: Option<String>,
    // Inclusive date lower bound.
    #[arg(long = "start", value_name = "DATE")]
    pub start: Option<String>,
    // Inclusive date upper bound.
    #[arg(long = "end", value_name = "DATE")]
    pub end: Option<String>,
    // Model used to synthesize the report body.
    #[arg(long = "use-model", value_name = "MODEL")]
    pub use_model: Option<String>,
}
405
// Output format for audit reports.
#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)]
pub enum AuditFormat {
    // Plain text.
    Text,
    // Markdown document.
    Markdown,
    // Machine-readable JSON.
    Json,
}
416
/// `timeline` subcommand: list a memory file's frames in chronological order,
/// bounded by `--since`/`--until`, truncated by `--limit`, optionally
/// reversed, filtered by a temporal phrase (`--on`, feature-gated), and
/// capped by `--as-of-frame`/`--as-of-ts` point-in-time cutoffs.
pub fn handle_timeline(_config: &CliConfig, args: TimelineArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;
    let mut builder = TimelineQueryBuilder::default();
    // --tz/--anchor/--window only modify an --on phrase; reject them alone.
    #[cfg(feature = "temporal_track")]
    if args.phrase.is_none()
        && (args.tz.is_some() || args.anchor.is_some() || args.window.is_some())
    {
        bail!("E-TEMP-005 use --on when supplying --tz/--anchor/--window");
    }
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse);
    // Resolve the --on phrase into a concrete window; keep the summary so it
    // can be echoed in both JSON and plain-text output below.
    #[cfg(feature = "temporal_track")]
    let temporal_summary = if let Some(ref phrase) = args.phrase {
        let (filter, summary) = build_temporal_filter(
            phrase,
            args.tz.as_deref(),
            args.anchor.as_deref(),
            args.window,
        )?;
        builder = builder.temporal(filter);
        Some(summary)
    } else {
        None
    };
    let query = builder.build();
    let mut entries = mem.timeline(query)?;

    // Point-in-time view: drop entries newer than the requested frame/ts.
    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
        entries.retain(|entry| {
            if let Some(cutoff_frame) = args.as_of_frame {
                if entry.frame_id > cutoff_frame {
                    return false;
                }
            }

            if let Some(cutoff_ts) = args.as_of_ts {
                if entry.timestamp > cutoff_ts {
                    return false;
                }
            }

            true
        });
    }

    if args.json {
        // JSON output embeds the resolved temporal window (if any) alongside
        // the entries; without the feature it is just the raw entry list.
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            println!(
                "{}",
                serde_json::to_string_pretty(&TimelineOutput {
                    temporal: Some(summary_to_output(summary)),
                    entries: &entries,
                })?
            );
        } else {
            println!("{}", serde_json::to_string_pretty(&entries)?);
        }
        #[cfg(not(feature = "temporal_track"))]
        println!("{}", serde_json::to_string_pretty(&entries)?);
    } else if entries.is_empty() {
        println!("Timeline is empty");
    } else {
        #[cfg(feature = "temporal_track")]
        if let Some(summary) = temporal_summary.as_ref() {
            print_temporal_summary(summary);
        }
        for entry in entries {
            println!(
                "#{} @ {} — {}",
                entry.frame_id,
                entry.timestamp,
                entry.preview.replace('\n', " ")
            );
            if let Some(uri) = entry.uri.as_deref() {
                println!(" URI: {uri}");
            }
            if !entry.child_frames.is_empty() {
                let child_list = entry
                    .child_frames
                    .iter()
                    .map(|id| id.to_string())
                    .collect::<Vec<_>>()
                    .join(", ");
                println!(" Child frames: {child_list}");
            }
            // Per-entry temporal anchors/mentions, when the track is enabled.
            #[cfg(feature = "temporal_track")]
            if let Some(temporal) = entry.temporal.as_ref() {
                print_entry_temporal_details(temporal);
            }
        }
    }
    Ok(())
}
526
/// `when` subcommand: resolve a temporal phrase to a window, then list the
/// frames that fall inside it. Always prints the resolution summary first so
/// the user can see how the phrase was interpreted.
#[cfg(feature = "temporal_track")]
pub fn handle_when(_config: &CliConfig, args: WhenArgs) -> Result<()> {
    let mut mem = open_read_only_mem(&args.file)?;

    let (filter, summary) = build_temporal_filter(
        &args.phrase,
        args.tz.as_deref(),
        args.anchor.as_deref(),
        args.window,
    )?;

    let mut builder = TimelineQueryBuilder::default();
    if let Some(limit) = args.limit {
        builder = builder.limit(limit);
    }
    if let Some(since) = args.since {
        builder = builder.since(since);
    }
    if let Some(until) = args.until {
        builder = builder.until(until);
    }
    builder = builder.reverse(args.reverse).temporal(filter.clone());
    let entries = mem.timeline(builder.build())?;

    if args.json {
        // JSON carries both the resolution summary and the matching entries.
        let entry_views: Vec<WhenEntry> = entries.iter().map(entry_to_when_entry).collect();
        let output = WhenOutput {
            summary: summary_to_output(&summary),
            entries: entry_views,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
        return Ok(());
    }

    print_temporal_summary(&summary);
    if entries.is_empty() {
        println!("No frames matched the resolved window");
        return Ok(());
    }

    for entry in &entries {
        // ISO form is best-effort; fall back to empty string on failure.
        let iso = format_timestamp(entry.timestamp).unwrap_or_default();
        println!(
            "#{} @ {} ({iso}) — {}",
            entry.frame_id,
            entry.timestamp,
            entry.preview.replace('\n', " ")
        );
        if let Some(uri) = entry.uri.as_deref() {
            println!(" URI: {uri}");
        }
        if !entry.child_frames.is_empty() {
            let child_list = entry
                .child_frames
                .iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            println!(" Child frames: {child_list}");
        }
        if let Some(temporal) = entry.temporal.as_ref() {
            print_entry_temporal_details(temporal);
        }
    }

    Ok(())
}
594
/// JSON envelope for `timeline --json` when a temporal phrase was supplied:
/// the resolved window plus the (borrowed) matching entries.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TimelineOutput<'a> {
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<TemporalSummaryOutput>,
    entries: &'a [TimelineEntry],
}
602
/// JSON envelope for `when --json`: resolution summary plus entry views.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenOutput {
    summary: TemporalSummaryOutput,
    entries: Vec<WhenEntry>,
}
609
/// Serializable view of one timeline entry for `when --json`, with a
/// best-effort ISO timestamp alongside the raw unix value.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct WhenEntry {
    frame_id: FrameId,
    timestamp: i64,
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp_iso: Option<String>,
    preview: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    uri: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    child_frames: Vec<FrameId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temporal: Option<SearchHitTemporal>,
}
625
/// Serializable form of a resolved temporal phrase: the input phrase,
/// resolution context (timezone/anchor), confidence/flags, and the resolved
/// window in both unix and ISO representations.
#[cfg(feature = "temporal_track")]
#[derive(Serialize)]
struct TemporalSummaryOutput {
    phrase: String,
    timezone: String,
    anchor_utc: i64,
    anchor_iso: String,
    confidence: u16,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    flags: Vec<&'static str>,
    resolution_kind: &'static str,
    window_start_utc: Option<i64>,
    window_start_iso: Option<String>,
    window_end_utc: Option<i64>,
    window_end_iso: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    window_minutes: Option<u64>,
}
644
/// In-memory result of resolving a temporal phrase, shared by the text and
/// JSON output paths (converted via `summary_to_output`).
#[cfg(feature = "temporal_track")]
struct TemporalSummary {
    phrase: String,
    tz: String,
    anchor: OffsetDateTime,
    start_utc: Option<i64>,
    end_utc: Option<i64>,
    resolution: TemporalResolution,
    // Padding that was requested via --window, if any.
    window_minutes: Option<u64>,
}
655
/// Resolve a natural-language temporal `phrase` (e.g. "last tuesday") into a
/// concrete [`TemporalFilter`] plus a [`TemporalSummary`] for display.
///
/// * `tz_override` — IANA timezone; falls back to [`DEFAULT_TEMPORAL_TZ`].
/// * `anchor_override` — RFC3339 instant the phrase is resolved relative to;
///   defaults to the current UTC time.
/// * `window_minutes` — optional symmetric padding (minutes) applied to both
///   ends of the resolved window.
///
/// # Errors
/// `E-TEMP-003` for an empty timezone, `E-TEMP-002` for a malformed anchor,
/// and `E-TEMP-001` when the phrase cannot be resolved.
#[cfg(feature = "temporal_track")]
fn build_temporal_filter(
    phrase: &str,
    tz_override: Option<&str>,
    anchor_override: Option<&str>,
    window_minutes: Option<u64>,
) -> Result<(TemporalFilter, TemporalSummary)> {
    let tz = tz_override
        .unwrap_or(DEFAULT_TEMPORAL_TZ)
        .trim()
        .to_string();
    if tz.is_empty() {
        bail!("E-TEMP-003 timezone must not be empty");
    }

    let anchor = if let Some(raw) = anchor_override {
        OffsetDateTime::parse(raw, &Rfc3339)
            .map_err(|_| anyhow!("E-TEMP-002 anchor must be RFC3339: {raw}"))?
    } else {
        OffsetDateTime::now_utc()
    };

    let context = TemporalContext::new(anchor, tz.clone());
    let normalizer = TemporalNormalizer::new(context);
    let resolution = normalizer
        .resolve(phrase)
        .map_err(|err| anyhow!("E-TEMP-001 {err}"))?;

    let (mut start, mut end) = resolution_bounds(&resolution)?;
    // Pad the resolved window symmetrically. (A previous version special-cased
    // `start == end` with a branch identical to the general case; the two
    // branches have been collapsed — behavior is unchanged.)
    if let Some(minutes) = window_minutes {
        if minutes > 0 {
            let delta_secs = TimeDuration::minutes(minutes as i64).whole_seconds();
            if let (Some(s), Some(e)) = (start, end) {
                start = Some(s.saturating_sub(delta_secs));
                end = Some(e.saturating_add(delta_secs));
            }
        }
    }

    // The phrase/tz are carried in the summary for display; the engine-level
    // filter only needs the concrete bounds.
    let filter = TemporalFilter {
        start_utc: start,
        end_utc: end,
        phrase: None,
        tz: None,
    };

    let summary = TemporalSummary {
        phrase: phrase.to_owned(),
        tz,
        anchor,
        start_utc: start,
        end_utc: end,
        resolution,
        window_minutes,
    };

    Ok((filter, summary))
}
719
/// Convert an in-memory [`TemporalSummary`] into its serializable form,
/// rendering ISO timestamps best-effort (falling back to the raw unix value
/// as a string when RFC3339 formatting fails).
#[cfg(feature = "temporal_track")]
fn summary_to_output(summary: &TemporalSummary) -> TemporalSummaryOutput {
    TemporalSummaryOutput {
        phrase: summary.phrase.clone(),
        timezone: summary.tz.clone(),
        anchor_utc: summary.anchor.unix_timestamp(),
        anchor_iso: summary
            .anchor
            .format(&Rfc3339)
            .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string()),
        confidence: summary.resolution.confidence,
        flags: summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect(),
        resolution_kind: resolution_kind(&summary.resolution),
        window_start_utc: summary.start_utc,
        window_start_iso: summary.start_utc.and_then(format_timestamp),
        window_end_utc: summary.end_utc,
        window_end_iso: summary.end_utc.and_then(format_timestamp),
        window_minutes: summary.window_minutes,
    }
}
745
/// Project a core [`TimelineEntry`] into the `when --json` view, adding a
/// best-effort ISO rendering of the timestamp.
#[cfg(feature = "temporal_track")]
fn entry_to_when_entry(entry: &TimelineEntry) -> WhenEntry {
    WhenEntry {
        frame_id: entry.frame_id,
        timestamp: entry.timestamp,
        timestamp_iso: format_timestamp(entry.timestamp),
        preview: entry.preview.clone(),
        uri: entry.uri.clone(),
        child_frames: entry.child_frames.clone(),
        temporal: entry.temporal.clone(),
    }
}
758
/// Print a human-readable block describing how a temporal phrase was
/// resolved: phrase, timezone, anchor, window, confidence, flags, and any
/// requested padding, followed by a blank separator line.
#[cfg(feature = "temporal_track")]
fn print_temporal_summary(summary: &TemporalSummary) {
    println!("Phrase: \"{}\"", summary.phrase);
    println!("Timezone: {}", summary.tz);
    // Anchor rendered as RFC3339 when possible, raw unix seconds otherwise.
    let anchor_text = summary
        .anchor
        .format(&Rfc3339)
        .unwrap_or_else(|_| summary.anchor.unix_timestamp().to_string());
    println!("Anchor: {}", anchor_text);
    match (
        summary.start_utc.and_then(format_timestamp),
        summary.end_utc.and_then(format_timestamp),
    ) {
        (Some(start), Some(end)) if start == end => println!("Resolved to: {start}"),
        (Some(start), Some(end)) => println!("Window: {start} → {end}"),
        (Some(start), None) => println!("Window start: {start}"),
        (None, Some(end)) => println!("Window end: {end}"),
        _ => println!("Window: (not resolved)"),
    }
    println!("Confidence: {}", summary.resolution.confidence);
    if !summary.resolution.flags.is_empty() {
        let joined = summary
            .resolution
            .flags
            .iter()
            .map(|flag| flag.as_str())
            .collect::<Vec<_>>()
            .join(", ");
        println!("Flags: {}", joined);
    }
    // Only mention padding when it was actually requested and non-zero.
    match summary.window_minutes {
        Some(window) if window > 0 => println!("Window padding: {window} minute(s)"),
        _ => {}
    }
    println!();
}
796
/// Print the temporal enrichment attached to a single hit/entry: the anchor
/// (if any) and each extracted temporal mention with its kind, confidence,
/// and originating text.
#[cfg(feature = "temporal_track")]
fn print_entry_temporal_details(temporal: &SearchHitTemporal) {
    if let Some(anchor) = temporal.anchor.as_ref() {
        // Prefer the stored ISO string; derive one from the unix value if
        // absent; fall back to raw seconds as a last resort.
        let iso = anchor
            .iso_8601
            .clone()
            .or_else(|| format_timestamp(anchor.ts_utc));
        println!(
            " Anchor: {} (source: {:?})",
            iso.unwrap_or_else(|| anchor.ts_utc.to_string()),
            anchor.source
        );
    }
    if !temporal.mentions.is_empty() {
        println!(" Mentions:");
        for mention in &temporal.mentions {
            let iso = mention
                .iso_8601
                .clone()
                .or_else(|| format_timestamp(mention.ts_utc))
                .unwrap_or_else(|| mention.ts_utc.to_string());
            let mut details = format!(
                " - {} ({:?}, confidence {})",
                iso, mention.kind, mention.confidence
            );
            if let Some(text) = mention.text.as_deref() {
                details.push_str(&format!(" — \"{}\"", text));
            }
            println!("{details}");
        }
    }
}
829
/// Convert a resolved temporal value into inclusive `(start, end)` unix
/// timestamps (UTC midnight for date-only values). Point values collapse to
/// `start == end`; a month spans its first through last day.
#[cfg(feature = "temporal_track")]
fn resolution_bounds(resolution: &TemporalResolution) -> Result<(Option<i64>, Option<i64>)> {
    match &resolution.value {
        TemporalResolutionValue::Date(date) => {
            let ts = date_to_timestamp(*date);
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateTime(dt) => {
            let ts = dt.unix_timestamp();
            Ok((Some(ts), Some(ts)))
        }
        TemporalResolutionValue::DateRange { start, end } => Ok((
            Some(date_to_timestamp(*start)),
            Some(date_to_timestamp(*end)),
        )),
        TemporalResolutionValue::DateTimeRange { start, end } => {
            Ok((Some(start.unix_timestamp()), Some(end.unix_timestamp())))
        }
        TemporalResolutionValue::Month { year, month } => {
            let start_date = Date::from_calendar_date(*year, *month, 1)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            let end_date = last_day_in_month(*year, *month)
                .map_err(|_| anyhow!("invalid month resolution"))?;
            Ok((
                Some(date_to_timestamp(start_date)),
                Some(date_to_timestamp(end_date)),
            ))
        }
    }
}
860
/// Stable machine-readable label for the resolved value's shape, used in the
/// JSON `resolution_kind` field.
#[cfg(feature = "temporal_track")]
fn resolution_kind(resolution: &TemporalResolution) -> &'static str {
    match resolution.value {
        TemporalResolutionValue::Date(_) => "date",
        TemporalResolutionValue::DateTime(_) => "datetime",
        TemporalResolutionValue::DateRange { .. } => "date_range",
        TemporalResolutionValue::DateTimeRange { .. } => "datetime_range",
        TemporalResolutionValue::Month { .. } => "month",
    }
}
871
/// Unix timestamp of midnight (UTC) on the given calendar date.
#[cfg(feature = "temporal_track")]
fn date_to_timestamp(date: Date) -> i64 {
    let midnight = PrimitiveDateTime::new(date, Time::MIDNIGHT);
    // Dates have no zone of their own; interpret them as UTC throughout.
    midnight.assume_offset(UtcOffset::UTC).unix_timestamp()
}
878
/// Return the last calendar day of `month` in `year` by walking forward from
/// the 1st until the month changes.
///
/// # Errors
/// Fails only if the 1st of the month is itself not a valid date.
#[cfg(feature = "temporal_track")]
fn last_day_in_month(year: i32, month: Month) -> Result<Date> {
    let first = Date::from_calendar_date(year, month, 1)
        .map_err(|_| anyhow!("invalid month resolution"))?;
    let mut last = first;
    loop {
        match last.next_day() {
            // Still inside the target month: keep advancing.
            Some(next) if next.month() == month => last = next,
            // Month rolled over (or calendar end): `last` is the final day.
            _ => break,
        }
    }
    Ok(last)
}
892
893#[cfg(feature = "temporal_track")]
894
895fn apply_model_context_fragments(response: &mut AskResponse, fragments: Vec<ModelContextFragment>) {
896 if fragments.is_empty() {
897 return;
898 }
899
900 response.context_fragments = fragments
901 .into_iter()
902 .map(|fragment| AskContextFragment {
903 rank: fragment.rank,
904 frame_id: fragment.frame_id,
905 uri: fragment.uri,
906 title: fragment.title,
907 score: fragment.score,
908 matches: fragment.matches,
909 range: Some(fragment.range),
910 chunk_range: fragment.chunk_range,
911 text: fragment.text,
912 kind: Some(match fragment.kind {
913 ModelContextFragmentKind::Full => AskContextFragmentKind::Full,
914 ModelContextFragmentKind::Summary => AskContextFragmentKind::Summary,
915 }),
916 #[cfg(feature = "temporal_track")]
917 temporal: None,
918 })
919 .collect();
920}
921
922pub fn handle_ask(config: &CliConfig, args: AskArgs) -> Result<()> {
923 crate::utils::require_active_plan(config, "ask")?;
925
926 crate::api::track_query_usage(config, 1)?;
928
929 if args.uri.is_some() && args.scope.is_some() {
930 warn!("--scope ignored because --uri is provided");
931 }
932
933 let mut question_tokens = Vec::new();
934 let mut file_path: Option<PathBuf> = None;
935 for token in &args.targets {
936 if file_path.is_none() && looks_like_memory(token) {
937 file_path = Some(PathBuf::from(token));
938 } else {
939 question_tokens.push(token.clone());
940 }
941 }
942
943 let positional_question = if question_tokens.is_empty() {
944 None
945 } else {
946 Some(question_tokens.join(" "))
947 };
948
949 let question = args
950 .question
951 .or(positional_question)
952 .map(|value| value.trim().to_string())
953 .filter(|value| !value.is_empty());
954
955 let question = question
956 .ok_or_else(|| anyhow!("provide a question via positional arguments or --question"))?;
957
958 let (original_question, search_query) = {
961 let (model_for_expansion, api_key_for_expansion): (Option<&str>, Option<String>) =
964 if let Ok(key) = std::env::var("OPENAI_API_KEY") {
965 (Some("gpt-4o-mini"), Some(key))
967 } else if let Ok(key) = std::env::var("GROQ_API_KEY") {
968 (Some("llama-3.1-8b-instant"), Some(key))
970 } else if let Ok(key) = std::env::var("ANTHROPIC_API_KEY") {
971 (Some("claude-haiku-4-5"), Some(key))
973 } else if let Ok(key) = std::env::var("XAI_API_KEY") {
974 (Some("grok-4-fast"), Some(key))
976 } else if let Ok(key) = std::env::var("MISTRAL_API_KEY") {
977 (Some("mistral-small-latest"), Some(key))
979 } else {
980 (None, None)
982 };
983
984 let _ = (model_for_expansion, api_key_for_expansion); (question.clone(), question.clone())
995 };
996
997 let memory_path = match file_path {
998 Some(path) => path,
999 None => autodetect_memory_file()?,
1000 };
1001
1002 let start = parse_date_boundary(args.start.as_ref(), false)?;
1003 let end = parse_date_boundary(args.end.as_ref(), true)?;
1004 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1005 if end_ts < start_ts {
1006 anyhow::bail!("--end must not be earlier than --start");
1007 }
1008 }
1009
1010 let mut mem = Memvid::open(&memory_path)?;
1012
1013 #[cfg(feature = "replay")]
1015 let _ = mem.load_active_session();
1016
1017 let mv2_dimension = mem.effective_vec_index_dimension()?;
1019
1020 let stats = mem.stats()?;
1022 let has_vectors = stats.vector_count > 0;
1023 let effective_mode = if !has_vectors
1024 && matches!(args.mode, AskModeArg::Sem | AskModeArg::Hybrid)
1025 {
1026 tracing::info!("Memory has no embeddings (vector_count=0); falling back to lexical mode");
1027 AskModeArg::Lex
1028 } else {
1029 args.mode.clone()
1030 };
1031
1032 let ask_mode: AskMode = effective_mode.clone().into();
1033 let inferred_model_override = match effective_mode {
1034 AskModeArg::Lex => None,
1035 AskModeArg::Sem | AskModeArg::Hybrid => match mem.embedding_identity_summary(10_000) {
1036 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
1037 identity.model.map(String::from)
1038 }
1039 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
1040 let models: Vec<_> = identities
1041 .iter()
1042 .filter_map(|entry| entry.identity.model.as_deref())
1043 .collect();
1044 anyhow::bail!(
1045 "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
1046 Detected models: {:?}\n\n\
1047 Suggested fix: split into separate memories per embedding model.",
1048 models
1049 );
1050 }
1051 memvid_core::EmbeddingIdentitySummary::Unknown => None,
1052 },
1053 };
1054 let emb_model_override = args
1055 .query_embedding_model
1056 .as_deref()
1057 .or(inferred_model_override.as_deref());
1058 let runtime = match effective_mode {
1059 AskModeArg::Lex => None,
1060 AskModeArg::Sem => Some(load_embedding_runtime_for_mv2(
1061 config,
1062 emb_model_override,
1063 mv2_dimension,
1064 )?),
1065 AskModeArg::Hybrid => {
1066 try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension).or_else(
1068 || {
1069 load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
1071 .ok()
1072 .map(|rt| {
1073 tracing::debug!("hybrid ask: loaded embedding runtime after fallback");
1074 rt
1075 })
1076 },
1077 )
1078 }
1079 };
1080 if runtime.is_none() && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid) {
1081 anyhow::bail!(
1082 "semantic embeddings unavailable; install/cached model required for {:?} mode",
1083 effective_mode
1084 );
1085 }
1086
1087 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1088
1089 let adaptive = if !args.no_adaptive {
1091 Some(AdaptiveConfig {
1092 enabled: true,
1093 max_results: args.max_k,
1094 min_results: 1,
1095 normalize_scores: true,
1096 strategy: match args.adaptive_strategy {
1097 AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
1098 min_ratio: args.min_relevancy,
1099 },
1100 AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
1101 min_score: args.min_relevancy,
1102 },
1103 AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
1104 max_drop_ratio: 0.3,
1105 },
1106 AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
1107 AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
1108 relative_threshold: args.min_relevancy,
1109 max_drop_ratio: 0.3,
1110 absolute_min: 0.3,
1111 },
1112 },
1113 })
1114 } else {
1115 None
1116 };
1117
1118 let request = AskRequest {
1119 question: search_query, top_k: args.top_k,
1121 snippet_chars: args.snippet_chars,
1122 uri: args.uri.clone(),
1123 scope: args.scope.clone(),
1124 cursor: args.cursor.clone(),
1125 start,
1126 end,
1127 #[cfg(feature = "temporal_track")]
1128 temporal: None,
1129 context_only: args.context_only,
1130 mode: ask_mode,
1131 as_of_frame: args.as_of_frame,
1132 as_of_ts: args.as_of_ts,
1133 adaptive,
1134 };
1135 let mut response = mem.ask(request, embedder).map_err(|err| match err {
1136 MemvidError::VecDimensionMismatch { expected, actual } => {
1137 anyhow!(vec_dimension_mismatch_help(expected, actual))
1138 }
1139 other => anyhow!(other),
1140 })?;
1141
1142 response.question = original_question;
1145
1146 let is_temporal_query = {
1153 let q_lower = response.question.to_lowercase();
1154 q_lower.contains("current")
1155 || q_lower.contains("latest")
1156 || q_lower.contains("recent")
1157 || q_lower.contains("now")
1158 || q_lower.contains("today")
1159 || q_lower.contains("updated")
1160 || q_lower.contains("new ")
1161 || q_lower.contains("newest")
1162 };
1163 if !args.no_rerank
1164 && !response.retrieval.hits.is_empty()
1165 && matches!(effective_mode, AskModeArg::Sem | AskModeArg::Hybrid)
1166 && !is_temporal_query
1167 {
1168 let mut search_response = SearchResponse {
1170 query: response.question.clone(),
1171 hits: response.retrieval.hits.clone(),
1172 total_hits: response.retrieval.hits.len(),
1173 params: memvid_core::SearchParams {
1174 top_k: args.top_k,
1175 snippet_chars: args.snippet_chars,
1176 cursor: None,
1177 },
1178 elapsed_ms: 0,
1179 engine: memvid_core::SearchEngineKind::Hybrid,
1180 next_cursor: None,
1181 context: String::new(),
1182 };
1183
1184 if let Err(e) = apply_cross_encoder_rerank(&mut search_response) {
1185 warn!("Cross-encoder reranking failed: {e}");
1186 } else {
1187 response.retrieval.hits = search_response.hits;
1189 response.retrieval.context = response
1191 .retrieval
1192 .hits
1193 .iter()
1194 .take(10) .map(|hit| hit.text.as_str())
1196 .collect::<Vec<_>>()
1197 .join("\n\n---\n\n");
1198 }
1199 }
1200
1201 if args.memories {
1203 let memory_context = build_memory_context(&mem);
1204 if !memory_context.is_empty() {
1205 response.retrieval.context = format!(
1207 "=== KNOWN FACTS ===\n{}\n\n=== RETRIEVED CONTEXT ===\n{}",
1208 memory_context, response.retrieval.context
1209 );
1210 }
1211 }
1212
1213 let entity_context = build_entity_context_from_hits(&response.retrieval.hits);
1215 if !entity_context.is_empty() {
1216 response.retrieval.context = format!(
1218 "=== ENTITIES MENTIONED ===\n{}\n\n{}",
1219 entity_context, response.retrieval.context
1220 );
1221 }
1222
1223 if args.mask_pii {
1225 use memvid_core::pii::mask_pii;
1226
1227 response.retrieval.context = mask_pii(&response.retrieval.context);
1229
1230 for hit in &mut response.retrieval.hits {
1232 hit.text = mask_pii(&hit.text);
1233 if let Some(chunk_text) = &hit.chunk_text {
1234 hit.chunk_text = Some(mask_pii(chunk_text));
1235 }
1236 }
1237 }
1238
1239 let llm_context_override = resolve_llm_context_budget_override(args.llm_context_depth)?;
1240
1241 let mut model_result: Option<ModelInference> = None;
1242 if args.no_llm {
1243 if args.use_model.is_some() {
1245 warn!("--use-model ignored because --no-llm disables LLM synthesis");
1246 }
1247 if args.json {
1248 emit_verbatim_evidence_json(&response, args.sources, &mut mem)?;
1249 } else {
1250 emit_verbatim_evidence_pretty(&response, args.sources, &mut mem);
1251 }
1252
1253 #[cfg(feature = "replay")]
1255 let _ = mem.save_active_session();
1256
1257 return Ok(());
1258 } else if response.context_only {
1259 if args.use_model.is_some() {
1260 warn!("--use-model ignored because --context-only disables synthesis");
1261 }
1262 } else if let Some(model_name) = args.use_model.as_deref() {
1263 match run_model_inference(
1264 model_name,
1265 &response.question,
1266 &response.retrieval.context,
1267 &response.retrieval.hits,
1268 llm_context_override,
1269 None,
1270 args.system_prompt.as_deref(),
1271 ) {
1272 Ok(inference) => {
1273 response.answer = Some(inference.answer.answer.clone());
1274 response.retrieval.context = inference.context_body.clone();
1275 apply_model_context_fragments(&mut response, inference.context_fragments.clone());
1276 model_result = Some(inference);
1277 }
1278 Err(err) => {
1279 warn!(
1280 "model inference unavailable for '{}': {err}. Falling back to default summary.",
1281 model_name
1282 );
1283 }
1284 }
1285 }
1286
1287 #[cfg(feature = "replay")]
1289 if let Some(ref inference) = model_result {
1290 if let Some(model_name) = args.use_model.as_deref() {
1291 let retrieved_frames: Vec<u64> = response
1293 .retrieval
1294 .hits
1295 .iter()
1296 .map(|hit| hit.frame_id)
1297 .collect();
1298
1299 mem.record_ask_action(
1300 &response.question,
1301 model_name, model_name, inference.answer.answer.as_bytes(),
1304 0, retrieved_frames,
1306 );
1307 }
1308 }
1309
1310 if args.json {
1311 if let Some(model_name) = args.use_model.as_deref() {
1312 emit_model_json(
1313 &response,
1314 model_name,
1315 model_result.as_ref(),
1316 args.sources,
1317 &mut mem,
1318 )?;
1319 } else {
1320 emit_ask_json(
1321 &response,
1322 effective_mode.clone(),
1323 model_result.as_ref(),
1324 args.sources,
1325 &mut mem,
1326 )?;
1327 }
1328 } else {
1329 emit_ask_pretty(
1330 &response,
1331 effective_mode.clone(),
1332 model_result.as_ref(),
1333 args.sources,
1334 &mut mem,
1335 );
1336 }
1337
1338 #[cfg(feature = "replay")]
1340 let _ = mem.save_active_session();
1341
1342 Ok(())
1343}
1344
1345fn handle_graph_find(mem: &mut Memvid, args: &FindArgs) -> Result<()> {
1347 use memvid_core::graph_search::{hybrid_search, QueryPlanner};
1348 use memvid_core::types::QueryPlan;
1349
1350 let planner = QueryPlanner::new();
1351
1352 let plan = if args.graph {
1354 let plan = planner.plan(&args.query, args.top_k);
1356 match plan {
1358 QueryPlan::Hybrid { graph_filter, .. } if !graph_filter.is_empty() => {
1359 QueryPlan::graph_only(graph_filter, args.top_k)
1360 }
1361 _ => plan,
1362 }
1363 } else {
1364 planner.plan(&args.query, args.top_k)
1366 };
1367
1368 let hits = hybrid_search(mem, &plan)?;
1370
1371 if args.json {
1372 let output = serde_json::json!({
1374 "query": args.query,
1375 "mode": if args.graph { "graph" } else { "hybrid" },
1376 "plan": format!("{:?}", plan),
1377 "hits": hits.iter().map(|h| {
1378 serde_json::json!({
1379 "frame_id": h.frame_id,
1380 "score": h.score,
1381 "graph_score": h.graph_score,
1382 "vector_score": h.vector_score,
1383 "matched_entity": h.matched_entity,
1384 "preview": h.preview,
1385 })
1386 }).collect::<Vec<_>>(),
1387 });
1388 println!("{}", serde_json::to_string_pretty(&output)?);
1389 } else {
1390 let mode_str = if args.graph { "Graph" } else { "Hybrid" };
1392 println!("{} search for: \"{}\"", mode_str, args.query);
1393 println!("Plan: {:?}", plan);
1394 println!();
1395
1396 if hits.is_empty() {
1397 println!("No results found.");
1398 } else {
1399 println!("Results ({} hits):", hits.len());
1400 for (i, hit) in hits.iter().enumerate() {
1401 println!();
1402 println!(
1403 "{}. Frame {} (score: {:.3}, graph: {:.2}, text: {:.2})",
1404 i + 1,
1405 hit.frame_id,
1406 hit.score,
1407 hit.graph_score,
1408 hit.vector_score
1409 );
1410 if let Some(entity) = &hit.matched_entity {
1411 println!(" Matched entity: {}", entity);
1412 }
1413 if let Some(preview) = &hit.preview {
1414 let truncated = if preview.len() > 200 {
1415 format!("{}...", &preview[..200])
1416 } else {
1417 preview.clone()
1418 };
1419 println!(" {}", truncated.replace('\n', " "));
1420 }
1421 }
1422 }
1423 }
1424
1425 Ok(())
1426}
1427
/// CLI entry point for `find`: routes a query to graph, CLIP, semantic,
/// adaptive, hybrid, or lexical search and prints the results.
///
/// High-level flow:
/// 1. gate on the active plan and meter query usage, then open the memory
///    file read-only;
/// 2. delegate to `handle_graph_find` when `--graph` or `--hybrid` is set;
/// 3. inspect the memory's embedding identity to decide whether semantic
///    search is safe (mixed embedding models make vector comparisons
///    meaningless);
/// 4. resolve an embedding runtime according to `--mode`;
/// 5. run the search — with adaptive cutoff and layered fallbacks in
///    semantic mode — and emit JSON (current or legacy schema) or a
///    human-readable table.
pub fn handle_find(config: &CliConfig, args: FindArgs) -> Result<()> {
    crate::utils::require_active_plan(config, "find")?;

    crate::api::track_query_usage(config, 1)?;

    let mut mem = open_read_only_mem(&args.file)?;

    // Best-effort: replay session loading is optional, failures are ignored.
    #[cfg(feature = "replay")]
    let _ = mem.load_active_session();

    // Graph and graph+text searches use a dedicated execution path.
    if args.graph || args.hybrid {
        return handle_graph_find(&mut mem, &args);
    }

    // --uri wins over --scope when both are given; the user is told.
    if args.uri.is_some() && args.scope.is_some() {
        warn!("--scope ignored because --uri is provided");
    }

    let mv2_dimension = mem.effective_vec_index_dimension()?;
    // Only semantic-capable modes need the embedding identity summary.
    let identity_summary = match args.mode {
        SearchMode::Sem | SearchMode::Auto => Some(mem.embedding_identity_summary(10_000)),
        #[cfg(feature = "clip")]
        SearchMode::Clip => None,
        SearchMode::Lex => None,
    };

    // Mixed embedding models: hard error for forced semantic mode, silent
    // downgrade to lexical for auto mode. A single identity also supplies a
    // model override so queries are embedded with the memory's own model.
    let mut semantic_allowed = true;
    let inferred_model_override = match identity_summary.as_ref() {
        Some(memvid_core::EmbeddingIdentitySummary::Single(identity)) => {
            identity.model.as_deref().map(|value| value.to_string())
        }
        Some(memvid_core::EmbeddingIdentitySummary::Mixed(identities)) => {
            let models: Vec<_> = identities
                .iter()
                .filter_map(|entry| entry.identity.model.as_deref())
                .collect();
            if args.mode == SearchMode::Sem {
                anyhow::bail!(
                    "memory contains mixed embedding models; semantic queries are unsafe.\n\n\
                    Detected models: {:?}\n\n\
                    Suggested fix: split into separate memories per embedding model.",
                    models
                );
            }
            warn!(
                "semantic search disabled: mixed embedding models detected: {:?}",
                models
            );
            semantic_allowed = false;
            None
        }
        _ => None,
    };

    // Explicit --query-embedding-model beats the model inferred from the file.
    let emb_model_override = args
        .query_embedding_model
        .as_deref()
        .or(inferred_model_override.as_deref());

    // Resolve the display label and (optionally) the embedding runtime.
    // Sem: runtime is mandatory; Auto: best-effort with lexical fallback.
    let (mode_label, runtime_option) = match args.mode {
        SearchMode::Lex => ("Lexical (forced)".to_string(), None),
        SearchMode::Sem => {
            let runtime =
                load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)?;
            ("Semantic (vector search)".to_string(), Some(runtime))
        }
        SearchMode::Auto => {
            if !semantic_allowed {
                ("Lexical (semantic unsafe)".to_string(), None)
            } else if let Some(runtime) =
                try_load_embedding_runtime_for_mv2(config, emb_model_override, mv2_dimension)
            {
                ("Hybrid (lexical + semantic)".to_string(), Some(runtime))
            } else {
                ("Lexical (semantic unavailable)".to_string(), None)
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => ("CLIP (visual search)".to_string(), None),
    };

    // Machine-readable mode key used in the JSON envelope.
    let mode_key = match args.mode {
        SearchMode::Sem => "semantic",
        SearchMode::Lex => "text",
        SearchMode::Auto => {
            if runtime_option.is_some() {
                "hybrid"
            } else {
                "text"
            }
        }
        #[cfg(feature = "clip")]
        SearchMode::Clip => "clip",
    };

    // CLIP visual search: encode the query text with MobileCLIP, search the
    // CLIP index, convert raw hits into SearchHits, emit, and return early.
    #[cfg(feature = "clip")]
    if args.mode == SearchMode::Clip {
        use memvid_core::clip::{ClipConfig, ClipModel};

        // NOTE: shadows the `config: &CliConfig` parameter within this block.
        let config = ClipConfig::default();
        let clip = ClipModel::new(config).map_err(|e| {
            anyhow!("Failed to initialize CLIP model: {}. Make sure the MobileCLIP-S2 ONNX models are installed.", e)
        })?;

        let query_embedding = clip
            .encode_text(&args.query)
            .map_err(|e| anyhow!("Failed to encode query text: {}", e))?;

        let hits = mem.search_clip(&query_embedding, args.top_k)?;

        // Trace-level dump of raw hits; cosine is derived from the L2
        // distance as 1 - d^2/2 (valid for unit-normalized embeddings).
        for hit in &hits {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    title = %frame.title.unwrap_or_default(),
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit"
                );
            } else {
                tracing::debug!(
                    frame_id = hit.frame_id,
                    page = hit.page,
                    distance = hit.distance,
                    cosine = 1.0 - (hit.distance * hit.distance / 2.0),
                    "CLIP raw hit (missing frame)"
                );
            }
        }

        // Hits at or beyond this L2 distance are considered too weak to show.
        const CLIP_MAX_DISTANCE: f32 = 1.26;

        let search_hits: Vec<SearchHit> = hits
            .into_iter()
            .filter(|hit| hit.distance < CLIP_MAX_DISTANCE)
            .enumerate()
            .filter_map(|(rank, hit)| {
                let cosine_similarity = 1.0 - (hit.distance * hit.distance / 2.0);

                // Frames without a preview are dropped (filter_map + `?`).
                let preview = mem.frame_preview_by_id(hit.frame_id).ok()?;
                let uri = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.uri);
                let base_title = mem.frame_by_id(hit.frame_id).ok().and_then(|f| f.title);
                // Fold the page number into the displayed title when present.
                let title = match (base_title, hit.page) {
                    (Some(t), Some(p)) => Some(format!("{t} (page {p})")),
                    (Some(t), None) => Some(t),
                    (None, Some(p)) => Some(format!("Page {p}")),
                    _ => None,
                };
                Some(SearchHit {
                    rank: rank + 1,
                    frame_id: hit.frame_id,
                    uri: uri.unwrap_or_else(|| format!("mv2://frame/{}", hit.frame_id)),
                    title,
                    text: preview.clone(),
                    chunk_text: Some(preview),
                    range: (0, 0),
                    chunk_range: None,
                    matches: 0,
                    score: Some(cosine_similarity),
                    metadata: None,
                })
            })
            .collect();

        let response = SearchResponse {
            query: args.query.clone(),
            hits: search_hits.clone(),
            total_hits: search_hits.len(),
            params: memvid_core::SearchParams {
                top_k: args.top_k,
                snippet_chars: args.snippet_chars,
                cursor: args.cursor.clone(),
            },
            elapsed_ms: 0,
            engine: SearchEngineKind::Hybrid, next_cursor: None,
            context: String::new(),
        };

        if args.json_legacy {
            warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
            emit_legacy_search_json(&response)?;
        } else if args.json {
            emit_search_json(&response, mode_key)?;
        } else {
            println!(
                "mode: {} k={} time: {} ms",
                mode_label, response.params.top_k, response.elapsed_ms
            );
            println!("engine: clip (MobileCLIP-S2)");
            println!(
                "hits: {} (showing {})",
                response.total_hits,
                response.hits.len()
            );
            emit_search_table(&response);
        }
        return Ok(());
    }

    // Semantic mode runs a fallback ladder:
    //   adaptive vector search -> fixed-k vector search -> lexical + rerank.
    // All other modes run a plain lexical search, optionally reranked when an
    // embedding runtime is available (Auto/hybrid).
    let (response, engine_label, adaptive_stats) = if args.mode == SearchMode::Sem {
        let runtime = runtime_option
            .as_ref()
            .ok_or_else(|| anyhow!("Semantic search requires an embedding runtime"))?;

        let query_embedding = runtime.embed_query(&args.query)?;

        let scope = args.scope.as_deref().or(args.uri.as_deref());

        if !args.no_adaptive {
            // Translate the CLI strategy flag into a core cutoff strategy.
            let strategy = match args.adaptive_strategy {
                AdaptiveStrategyArg::Relative => CutoffStrategy::RelativeThreshold {
                    min_ratio: args.min_relevancy,
                },
                AdaptiveStrategyArg::Absolute => CutoffStrategy::AbsoluteThreshold {
                    min_score: args.min_relevancy,
                },
                AdaptiveStrategyArg::Cliff => CutoffStrategy::ScoreCliff {
                    max_drop_ratio: 0.35, },
                AdaptiveStrategyArg::Elbow => CutoffStrategy::Elbow { sensitivity: 1.0 },
                AdaptiveStrategyArg::Combined => CutoffStrategy::Combined {
                    relative_threshold: args.min_relevancy,
                    max_drop_ratio: 0.35,
                    absolute_min: 0.3,
                },
            };

            let config = AdaptiveConfig {
                enabled: true,
                max_results: args.max_k,
                min_results: 1,
                strategy,
                normalize_scores: true,
            };

            match mem.search_adaptive(
                &args.query,
                &query_embedding,
                config,
                args.snippet_chars,
                scope,
            ) {
                Ok(result) => {
                    let mut resp = SearchResponse {
                        query: args.query.clone(),
                        hits: result.results,
                        total_hits: result.stats.returned,
                        params: memvid_core::SearchParams {
                            top_k: result.stats.returned,
                            snippet_chars: args.snippet_chars,
                            cursor: args.cursor.clone(),
                        },
                        elapsed_ms: 0,
                        engine: SearchEngineKind::Hybrid,
                        next_cursor: None,
                        context: String::new(),
                    };
                    apply_preference_rerank(&mut resp);
                    (
                        resp,
                        "semantic (adaptive vector search)".to_string(),
                        Some(result.stats),
                    )
                }
                Err(e) => {
                    // Dimension mismatches get a dedicated help message and
                    // are never retried — the fallback would fail identically.
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    warn!("Adaptive search failed ({e}), falling back to fixed-k");
                    match mem.vec_search_with_embedding(
                        &args.query,
                        &query_embedding,
                        args.top_k,
                        args.snippet_chars,
                        scope,
                    ) {
                        Ok(mut resp) => {
                            apply_preference_rerank(&mut resp);
                            (resp, "semantic (vector search fallback)".to_string(), None)
                        }
                        Err(e2) => {
                            if let MemvidError::VecDimensionMismatch { expected, actual } = e2 {
                                return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                            }
                            return Err(anyhow!(
                                "Both adaptive and fixed-k search failed: {e}, {e2}"
                            ));
                        }
                    }
                }
            }
        } else {
            // --no-adaptive: straight fixed-k vector search.
            match mem.vec_search_with_embedding(
                &args.query,
                &query_embedding,
                args.top_k,
                args.snippet_chars,
                scope,
            ) {
                Ok(mut resp) => {
                    apply_preference_rerank(&mut resp);
                    (resp, "semantic (vector search)".to_string(), None)
                }
                Err(e) => {
                    if let MemvidError::VecDimensionMismatch { expected, actual } = e {
                        return Err(anyhow!(vec_dimension_mismatch_help(expected, actual)));
                    }

                    // Last resort: lexical search reranked with the embedder.
                    warn!("Vector search failed ({e}), falling back to lexical + rerank");
                    let request = SearchRequest {
                        query: args.query.clone(),
                        top_k: args.top_k,
                        snippet_chars: args.snippet_chars,
                        uri: args.uri.clone(),
                        scope: args.scope.clone(),
                        cursor: args.cursor.clone(),
                        #[cfg(feature = "temporal_track")]
                        temporal: None,
                        as_of_frame: args.as_of_frame,
                        as_of_ts: args.as_of_ts,
                        no_sketch: args.no_sketch,
                    };
                    let mut resp = mem.search(request)?;
                    apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
                    (resp, "semantic (fallback rerank)".to_string(), None)
                }
            }
        }
    } else {
        // Lexical / Auto path: plain search, optionally semantically reranked.
        let request = SearchRequest {
            query: args.query.clone(),
            top_k: args.top_k,
            snippet_chars: args.snippet_chars,
            uri: args.uri.clone(),
            scope: args.scope.clone(),
            cursor: args.cursor.clone(),
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: args.as_of_frame,
            as_of_ts: args.as_of_ts,
            no_sketch: args.no_sketch,
        };

        let mut resp = mem.search(request)?;

        // Surface the degraded-engine case unless the user asked for lexical.
        if matches!(resp.engine, SearchEngineKind::LexFallback) && args.mode != SearchMode::Lex {
            warn!("Search index unavailable; returning basic text results");
        }

        let mut engine_label = match resp.engine {
            SearchEngineKind::Tantivy => "text (tantivy)".to_string(),
            SearchEngineKind::LexFallback => "text (fallback)".to_string(),
            SearchEngineKind::Hybrid => "hybrid".to_string(),
        };

        if runtime_option.is_some() {
            engine_label = format!("hybrid ({engine_label} + semantic)");
        }

        if let Some(ref runtime) = runtime_option {
            apply_semantic_rerank(runtime, &mut mem, &mut resp)?;
        }

        (resp, engine_label, None)
    };

    // Emit: legacy JSON (deprecated), current JSON, or human-readable table.
    if args.json_legacy {
        warn!("--json-legacy is deprecated; use --json for mv2.search.v1 output");
        emit_legacy_search_json(&response)?;
    } else if args.json {
        emit_search_json(&response, mode_key)?;
    } else {
        println!(
            "mode: {} k={} time: {} ms",
            mode_label, response.params.top_k, response.elapsed_ms
        );
        println!("engine: {}", engine_label);

        // Adaptive cutoff diagnostics, only when adaptive search actually ran.
        if let Some(ref stats) = adaptive_stats {
            println!(
                "adaptive: {} -> {} results (cutoff: {}, top: {:.3}, ratio: {:.1}%)",
                stats.total_considered,
                stats.returned,
                stats.triggered_by,
                stats.top_score.unwrap_or(0.0),
                stats.cutoff_ratio.unwrap_or(0.0) * 100.0
            );
        }

        println!(
            "hits: {} (showing {})",
            response.total_hits,
            response.hits.len()
        );
        emit_search_table(&response);
    }

    // Best-effort session persistence; failures are intentionally ignored.
    #[cfg(feature = "replay")]
    let _ = mem.save_active_session();

    Ok(())
}
1869
1870pub fn handle_vec_search(config: &CliConfig, args: VecSearchArgs) -> Result<()> {
1871 crate::api::track_query_usage(config, 1)?;
1873
1874 let mut mem = open_read_only_mem(&args.file)?;
1875 let vector = if let Some(path) = args.embedding.as_deref() {
1876 read_embedding(path)?
1877 } else if let Some(vector_string) = &args.vector {
1878 parse_vector(vector_string)?
1879 } else {
1880 anyhow::bail!("provide --vector or --embedding for search input");
1881 };
1882
1883 let hits = mem
1884 .search_vec(&vector, args.limit)
1885 .map_err(|err| match err {
1886 MemvidError::VecDimensionMismatch { expected, actual } => {
1887 anyhow!(vec_dimension_mismatch_help(expected, actual))
1888 }
1889 other => anyhow!(other),
1890 })?;
1891 let mut enriched = Vec::with_capacity(hits.len());
1892 for hit in hits {
1893 let preview = mem.frame_preview_by_id(hit.frame_id)?;
1894 enriched.push((hit.frame_id, hit.distance, preview));
1895 }
1896
1897 if args.json {
1898 let json_hits: Vec<_> = enriched
1899 .iter()
1900 .map(|(frame_id, distance, preview)| {
1901 json!({
1902 "frame_id": frame_id,
1903 "distance": distance,
1904 "preview": preview,
1905 })
1906 })
1907 .collect();
1908 let json_str = serde_json::to_string_pretty(&json_hits)?;
1909 println!("{}", json_str.to_colored_json_auto()?);
1910 } else if enriched.is_empty() {
1911 println!("No vector matches found");
1912 } else {
1913 for (frame_id, distance, preview) in enriched {
1914 println!("frame {frame_id} (distance {distance:.6}): {preview}");
1915 }
1916 }
1917 Ok(())
1918}
1919
1920pub fn handle_audit(config: &CliConfig, args: AuditArgs) -> Result<()> {
1921 use memvid_core::AuditOptions;
1922 use std::fs::File;
1923 use std::io::Write;
1924
1925 let mut mem = Memvid::open(&args.file)?;
1926
1927 let start = parse_date_boundary(args.start.as_ref(), false)?;
1929 let end = parse_date_boundary(args.end.as_ref(), true)?;
1930 if let (Some(start_ts), Some(end_ts)) = (start, end) {
1931 if end_ts < start_ts {
1932 anyhow::bail!("--end must not be earlier than --start");
1933 }
1934 }
1935
1936 let ask_mode: AskMode = args.mode.into();
1938 let runtime = match args.mode {
1939 AskModeArg::Lex => None,
1940 AskModeArg::Sem => Some(load_embedding_runtime(config)?),
1941 AskModeArg::Hybrid => try_load_embedding_runtime(config),
1942 };
1943 let embedder = runtime.as_ref().map(|inner| inner as &dyn VecEmbedder);
1944
1945 let options = AuditOptions {
1947 top_k: Some(args.top_k),
1948 snippet_chars: Some(args.snippet_chars),
1949 mode: Some(ask_mode),
1950 scope: args.scope,
1951 start,
1952 end,
1953 include_snippets: true,
1954 };
1955
1956 let mut report = mem.audit(&args.question, Some(options), embedder)?;
1958
1959 if let Some(model_name) = args.use_model.as_deref() {
1961 let context = report
1963 .sources
1964 .iter()
1965 .filter_map(|s| s.snippet.clone())
1966 .collect::<Vec<_>>()
1967 .join("\n\n");
1968
1969 match run_model_inference(
1970 model_name,
1971 &report.question,
1972 &context,
1973 &[], None,
1975 None,
1976 None, ) {
1978 Ok(inference) => {
1979 report.answer = Some(inference.answer.answer);
1980 report.notes.push(format!(
1981 "Answer synthesized by model: {}",
1982 inference.answer.model
1983 ));
1984 }
1985 Err(err) => {
1986 warn!(
1987 "model inference unavailable for '{}': {err}. Using default answer.",
1988 model_name
1989 );
1990 }
1991 }
1992 }
1993
1994 let output = match args.format {
1996 AuditFormat::Text => report.to_text(),
1997 AuditFormat::Markdown => report.to_markdown(),
1998 AuditFormat::Json => serde_json::to_string_pretty(&report)?,
1999 };
2000
2001 if let Some(out_path) = args.out {
2003 let mut file = File::create(&out_path)?;
2004 file.write_all(output.as_bytes())?;
2005 println!("Audit report written to: {}", out_path.display());
2006 } else {
2007 println!("{}", output);
2008 }
2009
2010 Ok(())
2011}
2012
2013fn emit_search_json(response: &SearchResponse, mode: &str) -> Result<()> {
2014 let hits: Vec<_> = response.hits.iter().map(search_hit_to_json).collect();
2015
2016 let mut additional_params = serde_json::Map::new();
2017 if let Some(cursor) = &response.params.cursor {
2018 additional_params.insert("cursor".into(), json!(cursor));
2019 }
2020
2021 let mut params = serde_json::Map::new();
2022 params.insert("top_k".into(), json!(response.params.top_k));
2023 params.insert("snippet_chars".into(), json!(response.params.snippet_chars));
2024 params.insert("mode".into(), json!(mode));
2025 params.insert(
2026 "additional_params".into(),
2027 serde_json::Value::Object(additional_params),
2028 );
2029
2030 let mut metadata_json = serde_json::Map::new();
2031 metadata_json.insert("elapsed_ms".into(), json!(response.elapsed_ms));
2032 metadata_json.insert("total_hits".into(), json!(response.total_hits));
2033 metadata_json.insert(
2034 "next_cursor".into(),
2035 match &response.next_cursor {
2036 Some(cursor) => json!(cursor),
2037 None => serde_json::Value::Null,
2038 },
2039 );
2040 metadata_json.insert("engine".into(), json!(response.engine));
2041 metadata_json.insert("params".into(), serde_json::Value::Object(params));
2042
2043 let body = json!({
2044 "version": "mv2.result.v2",
2045 "query": response.query,
2046 "metadata": metadata_json,
2047 "hits": hits,
2048 "context": response.context,
2049 });
2050 let json_str = serde_json::to_string_pretty(&body)?;
2051 println!("{}", json_str.to_colored_json_auto()?);
2052 Ok(())
2053}
2054
/// Prints an ask response as the `mv2.ask.v1` JSON envelope.
///
/// Always includes the question/answer pair, per-hit results, citations,
/// timing stats, engine label, and a length-capped context. When `inference`
/// is present, model identity, cache status, token usage, and grounding are
/// appended; when `include_sources` is set, enriched source records are
/// added; a `follow_up` object is attached only when the answer looks weak.
fn emit_ask_json(
    response: &AskResponse,
    requested_mode: AskModeArg,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let hits: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .map(search_hit_to_json)
        .collect();

    // Citations are emitted as maps so optional fields can be omitted
    // entirely rather than serialized as null.
    let citations: Vec<_> = response
        .citations
        .iter()
        .map(|citation| {
            let mut map = serde_json::Map::new();
            map.insert("index".into(), json!(citation.index));
            map.insert("frame_id".into(), json!(citation.frame_id));
            map.insert("uri".into(), json!(citation.uri));
            if let Some(range) = citation.chunk_range {
                map.insert("chunk_range".into(), json!([range.0, range.1]));
            }
            if let Some(score) = citation.score {
                map.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(map)
        })
        .collect();

    // Base envelope; optional sections are inserted into it below.
    let mut body = json!({
        "version": "mv2.ask.v1",
        "question": response.question,
        "answer": response.answer,
        "context_only": response.context_only,
        "mode": ask_mode_display(requested_mode),
        "retriever": ask_retriever_display(response.retriever),
        "top_k": response.retrieval.params.top_k,
        "results": hits,
        "citations": citations,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "synthesis_ms": response.stats.synthesis_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
        "total_hits": response.retrieval.total_hits,
        "next_cursor": response.retrieval.next_cursor,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    // Model section: only when inference ran. `model_used` appears only when
    // the served model differs from the requested one; cached responses
    // report zero cost and the avoided cost as `saved_cost_usd`.
    if let Some(inf) = inference {
        let model = &inf.answer;
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("model".into(), json!(model.requested));
            if model.model != model.requested {
                map.insert("model_used".into(), json!(model.model));
            }
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    // Enriched per-citation sources, gated behind --sources.
    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Follow-up suggestions are attached only when the answer looks weak
    // (helper returns None otherwise).
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
2164
2165fn build_sources_json(response: &AskResponse, mem: &mut Memvid) -> Vec<serde_json::Value> {
2166 response
2167 .citations
2168 .iter()
2169 .enumerate()
2170 .map(|(idx, citation)| {
2171 let mut source = serde_json::Map::new();
2172 source.insert("index".into(), json!(idx + 1));
2173 source.insert("frame_id".into(), json!(citation.frame_id));
2174 source.insert("uri".into(), json!(citation.uri));
2175
2176 if let Some(range) = citation.chunk_range {
2177 source.insert("chunk_range".into(), json!([range.0, range.1]));
2178 }
2179 if let Some(score) = citation.score {
2180 source.insert("score".into(), json!(score));
2181 }
2182
2183 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2185 if let Some(title) = frame.title {
2186 source.insert("title".into(), json!(title));
2187 }
2188 if !frame.tags.is_empty() {
2189 source.insert("tags".into(), json!(frame.tags));
2190 }
2191 if !frame.labels.is_empty() {
2192 source.insert("labels".into(), json!(frame.labels));
2193 }
2194 source.insert("frame_timestamp".into(), json!(frame.timestamp));
2195 if !frame.content_dates.is_empty() {
2196 source.insert("content_dates".into(), json!(frame.content_dates));
2197 }
2198 }
2199
2200 if let Some(hit) = response
2202 .retrieval
2203 .hits
2204 .iter()
2205 .find(|h| h.frame_id == citation.frame_id)
2206 {
2207 let snippet = hit.chunk_text.clone().unwrap_or_else(|| hit.text.clone());
2208 source.insert("snippet".into(), json!(snippet));
2209 }
2210
2211 serde_json::Value::Object(source)
2212 })
2213 .collect()
2214}
2215
2216fn build_follow_up_suggestions(
2219 response: &AskResponse,
2220 inference: Option<&ModelInference>,
2221 mem: &mut Memvid,
2222) -> Option<serde_json::Value> {
2223 let needs_followup = inference
2225 .and_then(|inf| inf.grounding.as_ref())
2226 .map(|g| g.score < 0.3 || g.has_warning)
2227 .unwrap_or(false);
2228
2229 let low_retrieval = response
2231 .retrieval
2232 .hits
2233 .first()
2234 .and_then(|h| h.score)
2235 .map(|score| score < -2.0)
2236 .unwrap_or(true);
2237
2238 if !needs_followup && !low_retrieval {
2239 return None;
2240 }
2241
2242 let limit = std::num::NonZeroU64::new(20).unwrap();
2244 let timeline_query = TimelineQueryBuilder::default().limit(limit).build();
2245
2246 let available_topics: Vec<String> = mem
2247 .timeline(timeline_query)
2248 .ok()
2249 .map(|entries| {
2250 entries
2251 .iter()
2252 .filter_map(|e| {
2253 let preview = e.preview.trim();
2255 if preview.is_empty() || preview.len() < 5 {
2256 return None;
2257 }
2258 let first_line = preview.lines().next().unwrap_or(preview);
2260 if first_line.len() > 60 {
2261 Some(format!("{}...", &first_line[..57]))
2262 } else {
2263 Some(first_line.to_string())
2264 }
2265 })
2266 .collect::<std::collections::HashSet<_>>()
2267 .into_iter()
2268 .take(5)
2269 .collect()
2270 })
2271 .unwrap_or_default();
2272
2273 let reason = if response.retrieval.hits.is_empty() || low_retrieval {
2275 "No relevant information found in memory"
2276 } else if inference
2277 .and_then(|i| i.grounding.as_ref())
2278 .map(|g| g.has_warning)
2279 .unwrap_or(false)
2280 {
2281 "Answer may not be well-supported by the available context"
2282 } else {
2283 "Low confidence in the answer"
2284 };
2285
2286 let suggestions: Vec<String> = if available_topics.is_empty() {
2288 vec![
2289 "What information is stored in this memory?".to_string(),
2290 "Can you list the main topics covered?".to_string(),
2291 ]
2292 } else {
2293 available_topics
2294 .iter()
2295 .take(3)
2296 .map(|topic| format!("Tell me about {}", topic))
2297 .chain(std::iter::once(
2298 "What topics are in this memory?".to_string(),
2299 ))
2300 .collect()
2301 };
2302
2303 Some(json!({
2304 "needed": true,
2305 "reason": reason,
2306 "hint": if available_topics.is_empty() {
2307 "This memory may not contain information about your query."
2308 } else {
2309 "This memory contains information about different topics. Try asking about those instead."
2310 },
2311 "available_topics": available_topics,
2312 "suggestions": suggestions
2313 }))
2314}
2315
/// Emit an ask response as pretty-printed, colorized JSON for model-backed runs.
///
/// The body always contains the question, requested/used model labels, the
/// answer, and the (length-capped) retrieval context. When inference metadata
/// is present, cache/usage/grounding details are added; `sources` and
/// `follow_up` objects are appended when requested/available.
fn emit_model_json(
    response: &AskResponse,
    requested_model: &str,
    inference: Option<&ModelInference>,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    let answer = response.answer.clone().unwrap_or_default();
    // Prefer the labels recorded by the inference run; fall back to the
    // CLI-requested model name when no inference metadata exists.
    let requested_label = inference
        .map(|m| m.answer.requested.clone())
        .unwrap_or_else(|| requested_model.to_string());
    let used_label = inference
        .map(|m| m.answer.model.clone())
        .unwrap_or_else(|| requested_model.to_string());

    let mut body = json!({
        "question": response.question,
        "model": requested_label,
        "model_used": used_label,
        "answer": answer,
        "context": truncate_with_ellipsis(&response.retrieval.context, OUTPUT_CONTEXT_MAX_LEN),
    });

    if let Some(inf) = inference {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("cached".into(), json!(inf.cached));
            if let Some(usage) = &inf.usage {
                // Cached answers cost nothing now; the would-have-been cost is
                // reported under `saved_cost_usd` instead of `cost_usd`.
                map.insert(
                    "usage".into(),
                    json!({
                        "input_tokens": usage.input_tokens,
                        "output_tokens": usage.output_tokens,
                        "total_tokens": usage.total_tokens,
                        "cost_usd": if inf.cached { 0.0 } else { usage.cost_usd },
                        "saved_cost_usd": if inf.cached { usage.cost_usd } else { 0.0 },
                    }),
                );
            }
            if let Some(grounding) = &inf.grounding {
                map.insert(
                    "grounding".into(),
                    json!({
                        "score": grounding.score,
                        "label": grounding.label(),
                        "sentence_count": grounding.sentence_count,
                        "grounded_sentences": grounding.grounded_sentences,
                        "has_warning": grounding.has_warning,
                        "warning_reason": grounding.warning_reason,
                    }),
                );
            }
        }
    }

    if include_sources {
        if let serde_json::Value::Object(ref mut map) = body {
            let sources = build_sources_json(response, mem);
            map.insert("sources".into(), json!(sources));
        }
    }

    // Follow-up suggestions are only attached when the helper decides the
    // answer was weak enough to warrant them.
    if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
        if let serde_json::Value::Object(ref mut map) = body {
            map.insert("follow_up".into(), follow_up);
        }
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2391
2392fn emit_ask_pretty(
2393 response: &AskResponse,
2394 requested_mode: AskModeArg,
2395 inference: Option<&ModelInference>,
2396 include_sources: bool,
2397 mem: &mut Memvid,
2398) {
2399 println!(
2400 "mode: {} retriever: {} k={} latency: {} ms (retrieval {} ms)",
2401 ask_mode_pretty(requested_mode),
2402 ask_retriever_pretty(response.retriever),
2403 response.retrieval.params.top_k,
2404 response.stats.latency_ms,
2405 response.stats.retrieval_ms
2406 );
2407 if let Some(inference) = inference {
2408 let model = &inference.answer;
2409 let cached_label = if inference.cached { " [CACHED]" } else { "" };
2410 if model.requested.trim() == model.model {
2411 println!("model: {}{}", model.model, cached_label);
2412 } else {
2413 println!(
2414 "model requested: {} model used: {}{}",
2415 model.requested, model.model, cached_label
2416 );
2417 }
2418 if let Some(usage) = &inference.usage {
2420 let cost_label = if inference.cached {
2421 format!("$0.00 (saved ${:.6})", usage.cost_usd)
2422 } else {
2423 format!("${:.6}", usage.cost_usd)
2424 };
2425 println!(
2426 "tokens: {} input + {} output = {} cost: {}",
2427 usage.input_tokens, usage.output_tokens, usage.total_tokens, cost_label
2428 );
2429 }
2430 if let Some(grounding) = &inference.grounding {
2432 let warning = if grounding.has_warning {
2433 format!(
2434 " [WARNING: {}]",
2435 grounding
2436 .warning_reason
2437 .as_deref()
2438 .unwrap_or("potential hallucination")
2439 )
2440 } else {
2441 String::new()
2442 };
2443 println!(
2444 "grounding: {:.0}% ({}) - {}/{} sentences grounded{}",
2445 grounding.score * 100.0,
2446 grounding.label(),
2447 grounding.grounded_sentences,
2448 grounding.sentence_count,
2449 warning
2450 );
2451 }
2452 }
2453 println!(
2454 "engine: {}",
2455 search_engine_label(&response.retrieval.engine)
2456 );
2457 println!(
2458 "hits: {} (showing {})",
2459 response.retrieval.total_hits,
2460 response.retrieval.hits.len()
2461 );
2462
2463 if response.context_only {
2464 println!();
2465 println!("Context-only mode: synthesis disabled.");
2466 println!();
2467 } else if let Some(answer) = &response.answer {
2468 println!();
2469 println!("Answer:\n{answer}");
2470 println!();
2471 }
2472
2473 if !response.citations.is_empty() {
2474 println!("Citations:");
2475 for citation in &response.citations {
2476 match citation.score {
2477 Some(score) => println!(
2478 "[{}] {} (frame {}, score {:.3})",
2479 citation.index, citation.uri, citation.frame_id, score
2480 ),
2481 None => println!(
2482 "[{}] {} (frame {})",
2483 citation.index, citation.uri, citation.frame_id
2484 ),
2485 }
2486 }
2487 println!();
2488 }
2489
2490 if include_sources && !response.citations.is_empty() {
2492 println!("=== SOURCES ===");
2493 println!();
2494 for citation in &response.citations {
2495 println!("[{}] {}", citation.index, citation.uri);
2496
2497 if let Ok(frame) = mem.frame_by_id(citation.frame_id) {
2499 if let Some(title) = &frame.title {
2500 println!(" Title: {}", title);
2501 }
2502 println!(" Frame ID: {}", citation.frame_id);
2503 if let Some(score) = citation.score {
2504 println!(" Score: {:.4}", score);
2505 }
2506 if let Some((start, end)) = citation.chunk_range {
2507 println!(" Range: [{}..{})", start, end);
2508 }
2509 if !frame.tags.is_empty() {
2510 println!(" Tags: {}", frame.tags.join(", "));
2511 }
2512 if !frame.labels.is_empty() {
2513 println!(" Labels: {}", frame.labels.join(", "));
2514 }
2515 println!(" Timestamp: {}", frame.timestamp);
2516 if !frame.content_dates.is_empty() {
2517 println!(" Content Dates: {}", frame.content_dates.join(", "));
2518 }
2519 }
2520
2521 if let Some(hit) = response
2523 .retrieval
2524 .hits
2525 .iter()
2526 .find(|h| h.frame_id == citation.frame_id)
2527 {
2528 let snippet = hit.chunk_text.as_ref().unwrap_or(&hit.text);
2529 let truncated = if snippet.len() > 200 {
2530 format!("{}...", &snippet[..200])
2531 } else {
2532 snippet.clone()
2533 };
2534 println!(" Snippet: {}", truncated.replace('\n', " "));
2535 }
2536 println!();
2537 }
2538 }
2539
2540 if !include_sources {
2541 println!();
2542 emit_search_table(&response.retrieval);
2543 }
2544
2545 if let Some(follow_up) = build_follow_up_suggestions(response, inference, mem) {
2547 if let Some(needed) = follow_up.get("needed").and_then(|v| v.as_bool()) {
2548 if needed {
2549 println!();
2550 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2551 println!("💡 FOLLOW-UP SUGGESTIONS");
2552 println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
2553
2554 if let Some(reason) = follow_up.get("reason").and_then(|v| v.as_str()) {
2555 println!("Reason: {}", reason);
2556 }
2557
2558 if let Some(hint) = follow_up.get("hint").and_then(|v| v.as_str()) {
2559 println!("Hint: {}", hint);
2560 }
2561
2562 if let Some(topics) = follow_up.get("available_topics").and_then(|v| v.as_array()) {
2563 if !topics.is_empty() {
2564 println!();
2565 println!("Available topics in this memory:");
2566 for topic in topics.iter().filter_map(|t| t.as_str()) {
2567 println!(" • {}", topic);
2568 }
2569 }
2570 }
2571
2572 if let Some(suggestions) = follow_up.get("suggestions").and_then(|v| v.as_array()) {
2573 if !suggestions.is_empty() {
2574 println!();
2575 println!("Try asking:");
2576 for (i, suggestion) in
2577 suggestions.iter().filter_map(|s| s.as_str()).enumerate()
2578 {
2579 println!(" {}. \"{}\"", i + 1, suggestion);
2580 }
2581 }
2582 }
2583 println!();
2584 }
2585 }
2586 }
2587}
2588
/// Emit retrieval hits as a machine-readable "verbatim evidence" JSON
/// document (`mv2.evidence.v1`), used when no LLM synthesis is requested.
fn emit_verbatim_evidence_json(
    response: &AskResponse,
    include_sources: bool,
    mem: &mut Memvid,
) -> Result<()> {
    // One evidence entry per hit, 1-indexed; prefers the chunk text over the
    // full frame text when a chunk is available. Optional fields (title,
    // score) are emitted only when present.
    let evidence: Vec<_> = response
        .retrieval
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let mut entry = serde_json::Map::new();
            entry.insert("index".into(), json!(idx + 1));
            entry.insert("frame_id".into(), json!(hit.frame_id));
            entry.insert("uri".into(), json!(&hit.uri));
            if let Some(title) = &hit.title {
                entry.insert("title".into(), json!(title));
            }
            let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
            entry.insert("text".into(), json!(verbatim));
            if let Some(score) = hit.score {
                entry.insert("score".into(), json!(score));
            }
            serde_json::Value::Object(entry)
        })
        .collect();

    // Optional frame-level source metadata; frames that fail to load are
    // silently skipped rather than aborting the whole emission.
    let sources: Option<Vec<_>> = if include_sources {
        Some(
            response
                .retrieval
                .hits
                .iter()
                .filter_map(|hit| {
                    mem.frame_by_id(hit.frame_id).ok().map(|frame| {
                        let mut source = serde_json::Map::new();
                        source.insert("frame_id".into(), json!(frame.id));
                        source.insert(
                            "uri".into(),
                            json!(frame.uri.as_deref().unwrap_or("(unknown)")),
                        );
                        if let Some(title) = &frame.title {
                            source.insert("title".into(), json!(title));
                        }
                        source.insert("timestamp".into(), json!(frame.timestamp.to_string()));
                        if !frame.tags.is_empty() {
                            source.insert("tags".into(), json!(frame.tags));
                        }
                        if !frame.labels.is_empty() {
                            source.insert("labels".into(), json!(frame.labels));
                        }
                        serde_json::Value::Object(source)
                    })
                })
                .collect(),
        )
    } else {
        None
    };

    let mut body = json!({
        "version": "mv2.evidence.v1",
        "mode": "verbatim",
        "question": response.question,
        "evidence": evidence,
        "evidence_count": evidence.len(),
        "total_hits": response.retrieval.total_hits,
        "stats": {
            "retrieval_ms": response.stats.retrieval_ms,
            "latency_ms": response.stats.latency_ms,
        },
        "engine": search_engine_label(&response.retrieval.engine),
    });

    // Attach `sources` only when they were collected above.
    if let (Some(sources), serde_json::Value::Object(ref mut map)) = (sources, &mut body) {
        map.insert("sources".into(), json!(sources));
    }

    let json_str = serde_json::to_string_pretty(&body)?;
    println!("{}", json_str.to_colored_json_auto()?);
    Ok(())
}
2676
/// Pretty-print verbatim evidence (no LLM synthesis) for terminal use:
/// header stats, a banner with the (truncated) question, each hit's quoted
/// text, and optionally expanded per-frame source details.
fn emit_verbatim_evidence_pretty(response: &AskResponse, include_sources: bool, mem: &mut Memvid) {
    println!(
        "mode: {} latency: {} ms (retrieval {} ms)",
        "verbatim evidence".cyan(),
        response.stats.latency_ms,
        response.stats.retrieval_ms
    );
    println!(
        "engine: {}",
        search_engine_label(&response.retrieval.engine)
    );
    println!(
        "hits: {} (showing {})",
        response.retrieval.total_hits,
        response.retrieval.hits.len()
    );
    println!();

    println!("{}", "━".repeat(60));
    println!(
        "{}",
        format!(
            "VERBATIM EVIDENCE for: \"{}\"",
            truncate_with_ellipsis(&response.question, 40)
        )
        .bold()
    );
    println!("{}", "━".repeat(60));
    println!();

    if response.retrieval.hits.is_empty() {
        println!("No evidence found.");
        return;
    }

    // Relevance percentages are normalized over this result set's score range.
    let scores: Vec<Option<f32>> = response.retrieval.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for (idx, hit) in response.retrieval.hits.iter().enumerate() {
        let uri = &hit.uri;
        let title = hit.title.as_deref().unwrap_or("Untitled");
        let score_str = hit
            .score
            .map(|s| {
                let normalized = normalize_bm25_for_display(s, min_score, max_score);
                format!(" (relevance: {:.0}%)", normalized)
            })
            .unwrap_or_default();

        println!(
            "{}",
            format!("[{}] {}{}", idx + 1, title, score_str)
                .green()
                .bold()
        );
        println!(" Source: {} (frame {})", uri, hit.frame_id);
        println!();

        // Quote the chunk text (falling back to the full frame text), one
        // gutter-marked line per non-blank source line.
        let verbatim = hit.chunk_text.as_ref().unwrap_or(&hit.text);
        for line in verbatim.lines() {
            if !line.trim().is_empty() {
                println!(" │ {}", line);
            }
        }
        println!();
    }

    if include_sources {
        println!("{}", "━".repeat(60));
        println!("{}", "SOURCE DETAILS".bold());
        println!("{}", "━".repeat(60));
        println!();

        // Best-effort frame lookups: frames that fail to load are skipped.
        for (idx, hit) in response.retrieval.hits.iter().enumerate() {
            if let Ok(frame) = mem.frame_by_id(hit.frame_id) {
                println!(
                    "{}",
                    format!(
                        "[{}] {}",
                        idx + 1,
                        frame.uri.as_deref().unwrap_or("(unknown)")
                    )
                    .cyan()
                );
                if let Some(title) = &frame.title {
                    println!(" Title: {}", title);
                }
                println!(" Frame ID: {}", frame.id);
                println!(" Timestamp: {}", frame.timestamp);
                if !frame.tags.is_empty() {
                    println!(" Tags: {}", frame.tags.join(", "));
                }
                if !frame.labels.is_empty() {
                    println!(" Labels: {}", frame.labels.join(", "));
                }
                if !frame.content_dates.is_empty() {
                    println!(" Content Dates: {}", frame.content_dates.join(", "));
                }
                println!();
            }
        }
    }

    println!("{}", "─".repeat(60));
    println!(
        "{}",
        "Note: Showing verbatim evidence without LLM synthesis.".dimmed()
    );
    println!(
        "{}",
        "Use --use-model to get an AI-synthesized answer.".dimmed()
    );
}
2798
2799fn emit_legacy_search_json(response: &SearchResponse) -> Result<()> {
2800 let hits: Vec<_> = response
2801 .hits
2802 .iter()
2803 .map(|hit| {
2804 json!({
2805 "frame_id": hit.frame_id,
2806 "matches": hit.matches,
2807 "snippets": [hit.text.clone()],
2808 })
2809 })
2810 .collect();
2811 println!("{}", serde_json::to_string_pretty(&hits)?);
2812 Ok(())
2813}
2814
/// Print search hits as a plain-text result listing.
///
/// Scores are shown as a 0-100 "Relevance" percentage normalized over the
/// scores present in this response; optional fields (title, chunk info,
/// metadata) print only when set/non-empty.
fn emit_search_table(response: &SearchResponse) {
    if response.hits.is_empty() {
        println!("No results for '{}'.", response.query);
        return;
    }

    // Relevance display is relative to this result set's score range.
    let scores: Vec<Option<f32>> = response.hits.iter().map(|h| h.score).collect();
    let (min_score, max_score) = score_range(&scores);

    for hit in &response.hits {
        println!("#{} {} (matches {})", hit.rank, hit.uri, hit.matches);
        if let Some(title) = &hit.title {
            println!(" Title: {title}");
        }
        if let Some(score) = hit.score {
            let normalized = normalize_bm25_for_display(score, min_score, max_score);
            println!(" Relevance: {:.0}%", normalized);
        }
        println!(" Range: [{}..{})", hit.range.0, hit.range.1);
        if let Some((chunk_start, chunk_end)) = hit.chunk_range {
            println!(" Chunk: [{}..{})", chunk_start, chunk_end);
        }
        if let Some(chunk_text) = &hit.chunk_text {
            println!(" Chunk Text: {}", chunk_text.trim());
        }
        if let Some(metadata) = &hit.metadata {
            if let Some(track) = &metadata.track {
                println!(" Track: {track}");
            }
            if !metadata.tags.is_empty() {
                println!(" Tags: {}", metadata.tags.join(", "));
            }
            if !metadata.labels.is_empty() {
                println!(" Labels: {}", metadata.labels.join(", "));
            }
            if let Some(created_at) = &metadata.created_at {
                println!(" Created: {created_at}");
            }
            if !metadata.content_dates.is_empty() {
                println!(" Content Dates: {}", metadata.content_dates.join(", "));
            }
            if !metadata.entities.is_empty() {
                // Render entities as "name (kind)" pairs.
                let entity_strs: Vec<String> = metadata
                    .entities
                    .iter()
                    .map(|e| format!("{} ({})", e.name, e.kind))
                    .collect();
                println!(" Entities: {}", entity_strs.join(", "));
            }
        }
        println!(" Snippet: {}", hit.text.trim());
        println!();
    }
    // Pagination handle for fetching the next page, when the engine set one.
    if let Some(cursor) = &response.next_cursor {
        println!("Next cursor: {cursor}");
    }
}
2873
/// Short lowercase label for an ask mode.
fn ask_mode_display(mode: AskModeArg) -> &'static str {
    match mode {
        AskModeArg::Lex => "lex",
        AskModeArg::Sem => "sem",
        AskModeArg::Hybrid => "hybrid",
    }
}
2881
/// Capitalized, human-readable label for an ask mode.
fn ask_mode_pretty(mode: AskModeArg) -> &'static str {
    match mode {
        AskModeArg::Lex => "Lexical",
        AskModeArg::Sem => "Semantic",
        AskModeArg::Hybrid => "Hybrid",
    }
}
2889
/// Short lowercase label for the retriever that served the ask,
/// including fallback variants.
fn ask_retriever_display(retriever: AskRetriever) -> &'static str {
    match retriever {
        AskRetriever::Lex => "lex",
        AskRetriever::Semantic => "semantic",
        AskRetriever::Hybrid => "hybrid",
        AskRetriever::LexFallback => "lex_fallback",
        AskRetriever::TimelineFallback => "timeline_fallback",
    }
}
2899
/// Human-readable retriever label, marking fallback paths explicitly.
fn ask_retriever_pretty(retriever: AskRetriever) -> &'static str {
    match retriever {
        AskRetriever::Lex => "Lexical",
        AskRetriever::Semantic => "Semantic",
        AskRetriever::Hybrid => "Hybrid",
        AskRetriever::LexFallback => "Lexical (fallback)",
        AskRetriever::TimelineFallback => "Timeline (fallback)",
    }
}
2909
/// Human-readable label for the search engine that produced the results.
fn search_engine_label(engine: &SearchEngineKind) -> &'static str {
    match engine {
        SearchEngineKind::Tantivy => "text (tantivy)",
        SearchEngineKind::LexFallback => "text (fallback)",
        SearchEngineKind::Hybrid => "hybrid",
    }
}
2917
2918fn build_hit_id(uri: &str, frame_id: u64, start: usize) -> String {
2919 let digest = hash(uri.as_bytes()).to_hex().to_string();
2920 let prefix_len = digest.len().min(12);
2921 let prefix = &digest[..prefix_len];
2922 format!("mv2-hit-{prefix}-{frame_id}-{start}")
2923}
2924
/// Truncate `text` to at most `limit` characters, appending `...` when
/// anything was cut off. Counts `char`s (not bytes), so the result never
/// splits a multi-byte UTF-8 sequence.
fn truncate_with_ellipsis(text: &str, limit: usize) -> String {
    let mut remaining = text.chars();
    let head: String = remaining.by_ref().take(limit).collect();
    if remaining.next().is_none() {
        // Nothing exceeded the limit; return the input unchanged.
        text.to_string()
    } else {
        format!("{head}...")
    }
}
2933
/// Map a raw BM25 score onto a 0-100 "relevance" percentage relative to the
/// min/max scores of the current result set.
///
/// A degenerate range (all scores equal) reports 100% for every hit rather
/// than dividing by (near-)zero.
fn normalize_bm25_for_display(score: f32, min_score: f32, max_score: f32) -> f32 {
    let range = max_score - min_score;
    if range.abs() < f32::EPSILON {
        100.0
    } else {
        let fraction = (score - min_score) / range;
        (fraction * 100.0).clamp(0.0, 100.0)
    }
}
2950
/// Compute the (min, max) over the `Some` entries of `scores`.
/// Returns `(0.0, 0.0)` when no score is present at all.
fn score_range(scores: &[Option<f32>]) -> (f32, f32) {
    let mut bounds: Option<(f32, f32)> = None;
    for score in scores.iter().filter_map(|s| *s) {
        bounds = Some(match bounds {
            None => (score, score),
            Some((lo, hi)) => (lo.min(score), hi.max(score)),
        });
    }
    bounds.unwrap_or((0.0, 0.0))
}
2964
/// Convert a single search hit into the mv2 JSON hit shape.
///
/// Always includes rank, a stable hit id, frame id, uri, ranges, text, and a
/// `metadata` object; optional fields (score, title, tags, entities, ...)
/// are emitted only when present/non-empty.
fn search_hit_to_json(hit: &SearchHit) -> serde_json::Value {
    let mut hit_json = serde_json::Map::new();
    hit_json.insert("rank".into(), json!(hit.rank));
    if let Some(score) = hit.score {
        hit_json.insert("score".into(), json!(score));
    }
    hit_json.insert(
        "id".into(),
        json!(build_hit_id(&hit.uri, hit.frame_id, hit.range.0)),
    );
    hit_json.insert("frame_id".into(), json!(hit.frame_id));
    hit_json.insert("uri".into(), json!(hit.uri));
    if let Some(title) = &hit.title {
        hit_json.insert("title".into(), json!(title));
    }
    // Fall back to the hit's full range when no chunk-level range is recorded.
    let chunk_range = hit.chunk_range.unwrap_or(hit.range);
    hit_json.insert("chunk_range".into(), json!([chunk_range.0, chunk_range.1]));
    hit_json.insert("range".into(), json!([hit.range.0, hit.range.1]));
    hit_json.insert("text".into(), json!(hit.text));

    // Synthesize minimal metadata (just the match count) when the hit carries
    // none, so `metadata.matches` is always present in the output.
    let metadata = hit.metadata.clone().unwrap_or_else(|| SearchHitMetadata {
        matches: hit.matches,
        ..SearchHitMetadata::default()
    });
    let mut meta_json = serde_json::Map::new();
    meta_json.insert("matches".into(), json!(metadata.matches));
    if !metadata.tags.is_empty() {
        meta_json.insert("tags".into(), json!(metadata.tags));
    }
    if !metadata.labels.is_empty() {
        meta_json.insert("labels".into(), json!(metadata.labels));
    }
    if let Some(track) = metadata.track {
        meta_json.insert("track".into(), json!(track));
    }
    if let Some(created_at) = metadata.created_at {
        meta_json.insert("created_at".into(), json!(created_at));
    }
    if !metadata.content_dates.is_empty() {
        meta_json.insert("content_dates".into(), json!(metadata.content_dates));
    }
    if !metadata.entities.is_empty() {
        // Entities become objects with name/kind and optional confidence.
        let entities_json: Vec<serde_json::Value> = metadata
            .entities
            .iter()
            .map(|e| {
                let mut ent = serde_json::Map::new();
                ent.insert("name".into(), json!(e.name));
                ent.insert("kind".into(), json!(e.kind));
                if let Some(conf) = e.confidence {
                    ent.insert("confidence".into(), json!(conf));
                }
                serde_json::Value::Object(ent)
            })
            .collect();
        meta_json.insert("entities".into(), json!(entities_json));
    }
    hit_json.insert("metadata".into(), serde_json::Value::Object(meta_json));
    serde_json::Value::Object(hit_json)
}
/// Rerank lexical search hits by fusing them with semantic (embedding)
/// similarity via Reciprocal Rank Fusion (RRF), plus a small boost for
/// preference-style queries.
///
/// Leaves the response untouched when there are no hits or no usable frame
/// embeddings.
fn apply_semantic_rerank(
    runtime: &EmbeddingRuntime,
    mem: &mut Memvid,
    response: &mut SearchResponse,
) -> Result<()> {
    if response.hits.is_empty() {
        return Ok(());
    }

    // Score each hit's frame embedding against the query embedding; frames
    // with missing or dimension-mismatched embeddings are skipped.
    let query_embedding = runtime.embed_query(&response.query)?;
    let mut semantic_scores: HashMap<u64, f32> = HashMap::new();
    for hit in &response.hits {
        if let Some(embedding) = mem.frame_embedding(hit.frame_id)? {
            if embedding.len() == runtime.dimension() {
                let score = cosine_similarity(&query_embedding, &embedding);
                semantic_scores.insert(hit.frame_id, score);
            }
        }
    }

    // No usable embeddings: keep the lexical ordering as-is.
    if semantic_scores.is_empty() {
        return Ok(());
    }

    // Rank frames by semantic similarity (rank 1 = most similar).
    let mut sorted_semantic: Vec<(u64, f32)> = semantic_scores
        .iter()
        .map(|(frame_id, score)| (*frame_id, *score))
        .collect();
    sorted_semantic.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));

    let mut semantic_rank: HashMap<u64, usize> = HashMap::new();
    for (idx, (frame_id, _)) in sorted_semantic.iter().enumerate() {
        semantic_rank.insert(*frame_id, idx + 1);
    }

    // Heuristic: queries asking for suggestions/recommendations benefit from
    // boosting hits that describe the user's own preferences.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    // RRF dampening constant: larger values flatten rank differences.
    const RRF_K: f32 = 60.0;

    // (original index, fused score, lexical rank) per hit.
    let mut ordering: Vec<(usize, f32, usize)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let lexical_rank = hit.rank;

            let lexical_rrf = 1.0 / (RRF_K + lexical_rank as f32);

            // Hits without a semantic score contribute nothing from that list.
            let semantic_rrf = semantic_rank
                .get(&hit.frame_id)
                .map(|rank| 1.0 / (RRF_K + *rank as f32))
                .unwrap_or(0.0);

            // Scaled by 0.01 so the boost is on the same order as RRF terms.
            let preference_boost = if is_preference_query {
                compute_preference_boost(&hit.text) * 0.01 } else {
                0.0
            };

            let combined = lexical_rrf + semantic_rrf + preference_boost;
            (idx, combined, lexical_rank)
        })
        .collect();

    // Sort descending by fused score; ties fall back to the lexical rank.
    ordering.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then(a.2.cmp(&b.2))
    });

    // Rebuild the hit list with ranks renumbered from 1.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in ordering.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
    Ok(())
}
3130
/// Re-rank hits for preference-style queries ("suggest", "recommend", ...)
/// by adding a preference boost to each hit's score. Queries that do not
/// look preference-related are left untouched.
fn apply_preference_rerank(response: &mut SearchResponse) {
    if response.hits.is_empty() {
        return;
    }

    // Heuristic detection of suggestion/recommendation queries.
    let query_lower = response.query.to_lowercase();
    let is_preference_query = query_lower.contains("suggest")
        || query_lower.contains("recommend")
        || query_lower.contains("should i")
        || query_lower.contains("what should")
        || query_lower.contains("prefer")
        || query_lower.contains("favorite")
        || query_lower.contains("best for me");

    if !is_preference_query {
        return;
    }

    // (index, boosted score, original score) per hit; the original score is
    // retained as a tie-breaker.
    let mut scored: Vec<(usize, f32, f32)> = response
        .hits
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            let original_score = hit.score.unwrap_or(0.0);
            let preference_boost = compute_preference_boost(&hit.text);
            let boosted_score = original_score + preference_boost;
            (idx, boosted_score, original_score)
        })
        .collect();

    // Sort descending by boosted score, ties broken by original score.
    scored.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(Ordering::Equal)
            .then_with(|| b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal))
    });

    // Rebuild the hit list with ranks renumbered from 1.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (rank_idx, (idx, _, _)) in scored.into_iter().enumerate() {
        let mut hit = response.hits[idx].clone();
        hit.rank = rank_idx + 1;
        reordered.push(hit);
    }

    response.hits = reordered;
}
3182
/// Score how strongly `text` reads like a first-person statement of the
/// user's own preferences, possessions, or habits.
///
/// Each matched "established context" phrase adds 0.15; weaker first-person
/// and request cues add 0.02 each. The total is capped at 0.5 so preference
/// evidence can reorder hits without dominating the base relevance score.
fn compute_preference_boost(text: &str) -> f32 {
    let lowered = text.to_lowercase();

    // Strong cues: phrases that establish personal context or preferences.
    let established_context: &[&str] = &[
        "i've been",
        "i've had",
        "i've used",
        "i've tried",
        "i recently",
        "i just",
        "lately",
        "i started",
        "i bought",
        "i harvested",
        "i grew",
        "my garden",
        "my home",
        "my house",
        "my setup",
        "my equipment",
        "my camera",
        "my car",
        "my phone",
        "i have a",
        "i own",
        "i got a",
        "i prefer",
        "i like to",
        "i love to",
        "i enjoy",
        "i usually",
        "i always",
        "i typically",
        "my favorite",
        "i tend to",
        "i often",
        "i use",
        "i grow",
        "i cook",
        "i make",
        "i work on",
        "i'm into",
        "i collect",
    ];

    // Weak cues: bare first-person markers, space-padded so they only match
    // whole words.
    let first_person: &[&str] = &[" i ", " my ", " me "];

    // Weak cues: the user asking for something.
    let request_patterns: &[&str] = &[
        "i'm trying to",
        "i want to",
        "i need to",
        "looking for",
        "can you suggest",
        "can you help",
    ];

    // Accumulate in the same group order (strong, first-person, request) so
    // the floating-point sum is identical for any given input.
    let mut boost = 0.0f32;
    for (patterns, weight) in [
        (established_context, 0.15f32),
        (first_person, 0.02),
        (request_patterns, 0.02),
    ] {
        for pattern in patterns {
            if lowered.contains(pattern) {
                boost += weight;
            }
        }
    }

    boost.min(0.5)
}
3275
/// Cosine similarity of two vectors, or 0.0 when either has (near-)zero
/// norm. If the slices differ in length, only the overlapping prefix is
/// considered.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // Single pass accumulating dot product and both squared norms in order.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, na, nb), (x, y)| (dot + x * y, na + x * x, nb + y * y),
    );

    if norm_a_sq <= f32::EPSILON || norm_b_sq <= f32::EPSILON {
        0.0
    } else {
        dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
    }
}
3292
/// Re-rank the top search hits with a local cross-encoder model
/// (`RerankerModel::JINARerankerV1TurboEn` via fastembed), blending the
/// cross-encoder score (20%) with the min-max-normalized original score
/// (80%).
///
/// Best-effort: any model init/inference failure logs a warning and leaves
/// the original ordering intact.
#[cfg(feature = "local-embeddings")]
fn apply_cross_encoder_rerank(response: &mut SearchResponse) -> Result<()> {
    // Nothing to reorder with fewer than two hits.
    if response.hits.is_empty() || response.hits.len() < 2 {
        return Ok(());
    }

    // Cap the rerank window at 50 candidates to bound model latency.
    let candidates_to_rerank = response.hits.len().min(50);

    let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
        .with_show_download_progress(true);

    let mut reranker = match TextRerank::try_new(options) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to initialize cross-encoder reranker: {e}");
            return Ok(());
        }
    };

    let documents: Vec<String> = response.hits[..candidates_to_rerank]
        .iter()
        .map(|hit| hit.text.clone())
        .collect();

    info!("Cross-encoder reranking {} candidates", documents.len());
    let rerank_results = match reranker.rerank(response.query.clone(), documents, false, None) {
        Ok(results) => results,
        Err(e) => {
            warn!("Cross-encoder reranking failed: {e}");
            return Ok(());
        }
    };

    let mut scored_hits: Vec<(f32, usize)> = Vec::with_capacity(rerank_results.len());

    // Min-max bounds of the original scores in the rerank window, used to
    // normalize them onto roughly [0, 1] before blending.
    let original_scores: Vec<f32> = response.hits[..candidates_to_rerank]
        .iter()
        .filter_map(|h| h.score)
        .collect();
    let orig_min = original_scores
        .iter()
        .cloned()
        .fold(f32::INFINITY, f32::min);
    let orig_max = original_scores
        .iter()
        .cloned()
        .fold(f32::NEG_INFINITY, f32::max);
    // NOTE(review): if no hit in the window carries a score, orig_min/orig_max
    // remain infinite and the blended scores degenerate — confirm hits always
    // have scores on this path.
    let orig_range = (orig_max - orig_min).max(0.001);
    for result in rerank_results.iter() {
        let original_idx = result.index;
        let cross_encoder_score = result.score;
        let original_score = response.hits[original_idx].score.unwrap_or(0.0);
        let normalized_original = (original_score - orig_min) / orig_range;

        // Blend: 20% cross-encoder judgment, 80% original retrieval score.
        let blended = cross_encoder_score * 0.2 + normalized_original * 0.8;

        scored_hits.push((blended, original_idx));
    }

    scored_hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

    // Rebuild the reranked window with blended scores and fresh ranks.
    let mut reordered = Vec::with_capacity(response.hits.len());
    for (new_rank, (blended_score, original_idx)) in scored_hits.into_iter().enumerate() {
        let mut hit = response.hits[original_idx].clone();
        hit.rank = new_rank + 1;
        hit.score = Some(blended_score);
        reordered.push(hit);
    }

    // Append any hits beyond the rerank window, preserving their order.
    for hit in response.hits.iter().skip(candidates_to_rerank) {
        let mut h = hit.clone();
        h.rank = reordered.len() + 1;
        reordered.push(h);
    }

    response.hits = reordered;
    info!("Cross-encoder reranking complete");
    Ok(())
}
3398
/// No-op fallback when the `local-embeddings` feature is disabled: the
/// existing hit ordering is left untouched.
#[cfg(not(feature = "local-embeddings"))]
fn apply_cross_encoder_rerank(_response: &mut SearchResponse) -> Result<()> {
    Ok(())
}
3405
/// Build a textual context listing every entity the memory tracks together
/// with its slot/value cards.
///
/// Format: one section per entity, sections separated by blank lines, each
/// card on its own indented line with an optional polarity marker.
/// Returns an empty string when the memory has no entities.
fn build_memory_context(mem: &Memvid) -> String {
    let entities = mem.memory_entities();
    if entities.is_empty() {
        return String::new();
    }

    let mut sections = Vec::new();
    for entity in entities {
        let cards = mem.get_entity_memories(&entity);
        if cards.is_empty() {
            continue;
        }

        let mut entity_lines = Vec::new();
        for card in cards {
            // Map polarity to a compact marker; relies on the polarity type's
            // Display rendering as "Positive"/"Negative" — other values get
            // no marker. TODO confirm the Display format stays stable.
            let polarity_marker = card
                .polarity
                .as_ref()
                .map(|p| match p.to_string().as_str() {
                    "Positive" => " (+)",
                    "Negative" => " (-)",
                    _ => "",
                })
                .unwrap_or("");
            entity_lines.push(format!(
                " - {}: {}{}",
                card.slot, card.value, polarity_marker
            ));
        }

        sections.push(format!("{}:\n{}", entity, entity_lines.join("\n")));
    }

    sections.join("\n\n")
}
3444
3445fn build_entity_context_from_hits(hits: &[SearchHit]) -> String {
3448 use std::collections::HashMap;
3449
3450 let mut entities_by_kind: HashMap<String, Vec<String>> = HashMap::new();
3452
3453 for hit in hits {
3454 if let Some(metadata) = &hit.metadata {
3455 for entity in &metadata.entities {
3456 entities_by_kind
3457 .entry(entity.kind.clone())
3458 .or_default()
3459 .push(entity.name.clone());
3460 }
3461 }
3462 }
3463
3464 if entities_by_kind.is_empty() {
3465 return String::new();
3466 }
3467
3468 let mut sections = Vec::new();
3470 let mut sorted_kinds: Vec<_> = entities_by_kind.keys().collect();
3471 sorted_kinds.sort();
3472
3473 for kind in sorted_kinds {
3474 let names = entities_by_kind.get(kind).unwrap();
3475 let mut unique_names: Vec<_> = names.iter().collect();
3476 unique_names.sort();
3477 unique_names.dedup();
3478
3479 let names_str = unique_names
3480 .iter()
3481 .take(10) .map(|s| s.as_str())
3483 .collect::<Vec<_>>()
3484 .join(", ");
3485
3486 sections.push(format!("{}: {}", kind, names_str));
3487 }
3488
3489 sections.join("\n")
3490}