1#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use memvid_core::table::list_tables;
14use memvid_core::{
15 lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
16 TextChunkRange,
17};
18use serde_json::{json, Value};
19use tempfile::Builder;
20use tracing::warn;
21use uuid::Uuid;
22
23use crate::config::CliConfig;
24use crate::utils::{
25 format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
26 owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
27};
28
29const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
30const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
31
32#[derive(Args)]
34pub struct ViewArgs {
35 #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
36 pub file: PathBuf,
37 #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
38 pub frame_id: Option<u64>,
39 #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
40 pub uri: Option<String>,
41 #[arg(long)]
42 pub json: bool,
43 #[arg(long, conflicts_with = "json")]
44 pub binary: bool,
45 #[arg(long, conflicts_with_all = ["json", "binary"])]
46 pub preview: bool,
47 #[arg(
49 long = "start",
50 value_name = "HH:MM:SS",
51 requires = "preview",
52 conflicts_with_all = ["json", "binary", "play"]
53 )]
54 pub preview_start: Option<String>,
55 #[arg(
57 long = "end",
58 value_name = "HH:MM:SS",
59 requires = "preview",
60 conflicts_with_all = ["json", "binary", "play"]
61 )]
62 pub preview_end: Option<String>,
63 #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
64 pub play: bool,
65 #[arg(long = "start-seconds", requires = "play")]
66 pub start_seconds: Option<f32>,
67 #[arg(long = "end-seconds", requires = "play")]
68 pub end_seconds: Option<f32>,
69 #[arg(long, value_name = "N", default_value_t = 1)]
70 pub page: usize,
71 #[arg(long = "page-size", value_name = "CHARS")]
72 pub page_size: Option<usize>,
73}
74
75#[derive(Args)]
77pub struct StatsArgs {
78 #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
79 pub file: PathBuf,
80 #[arg(long)]
81 pub json: bool,
82 #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
84 pub as_of_frame: Option<u64>,
85 #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
87 pub as_of_ts: Option<i64>,
88}
89
90#[derive(Args)]
92pub struct WhoArgs {
93 #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
94 pub file: PathBuf,
95 #[arg(long)]
96 pub json: bool,
97}
98
99pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
101 let mut mem = Memvid::open_read_only(&args.file)?;
102 let stats = mem.stats()?;
103 let tables = list_tables(&mut mem).unwrap_or_default();
104 let vec_dimension = mem.effective_vec_index_dimension()?;
105 let embedding_identity = mem.embedding_identity_summary(10_000);
106
107 if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
110 eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
111 eprintln!(" Use 'find' or 'timeline' commands for filtered results.");
112 }
113 let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
114 let payload_share_percent: f64 = if stats.size_bytes > 0 {
115 round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
116 } else {
117 0.0
118 };
119 let overhead_share_percent: f64 = if stats.size_bytes > 0 {
120 round_percent((100.0 - payload_share_percent).max(0.0))
121 } else {
122 0.0
123 };
124 let maintenance_command = format!(
125 "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
126 args.file.display()
127 );
128
129 if args.json {
130 let mut raw_json = serde_json::to_value(&stats)?;
131 if let Value::Object(ref mut obj) = raw_json {
132 obj.remove("tier");
133 }
134
135 let tables_json: Vec<serde_json::Value> = tables
137 .iter()
138 .map(|t| {
139 json!({
140 "table_id": t.table_id,
141 "source_file": t.source_file,
142 "n_rows": t.n_rows,
143 "n_cols": t.n_cols,
144 "pages": format!("{}-{}", t.page_start, t.page_end),
145 "quality": format!("{:?}", t.quality),
146 "headers": t.headers,
147 })
148 })
149 .collect();
150
151 let embedding_quality_json = if stats.has_vec_index {
153 mem.embedding_quality().ok().flatten().map(|eq| {
154 json!({
155 "vector_count": eq.vector_count,
156 "dimension": eq.dimension,
157 "avg_similarity": eq.avg_similarity,
158 "min_similarity": eq.min_similarity,
159 "max_similarity": eq.max_similarity,
160 "std_similarity": eq.std_similarity,
161 "clustering_coefficient": eq.clustering_coefficient,
162 "estimated_clusters": eq.estimated_clusters,
163 "recommended_threshold": eq.recommended_threshold,
164 "quality_rating": eq.quality_rating,
165 "quality_explanation": eq.quality_explanation,
166 })
167 })
168 } else {
169 None
170 };
171
172 let embedding_identity_json = match &embedding_identity {
173 memvid_core::EmbeddingIdentitySummary::Unknown => Value::Null,
174 memvid_core::EmbeddingIdentitySummary::Single(identity) => json!({
175 "provider": identity.provider.as_deref(),
176 "model": identity.model.as_deref(),
177 "dimension": identity.dimension.or(vec_dimension),
178 "normalized": identity.normalized,
179 }),
180 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
181 let values: Vec<Value> = identities
182 .iter()
183 .map(|entry| {
184 json!({
185 "provider": entry.identity.provider.as_deref(),
186 "model": entry.identity.model.as_deref(),
187 "dimension": entry.identity.dimension.or(vec_dimension),
188 "normalized": entry.identity.normalized,
189 "count": entry.count,
190 })
191 })
192 .collect();
193 json!({ "mixed": values })
194 }
195 };
196
197 let report = json!({
198 "summary": {
199 "sequence": stats.seq_no,
200 "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
201 "usage": format!(
202 "{} used / {} total ({})",
203 format_bytes(stats.size_bytes),
204 format_bytes(stats.capacity_bytes),
205 format_percent(stats.storage_utilisation_percent)
206 ),
207 "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
208 },
209 "storage": {
210 "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
211 "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
212 "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
213 "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
214 "compression_ratio": format_percent(stats.compression_ratio_percent),
215 },
216 "frames": {
217 "average_stored": format_bytes(stats.average_frame_payload_bytes),
218 "average_logical": format_bytes(stats.average_frame_logical_bytes),
219 "clip_images": stats.clip_image_count,
220 },
221 "indexes": {
222 "lexical": yes_no(stats.has_lex_index),
223 "vector": yes_no(stats.has_vec_index),
224 "time": yes_no(stats.has_time_index),
225 },
226 "embedding_identity": embedding_identity_json,
227 "embedding_quality": embedding_quality_json,
228 "tables": {
229 "count": tables.len(),
230 "tables": tables_json,
231 },
232 "maintenance": maintenance_command,
233 "raw": raw_json,
234 });
235
236 println!("{}", serde_json::to_string_pretty(&report)?);
237 } else {
238 let seq_display = stats
239 .seq_no
240 .map(|seq| seq.to_string())
241 .unwrap_or_else(|| "n/a".to_string());
242
243 println!("Memory: {}", args.file.display());
244 println!("Sequence: {}", seq_display);
245 println!(
246 "Frames: {} total ({} active)",
247 stats.frame_count, stats.active_frame_count
248 );
249
250 println!("\nCapacity:");
251 println!(
252 " Usage: {} used / {} total ({})",
253 format_bytes(stats.size_bytes),
254 format_bytes(stats.capacity_bytes),
255 format_percent(stats.storage_utilisation_percent)
256 );
257 println!(
258 " Remaining: {}",
259 format_bytes(stats.remaining_capacity_bytes)
260 );
261
262 println!("\nStorage breakdown:");
263 println!(
264 " Payload: {} ({})",
265 format_bytes(stats.payload_bytes),
266 format_percent(payload_share_percent)
267 );
268 println!(
269 " Overhead: {} ({})",
270 format_bytes(overhead_bytes),
271 format_percent(overhead_share_percent)
272 );
273 println!(" ├─ WAL: {}", format_bytes(stats.wal_bytes));
275 println!(
276 " ├─ Lexical index: {}",
277 format_bytes(stats.lex_index_bytes)
278 );
279 println!(
280 " ├─ Vector index: {}",
281 format_bytes(stats.vec_index_bytes)
282 );
283 println!(
284 " └─ Time index: {}",
285 format_bytes(stats.time_index_bytes)
286 );
287 println!(
288 " Logical payload: {} before compression",
289 format_bytes(stats.logical_bytes)
290 );
291
292 if stats.has_vec_index {
293 println!("\nEmbeddings:");
294 if let Some(dim) = vec_dimension {
295 println!(" Dimension: {}", dim);
296 }
297 match &embedding_identity {
298 memvid_core::EmbeddingIdentitySummary::Unknown => {
299 println!(" Model: unknown (no persisted embedding identity)");
300 }
301 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
302 if let Some(provider) = identity.provider.as_deref() {
303 println!(" Provider: {}", provider);
304 }
305 if let Some(model) = identity.model.as_deref() {
306 println!(" Model: {}", model);
307 }
308 }
309 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
310 println!(" Model: mixed ({} identities detected)", identities.len());
311 for entry in identities.iter().take(5) {
312 let provider = entry.identity.provider.as_deref().unwrap_or("unknown");
313 let model = entry.identity.model.as_deref().unwrap_or("unknown");
314 println!(" - {} / {} ({} frames)", provider, model, entry.count);
315 }
316 if identities.len() > 5 {
317 println!(" - ...");
318 }
319 }
320 }
321 }
322 println!(
323 " Compression savings: {} ({})",
324 format_bytes(stats.saved_bytes),
325 format_percent(stats.savings_percent)
326 );
327
328 println!("\nAverage frame:");
329 println!(
330 " Stored: {} Logical: {}",
331 format_bytes(stats.average_frame_payload_bytes),
332 format_bytes(stats.average_frame_logical_bytes)
333 );
334 if stats.clip_image_count > 0 {
335 println!(" CLIP images: {}", stats.clip_image_count);
336 }
337
338 if stats.active_frame_count > 0 {
340 let overhead_per_doc = overhead_bytes / stats.active_frame_count;
341 let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
342 let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
343
344 println!("\nPer-document overhead:");
345 println!(" Total: {}", format_bytes(overhead_per_doc));
346 if stats.has_lex_index {
347 println!(" Lexical: {}", format_bytes(lex_per_doc));
348 }
349 if stats.has_vec_index {
350 let vec_ratio = if stats.average_frame_payload_bytes > 0 {
351 vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
352 } else {
353 0.0
354 };
355 println!(
356 " Vector: {} ({:.0}x text size)",
357 format_bytes(vec_per_doc),
358 vec_ratio
359 );
360 }
361 }
362
363 println!("\nIndexes:");
364 println!(
365 " Lexical: {} Vector: {} Time: {}",
366 yes_no(stats.has_lex_index),
367 yes_no(stats.has_vec_index),
368 yes_no(stats.has_time_index)
369 );
370
371 if stats.has_vec_index {
373 if let Ok(Some(eq)) = mem.embedding_quality() {
374 println!("\nEmbedding Quality:");
375 println!(
376 " Vectors: {} Dimension: {}",
377 eq.vector_count, eq.dimension
378 );
379 println!(
380 " Similarity: avg={:.3} min={:.3} max={:.3} std={:.3}",
381 eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
382 );
383 println!(
384 " Clusters: ~{} Quality: {}",
385 eq.estimated_clusters, eq.quality_rating
386 );
387 println!(
388 " Recommended --min-relevancy: {:.1}",
389 eq.recommended_threshold
390 );
391 println!(" {}", eq.quality_explanation);
392 }
393 }
394
395 if !tables.is_empty() {
396 println!("\nTables: {} extracted", tables.len());
397 for t in &tables {
398 println!(
399 " {} — {} rows × {} cols ({})",
400 t.table_id, t.n_rows, t.n_cols, t.source_file
401 );
402 }
403 }
404
405 println!("\nMaintenance:");
406 println!(
407 " Run `{}` to rebuild indexes and reclaim space.",
408 maintenance_command
409 );
410 }
411 Ok(())
412}
413
414pub fn handle_who(args: WhoArgs) -> Result<()> {
416 match lockfile::current_owner(&args.file)? {
417 Some(owner) => {
418 if args.json {
419 let output = json!({
420 "locked": true,
421 "owner": owner_hint_to_json(&owner),
422 });
423 println!("{}", serde_json::to_string_pretty(&output)?);
424 } else {
425 println!("{} is locked by:", args.file.display());
426 if let Some(pid) = owner.pid {
427 println!(" pid: {pid}");
428 }
429 if let Some(cmd) = owner.cmd.as_deref() {
430 println!(" cmd: {cmd}");
431 }
432 if let Some(started) = owner.started_at.as_deref() {
433 println!(" started_at: {started}");
434 }
435 if let Some(last) = owner.last_heartbeat.as_deref() {
436 println!(" last_heartbeat: {last}");
437 }
438 if let Some(interval) = owner.heartbeat_ms {
439 println!(" heartbeat_interval_ms: {interval}");
440 }
441 if let Some(file_id) = owner.file_id.as_deref() {
442 println!(" file_id: {file_id}");
443 }
444 if let Some(path) = owner.file_path.as_ref() {
445 println!(" file_path: {}", path.display());
446 }
447 }
448 }
449 None => {
450 if args.json {
451 let output = json!({"locked": false});
452 println!("{}", serde_json::to_string_pretty(&output)?);
453 } else {
454 println!("No active writer for {}", args.file.display());
455 }
456 }
457 }
458 Ok(())
459}
460
461pub fn handle_view(args: ViewArgs) -> Result<()> {
467 if args.page == 0 {
468 bail!("page must be greater than zero");
469 }
470 if let Some(size) = args.page_size {
471 if size == 0 {
472 bail!("page-size must be greater than zero");
473 }
474 }
475
476 let mut mem = open_read_only_mem(&args.file)?;
477 let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
478
479 if args.play {
480 #[cfg(feature = "audio-playback")]
481 {
482 play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
483 return Ok(());
484 }
485 #[cfg(not(feature = "audio-playback"))]
486 {
487 bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
488 }
489 }
490
491 if args.preview {
492 let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
493 preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
494 return Ok(());
495 }
496
497 if args.binary {
498 let bytes = mem.frame_canonical_payload(frame.id)?;
499 let mut stdout = io::stdout();
500 stdout.write_all(&bytes)?;
501 stdout.flush()?;
502 return Ok(());
503 }
504
505 let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
506 let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
507
508 let page_size = args
509 .page_size
510 .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
511 .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
512
513 let mut manifest = if args.page_size.is_none() {
514 manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
515 } else {
516 compute_chunk_manifest(&canonical_text, page_size)
517 };
518 if manifest.chunks.is_empty() {
519 manifest = TextChunkManifest {
520 chunk_chars: page_size,
521 chunks: vec![TextChunkRange {
522 start: 0,
523 end: canonical_text.chars().count(),
524 }],
525 };
526 }
527
528 if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
529 let total_chars = canonical_text.chars().count();
530 manifest = TextChunkManifest {
531 chunk_chars: total_chars.max(1),
532 chunks: vec![TextChunkRange {
533 start: 0,
534 end: total_chars,
535 }],
536 };
537 }
538
539 let total_pages = manifest.chunks.len().max(1);
540 if args.page > total_pages {
541 bail!(
542 "page {} is out of range (total pages: {})",
543 args.page,
544 total_pages
545 );
546 }
547
548 let chunk = &manifest.chunks[args.page - 1];
549 let content = extract_chunk_slice(&canonical_text, chunk);
550
551 if args.json {
552 let mut frame_json = frame_to_json(&frame);
553 if let Some(obj) = frame_json.as_object_mut() {
554 if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
557 if let Some(manifest_obj) = manifest_json.as_object_mut() {
558 let total = manifest.chunks.len();
559 if total > 0 {
560 let mut window = serde_json::Map::new();
561 let idx = args.page.saturating_sub(1).min(total - 1);
562 if idx > 0 {
563 let prev = &manifest.chunks[idx - 1];
564 window.insert("prev".into(), json!([prev.start, prev.end]));
565 }
566 let current = &manifest.chunks[idx];
567 window.insert("current".into(), json!([current.start, current.end]));
568 if idx + 1 < total {
569 let next = &manifest.chunks[idx + 1];
570 window.insert("next".into(), json!([next.start, next.end]));
571 }
572 manifest_obj.insert("chunks".into(), Value::Object(window));
573 }
574 }
575 }
576 }
577 let json = json!({
578 "frame": frame_json,
579 "page": args.page,
580 "page_size": manifest.chunk_chars,
581 "page_count": total_pages,
582 "has_prev": args.page > 1,
583 "has_next": args.page < total_pages,
584 "content": content,
585 });
586 println!("{}", serde_json::to_string_pretty(&json)?);
587 } else {
588 print_frame_summary(&mut mem, &frame)?;
589 println!(
590 "Page {}/{} ({} chars per page)",
591 args.page, total_pages, manifest.chunk_chars
592 );
593 println!();
594 println!("{}", content);
595 }
596 Ok(())
597}
598
/// Optional start/end offsets, in milliseconds, for a media preview.
///
/// Either bound may be absent; an absent bound leaves that side of the
/// preview untrimmed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct PreviewBounds {
    /// Offset at which the preview starts, in milliseconds.
    pub start_ms: Option<u64>,
    /// Offset at which the preview ends, in milliseconds.
    pub end_ms: Option<u64>,
}
604
605pub fn parse_preview_bounds(
606 start: Option<&String>,
607 end: Option<&String>,
608) -> Result<Option<PreviewBounds>> {
609 let start_ms = match start {
610 Some(value) => Some(parse_timecode(value)?),
611 None => None,
612 };
613 let end_ms = match end {
614 Some(value) => Some(parse_timecode(value)?),
615 None => None,
616 };
617
618 if let (Some(s), Some(e)) = (start_ms, end_ms) {
619 if e <= s {
620 anyhow::bail!("--end must be greater than --start");
621 }
622 }
623
624 if start_ms.is_none() && end_ms.is_none() {
625 Ok(None)
626 } else {
627 Ok(Some(PreviewBounds { start_ms, end_ms }))
628 }
629}
630
631fn preview_frame_media(
632 mem: &mut Memvid,
633 frame: &Frame,
634 cli_uri: Option<&str>,
635 bounds: Option<PreviewBounds>,
636) -> Result<()> {
637 let manifest = mem.media_manifest(frame.id)?;
638 let mut mime = manifest
639 .as_ref()
640 .map(|m| m.mime.clone())
641 .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
642 .unwrap_or_else(|| "application/octet-stream".to_string());
643
644 if mime == "application/octet-stream" {
646 if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
647 if let Some(kind) = infer::get(&bytes) {
648 mime = kind.mime_type().to_string();
649 }
650 }
651 }
652
653 let is_video = manifest
654 .as_ref()
655 .map(|media| media.kind.eq_ignore_ascii_case("video"))
656 .unwrap_or_else(|| mime.starts_with("video/"));
657
658 if is_video {
659 preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
660 } else {
661 if bounds.is_some() {
662 anyhow::bail!("--start/--end are only supported for video previews");
663 }
664 if is_image_mime(&mime) {
665 preview_frame_image(mem, frame, cli_uri)?;
666 } else if is_audio_mime(&mime) {
667 preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
668 } else {
669 preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
670 }
671 }
672 Ok(())
673}
674
675fn preview_frame_video(
676 mem: &mut Memvid,
677 frame: &Frame,
678 cli_uri: Option<&str>,
679 bounds: Option<PreviewBounds>,
680 manifest: Option<MediaManifest>,
681 mime: &str,
682) -> Result<()> {
683 let extension = manifest
684 .as_ref()
685 .and_then(|m| m.filename.as_deref())
686 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
687 .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
688 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
689 .unwrap_or_else(|| "mp4".to_string());
690
691 let mut temp_file = Builder::new()
692 .prefix("memvid-preview-")
693 .suffix(&format!(".{extension}"))
694 .tempfile_in(std::env::temp_dir())
695 .context("failed to create temporary preview file")?;
696
697 let mut reader = mem
698 .blob_reader(frame.id)
699 .context("failed to stream payload for preview")?;
700 io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
701 temp_file
702 .flush()
703 .context("failed to flush video preview to disk")?;
704
705 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
706 drop(file);
707
708 let mut display_path = preview_path.clone();
709 if let Some(ref span) = bounds {
710 let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
711 if needs_trim {
712 if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
713 display_path = trimmed;
714 }
715 }
716 }
717
718 println!("Opening preview...");
719 open::that(&display_path).with_context(|| {
720 format!(
721 "failed to launch default video player for {}",
722 display_path.display()
723 )
724 })?;
725
726 let display_uri = cli_uri
727 .or_else(|| frame.uri.as_deref())
728 .unwrap_or("<unknown>");
729 println!(
730 "Opened preview for {} (frame {}) -> {} ({})",
731 display_uri,
732 frame.id,
733 display_path.display(),
734 mime
735 );
736 Ok(())
737}
738
739fn maybe_trim_with_ffmpeg(
740 source: &Path,
741 extension: &str,
742 bounds: &PreviewBounds,
743) -> Result<Option<PathBuf>> {
744 if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
745 return Ok(None);
746 }
747
748 let ffmpeg = match which::which("ffmpeg") {
749 Ok(path) => path,
750 Err(_) => {
751 warn!("ffmpeg binary not found on PATH; opening full video");
752 return Ok(None);
753 }
754 };
755
756 let target = std::env::temp_dir().join(format!(
757 "memvid-preview-clip-{}.{}",
758 Uuid::new_v4(),
759 extension
760 ));
761
762 let mut command = Command::new(ffmpeg);
763 command.arg("-y");
764 if let Some(start) = bounds.start_ms {
765 command.arg("-ss").arg(format_timestamp_ms(start));
766 }
767 command.arg("-i").arg(source);
768 if let Some(end) = bounds.end_ms {
769 command.arg("-to").arg(format_timestamp_ms(end));
770 }
771 command.arg("-c").arg("copy");
772 command.arg(&target);
773
774 let status = command
775 .status()
776 .context("failed to run ffmpeg for preview trimming")?;
777 if status.success() {
778 return Ok(Some(target));
779 }
780
781 let details = status
782 .code()
783 .map(|code| code.to_string())
784 .unwrap_or_else(|| "terminated".to_string());
785 warn!("ffmpeg exited with status {details}; opening full video");
786 Ok(None)
787}
788
789fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
790 let bytes = mem
791 .frame_canonical_payload(frame.id)
792 .context("failed to load canonical payload for frame")?;
793 if bytes.is_empty() {
794 bail!("frame payload is empty; nothing to preview");
795 }
796
797 let detected_kind = infer::get(&bytes);
798 let mut mime = frame
799 .metadata
800 .as_ref()
801 .and_then(|meta| meta.mime.clone())
802 .filter(|value| is_image_mime(value));
803
804 if mime.is_none() {
805 if let Some(kind) = &detected_kind {
806 let candidate = kind.mime_type();
807 if is_image_mime(candidate) {
808 mime = Some(candidate.to_string());
809 }
810 }
811 }
812
813 let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
814 if !is_image_mime(&mime) {
815 bail!("frame mime type {mime} is not an image");
816 }
817
818 let extension = detected_kind
819 .as_ref()
820 .map(|kind| kind.extension().to_string())
821 .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
822 .unwrap_or_else(|| "img".to_string());
823
824 let suffix = format!(".{extension}");
825 let mut temp_file = Builder::new()
826 .prefix("memvid-preview-")
827 .suffix(&suffix)
828 .tempfile_in(std::env::temp_dir())
829 .context("failed to create temporary preview file")?;
830 temp_file
831 .write_all(&bytes)
832 .context("failed to write image data to preview file")?;
833 temp_file
834 .flush()
835 .context("failed to flush preview file to disk")?;
836
837 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
838 drop(file);
839
840 println!("Opening preview...");
841 open::that(&preview_path).with_context(|| {
842 format!(
843 "failed to launch default image viewer for {}",
844 preview_path.display()
845 )
846 })?;
847
848 let display_uri = cli_uri
849 .or_else(|| frame.uri.as_deref())
850 .unwrap_or("<unknown>");
851 println!(
852 "Opened preview for {} (frame {}) -> {} ({})",
853 display_uri,
854 frame.id,
855 preview_path.display(),
856 mime
857 );
858 Ok(())
859}
860
861fn preview_frame_document(
862 mem: &mut Memvid,
863 frame: &Frame,
864 cli_uri: Option<&str>,
865 manifest: Option<&MediaManifest>,
866 mime: &str,
867) -> Result<()> {
868 let bytes = mem
869 .frame_canonical_payload(frame.id)
870 .context("failed to load canonical payload for frame")?;
871 if bytes.is_empty() {
872 bail!("frame payload is empty; nothing to preview");
873 }
874
875 let mut extension = manifest
876 .and_then(|m| m.filename.as_deref())
877 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
878 .map(|ext| ext.trim_start_matches('.').to_string())
879 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
880 .unwrap_or_else(|| "bin".to_string());
881
882 if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
883 extension = "txt".to_string();
884 }
885
886 let suffix = format!(".{extension}");
887 let mut temp_file = Builder::new()
888 .prefix("memvid-preview-")
889 .suffix(&suffix)
890 .tempfile_in(std::env::temp_dir())
891 .context("failed to create temporary preview file")?;
892 temp_file
893 .write_all(&bytes)
894 .context("failed to write document data to preview file")?;
895 temp_file
896 .flush()
897 .context("failed to flush preview file to disk")?;
898
899 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
900 drop(file);
901
902 println!("Opening preview...");
903 open::that(&preview_path).with_context(|| {
904 format!(
905 "failed to launch default viewer for {}",
906 preview_path.display()
907 )
908 })?;
909
910 let display_uri = cli_uri
911 .or_else(|| frame.uri.as_deref())
912 .unwrap_or("<unknown>");
913 println!(
914 "Opened preview for {} (frame {}) -> {} ({})",
915 display_uri,
916 frame.id,
917 preview_path.display(),
918 mime
919 );
920 Ok(())
921}
922
923fn preview_frame_audio_file(
924 mem: &mut Memvid,
925 frame: &Frame,
926 cli_uri: Option<&str>,
927 manifest: Option<&MediaManifest>,
928 mime: &str,
929) -> Result<()> {
930 let bytes = mem
931 .frame_canonical_payload(frame.id)
932 .context("failed to load canonical payload for frame")?;
933 if bytes.is_empty() {
934 bail!("frame payload is empty; nothing to preview");
935 }
936
937 let mut extension = manifest
938 .and_then(|m| m.filename.as_deref())
939 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
940 .map(|ext| ext.trim_start_matches('.').to_string())
941 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
942 .unwrap_or_else(|| "audio".to_string());
943
944 if extension == "bin" {
945 extension = "audio".to_string();
946 }
947
948 let suffix = format!(".{extension}");
949 let mut temp_file = Builder::new()
950 .prefix("memvid-preview-")
951 .suffix(&suffix)
952 .tempfile_in(std::env::temp_dir())
953 .context("failed to create temporary preview file")?;
954 temp_file
955 .write_all(&bytes)
956 .context("failed to write audio data to preview file")?;
957 temp_file
958 .flush()
959 .context("failed to flush preview file to disk")?;
960
961 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
962 drop(file);
963
964 println!("Opening preview...");
965 open::that(&preview_path).with_context(|| {
966 format!(
967 "failed to launch default audio player for {}",
968 preview_path.display()
969 )
970 })?;
971
972 let display_uri = cli_uri
973 .or_else(|| frame.uri.as_deref())
974 .unwrap_or("<unknown>");
975 println!(
976 "Opened preview for {} (frame {}) -> {} ({})",
977 display_uri,
978 frame.id,
979 preview_path.display(),
980 mime
981 );
982 Ok(())
983}
984
/// Decodes a frame's payload as audio and plays it on the default output
/// device, blocking until playback finishes.
///
/// `start_seconds` defaults to 0 and negative values are clamped to 0. When
/// the frame metadata carries a positive duration, the start must lie inside
/// it and the end is clamped to it.
///
/// # Errors
///
/// Fails if an offset is not a finite number or is too large to represent, if
/// the end is not after the start, if the payload is empty or cannot be
/// decoded, or if no audio output is available.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    // Reject NaN/infinity up front: `Duration::from_secs_f32` would panic on
    // them, and they come straight from the command line.
    for (flag, value) in [
        ("--start-seconds", start_seconds),
        ("--end-seconds", end_seconds),
    ] {
        if value.map_or(false, |secs| !secs.is_finite()) {
            bail!("{flag} must be a finite number");
        }
    }

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // 0.0 stands for "duration unknown"; every duration check below is skipped then.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    if let Some(end) = end_seconds {
        if duration_meta > 0.0 && end > duration_meta + f32::EPSILON {
            warn!(
                "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                end, duration_meta
            );
        }
    }

    // Fallible conversion: finite values can still overflow `Duration`.
    let skip = Duration::try_from_secs_f32(start)
        .map_err(|err| anyhow!("--start-seconds is not a valid duration: {err}"))?;

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` must stay alive for the whole playback; dropping it stops the output.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    if let Some(end) = end_seconds {
        let effective_end = if duration_meta > 0.0 {
            end.min(duration_meta)
        } else {
            end
        };
        let duration = (effective_end - start).max(0.0);
        if duration <= 0.0 {
            bail!("playback duration is zero; adjust start/end seconds");
        }
        let take = Duration::try_from_secs_f32(duration)
            .map_err(|err| anyhow!("--end-seconds is not a valid duration: {err}"))?;
        let source = decoder.skip_duration(skip).take_duration(take);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → {effective_end:.2}s");
        announce_playback(display_uri, &segment_desc);
    } else {
        let source = decoder.skip_duration(skip);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → end");
        announce_playback(display_uri, &segment_desc);
    }
    sink.sleep_until_end();
    Ok(())
}
1060
/// Prints a one-line notice naming the URI and the segment being played.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    println!("Playing {} ({})", uri, segment_desc);
}
1065
/// Returns `true` when `value` names an `image/*` MIME type.
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and
/// the comparison is ASCII case-insensitive.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";
    let essence = match value.split(';').next() {
        Some(head) => head,
        None => value,
    }
    .trim()
    .as_bytes();
    essence.len() >= PREFIX.len() && essence[..PREFIX.len()].eq_ignore_ascii_case(PREFIX)
}
1070
/// Returns `true` when `value` names an `audio/*` MIME type.
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and
/// the comparison is ASCII case-insensitive.
fn is_audio_mime(value: &str) -> bool {
    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    let lowered = essence.trim().to_ascii_lowercase();
    lowered.starts_with("audio/")
}
1075
/// Maps a MIME type to a conventional file extension (without the dot).
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and
/// the lookup is ASCII case-insensitive. Returns `None` for MIME types that
/// are not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // (MIME type, extension) pairs; every MIME key appears exactly once.
    const EXTENSIONS: &[(&str, &str)] = &[
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
        ("image/x-icon", "ico"),
        ("image/vnd.microsoft.icon", "ico"),
        ("image/svg+xml", "svg"),
        ("video/mp4", "mp4"),
        ("video/iso.segment", "mp4"),
        ("video/quicktime", "mov"),
        ("video/webm", "webm"),
        ("video/x-matroska", "mkv"),
        ("video/matroska", "mkv"),
        ("video/x-msvideo", "avi"),
        ("video/mpeg", "mpg"),
        ("application/pdf", "pdf"),
        ("audio/mpeg", "mp3"),
        ("audio/mp3", "mp3"),
        ("audio/wav", "wav"),
        ("audio/x-wav", "wav"),
        ("audio/x-flac", "flac"),
        ("audio/flac", "flac"),
        ("audio/ogg", "ogg"),
        ("audio/vorbis", "ogg"),
        ("audio/x-m4a", "m4a"),
        ("audio/mp4", "m4a"),
        ("audio/aac", "aac"),
        ("audio/x-aiff", "aiff"),
        ("audio/aiff", "aiff"),
        ("text/plain", "txt"),
        ("text/markdown", "md"),
        ("text/x-markdown", "md"),
        ("text/html", "html"),
        ("application/xhtml+xml", "xhtml"),
        ("application/json", "json"),
        ("text/json", "json"),
        ("application/vnd.api+json", "json"),
        ("application/xml", "xml"),
        ("text/xml", "xml"),
        ("text/csv", "csv"),
        ("application/csv", "csv"),
        ("application/javascript", "js"),
        ("text/javascript", "js"),
        ("text/css", "css"),
        ("application/yaml", "yaml"),
        ("application/x-yaml", "yaml"),
        ("text/yaml", "yaml"),
        ("application/rtf", "rtf"),
        ("application/msword", "doc"),
        (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docx",
        ),
        ("application/vnd.ms-powerpoint", "ppt"),
        (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "pptx",
        ),
        ("application/vnd.ms-excel", "xls"),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "xlsx",
        ),
        ("application/zip", "zip"),
        ("application/x-tar", "tar"),
        ("application/x-7z-compressed", "7z"),
    ];

    let essence = mime
        .split(';')
        .next()
        .unwrap_or(mime)
        .trim()
        .to_ascii_lowercase();

    EXTENSIONS
        .iter()
        .find(|(known, _)| *known == essence.as_str())
        .map(|&(_, ext)| ext)
}
/// Builds a short preview of a frame's search text.
///
/// Returns `None` when `text` is absent or contains only whitespace; otherwise returns the first
/// 160 characters of the trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    let trimmed = text?.trim();
    (!trimmed.is_empty()).then(|| trimmed.chars().take(160).collect())
}
1139pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1140 json!({
1141 "id": frame.id,
1142 "status": frame_status_str(frame.status),
1143 "timestamp": frame.timestamp,
1144 "kind": frame.kind,
1145 "track": frame.track,
1146 "uri": frame.uri,
1147 "title": frame.title,
1148 "payload_length": frame.payload_length,
1149 "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1150 "canonical_length": frame.canonical_length,
1151 "role": format!("{:?}", frame.role),
1152 "parent_id": frame.parent_id,
1153 "chunk_index": frame.chunk_index,
1154 "chunk_count": frame.chunk_count,
1155 "tags": frame.tags,
1156 "labels": frame.labels,
1157 "search_text": frame.search_text,
1158 "metadata": frame.metadata,
1159 "extra_metadata": frame.extra_metadata,
1160 "content_dates": frame.content_dates,
1161 "chunk_manifest": frame.chunk_manifest,
1162 "supersedes": frame.supersedes,
1163 "superseded_by": frame.superseded_by,
1164 })
1165}
1166pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1167 println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1168 println!("Timestamp: {}", frame.timestamp);
1169 if let Some(uri) = &frame.uri {
1170 println!("URI: {uri}");
1171 }
1172 if let Some(title) = &frame.title {
1173 println!("Title: {title}");
1174 }
1175 if let Some(kind) = &frame.kind {
1176 println!("Kind: {kind}");
1177 }
1178 if let Some(track) = &frame.track {
1179 println!("Track: {track}");
1180 }
1181 if let Some(supersedes) = frame.supersedes {
1182 println!("Supersedes frame: {supersedes}");
1183 }
1184 if let Some(successor) = frame.superseded_by {
1185 println!("Superseded by frame: {successor}");
1186 }
1187 println!(
1188 "Payload: {} bytes (canonical {:?}, logical {:?})",
1189 frame.payload_length, frame.canonical_encoding, frame.canonical_length
1190 );
1191 if !frame.tags.is_empty() {
1192 println!("Tags: {}", frame.tags.join(", "));
1193 }
1194 if !frame.labels.is_empty() {
1195 println!("Labels: {}", frame.labels.join(", "));
1196 }
1197 if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1198 println!("Search text: {snippet}");
1199 }
1200 if let Some(meta) = &frame.metadata {
1201 let rendered = serde_json::to_string_pretty(meta)?;
1202 println!("Metadata: {rendered}");
1203 }
1204 if !frame.extra_metadata.is_empty() {
1205 let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1206 entries.sort_by(|a, b| a.0.cmp(b.0));
1207 println!("Extra metadata:");
1208 for (key, value) in entries {
1209 println!(" {key}: {value}");
1210 }
1211 }
1212 if !frame.content_dates.is_empty() {
1213 println!("Content dates: {}", frame.content_dates.join(", "));
1214 }
1215 match mem.frame_embedding(frame.id) {
1216 Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1217 Ok(None) => println!("Embedding: none"),
1218 Err(err) => println!("Embedding: unavailable ({err})"),
1219 }
1220 Ok(())
1221}
1222fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1223 let bytes = mem.frame_canonical_payload(frame.id)?;
1224 let raw = match String::from_utf8(bytes) {
1225 Ok(text) => text,
1226 Err(err) => {
1227 let bytes = err.into_bytes();
1228 String::from_utf8_lossy(&bytes).into_owned()
1229 }
1230 };
1231
1232 Ok(normalize_text(&raw, usize::MAX)
1233 .map(|n| n.text)
1234 .unwrap_or_default())
1235}
1236
1237fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1238 if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1239 return false;
1240 }
1241 let total_chars = text.chars().count();
1242 manifest
1243 .chunks
1244 .iter()
1245 .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1246}
1247
1248fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1249 let primary = frame
1250 .chunk_manifest
1251 .clone()
1252 .filter(|manifest| manifests_match_text(text, manifest));
1253 if primary.is_some() {
1254 return primary;
1255 }
1256
1257 frame
1258 .extra_metadata
1259 .get(CHUNK_MANIFEST_KEY)
1260 .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1261 .filter(|manifest| manifests_match_text(text, manifest))
1262}
1263
1264fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1265 let normalized = normalize_text(text, usize::MAX)
1266 .map(|n| n.text)
1267 .unwrap_or_default();
1268
1269 let effective_chunk = chunk_chars.max(1);
1270 let total_chars = normalized.chars().count();
1271 if total_chars == 0 {
1272 return TextChunkManifest {
1273 chunk_chars: effective_chunk,
1274 chunks: vec![TextChunkRange { start: 0, end: 0 }],
1275 };
1276 }
1277 if total_chars <= effective_chunk {
1278 return TextChunkManifest {
1279 chunk_chars: effective_chunk,
1280 chunks: vec![TextChunkRange {
1281 start: 0,
1282 end: total_chars,
1283 }],
1284 };
1285 }
1286 let mut chunks = Vec::new();
1287 let mut start = 0usize;
1288 while start < total_chars {
1289 let end = (start + effective_chunk).min(total_chars);
1290 chunks.push(TextChunkRange { start, end });
1291 start = end;
1292 }
1293 TextChunkManifest {
1294 chunk_chars: effective_chunk,
1295 chunks,
1296 }
1297}
1298
1299fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1300 if range.start >= range.end || text.is_empty() {
1301 return String::new();
1302 }
1303 let mut start_byte = text.len();
1304 let mut end_byte = text.len();
1305 let mut idx = 0usize;
1306 for (byte_offset, _) in text.char_indices() {
1307 if idx == range.start {
1308 start_byte = byte_offset;
1309 }
1310 if idx == range.end {
1311 end_byte = byte_offset;
1312 break;
1313 }
1314 idx += 1;
1315 }
1316 if start_byte == text.len() {
1317 return String::new();
1318 }
1319 if end_byte == text.len() {
1320 end_byte = text.len();
1321 }
1322 text[start_byte..end_byte].to_string()
1323}