1#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use memvid_core::table::list_tables;
14use memvid_core::{
15 lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
16 TextChunkRange,
17};
18use serde_json::{json, Value};
19use tempfile::Builder;
20use tracing::warn;
21use uuid::Uuid;
22use hex;
23
24use crate::config::CliConfig;
25use crate::utils::{
26 format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
27 owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
28};
29
// Fallback page size (in characters) used by `handle_view` when neither
// `--page-size` nor a chunk manifest on the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
// NOTE(review): presumably the metadata key under which a frame's chunk
// manifest is stored; its use is outside this part of the file — confirm.
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
32
// Command-line arguments for the `view` command (consumed by `handle_view`).
//
// Plain `//` comments are used on purpose: `///` doc comments on clap fields
// become help text and would change the CLI's `--help` output.
#[derive(Args)]
pub struct ViewArgs {
    // Path to the memory file to open (read-only).
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Select the frame by numeric id; mutually exclusive with `--uri`.
    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
    pub frame_id: Option<u64>,
    // Select the frame by URI; mutually exclusive with `--frame-id`.
    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
    pub uri: Option<String>,
    // Emit the frame and the requested page as pretty-printed JSON.
    #[arg(long)]
    pub json: bool,
    // Write the frame's canonical payload bytes to stdout.
    #[arg(long, conflicts_with = "json")]
    pub binary: bool,
    // Open the frame's media with the system default viewer/player.
    #[arg(long, conflicts_with_all = ["json", "binary"])]
    pub preview: bool,
    // Preview start timecode; only valid together with `--preview`.
    #[arg(
        long = "start",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_start: Option<String>,
    // Preview end timecode; only valid together with `--preview`.
    #[arg(
        long = "end",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_end: Option<String>,
    // Play the frame's audio (needs the `audio-playback` feature at build time).
    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
    pub play: bool,
    // Playback start offset in seconds; only valid together with `--play`.
    #[arg(long = "start-seconds", requires = "play")]
    pub start_seconds: Option<f32>,
    // Playback end offset in seconds; only valid together with `--play`.
    #[arg(long = "end-seconds", requires = "play")]
    pub end_seconds: Option<f32>,
    // 1-based page number of the text to show (`handle_view` rejects 0).
    #[arg(long, value_name = "N", default_value_t = 1)]
    pub page: usize,
    // Page size in characters; overrides any chunk manifest stored on the frame.
    #[arg(long = "page-size", value_name = "CHARS")]
    pub page_size: Option<usize>,
}
75
// Command-line arguments for the `stats` command (consumed by `handle_stats`).
//
// Plain `//` comments are used on purpose: `///` doc comments on clap fields
// become help text and would change the CLI's `--help` output.
#[derive(Args)]
pub struct StatsArgs {
    // Path to the memory file to inspect.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the report as pretty-printed JSON instead of plain text.
    #[arg(long)]
    pub json: bool,
    // Replay cut-off by frame id. `handle_stats` accepts it but only prints a
    // note to stderr; the statistics shown are always the current ones.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Replay cut-off by Unix timestamp; handled the same way as `as_of_frame`.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
90
// Command-line arguments for the `who` command (consumed by `handle_who`).
//
// Plain `//` comments are used on purpose: `///` doc comments on clap fields
// become help text and would change the CLI's `--help` output.
#[derive(Args)]
pub struct WhoArgs {
    // Path to the memory file whose lock owner should be reported.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the lock status as pretty-printed JSON instead of plain text.
    #[arg(long)]
    pub json: bool,
}
99
100pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
102 let mut mem = Memvid::open_read_only(&args.file)?;
103 let stats = mem.stats()?;
104 let tables = list_tables(&mut mem).unwrap_or_default();
105 let vec_dimension = mem.effective_vec_index_dimension()?;
106 let embedding_identity = mem.embedding_identity_summary(10_000);
107
108 if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
111 eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
112 eprintln!(" Use 'find' or 'timeline' commands for filtered results.");
113 }
114 let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
115 let payload_share_percent: f64 = if stats.size_bytes > 0 {
116 round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
117 } else {
118 0.0
119 };
120 let overhead_share_percent: f64 = if stats.size_bytes > 0 {
121 round_percent((100.0 - payload_share_percent).max(0.0))
122 } else {
123 0.0
124 };
125 let maintenance_command = format!(
126 "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
127 args.file.display()
128 );
129
130 if args.json {
131 let mut raw_json = serde_json::to_value(&stats)?;
132 if let Value::Object(ref mut obj) = raw_json {
133 obj.remove("tier");
134 }
135
136 let tables_json: Vec<serde_json::Value> = tables
138 .iter()
139 .map(|t| {
140 json!({
141 "table_id": t.table_id,
142 "source_file": t.source_file,
143 "n_rows": t.n_rows,
144 "n_cols": t.n_cols,
145 "pages": format!("{}-{}", t.page_start, t.page_end),
146 "quality": format!("{:?}", t.quality),
147 "headers": t.headers,
148 })
149 })
150 .collect();
151
152 let embedding_quality_json = if stats.has_vec_index {
154 mem.embedding_quality().ok().flatten().map(|eq| {
155 json!({
156 "vector_count": eq.vector_count,
157 "dimension": eq.dimension,
158 "avg_similarity": eq.avg_similarity,
159 "min_similarity": eq.min_similarity,
160 "max_similarity": eq.max_similarity,
161 "std_similarity": eq.std_similarity,
162 "clustering_coefficient": eq.clustering_coefficient,
163 "estimated_clusters": eq.estimated_clusters,
164 "recommended_threshold": eq.recommended_threshold,
165 "quality_rating": eq.quality_rating,
166 "quality_explanation": eq.quality_explanation,
167 })
168 })
169 } else {
170 None
171 };
172
173 let embedding_identity_json = match &embedding_identity {
174 memvid_core::EmbeddingIdentitySummary::Unknown => Value::Null,
175 memvid_core::EmbeddingIdentitySummary::Single(identity) => json!({
176 "provider": identity.provider.as_deref(),
177 "model": identity.model.as_deref(),
178 "dimension": identity.dimension.or(vec_dimension),
179 "normalized": identity.normalized,
180 }),
181 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
182 let values: Vec<Value> = identities
183 .iter()
184 .map(|entry| {
185 json!({
186 "provider": entry.identity.provider.as_deref(),
187 "model": entry.identity.model.as_deref(),
188 "dimension": entry.identity.dimension.or(vec_dimension),
189 "normalized": entry.identity.normalized,
190 "count": entry.count,
191 })
192 })
193 .collect();
194 json!({ "mixed": values })
195 }
196 };
197
198 let enrichment_stats = mem.enrichment_stats();
200 let enrichment_json = json!({
201 "total_frames": enrichment_stats.total_frames,
202 "enriched_frames": enrichment_stats.enriched_frames,
203 "pending_frames": enrichment_stats.pending_frames,
204 "searchable_only": enrichment_stats.searchable_only,
205 });
206
207 let report = json!({
208 "summary": {
209 "sequence": stats.seq_no,
210 "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
211 "usage": format!(
212 "{} used / {} total ({})",
213 format_bytes(stats.size_bytes),
214 format_bytes(stats.capacity_bytes),
215 format_percent(stats.storage_utilisation_percent)
216 ),
217 "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
218 },
219 "storage": {
220 "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
221 "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
222 "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
223 "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
224 "compression_ratio": format_percent(stats.compression_ratio_percent),
225 },
226 "frames": {
227 "average_stored": format_bytes(stats.average_frame_payload_bytes),
228 "average_logical": format_bytes(stats.average_frame_logical_bytes),
229 "clip_images": stats.clip_image_count,
230 },
231 "indexes": {
232 "lexical": yes_no(stats.has_lex_index),
233 "vector": yes_no(stats.has_vec_index),
234 "time": yes_no(stats.has_time_index),
235 },
236 "enrichment": enrichment_json,
237 "embedding_identity": embedding_identity_json,
238 "embedding_quality": embedding_quality_json,
239 "tables": {
240 "count": tables.len(),
241 "tables": tables_json,
242 },
243 "maintenance": maintenance_command,
244 "raw": raw_json,
245 });
246
247 println!("{}", serde_json::to_string_pretty(&report)?);
248 } else {
249 let seq_display = stats
250 .seq_no
251 .map(|seq| seq.to_string())
252 .unwrap_or_else(|| "n/a".to_string());
253
254 println!("Memory: {}", args.file.display());
255 println!("Sequence: {}", seq_display);
256 println!(
257 "Frames: {} total ({} active)",
258 stats.frame_count, stats.active_frame_count
259 );
260
261 println!("\nCapacity:");
262 println!(
263 " Usage: {} used / {} total ({})",
264 format_bytes(stats.size_bytes),
265 format_bytes(stats.capacity_bytes),
266 format_percent(stats.storage_utilisation_percent)
267 );
268 println!(
269 " Remaining: {}",
270 format_bytes(stats.remaining_capacity_bytes)
271 );
272
273 println!("\nStorage breakdown:");
274 println!(
275 " Payload: {} ({})",
276 format_bytes(stats.payload_bytes),
277 format_percent(payload_share_percent)
278 );
279 println!(
280 " Overhead: {} ({})",
281 format_bytes(overhead_bytes),
282 format_percent(overhead_share_percent)
283 );
284 println!(" ├─ WAL: {}", format_bytes(stats.wal_bytes));
286 println!(
287 " ├─ Lexical index: {}",
288 format_bytes(stats.lex_index_bytes)
289 );
290 println!(
291 " ├─ Vector index: {}",
292 format_bytes(stats.vec_index_bytes)
293 );
294 println!(
295 " └─ Time index: {}",
296 format_bytes(stats.time_index_bytes)
297 );
298 println!(
299 " Logical payload: {} before compression",
300 format_bytes(stats.logical_bytes)
301 );
302
303 if stats.has_vec_index {
304 println!("\nEmbeddings:");
305 if let Some(dim) = vec_dimension {
306 println!(" Dimension: {}", dim);
307 }
308 match &embedding_identity {
309 memvid_core::EmbeddingIdentitySummary::Unknown => {
310 println!(" Model: unknown (no persisted embedding identity)");
311 }
312 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
313 if let Some(provider) = identity.provider.as_deref() {
314 println!(" Provider: {}", provider);
315 }
316 if let Some(model) = identity.model.as_deref() {
317 println!(" Model: {}", model);
318 }
319 }
320 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
321 println!(" Model: mixed ({} identities detected)", identities.len());
322 for entry in identities.iter().take(5) {
323 let provider = entry.identity.provider.as_deref().unwrap_or("unknown");
324 let model = entry.identity.model.as_deref().unwrap_or("unknown");
325 println!(" - {} / {} ({} frames)", provider, model, entry.count);
326 }
327 if identities.len() > 5 {
328 println!(" - ...");
329 }
330 }
331 }
332 }
333 println!(
334 " Compression savings: {} ({})",
335 format_bytes(stats.saved_bytes),
336 format_percent(stats.savings_percent)
337 );
338
339 println!("\nAverage frame:");
340 println!(
341 " Stored: {} Logical: {}",
342 format_bytes(stats.average_frame_payload_bytes),
343 format_bytes(stats.average_frame_logical_bytes)
344 );
345 if stats.clip_image_count > 0 {
346 println!(" CLIP images: {}", stats.clip_image_count);
347 }
348
349 if stats.active_frame_count > 0 {
351 let overhead_per_doc = overhead_bytes / stats.active_frame_count;
352 let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
353 let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
354
355 println!("\nPer-document overhead:");
356 println!(" Total: {}", format_bytes(overhead_per_doc));
357 if stats.has_lex_index {
358 println!(" Lexical: {}", format_bytes(lex_per_doc));
359 }
360 if stats.has_vec_index {
361 let vec_ratio = if stats.average_frame_payload_bytes > 0 {
362 vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
363 } else {
364 0.0
365 };
366 println!(
367 " Vector: {} ({:.0}x text size)",
368 format_bytes(vec_per_doc),
369 vec_ratio
370 );
371 }
372 }
373
374 println!("\nIndexes:");
375 println!(
376 " Lexical: {} Vector: {} Time: {}",
377 yes_no(stats.has_lex_index),
378 yes_no(stats.has_vec_index),
379 yes_no(stats.has_time_index)
380 );
381
382 let enrichment_stats = mem.enrichment_stats();
384 if enrichment_stats.pending_frames > 0 || enrichment_stats.searchable_only > 0 {
385 println!("\nEnrichment:");
386 println!(
387 " Enriched: {} / {}",
388 enrichment_stats.enriched_frames, enrichment_stats.total_frames
389 );
390 if enrichment_stats.pending_frames > 0 {
391 println!(" Pending: {} frames", enrichment_stats.pending_frames);
392 println!(
393 " Run `memvid process-queue {}` to complete enrichment",
394 args.file.display()
395 );
396 }
397 }
398
399 if stats.has_vec_index {
401 if let Ok(Some(eq)) = mem.embedding_quality() {
402 println!("\nEmbedding Quality:");
403 println!(
404 " Vectors: {} Dimension: {}",
405 eq.vector_count, eq.dimension
406 );
407 println!(
408 " Similarity: avg={:.3} min={:.3} max={:.3} std={:.3}",
409 eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
410 );
411 println!(
412 " Clusters: ~{} Quality: {}",
413 eq.estimated_clusters, eq.quality_rating
414 );
415 println!(
416 " Recommended --min-relevancy: {:.1}",
417 eq.recommended_threshold
418 );
419 println!(" {}", eq.quality_explanation);
420 }
421 }
422
423 if !tables.is_empty() {
424 println!("\nTables: {} extracted", tables.len());
425 for t in &tables {
426 println!(
427 " {} — {} rows × {} cols ({})",
428 t.table_id, t.n_rows, t.n_cols, t.source_file
429 );
430 }
431 }
432
433 println!("\nMaintenance:");
434 println!(
435 " Run `{}` to rebuild indexes and reclaim space.",
436 maintenance_command
437 );
438 }
439 Ok(())
440}
441
442pub fn handle_who(args: WhoArgs) -> Result<()> {
444 match lockfile::current_owner(&args.file)? {
445 Some(owner) => {
446 if args.json {
447 let output = json!({
448 "locked": true,
449 "owner": owner_hint_to_json(&owner),
450 });
451 println!("{}", serde_json::to_string_pretty(&output)?);
452 } else {
453 println!("{} is locked by:", args.file.display());
454 if let Some(pid) = owner.pid {
455 println!(" pid: {pid}");
456 }
457 if let Some(cmd) = owner.cmd.as_deref() {
458 println!(" cmd: {cmd}");
459 }
460 if let Some(started) = owner.started_at.as_deref() {
461 println!(" started_at: {started}");
462 }
463 if let Some(last) = owner.last_heartbeat.as_deref() {
464 println!(" last_heartbeat: {last}");
465 }
466 if let Some(interval) = owner.heartbeat_ms {
467 println!(" heartbeat_interval_ms: {interval}");
468 }
469 if let Some(file_id) = owner.file_id.as_deref() {
470 println!(" file_id: {file_id}");
471 }
472 if let Some(path) = owner.file_path.as_ref() {
473 println!(" file_path: {}", path.display());
474 }
475 }
476 }
477 None => {
478 if args.json {
479 let output = json!({"locked": false});
480 println!("{}", serde_json::to_string_pretty(&output)?);
481 } else {
482 println!("No active writer for {}", args.file.display());
483 }
484 }
485 }
486 Ok(())
487}
488
489pub fn handle_view(args: ViewArgs) -> Result<()> {
495 if args.page == 0 {
496 bail!("page must be greater than zero");
497 }
498 if let Some(size) = args.page_size {
499 if size == 0 {
500 bail!("page-size must be greater than zero");
501 }
502 }
503
504 let mut mem = open_read_only_mem(&args.file)?;
505 let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
506
507 if args.play {
508 #[cfg(feature = "audio-playback")]
509 {
510 play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
511 return Ok(());
512 }
513 #[cfg(not(feature = "audio-playback"))]
514 {
515 bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
516 }
517 }
518
519 if args.preview {
520 let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
521 preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
522 return Ok(());
523 }
524
525 if args.binary {
526 let bytes = mem.frame_canonical_payload(frame.id)?;
527 let mut stdout = io::stdout();
528 stdout.write_all(&bytes)?;
529 stdout.flush()?;
530 return Ok(());
531 }
532
533 let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
534 let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
535
536 let page_size = args
537 .page_size
538 .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
539 .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
540
541 let mut manifest = if args.page_size.is_none() {
542 manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
543 } else {
544 compute_chunk_manifest(&canonical_text, page_size)
545 };
546 if manifest.chunks.is_empty() {
547 manifest = TextChunkManifest {
548 chunk_chars: page_size,
549 chunks: vec![TextChunkRange {
550 start: 0,
551 end: canonical_text.chars().count(),
552 }],
553 };
554 }
555
556 if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
557 let total_chars = canonical_text.chars().count();
558 manifest = TextChunkManifest {
559 chunk_chars: total_chars.max(1),
560 chunks: vec![TextChunkRange {
561 start: 0,
562 end: total_chars,
563 }],
564 };
565 }
566
567 let total_pages = manifest.chunks.len().max(1);
568 if args.page > total_pages {
569 bail!(
570 "page {} is out of range (total pages: {})",
571 args.page,
572 total_pages
573 );
574 }
575
576 let chunk = &manifest.chunks[args.page - 1];
577 let content = extract_chunk_slice(&canonical_text, chunk);
578
579 if args.json {
580 let mut frame_json = frame_to_json(&frame);
581 if let Some(obj) = frame_json.as_object_mut() {
582 if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
585 if let Some(manifest_obj) = manifest_json.as_object_mut() {
586 let total = manifest.chunks.len();
587 if total > 0 {
588 let mut window = serde_json::Map::new();
589 let idx = args.page.saturating_sub(1).min(total - 1);
590 if idx > 0 {
591 let prev = &manifest.chunks[idx - 1];
592 window.insert("prev".into(), json!([prev.start, prev.end]));
593 }
594 let current = &manifest.chunks[idx];
595 window.insert("current".into(), json!([current.start, current.end]));
596 if idx + 1 < total {
597 let next = &manifest.chunks[idx + 1];
598 window.insert("next".into(), json!([next.start, next.end]));
599 }
600 manifest_obj.insert("chunks".into(), Value::Object(window));
601 }
602 }
603 }
604 }
605 let json = json!({
606 "frame": frame_json,
607 "page": args.page,
608 "page_size": manifest.chunk_chars,
609 "page_count": total_pages,
610 "has_prev": args.page > 1,
611 "has_next": args.page < total_pages,
612 "content": content,
613 });
614 println!("{}", serde_json::to_string_pretty(&json)?);
615 } else {
616 print_frame_summary(&mut mem, &frame)?;
617 println!(
618 "Page {}/{} ({} chars per page)",
619 args.page, total_pages, manifest.chunk_chars
620 );
621 println!();
622 println!("{}", content);
623 }
624 Ok(())
625}
626
/// Optional start/end offsets for a media preview, in milliseconds.
///
/// Built by `parse_preview_bounds` from the `--start` / `--end` timecodes;
/// either side may be absent.
#[derive(Debug)]
pub struct PreviewBounds {
    /// Offset at which the preview should start, if one was given.
    pub start_ms: Option<u64>,
    /// Offset at which the preview should stop, if one was given.
    pub end_ms: Option<u64>,
}
632
633pub fn parse_preview_bounds(
634 start: Option<&String>,
635 end: Option<&String>,
636) -> Result<Option<PreviewBounds>> {
637 let start_ms = match start {
638 Some(value) => Some(parse_timecode(value)?),
639 None => None,
640 };
641 let end_ms = match end {
642 Some(value) => Some(parse_timecode(value)?),
643 None => None,
644 };
645
646 if let (Some(s), Some(e)) = (start_ms, end_ms) {
647 if e <= s {
648 anyhow::bail!("--end must be greater than --start");
649 }
650 }
651
652 if start_ms.is_none() && end_ms.is_none() {
653 Ok(None)
654 } else {
655 Ok(Some(PreviewBounds { start_ms, end_ms }))
656 }
657}
658
659fn preview_frame_media(
660 mem: &mut Memvid,
661 frame: &Frame,
662 cli_uri: Option<&str>,
663 bounds: Option<PreviewBounds>,
664) -> Result<()> {
665 let manifest = mem.media_manifest(frame.id)?;
666 let mut mime = manifest
667 .as_ref()
668 .map(|m| m.mime.clone())
669 .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
670 .unwrap_or_else(|| "application/octet-stream".to_string());
671
672 if mime == "application/octet-stream" {
674 if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
675 if let Some(kind) = infer::get(&bytes) {
676 mime = kind.mime_type().to_string();
677 }
678 }
679 }
680
681 let is_video = manifest
682 .as_ref()
683 .map(|media| media.kind.eq_ignore_ascii_case("video"))
684 .unwrap_or_else(|| mime.starts_with("video/"));
685
686 if is_video {
687 preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
688 } else {
689 if bounds.is_some() {
690 anyhow::bail!("--start/--end are only supported for video previews");
691 }
692 if is_image_mime(&mime) {
693 preview_frame_image(mem, frame, cli_uri)?;
694 } else if is_audio_mime(&mime) {
695 preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
696 } else {
697 preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
698 }
699 }
700 Ok(())
701}
702
703fn preview_frame_video(
704 mem: &mut Memvid,
705 frame: &Frame,
706 cli_uri: Option<&str>,
707 bounds: Option<PreviewBounds>,
708 manifest: Option<MediaManifest>,
709 mime: &str,
710) -> Result<()> {
711 let extension = manifest
712 .as_ref()
713 .and_then(|m| m.filename.as_deref())
714 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
715 .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
716 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
717 .unwrap_or_else(|| "mp4".to_string());
718
719 let mut temp_file = Builder::new()
720 .prefix("memvid-preview-")
721 .suffix(&format!(".{extension}"))
722 .tempfile_in(std::env::temp_dir())
723 .context("failed to create temporary preview file")?;
724
725 let mut reader = mem
726 .blob_reader(frame.id)
727 .context("failed to stream payload for preview")?;
728 io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
729 temp_file
730 .flush()
731 .context("failed to flush video preview to disk")?;
732
733 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
734 drop(file);
735
736 let mut display_path = preview_path.clone();
737 if let Some(ref span) = bounds {
738 let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
739 if needs_trim {
740 if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
741 display_path = trimmed;
742 }
743 }
744 }
745
746 println!("Opening preview...");
747 open::that(&display_path).with_context(|| {
748 format!(
749 "failed to launch default video player for {}",
750 display_path.display()
751 )
752 })?;
753
754 let display_uri = cli_uri
755 .or_else(|| frame.uri.as_deref())
756 .unwrap_or("<unknown>");
757 println!(
758 "Opened preview for {} (frame {}) -> {} ({})",
759 display_uri,
760 frame.id,
761 display_path.display(),
762 mime
763 );
764 Ok(())
765}
766
767fn maybe_trim_with_ffmpeg(
768 source: &Path,
769 extension: &str,
770 bounds: &PreviewBounds,
771) -> Result<Option<PathBuf>> {
772 if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
773 return Ok(None);
774 }
775
776 let ffmpeg = match which::which("ffmpeg") {
777 Ok(path) => path,
778 Err(_) => {
779 warn!("ffmpeg binary not found on PATH; opening full video");
780 return Ok(None);
781 }
782 };
783
784 let target = std::env::temp_dir().join(format!(
785 "memvid-preview-clip-{}.{}",
786 Uuid::new_v4(),
787 extension
788 ));
789
790 let mut command = Command::new(ffmpeg);
791 command.arg("-y");
792 if let Some(start) = bounds.start_ms {
793 command.arg("-ss").arg(format_timestamp_ms(start));
794 }
795 command.arg("-i").arg(source);
796 if let Some(end) = bounds.end_ms {
797 command.arg("-to").arg(format_timestamp_ms(end));
798 }
799 command.arg("-c").arg("copy");
800 command.arg(&target);
801
802 let status = command
803 .status()
804 .context("failed to run ffmpeg for preview trimming")?;
805 if status.success() {
806 return Ok(Some(target));
807 }
808
809 let details = status
810 .code()
811 .map(|code| code.to_string())
812 .unwrap_or_else(|| "terminated".to_string());
813 warn!("ffmpeg exited with status {details}; opening full video");
814 Ok(None)
815}
816
817fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
818 let bytes = mem
819 .frame_canonical_payload(frame.id)
820 .context("failed to load canonical payload for frame")?;
821 if bytes.is_empty() {
822 bail!("frame payload is empty; nothing to preview");
823 }
824
825 let detected_kind = infer::get(&bytes);
826 let mut mime = frame
827 .metadata
828 .as_ref()
829 .and_then(|meta| meta.mime.clone())
830 .filter(|value| is_image_mime(value));
831
832 if mime.is_none() {
833 if let Some(kind) = &detected_kind {
834 let candidate = kind.mime_type();
835 if is_image_mime(candidate) {
836 mime = Some(candidate.to_string());
837 }
838 }
839 }
840
841 let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
842 if !is_image_mime(&mime) {
843 bail!("frame mime type {mime} is not an image");
844 }
845
846 let extension = detected_kind
847 .as_ref()
848 .map(|kind| kind.extension().to_string())
849 .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
850 .unwrap_or_else(|| "img".to_string());
851
852 let suffix = format!(".{extension}");
853 let mut temp_file = Builder::new()
854 .prefix("memvid-preview-")
855 .suffix(&suffix)
856 .tempfile_in(std::env::temp_dir())
857 .context("failed to create temporary preview file")?;
858 temp_file
859 .write_all(&bytes)
860 .context("failed to write image data to preview file")?;
861 temp_file
862 .flush()
863 .context("failed to flush preview file to disk")?;
864
865 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
866 drop(file);
867
868 println!("Opening preview...");
869 open::that(&preview_path).with_context(|| {
870 format!(
871 "failed to launch default image viewer for {}",
872 preview_path.display()
873 )
874 })?;
875
876 let display_uri = cli_uri
877 .or_else(|| frame.uri.as_deref())
878 .unwrap_or("<unknown>");
879 println!(
880 "Opened preview for {} (frame {}) -> {} ({})",
881 display_uri,
882 frame.id,
883 preview_path.display(),
884 mime
885 );
886 Ok(())
887}
888
889fn preview_frame_document(
890 mem: &mut Memvid,
891 frame: &Frame,
892 cli_uri: Option<&str>,
893 manifest: Option<&MediaManifest>,
894 mime: &str,
895) -> Result<()> {
896 let display_uri = cli_uri
897 .or_else(|| frame.uri.as_deref())
898 .unwrap_or("<unknown>");
899
900 if let Some(source_path) = &frame.source_path {
903 let source = Path::new(source_path);
904 if source.exists() {
905 println!("Opening preview...");
906 open::that(source).with_context(|| {
907 format!("failed to launch default viewer for {}", source.display())
908 })?;
909 println!(
910 "Opened preview for {} (frame {}) -> {} ({})",
911 display_uri, frame.id, source_path, mime
912 );
913 return Ok(());
914 } else {
915 warn!(
916 "Original source file no longer exists: {}. Falling back to extracted content.",
917 source_path
918 );
919 }
920 }
921
922 let bytes = mem
924 .frame_canonical_payload(frame.id)
925 .context("failed to load canonical payload for frame")?;
926 if bytes.is_empty() {
927 bail!("frame payload is empty; nothing to preview");
928 }
929
930 let mut extension = manifest
931 .and_then(|m| m.filename.as_deref())
932 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
933 .map(|ext| ext.trim_start_matches('.').to_string())
934 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
935 .unwrap_or_else(|| "bin".to_string());
936
937 if frame.chunk_manifest.is_some() {
939 extension = "txt".to_string();
940 } else if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
941 extension = "txt".to_string();
942 }
943
944 let suffix = format!(".{extension}");
945 let mut temp_file = Builder::new()
946 .prefix("memvid-preview-")
947 .suffix(&suffix)
948 .tempfile_in(std::env::temp_dir())
949 .context("failed to create temporary preview file")?;
950 temp_file
951 .write_all(&bytes)
952 .context("failed to write document data to preview file")?;
953 temp_file
954 .flush()
955 .context("failed to flush preview file to disk")?;
956
957 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
958 drop(file);
959
960 println!("Opening preview...");
961 open::that(&preview_path).with_context(|| {
962 format!(
963 "failed to launch default viewer for {}",
964 preview_path.display()
965 )
966 })?;
967
968 println!(
969 "Opened preview for {} (frame {}) -> {} ({})",
970 display_uri,
971 frame.id,
972 preview_path.display(),
973 if frame.chunk_manifest.is_some() {
974 "text/plain (extracted)"
975 } else {
976 mime
977 }
978 );
979 Ok(())
980}
981
982fn preview_frame_audio_file(
983 mem: &mut Memvid,
984 frame: &Frame,
985 cli_uri: Option<&str>,
986 manifest: Option<&MediaManifest>,
987 mime: &str,
988) -> Result<()> {
989 let bytes = mem
990 .frame_canonical_payload(frame.id)
991 .context("failed to load canonical payload for frame")?;
992 if bytes.is_empty() {
993 bail!("frame payload is empty; nothing to preview");
994 }
995
996 let mut extension = manifest
997 .and_then(|m| m.filename.as_deref())
998 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
999 .map(|ext| ext.trim_start_matches('.').to_string())
1000 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
1001 .unwrap_or_else(|| "audio".to_string());
1002
1003 if extension == "bin" {
1004 extension = "audio".to_string();
1005 }
1006
1007 let suffix = format!(".{extension}");
1008 let mut temp_file = Builder::new()
1009 .prefix("memvid-preview-")
1010 .suffix(&suffix)
1011 .tempfile_in(std::env::temp_dir())
1012 .context("failed to create temporary preview file")?;
1013 temp_file
1014 .write_all(&bytes)
1015 .context("failed to write audio data to preview file")?;
1016 temp_file
1017 .flush()
1018 .context("failed to flush preview file to disk")?;
1019
1020 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
1021 drop(file);
1022
1023 println!("Opening preview...");
1024 open::that(&preview_path).with_context(|| {
1025 format!(
1026 "failed to launch default audio player for {}",
1027 preview_path.display()
1028 )
1029 })?;
1030
1031 let display_uri = cli_uri
1032 .or_else(|| frame.uri.as_deref())
1033 .unwrap_or("<unknown>");
1034 println!(
1035 "Opened preview for {} (frame {}) -> {} ({})",
1036 display_uri,
1037 frame.id,
1038 preview_path.display(),
1039 mime
1040 );
1041 Ok(())
1042}
1043
/// Decodes a frame's audio payload and plays it on the default output device,
/// blocking until playback finishes.
///
/// `start_seconds` defaults to 0 and negative values are clamped to 0. When the
/// frame metadata records a positive duration, a start beyond it is rejected
/// and an end beyond it is clamped (with a warning).
///
/// # Errors
///
/// Fails if either offset is NaN, infinite or too large to represent, if the
/// end is not after the start, if the payload is missing/empty or cannot be
/// decoded, or if the audio output cannot be opened.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    /// Rejects offsets that `Duration::from_secs_f32` would panic on
    /// (non-finite values or values overflowing `Duration`).
    fn ensure_playable_seconds(flag: &str, value: Option<f32>) -> Result<()> {
        // `u64::MAX as f32` rounds up to 2^64, which no longer fits a Duration.
        const LIMIT: f32 = u64::MAX as f32;
        if let Some(seconds) = value {
            if !seconds.is_finite() || seconds >= LIMIT {
                bail!("{flag} must be a finite number of seconds (got {seconds})");
            }
        }
        Ok(())
    }

    ensure_playable_seconds("--start-seconds", start_seconds)?;
    ensure_playable_seconds("--end-seconds", end_seconds)?;

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // 0.0 stands for "duration unknown"; the duration checks below are skipped then.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    if let Some(end) = end_seconds {
        if duration_meta > 0.0 && end > duration_meta + f32::EPSILON {
            warn!(
                "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                end, duration_meta
            );
        }
    }

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` stays bound for the rest of the function so the output stream
    // is not dropped while the sink is playing.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    if let Some(end) = end_seconds {
        let effective_end = if duration_meta > 0.0 {
            end.min(duration_meta)
        } else {
            end
        };
        let duration = (effective_end - start).max(0.0);
        if duration <= 0.0 {
            bail!("playback duration is zero; adjust start/end seconds");
        }
        let source = decoder
            .skip_duration(Duration::from_secs_f32(start))
            .take_duration(Duration::from_secs_f32(duration));
        sink.append(source);
        let segment_desc = format!("{start:.2}s → {effective_end:.2}s");
        announce_playback(display_uri, &segment_desc);
    } else {
        let source = decoder.skip_duration(Duration::from_secs_f32(start));
        sink.append(source);
        let segment_desc = format!("{start:.2}s → end");
        announce_playback(display_uri, &segment_desc);
    }
    sink.sleep_until_end();
    Ok(())
}
1119
/// Prints a one-line notice naming the URI and the segment about to be played.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    let notice = format!("Playing {uri} ({segment_desc})");
    println!("{notice}");
}
1124
/// Reports whether `value` names an `image/*` media type.
///
/// Anything after the first `;` is ignored, surrounding whitespace is trimmed,
/// and the type prefix is compared ASCII case-insensitively.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";
    let essence = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1129
/// Reports whether `value` names an `audio/*` media type.
///
/// Anything after the first `;` is ignored, surrounding whitespace is trimmed,
/// and the type prefix is compared ASCII case-insensitively.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"audio/";
    let essence = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1134
/// Maps a MIME type to a conventional file extension (without the dot).
///
/// Parameters after the first `;` are discarded and the remainder is trimmed
/// and lower-cased before lookup. Returns `None` for types not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // One row per accepted spelling; aliases simply repeat the extension.
    const KNOWN: &[(&str, &str)] = &[
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
        ("image/x-icon", "ico"),
        ("image/vnd.microsoft.icon", "ico"),
        ("image/svg+xml", "svg"),
        ("video/mp4", "mp4"),
        ("video/iso.segment", "mp4"),
        ("video/quicktime", "mov"),
        ("video/webm", "webm"),
        ("video/x-matroska", "mkv"),
        ("video/matroska", "mkv"),
        ("video/x-msvideo", "avi"),
        ("video/mpeg", "mpg"),
        ("application/pdf", "pdf"),
        ("audio/mpeg", "mp3"),
        ("audio/mp3", "mp3"),
        ("audio/wav", "wav"),
        ("audio/x-wav", "wav"),
        ("audio/x-flac", "flac"),
        ("audio/flac", "flac"),
        ("audio/ogg", "ogg"),
        ("audio/vorbis", "ogg"),
        ("audio/x-m4a", "m4a"),
        ("audio/mp4", "m4a"),
        ("audio/aac", "aac"),
        ("audio/x-aiff", "aiff"),
        ("audio/aiff", "aiff"),
        ("text/plain", "txt"),
        ("text/markdown", "md"),
        ("text/x-markdown", "md"),
        ("text/html", "html"),
        ("application/xhtml+xml", "xhtml"),
        ("application/json", "json"),
        ("text/json", "json"),
        ("application/vnd.api+json", "json"),
        ("application/xml", "xml"),
        ("text/xml", "xml"),
        ("text/csv", "csv"),
        ("application/csv", "csv"),
        ("application/javascript", "js"),
        ("text/javascript", "js"),
        ("text/css", "css"),
        ("application/yaml", "yaml"),
        ("application/x-yaml", "yaml"),
        ("text/yaml", "yaml"),
        ("application/rtf", "rtf"),
        ("application/msword", "doc"),
        (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docx",
        ),
        ("application/vnd.ms-powerpoint", "ppt"),
        (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "pptx",
        ),
        ("application/vnd.ms-excel", "xls"),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "xlsx",
        ),
        ("application/zip", "zip"),
        ("application/x-tar", "tar"),
        ("application/x-7z-compressed", "7z"),
    ];

    let essence = mime.split(';').next().unwrap_or(mime);
    let wanted = essence.trim().to_ascii_lowercase();
    KNOWN
        .iter()
        .find(|(name, _)| *name == wanted.as_str())
        .map(|&(_, extension)| extension)
}
/// Produces a short preview of a frame's search text.
///
/// Returns `None` when no text is supplied or when it is blank after
/// trimming; otherwise returns at most the first 160 characters of the
/// trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    const SNIPPET_CHARS: usize = 160;

    let trimmed = text?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.chars().take(SNIPPET_CHARS).collect())
}
1198pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1199 json!({
1200 "id": frame.id,
1201 "status": frame_status_str(frame.status),
1202 "timestamp": frame.timestamp,
1203 "kind": frame.kind,
1204 "track": frame.track,
1205 "uri": frame.uri,
1206 "title": frame.title,
1207 "payload_length": frame.payload_length,
1208 "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1209 "canonical_length": frame.canonical_length,
1210 "role": format!("{:?}", frame.role),
1211 "parent_id": frame.parent_id,
1212 "chunk_index": frame.chunk_index,
1213 "chunk_count": frame.chunk_count,
1214 "tags": frame.tags,
1215 "labels": frame.labels,
1216 "search_text": frame.search_text,
1217 "metadata": frame.metadata,
1218 "extra_metadata": frame.extra_metadata,
1219 "content_dates": frame.content_dates,
1220 "chunk_manifest": frame.chunk_manifest,
1221 "supersedes": frame.supersedes,
1222 "superseded_by": frame.superseded_by,
1223 "source_sha256": frame.source_sha256.map(|h| hex::encode(h)),
1224 "source_path": frame.source_path,
1225 })
1226}
1227pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1228 println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1229 println!("Timestamp: {}", frame.timestamp);
1230 if let Some(uri) = &frame.uri {
1231 println!("URI: {uri}");
1232 }
1233 if let Some(title) = &frame.title {
1234 println!("Title: {title}");
1235 }
1236 if let Some(kind) = &frame.kind {
1237 println!("Kind: {kind}");
1238 }
1239 if let Some(track) = &frame.track {
1240 println!("Track: {track}");
1241 }
1242 if let Some(supersedes) = frame.supersedes {
1243 println!("Supersedes frame: {supersedes}");
1244 }
1245 if let Some(successor) = frame.superseded_by {
1246 println!("Superseded by frame: {successor}");
1247 }
1248 println!(
1249 "Payload: {} bytes (canonical {:?}, logical {:?})",
1250 frame.payload_length, frame.canonical_encoding, frame.canonical_length
1251 );
1252 if !frame.tags.is_empty() {
1253 println!("Tags: {}", frame.tags.join(", "));
1254 }
1255 if !frame.labels.is_empty() {
1256 println!("Labels: {}", frame.labels.join(", "));
1257 }
1258 if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1259 println!("Search text: {snippet}");
1260 }
1261 if let Some(meta) = &frame.metadata {
1262 let rendered = serde_json::to_string_pretty(meta)?;
1263 println!("Metadata: {rendered}");
1264 }
1265 if !frame.extra_metadata.is_empty() {
1266 let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1267 entries.sort_by(|a, b| a.0.cmp(b.0));
1268 println!("Extra metadata:");
1269 for (key, value) in entries {
1270 println!(" {key}: {value}");
1271 }
1272 }
1273 if !frame.content_dates.is_empty() {
1274 println!("Content dates: {}", frame.content_dates.join(", "));
1275 }
1276 if let Some(hash) = frame.source_sha256 {
1278 println!("Source SHA256: {} (raw binary not stored)", hex::encode(hash));
1279 if let Some(path) = &frame.source_path {
1280 println!("Source path: {path}");
1281 }
1282 }
1283 match mem.frame_embedding(frame.id) {
1284 Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1285 Ok(None) => println!("Embedding: none"),
1286 Err(err) => println!("Embedding: unavailable ({err})"),
1287 }
1288 Ok(())
1289}
1290fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1291 let bytes = mem.frame_canonical_payload(frame.id)?;
1292 let raw = match String::from_utf8(bytes) {
1293 Ok(text) => text,
1294 Err(err) => {
1295 let bytes = err.into_bytes();
1296 String::from_utf8_lossy(&bytes).into_owned()
1297 }
1298 };
1299
1300 Ok(normalize_text(&raw, usize::MAX)
1301 .map(|n| n.text)
1302 .unwrap_or_default())
1303}
1304
1305fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1306 if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1307 return false;
1308 }
1309 let total_chars = text.chars().count();
1310 manifest
1311 .chunks
1312 .iter()
1313 .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1314}
1315
1316fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1317 let primary = frame
1318 .chunk_manifest
1319 .clone()
1320 .filter(|manifest| manifests_match_text(text, manifest));
1321 if primary.is_some() {
1322 return primary;
1323 }
1324
1325 frame
1326 .extra_metadata
1327 .get(CHUNK_MANIFEST_KEY)
1328 .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1329 .filter(|manifest| manifests_match_text(text, manifest))
1330}
1331
1332fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1333 let normalized = normalize_text(text, usize::MAX)
1334 .map(|n| n.text)
1335 .unwrap_or_default();
1336
1337 let effective_chunk = chunk_chars.max(1);
1338 let total_chars = normalized.chars().count();
1339 if total_chars == 0 {
1340 return TextChunkManifest {
1341 chunk_chars: effective_chunk,
1342 chunks: vec![TextChunkRange { start: 0, end: 0 }],
1343 };
1344 }
1345 if total_chars <= effective_chunk {
1346 return TextChunkManifest {
1347 chunk_chars: effective_chunk,
1348 chunks: vec![TextChunkRange {
1349 start: 0,
1350 end: total_chars,
1351 }],
1352 };
1353 }
1354 let mut chunks = Vec::new();
1355 let mut start = 0usize;
1356 while start < total_chars {
1357 let end = (start + effective_chunk).min(total_chars);
1358 chunks.push(TextChunkRange { start, end });
1359 start = end;
1360 }
1361 TextChunkManifest {
1362 chunk_chars: effective_chunk,
1363 chunks,
1364 }
1365}
1366
1367fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1368 if range.start >= range.end || text.is_empty() {
1369 return String::new();
1370 }
1371 let mut start_byte = text.len();
1372 let mut end_byte = text.len();
1373 let mut idx = 0usize;
1374 for (byte_offset, _) in text.char_indices() {
1375 if idx == range.start {
1376 start_byte = byte_offset;
1377 }
1378 if idx == range.end {
1379 end_byte = byte_offset;
1380 break;
1381 }
1382 idx += 1;
1383 }
1384 if start_byte == text.len() {
1385 return String::new();
1386 }
1387 if end_byte == text.len() {
1388 end_byte = text.len();
1389 }
1390 text[start_byte..end_byte].to_string()
1391}