1#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use hex;
14use memvid_core::table::list_tables;
15use memvid_core::{
16 lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
17 TextChunkRange,
18};
19use serde_json::{json, Value};
20use tempfile::Builder;
21use tracing::warn;
22use uuid::Uuid;
23
24use crate::config::CliConfig;
25use crate::utils::{
26 format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
27 owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
28};
29
/// Fallback page size, in characters, used by `handle_view` when neither
/// `--page-size` nor a chunk manifest stored on the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
/// Key naming the chunk manifest (version 1).
// NOTE(review): not referenced in the part of the file reviewed here; presumably
// the metadata key under which a frame's chunk manifest is stored — confirm at
// the use site.
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
32
// Command-line arguments for viewing a single frame (consumed by `handle_view`).
//
// Plain `//` comments are used on purpose: `///` doc comments on a clap derive
// struct become help text and would change the `--help` output.
#[derive(Args)]
pub struct ViewArgs {
    // Path of the memory file to open.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Select the frame by numeric id; mutually exclusive with `--uri`.
    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
    pub frame_id: Option<u64>,
    // Select the frame by URI; mutually exclusive with `--frame-id`.
    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
    pub uri: Option<String>,
    // Emit the frame and the requested page as pretty-printed JSON.
    #[arg(long)]
    pub json: bool,
    // Write the frame's canonical payload bytes to stdout; excludes `--json`.
    #[arg(long, conflicts_with = "json")]
    pub binary: bool,
    // Open the frame's media in the system default application.
    #[arg(long, conflicts_with_all = ["json", "binary"])]
    pub preview: bool,
    // Preview start timecode; only valid together with `--preview`.
    #[arg(
        long = "start",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_start: Option<String>,
    // Preview end timecode; only valid together with `--preview`.
    #[arg(
        long = "end",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_end: Option<String>,
    // Play the frame's audio; `handle_view` rejects this unless the
    // `audio-playback` feature is compiled in.
    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
    pub play: bool,
    // Playback start offset in seconds; requires `--play`.
    #[arg(long = "start-seconds", requires = "play")]
    pub start_seconds: Option<f32>,
    // Playback end offset in seconds; requires `--play`.
    #[arg(long = "end-seconds", requires = "play")]
    pub end_seconds: Option<f32>,
    // 1-based page of the text view to show; `handle_view` rejects 0.
    #[arg(long, value_name = "N", default_value_t = 1)]
    pub page: usize,
    // Page size in characters; when absent `handle_view` falls back to the
    // frame's stored chunk size or `DEFAULT_VIEW_PAGE_CHARS`.
    #[arg(long = "page-size", value_name = "CHARS")]
    pub page_size: Option<usize>,
}
75
// Command-line arguments for the stats report (consumed by `handle_stats`).
//
// Plain `//` comments are used on purpose: `///` doc comments on a clap derive
// struct become help text and would change the `--help` output.
#[derive(Args)]
pub struct StatsArgs {
    // Path of the memory file to inspect.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the report as pretty-printed JSON instead of text.
    #[arg(long)]
    pub json: bool,
    // Replay cut-off by frame id. `handle_stats` does not filter on it; it only
    // prints a note to stderr and reports current stats.
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // Replay cut-off by Unix timestamp. Handled the same way as `as_of_frame`.
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
90
// Command-line arguments for the lock-owner query (consumed by `handle_who`).
//
// Plain `//` comments are used on purpose: `///` doc comments on a clap derive
// struct become help text and would change the `--help` output.
#[derive(Args)]
pub struct WhoArgs {
    // Path of the memory file whose writer lock is inspected.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the result as pretty-printed JSON instead of text.
    #[arg(long)]
    pub json: bool,
}
99
100pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
102 let mut mem = Memvid::open_read_only(&args.file)?;
103 let stats = mem.stats()?;
104 let tables = list_tables(&mut mem).unwrap_or_default();
105 let vec_dimension = mem.effective_vec_index_dimension()?;
106 let embedding_identity = mem.embedding_identity_summary(10_000);
107
108 if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
111 eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
112 eprintln!(" Use 'find' or 'timeline' commands for filtered results.");
113 }
114 let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
115 let payload_share_percent: f64 = if stats.size_bytes > 0 {
116 round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
117 } else {
118 0.0
119 };
120 let overhead_share_percent: f64 = if stats.size_bytes > 0 {
121 round_percent((100.0 - payload_share_percent).max(0.0))
122 } else {
123 0.0
124 };
125 let maintenance_command = format!(
126 "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
127 args.file.display()
128 );
129
130 if args.json {
131 let mut raw_json = serde_json::to_value(&stats)?;
132 if let Value::Object(ref mut obj) = raw_json {
133 obj.remove("tier");
134 }
135
136 let tables_json: Vec<serde_json::Value> = tables
138 .iter()
139 .map(|t| {
140 json!({
141 "table_id": t.table_id,
142 "source_file": t.source_file,
143 "n_rows": t.n_rows,
144 "n_cols": t.n_cols,
145 "pages": format!("{}-{}", t.page_start, t.page_end),
146 "quality": format!("{:?}", t.quality),
147 "headers": t.headers,
148 })
149 })
150 .collect();
151
152 let embedding_quality_json = if stats.has_vec_index {
154 mem.embedding_quality().ok().flatten().map(|eq| {
155 json!({
156 "vector_count": eq.vector_count,
157 "dimension": eq.dimension,
158 "avg_similarity": eq.avg_similarity,
159 "min_similarity": eq.min_similarity,
160 "max_similarity": eq.max_similarity,
161 "std_similarity": eq.std_similarity,
162 "clustering_coefficient": eq.clustering_coefficient,
163 "estimated_clusters": eq.estimated_clusters,
164 "recommended_threshold": eq.recommended_threshold,
165 "quality_rating": eq.quality_rating,
166 "quality_explanation": eq.quality_explanation,
167 })
168 })
169 } else {
170 None
171 };
172
173 let embedding_identity_json = match &embedding_identity {
174 memvid_core::EmbeddingIdentitySummary::Unknown => Value::Null,
175 memvid_core::EmbeddingIdentitySummary::Single(identity) => json!({
176 "provider": identity.provider.as_deref(),
177 "model": identity.model.as_deref(),
178 "dimension": identity.dimension.or(vec_dimension),
179 "normalized": identity.normalized,
180 }),
181 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
182 let values: Vec<Value> = identities
183 .iter()
184 .map(|entry| {
185 json!({
186 "provider": entry.identity.provider.as_deref(),
187 "model": entry.identity.model.as_deref(),
188 "dimension": entry.identity.dimension.or(vec_dimension),
189 "normalized": entry.identity.normalized,
190 "count": entry.count,
191 })
192 })
193 .collect();
194 json!({ "mixed": values })
195 }
196 };
197
198 let enrichment_stats = mem.enrichment_stats();
200 let enrichment_json = json!({
201 "total_frames": enrichment_stats.total_frames,
202 "enriched_frames": enrichment_stats.enriched_frames,
203 "pending_frames": enrichment_stats.pending_frames,
204 "searchable_only": enrichment_stats.searchable_only,
205 });
206
207 let ticket = mem.current_ticket();
209 let ticket_json = json!({
210 "issuer": ticket.issuer,
211 "seq_no": ticket.seq_no,
212 "expires_in_secs": ticket.expires_in_secs,
213 "capacity_bytes": ticket.capacity_bytes,
214 "verified": ticket.verified,
215 });
216
217 let report = json!({
218 "summary": {
219 "sequence": stats.seq_no,
220 "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
221 "usage": format!(
222 "{} used / {} total ({})",
223 format_bytes(stats.size_bytes),
224 format_bytes(stats.capacity_bytes),
225 format_percent(stats.storage_utilisation_percent)
226 ),
227 "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
228 },
229 "storage": {
230 "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
231 "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
232 "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
233 "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
234 "compression_ratio": format_percent(stats.compression_ratio_percent),
235 },
236 "frames": {
237 "average_stored": format_bytes(stats.average_frame_payload_bytes),
238 "average_logical": format_bytes(stats.average_frame_logical_bytes),
239 "clip_images": stats.clip_image_count,
240 },
241 "indexes": {
242 "lexical": yes_no(stats.has_lex_index),
243 "vector": yes_no(stats.has_vec_index),
244 "time": yes_no(stats.has_time_index),
245 },
246 "enrichment": enrichment_json,
247 "ticket": ticket_json,
248 "embedding_identity": embedding_identity_json,
249 "embedding_quality": embedding_quality_json,
250 "tables": {
251 "count": tables.len(),
252 "tables": tables_json,
253 },
254 "maintenance": maintenance_command,
255 "raw": raw_json,
256 });
257
258 println!("{}", serde_json::to_string_pretty(&report)?);
259 } else {
260 let seq_display = stats
261 .seq_no
262 .map(|seq| seq.to_string())
263 .unwrap_or_else(|| "n/a".to_string());
264
265 println!("Memory: {}", args.file.display());
266 println!("Sequence: {}", seq_display);
267 println!(
268 "Frames: {} total ({} active)",
269 stats.frame_count, stats.active_frame_count
270 );
271
272 println!("\nCapacity:");
273 println!(
274 " Usage: {} used / {} total ({})",
275 format_bytes(stats.size_bytes),
276 format_bytes(stats.capacity_bytes),
277 format_percent(stats.storage_utilisation_percent)
278 );
279 println!(
280 " Remaining: {}",
281 format_bytes(stats.remaining_capacity_bytes)
282 );
283
284 let ticket = mem.current_ticket();
286 if ticket.seq_no > 0 {
287 let verified_str = if ticket.verified {
288 "✓ verified"
289 } else {
290 "⚠ unverified"
291 };
292 println!(
293 " Ticket: seq={} issuer={} ({})",
294 ticket.seq_no, ticket.issuer, verified_str
295 );
296 }
297
298 println!("\nStorage breakdown:");
299 println!(
300 " Payload: {} ({})",
301 format_bytes(stats.payload_bytes),
302 format_percent(payload_share_percent)
303 );
304 println!(
305 " Overhead: {} ({})",
306 format_bytes(overhead_bytes),
307 format_percent(overhead_share_percent)
308 );
309 println!(" ├─ WAL: {}", format_bytes(stats.wal_bytes));
311 println!(
312 " ├─ Lexical index: {}",
313 format_bytes(stats.lex_index_bytes)
314 );
315 println!(
316 " ├─ Vector index: {}",
317 format_bytes(stats.vec_index_bytes)
318 );
319 println!(
320 " └─ Time index: {}",
321 format_bytes(stats.time_index_bytes)
322 );
323 println!(
324 " Logical payload: {} before compression",
325 format_bytes(stats.logical_bytes)
326 );
327
328 if stats.has_vec_index {
329 println!("\nEmbeddings:");
330 if let Some(dim) = vec_dimension {
331 println!(" Dimension: {}", dim);
332 }
333 match &embedding_identity {
334 memvid_core::EmbeddingIdentitySummary::Unknown => {
335 println!(" Model: unknown (no persisted embedding identity)");
336 }
337 memvid_core::EmbeddingIdentitySummary::Single(identity) => {
338 if let Some(provider) = identity.provider.as_deref() {
339 println!(" Provider: {}", provider);
340 }
341 if let Some(model) = identity.model.as_deref() {
342 println!(" Model: {}", model);
343 }
344 }
345 memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
346 println!(" Model: mixed ({} identities detected)", identities.len());
347 for entry in identities.iter().take(5) {
348 let provider = entry.identity.provider.as_deref().unwrap_or("unknown");
349 let model = entry.identity.model.as_deref().unwrap_or("unknown");
350 println!(" - {} / {} ({} frames)", provider, model, entry.count);
351 }
352 if identities.len() > 5 {
353 println!(" - ...");
354 }
355 }
356 }
357 }
358 println!(
359 " Compression savings: {} ({})",
360 format_bytes(stats.saved_bytes),
361 format_percent(stats.savings_percent)
362 );
363
364 println!("\nAverage frame:");
365 println!(
366 " Stored: {} Logical: {}",
367 format_bytes(stats.average_frame_payload_bytes),
368 format_bytes(stats.average_frame_logical_bytes)
369 );
370 if stats.clip_image_count > 0 {
371 println!(" CLIP images: {}", stats.clip_image_count);
372 }
373
374 if stats.active_frame_count > 0 {
376 let overhead_per_doc = overhead_bytes / stats.active_frame_count;
377 let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
378 let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
379
380 println!("\nPer-document overhead:");
381 println!(" Total: {}", format_bytes(overhead_per_doc));
382 if stats.has_lex_index {
383 println!(" Lexical: {}", format_bytes(lex_per_doc));
384 }
385 if stats.has_vec_index {
386 let vec_ratio = if stats.average_frame_payload_bytes > 0 {
387 vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
388 } else {
389 0.0
390 };
391 println!(
392 " Vector: {} ({:.0}x text size)",
393 format_bytes(vec_per_doc),
394 vec_ratio
395 );
396 }
397 }
398
399 println!("\nIndexes:");
400 println!(
401 " Lexical: {} Vector: {} Time: {}",
402 yes_no(stats.has_lex_index),
403 yes_no(stats.has_vec_index),
404 yes_no(stats.has_time_index)
405 );
406
407 let enrichment_stats = mem.enrichment_stats();
409 if enrichment_stats.pending_frames > 0 || enrichment_stats.searchable_only > 0 {
410 println!("\nEnrichment:");
411 println!(
412 " Enriched: {} / {}",
413 enrichment_stats.enriched_frames, enrichment_stats.total_frames
414 );
415 if enrichment_stats.pending_frames > 0 {
416 println!(" Pending: {} frames", enrichment_stats.pending_frames);
417 println!(
418 " Run `memvid process-queue {}` to complete enrichment",
419 args.file.display()
420 );
421 }
422 }
423
424 if stats.has_vec_index {
426 if let Ok(Some(eq)) = mem.embedding_quality() {
427 println!("\nEmbedding Quality:");
428 println!(
429 " Vectors: {} Dimension: {}",
430 eq.vector_count, eq.dimension
431 );
432 println!(
433 " Similarity: avg={:.3} min={:.3} max={:.3} std={:.3}",
434 eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
435 );
436 println!(
437 " Clusters: ~{} Quality: {}",
438 eq.estimated_clusters, eq.quality_rating
439 );
440 println!(
441 " Recommended --min-relevancy: {:.1}",
442 eq.recommended_threshold
443 );
444 println!(" {}", eq.quality_explanation);
445 }
446 }
447
448 if !tables.is_empty() {
449 println!("\nTables: {} extracted", tables.len());
450 for t in &tables {
451 println!(
452 " {} — {} rows × {} cols ({})",
453 t.table_id, t.n_rows, t.n_cols, t.source_file
454 );
455 }
456 }
457
458 println!("\nMaintenance:");
459 println!(
460 " Run `{}` to rebuild indexes and reclaim space.",
461 maintenance_command
462 );
463 }
464 Ok(())
465}
466
467pub fn handle_who(args: WhoArgs) -> Result<()> {
469 match lockfile::current_owner(&args.file)? {
470 Some(owner) => {
471 if args.json {
472 let output = json!({
473 "locked": true,
474 "owner": owner_hint_to_json(&owner),
475 });
476 println!("{}", serde_json::to_string_pretty(&output)?);
477 } else {
478 println!("{} is locked by:", args.file.display());
479 if let Some(pid) = owner.pid {
480 println!(" pid: {pid}");
481 }
482 if let Some(cmd) = owner.cmd.as_deref() {
483 println!(" cmd: {cmd}");
484 }
485 if let Some(started) = owner.started_at.as_deref() {
486 println!(" started_at: {started}");
487 }
488 if let Some(last) = owner.last_heartbeat.as_deref() {
489 println!(" last_heartbeat: {last}");
490 }
491 if let Some(interval) = owner.heartbeat_ms {
492 println!(" heartbeat_interval_ms: {interval}");
493 }
494 if let Some(file_id) = owner.file_id.as_deref() {
495 println!(" file_id: {file_id}");
496 }
497 if let Some(path) = owner.file_path.as_ref() {
498 println!(" file_path: {}", path.display());
499 }
500 }
501 }
502 None => {
503 if args.json {
504 let output = json!({"locked": false});
505 println!("{}", serde_json::to_string_pretty(&output)?);
506 } else {
507 println!("No active writer for {}", args.file.display());
508 }
509 }
510 }
511 Ok(())
512}
513
514pub fn handle_view(args: ViewArgs) -> Result<()> {
520 if args.page == 0 {
521 bail!("page must be greater than zero");
522 }
523 if let Some(size) = args.page_size {
524 if size == 0 {
525 bail!("page-size must be greater than zero");
526 }
527 }
528
529 let mut mem = open_read_only_mem(&args.file)?;
530 let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
531
532 if args.play {
533 #[cfg(feature = "audio-playback")]
534 {
535 play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
536 return Ok(());
537 }
538 #[cfg(not(feature = "audio-playback"))]
539 {
540 bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
541 }
542 }
543
544 if args.preview {
545 let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
546 preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
547 return Ok(());
548 }
549
550 if args.binary {
551 let bytes = mem.frame_canonical_payload(frame.id)?;
552 let mut stdout = io::stdout();
553 stdout.write_all(&bytes)?;
554 stdout.flush()?;
555 return Ok(());
556 }
557
558 let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
559 let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
560
561 let page_size = args
562 .page_size
563 .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
564 .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
565
566 let mut manifest = if args.page_size.is_none() {
567 manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
568 } else {
569 compute_chunk_manifest(&canonical_text, page_size)
570 };
571 if manifest.chunks.is_empty() {
572 manifest = TextChunkManifest {
573 chunk_chars: page_size,
574 chunks: vec![TextChunkRange {
575 start: 0,
576 end: canonical_text.chars().count(),
577 }],
578 };
579 }
580
581 if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
582 let total_chars = canonical_text.chars().count();
583 manifest = TextChunkManifest {
584 chunk_chars: total_chars.max(1),
585 chunks: vec![TextChunkRange {
586 start: 0,
587 end: total_chars,
588 }],
589 };
590 }
591
592 let total_pages = manifest.chunks.len().max(1);
593 if args.page > total_pages {
594 bail!(
595 "page {} is out of range (total pages: {})",
596 args.page,
597 total_pages
598 );
599 }
600
601 let chunk = &manifest.chunks[args.page - 1];
602 let content = extract_chunk_slice(&canonical_text, chunk);
603
604 if args.json {
605 let mut frame_json = frame_to_json(&frame);
606 if let Some(obj) = frame_json.as_object_mut() {
607 if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
610 if let Some(manifest_obj) = manifest_json.as_object_mut() {
611 let total = manifest.chunks.len();
612 if total > 0 {
613 let mut window = serde_json::Map::new();
614 let idx = args.page.saturating_sub(1).min(total - 1);
615 if idx > 0 {
616 let prev = &manifest.chunks[idx - 1];
617 window.insert("prev".into(), json!([prev.start, prev.end]));
618 }
619 let current = &manifest.chunks[idx];
620 window.insert("current".into(), json!([current.start, current.end]));
621 if idx + 1 < total {
622 let next = &manifest.chunks[idx + 1];
623 window.insert("next".into(), json!([next.start, next.end]));
624 }
625 manifest_obj.insert("chunks".into(), Value::Object(window));
626 }
627 }
628 }
629 }
630 let json = json!({
631 "frame": frame_json,
632 "page": args.page,
633 "page_size": manifest.chunk_chars,
634 "page_count": total_pages,
635 "has_prev": args.page > 1,
636 "has_next": args.page < total_pages,
637 "content": content,
638 });
639 println!("{}", serde_json::to_string_pretty(&json)?);
640 } else {
641 print_frame_summary(&mut mem, &frame)?;
642 println!(
643 "Page {}/{} ({} chars per page)",
644 args.page, total_pages, manifest.chunk_chars
645 );
646 println!();
647 println!("{}", content);
648 }
649 Ok(())
650}
651
/// Optional start and end offsets bounding a media preview.
///
/// Built by `parse_preview_bounds`; either bound may be absent.
#[derive(Debug)]
pub struct PreviewBounds {
    /// Where the preview begins, or `None` to start at the beginning.
    // NOTE(review): the `_ms` suffix and the use of `format_timestamp_ms`
    // suggest milliseconds — confirm against `parse_timecode`.
    pub start_ms: Option<u64>,
    /// Where the preview ends, or `None` to run to the end.
    pub end_ms: Option<u64>,
}
657
658pub fn parse_preview_bounds(
659 start: Option<&String>,
660 end: Option<&String>,
661) -> Result<Option<PreviewBounds>> {
662 let start_ms = match start {
663 Some(value) => Some(parse_timecode(value)?),
664 None => None,
665 };
666 let end_ms = match end {
667 Some(value) => Some(parse_timecode(value)?),
668 None => None,
669 };
670
671 if let (Some(s), Some(e)) = (start_ms, end_ms) {
672 if e <= s {
673 anyhow::bail!("--end must be greater than --start");
674 }
675 }
676
677 if start_ms.is_none() && end_ms.is_none() {
678 Ok(None)
679 } else {
680 Ok(Some(PreviewBounds { start_ms, end_ms }))
681 }
682}
683
684fn preview_frame_media(
685 mem: &mut Memvid,
686 frame: &Frame,
687 cli_uri: Option<&str>,
688 bounds: Option<PreviewBounds>,
689) -> Result<()> {
690 let manifest = mem.media_manifest(frame.id)?;
691 let mut mime = manifest
692 .as_ref()
693 .map(|m| m.mime.clone())
694 .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
695 .unwrap_or_else(|| "application/octet-stream".to_string());
696
697 if mime == "application/octet-stream" {
699 if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
700 if let Some(kind) = infer::get(&bytes) {
701 mime = kind.mime_type().to_string();
702 }
703 }
704 }
705
706 let is_video = manifest
707 .as_ref()
708 .map(|media| media.kind.eq_ignore_ascii_case("video"))
709 .unwrap_or_else(|| mime.starts_with("video/"));
710
711 if is_video {
712 preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
713 } else {
714 if bounds.is_some() {
715 anyhow::bail!("--start/--end are only supported for video previews");
716 }
717 if is_image_mime(&mime) {
718 preview_frame_image(mem, frame, cli_uri)?;
719 } else if is_audio_mime(&mime) {
720 preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
721 } else {
722 preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
723 }
724 }
725 Ok(())
726}
727
728fn preview_frame_video(
729 mem: &mut Memvid,
730 frame: &Frame,
731 cli_uri: Option<&str>,
732 bounds: Option<PreviewBounds>,
733 manifest: Option<MediaManifest>,
734 mime: &str,
735) -> Result<()> {
736 let extension = manifest
737 .as_ref()
738 .and_then(|m| m.filename.as_deref())
739 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
740 .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
741 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
742 .unwrap_or_else(|| "mp4".to_string());
743
744 let mut temp_file = Builder::new()
745 .prefix("memvid-preview-")
746 .suffix(&format!(".{extension}"))
747 .tempfile_in(std::env::temp_dir())
748 .context("failed to create temporary preview file")?;
749
750 let mut reader = mem
751 .blob_reader(frame.id)
752 .context("failed to stream payload for preview")?;
753 io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
754 temp_file
755 .flush()
756 .context("failed to flush video preview to disk")?;
757
758 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
759 drop(file);
760
761 let mut display_path = preview_path.clone();
762 if let Some(ref span) = bounds {
763 let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
764 if needs_trim {
765 if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
766 display_path = trimmed;
767 }
768 }
769 }
770
771 println!("Opening preview...");
772 open::that(&display_path).with_context(|| {
773 format!(
774 "failed to launch default video player for {}",
775 display_path.display()
776 )
777 })?;
778
779 let display_uri = cli_uri
780 .or_else(|| frame.uri.as_deref())
781 .unwrap_or("<unknown>");
782 println!(
783 "Opened preview for {} (frame {}) -> {} ({})",
784 display_uri,
785 frame.id,
786 display_path.display(),
787 mime
788 );
789 Ok(())
790}
791
792fn maybe_trim_with_ffmpeg(
793 source: &Path,
794 extension: &str,
795 bounds: &PreviewBounds,
796) -> Result<Option<PathBuf>> {
797 if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
798 return Ok(None);
799 }
800
801 let ffmpeg = match which::which("ffmpeg") {
802 Ok(path) => path,
803 Err(_) => {
804 warn!("ffmpeg binary not found on PATH; opening full video");
805 return Ok(None);
806 }
807 };
808
809 let target = std::env::temp_dir().join(format!(
810 "memvid-preview-clip-{}.{}",
811 Uuid::new_v4(),
812 extension
813 ));
814
815 let mut command = Command::new(ffmpeg);
816 command.arg("-y");
817 if let Some(start) = bounds.start_ms {
818 command.arg("-ss").arg(format_timestamp_ms(start));
819 }
820 command.arg("-i").arg(source);
821 if let Some(end) = bounds.end_ms {
822 command.arg("-to").arg(format_timestamp_ms(end));
823 }
824 command.arg("-c").arg("copy");
825 command.arg(&target);
826
827 let status = command
828 .status()
829 .context("failed to run ffmpeg for preview trimming")?;
830 if status.success() {
831 return Ok(Some(target));
832 }
833
834 let details = status
835 .code()
836 .map(|code| code.to_string())
837 .unwrap_or_else(|| "terminated".to_string());
838 warn!("ffmpeg exited with status {details}; opening full video");
839 Ok(None)
840}
841
842fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
843 let bytes = mem
844 .frame_canonical_payload(frame.id)
845 .context("failed to load canonical payload for frame")?;
846 if bytes.is_empty() {
847 bail!("frame payload is empty; nothing to preview");
848 }
849
850 let detected_kind = infer::get(&bytes);
851 let mut mime = frame
852 .metadata
853 .as_ref()
854 .and_then(|meta| meta.mime.clone())
855 .filter(|value| is_image_mime(value));
856
857 if mime.is_none() {
858 if let Some(kind) = &detected_kind {
859 let candidate = kind.mime_type();
860 if is_image_mime(candidate) {
861 mime = Some(candidate.to_string());
862 }
863 }
864 }
865
866 let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
867 if !is_image_mime(&mime) {
868 bail!("frame mime type {mime} is not an image");
869 }
870
871 let extension = detected_kind
872 .as_ref()
873 .map(|kind| kind.extension().to_string())
874 .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
875 .unwrap_or_else(|| "img".to_string());
876
877 let suffix = format!(".{extension}");
878 let mut temp_file = Builder::new()
879 .prefix("memvid-preview-")
880 .suffix(&suffix)
881 .tempfile_in(std::env::temp_dir())
882 .context("failed to create temporary preview file")?;
883 temp_file
884 .write_all(&bytes)
885 .context("failed to write image data to preview file")?;
886 temp_file
887 .flush()
888 .context("failed to flush preview file to disk")?;
889
890 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
891 drop(file);
892
893 println!("Opening preview...");
894 open::that(&preview_path).with_context(|| {
895 format!(
896 "failed to launch default image viewer for {}",
897 preview_path.display()
898 )
899 })?;
900
901 let display_uri = cli_uri
902 .or_else(|| frame.uri.as_deref())
903 .unwrap_or("<unknown>");
904 println!(
905 "Opened preview for {} (frame {}) -> {} ({})",
906 display_uri,
907 frame.id,
908 preview_path.display(),
909 mime
910 );
911 Ok(())
912}
913
914fn preview_frame_document(
915 mem: &mut Memvid,
916 frame: &Frame,
917 cli_uri: Option<&str>,
918 manifest: Option<&MediaManifest>,
919 mime: &str,
920) -> Result<()> {
921 let display_uri = cli_uri
922 .or_else(|| frame.uri.as_deref())
923 .unwrap_or("<unknown>");
924
925 if let Some(source_path) = &frame.source_path {
928 let source = Path::new(source_path);
929 if source.exists() {
930 println!("Opening preview...");
931 open::that(source).with_context(|| {
932 format!("failed to launch default viewer for {}", source.display())
933 })?;
934 println!(
935 "Opened preview for {} (frame {}) -> {} ({})",
936 display_uri, frame.id, source_path, mime
937 );
938 return Ok(());
939 } else {
940 warn!(
941 "Original source file no longer exists: {}. Falling back to extracted content.",
942 source_path
943 );
944 }
945 }
946
947 let bytes = mem
949 .frame_canonical_payload(frame.id)
950 .context("failed to load canonical payload for frame")?;
951 if bytes.is_empty() {
952 bail!("frame payload is empty; nothing to preview");
953 }
954
955 let mut extension = manifest
956 .and_then(|m| m.filename.as_deref())
957 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
958 .map(|ext| ext.trim_start_matches('.').to_string())
959 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
960 .unwrap_or_else(|| "bin".to_string());
961
962 if frame.chunk_manifest.is_some() {
964 extension = "txt".to_string();
965 } else if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
966 extension = "txt".to_string();
967 }
968
969 let suffix = format!(".{extension}");
970 let mut temp_file = Builder::new()
971 .prefix("memvid-preview-")
972 .suffix(&suffix)
973 .tempfile_in(std::env::temp_dir())
974 .context("failed to create temporary preview file")?;
975 temp_file
976 .write_all(&bytes)
977 .context("failed to write document data to preview file")?;
978 temp_file
979 .flush()
980 .context("failed to flush preview file to disk")?;
981
982 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
983 drop(file);
984
985 println!("Opening preview...");
986 open::that(&preview_path).with_context(|| {
987 format!(
988 "failed to launch default viewer for {}",
989 preview_path.display()
990 )
991 })?;
992
993 println!(
994 "Opened preview for {} (frame {}) -> {} ({})",
995 display_uri,
996 frame.id,
997 preview_path.display(),
998 if frame.chunk_manifest.is_some() {
999 "text/plain (extracted)"
1000 } else {
1001 mime
1002 }
1003 );
1004 Ok(())
1005}
1006
1007fn preview_frame_audio_file(
1008 mem: &mut Memvid,
1009 frame: &Frame,
1010 cli_uri: Option<&str>,
1011 manifest: Option<&MediaManifest>,
1012 mime: &str,
1013) -> Result<()> {
1014 let bytes = mem
1015 .frame_canonical_payload(frame.id)
1016 .context("failed to load canonical payload for frame")?;
1017 if bytes.is_empty() {
1018 bail!("frame payload is empty; nothing to preview");
1019 }
1020
1021 let mut extension = manifest
1022 .and_then(|m| m.filename.as_deref())
1023 .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
1024 .map(|ext| ext.trim_start_matches('.').to_string())
1025 .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
1026 .unwrap_or_else(|| "audio".to_string());
1027
1028 if extension == "bin" {
1029 extension = "audio".to_string();
1030 }
1031
1032 let suffix = format!(".{extension}");
1033 let mut temp_file = Builder::new()
1034 .prefix("memvid-preview-")
1035 .suffix(&suffix)
1036 .tempfile_in(std::env::temp_dir())
1037 .context("failed to create temporary preview file")?;
1038 temp_file
1039 .write_all(&bytes)
1040 .context("failed to write audio data to preview file")?;
1041 temp_file
1042 .flush()
1043 .context("failed to flush preview file to disk")?;
1044
1045 let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
1046 drop(file);
1047
1048 println!("Opening preview...");
1049 open::that(&preview_path).with_context(|| {
1050 format!(
1051 "failed to launch default audio player for {}",
1052 preview_path.display()
1053 )
1054 })?;
1055
1056 let display_uri = cli_uri
1057 .or_else(|| frame.uri.as_deref())
1058 .unwrap_or("<unknown>");
1059 println!(
1060 "Opened preview for {} (frame {}) -> {} ({})",
1061 display_uri,
1062 frame.id,
1063 preview_path.display(),
1064 mime
1065 );
1066 Ok(())
1067}
1068
/// Plays a frame's audio payload on the default output device.
///
/// Playback runs from `start_seconds` (default 0; negative values are treated
/// as 0) to `end_seconds`, or to the end of the stream when no end is given.
/// When the frame metadata records a duration, a start beyond it is rejected
/// and an end beyond it is clamped with a warning. The call blocks until
/// playback finishes.
///
/// All argument validation happens before the audio device is opened, and
/// non-finite or out-of-range second values produce an error instead of a
/// panic in `Duration` conversion.
///
/// # Errors
///
/// Fails if the bounds are invalid, if the payload cannot be loaded, is empty,
/// or cannot be decoded, or if the audio output cannot be opened.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    // `f32` parsing accepts "nan" and "inf"; reject them up front.
    if start_seconds.map_or(false, |value| !value.is_finite()) {
        bail!("--start-seconds must be a finite number");
    }
    if end_seconds.map_or(false, |value| !value.is_finite()) {
        bail!("--end-seconds must be a finite number");
    }

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // A duration of 0.0 means the duration is unknown.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    if let Some(end) = end_seconds {
        if duration_meta > 0.0 && end > duration_meta + f32::EPSILON {
            warn!(
                "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                end, duration_meta
            );
        }
    }

    // Fallible conversions: very large values error out instead of panicking.
    let skip = Duration::try_from_secs_f32(start).context("--start-seconds is out of range")?;
    let segment = match end_seconds {
        Some(end) => {
            let effective_end = if duration_meta > 0.0 {
                end.min(duration_meta)
            } else {
                end
            };
            let length_secs = (effective_end - start).max(0.0);
            if length_secs <= 0.0 {
                bail!("playback duration is zero; adjust start/end seconds");
            }
            let length = Duration::try_from_secs_f32(length_secs)
                .context("--end-seconds is out of range")?;
            Some((effective_end, length))
        }
        None => None,
    };

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` must stay alive for the duration of playback.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    let segment_desc = match segment {
        Some((effective_end, length)) => {
            sink.append(decoder.skip_duration(skip).take_duration(length));
            format!("{start:.2}s → {effective_end:.2}s")
        }
        None => {
            sink.append(decoder.skip_duration(skip));
            format!("{start:.2}s → end")
        }
    };
    announce_playback(display_uri, &segment_desc);
    sink.sleep_until_end();
    Ok(())
}
1144
/// Prints the "now playing" line for `uri` together with a description of the played segment.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    println!("Playing {} ({})", uri, segment_desc);
}
1149
/// Returns `true` when the media type in `value` belongs to the `image/` family.
///
/// Anything after the first `;` (parameters) is ignored, surrounding whitespace is trimmed,
/// and the comparison is ASCII case-insensitive.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";
    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1154
/// Returns `true` when the media type in `value` belongs to the `audio/` family.
///
/// Anything after the first `;` (parameters) is ignored, surrounding whitespace is trimmed,
/// and the comparison is ASCII case-insensitive.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"audio/";
    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1159
/// Maps a MIME type to a conventional file extension (without the leading dot).
///
/// Parameters after the first `;` are ignored, surrounding whitespace is trimmed, and the
/// lookup is ASCII case-insensitive. Returns `None` for media types that are not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // (media type, extension) pairs; aliases of one format share the same extension.
    const KNOWN_TYPES: &[(&str, &str)] = &[
        // Images
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
        ("image/x-icon", "ico"),
        ("image/vnd.microsoft.icon", "ico"),
        ("image/svg+xml", "svg"),
        // Video
        ("video/mp4", "mp4"),
        ("video/iso.segment", "mp4"),
        ("video/quicktime", "mov"),
        ("video/webm", "webm"),
        ("video/x-matroska", "mkv"),
        ("video/matroska", "mkv"),
        ("video/x-msvideo", "avi"),
        ("video/mpeg", "mpg"),
        // Documents
        ("application/pdf", "pdf"),
        // Audio
        ("audio/mpeg", "mp3"),
        ("audio/mp3", "mp3"),
        ("audio/wav", "wav"),
        ("audio/x-wav", "wav"),
        ("audio/x-flac", "flac"),
        ("audio/flac", "flac"),
        ("audio/ogg", "ogg"),
        ("audio/vorbis", "ogg"),
        ("audio/x-m4a", "m4a"),
        ("audio/mp4", "m4a"),
        ("audio/aac", "aac"),
        ("audio/x-aiff", "aiff"),
        ("audio/aiff", "aiff"),
        // Text and structured data
        ("text/plain", "txt"),
        ("text/markdown", "md"),
        ("text/x-markdown", "md"),
        ("text/html", "html"),
        ("application/xhtml+xml", "xhtml"),
        ("application/json", "json"),
        ("text/json", "json"),
        ("application/vnd.api+json", "json"),
        ("application/xml", "xml"),
        ("text/xml", "xml"),
        ("text/csv", "csv"),
        ("application/csv", "csv"),
        ("application/javascript", "js"),
        ("text/javascript", "js"),
        ("text/css", "css"),
        ("application/yaml", "yaml"),
        ("application/x-yaml", "yaml"),
        ("text/yaml", "yaml"),
        // Office formats
        ("application/rtf", "rtf"),
        ("application/msword", "doc"),
        (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docx",
        ),
        ("application/vnd.ms-powerpoint", "ppt"),
        (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "pptx",
        ),
        ("application/vnd.ms-excel", "xls"),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "xlsx",
        ),
        // Archives
        ("application/zip", "zip"),
        ("application/x-tar", "tar"),
        ("application/x-7z-compressed", "7z"),
    ];

    let essence = match mime.split_once(';') {
        Some((head, _params)) => head,
        None => mime,
    };
    let essence = essence.trim().to_ascii_lowercase();

    KNOWN_TYPES
        .iter()
        .find(|entry| entry.0 == essence)
        .map(|&(_, extension)| extension)
}
/// Builds a short preview of a frame's search text.
///
/// The text is trimmed and cut to at most 160 characters (not bytes). Returns `None` when no
/// text is given or when it is empty after trimming.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    const SNIPPET_CHARS: usize = 160;

    let body = text?.trim();
    if body.is_empty() {
        return None;
    }
    Some(body.chars().take(SNIPPET_CHARS).collect())
}
1223pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1224 json!({
1225 "id": frame.id,
1226 "status": frame_status_str(frame.status),
1227 "timestamp": frame.timestamp,
1228 "kind": frame.kind,
1229 "track": frame.track,
1230 "uri": frame.uri,
1231 "title": frame.title,
1232 "payload_length": frame.payload_length,
1233 "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1234 "canonical_length": frame.canonical_length,
1235 "role": format!("{:?}", frame.role),
1236 "parent_id": frame.parent_id,
1237 "chunk_index": frame.chunk_index,
1238 "chunk_count": frame.chunk_count,
1239 "tags": frame.tags,
1240 "labels": frame.labels,
1241 "search_text": frame.search_text,
1242 "metadata": frame.metadata,
1243 "extra_metadata": frame.extra_metadata,
1244 "content_dates": frame.content_dates,
1245 "chunk_manifest": frame.chunk_manifest,
1246 "supersedes": frame.supersedes,
1247 "superseded_by": frame.superseded_by,
1248 "source_sha256": frame.source_sha256.map(|h| hex::encode(h)),
1249 "source_path": frame.source_path,
1250 })
1251}
1252pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1253 println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1254 println!("Timestamp: {}", frame.timestamp);
1255 if let Some(uri) = &frame.uri {
1256 println!("URI: {uri}");
1257 }
1258 if let Some(title) = &frame.title {
1259 println!("Title: {title}");
1260 }
1261 if let Some(kind) = &frame.kind {
1262 println!("Kind: {kind}");
1263 }
1264 if let Some(track) = &frame.track {
1265 println!("Track: {track}");
1266 }
1267 if let Some(supersedes) = frame.supersedes {
1268 println!("Supersedes frame: {supersedes}");
1269 }
1270 if let Some(successor) = frame.superseded_by {
1271 println!("Superseded by frame: {successor}");
1272 }
1273 println!(
1274 "Payload: {} bytes (canonical {:?}, logical {:?})",
1275 frame.payload_length, frame.canonical_encoding, frame.canonical_length
1276 );
1277 if !frame.tags.is_empty() {
1278 println!("Tags: {}", frame.tags.join(", "));
1279 }
1280 if !frame.labels.is_empty() {
1281 println!("Labels: {}", frame.labels.join(", "));
1282 }
1283 if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1284 println!("Search text: {snippet}");
1285 }
1286 if let Some(meta) = &frame.metadata {
1287 let rendered = serde_json::to_string_pretty(meta)?;
1288 println!("Metadata: {rendered}");
1289 }
1290 if !frame.extra_metadata.is_empty() {
1291 let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1292 entries.sort_by(|a, b| a.0.cmp(b.0));
1293 println!("Extra metadata:");
1294 for (key, value) in entries {
1295 println!(" {key}: {value}");
1296 }
1297 }
1298 if !frame.content_dates.is_empty() {
1299 println!("Content dates: {}", frame.content_dates.join(", "));
1300 }
1301 if let Some(hash) = frame.source_sha256 {
1303 println!(
1304 "Source SHA256: {} (raw binary not stored)",
1305 hex::encode(hash)
1306 );
1307 if let Some(path) = &frame.source_path {
1308 println!("Source path: {path}");
1309 }
1310 }
1311 match mem.frame_embedding(frame.id) {
1312 Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1313 Ok(None) => println!("Embedding: none"),
1314 Err(err) => println!("Embedding: unavailable ({err})"),
1315 }
1316 Ok(())
1317}
1318fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1319 let bytes = mem.frame_canonical_payload(frame.id)?;
1320 let raw = match String::from_utf8(bytes) {
1321 Ok(text) => text,
1322 Err(err) => {
1323 let bytes = err.into_bytes();
1324 String::from_utf8_lossy(&bytes).into_owned()
1325 }
1326 };
1327
1328 Ok(normalize_text(&raw, usize::MAX)
1329 .map(|n| n.text)
1330 .unwrap_or_default())
1331}
1332
1333fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1334 if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1335 return false;
1336 }
1337 let total_chars = text.chars().count();
1338 manifest
1339 .chunks
1340 .iter()
1341 .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1342}
1343
1344fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1345 let primary = frame
1346 .chunk_manifest
1347 .clone()
1348 .filter(|manifest| manifests_match_text(text, manifest));
1349 if primary.is_some() {
1350 return primary;
1351 }
1352
1353 frame
1354 .extra_metadata
1355 .get(CHUNK_MANIFEST_KEY)
1356 .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1357 .filter(|manifest| manifests_match_text(text, manifest))
1358}
1359
1360fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1361 let normalized = normalize_text(text, usize::MAX)
1362 .map(|n| n.text)
1363 .unwrap_or_default();
1364
1365 let effective_chunk = chunk_chars.max(1);
1366 let total_chars = normalized.chars().count();
1367 if total_chars == 0 {
1368 return TextChunkManifest {
1369 chunk_chars: effective_chunk,
1370 chunks: vec![TextChunkRange { start: 0, end: 0 }],
1371 };
1372 }
1373 if total_chars <= effective_chunk {
1374 return TextChunkManifest {
1375 chunk_chars: effective_chunk,
1376 chunks: vec![TextChunkRange {
1377 start: 0,
1378 end: total_chars,
1379 }],
1380 };
1381 }
1382 let mut chunks = Vec::new();
1383 let mut start = 0usize;
1384 while start < total_chars {
1385 let end = (start + effective_chunk).min(total_chars);
1386 chunks.push(TextChunkRange { start, end });
1387 start = end;
1388 }
1389 TextChunkManifest {
1390 chunk_chars: effective_chunk,
1391 chunks,
1392 }
1393}
1394
1395fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1396 if range.start >= range.end || text.is_empty() {
1397 return String::new();
1398 }
1399 let mut start_byte = text.len();
1400 let mut end_byte = text.len();
1401 let mut idx = 0usize;
1402 for (byte_offset, _) in text.char_indices() {
1403 if idx == range.start {
1404 start_byte = byte_offset;
1405 }
1406 if idx == range.end {
1407 end_byte = byte_offset;
1408 break;
1409 }
1410 idx += 1;
1411 }
1412 if start_byte == text.len() {
1413 return String::new();
1414 }
1415 if end_byte == text.len() {
1416 end_byte = text.len();
1417 }
1418 text[start_byte..end_byte].to_string()
1419}