memvid_cli/commands/
inspection.rs

1//! Inspection command handlers (view, stats, who)
2
3#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use memvid_core::table::list_tables;
14use memvid_core::{
15    lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
16    TextChunkRange,
17};
18use serde_json::{json, Value};
19use tempfile::Builder;
20use tracing::warn;
21use uuid::Uuid;
22
23use crate::config::CliConfig;
24use crate::utils::{
25    format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
26    owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
27};
28
/// Page length, in characters, that `memvid view` falls back to when neither
/// `--page-size` nor a chunk manifest derived from the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;

/// Name of the version-1 chunk manifest entry.
// NOTE(review): not referenced in the visible part of this file; presumably the
// metadata key under which a frame's text chunk manifest is stored — confirm.
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
31
32/// Arguments for the `view` subcommand
33#[derive(Args)]
34pub struct ViewArgs {
35    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
36    pub file: PathBuf,
37    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
38    pub frame_id: Option<u64>,
39    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
40    pub uri: Option<String>,
41    #[arg(long)]
42    pub json: bool,
43    #[arg(long, conflicts_with = "json")]
44    pub binary: bool,
45    #[arg(long, conflicts_with_all = ["json", "binary"])]
46    pub preview: bool,
47    /// Optional start time for video previews (HH:MM:SS[.mmm])
48    #[arg(
49        long = "start",
50        value_name = "HH:MM:SS",
51        requires = "preview",
52        conflicts_with_all = ["json", "binary", "play"]
53    )]
54    pub preview_start: Option<String>,
55    /// Optional end time for video previews (HH:MM:SS[.mmm])
56    #[arg(
57        long = "end",
58        value_name = "HH:MM:SS",
59        requires = "preview",
60        conflicts_with_all = ["json", "binary", "play"]
61    )]
62    pub preview_end: Option<String>,
63    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
64    pub play: bool,
65    #[arg(long = "start-seconds", requires = "play")]
66    pub start_seconds: Option<f32>,
67    #[arg(long = "end-seconds", requires = "play")]
68    pub end_seconds: Option<f32>,
69    #[arg(long, value_name = "N", default_value_t = 1)]
70    pub page: usize,
71    #[arg(long = "page-size", value_name = "CHARS")]
72    pub page_size: Option<usize>,
73}
74
75/// Arguments for the `stats` subcommand
76#[derive(Args)]
77pub struct StatsArgs {
78    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
79    pub file: PathBuf,
80    #[arg(long)]
81    pub json: bool,
82    /// Replay: Show stats for frames with ID <= AS_OF_FRAME (time-travel view)
83    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
84    pub as_of_frame: Option<u64>,
85    /// Replay: Show stats for frames with timestamp <= AS_OF_TS (time-travel view)
86    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
87    pub as_of_ts: Option<i64>,
88}
89
90/// Arguments for the `who` subcommand
91#[derive(Args)]
92pub struct WhoArgs {
93    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
94    pub file: PathBuf,
95    #[arg(long)]
96    pub json: bool,
97}
98
99/// Handler for `memvid stats`
100pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
101    let mut mem = Memvid::open_read_only(&args.file)?;
102    let stats = mem.stats()?;
103    let tables = list_tables(&mut mem).unwrap_or_default();
104
105    // Note: Replay filtering for stats is currently not implemented
106    // The stats show the full memory state
107    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
108        eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
109        eprintln!("      Use 'find' or 'timeline' commands for filtered results.");
110    }
111    let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
112    let payload_share_percent: f64 = if stats.size_bytes > 0 {
113        round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
114    } else {
115        0.0
116    };
117    let overhead_share_percent: f64 = if stats.size_bytes > 0 {
118        round_percent((100.0 - payload_share_percent).max(0.0))
119    } else {
120        0.0
121    };
122    let maintenance_command = format!(
123        "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
124        args.file.display()
125    );
126
127    if args.json {
128        let mut raw_json = serde_json::to_value(&stats)?;
129        if let Value::Object(ref mut obj) = raw_json {
130            obj.remove("tier");
131        }
132
133        // Build tables list for JSON output
134        let tables_json: Vec<serde_json::Value> = tables
135            .iter()
136            .map(|t| {
137                json!({
138                    "table_id": t.table_id,
139                    "source_file": t.source_file,
140                    "n_rows": t.n_rows,
141                    "n_cols": t.n_cols,
142                    "pages": format!("{}-{}", t.page_start, t.page_end),
143                    "quality": format!("{:?}", t.quality),
144                    "headers": t.headers,
145                })
146            })
147            .collect();
148
149        // Compute embedding quality for JSON output
150        let embedding_quality_json = if stats.has_vec_index {
151            mem.embedding_quality().ok().flatten().map(|eq| {
152                json!({
153                    "vector_count": eq.vector_count,
154                    "dimension": eq.dimension,
155                    "avg_similarity": eq.avg_similarity,
156                    "min_similarity": eq.min_similarity,
157                    "max_similarity": eq.max_similarity,
158                    "std_similarity": eq.std_similarity,
159                    "clustering_coefficient": eq.clustering_coefficient,
160                    "estimated_clusters": eq.estimated_clusters,
161                    "recommended_threshold": eq.recommended_threshold,
162                    "quality_rating": eq.quality_rating,
163                    "quality_explanation": eq.quality_explanation,
164                })
165            })
166        } else {
167            None
168        };
169
170        let report = json!({
171            "summary": {
172                "sequence": stats.seq_no,
173                "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
174                "usage": format!(
175                    "{} used / {} total ({})",
176                    format_bytes(stats.size_bytes),
177                    format_bytes(stats.capacity_bytes),
178                    format_percent(stats.storage_utilisation_percent)
179                ),
180                "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
181            },
182            "storage": {
183                "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
184                "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
185                "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
186                "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
187                "compression_ratio": format_percent(stats.compression_ratio_percent),
188            },
189            "frames": {
190                "average_stored": format_bytes(stats.average_frame_payload_bytes),
191                "average_logical": format_bytes(stats.average_frame_logical_bytes),
192                "clip_images": stats.clip_image_count,
193            },
194            "indexes": {
195                "lexical": yes_no(stats.has_lex_index),
196                "vector": yes_no(stats.has_vec_index),
197                "time": yes_no(stats.has_time_index),
198            },
199            "embedding_quality": embedding_quality_json,
200            "tables": {
201                "count": tables.len(),
202                "tables": tables_json,
203            },
204            "maintenance": maintenance_command,
205            "raw": raw_json,
206        });
207
208        println!("{}", serde_json::to_string_pretty(&report)?);
209    } else {
210        let seq_display = stats
211            .seq_no
212            .map(|seq| seq.to_string())
213            .unwrap_or_else(|| "n/a".to_string());
214
215        println!("Memory: {}", args.file.display());
216        println!("Sequence: {}", seq_display);
217        println!(
218            "Frames: {} total ({} active)",
219            stats.frame_count, stats.active_frame_count
220        );
221
222        println!("\nCapacity:");
223        println!(
224            "  Usage: {} used / {} total ({})",
225            format_bytes(stats.size_bytes),
226            format_bytes(stats.capacity_bytes),
227            format_percent(stats.storage_utilisation_percent)
228        );
229        println!(
230            "  Remaining: {}",
231            format_bytes(stats.remaining_capacity_bytes)
232        );
233
234        println!("\nStorage breakdown:");
235        println!(
236            "  Payload: {} ({})",
237            format_bytes(stats.payload_bytes),
238            format_percent(payload_share_percent)
239        );
240        println!(
241            "  Overhead: {} ({})",
242            format_bytes(overhead_bytes),
243            format_percent(overhead_share_percent)
244        );
245        // PHASE 2: Detailed overhead breakdown for observability
246        println!("    ├─ WAL: {}", format_bytes(stats.wal_bytes));
247        println!(
248            "    ├─ Lexical index: {}",
249            format_bytes(stats.lex_index_bytes)
250        );
251        println!(
252            "    ├─ Vector index: {}",
253            format_bytes(stats.vec_index_bytes)
254        );
255        println!(
256            "    └─ Time index: {}",
257            format_bytes(stats.time_index_bytes)
258        );
259        println!(
260            "  Logical payload: {} before compression",
261            format_bytes(stats.logical_bytes)
262        );
263        println!(
264            "  Compression savings: {} ({})",
265            format_bytes(stats.saved_bytes),
266            format_percent(stats.savings_percent)
267        );
268
269        println!("\nAverage frame:");
270        println!(
271            "  Stored: {}   Logical: {}",
272            format_bytes(stats.average_frame_payload_bytes),
273            format_bytes(stats.average_frame_logical_bytes)
274        );
275        if stats.clip_image_count > 0 {
276            println!("  CLIP images: {}", stats.clip_image_count);
277        }
278
279        // PHASE 2: Per-document cost analysis
280        if stats.active_frame_count > 0 {
281            let overhead_per_doc = overhead_bytes / stats.active_frame_count;
282            let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
283            let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
284
285            println!("\nPer-document overhead:");
286            println!("  Total: {}", format_bytes(overhead_per_doc));
287            if stats.has_lex_index {
288                println!("  Lexical: {}", format_bytes(lex_per_doc));
289            }
290            if stats.has_vec_index {
291                let vec_ratio = if stats.average_frame_payload_bytes > 0 {
292                    vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
293                } else {
294                    0.0
295                };
296                println!(
297                    "  Vector: {} ({:.0}x text size)",
298                    format_bytes(vec_per_doc),
299                    vec_ratio
300                );
301            }
302        }
303
304        println!("\nIndexes:");
305        println!(
306            "  Lexical: {}   Vector: {}   Time: {}",
307            yes_no(stats.has_lex_index),
308            yes_no(stats.has_vec_index),
309            yes_no(stats.has_time_index)
310        );
311
312        // Show embedding quality stats if vector index is available
313        if stats.has_vec_index {
314            if let Ok(Some(eq)) = mem.embedding_quality() {
315                println!("\nEmbedding Quality:");
316                println!(
317                    "  Vectors: {}   Dimension: {}",
318                    eq.vector_count, eq.dimension
319                );
320                println!(
321                    "  Similarity: avg={:.3}  min={:.3}  max={:.3}  std={:.3}",
322                    eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
323                );
324                println!(
325                    "  Clusters: ~{}   Quality: {}",
326                    eq.estimated_clusters, eq.quality_rating
327                );
328                println!(
329                    "  Recommended --min-relevancy: {:.1}",
330                    eq.recommended_threshold
331                );
332                println!("  {}", eq.quality_explanation);
333            }
334        }
335
336        if !tables.is_empty() {
337            println!("\nTables: {} extracted", tables.len());
338            for t in &tables {
339                println!(
340                    "  {} — {} rows × {} cols ({})",
341                    t.table_id, t.n_rows, t.n_cols, t.source_file
342                );
343            }
344        }
345
346        println!("\nMaintenance:");
347        println!(
348            "  Run `{}` to rebuild indexes and reclaim space.",
349            maintenance_command
350        );
351    }
352    Ok(())
353}
354
355/// Handler for `memvid who`
356pub fn handle_who(args: WhoArgs) -> Result<()> {
357    match lockfile::current_owner(&args.file)? {
358        Some(owner) => {
359            if args.json {
360                let output = json!({
361                    "locked": true,
362                    "owner": owner_hint_to_json(&owner),
363                });
364                println!("{}", serde_json::to_string_pretty(&output)?);
365            } else {
366                println!("{} is locked by:", args.file.display());
367                if let Some(pid) = owner.pid {
368                    println!("  pid: {pid}");
369                }
370                if let Some(cmd) = owner.cmd.as_deref() {
371                    println!("  cmd: {cmd}");
372                }
373                if let Some(started) = owner.started_at.as_deref() {
374                    println!("  started_at: {started}");
375                }
376                if let Some(last) = owner.last_heartbeat.as_deref() {
377                    println!("  last_heartbeat: {last}");
378                }
379                if let Some(interval) = owner.heartbeat_ms {
380                    println!("  heartbeat_interval_ms: {interval}");
381                }
382                if let Some(file_id) = owner.file_id.as_deref() {
383                    println!("  file_id: {file_id}");
384                }
385                if let Some(path) = owner.file_path.as_ref() {
386                    println!("  file_path: {}", path.display());
387                }
388            }
389        }
390        None => {
391            if args.json {
392                let output = json!({"locked": false});
393                println!("{}", serde_json::to_string_pretty(&output)?);
394            } else {
395                println!("No active writer for {}", args.file.display());
396            }
397        }
398    }
399    Ok(())
400}
401
402// ============================================================================
403// View command handler and helpers
404// ============================================================================
405
406/// Handler for `memvid view`
407pub fn handle_view(args: ViewArgs) -> Result<()> {
408    if args.page == 0 {
409        bail!("page must be greater than zero");
410    }
411    if let Some(size) = args.page_size {
412        if size == 0 {
413            bail!("page-size must be greater than zero");
414        }
415    }
416
417    let mut mem = open_read_only_mem(&args.file)?;
418    let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
419
420    if args.play {
421        #[cfg(feature = "audio-playback")]
422        {
423            play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
424            return Ok(());
425        }
426        #[cfg(not(feature = "audio-playback"))]
427        {
428            bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
429        }
430    }
431
432    if args.preview {
433        let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
434        preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
435        return Ok(());
436    }
437
438    if args.binary {
439        let bytes = mem.frame_canonical_payload(frame.id)?;
440        let mut stdout = io::stdout();
441        stdout.write_all(&bytes)?;
442        stdout.flush()?;
443        return Ok(());
444    }
445
446    let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
447    let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
448
449    let page_size = args
450        .page_size
451        .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
452        .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
453
454    let mut manifest = if args.page_size.is_none() {
455        manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
456    } else {
457        compute_chunk_manifest(&canonical_text, page_size)
458    };
459    if manifest.chunks.is_empty() {
460        manifest = TextChunkManifest {
461            chunk_chars: page_size,
462            chunks: vec![TextChunkRange {
463                start: 0,
464                end: canonical_text.chars().count(),
465            }],
466        };
467    }
468
469    if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
470        let total_chars = canonical_text.chars().count();
471        manifest = TextChunkManifest {
472            chunk_chars: total_chars.max(1),
473            chunks: vec![TextChunkRange {
474                start: 0,
475                end: total_chars,
476            }],
477        };
478    }
479
480    let total_pages = manifest.chunks.len().max(1);
481    if args.page > total_pages {
482        bail!(
483            "page {} is out of range (total pages: {})",
484            args.page,
485            total_pages
486        );
487    }
488
489    let chunk = &manifest.chunks[args.page - 1];
490    let content = extract_chunk_slice(&canonical_text, chunk);
491
492    if args.json {
493        let mut frame_json = frame_to_json(&frame);
494        if let Some(obj) = frame_json.as_object_mut() {
495            // Note: Do NOT overwrite search_text - it contains the extracted text from the document.
496            // The "content" field shows the paginated payload view.
497            if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
498                if let Some(manifest_obj) = manifest_json.as_object_mut() {
499                    let total = manifest.chunks.len();
500                    if total > 0 {
501                        let mut window = serde_json::Map::new();
502                        let idx = args.page.saturating_sub(1).min(total - 1);
503                        if idx > 0 {
504                            let prev = &manifest.chunks[idx - 1];
505                            window.insert("prev".into(), json!([prev.start, prev.end]));
506                        }
507                        let current = &manifest.chunks[idx];
508                        window.insert("current".into(), json!([current.start, current.end]));
509                        if idx + 1 < total {
510                            let next = &manifest.chunks[idx + 1];
511                            window.insert("next".into(), json!([next.start, next.end]));
512                        }
513                        manifest_obj.insert("chunks".into(), Value::Object(window));
514                    }
515                }
516            }
517        }
518        let json = json!({
519            "frame": frame_json,
520            "page": args.page,
521            "page_size": manifest.chunk_chars,
522            "page_count": total_pages,
523            "has_prev": args.page > 1,
524            "has_next": args.page < total_pages,
525            "content": content,
526        });
527        println!("{}", serde_json::to_string_pretty(&json)?);
528    } else {
529        print_frame_summary(&mut mem, &frame)?;
530        println!(
531            "Page {}/{} ({} chars per page)",
532            args.page, total_pages, manifest.chunk_chars
533        );
534        println!();
535        println!("{}", content);
536    }
537    Ok(())
538}
539
/// Optional time window for a video preview.
///
/// Either bound may be absent; a missing `start_ms` means "from the
/// beginning" and a missing `end_ms` means "to the end" as far as the
/// trimming code in this file is concerned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PreviewBounds {
    /// Where the preview starts, in milliseconds.
    pub start_ms: Option<u64>,
    /// Where the preview ends, in milliseconds.
    pub end_ms: Option<u64>,
}
545
546pub fn parse_preview_bounds(
547    start: Option<&String>,
548    end: Option<&String>,
549) -> Result<Option<PreviewBounds>> {
550    let start_ms = match start {
551        Some(value) => Some(parse_timecode(value)?),
552        None => None,
553    };
554    let end_ms = match end {
555        Some(value) => Some(parse_timecode(value)?),
556        None => None,
557    };
558
559    if let (Some(s), Some(e)) = (start_ms, end_ms) {
560        if e <= s {
561            anyhow::bail!("--end must be greater than --start");
562        }
563    }
564
565    if start_ms.is_none() && end_ms.is_none() {
566        Ok(None)
567    } else {
568        Ok(Some(PreviewBounds { start_ms, end_ms }))
569    }
570}
571
572fn preview_frame_media(
573    mem: &mut Memvid,
574    frame: &Frame,
575    cli_uri: Option<&str>,
576    bounds: Option<PreviewBounds>,
577) -> Result<()> {
578    let manifest = mem.media_manifest(frame.id)?;
579    let mut mime = manifest
580        .as_ref()
581        .map(|m| m.mime.clone())
582        .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
583        .unwrap_or_else(|| "application/octet-stream".to_string());
584
585    // If mime is generic, try to detect from payload bytes
586    if mime == "application/octet-stream" {
587        if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
588            if let Some(kind) = infer::get(&bytes) {
589                mime = kind.mime_type().to_string();
590            }
591        }
592    }
593
594    let is_video = manifest
595        .as_ref()
596        .map(|media| media.kind.eq_ignore_ascii_case("video"))
597        .unwrap_or_else(|| mime.starts_with("video/"));
598
599    if is_video {
600        preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
601    } else {
602        if bounds.is_some() {
603            anyhow::bail!("--start/--end are only supported for video previews");
604        }
605        if is_image_mime(&mime) {
606            preview_frame_image(mem, frame, cli_uri)?;
607        } else if is_audio_mime(&mime) {
608            preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
609        } else {
610            preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
611        }
612    }
613    Ok(())
614}
615
616fn preview_frame_video(
617    mem: &mut Memvid,
618    frame: &Frame,
619    cli_uri: Option<&str>,
620    bounds: Option<PreviewBounds>,
621    manifest: Option<MediaManifest>,
622    mime: &str,
623) -> Result<()> {
624    let extension = manifest
625        .as_ref()
626        .and_then(|m| m.filename.as_deref())
627        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
628        .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
629        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
630        .unwrap_or_else(|| "mp4".to_string());
631
632    let mut temp_file = Builder::new()
633        .prefix("memvid-preview-")
634        .suffix(&format!(".{extension}"))
635        .tempfile_in(std::env::temp_dir())
636        .context("failed to create temporary preview file")?;
637
638    let mut reader = mem
639        .blob_reader(frame.id)
640        .context("failed to stream payload for preview")?;
641    io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
642    temp_file
643        .flush()
644        .context("failed to flush video preview to disk")?;
645
646    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
647    drop(file);
648
649    let mut display_path = preview_path.clone();
650    if let Some(ref span) = bounds {
651        let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
652        if needs_trim {
653            if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
654                display_path = trimmed;
655            }
656        }
657    }
658
659    println!("Opening preview...");
660    open::that(&display_path).with_context(|| {
661        format!(
662            "failed to launch default video player for {}",
663            display_path.display()
664        )
665    })?;
666
667    let display_uri = cli_uri
668        .or_else(|| frame.uri.as_deref())
669        .unwrap_or("<unknown>");
670    println!(
671        "Opened preview for {} (frame {}) -> {} ({})",
672        display_uri,
673        frame.id,
674        display_path.display(),
675        mime
676    );
677    Ok(())
678}
679
680fn maybe_trim_with_ffmpeg(
681    source: &Path,
682    extension: &str,
683    bounds: &PreviewBounds,
684) -> Result<Option<PathBuf>> {
685    if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
686        return Ok(None);
687    }
688
689    let ffmpeg = match which::which("ffmpeg") {
690        Ok(path) => path,
691        Err(_) => {
692            warn!("ffmpeg binary not found on PATH; opening full video");
693            return Ok(None);
694        }
695    };
696
697    let target = std::env::temp_dir().join(format!(
698        "memvid-preview-clip-{}.{}",
699        Uuid::new_v4(),
700        extension
701    ));
702
703    let mut command = Command::new(ffmpeg);
704    command.arg("-y");
705    if let Some(start) = bounds.start_ms {
706        command.arg("-ss").arg(format_timestamp_ms(start));
707    }
708    command.arg("-i").arg(source);
709    if let Some(end) = bounds.end_ms {
710        command.arg("-to").arg(format_timestamp_ms(end));
711    }
712    command.arg("-c").arg("copy");
713    command.arg(&target);
714
715    let status = command
716        .status()
717        .context("failed to run ffmpeg for preview trimming")?;
718    if status.success() {
719        return Ok(Some(target));
720    }
721
722    let details = status
723        .code()
724        .map(|code| code.to_string())
725        .unwrap_or_else(|| "terminated".to_string());
726    warn!("ffmpeg exited with status {details}; opening full video");
727    Ok(None)
728}
729
730fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
731    let bytes = mem
732        .frame_canonical_payload(frame.id)
733        .context("failed to load canonical payload for frame")?;
734    if bytes.is_empty() {
735        bail!("frame payload is empty; nothing to preview");
736    }
737
738    let detected_kind = infer::get(&bytes);
739    let mut mime = frame
740        .metadata
741        .as_ref()
742        .and_then(|meta| meta.mime.clone())
743        .filter(|value| is_image_mime(value));
744
745    if mime.is_none() {
746        if let Some(kind) = &detected_kind {
747            let candidate = kind.mime_type();
748            if is_image_mime(candidate) {
749                mime = Some(candidate.to_string());
750            }
751        }
752    }
753
754    let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
755    if !is_image_mime(&mime) {
756        bail!("frame mime type {mime} is not an image");
757    }
758
759    let extension = detected_kind
760        .as_ref()
761        .map(|kind| kind.extension().to_string())
762        .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
763        .unwrap_or_else(|| "img".to_string());
764
765    let suffix = format!(".{extension}");
766    let mut temp_file = Builder::new()
767        .prefix("memvid-preview-")
768        .suffix(&suffix)
769        .tempfile_in(std::env::temp_dir())
770        .context("failed to create temporary preview file")?;
771    temp_file
772        .write_all(&bytes)
773        .context("failed to write image data to preview file")?;
774    temp_file
775        .flush()
776        .context("failed to flush preview file to disk")?;
777
778    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
779    drop(file);
780
781    println!("Opening preview...");
782    open::that(&preview_path).with_context(|| {
783        format!(
784            "failed to launch default image viewer for {}",
785            preview_path.display()
786        )
787    })?;
788
789    let display_uri = cli_uri
790        .or_else(|| frame.uri.as_deref())
791        .unwrap_or("<unknown>");
792    println!(
793        "Opened preview for {} (frame {}) -> {} ({})",
794        display_uri,
795        frame.id,
796        preview_path.display(),
797        mime
798    );
799    Ok(())
800}
801
802fn preview_frame_document(
803    mem: &mut Memvid,
804    frame: &Frame,
805    cli_uri: Option<&str>,
806    manifest: Option<&MediaManifest>,
807    mime: &str,
808) -> Result<()> {
809    let bytes = mem
810        .frame_canonical_payload(frame.id)
811        .context("failed to load canonical payload for frame")?;
812    if bytes.is_empty() {
813        bail!("frame payload is empty; nothing to preview");
814    }
815
816    let mut extension = manifest
817        .and_then(|m| m.filename.as_deref())
818        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
819        .map(|ext| ext.trim_start_matches('.').to_string())
820        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
821        .unwrap_or_else(|| "bin".to_string());
822
823    if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
824        extension = "txt".to_string();
825    }
826
827    let suffix = format!(".{extension}");
828    let mut temp_file = Builder::new()
829        .prefix("memvid-preview-")
830        .suffix(&suffix)
831        .tempfile_in(std::env::temp_dir())
832        .context("failed to create temporary preview file")?;
833    temp_file
834        .write_all(&bytes)
835        .context("failed to write document data to preview file")?;
836    temp_file
837        .flush()
838        .context("failed to flush preview file to disk")?;
839
840    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
841    drop(file);
842
843    println!("Opening preview...");
844    open::that(&preview_path).with_context(|| {
845        format!(
846            "failed to launch default viewer for {}",
847            preview_path.display()
848        )
849    })?;
850
851    let display_uri = cli_uri
852        .or_else(|| frame.uri.as_deref())
853        .unwrap_or("<unknown>");
854    println!(
855        "Opened preview for {} (frame {}) -> {} ({})",
856        display_uri,
857        frame.id,
858        preview_path.display(),
859        mime
860    );
861    Ok(())
862}
863
864fn preview_frame_audio_file(
865    mem: &mut Memvid,
866    frame: &Frame,
867    cli_uri: Option<&str>,
868    manifest: Option<&MediaManifest>,
869    mime: &str,
870) -> Result<()> {
871    let bytes = mem
872        .frame_canonical_payload(frame.id)
873        .context("failed to load canonical payload for frame")?;
874    if bytes.is_empty() {
875        bail!("frame payload is empty; nothing to preview");
876    }
877
878    let mut extension = manifest
879        .and_then(|m| m.filename.as_deref())
880        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
881        .map(|ext| ext.trim_start_matches('.').to_string())
882        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
883        .unwrap_or_else(|| "audio".to_string());
884
885    if extension == "bin" {
886        extension = "audio".to_string();
887    }
888
889    let suffix = format!(".{extension}");
890    let mut temp_file = Builder::new()
891        .prefix("memvid-preview-")
892        .suffix(&suffix)
893        .tempfile_in(std::env::temp_dir())
894        .context("failed to create temporary preview file")?;
895    temp_file
896        .write_all(&bytes)
897        .context("failed to write audio data to preview file")?;
898    temp_file
899        .flush()
900        .context("failed to flush preview file to disk")?;
901
902    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
903    drop(file);
904
905    println!("Opening preview...");
906    open::that(&preview_path).with_context(|| {
907        format!(
908            "failed to launch default audio player for {}",
909            preview_path.display()
910        )
911    })?;
912
913    let display_uri = cli_uri
914        .or_else(|| frame.uri.as_deref())
915        .unwrap_or("<unknown>");
916    println!(
917        "Opened preview for {} (frame {}) -> {} ({})",
918        display_uri,
919        frame.id,
920        preview_path.display(),
921        mime
922    );
923    Ok(())
924}
925
/// Decodes a frame's canonical payload as audio and plays it on the default
/// output device, blocking until playback finishes.
///
/// `start_seconds` (default 0, negative values are clamped to 0) and
/// `end_seconds` select the segment to play. When the frame metadata carries a
/// positive audio duration, a start at or beyond it is rejected and an end
/// beyond it is clamped (with a warning).
///
/// # Errors
///
/// Fails when either offset is not a finite number, when the end is not after
/// the start, when the payload cannot be loaded, is empty, or cannot be
/// decoded, when the audio output cannot be opened, or when the requested
/// segment is empty or too large to express as a `Duration`.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    /// Converts seconds to a `Duration`, returning an error for values that
    /// `Duration::from_secs_f32` would panic on (negative, non-finite, or too
    /// large).
    fn playback_duration(seconds: f32, what: &str) -> Result<Duration> {
        if !seconds.is_finite() || seconds < 0.0 || seconds >= Duration::MAX.as_secs_f32() {
            bail!("{what} ({seconds}) is out of range for audio playback");
        }
        Ok(Duration::from_secs_f32(seconds))
    }

    // `f32` parsing accepts "nan" and "inf"; reject them before any arithmetic
    // so they cannot be silently clamped or reach the `Duration` conversion.
    if matches!(start_seconds, Some(value) if !value.is_finite()) {
        bail!("--start-seconds must be a finite number of seconds");
    }
    if matches!(end_seconds, Some(value) if !value.is_finite()) {
        bail!("--end-seconds must be a finite number of seconds");
    }

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // A duration of 0.0 means "unknown"; duration-based checks are skipped then.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }
    let skip = playback_duration(start, "start-seconds")?;

    if let Some(end) = end_seconds {
        if duration_meta > 0.0 && end > duration_meta + f32::EPSILON {
            warn!(
                "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                end, duration_meta
            );
        }
    }

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` is bound to a name so it stays alive until the function returns.
    // NOTE(review): presumably dropping it would stop output early — confirm
    // against the rodio version in use.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    if let Some(end) = end_seconds {
        let effective_end = if duration_meta > 0.0 {
            end.min(duration_meta)
        } else {
            end
        };
        let duration = (effective_end - start).max(0.0);
        if duration <= 0.0 {
            bail!("playback duration is zero; adjust start/end seconds");
        }
        let take = playback_duration(duration, "playback duration")?;
        let source = decoder.skip_duration(skip).take_duration(take);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → {effective_end:.2}s");
        announce_playback(display_uri, &segment_desc);
    } else {
        let source = decoder.skip_duration(skip);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → end");
        announce_playback(display_uri, &segment_desc);
    }
    sink.sleep_until_end();
    Ok(())
}
1001
/// Prints a one-line notice naming the URI being played and the segment
/// description supplied by the caller.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    let notice = format!("Playing {uri} ({segment_desc})");
    println!("{notice}");
}
1006
/// Returns `true` when the media type of `value` (ignoring any `;parameters`,
/// surrounding whitespace, and ASCII case) begins with `image/`.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &str = "image/";
    // Only the part before the first `;` identifies the media type.
    let media_type = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    media_type
        .trim()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1011
/// Returns `true` when the media type of `value` (ignoring any `;parameters`,
/// surrounding whitespace, and ASCII case) begins with `audio/`.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &str = "audio/";
    // Only the part before the first `;` identifies the media type.
    let media_type = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    media_type
        .trim()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1016
/// Maps a MIME type to a conventional file extension (without the dot).
///
/// Parameters after the first `;`, surrounding whitespace, and ASCII case are
/// ignored. Returns `None` for media types that are not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // (media type, extension) pairs; every media type is lowercase ASCII so a
    // case-insensitive comparison against the input is sufficient.
    const KNOWN_TYPES: &[(&str, &str)] = &[
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
        ("image/x-icon", "ico"),
        ("image/vnd.microsoft.icon", "ico"),
        ("image/svg+xml", "svg"),
        ("video/mp4", "mp4"),
        ("video/iso.segment", "mp4"),
        ("video/quicktime", "mov"),
        ("video/webm", "webm"),
        ("video/x-matroska", "mkv"),
        ("video/matroska", "mkv"),
        ("video/x-msvideo", "avi"),
        ("video/mpeg", "mpg"),
        ("application/pdf", "pdf"),
        ("audio/mpeg", "mp3"),
        ("audio/mp3", "mp3"),
        ("audio/wav", "wav"),
        ("audio/x-wav", "wav"),
        ("audio/x-flac", "flac"),
        ("audio/flac", "flac"),
        ("audio/ogg", "ogg"),
        ("audio/vorbis", "ogg"),
        ("audio/x-m4a", "m4a"),
        ("audio/mp4", "m4a"),
        ("audio/aac", "aac"),
        ("audio/x-aiff", "aiff"),
        ("audio/aiff", "aiff"),
        ("text/plain", "txt"),
        ("text/markdown", "md"),
        ("text/x-markdown", "md"),
        ("text/html", "html"),
        ("application/xhtml+xml", "xhtml"),
        ("application/json", "json"),
        ("text/json", "json"),
        ("application/vnd.api+json", "json"),
        ("application/xml", "xml"),
        ("text/xml", "xml"),
        ("text/csv", "csv"),
        ("application/csv", "csv"),
        ("application/javascript", "js"),
        ("text/javascript", "js"),
        ("text/css", "css"),
        ("application/yaml", "yaml"),
        ("application/x-yaml", "yaml"),
        ("text/yaml", "yaml"),
        ("application/rtf", "rtf"),
        ("application/msword", "doc"),
        (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docx",
        ),
        ("application/vnd.ms-powerpoint", "ppt"),
        (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "pptx",
        ),
        ("application/vnd.ms-excel", "xls"),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "xlsx",
        ),
        ("application/zip", "zip"),
        ("application/x-tar", "tar"),
        ("application/x-7z-compressed", "7z"),
    ];

    // Only the part before the first `;` identifies the media type.
    let media_type = match mime.split_once(';') {
        Some((head, _)) => head,
        None => mime,
    }
    .trim();

    KNOWN_TYPES
        .iter()
        .find(|(known, _)| media_type.eq_ignore_ascii_case(known))
        .map(|&(_, extension)| extension)
}
/// Builds a short preview of a frame's search text.
///
/// Returns `None` when no text is given or it is blank after trimming;
/// otherwise returns the first 160 characters of the trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    let trimmed = text?.trim();
    if trimmed.is_empty() {
        return None;
    }
    // Counted in `char`s rather than bytes, so multi-byte text is never split.
    Some(trimmed.chars().take(160).collect())
}
1080pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1081    json!({
1082        "id": frame.id,
1083        "status": frame_status_str(frame.status),
1084        "timestamp": frame.timestamp,
1085        "kind": frame.kind,
1086        "track": frame.track,
1087        "uri": frame.uri,
1088        "title": frame.title,
1089        "payload_length": frame.payload_length,
1090        "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1091        "canonical_length": frame.canonical_length,
1092        "role": format!("{:?}", frame.role),
1093        "parent_id": frame.parent_id,
1094        "chunk_index": frame.chunk_index,
1095        "chunk_count": frame.chunk_count,
1096        "tags": frame.tags,
1097        "labels": frame.labels,
1098        "search_text": frame.search_text,
1099        "metadata": frame.metadata,
1100        "extra_metadata": frame.extra_metadata,
1101        "content_dates": frame.content_dates,
1102        "chunk_manifest": frame.chunk_manifest,
1103        "supersedes": frame.supersedes,
1104        "superseded_by": frame.superseded_by,
1105    })
1106}
1107pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1108    println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1109    println!("Timestamp: {}", frame.timestamp);
1110    if let Some(uri) = &frame.uri {
1111        println!("URI: {uri}");
1112    }
1113    if let Some(title) = &frame.title {
1114        println!("Title: {title}");
1115    }
1116    if let Some(kind) = &frame.kind {
1117        println!("Kind: {kind}");
1118    }
1119    if let Some(track) = &frame.track {
1120        println!("Track: {track}");
1121    }
1122    if let Some(supersedes) = frame.supersedes {
1123        println!("Supersedes frame: {supersedes}");
1124    }
1125    if let Some(successor) = frame.superseded_by {
1126        println!("Superseded by frame: {successor}");
1127    }
1128    println!(
1129        "Payload: {} bytes (canonical {:?}, logical {:?})",
1130        frame.payload_length, frame.canonical_encoding, frame.canonical_length
1131    );
1132    if !frame.tags.is_empty() {
1133        println!("Tags: {}", frame.tags.join(", "));
1134    }
1135    if !frame.labels.is_empty() {
1136        println!("Labels: {}", frame.labels.join(", "));
1137    }
1138    if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1139        println!("Search text: {snippet}");
1140    }
1141    if let Some(meta) = &frame.metadata {
1142        let rendered = serde_json::to_string_pretty(meta)?;
1143        println!("Metadata: {rendered}");
1144    }
1145    if !frame.extra_metadata.is_empty() {
1146        let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1147        entries.sort_by(|a, b| a.0.cmp(b.0));
1148        println!("Extra metadata:");
1149        for (key, value) in entries {
1150            println!("  {key}: {value}");
1151        }
1152    }
1153    if !frame.content_dates.is_empty() {
1154        println!("Content dates: {}", frame.content_dates.join(", "));
1155    }
1156    match mem.frame_embedding(frame.id) {
1157        Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1158        Ok(None) => println!("Embedding: none"),
1159        Err(err) => println!("Embedding: unavailable ({err})"),
1160    }
1161    Ok(())
1162}
1163fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1164    let bytes = mem.frame_canonical_payload(frame.id)?;
1165    let raw = match String::from_utf8(bytes) {
1166        Ok(text) => text,
1167        Err(err) => {
1168            let bytes = err.into_bytes();
1169            String::from_utf8_lossy(&bytes).into_owned()
1170        }
1171    };
1172
1173    Ok(normalize_text(&raw, usize::MAX)
1174        .map(|n| n.text)
1175        .unwrap_or_default())
1176}
1177
1178fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1179    if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1180        return false;
1181    }
1182    let total_chars = text.chars().count();
1183    manifest
1184        .chunks
1185        .iter()
1186        .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1187}
1188
1189fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1190    let primary = frame
1191        .chunk_manifest
1192        .clone()
1193        .filter(|manifest| manifests_match_text(text, manifest));
1194    if primary.is_some() {
1195        return primary;
1196    }
1197
1198    frame
1199        .extra_metadata
1200        .get(CHUNK_MANIFEST_KEY)
1201        .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1202        .filter(|manifest| manifests_match_text(text, manifest))
1203}
1204
1205fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1206    let normalized = normalize_text(text, usize::MAX)
1207        .map(|n| n.text)
1208        .unwrap_or_default();
1209
1210    let effective_chunk = chunk_chars.max(1);
1211    let total_chars = normalized.chars().count();
1212    if total_chars == 0 {
1213        return TextChunkManifest {
1214            chunk_chars: effective_chunk,
1215            chunks: vec![TextChunkRange { start: 0, end: 0 }],
1216        };
1217    }
1218    if total_chars <= effective_chunk {
1219        return TextChunkManifest {
1220            chunk_chars: effective_chunk,
1221            chunks: vec![TextChunkRange {
1222                start: 0,
1223                end: total_chars,
1224            }],
1225        };
1226    }
1227    let mut chunks = Vec::new();
1228    let mut start = 0usize;
1229    while start < total_chars {
1230        let end = (start + effective_chunk).min(total_chars);
1231        chunks.push(TextChunkRange { start, end });
1232        start = end;
1233    }
1234    TextChunkManifest {
1235        chunk_chars: effective_chunk,
1236        chunks,
1237    }
1238}
1239
1240fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1241    if range.start >= range.end || text.is_empty() {
1242        return String::new();
1243    }
1244    let mut start_byte = text.len();
1245    let mut end_byte = text.len();
1246    let mut idx = 0usize;
1247    for (byte_offset, _) in text.char_indices() {
1248        if idx == range.start {
1249            start_byte = byte_offset;
1250        }
1251        if idx == range.end {
1252            end_byte = byte_offset;
1253            break;
1254        }
1255        idx += 1;
1256    }
1257    if start_byte == text.len() {
1258        return String::new();
1259    }
1260    if end_byte == text.len() {
1261        end_byte = text.len();
1262    }
1263    text[start_byte..end_byte].to_string()
1264}