memvid_cli/commands/
inspection.rs

1//! Inspection command handlers (view, stats, who)
2
3#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use memvid_core::table::list_tables;
14use memvid_core::{
15    lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
16    TextChunkRange,
17};
18use serde_json::{json, Value};
19use tempfile::Builder;
20use tracing::warn;
21use uuid::Uuid;
22
23use crate::config::CliConfig;
24use crate::utils::{
25    format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
26    owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
27};
28
/// Fallback page size (in characters) used by `handle_view` when neither `--page-size`
/// nor a chunk manifest derived from the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
/// Key naming the chunk manifest entry.
// NOTE(review): not referenced in the portion of the file reviewed here; presumably the
// metadata key read by `canonical_manifest_from_frame` — confirm against its definition.
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
31
/// Arguments for the `view` subcommand
// NOTE: the field notes below intentionally use plain `//` comments. clap's derive turns
// `///` doc comments on fields into `--help` text, so using them would change CLI output.
#[derive(Args)]
pub struct ViewArgs {
    // Path of the memory file; `handle_view` opens it read-only.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Select the frame by numeric id (mutually exclusive with `--uri`).
    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
    pub frame_id: Option<u64>,
    // Select the frame by URI (mutually exclusive with `--frame-id`).
    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
    pub uri: Option<String>,
    // Emit the frame and the requested page as a JSON document.
    #[arg(long)]
    pub json: bool,
    // Write the frame's canonical payload bytes to stdout unmodified.
    #[arg(long, conflicts_with = "json")]
    pub binary: bool,
    // Export the payload to a temporary file and open it with the system default viewer.
    #[arg(long, conflicts_with_all = ["json", "binary"])]
    pub preview: bool,
    /// Optional start time for video previews (HH:MM:SS[.mmm])
    #[arg(
        long = "start",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_start: Option<String>,
    /// Optional end time for video previews (HH:MM:SS[.mmm])
    #[arg(
        long = "end",
        value_name = "HH:MM:SS",
        requires = "preview",
        conflicts_with_all = ["json", "binary", "play"]
    )]
    pub preview_end: Option<String>,
    // Play the frame as audio; `handle_view` bails unless built with the `audio-playback` feature.
    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
    pub play: bool,
    // Playback window start, forwarded to `play_frame_audio` (only valid with `--play`).
    #[arg(long = "start-seconds", requires = "play")]
    pub start_seconds: Option<f32>,
    // Playback window end, forwarded to `play_frame_audio` (only valid with `--play`).
    #[arg(long = "end-seconds", requires = "play")]
    pub end_seconds: Option<f32>,
    // 1-based page number of the text view; `handle_view` rejects 0.
    #[arg(long, value_name = "N", default_value_t = 1)]
    pub page: usize,
    // Page size in characters; `handle_view` rejects 0. When set, the text is re-chunked at this
    // size instead of using the manifest derived from the frame.
    #[arg(long = "page-size", value_name = "CHARS")]
    pub page_size: Option<usize>,
}
74
/// Arguments for the `stats` subcommand
// NOTE: plain `//` comments are used for the added notes so clap's generated help is unchanged.
#[derive(Args)]
pub struct StatsArgs {
    // Path of the memory file; `handle_stats` opens it read-only.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the report as JSON instead of the human-readable summary.
    #[arg(long)]
    pub json: bool,
    // NOTE: `handle_stats` does not apply this filter yet; it only prints a notice on stderr
    // and reports the current stats.
    /// Replay: Show stats for frames with ID <= AS_OF_FRAME (time-travel view)
    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
    pub as_of_frame: Option<u64>,
    // NOTE: likewise not applied by `handle_stats`; see the note on `as_of_frame`.
    /// Replay: Show stats for frames with timestamp <= AS_OF_TS (time-travel view)
    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
    pub as_of_ts: Option<i64>,
}
89
/// Arguments for the `who` subcommand
// NOTE: plain `//` comments are used for the added notes so clap's generated help is unchanged.
#[derive(Args)]
pub struct WhoArgs {
    // Path of the memory file whose lock owner is queried via `lockfile::current_owner`.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,
    // Emit the lock status as JSON (`{"locked": ...}`) instead of plain text.
    #[arg(long)]
    pub json: bool,
}
98
99/// Handler for `memvid stats`
100pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
101    let mut mem = Memvid::open_read_only(&args.file)?;
102    let stats = mem.stats()?;
103    let tables = list_tables(&mut mem).unwrap_or_default();
104
105    // Note: Replay filtering for stats is currently not implemented
106    // The stats show the full memory state
107    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
108        eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
109        eprintln!("      Use 'find' or 'timeline' commands for filtered results.");
110    }
111    let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
112    let payload_share_percent: f64 = if stats.size_bytes > 0 {
113        round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
114    } else {
115        0.0
116    };
117    let overhead_share_percent: f64 = if stats.size_bytes > 0 {
118        round_percent((100.0 - payload_share_percent).max(0.0))
119    } else {
120        0.0
121    };
122    let maintenance_command = format!(
123        "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
124        args.file.display()
125    );
126
127    if args.json {
128        let mut raw_json = serde_json::to_value(&stats)?;
129        if let Value::Object(ref mut obj) = raw_json {
130            obj.remove("tier");
131        }
132
133        // Build tables list for JSON output
134        let tables_json: Vec<serde_json::Value> = tables
135            .iter()
136            .map(|t| {
137                json!({
138                    "table_id": t.table_id,
139                    "source_file": t.source_file,
140                    "n_rows": t.n_rows,
141                    "n_cols": t.n_cols,
142                    "pages": format!("{}-{}", t.page_start, t.page_end),
143                    "quality": format!("{:?}", t.quality),
144                    "headers": t.headers,
145                })
146            })
147            .collect();
148
149        let report = json!({
150            "summary": {
151                "sequence": stats.seq_no,
152                "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
153                "usage": format!(
154                    "{} used / {} total ({})",
155                    format_bytes(stats.size_bytes),
156                    format_bytes(stats.capacity_bytes),
157                    format_percent(stats.storage_utilisation_percent)
158                ),
159                "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
160            },
161            "storage": {
162                "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
163                "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
164                "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
165                "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
166                "compression_ratio": format_percent(stats.compression_ratio_percent),
167            },
168            "frames": {
169                "average_stored": format_bytes(stats.average_frame_payload_bytes),
170                "average_logical": format_bytes(stats.average_frame_logical_bytes),
171                "clip_images": stats.clip_image_count,
172            },
173            "indexes": {
174                "lexical": yes_no(stats.has_lex_index),
175                "vector": yes_no(stats.has_vec_index),
176                "time": yes_no(stats.has_time_index),
177            },
178            "tables": {
179                "count": tables.len(),
180                "tables": tables_json,
181            },
182            "maintenance": maintenance_command,
183            "raw": raw_json,
184        });
185
186        println!("{}", serde_json::to_string_pretty(&report)?);
187    } else {
188        let seq_display = stats
189            .seq_no
190            .map(|seq| seq.to_string())
191            .unwrap_or_else(|| "n/a".to_string());
192
193        println!("Memory: {}", args.file.display());
194        println!("Sequence: {}", seq_display);
195        println!(
196            "Frames: {} total ({} active)",
197            stats.frame_count, stats.active_frame_count
198        );
199
200        println!("\nCapacity:");
201        println!(
202            "  Usage: {} used / {} total ({})",
203            format_bytes(stats.size_bytes),
204            format_bytes(stats.capacity_bytes),
205            format_percent(stats.storage_utilisation_percent)
206        );
207        println!(
208            "  Remaining: {}",
209            format_bytes(stats.remaining_capacity_bytes)
210        );
211
212        println!("\nStorage breakdown:");
213        println!(
214            "  Payload: {} ({})",
215            format_bytes(stats.payload_bytes),
216            format_percent(payload_share_percent)
217        );
218        println!(
219            "  Overhead: {} ({})",
220            format_bytes(overhead_bytes),
221            format_percent(overhead_share_percent)
222        );
223        // PHASE 2: Detailed overhead breakdown for observability
224        println!("    ├─ WAL: {}", format_bytes(stats.wal_bytes));
225        println!(
226            "    ├─ Lexical index: {}",
227            format_bytes(stats.lex_index_bytes)
228        );
229        println!(
230            "    ├─ Vector index: {}",
231            format_bytes(stats.vec_index_bytes)
232        );
233        println!(
234            "    └─ Time index: {}",
235            format_bytes(stats.time_index_bytes)
236        );
237        println!(
238            "  Logical payload: {} before compression",
239            format_bytes(stats.logical_bytes)
240        );
241        println!(
242            "  Compression savings: {} ({})",
243            format_bytes(stats.saved_bytes),
244            format_percent(stats.savings_percent)
245        );
246
247        println!("\nAverage frame:");
248        println!(
249            "  Stored: {}   Logical: {}",
250            format_bytes(stats.average_frame_payload_bytes),
251            format_bytes(stats.average_frame_logical_bytes)
252        );
253        if stats.clip_image_count > 0 {
254            println!("  CLIP images: {}", stats.clip_image_count);
255        }
256
257        // PHASE 2: Per-document cost analysis
258        if stats.active_frame_count > 0 {
259            let overhead_per_doc = overhead_bytes / stats.active_frame_count;
260            let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
261            let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
262
263            println!("\nPer-document overhead:");
264            println!("  Total: {}", format_bytes(overhead_per_doc));
265            if stats.has_lex_index {
266                println!("  Lexical: {}", format_bytes(lex_per_doc));
267            }
268            if stats.has_vec_index {
269                let vec_ratio = if stats.average_frame_payload_bytes > 0 {
270                    vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
271                } else {
272                    0.0
273                };
274                println!(
275                    "  Vector: {} ({:.0}x text size)",
276                    format_bytes(vec_per_doc),
277                    vec_ratio
278                );
279            }
280        }
281
282        println!("\nIndexes:");
283        println!(
284            "  Lexical: {}   Vector: {}   Time: {}",
285            yes_no(stats.has_lex_index),
286            yes_no(stats.has_vec_index),
287            yes_no(stats.has_time_index)
288        );
289
290        if !tables.is_empty() {
291            println!("\nTables: {} extracted", tables.len());
292            for t in &tables {
293                println!(
294                    "  {} — {} rows × {} cols ({})",
295                    t.table_id, t.n_rows, t.n_cols, t.source_file
296                );
297            }
298        }
299
300        println!("\nMaintenance:");
301        println!(
302            "  Run `{}` to rebuild indexes and reclaim space.",
303            maintenance_command
304        );
305    }
306    Ok(())
307}
308
309/// Handler for `memvid who`
310pub fn handle_who(args: WhoArgs) -> Result<()> {
311    match lockfile::current_owner(&args.file)? {
312        Some(owner) => {
313            if args.json {
314                let output = json!({
315                    "locked": true,
316                    "owner": owner_hint_to_json(&owner),
317                });
318                println!("{}", serde_json::to_string_pretty(&output)?);
319            } else {
320                println!("{} is locked by:", args.file.display());
321                if let Some(pid) = owner.pid {
322                    println!("  pid: {pid}");
323                }
324                if let Some(cmd) = owner.cmd.as_deref() {
325                    println!("  cmd: {cmd}");
326                }
327                if let Some(started) = owner.started_at.as_deref() {
328                    println!("  started_at: {started}");
329                }
330                if let Some(last) = owner.last_heartbeat.as_deref() {
331                    println!("  last_heartbeat: {last}");
332                }
333                if let Some(interval) = owner.heartbeat_ms {
334                    println!("  heartbeat_interval_ms: {interval}");
335                }
336                if let Some(file_id) = owner.file_id.as_deref() {
337                    println!("  file_id: {file_id}");
338                }
339                if let Some(path) = owner.file_path.as_ref() {
340                    println!("  file_path: {}", path.display());
341                }
342            }
343        }
344        None => {
345            if args.json {
346                let output = json!({"locked": false});
347                println!("{}", serde_json::to_string_pretty(&output)?);
348            } else {
349                println!("No active writer for {}", args.file.display());
350            }
351        }
352    }
353    Ok(())
354}
355
356// ============================================================================
357// View command handler and helpers
358// ============================================================================
359
360/// Handler for `memvid view`
361pub fn handle_view(args: ViewArgs) -> Result<()> {
362    if args.page == 0 {
363        bail!("page must be greater than zero");
364    }
365    if let Some(size) = args.page_size {
366        if size == 0 {
367            bail!("page-size must be greater than zero");
368        }
369    }
370
371    let mut mem = open_read_only_mem(&args.file)?;
372    let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
373
374    if args.play {
375        #[cfg(feature = "audio-playback")]
376        {
377            play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
378            return Ok(());
379        }
380        #[cfg(not(feature = "audio-playback"))]
381        {
382            bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
383        }
384    }
385
386    if args.preview {
387        let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
388        preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
389        return Ok(());
390    }
391
392    if args.binary {
393        let bytes = mem.frame_canonical_payload(frame.id)?;
394        let mut stdout = io::stdout();
395        stdout.write_all(&bytes)?;
396        stdout.flush()?;
397        return Ok(());
398    }
399
400    let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
401    let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
402
403    let page_size = args
404        .page_size
405        .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
406        .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
407
408    let mut manifest = if args.page_size.is_none() {
409        manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
410    } else {
411        compute_chunk_manifest(&canonical_text, page_size)
412    };
413    if manifest.chunks.is_empty() {
414        manifest = TextChunkManifest {
415            chunk_chars: page_size,
416            chunks: vec![TextChunkRange {
417                start: 0,
418                end: canonical_text.chars().count(),
419            }],
420        };
421    }
422
423    if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
424        let total_chars = canonical_text.chars().count();
425        manifest = TextChunkManifest {
426            chunk_chars: total_chars.max(1),
427            chunks: vec![TextChunkRange {
428                start: 0,
429                end: total_chars,
430            }],
431        };
432    }
433
434    let total_pages = manifest.chunks.len().max(1);
435    if args.page > total_pages {
436        bail!(
437            "page {} is out of range (total pages: {})",
438            args.page,
439            total_pages
440        );
441    }
442
443    let chunk = &manifest.chunks[args.page - 1];
444    let content = extract_chunk_slice(&canonical_text, chunk);
445
446    if args.json {
447        let mut frame_json = frame_to_json(&frame);
448        if let Some(obj) = frame_json.as_object_mut() {
449            // Note: Do NOT overwrite search_text - it contains the extracted text from the document.
450            // The "content" field shows the paginated payload view.
451            if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
452                if let Some(manifest_obj) = manifest_json.as_object_mut() {
453                    let total = manifest.chunks.len();
454                    if total > 0 {
455                        let mut window = serde_json::Map::new();
456                        let idx = args.page.saturating_sub(1).min(total - 1);
457                        if idx > 0 {
458                            let prev = &manifest.chunks[idx - 1];
459                            window.insert("prev".into(), json!([prev.start, prev.end]));
460                        }
461                        let current = &manifest.chunks[idx];
462                        window.insert("current".into(), json!([current.start, current.end]));
463                        if idx + 1 < total {
464                            let next = &manifest.chunks[idx + 1];
465                            window.insert("next".into(), json!([next.start, next.end]));
466                        }
467                        manifest_obj.insert("chunks".into(), Value::Object(window));
468                    }
469                }
470            }
471        }
472        let json = json!({
473            "frame": frame_json,
474            "page": args.page,
475            "page_size": manifest.chunk_chars,
476            "page_count": total_pages,
477            "has_prev": args.page > 1,
478            "has_next": args.page < total_pages,
479            "content": content,
480        });
481        println!("{}", serde_json::to_string_pretty(&json)?);
482    } else {
483        print_frame_summary(&mut mem, &frame)?;
484        println!(
485            "Page {}/{} ({} chars per page)",
486            args.page, total_pages, manifest.chunk_chars
487        );
488        println!();
489        println!("{}", content);
490    }
491    Ok(())
492}
493
/// Optional start/end offsets delimiting the part of a video to preview.
///
/// Built by `parse_preview_bounds` from the `--start`/`--end` timecodes; either side may
/// be absent, meaning "unbounded" on that side.
#[derive(Debug)]
pub struct PreviewBounds {
    /// Clip start as returned by `parse_timecode` (milliseconds, per the field name).
    pub start_ms: Option<u64>,
    /// Clip end as returned by `parse_timecode` (milliseconds, per the field name).
    pub end_ms: Option<u64>,
}
499
500pub fn parse_preview_bounds(
501    start: Option<&String>,
502    end: Option<&String>,
503) -> Result<Option<PreviewBounds>> {
504    let start_ms = match start {
505        Some(value) => Some(parse_timecode(value)?),
506        None => None,
507    };
508    let end_ms = match end {
509        Some(value) => Some(parse_timecode(value)?),
510        None => None,
511    };
512
513    if let (Some(s), Some(e)) = (start_ms, end_ms) {
514        if e <= s {
515            anyhow::bail!("--end must be greater than --start");
516        }
517    }
518
519    if start_ms.is_none() && end_ms.is_none() {
520        Ok(None)
521    } else {
522        Ok(Some(PreviewBounds { start_ms, end_ms }))
523    }
524}
525
526fn preview_frame_media(
527    mem: &mut Memvid,
528    frame: &Frame,
529    cli_uri: Option<&str>,
530    bounds: Option<PreviewBounds>,
531) -> Result<()> {
532    let manifest = mem.media_manifest(frame.id)?;
533    let mut mime = manifest
534        .as_ref()
535        .map(|m| m.mime.clone())
536        .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
537        .unwrap_or_else(|| "application/octet-stream".to_string());
538
539    // If mime is generic, try to detect from payload bytes
540    if mime == "application/octet-stream" {
541        if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
542            if let Some(kind) = infer::get(&bytes) {
543                mime = kind.mime_type().to_string();
544            }
545        }
546    }
547
548    let is_video = manifest
549        .as_ref()
550        .map(|media| media.kind.eq_ignore_ascii_case("video"))
551        .unwrap_or_else(|| mime.starts_with("video/"));
552
553    if is_video {
554        preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
555    } else {
556        if bounds.is_some() {
557            anyhow::bail!("--start/--end are only supported for video previews");
558        }
559        if is_image_mime(&mime) {
560            preview_frame_image(mem, frame, cli_uri)?;
561        } else if is_audio_mime(&mime) {
562            preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
563        } else {
564            preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
565        }
566    }
567    Ok(())
568}
569
570fn preview_frame_video(
571    mem: &mut Memvid,
572    frame: &Frame,
573    cli_uri: Option<&str>,
574    bounds: Option<PreviewBounds>,
575    manifest: Option<MediaManifest>,
576    mime: &str,
577) -> Result<()> {
578    let extension = manifest
579        .as_ref()
580        .and_then(|m| m.filename.as_deref())
581        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
582        .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
583        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
584        .unwrap_or_else(|| "mp4".to_string());
585
586    let mut temp_file = Builder::new()
587        .prefix("memvid-preview-")
588        .suffix(&format!(".{extension}"))
589        .tempfile_in(std::env::temp_dir())
590        .context("failed to create temporary preview file")?;
591
592    let mut reader = mem
593        .blob_reader(frame.id)
594        .context("failed to stream payload for preview")?;
595    io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
596    temp_file
597        .flush()
598        .context("failed to flush video preview to disk")?;
599
600    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
601    drop(file);
602
603    let mut display_path = preview_path.clone();
604    if let Some(ref span) = bounds {
605        let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
606        if needs_trim {
607            if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
608                display_path = trimmed;
609            }
610        }
611    }
612
613    println!("Opening preview...");
614    open::that(&display_path).with_context(|| {
615        format!(
616            "failed to launch default video player for {}",
617            display_path.display()
618        )
619    })?;
620
621    let display_uri = cli_uri
622        .or_else(|| frame.uri.as_deref())
623        .unwrap_or("<unknown>");
624    println!(
625        "Opened preview for {} (frame {}) -> {} ({})",
626        display_uri,
627        frame.id,
628        display_path.display(),
629        mime
630    );
631    Ok(())
632}
633
634fn maybe_trim_with_ffmpeg(
635    source: &Path,
636    extension: &str,
637    bounds: &PreviewBounds,
638) -> Result<Option<PathBuf>> {
639    if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
640        return Ok(None);
641    }
642
643    let ffmpeg = match which::which("ffmpeg") {
644        Ok(path) => path,
645        Err(_) => {
646            warn!("ffmpeg binary not found on PATH; opening full video");
647            return Ok(None);
648        }
649    };
650
651    let target = std::env::temp_dir().join(format!(
652        "memvid-preview-clip-{}.{}",
653        Uuid::new_v4(),
654        extension
655    ));
656
657    let mut command = Command::new(ffmpeg);
658    command.arg("-y");
659    if let Some(start) = bounds.start_ms {
660        command.arg("-ss").arg(format_timestamp_ms(start));
661    }
662    command.arg("-i").arg(source);
663    if let Some(end) = bounds.end_ms {
664        command.arg("-to").arg(format_timestamp_ms(end));
665    }
666    command.arg("-c").arg("copy");
667    command.arg(&target);
668
669    let status = command
670        .status()
671        .context("failed to run ffmpeg for preview trimming")?;
672    if status.success() {
673        return Ok(Some(target));
674    }
675
676    let details = status
677        .code()
678        .map(|code| code.to_string())
679        .unwrap_or_else(|| "terminated".to_string());
680    warn!("ffmpeg exited with status {details}; opening full video");
681    Ok(None)
682}
683
684fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
685    let bytes = mem
686        .frame_canonical_payload(frame.id)
687        .context("failed to load canonical payload for frame")?;
688    if bytes.is_empty() {
689        bail!("frame payload is empty; nothing to preview");
690    }
691
692    let detected_kind = infer::get(&bytes);
693    let mut mime = frame
694        .metadata
695        .as_ref()
696        .and_then(|meta| meta.mime.clone())
697        .filter(|value| is_image_mime(value));
698
699    if mime.is_none() {
700        if let Some(kind) = &detected_kind {
701            let candidate = kind.mime_type();
702            if is_image_mime(candidate) {
703                mime = Some(candidate.to_string());
704            }
705        }
706    }
707
708    let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
709    if !is_image_mime(&mime) {
710        bail!("frame mime type {mime} is not an image");
711    }
712
713    let extension = detected_kind
714        .as_ref()
715        .map(|kind| kind.extension().to_string())
716        .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
717        .unwrap_or_else(|| "img".to_string());
718
719    let suffix = format!(".{extension}");
720    let mut temp_file = Builder::new()
721        .prefix("memvid-preview-")
722        .suffix(&suffix)
723        .tempfile_in(std::env::temp_dir())
724        .context("failed to create temporary preview file")?;
725    temp_file
726        .write_all(&bytes)
727        .context("failed to write image data to preview file")?;
728    temp_file
729        .flush()
730        .context("failed to flush preview file to disk")?;
731
732    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
733    drop(file);
734
735    println!("Opening preview...");
736    open::that(&preview_path).with_context(|| {
737        format!(
738            "failed to launch default image viewer for {}",
739            preview_path.display()
740        )
741    })?;
742
743    let display_uri = cli_uri
744        .or_else(|| frame.uri.as_deref())
745        .unwrap_or("<unknown>");
746    println!(
747        "Opened preview for {} (frame {}) -> {} ({})",
748        display_uri,
749        frame.id,
750        preview_path.display(),
751        mime
752    );
753    Ok(())
754}
755
756fn preview_frame_document(
757    mem: &mut Memvid,
758    frame: &Frame,
759    cli_uri: Option<&str>,
760    manifest: Option<&MediaManifest>,
761    mime: &str,
762) -> Result<()> {
763    let bytes = mem
764        .frame_canonical_payload(frame.id)
765        .context("failed to load canonical payload for frame")?;
766    if bytes.is_empty() {
767        bail!("frame payload is empty; nothing to preview");
768    }
769
770    let mut extension = manifest
771        .and_then(|m| m.filename.as_deref())
772        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
773        .map(|ext| ext.trim_start_matches('.').to_string())
774        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
775        .unwrap_or_else(|| "bin".to_string());
776
777    if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
778        extension = "txt".to_string();
779    }
780
781    let suffix = format!(".{extension}");
782    let mut temp_file = Builder::new()
783        .prefix("memvid-preview-")
784        .suffix(&suffix)
785        .tempfile_in(std::env::temp_dir())
786        .context("failed to create temporary preview file")?;
787    temp_file
788        .write_all(&bytes)
789        .context("failed to write document data to preview file")?;
790    temp_file
791        .flush()
792        .context("failed to flush preview file to disk")?;
793
794    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
795    drop(file);
796
797    println!("Opening preview...");
798    open::that(&preview_path).with_context(|| {
799        format!(
800            "failed to launch default viewer for {}",
801            preview_path.display()
802        )
803    })?;
804
805    let display_uri = cli_uri
806        .or_else(|| frame.uri.as_deref())
807        .unwrap_or("<unknown>");
808    println!(
809        "Opened preview for {} (frame {}) -> {} ({})",
810        display_uri,
811        frame.id,
812        preview_path.display(),
813        mime
814    );
815    Ok(())
816}
817
818fn preview_frame_audio_file(
819    mem: &mut Memvid,
820    frame: &Frame,
821    cli_uri: Option<&str>,
822    manifest: Option<&MediaManifest>,
823    mime: &str,
824) -> Result<()> {
825    let bytes = mem
826        .frame_canonical_payload(frame.id)
827        .context("failed to load canonical payload for frame")?;
828    if bytes.is_empty() {
829        bail!("frame payload is empty; nothing to preview");
830    }
831
832    let mut extension = manifest
833        .and_then(|m| m.filename.as_deref())
834        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
835        .map(|ext| ext.trim_start_matches('.').to_string())
836        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
837        .unwrap_or_else(|| "audio".to_string());
838
839    if extension == "bin" {
840        extension = "audio".to_string();
841    }
842
843    let suffix = format!(".{extension}");
844    let mut temp_file = Builder::new()
845        .prefix("memvid-preview-")
846        .suffix(&suffix)
847        .tempfile_in(std::env::temp_dir())
848        .context("failed to create temporary preview file")?;
849    temp_file
850        .write_all(&bytes)
851        .context("failed to write audio data to preview file")?;
852    temp_file
853        .flush()
854        .context("failed to flush preview file to disk")?;
855
856    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
857    drop(file);
858
859    println!("Opening preview...");
860    open::that(&preview_path).with_context(|| {
861        format!(
862            "failed to launch default audio player for {}",
863            preview_path.display()
864        )
865    })?;
866
867    let display_uri = cli_uri
868        .or_else(|| frame.uri.as_deref())
869        .unwrap_or("<unknown>");
870    println!(
871        "Opened preview for {} (frame {}) -> {} ({})",
872        display_uri,
873        frame.id,
874        preview_path.display(),
875        mime
876    );
877    Ok(())
878}
879
/// Converts a number of seconds into a [`Duration`], returning an error instead of panicking.
///
/// `Duration::from_secs_f32` panics on negative, non-finite or overflowing values; `label`
/// names the offending quantity in the error message.
#[cfg(feature = "audio-playback")]
fn checked_playback_duration(seconds: f32, label: &str) -> Result<Duration> {
    Duration::try_from_secs_f32(seconds)
        .with_context(|| format!("{label} ({seconds}) cannot be converted to a playback duration"))
}

/// Decodes a frame's canonical payload and plays it on the default audio output, blocking
/// until playback finishes.
///
/// `start_seconds` (clamped to be non-negative) selects where playback begins and
/// `end_seconds` optionally selects where it stops. When the frame metadata carries a positive
/// audio duration, a start beyond it is rejected and an end beyond it is clamped with a
/// warning.
///
/// # Errors
///
/// Fails when the end is not after the start, the payload is missing or empty, the offsets
/// cannot be represented as a `Duration` (for example infinite values), the audio cannot be
/// decoded, or no audio output/sink can be opened.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    // `f32::max` also maps a NaN start to 0.0.
    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // A duration of 0.0 means "unknown": the metadata-based checks below are skipped.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    if let Some(end) = end_seconds {
        if duration_meta > 0.0 && end > duration_meta + f32::EPSILON {
            warn!(
                "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                end, duration_meta
            );
        }
    }

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` must stay alive for the whole function, otherwise the output is closed.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    // Checked conversion: with an unknown duration, a huge or infinite start would otherwise
    // reach `Duration::from_secs_f32` and panic.
    let skip = checked_playback_duration(start, "start-seconds")?;

    if let Some(end) = end_seconds {
        let effective_end = if duration_meta > 0.0 {
            end.min(duration_meta)
        } else {
            end
        };
        let duration = (effective_end - start).max(0.0);
        if duration <= 0.0 {
            bail!("playback duration is zero; adjust start/end seconds");
        }
        let take = checked_playback_duration(duration, "playback length")?;
        let source = decoder.skip_duration(skip).take_duration(take);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → {effective_end:.2}s");
        announce_playback(display_uri, &segment_desc);
    } else {
        let source = decoder.skip_duration(skip);
        sink.append(source);
        let segment_desc = format!("{start:.2}s → end");
        announce_playback(display_uri, &segment_desc);
    }
    sink.sleep_until_end();
    Ok(())
}
955
/// Prints a single line naming the URI being played and the time window of the segment.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    println!("Playing {uri} ({segment_desc})");
}
960
/// Returns `true` when the MIME type (ignoring parameters after `;`, surrounding whitespace
/// and ASCII case) starts with `image/`.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";

    let essence = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    // Compare bytes case-insensitively instead of allocating a lowercase copy.
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
965
/// Returns `true` when the MIME type (ignoring parameters after `;`, surrounding whitespace
/// and ASCII case) starts with `audio/`.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"audio/";

    let essence = match value.split_once(';') {
        Some((head, _)) => head,
        None => value,
    };
    // Compare bytes case-insensitively instead of allocating a lowercase copy.
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
970
/// Maps a MIME type to a conventional file extension (without the leading dot).
///
/// Parameters after `;`, surrounding whitespace and ASCII case are ignored. Returns `None`
/// for MIME types that are not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // Each entry pairs an extension with every MIME spelling that resolves to it.
    const EXTENSIONS: &[(&str, &[&str])] = &[
        ("jpg", &["image/jpeg", "image/jpg"]),
        ("png", &["image/png"]),
        ("gif", &["image/gif"]),
        ("webp", &["image/webp"]),
        ("bmp", &["image/bmp"]),
        ("tiff", &["image/tiff"]),
        ("ico", &["image/x-icon", "image/vnd.microsoft.icon"]),
        ("svg", &["image/svg+xml"]),
        ("mp4", &["video/mp4", "video/iso.segment"]),
        ("mov", &["video/quicktime"]),
        ("webm", &["video/webm"]),
        ("mkv", &["video/x-matroska", "video/matroska"]),
        ("avi", &["video/x-msvideo"]),
        ("mpg", &["video/mpeg"]),
        ("pdf", &["application/pdf"]),
        ("mp3", &["audio/mpeg", "audio/mp3"]),
        ("wav", &["audio/wav", "audio/x-wav"]),
        ("flac", &["audio/x-flac", "audio/flac"]),
        ("ogg", &["audio/ogg", "audio/vorbis"]),
        ("m4a", &["audio/x-m4a", "audio/mp4"]),
        ("aac", &["audio/aac"]),
        ("aiff", &["audio/x-aiff", "audio/aiff"]),
        ("txt", &["text/plain"]),
        ("md", &["text/markdown", "text/x-markdown"]),
        ("html", &["text/html"]),
        ("xhtml", &["application/xhtml+xml"]),
        (
            "json",
            &["application/json", "text/json", "application/vnd.api+json"],
        ),
        ("xml", &["application/xml", "text/xml"]),
        ("csv", &["text/csv", "application/csv"]),
        ("js", &["application/javascript", "text/javascript"]),
        ("css", &["text/css"]),
        (
            "yaml",
            &["application/yaml", "application/x-yaml", "text/yaml"],
        ),
        ("rtf", &["application/rtf"]),
        ("doc", &["application/msword"]),
        (
            "docx",
            &["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
        ),
        ("ppt", &["application/vnd.ms-powerpoint"]),
        (
            "pptx",
            &["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
        ),
        ("xls", &["application/vnd.ms-excel"]),
        (
            "xlsx",
            &["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
        ),
        ("zip", &["application/zip"]),
        ("tar", &["application/x-tar"]),
        ("7z", &["application/x-7z-compressed"]),
    ];

    // Keep only the type/subtype part, e.g. "text/html; charset=utf-8" -> "text/html".
    let essence = match mime.split_once(';') {
        Some((head, _)) => head,
        None => mime,
    };
    let key = essence.trim().to_ascii_lowercase();

    EXTENSIONS
        .iter()
        .find(|(_, aliases)| aliases.iter().any(|alias| *alias == key))
        .map(|(ext, _)| *ext)
}
/// Builds a short preview of a frame's search text.
///
/// Returns `None` when there is no text or it is blank after trimming; otherwise returns the
/// first 160 characters of the trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    const MAX_SNIPPET_CHARS: usize = 160;

    let candidate = text?.trim();
    if candidate.is_empty() {
        return None;
    }
    // Truncate by characters, not bytes, so multi-byte text is never split mid-character.
    Some(candidate.chars().take(MAX_SNIPPET_CHARS).collect())
}
1034pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1035    json!({
1036        "id": frame.id,
1037        "status": frame_status_str(frame.status),
1038        "timestamp": frame.timestamp,
1039        "kind": frame.kind,
1040        "track": frame.track,
1041        "uri": frame.uri,
1042        "title": frame.title,
1043        "payload_length": frame.payload_length,
1044        "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1045        "canonical_length": frame.canonical_length,
1046        "role": format!("{:?}", frame.role),
1047        "parent_id": frame.parent_id,
1048        "chunk_index": frame.chunk_index,
1049        "chunk_count": frame.chunk_count,
1050        "tags": frame.tags,
1051        "labels": frame.labels,
1052        "search_text": frame.search_text,
1053        "metadata": frame.metadata,
1054        "extra_metadata": frame.extra_metadata,
1055        "content_dates": frame.content_dates,
1056        "chunk_manifest": frame.chunk_manifest,
1057        "supersedes": frame.supersedes,
1058        "superseded_by": frame.superseded_by,
1059    })
1060}
1061pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1062    println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1063    println!("Timestamp: {}", frame.timestamp);
1064    if let Some(uri) = &frame.uri {
1065        println!("URI: {uri}");
1066    }
1067    if let Some(title) = &frame.title {
1068        println!("Title: {title}");
1069    }
1070    if let Some(kind) = &frame.kind {
1071        println!("Kind: {kind}");
1072    }
1073    if let Some(track) = &frame.track {
1074        println!("Track: {track}");
1075    }
1076    if let Some(supersedes) = frame.supersedes {
1077        println!("Supersedes frame: {supersedes}");
1078    }
1079    if let Some(successor) = frame.superseded_by {
1080        println!("Superseded by frame: {successor}");
1081    }
1082    println!(
1083        "Payload: {} bytes (canonical {:?}, logical {:?})",
1084        frame.payload_length, frame.canonical_encoding, frame.canonical_length
1085    );
1086    if !frame.tags.is_empty() {
1087        println!("Tags: {}", frame.tags.join(", "));
1088    }
1089    if !frame.labels.is_empty() {
1090        println!("Labels: {}", frame.labels.join(", "));
1091    }
1092    if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1093        println!("Search text: {snippet}");
1094    }
1095    if let Some(meta) = &frame.metadata {
1096        let rendered = serde_json::to_string_pretty(meta)?;
1097        println!("Metadata: {rendered}");
1098    }
1099    if !frame.extra_metadata.is_empty() {
1100        let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1101        entries.sort_by(|a, b| a.0.cmp(b.0));
1102        println!("Extra metadata:");
1103        for (key, value) in entries {
1104            println!("  {key}: {value}");
1105        }
1106    }
1107    if !frame.content_dates.is_empty() {
1108        println!("Content dates: {}", frame.content_dates.join(", "));
1109    }
1110    match mem.frame_embedding(frame.id) {
1111        Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1112        Ok(None) => println!("Embedding: none"),
1113        Err(err) => println!("Embedding: unavailable ({err})"),
1114    }
1115    Ok(())
1116}
1117fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1118    let bytes = mem.frame_canonical_payload(frame.id)?;
1119    let raw = match String::from_utf8(bytes) {
1120        Ok(text) => text,
1121        Err(err) => {
1122            let bytes = err.into_bytes();
1123            String::from_utf8_lossy(&bytes).into_owned()
1124        }
1125    };
1126
1127    Ok(normalize_text(&raw, usize::MAX)
1128        .map(|n| n.text)
1129        .unwrap_or_default())
1130}
1131
1132fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1133    if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1134        return false;
1135    }
1136    let total_chars = text.chars().count();
1137    manifest
1138        .chunks
1139        .iter()
1140        .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1141}
1142
1143fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1144    let primary = frame
1145        .chunk_manifest
1146        .clone()
1147        .filter(|manifest| manifests_match_text(text, manifest));
1148    if primary.is_some() {
1149        return primary;
1150    }
1151
1152    frame
1153        .extra_metadata
1154        .get(CHUNK_MANIFEST_KEY)
1155        .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1156        .filter(|manifest| manifests_match_text(text, manifest))
1157}
1158
1159fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1160    let normalized = normalize_text(text, usize::MAX)
1161        .map(|n| n.text)
1162        .unwrap_or_default();
1163
1164    let effective_chunk = chunk_chars.max(1);
1165    let total_chars = normalized.chars().count();
1166    if total_chars == 0 {
1167        return TextChunkManifest {
1168            chunk_chars: effective_chunk,
1169            chunks: vec![TextChunkRange { start: 0, end: 0 }],
1170        };
1171    }
1172    if total_chars <= effective_chunk {
1173        return TextChunkManifest {
1174            chunk_chars: effective_chunk,
1175            chunks: vec![TextChunkRange {
1176                start: 0,
1177                end: total_chars,
1178            }],
1179        };
1180    }
1181    let mut chunks = Vec::new();
1182    let mut start = 0usize;
1183    while start < total_chars {
1184        let end = (start + effective_chunk).min(total_chars);
1185        chunks.push(TextChunkRange { start, end });
1186        start = end;
1187    }
1188    TextChunkManifest {
1189        chunk_chars: effective_chunk,
1190        chunks,
1191    }
1192}
1193
1194fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1195    if range.start >= range.end || text.is_empty() {
1196        return String::new();
1197    }
1198    let mut start_byte = text.len();
1199    let mut end_byte = text.len();
1200    let mut idx = 0usize;
1201    for (byte_offset, _) in text.char_indices() {
1202        if idx == range.start {
1203            start_byte = byte_offset;
1204        }
1205        if idx == range.end {
1206            end_byte = byte_offset;
1207            break;
1208        }
1209        idx += 1;
1210    }
1211    if start_byte == text.len() {
1212        return String::new();
1213    }
1214    if end_byte == text.len() {
1215        end_byte = text.len();
1216    }
1217    text[start_byte..end_byte].to_string()
1218}