memvid_cli/commands/
inspection.rs

1//! Inspection command handlers (view, stats, who)
2
3#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use memvid_core::table::list_tables;
14use memvid_core::{
15    lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
16    TextChunkRange,
17};
18use serde_json::{json, Value};
19use tempfile::Builder;
20use tracing::warn;
21use uuid::Uuid;
22
23use crate::config::CliConfig;
24use crate::utils::{
25    format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
26    owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
27};
28
// Fallback page size (in characters) used by `handle_view` when neither
// `--page-size` nor a manifest attached to the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
// NOTE(review): presumably the metadata key under which a frame's chunk
// manifest is stored; its use sites are outside this part of the file — confirm.
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
31
32/// Arguments for the `view` subcommand
33#[derive(Args)]
34pub struct ViewArgs {
35    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
36    pub file: PathBuf,
37    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
38    pub frame_id: Option<u64>,
39    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
40    pub uri: Option<String>,
41    #[arg(long)]
42    pub json: bool,
43    #[arg(long, conflicts_with = "json")]
44    pub binary: bool,
45    #[arg(long, conflicts_with_all = ["json", "binary"])]
46    pub preview: bool,
47    /// Optional start time for video previews (HH:MM:SS[.mmm])
48    #[arg(
49        long = "start",
50        value_name = "HH:MM:SS",
51        requires = "preview",
52        conflicts_with_all = ["json", "binary", "play"]
53    )]
54    pub preview_start: Option<String>,
55    /// Optional end time for video previews (HH:MM:SS[.mmm])
56    #[arg(
57        long = "end",
58        value_name = "HH:MM:SS",
59        requires = "preview",
60        conflicts_with_all = ["json", "binary", "play"]
61    )]
62    pub preview_end: Option<String>,
63    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
64    pub play: bool,
65    #[arg(long = "start-seconds", requires = "play")]
66    pub start_seconds: Option<f32>,
67    #[arg(long = "end-seconds", requires = "play")]
68    pub end_seconds: Option<f32>,
69    #[arg(long, value_name = "N", default_value_t = 1)]
70    pub page: usize,
71    #[arg(long = "page-size", value_name = "CHARS")]
72    pub page_size: Option<usize>,
73}
74
75/// Arguments for the `stats` subcommand
76#[derive(Args)]
77pub struct StatsArgs {
78    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
79    pub file: PathBuf,
80    #[arg(long)]
81    pub json: bool,
82    /// Replay: Show stats for frames with ID <= AS_OF_FRAME (time-travel view)
83    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
84    pub as_of_frame: Option<u64>,
85    /// Replay: Show stats for frames with timestamp <= AS_OF_TS (time-travel view)
86    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
87    pub as_of_ts: Option<i64>,
88}
89
90/// Arguments for the `who` subcommand
91#[derive(Args)]
92pub struct WhoArgs {
93    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
94    pub file: PathBuf,
95    #[arg(long)]
96    pub json: bool,
97}
98
99/// Handler for `memvid stats`
100pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
101    let mut mem = Memvid::open_read_only(&args.file)?;
102    let stats = mem.stats()?;
103    let tables = list_tables(&mut mem).unwrap_or_default();
104    let vec_dimension = mem.effective_vec_index_dimension()?;
105    let embedding_identity = mem.embedding_identity_summary(10_000);
106
107    // Note: Replay filtering for stats is currently not implemented
108    // The stats show the full memory state
109    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
110        eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
111        eprintln!("      Use 'find' or 'timeline' commands for filtered results.");
112    }
113    let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
114    let payload_share_percent: f64 = if stats.size_bytes > 0 {
115        round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
116    } else {
117        0.0
118    };
119    let overhead_share_percent: f64 = if stats.size_bytes > 0 {
120        round_percent((100.0 - payload_share_percent).max(0.0))
121    } else {
122        0.0
123    };
124    let maintenance_command = format!(
125        "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
126        args.file.display()
127    );
128
129    if args.json {
130        let mut raw_json = serde_json::to_value(&stats)?;
131        if let Value::Object(ref mut obj) = raw_json {
132            obj.remove("tier");
133        }
134
135        // Build tables list for JSON output
136        let tables_json: Vec<serde_json::Value> = tables
137            .iter()
138            .map(|t| {
139                json!({
140                    "table_id": t.table_id,
141                    "source_file": t.source_file,
142                    "n_rows": t.n_rows,
143                    "n_cols": t.n_cols,
144                    "pages": format!("{}-{}", t.page_start, t.page_end),
145                    "quality": format!("{:?}", t.quality),
146                    "headers": t.headers,
147                })
148            })
149            .collect();
150
151        // Compute embedding quality for JSON output
152        let embedding_quality_json = if stats.has_vec_index {
153            mem.embedding_quality().ok().flatten().map(|eq| {
154                json!({
155                    "vector_count": eq.vector_count,
156                    "dimension": eq.dimension,
157                    "avg_similarity": eq.avg_similarity,
158                    "min_similarity": eq.min_similarity,
159                    "max_similarity": eq.max_similarity,
160                    "std_similarity": eq.std_similarity,
161                    "clustering_coefficient": eq.clustering_coefficient,
162                    "estimated_clusters": eq.estimated_clusters,
163                    "recommended_threshold": eq.recommended_threshold,
164                    "quality_rating": eq.quality_rating,
165                    "quality_explanation": eq.quality_explanation,
166                })
167            })
168        } else {
169            None
170        };
171
172        let embedding_identity_json = match &embedding_identity {
173            memvid_core::EmbeddingIdentitySummary::Unknown => Value::Null,
174            memvid_core::EmbeddingIdentitySummary::Single(identity) => json!({
175                "provider": identity.provider.as_deref(),
176                "model": identity.model.as_deref(),
177                "dimension": identity.dimension.or(vec_dimension),
178                "normalized": identity.normalized,
179            }),
180            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
181                let values: Vec<Value> = identities
182                    .iter()
183                    .map(|entry| {
184                        json!({
185                            "provider": entry.identity.provider.as_deref(),
186                            "model": entry.identity.model.as_deref(),
187                            "dimension": entry.identity.dimension.or(vec_dimension),
188                            "normalized": entry.identity.normalized,
189                            "count": entry.count,
190                        })
191                    })
192                    .collect();
193                json!({ "mixed": values })
194            }
195        };
196
197        let report = json!({
198            "summary": {
199                "sequence": stats.seq_no,
200                "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
201                "usage": format!(
202                    "{} used / {} total ({})",
203                    format_bytes(stats.size_bytes),
204                    format_bytes(stats.capacity_bytes),
205                    format_percent(stats.storage_utilisation_percent)
206                ),
207                "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
208            },
209            "storage": {
210                "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
211                "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
212                "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
213                "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
214                "compression_ratio": format_percent(stats.compression_ratio_percent),
215            },
216            "frames": {
217                "average_stored": format_bytes(stats.average_frame_payload_bytes),
218                "average_logical": format_bytes(stats.average_frame_logical_bytes),
219                "clip_images": stats.clip_image_count,
220            },
221            "indexes": {
222                "lexical": yes_no(stats.has_lex_index),
223                "vector": yes_no(stats.has_vec_index),
224                "time": yes_no(stats.has_time_index),
225            },
226            "embedding_identity": embedding_identity_json,
227            "embedding_quality": embedding_quality_json,
228            "tables": {
229                "count": tables.len(),
230                "tables": tables_json,
231            },
232            "maintenance": maintenance_command,
233            "raw": raw_json,
234        });
235
236        println!("{}", serde_json::to_string_pretty(&report)?);
237    } else {
238        let seq_display = stats
239            .seq_no
240            .map(|seq| seq.to_string())
241            .unwrap_or_else(|| "n/a".to_string());
242
243        println!("Memory: {}", args.file.display());
244        println!("Sequence: {}", seq_display);
245        println!(
246            "Frames: {} total ({} active)",
247            stats.frame_count, stats.active_frame_count
248        );
249
250        println!("\nCapacity:");
251        println!(
252            "  Usage: {} used / {} total ({})",
253            format_bytes(stats.size_bytes),
254            format_bytes(stats.capacity_bytes),
255            format_percent(stats.storage_utilisation_percent)
256        );
257        println!(
258            "  Remaining: {}",
259            format_bytes(stats.remaining_capacity_bytes)
260        );
261
262        println!("\nStorage breakdown:");
263        println!(
264            "  Payload: {} ({})",
265            format_bytes(stats.payload_bytes),
266            format_percent(payload_share_percent)
267        );
268        println!(
269            "  Overhead: {} ({})",
270            format_bytes(overhead_bytes),
271            format_percent(overhead_share_percent)
272        );
273        // PHASE 2: Detailed overhead breakdown for observability
274        println!("    ├─ WAL: {}", format_bytes(stats.wal_bytes));
275        println!(
276            "    ├─ Lexical index: {}",
277            format_bytes(stats.lex_index_bytes)
278        );
279        println!(
280            "    ├─ Vector index: {}",
281            format_bytes(stats.vec_index_bytes)
282        );
283        println!(
284            "    └─ Time index: {}",
285            format_bytes(stats.time_index_bytes)
286        );
287        println!(
288            "  Logical payload: {} before compression",
289            format_bytes(stats.logical_bytes)
290        );
291
292        if stats.has_vec_index {
293            println!("\nEmbeddings:");
294            if let Some(dim) = vec_dimension {
295                println!("  Dimension: {}", dim);
296            }
297            match &embedding_identity {
298                memvid_core::EmbeddingIdentitySummary::Unknown => {
299                    println!("  Model: unknown (no persisted embedding identity)");
300                }
301                memvid_core::EmbeddingIdentitySummary::Single(identity) => {
302                    if let Some(provider) = identity.provider.as_deref() {
303                        println!("  Provider: {}", provider);
304                    }
305                    if let Some(model) = identity.model.as_deref() {
306                        println!("  Model: {}", model);
307                    }
308                }
309                memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
310                    println!("  Model: mixed ({} identities detected)", identities.len());
311                    for entry in identities.iter().take(5) {
312                        let provider = entry.identity.provider.as_deref().unwrap_or("unknown");
313                        let model = entry.identity.model.as_deref().unwrap_or("unknown");
314                        println!("    - {} / {} ({} frames)", provider, model, entry.count);
315                    }
316                    if identities.len() > 5 {
317                        println!("    - ...");
318                    }
319                }
320            }
321        }
322        println!(
323            "  Compression savings: {} ({})",
324            format_bytes(stats.saved_bytes),
325            format_percent(stats.savings_percent)
326        );
327
328        println!("\nAverage frame:");
329        println!(
330            "  Stored: {}   Logical: {}",
331            format_bytes(stats.average_frame_payload_bytes),
332            format_bytes(stats.average_frame_logical_bytes)
333        );
334        if stats.clip_image_count > 0 {
335            println!("  CLIP images: {}", stats.clip_image_count);
336        }
337
338        // PHASE 2: Per-document cost analysis
339        if stats.active_frame_count > 0 {
340            let overhead_per_doc = overhead_bytes / stats.active_frame_count;
341            let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
342            let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
343
344            println!("\nPer-document overhead:");
345            println!("  Total: {}", format_bytes(overhead_per_doc));
346            if stats.has_lex_index {
347                println!("  Lexical: {}", format_bytes(lex_per_doc));
348            }
349            if stats.has_vec_index {
350                let vec_ratio = if stats.average_frame_payload_bytes > 0 {
351                    vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
352                } else {
353                    0.0
354                };
355                println!(
356                    "  Vector: {} ({:.0}x text size)",
357                    format_bytes(vec_per_doc),
358                    vec_ratio
359                );
360            }
361        }
362
363        println!("\nIndexes:");
364        println!(
365            "  Lexical: {}   Vector: {}   Time: {}",
366            yes_no(stats.has_lex_index),
367            yes_no(stats.has_vec_index),
368            yes_no(stats.has_time_index)
369        );
370
371        // Show embedding quality stats if vector index is available
372        if stats.has_vec_index {
373            if let Ok(Some(eq)) = mem.embedding_quality() {
374                println!("\nEmbedding Quality:");
375                println!(
376                    "  Vectors: {}   Dimension: {}",
377                    eq.vector_count, eq.dimension
378                );
379                println!(
380                    "  Similarity: avg={:.3}  min={:.3}  max={:.3}  std={:.3}",
381                    eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
382                );
383                println!(
384                    "  Clusters: ~{}   Quality: {}",
385                    eq.estimated_clusters, eq.quality_rating
386                );
387                println!(
388                    "  Recommended --min-relevancy: {:.1}",
389                    eq.recommended_threshold
390                );
391                println!("  {}", eq.quality_explanation);
392            }
393        }
394
395        if !tables.is_empty() {
396            println!("\nTables: {} extracted", tables.len());
397            for t in &tables {
398                println!(
399                    "  {} — {} rows × {} cols ({})",
400                    t.table_id, t.n_rows, t.n_cols, t.source_file
401                );
402            }
403        }
404
405        println!("\nMaintenance:");
406        println!(
407            "  Run `{}` to rebuild indexes and reclaim space.",
408            maintenance_command
409        );
410    }
411    Ok(())
412}
413
414/// Handler for `memvid who`
415pub fn handle_who(args: WhoArgs) -> Result<()> {
416    match lockfile::current_owner(&args.file)? {
417        Some(owner) => {
418            if args.json {
419                let output = json!({
420                    "locked": true,
421                    "owner": owner_hint_to_json(&owner),
422                });
423                println!("{}", serde_json::to_string_pretty(&output)?);
424            } else {
425                println!("{} is locked by:", args.file.display());
426                if let Some(pid) = owner.pid {
427                    println!("  pid: {pid}");
428                }
429                if let Some(cmd) = owner.cmd.as_deref() {
430                    println!("  cmd: {cmd}");
431                }
432                if let Some(started) = owner.started_at.as_deref() {
433                    println!("  started_at: {started}");
434                }
435                if let Some(last) = owner.last_heartbeat.as_deref() {
436                    println!("  last_heartbeat: {last}");
437                }
438                if let Some(interval) = owner.heartbeat_ms {
439                    println!("  heartbeat_interval_ms: {interval}");
440                }
441                if let Some(file_id) = owner.file_id.as_deref() {
442                    println!("  file_id: {file_id}");
443                }
444                if let Some(path) = owner.file_path.as_ref() {
445                    println!("  file_path: {}", path.display());
446                }
447            }
448        }
449        None => {
450            if args.json {
451                let output = json!({"locked": false});
452                println!("{}", serde_json::to_string_pretty(&output)?);
453            } else {
454                println!("No active writer for {}", args.file.display());
455            }
456        }
457    }
458    Ok(())
459}
460
461// ============================================================================
462// View command handler and helpers
463// ============================================================================
464
465/// Handler for `memvid view`
466pub fn handle_view(args: ViewArgs) -> Result<()> {
467    if args.page == 0 {
468        bail!("page must be greater than zero");
469    }
470    if let Some(size) = args.page_size {
471        if size == 0 {
472            bail!("page-size must be greater than zero");
473        }
474    }
475
476    let mut mem = open_read_only_mem(&args.file)?;
477    let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
478
479    if args.play {
480        #[cfg(feature = "audio-playback")]
481        {
482            play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
483            return Ok(());
484        }
485        #[cfg(not(feature = "audio-playback"))]
486        {
487            bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
488        }
489    }
490
491    if args.preview {
492        let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
493        preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
494        return Ok(());
495    }
496
497    if args.binary {
498        let bytes = mem.frame_canonical_payload(frame.id)?;
499        let mut stdout = io::stdout();
500        stdout.write_all(&bytes)?;
501        stdout.flush()?;
502        return Ok(());
503    }
504
505    let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
506    let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
507
508    let page_size = args
509        .page_size
510        .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
511        .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
512
513    let mut manifest = if args.page_size.is_none() {
514        manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
515    } else {
516        compute_chunk_manifest(&canonical_text, page_size)
517    };
518    if manifest.chunks.is_empty() {
519        manifest = TextChunkManifest {
520            chunk_chars: page_size,
521            chunks: vec![TextChunkRange {
522                start: 0,
523                end: canonical_text.chars().count(),
524            }],
525        };
526    }
527
528    if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
529        let total_chars = canonical_text.chars().count();
530        manifest = TextChunkManifest {
531            chunk_chars: total_chars.max(1),
532            chunks: vec![TextChunkRange {
533                start: 0,
534                end: total_chars,
535            }],
536        };
537    }
538
539    let total_pages = manifest.chunks.len().max(1);
540    if args.page > total_pages {
541        bail!(
542            "page {} is out of range (total pages: {})",
543            args.page,
544            total_pages
545        );
546    }
547
548    let chunk = &manifest.chunks[args.page - 1];
549    let content = extract_chunk_slice(&canonical_text, chunk);
550
551    if args.json {
552        let mut frame_json = frame_to_json(&frame);
553        if let Some(obj) = frame_json.as_object_mut() {
554            // Note: Do NOT overwrite search_text - it contains the extracted text from the document.
555            // The "content" field shows the paginated payload view.
556            if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
557                if let Some(manifest_obj) = manifest_json.as_object_mut() {
558                    let total = manifest.chunks.len();
559                    if total > 0 {
560                        let mut window = serde_json::Map::new();
561                        let idx = args.page.saturating_sub(1).min(total - 1);
562                        if idx > 0 {
563                            let prev = &manifest.chunks[idx - 1];
564                            window.insert("prev".into(), json!([prev.start, prev.end]));
565                        }
566                        let current = &manifest.chunks[idx];
567                        window.insert("current".into(), json!([current.start, current.end]));
568                        if idx + 1 < total {
569                            let next = &manifest.chunks[idx + 1];
570                            window.insert("next".into(), json!([next.start, next.end]));
571                        }
572                        manifest_obj.insert("chunks".into(), Value::Object(window));
573                    }
574                }
575            }
576        }
577        let json = json!({
578            "frame": frame_json,
579            "page": args.page,
580            "page_size": manifest.chunk_chars,
581            "page_count": total_pages,
582            "has_prev": args.page > 1,
583            "has_next": args.page < total_pages,
584            "content": content,
585        });
586        println!("{}", serde_json::to_string_pretty(&json)?);
587    } else {
588        print_frame_summary(&mut mem, &frame)?;
589        println!(
590            "Page {}/{} ({} chars per page)",
591            args.page, total_pages, manifest.chunk_chars
592        );
593        println!();
594        println!("{}", content);
595    }
596    Ok(())
597}
598
/// Optional start/end window for a media preview, as produced by
/// `parse_preview_bounds` from the `--start` / `--end` flags.
///
/// Either side may be absent; `parse_preview_bounds` never builds a value with
/// both sides `None`.
#[derive(Debug)]
pub struct PreviewBounds {
    // Start of the window; the value is whatever `parse_timecode` returns
    // (presumably milliseconds, per the `_ms` suffix — confirm).
    pub start_ms: Option<u64>,
    // End of the window, in the same unit as `start_ms`.
    pub end_ms: Option<u64>,
}
604
605pub fn parse_preview_bounds(
606    start: Option<&String>,
607    end: Option<&String>,
608) -> Result<Option<PreviewBounds>> {
609    let start_ms = match start {
610        Some(value) => Some(parse_timecode(value)?),
611        None => None,
612    };
613    let end_ms = match end {
614        Some(value) => Some(parse_timecode(value)?),
615        None => None,
616    };
617
618    if let (Some(s), Some(e)) = (start_ms, end_ms) {
619        if e <= s {
620            anyhow::bail!("--end must be greater than --start");
621        }
622    }
623
624    if start_ms.is_none() && end_ms.is_none() {
625        Ok(None)
626    } else {
627        Ok(Some(PreviewBounds { start_ms, end_ms }))
628    }
629}
630
631fn preview_frame_media(
632    mem: &mut Memvid,
633    frame: &Frame,
634    cli_uri: Option<&str>,
635    bounds: Option<PreviewBounds>,
636) -> Result<()> {
637    let manifest = mem.media_manifest(frame.id)?;
638    let mut mime = manifest
639        .as_ref()
640        .map(|m| m.mime.clone())
641        .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
642        .unwrap_or_else(|| "application/octet-stream".to_string());
643
644    // If mime is generic, try to detect from payload bytes
645    if mime == "application/octet-stream" {
646        if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
647            if let Some(kind) = infer::get(&bytes) {
648                mime = kind.mime_type().to_string();
649            }
650        }
651    }
652
653    let is_video = manifest
654        .as_ref()
655        .map(|media| media.kind.eq_ignore_ascii_case("video"))
656        .unwrap_or_else(|| mime.starts_with("video/"));
657
658    if is_video {
659        preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
660    } else {
661        if bounds.is_some() {
662            anyhow::bail!("--start/--end are only supported for video previews");
663        }
664        if is_image_mime(&mime) {
665            preview_frame_image(mem, frame, cli_uri)?;
666        } else if is_audio_mime(&mime) {
667            preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
668        } else {
669            preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
670        }
671    }
672    Ok(())
673}
674
675fn preview_frame_video(
676    mem: &mut Memvid,
677    frame: &Frame,
678    cli_uri: Option<&str>,
679    bounds: Option<PreviewBounds>,
680    manifest: Option<MediaManifest>,
681    mime: &str,
682) -> Result<()> {
683    let extension = manifest
684        .as_ref()
685        .and_then(|m| m.filename.as_deref())
686        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
687        .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
688        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
689        .unwrap_or_else(|| "mp4".to_string());
690
691    let mut temp_file = Builder::new()
692        .prefix("memvid-preview-")
693        .suffix(&format!(".{extension}"))
694        .tempfile_in(std::env::temp_dir())
695        .context("failed to create temporary preview file")?;
696
697    let mut reader = mem
698        .blob_reader(frame.id)
699        .context("failed to stream payload for preview")?;
700    io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
701    temp_file
702        .flush()
703        .context("failed to flush video preview to disk")?;
704
705    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
706    drop(file);
707
708    let mut display_path = preview_path.clone();
709    if let Some(ref span) = bounds {
710        let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
711        if needs_trim {
712            if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
713                display_path = trimmed;
714            }
715        }
716    }
717
718    println!("Opening preview...");
719    open::that(&display_path).with_context(|| {
720        format!(
721            "failed to launch default video player for {}",
722            display_path.display()
723        )
724    })?;
725
726    let display_uri = cli_uri
727        .or_else(|| frame.uri.as_deref())
728        .unwrap_or("<unknown>");
729    println!(
730        "Opened preview for {} (frame {}) -> {} ({})",
731        display_uri,
732        frame.id,
733        display_path.display(),
734        mime
735    );
736    Ok(())
737}
738
739fn maybe_trim_with_ffmpeg(
740    source: &Path,
741    extension: &str,
742    bounds: &PreviewBounds,
743) -> Result<Option<PathBuf>> {
744    if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
745        return Ok(None);
746    }
747
748    let ffmpeg = match which::which("ffmpeg") {
749        Ok(path) => path,
750        Err(_) => {
751            warn!("ffmpeg binary not found on PATH; opening full video");
752            return Ok(None);
753        }
754    };
755
756    let target = std::env::temp_dir().join(format!(
757        "memvid-preview-clip-{}.{}",
758        Uuid::new_v4(),
759        extension
760    ));
761
762    let mut command = Command::new(ffmpeg);
763    command.arg("-y");
764    if let Some(start) = bounds.start_ms {
765        command.arg("-ss").arg(format_timestamp_ms(start));
766    }
767    command.arg("-i").arg(source);
768    if let Some(end) = bounds.end_ms {
769        command.arg("-to").arg(format_timestamp_ms(end));
770    }
771    command.arg("-c").arg("copy");
772    command.arg(&target);
773
774    let status = command
775        .status()
776        .context("failed to run ffmpeg for preview trimming")?;
777    if status.success() {
778        return Ok(Some(target));
779    }
780
781    let details = status
782        .code()
783        .map(|code| code.to_string())
784        .unwrap_or_else(|| "terminated".to_string());
785    warn!("ffmpeg exited with status {details}; opening full video");
786    Ok(None)
787}
788
789fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
790    let bytes = mem
791        .frame_canonical_payload(frame.id)
792        .context("failed to load canonical payload for frame")?;
793    if bytes.is_empty() {
794        bail!("frame payload is empty; nothing to preview");
795    }
796
797    let detected_kind = infer::get(&bytes);
798    let mut mime = frame
799        .metadata
800        .as_ref()
801        .and_then(|meta| meta.mime.clone())
802        .filter(|value| is_image_mime(value));
803
804    if mime.is_none() {
805        if let Some(kind) = &detected_kind {
806            let candidate = kind.mime_type();
807            if is_image_mime(candidate) {
808                mime = Some(candidate.to_string());
809            }
810        }
811    }
812
813    let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
814    if !is_image_mime(&mime) {
815        bail!("frame mime type {mime} is not an image");
816    }
817
818    let extension = detected_kind
819        .as_ref()
820        .map(|kind| kind.extension().to_string())
821        .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
822        .unwrap_or_else(|| "img".to_string());
823
824    let suffix = format!(".{extension}");
825    let mut temp_file = Builder::new()
826        .prefix("memvid-preview-")
827        .suffix(&suffix)
828        .tempfile_in(std::env::temp_dir())
829        .context("failed to create temporary preview file")?;
830    temp_file
831        .write_all(&bytes)
832        .context("failed to write image data to preview file")?;
833    temp_file
834        .flush()
835        .context("failed to flush preview file to disk")?;
836
837    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
838    drop(file);
839
840    println!("Opening preview...");
841    open::that(&preview_path).with_context(|| {
842        format!(
843            "failed to launch default image viewer for {}",
844            preview_path.display()
845        )
846    })?;
847
848    let display_uri = cli_uri
849        .or_else(|| frame.uri.as_deref())
850        .unwrap_or("<unknown>");
851    println!(
852        "Opened preview for {} (frame {}) -> {} ({})",
853        display_uri,
854        frame.id,
855        preview_path.display(),
856        mime
857    );
858    Ok(())
859}
860
861fn preview_frame_document(
862    mem: &mut Memvid,
863    frame: &Frame,
864    cli_uri: Option<&str>,
865    manifest: Option<&MediaManifest>,
866    mime: &str,
867) -> Result<()> {
868    let bytes = mem
869        .frame_canonical_payload(frame.id)
870        .context("failed to load canonical payload for frame")?;
871    if bytes.is_empty() {
872        bail!("frame payload is empty; nothing to preview");
873    }
874
875    let mut extension = manifest
876        .and_then(|m| m.filename.as_deref())
877        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
878        .map(|ext| ext.trim_start_matches('.').to_string())
879        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
880        .unwrap_or_else(|| "bin".to_string());
881
882    if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
883        extension = "txt".to_string();
884    }
885
886    let suffix = format!(".{extension}");
887    let mut temp_file = Builder::new()
888        .prefix("memvid-preview-")
889        .suffix(&suffix)
890        .tempfile_in(std::env::temp_dir())
891        .context("failed to create temporary preview file")?;
892    temp_file
893        .write_all(&bytes)
894        .context("failed to write document data to preview file")?;
895    temp_file
896        .flush()
897        .context("failed to flush preview file to disk")?;
898
899    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
900    drop(file);
901
902    println!("Opening preview...");
903    open::that(&preview_path).with_context(|| {
904        format!(
905            "failed to launch default viewer for {}",
906            preview_path.display()
907        )
908    })?;
909
910    let display_uri = cli_uri
911        .or_else(|| frame.uri.as_deref())
912        .unwrap_or("<unknown>");
913    println!(
914        "Opened preview for {} (frame {}) -> {} ({})",
915        display_uri,
916        frame.id,
917        preview_path.display(),
918        mime
919    );
920    Ok(())
921}
922
923fn preview_frame_audio_file(
924    mem: &mut Memvid,
925    frame: &Frame,
926    cli_uri: Option<&str>,
927    manifest: Option<&MediaManifest>,
928    mime: &str,
929) -> Result<()> {
930    let bytes = mem
931        .frame_canonical_payload(frame.id)
932        .context("failed to load canonical payload for frame")?;
933    if bytes.is_empty() {
934        bail!("frame payload is empty; nothing to preview");
935    }
936
937    let mut extension = manifest
938        .and_then(|m| m.filename.as_deref())
939        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
940        .map(|ext| ext.trim_start_matches('.').to_string())
941        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
942        .unwrap_or_else(|| "audio".to_string());
943
944    if extension == "bin" {
945        extension = "audio".to_string();
946    }
947
948    let suffix = format!(".{extension}");
949    let mut temp_file = Builder::new()
950        .prefix("memvid-preview-")
951        .suffix(&suffix)
952        .tempfile_in(std::env::temp_dir())
953        .context("failed to create temporary preview file")?;
954    temp_file
955        .write_all(&bytes)
956        .context("failed to write audio data to preview file")?;
957    temp_file
958        .flush()
959        .context("failed to flush preview file to disk")?;
960
961    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
962    drop(file);
963
964    println!("Opening preview...");
965    open::that(&preview_path).with_context(|| {
966        format!(
967            "failed to launch default audio player for {}",
968            preview_path.display()
969        )
970    })?;
971
972    let display_uri = cli_uri
973        .or_else(|| frame.uri.as_deref())
974        .unwrap_or("<unknown>");
975    println!(
976        "Opened preview for {} (frame {}) -> {} ({})",
977        display_uri,
978        frame.id,
979        preview_path.display(),
980        mime
981    );
982    Ok(())
983}
984
/// Decodes a frame's canonical payload with `rodio` and plays it on the default audio
/// output, optionally restricted to a `[start_seconds, end_seconds)` window.
///
/// A negative (or NaN) start is treated as `0.0`. When the frame metadata carries a
/// positive audio duration, a start at or past it is rejected and an end past it is
/// clamped (with a warning). The call returns after `sink.sleep_until_end()` completes.
///
/// # Errors
///
/// Fails when `end_seconds <= start_seconds`, when the payload cannot be loaded or is
/// empty, when the requested window is empty or cannot be represented as a `Duration`
/// (for example an infinite value), or when decoding / opening the audio output fails.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    if let (Some(start), Some(end)) = (start_seconds, end_seconds) {
        if end <= start {
            bail!("--end-seconds must be greater than --start-seconds");
        }
    }

    let bytes = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if bytes.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // A missing or non-positive duration means "unknown"; range checks against it are skipped.
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);

    if duration_meta > 0.0 && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    // `Duration::from_secs_f32` panics on non-finite or overflowing input, which is
    // reachable from user-supplied values; convert fallibly and report an error instead.
    let skip = Duration::try_from_secs_f32(start)
        .with_context(|| format!("start-seconds ({start:.2}) is not a valid playback offset"))?;

    // Resolve the optional end bound into (clamped end, playback length) before touching
    // the audio device, so invalid ranges fail fast.
    let window = match end_seconds {
        Some(end) => {
            let effective_end = if duration_meta > 0.0 {
                if end > duration_meta + f32::EPSILON {
                    warn!(
                        "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
                        end, duration_meta
                    );
                }
                end.min(duration_meta)
            } else {
                end
            };
            let duration = (effective_end - start).max(0.0);
            if duration <= 0.0 {
                bail!("playback duration is zero; adjust start/end seconds");
            }
            let length = Duration::try_from_secs_f32(duration).with_context(|| {
                format!("end-seconds ({end:.2}) is not a valid playback bound")
            })?;
            Some((effective_end, length))
        }
        None => None,
    };

    let cursor = Cursor::new(bytes);
    let decoder = rodio::Decoder::new(cursor).context("failed to decode audio stream")?;
    // `_stream` must stay bound for the duration of playback; it is only dropped on return.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    match window {
        Some((effective_end, length)) => {
            sink.append(decoder.skip_duration(skip).take_duration(length));
            let segment_desc = format!("{start:.2}s → {effective_end:.2}s");
            announce_playback(display_uri, &segment_desc);
        }
        None => {
            sink.append(decoder.skip_duration(skip));
            let segment_desc = format!("{start:.2}s → end");
            announce_playback(display_uri, &segment_desc);
        }
    }
    sink.sleep_until_end();
    Ok(())
}
1060
/// Prints a one-line notice naming the URI being played and the selected segment.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    println!("Playing {} ({})", uri, segment_desc);
}
1065
/// Reports whether `value` names an `image/*` media type.
///
/// Anything after the first `;` is ignored, surrounding whitespace is trimmed, and the
/// prefix comparison is ASCII case-insensitive.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";
    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1070
/// Reports whether `value` names an `audio/*` media type.
///
/// Anything after the first `;` is ignored, surrounding whitespace is trimmed, and the
/// prefix comparison is ASCII case-insensitive.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"audio/";
    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1075
/// Maps a MIME type to a conventional file extension (without the leading dot).
///
/// Parameters after the first `;` are ignored, surrounding whitespace is trimmed, and
/// matching is ASCII case-insensitive. Returns `None` for types not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    // Each row lists the accepted spellings of a media type and the extension they share.
    const EXTENSIONS: &[(&[&str], &str)] = &[
        (&["image/jpeg", "image/jpg"], "jpg"),
        (&["image/png"], "png"),
        (&["image/gif"], "gif"),
        (&["image/webp"], "webp"),
        (&["image/bmp"], "bmp"),
        (&["image/tiff"], "tiff"),
        (&["image/x-icon", "image/vnd.microsoft.icon"], "ico"),
        (&["image/svg+xml"], "svg"),
        (&["video/mp4", "video/iso.segment"], "mp4"),
        (&["video/quicktime"], "mov"),
        (&["video/webm"], "webm"),
        (&["video/x-matroska", "video/matroska"], "mkv"),
        (&["video/x-msvideo"], "avi"),
        (&["video/mpeg"], "mpg"),
        (&["application/pdf"], "pdf"),
        (&["audio/mpeg", "audio/mp3"], "mp3"),
        (&["audio/wav", "audio/x-wav"], "wav"),
        (&["audio/x-flac", "audio/flac"], "flac"),
        (&["audio/ogg", "audio/vorbis"], "ogg"),
        (&["audio/x-m4a", "audio/mp4"], "m4a"),
        (&["audio/aac"], "aac"),
        (&["audio/x-aiff", "audio/aiff"], "aiff"),
        (&["text/plain"], "txt"),
        (&["text/markdown", "text/x-markdown"], "md"),
        (&["text/html"], "html"),
        (&["application/xhtml+xml"], "xhtml"),
        (
            &["application/json", "text/json", "application/vnd.api+json"],
            "json",
        ),
        (&["application/xml", "text/xml"], "xml"),
        (&["text/csv", "application/csv"], "csv"),
        (&["application/javascript", "text/javascript"], "js"),
        (&["text/css"], "css"),
        (
            &["application/yaml", "application/x-yaml", "text/yaml"],
            "yaml",
        ),
        (&["application/rtf"], "rtf"),
        (&["application/msword"], "doc"),
        (
            &["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
            "docx",
        ),
        (&["application/vnd.ms-powerpoint"], "ppt"),
        (
            &["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
            "pptx",
        ),
        (&["application/vnd.ms-excel"], "xls"),
        (
            &["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
            "xlsx",
        ),
        (&["application/zip"], "zip"),
        (&["application/x-tar"], "tar"),
        (&["application/x-7z-compressed"], "7z"),
    ];

    let essence = mime.split(';').next().unwrap_or(mime).trim();
    EXTENSIONS
        .iter()
        .find(|(aliases, _)| {
            aliases
                .iter()
                .any(|alias| essence.eq_ignore_ascii_case(alias))
        })
        .map(|&(_, extension)| extension)
}
/// Builds a short preview of a frame's search text.
///
/// Returns `None` when no text is given or the text is blank after trimming; otherwise
/// returns at most the first 160 characters of the trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    const SNIPPET_CHARS: usize = 160;

    let trimmed = text?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.chars().take(SNIPPET_CHARS).collect())
}
1139pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1140    json!({
1141        "id": frame.id,
1142        "status": frame_status_str(frame.status),
1143        "timestamp": frame.timestamp,
1144        "kind": frame.kind,
1145        "track": frame.track,
1146        "uri": frame.uri,
1147        "title": frame.title,
1148        "payload_length": frame.payload_length,
1149        "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1150        "canonical_length": frame.canonical_length,
1151        "role": format!("{:?}", frame.role),
1152        "parent_id": frame.parent_id,
1153        "chunk_index": frame.chunk_index,
1154        "chunk_count": frame.chunk_count,
1155        "tags": frame.tags,
1156        "labels": frame.labels,
1157        "search_text": frame.search_text,
1158        "metadata": frame.metadata,
1159        "extra_metadata": frame.extra_metadata,
1160        "content_dates": frame.content_dates,
1161        "chunk_manifest": frame.chunk_manifest,
1162        "supersedes": frame.supersedes,
1163        "superseded_by": frame.superseded_by,
1164    })
1165}
1166pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1167    println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1168    println!("Timestamp: {}", frame.timestamp);
1169    if let Some(uri) = &frame.uri {
1170        println!("URI: {uri}");
1171    }
1172    if let Some(title) = &frame.title {
1173        println!("Title: {title}");
1174    }
1175    if let Some(kind) = &frame.kind {
1176        println!("Kind: {kind}");
1177    }
1178    if let Some(track) = &frame.track {
1179        println!("Track: {track}");
1180    }
1181    if let Some(supersedes) = frame.supersedes {
1182        println!("Supersedes frame: {supersedes}");
1183    }
1184    if let Some(successor) = frame.superseded_by {
1185        println!("Superseded by frame: {successor}");
1186    }
1187    println!(
1188        "Payload: {} bytes (canonical {:?}, logical {:?})",
1189        frame.payload_length, frame.canonical_encoding, frame.canonical_length
1190    );
1191    if !frame.tags.is_empty() {
1192        println!("Tags: {}", frame.tags.join(", "));
1193    }
1194    if !frame.labels.is_empty() {
1195        println!("Labels: {}", frame.labels.join(", "));
1196    }
1197    if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1198        println!("Search text: {snippet}");
1199    }
1200    if let Some(meta) = &frame.metadata {
1201        let rendered = serde_json::to_string_pretty(meta)?;
1202        println!("Metadata: {rendered}");
1203    }
1204    if !frame.extra_metadata.is_empty() {
1205        let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1206        entries.sort_by(|a, b| a.0.cmp(b.0));
1207        println!("Extra metadata:");
1208        for (key, value) in entries {
1209            println!("  {key}: {value}");
1210        }
1211    }
1212    if !frame.content_dates.is_empty() {
1213        println!("Content dates: {}", frame.content_dates.join(", "));
1214    }
1215    match mem.frame_embedding(frame.id) {
1216        Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1217        Ok(None) => println!("Embedding: none"),
1218        Err(err) => println!("Embedding: unavailable ({err})"),
1219    }
1220    Ok(())
1221}
1222fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1223    let bytes = mem.frame_canonical_payload(frame.id)?;
1224    let raw = match String::from_utf8(bytes) {
1225        Ok(text) => text,
1226        Err(err) => {
1227            let bytes = err.into_bytes();
1228            String::from_utf8_lossy(&bytes).into_owned()
1229        }
1230    };
1231
1232    Ok(normalize_text(&raw, usize::MAX)
1233        .map(|n| n.text)
1234        .unwrap_or_default())
1235}
1236
1237fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1238    if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1239        return false;
1240    }
1241    let total_chars = text.chars().count();
1242    manifest
1243        .chunks
1244        .iter()
1245        .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1246}
1247
1248fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1249    let primary = frame
1250        .chunk_manifest
1251        .clone()
1252        .filter(|manifest| manifests_match_text(text, manifest));
1253    if primary.is_some() {
1254        return primary;
1255    }
1256
1257    frame
1258        .extra_metadata
1259        .get(CHUNK_MANIFEST_KEY)
1260        .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1261        .filter(|manifest| manifests_match_text(text, manifest))
1262}
1263
1264fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1265    let normalized = normalize_text(text, usize::MAX)
1266        .map(|n| n.text)
1267        .unwrap_or_default();
1268
1269    let effective_chunk = chunk_chars.max(1);
1270    let total_chars = normalized.chars().count();
1271    if total_chars == 0 {
1272        return TextChunkManifest {
1273            chunk_chars: effective_chunk,
1274            chunks: vec![TextChunkRange { start: 0, end: 0 }],
1275        };
1276    }
1277    if total_chars <= effective_chunk {
1278        return TextChunkManifest {
1279            chunk_chars: effective_chunk,
1280            chunks: vec![TextChunkRange {
1281                start: 0,
1282                end: total_chars,
1283            }],
1284        };
1285    }
1286    let mut chunks = Vec::new();
1287    let mut start = 0usize;
1288    while start < total_chars {
1289        let end = (start + effective_chunk).min(total_chars);
1290        chunks.push(TextChunkRange { start, end });
1291        start = end;
1292    }
1293    TextChunkManifest {
1294        chunk_chars: effective_chunk,
1295        chunks,
1296    }
1297}
1298
1299fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1300    if range.start >= range.end || text.is_empty() {
1301        return String::new();
1302    }
1303    let mut start_byte = text.len();
1304    let mut end_byte = text.len();
1305    let mut idx = 0usize;
1306    for (byte_offset, _) in text.char_indices() {
1307        if idx == range.start {
1308            start_byte = byte_offset;
1309        }
1310        if idx == range.end {
1311            end_byte = byte_offset;
1312            break;
1313        }
1314        idx += 1;
1315    }
1316    if start_byte == text.len() {
1317        return String::new();
1318    }
1319    if end_byte == text.len() {
1320        end_byte = text.len();
1321    }
1322    text[start_byte..end_byte].to_string()
1323}