Skip to main content

memvid_cli/commands/
inspection.rs

1//! Inspection command handlers (view, stats, who)
2
3#[cfg(feature = "audio-playback")]
4use std::io::Cursor;
5use std::io::{self, Write};
6use std::path::{Path, PathBuf};
7use std::process::Command;
8#[cfg(feature = "audio-playback")]
9use std::time::Duration;
10
11use anyhow::{anyhow, bail, Context, Result};
12use clap::Args;
13use hex;
14use memvid_core::table::list_tables;
15use memvid_core::{
16    lockfile, normalize_text, Frame, FrameRole, MediaManifest, Memvid, TextChunkManifest,
17    TextChunkRange,
18};
19use serde_json::{json, Value};
20use tempfile::Builder;
21use tracing::warn;
22use uuid::Uuid;
23
24use crate::config::CliConfig;
25use crate::utils::{
26    format_bytes, format_percent, format_timestamp_ms, frame_status_str, open_read_only_mem,
27    owner_hint_to_json, parse_timecode, round_percent, select_frame, yes_no,
28};
29
/// Fallback page size, in characters, used by `view` when neither `--page-size` nor a
/// chunk manifest recovered from the frame supplies one.
const DEFAULT_VIEW_PAGE_CHARS: usize = 1_200;
/// Name of the v1 chunk-manifest entry. It is not referenced in the part of the file
/// reviewed here; presumably it is the metadata key the manifest is read from (TODO confirm).
const CHUNK_MANIFEST_KEY: &str = "memvid_chunks_v1";
32
33/// Arguments for the `view` subcommand
34#[derive(Args)]
35pub struct ViewArgs {
36    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
37    pub file: PathBuf,
38    #[arg(long = "frame-id", value_name = "ID", conflicts_with = "uri")]
39    pub frame_id: Option<u64>,
40    #[arg(long, value_name = "URI", conflicts_with = "frame_id")]
41    pub uri: Option<String>,
42    #[arg(long)]
43    pub json: bool,
44    #[arg(long, conflicts_with = "json")]
45    pub binary: bool,
46    #[arg(long, conflicts_with_all = ["json", "binary"])]
47    pub preview: bool,
48    /// Optional start time for video previews (HH:MM:SS[.mmm])
49    #[arg(
50        long = "start",
51        value_name = "HH:MM:SS",
52        requires = "preview",
53        conflicts_with_all = ["json", "binary", "play"]
54    )]
55    pub preview_start: Option<String>,
56    /// Optional end time for video previews (HH:MM:SS[.mmm])
57    #[arg(
58        long = "end",
59        value_name = "HH:MM:SS",
60        requires = "preview",
61        conflicts_with_all = ["json", "binary", "play"]
62    )]
63    pub preview_end: Option<String>,
64    #[arg(long = "play", conflicts_with_all = ["json", "binary", "preview"])]
65    pub play: bool,
66    #[arg(long = "start-seconds", requires = "play")]
67    pub start_seconds: Option<f32>,
68    #[arg(long = "end-seconds", requires = "play")]
69    pub end_seconds: Option<f32>,
70    #[arg(long, value_name = "N", default_value_t = 1)]
71    pub page: usize,
72    #[arg(long = "page-size", value_name = "CHARS")]
73    pub page_size: Option<usize>,
74}
75
76/// Arguments for the `stats` subcommand
77#[derive(Args)]
78pub struct StatsArgs {
79    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
80    pub file: PathBuf,
81    #[arg(long)]
82    pub json: bool,
83    /// Replay: Show stats for frames with ID <= AS_OF_FRAME (time-travel view)
84    #[arg(long = "as-of-frame", value_name = "FRAME_ID")]
85    pub as_of_frame: Option<u64>,
86    /// Replay: Show stats for frames with timestamp <= AS_OF_TS (time-travel view)
87    #[arg(long = "as-of-ts", value_name = "UNIX_TIMESTAMP")]
88    pub as_of_ts: Option<i64>,
89}
90
91/// Arguments for the `who` subcommand
92#[derive(Args)]
93pub struct WhoArgs {
94    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
95    pub file: PathBuf,
96    #[arg(long)]
97    pub json: bool,
98}
99
100/// Handler for `memvid stats`
101pub fn handle_stats(_config: &CliConfig, args: StatsArgs) -> Result<()> {
102    let mut mem = Memvid::open_read_only(&args.file)?;
103    let stats = mem.stats()?;
104    let tables = list_tables(&mut mem).unwrap_or_default();
105    let vec_dimension = mem.effective_vec_index_dimension()?;
106    let embedding_identity = mem.embedding_identity_summary(10_000);
107
108    // Note: Replay filtering for stats is currently not implemented
109    // The stats show the full memory state
110    if args.as_of_frame.is_some() || args.as_of_ts.is_some() {
111        eprintln!("Note: Replay filtering (--as-of-frame/--as-of-ts) shows current stats.");
112        eprintln!("      Use 'find' or 'timeline' commands for filtered results.");
113    }
114    let overhead_bytes = stats.size_bytes.saturating_sub(stats.payload_bytes);
115    let payload_share_percent: f64 = if stats.size_bytes > 0 {
116        round_percent((stats.payload_bytes as f64 / stats.size_bytes as f64) * 100.0)
117    } else {
118        0.0
119    };
120    let overhead_share_percent: f64 = if stats.size_bytes > 0 {
121        round_percent((100.0 - payload_share_percent).max(0.0))
122    } else {
123        0.0
124    };
125    let maintenance_command = format!(
126        "memvid doctor {} --vacuum --rebuild-time-index --rebuild-lex-index",
127        args.file.display()
128    );
129
130    if args.json {
131        let mut raw_json = serde_json::to_value(&stats)?;
132        if let Value::Object(ref mut obj) = raw_json {
133            obj.remove("tier");
134        }
135
136        // Build tables list for JSON output
137        let tables_json: Vec<serde_json::Value> = tables
138            .iter()
139            .map(|t| {
140                json!({
141                    "table_id": t.table_id,
142                    "source_file": t.source_file,
143                    "n_rows": t.n_rows,
144                    "n_cols": t.n_cols,
145                    "pages": format!("{}-{}", t.page_start, t.page_end),
146                    "quality": format!("{:?}", t.quality),
147                    "headers": t.headers,
148                })
149            })
150            .collect();
151
152        // Compute embedding quality for JSON output
153        let embedding_quality_json = if stats.has_vec_index {
154            mem.embedding_quality().ok().flatten().map(|eq| {
155                json!({
156                    "vector_count": eq.vector_count,
157                    "dimension": eq.dimension,
158                    "avg_similarity": eq.avg_similarity,
159                    "min_similarity": eq.min_similarity,
160                    "max_similarity": eq.max_similarity,
161                    "std_similarity": eq.std_similarity,
162                    "clustering_coefficient": eq.clustering_coefficient,
163                    "estimated_clusters": eq.estimated_clusters,
164                    "recommended_threshold": eq.recommended_threshold,
165                    "quality_rating": eq.quality_rating,
166                    "quality_explanation": eq.quality_explanation,
167                })
168            })
169        } else {
170            None
171        };
172
173        let embedding_identity_json = match &embedding_identity {
174            memvid_core::EmbeddingIdentitySummary::Unknown => Value::Null,
175            memvid_core::EmbeddingIdentitySummary::Single(identity) => json!({
176                "provider": identity.provider.as_deref(),
177                "model": identity.model.as_deref(),
178                "dimension": identity.dimension.or(vec_dimension),
179                "normalized": identity.normalized,
180            }),
181            memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
182                let values: Vec<Value> = identities
183                    .iter()
184                    .map(|entry| {
185                        json!({
186                            "provider": entry.identity.provider.as_deref(),
187                            "model": entry.identity.model.as_deref(),
188                            "dimension": entry.identity.dimension.or(vec_dimension),
189                            "normalized": entry.identity.normalized,
190                            "count": entry.count,
191                        })
192                    })
193                    .collect();
194                json!({ "mixed": values })
195            }
196        };
197
198        // Get enrichment stats for JSON output
199        let enrichment_stats = mem.enrichment_stats();
200        let enrichment_json = json!({
201            "total_frames": enrichment_stats.total_frames,
202            "enriched_frames": enrichment_stats.enriched_frames,
203            "pending_frames": enrichment_stats.pending_frames,
204            "searchable_only": enrichment_stats.searchable_only,
205        });
206
207        // Get ticket info for JSON output
208        let ticket = mem.current_ticket();
209        let ticket_json = json!({
210            "issuer": ticket.issuer,
211            "seq_no": ticket.seq_no,
212            "expires_in_secs": ticket.expires_in_secs,
213            "capacity_bytes": ticket.capacity_bytes,
214            "verified": ticket.verified,
215        });
216
217        let report = json!({
218            "summary": {
219                "sequence": stats.seq_no,
220                "frames": format!("{} total ({} active)", stats.frame_count, stats.active_frame_count),
221                "usage": format!(
222                    "{} used / {} total ({})",
223                    format_bytes(stats.size_bytes),
224                    format_bytes(stats.capacity_bytes),
225                    format_percent(stats.storage_utilisation_percent)
226                ),
227                "remaining": format!("{} free", format_bytes(stats.remaining_capacity_bytes)),
228            },
229            "storage": {
230                "payload": format!("{} ({})", format_bytes(stats.payload_bytes), format_percent(payload_share_percent)),
231                "overhead": format!("{} ({}) - WAL + indexes", format_bytes(overhead_bytes), format_percent(overhead_share_percent)),
232                "logical_payload": format!("{} before compression", format_bytes(stats.logical_bytes)),
233                "compression_savings": format!("{} saved ({})", format_bytes(stats.saved_bytes), format_percent(stats.savings_percent)),
234                "compression_ratio": format_percent(stats.compression_ratio_percent),
235            },
236            "frames": {
237                "average_stored": format_bytes(stats.average_frame_payload_bytes),
238                "average_logical": format_bytes(stats.average_frame_logical_bytes),
239                "clip_images": stats.clip_image_count,
240            },
241            "indexes": {
242                "lexical": yes_no(stats.has_lex_index),
243                "vector": yes_no(stats.has_vec_index),
244                "time": yes_no(stats.has_time_index),
245            },
246            "enrichment": enrichment_json,
247            "ticket": ticket_json,
248            "embedding_identity": embedding_identity_json,
249            "embedding_quality": embedding_quality_json,
250            "tables": {
251                "count": tables.len(),
252                "tables": tables_json,
253            },
254            "maintenance": maintenance_command,
255            "raw": raw_json,
256        });
257
258        println!("{}", serde_json::to_string_pretty(&report)?);
259    } else {
260        let seq_display = stats
261            .seq_no
262            .map(|seq| seq.to_string())
263            .unwrap_or_else(|| "n/a".to_string());
264
265        println!("Memory: {}", args.file.display());
266        println!("Sequence: {}", seq_display);
267        println!(
268            "Frames: {} total ({} active)",
269            stats.frame_count, stats.active_frame_count
270        );
271
272        println!("\nCapacity:");
273        println!(
274            "  Usage: {} used / {} total ({})",
275            format_bytes(stats.size_bytes),
276            format_bytes(stats.capacity_bytes),
277            format_percent(stats.storage_utilisation_percent)
278        );
279        println!(
280            "  Remaining: {}",
281            format_bytes(stats.remaining_capacity_bytes)
282        );
283
284        // Show ticket verification status
285        let ticket = mem.current_ticket();
286        if ticket.seq_no > 0 {
287            let verified_str = if ticket.verified {
288                "✓ verified"
289            } else {
290                "⚠ unverified"
291            };
292            println!(
293                "  Ticket: seq={} issuer={} ({})",
294                ticket.seq_no, ticket.issuer, verified_str
295            );
296        }
297
298        println!("\nStorage breakdown:");
299        println!(
300            "  Payload: {} ({})",
301            format_bytes(stats.payload_bytes),
302            format_percent(payload_share_percent)
303        );
304        println!(
305            "  Overhead: {} ({})",
306            format_bytes(overhead_bytes),
307            format_percent(overhead_share_percent)
308        );
309        // PHASE 2: Detailed overhead breakdown for observability
310        println!("    ├─ WAL: {}", format_bytes(stats.wal_bytes));
311        println!(
312            "    ├─ Lexical index: {}",
313            format_bytes(stats.lex_index_bytes)
314        );
315        println!(
316            "    ├─ Vector index: {}",
317            format_bytes(stats.vec_index_bytes)
318        );
319        println!(
320            "    └─ Time index: {}",
321            format_bytes(stats.time_index_bytes)
322        );
323        println!(
324            "  Logical payload: {} before compression",
325            format_bytes(stats.logical_bytes)
326        );
327
328        if stats.has_vec_index {
329            println!("\nEmbeddings:");
330            if let Some(dim) = vec_dimension {
331                println!("  Dimension: {}", dim);
332            }
333            match &embedding_identity {
334                memvid_core::EmbeddingIdentitySummary::Unknown => {
335                    println!("  Model: unknown (no persisted embedding identity)");
336                }
337                memvid_core::EmbeddingIdentitySummary::Single(identity) => {
338                    if let Some(provider) = identity.provider.as_deref() {
339                        println!("  Provider: {}", provider);
340                    }
341                    if let Some(model) = identity.model.as_deref() {
342                        println!("  Model: {}", model);
343                    }
344                }
345                memvid_core::EmbeddingIdentitySummary::Mixed(identities) => {
346                    println!("  Model: mixed ({} identities detected)", identities.len());
347                    for entry in identities.iter().take(5) {
348                        let provider = entry.identity.provider.as_deref().unwrap_or("unknown");
349                        let model = entry.identity.model.as_deref().unwrap_or("unknown");
350                        println!("    - {} / {} ({} frames)", provider, model, entry.count);
351                    }
352                    if identities.len() > 5 {
353                        println!("    - ...");
354                    }
355                }
356            }
357        }
358        println!(
359            "  Compression savings: {} ({})",
360            format_bytes(stats.saved_bytes),
361            format_percent(stats.savings_percent)
362        );
363
364        println!("\nAverage frame:");
365        println!(
366            "  Stored: {}   Logical: {}",
367            format_bytes(stats.average_frame_payload_bytes),
368            format_bytes(stats.average_frame_logical_bytes)
369        );
370        if stats.clip_image_count > 0 {
371            println!("  CLIP images: {}", stats.clip_image_count);
372        }
373
374        // PHASE 2: Per-document cost analysis
375        if stats.active_frame_count > 0 {
376            let overhead_per_doc = overhead_bytes / stats.active_frame_count;
377            let lex_per_doc = stats.lex_index_bytes / stats.active_frame_count;
378            let vec_per_doc = stats.vec_index_bytes / stats.active_frame_count;
379
380            println!("\nPer-document overhead:");
381            println!("  Total: {}", format_bytes(overhead_per_doc));
382            if stats.has_lex_index {
383                println!("  Lexical: {}", format_bytes(lex_per_doc));
384            }
385            if stats.has_vec_index {
386                let vec_ratio = if stats.average_frame_payload_bytes > 0 {
387                    vec_per_doc as f64 / stats.average_frame_payload_bytes as f64
388                } else {
389                    0.0
390                };
391                println!(
392                    "  Vector: {} ({:.0}x text size)",
393                    format_bytes(vec_per_doc),
394                    vec_ratio
395                );
396            }
397        }
398
399        println!("\nIndexes:");
400        println!(
401            "  Lexical: {}   Vector: {}   Time: {}",
402            yes_no(stats.has_lex_index),
403            yes_no(stats.has_vec_index),
404            yes_no(stats.has_time_index)
405        );
406
407        // Show enrichment queue stats
408        let enrichment_stats = mem.enrichment_stats();
409        if enrichment_stats.pending_frames > 0 || enrichment_stats.searchable_only > 0 {
410            println!("\nEnrichment:");
411            println!(
412                "  Enriched: {} / {}",
413                enrichment_stats.enriched_frames, enrichment_stats.total_frames
414            );
415            if enrichment_stats.pending_frames > 0 {
416                println!("  Pending: {} frames", enrichment_stats.pending_frames);
417                println!(
418                    "  Run `memvid process-queue {}` to complete enrichment",
419                    args.file.display()
420                );
421            }
422        }
423
424        // Show embedding quality stats if vector index is available
425        if stats.has_vec_index {
426            if let Ok(Some(eq)) = mem.embedding_quality() {
427                println!("\nEmbedding Quality:");
428                println!(
429                    "  Vectors: {}   Dimension: {}",
430                    eq.vector_count, eq.dimension
431                );
432                println!(
433                    "  Similarity: avg={:.3}  min={:.3}  max={:.3}  std={:.3}",
434                    eq.avg_similarity, eq.min_similarity, eq.max_similarity, eq.std_similarity
435                );
436                println!(
437                    "  Clusters: ~{}   Quality: {}",
438                    eq.estimated_clusters, eq.quality_rating
439                );
440                println!(
441                    "  Recommended --min-relevancy: {:.1}",
442                    eq.recommended_threshold
443                );
444                println!("  {}", eq.quality_explanation);
445            }
446        }
447
448        if !tables.is_empty() {
449            println!("\nTables: {} extracted", tables.len());
450            for t in &tables {
451                println!(
452                    "  {} — {} rows × {} cols ({})",
453                    t.table_id, t.n_rows, t.n_cols, t.source_file
454                );
455            }
456        }
457
458        println!("\nMaintenance:");
459        println!(
460            "  Run `{}` to rebuild indexes and reclaim space.",
461            maintenance_command
462        );
463    }
464    Ok(())
465}
466
467/// Handler for `memvid who`
468pub fn handle_who(args: WhoArgs) -> Result<()> {
469    match lockfile::current_owner(&args.file)? {
470        Some(owner) => {
471            if args.json {
472                let output = json!({
473                    "locked": true,
474                    "owner": owner_hint_to_json(&owner),
475                });
476                println!("{}", serde_json::to_string_pretty(&output)?);
477            } else {
478                println!("{} is locked by:", args.file.display());
479                if let Some(pid) = owner.pid {
480                    println!("  pid: {pid}");
481                }
482                if let Some(cmd) = owner.cmd.as_deref() {
483                    println!("  cmd: {cmd}");
484                }
485                if let Some(started) = owner.started_at.as_deref() {
486                    println!("  started_at: {started}");
487                }
488                if let Some(last) = owner.last_heartbeat.as_deref() {
489                    println!("  last_heartbeat: {last}");
490                }
491                if let Some(interval) = owner.heartbeat_ms {
492                    println!("  heartbeat_interval_ms: {interval}");
493                }
494                if let Some(file_id) = owner.file_id.as_deref() {
495                    println!("  file_id: {file_id}");
496                }
497                if let Some(path) = owner.file_path.as_ref() {
498                    println!("  file_path: {}", path.display());
499                }
500            }
501        }
502        None => {
503            if args.json {
504                let output = json!({"locked": false});
505                println!("{}", serde_json::to_string_pretty(&output)?);
506            } else {
507                println!("No active writer for {}", args.file.display());
508            }
509        }
510    }
511    Ok(())
512}
513
514// ============================================================================
515// View command handler and helpers
516// ============================================================================
517
518/// Handler for `memvid view`
519pub fn handle_view(args: ViewArgs) -> Result<()> {
520    if args.page == 0 {
521        bail!("page must be greater than zero");
522    }
523    if let Some(size) = args.page_size {
524        if size == 0 {
525            bail!("page-size must be greater than zero");
526        }
527    }
528
529    let mut mem = open_read_only_mem(&args.file)?;
530    let frame = select_frame(&mut mem, args.frame_id, args.uri.as_deref())?;
531
532    if args.play {
533        #[cfg(feature = "audio-playback")]
534        {
535            play_frame_audio(&mut mem, &frame, args.start_seconds, args.end_seconds)?;
536            return Ok(());
537        }
538        #[cfg(not(feature = "audio-playback"))]
539        {
540            bail!("Audio playback requires the 'audio-playback' feature (only available on macOS)");
541        }
542    }
543
544    if args.preview {
545        let bounds = parse_preview_bounds(args.preview_start.as_ref(), args.preview_end.as_ref())?;
546        preview_frame_media(&mut mem, &frame, args.uri.as_deref(), bounds)?;
547        return Ok(());
548    }
549
550    if args.binary {
551        let bytes = mem.frame_canonical_payload(frame.id)?;
552        let mut stdout = io::stdout();
553        stdout.write_all(&bytes)?;
554        stdout.flush()?;
555        return Ok(());
556    }
557
558    let canonical_text = canonical_text_for_view(&mut mem, &frame)?;
559    let manifest_from_meta = canonical_manifest_from_frame(&canonical_text, &frame);
560
561    let page_size = args
562        .page_size
563        .or_else(|| manifest_from_meta.as_ref().map(|m| m.chunk_chars))
564        .unwrap_or(DEFAULT_VIEW_PAGE_CHARS);
565
566    let mut manifest = if args.page_size.is_none() {
567        manifest_from_meta.unwrap_or_else(|| compute_chunk_manifest(&canonical_text, page_size))
568    } else {
569        compute_chunk_manifest(&canonical_text, page_size)
570    };
571    if manifest.chunks.is_empty() {
572        manifest = TextChunkManifest {
573            chunk_chars: page_size,
574            chunks: vec![TextChunkRange {
575                start: 0,
576                end: canonical_text.chars().count(),
577            }],
578        };
579    }
580
581    if frame.role == FrameRole::DocumentChunk && args.page_size.is_none() {
582        let total_chars = canonical_text.chars().count();
583        manifest = TextChunkManifest {
584            chunk_chars: total_chars.max(1),
585            chunks: vec![TextChunkRange {
586                start: 0,
587                end: total_chars,
588            }],
589        };
590    }
591
592    let total_pages = manifest.chunks.len().max(1);
593    if args.page > total_pages {
594        bail!(
595            "page {} is out of range (total pages: {})",
596            args.page,
597            total_pages
598        );
599    }
600
601    let chunk = &manifest.chunks[args.page - 1];
602    let content = extract_chunk_slice(&canonical_text, chunk);
603
604    if args.json {
605        let mut frame_json = frame_to_json(&frame);
606        if let Some(obj) = frame_json.as_object_mut() {
607            // Note: Do NOT overwrite search_text - it contains the extracted text from the document.
608            // The "content" field shows the paginated payload view.
609            if let Some(manifest_json) = obj.get_mut("chunk_manifest") {
610                if let Some(manifest_obj) = manifest_json.as_object_mut() {
611                    let total = manifest.chunks.len();
612                    if total > 0 {
613                        let mut window = serde_json::Map::new();
614                        let idx = args.page.saturating_sub(1).min(total - 1);
615                        if idx > 0 {
616                            let prev = &manifest.chunks[idx - 1];
617                            window.insert("prev".into(), json!([prev.start, prev.end]));
618                        }
619                        let current = &manifest.chunks[idx];
620                        window.insert("current".into(), json!([current.start, current.end]));
621                        if idx + 1 < total {
622                            let next = &manifest.chunks[idx + 1];
623                            window.insert("next".into(), json!([next.start, next.end]));
624                        }
625                        manifest_obj.insert("chunks".into(), Value::Object(window));
626                    }
627                }
628            }
629        }
630        let json = json!({
631            "frame": frame_json,
632            "page": args.page,
633            "page_size": manifest.chunk_chars,
634            "page_count": total_pages,
635            "has_prev": args.page > 1,
636            "has_next": args.page < total_pages,
637            "content": content,
638        });
639        println!("{}", serde_json::to_string_pretty(&json)?);
640    } else {
641        print_frame_summary(&mut mem, &frame)?;
642        println!(
643            "Page {}/{} ({} chars per page)",
644            args.page, total_pages, manifest.chunk_chars
645        );
646        println!();
647        println!("{}", content);
648    }
649    Ok(())
650}
651
/// Optional start/end bounds for a video preview, as produced by `parse_preview_bounds`.
///
/// Either side may be absent; `parse_preview_bounds` never builds a value with both unset
/// and rejects an end that is not strictly after the start.
#[derive(Debug)]
pub struct PreviewBounds {
    /// Start of the preview window: the `parse_timecode` result for `--start`
    /// (milliseconds, going by the field name and its use with `format_timestamp_ms`).
    pub start_ms: Option<u64>,
    /// End of the preview window: the `parse_timecode` result for `--end`, same unit.
    pub end_ms: Option<u64>,
}
657
658pub fn parse_preview_bounds(
659    start: Option<&String>,
660    end: Option<&String>,
661) -> Result<Option<PreviewBounds>> {
662    let start_ms = match start {
663        Some(value) => Some(parse_timecode(value)?),
664        None => None,
665    };
666    let end_ms = match end {
667        Some(value) => Some(parse_timecode(value)?),
668        None => None,
669    };
670
671    if let (Some(s), Some(e)) = (start_ms, end_ms) {
672        if e <= s {
673            anyhow::bail!("--end must be greater than --start");
674        }
675    }
676
677    if start_ms.is_none() && end_ms.is_none() {
678        Ok(None)
679    } else {
680        Ok(Some(PreviewBounds { start_ms, end_ms }))
681    }
682}
683
684fn preview_frame_media(
685    mem: &mut Memvid,
686    frame: &Frame,
687    cli_uri: Option<&str>,
688    bounds: Option<PreviewBounds>,
689) -> Result<()> {
690    let manifest = mem.media_manifest(frame.id)?;
691    let mut mime = manifest
692        .as_ref()
693        .map(|m| m.mime.clone())
694        .or_else(|| frame.metadata.as_ref().and_then(|meta| meta.mime.clone()))
695        .unwrap_or_else(|| "application/octet-stream".to_string());
696
697    // If mime is generic, try to detect from payload bytes
698    if mime == "application/octet-stream" {
699        if let Ok(bytes) = mem.frame_canonical_payload(frame.id) {
700            if let Some(kind) = infer::get(&bytes) {
701                mime = kind.mime_type().to_string();
702            }
703        }
704    }
705
706    let is_video = manifest
707        .as_ref()
708        .map(|media| media.kind.eq_ignore_ascii_case("video"))
709        .unwrap_or_else(|| mime.starts_with("video/"));
710
711    if is_video {
712        preview_frame_video(mem, frame, cli_uri, bounds, manifest, &mime)?;
713    } else {
714        if bounds.is_some() {
715            anyhow::bail!("--start/--end are only supported for video previews");
716        }
717        if is_image_mime(&mime) {
718            preview_frame_image(mem, frame, cli_uri)?;
719        } else if is_audio_mime(&mime) {
720            preview_frame_audio_file(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
721        } else {
722            preview_frame_document(mem, frame, cli_uri, manifest.as_ref(), &mime)?;
723        }
724    }
725    Ok(())
726}
727
728fn preview_frame_video(
729    mem: &mut Memvid,
730    frame: &Frame,
731    cli_uri: Option<&str>,
732    bounds: Option<PreviewBounds>,
733    manifest: Option<MediaManifest>,
734    mime: &str,
735) -> Result<()> {
736    let extension = manifest
737        .as_ref()
738        .and_then(|m| m.filename.as_deref())
739        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
740        .map(|ext| ext.trim_start_matches('.').to_ascii_lowercase())
741        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
742        .unwrap_or_else(|| "mp4".to_string());
743
744    let mut temp_file = Builder::new()
745        .prefix("memvid-preview-")
746        .suffix(&format!(".{extension}"))
747        .tempfile_in(std::env::temp_dir())
748        .context("failed to create temporary preview file")?;
749
750    let mut reader = mem
751        .blob_reader(frame.id)
752        .context("failed to stream payload for preview")?;
753    io::copy(&mut reader, &mut temp_file).context("failed to write video data to preview file")?;
754    temp_file
755        .flush()
756        .context("failed to flush video preview to disk")?;
757
758    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
759    drop(file);
760
761    let mut display_path = preview_path.clone();
762    if let Some(ref span) = bounds {
763        let needs_trim = span.start_ms.is_some() || span.end_ms.is_some();
764        if needs_trim {
765            if let Some(trimmed) = maybe_trim_with_ffmpeg(&preview_path, &extension, span)? {
766                display_path = trimmed;
767            }
768        }
769    }
770
771    println!("Opening preview...");
772    open::that(&display_path).with_context(|| {
773        format!(
774            "failed to launch default video player for {}",
775            display_path.display()
776        )
777    })?;
778
779    let display_uri = cli_uri
780        .or_else(|| frame.uri.as_deref())
781        .unwrap_or("<unknown>");
782    println!(
783        "Opened preview for {} (frame {}) -> {} ({})",
784        display_uri,
785        frame.id,
786        display_path.display(),
787        mime
788    );
789    Ok(())
790}
791
792fn maybe_trim_with_ffmpeg(
793    source: &Path,
794    extension: &str,
795    bounds: &PreviewBounds,
796) -> Result<Option<PathBuf>> {
797    if bounds.start_ms.is_none() && bounds.end_ms.is_none() {
798        return Ok(None);
799    }
800
801    let ffmpeg = match which::which("ffmpeg") {
802        Ok(path) => path,
803        Err(_) => {
804            warn!("ffmpeg binary not found on PATH; opening full video");
805            return Ok(None);
806        }
807    };
808
809    let target = std::env::temp_dir().join(format!(
810        "memvid-preview-clip-{}.{}",
811        Uuid::new_v4(),
812        extension
813    ));
814
815    let mut command = Command::new(ffmpeg);
816    command.arg("-y");
817    if let Some(start) = bounds.start_ms {
818        command.arg("-ss").arg(format_timestamp_ms(start));
819    }
820    command.arg("-i").arg(source);
821    if let Some(end) = bounds.end_ms {
822        command.arg("-to").arg(format_timestamp_ms(end));
823    }
824    command.arg("-c").arg("copy");
825    command.arg(&target);
826
827    let status = command
828        .status()
829        .context("failed to run ffmpeg for preview trimming")?;
830    if status.success() {
831        return Ok(Some(target));
832    }
833
834    let details = status
835        .code()
836        .map(|code| code.to_string())
837        .unwrap_or_else(|| "terminated".to_string());
838    warn!("ffmpeg exited with status {details}; opening full video");
839    Ok(None)
840}
841
842fn preview_frame_image(mem: &mut Memvid, frame: &Frame, cli_uri: Option<&str>) -> Result<()> {
843    let bytes = mem
844        .frame_canonical_payload(frame.id)
845        .context("failed to load canonical payload for frame")?;
846    if bytes.is_empty() {
847        bail!("frame payload is empty; nothing to preview");
848    }
849
850    let detected_kind = infer::get(&bytes);
851    let mut mime = frame
852        .metadata
853        .as_ref()
854        .and_then(|meta| meta.mime.clone())
855        .filter(|value| is_image_mime(value));
856
857    if mime.is_none() {
858        if let Some(kind) = &detected_kind {
859            let candidate = kind.mime_type();
860            if is_image_mime(candidate) {
861                mime = Some(candidate.to_string());
862            }
863        }
864    }
865
866    let mime = mime.ok_or_else(|| anyhow!("frame does not contain an image payload"))?;
867    if !is_image_mime(&mime) {
868        bail!("frame mime type {mime} is not an image");
869    }
870
871    let extension = detected_kind
872        .as_ref()
873        .map(|kind| kind.extension().to_string())
874        .or_else(|| extension_from_mime(&mime).map(|ext| ext.to_string()))
875        .unwrap_or_else(|| "img".to_string());
876
877    let suffix = format!(".{extension}");
878    let mut temp_file = Builder::new()
879        .prefix("memvid-preview-")
880        .suffix(&suffix)
881        .tempfile_in(std::env::temp_dir())
882        .context("failed to create temporary preview file")?;
883    temp_file
884        .write_all(&bytes)
885        .context("failed to write image data to preview file")?;
886    temp_file
887        .flush()
888        .context("failed to flush preview file to disk")?;
889
890    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
891    drop(file);
892
893    println!("Opening preview...");
894    open::that(&preview_path).with_context(|| {
895        format!(
896            "failed to launch default image viewer for {}",
897            preview_path.display()
898        )
899    })?;
900
901    let display_uri = cli_uri
902        .or_else(|| frame.uri.as_deref())
903        .unwrap_or("<unknown>");
904    println!(
905        "Opened preview for {} (frame {}) -> {} ({})",
906        display_uri,
907        frame.id,
908        preview_path.display(),
909        mime
910    );
911    Ok(())
912}
913
914fn preview_frame_document(
915    mem: &mut Memvid,
916    frame: &Frame,
917    cli_uri: Option<&str>,
918    manifest: Option<&MediaManifest>,
919    mime: &str,
920) -> Result<()> {
921    let display_uri = cli_uri
922        .or_else(|| frame.uri.as_deref())
923        .unwrap_or("<unknown>");
924
925    // For documents (PDFs, etc.), prefer opening the original source file if it still exists.
926    // The canonical payload for document frames contains extracted text, not the original binary.
927    if let Some(source_path) = &frame.source_path {
928        let source = Path::new(source_path);
929        if source.exists() {
930            println!("Opening preview...");
931            open::that(source).with_context(|| {
932                format!("failed to launch default viewer for {}", source.display())
933            })?;
934            println!(
935                "Opened preview for {} (frame {}) -> {} ({})",
936                display_uri, frame.id, source_path, mime
937            );
938            return Ok(());
939        } else {
940            warn!(
941                "Original source file no longer exists: {}. Falling back to extracted content.",
942                source_path
943            );
944        }
945    }
946
947    // Fall back to extracted content (text for PDFs, raw bytes for other documents)
948    let bytes = mem
949        .frame_canonical_payload(frame.id)
950        .context("failed to load canonical payload for frame")?;
951    if bytes.is_empty() {
952        bail!("frame payload is empty; nothing to preview");
953    }
954
955    let mut extension = manifest
956        .and_then(|m| m.filename.as_deref())
957        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
958        .map(|ext| ext.trim_start_matches('.').to_string())
959        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
960        .unwrap_or_else(|| "bin".to_string());
961
962    // For documents where we only have extracted text, use .txt extension
963    if frame.chunk_manifest.is_some() {
964        extension = "txt".to_string();
965    } else if extension == "bin" && std::str::from_utf8(&bytes).is_ok() {
966        extension = "txt".to_string();
967    }
968
969    let suffix = format!(".{extension}");
970    let mut temp_file = Builder::new()
971        .prefix("memvid-preview-")
972        .suffix(&suffix)
973        .tempfile_in(std::env::temp_dir())
974        .context("failed to create temporary preview file")?;
975    temp_file
976        .write_all(&bytes)
977        .context("failed to write document data to preview file")?;
978    temp_file
979        .flush()
980        .context("failed to flush preview file to disk")?;
981
982    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
983    drop(file);
984
985    println!("Opening preview...");
986    open::that(&preview_path).with_context(|| {
987        format!(
988            "failed to launch default viewer for {}",
989            preview_path.display()
990        )
991    })?;
992
993    println!(
994        "Opened preview for {} (frame {}) -> {} ({})",
995        display_uri,
996        frame.id,
997        preview_path.display(),
998        if frame.chunk_manifest.is_some() {
999            "text/plain (extracted)"
1000        } else {
1001            mime
1002        }
1003    );
1004    Ok(())
1005}
1006
1007fn preview_frame_audio_file(
1008    mem: &mut Memvid,
1009    frame: &Frame,
1010    cli_uri: Option<&str>,
1011    manifest: Option<&MediaManifest>,
1012    mime: &str,
1013) -> Result<()> {
1014    let bytes = mem
1015        .frame_canonical_payload(frame.id)
1016        .context("failed to load canonical payload for frame")?;
1017    if bytes.is_empty() {
1018        bail!("frame payload is empty; nothing to preview");
1019    }
1020
1021    let mut extension = manifest
1022        .and_then(|m| m.filename.as_deref())
1023        .and_then(|name| Path::new(name).extension().and_then(|ext| ext.to_str()))
1024        .map(|ext| ext.trim_start_matches('.').to_string())
1025        .or_else(|| extension_from_mime(mime).map(|ext| ext.to_string()))
1026        .unwrap_or_else(|| "audio".to_string());
1027
1028    if extension == "bin" {
1029        extension = "audio".to_string();
1030    }
1031
1032    let suffix = format!(".{extension}");
1033    let mut temp_file = Builder::new()
1034        .prefix("memvid-preview-")
1035        .suffix(&suffix)
1036        .tempfile_in(std::env::temp_dir())
1037        .context("failed to create temporary preview file")?;
1038    temp_file
1039        .write_all(&bytes)
1040        .context("failed to write audio data to preview file")?;
1041    temp_file
1042        .flush()
1043        .context("failed to flush preview file to disk")?;
1044
1045    let (file, preview_path) = temp_file.keep().context("failed to persist preview file")?;
1046    drop(file);
1047
1048    println!("Opening preview...");
1049    open::that(&preview_path).with_context(|| {
1050        format!(
1051            "failed to launch default audio player for {}",
1052            preview_path.display()
1053        )
1054    })?;
1055
1056    let display_uri = cli_uri
1057        .or_else(|| frame.uri.as_deref())
1058        .unwrap_or("<unknown>");
1059    println!(
1060        "Opened preview for {} (frame {}) -> {} ({})",
1061        display_uri,
1062        frame.id,
1063        preview_path.display(),
1064        mime
1065    );
1066    Ok(())
1067}
1068
/// Decodes the frame's canonical payload and plays it on the default audio output, blocking
/// until playback has finished.
///
/// `start_seconds` (negative values are treated as zero) and `end_seconds` select the
/// segment to play. When the frame metadata carries a positive duration, a start at or
/// beyond it is rejected and an end beyond it is clamped with a warning.
///
/// # Errors
///
/// Fails when the end is not after the start, when the payload cannot be loaded, is empty
/// or cannot be decoded, when the audio output cannot be opened, or when the resulting
/// segment has zero length.
#[cfg(feature = "audio-playback")]
fn play_frame_audio(
    mem: &mut Memvid,
    frame: &Frame,
    start_seconds: Option<f32>,
    end_seconds: Option<f32>,
) -> Result<()> {
    use rodio::Source;

    match (start_seconds, end_seconds) {
        (Some(requested_start), Some(requested_end)) if requested_end <= requested_start => {
            bail!("--end-seconds must be greater than --start-seconds");
        }
        _ => {}
    }

    let payload = mem
        .frame_canonical_payload(frame.id)
        .context("failed to load canonical payload for frame")?;
    if payload.is_empty() {
        bail!("frame payload is empty; nothing to play");
    }

    let start = start_seconds.unwrap_or(0.0).max(0.0);
    // A value of 0.0 stands for "duration not recorded in the metadata".
    let duration_meta = frame
        .metadata
        .as_ref()
        .and_then(|meta| meta.audio.as_ref())
        .and_then(|audio| audio.duration_secs)
        .unwrap_or(0.0);
    let duration_known = duration_meta > 0.0;

    if duration_known && start >= duration_meta {
        bail!("start-seconds ({start:.2}) exceeds audio duration ({duration_meta:.2})");
    }

    let overshoot = end_seconds.filter(|end| duration_known && *end > duration_meta + f32::EPSILON);
    if let Some(end) = overshoot {
        warn!(
            "requested end-seconds {:.2} exceeds known duration {:.2}; clamping",
            end, duration_meta
        );
    }

    let decoder =
        rodio::Decoder::new(Cursor::new(payload)).context("failed to decode audio stream")?;
    // `_stream` has to stay bound for the whole function: it is held alongside the handle
    // until playback below has completed.
    let (_stream, stream_handle) =
        rodio::OutputStream::try_default().context("failed to open default audio output")?;
    let sink = rodio::Sink::try_new(&stream_handle).context("failed to create audio sink")?;
    let display_uri = frame.uri.as_deref().unwrap_or("<unknown>");

    let segment_desc = match end_seconds {
        Some(end) => {
            let effective_end = if duration_known {
                end.min(duration_meta)
            } else {
                end
            };
            let duration = (effective_end - start).max(0.0);
            if duration <= 0.0 {
                bail!("playback duration is zero; adjust start/end seconds");
            }
            sink.append(
                decoder
                    .skip_duration(Duration::from_secs_f32(start))
                    .take_duration(Duration::from_secs_f32(duration)),
            );
            format!("{start:.2}s → {effective_end:.2}s")
        }
        None => {
            sink.append(decoder.skip_duration(Duration::from_secs_f32(start)));
            format!("{start:.2}s → end")
        }
    };
    announce_playback(display_uri, &segment_desc);

    sink.sleep_until_end();
    Ok(())
}
1144
/// Prints a one-line notice naming the URI being played and the selected segment.
#[cfg(feature = "audio-playback")]
fn announce_playback(uri: &str, segment_desc: &str) {
    let notice = format!("Playing {uri} ({segment_desc})");
    println!("{notice}");
}
1149
/// Reports whether `value` names an `image/*` MIME type.
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and the comparison
/// is ASCII case-insensitive.
fn is_image_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"image/";

    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1154
/// Reports whether `value` names an `audio/*` MIME type.
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and the comparison
/// is ASCII case-insensitive.
fn is_audio_mime(value: &str) -> bool {
    const PREFIX: &[u8] = b"audio/";

    let essence = match value.split_once(';') {
        Some((head, _params)) => head,
        None => value,
    };
    essence
        .trim()
        .as_bytes()
        .get(..PREFIX.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(PREFIX))
}
1159
/// Maps a MIME type to a conventional file extension (without the leading dot).
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and matching is
/// ASCII case-insensitive. Returns `None` for MIME types that are not in the table.
pub fn extension_from_mime(mime: &str) -> Option<&'static str> {
    const MIME_EXTENSIONS: &[(&str, &str)] = &[
        // Images
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
        ("image/x-icon", "ico"),
        ("image/vnd.microsoft.icon", "ico"),
        ("image/svg+xml", "svg"),
        // Video
        ("video/mp4", "mp4"),
        ("video/iso.segment", "mp4"),
        ("video/quicktime", "mov"),
        ("video/webm", "webm"),
        ("video/x-matroska", "mkv"),
        ("video/matroska", "mkv"),
        ("video/x-msvideo", "avi"),
        ("video/mpeg", "mpg"),
        // Audio
        ("audio/mpeg", "mp3"),
        ("audio/mp3", "mp3"),
        ("audio/wav", "wav"),
        ("audio/x-wav", "wav"),
        ("audio/x-flac", "flac"),
        ("audio/flac", "flac"),
        ("audio/ogg", "ogg"),
        ("audio/vorbis", "ogg"),
        ("audio/x-m4a", "m4a"),
        ("audio/mp4", "m4a"),
        ("audio/aac", "aac"),
        ("audio/x-aiff", "aiff"),
        ("audio/aiff", "aiff"),
        // Text and structured data
        ("text/plain", "txt"),
        ("text/markdown", "md"),
        ("text/x-markdown", "md"),
        ("text/html", "html"),
        ("application/xhtml+xml", "xhtml"),
        ("application/json", "json"),
        ("text/json", "json"),
        ("application/vnd.api+json", "json"),
        ("application/xml", "xml"),
        ("text/xml", "xml"),
        ("text/csv", "csv"),
        ("application/csv", "csv"),
        ("application/javascript", "js"),
        ("text/javascript", "js"),
        ("text/css", "css"),
        ("application/yaml", "yaml"),
        ("application/x-yaml", "yaml"),
        ("text/yaml", "yaml"),
        // Documents
        ("application/pdf", "pdf"),
        ("application/rtf", "rtf"),
        ("application/msword", "doc"),
        (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "docx",
        ),
        ("application/vnd.ms-powerpoint", "ppt"),
        (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "pptx",
        ),
        ("application/vnd.ms-excel", "xls"),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "xlsx",
        ),
        // Archives
        ("application/zip", "zip"),
        ("application/x-tar", "tar"),
        ("application/x-7z-compressed", "7z"),
    ];

    let essence = mime
        .split(';')
        .next()
        .unwrap_or(mime)
        .trim()
        .to_ascii_lowercase();

    MIME_EXTENSIONS
        .iter()
        .find(|(known, _)| *known == essence.as_str())
        .map(|&(_, extension)| extension)
}
/// Produces a short preview of a frame's search text.
///
/// Returns `None` when no text is given or the text is blank after trimming; otherwise
/// returns at most the first 160 characters of the trimmed text.
pub fn search_snippet(text: Option<&String>) -> Option<String> {
    let trimmed = text?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.chars().take(160).collect())
}
1223pub fn frame_to_json(frame: &Frame) -> serde_json::Value {
1224    json!({
1225        "id": frame.id,
1226        "status": frame_status_str(frame.status),
1227        "timestamp": frame.timestamp,
1228        "kind": frame.kind,
1229        "track": frame.track,
1230        "uri": frame.uri,
1231        "title": frame.title,
1232        "payload_length": frame.payload_length,
1233        "canonical_encoding": format!("{:?}", frame.canonical_encoding),
1234        "canonical_length": frame.canonical_length,
1235        "role": format!("{:?}", frame.role),
1236        "parent_id": frame.parent_id,
1237        "chunk_index": frame.chunk_index,
1238        "chunk_count": frame.chunk_count,
1239        "tags": frame.tags,
1240        "labels": frame.labels,
1241        "search_text": frame.search_text,
1242        "metadata": frame.metadata,
1243        "extra_metadata": frame.extra_metadata,
1244        "content_dates": frame.content_dates,
1245        "chunk_manifest": frame.chunk_manifest,
1246        "supersedes": frame.supersedes,
1247        "superseded_by": frame.superseded_by,
1248        "source_sha256": frame.source_sha256.map(|h| hex::encode(h)),
1249        "source_path": frame.source_path,
1250    })
1251}
1252pub fn print_frame_summary(mem: &mut Memvid, frame: &Frame) -> Result<()> {
1253    println!("Frame {} [{}]", frame.id, frame_status_str(frame.status));
1254    println!("Timestamp: {}", frame.timestamp);
1255    if let Some(uri) = &frame.uri {
1256        println!("URI: {uri}");
1257    }
1258    if let Some(title) = &frame.title {
1259        println!("Title: {title}");
1260    }
1261    if let Some(kind) = &frame.kind {
1262        println!("Kind: {kind}");
1263    }
1264    if let Some(track) = &frame.track {
1265        println!("Track: {track}");
1266    }
1267    if let Some(supersedes) = frame.supersedes {
1268        println!("Supersedes frame: {supersedes}");
1269    }
1270    if let Some(successor) = frame.superseded_by {
1271        println!("Superseded by frame: {successor}");
1272    }
1273    println!(
1274        "Payload: {} bytes (canonical {:?}, logical {:?})",
1275        frame.payload_length, frame.canonical_encoding, frame.canonical_length
1276    );
1277    if !frame.tags.is_empty() {
1278        println!("Tags: {}", frame.tags.join(", "));
1279    }
1280    if !frame.labels.is_empty() {
1281        println!("Labels: {}", frame.labels.join(", "));
1282    }
1283    if let Some(snippet) = search_snippet(frame.search_text.as_ref()) {
1284        println!("Search text: {snippet}");
1285    }
1286    if let Some(meta) = &frame.metadata {
1287        let rendered = serde_json::to_string_pretty(meta)?;
1288        println!("Metadata: {rendered}");
1289    }
1290    if !frame.extra_metadata.is_empty() {
1291        let mut entries: Vec<_> = frame.extra_metadata.iter().collect();
1292        entries.sort_by(|a, b| a.0.cmp(b.0));
1293        println!("Extra metadata:");
1294        for (key, value) in entries {
1295            println!("  {key}: {value}");
1296        }
1297    }
1298    if !frame.content_dates.is_empty() {
1299        println!("Content dates: {}", frame.content_dates.join(", "));
1300    }
1301    // Show no-raw mode info if applicable
1302    if let Some(hash) = frame.source_sha256 {
1303        println!(
1304            "Source SHA256: {} (raw binary not stored)",
1305            hex::encode(hash)
1306        );
1307        if let Some(path) = &frame.source_path {
1308            println!("Source path: {path}");
1309        }
1310    }
1311    match mem.frame_embedding(frame.id) {
1312        Ok(Some(embedding)) => println!("Embedding: {} dimensions", embedding.len()),
1313        Ok(None) => println!("Embedding: none"),
1314        Err(err) => println!("Embedding: unavailable ({err})"),
1315    }
1316    Ok(())
1317}
1318fn canonical_text_for_view(mem: &mut Memvid, frame: &Frame) -> Result<String> {
1319    let bytes = mem.frame_canonical_payload(frame.id)?;
1320    let raw = match String::from_utf8(bytes) {
1321        Ok(text) => text,
1322        Err(err) => {
1323            let bytes = err.into_bytes();
1324            String::from_utf8_lossy(&bytes).into_owned()
1325        }
1326    };
1327
1328    Ok(normalize_text(&raw, usize::MAX)
1329        .map(|n| n.text)
1330        .unwrap_or_default())
1331}
1332
1333fn manifests_match_text(text: &str, manifest: &TextChunkManifest) -> bool {
1334    if manifest.chunk_chars == 0 || manifest.chunks.is_empty() {
1335        return false;
1336    }
1337    let total_chars = text.chars().count();
1338    manifest
1339        .chunks
1340        .iter()
1341        .all(|chunk| chunk.start <= chunk.end && chunk.end <= total_chars)
1342}
1343
1344fn canonical_manifest_from_frame(text: &str, frame: &Frame) -> Option<TextChunkManifest> {
1345    let primary = frame
1346        .chunk_manifest
1347        .clone()
1348        .filter(|manifest| manifests_match_text(text, manifest));
1349    if primary.is_some() {
1350        return primary;
1351    }
1352
1353    frame
1354        .extra_metadata
1355        .get(CHUNK_MANIFEST_KEY)
1356        .and_then(|raw| serde_json::from_str::<TextChunkManifest>(raw).ok())
1357        .filter(|manifest| manifests_match_text(text, manifest))
1358}
1359
1360fn compute_chunk_manifest(text: &str, chunk_chars: usize) -> TextChunkManifest {
1361    let normalized = normalize_text(text, usize::MAX)
1362        .map(|n| n.text)
1363        .unwrap_or_default();
1364
1365    let effective_chunk = chunk_chars.max(1);
1366    let total_chars = normalized.chars().count();
1367    if total_chars == 0 {
1368        return TextChunkManifest {
1369            chunk_chars: effective_chunk,
1370            chunks: vec![TextChunkRange { start: 0, end: 0 }],
1371        };
1372    }
1373    if total_chars <= effective_chunk {
1374        return TextChunkManifest {
1375            chunk_chars: effective_chunk,
1376            chunks: vec![TextChunkRange {
1377                start: 0,
1378                end: total_chars,
1379            }],
1380        };
1381    }
1382    let mut chunks = Vec::new();
1383    let mut start = 0usize;
1384    while start < total_chars {
1385        let end = (start + effective_chunk).min(total_chars);
1386        chunks.push(TextChunkRange { start, end });
1387        start = end;
1388    }
1389    TextChunkManifest {
1390        chunk_chars: effective_chunk,
1391        chunks,
1392    }
1393}
1394
1395fn extract_chunk_slice(text: &str, range: &TextChunkRange) -> String {
1396    if range.start >= range.end || text.is_empty() {
1397        return String::new();
1398    }
1399    let mut start_byte = text.len();
1400    let mut end_byte = text.len();
1401    let mut idx = 0usize;
1402    for (byte_offset, _) in text.char_indices() {
1403        if idx == range.start {
1404            start_byte = byte_offset;
1405        }
1406        if idx == range.end {
1407            end_byte = byte_offset;
1408            break;
1409        }
1410        idx += 1;
1411    }
1412    if start_byte == text.len() {
1413        return String::new();
1414    }
1415    if end_byte == text.len() {
1416        end_byte = text.len();
1417    }
1418    text[start_byte..end_byte].to_string()
1419}