Skip to main content

memvid_core/
lib.rs

1#![deny(clippy::all, clippy::pedantic)]
2#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
3#![cfg_attr(
4    test,
5    allow(
6        clippy::useless_vec,
7        clippy::uninlined_format_args,
8        clippy::cast_possible_truncation,
9        clippy::float_cmp,
10        clippy::cast_precision_loss
11    )
12)]
13#![allow(clippy::module_name_repetitions)]
14//
15// Strategic lint exceptions - these are allowed project-wide for pragmatic reasons:
16//
17// Documentation lints: Many internal/self-documenting functions don't need extensive docs.
18// Public APIs should still have proper documentation.
19#![allow(clippy::missing_errors_doc)]
20#![allow(clippy::missing_panics_doc)]
21#![allow(clippy::doc_markdown)]
22//
23// Cast safety: All casts in this codebase are carefully reviewed and bounded by
24// real-world constraints (file sizes, frame counts, etc). Using try_into() everywhere
25// would add significant complexity without safety benefits in our use case.
26#![allow(clippy::cast_precision_loss)]
27#![allow(clippy::cast_possible_wrap)]
28#![allow(clippy::cast_sign_loss)]
29#![allow(clippy::cast_lossless)]
30//
31// Style/complexity: Some database-like operations naturally require complex functions.
32// Breaking them up would hurt readability.
33#![allow(clippy::too_many_lines)]
34#![allow(clippy::too_many_arguments)]
35#![allow(clippy::items_after_statements)]
36#![allow(clippy::similar_names)]
37// e.g., frame_id, parent_id, target_id are intentionally similar
38//
39// Pattern matching: These pedantic lints often suggest changes that reduce clarity.
40#![allow(clippy::manual_let_else)]
41#![allow(clippy::match_same_arms)]
42#![allow(clippy::if_same_then_else)]
43#![allow(clippy::collapsible_match)]
44//
45// Performance/ergonomics trade-offs that are acceptable for this codebase:
46#![allow(clippy::needless_pass_by_value)] // Many builders take owned values intentionally
47#![allow(clippy::return_self_not_must_use)] // Builder patterns don't need must_use on every method
48#![allow(clippy::format_push_string)] // Readability over minor perf difference
49#![allow(clippy::assigning_clones)] // clone_from() often less readable
50//
51// Low-value pedantic lints that add noise:
52#![allow(clippy::struct_excessive_bools)] // Config structs naturally have many flags
53#![allow(clippy::needless_continue)]
54#![allow(clippy::needless_range_loop)]
55#![allow(clippy::case_sensitive_file_extension_comparisons)]
56#![allow(clippy::default_trait_access)]
57#![allow(clippy::field_reassign_with_default)]
58#![allow(clippy::unreadable_literal)] // Magic numbers in binary formats are clearer as hex
59#![allow(clippy::implicit_hasher)]
60#![allow(clippy::manual_clamp)]
61#![allow(clippy::len_without_is_empty)] // Many index types don't need is_empty()
62#![allow(clippy::large_enum_variant)]
63#![allow(clippy::ptr_arg)]
64#![allow(clippy::map_unwrap_or)]
65#![allow(clippy::incompatible_msrv)]
66#![allow(clippy::should_implement_trait)] // Some method names are clearer than trait names
67#![allow(clippy::duplicated_attributes)]
68//
69// Return value wrapping: Many functions use Result for consistency even when they
70// currently can't fail, allowing future error conditions to be added without breaking API.
71#![allow(clippy::unnecessary_wraps)]
72#![allow(clippy::unused_self)] // Some trait impls or future extensibility
73
74/// The memvid-core crate version (matches `Cargo.toml`).
75pub const MEMVID_CORE_VERSION: &str = env!("CARGO_PKG_VERSION");
76
77mod analysis;
78pub mod constants;
79pub mod enrich;
80pub mod enrichment_worker;
81pub mod error;
82pub mod extract;
83pub mod extract_budgeted;
84pub mod footer;
85pub mod io;
86pub mod lex;
87mod lock;
88pub mod lockfile;
89pub mod memvid;
90pub mod models;
91pub mod pii;
92pub mod reader;
93mod registry;
94mod search;
95pub mod signature;
96pub mod structure;
97pub mod table;
98pub mod text;
99mod toc;
100pub mod types;
101pub mod vec;
102pub mod vec_pq;
103
104// SIMD-accelerated distance calculations
105pub mod simd;
106
107#[cfg(feature = "vec")]
108pub mod text_embed;
109
110// Triplet extraction module for automatic SPO extraction during ingestion
111pub mod triplet;
112
113// Graph-aware search for hybrid retrieval
114pub mod graph_search;
115
116// CLIP module is always compiled (for ClipIndexManifest serde compatibility)
117// but ClipModel/inference requires the "clip" feature
118pub mod clip;
119
120// Whisper module for audio transcription
121// Model inference requires the "whisper" feature
122pub mod whisper;
123
124// Replay module for time-travel debugging of agent sessions
125// Types are always available for serde compatibility
126// Full functionality requires the "replay" feature
127pub mod replay;
128
129// Password-based encryption capsules (.mv2e)
130// Feature-gated to avoid pulling crypto dependencies into default builds.
131#[cfg(feature = "encryption")]
132pub mod encryption;
133
134// SymSpell-based PDF text cleanup - fixes broken word spacing
135#[cfg(feature = "symspell_cleanup")]
136pub mod symspell_cleanup;
137
138// API-based embedding providers (OpenAI, etc.) - requires network
139#[cfg(feature = "api_embed")]
140pub mod api_embed;
141
142#[cfg(test)]
143mod tests_lex_flag;
144
145#[cfg(feature = "temporal_track")]
146pub use analysis::temporal::{
147    TemporalContext, TemporalNormalizer, TemporalResolution, TemporalResolutionFlag,
148    TemporalResolutionValue, parse_clock_inheritance, parse_week_start,
149};
150// Temporal enrichment for resolving relative time references during ingestion
151#[cfg(feature = "temporal_enrich")]
152pub use analysis::temporal_enrich::{
153    AnchorSource as TemporalEnrichAnchorSource, RelativePhrase, ResolvedTemporal,
154    TemporalAnchorInfo, TemporalAnchorTracker, TemporalEnrichment, detect_relative_phrases,
155    enrich_chunk, enrich_chunks, enrich_document, resolve_relative_phrase,
156};
157pub use constants::*;
158pub use enrichment_worker::{EnrichmentWorkerConfig, EnrichmentWorkerStats};
159pub use error::{MemvidError, Result};
160pub use extract::{DocumentProcessor, ExtractedDocument, ProcessorConfig};
161pub use footer::{CommitFooter, find_last_valid_footer};
162#[cfg(feature = "temporal_track")]
163pub use io::temporal_index::{
164    append_track as temporal_track_append, calculate_checksum as temporal_track_checksum,
165    read_track as temporal_track_read, window as temporal_track_window,
166};
167pub use io::time_index::{
168    TimeIndexEntry, append_track as time_index_append, calculate_checksum as time_index_checksum,
169    read_track as time_index_read,
170};
171pub use io::wal::{EmbeddedWal, WalRecord, WalStats};
172pub use lex::{LexIndex, LexIndexArtifact, LexIndexBuilder, LexSearchHit};
173pub use lock::FileLock;
174pub use memvid::{
175    BlobReader, EnrichmentHandle, EnrichmentStats, LockSettings, Memvid, OpenReadOptions,
176    SketchCandidate, SketchSearchOptions, SketchSearchStats,
177    mutation::{CommitMode, CommitOptions},
178    start_enrichment_worker, start_enrichment_worker_with_embeddings,
179};
180#[cfg(feature = "parallel_segments")]
181pub use memvid::{BuildOpts, ParallelInput, ParallelPayload};
182pub use models::{
183    ModelManifest, ModelManifestEntry, ModelVerification, ModelVerificationStatus,
184    ModelVerifyOptions, verify_model_dir, verify_models,
185};
186pub use reader::{
187    DocumentFormat, DocumentReader, PassthroughReader, PdfReader, ReaderDiagnostics, ReaderHint,
188    ReaderOutput, ReaderRegistry,
189};
190pub use signature::{
191    parse_ed25519_public_key_base64, verify_model_manifest, verify_ticket_signature,
192};
193pub use text::{NormalizedText, normalize_text, truncate_at_grapheme_boundary};
194pub use types::{
195    ACL_POLICY_VERSION_KEY, ACL_READ_GROUPS_KEY, ACL_READ_PRINCIPALS_KEY, ACL_READ_ROLES_KEY,
196    ACL_RESOURCE_ID_KEY, ACL_TENANT_ID_KEY, ACL_VISIBILITY_KEY, AclContext, AclEnforcementMode,
197    AskCitation, AskMode, AskRequest, AskResponse, AskRetriever, AskStats, AudioSegmentMetadata,
198    AuditOptions, AuditReport, CanonicalEncoding, DOCTOR_PLAN_VERSION, DocAudioMetadata,
199    DocExifMetadata, DocGpsMetadata, DocMetadata, DoctorActionDetail, DoctorActionKind,
200    DoctorActionPlan, DoctorActionReport, DoctorActionStatus, DoctorFinding, DoctorFindingCode,
201    DoctorMetrics, DoctorOptions, DoctorPhaseDuration, DoctorPhaseKind, DoctorPhasePlan,
202    DoctorPhaseReport, DoctorPhaseStatus, DoctorPlan, DoctorReport, DoctorSeverity, DoctorStatus,
203    EmbeddingIdentity, EmbeddingIdentityCount, EmbeddingIdentitySummary, Frame, FrameId, FrameRole,
204    FrameStatus, Header, IndexManifests, LexIndexManifest, LexSegmentDescriptor,
205    MEMVID_EMBEDDING_DIMENSION_KEY, MEMVID_EMBEDDING_MODEL_KEY, MEMVID_EMBEDDING_NORMALIZED_KEY,
206    MEMVID_EMBEDDING_PROVIDER_KEY, MediaManifest, MemvidHandle, Open, PutManyOpts, PutOptions,
207    PutOptionsBuilder, Sealed, SearchEngineKind, SearchHit, SearchHitMetadata, SearchParams,
208    SearchRequest, SearchResponse, SegmentCatalog, SegmentCommon, SegmentCompression, SegmentMeta,
209    SegmentSpan, SourceSpan, Stats, TextChunkManifest, TextChunkRange, Ticket, TicketRef, Tier,
210    TimeIndexManifest, TimeSegmentDescriptor, TimelineEntry, TimelineQuery, TimelineQueryBuilder,
211    Toc, VecEmbedder, VecIndexManifest, VecSegmentDescriptor, VectorCompression, VerificationCheck,
212    VerificationReport, VerificationStatus,
213};
214#[cfg(feature = "temporal_track")]
215pub use types::{
216    AnchorSource, SearchHitTemporal, SearchHitTemporalAnchor, SearchHitTemporalMention,
217    TEMPORAL_TRACK_FLAG_HAS_ANCHORS, TEMPORAL_TRACK_FLAG_HAS_MENTIONS, TemporalAnchor,
218    TemporalCapabilities, TemporalFilter, TemporalMention, TemporalMentionFlags,
219    TemporalMentionKind, TemporalTrack, TemporalTrackManifest,
220};
221// Memory card types for structured memory extraction and storage
222pub use types::{
223    EngineStamp, EnrichmentManifest, EnrichmentRecord, MEMORIES_TRACK_MAGIC,
224    MEMORIES_TRACK_VERSION, MemoriesStats, MemoriesTrack, MemoryCard, MemoryCardBuilder,
225    MemoryCardBuilderError, MemoryCardId, MemoryKind, Polarity, SlotIndex, VersionRelation,
226};
227// Logic-Mesh types for entity-relationship graph traversal
228pub use types::{
229    EdgeDirection, EntityKind, FollowResult, LOGIC_MESH_MAGIC, LOGIC_MESH_VERSION, LinkType,
230    LogicMesh, LogicMeshManifest, MeshEdge, MeshNode,
231};
232// Sketch track types for fast candidate generation
233pub use types::{
234    DEFAULT_HAMMING_THRESHOLD, QuerySketch, SKETCH_TRACK_MAGIC, SKETCH_TRACK_VERSION, SketchEntry,
235    SketchFlags, SketchTrack, SketchTrackHeader, SketchTrackManifest, SketchTrackStats,
236    SketchVariant, build_term_filter, compute_simhash, compute_token_weights, generate_sketch,
237    hash_token, hash_token_u32, read_sketch_track, term_filter_maybe_contains, tokenize_for_sketch,
238    write_sketch_track,
239};
240// Schema types for predicate validation and type checking
241pub use types::{
242    Cardinality, PredicateId, PredicateSchema, SchemaError, SchemaRegistry, ValueType,
243};
244// Schema inference summary type
245pub use memvid::memory::SchemaSummaryEntry;
246// NER types for entity extraction (always available, model requires logic_mesh feature)
247#[cfg(feature = "logic_mesh")]
248pub use analysis::ner::NerModel;
249pub use analysis::ner::{
250    ExtractedEntity, FrameEntities, NER_MODEL_NAME, NER_MODEL_SIZE_MB, NER_MODEL_URL, NER_MODELS,
251    NER_TOKENIZER_URL, NerModelInfo, default_ner_model_info, get_ner_model_info,
252    is_ner_model_installed, ner_model_path, ner_tokenizer_path,
253};
254// Enrichment engine types for extracting memory cards from frames
255pub use enrich::{EnrichmentContext, EnrichmentEngine, EnrichmentResult, RulesEngine};
256// Triplet extraction types for automatic SPO extraction
257pub use triplet::{ExtractionMode, ExtractionStats, TripletExtractor};
258// Graph-aware search for hybrid retrieval
259pub use graph_search::{GraphMatcher, QueryPlanner, hybrid_search};
260// Embedding provider types for vector embedding generation
261pub use types::{
262    BatchEmbeddingResult, EmbeddingConfig, EmbeddingProvider, EmbeddingProviderKind,
263    EmbeddingResult,
264};
265// Reranker types for second-stage ranking in RAG pipelines
266pub use types::reranker::{
267    Reranker, RerankerConfig, RerankerDocument, RerankerKind, RerankerResult,
268};
269#[cfg(feature = "parallel_segments")]
270pub use types::{IndexSegmentRef, SegmentKind, SegmentStats};
271pub use vec::{VecIndex, VecIndexArtifact, VecSearchHit};
272pub use vec_pq::{
273    CompressionStats, ProductQuantizer, QuantizedVecIndex, QuantizedVecIndexArtifact,
274    QuantizedVecIndexBuilder,
275};
276// Local text embedding provider - feature-gated
277#[cfg(feature = "vec")]
278pub use text_embed::{
279    LocalTextEmbedder, TEXT_EMBED_MODELS, TextEmbedConfig, TextEmbedModelInfo,
280    default_text_model_info, get_text_model_info,
281};
282// API-based embedding providers - feature-gated
283#[cfg(feature = "api_embed")]
284pub use api_embed::{
285    OPENAI_MODELS, OpenAIConfig, OpenAIEmbedder, OpenAIModelInfo, default_openai_model_info,
286    get_openai_model_info,
287};
288// CLIP visual embeddings - types always available for serde compatibility
289pub use clip::{
290    CLIP_MODELS, ClipConfig, ClipDocument, ClipEmbeddingProvider, ClipError, ClipIndex,
291    ClipIndexArtifact, ClipIndexBuilder, ClipIndexManifest, ClipModelInfo, ClipSearchHit,
292    ImageInfo, MOBILECLIP_DIMS, SIGLIP_DIMS, default_model_info, filter_junk_images,
293    get_model_info,
294};
295// CLIP model inference requires the "clip" feature
296#[cfg(feature = "clip")]
297pub use clip::{ClipModel, calculate_color_variance, get_image_info};
298// Whisper audio transcription - types always available
299pub use whisper::{
300    TranscriptionResult, TranscriptionSegment, WHISPER_MODELS, WhisperConfig, WhisperError,
301    WhisperModelInfo, default_whisper_model_info, get_whisper_model_info,
302};
303// Audio decoding and transcription require the "whisper" feature
304#[cfg(feature = "whisper")]
305pub use whisper::{WHISPER_SAMPLE_RATE, WhisperTranscriber, decode_audio_file};
306// Structure-aware chunking for preserving tables and code blocks
307pub use structure::{
308    ChunkType, ChunkingOptions, ChunkingResult, StructuralChunker, StructuredChunk,
309    StructuredDocument, TableChunkingStrategy, chunk_structured, detect_structure,
310};
311// Adaptive retrieval for dynamic result set sizing
312pub use types::adaptive::{
313    AdaptiveConfig, AdaptiveResult, AdaptiveStats, CutoffStrategy, find_adaptive_cutoff,
314    normalize_scores,
315};
316// Replay types for time-travel debugging - always available for serde
317pub use replay::{
318    ActionType, Checkpoint, REPLAY_SEGMENT_MAGIC, REPLAY_SEGMENT_VERSION, ReplayAction,
319    ReplayManifest, ReplaySession, SessionSummary, StateSnapshot,
320};
321// Full replay functionality requires the "replay" feature
322#[cfg(feature = "replay")]
323pub use replay::{
324    ActiveSession, ComparisonReport, ComparisonSummary, Divergence, DivergenceType, ModelResult,
325    ReplayConfig, ReplayOptions, ReplayResult,
326};
327
328#[cfg(test)]
329use once_cell::sync::Lazy;
330use std::fs::File;
331use std::io::Cursor;
332use std::path::Path;
333#[cfg(test)]
334use std::sync::Mutex;
335
336use bincode::config::{self, Config};
337use io::header::HeaderCodec;
338
/// Maximum length of a preview string produced by `truncate_preview`.
///
/// NOTE: despite the `_BYTES` suffix, `truncate_preview` applies this limit as a
/// number of `char`s, so a preview of multi-byte text can exceed 120 bytes.
const TIMELINE_PREVIEW_BYTES: usize = 120;
/// 512 MiB. Presumably the upper bound on serialized index size; the consumers
/// are outside this part of the file.
const MAX_INDEX_BYTES: u64 = 512 * 1024 * 1024; // Raised from 64 MiB to 512 MiB for large datasets
/// 512 MiB. Presumably the upper bound on the time index; verify against callers.
const MAX_TIME_INDEX_BYTES: u64 = 512 * 1024 * 1024;
/// 256 MiB. Presumably the upper bound on a single frame payload; verify against callers.
const MAX_FRAME_BYTES: u64 = 256 * 1024 * 1024;
/// 32,768. Presumably the default cap on text considered for search; the unit
/// (bytes vs. chars) is not visible here.
const DEFAULT_SEARCH_TEXT_LIMIT: usize = 32_768;
344
/// Test-only global mutex, lazily created on first use.
///
/// `run_serial_test` holds this lock for the duration of the closure it runs, so
/// test bodies wrapped in it never execute concurrently with each other.
#[cfg(test)]
#[allow(clippy::non_std_lazy_statics)]
static SERIAL_TEST_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
348
/// Runs `f` while holding the global serial-test mutex and returns its result.
///
/// A test that panics inside `f` poisons the mutex. The mutex guards no data
/// (`()`), so poisoning carries no information here; the guard is recovered from
/// the `PoisonError` instead of panicking. Without that, a single failing test
/// would make every later serial test fail with a misleading "mutex poisoned"
/// panic and bury the real failure.
#[cfg(test)]
pub(crate) fn run_serial_test<T>(f: impl FnOnce() -> T) -> T {
    let _guard = SERIAL_TEST_MUTEX
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    f()
}
356
357impl Memvid {
358    #[cfg(feature = "lex")]
359    fn tantivy_index_pending(&self) -> bool {
360        self.tantivy_dirty
361    }
362
363    #[cfg(not(feature = "lex"))]
364    fn tantivy_index_pending(&self) -> bool {
365        false
366    }
367
368    #[cfg(feature = "lex")]
369    fn flush_tantivy_conditional(&mut self, embed_snapshot: bool) -> Result<()> {
370        if !self.tantivy_dirty {
371            return Ok(());
372        }
373        if let Some(engine) = self.tantivy.as_mut() {
374            engine.commit()?;
375            if embed_snapshot {
376                let snapshot = engine.snapshot_segments()?;
377                self.update_embedded_lex_snapshot(snapshot)?;
378            }
379        }
380        self.tantivy_dirty = false;
381        Ok(())
382    }
383
384    #[cfg(feature = "lex")]
385    fn flush_tantivy(&mut self) -> Result<()> {
386        self.flush_tantivy_conditional(true)
387    }
388
389    #[cfg(feature = "lex")]
390    #[allow(dead_code)]
391    fn flush_tantivy_skip_embed(&mut self) -> Result<()> {
392        self.flush_tantivy_conditional(false)
393    }
394
395    #[cfg(not(feature = "lex"))]
396    fn flush_tantivy(&mut self) -> Result<()> {
397        Ok(())
398    }
399
400    #[cfg(not(feature = "lex"))]
401    #[allow(dead_code)]
402    fn flush_tantivy_skip_embed(&mut self) -> Result<()> {
403        Ok(())
404    }
405    #[must_use]
406    pub fn path(&self) -> &Path {
407        &self.path
408    }
409
410    #[must_use]
411    pub fn lock_handle(&self) -> &FileLock {
412        &self.lock
413    }
414
415    #[must_use]
416    pub fn is_read_only(&self) -> bool {
417        self.read_only
418    }
419
420    pub(crate) fn ensure_writable(&mut self) -> Result<()> {
421        if self.read_only {
422            self.lock.upgrade_to_exclusive()?;
423            self.read_only = false;
424        }
425        Ok(())
426    }
427
428    pub fn downgrade_to_shared(&mut self) -> Result<()> {
429        if self.read_only {
430            return Ok(());
431        }
432        if self.dirty || self.tantivy_index_pending() {
433            return Ok(());
434        }
435        self.lock.downgrade_to_shared()?;
436        self.read_only = true;
437        Ok(())
438    }
439}
440
441impl Drop for Memvid {
442    fn drop(&mut self) {
443        if self.dirty {
444            let _ = self.commit();
445        }
446        // Clean up temporary manifest.wal file (parallel_segments feature)
447        #[cfg(feature = "parallel_segments")]
448        {
449            use crate::memvid::lifecycle::cleanup_manifest_wal_public;
450            cleanup_manifest_wal_public(self.path());
451        }
452    }
453}
454
/// Writes `header` to `file` by delegating to `HeaderCodec::write`.
///
/// The result of `HeaderCodec::write` is returned unchanged.
pub(crate) fn persist_header(file: &mut File, header: &Header) -> Result<()> {
    HeaderCodec::write(file, header)
}
458
/// Builds the bincode configuration: the standard config switched to fixed-width
/// integer encoding and little-endian byte order.
///
/// NOTE(review): judging by the name this is the encoding used for WAL records;
/// the call sites are outside this part of the file.
fn wal_config() -> impl Config {
    config::standard()
        .with_fixed_int_encoding()
        .with_little_endian()
}
464
465pub(crate) fn decode_canonical_bytes(
466    payload: &[u8],
467    encoding: CanonicalEncoding,
468    frame_id: FrameId,
469) -> Result<Vec<u8>> {
470    match encoding {
471        CanonicalEncoding::Plain => Ok(payload.to_vec()),
472        CanonicalEncoding::Zstd => {
473            zstd::decode_all(Cursor::new(payload)).map_err(|_| MemvidError::InvalidFrame {
474                frame_id,
475                reason: "failed to decode canonical payload",
476            })
477        }
478    }
479}
480
481pub(crate) fn default_uri(frame_id: FrameId) -> String {
482    format!("mv2://frames/{frame_id}")
483}
484
/// Derives a human-readable title from the last path segment of `uri`.
///
/// Steps:
/// 1. Trim whitespace, then drop the `#fragment` and `?query` parts.
/// 2. Drop everything up to and including the first `://`, if present.
/// 3. Take the last `/`-separated segment (ignoring trailing slashes) and remove
///    its final `.extension`.
/// 4. Split on `-`, `_` and spaces; ASCII-capitalize each word (first letter
///    upper-case, the rest lower-case) and join the words with single spaces.
///
/// Only ASCII letters change case; other characters are kept as-is.
///
/// Returns `None` when nothing usable remains, e.g. for an empty URI, a URI with
/// no path segment, or a segment that is only an extension (such as `.gitignore`).
///
/// Fragment and query are stripped *before* the scheme so that a `://` inside
/// them (for example `docs/page?next=https://example.com/x`) is not mistaken
/// for the scheme separator.
pub(crate) fn infer_title_from_uri(uri: &str) -> Option<String> {
    let trimmed = uri.trim();

    // Fragment first, then query: anything after them never contributes to the title.
    let without_fragment = trimmed.split_once('#').map_or(trimmed, |(head, _)| head);
    let without_query = without_fragment
        .split_once('?')
        .map_or(without_fragment, |(head, _)| head);
    let path = without_query
        .split_once("://")
        .map_or(without_query, |(_, rest)| rest);

    // Last path segment, ignoring any trailing slashes.
    let path = path.trim_end_matches('/');
    let segment = path.rsplit_once('/').map_or(path, |(_, last)| last).trim();

    // Strip the final extension; an empty stem falls through to `None` below.
    let stem = segment
        .rsplit_once('.')
        .map_or(segment, |(stem, _)| stem)
        .trim();

    let words: Vec<String> = stem
        .split(['-', '_', ' '])
        .filter(|part| !part.is_empty())
        .map(|part| {
            let mut chars = part.chars();
            let mut word = String::with_capacity(part.len());
            if let Some(first) = chars.next() {
                word.push(first.to_ascii_uppercase());
            }
            word.extend(chars.map(|c| c.to_ascii_lowercase()));
            word
        })
        .collect();

    (!words.is_empty()).then(|| words.join(" "))
}
538
539fn truncate_preview(text: &str) -> String {
540    text.chars().take(TIMELINE_PREVIEW_BYTES).collect()
541}
542
543fn image_preview_from_metadata(meta: &DocMetadata) -> Option<String> {
544    let mime = meta.mime.as_deref()?;
545    if !mime.starts_with("image/") {
546        return None;
547    }
548
549    if let Some(caption) = meta.caption.as_ref() {
550        let trimmed = caption.trim();
551        if !trimmed.is_empty() {
552            return Some(truncate_preview(trimmed));
553        }
554    }
555
556    let mut segments: Vec<String> = Vec::new();
557    if let (Some(w), Some(h)) = (meta.width, meta.height) {
558        segments.push(format!("{w}×{h} px"));
559    }
560    if let Some(exif) = meta.exif.as_ref() {
561        if let Some(model) = exif
562            .model
563            .as_ref()
564            .map(|s| s.trim())
565            .filter(|s| !s.is_empty())
566        {
567            segments.push(model.to_string());
568        } else if let Some(make) = exif
569            .make
570            .as_ref()
571            .map(|s| s.trim())
572            .filter(|s| !s.is_empty())
573        {
574            segments.push(make.to_string());
575        }
576
577        if let Some(datetime) = exif
578            .datetime
579            .as_ref()
580            .map(|s| s.trim())
581            .filter(|s| !s.is_empty())
582        {
583            segments.push(datetime.to_string());
584        }
585    }
586
587    if segments.is_empty() {
588        return Some("Image frame".to_string());
589    }
590
591    Some(truncate_preview(&segments.join(" · ")))
592}
593
594#[cfg(test)]
595mod tests {
596    use super::*;
597    use std::io::Read;
598    use std::num::NonZeroU64;
599    use tempfile::tempdir;
600
601    #[test]
602    fn create_put_commit_reopen() {
603        run_serial_test(|| {
604            let dir = tempdir().expect("tmp");
605            let path = dir.path().join("memory.mv2");
606
607            let mut mem = Memvid::create(&path).expect("create");
608            let seq = mem.put_bytes(b"hello").expect("put");
609            assert_eq!(seq, 1);
610            mem.commit().expect("commit");
611
612            drop(mem);
613
614            let mut reopened = Memvid::open(&path).expect("open");
615            let stats = reopened.stats().expect("stats");
616            assert_eq!(stats.frame_count, 1);
617            assert!(stats.has_time_index);
618
619            let timeline = reopened
620                .timeline(TimelineQuery::default())
621                .expect("timeline");
622            assert_eq!(timeline.len(), 1);
623            assert!(timeline[0].preview.contains("hello"));
624
625            let wal_stats = reopened.wal.stats();
626            assert_eq!(wal_stats.pending_bytes, 0);
627            // Sequence is 2: one from create() writing manifests, one from put()
628            assert_eq!(wal_stats.sequence, 2);
629        });
630    }
631
632    #[test]
633    fn timeline_limit_and_reverse() {
634        run_serial_test(|| {
635            let dir = tempdir().expect("tmp");
636            let path = dir.path().join("timeline.mv2");
637
638            let mut mem = Memvid::create(&path).expect("create");
639            mem.put_bytes(b"alpha").expect("put alpha");
640            mem.put_bytes(b"beta").expect("put beta");
641            mem.commit().expect("commit");
642            drop(mem);
643
644            let mut reopened = Memvid::open(&path).expect("open");
645            let limited = reopened
646                .timeline(TimelineQuery {
647                    limit: NonZeroU64::new(1),
648                    since: None,
649                    until: None,
650                    reverse: false,
651                    #[cfg(feature = "temporal_track")]
652                    temporal: None,
653                })
654                .expect("timeline limit");
655            assert_eq!(limited.len(), 1);
656            assert!(limited[0].preview.contains("alpha"));
657
658            let reversed = reopened
659                .timeline(TimelineQuery {
660                    limit: NonZeroU64::new(1),
661                    since: None,
662                    until: None,
663                    reverse: true,
664                    #[cfg(feature = "temporal_track")]
665                    temporal: None,
666                })
667                .expect("timeline reverse");
668            assert_eq!(reversed.len(), 1);
669            assert!(reversed[0].preview.contains("beta"));
670        });
671    }
672
673    #[test]
674    fn lex_search_roundtrip() {
675        run_serial_test(|| {
676            let dir = tempdir().expect("tmp");
677            let path = dir.path().join("lex.mv2");
678
679            let mut mem = Memvid::create(&path).expect("create");
680            mem.enable_lex().expect("enable");
681            let _seq1 = mem.put_bytes(b"Rust memory engine").expect("put");
682            let _seq2 = mem.put_bytes(b"Deterministic WAL").expect("put2");
683            mem.commit().expect("commit");
684
685            // Use modern search() API instead of deprecated search_lex()
686            let request = SearchRequest {
687                query: "memory".to_string(),
688                top_k: 10,
689                snippet_chars: 200,
690                uri: None,
691                scope: None,
692                cursor: None,
693                #[cfg(feature = "temporal_track")]
694                temporal: None,
695                as_of_frame: None,
696                as_of_ts: None,
697                no_sketch: false,
698                acl_context: None,
699                acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
700            };
701            let response = mem.search(request).expect("search");
702            assert_eq!(response.hits.len(), 1);
703
704            drop(mem);
705
706            let mut reopened = Memvid::open(&path).expect("open");
707            let request = SearchRequest {
708                query: "wal".to_string(),
709                top_k: 10,
710                snippet_chars: 200,
711                uri: None,
712                scope: None,
713                cursor: None,
714                #[cfg(feature = "temporal_track")]
715                temporal: None,
716                as_of_frame: None,
717                as_of_ts: None,
718                no_sketch: false,
719                acl_context: None,
720                acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
721            };
722            let response = reopened.search(request).expect("search reopened");
723            assert_eq!(response.hits.len(), 1);
724        });
725    }
726
727    #[test]
728    fn vec_search_roundtrip() {
729        run_serial_test(|| {
730            let dir = tempdir().expect("tmp");
731            let path = dir.path().join("vec.mv2");
732
733            let mut mem = Memvid::create(&path).expect("create");
734            mem.enable_vec().expect("enable");
735            mem.put_with_embedding(b"vector", vec![0.0, 1.0])
736                .expect("put");
737            mem.put_with_embedding(b"vector-two", vec![1.0, 0.0])
738                .expect("put2");
739            mem.commit().expect("commit");
740
741            let stats = mem.stats().expect("stats");
742            assert!(stats.has_vec_index, "vec index should exist after commit");
743
744            let hits = mem.search_vec(&[0.0, 1.0], 5).expect("search");
745            assert_eq!(hits.first().map(|hit| hit.frame_id), Some(0));
746
747            drop(mem);
748
749            let mut reopened = Memvid::open(&path).expect("open");
750            let reopened_stats = reopened.stats().expect("stats reopen");
751            assert!(
752                reopened_stats.has_vec_index,
753                "vec index should exist after reopen: has_manifest={}, vec_enabled={}",
754                reopened.toc.indexes.vec.is_some(),
755                reopened.vec_enabled
756            );
757            let hits = reopened.search_vec(&[1.0, 0.0], 5).expect("search reopen");
758            assert_eq!(hits.first().map(|hit| hit.frame_id), Some(1));
759        });
760    }
761
762    #[test]
763    fn search_snippet_ranges_match_bytes() {
764        run_serial_test(|| {
765            let dir = tempdir().expect("tmp");
766            let path = dir.path().join("search.mv2");
767
768            let mut mem = Memvid::create(&path).expect("create");
769            mem.enable_lex().expect("enable lex");
770            let options = PutOptions::builder()
771                .uri("mv2://docs/pricing.md")
772                .title("Pricing")
773                .build();
774            let text = "Capacity tickets are signed grants that raise per-file caps.";
775            mem.put_bytes_with_options(text.as_bytes(), options)
776                .expect("put doc");
777            mem.commit().expect("commit");
778
779            let response = mem
780                .search(SearchRequest {
781                    query: "capacity tickets".into(),
782                    top_k: 5,
783                    snippet_chars: 160,
784                    uri: None,
785                    scope: None,
786                    cursor: None,
787                    #[cfg(feature = "temporal_track")]
788                    temporal: None,
789                    as_of_frame: None,
790                    as_of_ts: None,
791                    no_sketch: false,
792                    acl_context: None,
793                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
794                })
795                .expect("search");
796
797            assert_eq!(response.total_hits, 1);
798            assert_eq!(response.engine, SearchEngineKind::Tantivy);
799            let hit = response.hits.first().expect("hit");
800            let frame = mem
801                .toc
802                .frames
803                .get(hit.frame_id as usize)
804                .cloned()
805                .expect("frame");
806            let canonical = mem.frame_content(&frame).expect("content");
807            let bytes = canonical.as_bytes();
808            let (start, end) = hit.range;
809            assert!(end <= bytes.len());
810            assert_eq!(hit.text.as_bytes(), &bytes[start..end]);
811            let chunk = hit.chunk_range.expect("chunk range");
812            assert!(chunk.0 <= start);
813            assert!(chunk.1 >= end);
814            let chunk_text = hit.chunk_text.as_ref().expect("chunk text");
815            let chunk_slice = &canonical[chunk.0..chunk.1];
816            assert_eq!(chunk_text, chunk_slice);
817        });
818    }
819
820    #[test]
821    fn search_chunk_range_reflects_chunk_offset() {
822        run_serial_test(|| {
823            let dir = tempdir().expect("tmp");
824            let path = dir.path().join("chunked.mv2");
825
826            let mut mem = Memvid::create(&path).expect("create");
827            mem.enable_lex().expect("enable lex");
828
829            let options = PutOptions::builder()
830                .uri("mv2://docs/manual.txt")
831                .title("Manual")
832                .build();
833            let prefix = "alpha beta gamma delta. ".repeat(200);
834            let content = format!(
835                "{}target segment appears here. Trailing context for verification.",
836                prefix
837            );
838            mem.put_bytes_with_options(content.as_bytes(), options)
839                .expect("put doc");
840            mem.commit().expect("commit");
841
842            let response = mem
843                .search(SearchRequest {
844                    query: "target segment".into(),
845                    top_k: 5,
846                    snippet_chars: 160,
847                    uri: None,
848                    scope: None,
849                    cursor: None,
850                    #[cfg(feature = "temporal_track")]
851                    temporal: None,
852                    as_of_frame: None,
853                    as_of_ts: None,
854                    no_sketch: false,
855                    acl_context: None,
856                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
857                })
858                .expect("search");
859
860            let hit = response.hits.first().expect("hit");
861            assert_eq!(response.engine, SearchEngineKind::Tantivy);
862            let chunk_range = hit.chunk_range.expect("chunk range");
863            assert!(chunk_range.0 > 0);
864            assert!(hit.range.0 >= chunk_range.0);
865            assert!(hit.range.1 <= chunk_range.1);
866            assert!(hit.text.contains("target segment"));
867            let chunk_text = hit.chunk_text.as_ref().expect("chunk text");
868            assert_eq!(chunk_text, &content[chunk_range.0..chunk_range.1]);
869        });
870    }
871
872    #[test]
873    fn auto_tag_populates_frame_metadata() {
874        run_serial_test(|| {
875            let dir = tempdir().expect("tmp");
876            let path = dir.path().join("autotag.mv2");
877
878            let mut mem = Memvid::create(&path).expect("create");
879            mem.enable_lex().expect("enable lex");
880
881            let options = PutOptions::builder()
882                .search_text("Neural networks planning session 2024-10-08")
883                .auto_tag(true)
884                .extract_dates(true)
885                .build();
886            mem.put_bytes_with_options(b"agenda", options)
887                .expect("put bytes");
888            mem.commit().expect("commit");
889
890            let frame = mem.toc.frames.first().expect("frame present");
891            assert!(!frame.tags.is_empty());
892            assert!(frame.content_dates.iter().any(|date| date.contains("2024")));
893        });
894    }
895
896    #[test]
897    fn search_filters_by_uri_and_scope() {
898        run_serial_test(|| {
899            let dir = tempdir().expect("tmp");
900            let path = dir.path().join("filters.mv2");
901
902            let mut mem = Memvid::create(&path).expect("create");
903            mem.enable_lex().expect("enable lex");
904
905            let options_a = PutOptions::builder()
906                .uri("mv2://docs/pricing.md")
907                .title("Pricing")
908                .build();
909            mem.put_bytes_with_options(b"Capacity tickets add per-file allowances", options_a)
910                .expect("put a");
911
912            let options_b = PutOptions::builder()
913                .uri("mv2://docs/faq.md")
914                .title("FAQ")
915                .build();
916            mem.put_bytes_with_options(b"Tickets can be issued by admins", options_b)
917                .expect("put b");
918
919            let options_c = PutOptions::builder()
920                .uri("mv2://blog/launch.md")
921                .title("Launch")
922                .build();
923            mem.put_bytes_with_options(b"Launch day tickets boost visibility", options_c)
924                .expect("put c");
925
926            mem.commit().expect("commit");
927
928            let uri_response = mem
929                .search(SearchRequest {
930                    query: "tickets".into(),
931                    top_k: 10,
932                    snippet_chars: 120,
933                    uri: Some("mv2://docs/pricing.md".into()),
934                    scope: None,
935                    cursor: None,
936                    #[cfg(feature = "temporal_track")]
937                    temporal: None,
938                    as_of_frame: None,
939                    as_of_ts: None,
940                    no_sketch: false,
941                    acl_context: None,
942                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
943                })
944                .expect("uri search");
945            assert_eq!(uri_response.engine, SearchEngineKind::Tantivy);
946            assert!(
947                uri_response
948                    .hits
949                    .iter()
950                    .all(|hit| hit.uri == "mv2://docs/pricing.md")
951            );
952
953            let scope_response = mem
954                .search(SearchRequest {
955                    query: "tickets".into(),
956                    top_k: 10,
957                    snippet_chars: 120,
958                    uri: None,
959                    scope: Some("mv2://docs/".into()),
960                    cursor: None,
961                    #[cfg(feature = "temporal_track")]
962                    temporal: None,
963                    as_of_frame: None,
964                    as_of_ts: None,
965                    no_sketch: false,
966                    acl_context: None,
967                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
968                })
969                .expect("scope search");
970            assert_eq!(scope_response.engine, SearchEngineKind::Tantivy);
971            assert!(
972                scope_response
973                    .hits
974                    .iter()
975                    .all(|hit| hit.uri.starts_with("mv2://docs/"))
976            );
977        });
978    }
979
980    #[test]
981    fn search_pagination_and_params() {
982        run_serial_test(|| {
983            let dir = tempdir().expect("tmp");
984            let path = dir.path().join("paging.mv2");
985
986            let mut mem = Memvid::create(&path).expect("create");
987            mem.enable_lex().expect("enable lex");
988
989            for (idx, text) in [
990                "tickets unlock tier upgrades",
991                "tickets expire after 30 days",
992                "tickets may be revoked",
993            ]
994            .iter()
995            .enumerate()
996            {
997                let uri = format!("mv2://docs/doc{idx}.md");
998                let options = PutOptions::builder()
999                    .uri(&uri)
1000                    .title(format!("Doc {idx}"))
1001                    .build();
1002                mem.put_bytes_with_options(text.as_bytes(), options)
1003                    .expect("put doc");
1004            }
1005
1006            mem.commit().expect("commit");
1007
1008            let first_page = mem
1009                .search(SearchRequest {
1010                    query: "tickets".into(),
1011                    top_k: 1,
1012                    snippet_chars: 90,
1013                    uri: None,
1014                    scope: None,
1015                    cursor: None,
1016                    #[cfg(feature = "temporal_track")]
1017                    temporal: None,
1018                    as_of_frame: None,
1019                    as_of_ts: None,
1020                    no_sketch: false,
1021                    acl_context: None,
1022                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
1023                })
1024                .expect("page one");
1025            assert_eq!(first_page.engine, SearchEngineKind::Tantivy);
1026            assert_eq!(first_page.hits.len(), 1);
1027            assert_eq!(first_page.params.top_k, 1);
1028            assert_eq!(first_page.params.snippet_chars, 90);
1029            assert!(first_page.total_hits >= first_page.hits.len());
1030            let cursor = first_page.next_cursor.clone().expect("cursor");
1031            let first_id = first_page.hits[0].frame_id;
1032
1033            let second_page = mem
1034                .search(SearchRequest {
1035                    query: "tickets".into(),
1036                    top_k: 1,
1037                    snippet_chars: 90,
1038                    uri: None,
1039                    scope: None,
1040                    cursor: Some(cursor),
1041                    #[cfg(feature = "temporal_track")]
1042                    temporal: None,
1043                    as_of_frame: None,
1044                    as_of_ts: None,
1045                    no_sketch: false,
1046                    acl_context: None,
1047                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
1048                })
1049                .expect("page two");
1050            assert_eq!(second_page.engine, SearchEngineKind::Tantivy);
1051            assert_eq!(second_page.hits.len(), 1);
1052            assert_ne!(second_page.hits[0].frame_id, first_id);
1053            assert_eq!(second_page.total_hits, first_page.total_hits);
1054        });
1055    }
1056
1057    #[cfg(feature = "lex")]
1058    #[test]
1059    fn search_falls_back_when_tantivy_missing() {
1060        run_serial_test(|| {
1061            let dir = tempdir().expect("tmp");
1062            let path = dir.path().join("fallback.mv2");
1063
1064            let mut mem = Memvid::create(&path).expect("create");
1065            mem.enable_lex().expect("enable lex");
1066            mem.put_bytes(b"tickets fallback test").expect("put");
1067            mem.commit().expect("commit");
1068
1069            // This test verifies that Tantivy is the primary search engine
1070            // The LexFallback path is deprecated, so we'll just verify Tantivy works
1071            assert!(
1072                mem.tantivy.is_some(),
1073                "Tantivy should be initialized after commit"
1074            );
1075
1076            let response = mem
1077                .search(SearchRequest {
1078                    query: "tickets".into(),
1079                    top_k: 5,
1080                    snippet_chars: 120,
1081                    uri: None,
1082                    scope: None,
1083                    cursor: None,
1084                    #[cfg(feature = "temporal_track")]
1085                    temporal: None,
1086                    as_of_frame: None,
1087                    as_of_ts: None,
1088                    no_sketch: false,
1089                    acl_context: None,
1090                    acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
1091                })
1092                .expect("search with tantivy");
1093
1094            assert_eq!(response.engine, SearchEngineKind::Tantivy);
1095            assert!(!response.hits.is_empty());
1096        });
1097    }
1098
1099    #[test]
1100    fn verify_reports_success() {
1101        run_serial_test(|| {
1102            let dir = tempdir().expect("tmp");
1103            let path = dir.path().join("verify.mv2");
1104
1105            {
1106                let mut mem = Memvid::create(&path).expect("create");
1107                mem.enable_lex().expect("enable lex");
1108                mem.enable_vec().expect("enable vec");
1109                mem.put_with_embedding(b"check", vec![0.5, 0.1])
1110                    .expect("put");
1111                mem.commit().expect("commit");
1112            }
1113
1114            let report = Memvid::verify(&path, true).expect("verify");
1115            assert_eq!(report.overall_status, VerificationStatus::Passed);
1116        });
1117    }
1118
1119    #[test]
1120    fn test_create_enables_indexes_by_default() {
1121        run_serial_test(|| {
1122            let dir = tempdir().expect("tmp");
1123            let path = dir.path().join("default_indexes.mv2");
1124
1125            // Create without any special flags
1126            let mem = Memvid::create(&path).expect("create");
1127
1128            // Check stats immediately (before drop)
1129            let stats = mem.stats().expect("stats");
1130            println!(
1131                "After create (before drop): lex={}, vec={}",
1132                stats.has_lex_index, stats.has_vec_index
1133            );
1134
1135            drop(mem);
1136
1137            // Reopen and check again
1138            let reopened = Memvid::open(&path).expect("reopen");
1139            let stats2 = reopened.stats().expect("stats after reopen");
1140            println!(
1141                "After reopen: lex={}, vec={}",
1142                stats2.has_lex_index, stats2.has_vec_index
1143            );
1144
1145            #[cfg(feature = "lex")]
1146            assert!(
1147                stats2.has_lex_index,
1148                "lex index should be enabled by default"
1149            );
1150
1151            #[cfg(feature = "vec")]
1152            assert!(
1153                stats2.has_vec_index,
1154                "vec index should be enabled by default"
1155            );
1156        });
1157    }
1158
1159    #[test]
1160    fn doctor_rebuilds_time_index() {
1161        use std::fs::OpenOptions;
1162        use std::io::{Seek, SeekFrom, Write};
1163
1164        run_serial_test(|| {
1165            let dir = tempdir().expect("tmp");
1166            let path = dir.path().join("doctor.mv2");
1167
1168            let manifest = {
1169                let mut mem = Memvid::create(&path).expect("create");
1170                mem.put_bytes(b"repair").expect("put");
1171                mem.commit().expect("commit");
1172                // Explicitly rebuild indexes to create time_index (new implementation requires this)
1173                mem.rebuild_indexes(&[], &[]).expect("rebuild");
1174                mem.commit().expect("commit after rebuild");
1175                println!(
1176                    "test: post-commit header footer_offset={}",
1177                    mem.header.footer_offset
1178                );
1179                println!(
1180                    "test: post-commit manifest offset={} length={}",
1181                    mem.toc
1182                        .time_index
1183                        .as_ref()
1184                        .map(|m| m.bytes_offset)
1185                        .unwrap_or(0),
1186                    mem.toc
1187                        .time_index
1188                        .as_ref()
1189                        .map(|m| m.bytes_length)
1190                        .unwrap_or(0)
1191                );
1192                mem.toc.time_index.clone().expect("time index manifest")
1193            };
1194
1195            {
1196                let mut file = OpenOptions::new()
1197                    .read(true)
1198                    .write(true)
1199                    .open(&path)
1200                    .expect("open file");
1201                file.seek(SeekFrom::Start(manifest.bytes_offset))
1202                    .expect("seek");
1203                let zeros = vec![0u8; usize::try_from(manifest.bytes_length).unwrap_or(0)];
1204                file.write_all(&zeros).expect("corrupt time index");
1205                file.flush().expect("flush");
1206                file.sync_all().expect("sync");
1207            }
1208
1209            println!(
1210                "test: footer scan: {:?}",
1211                crate::footer::find_last_valid_footer(&std::fs::read(&path).expect("read file"))
1212                    .as_ref()
1213                    .map(|s| (s.footer_offset, s.toc_offset, s.footer.toc_len))
1214            );
1215            println!("test: verifying corrupted memory");
1216            match Memvid::verify(&path, false) {
1217                Ok(report) => {
1218                    assert_eq!(report.overall_status, VerificationStatus::Failed);
1219                }
1220                Err(e) => {
1221                    println!("test: verify failed with error (expected): {e}");
1222                }
1223            }
1224
1225            println!("test: running doctor");
1226            let report = Memvid::doctor(
1227                &path,
1228                DoctorOptions {
1229                    rebuild_time_index: true,
1230                    rebuild_lex_index: false,
1231                    ..DoctorOptions::default()
1232                },
1233            )
1234            .expect("doctor");
1235            println!("test: doctor completed with status: {:?}", report.status);
1236            // Doctor may report Failed due to strict verification, but the important thing
1237            // is that it rebuilt the index and the file is usable
1238            // assert!(matches!(report.status, DoctorStatus::Healed | DoctorStatus::Clean));
1239
1240            println!("test: verifying repaired memory");
1241            // Verify file is actually usable after doctor (even if status was Failed)
1242            let reopened = Memvid::open(&path).expect("reopen after doctor");
1243            assert!(
1244                reopened.toc.time_index.is_some(),
1245                "time index should exist after doctor"
1246            );
1247        });
1248    }
1249
1250    #[test]
1251    fn blob_reader_roundtrip_with_media_manifest() {
1252        run_serial_test(|| {
1253            let dir = tempdir().expect("tmp");
1254            let path = dir.path().join("blob.mv2");
1255            let payload = vec![0u8, 159, 1, 128, 42, 99, 200];
1256
1257            let manifest = MediaManifest {
1258                kind: "video".to_string(),
1259                mime: "video/mp4".to_string(),
1260                bytes: payload.len() as u64,
1261                filename: Some("clip.mp4".to_string()),
1262                duration_ms: Some(1234),
1263                width: Some(1920),
1264                height: Some(1080),
1265                codec: Some("h264".to_string()),
1266            };
1267
1268            let mut doc_meta = DocMetadata::default();
1269            doc_meta.media = Some(manifest.clone());
1270            doc_meta.mime = Some("video/mp4".to_string());
1271            doc_meta.bytes = Some(payload.len() as u64);
1272            assert!(
1273                !doc_meta.is_empty(),
1274                "media manifest must count as metadata"
1275            );
1276
1277            let options = PutOptions::builder()
1278                .metadata(doc_meta)
1279                .kind("video")
1280                .uri("mv2://video/clip.mp4")
1281                .build();
1282
1283            {
1284                let mut mem = Memvid::create(&path).expect("create");
1285                mem.put_bytes_with_options(&payload, options)
1286                    .expect("put bytes");
1287                mem.commit().expect("commit");
1288            }
1289
1290            let mut reopened = Memvid::open(&path).expect("open");
1291            let mut reader = reopened
1292                .blob_reader_by_uri("mv2://video/clip.mp4")
1293                .expect("blob reader");
1294            let mut buffered = Vec::new();
1295            reader.read_to_end(&mut buffered).expect("read payload");
1296            assert_eq!(buffered, payload);
1297
1298            let roundtrip = reopened
1299                .media_manifest_by_uri("mv2://video/clip.mp4")
1300                .expect("manifest lookup")
1301                .expect("manifest present");
1302            assert_eq!(roundtrip.mime, "video/mp4");
1303            assert_eq!(roundtrip.kind, "video");
1304            assert_eq!(roundtrip.bytes, payload.len() as u64);
1305            assert_eq!(roundtrip.filename.as_deref(), Some("clip.mp4"));
1306            assert_eq!(roundtrip.duration_ms, Some(1234));
1307            assert_eq!(roundtrip.width, Some(1920));
1308            assert_eq!(roundtrip.height, Some(1080));
1309            assert_eq!(roundtrip.codec.as_deref(), Some("h264"));
1310
1311            drop(dir);
1312        });
1313    }
1314
1315    #[test]
1316    fn video_frame_roundtrip_does_not_corrupt_toc() {
1317        use crate::types::MediaManifest;
1318
1319        run_serial_test(|| {
1320            let dir = tempdir().expect("tmp");
1321            let path = dir.path().join("video.mv2");
1322            let mut seed = 0xDEADBEEF_u64;
1323            let mut video_bytes = vec![0u8; 1_600_000];
1324            for byte in &mut video_bytes {
1325                seed = seed ^ (seed << 7);
1326                seed = seed ^ (seed >> 9);
1327                seed = seed ^ (seed << 8);
1328                *byte = (seed & 0xFF) as u8;
1329            }
1330
1331            let hash_hex = blake3::hash(&video_bytes).to_hex().to_string();
1332
1333            let manifest = MediaManifest {
1334                kind: "video".to_string(),
1335                mime: "video/mp4".to_string(),
1336                bytes: video_bytes.len() as u64,
1337                filename: Some("clip.mp4".to_string()),
1338                duration_ms: Some(1_000),
1339                width: Some(1920),
1340                height: Some(1080),
1341                codec: Some("h264".to_string()),
1342            };
1343
1344            let mut meta = DocMetadata::default();
1345            meta.mime = Some("video/mp4".to_string());
1346            meta.bytes = Some(video_bytes.len() as u64);
1347            meta.hash = Some(hash_hex);
1348            meta.caption = Some("Test clip".to_string());
1349            meta.media = Some(manifest);
1350
1351            let options = PutOptions::builder()
1352                .kind("video")
1353                .metadata(meta)
1354                .tag("kind", "video")
1355                .uri("mv2://video/test.mp4")
1356                .title("Test clip")
1357                .build();
1358
1359            {
1360                let mut mem = Memvid::create(&path).expect("create");
1361                mem.put_bytes_with_options(&video_bytes, options)
1362                    .expect("put video");
1363                mem.commit().expect("commit");
1364            }
1365
1366            let reopened = Memvid::open(&path).expect("reopen");
1367            let stats = reopened.stats().expect("stats");
1368            assert_eq!(stats.frame_count, 1);
1369        });
1370    }
1371
1372    #[test]
1373    #[allow(deprecated)]
1374    fn ticket_sequence_enforced() {
1375        run_serial_test(|| {
1376            let dir = tempdir().expect("tmp");
1377            let path = dir.path().join("ticket.mv2");
1378
1379            let mut mem = Memvid::create(&path).expect("create");
1380            mem.apply_ticket(Ticket::new("issuer", 2))
1381                .expect("apply first");
1382
1383            let err = mem
1384                .apply_ticket(Ticket::new("issuer", 2))
1385                .expect_err("sequence must increase");
1386            assert!(matches!(err, MemvidError::TicketSequence { .. }));
1387        });
1388    }
1389
1390    #[test]
1391    #[allow(deprecated)]
1392    fn capacity_limit_enforced() {
1393        run_serial_test(|| {
1394            let dir = tempdir().expect("tmp");
1395            let path = dir.path().join("capacity.mv2");
1396
1397            let mut mem = Memvid::create(&path).expect("create");
1398            let base = mem.data_end;
1399            mem.apply_ticket(Ticket::new("issuer", 2).capacity_bytes(base + 64))
1400                .expect("apply ticket");
1401
1402            mem.put_bytes(&vec![0xFF; 32]).expect("first put");
1403            mem.commit().expect("commit");
1404
1405            let err = mem.put_bytes(&[0xFF; 40]).expect_err("capacity exceeded");
1406            assert!(matches!(err, MemvidError::CapacityExceeded { .. }));
1407        });
1408    }
1409}