Skip to main content

memvid_core/
lib.rs

1#![deny(clippy::all, clippy::pedantic)]
2#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
3#![cfg_attr(
4    test,
5    allow(
6        clippy::useless_vec,
7        clippy::uninlined_format_args,
8        clippy::cast_possible_truncation,
9        clippy::float_cmp,
10        clippy::cast_precision_loss
11    )
12)]
13#![allow(clippy::module_name_repetitions)]
14//
15// Strategic lint exceptions - these are allowed project-wide for pragmatic reasons:
16//
17// Documentation lints: Many internal/self-documenting functions don't need extensive docs.
18// Public APIs should still have proper documentation.
19#![allow(clippy::missing_errors_doc)]
20#![allow(clippy::missing_panics_doc)]
21#![allow(clippy::doc_markdown)]
22//
23// Cast safety: All casts in this codebase are carefully reviewed and bounded by
24// real-world constraints (file sizes, frame counts, etc). Using try_into() everywhere
25// would add significant complexity without safety benefits in our use case.
26#![allow(clippy::cast_precision_loss)]
27#![allow(clippy::cast_possible_wrap)]
28#![allow(clippy::cast_sign_loss)]
29#![allow(clippy::cast_lossless)]
30//
31// Style/complexity: Some database-like operations naturally require complex functions.
32// Breaking them up would hurt readability.
33#![allow(clippy::too_many_lines)]
34#![allow(clippy::too_many_arguments)]
35#![allow(clippy::items_after_statements)]
36#![allow(clippy::similar_names)]
37// e.g., frame_id, parent_id, target_id are intentionally similar
38//
39// Pattern matching: These pedantic lints often suggest changes that reduce clarity.
40#![allow(clippy::manual_let_else)]
41#![allow(clippy::match_same_arms)]
42#![allow(clippy::if_same_then_else)]
43#![allow(clippy::collapsible_match)]
44//
45// Performance/ergonomics trade-offs that are acceptable for this codebase:
46#![allow(clippy::needless_pass_by_value)] // Many builders take owned values intentionally
47#![allow(clippy::return_self_not_must_use)] // Builder patterns don't need must_use on every method
48#![allow(clippy::format_push_string)] // Readability over minor perf difference
49#![allow(clippy::assigning_clones)] // clone_from() often less readable
50//
51// Low-value pedantic lints that add noise:
52#![allow(clippy::struct_excessive_bools)] // Config structs naturally have many flags
53#![allow(clippy::needless_continue)]
54#![allow(clippy::needless_range_loop)]
55#![allow(clippy::case_sensitive_file_extension_comparisons)]
56#![allow(clippy::default_trait_access)]
57#![allow(clippy::field_reassign_with_default)]
58#![allow(clippy::unreadable_literal)] // Magic numbers in binary formats are clearer as hex
59#![allow(clippy::implicit_hasher)]
60#![allow(clippy::manual_clamp)]
61#![allow(clippy::len_without_is_empty)] // Many index types don't need is_empty()
62#![allow(clippy::large_enum_variant)]
63#![allow(clippy::ptr_arg)]
64#![allow(clippy::map_unwrap_or)]
65#![allow(clippy::incompatible_msrv)]
66#![allow(clippy::should_implement_trait)] // Some method names are clearer than trait names
67#![allow(clippy::duplicated_attributes)]
68//
69// Return value wrapping: Many functions use Result for consistency even when they
70// currently can't fail, allowing future error conditions to be added without breaking API.
71#![allow(clippy::unnecessary_wraps)]
72#![allow(clippy::unused_self)] // Some trait impls or future extensibility
73
74/// The memvid-core crate version (matches `Cargo.toml`).
75pub const MEMVID_CORE_VERSION: &str = env!("CARGO_PKG_VERSION");
76
77mod analysis;
78pub mod constants;
79pub mod enrich;
80pub mod enrichment_worker;
81pub mod error;
82pub mod extract;
83pub mod extract_budgeted;
84pub mod footer;
85pub mod io;
86pub mod lex;
87mod lock;
88pub mod lockfile;
89pub mod memvid;
90pub mod models;
91pub mod pii;
92pub mod reader;
93mod registry;
94mod search;
95pub mod signature;
96pub mod structure;
97pub mod table;
98pub mod text;
99mod toc;
100pub mod types;
101pub mod vec;
102pub mod vec_pq;
103
104// SIMD-accelerated distance calculations
105pub mod simd;
106
107#[cfg(feature = "vec")]
108pub mod text_embed;
109
110// Triplet extraction module for automatic SPO extraction during ingestion
111pub mod triplet;
112
113// Graph-aware search for hybrid retrieval
114pub mod graph_search;
115
116// CLIP module is always compiled (for ClipIndexManifest serde compatibility)
117// but ClipModel/inference requires the "clip" feature
118pub mod clip;
119
120// Whisper module for audio transcription
121// Model inference requires the "whisper" feature
122pub mod whisper;
123
124// Replay module for time-travel debugging of agent sessions
125// Types are always available for serde compatibility
126// Full functionality requires the "replay" feature
127pub mod replay;
128
129// Password-based encryption capsules (.mv2e)
130// Feature-gated to avoid pulling crypto dependencies into default builds.
131#[cfg(feature = "encryption")]
132pub mod encryption;
133
134// SymSpell-based PDF text cleanup - fixes broken word spacing
135#[cfg(feature = "symspell_cleanup")]
136pub mod symspell_cleanup;
137
138// API-based embedding providers (OpenAI, etc.) - requires network
139#[cfg(feature = "api_embed")]
140pub mod api_embed;
141
142#[cfg(test)]
143mod tests_lex_flag;
144
145#[cfg(feature = "temporal_track")]
146pub use analysis::temporal::{
147    TemporalContext, TemporalNormalizer, TemporalResolution, TemporalResolutionFlag,
148    TemporalResolutionValue, parse_clock_inheritance, parse_week_start,
149};
150// Temporal enrichment for resolving relative time references during ingestion
151#[cfg(feature = "temporal_enrich")]
152pub use analysis::temporal_enrich::{
153    AnchorSource as TemporalEnrichAnchorSource, RelativePhrase, ResolvedTemporal,
154    TemporalAnchorInfo, TemporalAnchorTracker, TemporalEnrichment, detect_relative_phrases,
155    enrich_chunk, enrich_chunks, enrich_document, resolve_relative_phrase,
156};
157pub use constants::*;
158pub use enrichment_worker::{EnrichmentWorkerConfig, EnrichmentWorkerStats};
159pub use error::{MemvidError, Result};
160pub use extract::{DocumentProcessor, ExtractedDocument, ProcessorConfig};
161pub use footer::{CommitFooter, find_last_valid_footer};
162#[cfg(feature = "temporal_track")]
163pub use io::temporal_index::{
164    append_track as temporal_track_append, calculate_checksum as temporal_track_checksum,
165    read_track as temporal_track_read, window as temporal_track_window,
166};
167pub use io::time_index::{
168    TimeIndexEntry, append_track as time_index_append, calculate_checksum as time_index_checksum,
169    read_track as time_index_read,
170};
171pub use io::wal::{EmbeddedWal, WalRecord, WalStats};
172pub use lex::{LexIndex, LexIndexArtifact, LexIndexBuilder, LexSearchHit};
173pub use lock::FileLock;
174pub use memvid::{
175    BlobReader, EnrichmentHandle, EnrichmentStats, LockSettings, Memvid, OpenReadOptions,
176    SketchCandidate, SketchSearchOptions, SketchSearchStats,
177    mutation::{CommitMode, CommitOptions},
178    start_enrichment_worker, start_enrichment_worker_with_embeddings,
179};
180#[cfg(feature = "parallel_segments")]
181pub use memvid::{BuildOpts, ParallelInput, ParallelPayload};
182pub use models::{
183    ModelManifest, ModelManifestEntry, ModelVerification, ModelVerificationStatus,
184    ModelVerifyOptions, verify_model_dir, verify_models,
185};
186pub use reader::{
187    DocumentFormat, DocumentReader, PassthroughReader, PdfReader, ReaderDiagnostics, ReaderHint,
188    ReaderOutput, ReaderRegistry,
189};
190pub use signature::{
191    parse_ed25519_public_key_base64, verify_model_manifest, verify_ticket_signature,
192};
193pub use text::{NormalizedText, normalize_text, truncate_at_grapheme_boundary};
194#[cfg(feature = "temporal_track")]
195pub use types::{
196    AnchorSource, SearchHitTemporal, SearchHitTemporalAnchor, SearchHitTemporalMention,
197    TEMPORAL_TRACK_FLAG_HAS_ANCHORS, TEMPORAL_TRACK_FLAG_HAS_MENTIONS, TemporalAnchor,
198    TemporalCapabilities, TemporalFilter, TemporalMention, TemporalMentionFlags,
199    TemporalMentionKind, TemporalTrack, TemporalTrackManifest,
200};
201pub use types::{
202    AskCitation, AskMode, AskRequest, AskResponse, AskRetriever, AskStats, AudioSegmentMetadata,
203    AuditOptions, AuditReport, CanonicalEncoding, DOCTOR_PLAN_VERSION, DocAudioMetadata,
204    DocExifMetadata, DocGpsMetadata, DocMetadata, DoctorActionDetail, DoctorActionKind,
205    DoctorActionPlan, DoctorActionReport, DoctorActionStatus, DoctorFinding, DoctorFindingCode,
206    DoctorMetrics, DoctorOptions, DoctorPhaseDuration, DoctorPhaseKind, DoctorPhasePlan,
207    DoctorPhaseReport, DoctorPhaseStatus, DoctorPlan, DoctorReport, DoctorSeverity, DoctorStatus,
208    EmbeddingIdentity, EmbeddingIdentityCount, EmbeddingIdentitySummary, Frame, FrameId, FrameRole,
209    FrameStatus, Header, IndexManifests, LexIndexManifest, LexSegmentDescriptor,
210    MEMVID_EMBEDDING_DIMENSION_KEY, MEMVID_EMBEDDING_MODEL_KEY, MEMVID_EMBEDDING_NORMALIZED_KEY,
211    MEMVID_EMBEDDING_PROVIDER_KEY, MediaManifest, MemvidHandle, Open, PutOptions,
212    PutOptionsBuilder, Sealed, SearchEngineKind, SearchHit, SearchHitMetadata, SearchParams,
213    SearchRequest, SearchResponse, SegmentCatalog, SegmentCommon, SegmentCompression, SegmentMeta,
214    SegmentSpan, SourceSpan, Stats, TextChunkManifest, TextChunkRange, Ticket, TicketRef, Tier,
215    TimeIndexManifest, TimeSegmentDescriptor, TimelineEntry, TimelineQuery, TimelineQueryBuilder,
216    Toc, VecEmbedder, VecIndexManifest, VecSegmentDescriptor, VectorCompression, VerificationCheck,
217    VerificationReport, VerificationStatus,
218};
219// Memory card types for structured memory extraction and storage
220pub use types::{
221    EngineStamp, EnrichmentManifest, EnrichmentRecord, MEMORIES_TRACK_MAGIC,
222    MEMORIES_TRACK_VERSION, MemoriesStats, MemoriesTrack, MemoryCard, MemoryCardBuilder,
223    MemoryCardBuilderError, MemoryCardId, MemoryKind, Polarity, SlotIndex, VersionRelation,
224};
225// Logic-Mesh types for entity-relationship graph traversal
226pub use types::{
227    EdgeDirection, EntityKind, FollowResult, LOGIC_MESH_MAGIC, LOGIC_MESH_VERSION, LinkType,
228    LogicMesh, LogicMeshManifest, MeshEdge, MeshNode,
229};
230// Sketch track types for fast candidate generation
231pub use types::{
232    DEFAULT_HAMMING_THRESHOLD, QuerySketch, SKETCH_TRACK_MAGIC, SKETCH_TRACK_VERSION, SketchEntry,
233    SketchFlags, SketchTrack, SketchTrackHeader, SketchTrackManifest, SketchTrackStats,
234    SketchVariant, build_term_filter, compute_simhash, compute_token_weights, generate_sketch,
235    hash_token, hash_token_u32, read_sketch_track, term_filter_maybe_contains, tokenize_for_sketch,
236    write_sketch_track,
237};
238// Schema types for predicate validation and type checking
239pub use types::{
240    Cardinality, PredicateId, PredicateSchema, SchemaError, SchemaRegistry, ValueType,
241};
242// Schema inference summary type
243pub use memvid::memory::SchemaSummaryEntry;
244// NER types for entity extraction (always available, model requires logic_mesh feature)
245#[cfg(feature = "logic_mesh")]
246pub use analysis::ner::NerModel;
247pub use analysis::ner::{
248    ExtractedEntity, FrameEntities, NER_MODEL_NAME, NER_MODEL_SIZE_MB, NER_MODEL_URL, NER_MODELS,
249    NER_TOKENIZER_URL, NerModelInfo, default_ner_model_info, get_ner_model_info,
250    is_ner_model_installed, ner_model_path, ner_tokenizer_path,
251};
252// Enrichment engine types for extracting memory cards from frames
253pub use enrich::{EnrichmentContext, EnrichmentEngine, EnrichmentResult, RulesEngine};
254// Triplet extraction types for automatic SPO extraction
255pub use triplet::{ExtractionMode, ExtractionStats, TripletExtractor};
256// Graph-aware search for hybrid retrieval
257pub use graph_search::{GraphMatcher, QueryPlanner, hybrid_search};
258// Embedding provider types for vector embedding generation
259pub use types::{
260    BatchEmbeddingResult, EmbeddingConfig, EmbeddingProvider, EmbeddingProviderKind,
261    EmbeddingResult,
262};
263// Reranker types for second-stage ranking in RAG pipelines
264pub use types::reranker::{
265    Reranker, RerankerConfig, RerankerDocument, RerankerKind, RerankerResult,
266};
267#[cfg(feature = "parallel_segments")]
268pub use types::{IndexSegmentRef, SegmentKind, SegmentStats};
269pub use vec::{VecIndex, VecIndexArtifact, VecSearchHit};
270pub use vec_pq::{
271    CompressionStats, ProductQuantizer, QuantizedVecIndex, QuantizedVecIndexArtifact,
272    QuantizedVecIndexBuilder,
273};
274// Local text embedding provider - feature-gated
275#[cfg(feature = "vec")]
276pub use text_embed::{
277    LocalTextEmbedder, TEXT_EMBED_MODELS, TextEmbedConfig, TextEmbedModelInfo,
278    default_text_model_info, get_text_model_info,
279};
280// API-based embedding providers - feature-gated
281#[cfg(feature = "api_embed")]
282pub use api_embed::{
283    OPENAI_MODELS, OpenAIConfig, OpenAIEmbedder, OpenAIModelInfo, default_openai_model_info,
284    get_openai_model_info,
285};
286// CLIP visual embeddings - types always available for serde compatibility
287pub use clip::{
288    CLIP_MODELS, ClipConfig, ClipDocument, ClipEmbeddingProvider, ClipError, ClipIndex,
289    ClipIndexArtifact, ClipIndexBuilder, ClipIndexManifest, ClipModelInfo, ClipSearchHit,
290    ImageInfo, MOBILECLIP_DIMS, SIGLIP_DIMS, default_model_info, filter_junk_images,
291    get_model_info,
292};
293// CLIP model inference requires the "clip" feature
294#[cfg(feature = "clip")]
295pub use clip::{ClipModel, calculate_color_variance, get_image_info};
296// Whisper audio transcription - types always available
297pub use whisper::{
298    TranscriptionResult, TranscriptionSegment, WHISPER_MODELS, WhisperConfig, WhisperError,
299    WhisperModelInfo, default_whisper_model_info, get_whisper_model_info,
300};
301// Audio decoding and transcription require the "whisper" feature
302#[cfg(feature = "whisper")]
303pub use whisper::{WHISPER_SAMPLE_RATE, WhisperTranscriber, decode_audio_file};
304// Structure-aware chunking for preserving tables and code blocks
305pub use structure::{
306    ChunkType, ChunkingOptions, ChunkingResult, StructuralChunker, StructuredChunk,
307    StructuredDocument, TableChunkingStrategy, chunk_structured, detect_structure,
308};
309// Adaptive retrieval for dynamic result set sizing
310pub use types::adaptive::{
311    AdaptiveConfig, AdaptiveResult, AdaptiveStats, CutoffStrategy, find_adaptive_cutoff,
312    normalize_scores,
313};
314// Replay types for time-travel debugging - always available for serde
315pub use replay::{
316    ActionType, Checkpoint, REPLAY_SEGMENT_MAGIC, REPLAY_SEGMENT_VERSION, ReplayAction,
317    ReplayManifest, ReplaySession, SessionSummary, StateSnapshot,
318};
319// Full replay functionality requires the "replay" feature
320#[cfg(feature = "replay")]
321pub use replay::{
322    ActiveSession, ComparisonReport, ComparisonSummary, Divergence, DivergenceType, ModelResult,
323    ReplayConfig, ReplayOptions, ReplayResult,
324};
325
326#[cfg(test)]
327use once_cell::sync::Lazy;
328use std::fs::File;
329use std::io::Cursor;
330use std::path::Path;
331#[cfg(test)]
332use std::sync::Mutex;
333
334use bincode::config::{self, Config};
335use io::header::HeaderCodec;
336
/// Length cap for timeline previews. Despite the `_BYTES` suffix, `truncate_preview`
/// applies this as a number of `char`s, so the resulting UTF-8 string may be longer
/// than 120 bytes.
const TIMELINE_PREVIEW_BYTES: usize = 120;
/// 512 MiB. NOTE(review): not referenced in this part of the file; presumably an upper
/// bound on index sizes read from disk - confirm against the callers.
const MAX_INDEX_BYTES: u64 = 512 * 1024 * 1024; // Increased from 64MB to 512MB for large datasets
/// 512 MiB. NOTE(review): presumably the equivalent bound for the time index - confirm.
const MAX_TIME_INDEX_BYTES: u64 = 512 * 1024 * 1024;
/// 256 MiB. NOTE(review): presumably the largest accepted frame payload - confirm.
const MAX_FRAME_BYTES: u64 = 256 * 1024 * 1024;
/// NOTE(review): not referenced in this part of the file; the unit (bytes vs. chars) of
/// this search-text limit should be confirmed at the use site.
const DEFAULT_SEARCH_TEXT_LIMIT: usize = 32_768;
342
343#[cfg(test)]
344#[allow(clippy::non_std_lazy_statics)]
345static SERIAL_TEST_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
346
/// Runs `f` while holding the crate-wide serial test mutex, so tests wrapped in this
/// helper never execute concurrently with each other.
///
/// The mutex guards no data (`()`), so a poisoned lock carries no corrupted state. If an
/// earlier test panicked while holding the lock, the guard is recovered from the poison
/// error instead of panicking again; otherwise one failing test would make every later
/// serial test fail with an unrelated "mutex poisoned" panic and hide the real failure.
///
/// Returns whatever `f` returns. The lock is released when this function returns or
/// when `f` unwinds.
#[cfg(test)]
pub(crate) fn run_serial_test<T>(f: impl FnOnce() -> T) -> T {
    let _guard = SERIAL_TEST_MUTEX
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    f()
}
354
355impl Memvid {
356    #[cfg(feature = "lex")]
357    fn tantivy_index_pending(&self) -> bool {
358        self.tantivy_dirty
359    }
360
361    #[cfg(not(feature = "lex"))]
362    fn tantivy_index_pending(&self) -> bool {
363        false
364    }
365
366    #[cfg(feature = "lex")]
367    fn flush_tantivy_conditional(&mut self, embed_snapshot: bool) -> Result<()> {
368        if !self.tantivy_dirty {
369            return Ok(());
370        }
371        if let Some(engine) = self.tantivy.as_mut() {
372            engine.commit()?;
373            if embed_snapshot {
374                let snapshot = engine.snapshot_segments()?;
375                self.update_embedded_lex_snapshot(snapshot)?;
376            }
377        }
378        self.tantivy_dirty = false;
379        Ok(())
380    }
381
382    #[cfg(feature = "lex")]
383    fn flush_tantivy(&mut self) -> Result<()> {
384        self.flush_tantivy_conditional(true)
385    }
386
387    #[cfg(feature = "lex")]
388    #[allow(dead_code)]
389    fn flush_tantivy_skip_embed(&mut self) -> Result<()> {
390        self.flush_tantivy_conditional(false)
391    }
392
393    #[cfg(not(feature = "lex"))]
394    fn flush_tantivy(&mut self) -> Result<()> {
395        Ok(())
396    }
397
398    #[cfg(not(feature = "lex"))]
399    #[allow(dead_code)]
400    fn flush_tantivy_skip_embed(&mut self) -> Result<()> {
401        Ok(())
402    }
403    #[must_use]
404    pub fn path(&self) -> &Path {
405        &self.path
406    }
407
408    #[must_use]
409    pub fn lock_handle(&self) -> &FileLock {
410        &self.lock
411    }
412
413    #[must_use]
414    pub fn is_read_only(&self) -> bool {
415        self.read_only
416    }
417
418    pub(crate) fn ensure_writable(&mut self) -> Result<()> {
419        if self.read_only {
420            self.lock.upgrade_to_exclusive()?;
421            self.read_only = false;
422        }
423        Ok(())
424    }
425
426    pub fn downgrade_to_shared(&mut self) -> Result<()> {
427        if self.read_only {
428            return Ok(());
429        }
430        if self.dirty || self.tantivy_index_pending() {
431            return Ok(());
432        }
433        self.lock.downgrade_to_shared()?;
434        self.read_only = true;
435        Ok(())
436    }
437}
438
439impl Drop for Memvid {
440    fn drop(&mut self) {
441        if self.dirty {
442            let _ = self.commit();
443        }
444        // Clean up temporary manifest.wal file (parallel_segments feature)
445        #[cfg(feature = "parallel_segments")]
446        {
447            use crate::memvid::lifecycle::cleanup_manifest_wal_public;
448            cleanup_manifest_wal_public(self.path());
449        }
450    }
451}
452
453pub(crate) fn persist_header(file: &mut File, header: &Header) -> Result<()> {
454    HeaderCodec::write(file, header)
455}
456
457fn wal_config() -> impl Config {
458    config::standard()
459        .with_fixed_int_encoding()
460        .with_little_endian()
461}
462
463pub(crate) fn decode_canonical_bytes(
464    payload: &[u8],
465    encoding: CanonicalEncoding,
466    frame_id: FrameId,
467) -> Result<Vec<u8>> {
468    match encoding {
469        CanonicalEncoding::Plain => Ok(payload.to_vec()),
470        CanonicalEncoding::Zstd => {
471            zstd::decode_all(Cursor::new(payload)).map_err(|_| MemvidError::InvalidFrame {
472                frame_id,
473                reason: "failed to decode canonical payload",
474            })
475        }
476    }
477}
478
479pub(crate) fn default_uri(frame_id: FrameId) -> String {
480    format!("mv2://frames/{frame_id}")
481}
482
/// Derives a display title from the last path segment of `uri`.
///
/// The scheme (everything up to and including the first `://`), the fragment (from the
/// first `#`) and the query (from the first `?`) are removed, trailing slashes are
/// stripped, and the text after the last remaining `/` is taken. Its extension (from the
/// last `.`) is dropped, the rest is split on `-`, `_` and spaces, and each word gets an
/// ASCII-uppercased first character and ASCII-lowercased remainder. Words are joined
/// with single spaces.
///
/// Returns `None` when the URI is blank, or when the segment or its stem is empty
/// (for example a dot-file such as `.hidden`, or a stem made only of separators).
pub(crate) fn infer_title_from_uri(uri: &str) -> Option<String> {
    let uri = uri.trim();
    if uri.is_empty() {
        return None;
    }

    // Peel off scheme, then fragment, then query, in that order.
    let mut location = match uri.split_once("://") {
        Some((_, after_scheme)) => after_scheme,
        None => uri,
    };
    if let Some((before_fragment, _)) = location.split_once('#') {
        location = before_fragment;
    }
    if let Some((before_query, _)) = location.split_once('?') {
        location = before_query;
    }

    let location = location.trim_end_matches('/');
    let segment = match location.rsplit_once('/') {
        Some((_, tail)) => tail,
        None => location,
    }
    .trim();
    if segment.is_empty() {
        return None;
    }

    let stem = match segment.rsplit_once('.') {
        Some((name, _extension)) => name,
        None => segment,
    }
    .trim();
    if stem.is_empty() {
        return None;
    }

    let mut title = String::new();
    for part in stem.split(['-', '_', ' ']).filter(|part| !part.is_empty()) {
        if !title.is_empty() {
            title.push(' ');
        }
        let mut letters = part.chars();
        if let Some(head) = letters.next() {
            title.push(head.to_ascii_uppercase());
        }
        title.extend(letters.map(|c| c.to_ascii_lowercase()));
    }

    if title.is_empty() { None } else { Some(title) }
}
536
537fn truncate_preview(text: &str) -> String {
538    text.chars().take(TIMELINE_PREVIEW_BYTES).collect()
539}
540
541fn image_preview_from_metadata(meta: &DocMetadata) -> Option<String> {
542    let mime = meta.mime.as_deref()?;
543    if !mime.starts_with("image/") {
544        return None;
545    }
546
547    if let Some(caption) = meta.caption.as_ref() {
548        let trimmed = caption.trim();
549        if !trimmed.is_empty() {
550            return Some(truncate_preview(trimmed));
551        }
552    }
553
554    let mut segments: Vec<String> = Vec::new();
555    if let (Some(w), Some(h)) = (meta.width, meta.height) {
556        segments.push(format!("{w}×{h} px"));
557    }
558    if let Some(exif) = meta.exif.as_ref() {
559        if let Some(model) = exif
560            .model
561            .as_ref()
562            .map(|s| s.trim())
563            .filter(|s| !s.is_empty())
564        {
565            segments.push(model.to_string());
566        } else if let Some(make) = exif
567            .make
568            .as_ref()
569            .map(|s| s.trim())
570            .filter(|s| !s.is_empty())
571        {
572            segments.push(make.to_string());
573        }
574
575        if let Some(datetime) = exif
576            .datetime
577            .as_ref()
578            .map(|s| s.trim())
579            .filter(|s| !s.is_empty())
580        {
581            segments.push(datetime.to_string());
582        }
583    }
584
585    if segments.is_empty() {
586        return Some("Image frame".to_string());
587    }
588
589    Some(truncate_preview(&segments.join(" · ")))
590}
591
592#[cfg(test)]
593mod tests {
594    use super::*;
595    use std::io::Read;
596    use std::num::NonZeroU64;
597    use tempfile::tempdir;
598
599    #[test]
600    fn create_put_commit_reopen() {
601        run_serial_test(|| {
602            let dir = tempdir().expect("tmp");
603            let path = dir.path().join("memory.mv2");
604
605            let mut mem = Memvid::create(&path).expect("create");
606            let seq = mem.put_bytes(b"hello").expect("put");
607            assert_eq!(seq, 1);
608            mem.commit().expect("commit");
609
610            drop(mem);
611
612            let mut reopened = Memvid::open(&path).expect("open");
613            let stats = reopened.stats().expect("stats");
614            assert_eq!(stats.frame_count, 1);
615            assert!(stats.has_time_index);
616
617            let timeline = reopened
618                .timeline(TimelineQuery::default())
619                .expect("timeline");
620            assert_eq!(timeline.len(), 1);
621            assert!(timeline[0].preview.contains("hello"));
622
623            let wal_stats = reopened.wal.stats();
624            assert_eq!(wal_stats.pending_bytes, 0);
625            // Sequence is 2: one from create() writing manifests, one from put()
626            assert_eq!(wal_stats.sequence, 2);
627        });
628    }
629
630    #[test]
631    fn timeline_limit_and_reverse() {
632        run_serial_test(|| {
633            let dir = tempdir().expect("tmp");
634            let path = dir.path().join("timeline.mv2");
635
636            let mut mem = Memvid::create(&path).expect("create");
637            mem.put_bytes(b"alpha").expect("put alpha");
638            mem.put_bytes(b"beta").expect("put beta");
639            mem.commit().expect("commit");
640            drop(mem);
641
642            let mut reopened = Memvid::open(&path).expect("open");
643            let limited = reopened
644                .timeline(TimelineQuery {
645                    limit: NonZeroU64::new(1),
646                    since: None,
647                    until: None,
648                    reverse: false,
649                    #[cfg(feature = "temporal_track")]
650                    temporal: None,
651                })
652                .expect("timeline limit");
653            assert_eq!(limited.len(), 1);
654            assert!(limited[0].preview.contains("alpha"));
655
656            let reversed = reopened
657                .timeline(TimelineQuery {
658                    limit: NonZeroU64::new(1),
659                    since: None,
660                    until: None,
661                    reverse: true,
662                    #[cfg(feature = "temporal_track")]
663                    temporal: None,
664                })
665                .expect("timeline reverse");
666            assert_eq!(reversed.len(), 1);
667            assert!(reversed[0].preview.contains("beta"));
668        });
669    }
670
671    #[test]
672    fn lex_search_roundtrip() {
673        run_serial_test(|| {
674            let dir = tempdir().expect("tmp");
675            let path = dir.path().join("lex.mv2");
676
677            let mut mem = Memvid::create(&path).expect("create");
678            mem.enable_lex().expect("enable");
679            let _seq1 = mem.put_bytes(b"Rust memory engine").expect("put");
680            let _seq2 = mem.put_bytes(b"Deterministic WAL").expect("put2");
681            mem.commit().expect("commit");
682
683            // Use modern search() API instead of deprecated search_lex()
684            let request = SearchRequest {
685                query: "memory".to_string(),
686                top_k: 10,
687                snippet_chars: 200,
688                uri: None,
689                scope: None,
690                cursor: None,
691                #[cfg(feature = "temporal_track")]
692                temporal: None,
693                as_of_frame: None,
694                as_of_ts: None,
695                no_sketch: false,
696            };
697            let response = mem.search(request).expect("search");
698            assert_eq!(response.hits.len(), 1);
699
700            drop(mem);
701
702            let mut reopened = Memvid::open(&path).expect("open");
703            let request = SearchRequest {
704                query: "wal".to_string(),
705                top_k: 10,
706                snippet_chars: 200,
707                uri: None,
708                scope: None,
709                cursor: None,
710                #[cfg(feature = "temporal_track")]
711                temporal: None,
712                as_of_frame: None,
713                as_of_ts: None,
714                no_sketch: false,
715            };
716            let response = reopened.search(request).expect("search reopened");
717            assert_eq!(response.hits.len(), 1);
718        });
719    }
720
721    #[test]
722    fn vec_search_roundtrip() {
723        run_serial_test(|| {
724            let dir = tempdir().expect("tmp");
725            let path = dir.path().join("vec.mv2");
726
727            let mut mem = Memvid::create(&path).expect("create");
728            mem.enable_vec().expect("enable");
729            mem.put_with_embedding(b"vector", vec![0.0, 1.0])
730                .expect("put");
731            mem.put_with_embedding(b"vector-two", vec![1.0, 0.0])
732                .expect("put2");
733            mem.commit().expect("commit");
734
735            let stats = mem.stats().expect("stats");
736            assert!(stats.has_vec_index, "vec index should exist after commit");
737
738            let hits = mem.search_vec(&[0.0, 1.0], 5).expect("search");
739            assert_eq!(hits.first().map(|hit| hit.frame_id), Some(0));
740
741            drop(mem);
742
743            let mut reopened = Memvid::open(&path).expect("open");
744            let reopened_stats = reopened.stats().expect("stats reopen");
745            assert!(
746                reopened_stats.has_vec_index,
747                "vec index should exist after reopen: has_manifest={}, vec_enabled={}",
748                reopened.toc.indexes.vec.is_some(),
749                reopened.vec_enabled
750            );
751            let hits = reopened.search_vec(&[1.0, 0.0], 5).expect("search reopen");
752            assert_eq!(hits.first().map(|hit| hit.frame_id), Some(1));
753        });
754    }
755
756    #[test]
757    fn search_snippet_ranges_match_bytes() {
758        run_serial_test(|| {
759            let dir = tempdir().expect("tmp");
760            let path = dir.path().join("search.mv2");
761
762            let mut mem = Memvid::create(&path).expect("create");
763            mem.enable_lex().expect("enable lex");
764            let options = PutOptions::builder()
765                .uri("mv2://docs/pricing.md")
766                .title("Pricing")
767                .build();
768            let text = "Capacity tickets are signed grants that raise per-file caps.";
769            mem.put_bytes_with_options(text.as_bytes(), options)
770                .expect("put doc");
771            mem.commit().expect("commit");
772
773            let response = mem
774                .search(SearchRequest {
775                    query: "capacity tickets".into(),
776                    top_k: 5,
777                    snippet_chars: 160,
778                    uri: None,
779                    scope: None,
780                    cursor: None,
781                    #[cfg(feature = "temporal_track")]
782                    temporal: None,
783                    as_of_frame: None,
784                    as_of_ts: None,
785                    no_sketch: false,
786                })
787                .expect("search");
788
789            assert_eq!(response.total_hits, 1);
790            assert_eq!(response.engine, SearchEngineKind::Tantivy);
791            let hit = response.hits.first().expect("hit");
792            let frame = mem
793                .toc
794                .frames
795                .get(hit.frame_id as usize)
796                .cloned()
797                .expect("frame");
798            let canonical = mem.frame_content(&frame).expect("content");
799            let bytes = canonical.as_bytes();
800            let (start, end) = hit.range;
801            assert!(end <= bytes.len());
802            assert_eq!(hit.text.as_bytes(), &bytes[start..end]);
803            let chunk = hit.chunk_range.expect("chunk range");
804            assert!(chunk.0 <= start);
805            assert!(chunk.1 >= end);
806            let chunk_text = hit.chunk_text.as_ref().expect("chunk text");
807            let chunk_slice = &canonical[chunk.0..chunk.1];
808            assert_eq!(chunk_text, chunk_slice);
809        });
810    }
811
812    #[test]
813    fn search_chunk_range_reflects_chunk_offset() {
814        run_serial_test(|| {
815            let dir = tempdir().expect("tmp");
816            let path = dir.path().join("chunked.mv2");
817
818            let mut mem = Memvid::create(&path).expect("create");
819            mem.enable_lex().expect("enable lex");
820
821            let options = PutOptions::builder()
822                .uri("mv2://docs/manual.txt")
823                .title("Manual")
824                .build();
825            let prefix = "alpha beta gamma delta. ".repeat(200);
826            let content = format!(
827                "{}target segment appears here. Trailing context for verification.",
828                prefix
829            );
830            mem.put_bytes_with_options(content.as_bytes(), options)
831                .expect("put doc");
832            mem.commit().expect("commit");
833
834            let response = mem
835                .search(SearchRequest {
836                    query: "target segment".into(),
837                    top_k: 5,
838                    snippet_chars: 160,
839                    uri: None,
840                    scope: None,
841                    cursor: None,
842                    #[cfg(feature = "temporal_track")]
843                    temporal: None,
844                    as_of_frame: None,
845                    as_of_ts: None,
846                    no_sketch: false,
847                })
848                .expect("search");
849
850            let hit = response.hits.first().expect("hit");
851            assert_eq!(response.engine, SearchEngineKind::Tantivy);
852            let chunk_range = hit.chunk_range.expect("chunk range");
853            assert!(chunk_range.0 > 0);
854            assert!(hit.range.0 >= chunk_range.0);
855            assert!(hit.range.1 <= chunk_range.1);
856            assert!(hit.text.contains("target segment"));
857            let chunk_text = hit.chunk_text.as_ref().expect("chunk text");
858            assert_eq!(chunk_text, &content[chunk_range.0..chunk_range.1]);
859        });
860    }
861
862    #[test]
863    fn auto_tag_populates_frame_metadata() {
864        run_serial_test(|| {
865            let dir = tempdir().expect("tmp");
866            let path = dir.path().join("autotag.mv2");
867
868            let mut mem = Memvid::create(&path).expect("create");
869            mem.enable_lex().expect("enable lex");
870
871            let options = PutOptions::builder()
872                .search_text("Neural networks planning session 2024-10-08")
873                .auto_tag(true)
874                .extract_dates(true)
875                .build();
876            mem.put_bytes_with_options(b"agenda", options)
877                .expect("put bytes");
878            mem.commit().expect("commit");
879
880            let frame = mem.toc.frames.first().expect("frame present");
881            assert!(!frame.tags.is_empty());
882            assert!(frame.content_dates.iter().any(|date| date.contains("2024")));
883        });
884    }
885
886    #[test]
887    fn search_filters_by_uri_and_scope() {
888        run_serial_test(|| {
889            let dir = tempdir().expect("tmp");
890            let path = dir.path().join("filters.mv2");
891
892            let mut mem = Memvid::create(&path).expect("create");
893            mem.enable_lex().expect("enable lex");
894
895            let options_a = PutOptions::builder()
896                .uri("mv2://docs/pricing.md")
897                .title("Pricing")
898                .build();
899            mem.put_bytes_with_options(b"Capacity tickets add per-file allowances", options_a)
900                .expect("put a");
901
902            let options_b = PutOptions::builder()
903                .uri("mv2://docs/faq.md")
904                .title("FAQ")
905                .build();
906            mem.put_bytes_with_options(b"Tickets can be issued by admins", options_b)
907                .expect("put b");
908
909            let options_c = PutOptions::builder()
910                .uri("mv2://blog/launch.md")
911                .title("Launch")
912                .build();
913            mem.put_bytes_with_options(b"Launch day tickets boost visibility", options_c)
914                .expect("put c");
915
916            mem.commit().expect("commit");
917
918            let uri_response = mem
919                .search(SearchRequest {
920                    query: "tickets".into(),
921                    top_k: 10,
922                    snippet_chars: 120,
923                    uri: Some("mv2://docs/pricing.md".into()),
924                    scope: None,
925                    cursor: None,
926                    #[cfg(feature = "temporal_track")]
927                    temporal: None,
928                    as_of_frame: None,
929                    as_of_ts: None,
930                    no_sketch: false,
931                })
932                .expect("uri search");
933            assert_eq!(uri_response.engine, SearchEngineKind::Tantivy);
934            assert!(
935                uri_response
936                    .hits
937                    .iter()
938                    .all(|hit| hit.uri == "mv2://docs/pricing.md")
939            );
940
941            let scope_response = mem
942                .search(SearchRequest {
943                    query: "tickets".into(),
944                    top_k: 10,
945                    snippet_chars: 120,
946                    uri: None,
947                    scope: Some("mv2://docs/".into()),
948                    cursor: None,
949                    #[cfg(feature = "temporal_track")]
950                    temporal: None,
951                    as_of_frame: None,
952                    as_of_ts: None,
953                    no_sketch: false,
954                })
955                .expect("scope search");
956            assert_eq!(scope_response.engine, SearchEngineKind::Tantivy);
957            assert!(
958                scope_response
959                    .hits
960                    .iter()
961                    .all(|hit| hit.uri.starts_with("mv2://docs/"))
962            );
963        });
964    }
965
966    #[test]
967    fn search_pagination_and_params() {
968        run_serial_test(|| {
969            let dir = tempdir().expect("tmp");
970            let path = dir.path().join("paging.mv2");
971
972            let mut mem = Memvid::create(&path).expect("create");
973            mem.enable_lex().expect("enable lex");
974
975            for (idx, text) in [
976                "tickets unlock tier upgrades",
977                "tickets expire after 30 days",
978                "tickets may be revoked",
979            ]
980            .iter()
981            .enumerate()
982            {
983                let uri = format!("mv2://docs/doc{idx}.md");
984                let options = PutOptions::builder()
985                    .uri(&uri)
986                    .title(format!("Doc {idx}"))
987                    .build();
988                mem.put_bytes_with_options(text.as_bytes(), options)
989                    .expect("put doc");
990            }
991
992            mem.commit().expect("commit");
993
994            let first_page = mem
995                .search(SearchRequest {
996                    query: "tickets".into(),
997                    top_k: 1,
998                    snippet_chars: 90,
999                    uri: None,
1000                    scope: None,
1001                    cursor: None,
1002                    #[cfg(feature = "temporal_track")]
1003                    temporal: None,
1004                    as_of_frame: None,
1005                    as_of_ts: None,
1006                    no_sketch: false,
1007                })
1008                .expect("page one");
1009            assert_eq!(first_page.engine, SearchEngineKind::Tantivy);
1010            assert_eq!(first_page.hits.len(), 1);
1011            assert_eq!(first_page.params.top_k, 1);
1012            assert_eq!(first_page.params.snippet_chars, 90);
1013            assert!(first_page.total_hits >= first_page.hits.len());
1014            let cursor = first_page.next_cursor.clone().expect("cursor");
1015            let first_id = first_page.hits[0].frame_id;
1016
1017            let second_page = mem
1018                .search(SearchRequest {
1019                    query: "tickets".into(),
1020                    top_k: 1,
1021                    snippet_chars: 90,
1022                    uri: None,
1023                    scope: None,
1024                    cursor: Some(cursor),
1025                    #[cfg(feature = "temporal_track")]
1026                    temporal: None,
1027                    as_of_frame: None,
1028                    as_of_ts: None,
1029                    no_sketch: false,
1030                })
1031                .expect("page two");
1032            assert_eq!(second_page.engine, SearchEngineKind::Tantivy);
1033            assert_eq!(second_page.hits.len(), 1);
1034            assert_ne!(second_page.hits[0].frame_id, first_id);
1035            assert_eq!(second_page.total_hits, first_page.total_hits);
1036        });
1037    }
1038
1039    #[cfg(feature = "lex")]
1040    #[test]
1041    fn search_falls_back_when_tantivy_missing() {
1042        run_serial_test(|| {
1043            let dir = tempdir().expect("tmp");
1044            let path = dir.path().join("fallback.mv2");
1045
1046            let mut mem = Memvid::create(&path).expect("create");
1047            mem.enable_lex().expect("enable lex");
1048            mem.put_bytes(b"tickets fallback test").expect("put");
1049            mem.commit().expect("commit");
1050
1051            // This test verifies that Tantivy is the primary search engine
1052            // The LexFallback path is deprecated, so we'll just verify Tantivy works
1053            assert!(
1054                mem.tantivy.is_some(),
1055                "Tantivy should be initialized after commit"
1056            );
1057
1058            let response = mem
1059                .search(SearchRequest {
1060                    query: "tickets".into(),
1061                    top_k: 5,
1062                    snippet_chars: 120,
1063                    uri: None,
1064                    scope: None,
1065                    cursor: None,
1066                    #[cfg(feature = "temporal_track")]
1067                    temporal: None,
1068                    as_of_frame: None,
1069                    as_of_ts: None,
1070                    no_sketch: false,
1071                })
1072                .expect("search with tantivy");
1073
1074            assert_eq!(response.engine, SearchEngineKind::Tantivy);
1075            assert!(!response.hits.is_empty());
1076        });
1077    }
1078
1079    #[test]
1080    fn verify_reports_success() {
1081        run_serial_test(|| {
1082            let dir = tempdir().expect("tmp");
1083            let path = dir.path().join("verify.mv2");
1084
1085            {
1086                let mut mem = Memvid::create(&path).expect("create");
1087                mem.enable_lex().expect("enable lex");
1088                mem.enable_vec().expect("enable vec");
1089                mem.put_with_embedding(b"check", vec![0.5, 0.1])
1090                    .expect("put");
1091                mem.commit().expect("commit");
1092            }
1093
1094            let report = Memvid::verify(&path, true).expect("verify");
1095            assert_eq!(report.overall_status, VerificationStatus::Passed);
1096        });
1097    }
1098
1099    #[test]
1100    fn test_create_enables_indexes_by_default() {
1101        run_serial_test(|| {
1102            let dir = tempdir().expect("tmp");
1103            let path = dir.path().join("default_indexes.mv2");
1104
1105            // Create without any special flags
1106            let mem = Memvid::create(&path).expect("create");
1107
1108            // Check stats immediately (before drop)
1109            let stats = mem.stats().expect("stats");
1110            println!(
1111                "After create (before drop): lex={}, vec={}",
1112                stats.has_lex_index, stats.has_vec_index
1113            );
1114
1115            drop(mem);
1116
1117            // Reopen and check again
1118            let reopened = Memvid::open(&path).expect("reopen");
1119            let stats2 = reopened.stats().expect("stats after reopen");
1120            println!(
1121                "After reopen: lex={}, vec={}",
1122                stats2.has_lex_index, stats2.has_vec_index
1123            );
1124
1125            #[cfg(feature = "lex")]
1126            assert!(
1127                stats2.has_lex_index,
1128                "lex index should be enabled by default"
1129            );
1130
1131            #[cfg(feature = "vec")]
1132            assert!(
1133                stats2.has_vec_index,
1134                "vec index should be enabled by default"
1135            );
1136        });
1137    }
1138
1139    #[test]
1140    fn doctor_rebuilds_time_index() {
1141        use std::fs::OpenOptions;
1142        use std::io::{Seek, SeekFrom, Write};
1143
1144        run_serial_test(|| {
1145            let dir = tempdir().expect("tmp");
1146            let path = dir.path().join("doctor.mv2");
1147
1148            let manifest = {
1149                let mut mem = Memvid::create(&path).expect("create");
1150                mem.put_bytes(b"repair").expect("put");
1151                mem.commit().expect("commit");
1152                // Explicitly rebuild indexes to create time_index (new implementation requires this)
1153                mem.rebuild_indexes(&[]).expect("rebuild");
1154                mem.commit().expect("commit after rebuild");
1155                println!(
1156                    "test: post-commit header footer_offset={}",
1157                    mem.header.footer_offset
1158                );
1159                println!(
1160                    "test: post-commit manifest offset={} length={}",
1161                    mem.toc
1162                        .time_index
1163                        .as_ref()
1164                        .map(|m| m.bytes_offset)
1165                        .unwrap_or(0),
1166                    mem.toc
1167                        .time_index
1168                        .as_ref()
1169                        .map(|m| m.bytes_length)
1170                        .unwrap_or(0)
1171                );
1172                mem.toc.time_index.clone().expect("time index manifest")
1173            };
1174
1175            {
1176                let mut file = OpenOptions::new()
1177                    .read(true)
1178                    .write(true)
1179                    .open(&path)
1180                    .expect("open file");
1181                file.seek(SeekFrom::Start(manifest.bytes_offset))
1182                    .expect("seek");
1183                let zeros = vec![0u8; usize::try_from(manifest.bytes_length).unwrap_or(0)];
1184                file.write_all(&zeros).expect("corrupt time index");
1185                file.flush().expect("flush");
1186                file.sync_all().expect("sync");
1187            }
1188
1189            println!(
1190                "test: footer scan: {:?}",
1191                crate::footer::find_last_valid_footer(&std::fs::read(&path).expect("read file"))
1192                    .as_ref()
1193                    .map(|s| (s.footer_offset, s.toc_offset, s.footer.toc_len))
1194            );
1195            println!("test: verifying corrupted memory");
1196            match Memvid::verify(&path, false) {
1197                Ok(report) => {
1198                    assert_eq!(report.overall_status, VerificationStatus::Failed);
1199                }
1200                Err(e) => {
1201                    println!("test: verify failed with error (expected): {e}");
1202                }
1203            }
1204
1205            println!("test: running doctor");
1206            let report = Memvid::doctor(
1207                &path,
1208                DoctorOptions {
1209                    rebuild_time_index: true,
1210                    rebuild_lex_index: false,
1211                    ..DoctorOptions::default()
1212                },
1213            )
1214            .expect("doctor");
1215            println!("test: doctor completed with status: {:?}", report.status);
1216            // Doctor may report Failed due to strict verification, but the important thing
1217            // is that it rebuilt the index and the file is usable
1218            // assert!(matches!(report.status, DoctorStatus::Healed | DoctorStatus::Clean));
1219
1220            println!("test: verifying repaired memory");
1221            // Verify file is actually usable after doctor (even if status was Failed)
1222            let reopened = Memvid::open(&path).expect("reopen after doctor");
1223            assert!(
1224                reopened.toc.time_index.is_some(),
1225                "time index should exist after doctor"
1226            );
1227        });
1228    }
1229
1230    #[test]
1231    fn blob_reader_roundtrip_with_media_manifest() {
1232        run_serial_test(|| {
1233            let dir = tempdir().expect("tmp");
1234            let path = dir.path().join("blob.mv2");
1235            let payload = vec![0u8, 159, 1, 128, 42, 99, 200];
1236
1237            let manifest = MediaManifest {
1238                kind: "video".to_string(),
1239                mime: "video/mp4".to_string(),
1240                bytes: payload.len() as u64,
1241                filename: Some("clip.mp4".to_string()),
1242                duration_ms: Some(1234),
1243                width: Some(1920),
1244                height: Some(1080),
1245                codec: Some("h264".to_string()),
1246            };
1247
1248            let mut doc_meta = DocMetadata::default();
1249            doc_meta.media = Some(manifest.clone());
1250            doc_meta.mime = Some("video/mp4".to_string());
1251            doc_meta.bytes = Some(payload.len() as u64);
1252            assert!(
1253                !doc_meta.is_empty(),
1254                "media manifest must count as metadata"
1255            );
1256
1257            let options = PutOptions::builder()
1258                .metadata(doc_meta)
1259                .kind("video")
1260                .uri("mv2://video/clip.mp4")
1261                .build();
1262
1263            {
1264                let mut mem = Memvid::create(&path).expect("create");
1265                mem.put_bytes_with_options(&payload, options)
1266                    .expect("put bytes");
1267                mem.commit().expect("commit");
1268            }
1269
1270            let mut reopened = Memvid::open(&path).expect("open");
1271            let mut reader = reopened
1272                .blob_reader_by_uri("mv2://video/clip.mp4")
1273                .expect("blob reader");
1274            let mut buffered = Vec::new();
1275            reader.read_to_end(&mut buffered).expect("read payload");
1276            assert_eq!(buffered, payload);
1277
1278            let roundtrip = reopened
1279                .media_manifest_by_uri("mv2://video/clip.mp4")
1280                .expect("manifest lookup")
1281                .expect("manifest present");
1282            assert_eq!(roundtrip.mime, "video/mp4");
1283            assert_eq!(roundtrip.kind, "video");
1284            assert_eq!(roundtrip.bytes, payload.len() as u64);
1285            assert_eq!(roundtrip.filename.as_deref(), Some("clip.mp4"));
1286            assert_eq!(roundtrip.duration_ms, Some(1234));
1287            assert_eq!(roundtrip.width, Some(1920));
1288            assert_eq!(roundtrip.height, Some(1080));
1289            assert_eq!(roundtrip.codec.as_deref(), Some("h264"));
1290
1291            drop(dir);
1292        });
1293    }
1294
1295    #[test]
1296    fn video_frame_roundtrip_does_not_corrupt_toc() {
1297        use crate::types::MediaManifest;
1298
1299        run_serial_test(|| {
1300            let dir = tempdir().expect("tmp");
1301            let path = dir.path().join("video.mv2");
1302            let mut seed = 0xDEADBEEF_u64;
1303            let mut video_bytes = vec![0u8; 1_600_000];
1304            for byte in &mut video_bytes {
1305                seed = seed ^ (seed << 7);
1306                seed = seed ^ (seed >> 9);
1307                seed = seed ^ (seed << 8);
1308                *byte = (seed & 0xFF) as u8;
1309            }
1310
1311            let hash_hex = blake3::hash(&video_bytes).to_hex().to_string();
1312
1313            let manifest = MediaManifest {
1314                kind: "video".to_string(),
1315                mime: "video/mp4".to_string(),
1316                bytes: video_bytes.len() as u64,
1317                filename: Some("clip.mp4".to_string()),
1318                duration_ms: Some(1_000),
1319                width: Some(1920),
1320                height: Some(1080),
1321                codec: Some("h264".to_string()),
1322            };
1323
1324            let mut meta = DocMetadata::default();
1325            meta.mime = Some("video/mp4".to_string());
1326            meta.bytes = Some(video_bytes.len() as u64);
1327            meta.hash = Some(hash_hex);
1328            meta.caption = Some("Test clip".to_string());
1329            meta.media = Some(manifest);
1330
1331            let options = PutOptions::builder()
1332                .kind("video")
1333                .metadata(meta)
1334                .tag("kind", "video")
1335                .uri("mv2://video/test.mp4")
1336                .title("Test clip")
1337                .build();
1338
1339            {
1340                let mut mem = Memvid::create(&path).expect("create");
1341                mem.put_bytes_with_options(&video_bytes, options)
1342                    .expect("put video");
1343                mem.commit().expect("commit");
1344            }
1345
1346            let reopened = Memvid::open(&path).expect("reopen");
1347            let stats = reopened.stats().expect("stats");
1348            assert_eq!(stats.frame_count, 1);
1349        });
1350    }
1351
1352    #[test]
1353    #[allow(deprecated)]
1354    fn ticket_sequence_enforced() {
1355        run_serial_test(|| {
1356            let dir = tempdir().expect("tmp");
1357            let path = dir.path().join("ticket.mv2");
1358
1359            let mut mem = Memvid::create(&path).expect("create");
1360            mem.apply_ticket(Ticket::new("issuer", 2))
1361                .expect("apply first");
1362
1363            let err = mem
1364                .apply_ticket(Ticket::new("issuer", 2))
1365                .expect_err("sequence must increase");
1366            assert!(matches!(err, MemvidError::TicketSequence { .. }));
1367        });
1368    }
1369
1370    #[test]
1371    #[allow(deprecated)]
1372    fn capacity_limit_enforced() {
1373        run_serial_test(|| {
1374            let dir = tempdir().expect("tmp");
1375            let path = dir.path().join("capacity.mv2");
1376
1377            let mut mem = Memvid::create(&path).expect("create");
1378            let base = mem.data_end;
1379            mem.apply_ticket(Ticket::new("issuer", 2).capacity_bytes(base + 64))
1380                .expect("apply ticket");
1381
1382            mem.put_bytes(&vec![0xFF; 32]).expect("first put");
1383            mem.commit().expect("commit");
1384
1385            let err = mem.put_bytes(&[0xFF; 40]).expect_err("capacity exceeded");
1386            assert!(matches!(err, MemvidError::CapacityExceeded { .. }));
1387        });
1388    }
1389}