1use std::convert::TryInto;
10use std::fs::{File, OpenOptions};
11use std::io::{Read, Seek, SeekFrom};
12use std::panic;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, RwLock};
15
16use crate::constants::{MAGIC, SPEC_VERSION, WAL_OFFSET, WAL_SIZE_TINY};
17use crate::error::{MemvidError, Result};
18use crate::footer::{FooterSlice, find_last_valid_footer};
19use crate::io::header::HeaderCodec;
20#[cfg(feature = "parallel_segments")]
21use crate::io::manifest_wal::ManifestWal;
22use crate::io::wal::EmbeddedWal;
23use crate::lock::{FileLock, LockMode};
24#[cfg(feature = "lex")]
25use crate::search::{EmbeddedLexStorage, TantivyEngine};
26#[cfg(feature = "temporal_track")]
27use crate::types::FrameId;
28#[cfg(feature = "parallel_segments")]
29use crate::types::IndexSegmentRef;
30use crate::types::{
31 FrameStatus, Header, IndexManifests, LogicMesh, MemoriesTrack, SchemaRegistry, SegmentCatalog,
32 SketchTrack, TicketRef, Tier, Toc, VectorCompression,
33};
34#[cfg(feature = "temporal_track")]
35use crate::{TemporalTrack, temporal_track_read};
36use crate::{lex::LexIndex, vec::VecIndex};
37use blake3::Hasher;
38use memmap2::Mmap;
39
/// Value `LockSettings::default()` assigns to `timeout_ms`
/// (presumably milliseconds, per the `_MS` suffix).
const DEFAULT_LOCK_TIMEOUT_MS: u64 = 250;
/// Value `LockSettings::default()` assigns to `heartbeat_ms`.
const DEFAULT_HEARTBEAT_MS: u64 = 2_000;
/// Value `LockSettings::default()` assigns to `stale_grace_ms`.
const DEFAULT_STALE_GRACE_MS: u64 = 10_000;
43
/// In-memory handle to one memvid file: the open file and its lock, the
/// decoded header and table of contents, the embedded WAL, and the optional
/// indexes and tracks that the constructors load from the file.
pub struct Memvid {
    /// Open handle to the backing file.
    pub(crate) file: File,
    /// Path the handle was created or opened with.
    pub(crate) path: PathBuf,
    /// File lock acquired by the constructor that produced this handle.
    pub(crate) lock: FileLock,
    /// `true` for snapshot opens, and for locked opens whose lock mode is
    /// `LockMode::Shared`; `false` for newly created files.
    pub(crate) read_only: bool,
    /// Decoded file header (may be patched in memory after TOC recovery).
    pub(crate) header: Header,
    /// Decoded table of contents.
    pub(crate) toc: Toc,
    /// Embedded write-ahead log opened against `header`.
    pub(crate) wal: EmbeddedWal,
    /// Added to the number of TOC frames by `next_frame_id`; starts at 0.
    pub(crate) pending_frame_inserts: u64,
    /// Initialised from `header.footer_offset` on create, from
    /// `compute_data_end` on a locked open, and from the tail snapshot on a
    /// snapshot open.
    pub(crate) data_end: u64,
    /// 0 for new files; otherwise taken from `detect_generation` or the tail
    /// snapshot.
    pub(crate) generation: u64,
    /// Lock tuning knobs; every constructor starts from `LockSettings::default()`.
    pub(crate) lock_settings: LockSettings,
    /// Whether lexical indexing is considered enabled for this handle.
    pub(crate) lex_enabled: bool,
    /// Lexical index, populated by `load_lex_index_from_manifest` when enabled.
    pub(crate) lex_index: Option<LexIndex>,
    /// Shared storage backing the embedded lexical index (feature `lex`).
    #[cfg(feature = "lex")]
    #[allow(dead_code)]
    pub(crate) lex_storage: Arc<RwLock<EmbeddedLexStorage>>,
    /// Whether vector indexing is considered enabled for this handle.
    pub(crate) vec_enabled: bool,
    /// Compression mode returned by `vector_compression`; starts as
    /// `VectorCompression::None`.
    pub(crate) vec_compression: VectorCompression,
    /// Vector index, populated by `load_vec_index_from_manifest` when enabled.
    pub(crate) vec_index: Option<VecIndex>,
    /// Whether a CLIP index is considered enabled for this handle.
    pub(crate) clip_enabled: bool,
    /// CLIP index, populated by `load_clip_index_from_manifest` when enabled.
    pub(crate) clip_index: Option<crate::clip::ClipIndex>,
    /// Set to `true` by mutators in this file (e.g. the memory-binding
    /// methods) after they change in-memory state.
    pub(crate) dirty: bool,
    /// Tantivy engine handle (feature `lex`); `None` until `init_tantivy` runs.
    #[cfg(feature = "lex")]
    pub(crate) tantivy: Option<TantivyEngine>,
    /// Dirty flag paired with `tantivy`; starts `false`.
    #[cfg(feature = "lex")]
    pub(crate) tantivy_dirty: bool,
    /// Lazily loaded temporal track cache; see `ensure_temporal_track_loaded`
    /// and `clear_temporal_track_cache`.
    #[cfg(feature = "temporal_track")]
    pub(crate) temporal_track: Option<TemporalTrack>,
    /// Side-car manifest WAL (feature `parallel_segments`); `None` for
    /// snapshot opens.
    #[cfg(feature = "parallel_segments")]
    pub(crate) manifest_wal: Option<ManifestWal>,
    /// Memories track; replaced by `load_memories_track` when the TOC has one.
    pub(crate) memories_track: MemoriesTrack,
    /// Logic mesh; replaced by `load_logic_mesh` when the TOC has one.
    pub(crate) logic_mesh: LogicMesh,
    /// Sketch track; replaced by `load_sketch_track` when the TOC has one.
    pub(crate) sketch_track: SketchTrack,
    /// Schema registry; every constructor starts with `SchemaRegistry::new()`.
    pub(crate) schema_registry: SchemaRegistry,
    /// Starts `false` in every constructor (presumably toggles strict schema
    /// validation — confirm against its users).
    pub(crate) schema_strict: bool,
    /// Replay session in progress, if any (feature `replay`).
    #[cfg(feature = "replay")]
    pub(crate) active_session: Option<crate::replay::ActiveSession>,
    /// Replay sessions collected so far (feature `replay`); starts empty.
    #[cfg(feature = "replay")]
    pub(crate) completed_sessions: Vec<crate::replay::ReplaySession>,
}
100
/// Options accepted by `Memvid::open_read_only_with_options`.
#[derive(Debug, Clone, Copy)]
pub struct OpenReadOptions {
    /// When `true`, `open_read_only_with_options` delegates to `Memvid::open`
    /// instead of taking the read-only snapshot path. Defaults to `false`.
    pub allow_repair: bool,
}
106
/// Tunable parameters stored on a `Memvid` handle and exposed through
/// `lock_settings` / `lock_settings_mut`.
#[derive(Debug, Clone)]
pub struct LockSettings {
    /// Defaults to `DEFAULT_LOCK_TIMEOUT_MS` (250).
    pub timeout_ms: u64,
    /// Defaults to `DEFAULT_HEARTBEAT_MS` (2 000).
    pub heartbeat_ms: u64,
    /// Defaults to `DEFAULT_STALE_GRACE_MS` (10 000).
    pub stale_grace_ms: u64,
    /// Defaults to `false` (presumably allows taking over a stale lock —
    /// confirm against the lock implementation).
    pub force_stale: bool,
    /// Defaults to `None` (presumably a label for the command holding the
    /// lock — confirm against the lock implementation).
    pub command: Option<String>,
}
115
116impl Default for LockSettings {
117 fn default() -> Self {
118 Self {
119 timeout_ms: DEFAULT_LOCK_TIMEOUT_MS,
120 heartbeat_ms: DEFAULT_HEARTBEAT_MS,
121 stale_grace_ms: DEFAULT_STALE_GRACE_MS,
122 force_stale: false,
123 command: None,
124 }
125 }
126}
127
128impl Default for OpenReadOptions {
129 fn default() -> Self {
130 Self {
131 allow_repair: false,
132 }
133 }
134}
135
136impl Memvid {
    /// Creates (or truncates) the file at `path`, writes a fresh header and an
    /// empty TOC/footer, and returns a writable handle.
    ///
    /// # Errors
    ///
    /// Propagates any error from `ensure_single_file`, from opening, locking,
    /// resizing or syncing the file, and from writing the header, WAL or TOC.
    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_ref = path.as_ref();
        ensure_single_file(path_ref)?;

        // Create/truncate the file; this handle is dropped immediately and the
        // file is re-opened through the lock helper below.
        // NOTE(review): truncation happens before `FileLock::open_and_lock`
        // runs — confirm this cannot clobber a file another process has locked.
        OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(true)
            .open(path_ref)?;
        let (mut file, lock) = FileLock::open_and_lock(path_ref)?;

        // The footer initially sits directly after the WAL region.
        let header = Header {
            magic: MAGIC,
            version: SPEC_VERSION,
            footer_offset: WAL_OFFSET + WAL_SIZE_TINY,
            wal_offset: WAL_OFFSET,
            wal_size: WAL_SIZE_TINY,
            wal_checkpoint_pos: 0,
            wal_sequence: 0,
            toc_checksum: [0u8; 32],
        };

        let mut toc = empty_toc();
        #[cfg(feature = "lex")]
        {
            toc.segment_catalog.lex_enabled = true;
        }
        // Size the file to cover header + WAL before writing the header.
        file.set_len(header.footer_offset)?;
        HeaderCodec::write(&mut file, &header)?;

        let wal = EmbeddedWal::open(&file, &header)?;
        let data_end = header.footer_offset;
        #[cfg(feature = "lex")]
        let lex_storage = Arc::new(RwLock::new(EmbeddedLexStorage::new()));
        #[cfg(feature = "parallel_segments")]
        let manifest_wal = ManifestWal::open(manifest_wal_path(path_ref))?;
        #[cfg(feature = "parallel_segments")]
        let manifest_wal_entries = manifest_wal.replay()?;

        // Index flags follow the compiled-in features for new files.
        let mut memvid = Self {
            file,
            path: path_ref.to_path_buf(),
            lock,
            read_only: false,
            header,
            toc,
            wal,
            pending_frame_inserts: 0,
            data_end,
            generation: 0,
            lock_settings: LockSettings::default(),
            lex_enabled: cfg!(feature = "lex"), lex_index: None,
            #[cfg(feature = "lex")]
            lex_storage,
            vec_enabled: cfg!(feature = "vec"), vec_compression: VectorCompression::None,
            vec_index: None,
            clip_enabled: cfg!(feature = "clip"), clip_index: None,
            dirty: false,
            #[cfg(feature = "lex")]
            tantivy: None,
            #[cfg(feature = "lex")]
            tantivy_dirty: false,
            #[cfg(feature = "temporal_track")]
            temporal_track: None,
            #[cfg(feature = "parallel_segments")]
            manifest_wal: Some(manifest_wal),
            memories_track: MemoriesTrack::new(),
            logic_mesh: LogicMesh::new(),
            sketch_track: SketchTrack::default(),
            schema_registry: SchemaRegistry::new(),
            schema_strict: false,
            #[cfg(feature = "replay")]
            active_session: None,
            #[cfg(feature = "replay")]
            completed_sessions: Vec::new(),
        };

        #[cfg(feature = "lex")]
        memvid.init_tantivy()?;

        #[cfg(feature = "parallel_segments")]
        memvid.load_manifest_segments(manifest_wal_entries);

        memvid.bootstrap_segment_catalog();

        // Placeholder manifests for the empty indexes: zero length, located at
        // the current end of data.
        let empty_offset = memvid.data_end;
        // NOTE(review): this literal matches the SHA-256 digest of empty input,
        // while other checksums in this file are computed with blake3 — confirm
        // which digest the index loaders expect for an empty payload.
        let empty_checksum = *b"\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9\x24\
            \x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52\xb8\x55";

        #[cfg(feature = "lex")]
        if memvid.lex_enabled && memvid.toc.indexes.lex.is_none() {
            memvid.toc.indexes.lex = Some(crate::types::LexIndexManifest {
                doc_count: 0,
                generation: 0,
                bytes_offset: empty_offset,
                bytes_length: 0,
                checksum: empty_checksum,
            });
        }

        #[cfg(feature = "vec")]
        if memvid.vec_enabled && memvid.toc.indexes.vec.is_none() {
            memvid.toc.indexes.vec = Some(crate::types::VecIndexManifest {
                vector_count: 0,
                dimension: 0,
                bytes_offset: empty_offset,
                bytes_length: 0,
                checksum: empty_checksum,
                compression_mode: memvid.vec_compression.clone(),
            });
        }

        // Write the TOC + footer first, then record its checksum in the header
        // and flush everything to disk.
        memvid.rewrite_toc_footer()?;
        memvid.header.toc_checksum = memvid.toc.toc_checksum;
        crate::persist_header(&mut memvid.file, &memvid.header)?;
        memvid.file.sync_all()?;
        Ok(memvid)
    }
263
264 pub fn lock_settings(&self) -> &LockSettings {
265 &self.lock_settings
266 }
267
268 pub fn lock_settings_mut(&mut self) -> &mut LockSettings {
269 &mut self.lock_settings
270 }
271
272 pub fn set_vector_compression(&mut self, compression: VectorCompression) {
275 self.vec_compression = compression;
276 }
277
278 pub fn vector_compression(&self) -> &VectorCompression {
280 &self.vec_compression
281 }
282
283 pub fn next_frame_id(&self) -> u64 {
289 (self.toc.frames.len() as u64).saturating_add(self.pending_frame_inserts)
290 }
291
292 #[must_use]
296 pub fn frame_count(&self) -> usize {
297 self.toc.frames.len()
298 }
299
300 fn open_locked(mut file: File, lock: FileLock, path_ref: &Path) -> Result<Self> {
301 let mut magic = [0u8; 4];
304 let is_mv2e = file.read_exact(&mut magic).is_ok() && magic == *b"MV2E";
305 file.seek(SeekFrom::Start(0))?;
306 if is_mv2e {
307 return Err(MemvidError::EncryptedFile {
308 path: path_ref.to_path_buf(),
309 hint: format!("Run: memvid unlock {}", path_ref.display()),
310 });
311 }
312
313 let mut header = HeaderCodec::read(&mut file)?;
314 let toc = match read_toc(&mut file, &header) {
315 Ok(toc) => toc,
316 Err(err @ MemvidError::Decode(_)) | Err(err @ MemvidError::InvalidToc { .. }) => {
317 tracing::info!("toc decode failed ({}); attempting recovery", err);
318 let (toc, recovered_offset) = recover_toc(&mut file, Some(header.footer_offset))?;
319 if recovered_offset != header.footer_offset
320 || header.toc_checksum != toc.toc_checksum
321 {
322 header.footer_offset = recovered_offset;
323 header.toc_checksum = toc.toc_checksum;
324 crate::persist_header(&mut file, &header)?;
325 }
326 toc
327 }
328 Err(err) => return Err(err),
329 };
330 let checksum_result = toc.verify_checksum();
331
332 let file_len = file.metadata().map(|m| m.len()).unwrap_or(0);
334 if let Err(e) = validate_segment_integrity(&toc, &header, file_len) {
335 tracing::warn!("Segment integrity validation failed: {}", e);
336 }
339 ensure_non_overlapping_frames(&toc, file_len)?;
340
341 let wal = EmbeddedWal::open(&file, &header)?;
342 #[cfg(feature = "lex")]
343 let lex_storage = Arc::new(RwLock::new(EmbeddedLexStorage::from_manifest(
344 toc.indexes.lex.as_ref(),
345 &toc.indexes.lex_segments,
346 )));
347 #[cfg(feature = "parallel_segments")]
348 let manifest_wal = ManifestWal::open(manifest_wal_path(path_ref))?;
349 #[cfg(feature = "parallel_segments")]
350 let manifest_wal_entries = manifest_wal.replay()?;
351
352 let generation = detect_generation(&file)?.unwrap_or(0);
353 let read_only = lock.mode() == LockMode::Shared;
354
355 let mut memvid = Self {
356 file,
357 path: path_ref.to_path_buf(),
358 lock,
359 read_only,
360 header,
361 toc,
362 wal,
363 pending_frame_inserts: 0,
364 data_end: 0,
365 generation,
366 lock_settings: LockSettings::default(),
367 lex_enabled: false,
368 lex_index: None,
369 #[cfg(feature = "lex")]
370 lex_storage,
371 vec_enabled: false,
372 vec_compression: VectorCompression::None,
373 vec_index: None,
374 clip_enabled: false,
375 clip_index: None,
376 dirty: false,
377 #[cfg(feature = "lex")]
378 tantivy: None,
379 #[cfg(feature = "lex")]
380 tantivy_dirty: false,
381 #[cfg(feature = "temporal_track")]
382 temporal_track: None,
383 #[cfg(feature = "parallel_segments")]
384 manifest_wal: Some(manifest_wal),
385 memories_track: MemoriesTrack::new(),
386 logic_mesh: LogicMesh::new(),
387 sketch_track: SketchTrack::default(),
388 schema_registry: SchemaRegistry::new(),
389 schema_strict: false,
390 #[cfg(feature = "replay")]
391 active_session: None,
392 #[cfg(feature = "replay")]
393 completed_sessions: Vec::new(),
394 };
395 memvid.data_end = compute_data_end(&memvid.toc, &memvid.header);
396 memvid.lex_enabled = has_lex_index(&memvid.toc);
398 if memvid.lex_enabled {
399 memvid.load_lex_index_from_manifest()?;
400 }
401 #[cfg(feature = "lex")]
402 {
403 memvid.init_tantivy()?;
404 }
405 memvid.vec_enabled =
406 memvid.toc.indexes.vec.is_some() || !memvid.toc.segment_catalog.vec_segments.is_empty();
407 if memvid.vec_enabled {
408 memvid.load_vec_index_from_manifest()?;
409 }
410 memvid.clip_enabled = memvid.toc.indexes.clip.is_some();
411 if memvid.clip_enabled {
412 memvid.load_clip_index_from_manifest()?;
413 }
414 memvid.recover_wal()?;
415 #[cfg(feature = "parallel_segments")]
416 memvid.load_manifest_segments(manifest_wal_entries);
417 memvid.bootstrap_segment_catalog();
418 #[cfg(feature = "temporal_track")]
419 memvid.ensure_temporal_track_loaded()?;
420 memvid.load_memories_track()?;
421 memvid.load_logic_mesh()?;
422 memvid.load_sketch_track()?;
423 if checksum_result.is_err() {
424 memvid.toc.verify_checksum()?;
425 if memvid.toc.toc_checksum != memvid.header.toc_checksum {
426 memvid.header.toc_checksum = memvid.toc.toc_checksum;
427 crate::persist_header(&mut memvid.file, &memvid.header)?;
428 memvid.file.sync_all()?;
429 }
430 }
431 Ok(memvid)
432 }
433
434 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
436 let path_ref = path.as_ref();
437 ensure_single_file(path_ref)?;
438
439 let (file, lock) = FileLock::open_and_lock(path_ref)?;
440 Self::open_locked(file, lock, path_ref)
441 }
442
443 pub fn open_read_only<P: AsRef<Path>>(path: P) -> Result<Self> {
444 Self::open_read_only_with_options(path, OpenReadOptions::default())
445 }
446
447 pub fn open_read_only_with_options<P: AsRef<Path>>(
448 path: P,
449 options: OpenReadOptions,
450 ) -> Result<Self> {
451 let path_ref = path.as_ref();
452 ensure_single_file(path_ref)?;
453
454 if options.allow_repair {
455 return Self::open(path_ref);
456 }
457
458 Self::open_read_only_snapshot(path_ref)
459 }
460
    /// Opens `path_ref` as a read-only snapshot: the TOC, footer offset, data
    /// end and generation come from `load_tail_snapshot`, a shared lock is
    /// taken, and the indexes and tracks referenced by the TOC are loaded.
    /// No WAL recovery or header repair is performed on this path.
    fn open_read_only_snapshot(path_ref: &Path) -> Result<Self> {
        // NOTE(review): the file is opened with `write(true)` even though the
        // handle is read-only — this fails on files the process cannot write;
        // confirm whether the lock or WAL helpers actually need write access.
        let mut file = OpenOptions::new().read(true).write(true).open(path_ref)?;
        // NOTE(review): the tail snapshot is read before the shared lock is
        // acquired below — confirm a concurrent writer cannot invalidate it.
        let TailSnapshot {
            toc,
            footer_offset,
            data_end,
            generation,
        } = load_tail_snapshot(&file)?;

        // The on-disk header is patched in memory so it agrees with the
        // snapshot's footer; nothing is written back.
        let mut header = HeaderCodec::read(&mut file)?;
        header.footer_offset = footer_offset;
        header.toc_checksum = toc.toc_checksum;

        let lock = FileLock::acquire_with_mode(&file, LockMode::Shared)?;
        let wal = EmbeddedWal::open_read_only(&file, &header)?;

        #[cfg(feature = "lex")]
        let lex_storage = Arc::new(RwLock::new(EmbeddedLexStorage::from_manifest(
            toc.indexes.lex.as_ref(),
            &toc.indexes.lex_segments,
        )));

        let mut memvid = Self {
            file,
            path: path_ref.to_path_buf(),
            lock,
            read_only: true,
            header,
            toc,
            wal,
            pending_frame_inserts: 0,
            data_end,
            generation,
            lock_settings: LockSettings::default(),
            lex_enabled: false,
            lex_index: None,
            #[cfg(feature = "lex")]
            lex_storage,
            vec_enabled: false,
            vec_compression: VectorCompression::None,
            vec_index: None,
            clip_enabled: false,
            clip_index: None,
            dirty: false,
            #[cfg(feature = "lex")]
            tantivy: None,
            #[cfg(feature = "lex")]
            tantivy_dirty: false,
            #[cfg(feature = "temporal_track")]
            temporal_track: None,
            #[cfg(feature = "parallel_segments")]
            manifest_wal: None,
            memories_track: MemoriesTrack::new(),
            logic_mesh: LogicMesh::new(),
            sketch_track: SketchTrack::default(),
            schema_registry: SchemaRegistry::new(),
            schema_strict: false,
            #[cfg(feature = "replay")]
            active_session: None,
            #[cfg(feature = "replay")]
            completed_sessions: Vec::new(),
        };

        // Index flags are derived from what the TOC actually contains.
        memvid.lex_enabled = has_lex_index(&memvid.toc);
        if memvid.lex_enabled {
            memvid.load_lex_index_from_manifest()?;
        }
        #[cfg(feature = "lex")]
        memvid.init_tantivy()?;

        memvid.vec_enabled =
            memvid.toc.indexes.vec.is_some() || !memvid.toc.segment_catalog.vec_segments.is_empty();
        if memvid.vec_enabled {
            memvid.load_vec_index_from_manifest()?;
        }
        memvid.clip_enabled = memvid.toc.indexes.clip.is_some();
        if memvid.clip_enabled {
            memvid.load_clip_index_from_manifest()?;
        }
        memvid.load_memories_track()?;
        memvid.load_logic_mesh()?;
        memvid.load_sketch_track()?;

        memvid.bootstrap_segment_catalog();
        #[cfg(feature = "temporal_track")]
        memvid.ensure_temporal_track_loaded()?;

        Ok(memvid)
    }
552
553 pub(crate) fn try_open<P: AsRef<Path>>(path: P) -> Result<Self> {
554 let path_ref = path.as_ref();
555 ensure_single_file(path_ref)?;
556
557 let file = OpenOptions::new().read(true).write(true).open(path_ref)?;
558 let lock = match FileLock::try_acquire(&file, path_ref)? {
559 Some(lock) => lock,
560 None => {
561 return Err(MemvidError::Lock(
562 "exclusive access unavailable for doctor".to_string(),
563 ));
564 }
565 };
566 Self::open_locked(file, lock, path_ref)
567 }
568
569 fn bootstrap_segment_catalog(&mut self) {
570 let catalog = &mut self.toc.segment_catalog;
571 if catalog.version == 0 {
572 catalog.version = 1;
573 }
574 if catalog.next_segment_id == 0 {
575 let mut max_id = 0u64;
576 for descriptor in &catalog.lex_segments {
577 max_id = max_id.max(descriptor.common.segment_id);
578 }
579 for descriptor in &catalog.vec_segments {
580 max_id = max_id.max(descriptor.common.segment_id);
581 }
582 for descriptor in &catalog.time_segments {
583 max_id = max_id.max(descriptor.common.segment_id);
584 }
585 #[cfg(feature = "temporal_track")]
586 for descriptor in &catalog.temporal_segments {
587 max_id = max_id.max(descriptor.common.segment_id);
588 }
589 #[cfg(feature = "parallel_segments")]
590 for descriptor in &catalog.index_segments {
591 max_id = max_id.max(descriptor.common.segment_id);
592 }
593 if max_id > 0 {
594 catalog.next_segment_id = max_id.saturating_add(1);
595 }
596 }
597 }
598
599 #[cfg(feature = "parallel_segments")]
600 fn load_manifest_segments(&mut self, entries: Vec<IndexSegmentRef>) {
601 if entries.is_empty() {
602 return;
603 }
604 for entry in entries {
605 let duplicate = self
606 .toc
607 .segment_catalog
608 .index_segments
609 .iter()
610 .any(|existing| existing.common.segment_id == entry.common.segment_id);
611 if !duplicate {
612 self.toc.segment_catalog.index_segments.push(entry);
613 }
614 }
615 }
616
617 fn load_memories_track(&mut self) -> Result<()> {
619 let manifest = match &self.toc.memories_track {
620 Some(m) => m,
621 None => return Ok(()),
622 };
623
624 let mut buf = vec![0u8; manifest.bytes_length as usize];
626 self.file
627 .seek(std::io::SeekFrom::Start(manifest.bytes_offset))?;
628 self.file.read_exact(&mut buf)?;
629
630 let actual_checksum: [u8; 32] = blake3::hash(&buf).into();
632 if actual_checksum != manifest.checksum {
633 return Err(MemvidError::InvalidToc {
634 reason: "memories track checksum mismatch".into(),
635 });
636 }
637
638 self.memories_track = MemoriesTrack::deserialize(&buf)?;
640
641 Ok(())
642 }
643
644 fn load_logic_mesh(&mut self) -> Result<()> {
646 let manifest = match &self.toc.logic_mesh {
647 Some(m) => m,
648 None => return Ok(()),
649 };
650
651 let mut buf = vec![0u8; manifest.bytes_length as usize];
653 self.file
654 .seek(std::io::SeekFrom::Start(manifest.bytes_offset))?;
655 self.file.read_exact(&mut buf)?;
656
657 let actual_checksum: [u8; 32] = blake3::hash(&buf).into();
659 if actual_checksum != manifest.checksum {
660 return Err(MemvidError::InvalidToc {
661 reason: "logic mesh checksum mismatch".into(),
662 });
663 }
664
665 self.logic_mesh = LogicMesh::deserialize(&buf)?;
667
668 Ok(())
669 }
670
671 fn load_sketch_track(&mut self) -> Result<()> {
673 let manifest = match &self.toc.sketch_track {
674 Some(m) => m.clone(),
675 None => return Ok(()),
676 };
677
678 self.sketch_track = crate::types::read_sketch_track(
680 &mut self.file,
681 manifest.bytes_offset,
682 manifest.bytes_length,
683 )?;
684
685 Ok(())
686 }
687
688 #[cfg(feature = "temporal_track")]
689 pub(crate) fn ensure_temporal_track_loaded(&mut self) -> Result<()> {
690 if self.temporal_track.is_some() {
691 return Ok(());
692 }
693 let manifest = match &self.toc.temporal_track {
694 Some(manifest) => manifest.clone(),
695 None => return Ok(()),
696 };
697 if manifest.bytes_length == 0 {
698 return Ok(());
699 }
700 let file_len = self.file.metadata()?.len();
701 let Some(end) = manifest.bytes_offset.checked_add(manifest.bytes_length) else {
702 return Ok(());
703 };
704 if end > file_len {
705 return Ok(());
706 }
707 match temporal_track_read(&mut self.file, manifest.bytes_offset, manifest.bytes_length) {
708 Ok(track) => self.temporal_track = Some(track),
709 Err(MemvidError::InvalidTemporalTrack { .. }) => {
710 return Ok(());
711 }
712 Err(err) => return Err(err),
713 }
714 Ok(())
715 }
716
717 #[cfg(feature = "temporal_track")]
718 pub(crate) fn temporal_track_ref(&mut self) -> Result<Option<&TemporalTrack>> {
719 self.ensure_temporal_track_loaded()?;
720 Ok(self.temporal_track.as_ref())
721 }
722
723 #[cfg(feature = "temporal_track")]
724 pub(crate) fn temporal_anchor_timestamp(&mut self, frame_id: FrameId) -> Result<Option<i64>> {
725 self.ensure_temporal_track_loaded()?;
726 let Some(track) = self.temporal_track.as_ref() else {
727 return Ok(None);
728 };
729 if !track.capabilities().has_anchors {
730 return Ok(None);
731 }
732 Ok(track
733 .anchor_for_frame(frame_id)
734 .map(|anchor| anchor.anchor_ts))
735 }
736
737 #[cfg(feature = "temporal_track")]
738 pub(crate) fn clear_temporal_track_cache(&mut self) {
739 self.temporal_track = None;
740 }
741
742 #[cfg(feature = "temporal_track")]
743 pub(crate) fn effective_temporal_timestamp(
744 &mut self,
745 frame_id: FrameId,
746 fallback: i64,
747 ) -> Result<i64> {
748 Ok(self
749 .temporal_anchor_timestamp(frame_id)?
750 .unwrap_or(fallback))
751 }
752
753 #[cfg(not(feature = "temporal_track"))]
754 pub(crate) fn effective_temporal_timestamp(
755 &mut self,
756 _frame_id: crate::types::FrameId,
757 fallback: i64,
758 ) -> Result<i64> {
759 Ok(fallback)
760 }
761
762 pub fn get_memory_binding(&self) -> Option<&crate::types::MemoryBinding> {
767 self.toc.memory_binding.as_ref()
768 }
769
770 #[allow(deprecated)]
779 pub fn bind_memory(
780 &mut self,
781 binding: crate::types::MemoryBinding,
782 ticket: crate::types::Ticket,
783 ) -> Result<()> {
784 if let Some(existing) = self.get_memory_binding() {
786 if existing.memory_id != binding.memory_id {
787 return Err(MemvidError::MemoryAlreadyBound {
788 existing_memory_id: existing.memory_id,
789 existing_memory_name: existing.memory_name.clone(),
790 bound_at: existing.bound_at.to_rfc3339(),
791 });
792 }
793 }
794
795 self.apply_ticket(ticket)?;
797
798 self.toc.memory_binding = Some(binding);
800 self.dirty = true;
801
802 Ok(())
803 }
804
805 pub fn set_memory_binding_only(&mut self, binding: crate::types::MemoryBinding) -> Result<()> {
815 self.ensure_writable()?;
816
817 if let Some(existing) = self.get_memory_binding() {
819 if existing.memory_id != binding.memory_id {
820 return Err(MemvidError::MemoryAlreadyBound {
821 existing_memory_id: existing.memory_id,
822 existing_memory_name: existing.memory_name.clone(),
823 bound_at: existing.bound_at.to_rfc3339(),
824 });
825 }
826 }
827
828 self.toc.memory_binding = Some(binding);
830 self.dirty = true;
831
832 Ok(())
833 }
834
835 pub fn unbind_memory(&mut self) -> Result<()> {
839 self.toc.memory_binding = None;
840 self.toc.ticket_ref = crate::types::TicketRef {
842 issuer: "free-tier".into(),
843 seq_no: 1,
844 expires_in_secs: 0,
845 capacity_bytes: crate::types::Tier::Free.capacity_bytes(),
846 verified: false,
847 };
848 self.dirty = true;
849 Ok(())
850 }
851}
852
853pub(crate) fn read_toc(file: &mut File, header: &Header) -> Result<Toc> {
854 use crate::footer::{CommitFooter, FOOTER_SIZE};
855
856 let len = file.metadata()?.len();
857 if len < header.footer_offset {
858 return Err(MemvidError::InvalidToc {
859 reason: "footer offset beyond file length".into(),
860 });
861 }
862
863 file.seek(SeekFrom::Start(header.footer_offset))?;
865 let total_size = (len - header.footer_offset) as usize;
866
867 if total_size < FOOTER_SIZE {
868 return Err(MemvidError::InvalidToc {
869 reason: "region too small to contain footer".into(),
870 });
871 }
872
873 let mut buf = Vec::with_capacity(total_size);
874 file.read_to_end(&mut buf)?;
875
876 let footer_start = buf.len() - FOOTER_SIZE;
878 let footer_bytes = &buf[footer_start..];
879 let footer = CommitFooter::decode(footer_bytes).ok_or(MemvidError::InvalidToc {
880 reason: "failed to decode commit footer".into(),
881 })?;
882
883 let toc_bytes = &buf[..footer_start];
885 if toc_bytes.len() != footer.toc_len as usize {
886 return Err(MemvidError::InvalidToc {
887 reason: "toc length mismatch".into(),
888 });
889 }
890 if !footer.hash_matches(toc_bytes) {
891 return Err(MemvidError::InvalidToc {
892 reason: "commit footer toc hash mismatch".into(),
893 });
894 }
895
896 verify_toc_prefix(toc_bytes)?;
897 let toc = Toc::decode(toc_bytes)?;
898 Ok(toc)
899}
900
901fn verify_toc_prefix(bytes: &[u8]) -> Result<()> {
902 const MAX_SEGMENTS: u64 = 1_000_000;
903 const MAX_FRAMES: u64 = 1_000_000;
904 const MIN_SEGMENT_META_BYTES: u64 = 32;
905 const MIN_FRAME_BYTES: u64 = 64;
906 let read_u64 = |range: std::ops::Range<usize>, context: &str| -> Result<u64> {
909 let slice = bytes.get(range).ok_or_else(|| MemvidError::InvalidToc {
910 reason: context.to_string().into(),
911 })?;
912 let array: [u8; 8] = slice.try_into().map_err(|_| MemvidError::InvalidToc {
913 reason: context.to_string().into(),
914 })?;
915 Ok(u64::from_le_bytes(array))
916 };
917
918 if bytes.len() < 24 {
919 return Err(MemvidError::InvalidToc {
920 reason: "toc trailer too small".into(),
921 });
922 }
923 let toc_version = read_u64(0..8, "toc version missing or truncated")?;
924 if toc_version > 32 {
925 return Err(MemvidError::InvalidToc {
926 reason: "toc version unreasonable".into(),
927 });
928 }
929 let segments_len = read_u64(8..16, "segment count missing or truncated")?;
930 if segments_len > MAX_SEGMENTS {
931 return Err(MemvidError::InvalidToc {
932 reason: "segment count unreasonable".into(),
933 });
934 }
935 let frames_len = read_u64(16..24, "frame count missing or truncated")?;
936 if frames_len > MAX_FRAMES {
937 return Err(MemvidError::InvalidToc {
938 reason: "frame count unreasonable".into(),
939 });
940 }
941 let required = segments_len
942 .saturating_mul(MIN_SEGMENT_META_BYTES)
943 .saturating_add(frames_len.saturating_mul(MIN_FRAME_BYTES));
944 if required > bytes.len() as u64 {
945 return Err(MemvidError::InvalidToc {
946 reason: "toc payload inconsistent with counts".into(),
947 });
948 }
949 Ok(())
950}
951
952fn ensure_non_overlapping_frames(toc: &Toc, file_len: u64) -> Result<()> {
960 let mut frames_by_offset: Vec<_> = toc
962 .frames
963 .iter()
964 .filter(|f| f.status == FrameStatus::Active && f.payload_length > 0)
965 .collect();
966 frames_by_offset.sort_by_key(|f| f.payload_offset);
967
968 let mut previous_end = 0u64;
969 for frame in frames_by_offset {
970 let end = frame
971 .payload_offset
972 .checked_add(frame.payload_length)
973 .ok_or_else(|| MemvidError::InvalidToc {
974 reason: "frame payload offsets overflow".into(),
975 })?;
976 if end > file_len {
977 return Err(MemvidError::InvalidToc {
978 reason: "frame payload exceeds file length".into(),
979 });
980 }
981 if frame.payload_offset < previous_end {
982 return Err(MemvidError::InvalidToc {
983 reason: format!(
984 "frame {} payload overlaps with previous frame (offset {} < previous end {})",
985 frame.id, frame.payload_offset, previous_end
986 )
987 .into(),
988 });
989 }
990 previous_end = end;
991 }
992 Ok(())
993}
994
/// Attempts to locate and decode a TOC when the normal read path has failed.
///
/// Strategies are tried in order, returning the first TOC that decodes:
/// 1. the last valid commit footer found by `find_last_valid_footer`;
/// 2. the bytes from `hint` up to `FOOTER_SIZE` before the end of the file,
///    accepted without a validated footer;
/// 3. a scan via `scan_range_for_toc`, first from `hint` to the end of the
///    file and then from the start up to `hint` (or the whole file when no
///    hint is given).
///
/// Returns the decoded TOC and the offset it was found at.
///
/// # Errors
///
/// Returns `MemvidError::InvalidToc` when no strategy succeeds, and
/// propagates I/O errors from the metadata call or the memory mapping.
pub(crate) fn recover_toc(file: &mut File, hint: Option<u64>) -> Result<(Toc, u64)> {
    let len = file.metadata()?.len();
    // SAFETY: NOTE(review) — the mapping is only sound if the file is not
    // truncated or modified while `mmap` is alive; presumably callers hold the
    // file lock for the duration of recovery — confirm.
    let mmap = unsafe { Mmap::map(&*file)? };
    tracing::debug!(file_len = len, "attempting toc recovery");

    // Strategy 1: trust the last footer that validates.
    if let Some(footer_slice) = find_last_valid_footer(&mmap) {
        tracing::debug!(
            footer_offset = footer_slice.footer_offset,
            toc_offset = footer_slice.toc_offset,
            toc_len = footer_slice.toc_bytes.len(),
            "found valid footer during recovery"
        );
        match Toc::decode(footer_slice.toc_bytes) {
            Ok(toc) => {
                return Ok((toc, footer_slice.toc_offset as u64));
            }
            Err(err) => {
                tracing::warn!(
                    error = %err,
                    "footer-validated TOC failed to decode, falling back to scan"
                );
            }
        }
    }

    // Strategy 2: decode from the hinted offset, ignoring the trailing
    // FOOTER_SIZE bytes.
    if let Some(hint_offset) = hint {
        use crate::footer::FOOTER_SIZE;

        // Clamp the hint so slicing the mapping cannot go out of bounds.
        let start = (hint_offset.min(len)) as usize;
        if mmap.len().saturating_sub(start) >= FOOTER_SIZE {
            let toc_end = mmap.len().saturating_sub(FOOTER_SIZE);
            if toc_end > start {
                let toc_bytes = &mmap[start..toc_end];
                if verify_toc_prefix(toc_bytes).is_ok() {
                    // A panic while decoding arbitrary bytes counts as a
                    // failed attempt rather than aborting recovery.
                    let attempt = panic::catch_unwind(|| Toc::decode(toc_bytes));
                    if let Ok(Ok(toc)) = attempt {
                        tracing::debug!(
                            recovered_offset = hint_offset,
                            recovered_frames = toc.frames.len(),
                            "recovered toc from hinted offset without validated footer"
                        );
                        return Ok((toc, hint_offset));
                    }
                }
            }
        }
    }

    // Strategy 3: scan, preferring the region at and after the hint.
    let mut ranges = Vec::new();
    if let Some(hint_offset) = hint {
        let hint_idx = hint_offset.min(len) as usize;
        ranges.push((hint_idx, mmap.len()));
        if hint_idx > 0 {
            ranges.push((0, hint_idx));
        }
    } else {
        ranges.push((0, mmap.len()));
    }

    for (start, end) in ranges {
        if let Some(found) = scan_range_for_toc(&mmap, start, end) {
            return Ok(found);
        }
    }

    Err(MemvidError::InvalidToc {
        reason: "unable to recover table of contents from file trailer".into(),
    })
}
1071
1072fn scan_range_for_toc(data: &[u8], start: usize, end: usize) -> Option<(Toc, u64)> {
1073 if start >= end || end > data.len() {
1074 return None;
1075 }
1076 const MAX_TOC_BYTES: usize = 64 * 1024 * 1024;
1077 const ZERO_CHECKSUM: [u8; 32] = [0u8; 32];
1078
1079 let min_offset = data.len().saturating_sub(MAX_TOC_BYTES);
1082 let scan_start = start.max(min_offset);
1083
1084 for offset in (scan_start..end).rev() {
1085 let slice = &data[offset..];
1086 if slice.len() < 16 {
1087 continue;
1088 }
1089 debug_assert!(slice.len() <= MAX_TOC_BYTES);
1090
1091 if slice.len() < ZERO_CHECKSUM.len() {
1093 continue;
1094 }
1095 let (body, stored_checksum) = slice.split_at(slice.len() - ZERO_CHECKSUM.len());
1096 let mut hasher = Hasher::new();
1097 hasher.update(body);
1098 hasher.update(&ZERO_CHECKSUM);
1099 if hasher.finalize().as_bytes() != stored_checksum {
1100 continue;
1101 }
1102 if verify_toc_prefix(slice).is_err() {
1103 continue;
1104 }
1105 let attempt = panic::catch_unwind(|| Toc::decode(slice));
1106 if let Ok(Ok(toc)) = attempt {
1107 let recovered_offset = offset as u64;
1108 tracing::debug!(
1109 recovered_offset,
1110 recovered_frames = toc.frames.len(),
1111 "recovered toc via scan"
1112 );
1113 return Some((toc, recovered_offset));
1114 }
1115 }
1116 None
1117}
1118
1119pub(crate) fn prepare_toc_bytes(toc: &mut Toc) -> Result<Vec<u8>> {
1120 toc.toc_checksum = [0u8; 32];
1121 let bytes = toc.encode()?;
1122 let checksum = Toc::calculate_checksum(&bytes);
1123 toc.toc_checksum = checksum;
1124 toc.encode()
1125}
1126
1127pub(crate) fn empty_toc() -> Toc {
1128 Toc {
1129 toc_version: 0,
1130 segments: Vec::new(),
1131 frames: Vec::new(),
1132 indexes: IndexManifests::default(),
1133 time_index: None,
1134 temporal_track: None,
1135 memories_track: None,
1136 logic_mesh: None,
1137 sketch_track: None,
1138 segment_catalog: SegmentCatalog::default(),
1139 ticket_ref: TicketRef {
1140 issuer: "free-tier".into(),
1141 seq_no: 1,
1142 expires_in_secs: 0,
1143 capacity_bytes: Tier::Free.capacity_bytes(),
1144 verified: false,
1145 },
1146 memory_binding: None,
1147 replay_manifest: None,
1148 enrichment_queue: crate::types::EnrichmentQueueManifest::default(),
1149 merkle_root: [0u8; 32],
1150 toc_checksum: [0u8; 32],
1151 }
1152}
1153
1154pub(crate) fn compute_data_end(toc: &Toc, header: &Header) -> u64 {
1155 let wal_region_end = header.wal_offset.saturating_add(header.wal_size);
1162 let mut max_end = wal_region_end.max(header.footer_offset);
1163
1164 for frame in toc
1166 .frames
1167 .iter()
1168 .filter(|f| f.status == FrameStatus::Active && f.payload_length > 0)
1169 {
1170 if let Some(end) = frame.payload_offset.checked_add(frame.payload_length) {
1171 max_end = max_end.max(end);
1172 }
1173 }
1174
1175 let catalog = &toc.segment_catalog;
1177 for seg in &catalog.lex_segments {
1178 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1179 max_end = max_end.max(end);
1180 }
1181 }
1182 for seg in &catalog.vec_segments {
1183 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1184 max_end = max_end.max(end);
1185 }
1186 }
1187 for seg in &catalog.time_segments {
1188 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1189 max_end = max_end.max(end);
1190 }
1191 }
1192 #[cfg(feature = "temporal_track")]
1193 for seg in &catalog.temporal_segments {
1194 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1195 max_end = max_end.max(end);
1196 }
1197 }
1198 #[cfg(feature = "lex")]
1199 for seg in &catalog.tantivy_segments {
1200 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1201 max_end = max_end.max(end);
1202 }
1203 }
1204 #[cfg(feature = "parallel_segments")]
1205 for seg in &catalog.index_segments {
1206 if let Some(end) = seg.common.bytes_offset.checked_add(seg.common.bytes_length) {
1207 max_end = max_end.max(end);
1208 }
1209 }
1210
1211 if let Some(manifest) = toc.indexes.lex.as_ref() {
1213 if let Some(end) = manifest.bytes_offset.checked_add(manifest.bytes_length) {
1214 max_end = max_end.max(end);
1215 }
1216 }
1217 if let Some(manifest) = toc.indexes.vec.as_ref() {
1218 if let Some(end) = manifest.bytes_offset.checked_add(manifest.bytes_length) {
1219 max_end = max_end.max(end);
1220 }
1221 }
1222 if let Some(manifest) = toc.indexes.clip.as_ref() {
1223 if let Some(end) = manifest.bytes_offset.checked_add(manifest.bytes_length) {
1224 max_end = max_end.max(end);
1225 }
1226 }
1227 if let Some(manifest) = toc.time_index.as_ref() {
1228 if let Some(end) = manifest.bytes_offset.checked_add(manifest.bytes_length) {
1229 max_end = max_end.max(end);
1230 }
1231 }
1232 #[cfg(feature = "temporal_track")]
1233 if let Some(track) = toc.temporal_track.as_ref() {
1234 if let Some(end) = track.bytes_offset.checked_add(track.bytes_length) {
1235 max_end = max_end.max(end);
1236 }
1237 }
1238 if let Some(track) = toc.memories_track.as_ref() {
1239 if let Some(end) = track.bytes_offset.checked_add(track.bytes_length) {
1240 max_end = max_end.max(end);
1241 }
1242 }
1243 if let Some(mesh) = toc.logic_mesh.as_ref() {
1244 if let Some(end) = mesh.bytes_offset.checked_add(mesh.bytes_length) {
1245 max_end = max_end.max(end);
1246 }
1247 }
1248 if let Some(track) = toc.sketch_track.as_ref() {
1249 if let Some(end) = track.bytes_offset.checked_add(track.bytes_length) {
1250 max_end = max_end.max(end);
1251 }
1252 }
1253 #[cfg(feature = "replay")]
1254 if let Some(manifest) = toc.replay_manifest.as_ref() {
1255 if let Some(end) = manifest.segment_offset.checked_add(manifest.segment_size) {
1256 max_end = max_end.max(end);
1257 }
1258 }
1259
1260 tracing::debug!(
1261 wal_region_end,
1262 footer_offset = header.footer_offset,
1263 computed_data_end = max_end,
1264 "compute_data_end"
1265 );
1266
1267 max_end
1268}
1269
/// State recovered from the last valid commit footer found in a file, as
/// produced by `load_tail_snapshot`.
struct TailSnapshot {
    /// Table of contents decoded from the footer's TOC bytes.
    toc: Toc,
    /// Footer offset reported by the footer search, rebased by the start of
    /// the search window it was found in.
    footer_offset: u64,
    /// End of the data region; `load_tail_snapshot` sets this to the same
    /// value as `footer_offset`.
    data_end: u64,
    /// Generation number read from the footer.
    generation: u64,
}
1276
1277fn locate_footer_window(mmap: &[u8]) -> Option<(FooterSlice<'_>, usize)> {
1278 const MAX_SEARCH_SIZE: usize = 16 * 1024 * 1024;
1279 if mmap.is_empty() {
1280 return None;
1281 }
1282 let mut window = MAX_SEARCH_SIZE.min(mmap.len());
1283 loop {
1284 let start = mmap.len() - window;
1285 if let Some(slice) = find_last_valid_footer(&mmap[start..]) {
1286 return Some((slice, start));
1287 }
1288 if window == mmap.len() {
1289 break;
1290 }
1291 window = (window * 2).min(mmap.len());
1292 }
1293 None
1294}
1295
1296fn load_tail_snapshot(file: &File) -> Result<TailSnapshot> {
1297 let mmap = unsafe { Mmap::map(file)? };
1299
1300 let (slice, offset_adjustment) =
1301 locate_footer_window(&mmap).ok_or_else(|| MemvidError::InvalidToc {
1302 reason: "no valid commit footer found".into(),
1303 })?;
1304 let toc = Toc::decode(slice.toc_bytes)?;
1305 toc.verify_checksum()?;
1306
1307 Ok(TailSnapshot {
1308 toc,
1309 footer_offset: slice.footer_offset as u64 + offset_adjustment as u64,
1310 data_end: slice.footer_offset as u64 + offset_adjustment as u64,
1312 generation: slice.footer.generation,
1313 })
1314}
1315
1316fn detect_generation(file: &File) -> Result<Option<u64>> {
1317 let mmap = unsafe { Mmap::map(file)? };
1319
1320 Ok(locate_footer_window(&mmap).map(|(slice, _)| slice.footer.generation))
1321}
1322
1323pub(crate) fn ensure_single_file(path: &Path) -> Result<()> {
1324 if let Some(parent) = path.parent() {
1325 let name = path
1326 .file_name()
1327 .and_then(|n| n.to_str())
1328 .unwrap_or_default();
1329 let forbidden = ["-wal", "-shm", "-lock", "-journal"];
1330 for suffix in forbidden {
1331 let candidate = parent.join(format!("{name}{suffix}"));
1332 if candidate.exists() {
1333 return Err(MemvidError::AuxiliaryFileDetected { path: candidate });
1334 }
1335 }
1336 let hidden_forbidden = [".wal", ".shm", ".lock", ".journal"];
1337 for suffix in hidden_forbidden {
1338 let candidate = parent.join(format!(".{name}{suffix}"));
1339 if candidate.exists() {
1340 return Err(MemvidError::AuxiliaryFileDetected { path: candidate });
1341 }
1342 }
1343 }
1344 Ok(())
1345}
1346
1347#[cfg(feature = "parallel_segments")]
/// Derives the manifest WAL path for `path` by replacing its extension with
/// `manifest.wal` (e.g. `mem.mv2` becomes `mem.manifest.wal`).
fn manifest_wal_path(path: &Path) -> PathBuf {
    path.with_extension("manifest.wal")
}
1353
/// Removes the manifest WAL that belongs to `path`, if one exists.
///
/// Removal is best-effort: any error from deleting the file is ignored.
#[cfg(feature = "parallel_segments")]
pub(crate) fn cleanup_manifest_wal_public(path: &Path) {
    let sidecar = manifest_wal_path(path);
    if !sidecar.exists() {
        return;
    }
    // Deliberately discard the result; cleanup must not fail the caller.
    let _ = std::fs::remove_file(&sidecar);
}
1361
1362pub(crate) fn has_lex_index(toc: &Toc) -> bool {
1365 toc.segment_catalog.lex_enabled
1366 || toc.indexes.lex.is_some()
1367 || !toc.indexes.lex_segments.is_empty()
1368 || !toc.segment_catalog.tantivy_segments.is_empty()
1369}
1370
/// Returns the lexical document count, preferring the TOC's lex manifest.
///
/// The manifest's `doc_count` is used when it is present and greater than
/// zero; otherwise `lex_storage.doc_count()` is used when that is greater than
/// zero. Returns `None` when neither source reports a positive count.
#[cfg(feature = "lex")]
pub(crate) fn lex_doc_count(
    toc: &Toc,
    lex_storage: &crate::search::EmbeddedLexStorage,
) -> Option<u64> {
    let from_manifest = toc
        .indexes
        .lex
        .as_ref()
        .map(|manifest| manifest.doc_count)
        .filter(|&count| count > 0);

    // The storage is only consulted when the manifest gave no positive count.
    from_manifest.or_else(|| {
        let stored = lex_storage.doc_count();
        (stored > 0).then_some(stored)
    })
}
1396
1397#[allow(dead_code)]
1400fn validate_segment_integrity(toc: &Toc, header: &Header, file_len: u64) -> Result<()> {
1401 let data_limit = header.footer_offset;
1402
1403 #[cfg(feature = "replay")]
1407 if let Some(manifest) = toc.replay_manifest.as_ref() {
1408 if manifest.segment_size != 0 {
1409 let end = manifest
1410 .segment_offset
1411 .checked_add(manifest.segment_size)
1412 .ok_or_else(|| MemvidError::Doctor {
1413 reason: format!(
1414 "Replay segment offset overflow: {} + {}",
1415 manifest.segment_offset, manifest.segment_size
1416 ),
1417 })?;
1418
1419 if end > file_len {
1422 return Err(MemvidError::Doctor {
1423 reason: format!(
1424 "Replay segment out of bounds: offset={}, length={}, end={}, file_len={}",
1425 manifest.segment_offset, manifest.segment_size, end, file_len
1426 ),
1427 });
1428 }
1429 }
1430 }
1431
1432 for (idx, seg) in toc.segment_catalog.tantivy_segments.iter().enumerate() {
1434 let offset = seg.common.bytes_offset;
1435 let length = seg.common.bytes_length;
1436
1437 if length == 0 {
1438 continue; }
1440
1441 let end = offset
1442 .checked_add(length)
1443 .ok_or_else(|| MemvidError::Doctor {
1444 reason: format!(
1445 "Tantivy segment {} offset overflow: {} + {}",
1446 idx, offset, length
1447 ),
1448 })?;
1449
1450 if end > file_len || end > data_limit {
1451 return Err(MemvidError::Doctor {
1452 reason: format!(
1453 "Tantivy segment {} out of bounds: offset={}, length={}, end={}, file_len={}, data_limit={}",
1454 idx, offset, length, end, file_len, data_limit
1455 ),
1456 });
1457 }
1458 }
1459
1460 for (idx, seg) in toc.segment_catalog.time_segments.iter().enumerate() {
1462 let offset = seg.common.bytes_offset;
1463 let length = seg.common.bytes_length;
1464
1465 if length == 0 {
1466 continue;
1467 }
1468
1469 let end = offset
1470 .checked_add(length)
1471 .ok_or_else(|| MemvidError::Doctor {
1472 reason: format!(
1473 "Time segment {} offset overflow: {} + {}",
1474 idx, offset, length
1475 ),
1476 })?;
1477
1478 if end > file_len || end > data_limit {
1479 return Err(MemvidError::Doctor {
1480 reason: format!(
1481 "Time segment {} out of bounds: offset={}, length={}, end={}, file_len={}, data_limit={}",
1482 idx, offset, length, end, file_len, data_limit
1483 ),
1484 });
1485 }
1486 }
1487
1488 for (idx, seg) in toc.segment_catalog.vec_segments.iter().enumerate() {
1490 let offset = seg.common.bytes_offset;
1491 let length = seg.common.bytes_length;
1492
1493 if length == 0 {
1494 continue;
1495 }
1496
1497 let end = offset
1498 .checked_add(length)
1499 .ok_or_else(|| MemvidError::Doctor {
1500 reason: format!(
1501 "Vec segment {} offset overflow: {} + {}",
1502 idx, offset, length
1503 ),
1504 })?;
1505
1506 if end > file_len || end > data_limit {
1507 return Err(MemvidError::Doctor {
1508 reason: format!(
1509 "Vec segment {} out of bounds: offset={}, length={}, end={}, file_len={}, data_limit={}",
1510 idx, offset, length, end, file_len, data_limit
1511 ),
1512 });
1513 }
1514 }
1515
1516 log::debug!("✓ Segment integrity validation passed");
1517 Ok(())
1518}
1519
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// An 8-byte buffer must be rejected by `verify_toc_prefix` with an
    /// `InvalidToc` error whose reason mentions "trailer too small".
    #[test]
    fn toc_prefix_underflow_surfaces_reason() {
        let short_prefix = [0u8; 8];
        match verify_toc_prefix(&short_prefix).expect_err("should reject short toc prefix") {
            MemvidError::InvalidToc { reason } => assert!(
                reason.contains("trailer too small"),
                "unexpected reason: {reason}"
            ),
            other => panic!("unexpected error: {other:?}"),
        }
    }

    /// Creating a memory next to an existing `-wal` sidecar must fail with
    /// `AuxiliaryFileDetected`.
    #[test]
    fn ensure_single_file_blocks_sidecars() {
        let dir = tempdir().expect("tmp");
        let path = dir.path().join("mem.mv2");
        let sidecar = dir.path().join("mem.mv2-wal");
        std::fs::write(sidecar, b"junk").expect("sidecar");

        let outcome = Memvid::create(&path);
        let blocked = matches!(outcome, Err(MemvidError::AuxiliaryFileDetected { .. }));
        assert!(blocked);
    }
}