1use std::fs;
10use std::io::{Read, Seek, SeekFrom, Write};
11use std::path::{Path, PathBuf};
12use std::sync::OnceLock;
13
14use crate::decode::{self, DecodeOptions};
15use crate::encode::{self, EncodeOptions};
16use crate::error::{Result, TensogramError};
17use crate::framing;
18use crate::types::{DataObjectDescriptor, DecodedObject, GlobalMetadata};
19
/// Where a [`TensogramFile`] gets its bytes from: a file on the local
/// filesystem or, when the `remote` feature is enabled, a remote object.
enum Backend {
    /// A file addressed by a filesystem path.
    Local {
        /// Location of the file on disk.
        path: PathBuf,
        /// Memory mapping of the file. Every constructor except `open_mmap`
        /// leaves this as `None`.
        #[cfg(feature = "mmap")]
        mmap: Option<memmap2::Mmap>,
    },
    /// A remote source; all access is delegated to the wrapped backend.
    #[cfg(feature = "remote")]
    Remote(crate::remote::RemoteBackend),
}
31
32impl Backend {
33 fn source_string(&self) -> String {
34 match self {
35 Backend::Local { path, .. } => path.display().to_string(),
36 #[cfg(feature = "remote")]
37 Backend::Remote(r) => r.source_url().to_string(),
38 }
39 }
40}
41
42pub struct TensogramFile {
47 backend: Backend,
48 message_offsets: OnceLock<Vec<(usize, usize)>>,
49}
50
51impl TensogramFile {
52 #[tracing::instrument(skip(path), fields(path = %path.as_ref().display()))]
53 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
54 let path = path.as_ref().to_path_buf();
55 if !path.exists() {
56 return Err(TensogramError::Io(std::io::Error::new(
57 std::io::ErrorKind::NotFound,
58 format!("file not found: {}", path.display()),
59 )));
60 }
61 Ok(TensogramFile {
62 backend: Backend::Local {
63 path,
64 #[cfg(feature = "mmap")]
65 mmap: None,
66 },
67 message_offsets: OnceLock::new(),
68 })
69 }
70
71 pub fn open_source(source: impl AsRef<str>) -> Result<Self> {
77 let source = source.as_ref();
78
79 #[cfg(feature = "remote")]
80 if crate::remote::is_remote_url(source) {
81 return Self::open_remote(source, &std::collections::BTreeMap::new());
82 }
83
84 Self::open(source)
85 }
86
/// Opens a remote file at `source`, forwarding `storage_options` to the
/// remote backend.
///
/// # Errors
///
/// Propagates whatever error the remote backend reports while opening.
#[cfg(feature = "remote")]
pub fn open_remote(
    source: &str,
    storage_options: &std::collections::BTreeMap<String, String>,
) -> Result<Self> {
    let backend = Backend::Remote(crate::remote::RemoteBackend::open(
        source,
        storage_options,
    )?);
    Ok(Self {
        backend,
        message_offsets: OnceLock::new(),
    })
}
99
100 #[tracing::instrument(skip(path), fields(path = %path.as_ref().display()))]
102 pub fn create(path: impl AsRef<Path>) -> Result<Self> {
103 let path = path.as_ref().to_path_buf();
104 if let Some(parent) = path.parent() {
105 fs::create_dir_all(parent).map_err(|e| {
106 TensogramError::Io(std::io::Error::new(
107 e.kind(),
108 format!("cannot create parent directory for {}: {e}", path.display()),
109 ))
110 })?;
111 }
112 fs::File::create(&path).map_err(|e| {
113 TensogramError::Io(std::io::Error::new(
114 e.kind(),
115 format!("cannot create {}: {e}", path.display()),
116 ))
117 })?;
118 Ok(TensogramFile {
119 backend: Backend::Local {
120 path,
121 #[cfg(feature = "mmap")]
122 mmap: None,
123 },
124 message_offsets: OnceLock::new(),
125 })
126 }
127
/// Opens `path` through a memory mapping and scans it for messages
/// immediately, so the offset cache is populated on return.
///
/// # Errors
///
/// Returns an I/O error when the file cannot be opened (annotated with the
/// path) or cannot be mapped.
#[cfg(feature = "mmap")]
pub fn open_mmap(path: impl AsRef<Path>) -> Result<Self> {
    let path = path.as_ref().to_path_buf();
    let file = match fs::File::open(&path) {
        Ok(handle) => handle,
        Err(e) => {
            let detail = format!("{}: {e}", path.display());
            return Err(TensogramError::Io(std::io::Error::new(e.kind(), detail)));
        }
    };

    // SAFETY: NOTE(review) — `Mmap::map` is unsafe because the mapped bytes
    // may change or disappear if the underlying file is modified or
    // truncated while mapped. This presumably relies on the file not being
    // altered for the lifetime of the handle; confirm with the callers.
    let mmap = unsafe { memmap2::Mmap::map(&file) }?;

    // Scan before the mapping is moved into the backend.
    let message_offsets = OnceLock::from(framing::scan(&mmap));
    Ok(Self {
        backend: Backend::Local {
            path,
            mmap: Some(mmap),
        },
        message_offsets,
    })
}
155
156 fn local_path(&self) -> Result<&PathBuf> {
159 match &self.backend {
160 Backend::Local { path, .. } => Ok(path),
161 #[cfg(feature = "remote")]
162 Backend::Remote(_) => Err(TensogramError::Remote(
163 "operation not supported on remote files".to_string(),
164 )),
165 }
166 }
167
168 fn ensure_scanned(&self) -> Result<()> {
169 if self.message_offsets.get().is_none() {
170 let path = self.local_path()?.clone();
171 let mut file = fs::File::open(&path).map_err(|e| {
172 TensogramError::Io(std::io::Error::new(
173 e.kind(),
174 format!("{}: {e}", path.display()),
175 ))
176 })?;
177 let offsets = framing::scan_file(&mut file)?;
178 let _ = self.message_offsets.set(offsets);
179 }
180 Ok(())
181 }
182
183 fn checked_offsets(&self, index: usize) -> Result<(usize, usize)> {
184 let offsets = self
185 .message_offsets
186 .get()
187 .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?;
188 if index >= offsets.len() {
189 return Err(TensogramError::Framing(format!(
190 "message index {} out of range (count={})",
191 index,
192 offsets.len()
193 )));
194 }
195 Ok(offsets[index])
196 }
197
198 pub fn message_count(&self) -> Result<usize> {
201 #[cfg(feature = "remote")]
202 if let Backend::Remote(remote) = &self.backend {
203 return remote.message_count();
204 }
205 self.ensure_scanned()?;
206 Ok(self
207 .message_offsets
208 .get()
209 .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
210 .len())
211 }
212
213 pub fn append(
214 &mut self,
215 global_metadata: &GlobalMetadata,
216 descriptors: &[(&DataObjectDescriptor, &[u8])],
217 options: &EncodeOptions,
218 ) -> Result<()> {
219 #[cfg(feature = "mmap")]
220 if let Backend::Local { mmap: Some(_), .. } = &self.backend {
221 return Err(TensogramError::Io(std::io::Error::new(
222 std::io::ErrorKind::Unsupported,
223 "cannot append to a memory-mapped file (open without mmap to append)",
224 )));
225 }
226 let path = self.local_path()?.clone();
227 let msg = encode::encode(global_metadata, descriptors, options)?;
228 let mut file = fs::OpenOptions::new()
229 .create(true)
230 .append(true)
231 .open(&path)
232 .map_err(|e| {
233 TensogramError::Io(std::io::Error::new(
234 e.kind(),
235 format!("{}: {e}", path.display()),
236 ))
237 })?;
238 file.write_all(&msg)?;
239 self.message_offsets = OnceLock::new();
240 Ok(())
241 }
242
243 pub fn read_message(&self, index: usize) -> Result<Vec<u8>> {
244 #[cfg(feature = "remote")]
245 if let Backend::Remote(remote) = &self.backend {
246 return remote.read_message(index);
247 }
248
249 self.ensure_scanned()?;
250
251 let (offset, length) = self.checked_offsets(index)?;
252
253 #[cfg(feature = "mmap")]
254 if let Backend::Local {
255 mmap: Some(mmap), ..
256 } = &self.backend
257 {
258 return Ok(mmap[offset..offset + length].to_vec());
259 }
260
261 match &self.backend {
262 Backend::Local { path, .. } => {
263 let mut file = fs::File::open(path).map_err(|e| {
264 TensogramError::Io(std::io::Error::new(
265 e.kind(),
266 format!("{}: {e}", path.display()),
267 ))
268 })?;
269 file.seek(SeekFrom::Start(offset as u64))?;
270 let mut buf = vec![0u8; length];
271 file.read_exact(&mut buf)?;
272 Ok(buf)
273 }
274 #[cfg(feature = "remote")]
275 Backend::Remote(_) => Err(TensogramError::Remote(
276 "unreachable: remote handled above".to_string(),
277 )),
278 }
279 }
280
281 #[deprecated(note = "Use message_count() + read_message(index) for lazy access")]
282 pub fn messages(&self) -> Result<Vec<Vec<u8>>> {
283 self.ensure_scanned()?;
284 let count = self
285 .message_offsets
286 .get()
287 .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
288 .len();
289 let mut msgs = Vec::with_capacity(count);
290 for i in 0..count {
291 msgs.push(self.read_message(i)?);
292 }
293 Ok(msgs)
294 }
295
296 pub fn decode_message(
297 &self,
298 index: usize,
299 options: &DecodeOptions,
300 ) -> Result<(GlobalMetadata, Vec<DecodedObject>)> {
301 let msg = self.read_message(index)?;
302 decode::decode(&msg, options)
303 }
304
305 pub fn iter(&self) -> Result<crate::iter::FileMessageIter> {
306 self.ensure_scanned()?;
307 let path = self.local_path()?.clone();
308 let offsets = self
309 .message_offsets
310 .get()
311 .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
312 .clone();
313 crate::iter::FileMessageIter::new(path, offsets)
314 }
315
316 pub fn path(&self) -> Option<&Path> {
317 match &self.backend {
318 Backend::Local { path, .. } => Some(path),
319 #[cfg(feature = "remote")]
320 Backend::Remote(_) => None,
321 }
322 }
323
324 pub fn source(&self) -> String {
325 self.backend.source_string()
326 }
327
328 pub fn invalidate_offsets(&mut self) {
329 match &self.backend {
330 Backend::Local { .. } => {
331 self.message_offsets = OnceLock::new();
332 }
333 #[cfg(feature = "remote")]
334 Backend::Remote(_) => {}
335 }
336 }
337
338 pub fn decode_metadata(&self, msg_idx: usize) -> Result<GlobalMetadata> {
341 match &self.backend {
342 #[cfg(feature = "remote")]
343 Backend::Remote(remote) => remote.read_metadata(msg_idx),
344 _ => {
345 let msg = self.read_message(msg_idx)?;
346 decode::decode_metadata(&msg)
347 }
348 }
349 }
350
351 pub fn decode_descriptors(
352 &self,
353 msg_idx: usize,
354 ) -> Result<(GlobalMetadata, Vec<DataObjectDescriptor>)> {
355 match &self.backend {
356 #[cfg(feature = "remote")]
357 Backend::Remote(remote) => remote.read_descriptors(msg_idx),
358 _ => {
359 let msg = self.read_message(msg_idx)?;
360 decode::decode_descriptors(&msg)
361 }
362 }
363 }
364
365 pub fn decode_object(
366 &self,
367 msg_idx: usize,
368 obj_idx: usize,
369 options: &DecodeOptions,
370 ) -> Result<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)> {
371 match &self.backend {
372 #[cfg(feature = "remote")]
373 Backend::Remote(remote) => remote.read_object(msg_idx, obj_idx, options),
374 _ => {
375 let msg = self.read_message(msg_idx)?;
376 decode::decode_object(&msg, obj_idx, options)
377 }
378 }
379 }
380
/// Decodes the same `ranges` of object `obj_idx` from each message in
/// `msg_indices` through the remote backend.
///
/// # Errors
///
/// Returns an I/O error when the handle is not backed by a remote file,
/// or whatever error the remote backend reports.
#[cfg(feature = "remote")]
pub fn decode_range_batch(
    &self,
    msg_indices: &[usize],
    obj_idx: usize,
    ranges: &[(u64, u64)],
    options: &DecodeOptions,
) -> Result<Vec<(DataObjectDescriptor, Vec<Vec<u8>>)>> {
    let Backend::Remote(remote) = &self.backend else {
        return Err(TensogramError::Io(std::io::Error::other(
            "batch range decode requires a remote backend",
        )));
    };
    remote.read_range_batch(msg_indices, obj_idx, ranges, options)
}
398
/// Decodes object `obj_idx` from each message in `msg_indices` through
/// the remote backend.
///
/// # Errors
///
/// Returns an I/O error when the handle is not backed by a remote file,
/// or whatever error the remote backend reports.
#[cfg(feature = "remote")]
pub fn decode_object_batch(
    &self,
    msg_indices: &[usize],
    obj_idx: usize,
    options: &DecodeOptions,
) -> Result<Vec<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)>> {
    let Backend::Remote(remote) = &self.backend else {
        return Err(TensogramError::Io(std::io::Error::other(
            "batch object decode requires a remote backend",
        )));
    };
    remote.read_object_batch(msg_indices, obj_idx, options)
}
413
414 pub fn decode_range(
415 &self,
416 msg_idx: usize,
417 obj_idx: usize,
418 ranges: &[(u64, u64)],
419 options: &DecodeOptions,
420 ) -> Result<(DataObjectDescriptor, Vec<Vec<u8>>)> {
421 match &self.backend {
422 #[cfg(feature = "remote")]
423 Backend::Remote(remote) => remote.read_range(msg_idx, obj_idx, ranges, options),
424 _ => {
425 let msg = self.read_message(msg_idx)?;
426 decode::decode_range(&msg, obj_idx, ranges, options)
427 }
428 }
429 }
430
431 pub fn is_remote(&self) -> bool {
432 #[cfg(feature = "remote")]
433 if matches!(self.backend, Backend::Remote(_)) {
434 return true;
435 }
436 false
437 }
438
/// Opens a local file and scans it for messages on a blocking worker
/// thread, so the offset cache is populated on return.
///
/// The existence check runs inside the blocking task together with the
/// scan, keeping filesystem calls off the async executor thread.
///
/// # Errors
///
/// Returns an I/O error of kind `NotFound` when `path` does not exist, an
/// I/O error annotated with the path when the file cannot be opened, any
/// scan error, or an I/O error wrapping a failed join of the blocking task.
#[cfg(feature = "async")]
pub async fn open_async(path: impl AsRef<Path>) -> Result<Self> {
    let path = path.as_ref().to_path_buf();
    let p = path.clone();
    let offsets = tokio::task::spawn_blocking(move || {
        if !p.exists() {
            return Err(TensogramError::Io(std::io::Error::new(
                std::io::ErrorKind::NotFound,
                format!("file not found: {}", p.display()),
            )));
        }
        let mut file = fs::File::open(&p).map_err(|e| {
            TensogramError::Io(std::io::Error::new(
                e.kind(),
                format!("{}: {e}", p.display()),
            ))
        })?;
        framing::scan_file(&mut file)
    })
    .await
    .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;

    Ok(TensogramFile {
        backend: Backend::Local {
            path,
            #[cfg(feature = "mmap")]
            mmap: None,
        },
        message_offsets: OnceLock::from(offsets),
    })
}
472
/// Async counterpart of `message_count`: remote backends are queried
/// asynchronously, local files are scanned on a blocking worker thread
/// when the offset cache is still empty.
#[cfg(feature = "async")]
pub async fn message_count_async(&self) -> Result<usize> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote.message_count_async().await;
    }

    if let Some(offsets) = self.message_offsets.get() {
        return Ok(offsets.len());
    }

    let path = self.local_path()?.clone();
    let scan = tokio::task::spawn_blocking(move || {
        let mut file = fs::File::open(&path)?;
        framing::scan_file(&mut file)
    });
    // First `?` surfaces a failed join, second `?` the scan error.
    let offsets = scan
        .await
        .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;
    // Ignore a lost race; the cache is re-read below either way.
    let _ = self.message_offsets.set(offsets);

    self.message_offsets
        .get()
        .map(Vec::len)
        .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))
}
497
/// Async counterpart of `read_message`: remote backends fetch the message
/// asynchronously; local files are scanned (if needed) and read from disk
/// on blocking worker threads.
///
/// # Errors
///
/// Returns `TensogramError::Framing` when `index` is out of range, an I/O
/// error for file failures, or an I/O error wrapping a failed join of a
/// blocking task.
#[cfg(feature = "async")]
pub async fn read_message_async(&self, index: usize) -> Result<Vec<u8>> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote.read_message_async(index).await;
    }

    if self.message_offsets.get().is_none() {
        let scan_path = self.local_path()?.clone();
        let scan = tokio::task::spawn_blocking(move || {
            let mut file = fs::File::open(&scan_path)?;
            framing::scan_file(&mut file)
        });
        let offsets = scan
            .await
            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;
        // Ignore a lost race; the lookup below reads whatever was stored.
        let _ = self.message_offsets.set(offsets);
    }

    let (offset, length) = self.checked_offsets(index)?;
    let read_path = self.local_path()?.clone();

    let read = tokio::task::spawn_blocking(move || -> Result<Vec<u8>> {
        let mut file = fs::File::open(&read_path)?;
        file.seek(SeekFrom::Start(offset as u64))?;
        let mut bytes = vec![0u8; length];
        file.read_exact(&mut bytes)?;
        Ok(bytes)
    });
    match read.await {
        Ok(outcome) => outcome,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
529
/// Async counterpart of `decode_message`: reads message `index`
/// asynchronously and decodes it on a blocking worker thread.
#[cfg(feature = "async")]
pub async fn decode_message_async(
    &self,
    index: usize,
    options: &DecodeOptions,
) -> Result<(GlobalMetadata, Vec<DecodedObject>)> {
    let raw = self.read_message_async(index).await?;
    // The blocking task needs owned data, hence the clone.
    let options = options.clone();
    let joined = tokio::task::spawn_blocking(move || decode::decode(&raw, &options)).await;
    match joined {
        Ok(decoded) => decoded,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
542
/// Async counterpart of `open_source`: remote URLs are opened through the
/// remote backend with no storage options, anything else as a local path
/// via `open_async`.
#[cfg(all(feature = "remote", feature = "async"))]
pub async fn open_source_async(source: impl AsRef<str>) -> Result<Self> {
    let location = source.as_ref();

    if !crate::remote::is_remote_url(location) {
        return Self::open_async(location).await;
    }

    let no_options = std::collections::BTreeMap::new();
    Self::open_remote_async(location, &no_options).await
}
553
/// Async counterpart of `open_remote`: opens `source` through the remote
/// backend, forwarding `storage_options`.
#[cfg(all(feature = "remote", feature = "async"))]
pub async fn open_remote_async(
    source: &str,
    storage_options: &std::collections::BTreeMap<String, String>,
) -> Result<Self> {
    let backend = Backend::Remote(
        crate::remote::RemoteBackend::open_async(source, storage_options).await?,
    );
    Ok(Self {
        backend,
        message_offsets: OnceLock::new(),
    })
}
565
/// Async counterpart of `decode_metadata`: remote backends read the
/// metadata asynchronously; local files read the message and decode its
/// metadata on a blocking worker thread.
#[cfg(feature = "async")]
pub async fn decode_metadata_async(&self, msg_idx: usize) -> Result<GlobalMetadata> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote.read_metadata_async(msg_idx).await;
    }

    let raw = self.read_message_async(msg_idx).await?;
    let joined = tokio::task::spawn_blocking(move || decode::decode_metadata(&raw)).await;
    match joined {
        Ok(decoded) => decoded,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
578
/// Async counterpart of `decode_descriptors`: remote backends read the
/// descriptors asynchronously; local files read the message and decode
/// the descriptors on a blocking worker thread.
#[cfg(feature = "async")]
pub async fn decode_descriptors_async(
    &self,
    msg_idx: usize,
) -> Result<(GlobalMetadata, Vec<DataObjectDescriptor>)> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote.read_descriptors_async(msg_idx).await;
    }

    let raw = self.read_message_async(msg_idx).await?;
    let joined = tokio::task::spawn_blocking(move || decode::decode_descriptors(&raw)).await;
    match joined {
        Ok(decoded) => decoded,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
594
/// Async counterpart of `decode_object`: remote backends read the object
/// asynchronously; local files read the message and decode object
/// `obj_idx` on a blocking worker thread.
#[cfg(feature = "async")]
pub async fn decode_object_async(
    &self,
    msg_idx: usize,
    obj_idx: usize,
    options: &DecodeOptions,
) -> Result<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote.read_object_async(msg_idx, obj_idx, options).await;
    }

    let raw = self.read_message_async(msg_idx).await?;
    // The blocking task needs owned data, hence the clone.
    let options = options.clone();
    let joined =
        tokio::task::spawn_blocking(move || decode::decode_object(&raw, obj_idx, &options)).await;
    match joined {
        Ok(decoded) => decoded,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
613
/// Async counterpart of `decode_range`: remote backends read the ranges
/// asynchronously; local files read the message and decode the ranges on
/// a blocking worker thread.
#[cfg(feature = "async")]
pub async fn decode_range_async(
    &self,
    msg_idx: usize,
    obj_idx: usize,
    ranges: &[(u64, u64)],
    options: &DecodeOptions,
) -> Result<(DataObjectDescriptor, Vec<Vec<u8>>)> {
    #[cfg(feature = "remote")]
    if let Backend::Remote(remote) = &self.backend {
        return remote
            .read_range_async(msg_idx, obj_idx, ranges, options)
            .await;
    }

    let raw = self.read_message_async(msg_idx).await?;
    // The blocking task needs owned copies of the borrowed arguments.
    let (ranges, options) = (ranges.to_vec(), options.clone());
    let joined = tokio::task::spawn_blocking(move || {
        decode::decode_range(&raw, obj_idx, &ranges, &options)
    })
    .await;
    match joined {
        Ok(decoded) => decoded,
        Err(e) => Err(TensogramError::Io(std::io::Error::other(e))),
    }
}
636
/// Asks a remote backend to make sure the layouts of the given messages
/// are loaded. Succeeds without doing anything for local files.
#[cfg(all(feature = "remote", feature = "async"))]
pub async fn prefetch_layouts_async(&self, msg_indices: &[usize]) -> Result<()> {
    match &self.backend {
        Backend::Remote(remote) => remote.ensure_all_layouts_batch_async(msg_indices).await,
        Backend::Local { .. } => Ok(()),
    }
}
644
/// Async counterpart of `decode_object_batch`; only remote backends are
/// supported.
///
/// # Errors
///
/// Returns an I/O error when the handle is not backed by a remote file,
/// or whatever error the remote backend reports.
#[cfg(all(feature = "remote", feature = "async"))]
pub async fn decode_object_batch_async(
    &self,
    msg_indices: &[usize],
    obj_idx: usize,
    options: &DecodeOptions,
) -> Result<Vec<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)>> {
    let Backend::Remote(remote) = &self.backend else {
        return Err(TensogramError::Io(std::io::Error::other(
            "batch object decode requires a remote backend",
        )));
    };
    remote
        .read_object_batch_async(msg_indices, obj_idx, options)
        .await
}
661
/// Async counterpart of `decode_range_batch`; only remote backends are
/// supported.
///
/// # Errors
///
/// Returns an I/O error when the handle is not backed by a remote file,
/// or whatever error the remote backend reports.
#[cfg(all(feature = "remote", feature = "async"))]
pub async fn decode_range_batch_async(
    &self,
    msg_indices: &[usize],
    obj_idx: usize,
    ranges: &[(u64, u64)],
    options: &DecodeOptions,
) -> Result<Vec<(DataObjectDescriptor, Vec<Vec<u8>>)>> {
    let Backend::Remote(remote) = &self.backend else {
        return Err(TensogramError::Io(std::io::Error::other(
            "batch range decode requires a remote backend",
        )));
    };
    remote
        .read_range_batch_async(msg_indices, obj_idx, ranges, options)
        .await
}
679}
680
681#[cfg(test)]
682mod tests {
683 use super::*;
684 use crate::dtype::Dtype;
685 use crate::types::ByteOrder;
686 use std::collections::BTreeMap;
687
688 fn make_global_meta() -> GlobalMetadata {
689 GlobalMetadata {
690 version: 2,
691 extra: BTreeMap::new(),
692 ..Default::default()
693 }
694 }
695
696 fn make_descriptor(shape: Vec<u64>) -> DataObjectDescriptor {
697 let strides = if shape.is_empty() {
698 vec![]
699 } else {
700 let mut s = vec![1u64; shape.len()];
701 for i in (0..shape.len() - 1).rev() {
702 s[i] = s[i + 1] * shape[i + 1];
703 }
704 s
705 };
706
707 DataObjectDescriptor {
708 obj_type: "ntensor".to_string(),
709 ndim: shape.len() as u64,
710 shape,
711 strides,
712 dtype: Dtype::Float32,
713 byte_order: ByteOrder::native(),
714 encoding: "none".to_string(),
715 filter: "none".to_string(),
716 compression: "none".to_string(),
717 params: BTreeMap::new(),
718 hash: None,
719 }
720 }
721
/// Appending two messages to a fresh file yields two readable messages,
/// and the first one decodes back to the payload that was written.
#[test]
fn test_file_create_append_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("test.tgm"))?;

    let (meta, desc) = (make_global_meta(), make_descriptor(vec![4]));
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();
    for _ in 0..2 {
        file.append(&meta, &[(&desc, payload.as_slice())], &encode_opts)?;
    }

    assert_eq!(file.message_count()?, 2);

    #[allow(deprecated)]
    let all_messages = file.messages()?;
    assert_eq!(all_messages.len(), 2);

    let (decoded_meta, objects) = file.decode_message(0, &DecodeOptions::default())?;
    assert_eq!(decoded_meta.version, 2);
    assert_eq!(objects.len(), 1);
    assert_eq!(objects[0].1, payload);
    Ok(())
}
754
/// Messages can be decoded individually and out of order.
#[test]
fn test_file_lazy_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("lazy.tgm"))?;

    let (meta, desc) = (make_global_meta(), make_descriptor(vec![4]));
    let payloads = [vec![0u8; 16], vec![1u8; 16], vec![2u8; 16]];
    let encode_opts = EncodeOptions::default();
    for payload in &payloads {
        file.append(&meta, &[(&desc, payload.as_slice())], &encode_opts)?;
    }

    assert_eq!(file.message_count()?, 3);

    // Deliberately not in file order.
    for idx in [1usize, 0, 2] {
        let (_, objects) = file.decode_message(idx, &DecodeOptions::default())?;
        assert_eq!(objects[0].1, payloads[idx]);
    }
    Ok(())
}
796
/// `decode_metadata` returns the global metadata that was written.
#[test]
fn test_file_decode_metadata() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("meta.tgm"))?;

    let desc = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    assert_eq!(file.decode_metadata(0)?.version, 2);
    Ok(())
}
816
/// `decode_descriptors` returns the metadata and the single descriptor
/// that was written, with its shape intact.
#[test]
fn test_file_decode_descriptors() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("descs.tgm"))?;

    let desc = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let (decoded_meta, descriptors) = file.decode_descriptors(0)?;
    assert_eq!(decoded_meta.version, 2);
    assert_eq!(descriptors.len(), 1);
    assert_eq!(descriptors[0].shape, vec![4]);
    Ok(())
}
838
/// `decode_object` returns metadata, descriptor and payload of one object.
#[test]
fn test_file_decode_object() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("obj.tgm"))?;

    let desc = make_descriptor(vec![4]);
    let payload = vec![42u8; 16];
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let (decoded_meta, decoded_desc, decoded_data) =
        file.decode_object(0, 0, &DecodeOptions::default())?;
    assert_eq!(decoded_meta.version, 2);
    assert_eq!(decoded_desc.shape, vec![4]);
    assert_eq!(decoded_data, payload);
    Ok(())
}
861
/// A file written through the regular handle can be opened via mmap and
/// both of its messages decode to the payloads that were written.
#[cfg(feature = "mmap")]
#[test]
fn test_mmap_open_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let path = tmp.path().join("mmap.tgm");

    let mut writer = TensogramFile::create(&path)?;
    let (meta, desc) = (make_global_meta(), make_descriptor(vec![4]));
    let payloads = [vec![10u8; 16], vec![20u8; 16]];
    for payload in &payloads {
        writer.append(
            &meta,
            &[(&desc, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let mmap_file = TensogramFile::open_mmap(&path)?;
    assert_eq!(mmap_file.message_count()?, 2);

    let (decoded_meta, first) = mmap_file.decode_message(0, &DecodeOptions::default())?;
    assert_eq!(decoded_meta.version, 2);
    assert_eq!(first[0].1, payloads[0]);

    let (_, second) = mmap_file.decode_message(1, &DecodeOptions::default())?;
    assert_eq!(second[0].1, payloads[1]);
    Ok(())
}
897
/// Reading a message through mmap yields the same bytes as reading it
/// through the regular file path.
#[cfg(feature = "mmap")]
#[test]
fn test_mmap_matches_regular_open() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let path = tmp.path().join("mmap_vs_regular.tgm");

    let mut writer = TensogramFile::create(&path)?;
    let desc = make_descriptor(vec![4]);
    let payload = vec![42u8; 16];
    writer.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let via_file = TensogramFile::open(&path)?.read_message(0)?;
    let via_mmap = TensogramFile::open_mmap(&path)?.read_message(0)?;

    assert_eq!(via_file, via_mmap);
    Ok(())
}
923
/// `iter` yields every raw message in file order; each decodes back to
/// the payload written at that position.
#[test]
fn test_file_iter_via_tensogram_file() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("iter.tgm"))?;

    let (meta, desc) = (make_global_meta(), make_descriptor(vec![4]));
    // Message i carries sixteen bytes of value i.
    for fill in 0u8..3 {
        let payload = vec![fill; 16];
        file.append(
            &meta,
            &[(&desc, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let raw_messages: Vec<Vec<u8>> = file.iter()?.collect::<std::result::Result<Vec<_>, _>>()?;
    assert_eq!(raw_messages.len(), 3);

    for (position, raw) in raw_messages.iter().enumerate() {
        let (_, objects) = crate::decode::decode(raw, &DecodeOptions::default())?;
        assert_eq!(objects[0].1, vec![position as u8; 16]);
    }
    Ok(())
}
963
/// A file opened with `open_async` reports the right message count and
/// both async read paths return the payloads that were written.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_open_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let path = tmp.path().join("async.tgm");

    let mut writer = TensogramFile::create(&path)?;
    let (meta, desc) = (make_global_meta(), make_descriptor(vec![4]));
    let payloads = [vec![10u8; 16], vec![20u8; 16]];
    for payload in &payloads {
        writer.append(
            &meta,
            &[(&desc, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let async_file = TensogramFile::open_async(&path).await?;
    assert_eq!(async_file.message_count()?, 2);

    let raw_first = async_file.read_message_async(0).await?;
    let (first_meta, first_objects) = crate::decode::decode(&raw_first, &DecodeOptions::default())?;
    assert_eq!(first_meta.version, 2);
    assert_eq!(first_objects[0].1, payloads[0]);

    let (_, second_objects) = async_file
        .decode_message_async(1, &DecodeOptions::default())
        .await?;
    assert_eq!(second_objects[0].1, payloads[1]);
    Ok(())
}
1002
/// The async read path returns the same bytes as the sync read path.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_matches_sync() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let path = tmp.path().join("async_vs_sync.tgm");

    let mut writer = TensogramFile::create(&path)?;
    let desc = make_descriptor(vec![4]);
    let payload = vec![42u8; 16];
    writer.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let via_sync = TensogramFile::open(&path)?.read_message(0)?;

    let async_file = TensogramFile::open_async(&path).await?;
    let via_async = async_file.read_message_async(0).await?;

    assert_eq!(via_sync, via_async);
    Ok(())
}
1028
/// Decoding a single range `(2, 3)` from a ten-element tensor returns the
/// descriptor and one buffer of twelve bytes.
#[test]
fn test_local_decode_range_valid() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("range.tgm"))?;

    let desc = make_descriptor(vec![10]);
    let payload: Vec<u8> = (0..40).collect();
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let requested = [(2u64, 3u64)];
    let (ret_desc, parts) = file.decode_range(0, 0, &requested, &DecodeOptions::default())?;
    assert_eq!(ret_desc.shape, vec![10]);
    assert_eq!(parts.len(), 1);
    assert_eq!(parts[0].len(), 3 * 4);
    Ok(())
}
1053
/// Decoding two ranges returns two buffers, each sixteen bytes long.
#[test]
fn test_local_decode_range_multiple_ranges() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("range_multi.tgm"))?;

    let desc = make_descriptor(vec![16]);
    let payload: Vec<u8> = (0..64).collect();
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let requested = vec![(0u64, 4u64), (8u64, 4u64)];
    let (_, parts) = file.decode_range(0, 0, &requested, &DecodeOptions::default())?;
    assert_eq!(parts.len(), 2);
    for part in &parts {
        assert_eq!(part.len(), 4 * 4);
    }
    Ok(())
}
1077
/// Asking for an object index that does not exist fails with an
/// "out of range" error.
#[test]
fn test_local_decode_range_invalid_object_index(
) -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("range_bad_obj.tgm"))?;

    let desc = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let outcome = file.decode_range(0, 5, &[(0, 1)], &DecodeOptions::default());
    assert!(outcome.is_err());
    let msg = outcome.unwrap_err().to_string();
    assert!(
        msg.contains("out of range"),
        "expected 'out of range', got: {msg}"
    );
    Ok(())
}
1103
/// Asking for a message index that does not exist fails.
#[test]
fn test_local_decode_range_invalid_message_index(
) -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let mut file = TensogramFile::create(tmp.path().join("range_bad_msg.tgm"))?;

    let desc = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    file.append(
        &make_global_meta(),
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let outcome = file.decode_range(5, 0, &[(0, 1)], &DecodeOptions::default());
    assert!(outcome.is_err());
    Ok(())
}
1124
/// Appending through a memory-mapped handle is rejected with an error
/// that mentions the mapping.
#[cfg(feature = "mmap")]
#[test]
fn test_mmap_append_returns_unsupported() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let path = tmp.path().join("mmap_append.tgm");

    let meta = make_global_meta();
    let desc = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];

    // Seed the file with one message; the writer is dropped before mapping.
    {
        let mut writer = TensogramFile::create(&path)?;
        writer.append(
            &meta,
            &[(&desc, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let mut mmap_file = TensogramFile::open_mmap(&path)?;
    let outcome = mmap_file.append(
        &meta,
        &[(&desc, payload.as_slice())],
        &EncodeOptions::default(),
    );

    let Err(e) = outcome else {
        panic!("expected error for append on mmap file");
    };
    let err_msg = e.to_string();
    assert!(
        err_msg.contains("memory-mapped") || err_msg.contains("mmap"),
        "expected mmap-related error, got: {err_msg}"
    );
    Ok(())
}
1169
/// `open_async` on a path that does not exist must fail with a not-found
/// style error.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_open_nonexistent_file()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    // A never-created file inside a fresh temp dir cannot exist, whereas a
    // fixed name under /tmp might on some machine (and /tmp is not portable).
    let tmp = tempfile::tempdir()?;
    let missing = tmp.path().join("nonexistent_tensogram_file_12345.tgm");

    let Err(err) = TensogramFile::open_async(&missing).await else {
        panic!("expected error for nonexistent file");
    };
    let err_msg = err.to_string();
    assert!(
        err_msg.contains("not found") || err_msg.contains("NotFound"),
        "expected not-found error, got: {err_msg}"
    );
    Ok(())
}
1189
/// `read_message_async` must report "out of range" for an index past the
/// single message in the file.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_message_index_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let outcome = reader.read_message_async(5).await;
    assert!(outcome.is_err());

    let text = outcome.unwrap_err().to_string();
    assert!(
        text.contains("out of range"),
        "expected 'out of range', got: {text}"
    );
    Ok(())
}
1217
/// `decode_metadata_async` must fail for a message index the file does not
/// contain.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_metadata_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_meta_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // Only one message was written, so index 10 cannot be served.
    let reader = TensogramFile::open_async(&tgm_path).await?;
    let outcome = reader.decode_metadata_async(10).await;
    assert!(outcome.is_err());
    Ok(())
}
1240
/// `decode_descriptors_async` returns the global metadata and one
/// descriptor for a single-object message.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_descriptors() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_descs.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let (read_meta, read_descs) = reader.decode_descriptors_async(0).await?;

    assert_eq!(read_meta.version, 2);
    assert_eq!(read_descs.len(), 1);
    assert_eq!(read_descs[0].shape, vec![4]);
    Ok(())
}
1265
/// `decode_object_async` round-trips the metadata, descriptor shape and
/// payload bytes of the only object in the only message.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_object() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_obj.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let data = vec![42u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(&global_meta, &[(&descriptor, data.as_slice())], &encode_opts)?;

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let decode_opts = DecodeOptions::default();
    let (read_meta, read_desc, read_bytes) = reader.decode_object_async(0, 0, &decode_opts).await?;

    assert_eq!(read_meta.version, 2);
    assert_eq!(read_desc.shape, vec![4]);
    assert_eq!(read_bytes, data);
    Ok(())
}
1291
/// `open_source` given a plain filesystem path yields a non-remote handle
/// that sees the message already written to the file.
#[test]
fn test_open_source_local_path() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("local_source.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // `open_source` takes a string, so the path is converted first.
    let as_text = tgm_path.to_str().unwrap();
    let handle = TensogramFile::open_source(as_text)?;
    assert!(!handle.is_remote());
    assert_eq!(handle.message_count()?, 1);
    Ok(())
}
1315
/// `path()` on a handle from `open` returns the path it was opened with.
#[test]
fn test_path_returns_some_for_local() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("path_test.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let handle = TensogramFile::open(&tgm_path)?;
    assert!(handle.path().is_some());
    assert_eq!(handle.path().unwrap(), tgm_path.as_path());
    Ok(())
}
1338
/// `source()` on a locally opened file includes the file name.
#[test]
fn test_source_returns_path_string_for_local()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("source_test.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let handle = TensogramFile::open(&tgm_path)?;
    let reported = handle.source();
    assert!(
        reported.contains("source_test.tgm"),
        "source() should contain the filename, got: {reported}"
    );
    Ok(())
}
1363
/// `open` on a path that does not exist must fail with a "not found" error.
#[test]
fn test_open_nonexistent_file() {
    // A never-created file inside a fresh temp dir cannot exist, whereas a
    // fixed name under /tmp might on some machine (and /tmp is not portable).
    let tmp = tempfile::tempdir().expect("failed to create temporary directory");
    let missing = tmp.path().join("nonexistent_tensogram_9999.tgm");

    let Err(err) = TensogramFile::open(&missing) else {
        panic!("expected error for nonexistent file");
    };
    let err_msg = err.to_string();
    assert!(
        err_msg.contains("not found"),
        "expected 'not found', got: {err_msg}"
    );
}
1380
/// `create` succeeds for a target several directory levels below an
/// existing directory, and the file exists after the first append.
#[test]
fn test_create_in_nested_path_creates_parent()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;

    // None of a/, a/b/ or a/b/c/ exist before `create` is called.
    let nested: std::path::PathBuf = ["a", "b", "c", "deep.tgm"]
        .iter()
        .fold(tmp.path().to_path_buf(), |acc, part| acc.join(part));

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![2]);
    let payload = vec![0u8; 8];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&nested)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    assert_eq!(tgm.message_count()?, 1);
    assert!(nested.exists());
    Ok(())
}
1402
/// `create` inside a directory without write permission must return an
/// error rather than succeed or panic.
///
/// The test is a no-op when the environment ignores the permission bits
/// (the probe file can still be created), e.g. when running as a
/// privileged user.
#[cfg(unix)]
#[test]
fn test_create_in_nonwritable_location_returns_io_error()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    use std::os::unix::fs::PermissionsExt;

    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();

    // Remember the original mode so it can be restored on every exit path.
    let original = std::fs::metadata(&dir_path)?.permissions();
    let mut readonly = original.clone();
    readonly.set_mode(0o555);
    std::fs::set_permissions(&dir_path, readonly)?;

    // Probe: if a plain file can still be created, the permission bits are
    // not being enforced here and the scenario cannot be exercised.
    let probe_path = dir_path.join(".perm_probe");
    if std::fs::File::create(&probe_path).is_ok() {
        let _ = std::fs::remove_file(&probe_path);
        std::fs::set_permissions(&dir_path, original)?;
        return Ok(());
    }

    let target = dir_path.join("nope.tgm");
    let result = TensogramFile::create(&target);

    // Restore permissions before asserting so the directory is writable
    // again even if an assertion below panics.
    std::fs::set_permissions(&dir_path, original)?;

    let msg = match result {
        Ok(_) => panic!("expected Io error creating in non-writable dir, got Ok"),
        Err(e) => e.to_string(),
    };

    // Compare case-insensitively: OS error texts are capitalised
    // ("Permission denied", "Read-only file system"), so a case-sensitive
    // search for the lowercase keywords could never match them.
    let lowered = msg.to_lowercase();
    assert!(
        lowered.contains("cannot create")
            || lowered.contains("permission")
            || lowered.contains("read-only"),
        "expected permission-related error, got: {msg}"
    );
    Ok(())
}
1459
/// Reading a message after the backing file has been removed must return
/// an error, and reopening the removed path must fail as well.
#[test]
fn test_read_message_from_deleted_file_errors()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("deleted.tgm");

    // Write one message, then let the writer go out of scope.
    {
        let global_meta = make_global_meta();
        let descriptor = make_descriptor(vec![2]);
        let payload = vec![0u8; 8];
        let mut writer = TensogramFile::create(&tgm_path)?;
        writer.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let reader = TensogramFile::open(&tgm_path)?;
    // NOTE(review): counting first presumably populates the handle's cached
    // message offsets, so the read below fails on file access rather than on
    // the scan — confirm against `message_count`.
    assert_eq!(reader.message_count()?, 1);

    std::fs::remove_file(&tgm_path)?;

    let Err(read_err) = reader.read_message(0) else {
        panic!("expected read_message to fail after underlying file was deleted, got Ok");
    };
    let text = read_err.to_string();
    let mentions_missing_file = ["not found", "No such", "cannot"]
        .iter()
        .any(|needle| text.contains(needle));
    assert!(
        mentions_missing_file,
        "expected I/O error mentioning missing file, got: {text}"
    );

    assert!(TensogramFile::open(&tgm_path).is_err());
    Ok(())
}
1509
/// A message appended with no data objects is counted as a message and
/// decodes to metadata plus an empty object list.
#[test]
fn test_append_empty_message() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("empty_append.tgm");
    let global_meta = make_global_meta();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(&global_meta, &[], &EncodeOptions::default())?;
    assert_eq!(tgm.message_count()?, 1);

    let decode_opts = DecodeOptions::default();
    let (read_meta, read_objects) = tgm.decode_message(0, &decode_opts)?;
    assert_eq!(read_meta.version, 2);
    assert_eq!(read_objects.len(), 0);
    Ok(())
}
1528
/// Three appended messages are visible both through the writing handle
/// and through a fresh handle opened after the writer is dropped.
#[test]
fn test_file_iter_after_modification() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("modified.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![2]);
    let payload = vec![0u8; 8];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    for _round in 0..3 {
        writer.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &encode_opts,
        )?;
    }
    assert_eq!(writer.message_count()?, 3);
    drop(writer);

    let fresh = TensogramFile::open(&tgm_path)?;
    assert_eq!(fresh.message_count()?, 3);
    Ok(())
}
1552
/// `decode_message` must return an error for an index far beyond the
/// single message in the file.
#[test]
fn test_decode_message_out_of_range_clearly_errors()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![2]);
    let payload = vec![0u8; 8];

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let outcome = tgm.decode_message(99, &DecodeOptions::default());
    assert!(outcome.is_err());
    Ok(())
}
1570
/// `message_count` reflects each append made through the same handle,
/// including one made after the count was already queried.
#[test]
fn test_invalidate_offsets_forces_rescan() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("invalidate.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;

    // Append then count, twice: the second count must see the new message
    // even though the first count was taken before it was appended.
    for expected in 1..=2 {
        tgm.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &encode_opts,
        )?;
        assert_eq!(tgm.message_count()?, expected);
    }
    Ok(())
}
1601
/// A handle from `open_source` on a local path reports that path via
/// `source()` and `path()`, is not remote, and sees the stored message.
#[test]
fn test_open_source_local_path_source_and_path_accessors()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("open_src_acc.tgm");

    // Populate the file and drop the writer before reopening.
    {
        let global_meta = make_global_meta();
        let descriptor = make_descriptor(vec![4]);
        let payload = vec![7u8; 16];
        let mut writer = TensogramFile::create(&tgm_path)?;
        writer.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let as_text = tgm_path.to_str().unwrap();
    let handle = TensogramFile::open_source(as_text)?;

    let reported = handle.source();
    assert!(
        reported.contains("open_src_acc.tgm"),
        "source() = {}",
        reported
    );
    assert_eq!(handle.path().unwrap(), tgm_path.as_path());
    assert!(!handle.is_remote());

    let total = handle.message_count()?;
    assert_eq!(total, 1);
    Ok(())
}
1641
/// `open_source` on a local path that does not exist must fail with a
/// "not found" error.
#[test]
fn test_open_source_nonexistent() {
    // A never-created file inside a fresh temp dir cannot exist, whereas a
    // fixed name under /tmp might on some machine (and /tmp is not portable).
    let tmp = tempfile::tempdir().expect("failed to create temporary directory");
    let missing = tmp.path().join("no_such_file_tensogram_abc.tgm");
    let missing_str = missing
        .to_str()
        .expect("temporary path should be valid UTF-8");

    let Err(err) = TensogramFile::open_source(missing_str) else {
        panic!("expected error for nonexistent file");
    };
    let msg = err.to_string();
    assert!(
        msg.contains("not found"),
        "expected 'not found', got: {msg}"
    );
}
1658
/// `read_message` succeeds for index 0 of a one-message file and fails
/// for index 1 (with "out of range") and for index 999.
#[test]
fn test_read_message_index_out_of_range() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // The only valid index.
    assert!(tgm.read_message(0).is_ok());

    // First index past the end: the error text is checked too.
    let just_past = tgm.read_message(1);
    assert!(just_past.is_err());
    let text = just_past.unwrap_err().to_string();
    assert!(
        text.contains("out of range"),
        "expected 'out of range', got: {text}"
    );

    // Far past the end.
    let far_past = tgm.read_message(999);
    assert!(far_past.is_err());
    Ok(())
}
1694
/// `decode_range` with the single range `(0, 8)` on a shape-`[8]` object
/// returns one part equal to the full 32-byte payload.
#[test]
fn test_local_decode_range_full_tensor() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("range_full.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![8]);
    // Distinct byte values 0..=31 so any reordering would be detected.
    let data: Vec<u8> = (0..32).collect();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, data.as_slice())],
        &EncodeOptions::default(),
    )?;

    let decode_opts = DecodeOptions::default();
    let (read_desc, chunks) = tgm.decode_range(0, 0, &[(0, 8)], &decode_opts)?;

    assert_eq!(read_desc.shape, vec![8]);
    assert_eq!(chunks.len(), 1);
    assert_eq!(chunks[0].len(), 32);
    assert_eq!(chunks[0], data);
    Ok(())
}
1721
/// `decode_object(0, 0, ..)` returns the metadata, descriptor shape and
/// payload bytes that were appended.
#[test]
fn test_local_decode_object_valid() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("dec_obj.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let data = vec![99u8; 16];

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, data.as_slice())],
        &EncodeOptions::default(),
    )?;

    let decode_opts = DecodeOptions::default();
    let (read_meta, read_desc, read_bytes) = tgm.decode_object(0, 0, &decode_opts)?;

    assert_eq!(read_meta.version, 2);
    assert_eq!(read_desc.shape, vec![4]);
    assert_eq!(read_bytes, data);
    Ok(())
}
1746
/// `decode_object` must report "out of range" when the second index
/// exceeds the single object stored in message 0.
#[test]
fn test_local_decode_object_out_of_range() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("dec_obj_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let outcome = tgm.decode_object(0, 5, &DecodeOptions::default());
    assert!(outcome.is_err());

    let text = outcome.unwrap_err().to_string();
    assert!(
        text.contains("out of range"),
        "expected 'out of range', got: {text}"
    );
    Ok(())
}
1773
/// A handle returned by `create` is not remote.
#[test]
fn test_is_remote_false_for_local() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let created = TensogramFile::create(tmp.path().join("not_remote.tgm"))?;
    let remote = created.is_remote();
    assert!(!remote);
    Ok(())
}
1784
/// A handle returned by `open` on an existing local file is not remote.
#[test]
fn test_is_remote_false_for_opened_local() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("not_remote2.tgm");

    // Write one message so the file exists, then drop the writer.
    {
        let global_meta = make_global_meta();
        let descriptor = make_descriptor(vec![2]);
        let mut writer = TensogramFile::create(&tgm_path)?;
        writer.append(
            &global_meta,
            &[(&descriptor, &[0u8; 8])],
            &EncodeOptions::default(),
        )?;
    }

    let handle = TensogramFile::open(&tgm_path)?;
    assert!(!handle.is_remote());
    Ok(())
}
1800
/// `decode_descriptors` must fail for a message index the file does not
/// contain.
#[test]
fn test_file_decode_descriptors_msg_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("desc_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // One message exists; index 10 does not.
    let outcome = tgm.decode_descriptors(10);
    assert!(outcome.is_err());
    Ok(())
}
1824
/// `decode_metadata` must fail for a message index the file does not
/// contain.
#[test]
fn test_file_decode_metadata_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("meta_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // One message exists; index 99 does not.
    let outcome = tgm.decode_metadata(99);
    assert!(outcome.is_err());
    Ok(())
}
1847
/// Calling `invalidate_offsets` explicitly leaves `message_count`
/// unchanged when the file itself has not changed.
#[test]
fn test_invalidate_offsets_explicit() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("inv_explicit.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let before = tgm.message_count()?;
    assert_eq!(before, 1);

    tgm.invalidate_offsets();

    let after = tgm.message_count()?;
    assert_eq!(after, 1);
    Ok(())
}
1871
/// A file written by one handle can be reopened with `open` and its
/// message decoded with the original payload intact.
#[test]
fn test_open_existing_file_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("reopen.tgm");

    // Write one message and drop the writer before reopening.
    {
        let global_meta = make_global_meta();
        let descriptor = make_descriptor(vec![4]);
        let payload = vec![55u8; 16];
        let mut writer = TensogramFile::create(&tgm_path)?;
        writer.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &EncodeOptions::default(),
        )?;
    }

    let reader = TensogramFile::open(&tgm_path)?;
    assert_eq!(reader.message_count()?, 1);

    let decode_opts = DecodeOptions::default();
    let (read_meta, read_objects) = reader.decode_message(0, &decode_opts)?;
    assert_eq!(read_meta.version, 2);

    let first_object = &read_objects[0];
    assert_eq!(first_object.1, vec![55u8; 16]);
    Ok(())
}
1900
/// `decode_range` must fail when the requested range `(2, 10)` does not
/// fit inside a shape-`[4]` object.
#[test]
fn test_local_decode_range_out_of_bounds_elements()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("range_oob.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut tgm = TensogramFile::create(&tgm_path)?;
    tgm.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let outcome = tgm.decode_range(0, 0, &[(2, 10)], &DecodeOptions::default());
    assert!(outcome.is_err(), "expected error for out-of-bounds range");
    Ok(())
}
1924
/// `path()` on a handle from `create` returns the path it was created at.
#[test]
fn test_path_some_for_created_file() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("path_created.tgm");

    let created = TensogramFile::create(&tgm_path)?;

    assert!(created.path().is_some());
    assert_eq!(created.path().unwrap(), tgm_path.as_path());
    Ok(())
}
1937
/// `decode_range_async` with the single range `(0, 5)` on a shape-`[10]`
/// object returns the full descriptor and one 20-byte part.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_range() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_range.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![10]);
    // 40 bytes of payload for the 10-element object.
    let payload: Vec<u8> = (0..40).collect();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &EncodeOptions::default(),
    )?;

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let decode_opts = DecodeOptions::default();
    let (read_desc, chunks) = reader
        .decode_range_async(0, 0, &[(0, 5)], &decode_opts)
        .await?;

    assert_eq!(read_desc.shape, vec![10]);
    assert_eq!(chunks.len(), 1);
    assert_eq!(chunks[0].len(), 5 * 4);
    Ok(())
}
1965
/// `decode_object_async` must report "out of range" when the second index
/// exceeds the single object stored in message 0.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_object_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_obj_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let decode_opts = DecodeOptions::default();
    let outcome = reader.decode_object_async(0, 99, &decode_opts).await;
    assert!(outcome.is_err());

    let text = outcome.unwrap_err().to_string();
    assert!(
        text.contains("out of range"),
        "expected 'out of range', got: {text}"
    );
    Ok(())
}
1997
/// `message_count_async` reports both messages written to the file.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_message_count() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_count.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    // Two identical messages.
    let mut writer = TensogramFile::create(&tgm_path)?;
    for _round in 0..2 {
        writer.append(
            &global_meta,
            &[(&descriptor, payload.as_slice())],
            &encode_opts,
        )?;
    }

    let reader = TensogramFile::open_async(&tgm_path).await?;
    let total = reader.message_count_async().await?;
    assert_eq!(total, 2);
    Ok(())
}
2026
/// `decode_descriptors_async` must fail for a message index the file does
/// not contain.
#[cfg(feature = "async")]
#[tokio::test]
async fn test_async_decode_descriptors_out_of_range()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tmp = tempfile::tempdir()?;
    let tgm_path = tmp.path().join("async_descs_oor.tgm");

    let global_meta = make_global_meta();
    let descriptor = make_descriptor(vec![4]);
    let payload = vec![0u8; 16];
    let encode_opts = EncodeOptions::default();

    let mut writer = TensogramFile::create(&tgm_path)?;
    writer.append(
        &global_meta,
        &[(&descriptor, payload.as_slice())],
        &encode_opts,
    )?;

    // One message exists; index 10 does not.
    let reader = TensogramFile::open_async(&tgm_path).await?;
    let outcome = reader.decode_descriptors_async(10).await;
    assert!(outcome.is_err());
    Ok(())
}
2051}