Skip to main content

tensogram/
file.rs

1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9use std::fs;
10use std::io::{Read, Seek, SeekFrom, Write};
11use std::path::{Path, PathBuf};
12use std::sync::OnceLock;
13
14use crate::decode::{self, DecodeOptions};
15use crate::encode::{self, EncodeOptions};
16use crate::error::{Result, TensogramError};
17use crate::framing;
18use crate::types::{DataObjectDescriptor, DecodedObject, GlobalMetadata};
19
20// ── Backend enum ─────────────────────────────────────────────────────────────
21
/// Storage backend behind a [`TensogramFile`].
enum Backend {
    /// A file on the local filesystem, addressed by `path`.
    Local {
        path: PathBuf,
        /// Memory map of the file; `Some` only for handles built by
        /// `open_mmap`, `None` for every other local constructor.
        #[cfg(feature = "mmap")]
        mmap: Option<memmap2::Mmap>,
    },
    /// A remote source, handled entirely by `crate::remote::RemoteBackend`.
    #[cfg(feature = "remote")]
    Remote(crate::remote::RemoteBackend),
}
31
32impl Backend {
33    fn source_string(&self) -> String {
34        match self {
35            Backend::Local { path, .. } => path.display().to_string(),
36            #[cfg(feature = "remote")]
37            Backend::Remote(r) => r.source_url().to_string(),
38        }
39    }
40}
41
/// A handle for reading/writing Tensogram message files.
///
/// Supports local files, memory-mapped files (`mmap` feature), and
/// remote object stores (`remote` feature) via a unified API.
pub struct TensogramFile {
    /// Where the bytes live (local path, optional memory map, or remote).
    backend: Backend,
    /// Cached `(offset, length)` pair per message for local backends.
    /// Filled on first use by a scan (or eagerly by `open_mmap` /
    /// `open_async`) and reset by `append` and `invalidate_offsets`.
    message_offsets: OnceLock<Vec<(usize, usize)>>,
}
50
51impl TensogramFile {
52    #[tracing::instrument(skip(path), fields(path = %path.as_ref().display()))]
53    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
54        let path = path.as_ref().to_path_buf();
55        if !path.exists() {
56            return Err(TensogramError::Io(std::io::Error::new(
57                std::io::ErrorKind::NotFound,
58                format!("file not found: {}", path.display()),
59            )));
60        }
61        Ok(TensogramFile {
62            backend: Backend::Local {
63                path,
64                #[cfg(feature = "mmap")]
65                mmap: None,
66            },
67            message_offsets: OnceLock::new(),
68        })
69    }
70
71    /// Open a local file or remote URL.
72    ///
73    /// Auto-detects remote URLs (s3://, s3a://, gs://, az://, azure://, http://, https://)
74    /// when the `remote` feature is enabled; otherwise treats the source
75    /// as a local path.
76    pub fn open_source(source: impl AsRef<str>) -> Result<Self> {
77        let source = source.as_ref();
78
79        #[cfg(feature = "remote")]
80        if crate::remote::is_remote_url(source) {
81            return Self::open_remote(source, &std::collections::BTreeMap::new());
82        }
83
84        Self::open(source)
85    }
86
87    /// Open a remote file with explicit storage options (credentials, region, etc.).
88    #[cfg(feature = "remote")]
89    pub fn open_remote(
90        source: &str,
91        storage_options: &std::collections::BTreeMap<String, String>,
92    ) -> Result<Self> {
93        let remote = crate::remote::RemoteBackend::open(source, storage_options)?;
94        Ok(TensogramFile {
95            backend: Backend::Remote(remote),
96            message_offsets: OnceLock::new(),
97        })
98    }
99
100    /// Create a new local file for writing (truncates if exists).
101    #[tracing::instrument(skip(path), fields(path = %path.as_ref().display()))]
102    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
103        let path = path.as_ref().to_path_buf();
104        if let Some(parent) = path.parent() {
105            fs::create_dir_all(parent).map_err(|e| {
106                TensogramError::Io(std::io::Error::new(
107                    e.kind(),
108                    format!("cannot create parent directory for {}: {e}", path.display()),
109                ))
110            })?;
111        }
112        fs::File::create(&path).map_err(|e| {
113            TensogramError::Io(std::io::Error::new(
114                e.kind(),
115                format!("cannot create {}: {e}", path.display()),
116            ))
117        })?;
118        Ok(TensogramFile {
119            backend: Backend::Local {
120                path,
121                #[cfg(feature = "mmap")]
122                mmap: None,
123            },
124            message_offsets: OnceLock::new(),
125        })
126    }
127
    /// Open a file with memory-mapped I/O.
    ///
    /// The file is mapped into memory and `framing::scan` runs directly on
    /// the mapped buffer, so message offsets are already cached when this
    /// returns. `read_message()` returns a copy from the mapped region.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the file cannot be opened (the message is
    /// prefixed with the path) or if creating the mapping fails.
    #[cfg(feature = "mmap")]
    pub fn open_mmap(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref().to_path_buf();
        let file = fs::File::open(&path).map_err(|e| {
            TensogramError::Io(std::io::Error::new(
                e.kind(),
                format!("{}: {e}", path.display()),
            ))
        })?;
        // SAFETY: the file is opened read-only and we hold the path for
        // the lifetime of TensogramFile. Concurrent writes by other
        // processes would violate the contract, but that is the standard
        // mmap caveat documented by memmap2.
        let mmap = unsafe { memmap2::Mmap::map(&file)? };
        // Scan the mapped bytes once up front; later reads reuse these offsets.
        let offsets = framing::scan(&mmap);
        Ok(TensogramFile {
            backend: Backend::Local {
                path,
                mmap: Some(mmap),
            },
            message_offsets: OnceLock::from(offsets),
        })
    }
155
156    // ── Helpers ──────────────────────────────────────────────────────────
157
    /// Return the filesystem path of a local backend.
    ///
    /// With the `remote` feature enabled, a remote backend yields
    /// `TensogramError::Remote` instead, which is how local-only
    /// operations reject remote handles.
    fn local_path(&self) -> Result<&PathBuf> {
        match &self.backend {
            Backend::Local { path, .. } => Ok(path),
            #[cfg(feature = "remote")]
            Backend::Remote(_) => Err(TensogramError::Remote(
                "operation not supported on remote files".to_string(),
            )),
        }
    }
167
168    fn ensure_scanned(&self) -> Result<()> {
169        if self.message_offsets.get().is_none() {
170            let path = self.local_path()?.clone();
171            let mut file = fs::File::open(&path).map_err(|e| {
172                TensogramError::Io(std::io::Error::new(
173                    e.kind(),
174                    format!("{}: {e}", path.display()),
175                ))
176            })?;
177            let offsets = framing::scan_file(&mut file)?;
178            let _ = self.message_offsets.set(offsets);
179        }
180        Ok(())
181    }
182
183    fn checked_offsets(&self, index: usize) -> Result<(usize, usize)> {
184        let offsets = self
185            .message_offsets
186            .get()
187            .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?;
188        if index >= offsets.len() {
189            return Err(TensogramError::Framing(format!(
190                "message index {} out of range (count={})",
191                index,
192                offsets.len()
193            )));
194        }
195        Ok(offsets[index])
196    }
197
198    // ── Public API ───────────────────────────────────────────────────────
199
200    pub fn message_count(&self) -> Result<usize> {
201        #[cfg(feature = "remote")]
202        if let Backend::Remote(remote) = &self.backend {
203            return remote.message_count();
204        }
205        self.ensure_scanned()?;
206        Ok(self
207            .message_offsets
208            .get()
209            .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
210            .len())
211    }
212
213    pub fn append(
214        &mut self,
215        global_metadata: &GlobalMetadata,
216        descriptors: &[(&DataObjectDescriptor, &[u8])],
217        options: &EncodeOptions,
218    ) -> Result<()> {
219        #[cfg(feature = "mmap")]
220        if let Backend::Local { mmap: Some(_), .. } = &self.backend {
221            return Err(TensogramError::Io(std::io::Error::new(
222                std::io::ErrorKind::Unsupported,
223                "cannot append to a memory-mapped file (open without mmap to append)",
224            )));
225        }
226        let path = self.local_path()?.clone();
227        let msg = encode::encode(global_metadata, descriptors, options)?;
228        let mut file = fs::OpenOptions::new()
229            .create(true)
230            .append(true)
231            .open(&path)
232            .map_err(|e| {
233                TensogramError::Io(std::io::Error::new(
234                    e.kind(),
235                    format!("{}: {e}", path.display()),
236                ))
237            })?;
238        file.write_all(&msg)?;
239        self.message_offsets = OnceLock::new();
240        Ok(())
241    }
242
243    pub fn read_message(&self, index: usize) -> Result<Vec<u8>> {
244        #[cfg(feature = "remote")]
245        if let Backend::Remote(remote) = &self.backend {
246            return remote.read_message(index);
247        }
248
249        self.ensure_scanned()?;
250
251        let (offset, length) = self.checked_offsets(index)?;
252
253        #[cfg(feature = "mmap")]
254        if let Backend::Local {
255            mmap: Some(mmap), ..
256        } = &self.backend
257        {
258            return Ok(mmap[offset..offset + length].to_vec());
259        }
260
261        match &self.backend {
262            Backend::Local { path, .. } => {
263                let mut file = fs::File::open(path).map_err(|e| {
264                    TensogramError::Io(std::io::Error::new(
265                        e.kind(),
266                        format!("{}: {e}", path.display()),
267                    ))
268                })?;
269                file.seek(SeekFrom::Start(offset as u64))?;
270                let mut buf = vec![0u8; length];
271                file.read_exact(&mut buf)?;
272                Ok(buf)
273            }
274            #[cfg(feature = "remote")]
275            Backend::Remote(_) => Err(TensogramError::Remote(
276                "unreachable: remote handled above".to_string(),
277            )),
278        }
279    }
280
281    #[deprecated(note = "Use message_count() + read_message(index) for lazy access")]
282    pub fn messages(&self) -> Result<Vec<Vec<u8>>> {
283        self.ensure_scanned()?;
284        let count = self
285            .message_offsets
286            .get()
287            .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
288            .len();
289        let mut msgs = Vec::with_capacity(count);
290        for i in 0..count {
291            msgs.push(self.read_message(i)?);
292        }
293        Ok(msgs)
294    }
295
296    pub fn decode_message(
297        &self,
298        index: usize,
299        options: &DecodeOptions,
300    ) -> Result<(GlobalMetadata, Vec<DecodedObject>)> {
301        let msg = self.read_message(index)?;
302        decode::decode(&msg, options)
303    }
304
305    pub fn iter(&self) -> Result<crate::iter::FileMessageIter> {
306        self.ensure_scanned()?;
307        let path = self.local_path()?.clone();
308        let offsets = self
309            .message_offsets
310            .get()
311            .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
312            .clone();
313        crate::iter::FileMessageIter::new(path, offsets)
314    }
315
    /// Filesystem path of a local handle, or `None` for a remote handle.
    pub fn path(&self) -> Option<&Path> {
        match &self.backend {
            Backend::Local { path, .. } => Some(path),
            #[cfg(feature = "remote")]
            Backend::Remote(_) => None,
        }
    }
323
    /// Textual description of where this handle reads from: the displayed
    /// path for local files, the source URL for remote ones.
    pub fn source(&self) -> String {
        self.backend.source_string()
    }
327
328    pub fn invalidate_offsets(&mut self) {
329        match &self.backend {
330            Backend::Local { .. } => {
331                self.message_offsets = OnceLock::new();
332            }
333            #[cfg(feature = "remote")]
334            Backend::Remote(_) => {}
335        }
336    }
337
338    // ── Object-level access (efficient for remote) ───────────────────────
339
340    pub fn decode_metadata(&self, msg_idx: usize) -> Result<GlobalMetadata> {
341        match &self.backend {
342            #[cfg(feature = "remote")]
343            Backend::Remote(remote) => remote.read_metadata(msg_idx),
344            _ => {
345                let msg = self.read_message(msg_idx)?;
346                decode::decode_metadata(&msg)
347            }
348        }
349    }
350
351    pub fn decode_descriptors(
352        &self,
353        msg_idx: usize,
354    ) -> Result<(GlobalMetadata, Vec<DataObjectDescriptor>)> {
355        match &self.backend {
356            #[cfg(feature = "remote")]
357            Backend::Remote(remote) => remote.read_descriptors(msg_idx),
358            _ => {
359                let msg = self.read_message(msg_idx)?;
360                decode::decode_descriptors(&msg)
361            }
362        }
363    }
364
365    pub fn decode_object(
366        &self,
367        msg_idx: usize,
368        obj_idx: usize,
369        options: &DecodeOptions,
370    ) -> Result<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)> {
371        match &self.backend {
372            #[cfg(feature = "remote")]
373            Backend::Remote(remote) => remote.read_object(msg_idx, obj_idx, options),
374            _ => {
375                let msg = self.read_message(msg_idx)?;
376                decode::decode_object(&msg, obj_idx, options)
377            }
378        }
379    }
380
381    #[cfg(feature = "remote")]
382    pub fn decode_range_batch(
383        &self,
384        msg_indices: &[usize],
385        obj_idx: usize,
386        ranges: &[(u64, u64)],
387        options: &DecodeOptions,
388    ) -> Result<Vec<(DataObjectDescriptor, Vec<Vec<u8>>)>> {
389        match &self.backend {
390            Backend::Remote(remote) => {
391                remote.read_range_batch(msg_indices, obj_idx, ranges, options)
392            }
393            _ => Err(TensogramError::Io(std::io::Error::other(
394                "batch range decode requires a remote backend",
395            ))),
396        }
397    }
398
399    #[cfg(feature = "remote")]
400    pub fn decode_object_batch(
401        &self,
402        msg_indices: &[usize],
403        obj_idx: usize,
404        options: &DecodeOptions,
405    ) -> Result<Vec<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)>> {
406        match &self.backend {
407            Backend::Remote(remote) => remote.read_object_batch(msg_indices, obj_idx, options),
408            _ => Err(TensogramError::Io(std::io::Error::other(
409                "batch object decode requires a remote backend",
410            ))),
411        }
412    }
413
414    pub fn decode_range(
415        &self,
416        msg_idx: usize,
417        obj_idx: usize,
418        ranges: &[(u64, u64)],
419        options: &DecodeOptions,
420    ) -> Result<(DataObjectDescriptor, Vec<Vec<u8>>)> {
421        match &self.backend {
422            #[cfg(feature = "remote")]
423            Backend::Remote(remote) => remote.read_range(msg_idx, obj_idx, ranges, options),
424            _ => {
425                let msg = self.read_message(msg_idx)?;
426                decode::decode_range(&msg, obj_idx, ranges, options)
427            }
428        }
429    }
430
    /// `true` when this handle is backed by a remote source.
    ///
    /// Always `false` when the crate is built without the `remote` feature,
    /// because the remote variant does not exist in that configuration.
    pub fn is_remote(&self) -> bool {
        #[cfg(feature = "remote")]
        if matches!(self.backend, Backend::Remote(_)) {
            return true;
        }
        false
    }
438
439    // ── Async API (requires `async` feature) ─────────────────────────────
440
441    #[cfg(feature = "async")]
442    pub async fn open_async(path: impl AsRef<Path>) -> Result<Self> {
443        let path = path.as_ref().to_path_buf();
444        if !path.exists() {
445            return Err(TensogramError::Io(std::io::Error::new(
446                std::io::ErrorKind::NotFound,
447                format!("file not found: {}", path.display()),
448            )));
449        }
450        let p = path.clone();
451        let offsets = tokio::task::spawn_blocking(move || {
452            let mut file = fs::File::open(&p).map_err(|e| {
453                TensogramError::Io(std::io::Error::new(
454                    e.kind(),
455                    format!("{}: {e}", p.display()),
456                ))
457            })?;
458            framing::scan_file(&mut file)
459        })
460        .await
461        .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;
462
463        Ok(TensogramFile {
464            backend: Backend::Local {
465                path,
466                #[cfg(feature = "mmap")]
467                mmap: None,
468            },
469            message_offsets: OnceLock::from(offsets),
470        })
471    }
472
473    #[cfg(feature = "async")]
474    pub async fn message_count_async(&self) -> Result<usize> {
475        #[cfg(feature = "remote")]
476        if let Backend::Remote(remote) = &self.backend {
477            return remote.message_count_async().await;
478        }
479
480        if self.message_offsets.get().is_none() {
481            let p = self.local_path()?.clone();
482            let offsets = tokio::task::spawn_blocking(move || {
483                let mut file = fs::File::open(&p)?;
484                framing::scan_file(&mut file)
485            })
486            .await
487            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;
488            let _ = self.message_offsets.set(offsets);
489        }
490
491        Ok(self
492            .message_offsets
493            .get()
494            .ok_or_else(|| TensogramError::Framing("scan result missing".to_string()))?
495            .len())
496    }
497
498    #[cfg(feature = "async")]
499    pub async fn read_message_async(&self, index: usize) -> Result<Vec<u8>> {
500        #[cfg(feature = "remote")]
501        if let Backend::Remote(remote) = &self.backend {
502            return remote.read_message_async(index).await;
503        }
504
505        if self.message_offsets.get().is_none() {
506            let p = self.local_path()?.clone();
507            let offsets = tokio::task::spawn_blocking(move || {
508                let mut file = fs::File::open(&p)?;
509                framing::scan_file(&mut file)
510            })
511            .await
512            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))??;
513            let _ = self.message_offsets.set(offsets);
514        }
515
516        let (offset, length) = self.checked_offsets(index)?;
517
518        let p = self.local_path()?.clone();
519        tokio::task::spawn_blocking(move || {
520            let mut file = fs::File::open(&p)?;
521            file.seek(SeekFrom::Start(offset as u64))?;
522            let mut buf = vec![0u8; length];
523            file.read_exact(&mut buf)?;
524            Ok(buf)
525        })
526        .await
527        .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
528    }
529
530    #[cfg(feature = "async")]
531    pub async fn decode_message_async(
532        &self,
533        index: usize,
534        options: &DecodeOptions,
535    ) -> Result<(GlobalMetadata, Vec<DecodedObject>)> {
536        let msg = self.read_message_async(index).await?;
537        let opts = options.clone();
538        tokio::task::spawn_blocking(move || decode::decode(&msg, &opts))
539            .await
540            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
541    }
542
543    #[cfg(all(feature = "remote", feature = "async"))]
544    pub async fn open_source_async(source: impl AsRef<str>) -> Result<Self> {
545        let source = source.as_ref();
546
547        if crate::remote::is_remote_url(source) {
548            return Self::open_remote_async(source, &std::collections::BTreeMap::new()).await;
549        }
550
551        Self::open_async(source).await
552    }
553
554    #[cfg(all(feature = "remote", feature = "async"))]
555    pub async fn open_remote_async(
556        source: &str,
557        storage_options: &std::collections::BTreeMap<String, String>,
558    ) -> Result<Self> {
559        let remote = crate::remote::RemoteBackend::open_async(source, storage_options).await?;
560        Ok(TensogramFile {
561            backend: Backend::Remote(remote),
562            message_offsets: OnceLock::new(),
563        })
564    }
565
566    #[cfg(feature = "async")]
567    pub async fn decode_metadata_async(&self, msg_idx: usize) -> Result<GlobalMetadata> {
568        #[cfg(feature = "remote")]
569        if let Backend::Remote(remote) = &self.backend {
570            return remote.read_metadata_async(msg_idx).await;
571        }
572
573        let msg = self.read_message_async(msg_idx).await?;
574        tokio::task::spawn_blocking(move || decode::decode_metadata(&msg))
575            .await
576            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
577    }
578
579    #[cfg(feature = "async")]
580    pub async fn decode_descriptors_async(
581        &self,
582        msg_idx: usize,
583    ) -> Result<(GlobalMetadata, Vec<DataObjectDescriptor>)> {
584        #[cfg(feature = "remote")]
585        if let Backend::Remote(remote) = &self.backend {
586            return remote.read_descriptors_async(msg_idx).await;
587        }
588
589        let msg = self.read_message_async(msg_idx).await?;
590        tokio::task::spawn_blocking(move || decode::decode_descriptors(&msg))
591            .await
592            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
593    }
594
595    #[cfg(feature = "async")]
596    pub async fn decode_object_async(
597        &self,
598        msg_idx: usize,
599        obj_idx: usize,
600        options: &DecodeOptions,
601    ) -> Result<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)> {
602        #[cfg(feature = "remote")]
603        if let Backend::Remote(remote) = &self.backend {
604            return remote.read_object_async(msg_idx, obj_idx, options).await;
605        }
606
607        let msg = self.read_message_async(msg_idx).await?;
608        let opts = options.clone();
609        tokio::task::spawn_blocking(move || decode::decode_object(&msg, obj_idx, &opts))
610            .await
611            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
612    }
613
614    #[cfg(feature = "async")]
615    pub async fn decode_range_async(
616        &self,
617        msg_idx: usize,
618        obj_idx: usize,
619        ranges: &[(u64, u64)],
620        options: &DecodeOptions,
621    ) -> Result<(DataObjectDescriptor, Vec<Vec<u8>>)> {
622        #[cfg(feature = "remote")]
623        if let Backend::Remote(remote) = &self.backend {
624            return remote
625                .read_range_async(msg_idx, obj_idx, ranges, options)
626                .await;
627        }
628
629        let msg = self.read_message_async(msg_idx).await?;
630        let ranges = ranges.to_vec();
631        let opts = options.clone();
632        tokio::task::spawn_blocking(move || decode::decode_range(&msg, obj_idx, &ranges, &opts))
633            .await
634            .map_err(|e| TensogramError::Io(std::io::Error::other(e)))?
635    }
636
637    #[cfg(all(feature = "remote", feature = "async"))]
638    pub async fn prefetch_layouts_async(&self, msg_indices: &[usize]) -> Result<()> {
639        if let Backend::Remote(remote) = &self.backend {
640            return remote.ensure_all_layouts_batch_async(msg_indices).await;
641        }
642        Ok(())
643    }
644
645    #[cfg(all(feature = "remote", feature = "async"))]
646    pub async fn decode_object_batch_async(
647        &self,
648        msg_indices: &[usize],
649        obj_idx: usize,
650        options: &DecodeOptions,
651    ) -> Result<Vec<(GlobalMetadata, DataObjectDescriptor, Vec<u8>)>> {
652        if let Backend::Remote(remote) = &self.backend {
653            return remote
654                .read_object_batch_async(msg_indices, obj_idx, options)
655                .await;
656        }
657        Err(TensogramError::Io(std::io::Error::other(
658            "batch object decode requires a remote backend",
659        )))
660    }
661
662    #[cfg(all(feature = "remote", feature = "async"))]
663    pub async fn decode_range_batch_async(
664        &self,
665        msg_indices: &[usize],
666        obj_idx: usize,
667        ranges: &[(u64, u64)],
668        options: &DecodeOptions,
669    ) -> Result<Vec<(DataObjectDescriptor, Vec<Vec<u8>>)>> {
670        if let Backend::Remote(remote) = &self.backend {
671            return remote
672                .read_range_batch_async(msg_indices, obj_idx, ranges, options)
673                .await;
674        }
675        Err(TensogramError::Io(std::io::Error::other(
676            "batch range decode requires a remote backend",
677        )))
678    }
679}
680
681#[cfg(test)]
682mod tests {
683    use super::*;
684    use crate::dtype::Dtype;
685    use crate::types::ByteOrder;
686    use std::collections::BTreeMap;
687
688    fn make_global_meta() -> GlobalMetadata {
689        GlobalMetadata {
690            version: 2,
691            extra: BTreeMap::new(),
692            ..Default::default()
693        }
694    }
695
696    fn make_descriptor(shape: Vec<u64>) -> DataObjectDescriptor {
697        let strides = if shape.is_empty() {
698            vec![]
699        } else {
700            let mut s = vec![1u64; shape.len()];
701            for i in (0..shape.len() - 1).rev() {
702                s[i] = s[i + 1] * shape[i + 1];
703            }
704            s
705        };
706
707        DataObjectDescriptor {
708            obj_type: "ntensor".to_string(),
709            ndim: shape.len() as u64,
710            shape,
711            strides,
712            dtype: Dtype::Float32,
713            byte_order: ByteOrder::native(),
714            encoding: "none".to_string(),
715            filter: "none".to_string(),
716            compression: "none".to_string(),
717            params: BTreeMap::new(),
718            hash: None,
719        }
720    }
721
722    #[test]
723    fn test_file_create_append_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
724        let dir = tempfile::tempdir()?;
725        let path = dir.path().join("test.tgm");
726
727        let mut file = TensogramFile::create(&path)?;
728        let meta = make_global_meta();
729        let desc = make_descriptor(vec![4]);
730        let data = vec![0u8; 16];
731        file.append(
732            &meta,
733            &[(&desc, data.as_slice())],
734            &EncodeOptions::default(),
735        )?;
736        file.append(
737            &meta,
738            &[(&desc, data.as_slice())],
739            &EncodeOptions::default(),
740        )?;
741
742        assert_eq!(file.message_count()?, 2);
743
744        #[allow(deprecated)]
745        let msgs = file.messages()?;
746        assert_eq!(msgs.len(), 2);
747
748        let (decoded_meta, objects) = file.decode_message(0, &DecodeOptions::default())?;
749        assert_eq!(decoded_meta.version, 2);
750        assert_eq!(objects.len(), 1);
751        assert_eq!(objects[0].1, data);
752        Ok(())
753    }
754
755    #[test]
756    fn test_file_lazy_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
757        let dir = tempfile::tempdir()?;
758        let path = dir.path().join("lazy.tgm");
759
760        let mut file = TensogramFile::create(&path)?;
761        let meta = make_global_meta();
762        let desc = make_descriptor(vec![4]);
763
764        let data0 = vec![0u8; 16];
765        let data1 = vec![1u8; 16];
766        let data2 = vec![2u8; 16];
767
768        file.append(
769            &meta,
770            &[(&desc, data0.as_slice())],
771            &EncodeOptions::default(),
772        )?;
773        file.append(
774            &meta,
775            &[(&desc, data1.as_slice())],
776            &EncodeOptions::default(),
777        )?;
778        file.append(
779            &meta,
780            &[(&desc, data2.as_slice())],
781            &EncodeOptions::default(),
782        )?;
783
784        assert_eq!(file.message_count()?, 3);
785
786        let (_, obj1) = file.decode_message(1, &DecodeOptions::default())?;
787        assert_eq!(obj1[0].1, data1);
788
789        let (_, obj0) = file.decode_message(0, &DecodeOptions::default())?;
790        assert_eq!(obj0[0].1, data0);
791
792        let (_, obj2) = file.decode_message(2, &DecodeOptions::default())?;
793        assert_eq!(obj2[0].1, data2);
794        Ok(())
795    }
796
797    #[test]
798    fn test_file_decode_metadata() -> std::result::Result<(), Box<dyn std::error::Error>> {
799        let dir = tempfile::tempdir()?;
800        let path = dir.path().join("meta.tgm");
801
802        let mut file = TensogramFile::create(&path)?;
803        let meta = make_global_meta();
804        let desc = make_descriptor(vec![4]);
805        let data = vec![0u8; 16];
806        file.append(
807            &meta,
808            &[(&desc, data.as_slice())],
809            &EncodeOptions::default(),
810        )?;
811
812        let decoded_meta = file.decode_metadata(0)?;
813        assert_eq!(decoded_meta.version, 2);
814        Ok(())
815    }
816
817    #[test]
818    fn test_file_decode_descriptors() -> std::result::Result<(), Box<dyn std::error::Error>> {
819        let dir = tempfile::tempdir()?;
820        let path = dir.path().join("descs.tgm");
821
822        let mut file = TensogramFile::create(&path)?;
823        let meta = make_global_meta();
824        let desc = make_descriptor(vec![4]);
825        let data = vec![0u8; 16];
826        file.append(
827            &meta,
828            &[(&desc, data.as_slice())],
829            &EncodeOptions::default(),
830        )?;
831
832        let (decoded_meta, descriptors) = file.decode_descriptors(0)?;
833        assert_eq!(decoded_meta.version, 2);
834        assert_eq!(descriptors.len(), 1);
835        assert_eq!(descriptors[0].shape, vec![4]);
836        Ok(())
837    }
838
839    #[test]
840    fn test_file_decode_object() -> std::result::Result<(), Box<dyn std::error::Error>> {
841        let dir = tempfile::tempdir()?;
842        let path = dir.path().join("obj.tgm");
843
844        let mut file = TensogramFile::create(&path)?;
845        let meta = make_global_meta();
846        let desc = make_descriptor(vec![4]);
847        let data = vec![42u8; 16];
848        file.append(
849            &meta,
850            &[(&desc, data.as_slice())],
851            &EncodeOptions::default(),
852        )?;
853
854        let (decoded_meta, decoded_desc, decoded_data) =
855            file.decode_object(0, 0, &DecodeOptions::default())?;
856        assert_eq!(decoded_meta.version, 2);
857        assert_eq!(decoded_desc.shape, vec![4]);
858        assert_eq!(decoded_data, data);
859        Ok(())
860    }
861
862    // ── mmap tests ───────────────────────────────────────────────────────
863
864    #[cfg(feature = "mmap")]
865    #[test]
866    fn test_mmap_open_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
867        let dir = tempfile::tempdir()?;
868        let path = dir.path().join("mmap.tgm");
869
870        let mut file = TensogramFile::create(&path)?;
871        let meta = make_global_meta();
872        let desc = make_descriptor(vec![4]);
873        let data0 = vec![10u8; 16];
874        let data1 = vec![20u8; 16];
875        file.append(
876            &meta,
877            &[(&desc, data0.as_slice())],
878            &EncodeOptions::default(),
879        )?;
880        file.append(
881            &meta,
882            &[(&desc, data1.as_slice())],
883            &EncodeOptions::default(),
884        )?;
885
886        let mmap_file = TensogramFile::open_mmap(&path)?;
887        assert_eq!(mmap_file.message_count()?, 2);
888
889        let (decoded_meta, objects) = mmap_file.decode_message(0, &DecodeOptions::default())?;
890        assert_eq!(decoded_meta.version, 2);
891        assert_eq!(objects[0].1, data0);
892
893        let (_, objects1) = mmap_file.decode_message(1, &DecodeOptions::default())?;
894        assert_eq!(objects1[0].1, data1);
895        Ok(())
896    }
897
898    #[cfg(feature = "mmap")]
899    #[test]
900    fn test_mmap_matches_regular_open() -> std::result::Result<(), Box<dyn std::error::Error>> {
901        let dir = tempfile::tempdir()?;
902        let path = dir.path().join("mmap_vs_regular.tgm");
903
904        let mut file = TensogramFile::create(&path)?;
905        let meta = make_global_meta();
906        let desc = make_descriptor(vec![4]);
907        let data = vec![42u8; 16];
908        file.append(
909            &meta,
910            &[(&desc, data.as_slice())],
911            &EncodeOptions::default(),
912        )?;
913
914        let regular = TensogramFile::open(&path)?;
915        let regular_msg = regular.read_message(0)?;
916
917        let mmap = TensogramFile::open_mmap(&path)?;
918        let mmap_msg = mmap.read_message(0)?;
919
920        assert_eq!(regular_msg, mmap_msg);
921        Ok(())
922    }
923
924    #[test]
925    fn test_file_iter_via_tensogram_file() -> std::result::Result<(), Box<dyn std::error::Error>> {
926        let dir = tempfile::tempdir()?;
927        let path = dir.path().join("iter.tgm");
928
929        let mut file = TensogramFile::create(&path)?;
930        let meta = make_global_meta();
931        let desc = make_descriptor(vec![4]);
932
933        let data0 = vec![0u8; 16];
934        let data1 = vec![1u8; 16];
935        let data2 = vec![2u8; 16];
936
937        file.append(
938            &meta,
939            &[(&desc, data0.as_slice())],
940            &EncodeOptions::default(),
941        )?;
942        file.append(
943            &meta,
944            &[(&desc, data1.as_slice())],
945            &EncodeOptions::default(),
946        )?;
947        file.append(
948            &meta,
949            &[(&desc, data2.as_slice())],
950            &EncodeOptions::default(),
951        )?;
952
953        let raw_messages: Vec<Vec<u8>> =
954            file.iter()?.collect::<std::result::Result<Vec<_>, _>>()?;
955        assert_eq!(raw_messages.len(), 3);
956
957        for (i, raw) in raw_messages.iter().enumerate() {
958            let (_, objects) = crate::decode::decode(raw, &DecodeOptions::default())?;
959            assert_eq!(objects[0].1, vec![i as u8; 16]);
960        }
961        Ok(())
962    }
963
964    // ── async tests ──────────────────────────────────────────────────────
965
966    #[cfg(feature = "async")]
967    #[tokio::test]
968    async fn test_async_open_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
969        let dir = tempfile::tempdir()?;
970        let path = dir.path().join("async.tgm");
971
972        let mut file = TensogramFile::create(&path)?;
973        let meta = make_global_meta();
974        let desc = make_descriptor(vec![4]);
975        let data0 = vec![10u8; 16];
976        let data1 = vec![20u8; 16];
977        file.append(
978            &meta,
979            &[(&desc, data0.as_slice())],
980            &EncodeOptions::default(),
981        )?;
982        file.append(
983            &meta,
984            &[(&desc, data1.as_slice())],
985            &EncodeOptions::default(),
986        )?;
987
988        let async_file = TensogramFile::open_async(&path).await?;
989        assert_eq!(async_file.message_count()?, 2);
990
991        let msg0 = async_file.read_message_async(0).await?;
992        let (meta0, objects0) = crate::decode::decode(&msg0, &DecodeOptions::default())?;
993        assert_eq!(meta0.version, 2);
994        assert_eq!(objects0[0].1, data0);
995
996        let (_, objects1) = async_file
997            .decode_message_async(1, &DecodeOptions::default())
998            .await?;
999        assert_eq!(objects1[0].1, data1);
1000        Ok(())
1001    }
1002
1003    #[cfg(feature = "async")]
1004    #[tokio::test]
1005    async fn test_async_matches_sync() -> std::result::Result<(), Box<dyn std::error::Error>> {
1006        let dir = tempfile::tempdir()?;
1007        let path = dir.path().join("async_vs_sync.tgm");
1008
1009        let mut file = TensogramFile::create(&path)?;
1010        let meta = make_global_meta();
1011        let desc = make_descriptor(vec![4]);
1012        let data = vec![42u8; 16];
1013        file.append(
1014            &meta,
1015            &[(&desc, data.as_slice())],
1016            &EncodeOptions::default(),
1017        )?;
1018
1019        let sync_file = TensogramFile::open(&path)?;
1020        let sync_msg = sync_file.read_message(0)?;
1021
1022        let async_file = TensogramFile::open_async(&path).await?;
1023        let async_msg = async_file.read_message_async(0).await?;
1024
1025        assert_eq!(sync_msg, async_msg);
1026        Ok(())
1027    }
1028
1029    // ── decode_range on local files ──────────────────────────────────────
1030
1031    #[test]
1032    fn test_local_decode_range_valid() -> std::result::Result<(), Box<dyn std::error::Error>> {
1033        let dir = tempfile::tempdir()?;
1034        let path = dir.path().join("range.tgm");
1035
1036        let mut file = TensogramFile::create(&path)?;
1037        let meta = make_global_meta();
1038        let desc = make_descriptor(vec![10]);
1039        let data: Vec<u8> = (0..40).collect(); // 10 float32s = 40 bytes
1040        file.append(
1041            &meta,
1042            &[(&desc, data.as_slice())],
1043            &EncodeOptions::default(),
1044        )?;
1045
1046        // Decode elements 2..5 (3 elements)
1047        let (ret_desc, parts) = file.decode_range(0, 0, &[(2, 3)], &DecodeOptions::default())?;
1048        assert_eq!(ret_desc.shape, vec![10]);
1049        assert_eq!(parts.len(), 1);
1050        assert_eq!(parts[0].len(), 3 * 4); // 3 float32s = 12 bytes
1051        Ok(())
1052    }
1053
1054    #[test]
1055    fn test_local_decode_range_multiple_ranges()
1056    -> std::result::Result<(), Box<dyn std::error::Error>> {
1057        let dir = tempfile::tempdir()?;
1058        let path = dir.path().join("range_multi.tgm");
1059
1060        let mut file = TensogramFile::create(&path)?;
1061        let meta = make_global_meta();
1062        let desc = make_descriptor(vec![16]);
1063        let data: Vec<u8> = (0..64).collect(); // 16 float32s = 64 bytes
1064        file.append(
1065            &meta,
1066            &[(&desc, data.as_slice())],
1067            &EncodeOptions::default(),
1068        )?;
1069
1070        let ranges = vec![(0u64, 4u64), (8u64, 4u64)];
1071        let (_, parts) = file.decode_range(0, 0, &ranges, &DecodeOptions::default())?;
1072        assert_eq!(parts.len(), 2);
1073        assert_eq!(parts[0].len(), 4 * 4);
1074        assert_eq!(parts[1].len(), 4 * 4);
1075        Ok(())
1076    }
1077
1078    #[test]
1079    fn test_local_decode_range_invalid_object_index()
1080    -> std::result::Result<(), Box<dyn std::error::Error>> {
1081        let dir = tempfile::tempdir()?;
1082        let path = dir.path().join("range_bad_obj.tgm");
1083
1084        let mut file = TensogramFile::create(&path)?;
1085        let meta = make_global_meta();
1086        let desc = make_descriptor(vec![4]);
1087        let data = vec![0u8; 16];
1088        file.append(
1089            &meta,
1090            &[(&desc, data.as_slice())],
1091            &EncodeOptions::default(),
1092        )?;
1093
1094        let result = file.decode_range(0, 5, &[(0, 1)], &DecodeOptions::default());
1095        assert!(result.is_err());
1096        let msg = result.unwrap_err().to_string();
1097        assert!(
1098            msg.contains("out of range"),
1099            "expected 'out of range', got: {msg}"
1100        );
1101        Ok(())
1102    }
1103
1104    #[test]
1105    fn test_local_decode_range_invalid_message_index()
1106    -> std::result::Result<(), Box<dyn std::error::Error>> {
1107        let dir = tempfile::tempdir()?;
1108        let path = dir.path().join("range_bad_msg.tgm");
1109
1110        let mut file = TensogramFile::create(&path)?;
1111        let meta = make_global_meta();
1112        let desc = make_descriptor(vec![4]);
1113        let data = vec![0u8; 16];
1114        file.append(
1115            &meta,
1116            &[(&desc, data.as_slice())],
1117            &EncodeOptions::default(),
1118        )?;
1119
1120        let result = file.decode_range(5, 0, &[(0, 1)], &DecodeOptions::default());
1121        assert!(result.is_err());
1122        Ok(())
1123    }
1124
1125    // ── mmap append → Unsupported error ──────────────────────────────────
1126
1127    #[cfg(feature = "mmap")]
1128    #[test]
1129    fn test_mmap_append_returns_unsupported() -> std::result::Result<(), Box<dyn std::error::Error>>
1130    {
1131        let dir = tempfile::tempdir()?;
1132        let path = dir.path().join("mmap_append.tgm");
1133
1134        // Create and populate a file first
1135        {
1136            let mut file = TensogramFile::create(&path)?;
1137            let meta = make_global_meta();
1138            let desc = make_descriptor(vec![4]);
1139            let data = vec![0u8; 16];
1140            file.append(
1141                &meta,
1142                &[(&desc, data.as_slice())],
1143                &EncodeOptions::default(),
1144            )?;
1145        }
1146
1147        // Open with mmap and try to append
1148        let mut mmap_file = TensogramFile::open_mmap(&path)?;
1149        let meta = make_global_meta();
1150        let desc = make_descriptor(vec![4]);
1151        let data = vec![0u8; 16];
1152        let result = mmap_file.append(
1153            &meta,
1154            &[(&desc, data.as_slice())],
1155            &EncodeOptions::default(),
1156        );
1157        match result {
1158            Ok(_) => panic!("expected error for append on mmap file"),
1159            Err(e) => {
1160                let err_msg = e.to_string();
1161                assert!(
1162                    err_msg.contains("memory-mapped") || err_msg.contains("mmap"),
1163                    "expected mmap-related error, got: {err_msg}"
1164                );
1165            }
1166        }
1167        Ok(())
1168    }
1169
1170    // ── Async error cases ────────────────────────────────────────────────
1171
1172    #[cfg(feature = "async")]
1173    #[tokio::test]
1174    async fn test_async_open_nonexistent_file()
1175    -> std::result::Result<(), Box<dyn std::error::Error>> {
1176        let result = TensogramFile::open_async("/tmp/nonexistent_tensogram_file_12345.tgm").await;
1177        match result {
1178            Ok(_) => panic!("expected error for nonexistent file"),
1179            Err(e) => {
1180                let err_msg = e.to_string();
1181                assert!(
1182                    err_msg.contains("not found") || err_msg.contains("NotFound"),
1183                    "expected not-found error, got: {err_msg}"
1184                );
1185            }
1186        }
1187        Ok(())
1188    }
1189
1190    #[cfg(feature = "async")]
1191    #[tokio::test]
1192    async fn test_async_message_index_out_of_range()
1193    -> std::result::Result<(), Box<dyn std::error::Error>> {
1194        let dir = tempfile::tempdir()?;
1195        let path = dir.path().join("async_oor.tgm");
1196
1197        let mut file = TensogramFile::create(&path)?;
1198        let meta = make_global_meta();
1199        let desc = make_descriptor(vec![4]);
1200        let data = vec![0u8; 16];
1201        file.append(
1202            &meta,
1203            &[(&desc, data.as_slice())],
1204            &EncodeOptions::default(),
1205        )?;
1206
1207        let async_file = TensogramFile::open_async(&path).await?;
1208        let result = async_file.read_message_async(5).await;
1209        assert!(result.is_err());
1210        let err_msg = result.unwrap_err().to_string();
1211        assert!(
1212            err_msg.contains("out of range"),
1213            "expected 'out of range', got: {err_msg}"
1214        );
1215        Ok(())
1216    }
1217
1218    #[cfg(feature = "async")]
1219    #[tokio::test]
1220    async fn test_async_decode_metadata_out_of_range()
1221    -> std::result::Result<(), Box<dyn std::error::Error>> {
1222        let dir = tempfile::tempdir()?;
1223        let path = dir.path().join("async_meta_oor.tgm");
1224
1225        let mut file = TensogramFile::create(&path)?;
1226        let meta = make_global_meta();
1227        let desc = make_descriptor(vec![4]);
1228        let data = vec![0u8; 16];
1229        file.append(
1230            &meta,
1231            &[(&desc, data.as_slice())],
1232            &EncodeOptions::default(),
1233        )?;
1234
1235        let async_file = TensogramFile::open_async(&path).await?;
1236        let result = async_file.decode_metadata_async(10).await;
1237        assert!(result.is_err());
1238        Ok(())
1239    }
1240
1241    #[cfg(feature = "async")]
1242    #[tokio::test]
1243    async fn test_async_decode_descriptors() -> std::result::Result<(), Box<dyn std::error::Error>>
1244    {
1245        let dir = tempfile::tempdir()?;
1246        let path = dir.path().join("async_descs.tgm");
1247
1248        let mut file = TensogramFile::create(&path)?;
1249        let meta = make_global_meta();
1250        let desc = make_descriptor(vec![4]);
1251        let data = vec![0u8; 16];
1252        file.append(
1253            &meta,
1254            &[(&desc, data.as_slice())],
1255            &EncodeOptions::default(),
1256        )?;
1257
1258        let async_file = TensogramFile::open_async(&path).await?;
1259        let (decoded_meta, descriptors) = async_file.decode_descriptors_async(0).await?;
1260        assert_eq!(decoded_meta.version, 2);
1261        assert_eq!(descriptors.len(), 1);
1262        assert_eq!(descriptors[0].shape, vec![4]);
1263        Ok(())
1264    }
1265
1266    #[cfg(feature = "async")]
1267    #[tokio::test]
1268    async fn test_async_decode_object() -> std::result::Result<(), Box<dyn std::error::Error>> {
1269        let dir = tempfile::tempdir()?;
1270        let path = dir.path().join("async_obj.tgm");
1271
1272        let mut file = TensogramFile::create(&path)?;
1273        let meta = make_global_meta();
1274        let desc = make_descriptor(vec![4]);
1275        let data = vec![42u8; 16];
1276        file.append(
1277            &meta,
1278            &[(&desc, data.as_slice())],
1279            &EncodeOptions::default(),
1280        )?;
1281
1282        let async_file = TensogramFile::open_async(&path).await?;
1283        let (decoded_meta, decoded_desc, decoded_data) = async_file
1284            .decode_object_async(0, 0, &DecodeOptions::default())
1285            .await?;
1286        assert_eq!(decoded_meta.version, 2);
1287        assert_eq!(decoded_desc.shape, vec![4]);
1288        assert_eq!(decoded_data, data);
1289        Ok(())
1290    }
1291
1292    // ── open_source with local path ──────────────────────────────────────
1293
1294    #[test]
1295    fn test_open_source_local_path() -> std::result::Result<(), Box<dyn std::error::Error>> {
1296        let dir = tempfile::tempdir()?;
1297        let path = dir.path().join("local_source.tgm");
1298
1299        let mut file = TensogramFile::create(&path)?;
1300        let meta = make_global_meta();
1301        let desc = make_descriptor(vec![4]);
1302        let data = vec![0u8; 16];
1303        file.append(
1304            &meta,
1305            &[(&desc, data.as_slice())],
1306            &EncodeOptions::default(),
1307        )?;
1308
1309        let path_str = path.to_str().unwrap();
1310        let opened = TensogramFile::open_source(path_str)?;
1311        assert!(!opened.is_remote());
1312        assert_eq!(opened.message_count()?, 1);
1313        Ok(())
1314    }
1315
1316    // ── path() and source() for local files ──────────────────────────────
1317
1318    #[test]
1319    fn test_path_returns_some_for_local() -> std::result::Result<(), Box<dyn std::error::Error>> {
1320        let dir = tempfile::tempdir()?;
1321        let path = dir.path().join("path_test.tgm");
1322
1323        let mut file = TensogramFile::create(&path)?;
1324        let meta = make_global_meta();
1325        let desc = make_descriptor(vec![4]);
1326        let data = vec![0u8; 16];
1327        file.append(
1328            &meta,
1329            &[(&desc, data.as_slice())],
1330            &EncodeOptions::default(),
1331        )?;
1332
1333        let opened = TensogramFile::open(&path)?;
1334        assert!(opened.path().is_some());
1335        assert_eq!(opened.path().unwrap(), path.as_path());
1336        Ok(())
1337    }
1338
1339    #[test]
1340    fn test_source_returns_path_string_for_local()
1341    -> std::result::Result<(), Box<dyn std::error::Error>> {
1342        let dir = tempfile::tempdir()?;
1343        let path = dir.path().join("source_test.tgm");
1344
1345        let mut file = TensogramFile::create(&path)?;
1346        let meta = make_global_meta();
1347        let desc = make_descriptor(vec![4]);
1348        let data = vec![0u8; 16];
1349        file.append(
1350            &meta,
1351            &[(&desc, data.as_slice())],
1352            &EncodeOptions::default(),
1353        )?;
1354
1355        let opened = TensogramFile::open(&path)?;
1356        let source = opened.source();
1357        assert!(
1358            source.contains("source_test.tgm"),
1359            "source() should contain the filename, got: {source}"
1360        );
1361        Ok(())
1362    }
1363
1364    // ── open nonexistent file (sync) ─────────────────────────────────────
1365
1366    #[test]
1367    fn test_open_nonexistent_file() {
1368        let result = TensogramFile::open("/tmp/nonexistent_tensogram_9999.tgm");
1369        match result {
1370            Ok(_) => panic!("expected error for nonexistent file"),
1371            Err(e) => {
1372                let err_msg = e.to_string();
1373                assert!(
1374                    err_msg.contains("not found"),
1375                    "expected 'not found', got: {err_msg}"
1376                );
1377            }
1378        }
1379    }
1380
1381    // ── Coverage closers ─────────────────────────────────────────────────
1382
1383    #[test]
1384    fn test_create_in_nested_path_creates_parent()
1385    -> std::result::Result<(), Box<dyn std::error::Error>> {
1386        // Exercises the `fs::create_dir_all(parent)` branch in `create()`.
1387        let dir = tempfile::tempdir()?;
1388        let deep_path = dir.path().join("a").join("b").join("c").join("deep.tgm");
1389        let mut file = TensogramFile::create(&deep_path)?;
1390        let meta = make_global_meta();
1391        let desc = make_descriptor(vec![2]);
1392        let data = vec![0u8; 8];
1393        file.append(
1394            &meta,
1395            &[(&desc, data.as_slice())],
1396            &EncodeOptions::default(),
1397        )?;
1398        assert_eq!(file.message_count()?, 1);
1399        assert!(deep_path.exists());
1400        Ok(())
1401    }
1402
1403    /// Create a temp directory, drop its write bit, attempt to create a
1404    /// `.tgm` file inside, and assert that a typed `Io` error comes back.
1405    /// Restores the mode at the end so the tempdir cleans up correctly.
1406    ///
1407    /// Unix-only because Windows's permission model does not honour
1408    /// `chmod` in the same way. Skips gracefully if we detect that the
1409    /// current user bypasses directory-mode permission checks (running
1410    /// as root in a container, CAP_DAC_OVERRIDE set, etc.) — in those
1411    /// environments the test's premise cannot be deterministically met.
1412    #[cfg(unix)]
1413    #[test]
1414    fn test_create_in_nonwritable_location_returns_io_error()
1415    -> std::result::Result<(), Box<dyn std::error::Error>> {
1416        use std::os::unix::fs::PermissionsExt;
1417
1418        let dir = tempfile::tempdir()?;
1419        let dir_path = dir.path().to_path_buf();
1420
1421        // Drop all write bits (mode 0o555 = r-xr-xr-x).
1422        let original = std::fs::metadata(&dir_path)?.permissions();
1423        let mut readonly = original.clone();
1424        readonly.set_mode(0o555);
1425        std::fs::set_permissions(&dir_path, readonly)?;
1426
1427        // Probe: try to write to the chmod'd dir via std::fs. If the
1428        // probe succeeds, we're in an environment where dir-mode checks
1429        // are bypassed (root, CAP_DAC_OVERRIDE, etc.) — skip gracefully.
1430        let probe_path = dir_path.join(".perm_probe");
1431        let probe_result = std::fs::File::create(&probe_path);
1432        if probe_result.is_ok() {
1433            // Clean up and skip.
1434            let _ = std::fs::remove_file(&probe_path);
1435            std::fs::set_permissions(&dir_path, original)?;
1436            return Ok(());
1437        }
1438        drop(probe_result);
1439
1440        let target = dir_path.join("nope.tgm");
1441        let result = TensogramFile::create(&target);
1442
1443        // Always restore permissions so tempdir can clean up.
1444        std::fs::set_permissions(&dir_path, original)?;
1445
1446        // TensogramFile doesn't implement Debug, so use match instead of expect_err.
1447        let msg = match result {
1448            Ok(_) => panic!("expected Io error creating in non-writable dir, got Ok"),
1449            Err(e) => e.to_string(),
1450        };
1451        assert!(
1452            msg.contains("cannot create")
1453                || msg.contains("permission")
1454                || msg.contains("read-only"),
1455            "expected permission-related error, got: {msg}"
1456        );
1457        Ok(())
1458    }
1459
1460    #[test]
1461    fn test_read_message_from_deleted_file_errors()
1462    -> std::result::Result<(), Box<dyn std::error::Error>> {
1463        // TensogramFile is path-backed: each read reopens the file. If the
1464        // underlying path disappears out from under an open handle, the
1465        // next read_message call must surface a typed I/O error rather
1466        // than panic.
1467        let dir = tempfile::tempdir()?;
1468        let path = dir.path().join("deleted.tgm");
1469
1470        // Phase 1 — scope a write handle and let it flush on drop so the
1471        // file exists on disk before we delete it.
1472        {
1473            let mut writer = TensogramFile::create(&path)?;
1474            let meta = make_global_meta();
1475            let desc = make_descriptor(vec![2]);
1476            writer.append(
1477                &meta,
1478                &[(&desc, vec![0u8; 8].as_slice())],
1479                &EncodeOptions::default(),
1480            )?;
1481        }
1482
1483        // Phase 2 — open a read handle, cache the message offsets, then
1484        // delete the underlying file. The handle is still alive; the next
1485        // disk-backed operation should fail.
1486        let reader = TensogramFile::open(&path)?;
1487        assert_eq!(reader.message_count()?, 1);
1488
1489        std::fs::remove_file(&path)?;
1490
1491        // read_message must return an Io error because the path is gone.
1492        let read_result = reader.read_message(0);
1493        assert!(
1494            read_result.is_err(),
1495            "expected read_message to fail after underlying file was deleted, got Ok"
1496        );
1497        let err_msg = read_result.unwrap_err().to_string();
1498        assert!(
1499            err_msg.contains("not found")
1500                || err_msg.contains("No such")
1501                || err_msg.contains("cannot"),
1502            "expected I/O error mentioning missing file, got: {err_msg}"
1503        );
1504
1505        // Reopening the deleted path must also fail.
1506        assert!(TensogramFile::open(&path).is_err());
1507        Ok(())
1508    }
1509
1510    /// Appending a message with zero descriptors produces a valid
1511    /// header-only message (preamble + metadata + postamble). Exercises
1512    /// the edge case in `encode_inner` where the data-objects loop runs
1513    /// zero times.
1514    #[test]
1515    fn test_append_empty_message() -> std::result::Result<(), Box<dyn std::error::Error>> {
1516        let dir = tempfile::tempdir()?;
1517        let path = dir.path().join("empty_append.tgm");
1518        let mut file = TensogramFile::create(&path)?;
1519        let meta = make_global_meta();
1520        file.append(&meta, &[], &EncodeOptions::default())?;
1521        assert_eq!(file.message_count()?, 1);
1522        // The one message has zero objects.
1523        let (decoded_meta, objects) = file.decode_message(0, &DecodeOptions::default())?;
1524        assert_eq!(decoded_meta.version, 2);
1525        assert_eq!(objects.len(), 0);
1526        Ok(())
1527    }
1528
1529    #[test]
1530    fn test_file_iter_after_modification() -> std::result::Result<(), Box<dyn std::error::Error>> {
1531        let dir = tempfile::tempdir()?;
1532        let path = dir.path().join("modified.tgm");
1533        let mut file = TensogramFile::create(&path)?;
1534        let meta = make_global_meta();
1535        let desc = make_descriptor(vec![2]);
1536        let data = vec![0u8; 8];
1537        for _ in 0..3 {
1538            file.append(
1539                &meta,
1540                &[(&desc, data.as_slice())],
1541                &EncodeOptions::default(),
1542            )?;
1543        }
1544        assert_eq!(file.message_count()?, 3);
1545        drop(file);
1546
1547        // Reopen and verify count persists
1548        let reopened = TensogramFile::open(&path)?;
1549        assert_eq!(reopened.message_count()?, 3);
1550        Ok(())
1551    }
1552
1553    #[test]
1554    fn test_decode_message_out_of_range_clearly_errors()
1555    -> std::result::Result<(), Box<dyn std::error::Error>> {
1556        let dir = tempfile::tempdir()?;
1557        let path = dir.path().join("oor.tgm");
1558        let mut file = TensogramFile::create(&path)?;
1559        let meta = make_global_meta();
1560        let desc = make_descriptor(vec![2]);
1561        file.append(
1562            &meta,
1563            &[(&desc, vec![0u8; 8].as_slice())],
1564            &EncodeOptions::default(),
1565        )?;
1566        let result = file.decode_message(99, &DecodeOptions::default());
1567        assert!(result.is_err());
1568        Ok(())
1569    }
1570
1571    // ── invalidate_offsets ────────────────────────────────────────────────
1572
1573    #[test]
1574    fn test_invalidate_offsets_forces_rescan() -> std::result::Result<(), Box<dyn std::error::Error>>
1575    {
1576        let dir = tempfile::tempdir()?;
1577        let path = dir.path().join("invalidate.tgm");
1578
1579        let mut file = TensogramFile::create(&path)?;
1580        let meta = make_global_meta();
1581        let desc = make_descriptor(vec![4]);
1582        let data = vec![0u8; 16];
1583        file.append(
1584            &meta,
1585            &[(&desc, data.as_slice())],
1586            &EncodeOptions::default(),
1587        )?;
1588
1589        assert_eq!(file.message_count()?, 1);
1590
1591        // Append another message
1592        file.append(
1593            &meta,
1594            &[(&desc, data.as_slice())],
1595            &EncodeOptions::default(),
1596        )?;
1597        // After append, offsets are invalidated internally; verify rescan
1598        assert_eq!(file.message_count()?, 2);
1599        Ok(())
1600    }
1601
1602    // ── open_source dispatches to local backend ──────────────────────────
1603
1604    #[test]
1605    fn test_open_source_local_path_source_and_path_accessors()
1606    -> std::result::Result<(), Box<dyn std::error::Error>> {
1607        let dir = tempfile::tempdir()?;
1608        let path = dir.path().join("open_src_acc.tgm");
1609
1610        // Create a file with one message
1611        {
1612            let mut file = TensogramFile::create(&path)?;
1613            let meta = make_global_meta();
1614            let desc = make_descriptor(vec![4]);
1615            let data = vec![7u8; 16];
1616            file.append(
1617                &meta,
1618                &[(&desc, data.as_slice())],
1619                &EncodeOptions::default(),
1620            )?;
1621        }
1622
1623        let path_str = path.to_str().unwrap();
1624        let opened = TensogramFile::open_source(path_str)?;
1625
1626        // source() should include the filename
1627        assert!(
1628            opened.source().contains("open_src_acc.tgm"),
1629            "source() = {}",
1630            opened.source()
1631        );
1632        // path() should return the exact path for local files
1633        assert_eq!(opened.path().unwrap(), path.as_path());
1634        // is_remote() is false for local files
1635        assert!(!opened.is_remote());
1636        // Can read the message back
1637        let count = opened.message_count()?;
1638        assert_eq!(count, 1);
1639        Ok(())
1640    }
1641
1642    // ── open nonexistent via open_source ──────────────────────────────────
1643
1644    #[test]
1645    fn test_open_source_nonexistent() {
1646        let result = TensogramFile::open_source("/tmp/no_such_file_tensogram_abc.tgm");
1647        match result {
1648            Ok(_) => panic!("expected error for nonexistent file"),
1649            Err(e) => {
1650                let msg = e.to_string();
1651                assert!(
1652                    msg.contains("not found"),
1653                    "expected 'not found', got: {msg}"
1654                );
1655            }
1656        }
1657    }
1658
1659    // ── message index out of range (sync) ────────────────────────────────
1660
1661    #[test]
1662    fn test_read_message_index_out_of_range() -> std::result::Result<(), Box<dyn std::error::Error>>
1663    {
1664        let dir = tempfile::tempdir()?;
1665        let path = dir.path().join("oor.tgm");
1666
1667        let mut file = TensogramFile::create(&path)?;
1668        let meta = make_global_meta();
1669        let desc = make_descriptor(vec![4]);
1670        let data = vec![0u8; 16];
1671        file.append(
1672            &meta,
1673            &[(&desc, data.as_slice())],
1674            &EncodeOptions::default(),
1675        )?;
1676
1677        // Index 0 works
1678        assert!(file.read_message(0).is_ok());
1679
1680        // Index 1 is out of range
1681        let result = file.read_message(1);
1682        assert!(result.is_err());
1683        let msg = result.unwrap_err().to_string();
1684        assert!(
1685            msg.contains("out of range"),
1686            "expected 'out of range', got: {msg}"
1687        );
1688
1689        // Large index
1690        let result = file.read_message(999);
1691        assert!(result.is_err());
1692        Ok(())
1693    }
1694
1695    // ── decode_range valid extraction ─────────────────────────────────────
1696
1697    #[test]
1698    fn test_local_decode_range_full_tensor() -> std::result::Result<(), Box<dyn std::error::Error>>
1699    {
1700        let dir = tempfile::tempdir()?;
1701        let path = dir.path().join("range_full.tgm");
1702
1703        let mut file = TensogramFile::create(&path)?;
1704        let meta = make_global_meta();
1705        let desc = make_descriptor(vec![8]);
1706        let data: Vec<u8> = (0..32).collect(); // 8 float32s = 32 bytes
1707        file.append(
1708            &meta,
1709            &[(&desc, data.as_slice())],
1710            &EncodeOptions::default(),
1711        )?;
1712
1713        // Decode the entire range
1714        let (ret_desc, parts) = file.decode_range(0, 0, &[(0, 8)], &DecodeOptions::default())?;
1715        assert_eq!(ret_desc.shape, vec![8]);
1716        assert_eq!(parts.len(), 1);
1717        assert_eq!(parts[0].len(), 32); // all 8 float32s
1718        assert_eq!(parts[0], data);
1719        Ok(())
1720    }
1721
1722    // ── decode_object on local file ──────────────────────────────────────
1723
1724    #[test]
1725    fn test_local_decode_object_valid() -> std::result::Result<(), Box<dyn std::error::Error>> {
1726        let dir = tempfile::tempdir()?;
1727        let path = dir.path().join("dec_obj.tgm");
1728
1729        let mut file = TensogramFile::create(&path)?;
1730        let meta = make_global_meta();
1731        let desc = make_descriptor(vec![4]);
1732        let data = vec![99u8; 16];
1733        file.append(
1734            &meta,
1735            &[(&desc, data.as_slice())],
1736            &EncodeOptions::default(),
1737        )?;
1738
1739        let (decoded_meta, decoded_desc, decoded_data) =
1740            file.decode_object(0, 0, &DecodeOptions::default())?;
1741        assert_eq!(decoded_meta.version, 2);
1742        assert_eq!(decoded_desc.shape, vec![4]);
1743        assert_eq!(decoded_data, data);
1744        Ok(())
1745    }
1746
1747    #[test]
1748    fn test_local_decode_object_out_of_range() -> std::result::Result<(), Box<dyn std::error::Error>>
1749    {
1750        let dir = tempfile::tempdir()?;
1751        let path = dir.path().join("dec_obj_oor.tgm");
1752
1753        let mut file = TensogramFile::create(&path)?;
1754        let meta = make_global_meta();
1755        let desc = make_descriptor(vec![4]);
1756        let data = vec![0u8; 16];
1757        file.append(
1758            &meta,
1759            &[(&desc, data.as_slice())],
1760            &EncodeOptions::default(),
1761        )?;
1762
1763        // Object index 5 doesn't exist (only object 0)
1764        let result = file.decode_object(0, 5, &DecodeOptions::default());
1765        assert!(result.is_err());
1766        let msg = result.unwrap_err().to_string();
1767        assert!(
1768            msg.contains("out of range"),
1769            "expected 'out of range', got: {msg}"
1770        );
1771        Ok(())
1772    }
1773
1774    // ── Backend::Local — is_remote() returns false ────────────────────────
1775
1776    #[test]
1777    fn test_is_remote_false_for_local() -> std::result::Result<(), Box<dyn std::error::Error>> {
1778        let dir = tempfile::tempdir()?;
1779        let path = dir.path().join("not_remote.tgm");
1780        let file = TensogramFile::create(&path)?;
1781        assert!(!file.is_remote());
1782        Ok(())
1783    }
1784
1785    #[test]
1786    fn test_is_remote_false_for_opened_local() -> std::result::Result<(), Box<dyn std::error::Error>>
1787    {
1788        let dir = tempfile::tempdir()?;
1789        let path = dir.path().join("not_remote2.tgm");
1790        {
1791            let mut f = TensogramFile::create(&path)?;
1792            let meta = make_global_meta();
1793            let desc = make_descriptor(vec![2]);
1794            f.append(&meta, &[(&desc, &[0u8; 8])], &EncodeOptions::default())?;
1795        }
1796        let opened = TensogramFile::open(&path)?;
1797        assert!(!opened.is_remote());
1798        Ok(())
1799    }
1800
1801    // ── decode_descriptors via file ──────────────────────────────────────
1802
1803    #[test]
1804    fn test_file_decode_descriptors_msg_out_of_range()
1805    -> std::result::Result<(), Box<dyn std::error::Error>> {
1806        let dir = tempfile::tempdir()?;
1807        let path = dir.path().join("desc_oor.tgm");
1808
1809        let mut file = TensogramFile::create(&path)?;
1810        let meta = make_global_meta();
1811        let desc = make_descriptor(vec![4]);
1812        let data = vec![0u8; 16];
1813        file.append(
1814            &meta,
1815            &[(&desc, data.as_slice())],
1816            &EncodeOptions::default(),
1817        )?;
1818
1819        // Message index 10 is out of range
1820        let result = file.decode_descriptors(10);
1821        assert!(result.is_err());
1822        Ok(())
1823    }
1824
1825    // ── decode_metadata via file with out-of-range index ─────────────────
1826
1827    #[test]
1828    fn test_file_decode_metadata_out_of_range()
1829    -> std::result::Result<(), Box<dyn std::error::Error>> {
1830        let dir = tempfile::tempdir()?;
1831        let path = dir.path().join("meta_oor.tgm");
1832
1833        let mut file = TensogramFile::create(&path)?;
1834        let meta = make_global_meta();
1835        let desc = make_descriptor(vec![4]);
1836        let data = vec![0u8; 16];
1837        file.append(
1838            &meta,
1839            &[(&desc, data.as_slice())],
1840            &EncodeOptions::default(),
1841        )?;
1842
1843        let result = file.decode_metadata(99);
1844        assert!(result.is_err());
1845        Ok(())
1846    }
1847
1848    // ── invalidate_offsets explicit ───────────────────────────────────────
1849
1850    #[test]
1851    fn test_invalidate_offsets_explicit() -> std::result::Result<(), Box<dyn std::error::Error>> {
1852        let dir = tempfile::tempdir()?;
1853        let path = dir.path().join("inv_explicit.tgm");
1854
1855        let mut file = TensogramFile::create(&path)?;
1856        let meta = make_global_meta();
1857        let desc = make_descriptor(vec![4]);
1858        let data = vec![0u8; 16];
1859        file.append(
1860            &meta,
1861            &[(&desc, data.as_slice())],
1862            &EncodeOptions::default(),
1863        )?;
1864        assert_eq!(file.message_count()?, 1);
1865
1866        // Manually invalidate and verify rescan works
1867        file.invalidate_offsets();
1868        assert_eq!(file.message_count()?, 1);
1869        Ok(())
1870    }
1871
1872    // ── open file then re-open and read ──────────────────────────────────
1873
1874    #[test]
1875    fn test_open_existing_file_and_read() -> std::result::Result<(), Box<dyn std::error::Error>> {
1876        let dir = tempfile::tempdir()?;
1877        let path = dir.path().join("reopen.tgm");
1878
1879        // Create and write
1880        {
1881            let mut file = TensogramFile::create(&path)?;
1882            let meta = make_global_meta();
1883            let desc = make_descriptor(vec![4]);
1884            let data = vec![55u8; 16];
1885            file.append(
1886                &meta,
1887                &[(&desc, data.as_slice())],
1888                &EncodeOptions::default(),
1889            )?;
1890        }
1891
1892        // Re-open with open()
1893        let file = TensogramFile::open(&path)?;
1894        assert_eq!(file.message_count()?, 1);
1895        let (meta, objs) = file.decode_message(0, &DecodeOptions::default())?;
1896        assert_eq!(meta.version, 2);
1897        assert_eq!(objs[0].1, vec![55u8; 16]);
1898        Ok(())
1899    }
1900
1901    // ── decode_range out-of-bounds element range ─────────────────────────
1902
1903    #[test]
1904    fn test_local_decode_range_out_of_bounds_elements()
1905    -> std::result::Result<(), Box<dyn std::error::Error>> {
1906        let dir = tempfile::tempdir()?;
1907        let path = dir.path().join("range_oob.tgm");
1908
1909        let mut file = TensogramFile::create(&path)?;
1910        let meta = make_global_meta();
1911        let desc = make_descriptor(vec![4]); // 4 float32s
1912        let data = vec![0u8; 16];
1913        file.append(
1914            &meta,
1915            &[(&desc, data.as_slice())],
1916            &EncodeOptions::default(),
1917        )?;
1918
1919        // Request range that exceeds the tensor: offset=2, count=10 but only 4 elements
1920        let result = file.decode_range(0, 0, &[(2, 10)], &DecodeOptions::default());
1921        assert!(result.is_err(), "expected error for out-of-bounds range");
1922        Ok(())
1923    }
1924
    // ── path() returns Some for local files ──────────────────────────────
    // (Only the local branch is tested here; the remote branch requires the `remote` feature.)
1927
1928    #[test]
1929    fn test_path_some_for_created_file() -> std::result::Result<(), Box<dyn std::error::Error>> {
1930        let dir = tempfile::tempdir()?;
1931        let path = dir.path().join("path_created.tgm");
1932        let file = TensogramFile::create(&path)?;
1933        assert!(file.path().is_some());
1934        assert_eq!(file.path().unwrap(), path.as_path());
1935        Ok(())
1936    }
1937
1938    // ── async decode_range ───────────────────────────────────────────────
1939
1940    #[cfg(feature = "async")]
1941    #[tokio::test]
1942    async fn test_async_decode_range() -> std::result::Result<(), Box<dyn std::error::Error>> {
1943        let dir = tempfile::tempdir()?;
1944        let path = dir.path().join("async_range.tgm");
1945
1946        let mut file = TensogramFile::create(&path)?;
1947        let meta = make_global_meta();
1948        let desc = make_descriptor(vec![10]);
1949        let data: Vec<u8> = (0..40).collect(); // 10 float32s
1950        file.append(
1951            &meta,
1952            &[(&desc, data.as_slice())],
1953            &EncodeOptions::default(),
1954        )?;
1955
1956        let async_file = TensogramFile::open_async(&path).await?;
1957        let (ret_desc, parts) = async_file
1958            .decode_range_async(0, 0, &[(0, 5)], &DecodeOptions::default())
1959            .await?;
1960        assert_eq!(ret_desc.shape, vec![10]);
1961        assert_eq!(parts.len(), 1);
1962        assert_eq!(parts[0].len(), 5 * 4); // 5 float32s = 20 bytes
1963        Ok(())
1964    }
1965
1966    // ── async decode_object ──────────────────────────────────────────────
1967
1968    #[cfg(feature = "async")]
1969    #[tokio::test]
1970    async fn test_async_decode_object_out_of_range()
1971    -> std::result::Result<(), Box<dyn std::error::Error>> {
1972        let dir = tempfile::tempdir()?;
1973        let path = dir.path().join("async_obj_oor.tgm");
1974
1975        let mut file = TensogramFile::create(&path)?;
1976        let meta = make_global_meta();
1977        let desc = make_descriptor(vec![4]);
1978        let data = vec![0u8; 16];
1979        file.append(
1980            &meta,
1981            &[(&desc, data.as_slice())],
1982            &EncodeOptions::default(),
1983        )?;
1984
1985        let async_file = TensogramFile::open_async(&path).await?;
1986        let result = async_file
1987            .decode_object_async(0, 99, &DecodeOptions::default())
1988            .await;
1989        assert!(result.is_err());
1990        let msg = result.unwrap_err().to_string();
1991        assert!(
1992            msg.contains("out of range"),
1993            "expected 'out of range', got: {msg}"
1994        );
1995        Ok(())
1996    }
1997
1998    // ── async message_count_async ────────────────────────────────────────
1999
2000    #[cfg(feature = "async")]
2001    #[tokio::test]
2002    async fn test_async_message_count() -> std::result::Result<(), Box<dyn std::error::Error>> {
2003        let dir = tempfile::tempdir()?;
2004        let path = dir.path().join("async_count.tgm");
2005
2006        let mut file = TensogramFile::create(&path)?;
2007        let meta = make_global_meta();
2008        let desc = make_descriptor(vec![4]);
2009        let data = vec![0u8; 16];
2010        file.append(
2011            &meta,
2012            &[(&desc, data.as_slice())],
2013            &EncodeOptions::default(),
2014        )?;
2015        file.append(
2016            &meta,
2017            &[(&desc, data.as_slice())],
2018            &EncodeOptions::default(),
2019        )?;
2020
2021        let async_file = TensogramFile::open_async(&path).await?;
2022        let count = async_file.message_count_async().await?;
2023        assert_eq!(count, 2);
2024        Ok(())
2025    }
2026
2027    // ── async decode_descriptors ─────────────────────────────────────────
2028
2029    #[cfg(feature = "async")]
2030    #[tokio::test]
2031    async fn test_async_decode_descriptors_out_of_range()
2032    -> std::result::Result<(), Box<dyn std::error::Error>> {
2033        let dir = tempfile::tempdir()?;
2034        let path = dir.path().join("async_descs_oor.tgm");
2035
2036        let mut file = TensogramFile::create(&path)?;
2037        let meta = make_global_meta();
2038        let desc = make_descriptor(vec![4]);
2039        let data = vec![0u8; 16];
2040        file.append(
2041            &meta,
2042            &[(&desc, data.as_slice())],
2043            &EncodeOptions::default(),
2044        )?;
2045
2046        let async_file = TensogramFile::open_async(&path).await?;
2047        let result = async_file.decode_descriptors_async(10).await;
2048        assert!(result.is_err());
2049        Ok(())
2050    }
2051}