Skip to main content

hermes_core/directories/
directory.rs

1//! Async Directory abstraction for IO operations
2//!
3//! Supports network, local filesystem, and in-memory storage.
4//! All reads are async to minimize blocking on network latency.
5
6use async_trait::async_trait;
7use parking_lot::RwLock;
8use std::collections::HashMap;
9use std::io;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13
/// Callback type for lazy range reading.
///
/// Given a byte range, the callback returns a boxed future that resolves to
/// the bytes of that range or to an I/O error. `FileHandle` invokes it with
/// absolute offsets, i.e. ranges already shifted by the handle's own offset.
///
/// Non-WASM variant: the callback must be `Send + Sync` and the future it
/// returns must be `Send`.
#[cfg(not(target_arch = "wasm32"))]
pub type RangeReadFn = Arc<
    dyn Fn(
            Range<u64>,
        )
            -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<OwnedBytes>> + Send>>
        + Send
        + Sync,
>;

/// Callback type for lazy range reading (WASM variant).
///
/// Same shape as the non-WASM alias, but without the `Send`/`Sync` bounds on
/// the callback and its future.
#[cfg(target_arch = "wasm32")]
pub type RangeReadFn = Arc<
    dyn Fn(
        Range<u64>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<OwnedBytes>>>>,
>;
31
/// Unified file handle for both inline (mmap/RAM) and lazy (HTTP/filesystem) access.
///
/// Replaces the previous `FileSlice`, `LazyFileHandle`, and `LazyFileSlice` types.
/// - **Inline**: data is available synchronously (mmap, RAM). Sync reads via `read_bytes_range_sync`.
/// - **Lazy**: data is fetched on-demand via async callback (HTTP, filesystem).
///
/// Use `.slice()` to create sub-range views (zero-copy for Inline, offset-adjusted for Lazy).
///
/// Cloning a handle copies no file data: the inline bytes and the lazy
/// callback are both reference-counted.
#[derive(Clone)]
pub struct FileHandle {
    /// Backing variant (inline bytes or lazy callback) together with the
    /// offset and length of the view this handle exposes.
    inner: FileHandleInner,
}
43
/// Internal representation of a `FileHandle`.
///
/// Both variants describe a view of `len` bytes starting at `offset`; ranges
/// passed to the handle's read methods are relative to that view.
#[derive(Clone)]
enum FileHandleInner {
    /// Data available inline — sync reads possible (mmap, RAM)
    Inline {
        /// Backing bytes that the view is cut from.
        data: OwnedBytes,
        /// Start of the view within `data`.
        offset: u64,
        /// Length of the view in bytes.
        len: u64,
    },
    /// Data fetched on-demand via async callback (HTTP, filesystem)
    Lazy {
        /// Callback that fetches a byte range; called with `offset` already added.
        read_fn: RangeReadFn,
        /// Start of the view; added to every requested range before calling `read_fn`.
        offset: u64,
        /// Length of the view in bytes.
        len: u64,
    },
}
59
60impl std::fmt::Debug for FileHandle {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        match &self.inner {
63            FileHandleInner::Inline { len, offset, .. } => f
64                .debug_struct("FileHandle::Inline")
65                .field("offset", offset)
66                .field("len", len)
67                .finish(),
68            FileHandleInner::Lazy { len, offset, .. } => f
69                .debug_struct("FileHandle::Lazy")
70                .field("offset", offset)
71                .field("len", len)
72                .finish(),
73        }
74    }
75}
76
77impl FileHandle {
78    /// Create an inline file handle from owned bytes (mmap, RAM).
79    /// Sync reads are available.
80    pub fn from_bytes(data: OwnedBytes) -> Self {
81        let len = data.len() as u64;
82        Self {
83            inner: FileHandleInner::Inline {
84                data,
85                offset: 0,
86                len,
87            },
88        }
89    }
90
91    /// Create an empty file handle.
92    pub fn empty() -> Self {
93        Self::from_bytes(OwnedBytes::empty())
94    }
95
96    /// Create a lazy file handle from an async range-read callback.
97    /// Only async reads are available.
98    pub fn lazy(len: u64, read_fn: RangeReadFn) -> Self {
99        Self {
100            inner: FileHandleInner::Lazy {
101                read_fn,
102                offset: 0,
103                len,
104            },
105        }
106    }
107
108    /// Total length in bytes.
109    #[inline]
110    pub fn len(&self) -> u64 {
111        match &self.inner {
112            FileHandleInner::Inline { len, .. } => *len,
113            FileHandleInner::Lazy { len, .. } => *len,
114        }
115    }
116
117    /// Check if empty.
118    #[inline]
119    pub fn is_empty(&self) -> bool {
120        self.len() == 0
121    }
122
123    /// Whether synchronous reads are available (inline/mmap data).
124    #[inline]
125    pub fn is_sync(&self) -> bool {
126        matches!(&self.inner, FileHandleInner::Inline { .. })
127    }
128
129    /// Create a sub-range view. Zero-copy for Inline, offset-adjusted for Lazy.
130    pub fn slice(&self, range: Range<u64>) -> Self {
131        match &self.inner {
132            FileHandleInner::Inline { data, offset, len } => {
133                let new_offset = offset + range.start;
134                let new_len = range.end - range.start;
135                debug_assert!(
136                    new_offset + new_len <= offset + len,
137                    "slice out of bounds: {}+{} > {}+{}",
138                    new_offset,
139                    new_len,
140                    offset,
141                    len
142                );
143                Self {
144                    inner: FileHandleInner::Inline {
145                        data: data.clone(),
146                        offset: new_offset,
147                        len: new_len,
148                    },
149                }
150            }
151            FileHandleInner::Lazy {
152                read_fn,
153                offset,
154                len,
155            } => {
156                let new_offset = offset + range.start;
157                let new_len = range.end - range.start;
158                debug_assert!(
159                    new_offset + new_len <= offset + len,
160                    "slice out of bounds: {}+{} > {}+{}",
161                    new_offset,
162                    new_len,
163                    offset,
164                    len
165                );
166                Self {
167                    inner: FileHandleInner::Lazy {
168                        read_fn: Arc::clone(read_fn),
169                        offset: new_offset,
170                        len: new_len,
171                    },
172                }
173            }
174        }
175    }
176
177    /// Async range read — works for both Inline and Lazy.
178    pub async fn read_bytes_range(&self, range: Range<u64>) -> io::Result<OwnedBytes> {
179        match &self.inner {
180            FileHandleInner::Inline { data, offset, len } => {
181                if range.end > *len {
182                    return Err(io::Error::new(
183                        io::ErrorKind::InvalidInput,
184                        format!("Range {:?} out of bounds (len: {})", range, len),
185                    ));
186                }
187                let start = (*offset + range.start) as usize;
188                let end = (*offset + range.end) as usize;
189                Ok(data.slice(start..end))
190            }
191            FileHandleInner::Lazy {
192                read_fn,
193                offset,
194                len,
195            } => {
196                if range.end > *len {
197                    return Err(io::Error::new(
198                        io::ErrorKind::InvalidInput,
199                        format!("Range {:?} out of bounds (len: {})", range, len),
200                    ));
201                }
202                let abs_start = offset + range.start;
203                let abs_end = offset + range.end;
204                (read_fn)(abs_start..abs_end).await
205            }
206        }
207    }
208
209    /// Read all bytes.
210    pub async fn read_bytes(&self) -> io::Result<OwnedBytes> {
211        self.read_bytes_range(0..self.len()).await
212    }
213
214    /// Synchronous range read — only works for Inline handles.
215    /// Returns `Err` if the handle is Lazy.
216    #[inline]
217    pub fn read_bytes_range_sync(&self, range: Range<u64>) -> io::Result<OwnedBytes> {
218        match &self.inner {
219            FileHandleInner::Inline { data, offset, len } => {
220                if range.end > *len {
221                    return Err(io::Error::new(
222                        io::ErrorKind::InvalidInput,
223                        format!("Range {:?} out of bounds (len: {})", range, len),
224                    ));
225                }
226                let start = (*offset + range.start) as usize;
227                let end = (*offset + range.end) as usize;
228                Ok(data.slice(start..end))
229            }
230            FileHandleInner::Lazy { .. } => Err(io::Error::new(
231                io::ErrorKind::Unsupported,
232                "Synchronous read not available on lazy file handle",
233            )),
234        }
235    }
236
237    /// Synchronous read of all bytes — only works for Inline handles.
238    #[inline]
239    pub fn read_bytes_sync(&self) -> io::Result<OwnedBytes> {
240        self.read_bytes_range_sync(0..self.len())
241    }
242}
243
/// Backing store for OwnedBytes — supports both heap Vec and mmap.
#[derive(Clone)]
enum SharedBytes {
    /// Heap-allocated bytes shared through an `Arc`.
    Vec(Arc<Vec<u8>>),
    /// Memory-mapped file shared through an `Arc` (native builds only).
    #[cfg(feature = "native")]
    Mmap(Arc<memmap2::Mmap>),
}

impl SharedBytes {
    /// Borrow the entire backing buffer, regardless of variant.
    #[inline]
    fn as_bytes(&self) -> &[u8] {
        match self {
            SharedBytes::Vec(v) => v.as_slice(),
            #[cfg(feature = "native")]
            SharedBytes::Mmap(m) => m.as_ref(),
        }
    }
}

// Debug prints only the variant and the buffer length, never the contents.
impl std::fmt::Debug for SharedBytes {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SharedBytes::Vec(v) => write!(f, "Vec(len={})", v.len()),
            #[cfg(feature = "native")]
            SharedBytes::Mmap(m) => write!(f, "Mmap(len={})", m.len()),
        }
    }
}

/// Owned bytes with cheap cloning (Arc-backed)
///
/// Supports two backing stores:
/// - `Vec<u8>` for owned data (RamDirectory, FsDirectory, decompressed blocks)
/// - `Mmap` for zero-copy memory-mapped files (MmapDirectory, native only)
///
/// A value is a *view*: `range` selects the visible part of the shared
/// backing buffer, and all public offsets are relative to that view.
#[derive(Debug, Clone)]
pub struct OwnedBytes {
    /// Shared backing buffer.
    data: SharedBytes,
    /// Visible window into `data`, in absolute buffer offsets.
    range: Range<usize>,
}

impl OwnedBytes {
    /// Take ownership of `data` and expose all of it.
    pub fn new(data: Vec<u8>) -> Self {
        let len = data.len();
        Self {
            data: SharedBytes::Vec(Arc::new(data)),
            range: 0..len,
        }
    }

    /// Create a zero-length value.
    pub fn empty() -> Self {
        Self {
            data: SharedBytes::Vec(Arc::new(Vec::new())),
            range: 0..0,
        }
    }

    /// Create from a pre-existing Arc<Vec<u8>> with a sub-range.
    /// Used by RamDirectory and CachingDirectory to share data without copying.
    ///
    /// `range` is not validated here; callers must pass a range that lies
    /// within `data`, otherwise `as_slice` panics.
    pub(crate) fn from_arc_vec(data: Arc<Vec<u8>>, range: Range<usize>) -> Self {
        Self {
            data: SharedBytes::Vec(data),
            range,
        }
    }

    /// Create from a memory-mapped file (zero-copy).
    #[cfg(feature = "native")]
    pub(crate) fn from_mmap(mmap: Arc<memmap2::Mmap>) -> Self {
        let len = mmap.len();
        Self {
            data: SharedBytes::Mmap(mmap),
            range: 0..len,
        }
    }

    /// Create from a memory-mapped file with a sub-range (zero-copy).
    ///
    /// `range` is not validated here; callers must pass a range that lies
    /// within the mapping, otherwise `as_slice` panics.
    #[cfg(feature = "native")]
    pub(crate) fn from_mmap_range(mmap: Arc<memmap2::Mmap>, range: Range<usize>) -> Self {
        Self {
            data: SharedBytes::Mmap(mmap),
            range,
        }
    }

    /// Number of visible bytes.
    pub fn len(&self) -> usize {
        self.range.len()
    }

    /// Whether the view contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.range.is_empty()
    }

    /// Create a sub-view sharing the same backing buffer (no copy).
    ///
    /// `range` is relative to this view, like slice indexing.
    ///
    /// # Panics
    ///
    /// Panics if `range.start > range.end` or `range.end > self.len()`.
    /// Without this check a sub-view could silently expose bytes that lie
    /// outside its parent view.
    pub fn slice(&self, range: Range<usize>) -> Self {
        let view_len = self.len();
        assert!(
            range.start <= range.end && range.end <= view_len,
            "OwnedBytes::slice: range {:?} out of bounds for length {}",
            range,
            view_len
        );
        let start = self.range.start + range.start;
        let end = self.range.start + range.end;
        Self {
            data: self.data.clone(),
            range: start..end,
        }
    }

    /// Borrow the visible bytes.
    pub fn as_slice(&self) -> &[u8] {
        &self.data.as_bytes()[self.range.clone()]
    }

    /// Returns `true` if the backing store is a memory-mapped file.
    ///
    /// Used to guard `madvise` calls: `MADV_DONTNEED` on heap memory
    /// zeroes pages on Linux and corrupts allocator metadata.
    #[cfg(feature = "native")]
    #[inline]
    pub fn is_mmap(&self) -> bool {
        matches!(self.data, SharedBytes::Mmap(_))
    }

    /// Copy the visible bytes into a new `Vec<u8>`.
    pub fn to_vec(&self) -> Vec<u8> {
        self.as_slice().to_vec()
    }
}

impl AsRef<[u8]> for OwnedBytes {
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl std::ops::Deref for OwnedBytes {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}
377
/// Async directory trait for reading index files
///
/// Native (non-WASM) variant: implementors must be `Send + Sync + 'static`.
#[cfg(not(target_arch = "wasm32"))]
#[async_trait]
pub trait Directory: Send + Sync + 'static {
    /// Check if a file exists
    async fn exists(&self, path: &Path) -> io::Result<bool>;

    /// Get file size in bytes
    async fn file_size(&self, path: &Path) -> io::Result<u64>;

    /// Open a file for reading (loads entire file into an inline FileHandle)
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle>;

    /// Read a specific byte range from a file (optimized for network)
    ///
    /// `range` is a half-open byte range (`start..end`) within the file.
    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes>;

    /// List files in directory
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>>;

    /// Open a file handle that fetches ranges on demand.
    /// For mmap directories this returns an Inline handle (sync-capable).
    /// For HTTP/filesystem directories this returns a Lazy handle.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle>;
}
402
/// Async directory trait for reading index files (WASM version - no Send requirement)
///
/// Declares the same methods as the non-WASM trait; only the `Send + Sync`
/// bounds are dropped.
#[cfg(target_arch = "wasm32")]
#[async_trait(?Send)]
pub trait Directory: 'static {
    /// Check if a file exists
    async fn exists(&self, path: &Path) -> io::Result<bool>;

    /// Get file size in bytes
    async fn file_size(&self, path: &Path) -> io::Result<u64>;

    /// Open a file for reading (loads entire file into an inline FileHandle)
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle>;

    /// Read a specific byte range from a file (optimized for network)
    ///
    /// `range` is a half-open byte range (`start..end`) within the file.
    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes>;

    /// List files in directory
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>>;

    /// Open a file handle that fetches ranges on demand.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle>;
}
425
/// A writer for incrementally writing data to a directory file.
///
/// Avoids buffering entire files in memory during merge. File-backed
/// directories write directly to disk; memory directories collect to Vec.
///
/// Data is pushed through the `io::Write` methods; `finish` must be called
/// to finalize the file.
pub trait StreamingWriter: io::Write + Send {
    /// Finalize the write, making data available for reading.
    ///
    /// Consumes the boxed writer, so no further writes are possible afterwards.
    fn finish(self: Box<Self>) -> io::Result<()>;

    /// Bytes written so far.
    fn bytes_written(&self) -> u64;
}
437
438/// StreamingWriter backed by Vec<u8>, finalized via DirectoryWriter::write.
439/// Used as default/fallback and for RamDirectory.
440struct BufferedStreamingWriter {
441    path: PathBuf,
442    buffer: Vec<u8>,
443    /// Callback to write the buffer to the directory on finish.
444    /// We store the files Arc directly for RamDirectory.
445    files: Arc<RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>>,
446}
447
448impl io::Write for BufferedStreamingWriter {
449    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
450        self.buffer.extend_from_slice(buf);
451        Ok(buf.len())
452    }
453
454    fn flush(&mut self) -> io::Result<()> {
455        Ok(())
456    }
457}
458
459impl StreamingWriter for BufferedStreamingWriter {
460    fn finish(self: Box<Self>) -> io::Result<()> {
461        self.files.write().insert(self.path, Arc::new(self.buffer));
462        Ok(())
463    }
464
465    fn bytes_written(&self) -> u64 {
466        self.buffer.len() as u64
467    }
468}
469
/// Capacity of the `BufWriter` inside FileStreamingWriter (8 MB).
/// Sized so that millions of tiny writes (e.g. per-vector doc_id writes)
/// are coalesced into efficient sequential I/O.
#[cfg(feature = "native")]
const FILE_STREAMING_BUF_SIZE: usize = 8 * 1024 * 1024;

/// StreamingWriter for filesystem directories: a buffered `std::fs::File`
/// plus a running count of accepted bytes.
#[cfg(feature = "native")]
pub(crate) struct FileStreamingWriter {
    /// Buffered handle to the destination file.
    pub(crate) file: io::BufWriter<std::fs::File>,
    /// Total number of bytes accepted by `write` so far.
    pub(crate) written: u64,
}

#[cfg(feature = "native")]
impl FileStreamingWriter {
    /// Wrap `file` in a `BufWriter` of `FILE_STREAMING_BUF_SIZE` bytes.
    pub(crate) fn new(file: std::fs::File) -> Self {
        let file = io::BufWriter::with_capacity(FILE_STREAMING_BUF_SIZE, file);
        Self { file, written: 0 }
    }
}

#[cfg(feature = "native")]
impl io::Write for FileStreamingWriter {
    /// Forward to the buffered file and add the accepted byte count to `written`.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        match self.file.write(buf) {
            Ok(accepted) => {
                self.written += accepted as u64;
                Ok(accepted)
            }
            Err(err) => Err(err),
        }
    }

    /// Flush the internal buffer to the file.
    fn flush(&mut self) -> io::Result<()> {
        self.file.flush()
    }
}

#[cfg(feature = "native")]
impl StreamingWriter for FileStreamingWriter {
    /// Flush remaining buffered bytes (via `into_inner`) and fsync the file.
    fn finish(self: Box<Self>) -> io::Result<()> {
        let Self { file: buffered, .. } = *self;
        let raw = buffered.into_inner().map_err(|err| err.into_error())?;
        raw.sync_all()
    }

    /// Bytes accepted so far, including bytes still sitting in the buffer.
    fn bytes_written(&self) -> u64 {
        self.written
    }
}
518
/// Async directory trait for writing index files
///
/// Extends `Directory`, so every writer can also read.
#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
pub trait DirectoryWriter: Directory {
    /// Create/overwrite a file with data
    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()>;

    /// Delete a file
    async fn delete(&self, path: &Path) -> io::Result<()>;

    /// Atomic rename
    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()>;

    /// Sync all pending writes
    async fn sync(&self) -> io::Result<()>;

    /// Create a streaming writer for incremental file writes.
    /// Call finish() on the returned writer to finalize.
    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>>;
}
539
540/// In-memory directory for testing and small indexes
541#[derive(Debug, Default)]
542pub struct RamDirectory {
543    files: Arc<RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>>,
544}
545
546impl Clone for RamDirectory {
547    fn clone(&self) -> Self {
548        Self {
549            files: Arc::clone(&self.files),
550        }
551    }
552}
553
554impl RamDirectory {
555    pub fn new() -> Self {
556        Self::default()
557    }
558}
559
560#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
561#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
562impl Directory for RamDirectory {
563    async fn exists(&self, path: &Path) -> io::Result<bool> {
564        Ok(self.files.read().contains_key(path))
565    }
566
567    async fn file_size(&self, path: &Path) -> io::Result<u64> {
568        self.files
569            .read()
570            .get(path)
571            .map(|data| data.len() as u64)
572            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))
573    }
574
575    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
576        let files = self.files.read();
577        let data = files
578            .get(path)
579            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))?;
580
581        Ok(FileHandle::from_bytes(OwnedBytes::from_arc_vec(
582            Arc::clone(data),
583            0..data.len(),
584        )))
585    }
586
587    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
588        let files = self.files.read();
589        let data = files
590            .get(path)
591            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))?;
592
593        let start = range.start as usize;
594        let end = range.end as usize;
595
596        if end > data.len() {
597            return Err(io::Error::new(
598                io::ErrorKind::InvalidInput,
599                "Range out of bounds",
600            ));
601        }
602
603        Ok(OwnedBytes::from_arc_vec(Arc::clone(data), start..end))
604    }
605
606    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
607        let files = self.files.read();
608        Ok(files
609            .keys()
610            .filter(|p| p.starts_with(prefix))
611            .cloned()
612            .collect())
613    }
614
615    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
616        // RAM data is always available synchronously — return Inline handle
617        self.open_read(path).await
618    }
619}
620
621#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
622#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
623impl DirectoryWriter for RamDirectory {
624    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
625        self.files
626            .write()
627            .insert(path.to_path_buf(), Arc::new(data.to_vec()));
628        Ok(())
629    }
630
631    async fn delete(&self, path: &Path) -> io::Result<()> {
632        self.files.write().remove(path);
633        Ok(())
634    }
635
636    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
637        let mut files = self.files.write();
638        if let Some(data) = files.remove(from) {
639            files.insert(to.to_path_buf(), data);
640        }
641        Ok(())
642    }
643
644    async fn sync(&self) -> io::Result<()> {
645        Ok(())
646    }
647
648    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>> {
649        Ok(Box::new(BufferedStreamingWriter {
650            path: path.to_path_buf(),
651            buffer: Vec::new(),
652            files: Arc::clone(&self.files),
653        }))
654    }
655}
656
/// Local filesystem directory with async IO via tokio
///
/// All paths handed to this directory are joined onto `root`.
#[cfg(feature = "native")]
#[derive(Debug, Clone)]
pub struct FsDirectory {
    /// Base path that every file path is resolved against.
    root: PathBuf,
}

#[cfg(feature = "native")]
impl FsDirectory {
    /// Create a directory rooted at `root`. The path is stored as given.
    pub fn new(root: impl AsRef<Path>) -> Self {
        let root = PathBuf::from(root.as_ref());
        Self { root }
    }

    /// Join `path` onto the root to obtain the on-disk path.
    fn resolve(&self, path: &Path) -> PathBuf {
        let mut full = self.root.clone();
        full.push(path);
        full
    }
}
676
/// Read the bytes of `range` from the file at `full_path` into a fresh buffer.
///
/// Shared by `FsDirectory::read_range` and the lazy callback built in
/// `FsDirectory::open_lazy`.
///
/// # Errors
///
/// `InvalidInput` if `range` is inverted or its length does not fit in
/// `usize`; otherwise any error from opening, seeking or reading the file
/// (including `UnexpectedEof` when the file is shorter than `range.end`).
#[cfg(feature = "native")]
async fn fs_read_range(full_path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
    use std::convert::TryFrom;
    use tokio::io::{AsyncReadExt, AsyncSeekExt};

    // Checked arithmetic: `end - start` would underflow on an inverted range,
    // and a plain `as usize` cast could truncate on 32-bit targets.
    let len = range
        .end
        .checked_sub(range.start)
        .and_then(|len| usize::try_from(len).ok())
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("Invalid byte range {:?}", range),
            )
        })?;

    let mut file = tokio::fs::File::open(full_path).await?;
    file.seek(io::SeekFrom::Start(range.start)).await?;

    let mut buffer = vec![0u8; len];
    file.read_exact(&mut buffer).await?;

    Ok(OwnedBytes::new(buffer))
}

#[cfg(feature = "native")]
#[async_trait]
impl Directory for FsDirectory {
    /// Whether the resolved path exists.
    ///
    /// Errors while probing (e.g. permission denied) are reported as
    /// `Ok(false)` rather than propagated.
    async fn exists(&self, path: &Path) -> io::Result<bool> {
        let full_path = self.resolve(path);
        Ok(tokio::fs::try_exists(&full_path).await.unwrap_or(false))
    }

    /// Size of the file in bytes, taken from filesystem metadata.
    async fn file_size(&self, path: &Path) -> io::Result<u64> {
        let full_path = self.resolve(path);
        let metadata = tokio::fs::metadata(&full_path).await?;
        Ok(metadata.len())
    }

    /// Read the whole file into memory and return it as an inline handle.
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
        let full_path = self.resolve(path);
        let data = tokio::fs::read(&full_path).await?;
        Ok(FileHandle::from_bytes(OwnedBytes::new(data)))
    }

    /// Read only `range` from the file (open, seek, read exactly that many bytes).
    ///
    /// # Errors
    ///
    /// `InvalidInput` for an inverted range; otherwise the underlying I/O error.
    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
        let full_path = self.resolve(path);
        fs_read_range(&full_path, range).await
    }

    /// List regular files directly inside `prefix` (non-recursive), with
    /// paths returned relative to the directory root.
    ///
    /// # Errors
    ///
    /// Besides I/O errors, returns `InvalidInput` if an entry does not live
    /// under the root. This happens when `prefix` is absolute, because joining
    /// an absolute path replaces the root.
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
        let full_path = self.resolve(prefix);
        let mut entries = tokio::fs::read_dir(&full_path).await?;
        let mut files = Vec::new();

        while let Some(entry) = entries.next_entry().await? {
            if entry.file_type().await?.is_file() {
                let entry_path = entry.path();
                let relative = entry_path
                    .strip_prefix(&self.root)
                    .map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))?;
                files.push(relative.to_path_buf());
            }
        }

        Ok(files)
    }

    /// Return a Lazy handle whose callback opens the file and reads the
    /// requested range on every call. The file size is captured once, here.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
        let full_path = self.resolve(path);
        let metadata = tokio::fs::metadata(&full_path).await?;
        let file_size = metadata.len();

        let read_fn: RangeReadFn = Arc::new(move |range: Range<u64>| {
            // Each call gets its own copy of the path so the returned future
            // does not borrow from the closure.
            let full_path = full_path.clone();
            Box::pin(async move { fs_read_range(&full_path, range).await })
        });

        Ok(FileHandle::lazy(file_size, read_fn))
    }
}
750
#[cfg(feature = "native")]
#[async_trait]
impl DirectoryWriter for FsDirectory {
    /// Create or overwrite the file at `path`, creating parent directories
    /// as needed.
    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
        let full_path = self.resolve(path);

        // Ensure parent directory exists
        if let Some(parent) = full_path.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }

        tokio::fs::write(&full_path, data).await
    }

    /// Remove the file at `path`; fails if it does not exist.
    async fn delete(&self, path: &Path) -> io::Result<()> {
        let full_path = self.resolve(path);
        tokio::fs::remove_file(&full_path).await
    }

    /// Rename `from` to `to`, both resolved against the root.
    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
        let from_path = self.resolve(from);
        let to_path = self.resolve(to);
        tokio::fs::rename(&from_path, &to_path).await
    }

    /// fsync the root directory.
    ///
    /// Goes through `tokio::fs` so the blocking open and fsync calls run on
    /// tokio's blocking pool instead of stalling an async worker thread.
    async fn sync(&self) -> io::Result<()> {
        let dir = tokio::fs::File::open(&self.root).await?;
        dir.sync_all().await
    }

    /// Create (or truncate) the file at `path` and return a buffered
    /// streaming writer for it, creating parent directories as needed.
    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>> {
        let full_path = self.resolve(path);
        if let Some(parent) = full_path.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }
        // Create the file via tokio (off the async worker), then convert it to
        // the std handle that the synchronous FileStreamingWriter expects.
        let file = tokio::fs::File::create(&full_path)
            .await?
            .into_std()
            .await;
        Ok(Box::new(FileStreamingWriter::new(file)))
    }
}
792
793/// Caching wrapper for any Directory - caches file reads
794pub struct CachingDirectory<D: Directory> {
795    inner: D,
796    cache: RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>,
797    max_cached_bytes: usize,
798    current_bytes: RwLock<usize>,
799}
800
801impl<D: Directory> CachingDirectory<D> {
802    pub fn new(inner: D, max_cached_bytes: usize) -> Self {
803        Self {
804            inner,
805            cache: RwLock::new(HashMap::new()),
806            max_cached_bytes,
807            current_bytes: RwLock::new(0),
808        }
809    }
810
811    fn try_cache(&self, path: &Path, data: &[u8]) {
812        let mut current = self.current_bytes.write();
813        if *current + data.len() <= self.max_cached_bytes {
814            self.cache
815                .write()
816                .insert(path.to_path_buf(), Arc::new(data.to_vec()));
817            *current += data.len();
818        }
819    }
820}
821
822#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
823#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
824impl<D: Directory> Directory for CachingDirectory<D> {
825    async fn exists(&self, path: &Path) -> io::Result<bool> {
826        if self.cache.read().contains_key(path) {
827            return Ok(true);
828        }
829        self.inner.exists(path).await
830    }
831
832    async fn file_size(&self, path: &Path) -> io::Result<u64> {
833        if let Some(data) = self.cache.read().get(path) {
834            return Ok(data.len() as u64);
835        }
836        self.inner.file_size(path).await
837    }
838
839    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
840        // Check cache first
841        if let Some(data) = self.cache.read().get(path) {
842            return Ok(FileHandle::from_bytes(OwnedBytes::from_arc_vec(
843                Arc::clone(data),
844                0..data.len(),
845            )));
846        }
847
848        // Read from inner and potentially cache
849        let handle = self.inner.open_read(path).await?;
850        let bytes = handle.read_bytes().await?;
851
852        self.try_cache(path, bytes.as_slice());
853
854        Ok(FileHandle::from_bytes(bytes))
855    }
856
857    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
858        // Check cache first
859        if let Some(data) = self.cache.read().get(path) {
860            let start = range.start as usize;
861            let end = range.end as usize;
862            return Ok(OwnedBytes::from_arc_vec(Arc::clone(data), start..end));
863        }
864
865        self.inner.read_range(path, range).await
866    }
867
868    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
869        self.inner.list_files(prefix).await
870    }
871
872    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
873        // For caching directory, delegate to inner - caching happens at read_range level
874        self.inner.open_lazy(path).await
875    }
876}
877
#[cfg(test)]
mod tests {
    use super::*;

    /// Round trip through `RamDirectory`: write, exists, full read, range read, delete.
    #[tokio::test]
    async fn test_ram_directory() {
        let dir = RamDirectory::new();
        let path = Path::new("test.txt");
        let missing = Path::new("nonexistent.txt");

        // Write file
        dir.write(path, b"hello world").await.unwrap();

        // Check exists
        assert!(dir.exists(path).await.unwrap());
        assert!(!dir.exists(missing).await.unwrap());

        // Read file
        let handle = dir.open_read(path).await.unwrap();
        let contents = handle.read_bytes().await.unwrap();
        assert_eq!(contents.as_slice(), b"hello world");

        // Read range
        let head = dir.read_range(path, 0..5).await.unwrap();
        assert_eq!(head.as_slice(), b"hello");

        // Delete
        dir.delete(path).await.unwrap();
        assert!(!dir.exists(path).await.unwrap());
    }

    /// Inline handles report their length, slice correctly and allow sync reads.
    #[tokio::test]
    async fn test_file_handle() {
        let handle = FileHandle::from_bytes(OwnedBytes::new(b"hello world".to_vec()));

        assert_eq!(handle.len(), 11);
        assert!(handle.is_sync());

        let first_word = handle.slice(0..5).read_bytes().await.unwrap();
        assert_eq!(first_word.as_slice(), b"hello");

        let second_word = handle.slice(6..11).read_bytes().await.unwrap();
        assert_eq!(second_word.as_slice(), b"world");

        // Sync reads work on inline handles
        let sync_bytes = handle.read_bytes_range_sync(0..5).unwrap();
        assert_eq!(sync_bytes.as_slice(), b"hello");
    }

    /// Slicing `OwnedBytes` yields the expected window and leaves the source intact.
    #[tokio::test]
    async fn test_owned_bytes() {
        let original = OwnedBytes::new(vec![1, 2, 3, 4, 5]);

        assert_eq!(original.len(), 5);
        assert_eq!(original.as_slice(), &[1, 2, 3, 4, 5]);

        let window = original.slice(1..4);
        assert_eq!(window.as_slice(), &[2, 3, 4]);

        // Original unchanged
        assert_eq!(original.as_slice(), &[1, 2, 3, 4, 5]);
    }
}
943}