// hermes_core/directories/directory.rs

1//! Async Directory abstraction for IO operations
2//!
3//! Supports network, local filesystem, and in-memory storage.
4//! All reads are async to minimize blocking on network latency.
5
6use async_trait::async_trait;
7use parking_lot::RwLock;
8use std::collections::HashMap;
9use std::io;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13
/// Callback type for lazy range reading
///
/// Takes an absolute byte range within the underlying file and resolves to
/// the bytes of that range. The native variant requires `Send + Sync` so
/// handles can be shared across threads; the wasm variant drops those bounds
/// (single-threaded runtime).
#[cfg(not(target_arch = "wasm32"))]
pub type RangeReadFn = Arc<
    dyn Fn(
            Range<u64>,
        )
            -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<OwnedBytes>> + Send>>
        + Send
        + Sync,
>;

/// Callback type for lazy range reading (wasm variant: no `Send`/`Sync` bounds).
#[cfg(target_arch = "wasm32")]
pub type RangeReadFn = Arc<
    dyn Fn(
        Range<u64>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<OwnedBytes>>>>,
>;
31
/// Unified file handle for both inline (mmap/RAM) and lazy (HTTP/filesystem) access.
///
/// Replaces the previous `FileSlice`, `LazyFileHandle`, and `LazyFileSlice` types.
/// - **Inline**: data is available synchronously (mmap, RAM). Sync reads via `read_bytes_range_sync`.
/// - **Lazy**: data is fetched on-demand via async callback (HTTP, filesystem).
///
/// Use `.slice()` to create sub-range views (zero-copy for Inline, offset-adjusted for Lazy).
#[derive(Clone)]
pub struct FileHandle {
    // Private so all reads go through the bounds-checked methods below.
    inner: FileHandleInner,
}
43
// Both variants carry an `offset`/`len` window so `slice()` can narrow the
// view without copying data or wrapping the callback.
#[derive(Clone)]
enum FileHandleInner {
    /// Data available inline — sync reads possible (mmap, RAM)
    Inline {
        data: OwnedBytes,
        // Window start within `data`, in bytes.
        offset: u64,
        // Window length in bytes.
        len: u64,
    },
    /// Data fetched on-demand via async callback (HTTP, filesystem)
    Lazy {
        read_fn: RangeReadFn,
        // Window start in the underlying file; added to ranges passed to `read_fn`.
        offset: u64,
        // Window length in bytes.
        len: u64,
    },
}
59
60impl std::fmt::Debug for FileHandle {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        match &self.inner {
63            FileHandleInner::Inline { len, offset, .. } => f
64                .debug_struct("FileHandle::Inline")
65                .field("offset", offset)
66                .field("len", len)
67                .finish(),
68            FileHandleInner::Lazy { len, offset, .. } => f
69                .debug_struct("FileHandle::Lazy")
70                .field("offset", offset)
71                .field("len", len)
72                .finish(),
73        }
74    }
75}
76
77impl FileHandle {
78    /// Create an inline file handle from owned bytes (mmap, RAM).
79    /// Sync reads are available.
80    pub fn from_bytes(data: OwnedBytes) -> Self {
81        let len = data.len() as u64;
82        Self {
83            inner: FileHandleInner::Inline {
84                data,
85                offset: 0,
86                len,
87            },
88        }
89    }
90
91    /// Create an empty file handle.
92    pub fn empty() -> Self {
93        Self::from_bytes(OwnedBytes::empty())
94    }
95
96    /// Create a lazy file handle from an async range-read callback.
97    /// Only async reads are available.
98    pub fn lazy(len: u64, read_fn: RangeReadFn) -> Self {
99        Self {
100            inner: FileHandleInner::Lazy {
101                read_fn,
102                offset: 0,
103                len,
104            },
105        }
106    }
107
108    /// Total length in bytes.
109    #[inline]
110    pub fn len(&self) -> u64 {
111        match &self.inner {
112            FileHandleInner::Inline { len, .. } => *len,
113            FileHandleInner::Lazy { len, .. } => *len,
114        }
115    }
116
117    /// Check if empty.
118    #[inline]
119    pub fn is_empty(&self) -> bool {
120        self.len() == 0
121    }
122
123    /// Whether synchronous reads are available (inline/mmap data).
124    #[inline]
125    pub fn is_sync(&self) -> bool {
126        matches!(&self.inner, FileHandleInner::Inline { .. })
127    }
128
129    /// Create a sub-range view. Zero-copy for Inline, offset-adjusted for Lazy.
130    pub fn slice(&self, range: Range<u64>) -> Self {
131        match &self.inner {
132            FileHandleInner::Inline { data, offset, len } => {
133                let new_offset = offset + range.start;
134                let new_len = range.end - range.start;
135                debug_assert!(
136                    new_offset + new_len <= offset + len,
137                    "slice out of bounds: {}+{} > {}+{}",
138                    new_offset,
139                    new_len,
140                    offset,
141                    len
142                );
143                Self {
144                    inner: FileHandleInner::Inline {
145                        data: data.clone(),
146                        offset: new_offset,
147                        len: new_len,
148                    },
149                }
150            }
151            FileHandleInner::Lazy {
152                read_fn,
153                offset,
154                len,
155            } => {
156                let new_offset = offset + range.start;
157                let new_len = range.end - range.start;
158                debug_assert!(
159                    new_offset + new_len <= offset + len,
160                    "slice out of bounds: {}+{} > {}+{}",
161                    new_offset,
162                    new_len,
163                    offset,
164                    len
165                );
166                Self {
167                    inner: FileHandleInner::Lazy {
168                        read_fn: Arc::clone(read_fn),
169                        offset: new_offset,
170                        len: new_len,
171                    },
172                }
173            }
174        }
175    }
176
177    /// Async range read — works for both Inline and Lazy.
178    pub async fn read_bytes_range(&self, range: Range<u64>) -> io::Result<OwnedBytes> {
179        match &self.inner {
180            FileHandleInner::Inline { data, offset, len } => {
181                if range.end > *len {
182                    return Err(io::Error::new(
183                        io::ErrorKind::InvalidInput,
184                        format!("Range {:?} out of bounds (len: {})", range, len),
185                    ));
186                }
187                let start = (*offset + range.start) as usize;
188                let end = (*offset + range.end) as usize;
189                Ok(data.slice(start..end))
190            }
191            FileHandleInner::Lazy {
192                read_fn,
193                offset,
194                len,
195            } => {
196                if range.end > *len {
197                    return Err(io::Error::new(
198                        io::ErrorKind::InvalidInput,
199                        format!("Range {:?} out of bounds (len: {})", range, len),
200                    ));
201                }
202                let abs_start = offset + range.start;
203                let abs_end = offset + range.end;
204                (read_fn)(abs_start..abs_end).await
205            }
206        }
207    }
208
209    /// Read all bytes.
210    pub async fn read_bytes(&self) -> io::Result<OwnedBytes> {
211        self.read_bytes_range(0..self.len()).await
212    }
213
214    /// Synchronous range read — only works for Inline handles.
215    /// Returns `Err` if the handle is Lazy.
216    #[inline]
217    pub fn read_bytes_range_sync(&self, range: Range<u64>) -> io::Result<OwnedBytes> {
218        match &self.inner {
219            FileHandleInner::Inline { data, offset, len } => {
220                if range.end > *len {
221                    return Err(io::Error::new(
222                        io::ErrorKind::InvalidInput,
223                        format!("Range {:?} out of bounds (len: {})", range, len),
224                    ));
225                }
226                let start = (*offset + range.start) as usize;
227                let end = (*offset + range.end) as usize;
228                Ok(data.slice(start..end))
229            }
230            FileHandleInner::Lazy { .. } => Err(io::Error::new(
231                io::ErrorKind::Unsupported,
232                "Synchronous read not available on lazy file handle",
233            )),
234        }
235    }
236
237    /// Synchronous read of all bytes — only works for Inline handles.
238    #[inline]
239    pub fn read_bytes_sync(&self) -> io::Result<OwnedBytes> {
240        self.read_bytes_range_sync(0..self.len())
241    }
242}
243
/// Backing store for OwnedBytes — supports both heap Vec and mmap.
#[derive(Clone)]
enum SharedBytes {
    /// Heap buffer shared by refcount; cloning never copies the bytes.
    Vec(Arc<Vec<u8>>),
    /// Memory-mapped file (zero-copy), native builds only.
    #[cfg(feature = "native")]
    Mmap(Arc<memmap2::Mmap>),
}
251
impl SharedBytes {
    /// Borrow the full underlying buffer (the whole Vec or mapping —
    /// sub-range views are applied by `OwnedBytes`, not here).
    #[inline]
    fn as_bytes(&self) -> &[u8] {
        match self {
            SharedBytes::Vec(v) => v.as_slice(),
            #[cfg(feature = "native")]
            SharedBytes::Mmap(m) => m.as_ref(),
        }
    }
}
262
263impl std::fmt::Debug for SharedBytes {
264    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
265        match self {
266            SharedBytes::Vec(v) => write!(f, "Vec(len={})", v.len()),
267            #[cfg(feature = "native")]
268            SharedBytes::Mmap(m) => write!(f, "Mmap(len={})", m.len()),
269        }
270    }
271}
272
/// Owned bytes with cheap cloning (Arc-backed)
///
/// Supports two backing stores:
/// - `Vec<u8>` for owned data (RamDirectory, FsDirectory, decompressed blocks)
/// - `Mmap` for zero-copy memory-mapped files (MmapDirectory, native only)
#[derive(Debug, Clone)]
pub struct OwnedBytes {
    // Shared backing buffer; cloning bumps a refcount only.
    data: SharedBytes,
    // The sub-range of `data` this value exposes via as_slice()/Deref.
    range: Range<usize>,
}
283
284impl OwnedBytes {
285    pub fn new(data: Vec<u8>) -> Self {
286        let len = data.len();
287        Self {
288            data: SharedBytes::Vec(Arc::new(data)),
289            range: 0..len,
290        }
291    }
292
293    pub fn empty() -> Self {
294        Self {
295            data: SharedBytes::Vec(Arc::new(Vec::new())),
296            range: 0..0,
297        }
298    }
299
300    /// Create from a pre-existing Arc<Vec<u8>> with a sub-range.
301    /// Used by RamDirectory and CachingDirectory to share data without copying.
302    pub(crate) fn from_arc_vec(data: Arc<Vec<u8>>, range: Range<usize>) -> Self {
303        Self {
304            data: SharedBytes::Vec(data),
305            range,
306        }
307    }
308
309    /// Create from a memory-mapped file (zero-copy).
310    #[cfg(feature = "native")]
311    pub(crate) fn from_mmap(mmap: Arc<memmap2::Mmap>) -> Self {
312        let len = mmap.len();
313        Self {
314            data: SharedBytes::Mmap(mmap),
315            range: 0..len,
316        }
317    }
318
319    /// Create from a memory-mapped file with a sub-range (zero-copy).
320    #[cfg(feature = "native")]
321    pub(crate) fn from_mmap_range(mmap: Arc<memmap2::Mmap>, range: Range<usize>) -> Self {
322        Self {
323            data: SharedBytes::Mmap(mmap),
324            range,
325        }
326    }
327
328    pub fn len(&self) -> usize {
329        self.range.len()
330    }
331
332    pub fn is_empty(&self) -> bool {
333        self.range.is_empty()
334    }
335
336    pub fn slice(&self, range: Range<usize>) -> Self {
337        let start = self.range.start + range.start;
338        let end = self.range.start + range.end;
339        Self {
340            data: self.data.clone(),
341            range: start..end,
342        }
343    }
344
345    pub fn as_slice(&self) -> &[u8] {
346        &self.data.as_bytes()[self.range.clone()]
347    }
348
349    pub fn to_vec(&self) -> Vec<u8> {
350        self.as_slice().to_vec()
351    }
352}
353
impl AsRef<[u8]> for OwnedBytes {
    // Exposes only the viewed sub-range, like as_slice().
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}
359
impl std::ops::Deref for OwnedBytes {
    type Target = [u8];

    // Lets OwnedBytes be used anywhere a `&[u8]` is expected.
    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}
367
/// Async directory trait for reading index files
///
/// Implemented in this file by `RamDirectory`, `FsDirectory` (native) and
/// `CachingDirectory`. The wasm build uses a parallel definition below
/// without the `Send + Sync` bounds.
#[cfg(not(target_arch = "wasm32"))]
#[async_trait]
pub trait Directory: Send + Sync + 'static {
    /// Check if a file exists
    async fn exists(&self, path: &Path) -> io::Result<bool>;

    /// Get file size
    async fn file_size(&self, path: &Path) -> io::Result<u64>;

    /// Open a file for reading (loads entire file into an inline FileHandle)
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle>;

    /// Read a specific byte range from a file (optimized for network)
    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes>;

    /// List files in directory
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>>;

    /// Open a file handle that fetches ranges on demand.
    /// For mmap directories this returns an Inline handle (sync-capable).
    /// For HTTP/filesystem directories this returns a Lazy handle.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle>;
}
392
/// Async directory trait for reading index files (WASM version - no Send requirement)
///
/// Keep the method set in sync with the native definition above; only the
/// `Send`/`Sync` bounds differ (wasm futures are single-threaded).
#[cfg(target_arch = "wasm32")]
#[async_trait(?Send)]
pub trait Directory: 'static {
    /// Check if a file exists
    async fn exists(&self, path: &Path) -> io::Result<bool>;

    /// Get file size
    async fn file_size(&self, path: &Path) -> io::Result<u64>;

    /// Open a file for reading (loads entire file into an inline FileHandle)
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle>;

    /// Read a specific byte range from a file (optimized for network)
    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes>;

    /// List files in directory
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>>;

    /// Open a file handle that fetches ranges on demand.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle>;
}
415
/// A writer for incrementally writing data to a directory file.
///
/// Avoids buffering entire files in memory during merge. File-backed
/// directories write directly to disk; memory directories collect to Vec.
pub trait StreamingWriter: io::Write + Send {
    /// Finalize the write, making data available for reading.
    /// Consumes the writer; data may not be visible to readers until this
    /// returns Ok.
    fn finish(self: Box<Self>) -> io::Result<()>;

    /// Bytes written so far.
    fn bytes_written(&self) -> u64;
}
427
/// StreamingWriter backed by Vec<u8>, finalized via DirectoryWriter::write.
/// Used as default/fallback and for RamDirectory.
struct BufferedStreamingWriter {
    // Destination path within the directory, consumed on finish().
    path: PathBuf,
    // Accumulates every written byte until finish().
    buffer: Vec<u8>,
    /// Callback to write the buffer to the directory on finish.
    /// We store the files Arc directly for RamDirectory.
    files: Arc<RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>>,
}
437
438impl io::Write for BufferedStreamingWriter {
439    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
440        self.buffer.extend_from_slice(buf);
441        Ok(buf.len())
442    }
443
444    fn flush(&mut self) -> io::Result<()> {
445        Ok(())
446    }
447}
448
449impl StreamingWriter for BufferedStreamingWriter {
450    fn finish(self: Box<Self>) -> io::Result<()> {
451        self.files.write().insert(self.path, Arc::new(self.buffer));
452        Ok(())
453    }
454
455    fn bytes_written(&self) -> u64 {
456        self.buffer.len() as u64
457    }
458}
459
/// Buffer size for FileStreamingWriter (8 MB).
/// Large enough to coalesce millions of tiny writes (e.g. per-vector doc_id writes)
/// into efficient sequential I/O.
#[cfg(feature = "native")]
const FILE_STREAMING_BUF_SIZE: usize = 8 * 1024 * 1024;
465
/// StreamingWriter backed by a buffered std::fs::File for filesystem directories.
#[cfg(feature = "native")]
pub(crate) struct FileStreamingWriter {
    // BufWriter coalesces small writes (capacity: FILE_STREAMING_BUF_SIZE).
    pub(crate) file: io::BufWriter<std::fs::File>,
    // Running count of bytes accepted by write(); reported by bytes_written().
    pub(crate) written: u64,
}
472
#[cfg(feature = "native")]
impl FileStreamingWriter {
    /// Wrap `file` in a large BufWriter so tiny writes coalesce into
    /// sequential I/O.
    pub(crate) fn new(file: std::fs::File) -> Self {
        let file = io::BufWriter::with_capacity(FILE_STREAMING_BUF_SIZE, file);
        Self { file, written: 0 }
    }
}
482
#[cfg(feature = "native")]
impl io::Write for FileStreamingWriter {
    /// Forwards to the BufWriter and tracks how many bytes it accepted
    /// (which may be fewer than `buf.len()`).
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = self.file.write(buf)?;
        self.written += accepted as u64;
        Ok(accepted)
    }

    fn flush(&mut self) -> io::Result<()> {
        self.file.flush()
    }
}
495
#[cfg(feature = "native")]
impl StreamingWriter for FileStreamingWriter {
    /// Flush the BufWriter, then fsync so the data is durable before
    /// readers can observe the file.
    fn finish(self: Box<Self>) -> io::Result<()> {
        match self.file.into_inner() {
            Ok(file) => {
                file.sync_all()?;
                Ok(())
            }
            // into_inner fails if the final flush fails; surface that error.
            Err(e) => Err(e.into_error()),
        }
    }

    fn bytes_written(&self) -> u64 {
        self.written
    }
}
508
/// Async directory trait for writing index files
///
/// Extends `Directory`, so every writer is also readable.
#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
pub trait DirectoryWriter: Directory {
    /// Create/overwrite a file with data
    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()>;

    /// Delete a file
    async fn delete(&self, path: &Path) -> io::Result<()>;

    /// Atomic rename
    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()>;

    /// Sync all pending writes
    async fn sync(&self) -> io::Result<()>;

    /// Create a streaming writer for incremental file writes.
    /// Call finish() on the returned writer to finalize.
    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>>;
}
529
/// In-memory directory for testing and small indexes
///
/// Files live in a shared map; all clones of a RamDirectory see the same
/// files (the Arc is shared, not the contents copied).
#[derive(Debug, Default)]
pub struct RamDirectory {
    files: Arc<RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>>,
}
535
536impl Clone for RamDirectory {
537    fn clone(&self) -> Self {
538        Self {
539            files: Arc::clone(&self.files),
540        }
541    }
542}
543
544impl RamDirectory {
545    pub fn new() -> Self {
546        Self::default()
547    }
548}
549
550#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
551#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
552impl Directory for RamDirectory {
553    async fn exists(&self, path: &Path) -> io::Result<bool> {
554        Ok(self.files.read().contains_key(path))
555    }
556
557    async fn file_size(&self, path: &Path) -> io::Result<u64> {
558        self.files
559            .read()
560            .get(path)
561            .map(|data| data.len() as u64)
562            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))
563    }
564
565    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
566        let files = self.files.read();
567        let data = files
568            .get(path)
569            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))?;
570
571        Ok(FileHandle::from_bytes(OwnedBytes::from_arc_vec(
572            Arc::clone(data),
573            0..data.len(),
574        )))
575    }
576
577    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
578        let files = self.files.read();
579        let data = files
580            .get(path)
581            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "File not found"))?;
582
583        let start = range.start as usize;
584        let end = range.end as usize;
585
586        if end > data.len() {
587            return Err(io::Error::new(
588                io::ErrorKind::InvalidInput,
589                "Range out of bounds",
590            ));
591        }
592
593        Ok(OwnedBytes::from_arc_vec(Arc::clone(data), start..end))
594    }
595
596    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
597        let files = self.files.read();
598        Ok(files
599            .keys()
600            .filter(|p| p.starts_with(prefix))
601            .cloned()
602            .collect())
603    }
604
605    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
606        // RAM data is always available synchronously — return Inline handle
607        self.open_read(path).await
608    }
609}
610
611#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
612#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
613impl DirectoryWriter for RamDirectory {
614    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
615        self.files
616            .write()
617            .insert(path.to_path_buf(), Arc::new(data.to_vec()));
618        Ok(())
619    }
620
621    async fn delete(&self, path: &Path) -> io::Result<()> {
622        self.files.write().remove(path);
623        Ok(())
624    }
625
626    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
627        let mut files = self.files.write();
628        if let Some(data) = files.remove(from) {
629            files.insert(to.to_path_buf(), data);
630        }
631        Ok(())
632    }
633
634    async fn sync(&self) -> io::Result<()> {
635        Ok(())
636    }
637
638    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>> {
639        Ok(Box::new(BufferedStreamingWriter {
640            path: path.to_path_buf(),
641            buffer: Vec::new(),
642            files: Arc::clone(&self.files),
643        }))
644    }
645}
646
/// Local filesystem directory with async IO via tokio
#[cfg(feature = "native")]
#[derive(Debug, Clone)]
pub struct FsDirectory {
    // All paths passed to the trait methods are resolved relative to this root.
    root: PathBuf,
}
653
#[cfg(feature = "native")]
impl FsDirectory {
    /// Create a directory rooted at `root`; trait-level paths are resolved
    /// relative to it.
    pub fn new(root: impl AsRef<Path>) -> Self {
        let root = root.as_ref().to_path_buf();
        Self { root }
    }

    /// Join a relative `path` onto the configured root.
    fn resolve(&self, path: &Path) -> PathBuf {
        self.root.join(path)
    }
}
666
#[cfg(feature = "native")]
#[async_trait]
impl Directory for FsDirectory {
    /// Best-effort existence check: metadata errors (permissions, broken
    /// symlinks) are reported as "does not exist" rather than propagated.
    async fn exists(&self, path: &Path) -> io::Result<bool> {
        let full_path = self.resolve(path);
        Ok(tokio::fs::try_exists(&full_path).await.unwrap_or(false))
    }

    async fn file_size(&self, path: &Path) -> io::Result<u64> {
        let full_path = self.resolve(path);
        let metadata = tokio::fs::metadata(&full_path).await?;
        Ok(metadata.len())
    }

    /// Loads the whole file into memory and returns an Inline handle.
    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
        let full_path = self.resolve(path);
        let data = tokio::fs::read(&full_path).await?;
        Ok(FileHandle::from_bytes(OwnedBytes::new(data)))
    }

    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
        use tokio::io::{AsyncReadExt, AsyncSeekExt};

        let full_path = self.resolve(path);
        let mut file = tokio::fs::File::open(&full_path).await?;

        file.seek(std::io::SeekFrom::Start(range.start)).await?;

        let len = (range.end - range.start) as usize;
        let mut buffer = vec![0u8; len];
        // read_exact errors (UnexpectedEof) if the range extends past EOF.
        file.read_exact(&mut buffer).await?;

        Ok(OwnedBytes::new(buffer))
    }

    /// Lists regular files directly under `prefix` (non-recursive),
    /// returned as paths relative to the directory root.
    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
        let full_path = self.resolve(prefix);
        let mut entries = tokio::fs::read_dir(&full_path).await?;
        let mut files = Vec::new();

        while let Some(entry) = entries.next_entry().await? {
            if entry.file_type().await?.is_file() {
                // Fix: strip_prefix was unwrap()ed, panicking if an entry's
                // path did not start with `root` (possible with symlinked or
                // non-canonical roots). Surface it as an io::Error instead.
                let rel = entry
                    .path()
                    .strip_prefix(&self.root)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
                    .to_path_buf();
                files.push(rel);
            }
        }

        Ok(files)
    }

    /// Returns a Lazy handle: each range read opens the file, seeks, and reads.
    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
        let full_path = self.resolve(path);
        let metadata = tokio::fs::metadata(&full_path).await?;
        let file_size = metadata.len();

        let read_fn: RangeReadFn = Arc::new(move |range: Range<u64>| {
            let full_path = full_path.clone();
            Box::pin(async move {
                use tokio::io::{AsyncReadExt, AsyncSeekExt};

                let mut file = tokio::fs::File::open(&full_path).await?;
                file.seek(std::io::SeekFrom::Start(range.start)).await?;

                let len = (range.end - range.start) as usize;
                let mut buffer = vec![0u8; len];
                file.read_exact(&mut buffer).await?;

                Ok(OwnedBytes::new(buffer))
            })
        });

        Ok(FileHandle::lazy(file_size, read_fn))
    }
}
740
#[cfg(feature = "native")]
#[async_trait]
impl DirectoryWriter for FsDirectory {
    /// Create/overwrite `path`, creating parent directories as needed.
    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
        let full_path = self.resolve(path);

        // Ensure parent directory exists
        if let Some(parent) = full_path.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }

        tokio::fs::write(&full_path, data).await
    }

    /// Delete a file. Unlike RamDirectory, a missing file is an error
    /// (remove_file returns NotFound).
    async fn delete(&self, path: &Path) -> io::Result<()> {
        let full_path = self.resolve(path);
        tokio::fs::remove_file(&full_path).await
    }

    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
        let from_path = self.resolve(from);
        let to_path = self.resolve(to);
        tokio::fs::rename(&from_path, &to_path).await
    }

    /// Durability barrier for directory metadata (renames/creates).
    ///
    /// NOTE(review): opening a directory with File::open + sync_all is a
    /// Unix idiom and likely fails on Windows — confirm if Windows is a
    /// target. Note also that this fsyncs the directory entry, not the
    /// contents of previously written files.
    async fn sync(&self) -> io::Result<()> {
        // fsync the directory
        let dir = std::fs::File::open(&self.root)?;
        dir.sync_all()?;
        Ok(())
    }

    async fn streaming_writer(&self, path: &Path) -> io::Result<Box<dyn StreamingWriter>> {
        let full_path = self.resolve(path);
        if let Some(parent) = full_path.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }
        // NOTE(review): std::fs::File::create is a blocking call inside an
        // async fn; presumably acceptable for a single open — confirm, or
        // consider spawn_blocking.
        let file = std::fs::File::create(&full_path)?;
        Ok(Box::new(FileStreamingWriter::new(file)))
    }
}
782
/// Caching wrapper for any Directory - caches file reads
///
/// Whole files are cached on `open_read` misses until `max_cached_bytes`
/// total; there is no eviction once the budget is reached.
pub struct CachingDirectory<D: Directory> {
    // Wrapped directory all cache misses fall through to.
    inner: D,
    // Whole-file cache: path -> complete file contents.
    cache: RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>,
    // Soft capacity: inserts stop once this many bytes are cached.
    max_cached_bytes: usize,
    // Total bytes currently held in `cache`.
    current_bytes: RwLock<usize>,
}
790
impl<D: Directory> CachingDirectory<D> {
    /// Wrap `inner`, caching whole files up to `max_cached_bytes` total.
    pub fn new(inner: D, max_cached_bytes: usize) -> Self {
        Self {
            inner,
            cache: RwLock::new(HashMap::new()),
            max_cached_bytes,
            current_bytes: RwLock::new(0),
        }
    }

    /// Insert `data` into the cache if it still fits under the byte budget.
    ///
    /// Insert-only: entries are never evicted, so once the budget is hit no
    /// further files are cached. The `current_bytes` write lock is held
    /// while the `cache` lock is taken; this is the only place both locks
    /// are acquired, so the ordering cannot deadlock.
    ///
    /// NOTE(review): if the same path were cached twice, `current_bytes`
    /// would count it twice (insert replaces, counter still adds). Callers
    /// only invoke this on cache misses, but a concurrent double-miss could
    /// overcount — confirm whether that is acceptable.
    fn try_cache(&self, path: &Path, data: &[u8]) {
        let mut current = self.current_bytes.write();
        if *current + data.len() <= self.max_cached_bytes {
            self.cache
                .write()
                .insert(path.to_path_buf(), Arc::new(data.to_vec()));
            *current += data.len();
        }
    }
}
811
812#[cfg_attr(not(target_arch = "wasm32"), async_trait)]
813#[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
814impl<D: Directory> Directory for CachingDirectory<D> {
815    async fn exists(&self, path: &Path) -> io::Result<bool> {
816        if self.cache.read().contains_key(path) {
817            return Ok(true);
818        }
819        self.inner.exists(path).await
820    }
821
822    async fn file_size(&self, path: &Path) -> io::Result<u64> {
823        if let Some(data) = self.cache.read().get(path) {
824            return Ok(data.len() as u64);
825        }
826        self.inner.file_size(path).await
827    }
828
829    async fn open_read(&self, path: &Path) -> io::Result<FileHandle> {
830        // Check cache first
831        if let Some(data) = self.cache.read().get(path) {
832            return Ok(FileHandle::from_bytes(OwnedBytes::from_arc_vec(
833                Arc::clone(data),
834                0..data.len(),
835            )));
836        }
837
838        // Read from inner and potentially cache
839        let handle = self.inner.open_read(path).await?;
840        let bytes = handle.read_bytes().await?;
841
842        self.try_cache(path, bytes.as_slice());
843
844        Ok(FileHandle::from_bytes(bytes))
845    }
846
847    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
848        // Check cache first
849        if let Some(data) = self.cache.read().get(path) {
850            let start = range.start as usize;
851            let end = range.end as usize;
852            return Ok(OwnedBytes::from_arc_vec(Arc::clone(data), start..end));
853        }
854
855        self.inner.read_range(path, range).await
856    }
857
858    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
859        self.inner.list_files(prefix).await
860    }
861
862    async fn open_lazy(&self, path: &Path) -> io::Result<FileHandle> {
863        // For caching directory, delegate to inner - caching happens at read_range level
864        self.inner.open_lazy(path).await
865    }
866}
867
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises the full RamDirectory write/read/delete round trip.
    #[tokio::test]
    async fn test_ram_directory() {
        let dir = RamDirectory::new();

        // Write file
        dir.write(Path::new("test.txt"), b"hello world")
            .await
            .unwrap();

        // Check exists
        assert!(dir.exists(Path::new("test.txt")).await.unwrap());
        assert!(!dir.exists(Path::new("nonexistent.txt")).await.unwrap());

        // Read file
        let slice = dir.open_read(Path::new("test.txt")).await.unwrap();
        let data = slice.read_bytes().await.unwrap();
        assert_eq!(data.as_slice(), b"hello world");

        // Read range
        let range_data = dir.read_range(Path::new("test.txt"), 0..5).await.unwrap();
        assert_eq!(range_data.as_slice(), b"hello");

        // Delete
        dir.delete(Path::new("test.txt")).await.unwrap();
        assert!(!dir.exists(Path::new("test.txt")).await.unwrap());
    }

    // Covers Inline handle length, sync capability, slicing, and both
    // async and sync range reads.
    #[tokio::test]
    async fn test_file_handle() {
        let data = OwnedBytes::new(b"hello world".to_vec());
        let handle = FileHandle::from_bytes(data);

        assert_eq!(handle.len(), 11);
        assert!(handle.is_sync());

        let sub = handle.slice(0..5);
        let bytes = sub.read_bytes().await.unwrap();
        assert_eq!(bytes.as_slice(), b"hello");

        let sub2 = handle.slice(6..11);
        let bytes2 = sub2.read_bytes().await.unwrap();
        assert_eq!(bytes2.as_slice(), b"world");

        // Sync reads work on inline handles
        let sync_bytes = handle.read_bytes_range_sync(0..5).unwrap();
        assert_eq!(sync_bytes.as_slice(), b"hello");
    }

    // Verifies that slicing is a view: the original bytes are unaffected.
    #[tokio::test]
    async fn test_owned_bytes() {
        let bytes = OwnedBytes::new(vec![1, 2, 3, 4, 5]);

        assert_eq!(bytes.len(), 5);
        assert_eq!(bytes.as_slice(), &[1, 2, 3, 4, 5]);

        let sliced = bytes.slice(1..4);
        assert_eq!(sliced.as_slice(), &[2, 3, 4]);

        // Original unchanged
        assert_eq!(bytes.as_slice(), &[1, 2, 3, 4, 5]);
    }
}