Skip to main content

doublecrypt_core/
fs.rs

1use std::cell::RefCell;
2use std::collections::HashMap;
3use std::num::NonZeroUsize;
4use std::sync::Arc;
5use std::time::{SystemTime, UNIX_EPOCH};
6
7use lru::LruCache;
8use rand::RngCore;
9
10use crate::allocator::{BitmapAllocator, SlotAllocator};
11use crate::block_store::BlockStore;
12use crate::codec::{
13    decrypt_block_to_plaintext, prepare_encrypted_block, prepare_encrypted_object,
14    read_encrypted_object, read_encrypted_raw, write_encrypted_object, ObjectCodec, PostcardCodec,
15};
16use crate::crypto::CryptoEngine;
17use crate::error::{FsError, FsResult};
18use crate::model::*;
19use crate::transaction::TransactionManager;
20
/// The main filesystem core. Owns the block store, crypto, codec, allocator,
/// and transaction manager. Provides high-level filesystem operations.
///
/// All path-accepting methods use `/`-separated paths.  An empty string or
/// `"/"` refers to the root directory.  Parent directories must already exist;
/// only `create_file` and `create_directory` create the leaf entry.
pub struct FilesystemCore {
    /// Backing block device; all persistent state is read/written through it.
    store: Arc<dyn BlockStore>,
    /// Engine used to encrypt/decrypt every object stored in `store`.
    crypto: Arc<dyn CryptoEngine>,
    /// Serialization codec for on-disk objects.
    codec: PostcardCodec,
    /// Tracks which blocks are free vs. in use.
    allocator: BitmapAllocator,
    /// Handles superblock commits and crash recovery (see `open`).
    txn: TransactionManager,
    /// Cached current superblock.  `None` until `init_filesystem`/`open`.
    superblock: Option<Superblock>,
    /// Next inode ID to allocate.
    next_inode_id: InodeId,
    /// Write buffer: dirty file chunks held in memory until flush.
    /// Keyed by the `/`-trimmed file path.
    write_buffer: HashMap<String, DirtyFile>,
    /// Block ID of the most recently committed superblock object.
    /// Freed when superseded by a new commit.
    last_superblock_block: Option<u64>,
    /// LRU cache of decrypted metadata objects (inodes, dir pages, extent maps)
    /// keyed by block ID.  Avoids repeated decrypt + deserialize on every op.
    obj_cache: RefCell<LruCache<u64, Vec<u8>>>,
}
46
/// Tracks one ancestor directory during path resolution, used by
/// `commit_cow_chain` to propagate CoW writes back to the root.
struct AncestorEntry {
    /// The ancestor directory's inode, as read at resolve time.
    inode: Inode,
    /// The ancestor's directory page, as read at resolve time.
    dir_page: DirectoryPage,
    /// Index within `dir_page.entries` of the child that lies on the
    /// resolved path.
    child_index: usize,
}
54
/// Tracks in-memory buffered writes for a single file.
///
/// All dirty chunks are held in memory until `sync()` (or the next
/// metadata-mutating operation) flushes them to the block store.
/// This keeps `write_file()` purely in-memory for smooth throughput.
struct DirtyFile {
    /// In-memory chunk data keyed by chunk index (only partial chunks).
    dirty_chunks: HashMap<u64, Vec<u8>>,
    /// The file's inode at the time buffering started.
    base_inode: Inode,
    /// The file's extent map (updated in-place when chunks are eagerly flushed).
    /// Kept sorted by `chunk_index` so chunk lookups can binary-search.
    extent_map: ExtentMap,
    /// Current logical file size (updated on every write).
    size: u64,
    /// Set to `true` when any data has been written (even if eagerly flushed).
    metadata_dirty: bool,
}
72
/// Maximum payload size for a single file data chunk.
///
/// Computed conservatively: `block_size` minus a fixed 200-byte overhead
/// budget for envelope framing (4-byte length prefix, ~60 bytes envelope
/// metadata, 16-byte Poly1305 tag, plus postcard framing slack).
///
/// Returns 0 when the block is too small to hold any payload at all;
/// callers treat 0 as "unusable block size".
fn max_chunk_payload(block_size: usize) -> usize {
    // `saturating_sub` expresses the "never below zero" intent directly
    // and replaces the manual branch without any chance of underflow.
    block_size.saturating_sub(200)
}
85
/// Current wall-clock time in whole seconds since the Unix epoch.
/// Yields 0 if the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
92
93impl FilesystemCore {
94    /// Create a new FilesystemCore backed by the given store and crypto engine.
95    pub fn new(store: Arc<dyn BlockStore>, crypto: Arc<dyn CryptoEngine>) -> Self {
96        let total_blocks = store.total_blocks();
97        Self {
98            store,
99            crypto,
100            codec: PostcardCodec,
101            allocator: BitmapAllocator::new(total_blocks),
102            txn: TransactionManager::new(),
103            superblock: None,
104            next_inode_id: 1,
105            write_buffer: HashMap::new(),
106            last_superblock_block: None,
107            obj_cache: RefCell::new(LruCache::new(NonZeroUsize::new(256).unwrap())),
108        }
109    }
110
111    // ── Initialization ──
112
113    /// Initialize a brand-new filesystem on the block store.
114    /// Writes the storage header, creates the root directory, and commits.
115    pub fn init_filesystem(&mut self) -> FsResult<()> {
116        let block_size = self.store.block_size() as u32;
117        let total_blocks = self.store.total_blocks();
118
119        // Write storage header to block 0 (unencrypted).
120        let header = StorageHeader::new(block_size, total_blocks);
121        let header_bytes = self.codec.serialize_object(&header)?;
122        let bs = self.store.block_size();
123        let mut block = vec![0u8; bs];
124        rand::thread_rng().fill_bytes(&mut block);
125        let len = header_bytes.len() as u32;
126        block[..4].copy_from_slice(&len.to_le_bytes());
127        block[4..4 + header_bytes.len()].copy_from_slice(&header_bytes);
128        self.store.write_block(BLOCK_STORAGE_HEADER, &block)?;
129
130        // Create root directory inode.
131        let root_inode_id = self.alloc_inode_id();
132        let dir_page = DirectoryPage::new();
133        let dir_page_block = self.allocator.allocate()?;
134        write_encrypted_object(
135            self.store.as_ref(),
136            self.crypto.as_ref(),
137            &self.codec,
138            dir_page_block,
139            ObjectKind::DirectoryPage,
140            &dir_page,
141        )?;
142
143        let ts = now_secs();
144        let root_inode = Inode {
145            id: root_inode_id,
146            kind: InodeKind::Directory,
147            size: 0,
148            directory_page_ref: ObjectRef::new(dir_page_block),
149            extent_map_ref: ObjectRef::null(),
150            created_at: ts,
151            modified_at: ts,
152        };
153        let root_inode_block = self.allocator.allocate()?;
154        write_encrypted_object(
155            self.store.as_ref(),
156            self.crypto.as_ref(),
157            &self.codec,
158            root_inode_block,
159            ObjectKind::Inode,
160            &root_inode,
161        )?;
162
163        // Create superblock.
164        let sb = Superblock {
165            generation: 1,
166            root_inode_ref: ObjectRef::new(root_inode_block),
167        };
168        self.superblock = Some(sb.clone());
169
170        // Commit.
171        self.txn.commit(
172            self.store.as_ref(),
173            self.crypto.as_ref(),
174            &self.codec,
175            &self.allocator,
176            &sb,
177        )?;
178
179        Ok(())
180    }
181
    /// Open / mount an existing filesystem by recovering the latest root pointer.
    ///
    /// Steps: validate the storage header, recover the newest root pointer
    /// via the transaction manager, decrypt and checksum-verify the
    /// superblock it references, then rebuild allocator state by walking
    /// the metadata tree.
    pub fn open(&mut self) -> FsResult<()> {
        // Verify storage header.
        let header = self.read_storage_header()?;
        if !header.is_valid() {
            return Err(FsError::InvalidSuperblock);
        }

        // Recover latest root pointer.  `was_b` is fed back into
        // `from_recovered` below so the manager knows which root-pointer
        // copy was current.
        let (rp, was_b) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
            .ok_or(FsError::InvalidRootPointer)?;

        // Read superblock.
        let sb: Superblock = read_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            rp.superblock_ref.block_id,
        )?;

        // Verify checksum: re-serialize the decrypted superblock and compare
        // its blake3 hash against the hash recorded in the root pointer.
        let sb_bytes = self.codec.serialize_object(&sb)?;
        let checksum = blake3::hash(&sb_bytes);
        if *checksum.as_bytes() != rp.checksum {
            return Err(FsError::InvalidSuperblock);
        }

        self.txn = TransactionManager::from_recovered(rp.generation, was_b);
        self.superblock = Some(sb.clone());
        // Remember the live superblock's block so a later commit can free it.
        self.last_superblock_block = Some(rp.superblock_ref.block_id);

        // Rebuild allocator knowledge by walking the metadata tree.
        self.rebuild_allocator(&sb)?;

        Ok(())
    }
218
219    // ── File operations ──
220
221    // ── Path helpers ──────────────────────────────────────────
222
223    /// Split a path into its directory components and the leaf name.
224    /// Returns `(["a","b"], "c")` for `"a/b/c"`, or `([], "c")` for `"c"`.
225    fn split_path(path: &str) -> FsResult<(Vec<&str>, &str)> {
226        let trimmed = path.trim_matches('/');
227        if trimmed.is_empty() {
228            return Err(FsError::Internal("empty path".into()));
229        }
230        let parts: Vec<&str> = trimmed.split('/').collect();
231        let (dirs, leaf) = parts.split_at(parts.len() - 1);
232        Ok((dirs.to_vec(), leaf[0]))
233    }
234
235    /// Parse a directory path (may be empty / "/" for root) into components.
236    fn split_dir_path(path: &str) -> Vec<&str> {
237        let trimmed = path.trim_matches('/');
238        if trimmed.is_empty() {
239            return Vec::new();
240        }
241        trimmed.split('/').collect()
242    }
243
244    /// Resolve a sequence of directory components starting from the root inode,
245    /// returning the ancestor chain needed for CoW commit propagation.
246    ///
247    /// Returns `(ancestors, target_inode, target_dir_page)` where `ancestors`
248    /// is a list of `(Inode, DirectoryPage, entry_index_in_parent)` from root
249    /// down to (but not including) the final resolved directory.
250    fn resolve_dir_chain(
251        &self,
252        components: &[&str],
253        root_inode: &Inode,
254    ) -> FsResult<(Vec<AncestorEntry>, Inode, DirectoryPage)> {
255        let mut ancestors: Vec<AncestorEntry> = Vec::new();
256        let mut current_inode = root_inode.clone();
257        let mut current_dir_page: DirectoryPage =
258            self.read_obj(current_inode.directory_page_ref.block_id)?;
259
260        for component in components {
261            let idx = current_dir_page
262                .entries
263                .iter()
264                .position(|e| e.name == *component)
265                .ok_or_else(|| FsError::DirectoryNotFound(component.to_string()))?;
266
267            let entry = &current_dir_page.entries[idx];
268            if entry.kind != InodeKind::Directory {
269                return Err(FsError::NotADirectory(component.to_string()));
270            }
271
272            let child_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
273            let child_dir_page: DirectoryPage =
274                self.read_obj(child_inode.directory_page_ref.block_id)?;
275
276            ancestors.push(AncestorEntry {
277                inode: current_inode,
278                dir_page: current_dir_page,
279                child_index: idx,
280            });
281
282            current_inode = child_inode;
283            current_dir_page = child_dir_page;
284        }
285
286        Ok((ancestors, current_inode, current_dir_page))
287    }
288
    /// After mutating a directory's page, propagate CoW changes up through
    /// the ancestor chain to the root, then commit a new superblock.
    ///
    /// `new_dir_page` is the already-modified DirectoryPage of the target dir.
    /// `target_inode` is the inode of the directory that owns `new_dir_page`.
    /// `ancestors` is the chain from root down to (but not including) target.
    ///
    /// Every rewritten object goes to a freshly allocated block; the blocks
    /// they replace are freed only *after* the superblock commit succeeds,
    /// so a failure mid-way leaves the previously committed tree intact.
    fn commit_cow_chain(
        &mut self,
        sb: &Superblock,
        ancestors: &[AncestorEntry],
        target_inode: &Inode,
        new_dir_page: &DirectoryPage,
    ) -> FsResult<()> {
        // Collect old block IDs replaced by CoW.  Freed after commit succeeds.
        let mut stale_blocks: Vec<u64> = Vec::new();

        // Target directory: old dir page block.
        stale_blocks.push(target_inode.directory_page_ref.block_id);
        // Target inode block (its block ID is derived from the chain):
        if ancestors.is_empty() {
            // target IS the root inode.
            stale_blocks.push(sb.root_inode_ref.block_id);
        } else {
            let last = ancestors.last().unwrap();
            stale_blocks.push(last.dir_page.entries[last.child_index].inode_ref.block_id);
        }

        // Write the modified directory page.
        let mut new_dp_block = self.allocator.allocate()?;
        self.write_obj(new_dp_block, ObjectKind::DirectoryPage, new_dir_page)?;

        // Write the modified directory inode.
        let mut new_inode = target_inode.clone();
        new_inode.directory_page_ref = ObjectRef::new(new_dp_block);
        new_inode.modified_at = now_secs();
        let mut new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Propagate upward through ancestors (bottom to top).  Each pass
        // re-points one ancestor's dir-page entry at the inode written in
        // the previous step, then rewrites that ancestor's own inode.
        for (i, ancestor) in ancestors.iter().rev().enumerate() {
            // Old ancestor dir page block.
            stale_blocks.push(ancestor.inode.directory_page_ref.block_id);
            // Old ancestor inode block.  `rev_idx` converts the reversed
            // position back to the original root-first index.
            let rev_idx = ancestors.len() - 1 - i;
            if rev_idx == 0 {
                // This is the root — its block is in the superblock.
                stale_blocks.push(sb.root_inode_ref.block_id);
            } else {
                let parent = &ancestors[rev_idx - 1];
                stale_blocks.push(
                    parent.dir_page.entries[parent.child_index]
                        .inode_ref
                        .block_id,
                );
            }

            let mut parent_dp = ancestor.dir_page.clone();
            parent_dp.entries[ancestor.child_index].inode_ref = ObjectRef::new(new_inode_block);

            new_dp_block = self.allocator.allocate()?;
            self.write_obj(new_dp_block, ObjectKind::DirectoryPage, &parent_dp)?;

            let mut parent_inode = ancestor.inode.clone();
            parent_inode.directory_page_ref = ObjectRef::new(new_dp_block);
            parent_inode.modified_at = now_secs();
            new_inode_block = self.allocator.allocate()?;
            self.write_obj(new_inode_block, ObjectKind::Inode, &parent_inode)?;
        }

        // new_inode_block is now the new root inode block.
        let new_sb = Superblock {
            generation: sb.generation + 1,
            root_inode_ref: ObjectRef::new(new_inode_block),
        };
        self.commit_superblock(new_sb)?;

        // Free stale blocks after the commit has succeeded.  Free errors are
        // deliberately ignored (`let _`) rather than failing the operation.
        for block_id in stale_blocks {
            let _ = self.allocator.free(block_id);
        }

        Ok(())
    }
372
    /// CoW-propagate a modified directory page upward through a sub-chain of
    /// ancestors, stopping before the common ancestor level.
    ///
    /// Returns the new top-level inode block ID.  The caller must update the
    /// common ancestor's dir-page entry to point to this block.
    ///
    /// `stale_blocks` collects old blocks replaced by CoW.  The old inode
    /// block of the topmost node (referenced by the common ancestor's
    /// directory entry) is **not** added — the caller handles that.
    fn cow_subchain(
        &mut self,
        sub_ancestors: &[AncestorEntry],
        target_inode: &Inode,
        new_dir_page: &DirectoryPage,
        stale_blocks: &mut Vec<u64>,
    ) -> FsResult<u64> {
        // Write the modified directory page to a freshly allocated block.
        stale_blocks.push(target_inode.directory_page_ref.block_id);
        let mut new_dp_block = self.allocator.allocate()?;
        self.write_obj(new_dp_block, ObjectKind::DirectoryPage, new_dir_page)?;

        // Write the updated target inode, re-pointed at the new dir page.
        let mut new_inode = target_inode.clone();
        new_inode.directory_page_ref = ObjectRef::new(new_dp_block);
        new_inode.modified_at = now_secs();
        let mut new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Propagate upward through sub-ancestors (bottom to top).  Each pass
        // re-points one ancestor's dir-page entry at the inode written in the
        // previous step, then rewrites that ancestor's own inode.
        for ancestor in sub_ancestors.iter().rev() {
            // Old child inode block (the one we just replaced).
            stale_blocks.push(
                ancestor.dir_page.entries[ancestor.child_index]
                    .inode_ref
                    .block_id,
            );
            // Old ancestor dir page block.
            stale_blocks.push(ancestor.inode.directory_page_ref.block_id);

            let mut parent_dp = ancestor.dir_page.clone();
            parent_dp.entries[ancestor.child_index].inode_ref = ObjectRef::new(new_inode_block);

            new_dp_block = self.allocator.allocate()?;
            self.write_obj(new_dp_block, ObjectKind::DirectoryPage, &parent_dp)?;

            let mut parent_inode = ancestor.inode.clone();
            parent_inode.directory_page_ref = ObjectRef::new(new_dp_block);
            parent_inode.modified_at = now_secs();
            new_inode_block = self.allocator.allocate()?;
            self.write_obj(new_inode_block, ObjectKind::Inode, &parent_inode)?;
        }

        Ok(new_inode_block)
    }
427
428    // ── Public operations ─────────────────────────────────────
429
430    /// Create a new empty file at the given path.
431    ///
432    /// Parent directories must already exist.  The leaf name is created in
433    /// the innermost directory.
434    pub fn create_file(&mut self, path: &str) -> FsResult<()> {
435        let (dir_parts, leaf) = Self::split_path(path)?;
436        self.validate_name(leaf)?;
437        self.flush_all()?;
438        let sb = self
439            .superblock
440            .as_ref()
441            .ok_or(FsError::NotInitialized)?
442            .clone();
443
444        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
445        let (ancestors, target_inode, mut dir_page) =
446            self.resolve_dir_chain(&dir_parts, &root_inode)?;
447
448        if dir_page.entries.iter().any(|e| e.name == leaf) {
449            return Err(FsError::FileAlreadyExists(leaf.to_string()));
450        }
451
452        // Create empty extent map.
453        let extent_map = ExtentMap::new();
454        let em_block = self.allocator.allocate()?;
455        self.write_obj(em_block, ObjectKind::ExtentMap, &extent_map)?;
456
457        // Create file inode.
458        let inode_id = self.alloc_inode_id();
459        let ts = now_secs();
460        let file_inode = Inode {
461            id: inode_id,
462            kind: InodeKind::File,
463            size: 0,
464            directory_page_ref: ObjectRef::null(),
465            extent_map_ref: ObjectRef::new(em_block),
466            created_at: ts,
467            modified_at: ts,
468        };
469        let inode_block = self.allocator.allocate()?;
470        self.write_obj(inode_block, ObjectKind::Inode, &file_inode)?;
471
472        dir_page.entries.push(DirectoryEntry {
473            name: leaf.to_string(),
474            inode_ref: ObjectRef::new(inode_block),
475            inode_id,
476            kind: InodeKind::File,
477        });
478
479        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
480        Ok(())
481    }
482
    /// Write data to a file at the given path.
    ///
    /// Writes are buffered in memory and only flushed to the block store on
    /// `sync()` or when another metadata-mutating operation occurs.
    /// This keeps every `write_file` call purely in-memory for smooth
    /// throughput.  Call `sync()` periodically to bound memory usage.
    pub fn write_file(&mut self, path: &str, offset: u64, data: &[u8]) -> FsResult<()> {
        if data.is_empty() {
            return Ok(());
        }

        let chunk_size = max_chunk_payload(self.store.block_size());
        if chunk_size == 0 {
            // Block size too small to carry any chunk payload at all.
            return Err(FsError::DataTooLarge(data.len()));
        }

        let path_key = path.trim_matches('/').to_string();

        // Take the dirty entry out of the map so `self` is free for other
        // borrows (disk reads, etc.).  We'll put it back at the end.
        let mut dirty = match self.write_buffer.remove(&path_key) {
            Some(d) => d,
            None => {
                // First buffered write — load metadata from disk.
                let (dir_parts, leaf) = Self::split_path(path)?;
                let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
                let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
                let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
                let entry = dir_page
                    .entries
                    .iter()
                    .find(|e| e.name == leaf)
                    .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
                if entry.kind != InodeKind::File {
                    return Err(FsError::NotAFile(leaf.to_string()));
                }
                let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
                let mut extent_map: ExtentMap =
                    self.read_obj(file_inode.extent_map_ref.block_id)?;
                // Keep extents sorted so the chunk lookup below can
                // binary-search by chunk index.
                extent_map.entries.sort_by_key(|e| e.chunk_index);
                DirtyFile {
                    dirty_chunks: HashMap::new(),
                    base_inode: file_inode.clone(),
                    extent_map,
                    size: file_inode.size,
                    metadata_dirty: false,
                }
            }
        };

        let old_size = dirty.size as usize;
        let write_start = offset as usize;
        let write_end = write_start + data.len();
        let new_size = std::cmp::max(old_size, write_end);

        // If the write begins past the old EOF, start the loop at the chunk
        // containing the old EOF so the gap chunks in between get
        // materialized (zero-filled) as well.
        let first_chunk = if write_start >= old_size {
            old_size / chunk_size
        } else {
            write_start / chunk_size
        };
        let last_chunk = (write_end - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            // Logical byte range this chunk covers in the (possibly grown) file.
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, new_size);
            let chunk_len = chunk_file_end - chunk_file_start;
            let chunk_idx_u64 = chunk_idx as u64;

            // If this chunk isn't buffered yet, load its on-disk content (or zeros).
            if !dirty.dirty_chunks.contains_key(&chunk_idx_u64) {
                let mut buf = vec![0u8; chunk_len];
                if chunk_file_start < old_size {
                    if let Ok(pos) = dirty
                        .extent_map
                        .entries
                        .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
                    {
                        let existing = &dirty.extent_map.entries[pos];
                        let raw = read_encrypted_raw(
                            self.store.as_ref(),
                            self.crypto.as_ref(),
                            &self.codec,
                            existing.data_ref.block_id,
                        )?;
                        // Clamp the copy to the recorded plaintext length, the
                        // chunk's span, and the decrypted payload's real length.
                        let copy_len = std::cmp::min(existing.plaintext_len as usize, chunk_len);
                        let src_len = std::cmp::min(copy_len, raw.len());
                        buf[..src_len].copy_from_slice(&raw[..src_len]);
                    }
                }
                dirty.dirty_chunks.insert(chunk_idx_u64, buf);
            }

            let chunk_buf = dirty.dirty_chunks.get_mut(&chunk_idx_u64).unwrap();
            // A previously buffered chunk may be shorter than the span the
            // grown file now needs — extend it with zeros.
            if chunk_buf.len() < chunk_len {
                chunk_buf.resize(chunk_len, 0);
            }

            // Overlay the write data onto the chunk.  Gap chunks (no overlap
            // with the written range) are left as loaded/zeroed.
            let overlap_start = std::cmp::max(chunk_file_start, write_start);
            let overlap_end = std::cmp::min(chunk_file_end, write_end);
            if overlap_start < overlap_end {
                let data_off = overlap_start - write_start;
                let chunk_off = overlap_start - chunk_file_start;
                let len = overlap_end - overlap_start;
                chunk_buf[chunk_off..chunk_off + len]
                    .copy_from_slice(&data[data_off..data_off + len]);
            }
        }

        dirty.size = new_size as u64;
        dirty.metadata_dirty = true;

        self.write_buffer.insert(path_key, dirty);
        Ok(())
    }
598
599    /// Read file data at the given path. Returns the requested slice.
600    ///
601    /// If the file has buffered (unflushed) writes, reads are served from the
602    /// in-memory buffer merged with on-disk data.
603    pub fn read_file(&self, path: &str, offset: u64, len: usize) -> FsResult<Vec<u8>> {
604        let path_key = path.trim_matches('/');
605
606        if let Some(dirty) = self.write_buffer.get(path_key) {
607            return self.read_file_buffered(dirty, offset, len);
608        }
609
610        let (dir_parts, leaf) = Self::split_path(path)?;
611        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
612        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
613        let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
614
615        let entry = dir_page
616            .entries
617            .iter()
618            .find(|e| e.name == leaf)
619            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
620
621        if entry.kind != InodeKind::File {
622            return Err(FsError::NotAFile(leaf.to_string()));
623        }
624
625        let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
626
627        if len == 0 || offset >= file_inode.size {
628            return Ok(Vec::new());
629        }
630
631        let extent_map: ExtentMap = self.read_obj(file_inode.extent_map_ref.block_id)?;
632        self.read_chunk_range(&extent_map, file_inode.size, offset, len)
633    }
634
635    /// List entries in a directory at the given path.
636    ///
637    /// Pass `""` or `"/"` to list the root directory.
638    pub fn list_directory(&self, path: &str) -> FsResult<Vec<DirListEntry>> {
639        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
640        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
641
642        let components = Self::split_dir_path(path);
643        let (_, _, dir_page) = self.resolve_dir_chain(&components, &root_inode)?;
644
645        let dir_prefix = {
646            let trimmed = path.trim_matches('/');
647            if trimmed.is_empty() {
648                String::new()
649            } else {
650                format!("{}/", trimmed)
651            }
652        };
653
654        let mut result = Vec::new();
655        for entry in &dir_page.entries {
656            let inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
657            // Use buffered size if this file has pending writes.
658            let size = if entry.kind == InodeKind::File {
659                let full_path = format!("{}{}", dir_prefix, entry.name);
660                if let Some(dirty) = self.write_buffer.get(&full_path) {
661                    dirty.size
662                } else {
663                    inode.size
664                }
665            } else {
666                inode.size
667            };
668            result.push(DirListEntry {
669                name: entry.name.clone(),
670                kind: entry.kind,
671                size,
672                inode_id: entry.inode_id,
673            });
674        }
675        Ok(result)
676    }
677
678    /// Get metadata for a single file or directory without listing the
679    /// entire parent directory.  Much cheaper than [`list_directory()`] for
680    /// FUSE `getattr` / `lookup` operations.
681    pub fn stat(&self, path: &str) -> FsResult<DirListEntry> {
682        let path_key = path.trim_matches('/');
683
684        // Root directory.
685        if path_key.is_empty() {
686            let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
687            let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
688            return Ok(DirListEntry {
689                name: String::new(),
690                kind: InodeKind::Directory,
691                size: root_inode.size,
692                inode_id: root_inode.id,
693            });
694        }
695
696        let (dir_parts, leaf) = Self::split_path(path)?;
697        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
698        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
699        let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
700
701        let entry = dir_page
702            .entries
703            .iter()
704            .find(|e| e.name == leaf)
705            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
706
707        let inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
708
709        // Use buffered size if this file has pending writes.
710        let size = if entry.kind == InodeKind::File {
711            if let Some(dirty) = self.write_buffer.get(path_key) {
712                dirty.size
713            } else {
714                inode.size
715            }
716        } else {
717            inode.size
718        };
719
720        Ok(DirListEntry {
721            name: entry.name.clone(),
722            kind: entry.kind,
723            size,
724            inode_id: entry.inode_id,
725        })
726    }
727
728    /// Create a subdirectory at the given path.
729    ///
730    /// Parent directories must already exist; only the leaf is created.
731    pub fn create_directory(&mut self, path: &str) -> FsResult<()> {
732        let (dir_parts, leaf) = Self::split_path(path)?;
733        self.validate_name(leaf)?;
734        self.flush_all()?;
735        let sb = self
736            .superblock
737            .as_ref()
738            .ok_or(FsError::NotInitialized)?
739            .clone();
740        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
741        let (ancestors, target_inode, mut dir_page) =
742            self.resolve_dir_chain(&dir_parts, &root_inode)?;
743
744        if dir_page.entries.iter().any(|e| e.name == leaf) {
745            return Err(FsError::DirectoryAlreadyExists(leaf.to_string()));
746        }
747
748        // Create empty directory page for the new subdirectory.
749        let sub_dp = DirectoryPage::new();
750        let sub_dp_block = self.allocator.allocate()?;
751        self.write_obj(sub_dp_block, ObjectKind::DirectoryPage, &sub_dp)?;
752
753        let inode_id = self.alloc_inode_id();
754        let ts = now_secs();
755        let dir_inode = Inode {
756            id: inode_id,
757            kind: InodeKind::Directory,
758            size: 0,
759            directory_page_ref: ObjectRef::new(sub_dp_block),
760            extent_map_ref: ObjectRef::null(),
761            created_at: ts,
762            modified_at: ts,
763        };
764        let inode_block = self.allocator.allocate()?;
765        self.write_obj(inode_block, ObjectKind::Inode, &dir_inode)?;
766
767        dir_page.entries.push(DirectoryEntry {
768            name: leaf.to_string(),
769            inode_ref: ObjectRef::new(inode_block),
770            inode_id,
771            kind: InodeKind::Directory,
772        });
773
774        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
775        Ok(())
776    }
777
778    /// Remove a file or empty directory at the given path.
779    pub fn remove_file(&mut self, path: &str) -> FsResult<()> {
780        let path_key = path.trim_matches('/').to_string();
781        self.write_buffer.remove(&path_key);
782        self.flush_all()?;
783        let (dir_parts, leaf) = Self::split_path(path)?;
784        let sb = self
785            .superblock
786            .as_ref()
787            .ok_or(FsError::NotInitialized)?
788            .clone();
789        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
790        let (ancestors, target_inode, mut dir_page) =
791            self.resolve_dir_chain(&dir_parts, &root_inode)?;
792
793        let idx = dir_page
794            .entries
795            .iter()
796            .position(|e| e.name == leaf)
797            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
798
799        // Collect all blocks owned by the removed entry so we can free them.
800        let removed_entry = &dir_page.entries[idx];
801        let mut stale_blocks: Vec<u64> = Vec::new();
802        stale_blocks.push(removed_entry.inode_ref.block_id);
803        let removed_inode: Inode = self.read_obj(removed_entry.inode_ref.block_id)?;
804
805        match removed_entry.kind {
806            InodeKind::Directory => {
807                let sub_page: DirectoryPage =
808                    self.read_obj(removed_inode.directory_page_ref.block_id)?;
809                if !sub_page.entries.is_empty() {
810                    return Err(FsError::DirectoryNotEmpty(leaf.to_string()));
811                }
812                stale_blocks.push(removed_inode.directory_page_ref.block_id);
813            }
814            InodeKind::File => {
815                if !removed_inode.extent_map_ref.is_null() {
816                    stale_blocks.push(removed_inode.extent_map_ref.block_id);
817                    let extent_map: ExtentMap =
818                        self.read_obj(removed_inode.extent_map_ref.block_id)?;
819                    for ext in &extent_map.entries {
820                        stale_blocks.push(ext.data_ref.block_id);
821                    }
822                }
823            }
824        }
825
826        dir_page.entries.remove(idx);
827        // commit_cow_chain frees its own stale CoW blocks.
828        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
829
830        // Free blocks that belonged to the removed entry.
831        for block_id in stale_blocks {
832            let _ = self.allocator.free(block_id);
833        }
834
835        Ok(())
836    }
837
838    /// Rename or move a file or directory.  Supports both same-directory
839    /// renames and cross-directory moves.
840    pub fn rename(&mut self, old_path: &str, new_path: &str) -> FsResult<()> {
841        let (old_dir, old_leaf) = Self::split_path(old_path)?;
842        let (new_dir, new_leaf) = Self::split_path(new_path)?;
843        self.validate_name(new_leaf)?;
844        self.flush_all()?;
845
846        let sb = self
847            .superblock
848            .as_ref()
849            .ok_or(FsError::NotInitialized)?
850            .clone();
851        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
852
853        if old_dir == new_dir {
854            // ── Same-directory rename: just change the name in-place. ──
855            let (ancestors, target_inode, mut dir_page) =
856                self.resolve_dir_chain(&old_dir, &root_inode)?;
857
858            if dir_page.entries.iter().any(|e| e.name == new_leaf) {
859                return Err(FsError::FileAlreadyExists(new_leaf.to_string()));
860            }
861
862            let entry = dir_page
863                .entries
864                .iter_mut()
865                .find(|e| e.name == old_leaf)
866                .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;
867
868            entry.name = new_leaf.to_string();
869            self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
870        } else {
871            // ── Cross-directory rename / move. ──
872
873            // Prevent moving a directory into its own subtree.
874            let src_full: Vec<&str> = old_dir
875                .iter()
876                .copied()
877                .chain(std::iter::once(old_leaf))
878                .collect();
879            if new_dir.len() >= src_full.len() && new_dir[..src_full.len()] == src_full[..] {
880                return Err(FsError::Internal(
881                    "cannot move a directory into itself".into(),
882                ));
883            }
884
885            // Find the Least Common Ancestor (LCA) of the two parent dirs.
886            let common_len = old_dir
887                .iter()
888                .zip(new_dir.iter())
889                .take_while(|(a, b)| a == b)
890                .count();
891            let common_parts = &old_dir[..common_len];
892            let src_remaining = &old_dir[common_len..];
893            let dst_remaining = &new_dir[common_len..];
894
895            // Resolve the common ancestor chain from root.
896            let (common_ancestors, common_inode, common_dir_page) =
897                self.resolve_dir_chain(common_parts, &root_inode)?;
898
899            // Resolve source sub-chain below the common ancestor.
900            let (full_src_ancestors, src_inode, src_dir_page) = if src_remaining.is_empty() {
901                (Vec::new(), common_inode.clone(), common_dir_page.clone())
902            } else {
903                self.resolve_dir_chain(src_remaining, &common_inode)?
904            };
905
906            // Resolve destination sub-chain below the common ancestor.
907            let (full_dst_ancestors, dst_inode, dst_dir_page) = if dst_remaining.is_empty() {
908                (Vec::new(), common_inode.clone(), common_dir_page.clone())
909            } else {
910                self.resolve_dir_chain(dst_remaining, &common_inode)?
911            };
912
913            // Validate: source must exist, destination name must not.
914            if dst_dir_page.entries.iter().any(|e| e.name == new_leaf) {
915                return Err(FsError::FileAlreadyExists(new_leaf.to_string()));
916            }
917            let src_idx = src_dir_page
918                .entries
919                .iter()
920                .position(|e| e.name == old_leaf)
921                .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;
922
923            // Build the moved entry with its new name.
924            let mut moved_entry = src_dir_page.entries[src_idx].clone();
925            moved_entry.name = new_leaf.to_string();
926
927            let mut stale_blocks: Vec<u64> = Vec::new();
928
929            // Start with the common ancestor's dir page; both sides
930            // accumulate updates into this copy.
931            let mut merged_common_dp = common_dir_page.clone();
932
933            // ── Source side ──
934            if src_remaining.is_empty() {
935                // Source dir IS the common ancestor — remove directly.
936                let idx = merged_common_dp
937                    .entries
938                    .iter()
939                    .position(|e| e.name == old_leaf)
940                    .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;
941                merged_common_dp.entries.remove(idx);
942            } else {
943                // CoW the source sub-chain with the entry removed.
944                // full_src_ancestors[0] is the common ancestor itself;
945                // pass only the levels below it.
946                let src_sub = &full_src_ancestors[1..];
947                let mut new_src_dp = src_dir_page.clone();
948                new_src_dp.entries.remove(src_idx);
949
950                let new_src_child =
951                    self.cow_subchain(src_sub, &src_inode, &new_src_dp, &mut stale_blocks)?;
952
953                // Update the common ancestor's entry for the source branch.
954                let src_child_name = src_remaining[0];
955                let ci = merged_common_dp
956                    .entries
957                    .iter()
958                    .position(|e| e.name == src_child_name)
959                    .ok_or_else(|| FsError::DirectoryNotFound(src_child_name.to_string()))?;
960                stale_blocks.push(merged_common_dp.entries[ci].inode_ref.block_id);
961                merged_common_dp.entries[ci].inode_ref = ObjectRef::new(new_src_child);
962            }
963
964            // ── Destination side ──
965            if dst_remaining.is_empty() {
966                // Destination dir IS the common ancestor — add directly.
967                merged_common_dp.entries.push(moved_entry);
968            } else {
969                // CoW the destination sub-chain with the entry added.
970                let dst_sub = &full_dst_ancestors[1..];
971                let mut new_dst_dp = dst_dir_page.clone();
972                new_dst_dp.entries.push(moved_entry);
973
974                let new_dst_child =
975                    self.cow_subchain(dst_sub, &dst_inode, &new_dst_dp, &mut stale_blocks)?;
976
977                // Update the common ancestor's entry for the dest branch.
978                let dst_child_name = dst_remaining[0];
979                let ci = merged_common_dp
980                    .entries
981                    .iter()
982                    .position(|e| e.name == dst_child_name)
983                    .ok_or_else(|| FsError::DirectoryNotFound(dst_child_name.to_string()))?;
984                stale_blocks.push(merged_common_dp.entries[ci].inode_ref.block_id);
985                merged_common_dp.entries[ci].inode_ref = ObjectRef::new(new_dst_child);
986            }
987
988            // CoW from the common ancestor up to root and commit.
989            self.commit_cow_chain(&sb, &common_ancestors, &common_inode, &merged_common_dp)?;
990
991            // Free sub-chain stale blocks (commit_cow_chain frees its own).
992            for block_id in stale_blocks {
993                let _ = self.allocator.free(block_id);
994            }
995        }
996
997        Ok(())
998    }
999
    /// Flush all buffered writes to the block store **without** calling
    /// `store.sync()` (fsync).
    ///
    /// Use this for FUSE `write`/`release` handlers where you want data
    /// committed to the block store but don't need a durable fsync.
    /// Call [`sync()`](Self::sync) for an explicit fsync.
    pub fn flush(&mut self) -> FsResult<()> {
        // Thin public wrapper over the internal per-file flush loop.
        self.flush_all()
    }
1009
    /// Sync / flush. Writes all buffered data to blocks and calls through
    /// to the block store sync.
    ///
    /// If flushing any buffered file fails, the error is returned and the
    /// store-level sync is not attempted.
    pub fn sync(&mut self) -> FsResult<()> {
        self.flush_all()?;
        self.store.sync()
    }
1016
    /// Returns the number of free blocks in the allocator.
    ///
    /// Test-only helper, used to assert that allocate/free bookkeeping
    /// balances out across filesystem operations.
    #[cfg(test)]
    pub fn free_block_count(&self) -> u64 {
        self.allocator.free_count()
    }
1022
1023    /// Fill every unallocated block with cryptographically random data.
1024    ///
1025    /// This makes free space indistinguishable from encrypted ciphertext,
1026    /// preventing an observer from determining which blocks contain real
1027    /// data.  Call after `init_filesystem()` or `open()` when provisioning
1028    /// a new store, or periodically as a scrub operation.
1029    ///
1030    /// Uses batch writes when the block store supports them.
1031    pub fn scrub_free_blocks(&mut self) -> FsResult<()> {
1032        self.flush_all()?;
1033
1034        let free_ids = self.allocator.free_block_ids();
1035        if free_ids.is_empty() {
1036            return Ok(());
1037        }
1038
1039        let bs = self.store.block_size();
1040        let mut rng = rand::thread_rng();
1041
1042        // Write in batches to amortise call overhead and enable pipelined I/O.
1043        const BATCH: usize = 64;
1044        for chunk in free_ids.chunks(BATCH) {
1045            let mut pairs: Vec<(u64, Vec<u8>)> = Vec::with_capacity(chunk.len());
1046            for &id in chunk {
1047                let mut buf = vec![0u8; bs];
1048                rng.fill_bytes(&mut buf);
1049                pairs.push((id, buf));
1050            }
1051            let refs: Vec<(u64, &[u8])> = pairs.iter().map(|(id, d)| (*id, d.as_slice())).collect();
1052            self.store.write_blocks(&refs)?;
1053        }
1054
1055        self.store.sync()
1056    }
1057
1058    // ── Internal helpers ──
1059
    /// Flush a single file's buffered writes to the block store.
    ///
    /// Steps: (1) take the `DirtyFile` out of the write buffer; (2) re-resolve
    /// the path against the *current* superblock; (3) encrypt each dirty chunk
    /// into a freshly allocated block; (4) write chunks + new extent map + new
    /// inode in one batched store call; (5) CoW-commit the directory chain;
    /// (6) free all superseded blocks.
    fn flush_file(&mut self, path_key: &str) -> FsResult<()> {
        let dirty = match self.write_buffer.remove(path_key) {
            Some(d) => d,
            None => return Ok(()),
        };

        // Nothing was actually modified — the entry can simply be dropped.
        if !dirty.metadata_dirty {
            return Ok(());
        }

        // Re-resolve path from the current superblock.
        let (dir_parts, leaf) = Self::split_path(path_key)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let mut extent_map = dirty.extent_map;

        // Collect stale data chunk blocks being overwritten.
        // They are freed only AFTER the CoW commit succeeds (step 6).
        let mut stale_blocks: Vec<u64> = Vec::new();

        // Write each dirty chunk to a new block.
        // Pre-encrypt all dirty chunks and allocate blocks, then batch-write.
        // NOTE(review): `dirty_chunks` is a HashMap, so chunk allocation order
        // is nondeterministic — presumably harmless, but confirm the allocator
        // does not rely on ordered allocation.
        let block_size = self.store.block_size();
        let mut batch: Vec<(u64, Vec<u8>)> = Vec::with_capacity(dirty.dirty_chunks.len());

        for (&chunk_idx, chunk_data) in &dirty.dirty_chunks {
            // If this chunk already existed, the old data block is stale.
            if let Some(existing) = extent_map
                .entries
                .iter()
                .find(|e| e.chunk_index == chunk_idx)
            {
                stale_blocks.push(existing.data_ref.block_id);
            }

            let data_block = self.allocator.allocate()?;
            let encrypted_block = prepare_encrypted_block(
                block_size,
                self.crypto.as_ref(),
                &self.codec,
                ObjectKind::FileDataChunk,
                chunk_data,
            )?;
            batch.push((data_block, encrypted_block));

            // Point the extent entry at the new block (CoW), or append a
            // fresh entry for a chunk written for the first time.
            if let Some(entry) = extent_map
                .entries
                .iter_mut()
                .find(|e| e.chunk_index == chunk_idx)
            {
                entry.data_ref = ObjectRef::new(data_block);
                entry.plaintext_len = chunk_data.len() as u32;
            } else {
                extent_map.entries.push(ExtentEntry {
                    chunk_index: chunk_idx,
                    data_ref: ObjectRef::new(data_block),
                    plaintext_len: chunk_data.len() as u32,
                });
            }
        }

        // Single batched write for all dirty chunks + extent map + inode.
        // Keep entries sorted so readers can binary-search by chunk index.
        extent_map.entries.sort_by_key(|e| e.chunk_index);

        // Old extent map block is stale.
        if !dirty.base_inode.extent_map_ref.is_null() {
            stale_blocks.push(dirty.base_inode.extent_map_ref.block_id);
        }

        // Prepare extent map block.
        let new_em_block = self.allocator.allocate()?;
        let em_encrypted = prepare_encrypted_object(
            block_size,
            self.crypto.as_ref(),
            &self.codec,
            ObjectKind::ExtentMap,
            &extent_map,
        )?;
        batch.push((new_em_block, em_encrypted));

        // Old file inode block is stale — find it from the dir entry.
        let old_inode_block = dir_page
            .entries
            .iter()
            .find(|e| e.name == leaf)
            .map(|e| e.inode_ref.block_id);
        if let Some(blk) = old_inode_block {
            stale_blocks.push(blk);
        }

        // Prepare inode block.
        let mut new_inode = dirty.base_inode;
        new_inode.size = dirty.size;
        new_inode.extent_map_ref = ObjectRef::new(new_em_block);
        new_inode.modified_at = now_secs();
        let new_inode_block = self.allocator.allocate()?;
        let inode_encrypted = prepare_encrypted_object(
            block_size,
            self.crypto.as_ref(),
            &self.codec,
            ObjectKind::Inode,
            &new_inode,
        )?;
        batch.push((new_inode_block, inode_encrypted));

        // Write ALL blocks in one call (data chunks + extent map + inode).
        {
            // Invalidate cached plaintext for every block about to be written:
            // these are freshly allocated IDs that may have been freed and
            // reused, so stale cache entries would serve wrong content.
            let mut cache = self.obj_cache.borrow_mut();
            for &(id, _) in &batch {
                cache.pop(&id);
            }
            drop(cache);
            let refs: Vec<(u64, &[u8])> = batch
                .iter()
                .map(|(id, data)| (*id, data.as_slice()))
                .collect();
            self.store.write_blocks(&refs)?;
        }

        // Update dir entry.
        let mut new_dir_page = dir_page.clone();
        for e in &mut new_dir_page.entries {
            if e.name == leaf {
                e.inode_ref = ObjectRef::new(new_inode_block);
            }
        }

        // commit_cow_chain frees its own stale blocks (dir pages, ancestor inodes).
        self.commit_cow_chain(&sb, &ancestors, &target_inode, &new_dir_page)?;

        // Free file-level stale blocks (data chunks, old extent map, old inode).
        for block_id in stale_blocks {
            let _ = self.allocator.free(block_id);
        }

        Ok(())
    }
1204
1205    /// Flush all buffered file writes to the block store.
1206    fn flush_all(&mut self) -> FsResult<()> {
1207        let keys: Vec<String> = self.write_buffer.keys().cloned().collect();
1208        for key in keys {
1209            self.flush_file(&key)?;
1210        }
1211        Ok(())
1212    }
1213
    /// Read from a file that has dirty (buffered) chunks, merging in-memory
    /// data with on-disk data.
    ///
    /// Dirty chunks are served from `dirty.dirty_chunks`; clean chunks are
    /// decrypted from the blocks named by the extent map; chunks present in
    /// neither are treated as sparse holes and read as zeros.  Reads are
    /// clamped to `dirty.size` (the buffered, possibly-grown file size).
    fn read_file_buffered(&self, dirty: &DirtyFile, offset: u64, len: usize) -> FsResult<Vec<u8>> {
        let chunk_size = max_chunk_payload(self.store.block_size());
        let file_size = dirty.size as usize;
        let start = offset as usize;
        // Out-of-range or zero-length reads return empty, not an error.
        if start >= file_size || len == 0 {
            return Ok(Vec::new());
        }
        let end = std::cmp::min(start + len, file_size);
        let mut result = Vec::with_capacity(end - start);

        // Indices of the first and last chunks overlapping [start, end).
        let first_chunk = start / chunk_size;
        let last_chunk = (end - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, file_size);
            let chunk_idx_u64 = chunk_idx as u64;

            // Get chunk data from buffer or disk.
            // NOTE(review): the binary search assumes `extent_map.entries` is
            // sorted by chunk_index while buffered — flush_file sorts before
            // writing, but confirm the buffering write path keeps it sorted too.
            let chunk_data: Vec<u8> = if let Some(buf) = dirty.dirty_chunks.get(&chunk_idx_u64) {
                buf.clone()
            } else if let Ok(pos) = dirty
                .extent_map
                .entries
                .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
            {
                let entry = &dirty.extent_map.entries[pos];
                let raw = read_encrypted_raw(
                    self.store.as_ref(),
                    self.crypto.as_ref(),
                    &self.codec,
                    entry.data_ref.block_id,
                )?;
                // Decrypted blocks are padded; trim to the recorded length.
                let plain_len = std::cmp::min(entry.plaintext_len as usize, raw.len());
                raw[..plain_len].to_vec()
            } else {
                // Sparse hole: synthesize zeros for this chunk.
                vec![0u8; chunk_file_end - chunk_file_start]
            };

            // Slice to the requested range within this chunk.
            let read_start = if chunk_idx == first_chunk {
                start - chunk_file_start
            } else {
                0
            };
            let read_end = if chunk_idx == last_chunk {
                end - chunk_file_start
            } else {
                chunk_data.len()
            };
            // Guard against a chunk shorter than the requested range.
            let read_end = std::cmp::min(read_end, chunk_data.len());

            if read_start < read_end {
                result.extend_from_slice(&chunk_data[read_start..read_end]);
            }
        }

        Ok(result)
    }
1275
1276    fn alloc_inode_id(&mut self) -> InodeId {
1277        let id = self.next_inode_id;
1278        self.next_inode_id += 1;
1279        id
1280    }
1281
1282    fn validate_name(&self, name: &str) -> FsResult<()> {
1283        if name.is_empty() || name.contains('/') || name.contains('\0') {
1284            return Err(FsError::Internal("invalid name".into()));
1285        }
1286        if name.len() > MAX_NAME_LEN {
1287            return Err(FsError::NameTooLong(name.len(), MAX_NAME_LEN));
1288        }
1289        Ok(())
1290    }
1291
1292    fn read_obj<T: serde::de::DeserializeOwned>(&self, block_id: u64) -> FsResult<T> {
1293        let mut cache = self.obj_cache.borrow_mut();
1294        if let Some(plaintext) = cache.get(&block_id) {
1295            return self.codec.deserialize_object(plaintext);
1296        }
1297        let plaintext = decrypt_block_to_plaintext(
1298            self.store.as_ref(),
1299            self.crypto.as_ref(),
1300            &self.codec,
1301            block_id,
1302        )?;
1303        let result = self.codec.deserialize_object(&plaintext);
1304        cache.put(block_id, plaintext);
1305        result
1306    }
1307
1308    fn write_obj<T: serde::Serialize>(
1309        &self,
1310        block_id: u64,
1311        kind: ObjectKind,
1312        obj: &T,
1313    ) -> FsResult<()> {
1314        // Invalidate any cached plaintext for this block (freed blocks may be
1315        // reallocated and written with different content).
1316        self.obj_cache.borrow_mut().pop(&block_id);
1317        write_encrypted_object(
1318            self.store.as_ref(),
1319            self.crypto.as_ref(),
1320            &self.codec,
1321            block_id,
1322            kind,
1323            obj,
1324        )
1325    }
1326
    /// Read only the chunks that overlap `[offset, offset+len)` from the
    /// extent map, avoiding the cost of decrypting the entire file.
    ///
    /// Chunks absent from the extent map are treated as sparse holes and
    /// returned as zeros.  The read is clamped to `file_size`; an
    /// out-of-range or zero-length request yields an empty vector.
    fn read_chunk_range(
        &self,
        extent_map: &ExtentMap,
        file_size: u64,
        offset: u64,
        len: usize,
    ) -> FsResult<Vec<u8>> {
        let chunk_size = max_chunk_payload(self.store.block_size());
        let end = std::cmp::min(offset as usize + len, file_size as usize);
        let start = offset as usize;
        // chunk_size == 0 would make the divisions below panic; bail early.
        if start >= end || chunk_size == 0 {
            return Ok(Vec::new());
        }

        // Indices of the first and last chunks overlapping [start, end).
        let first_chunk = (start / chunk_size) as u64;
        let last_chunk = ((end - 1) / chunk_size) as u64;
        let mut result = Vec::with_capacity(end - start);

        for chunk_idx in first_chunk..=last_chunk {
            let chunk_file_start = chunk_idx as usize * chunk_size;

            // Find the extent entry for this chunk via binary search.
            // (flush_file keeps `entries` sorted by chunk_index.)
            let chunk_data = if let Ok(pos) = extent_map
                .entries
                .binary_search_by_key(&chunk_idx, |e| e.chunk_index)
            {
                let entry = &extent_map.entries[pos];
                let raw = read_encrypted_raw(
                    self.store.as_ref(),
                    self.crypto.as_ref(),
                    &self.codec,
                    entry.data_ref.block_id,
                )?;
                // Decrypted blocks are padded; trim to the recorded length.
                let plain_len = std::cmp::min(entry.plaintext_len as usize, raw.len());
                raw[..plain_len].to_vec()
            } else {
                // Sparse hole: return zeros up to chunk boundary or file end.
                let hole_end = std::cmp::min(chunk_file_start + chunk_size, file_size as usize);
                vec![0u8; hole_end - chunk_file_start]
            };

            // Slice to the requested range within this chunk.
            let read_start = if chunk_idx == first_chunk {
                start - chunk_file_start
            } else {
                0
            };
            let read_end = if chunk_idx == last_chunk {
                end - chunk_file_start
            } else {
                chunk_data.len()
            };
            // Guard against a chunk shorter than the requested range.
            let read_end = std::cmp::min(read_end, chunk_data.len());

            if read_start < read_end {
                result.extend_from_slice(&chunk_data[read_start..read_end]);
            }
        }

        Ok(result)
    }
1390
1391    fn read_storage_header(&self) -> FsResult<StorageHeader> {
1392        let block = self.store.read_block(BLOCK_STORAGE_HEADER)?;
1393        if block.len() < 4 {
1394            return Err(FsError::InvalidSuperblock);
1395        }
1396        let len = u32::from_le_bytes([block[0], block[1], block[2], block[3]]) as usize;
1397        if len == 0 || 4 + len > block.len() {
1398            return Err(FsError::InvalidSuperblock);
1399        }
1400        self.codec
1401            .deserialize_object::<StorageHeader>(&block[4..4 + len])
1402    }
1403
1404    fn commit_superblock(&mut self, sb: Superblock) -> FsResult<()> {
1405        let new_sb_block = self.txn.commit(
1406            self.store.as_ref(),
1407            self.crypto.as_ref(),
1408            &self.codec,
1409            &self.allocator,
1410            &sb,
1411        )?;
1412        // Free the previous superblock block now that the new one is committed.
1413        if let Some(old) = self.last_superblock_block {
1414            let _ = self.allocator.free(old);
1415        }
1416        self.last_superblock_block = Some(new_sb_block);
1417        self.superblock = Some(sb);
1418        Ok(())
1419    }
1420
1421    /// Walk the metadata tree from the superblock and mark all referenced blocks
1422    /// as allocated in the allocator. Used during open/mount.
1423    fn rebuild_allocator(&mut self, sb: &Superblock) -> FsResult<()> {
1424        // Mark superblock block.
1425        // The superblock_ref's block was allocated by the transaction manager.
1426        // We also need to mark root pointer blocks, but those are reserved (0,1,2).
1427
1428        // We need to find which block the superblock is stored in.
1429        // The root pointer tells us.
1430        let (rp, _) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
1431            .ok_or(FsError::InvalidRootPointer)?;
1432        self.allocator.mark_allocated(rp.superblock_ref.block_id)?;
1433
1434        // Walk root inode.
1435        self.mark_inode_tree(sb.root_inode_ref.block_id)?;
1436
1437        // Set next_inode_id to be higher than any seen inode.
1438        // (We updated it during the walk.)
1439
1440        Ok(())
1441    }
1442
1443    fn mark_inode_tree(&mut self, inode_block: u64) -> FsResult<()> {
1444        self.allocator.mark_allocated(inode_block)?;
1445        let inode: Inode = self.read_obj(inode_block)?;
1446
1447        if inode.id >= self.next_inode_id {
1448            self.next_inode_id = inode.id + 1;
1449        }
1450
1451        match inode.kind {
1452            InodeKind::Directory => {
1453                if !inode.directory_page_ref.is_null() {
1454                    self.allocator
1455                        .mark_allocated(inode.directory_page_ref.block_id)?;
1456                    let dir_page: DirectoryPage =
1457                        self.read_obj(inode.directory_page_ref.block_id)?;
1458                    for entry in &dir_page.entries {
1459                        self.mark_inode_tree(entry.inode_ref.block_id)?;
1460                    }
1461                }
1462            }
1463            InodeKind::File => {
1464                if !inode.extent_map_ref.is_null() {
1465                    self.allocator
1466                        .mark_allocated(inode.extent_map_ref.block_id)?;
1467                    let extent_map: ExtentMap = self.read_obj(inode.extent_map_ref.block_id)?;
1468                    for entry in &extent_map.entries {
1469                        self.allocator.mark_allocated(entry.data_ref.block_id)?;
1470                    }
1471                }
1472            }
1473        }
1474        Ok(())
1475    }
1476}
1477
/// Return type for directory listings (used by FFI and public API).
///
/// NOTE: serialized with serde — field order and names are part of the
/// wire format; do not reorder or rename.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DirListEntry {
    /// Entry name (the leaf component, no path separators).
    pub name: String,
    /// Whether the entry is a file or a directory.
    pub kind: InodeKind,
    /// Size in bytes (directories report their inode's size field).
    pub size: u64,
    /// Stable inode identifier of the entry.
    pub inode_id: InodeId,
}