//! doublecrypt_core/fs.rs — filesystem core: high-level file and directory
//! operations over an encrypted, copy-on-write block store.

1use std::collections::HashMap;
2use std::sync::Arc;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use rand::RngCore;
6
7use crate::allocator::{BitmapAllocator, SlotAllocator};
8use crate::block_store::BlockStore;
9use crate::codec::{
10    read_encrypted_object, read_encrypted_raw, write_encrypted_object, write_encrypted_raw,
11    ObjectCodec, PostcardCodec,
12};
13use crate::crypto::CryptoEngine;
14use crate::error::{FsError, FsResult};
15use crate::model::*;
16use crate::transaction::TransactionManager;
17
/// The main filesystem core. Owns the block store, crypto, codec, allocator,
/// and transaction manager. Provides high-level filesystem operations.
///
/// All path-accepting methods use `/`-separated paths.  An empty string or
/// `"/"` refers to the root directory.  Parent directories must already exist;
/// only `create_file` and `create_directory` create the leaf entry.
pub struct FilesystemCore {
    /// Backing block device abstraction (shared trait object).
    store: Arc<dyn BlockStore>,
    /// Encryption engine used for all on-disk objects except block 0.
    crypto: Arc<dyn CryptoEngine>,
    /// Serialization codec for metadata objects.
    codec: PostcardCodec,
    /// Block allocator; rebuilt from the metadata tree on `open()`.
    allocator: BitmapAllocator,
    /// Root-pointer commit / crash-recovery manager.
    txn: TransactionManager,
    /// Cached current superblock (`None` until `init_filesystem()`/`open()`).
    superblock: Option<Superblock>,
    /// Next inode ID to allocate.
    next_inode_id: InodeId,
    /// Write buffer: dirty file chunks held in memory until flush,
    /// keyed by the `/`-trimmed file path.
    write_buffer: HashMap<String, DirtyFile>,
}
37
/// Tracks one ancestor directory during path resolution, used by
/// `commit_cow_chain` to propagate CoW writes back to the root.
struct AncestorEntry {
    /// The ancestor directory's inode as read during resolution.
    inode: Inode,
    /// The ancestor's directory page as read during resolution.
    dir_page: DirectoryPage,
    /// Index within `dir_page.entries` of the child on the resolved path.
    child_index: usize,
}
45
/// Tracks in-memory buffered writes for a single file.
///
/// All dirty chunks are held in memory until `sync()` (or the next
/// metadata-mutating operation) flushes them to the block store.
/// This keeps `write_file()` purely in-memory for smooth throughput.
struct DirtyFile {
    /// In-memory chunk data keyed by chunk index.  Only chunks that have
    /// been touched are present; untouched chunks stay on disk.
    dirty_chunks: HashMap<u64, Vec<u8>>,
    /// The file's inode at the time buffering started.
    base_inode: Inode,
    /// The file's extent map, kept sorted by `chunk_index` so lookups can
    /// binary-search (see `write_file` / `read_file_buffered`).
    extent_map: ExtentMap,
    /// Current logical file size (updated on every write).
    size: u64,
    /// Set to `true` when any data has been written; `flush_file` skips
    /// entries that never became dirty.
    metadata_dirty: bool,
}
63
/// Maximum payload size for a single file data chunk.
///
/// Computed conservatively from the block size: 4 bytes for the length
/// prefix, ~60 bytes of envelope metadata, 16 bytes for the Poly1305 tag,
/// plus postcard framing slack — budgeted as a flat 200-byte overhead.
/// Returns 0 when the block is too small to hold any payload at all.
fn max_chunk_payload(block_size: usize) -> usize {
    // `saturating_sub` clamps to 0 instead of branching manually.
    block_size.saturating_sub(200)
}
76
/// Current wall-clock time as whole seconds since the Unix epoch.
/// Returns 0 if the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
83
84impl FilesystemCore {
85    /// Create a new FilesystemCore backed by the given store and crypto engine.
86    pub fn new(store: Arc<dyn BlockStore>, crypto: Arc<dyn CryptoEngine>) -> Self {
87        let total_blocks = store.total_blocks();
88        Self {
89            store,
90            crypto,
91            codec: PostcardCodec,
92            allocator: BitmapAllocator::new(total_blocks),
93            txn: TransactionManager::new(),
94            superblock: None,
95            next_inode_id: 1,
96            write_buffer: HashMap::new(),
97        }
98    }
99
100    // ── Initialization ──
101
    /// Initialize a brand-new filesystem on the block store.
    /// Writes the storage header, creates the root directory, and commits.
    ///
    /// On-disk structures written here:
    /// 1. Block 0: plaintext, length-prefixed storage header (random padding).
    /// 2. An encrypted empty `DirectoryPage` for the root directory.
    /// 3. An encrypted root `Inode` pointing at that page.
    /// 4. A generation-1 `Superblock`, committed via the transaction manager.
    pub fn init_filesystem(&mut self) -> FsResult<()> {
        let block_size = self.store.block_size() as u32;
        let total_blocks = self.store.total_blocks();

        // Write storage header to block 0 (unencrypted).
        let header = StorageHeader::new(block_size, total_blocks);
        let header_bytes = self.codec.serialize_object(&header)?;
        let bs = self.store.block_size();
        let mut block = vec![0u8; bs];
        // Fill the block with random bytes first so the unused tail is not
        // trivially distinguishable from the rest of the store.
        rand::thread_rng().fill_bytes(&mut block);
        // 4-byte little-endian length prefix, then the serialized header.
        // NOTE(review): the slice below panics if header_bytes.len() + 4 > bs;
        // assumes the header always fits one block — confirm for tiny blocks.
        let len = header_bytes.len() as u32;
        block[..4].copy_from_slice(&len.to_le_bytes());
        block[4..4 + header_bytes.len()].copy_from_slice(&header_bytes);
        self.store.write_block(BLOCK_STORAGE_HEADER, &block)?;

        // Create root directory inode (empty directory page first).
        let root_inode_id = self.alloc_inode_id();
        let dir_page = DirectoryPage::new();
        let dir_page_block = self.allocator.allocate()?;
        write_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            dir_page_block,
            ObjectKind::DirectoryPage,
            &dir_page,
        )?;

        let ts = now_secs();
        let root_inode = Inode {
            id: root_inode_id,
            kind: InodeKind::Directory,
            size: 0,
            directory_page_ref: ObjectRef::new(dir_page_block),
            extent_map_ref: ObjectRef::null(),
            created_at: ts,
            modified_at: ts,
        };
        let root_inode_block = self.allocator.allocate()?;
        write_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            root_inode_block,
            ObjectKind::Inode,
            &root_inode,
        )?;

        // Create superblock pointing at the fresh root inode.
        let sb = Superblock {
            generation: 1,
            root_inode_ref: ObjectRef::new(root_inode_block),
        };
        self.superblock = Some(sb.clone());

        // Commit so the new root pointer becomes durable.
        self.txn.commit(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            &self.allocator,
            &sb,
        )?;

        Ok(())
    }
170
    /// Open / mount an existing filesystem by recovering the latest root pointer.
    ///
    /// Steps: validate the plaintext storage header, recover the newest root
    /// pointer via the transaction manager, decrypt the superblock and verify
    /// its blake3 checksum against the root pointer, then rebuild allocator
    /// state by walking the metadata tree.
    pub fn open(&mut self) -> FsResult<()> {
        // Verify storage header.
        let header = self.read_storage_header()?;
        if !header.is_valid() {
            return Err(FsError::InvalidSuperblock);
        }

        // Recover latest root pointer (`was_b` records which slot won —
        // presumably the B side of an A/B pair; see TransactionManager).
        let (rp, was_b) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
            .ok_or(FsError::InvalidRootPointer)?;

        // Read superblock.
        let sb: Superblock = read_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            rp.superblock_ref.block_id,
        )?;

        // Verify checksum: re-serialize the decrypted superblock and compare
        // its blake3 hash with the one stored in the root pointer.
        let sb_bytes = self.codec.serialize_object(&sb)?;
        let checksum = blake3::hash(&sb_bytes);
        if *checksum.as_bytes() != rp.checksum {
            return Err(FsError::InvalidSuperblock);
        }

        self.txn = TransactionManager::from_recovered(rp.generation, was_b);
        self.superblock = Some(sb.clone());

        // Rebuild allocator knowledge by walking the metadata tree.
        self.rebuild_allocator(&sb)?;

        Ok(())
    }
206
    // ── Path helpers ──────────────────────────────────────────
210
211    /// Split a path into its directory components and the leaf name.
212    /// Returns `(["a","b"], "c")` for `"a/b/c"`, or `([], "c")` for `"c"`.
213    fn split_path(path: &str) -> FsResult<(Vec<&str>, &str)> {
214        let trimmed = path.trim_matches('/');
215        if trimmed.is_empty() {
216            return Err(FsError::Internal("empty path".into()));
217        }
218        let parts: Vec<&str> = trimmed.split('/').collect();
219        let (dirs, leaf) = parts.split_at(parts.len() - 1);
220        Ok((dirs.to_vec(), leaf[0]))
221    }
222
223    /// Parse a directory path (may be empty / "/" for root) into components.
224    fn split_dir_path(path: &str) -> Vec<&str> {
225        let trimmed = path.trim_matches('/');
226        if trimmed.is_empty() {
227            return Vec::new();
228        }
229        trimmed.split('/').collect()
230    }
231
    /// Resolve a sequence of directory components starting from the root inode,
    /// returning the ancestor chain needed for CoW commit propagation.
    ///
    /// Returns `(ancestors, target_inode, target_dir_page)` where `ancestors`
    /// is a list of `AncestorEntry` (parent inode, its directory page, and the
    /// index of the traversed child) from root down to (but not including)
    /// the final resolved directory.
    ///
    /// # Errors
    /// `DirectoryNotFound` when a component is missing; `NotADirectory` when
    /// a component names a file.
    fn resolve_dir_chain(
        &self,
        components: &[&str],
        root_inode: &Inode,
    ) -> FsResult<(Vec<AncestorEntry>, Inode, DirectoryPage)> {
        let mut ancestors: Vec<AncestorEntry> = Vec::new();
        let mut current_inode = root_inode.clone();
        let mut current_dir_page: DirectoryPage =
            self.read_obj(current_inode.directory_page_ref.block_id)?;

        for component in components {
            // Locate this component in the current directory page.
            let idx = current_dir_page
                .entries
                .iter()
                .position(|e| e.name == *component)
                .ok_or_else(|| FsError::DirectoryNotFound(component.to_string()))?;

            let entry = &current_dir_page.entries[idx];
            if entry.kind != InodeKind::Directory {
                return Err(FsError::NotADirectory(component.to_string()));
            }

            let child_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let child_dir_page: DirectoryPage =
                self.read_obj(child_inode.directory_page_ref.block_id)?;

            // Record the parent before descending so `commit_cow_chain` can
            // later rewrite the chain bottom-up.
            ancestors.push(AncestorEntry {
                inode: current_inode,
                dir_page: current_dir_page,
                child_index: idx,
            });

            current_inode = child_inode;
            current_dir_page = child_dir_page;
        }

        Ok((ancestors, current_inode, current_dir_page))
    }
276
    /// After mutating a directory's page, propagate CoW changes up through
    /// the ancestor chain to the root, then commit a new superblock.
    ///
    /// `new_dir_page` is the already-modified DirectoryPage of the target dir.
    /// `target_inode` is the inode of the directory that owns `new_dir_page`.
    /// `ancestors` is the chain from root down to (but not including) target.
    ///
    /// Every rewritten page and inode goes to a freshly allocated block; the
    /// previous blocks are left untouched until `commit_superblock` makes
    /// the new tree live.
    fn commit_cow_chain(
        &mut self,
        sb: &Superblock,
        ancestors: &[AncestorEntry],
        target_inode: &Inode,
        new_dir_page: &DirectoryPage,
    ) -> FsResult<()> {
        // Write the modified directory page.
        let mut new_dp_block = self.allocator.allocate()?;
        self.write_obj(new_dp_block, ObjectKind::DirectoryPage, new_dir_page)?;

        // Write the modified directory inode pointing at the new page.
        let mut new_inode = target_inode.clone();
        new_inode.directory_page_ref = ObjectRef::new(new_dp_block);
        new_inode.modified_at = now_secs();
        let mut new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Propagate upward through ancestors (bottom to top): each parent's
        // page is rewritten with the child's new inode block, then the
        // parent inode itself is rewritten to a fresh block.
        for ancestor in ancestors.iter().rev() {
            let mut parent_dp = ancestor.dir_page.clone();
            parent_dp.entries[ancestor.child_index].inode_ref = ObjectRef::new(new_inode_block);

            new_dp_block = self.allocator.allocate()?;
            self.write_obj(new_dp_block, ObjectKind::DirectoryPage, &parent_dp)?;

            let mut parent_inode = ancestor.inode.clone();
            parent_inode.directory_page_ref = ObjectRef::new(new_dp_block);
            parent_inode.modified_at = now_secs();
            new_inode_block = self.allocator.allocate()?;
            self.write_obj(new_inode_block, ObjectKind::Inode, &parent_inode)?;
        }

        // new_inode_block is now the new root inode block.
        let new_sb = Superblock {
            generation: sb.generation + 1,
            root_inode_ref: ObjectRef::new(new_inode_block),
        };
        self.commit_superblock(new_sb)?;
        Ok(())
    }
324
325    // ── Public operations ─────────────────────────────────────
326
    /// Create a new empty file at the given path.
    ///
    /// Parent directories must already exist.  The leaf name is created in
    /// the innermost directory.
    ///
    /// # Errors
    /// `FileAlreadyExists` if the leaf name is taken; `DirectoryNotFound` /
    /// `NotADirectory` if the parent path does not resolve.
    pub fn create_file(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes first so the CoW commit below starts from
        // the latest on-disk tree.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();

        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::FileAlreadyExists(leaf.to_string()));
        }

        // Create empty extent map (no data chunks yet).
        let extent_map = ExtentMap::new();
        let em_block = self.allocator.allocate()?;
        self.write_obj(em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Create file inode.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let file_inode = Inode {
            id: inode_id,
            kind: InodeKind::File,
            size: 0,
            directory_page_ref: ObjectRef::null(),
            extent_map_ref: ObjectRef::new(em_block),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &file_inode)?;

        // Link the new inode into the parent directory page.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::File,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
379
    /// Write data to a file at the given path.
    ///
    /// Writes are buffered in memory and only flushed to the block store on
    /// `sync()` or when another metadata-mutating operation occurs.
    /// This keeps every `write_file` call purely in-memory for smooth
    /// throughput.  Call `sync()` periodically to bound memory usage.
    ///
    /// Writing past the current end of file zero-fills the gap (the skipped
    /// range materializes as zeroed chunks).
    pub fn write_file(&mut self, path: &str, offset: u64, data: &[u8]) -> FsResult<()> {
        if data.is_empty() {
            return Ok(());
        }

        let chunk_size = max_chunk_payload(self.store.block_size());
        if chunk_size == 0 {
            // Block size too small to hold any chunk payload at all.
            return Err(FsError::DataTooLarge(data.len()));
        }

        let path_key = path.trim_matches('/').to_string();

        // Take the dirty entry out of the map so `self` is free for other
        // borrows (disk reads, etc.).  We'll put it back at the end.
        let mut dirty = match self.write_buffer.remove(&path_key) {
            Some(d) => d,
            None => {
                // First buffered write — load metadata from disk.
                let (dir_parts, leaf) = Self::split_path(path)?;
                let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
                let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
                let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
                let entry = dir_page
                    .entries
                    .iter()
                    .find(|e| e.name == leaf)
                    .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
                if entry.kind != InodeKind::File {
                    return Err(FsError::NotAFile(leaf.to_string()));
                }
                let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
                let mut extent_map: ExtentMap =
                    self.read_obj(file_inode.extent_map_ref.block_id)?;
                // Keep the extent map sorted so chunk lookups below can
                // binary-search by chunk index.
                extent_map.entries.sort_by_key(|e| e.chunk_index);
                DirtyFile {
                    dirty_chunks: HashMap::new(),
                    base_inode: file_inode.clone(),
                    extent_map,
                    size: file_inode.size,
                    metadata_dirty: false,
                }
            }
        };

        let old_size = dirty.size as usize;
        let write_start = offset as usize;
        let write_end = write_start + data.len();
        let new_size = std::cmp::max(old_size, write_end);

        // When writing past EOF, start from the chunk containing the old end
        // so the gap chunks are created (zero-filled) as well.
        let first_chunk = if write_start >= old_size {
            old_size / chunk_size
        } else {
            write_start / chunk_size
        };
        let last_chunk = (new_size - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, new_size);
            let chunk_len = chunk_file_end - chunk_file_start;
            let chunk_idx_u64 = chunk_idx as u64;

            // If this chunk isn't buffered yet, load its on-disk content (or zeros).
            if !dirty.dirty_chunks.contains_key(&chunk_idx_u64) {
                let mut buf = vec![0u8; chunk_len];
                if chunk_file_start < old_size {
                    if let Ok(pos) = dirty
                        .extent_map
                        .entries
                        .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
                    {
                        let existing = &dirty.extent_map.entries[pos];
                        let raw = read_encrypted_raw(
                            self.store.as_ref(),
                            self.crypto.as_ref(),
                            &self.codec,
                            existing.data_ref.block_id,
                        )?;
                        // Clamp to both the recorded plaintext length and the
                        // decrypted length to avoid out-of-bounds copies.
                        let copy_len = std::cmp::min(existing.plaintext_len as usize, chunk_len);
                        let src_len = std::cmp::min(copy_len, raw.len());
                        buf[..src_len].copy_from_slice(&raw[..src_len]);
                    }
                }
                dirty.dirty_chunks.insert(chunk_idx_u64, buf);
            }

            let chunk_buf = dirty.dirty_chunks.get_mut(&chunk_idx_u64).unwrap();
            // Grow a previously-buffered shorter chunk if this write extends it.
            if chunk_buf.len() < chunk_len {
                chunk_buf.resize(chunk_len, 0);
            }

            // Overlay the write data onto the chunk.
            let overlap_start = std::cmp::max(chunk_file_start, write_start);
            let overlap_end = std::cmp::min(chunk_file_end, write_end);
            if overlap_start < overlap_end {
                let data_off = overlap_start - write_start;
                let chunk_off = overlap_start - chunk_file_start;
                let len = overlap_end - overlap_start;
                chunk_buf[chunk_off..chunk_off + len]
                    .copy_from_slice(&data[data_off..data_off + len]);
            }
        }

        dirty.size = new_size as u64;
        dirty.metadata_dirty = true;

        self.write_buffer.insert(path_key, dirty);
        Ok(())
    }
495
    /// Read file data at the given path. Returns the requested slice.
    ///
    /// If the file has buffered (unflushed) writes, reads are served from the
    /// in-memory buffer merged with on-disk data.  Reads past EOF return a
    /// truncated (possibly empty) vector rather than an error.
    pub fn read_file(&self, path: &str, offset: u64, len: usize) -> FsResult<Vec<u8>> {
        let path_key = path.trim_matches('/');

        // Dirty files are served through the buffered-read path.
        if let Some(dirty) = self.write_buffer.get(path_key) {
            return self.read_file_buffered(dirty, offset, len);
        }

        let (dir_parts, leaf) = Self::split_path(path)?;
        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let entry = dir_page
            .entries
            .iter()
            .find(|e| e.name == leaf)
            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;

        if entry.kind != InodeKind::File {
            return Err(FsError::NotAFile(leaf.to_string()));
        }

        let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
        let extent_map: ExtentMap = self.read_obj(file_inode.extent_map_ref.block_id)?;

        // NOTE(review): this decrypts the whole file even for small reads;
        // fine for now, but consider range reads for large files.
        let full_data = self.read_all_chunks(&extent_map)?;

        let start = offset as usize;
        if start >= full_data.len() {
            return Ok(Vec::new());
        }
        let end = std::cmp::min(start + len, full_data.len());
        Ok(full_data[start..end].to_vec())
    }
534
535    /// List entries in a directory at the given path.
536    ///
537    /// Pass `""` or `"/"` to list the root directory.
538    pub fn list_directory(&self, path: &str) -> FsResult<Vec<DirListEntry>> {
539        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
540        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
541
542        let components = Self::split_dir_path(path);
543        let (_, _, dir_page) = self.resolve_dir_chain(&components, &root_inode)?;
544
545        let dir_prefix = {
546            let trimmed = path.trim_matches('/');
547            if trimmed.is_empty() {
548                String::new()
549            } else {
550                format!("{}/", trimmed)
551            }
552        };
553
554        let mut result = Vec::new();
555        for entry in &dir_page.entries {
556            let inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
557            // Use buffered size if this file has pending writes.
558            let size = if entry.kind == InodeKind::File {
559                let full_path = format!("{}{}", dir_prefix, entry.name);
560                if let Some(dirty) = self.write_buffer.get(&full_path) {
561                    dirty.size
562                } else {
563                    inode.size
564                }
565            } else {
566                inode.size
567            };
568            result.push(DirListEntry {
569                name: entry.name.clone(),
570                kind: entry.kind,
571                size,
572                inode_id: entry.inode_id,
573            });
574        }
575        Ok(result)
576    }
577
    /// Create a subdirectory at the given path.
    ///
    /// Parent directories must already exist; only the leaf is created.
    ///
    /// # Errors
    /// `DirectoryAlreadyExists` if the leaf name is taken; `DirectoryNotFound`
    /// / `NotADirectory` if the parent path does not resolve.
    pub fn create_directory(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes so the CoW commit starts from the latest tree.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::DirectoryAlreadyExists(leaf.to_string()));
        }

        // Create empty directory page for the new subdirectory.
        let sub_dp = DirectoryPage::new();
        let sub_dp_block = self.allocator.allocate()?;
        self.write_obj(sub_dp_block, ObjectKind::DirectoryPage, &sub_dp)?;

        // Create the directory inode pointing at the fresh page.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let dir_inode = Inode {
            id: inode_id,
            kind: InodeKind::Directory,
            size: 0,
            directory_page_ref: ObjectRef::new(sub_dp_block),
            extent_map_ref: ObjectRef::null(),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &dir_inode)?;

        // Link the new directory into the parent page.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::Directory,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
627
    /// Remove a file or empty directory at the given path.
    ///
    /// Any buffered (unflushed) writes for the removed path are discarded;
    /// remaining dirty files are flushed before the metadata mutation.
    ///
    /// # Errors
    /// `FileNotFound` if the leaf does not exist; `DirectoryNotEmpty` when
    /// removing a directory that still has entries.
    pub fn remove_file(&mut self, path: &str) -> FsResult<()> {
        // Drop this path's buffered writes — flushing them would be wasted
        // work since the entry is about to be unlinked.
        let path_key = path.trim_matches('/').to_string();
        self.write_buffer.remove(&path_key);
        self.flush_all()?;
        let (dir_parts, leaf) = Self::split_path(path)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let idx = dir_page
            .entries
            .iter()
            .position(|e| e.name == leaf)
            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;

        // Directories may only be removed when empty.
        let entry = &dir_page.entries[idx];
        if entry.kind == InodeKind::Directory {
            let dir_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let sub_page: DirectoryPage = self.read_obj(dir_inode.directory_page_ref.block_id)?;
            if !sub_page.entries.is_empty() {
                return Err(FsError::DirectoryNotEmpty(leaf.to_string()));
            }
        }

        // Unlink the entry.  NOTE(review): the old inode/extent/data blocks
        // are not explicitly freed here — presumably reclaimed when the
        // allocator is rebuilt on `open()`; confirm.
        dir_page.entries.remove(idx);
        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
662
    /// Rename a file or directory.  Both `old_path` and `new_path` must share
    /// the same parent directory (move across directories is not supported yet).
    ///
    /// # Errors
    /// `Internal` when the parents differ; `FileAlreadyExists` if `new_path`'s
    /// leaf already exists (this includes renaming an entry to its own name);
    /// `FileNotFound` if `old_path`'s leaf is missing.
    pub fn rename(&mut self, old_path: &str, new_path: &str) -> FsResult<()> {
        let (old_dir, old_leaf) = Self::split_path(old_path)?;
        let (new_dir, new_leaf) = Self::split_path(new_path)?;
        self.validate_name(new_leaf)?;
        // Flush buffered writes (keyed by path) before the name changes.
        self.flush_all()?;

        if old_dir != new_dir {
            return Err(FsError::Internal(
                "rename across directories is not supported".into(),
            ));
        }

        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&old_dir, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == new_leaf) {
            return Err(FsError::FileAlreadyExists(new_leaf.to_string()));
        }

        let entry = dir_page
            .entries
            .iter_mut()
            .find(|e| e.name == old_leaf)
            .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;

        // Only the directory entry changes; the inode itself is untouched.
        entry.name = new_leaf.to_string();

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
701
    /// Sync / flush. Writes all buffered data to blocks and calls through
    /// to the block store sync.
    ///
    /// Buffered writes live only in `write_buffer` until this (or another
    /// metadata-mutating operation) runs, so this is the durability point.
    pub fn sync(&mut self) -> FsResult<()> {
        self.flush_all()?;
        self.store.sync()
    }
708
709    // ── Internal helpers ──
710
    /// Flush a single file's buffered writes to the block store.
    ///
    /// Writes every dirty chunk to a fresh block, rewrites the extent map
    /// and inode, then CoW-commits the directory chain so the new inode
    /// becomes visible.  No-op if the path has no buffered entry or the
    /// entry was never written to.
    fn flush_file(&mut self, path_key: &str) -> FsResult<()> {
        let dirty = match self.write_buffer.remove(path_key) {
            Some(d) => d,
            None => return Ok(()),
        };

        if !dirty.metadata_dirty {
            // Nothing was ever written; drop the buffered entry silently.
            return Ok(());
        }

        // Re-resolve path from the current superblock.
        let (dir_parts, leaf) = Self::split_path(path_key)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let mut extent_map = dirty.extent_map;

        // Write each dirty chunk to a new block (CoW: old blocks untouched).
        for (&chunk_idx, chunk_data) in &dirty.dirty_chunks {
            let data_block = self.allocator.allocate()?;
            write_encrypted_raw(
                self.store.as_ref(),
                self.crypto.as_ref(),
                &self.codec,
                data_block,
                ObjectKind::FileDataChunk,
                chunk_data,
            )?;

            // Point the extent entry at the new block, inserting if absent.
            if let Some(entry) = extent_map
                .entries
                .iter_mut()
                .find(|e| e.chunk_index == chunk_idx)
            {
                entry.data_ref = ObjectRef::new(data_block);
                entry.plaintext_len = chunk_data.len() as u32;
            } else {
                extent_map.entries.push(ExtentEntry {
                    chunk_index: chunk_idx,
                    data_ref: ObjectRef::new(data_block),
                    plaintext_len: chunk_data.len() as u32,
                });
            }
        }

        // Keep entries sorted so readers can binary-search by chunk index.
        extent_map.entries.sort_by_key(|e| e.chunk_index);

        // Write the updated extent map to a fresh block.
        let new_em_block = self.allocator.allocate()?;
        self.write_obj(new_em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Write the updated inode (new size, new extent map, new mtime).
        let mut new_inode = dirty.base_inode;
        new_inode.size = dirty.size;
        new_inode.extent_map_ref = ObjectRef::new(new_em_block);
        new_inode.modified_at = now_secs();
        let new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Update the parent directory entry to reference the new inode.
        let mut new_dir_page = dir_page.clone();
        for e in &mut new_dir_page.entries {
            if e.name == leaf {
                e.inode_ref = ObjectRef::new(new_inode_block);
            }
        }

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &new_dir_page)?;
        Ok(())
    }
788
789    /// Flush all buffered file writes to the block store.
790    fn flush_all(&mut self) -> FsResult<()> {
791        let keys: Vec<String> = self.write_buffer.keys().cloned().collect();
792        for key in keys {
793            self.flush_file(&key)?;
794        }
795        Ok(())
796    }
797
    /// Read from a file that has dirty (buffered) chunks, merging in-memory
    /// data with on-disk data.
    ///
    /// `offset` and `len` address the file's logical byte range. The result is
    /// clamped to the buffered file size (`dirty.size`): a read starting at or
    /// past EOF, or with `len == 0`, returns an empty `Vec`. For each chunk in
    /// the requested range, a buffered dirty copy takes precedence over the
    /// on-disk extent; a chunk with neither is treated as a hole and read as
    /// zeroes.
    fn read_file_buffered(&self, dirty: &DirtyFile, offset: u64, len: usize) -> FsResult<Vec<u8>> {
        let chunk_size = max_chunk_payload(self.store.block_size());
        let file_size = dirty.size as usize;
        let start = offset as usize;
        // Nothing to read: at/past EOF, or a zero-length request.
        if start >= file_size || len == 0 {
            return Ok(Vec::new());
        }
        // Clamp the read window to EOF.
        let end = std::cmp::min(start + len, file_size);
        let mut result = Vec::with_capacity(end - start);

        // Inclusive range of chunk indices covering [start, end).
        let first_chunk = start / chunk_size;
        let last_chunk = (end - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            // This chunk's logical byte span within the file.
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, file_size);
            let chunk_idx_u64 = chunk_idx as u64;

            // Get chunk data from buffer or disk. Priority: dirty buffer,
            // then on-disk extent, then zero-fill (sparse hole).
            let chunk_data: Vec<u8> = if let Some(buf) = dirty.dirty_chunks.get(&chunk_idx_u64) {
                buf.clone()
            } else if let Ok(pos) = dirty
                .extent_map
                .entries
                .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
            {
                let entry = &dirty.extent_map.entries[pos];
                let raw = read_encrypted_raw(
                    self.store.as_ref(),
                    self.crypto.as_ref(),
                    &self.codec,
                    entry.data_ref.block_id,
                )?;
                // Decrypted blocks are padded; only `plaintext_len` bytes are
                // real data. Clamp defensively in case the stored length
                // exceeds the decrypted buffer.
                let plain_len = std::cmp::min(entry.plaintext_len as usize, raw.len());
                raw[..plain_len].to_vec()
            } else {
                // No buffered copy and no extent: treat as a hole of zeroes.
                vec![0u8; chunk_file_end - chunk_file_start]
            };

            // Slice to the requested range within this chunk. Only the first
            // chunk can start mid-chunk and only the last can end mid-chunk.
            let read_start = if chunk_idx == first_chunk {
                start - chunk_file_start
            } else {
                0
            };
            let read_end = if chunk_idx == last_chunk {
                end - chunk_file_start
            } else {
                chunk_data.len()
            };
            // Re-clamp: a stored chunk may be shorter than its nominal span.
            let read_end = std::cmp::min(read_end, chunk_data.len());

            if read_start < read_end {
                result.extend_from_slice(&chunk_data[read_start..read_end]);
            }
        }

        Ok(result)
    }
859
860    fn alloc_inode_id(&mut self) -> InodeId {
861        let id = self.next_inode_id;
862        self.next_inode_id += 1;
863        id
864    }
865
866    fn validate_name(&self, name: &str) -> FsResult<()> {
867        if name.is_empty() || name.contains('/') || name.contains('\0') {
868            return Err(FsError::Internal("invalid name".into()));
869        }
870        if name.len() > MAX_NAME_LEN {
871            return Err(FsError::NameTooLong(name.len(), MAX_NAME_LEN));
872        }
873        Ok(())
874    }
875
876    fn read_obj<T: serde::de::DeserializeOwned>(&self, block_id: u64) -> FsResult<T> {
877        read_encrypted_object(
878            self.store.as_ref(),
879            self.crypto.as_ref(),
880            &self.codec,
881            block_id,
882        )
883    }
884
885    fn write_obj<T: serde::Serialize>(
886        &self,
887        block_id: u64,
888        kind: ObjectKind,
889        obj: &T,
890    ) -> FsResult<()> {
891        write_encrypted_object(
892            self.store.as_ref(),
893            self.crypto.as_ref(),
894            &self.codec,
895            block_id,
896            kind,
897            obj,
898        )
899    }
900
901    fn read_all_chunks(&self, extent_map: &ExtentMap) -> FsResult<Vec<u8>> {
902        let mut entries = extent_map.entries.clone();
903        entries.sort_by_key(|e| e.chunk_index);
904
905        let mut buf = Vec::new();
906        for entry in &entries {
907            let chunk = read_encrypted_raw(
908                self.store.as_ref(),
909                self.crypto.as_ref(),
910                &self.codec,
911                entry.data_ref.block_id,
912            )?;
913            // Only take plaintext_len bytes (chunk may have been decrypted from padded block).
914            let len = entry.plaintext_len as usize;
915            if len <= chunk.len() {
916                buf.extend_from_slice(&chunk[..len]);
917            } else {
918                buf.extend_from_slice(&chunk);
919            }
920        }
921        Ok(buf)
922    }
923
924    fn read_storage_header(&self) -> FsResult<StorageHeader> {
925        let block = self.store.read_block(BLOCK_STORAGE_HEADER)?;
926        if block.len() < 4 {
927            return Err(FsError::InvalidSuperblock);
928        }
929        let len = u32::from_le_bytes([block[0], block[1], block[2], block[3]]) as usize;
930        if len == 0 || 4 + len > block.len() {
931            return Err(FsError::InvalidSuperblock);
932        }
933        self.codec
934            .deserialize_object::<StorageHeader>(&block[4..4 + len])
935    }
936
937    fn commit_superblock(&mut self, sb: Superblock) -> FsResult<()> {
938        self.txn.commit(
939            self.store.as_ref(),
940            self.crypto.as_ref(),
941            &self.codec,
942            &self.allocator,
943            &sb,
944        )?;
945        self.superblock = Some(sb);
946        Ok(())
947    }
948
949    /// Walk the metadata tree from the superblock and mark all referenced blocks
950    /// as allocated in the allocator. Used during open/mount.
951    fn rebuild_allocator(&mut self, sb: &Superblock) -> FsResult<()> {
952        // Mark superblock block.
953        // The superblock_ref's block was allocated by the transaction manager.
954        // We also need to mark root pointer blocks, but those are reserved (0,1,2).
955
956        // We need to find which block the superblock is stored in.
957        // The root pointer tells us.
958        let (rp, _) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
959            .ok_or(FsError::InvalidRootPointer)?;
960        self.allocator.mark_allocated(rp.superblock_ref.block_id)?;
961
962        // Walk root inode.
963        self.mark_inode_tree(sb.root_inode_ref.block_id)?;
964
965        // Set next_inode_id to be higher than any seen inode.
966        // (We updated it during the walk.)
967
968        Ok(())
969    }
970
971    fn mark_inode_tree(&mut self, inode_block: u64) -> FsResult<()> {
972        self.allocator.mark_allocated(inode_block)?;
973        let inode: Inode = self.read_obj(inode_block)?;
974
975        if inode.id >= self.next_inode_id {
976            self.next_inode_id = inode.id + 1;
977        }
978
979        match inode.kind {
980            InodeKind::Directory => {
981                if !inode.directory_page_ref.is_null() {
982                    self.allocator
983                        .mark_allocated(inode.directory_page_ref.block_id)?;
984                    let dir_page: DirectoryPage =
985                        self.read_obj(inode.directory_page_ref.block_id)?;
986                    for entry in &dir_page.entries {
987                        self.mark_inode_tree(entry.inode_ref.block_id)?;
988                    }
989                }
990            }
991            InodeKind::File => {
992                if !inode.extent_map_ref.is_null() {
993                    self.allocator
994                        .mark_allocated(inode.extent_map_ref.block_id)?;
995                    let extent_map: ExtentMap = self.read_obj(inode.extent_map_ref.block_id)?;
996                    for entry in &extent_map.entries {
997                        self.allocator.mark_allocated(entry.data_ref.block_id)?;
998                    }
999                }
1000            }
1001        }
1002        Ok(())
1003    }
1004}
1005
/// Return type for directory listings (used by FFI and public API).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DirListEntry {
    /// Entry name within its parent directory.
    pub name: String,
    /// Whether the entry is a file or a directory.
    pub kind: InodeKind,
    /// Size in bytes (presumably the file's logical size; meaning for
    /// directories not established here — confirm against the caller).
    pub size: u64,
    /// Inode identifier of the entry.
    pub inode_id: InodeId,
}