//! doublecrypt_core/fs.rs — high-level filesystem core (CoW metadata,
//! encrypted block objects, buffered file writes).
1use std::collections::HashMap;
2use std::sync::Arc;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use rand::RngCore;
6
7use crate::allocator::{BitmapAllocator, SlotAllocator};
8use crate::block_store::BlockStore;
9use crate::codec::{
10    read_encrypted_object, read_encrypted_raw, write_encrypted_object, write_encrypted_raw,
11    ObjectCodec, PostcardCodec,
12};
13use crate::crypto::CryptoEngine;
14use crate::error::{FsError, FsResult};
15use crate::model::*;
16use crate::transaction::TransactionManager;
17
/// The main filesystem core. Owns the block store, crypto, codec, allocator,
/// and transaction manager. Provides high-level filesystem operations.
///
/// All path-accepting methods use `/`-separated paths.  An empty string or
/// `"/"` refers to the root directory.  Parent directories must already exist;
/// only `create_file` and `create_directory` create the leaf entry.
pub struct FilesystemCore {
    /// Backing block device abstraction (shared, dynamically dispatched).
    store: Arc<dyn BlockStore>,
    /// Engine used to encrypt/decrypt every on-disk object.
    crypto: Arc<dyn CryptoEngine>,
    /// Serialization codec for metadata objects (postcard-based).
    codec: PostcardCodec,
    /// Tracks which blocks are free / in use.
    allocator: BitmapAllocator,
    /// Handles superblock commits and crash recovery of the root pointer.
    txn: TransactionManager,
    /// Cached current superblock.
    superblock: Option<Superblock>,
    /// Next inode ID to allocate.
    next_inode_id: InodeId,
    /// Write buffer: dirty file chunks held in memory until flush.
    /// Keyed by the `/`-trimmed path string.
    write_buffer: HashMap<String, DirtyFile>,
}
37
/// Tracks one ancestor directory during path resolution, used by
/// `commit_cow_chain` to propagate CoW writes back to the root.
struct AncestorEntry {
    /// The ancestor directory's inode, as read during resolution.
    inode: Inode,
    /// The ancestor's directory page, as read during resolution.
    dir_page: DirectoryPage,
    /// Index within `dir_page.entries` of the child lying on the resolved path.
    child_index: usize,
}
45
/// Tracks in-memory buffered writes for a single file.
///
/// Full chunks (size == `max_chunk_payload`) are eagerly written to the
/// block store and removed from `dirty_chunks`, keeping only partial /
/// in-progress chunks in memory.  The metadata commit (extent map, inode,
/// CoW chain) is deferred to `sync()` or the next metadata operation.
struct DirtyFile {
    /// In-memory chunk data keyed by chunk index (only partial chunks).
    dirty_chunks: HashMap<u64, Vec<u8>>,
    /// The file's inode at the time buffering started.
    base_inode: Inode,
    /// The file's extent map (updated in-place when chunks are eagerly flushed).
    /// Kept sorted by `chunk_index` so lookups can binary-search.
    extent_map: ExtentMap,
    /// Current logical file size (updated on every write).
    size: u64,
    /// Set to `true` when any data has been written (even if eagerly flushed).
    metadata_dirty: bool,
}
64
/// Maximum plaintext payload that fits in one encrypted file-data chunk
/// for the given block size.
///
/// Computed conservatively: block size minus ~200 bytes of per-block
/// overhead (4-byte length prefix, ~60 bytes envelope metadata, 16-byte
/// Poly1305 tag, plus postcard framing slack).  Returns 0 when the block
/// is too small to carry any payload at all.
fn max_chunk_payload(block_size: usize) -> usize {
    // saturating_sub clamps to 0 for undersized blocks — identical to the
    // previous explicit `if block_size > 200` branch, but idiomatic.
    block_size.saturating_sub(200)
}
77
/// Current wall-clock time as whole seconds since the Unix epoch.
/// Falls back to 0 if the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
84
85impl FilesystemCore {
86    /// Create a new FilesystemCore backed by the given store and crypto engine.
87    pub fn new(store: Arc<dyn BlockStore>, crypto: Arc<dyn CryptoEngine>) -> Self {
88        let total_blocks = store.total_blocks();
89        Self {
90            store,
91            crypto,
92            codec: PostcardCodec,
93            allocator: BitmapAllocator::new(total_blocks),
94            txn: TransactionManager::new(),
95            superblock: None,
96            next_inode_id: 1,
97            write_buffer: HashMap::new(),
98        }
99    }
100
101    // ── Initialization ──
102
103    /// Initialize a brand-new filesystem on the block store.
104    /// Writes the storage header, creates the root directory, and commits.
105    pub fn init_filesystem(&mut self) -> FsResult<()> {
106        let block_size = self.store.block_size() as u32;
107        let total_blocks = self.store.total_blocks();
108
109        // Write storage header to block 0 (unencrypted).
110        let header = StorageHeader::new(block_size, total_blocks);
111        let header_bytes = self.codec.serialize_object(&header)?;
112        let bs = self.store.block_size();
113        let mut block = vec![0u8; bs];
114        rand::thread_rng().fill_bytes(&mut block);
115        let len = header_bytes.len() as u32;
116        block[..4].copy_from_slice(&len.to_le_bytes());
117        block[4..4 + header_bytes.len()].copy_from_slice(&header_bytes);
118        self.store.write_block(BLOCK_STORAGE_HEADER, &block)?;
119
120        // Create root directory inode.
121        let root_inode_id = self.alloc_inode_id();
122        let dir_page = DirectoryPage::new();
123        let dir_page_block = self.allocator.allocate()?;
124        write_encrypted_object(
125            self.store.as_ref(),
126            self.crypto.as_ref(),
127            &self.codec,
128            dir_page_block,
129            ObjectKind::DirectoryPage,
130            &dir_page,
131        )?;
132
133        let ts = now_secs();
134        let root_inode = Inode {
135            id: root_inode_id,
136            kind: InodeKind::Directory,
137            size: 0,
138            directory_page_ref: ObjectRef::new(dir_page_block),
139            extent_map_ref: ObjectRef::null(),
140            created_at: ts,
141            modified_at: ts,
142        };
143        let root_inode_block = self.allocator.allocate()?;
144        write_encrypted_object(
145            self.store.as_ref(),
146            self.crypto.as_ref(),
147            &self.codec,
148            root_inode_block,
149            ObjectKind::Inode,
150            &root_inode,
151        )?;
152
153        // Create superblock.
154        let sb = Superblock {
155            generation: 1,
156            root_inode_ref: ObjectRef::new(root_inode_block),
157        };
158        self.superblock = Some(sb.clone());
159
160        // Commit.
161        self.txn.commit(
162            self.store.as_ref(),
163            self.crypto.as_ref(),
164            &self.codec,
165            &self.allocator,
166            &sb,
167        )?;
168
169        Ok(())
170    }
171
    /// Open / mount an existing filesystem by recovering the latest root pointer.
    ///
    /// Steps: validate the plaintext storage header, recover the newest root
    /// pointer, decrypt the superblock it references, verify it against the
    /// root pointer's BLAKE3 checksum, restore the transaction manager state,
    /// and rebuild the allocator by walking the metadata tree.
    pub fn open(&mut self) -> FsResult<()> {
        // Verify storage header.
        let header = self.read_storage_header()?;
        if !header.is_valid() {
            return Err(FsError::InvalidSuperblock);
        }

        // Recover latest root pointer.  `was_b` records which root-pointer
        // slot won recovery — presumably so the next commit targets the
        // other slot; confirm against TransactionManager.
        let (rp, was_b) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
            .ok_or(FsError::InvalidRootPointer)?;

        // Read superblock.
        let sb: Superblock = read_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            rp.superblock_ref.block_id,
        )?;

        // Verify checksum: re-serialize the superblock and compare its BLAKE3
        // hash to the one stored in the root pointer.  This relies on the
        // codec producing deterministic bytes for equal values.
        let sb_bytes = self.codec.serialize_object(&sb)?;
        let checksum = blake3::hash(&sb_bytes);
        if *checksum.as_bytes() != rp.checksum {
            return Err(FsError::InvalidSuperblock);
        }

        self.txn = TransactionManager::from_recovered(rp.generation, was_b);
        self.superblock = Some(sb.clone());

        // Rebuild allocator knowledge by walking the metadata tree.
        self.rebuild_allocator(&sb)?;

        Ok(())
    }
207
208    // ── File operations ──
209
210    // ── Path helpers ──────────────────────────────────────────
211
212    /// Split a path into its directory components and the leaf name.
213    /// Returns `(["a","b"], "c")` for `"a/b/c"`, or `([], "c")` for `"c"`.
214    fn split_path(path: &str) -> FsResult<(Vec<&str>, &str)> {
215        let trimmed = path.trim_matches('/');
216        if trimmed.is_empty() {
217            return Err(FsError::Internal("empty path".into()));
218        }
219        let parts: Vec<&str> = trimmed.split('/').collect();
220        let (dirs, leaf) = parts.split_at(parts.len() - 1);
221        Ok((dirs.to_vec(), leaf[0]))
222    }
223
224    /// Parse a directory path (may be empty / "/" for root) into components.
225    fn split_dir_path(path: &str) -> Vec<&str> {
226        let trimmed = path.trim_matches('/');
227        if trimmed.is_empty() {
228            return Vec::new();
229        }
230        trimmed.split('/').collect()
231    }
232
    /// Resolve a sequence of directory components starting from the root inode,
    /// returning the ancestor chain needed for CoW commit propagation.
    ///
    /// Returns `(ancestors, target_inode, target_dir_page)` where `ancestors`
    /// is a list of `(Inode, DirectoryPage, entry_index_in_parent)` from root
    /// down to (but not including) the final resolved directory.
    ///
    /// Errors with `DirectoryNotFound` when a component is missing, and
    /// `NotADirectory` when a component names a file.
    fn resolve_dir_chain(
        &self,
        components: &[&str],
        root_inode: &Inode,
    ) -> FsResult<(Vec<AncestorEntry>, Inode, DirectoryPage)> {
        let mut ancestors: Vec<AncestorEntry> = Vec::new();
        let mut current_inode = root_inode.clone();
        let mut current_dir_page: DirectoryPage =
            self.read_obj(current_inode.directory_page_ref.block_id)?;

        for component in components {
            // Linear scan of the current directory page for this component.
            let idx = current_dir_page
                .entries
                .iter()
                .position(|e| e.name == *component)
                .ok_or_else(|| FsError::DirectoryNotFound(component.to_string()))?;

            let entry = &current_dir_page.entries[idx];
            if entry.kind != InodeKind::Directory {
                return Err(FsError::NotADirectory(component.to_string()));
            }

            // Read the child BEFORE pushing the parent, so the ancestor list
            // only ever contains fully resolved (inode, page) pairs.
            let child_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let child_dir_page: DirectoryPage =
                self.read_obj(child_inode.directory_page_ref.block_id)?;

            // Record the parent; its page/inode are moved in, not cloned.
            ancestors.push(AncestorEntry {
                inode: current_inode,
                dir_page: current_dir_page,
                child_index: idx,
            });

            current_inode = child_inode;
            current_dir_page = child_dir_page;
        }

        Ok((ancestors, current_inode, current_dir_page))
    }
277
    /// After mutating a directory's page, propagate CoW changes up through
    /// the ancestor chain to the root, then commit a new superblock.
    ///
    /// `new_dir_page` is the already-modified DirectoryPage of the target dir.
    /// `target_inode` is the inode of the directory that owns `new_dir_page`.
    /// `ancestors` is the chain from root down to (but not including) target.
    ///
    /// Every rewritten page and inode goes to a freshly allocated block;
    /// the previously referenced blocks are left untouched (copy-on-write).
    fn commit_cow_chain(
        &mut self,
        sb: &Superblock,
        ancestors: &[AncestorEntry],
        target_inode: &Inode,
        new_dir_page: &DirectoryPage,
    ) -> FsResult<()> {
        // Write the modified directory page.
        let mut new_dp_block = self.allocator.allocate()?;
        self.write_obj(new_dp_block, ObjectKind::DirectoryPage, new_dir_page)?;

        // Write the modified directory inode, pointing at the new page.
        let mut new_inode = target_inode.clone();
        new_inode.directory_page_ref = ObjectRef::new(new_dp_block);
        new_inode.modified_at = now_secs();
        let mut new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Propagate upward through ancestors (bottom to top): each parent's
        // page is rewritten to reference the child's new inode block, then
        // the parent's own inode is rewritten to reference that new page.
        for ancestor in ancestors.iter().rev() {
            let mut parent_dp = ancestor.dir_page.clone();
            parent_dp.entries[ancestor.child_index].inode_ref = ObjectRef::new(new_inode_block);

            new_dp_block = self.allocator.allocate()?;
            self.write_obj(new_dp_block, ObjectKind::DirectoryPage, &parent_dp)?;

            let mut parent_inode = ancestor.inode.clone();
            parent_inode.directory_page_ref = ObjectRef::new(new_dp_block);
            parent_inode.modified_at = now_secs();
            new_inode_block = self.allocator.allocate()?;
            self.write_obj(new_inode_block, ObjectKind::Inode, &parent_inode)?;
        }

        // new_inode_block is now the new root inode block; bump the
        // generation and commit a fresh superblock referencing it.
        let new_sb = Superblock {
            generation: sb.generation + 1,
            root_inode_ref: ObjectRef::new(new_inode_block),
        };
        self.commit_superblock(new_sb)?;
        Ok(())
    }
325
326    // ── Public operations ─────────────────────────────────────
327
    /// Create a new empty file at the given path.
    ///
    /// Parent directories must already exist.  The leaf name is created in
    /// the innermost directory.  Fails with `FileAlreadyExists` if any entry
    /// (file or directory) with that name is already present.
    pub fn create_file(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes first so path resolution sees a current tree.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();

        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::FileAlreadyExists(leaf.to_string()));
        }

        // Create empty extent map.
        let extent_map = ExtentMap::new();
        let em_block = self.allocator.allocate()?;
        self.write_obj(em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Create file inode referencing the (empty) extent map.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let file_inode = Inode {
            id: inode_id,
            kind: InodeKind::File,
            size: 0,
            directory_page_ref: ObjectRef::null(),
            extent_map_ref: ObjectRef::new(em_block),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &file_inode)?;

        // Link the new inode into the parent directory, then commit the
        // whole chain copy-on-write up to a new superblock.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::File,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
380
    /// Write data to a file at the given path.
    ///
    /// Writes are buffered in memory and only flushed to the block store on
    /// `sync()` or when another metadata operation occurs.  Full chunks are
    /// eagerly encrypted and written out, which keeps the in-memory buffer
    /// small; the metadata commit (extent map, inode, CoW chain) stays
    /// deferred.  This turns many small sequential writes (e.g. `dd bs=1k`)
    /// into a single bulk commit.
    ///
    /// NOTE(review): earlier docs mentioned a ~16 MiB total-buffer cap, but
    /// no such check is visible in this method — confirm whether it is
    /// enforced elsewhere.
    pub fn write_file(&mut self, path: &str, offset: u64, data: &[u8]) -> FsResult<()> {
        if data.is_empty() {
            return Ok(());
        }

        let chunk_size = max_chunk_payload(self.store.block_size());
        if chunk_size == 0 {
            // Block size too small to carry any payload after envelope overhead.
            return Err(FsError::DataTooLarge(data.len()));
        }

        let path_key = path.trim_matches('/').to_string();

        // Take the dirty entry out of the map so `self` is free for other
        // borrows (disk reads, etc.).  We'll put it back at the end.
        let mut dirty = match self.write_buffer.remove(&path_key) {
            Some(d) => d,
            None => {
                // First buffered write — load metadata from disk.
                let (dir_parts, leaf) = Self::split_path(path)?;
                let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
                let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
                let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
                let entry = dir_page
                    .entries
                    .iter()
                    .find(|e| e.name == leaf)
                    .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
                if entry.kind != InodeKind::File {
                    return Err(FsError::NotAFile(leaf.to_string()));
                }
                let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
                let mut extent_map: ExtentMap =
                    self.read_obj(file_inode.extent_map_ref.block_id)?;
                // Keep extents sorted so chunk lookups below can binary-search.
                extent_map.entries.sort_by_key(|e| e.chunk_index);
                DirtyFile {
                    dirty_chunks: HashMap::new(),
                    base_inode: file_inode.clone(),
                    extent_map,
                    size: file_inode.size,
                    metadata_dirty: false,
                }
            }
        };

        let old_size = dirty.size as usize;
        let write_start = offset as usize;
        let write_end = write_start + data.len();
        let new_size = std::cmp::max(old_size, write_end);

        // For a write past EOF, start at the chunk containing the old EOF so
        // the gap gets zero-filled; otherwise start at the chunk containing
        // the write offset.
        let first_chunk = if write_start >= old_size {
            old_size / chunk_size
        } else {
            write_start / chunk_size
        };
        let last_chunk = (new_size - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, new_size);
            let chunk_len = chunk_file_end - chunk_file_start;
            let chunk_idx_u64 = chunk_idx as u64;

            // If this chunk isn't buffered yet, load its on-disk content (or zeros).
            if !dirty.dirty_chunks.contains_key(&chunk_idx_u64) {
                let mut buf = vec![0u8; chunk_len];
                if chunk_file_start < old_size {
                    if let Ok(pos) = dirty
                        .extent_map
                        .entries
                        .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
                    {
                        let existing = &dirty.extent_map.entries[pos];
                        let raw = read_encrypted_raw(
                            self.store.as_ref(),
                            self.crypto.as_ref(),
                            &self.codec,
                            existing.data_ref.block_id,
                        )?;
                        // Clamp to both the recorded plaintext length and the
                        // actual decrypted length so the copy can't go
                        // out of range on either side.
                        let copy_len = std::cmp::min(existing.plaintext_len as usize, chunk_len);
                        let src_len = std::cmp::min(copy_len, raw.len());
                        buf[..src_len].copy_from_slice(&raw[..src_len]);
                    }
                }
                dirty.dirty_chunks.insert(chunk_idx_u64, buf);
            }

            let chunk_buf = dirty.dirty_chunks.get_mut(&chunk_idx_u64).unwrap();
            if chunk_buf.len() < chunk_len {
                // The chunk grows, e.g. when appending into a partial tail chunk.
                chunk_buf.resize(chunk_len, 0);
            }

            // Overlay the write data onto the chunk.
            let overlap_start = std::cmp::max(chunk_file_start, write_start);
            let overlap_end = std::cmp::min(chunk_file_end, write_end);
            if overlap_start < overlap_end {
                let data_off = overlap_start - write_start;
                let chunk_off = overlap_start - chunk_file_start;
                let len = overlap_end - overlap_start;
                chunk_buf[chunk_off..chunk_off + len]
                    .copy_from_slice(&data[data_off..data_off + len]);
            }
        }

        dirty.size = new_size as u64;
        dirty.metadata_dirty = true;

        // Eagerly flush any full chunks to the block store so the in-memory
        // buffer stays small.  The extent map is updated in-place; the
        // metadata commit is deferred to sync().
        let mut to_flush: Vec<u64> = Vec::new();
        for (&idx, buf) in dirty.dirty_chunks.iter() {
            if buf.len() >= chunk_size {
                to_flush.push(idx);
            }
        }
        for idx in to_flush {
            let chunk_data = dirty.dirty_chunks.remove(&idx).unwrap();
            let data_block = self.allocator.allocate()?;
            write_encrypted_raw(
                self.store.as_ref(),
                self.crypto.as_ref(),
                &self.codec,
                data_block,
                ObjectKind::FileDataChunk,
                &chunk_data,
            )?;
            // Point the extent at the new block; the previously referenced
            // block (if any) is left in place, copy-on-write style.
            if let Some(entry) = dirty
                .extent_map
                .entries
                .iter_mut()
                .find(|e| e.chunk_index == idx)
            {
                entry.data_ref = ObjectRef::new(data_block);
                entry.plaintext_len = chunk_data.len() as u32;
            } else {
                dirty.extent_map.entries.push(ExtentEntry {
                    chunk_index: idx,
                    data_ref: ObjectRef::new(data_block),
                    plaintext_len: chunk_data.len() as u32,
                });
            }
        }

        // Re-sort so the binary search above stays valid on the next write.
        dirty.extent_map.entries.sort_by_key(|e| e.chunk_index);
        self.write_buffer.insert(path_key, dirty);
        Ok(())
    }
534
535    /// Read file data at the given path. Returns the requested slice.
536    ///
537    /// If the file has buffered (unflushed) writes, reads are served from the
538    /// in-memory buffer merged with on-disk data.
539    pub fn read_file(&self, path: &str, offset: u64, len: usize) -> FsResult<Vec<u8>> {
540        let path_key = path.trim_matches('/');
541
542        if let Some(dirty) = self.write_buffer.get(path_key) {
543            return self.read_file_buffered(dirty, offset, len);
544        }
545
546        let (dir_parts, leaf) = Self::split_path(path)?;
547        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
548        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
549        let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
550
551        let entry = dir_page
552            .entries
553            .iter()
554            .find(|e| e.name == leaf)
555            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
556
557        if entry.kind != InodeKind::File {
558            return Err(FsError::NotAFile(leaf.to_string()));
559        }
560
561        let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
562        let extent_map: ExtentMap = self.read_obj(file_inode.extent_map_ref.block_id)?;
563
564        let full_data = self.read_all_chunks(&extent_map)?;
565
566        let start = offset as usize;
567        if start >= full_data.len() {
568            return Ok(Vec::new());
569        }
570        let end = std::cmp::min(start + len, full_data.len());
571        Ok(full_data[start..end].to_vec())
572    }
573
574    /// List entries in a directory at the given path.
575    ///
576    /// Pass `""` or `"/"` to list the root directory.
577    pub fn list_directory(&self, path: &str) -> FsResult<Vec<DirListEntry>> {
578        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
579        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
580
581        let components = Self::split_dir_path(path);
582        let (_, _, dir_page) = self.resolve_dir_chain(&components, &root_inode)?;
583
584        let dir_prefix = {
585            let trimmed = path.trim_matches('/');
586            if trimmed.is_empty() {
587                String::new()
588            } else {
589                format!("{}/", trimmed)
590            }
591        };
592
593        let mut result = Vec::new();
594        for entry in &dir_page.entries {
595            let inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
596            // Use buffered size if this file has pending writes.
597            let size = if entry.kind == InodeKind::File {
598                let full_path = format!("{}{}", dir_prefix, entry.name);
599                if let Some(dirty) = self.write_buffer.get(&full_path) {
600                    dirty.size
601                } else {
602                    inode.size
603                }
604            } else {
605                inode.size
606            };
607            result.push(DirListEntry {
608                name: entry.name.clone(),
609                kind: entry.kind,
610                size,
611                inode_id: entry.inode_id,
612            });
613        }
614        Ok(result)
615    }
616
    /// Create a subdirectory at the given path.
    ///
    /// Parent directories must already exist; only the leaf is created.
    /// Fails with `DirectoryAlreadyExists` if any entry with that name is
    /// already present.
    pub fn create_directory(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes first so path resolution sees a current tree.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::DirectoryAlreadyExists(leaf.to_string()));
        }

        // Create empty directory page for the new subdirectory.
        let sub_dp = DirectoryPage::new();
        let sub_dp_block = self.allocator.allocate()?;
        self.write_obj(sub_dp_block, ObjectKind::DirectoryPage, &sub_dp)?;

        // Create the directory inode referencing the empty page.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let dir_inode = Inode {
            id: inode_id,
            kind: InodeKind::Directory,
            size: 0,
            directory_page_ref: ObjectRef::new(sub_dp_block),
            extent_map_ref: ObjectRef::null(),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &dir_inode)?;

        // Link the new inode into the parent directory, then commit CoW.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::Directory,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
666
    /// Remove a file or empty directory at the given path.
    ///
    /// Pending buffered writes for the removed path are discarded (never
    /// flushed); all other buffered files are flushed first so resolution
    /// sees a current tree.  Removing a non-empty directory fails with
    /// `DirectoryNotEmpty`.
    ///
    /// NOTE(review): blocks referenced by the removed entry are not released
    /// here — presumably reclaimed elsewhere (e.g. allocator rebuild on
    /// mount); confirm.
    pub fn remove_file(&mut self, path: &str) -> FsResult<()> {
        // Drop buffered data for this exact path so flush_all below doesn't
        // persist writes for a file that is about to disappear.
        let path_key = path.trim_matches('/').to_string();
        self.write_buffer.remove(&path_key);
        self.flush_all()?;
        let (dir_parts, leaf) = Self::split_path(path)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let idx = dir_page
            .entries
            .iter()
            .position(|e| e.name == leaf)
            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;

        // Directories may only be removed when empty.
        let entry = &dir_page.entries[idx];
        if entry.kind == InodeKind::Directory {
            let dir_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let sub_page: DirectoryPage = self.read_obj(dir_inode.directory_page_ref.block_id)?;
            if !sub_page.entries.is_empty() {
                return Err(FsError::DirectoryNotEmpty(leaf.to_string()));
            }
        }

        dir_page.entries.remove(idx);
        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
701
    /// Rename a file or directory.  Both `old_path` and `new_path` must share
    /// the same parent directory (move across directories is not supported yet).
    ///
    /// Buffered writes are flushed before renaming, so no write-buffer key
    /// can be left pointing at the old name.
    pub fn rename(&mut self, old_path: &str, new_path: &str) -> FsResult<()> {
        let (old_dir, old_leaf) = Self::split_path(old_path)?;
        let (new_dir, new_leaf) = Self::split_path(new_path)?;
        self.validate_name(new_leaf)?;
        self.flush_all()?;

        // Cross-directory moves would require rewriting two CoW chains.
        if old_dir != new_dir {
            return Err(FsError::Internal(
                "rename across directories is not supported".into(),
            ));
        }

        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&old_dir, &root_inode)?;

        // The target name must be free before we look up the source entry.
        if dir_page.entries.iter().any(|e| e.name == new_leaf) {
            return Err(FsError::FileAlreadyExists(new_leaf.to_string()));
        }

        let entry = dir_page
            .entries
            .iter_mut()
            .find(|e| e.name == old_leaf)
            .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;

        entry.name = new_leaf.to_string();

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
740
741    /// Sync / flush. Writes all buffered data to blocks and calls through
742    /// to the block store sync.
743    pub fn sync(&mut self) -> FsResult<()> {
744        self.flush_all()?;
745        self.store.sync()
746    }
747
748    // ── Internal helpers ──
749
    /// Flush a single file's buffered writes to the block store.
    ///
    /// Writes all remaining (partial) dirty chunks, rewrites the extent map
    /// and inode to fresh blocks, then propagates the change through the
    /// directory CoW chain and commits a new superblock.  A no-op if the
    /// file has no buffered entry or nothing was ever written to it.
    fn flush_file(&mut self, path_key: &str) -> FsResult<()> {
        let dirty = match self.write_buffer.remove(path_key) {
            Some(d) => d,
            None => return Ok(()),
        };

        // Nothing was written since buffering started — nothing to commit.
        if !dirty.metadata_dirty {
            return Ok(());
        }

        // Re-resolve path from the current superblock.
        let (dir_parts, leaf) = Self::split_path(path_key)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let mut extent_map = dirty.extent_map;

        // Write each dirty chunk to a new block (full chunks were already
        // flushed eagerly by write_file and are no longer in dirty_chunks).
        for (&chunk_idx, chunk_data) in &dirty.dirty_chunks {
            let data_block = self.allocator.allocate()?;
            write_encrypted_raw(
                self.store.as_ref(),
                self.crypto.as_ref(),
                &self.codec,
                data_block,
                ObjectKind::FileDataChunk,
                chunk_data,
            )?;

            // Update the existing extent or append a new one.
            if let Some(entry) = extent_map
                .entries
                .iter_mut()
                .find(|e| e.chunk_index == chunk_idx)
            {
                entry.data_ref = ObjectRef::new(data_block);
                entry.plaintext_len = chunk_data.len() as u32;
            } else {
                extent_map.entries.push(ExtentEntry {
                    chunk_index: chunk_idx,
                    data_ref: ObjectRef::new(data_block),
                    plaintext_len: chunk_data.len() as u32,
                });
            }
        }

        // Restore the sorted-by-chunk-index invariant.
        extent_map.entries.sort_by_key(|e| e.chunk_index);

        // Write extent map.
        let new_em_block = self.allocator.allocate()?;
        self.write_obj(new_em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Write inode with the buffered size and the fresh extent-map ref.
        let mut new_inode = dirty.base_inode;
        new_inode.size = dirty.size;
        new_inode.extent_map_ref = ObjectRef::new(new_em_block);
        new_inode.modified_at = now_secs();
        let new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Update dir entry to reference the new inode block.
        let mut new_dir_page = dir_page.clone();
        for e in &mut new_dir_page.entries {
            if e.name == leaf {
                e.inode_ref = ObjectRef::new(new_inode_block);
            }
        }

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &new_dir_page)?;
        Ok(())
    }
827
828    /// Flush all buffered file writes to the block store.
829    fn flush_all(&mut self) -> FsResult<()> {
830        let keys: Vec<String> = self.write_buffer.keys().cloned().collect();
831        for key in keys {
832            self.flush_file(&key)?;
833        }
834        Ok(())
835    }
836
    /// Read from a file that has dirty (buffered) chunks, merging in-memory
    /// data with on-disk data.
    ///
    /// For every chunk overlapping `[offset, offset + len)` the data comes,
    /// in order of preference, from the in-memory dirty buffer, then from
    /// the on-disk extent map, and finally — for a chunk with neither — from
    /// zeros.  The result is clamped to the buffered file size.
    fn read_file_buffered(&self, dirty: &DirtyFile, offset: u64, len: usize) -> FsResult<Vec<u8>> {
        let chunk_size = max_chunk_payload(self.store.block_size());
        let file_size = dirty.size as usize;
        let start = offset as usize;
        // Reads past EOF or of zero length yield an empty buffer.
        if start >= file_size || len == 0 {
            return Ok(Vec::new());
        }
        let end = std::cmp::min(start + len, file_size);
        let mut result = Vec::with_capacity(end - start);

        // Inclusive range of chunk indices covering the requested bytes.
        let first_chunk = start / chunk_size;
        let last_chunk = (end - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, file_size);
            let chunk_idx_u64 = chunk_idx as u64;

            // Get chunk data from buffer or disk.  The binary search relies
            // on extent entries being sorted by chunk_index (the flush path
            // sorts them before writing the extent map).
            let chunk_data: Vec<u8> = if let Some(buf) = dirty.dirty_chunks.get(&chunk_idx_u64) {
                buf.clone()
            } else if let Ok(pos) = dirty
                .extent_map
                .entries
                .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
            {
                let entry = &dirty.extent_map.entries[pos];
                let raw = read_encrypted_raw(
                    self.store.as_ref(),
                    self.crypto.as_ref(),
                    &self.codec,
                    entry.data_ref.block_id,
                )?;
                // Only `plaintext_len` bytes are real payload (the stored
                // block may be padded); clamp in case the recorded length
                // exceeds what was actually decrypted.
                let plain_len = std::cmp::min(entry.plaintext_len as usize, raw.len());
                raw[..plain_len].to_vec()
            } else {
                // Hole: no buffered data and no extent entry — reads as zeros.
                vec![0u8; chunk_file_end - chunk_file_start]
            };

            // Slice to the requested range within this chunk.
            let read_start = if chunk_idx == first_chunk {
                start - chunk_file_start
            } else {
                0
            };
            let read_end = if chunk_idx == last_chunk {
                end - chunk_file_start
            } else {
                chunk_data.len()
            };
            // A short chunk (e.g. a partial tail) may hold fewer bytes than
            // the nominal range; never slice past what we actually have.
            let read_end = std::cmp::min(read_end, chunk_data.len());

            if read_start < read_end {
                result.extend_from_slice(&chunk_data[read_start..read_end]);
            }
        }

        Ok(result)
    }
898
899    fn alloc_inode_id(&mut self) -> InodeId {
900        let id = self.next_inode_id;
901        self.next_inode_id += 1;
902        id
903    }
904
905    fn validate_name(&self, name: &str) -> FsResult<()> {
906        if name.is_empty() || name.contains('/') || name.contains('\0') {
907            return Err(FsError::Internal("invalid name".into()));
908        }
909        if name.len() > MAX_NAME_LEN {
910            return Err(FsError::NameTooLong(name.len(), MAX_NAME_LEN));
911        }
912        Ok(())
913    }
914
915    fn read_obj<T: serde::de::DeserializeOwned>(&self, block_id: u64) -> FsResult<T> {
916        read_encrypted_object(
917            self.store.as_ref(),
918            self.crypto.as_ref(),
919            &self.codec,
920            block_id,
921        )
922    }
923
924    fn write_obj<T: serde::Serialize>(
925        &self,
926        block_id: u64,
927        kind: ObjectKind,
928        obj: &T,
929    ) -> FsResult<()> {
930        write_encrypted_object(
931            self.store.as_ref(),
932            self.crypto.as_ref(),
933            &self.codec,
934            block_id,
935            kind,
936            obj,
937        )
938    }
939
940    fn read_all_chunks(&self, extent_map: &ExtentMap) -> FsResult<Vec<u8>> {
941        let mut entries = extent_map.entries.clone();
942        entries.sort_by_key(|e| e.chunk_index);
943
944        let mut buf = Vec::new();
945        for entry in &entries {
946            let chunk = read_encrypted_raw(
947                self.store.as_ref(),
948                self.crypto.as_ref(),
949                &self.codec,
950                entry.data_ref.block_id,
951            )?;
952            // Only take plaintext_len bytes (chunk may have been decrypted from padded block).
953            let len = entry.plaintext_len as usize;
954            if len <= chunk.len() {
955                buf.extend_from_slice(&chunk[..len]);
956            } else {
957                buf.extend_from_slice(&chunk);
958            }
959        }
960        Ok(buf)
961    }
962
963    fn read_storage_header(&self) -> FsResult<StorageHeader> {
964        let block = self.store.read_block(BLOCK_STORAGE_HEADER)?;
965        if block.len() < 4 {
966            return Err(FsError::InvalidSuperblock);
967        }
968        let len = u32::from_le_bytes([block[0], block[1], block[2], block[3]]) as usize;
969        if len == 0 || 4 + len > block.len() {
970            return Err(FsError::InvalidSuperblock);
971        }
972        self.codec
973            .deserialize_object::<StorageHeader>(&block[4..4 + len])
974    }
975
976    fn commit_superblock(&mut self, sb: Superblock) -> FsResult<()> {
977        self.txn.commit(
978            self.store.as_ref(),
979            self.crypto.as_ref(),
980            &self.codec,
981            &self.allocator,
982            &sb,
983        )?;
984        self.superblock = Some(sb);
985        Ok(())
986    }
987
988    /// Walk the metadata tree from the superblock and mark all referenced blocks
989    /// as allocated in the allocator. Used during open/mount.
990    fn rebuild_allocator(&mut self, sb: &Superblock) -> FsResult<()> {
991        // Mark superblock block.
992        // The superblock_ref's block was allocated by the transaction manager.
993        // We also need to mark root pointer blocks, but those are reserved (0,1,2).
994
995        // We need to find which block the superblock is stored in.
996        // The root pointer tells us.
997        let (rp, _) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
998            .ok_or(FsError::InvalidRootPointer)?;
999        self.allocator.mark_allocated(rp.superblock_ref.block_id)?;
1000
1001        // Walk root inode.
1002        self.mark_inode_tree(sb.root_inode_ref.block_id)?;
1003
1004        // Set next_inode_id to be higher than any seen inode.
1005        // (We updated it during the walk.)
1006
1007        Ok(())
1008    }
1009
1010    fn mark_inode_tree(&mut self, inode_block: u64) -> FsResult<()> {
1011        self.allocator.mark_allocated(inode_block)?;
1012        let inode: Inode = self.read_obj(inode_block)?;
1013
1014        if inode.id >= self.next_inode_id {
1015            self.next_inode_id = inode.id + 1;
1016        }
1017
1018        match inode.kind {
1019            InodeKind::Directory => {
1020                if !inode.directory_page_ref.is_null() {
1021                    self.allocator
1022                        .mark_allocated(inode.directory_page_ref.block_id)?;
1023                    let dir_page: DirectoryPage =
1024                        self.read_obj(inode.directory_page_ref.block_id)?;
1025                    for entry in &dir_page.entries {
1026                        self.mark_inode_tree(entry.inode_ref.block_id)?;
1027                    }
1028                }
1029            }
1030            InodeKind::File => {
1031                if !inode.extent_map_ref.is_null() {
1032                    self.allocator
1033                        .mark_allocated(inode.extent_map_ref.block_id)?;
1034                    let extent_map: ExtentMap = self.read_obj(inode.extent_map_ref.block_id)?;
1035                    for entry in &extent_map.entries {
1036                        self.allocator.mark_allocated(entry.data_ref.block_id)?;
1037                    }
1038                }
1039            }
1040        }
1041        Ok(())
1042    }
1043}
1044
/// Return type for directory listings (used by FFI and public API).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DirListEntry {
    /// Entry name within its parent directory (a single path component;
    /// names containing `/` or NUL are rejected at creation time).
    pub name: String,
    /// Whether the entry is a file or a directory.
    pub kind: InodeKind,
    /// Size in bytes for files; value for directories not established
    /// here — TODO confirm against the listing code.
    pub size: u64,
    /// Identifier of the entry's inode.
    pub inode_id: InodeId,
}