//! doublecrypt_core/fs.rs — high-level filesystem core built on an encrypted
//! copy-on-write block store.
1use std::collections::HashMap;
2use std::sync::Arc;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use rand::RngCore;
6
7use crate::allocator::{BitmapAllocator, SlotAllocator};
8use crate::block_store::BlockStore;
9use crate::codec::{
10    read_encrypted_object, read_encrypted_raw, write_encrypted_object, write_encrypted_raw,
11    ObjectCodec, PostcardCodec,
12};
13use crate::crypto::CryptoEngine;
14use crate::error::{FsError, FsResult};
15use crate::model::*;
16use crate::transaction::TransactionManager;
17
/// The main filesystem core. Owns the block store, crypto, codec, allocator,
/// and transaction manager. Provides high-level filesystem operations.
///
/// All path-accepting methods use `/`-separated paths.  An empty string or
/// `"/"` refers to the root directory.  Parent directories must already exist;
/// only `create_file` and `create_directory` create the leaf entry.
pub struct FilesystemCore {
    /// Underlying block device abstraction; all persistent reads/writes go through it.
    store: Arc<dyn BlockStore>,
    /// Encryption engine used for every on-disk object except the plaintext storage header.
    crypto: Arc<dyn CryptoEngine>,
    /// Serialization codec for metadata objects.
    codec: PostcardCodec,
    /// Free-block tracker; rebuilt from the metadata tree on `open()`.
    allocator: BitmapAllocator,
    /// Commits superblocks and recovers the latest root pointer on mount.
    txn: TransactionManager,
    /// Cached current superblock; `None` until `init_filesystem` or `open` succeeds.
    superblock: Option<Superblock>,
    /// Next inode ID to allocate (starts at 1, bumped by `alloc_inode_id`).
    next_inode_id: InodeId,
    /// Write buffer: dirty file chunks held in memory until flush,
    /// keyed by the `/`-trimmed file path.
    write_buffer: HashMap<String, DirtyFile>,
}
37
/// Tracks one ancestor directory during path resolution, used by
/// `commit_cow_chain` to propagate CoW writes back to the root.
struct AncestorEntry {
    /// The ancestor directory's inode, as read at resolve time.
    inode: Inode,
    /// The ancestor's directory page, as read at resolve time.
    dir_page: DirectoryPage,
    /// Index within `dir_page.entries` of the child that lies on the resolved path.
    child_index: usize,
}
45
/// Default write buffer limit before auto-flush (16 MiB).  When the total
/// bytes buffered across all dirty files exceeds this, `write_file` flushes
/// every buffered file.
const WRITE_BUFFER_LIMIT: usize = 16 * 1024 * 1024;
48
/// Tracks in-memory buffered writes for a single file.
/// Dirty chunks are held in memory and flushed to the block store
/// on `sync()`, when another metadata operation occurs, or when the
/// total buffer size exceeds `WRITE_BUFFER_LIMIT`.
struct DirtyFile {
    /// In-memory chunk data keyed by chunk index.  Each value holds the full
    /// plaintext of that chunk (on-disk content merged with buffered writes).
    dirty_chunks: HashMap<u64, Vec<u8>>,
    /// The file's inode at the time buffering started.
    base_inode: Inode,
    /// The file's extent map (disk snapshot, kept sorted by chunk index;
    /// updated with new entries on flush).
    extent_map: ExtentMap,
    /// Current logical file size (updated on every write).
    size: u64,
}
63
/// Maximum payload size for a single file data chunk, given the store's
/// block size.
///
/// Computed conservatively: block_size minus overhead for envelope framing
/// (4-byte length prefix, ~60 bytes envelope metadata, 16-byte Poly1305 tag,
/// some postcard framing).  Returns 0 when the block is too small to carry
/// any payload; callers treat 0 as an unusable block size.
fn max_chunk_payload(block_size: usize) -> usize {
    // saturating_sub expresses "block minus overhead, floored at zero"
    // without the explicit branch.
    block_size.saturating_sub(200)
}
76
/// Current wall-clock time as whole seconds since the Unix epoch.
/// Falls back to 0 if the system clock reports a time before the epoch.
fn now_secs() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0)
}
83
84impl FilesystemCore {
85    /// Create a new FilesystemCore backed by the given store and crypto engine.
86    pub fn new(store: Arc<dyn BlockStore>, crypto: Arc<dyn CryptoEngine>) -> Self {
87        let total_blocks = store.total_blocks();
88        Self {
89            store,
90            crypto,
91            codec: PostcardCodec,
92            allocator: BitmapAllocator::new(total_blocks),
93            txn: TransactionManager::new(),
94            superblock: None,
95            next_inode_id: 1,
96            write_buffer: HashMap::new(),
97        }
98    }
99
100    // ── Initialization ──
101
    /// Initialize a brand-new filesystem on the block store.
    /// Writes the storage header, creates the root directory, and commits.
    ///
    /// On success the in-memory superblock cache is populated and generation 1
    /// has been committed through the transaction manager.
    pub fn init_filesystem(&mut self) -> FsResult<()> {
        let block_size = self.store.block_size() as u32;
        let total_blocks = self.store.total_blocks();

        // Write storage header to block 0 (unencrypted).  The block is first
        // filled with random bytes so the unused tail carries no recognizable
        // structure, then a 4-byte little-endian length prefix plus the
        // serialized header overwrite the front.
        // NOTE(review): assumes 4 + header_bytes.len() <= block size; the
        // slice copies below panic otherwise — confirm header size bound.
        let header = StorageHeader::new(block_size, total_blocks);
        let header_bytes = self.codec.serialize_object(&header)?;
        let bs = self.store.block_size();
        let mut block = vec![0u8; bs];
        rand::thread_rng().fill_bytes(&mut block);
        let len = header_bytes.len() as u32;
        block[..4].copy_from_slice(&len.to_le_bytes());
        block[4..4 + header_bytes.len()].copy_from_slice(&header_bytes);
        self.store.write_block(BLOCK_STORAGE_HEADER, &block)?;

        // Create root directory inode with an empty, encrypted directory page.
        let root_inode_id = self.alloc_inode_id();
        let dir_page = DirectoryPage::new();
        let dir_page_block = self.allocator.allocate()?;
        write_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            dir_page_block,
            ObjectKind::DirectoryPage,
            &dir_page,
        )?;

        let ts = now_secs();
        let root_inode = Inode {
            id: root_inode_id,
            kind: InodeKind::Directory,
            size: 0,
            directory_page_ref: ObjectRef::new(dir_page_block),
            extent_map_ref: ObjectRef::null(),
            created_at: ts,
            modified_at: ts,
        };
        let root_inode_block = self.allocator.allocate()?;
        write_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            root_inode_block,
            ObjectKind::Inode,
            &root_inode,
        )?;

        // Create superblock (generation 1) and cache it before committing.
        let sb = Superblock {
            generation: 1,
            root_inode_ref: ObjectRef::new(root_inode_block),
        };
        self.superblock = Some(sb.clone());

        // Commit the superblock through the transaction manager.
        self.txn.commit(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            &self.allocator,
            &sb,
        )?;

        Ok(())
    }
170
    /// Open / mount an existing filesystem by recovering the latest root pointer.
    ///
    /// Steps: validate the unencrypted storage header, recover the most
    /// recent root pointer via the transaction manager, decrypt the
    /// superblock it references, verify the superblock's blake3 checksum
    /// against the root pointer, then rebuild allocator state by walking the
    /// metadata tree.
    pub fn open(&mut self) -> FsResult<()> {
        // Verify storage header.
        let header = self.read_storage_header()?;
        if !header.is_valid() {
            return Err(FsError::InvalidSuperblock);
        }

        // Recover latest root pointer.  `was_b` records which root-pointer
        // slot held the newest generation, so the transaction manager can
        // resume alternating correctly.
        let (rp, was_b) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
            .ok_or(FsError::InvalidRootPointer)?;

        // Read superblock.
        let sb: Superblock = read_encrypted_object(
            self.store.as_ref(),
            self.crypto.as_ref(),
            &self.codec,
            rp.superblock_ref.block_id,
        )?;

        // Verify checksum: re-serialize the decrypted superblock and compare
        // its blake3 hash to the one recorded in the root pointer.
        // NOTE(review): this relies on serialization being deterministic —
        // revisit if the codec ever changes.
        let sb_bytes = self.codec.serialize_object(&sb)?;
        let checksum = blake3::hash(&sb_bytes);
        if *checksum.as_bytes() != rp.checksum {
            return Err(FsError::InvalidSuperblock);
        }

        self.txn = TransactionManager::from_recovered(rp.generation, was_b);
        self.superblock = Some(sb.clone());

        // Rebuild allocator knowledge by walking the metadata tree.
        self.rebuild_allocator(&sb)?;

        Ok(())
    }
206
207    // ── File operations ──
208
209    // ── Path helpers ──────────────────────────────────────────
210
211    /// Split a path into its directory components and the leaf name.
212    /// Returns `(["a","b"], "c")` for `"a/b/c"`, or `([], "c")` for `"c"`.
213    fn split_path(path: &str) -> FsResult<(Vec<&str>, &str)> {
214        let trimmed = path.trim_matches('/');
215        if trimmed.is_empty() {
216            return Err(FsError::Internal("empty path".into()));
217        }
218        let parts: Vec<&str> = trimmed.split('/').collect();
219        let (dirs, leaf) = parts.split_at(parts.len() - 1);
220        Ok((dirs.to_vec(), leaf[0]))
221    }
222
223    /// Parse a directory path (may be empty / "/" for root) into components.
224    fn split_dir_path(path: &str) -> Vec<&str> {
225        let trimmed = path.trim_matches('/');
226        if trimmed.is_empty() {
227            return Vec::new();
228        }
229        trimmed.split('/').collect()
230    }
231
    /// Resolve a sequence of directory components starting from the root inode,
    /// returning the ancestor chain needed for CoW commit propagation.
    ///
    /// Returns `(ancestors, target_inode, target_dir_page)` where `ancestors`
    /// is a list of `(Inode, DirectoryPage, entry_index_in_parent)` from root
    /// down to (but not including) the final resolved directory.
    ///
    /// Errors with `DirectoryNotFound` if a component is missing, or
    /// `NotADirectory` if a component names a non-directory entry.
    fn resolve_dir_chain(
        &self,
        components: &[&str],
        root_inode: &Inode,
    ) -> FsResult<(Vec<AncestorEntry>, Inode, DirectoryPage)> {
        let mut ancestors: Vec<AncestorEntry> = Vec::new();
        let mut current_inode = root_inode.clone();
        let mut current_dir_page: DirectoryPage =
            self.read_obj(current_inode.directory_page_ref.block_id)?;

        for component in components {
            // Linear scan of the current directory page for the next component.
            let idx = current_dir_page
                .entries
                .iter()
                .position(|e| e.name == *component)
                .ok_or_else(|| FsError::DirectoryNotFound(component.to_string()))?;

            let entry = &current_dir_page.entries[idx];
            if entry.kind != InodeKind::Directory {
                return Err(FsError::NotADirectory(component.to_string()));
            }

            // Load the child before pushing the current level, so the
            // ancestor entry records the parent exactly as it was at descent.
            let child_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let child_dir_page: DirectoryPage =
                self.read_obj(child_inode.directory_page_ref.block_id)?;

            ancestors.push(AncestorEntry {
                inode: current_inode,
                dir_page: current_dir_page,
                child_index: idx,
            });

            current_inode = child_inode;
            current_dir_page = child_dir_page;
        }

        Ok((ancestors, current_inode, current_dir_page))
    }
276
    /// After mutating a directory's page, propagate CoW changes up through
    /// the ancestor chain to the root, then commit a new superblock.
    ///
    /// `new_dir_page` is the already-modified DirectoryPage of the target dir.
    /// `target_inode` is the inode of the directory that owns `new_dir_page`.
    /// `ancestors` is the chain from root down to (but not including) target.
    ///
    /// Every touched page and inode goes to a freshly-allocated block; blocks
    /// belonging to the previous generation are left untouched.
    fn commit_cow_chain(
        &mut self,
        sb: &Superblock,
        ancestors: &[AncestorEntry],
        target_inode: &Inode,
        new_dir_page: &DirectoryPage,
    ) -> FsResult<()> {
        // Write the modified directory page.
        let mut new_dp_block = self.allocator.allocate()?;
        self.write_obj(new_dp_block, ObjectKind::DirectoryPage, new_dir_page)?;

        // Write the modified directory inode, repointed at the fresh page.
        let mut new_inode = target_inode.clone();
        new_inode.directory_page_ref = ObjectRef::new(new_dp_block);
        new_inode.modified_at = now_secs();
        let mut new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Propagate upward through ancestors (bottom to top): each parent's
        // page gets its child entry repointed at the inode block just written,
        // then the parent's inode is rewritten to reference that new page.
        for ancestor in ancestors.iter().rev() {
            let mut parent_dp = ancestor.dir_page.clone();
            parent_dp.entries[ancestor.child_index].inode_ref = ObjectRef::new(new_inode_block);

            new_dp_block = self.allocator.allocate()?;
            self.write_obj(new_dp_block, ObjectKind::DirectoryPage, &parent_dp)?;

            let mut parent_inode = ancestor.inode.clone();
            parent_inode.directory_page_ref = ObjectRef::new(new_dp_block);
            parent_inode.modified_at = now_secs();
            new_inode_block = self.allocator.allocate()?;
            self.write_obj(new_inode_block, ObjectKind::Inode, &parent_inode)?;
        }

        // new_inode_block is now the new root inode block.
        let new_sb = Superblock {
            generation: sb.generation + 1,
            root_inode_ref: ObjectRef::new(new_inode_block),
        };
        self.commit_superblock(new_sb)?;
        Ok(())
    }
324
325    // ── Public operations ─────────────────────────────────────
326
    /// Create a new empty file at the given path.
    ///
    /// Parent directories must already exist.  The leaf name is created in
    /// the innermost directory.
    ///
    /// # Errors
    /// - `FsError::FileAlreadyExists` if an entry with that name exists.
    /// - `FsError::NotInitialized` if the filesystem is not mounted.
    pub fn create_file(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes so the directory tree read below is current.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();

        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::FileAlreadyExists(leaf.to_string()));
        }

        // Create empty extent map.
        let extent_map = ExtentMap::new();
        let em_block = self.allocator.allocate()?;
        self.write_obj(em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Create file inode referencing the empty extent map.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let file_inode = Inode {
            id: inode_id,
            kind: InodeKind::File,
            size: 0,
            directory_page_ref: ObjectRef::null(),
            extent_map_ref: ObjectRef::new(em_block),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &file_inode)?;

        // Link the new inode into the parent directory, then CoW-commit the
        // change up to the root.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::File,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
379
    /// Write data to a file at the given path.
    ///
    /// Writes are buffered in memory and only flushed to the block store on
    /// `sync()`, when another metadata operation occurs, or when the total
    /// buffer exceeds ~16 MiB.  This turns many small sequential writes
    /// (e.g. `dd bs=1k`) into a single bulk commit.
    ///
    /// Writing past EOF zero-fills the gap.  An empty `data` is a no-op.
    pub fn write_file(&mut self, path: &str, offset: u64, data: &[u8]) -> FsResult<()> {
        if data.is_empty() {
            return Ok(());
        }

        let chunk_size = max_chunk_payload(self.store.block_size());
        if chunk_size == 0 {
            // Block size too small to carry any payload after envelope overhead.
            return Err(FsError::DataTooLarge(data.len()));
        }

        // Buffer entries are keyed by the `/`-trimmed path.
        let path_key = path.trim_matches('/').to_string();

        // Take the dirty entry out of the map so `self` is free for other
        // borrows (disk reads, etc.).  We'll put it back at the end.
        let mut dirty = match self.write_buffer.remove(&path_key) {
            Some(d) => d,
            None => {
                // First buffered write — load metadata from disk.
                let (dir_parts, leaf) = Self::split_path(path)?;
                let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
                let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
                let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;
                let entry = dir_page
                    .entries
                    .iter()
                    .find(|e| e.name == leaf)
                    .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;
                if entry.kind != InodeKind::File {
                    return Err(FsError::NotAFile(leaf.to_string()));
                }
                let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
                let mut extent_map: ExtentMap =
                    self.read_obj(file_inode.extent_map_ref.block_id)?;
                // Keep extents sorted so the chunk lookups below can binary-search.
                extent_map.entries.sort_by_key(|e| e.chunk_index);
                DirtyFile {
                    dirty_chunks: HashMap::new(),
                    base_inode: file_inode.clone(),
                    extent_map,
                    size: file_inode.size,
                }
            }
        };

        let old_size = dirty.size as usize;
        let write_start = offset as usize;
        let write_end = write_start + data.len();
        let new_size = std::cmp::max(old_size, write_end);

        // For writes at/after EOF, start at the chunk containing the current
        // EOF so any gap between EOF and `offset` is materialized as zeros;
        // otherwise start at the chunk containing the write offset.
        let first_chunk = if write_start >= old_size {
            old_size / chunk_size
        } else {
            write_start / chunk_size
        };
        let last_chunk = (new_size - 1) / chunk_size;

        for chunk_idx in first_chunk..=last_chunk {
            // Logical byte range of this chunk within the (possibly grown) file.
            let chunk_file_start = chunk_idx * chunk_size;
            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, new_size);
            let chunk_len = chunk_file_end - chunk_file_start;
            let chunk_idx_u64 = chunk_idx as u64;

            // If this chunk isn't buffered yet, load its on-disk content (or zeros).
            if !dirty.dirty_chunks.contains_key(&chunk_idx_u64) {
                let mut buf = vec![0u8; chunk_len];
                if chunk_file_start < old_size {
                    if let Ok(pos) = dirty
                        .extent_map
                        .entries
                        .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
                    {
                        let existing = &dirty.extent_map.entries[pos];
                        let raw = read_encrypted_raw(
                            self.store.as_ref(),
                            self.crypto.as_ref(),
                            &self.codec,
                            existing.data_ref.block_id,
                        )?;
                        // Clamp to both the recorded plaintext length and the
                        // decrypted buffer length, in case they disagree.
                        let copy_len = std::cmp::min(existing.plaintext_len as usize, chunk_len);
                        let src_len = std::cmp::min(copy_len, raw.len());
                        buf[..src_len].copy_from_slice(&raw[..src_len]);
                    }
                }
                dirty.dirty_chunks.insert(chunk_idx_u64, buf);
            }

            let chunk_buf = dirty.dirty_chunks.get_mut(&chunk_idx_u64).unwrap();
            // Grow a previously-buffered short chunk (zero-padded) if this
            // write extends it.
            if chunk_buf.len() < chunk_len {
                chunk_buf.resize(chunk_len, 0);
            }

            // Overlay the write data onto the chunk.
            let overlap_start = std::cmp::max(chunk_file_start, write_start);
            let overlap_end = std::cmp::min(chunk_file_end, write_end);
            if overlap_start < overlap_end {
                let data_off = overlap_start - write_start;
                let chunk_off = overlap_start - chunk_file_start;
                let len = overlap_end - overlap_start;
                chunk_buf[chunk_off..chunk_off + len]
                    .copy_from_slice(&data[data_off..data_off + len]);
            }
        }

        dirty.size = new_size as u64;
        self.write_buffer.insert(path_key, dirty);

        // Auto-flush if total buffered data exceeds the threshold.
        let total_buffered: usize = self
            .write_buffer
            .values()
            .flat_map(|d| d.dirty_chunks.values())
            .map(|v| v.len())
            .sum();
        if total_buffered > WRITE_BUFFER_LIMIT {
            self.flush_all()?;
        }

        Ok(())
    }
504
    /// Read file data at the given path. Returns the requested slice.
    ///
    /// If the file has buffered (unflushed) writes, reads are served from the
    /// in-memory buffer merged with on-disk data.  Reads starting at or past
    /// EOF return an empty vector; reads extending past EOF are truncated.
    pub fn read_file(&self, path: &str, offset: u64, len: usize) -> FsResult<Vec<u8>> {
        let path_key = path.trim_matches('/');

        // Serve from the write buffer when this file has pending writes.
        if let Some(dirty) = self.write_buffer.get(path_key) {
            return self.read_file_buffered(dirty, offset, len);
        }

        let (dir_parts, leaf) = Self::split_path(path)?;
        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (_, _, dir_page) = self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let entry = dir_page
            .entries
            .iter()
            .find(|e| e.name == leaf)
            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;

        if entry.kind != InodeKind::File {
            return Err(FsError::NotAFile(leaf.to_string()));
        }

        let file_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
        let extent_map: ExtentMap = self.read_obj(file_inode.extent_map_ref.block_id)?;

        // NOTE(review): this decrypts the *entire* file and then slices the
        // requested range — O(file size) per read.  Fine for small files;
        // consider reading only the chunks overlapping [offset, offset+len)
        // for large ones (as `read_file_buffered` does).
        let full_data = self.read_all_chunks(&extent_map)?;

        let start = offset as usize;
        if start >= full_data.len() {
            return Ok(Vec::new());
        }
        let end = std::cmp::min(start + len, full_data.len());
        Ok(full_data[start..end].to_vec())
    }
543
    /// List entries in a directory at the given path.
    ///
    /// Pass `""` or `"/"` to list the root directory.  Reported sizes for
    /// files with pending buffered writes come from the write buffer, so
    /// listings stay consistent with unflushed data.
    pub fn list_directory(&self, path: &str) -> FsResult<Vec<DirListEntry>> {
        let sb = self.superblock.as_ref().ok_or(FsError::NotInitialized)?;
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;

        let components = Self::split_dir_path(path);
        let (_, _, dir_page) = self.resolve_dir_chain(&components, &root_inode)?;

        // Prefix used to reconstruct each entry's full trimmed path — the
        // same key format `write_buffer` uses.
        let dir_prefix = {
            let trimmed = path.trim_matches('/');
            if trimmed.is_empty() {
                String::new()
            } else {
                format!("{}/", trimmed)
            }
        };

        let mut result = Vec::new();
        for entry in &dir_page.entries {
            let inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            // Use buffered size if this file has pending writes.
            let size = if entry.kind == InodeKind::File {
                let full_path = format!("{}{}", dir_prefix, entry.name);
                if let Some(dirty) = self.write_buffer.get(&full_path) {
                    dirty.size
                } else {
                    inode.size
                }
            } else {
                inode.size
            };
            result.push(DirListEntry {
                name: entry.name.clone(),
                kind: entry.kind,
                size,
                inode_id: entry.inode_id,
            });
        }
        Ok(result)
    }
586
    /// Create a subdirectory at the given path.
    ///
    /// Parent directories must already exist; only the leaf is created.
    ///
    /// # Errors
    /// - `FsError::DirectoryAlreadyExists` if an entry with that name exists.
    /// - `FsError::NotInitialized` if the filesystem is not mounted.
    pub fn create_directory(&mut self, path: &str) -> FsResult<()> {
        let (dir_parts, leaf) = Self::split_path(path)?;
        self.validate_name(leaf)?;
        // Flush buffered writes so the directory tree read below is current.
        self.flush_all()?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        if dir_page.entries.iter().any(|e| e.name == leaf) {
            return Err(FsError::DirectoryAlreadyExists(leaf.to_string()));
        }

        // Create empty directory page for the new subdirectory.
        let sub_dp = DirectoryPage::new();
        let sub_dp_block = self.allocator.allocate()?;
        self.write_obj(sub_dp_block, ObjectKind::DirectoryPage, &sub_dp)?;

        // Create the directory inode referencing the empty page.
        let inode_id = self.alloc_inode_id();
        let ts = now_secs();
        let dir_inode = Inode {
            id: inode_id,
            kind: InodeKind::Directory,
            size: 0,
            directory_page_ref: ObjectRef::new(sub_dp_block),
            extent_map_ref: ObjectRef::null(),
            created_at: ts,
            modified_at: ts,
        };
        let inode_block = self.allocator.allocate()?;
        self.write_obj(inode_block, ObjectKind::Inode, &dir_inode)?;

        // Link into the parent and CoW-commit up to the root.
        dir_page.entries.push(DirectoryEntry {
            name: leaf.to_string(),
            inode_ref: ObjectRef::new(inode_block),
            inode_id,
            kind: InodeKind::Directory,
        });

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
636
    /// Remove a file or empty directory at the given path.
    ///
    /// Any buffered (unflushed) writes for the removed path are discarded
    /// first.  Removing a non-empty directory fails with `DirectoryNotEmpty`.
    pub fn remove_file(&mut self, path: &str) -> FsResult<()> {
        // Drop pending buffered writes for this path (they would be lost
        // anyway), then flush everything else so metadata is current.
        let path_key = path.trim_matches('/').to_string();
        self.write_buffer.remove(&path_key);
        self.flush_all()?;
        let (dir_parts, leaf) = Self::split_path(path)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, mut dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let idx = dir_page
            .entries
            .iter()
            .position(|e| e.name == leaf)
            .ok_or_else(|| FsError::FileNotFound(leaf.to_string()))?;

        // Directories must be empty before removal.
        let entry = &dir_page.entries[idx];
        if entry.kind == InodeKind::Directory {
            let dir_inode: Inode = self.read_obj(entry.inode_ref.block_id)?;
            let sub_page: DirectoryPage = self.read_obj(dir_inode.directory_page_ref.block_id)?;
            if !sub_page.entries.is_empty() {
                return Err(FsError::DirectoryNotEmpty(leaf.to_string()));
            }
        }

        // NOTE(review): the removed entry's inode/extent/data blocks are not
        // explicitly released to the allocator here — presumably they become
        // free when the allocator is rebuilt from the metadata tree on the
        // next `open()`; confirm against `rebuild_allocator`.
        dir_page.entries.remove(idx);
        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
        Ok(())
    }
671
672    /// Rename a file or directory.  Both `old_path` and `new_path` must share
673    /// the same parent directory (move across directories is not supported yet).
674    pub fn rename(&mut self, old_path: &str, new_path: &str) -> FsResult<()> {
675        let (old_dir, old_leaf) = Self::split_path(old_path)?;
676        let (new_dir, new_leaf) = Self::split_path(new_path)?;
677        self.validate_name(new_leaf)?;
678        self.flush_all()?;
679
680        if old_dir != new_dir {
681            return Err(FsError::Internal(
682                "rename across directories is not supported".into(),
683            ));
684        }
685
686        let sb = self
687            .superblock
688            .as_ref()
689            .ok_or(FsError::NotInitialized)?
690            .clone();
691        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
692        let (ancestors, target_inode, mut dir_page) =
693            self.resolve_dir_chain(&old_dir, &root_inode)?;
694
695        if dir_page.entries.iter().any(|e| e.name == new_leaf) {
696            return Err(FsError::FileAlreadyExists(new_leaf.to_string()));
697        }
698
699        let entry = dir_page
700            .entries
701            .iter_mut()
702            .find(|e| e.name == old_leaf)
703            .ok_or_else(|| FsError::FileNotFound(old_leaf.to_string()))?;
704
705        entry.name = new_leaf.to_string();
706
707        self.commit_cow_chain(&sb, &ancestors, &target_inode, &dir_page)?;
708        Ok(())
709    }
710
711    /// Sync / flush. Writes all buffered data to blocks and calls through
712    /// to the block store sync.
713    pub fn sync(&mut self) -> FsResult<()> {
714        self.flush_all()?;
715        self.store.sync()
716    }
717
718    // ── Internal helpers ──
719
    /// Flush a single file's buffered writes to the block store.
    ///
    /// Writes every dirty chunk to a freshly-allocated block (CoW), rewrites
    /// the extent map and the inode, relinks the parent directory entry, and
    /// commits a new superblock generation.  A missing buffer entry or one
    /// with no dirty chunks is a no-op.
    fn flush_file(&mut self, path_key: &str) -> FsResult<()> {
        let dirty = match self.write_buffer.remove(path_key) {
            Some(d) => d,
            None => return Ok(()),
        };

        if dirty.dirty_chunks.is_empty() {
            return Ok(());
        }

        // Re-resolve path from the current superblock.
        let (dir_parts, leaf) = Self::split_path(path_key)?;
        let sb = self
            .superblock
            .as_ref()
            .ok_or(FsError::NotInitialized)?
            .clone();
        let root_inode: Inode = self.read_obj(sb.root_inode_ref.block_id)?;
        let (ancestors, target_inode, dir_page) =
            self.resolve_dir_chain(&dir_parts, &root_inode)?;

        let mut extent_map = dirty.extent_map;

        // Write each dirty chunk to a new block.  HashMap iteration order is
        // arbitrary, which is fine: chunks are independent and the extent map
        // is re-sorted below.
        for (&chunk_idx, chunk_data) in &dirty.dirty_chunks {
            let data_block = self.allocator.allocate()?;
            write_encrypted_raw(
                self.store.as_ref(),
                self.crypto.as_ref(),
                &self.codec,
                data_block,
                ObjectKind::FileDataChunk,
                chunk_data,
            )?;

            // Update an existing extent entry in place, or append a new one.
            if let Some(entry) = extent_map
                .entries
                .iter_mut()
                .find(|e| e.chunk_index == chunk_idx)
            {
                entry.data_ref = ObjectRef::new(data_block);
                entry.plaintext_len = chunk_data.len() as u32;
            } else {
                extent_map.entries.push(ExtentEntry {
                    chunk_index: chunk_idx,
                    data_ref: ObjectRef::new(data_block),
                    plaintext_len: chunk_data.len() as u32,
                });
            }
        }

        // Readers binary-search the extent map, so keep it sorted.
        extent_map.entries.sort_by_key(|e| e.chunk_index);

        // Write extent map.
        let new_em_block = self.allocator.allocate()?;
        self.write_obj(new_em_block, ObjectKind::ExtentMap, &extent_map)?;

        // Write inode with the buffered size and the new extent map ref.
        let mut new_inode = dirty.base_inode;
        new_inode.size = dirty.size;
        new_inode.extent_map_ref = ObjectRef::new(new_em_block);
        new_inode.modified_at = now_secs();
        let new_inode_block = self.allocator.allocate()?;
        self.write_obj(new_inode_block, ObjectKind::Inode, &new_inode)?;

        // Update dir entry to point at the rewritten inode.
        let mut new_dir_page = dir_page.clone();
        for e in &mut new_dir_page.entries {
            if e.name == leaf {
                e.inode_ref = ObjectRef::new(new_inode_block);
            }
        }

        self.commit_cow_chain(&sb, &ancestors, &target_inode, &new_dir_page)?;
        Ok(())
    }
797
798    /// Flush all buffered file writes to the block store.
799    fn flush_all(&mut self) -> FsResult<()> {
800        let keys: Vec<String> = self.write_buffer.keys().cloned().collect();
801        for key in keys {
802            self.flush_file(&key)?;
803        }
804        Ok(())
805    }
806
807    /// Read from a file that has dirty (buffered) chunks, merging in-memory
808    /// data with on-disk data.
809    fn read_file_buffered(&self, dirty: &DirtyFile, offset: u64, len: usize) -> FsResult<Vec<u8>> {
810        let chunk_size = max_chunk_payload(self.store.block_size());
811        let file_size = dirty.size as usize;
812        let start = offset as usize;
813        if start >= file_size || len == 0 {
814            return Ok(Vec::new());
815        }
816        let end = std::cmp::min(start + len, file_size);
817        let mut result = Vec::with_capacity(end - start);
818
819        let first_chunk = start / chunk_size;
820        let last_chunk = (end - 1) / chunk_size;
821
822        for chunk_idx in first_chunk..=last_chunk {
823            let chunk_file_start = chunk_idx * chunk_size;
824            let chunk_file_end = std::cmp::min(chunk_file_start + chunk_size, file_size);
825            let chunk_idx_u64 = chunk_idx as u64;
826
827            // Get chunk data from buffer or disk.
828            let chunk_data: Vec<u8> = if let Some(buf) = dirty.dirty_chunks.get(&chunk_idx_u64) {
829                buf.clone()
830            } else if let Ok(pos) = dirty
831                .extent_map
832                .entries
833                .binary_search_by_key(&chunk_idx_u64, |e| e.chunk_index)
834            {
835                let entry = &dirty.extent_map.entries[pos];
836                let raw = read_encrypted_raw(
837                    self.store.as_ref(),
838                    self.crypto.as_ref(),
839                    &self.codec,
840                    entry.data_ref.block_id,
841                )?;
842                let plain_len = std::cmp::min(entry.plaintext_len as usize, raw.len());
843                raw[..plain_len].to_vec()
844            } else {
845                vec![0u8; chunk_file_end - chunk_file_start]
846            };
847
848            // Slice to the requested range within this chunk.
849            let read_start = if chunk_idx == first_chunk {
850                start - chunk_file_start
851            } else {
852                0
853            };
854            let read_end = if chunk_idx == last_chunk {
855                end - chunk_file_start
856            } else {
857                chunk_data.len()
858            };
859            let read_end = std::cmp::min(read_end, chunk_data.len());
860
861            if read_start < read_end {
862                result.extend_from_slice(&chunk_data[read_start..read_end]);
863            }
864        }
865
866        Ok(result)
867    }
868
869    fn alloc_inode_id(&mut self) -> InodeId {
870        let id = self.next_inode_id;
871        self.next_inode_id += 1;
872        id
873    }
874
875    fn validate_name(&self, name: &str) -> FsResult<()> {
876        if name.is_empty() || name.contains('/') || name.contains('\0') {
877            return Err(FsError::Internal("invalid name".into()));
878        }
879        if name.len() > MAX_NAME_LEN {
880            return Err(FsError::NameTooLong(name.len(), MAX_NAME_LEN));
881        }
882        Ok(())
883    }
884
885    fn read_obj<T: serde::de::DeserializeOwned>(&self, block_id: u64) -> FsResult<T> {
886        read_encrypted_object(
887            self.store.as_ref(),
888            self.crypto.as_ref(),
889            &self.codec,
890            block_id,
891        )
892    }
893
894    fn write_obj<T: serde::Serialize>(
895        &self,
896        block_id: u64,
897        kind: ObjectKind,
898        obj: &T,
899    ) -> FsResult<()> {
900        write_encrypted_object(
901            self.store.as_ref(),
902            self.crypto.as_ref(),
903            &self.codec,
904            block_id,
905            kind,
906            obj,
907        )
908    }
909
910    fn read_all_chunks(&self, extent_map: &ExtentMap) -> FsResult<Vec<u8>> {
911        let mut entries = extent_map.entries.clone();
912        entries.sort_by_key(|e| e.chunk_index);
913
914        let mut buf = Vec::new();
915        for entry in &entries {
916            let chunk = read_encrypted_raw(
917                self.store.as_ref(),
918                self.crypto.as_ref(),
919                &self.codec,
920                entry.data_ref.block_id,
921            )?;
922            // Only take plaintext_len bytes (chunk may have been decrypted from padded block).
923            let len = entry.plaintext_len as usize;
924            if len <= chunk.len() {
925                buf.extend_from_slice(&chunk[..len]);
926            } else {
927                buf.extend_from_slice(&chunk);
928            }
929        }
930        Ok(buf)
931    }
932
933    fn read_storage_header(&self) -> FsResult<StorageHeader> {
934        let block = self.store.read_block(BLOCK_STORAGE_HEADER)?;
935        if block.len() < 4 {
936            return Err(FsError::InvalidSuperblock);
937        }
938        let len = u32::from_le_bytes([block[0], block[1], block[2], block[3]]) as usize;
939        if len == 0 || 4 + len > block.len() {
940            return Err(FsError::InvalidSuperblock);
941        }
942        self.codec
943            .deserialize_object::<StorageHeader>(&block[4..4 + len])
944    }
945
946    fn commit_superblock(&mut self, sb: Superblock) -> FsResult<()> {
947        self.txn.commit(
948            self.store.as_ref(),
949            self.crypto.as_ref(),
950            &self.codec,
951            &self.allocator,
952            &sb,
953        )?;
954        self.superblock = Some(sb);
955        Ok(())
956    }
957
958    /// Walk the metadata tree from the superblock and mark all referenced blocks
959    /// as allocated in the allocator. Used during open/mount.
960    fn rebuild_allocator(&mut self, sb: &Superblock) -> FsResult<()> {
961        // Mark superblock block.
962        // The superblock_ref's block was allocated by the transaction manager.
963        // We also need to mark root pointer blocks, but those are reserved (0,1,2).
964
965        // We need to find which block the superblock is stored in.
966        // The root pointer tells us.
967        let (rp, _) = TransactionManager::recover_latest(self.store.as_ref(), &self.codec)?
968            .ok_or(FsError::InvalidRootPointer)?;
969        self.allocator.mark_allocated(rp.superblock_ref.block_id)?;
970
971        // Walk root inode.
972        self.mark_inode_tree(sb.root_inode_ref.block_id)?;
973
974        // Set next_inode_id to be higher than any seen inode.
975        // (We updated it during the walk.)
976
977        Ok(())
978    }
979
980    fn mark_inode_tree(&mut self, inode_block: u64) -> FsResult<()> {
981        self.allocator.mark_allocated(inode_block)?;
982        let inode: Inode = self.read_obj(inode_block)?;
983
984        if inode.id >= self.next_inode_id {
985            self.next_inode_id = inode.id + 1;
986        }
987
988        match inode.kind {
989            InodeKind::Directory => {
990                if !inode.directory_page_ref.is_null() {
991                    self.allocator
992                        .mark_allocated(inode.directory_page_ref.block_id)?;
993                    let dir_page: DirectoryPage =
994                        self.read_obj(inode.directory_page_ref.block_id)?;
995                    for entry in &dir_page.entries {
996                        self.mark_inode_tree(entry.inode_ref.block_id)?;
997                    }
998                }
999            }
1000            InodeKind::File => {
1001                if !inode.extent_map_ref.is_null() {
1002                    self.allocator
1003                        .mark_allocated(inode.extent_map_ref.block_id)?;
1004                    let extent_map: ExtentMap = self.read_obj(inode.extent_map_ref.block_id)?;
1005                    for entry in &extent_map.entries {
1006                        self.allocator.mark_allocated(entry.data_ref.block_id)?;
1007                    }
1008                }
1009            }
1010        }
1011        Ok(())
1012    }
1013}
1014
/// Return type for directory listings (used by FFI and public API).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DirListEntry {
    /// Entry name within its parent directory.
    pub name: String,
    /// Whether this entry is a file or a directory.
    pub kind: InodeKind,
    /// Size in bytes for files (taken from the inode's `size` field);
    /// meaning for directories is not established here — TODO confirm.
    pub size: u64,
    /// The entry's inode identifier.
    pub inode_id: InodeId,
}