rsext4 0.6.0

A lightweight ext4 file system.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
use super::{mkfs::read_superblock, *};

impl Ext4FileSystem {
    /// Bootstraps the root directory by forwarding to
    /// `create_root_directory_entry`.
    ///
    /// The helper's result is returned unchanged.
    fn create_root_dir<B>(&mut self, block_dev: &mut Jbd2Dev<B>) -> Ext4Result<()>
    where
        B: BlockDevice,
    {
        // This method is only a forwarding shim; all on-disk work is done by
        // the directory bootstrap helper.
        create_root_directory_entry(self, block_dev)
    }

    /// Marks `superblock` as in use by the current mount.
    ///
    /// Clears the `EXT4_VALID_FS` bit in `s_state` and increments
    /// `s_mnt_count` by one, saturating at the field's maximum instead of
    /// wrapping.
    fn dirty_for_mount(superblock: &mut Ext4Superblock) {
        let previous_mounts = superblock.s_mnt_count;
        superblock.s_mnt_count = previous_mounts.saturating_add(1);
        superblock.s_state &= !Ext4Superblock::EXT4_VALID_FS;
    }

    /// Returns the inode size used to configure the inode cache.
    ///
    /// Uses `superblock.s_inode_size`, falling back to `DEFAULT_INODE_SIZE`
    /// when the superblock reports zero.
    fn inode_cache_size(superblock: &Ext4Superblock) -> usize {
        let on_disk_size = superblock.s_inode_size;
        if on_disk_size == 0 {
            DEFAULT_INODE_SIZE as usize
        } else {
            on_disk_size as usize
        }
    }

    /// Rebuilds all runtime state that is derived from `self.superblock`.
    ///
    /// Recomputes the group count, reloads the group descriptors from
    /// `block_dev`, and replaces the allocators and the bitmap, inode, and
    /// data-block caches with freshly constructed instances.
    ///
    /// # Errors
    ///
    /// Propagates any failure from loading the group descriptors. In that
    /// case `self.group_count` has already been updated, while the
    /// descriptors, allocators, and caches are left as they were.
    fn reset_runtime_from_superblock<B>(&mut self, block_dev: &mut Jbd2Dev<B>) -> Ext4Result<()>
    where
        B: BlockDevice,
    {
        let groups = self.superblock.block_groups_count();
        self.group_count = groups;

        let descriptors = Self::load_group_descriptors(block_dev, &self.superblock, groups)?;
        self.group_descs = descriptors;

        self.block_allocator = BlockAllocator::new(&self.superblock);
        self.inode_allocator = InodeAllocator::new(&self.superblock);
        self.bitmap_cache = BitmapCache::create_default();

        // The inode cache must be sized from the filesystem's own inode size.
        let inode_size = Self::inode_cache_size(&self.superblock);
        self.inodetable_cahce = InodeCache::new(INODE_CACHE_MAX, inode_size);
        self.datablock_cache = DataBlockCache::new(DATABLOCK_CACHE_MAX, BLOCK_SIZE);

        Ok(())
    }

    /// Re-reads the superblock from `block_dev` and rebuilds the runtime state
    /// derived from it.
    ///
    /// The freshly read superblock replaces `self.superblock`, is verified,
    /// is marked dirty for this mount, and then drives
    /// `reset_runtime_from_superblock`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the superblock cannot be read, and propagates
    /// failures from superblock verification or from rebuilding the runtime
    /// state.
    fn reload_after_journal_replay<B>(&mut self, block_dev: &mut Jbd2Dev<B>) -> Ext4Result<()>
    where
        B: BlockDevice,
    {
        let recovered = match read_superblock(block_dev) {
            Ok(sb) => sb,
            Err(_) => return Err(Ext4Error::io()),
        };

        // Install the new superblock before verifying it, so verification and
        // the dirty marking both operate on the stored copy.
        self.superblock = recovered;
        self.superblock.verify_superblock()?;
        Self::dirty_for_mount(&mut self.superblock);

        self.reset_runtime_from_superblock(block_dev)
    }

    /// Clears the `EXT4_FEATURE_INCOMPAT_RECOVER` bit in the in-memory
    /// superblock's incompatible-feature flags.
    fn clear_recovery_state(&mut self) {
        let incompat = &mut self.superblock.s_feature_incompat;
        *incompat &= !Ext4Superblock::EXT4_FEATURE_INCOMPAT_RECOVER;
    }

    /// Sets the `EXT4_FEATURE_INCOMPAT_RECOVER` bit in the in-memory
    /// superblock's incompatible-feature flags.
    fn set_recovery_state(&mut self) {
        let incompat = &mut self.superblock.s_feature_incompat;
        *incompat |= Ext4Superblock::EXT4_FEATURE_INCOMPAT_RECOVER;
    }

    /// Checks whether the superblock's `s_lpf_ino` hint points at a usable
    /// directory inode.
    ///
    /// Returns `Ok(false)` when the hint is zero, or when the referenced inode
    /// has a zero mode or is not a directory; `Ok(true)` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates failures from building the inode number or from loading the
    /// inode.
    fn valid_lost_found_hint<B>(&mut self, block_dev: &mut Jbd2Dev<B>) -> Ext4Result<bool>
    where
        B: BlockDevice,
    {
        let hint = self.superblock.s_lpf_ino;
        if hint == 0 {
            return Ok(false);
        }

        let inode_num = InodeNumber::new(hint)?;
        let inode = self.get_inode_by_num(block_dev, inode_num)?;

        // A zero mode means the inode slot was never initialized.
        if inode.i_mode == 0 {
            return Ok(false);
        }
        Ok(inode.is_dir())
    }

    /// Resolves the physical block backing every logical block of the journal
    /// inode, in logical order.
    ///
    /// The number of logical blocks is the inode size divided by `BLOCK_SIZE`,
    /// rounded up.
    ///
    /// # Errors
    ///
    /// Propagates failures from resolving the inode's block mapping. Returns a
    /// corruption error when a logical index does not fit in `u32` or has no
    /// entry in the resolved mapping.
    fn journal_blocks<B>(
        &mut self,
        block_dev: &mut Jbd2Dev<B>,
        journal_inode: &mut Ext4Inode,
    ) -> Ext4Result<Vec<AbsoluteBN>>
    where
        B: BlockDevice,
    {
        let block_total = journal_inode.size().div_ceil(BLOCK_SIZE as u64);
        let mapping = resolve_inode_block_allextend(self, block_dev, journal_inode)?;

        // Collecting into a `Result` stops at the first unmapped or
        // out-of-range logical block.
        (0..block_total)
            .map(|index| -> Ext4Result<AbsoluteBN> {
                let key = u32::try_from(index).map_err(|_| Ext4Error::corrupted())?;
                mapping.get(&key).copied().ok_or_else(Ext4Error::corrupted)
            })
            .collect()
    }

    /// Mounts an ext4 filesystem from the given block device.
    pub fn mount<B: BlockDevice>(block_dev: &mut Jbd2Dev<B>) -> Result<Self, Ext4Error> {
        debug!("Start mounting Ext4 filesystem...");

        // Mount flow:
        // 1. read and verify the superblock,
        // 2. load only enough metadata to locate/replay the journal,
        // 3. reload metadata from the recovered home blocks,
        // 4. repair bootstrap directories if they are missing.
        let mut superblock = read_superblock(block_dev).map_err(|_| Ext4Error::io())?;

        if superblock.s_magic != EXT4_SUPER_MAGIC {
            error!(
                "Invalid magic: {:#x}, expected: {:#x}",
                superblock.s_magic, EXT4_SUPER_MAGIC
            );
            return Err(Ext4Error::invalid_magic());
        }
        debug!("Superblock magic verified");
        superblock.verify_superblock()?;

        // Continue mounting even for an error-state filesystem so higher layers
        // can inspect or attempt repair.
        if superblock.s_state & Ext4Superblock::EXT4_ERROR_FS != 0 {
            warn!("Filesystem is in error state");
        }

        // Mark the filesystem as "not cleanly unmounted" before any writes.
        Self::dirty_for_mount(&mut superblock);

        let group_count = superblock.block_groups_count();
        debug!("Block group count: {group_count}");

        let group_descs = Self::load_group_descriptors(block_dev, &superblock, group_count)?;
        debug!("Loaded {} group descriptors", group_descs.len());

        let block_allocator = BlockAllocator::new(&superblock);
        let inode_allocator = InodeAllocator::new(&superblock);
        debug!("Allocators initialized");

        let bitmap_cache = BitmapCache::create_default();
        debug!("Bitmap cache initialized (lazy loading)");

        // NOTE: inode size is a filesystem property (superblock.s_inode_size), not a fixed constant.
        // Using a wrong inode size will make inode table offsets incorrect and may read zeroed inodes
        // (e.g. /dev becomes mode=0, then VFS mount fails with ENOTDIR).
        let inode_cache = InodeCache::new(INODE_CACHE_MAX, Self::inode_cache_size(&superblock));
        debug!("Inode cache initialized");

        let datablock_cache = DataBlockCache::new(DATABLOCK_CACHE_MAX, BLOCK_SIZE);
        debug!("Data block cache initialized");

        let mut fs = Self {
            superblock,
            group_descs,
            block_allocator,
            inode_allocator,
            bitmap_cache,
            root_inode: InodeNumber::new(2)?,
            inodetable_cahce: inode_cache,
            datablock_cache,
            group_count,
            mounted: true,
            journal_sb_block_start: None,
        };
        // Dump the core topology once so later failures have useful context in
        // the logs.
        debug_super_and_desc(&fs.superblock, &fs);

        // Journal bootstrap has two stages: ensure the journal inode exists,
        // then load its superblock and enable replay on the device wrapper.
        {
            let needs_recovery = fs
                .superblock
                .has_feature_incompat(Ext4Superblock::EXT4_FEATURE_INCOMPAT_RECOVER);

            if fs.superblock.has_journal() {
                let journal_inode_num = InodeNumber::new(JOURNAL_FILE_INODE as u32)?;
                let journal_inode = fs
                    .get_inode_by_num(block_dev, journal_inode_num)
                    .inspect_err(|e| {
                        error!("Failed to load journal inode {journal_inode_num}: {e}");
                    })?;
                let journal_exists = journal_inode.i_mode != 0;

                if fs
                    .superblock
                    .has_feature_compat(Ext4Superblock::EXT4_FEATURE_COMPAT_HAS_JOURNAL)
                    && !journal_exists
                {
                    if needs_recovery {
                        error!("Journal inode missing while filesystem needs recovery");
                        return Err(Ext4Error::corrupted());
                    }
                    create_journal_entry(&mut fs, block_dev).expect("create journal entry failed");
                }
            }
            if needs_recovery && !fs.superblock.has_journal() {
                error!("Filesystem needs journal recovery, but no journal is present");
                return Err(Ext4Error::corrupted());
            }
            if (block_dev.is_use_journal() || needs_recovery) && fs.superblock.has_journal() {
                // By this point the journal inode must exist, so resolve its
                // first data block and hand the loaded journal superblock to
                // `Jbd2Dev`.
                let mut j_inode = fs
                    .get_inode_by_num(block_dev, InodeNumber::new(JOURNAL_FILE_INODE as u32)?)
                    .expect("load journal inode failed");

                let journal_blocks =
                    fs.journal_blocks(block_dev, &mut j_inode)
                        .inspect_err(|e| {
                            error!("Failed to resolve journal blocks: {e}");
                        })?;
                let journal_first_block = journal_blocks.first().copied().ok_or_else(|| {
                    error!("Journal has no mapped blocks");
                    Ext4Error::corrupted()
                })?;

                fs.journal_sb_block_start = Some(journal_first_block);
                let journal_data = fs
                    .datablock_cache
                    .get_or_load(block_dev, journal_first_block)
                    .expect("load journal superblock block failed")
                    .data
                    .clone();

                let j_sb = JournalSuperBllockS::from_disk_bytes(&journal_data);

                block_dev.set_journal_superblock_with_mapping(j_sb, journal_blocks)?;

                if needs_recovery {
                    // Replay before touching ordinary filesystem metadata.
                    // Until this completes, home blocks may be stale. A clean
                    // filesystem with journaling enabled still needs JBD2
                    // state initialized for future metadata writes, but it
                    // must not force replay without the ext4 recovery bit.
                    let original_journal_use = block_dev.is_use_journal();
                    if !original_journal_use {
                        info!("Filesystem needs journal recovery; enabling replay for mount");
                        block_dev.set_journal_use(true);
                    }
                    let replay_status = block_dev.journal_replay_checked();
                    block_dev.set_journal_use(original_journal_use);
                    if replay_status != ReplayStatus::Complete {
                        error!("Journal replay did not complete: status={replay_status:?}");
                        return Err(Ext4Error::corrupted());
                    }

                    // Journal replay can update the superblock, group
                    // descriptors, bitmaps, inode table, and directory blocks.
                    // Drop all metadata read before replay and continue
                    // mounting from the recovered on-disk state.
                    fs.reload_after_journal_replay(block_dev)?;
                    fs.clear_recovery_state();
                } else if block_dev.is_use_journal() {
                    fs.set_recovery_state();
                }
            }
            // If the filesystem was created without a journal (e.g. small images
            // where mkfs.ext4 omits it), disable journal_use so that metadata
            // writes bypass the journal path instead of hitting the
            // "system uninitialized" guard on every write.
            if !fs.superblock.has_journal() {
                block_dev.set_journal_use(false);
            }
        }

        // rootinode check !
        {
            let root_inode = fs.get_root(block_dev).map_err(|e| {
                error!("Failed to load root inode: {e}");
                Ext4Error::io()
            })?;
            if root_inode.i_mode == 0 || !root_inode.is_dir() {
                warn!(
                    "Root inode is uninitialized or not a directory, creating root and \
                     lost+found... i_mode: {}, is_dir: {}",
                    root_inode.i_mode,
                    root_inode.is_dir()
                );
                fs.create_root_dir(block_dev).map_err(|_| Ext4Error::io())?;
            }
        }

        // Verify the recovery directory after the root directory is known good.
        {
            if fs.valid_lost_found_hint(block_dev)? {
                let ino = fs.superblock.s_lpf_ino;
                info!("/lost+found exists (superblock hint inode={ino})");
            } else {
                if fs.superblock.s_lpf_ino != 0 {
                    let ino = fs.superblock.s_lpf_ino;
                    warn!("s_lpf_ino={ino} is not a valid directory, falling back to path scan");
                }

                match get_file_inode(&mut fs, block_dev, "/lost+found") {
                    Ok(Some((ino, inode))) if inode.is_dir() => {
                        fs.superblock.s_lpf_ino = ino.raw();
                        fs.sync_superblock(block_dev)?;
                        info!("/lost+found exists (path resolution, repaired hint inode={ino})");
                    }
                    Ok(Some((_ino, _inode))) => {
                        error!("/lost+found exists but is not a directory");
                        return Err(Ext4Error::corrupted());
                    }
                    Ok(None) => {
                        info!("/lost+found not found by path scan;will create!");
                        if create_lost_found_directory(&mut fs, block_dev).is_err() {
                            warn!("/lost+found missing and create failed");
                        }
                    }
                    Err(err) => {
                        error!("Failed to resolve /lost+found: {err}");
                        return Err(err);
                    }
                }
            }
        }

        // Emit a one-shot bitmap usage summary and verify bitmap checksums on
        // group 0 when metadata checksums are enabled.
        {
            let g0 = match fs.group_descs.first() {
                Some(desc) => desc,
                None => return Err(Ext4Error::bad_superblock()),
            };
            let inode_bitmap_blk = g0.inode_bitmap();
            let data_bitmap_blk = g0.block_bitmap();
            let inode_cache_key = CacheKey::new_inode(BGIndex::new(0));
            let data_cache_key = CacheKey::new_block(BGIndex::new(0));

            let inode_bitmap_data = fs
                .bitmap_cache
                .get_or_load(
                    block_dev,
                    inode_cache_key,
                    AbsoluteBN::new(inode_bitmap_blk),
                )
                .expect("block read failed")
                .clone();
            let blockbitmap_data = fs
                .bitmap_cache
                .get_or_load(block_dev, data_cache_key, AbsoluteBN::new(data_bitmap_blk))
                .expect("block read failed");

            if ext4_superblock_has_metadata_csum(&fs.superblock) {
                if !g0.is_inode_bitmap_uninit() {
                    let stored_inode = g0.inode_bitmap_csum(&fs.superblock);
                    let computed_inode =
                        ext4_inode_bitmap_csum32(&fs.superblock, &inode_bitmap_data.data);
                    let expected_inode = computed_inode;
                    if !g0.inode_bitmap_csum_matches(&fs.superblock, expected_inode) {
                        error!(
                            "Inode bitmap checksum mismatch group=0 expected={expected_inode:#x} \
                             stored={stored_inode:#x} inode_bitmap_block={inode_bitmap_blk} \
                             inode_table_block={} flags={:#x}",
                            g0.inode_table(),
                            g0.bg_flags
                        );
                        return Err(Ext4Error::checksum());
                    }
                }

                if !g0.is_block_bitmap_uninit() {
                    let stored_block = g0.block_bitmap_csum(&fs.superblock);
                    let computed_block =
                        ext4_block_bitmap_csum32(&fs.superblock, &blockbitmap_data.data);
                    let expected_block = computed_block;
                    if !g0.block_bitmap_csum_matches(&fs.superblock, expected_block) {
                        error!(
                            "Block bitmap checksum mismatch group=0 expected={expected_block:#x} \
                             stored={stored_block:#x} block_bitmap_block={data_bitmap_blk} \
                             inode_table_block={} flags={:#x}",
                            g0.inode_table(),
                            g0.bg_flags
                        );
                        return Err(Ext4Error::checksum());
                    }
                }
            }

            let mut inode_count: u64 = 0;
            let mut datablock_count: u64 = 0;
            let inode_data_array = &inode_bitmap_data.data;
            let datablock_array = &blockbitmap_data.data;

            inode_data_array.iter().for_each(|&bit| {
                let mut tmp = bit;
                loop {
                    if tmp == 0 {
                        break;
                    }
                    if tmp & 0x1 == 0x1 {
                        inode_count += 1;
                    }
                    tmp >>= 1;
                }
            });

            datablock_array.iter().for_each(|&bit| {
                let mut tmp = bit;
                loop {
                    if tmp == 0 {
                        break;
                    }
                    if tmp & 0x1 == 0x1 {
                        datablock_count += 1;
                    }
                    tmp >>= 1;
                }
            });

            debug!(
                "Bitmap usage: inodes used = {inode_count}, data blocks used = {datablock_count}"
            );
        }

        info!("Ext4 filesystem mounted");
        info!("  - block size: {} bytes", fs.superblock.block_size());
        info!("  - total blocks: {}", fs.superblock.blocks_count());
        info!("  - free blocks: {}", fs.superblock.free_blocks_count());
        info!("  - total inodes: {}", fs.superblock.s_inodes_count);
        info!("  - free inodes: {}", fs.superblock.s_free_inodes_count);
        // Flush metadata once at the end of mount so any replay state changes
        // or bootstrap repairs are persisted before normal operation begins.
        // The superblock is written with EXT4_VALID_FS cleared so a later mount
        // can distinguish an unclean shutdown from a real EXT4_ERROR_FS state.
        fs.sync_filesystem(block_dev)?;
        block_dev.umount_commit();

        Ok(fs)
    }

    /// Reads every block-group descriptor from the group descriptor table, in
    /// ascending group order.
    ///
    /// The table is taken to start at byte offset `BLOCK_SIZE`, and each
    /// descriptor occupies `superblock.get_desc_size()` bytes. Every
    /// descriptor's checksum is verified before it is accepted.
    ///
    /// # Errors
    ///
    /// * An I/O error if a table block cannot be read.
    /// * A bad-superblock error if a descriptor would extend past the end of
    ///   the device buffer.
    /// * Whatever `verify_checksum` reports for a descriptor that fails
    ///   validation.
    fn load_group_descriptors<B>(
        block_dev: &mut Jbd2Dev<B>,
        superblock: &Ext4Superblock,
        group_count: u32,
    ) -> Result<Vec<Ext4GroupDesc>, Ext4Error>
    where
        B: BlockDevice,
    {
        let block_bytes = BLOCK_SIZE as u64;
        let table_start: u64 = block_bytes;
        let desc_size = superblock.get_desc_size() as usize;

        let mut descriptors = Vec::new();
        // Tracks which table block currently sits in the device buffer, so
        // consecutive descriptors in the same block need only one read.
        let mut loaded: Option<AbsoluteBN> = None;

        debug!("Loading group descriptors: {group_count} groups, desc_size = {desc_size} bytes");
        for group_id in 0..group_count {
            let offset = table_start + group_id as u64 * desc_size as u64;
            let wanted = AbsoluteBN::new(offset / block_bytes);
            let start = (offset % block_bytes) as usize;

            if loaded != Some(wanted) {
                if block_dev.read_block(wanted).is_err() {
                    return Err(Ext4Error::io());
                }
                loaded = Some(wanted);
            }

            let buffer = block_dev.buffer();
            let end = start + desc_size;
            let Some(raw) = buffer.get(start..end) else {
                error!(
                    "GDT out of range: group_id={}, in_block={}, desc_size={}, buffer_len={}",
                    group_id,
                    start,
                    desc_size,
                    buffer.len()
                );
                return Err(Ext4Error::bad_superblock());
            };

            let desc = Ext4GroupDesc::from_disk_bytes(raw);
            desc.verify_checksum(superblock, group_id)?;
            descriptors.push(desc);
        }

        debug!(
            "Successfully loaded {} group descriptors",
            descriptors.len()
        );
        Ok(descriptors)
    }
}

/// Free-function entry point that forwards to [`Ext4FileSystem::mount`].
///
/// Logs the outcome (an info line on success, an error line carrying the
/// failure otherwise) and returns the result unchanged.
pub fn mount<B>(block_dev: &mut Jbd2Dev<B>) -> Ext4Result<Ext4FileSystem>
where
    B: BlockDevice,
{
    Ext4FileSystem::mount(block_dev)
        .inspect(|_| {
            info!("Ext4 filesystem mounted");
        })
        .inspect_err(|e| {
            error!("Mount failed: {e}");
        })
}