Skip to main content

ext4_fs/
extfs.rs

1use crate::{
2    error::Ext4Error,
3    reader::FileReader,
4    structs::{
5        Descriptor, Directory, Ext4Hash, Extents, FileInfo, HashValue, Inode, InodeType, Stat,
6    },
7    superblock::block::{IncompatFlags, SuperBlock},
8};
9use base16ct::lower::encode_str;
10use digest_io::IoWrapper;
11use log::error;
12use md5::{Digest, Md5};
13use sha1::Sha1;
14use sha2::Sha256;
15use std::{
16    collections::HashMap,
17    io::{BufReader, Read, copy},
18};
19
20/// Create a reader that can parse the ext4 filesystem
21pub struct Ext4Reader<T: std::io::Seek + std::io::Read> {
22    pub fs: BufReader<T>,
23    /// Default is probably 4096
24    pub blocksize: u16,
25    /// Will be 0. Unless you are reading a disk image file like QCOW or VMDK
26    pub offset_start: u64,
27    pub(crate) descriptors: Option<Vec<Descriptor>>,
28    pub(crate) incompat_flags: Vec<IncompatFlags>,
29    pub(crate) blocks_per_group: u32,
30    pub(crate) fs_size: u64,
31    pub(crate) number_blocks: u32,
32    pub(crate) inode_size: u16,
33    pub(crate) inodes_per_group: u32,
34    pub(crate) cache_names: HashMap<u64, String>,
35    pub(crate) last_mount_path: String,
36}
37
38pub trait Ext4ReaderAction<'ext4, 'reader, T: std::io::Seek + std::io::Read> {
39    /// Return file info about the root directory. Can be used to start a file listing
40    fn root(&mut self) -> Result<FileInfo, Ext4Error>;
41    /// Read a directory based on provided inode value
42    fn read_dir(&mut self, inode: u32) -> Result<FileInfo, Ext4Error>;
43    /// Return the `SuperBlock` information for the ext4 filesystem
44    fn superblock(&mut self) -> Result<SuperBlock, Ext4Error>;
45    /// Return descriptors for the ext4 filesystem
46    fn descriptors(&mut self) -> Result<Vec<Descriptor>, Ext4Error>;
47    /// Return extents for a provide inode
48    fn extents(&mut self, inode: u32) -> Result<Option<Extents>, Ext4Error>;
49    /// Stat a file
50    fn stat(&mut self, inode: u32) -> Result<Stat, Ext4Error>;
51    /// Hash a file. MD5, SHA1, SHA256 are supported
52    fn hash(&mut self, inode: u32, hash: &Ext4Hash) -> Result<HashValue, Ext4Error>;
53    /// Create a reader to stream a file from the ext4 filesystem.
54    fn reader(&'reader mut self, inode: u32) -> Result<FileReader<'reader, T>, Ext4Error>;
55    /// Read the contents of a file into memory. **WARNING** this will read the entire file regardless of size into memory!
56    fn read(&mut self, inode: u32) -> Result<Vec<u8>, Ext4Error>;
57    /// Return verbose inode information for the provided inode
58    fn inode_verbose(&mut self, inode: u32) -> Result<Inode, Ext4Error>;
59}
60
61impl<T: std::io::Seek + std::io::Read> Ext4Reader<T> {
62    /// Initialize an ext4 filesystem reader. This reader will automatically set the correct blocksize if you do not know it
63    pub fn new(
64        fs: BufReader<T>,
65        blocksize: u16,
66        offset_start: u64,
67    ) -> Result<Ext4Reader<T>, Ext4Error> {
68        let mut reader = Ext4Reader {
69            fs,
70            blocksize,
71            offset_start,
72            descriptors: None,
73            incompat_flags: Vec::new(),
74            blocks_per_group: 0,
75            fs_size: 0,
76            number_blocks: 0,
77            inode_size: 0,
78            inodes_per_group: 0,
79            cache_names: HashMap::new(),
80            last_mount_path: String::new(),
81        };
82
83        let block = SuperBlock::read_superblock(&mut reader.fs, reader.offset_start)?;
84        reader.last_mount_path = block.last_mount_path;
85        let size = 1024;
86        let base: u16 = 2;
87        reader.blocksize = size * base.pow(block.block_size);
88        reader.incompat_flags = block.incompatible_features_flags.clone();
89        reader.blocks_per_group = block.number_blocks_per_block_group;
90        reader.fs_size = block.number_blocks as u64 * blocksize as u64;
91        reader.number_blocks = block.number_blocks;
92        reader.inode_size = block.inode_size;
93        reader.inodes_per_group = block.number_inodes_per_block_group;
94        reader.descriptors = Some(Descriptor::read_descriptor(&mut reader)?);
95        Ok(reader)
96    }
97}
98
99impl<'ext4, 'reader, T: std::io::Seek + std::io::Read> Ext4ReaderAction<'ext4, 'reader, T>
100    for Ext4Reader<T>
101{
102    fn root(&mut self) -> Result<FileInfo, Ext4Error> {
103        let root_inode = 2;
104        self.read_dir(root_inode)
105    }
106
107    fn read_dir(&mut self, inode: u32) -> Result<FileInfo, Ext4Error> {
108        let inode_value = Inode::read_inode_table(self, inode)?;
109
110        if let Some(extent) = &inode_value.extents {
111            let dirs = Directory::read_directory_data(self, extent)?;
112            let mut info = FileInfo::new(inode_value, dirs, inode as u64);
113            if let Some(name) = self.cache_names.get(&info.inode) {
114                info.name = name.clone();
115            }
116            let root = 2;
117            if inode == root {
118                info.name = format!(
119                    "{}/{}",
120                    self.last_mount_path.trim_end_matches("/"),
121                    info.name
122                );
123            }
124            update_cache(&mut self.cache_names, &info);
125            return Ok(info);
126        }
127        error!("[ext4-fs] No extent data found. Cannot read directory");
128        Err(Ext4Error::Directory)
129    }
130
131    fn superblock(&mut self) -> Result<SuperBlock, Ext4Error> {
132        SuperBlock::read_superblock(&mut self.fs, self.offset_start)
133    }
134
135    fn stat(&mut self, inode: u32) -> Result<Stat, Ext4Error> {
136        let inode_value = Inode::read_inode_table(self, inode)?;
137        Ok(Stat::new(inode_value, inode as u64))
138    }
139
140    fn hash(&mut self, inode: u32, hashes: &Ext4Hash) -> Result<HashValue, Ext4Error> {
141        if !hashes.md5 && !hashes.sha1 && !hashes.sha256 {
142            return Ok(HashValue {
143                md5: String::new(),
144                sha1: String::new(),
145                sha256: String::new(),
146            });
147        }
148        let inode_value = Inode::read_inode_table(self, inode)?;
149        if inode_value.inode_type != InodeType::File {
150            return Err(Ext4Error::NotAFile);
151        }
152        let mut md5 = IoWrapper(Md5::new());
153        let mut sha1 = IoWrapper(Sha1::new());
154        let mut sha256 = IoWrapper(Sha256::new());
155
156        let mut file_reader = self.reader(inode)?;
157        // Keep track of how many bytes we read
158        let mut bytes_read = 0;
159        // Keep track of our cumulative buffer size when reading in chunks
160        let mut buf_size = 0;
161        // Read file in small chunks
162        let mut temp_buf_size = 65536;
163        loop {
164            let mut temp_buf = vec![0u8; temp_buf_size];
165            let bytes = match file_reader.read(&mut temp_buf) {
166                Ok(result) => result,
167                Err(err) => {
168                    error!("[ext4-fs] Failed to read bytes for inode {inode}: {err:?}");
169                    return Err(Ext4Error::FailedToRead);
170                }
171            };
172
173            // If our reader returns 0 bytes. Then something went wrong
174            if bytes == 0 {
175                break;
176            }
177
178            bytes_read += bytes;
179            if bytes_read > inode_value.size as usize {
180                temp_buf_size = bytes_read - inode_value.size as usize;
181            }
182
183            // Make sure our temp buff does not have any extra zeros from the initialization
184            if bytes < temp_buf_size {
185                temp_buf = temp_buf[0..bytes].to_vec();
186            } else if bytes > inode_value.size as usize {
187                // Also check for opposite
188                // Small files maybe allocated more block bytes than needed
189                // Ex: A file less than 4k in size
190                temp_buf = temp_buf[0..inode_value.size as usize].to_vec();
191            }
192
193            // We may have read too many bytes at the end of the file
194            // If we have, adjust our buffer a little
195            if bytes_read > inode_value.size as usize && inode_value.size as usize > buf_size {
196                temp_buf = temp_buf[0..(inode_value.size as usize - buf_size)].to_vec();
197            }
198            buf_size += temp_buf.len();
199
200            if hashes.md5 {
201                let _ = copy(&mut temp_buf.as_slice(), &mut md5);
202            }
203            if hashes.sha1 {
204                let _ = copy(&mut temp_buf.as_slice(), &mut sha1);
205            }
206            if hashes.sha256 {
207                let _ = copy(&mut temp_buf.as_slice(), &mut sha256);
208            }
209
210            // Once we have read enough bytes, we are done
211            if bytes_read >= inode_value.size as usize {
212                break;
213            }
214        }
215
216        let mut hash_value = HashValue {
217            md5: String::new(),
218            sha1: String::new(),
219            sha256: String::new(),
220        };
221
222        if hashes.md5 {
223            let hash = md5.0.finalize();
224            let mut buf = [0u8; 32];
225            hash_value.md5 = encode_str(&hash, &mut buf).unwrap_or_default().to_string();
226        }
227        if hashes.sha1 {
228            let hash = sha1.0.finalize();
229            let mut buf = [0u8; 40];
230            hash_value.sha1 = encode_str(&hash, &mut buf).unwrap_or_default().to_string();
231        }
232        if hashes.sha256 {
233            let hash = sha256.0.finalize();
234            let mut buf = [0u8; 64];
235            hash_value.sha256 = encode_str(&hash, &mut buf).unwrap_or_default().to_string();
236        }
237
238        Ok(hash_value)
239    }
240
241    fn read(&mut self, inode: u32) -> Result<Vec<u8>, Ext4Error> {
242        let inode_value = Inode::read_inode_table(self, inode)?;
243        if inode_value.inode_type != InodeType::File {
244            return Err(Ext4Error::NotAFile);
245        }
246        let mut file_reader = self.reader(inode)?;
247        let mut buf = vec![0; inode_value.size as usize];
248        if let Err(err) = file_reader.read(&mut buf) {
249            error!("[ext4-fs] Could not read file: {err:?}");
250            return Err(Ext4Error::ReadFile);
251        }
252
253        Ok(buf)
254    }
255
256    fn reader(&'reader mut self, inode: u32) -> Result<FileReader<'reader, T>, Ext4Error> {
257        let inode_value = Inode::read_inode_table(self, inode)?;
258        if inode_value.inode_type != InodeType::File {
259            return Err(Ext4Error::NotAFile);
260        }
261        if let Some(extent) = inode_value.extents {
262            return Ok(Ext4Reader::file_reader(self, &extent, inode_value.size));
263        }
264        error!("[ext4-fs] No extent data found. Cannot read directory");
265        Err(Ext4Error::Directory)
266    }
267
268    fn descriptors(&mut self) -> Result<Vec<Descriptor>, Ext4Error> {
269        Descriptor::read_descriptor(self)
270    }
271
272    fn extents(&mut self, inode: u32) -> Result<Option<Extents>, Ext4Error> {
273        let inode_value = Inode::read_inode_table(self, inode)?;
274        Ok(inode_value.extents)
275    }
276
277    fn inode_verbose(&mut self, inode: u32) -> Result<Inode, Ext4Error> {
278        Inode::read_inode_table(self, inode)
279    }
280}
281
282fn update_cache(cache: &mut HashMap<u64, String>, info: &FileInfo) {
283    for entry in &info.children {
284        if entry.inode as u64 == info.inode {
285            continue;
286        }
287        cache.insert(entry.inode as u64, entry.name.clone());
288    }
289}
290
#[cfg(test)]
mod tests {
    use crate::{
        extfs::{Ext4Reader, Ext4ReaderAction},
        structs::{Ext4Hash, FileInfo, FileType},
    };
    use std::{collections::HashMap, fs::File, io::BufReader, path::PathBuf};

    /// Inode number of the root directory
    const ROOT_INODE: u32 = 2;

    /// Open the shared test image and wrap it in an `Ext4Reader`
    fn open_test_image() -> Ext4Reader<File> {
        let mut test_location = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        test_location.push("tests/images/test.img");
        let image = File::open(test_location).unwrap();
        Ext4Reader::new(BufReader::new(image), 4096, 0).unwrap()
    }

    /// True for the `.` and `..` directory entries
    fn is_dot_entry(name: &str) -> bool {
        name == "." || name == ".."
    }

    /// Recursively list every real sub-directory below `info`, recording paths in `cache`
    fn walk_dir<T: std::io::Seek + std::io::Read>(
        info: &FileInfo,
        reader: &mut Ext4Reader<T>,
        cache: &mut HashMap<u64, String>,
    ) {
        let subdirs = info.children.iter().filter(|entry| {
            entry.file_type == FileType::Directory
                && !is_dot_entry(&entry.name)
                && entry.inode != ROOT_INODE
        });
        for entry in subdirs {
            let child = reader.read_dir(entry.inode).unwrap();
            cache_paths(cache, &child);
            walk_dir(&child, reader, cache);
        }
    }

    /// Record the full path of every sub-directory listed in `info`.
    /// The path of `info` itself must already be present in `cache`
    fn cache_paths(cache: &mut HashMap<u64, String>, info: &FileInfo) {
        for entry in &info.children {
            if entry.file_type != FileType::Directory || is_dot_entry(&entry.name) {
                continue;
            }
            let key = entry.inode as u64;
            // Already known directories keep their first recorded path
            if cache.contains_key(&key) && entry.inode != ROOT_INODE {
                continue;
            }

            let path = format!("{}/{}", cache.get(&info.inode).unwrap(), entry.name);
            cache.insert(key, path);
        }
    }

    #[test]
    fn test_read_ext4_root() {
        let mut ext4_reader = open_test_image();
        let dir = ext4_reader.root().unwrap();

        assert_eq!(dir.created, 1759689014000000000);
        assert_eq!(dir.changed, 1759713496631583423);
        assert_eq!(dir.children.len(), 6);
    }

    #[test]
    fn test_read_ext4_dir() {
        let mut ext4_reader = open_test_image();
        ext4_reader.root().unwrap();
        let dir = ext4_reader.read_dir(7634).unwrap();

        assert_eq!(dir.created, 1759689167899447083);
        assert_eq!(dir.changed, 1759689170863467296);
        assert_eq!(dir.children.len(), 10);
        assert_eq!(dir.parent_inode, 2);
    }

    #[test]
    fn test_read_ext4_index_dir() {
        let mut ext4_reader = open_test_image();
        ext4_reader.root().unwrap();
        let dir = ext4_reader.read_dir(7633).unwrap();

        assert_eq!(dir.created, 1759689153355347892);
        assert_eq!(dir.changed, 1759689156340368251);
        assert_eq!(dir.children.len(), 165);
        assert_eq!(dir.parent_inode, 2);
    }

    #[test]
    fn test_walk_dir() {
        let mut ext4_reader = open_test_image();
        let root = ext4_reader.root().unwrap();
        let mut cache = HashMap::new();
        cache.insert(2, String::from(""));
        cache_paths(&mut cache, &root);
        walk_dir(&root, &mut ext4_reader, &mut cache);
        assert_eq!(cache.len(), 10);
    }

    #[test]
    fn test_stat() {
        let mut ext4_reader = open_test_image();
        let root = ext4_reader.root().unwrap();
        let mut cache = HashMap::new();
        cache.insert(2, String::from(""));
        cache_paths(&mut cache, &root);
        walk_dir(&root, &mut ext4_reader, &mut cache);

        let info = ext4_reader.stat(16).unwrap();
        assert_eq!(info.created, 1759689156064366369);
        assert_eq!(info.changed, 1759689156065366375);
        assert_eq!(info.accessed, 1759689156064366369);
        assert_eq!(info.modified, 1676375355000000000);
        assert_eq!(
            info.extended_attributes.get("security.selinux").unwrap(),
            "unconfined_u:object_r:unlabeled_t:s0"
        );
    }

    #[test]
    fn test_hash_large_file() {
        let mut ext4_reader = open_test_image();
        let hashes = Ext4Hash {
            md5: true,
            sha1: true,
            sha256: true,
        };
        let info = ext4_reader.hash(676, &hashes).unwrap();
        assert_eq!(info.md5, "df8e85bd10b33ac804b7c46073768dc9");
        assert_eq!(info.sha1, "beb51c72d95518720c76e69fd2ad5f7a57e01d6b");
        assert_eq!(
            info.sha256,
            "703df175cdcbbe0163f4ed7c83819070630b8bffdf65dc5739caef062a9c7a73"
        );
    }

    #[test]
    fn test_read_large_file() {
        let mut ext4_reader = open_test_image();
        let info = ext4_reader.read(676).unwrap();
        assert_eq!(info.len(), 274310864);
    }

    #[test]
    fn test_descriptors() {
        let mut ext4_reader = open_test_image();
        let info = ext4_reader.descriptors().unwrap();
        assert_eq!(info.len(), 7);
    }

    #[test]
    fn test_extents() {
        let mut ext4_reader = open_test_image();
        let info = ext4_reader.extents(676).unwrap().unwrap();
        assert_eq!(info.extent_descriptors.len(), 3);
    }
}