gistools/parsers/read/
mmap.rs

1use super::StdReader;
2use crate::parsers::Reader;
3use core::cell::RefCell;
4use half::f16;
5use memmap2::Mmap;
6use std::{
7    fs::File,
8    io::{self},
9    path::{Path, PathBuf},
10    string::{String, ToString},
11    vec::Vec,
12};
13
14/// A file reader for reading data from a file
15///
16/// Implements the [`Reader`] trait.
17#[derive(Debug)]
18pub struct MMapReader {
19    path: PathBuf,
20    _file: File,
21    mmap: Mmap,
22    size: u64,
23    cursor: RefCell<u64>,
24}
25impl MMapReader {
26    fn get_bytes(&self, byte_offset: Option<u64>, byte_length: u64) -> &[u8] {
27        let mut cursor = *self.cursor.borrow();
28        let offset = byte_offset.unwrap_or(cursor);
29        assert!(offset + byte_length <= self.size);
30        cursor = offset;
31
32        let buffer = &self.mmap[cursor as usize..(cursor + byte_length) as usize];
33        *self.cursor.borrow_mut() = offset + byte_length;
34
35        buffer
36    }
37}
38impl StdReader for MMapReader {
39    fn new<P: AsRef<Path>>(path: P) -> io::Result<Self> {
40        let path_buf = path.as_ref().to_path_buf();
41        let _file = File::open(path)?;
42        let mmap = unsafe { Mmap::map(&_file)? };
43        let size = _file.metadata().map(|metadata| metadata.len()).unwrap_or(0);
44        Ok(Self { path: path_buf, _file, mmap, size, cursor: 0.into() })
45    }
46}
47impl Clone for MMapReader {
48    fn clone(&self) -> Self {
49        MMapReader::new(&self.path).unwrap()
50    }
51}
52impl Reader for MMapReader {
53    fn len(&self) -> u64 {
54        self.mmap.len() as u64
55    }
56
57    // GETTERS
58
59    fn uint64(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> u64 {
60        if little_endian.unwrap_or(false) {
61            self.uint64_le(byte_offset)
62        } else {
63            self.uint64_be(byte_offset)
64        }
65    }
66    fn uint64_be(&self, byte_offset: Option<u64>) -> u64 {
67        let bytes = self.get_bytes(byte_offset, 8);
68        u64::from_be_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
69    }
70    fn uint64_le(&self, byte_offset: Option<u64>) -> u64 {
71        let bytes = self.get_bytes(byte_offset, 8);
72        u64::from_le_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
73    }
74
75    fn int64(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> i64 {
76        if little_endian.unwrap_or(false) {
77            self.int64_le(byte_offset)
78        } else {
79            self.int64_be(byte_offset)
80        }
81    }
82    fn int64_be(&self, byte_offset: Option<u64>) -> i64 {
83        let bytes = self.get_bytes(byte_offset, 8);
84        i64::from_be_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
85    }
86    fn int64_le(&self, byte_offset: Option<u64>) -> i64 {
87        let bytes = self.get_bytes(byte_offset, 8);
88        i64::from_le_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
89    }
90
91    fn f64(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> f64 {
92        if little_endian.unwrap_or(false) {
93            self.f64_le(byte_offset)
94        } else {
95            self.f64_be(byte_offset)
96        }
97    }
98    fn f64_be(&self, byte_offset: Option<u64>) -> f64 {
99        let bytes = self.get_bytes(byte_offset, 8);
100        f64::from_be_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
101    }
102    fn f64_le(&self, byte_offset: Option<u64>) -> f64 {
103        let bytes = self.get_bytes(byte_offset, 8);
104        f64::from_le_bytes(bytes.try_into().expect("Failed to read 8 bytes"))
105    }
106
107    fn uint32(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> u32 {
108        if little_endian.unwrap_or(false) {
109            self.uint32_le(byte_offset)
110        } else {
111            self.uint32_be(byte_offset)
112        }
113    }
114    fn uint32_be(&self, byte_offset: Option<u64>) -> u32 {
115        let bytes = self.get_bytes(byte_offset, 4);
116        u32::from_be_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
117    }
118    fn uint32_le(&self, byte_offset: Option<u64>) -> u32 {
119        let bytes = self.get_bytes(byte_offset, 4);
120        u32::from_le_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
121    }
122
123    fn int32(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> i32 {
124        if little_endian.unwrap_or(false) {
125            self.int32_le(byte_offset)
126        } else {
127            self.int32_be(byte_offset)
128        }
129    }
130    fn int32_be(&self, byte_offset: Option<u64>) -> i32 {
131        let bytes = self.get_bytes(byte_offset, 4);
132        i32::from_be_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
133    }
134    fn int32_le(&self, byte_offset: Option<u64>) -> i32 {
135        let bytes = self.get_bytes(byte_offset, 4);
136        i32::from_le_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
137    }
138
139    fn f32(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> f32 {
140        if little_endian.unwrap_or(false) {
141            self.f32_le(byte_offset)
142        } else {
143            self.f32_be(byte_offset)
144        }
145    }
146    fn f32_be(&self, byte_offset: Option<u64>) -> f32 {
147        let bytes = self.get_bytes(byte_offset, 4);
148        f32::from_be_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
149    }
150    fn f32_le(&self, byte_offset: Option<u64>) -> f32 {
151        let bytes = self.get_bytes(byte_offset, 4);
152        f32::from_le_bytes(bytes.try_into().expect("Failed to read 4 bytes"))
153    }
154
155    fn uint16(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> u16 {
156        if little_endian.unwrap_or(false) {
157            self.uint16_le(byte_offset)
158        } else {
159            self.uint16_be(byte_offset)
160        }
161    }
162    fn uint16_be(&self, byte_offset: Option<u64>) -> u16 {
163        let bytes = self.get_bytes(byte_offset, 2);
164        u16::from_be_bytes(bytes.try_into().expect("Failed to read 2 bytes"))
165    }
166    fn uint16_le(&self, byte_offset: Option<u64>) -> u16 {
167        let bytes = self.get_bytes(byte_offset, 2);
168        u16::from_le_bytes(bytes.try_into().expect("Failed to read 2 bytes"))
169    }
170
171    fn int16(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> i16 {
172        if little_endian.unwrap_or(false) {
173            self.int16_le(byte_offset)
174        } else {
175            self.int16_be(byte_offset)
176        }
177    }
178    fn int16_be(&self, byte_offset: Option<u64>) -> i16 {
179        let bytes = self.get_bytes(byte_offset, 2);
180        i16::from_be_bytes(bytes.try_into().expect("Failed to read 2 bytes"))
181    }
182    fn int16_le(&self, byte_offset: Option<u64>) -> i16 {
183        let bytes = self.get_bytes(byte_offset, 2);
184        i16::from_le_bytes(bytes.try_into().expect("Failed to read 2 bytes"))
185    }
186
187    fn f16(&self, byte_offset: Option<u64>, little_endian: Option<bool>) -> f32 {
188        if little_endian.unwrap_or(false) {
189            self.f16_le(byte_offset)
190        } else {
191            self.f16_be(byte_offset)
192        }
193    }
194    fn f16_be(&self, byte_offset: Option<u64>) -> f32 {
195        let bytes = self.get_bytes(byte_offset, 2);
196        let f = f16::from_be_bytes(bytes.try_into().expect("Failed to read 2 bytes"));
197        f32::from_bits(f.to_bits().into())
198    }
199    fn f16_le(&self, byte_offset: Option<u64>) -> f32 {
200        let bytes = self.get_bytes(byte_offset, 2);
201        let f = f16::from_le_bytes(bytes.try_into().expect("Failed to read 2 bytes"));
202        f32::from_bits(f.to_bits().into())
203    }
204
205    fn uint8(&self, byte_offset: Option<u64>) -> u8 {
206        let bytes = self.get_bytes(byte_offset, 1);
207        bytes[0]
208    }
209    fn int8(&self, byte_offset: Option<u64>) -> i8 {
210        let bytes = self.get_bytes(byte_offset, 1);
211        bytes[0] as i8
212    }
213
214    // Methods
215
216    fn tell(&self) -> u64 {
217        *self.cursor.borrow()
218    }
219    fn seek(&self, pos: u64) {
220        *self.cursor.borrow_mut() = pos;
221    }
222    fn slice(&self, begin: Option<u64>, end: Option<u64>) -> Vec<u8> {
223        let begin = begin.unwrap_or(*self.cursor.borrow()) as usize;
224        let end = end.unwrap_or(self.mmap.len() as u64) as usize;
225        assert!(end <= self.mmap.len());
226        self.mmap[begin..end].to_vec()
227    }
228
229    fn seek_slice(&self, size: usize) -> Vec<u8> {
230        assert!(*self.cursor.borrow() + size as u64 <= self.mmap.len() as u64);
231        *self.cursor.borrow_mut() += size as u64;
232        let cursor = *self.cursor.borrow();
233        self.slice(Some(cursor - size as u64), Some(cursor))
234    }
235    fn parse_string(&self, byte_offset: Option<u64>, byte_length: Option<u64>) -> String {
236        let offset = byte_offset.unwrap_or(*self.cursor.borrow()) as usize;
237        let length = byte_length.unwrap_or(self.mmap.len() as u64 - offset as u64) as usize;
238        let str_buf = &self.mmap[offset..offset + length];
239        let cleaned_str_buf: Vec<u8> = str_buf.iter().cloned().filter(|&b| b != 0).collect();
240        let string = String::from_utf8_lossy(&cleaned_str_buf).to_string();
241        *self.cursor.borrow_mut() = (offset + length) as u64;
242        string
243    }
244}
245impl From<PathBuf> for MMapReader {
246    fn from(path: PathBuf) -> Self {
247        MMapReader::new(path).unwrap()
248    }
249}
250impl From<String> for MMapReader {
251    fn from(path: String) -> Self {
252        MMapReader::new(path).unwrap()
253    }
254}
255impl From<&str> for MMapReader {
256    fn from(path: &str) -> Self {
257        MMapReader::new(path).unwrap()
258    }
259}