gaia_types/reader/mod.rs
1#![doc = include_str!("readme.md")]
2
3pub use self::{token::Token, token_stream::TokenStream};
4use crate::{GaiaDiagnostics, GaiaError};
5use byteorder::{ByteOrder, ReadBytesExt};
6use serde::{Deserialize, Serialize};
7use std::{
8 io::{Read, Seek, SeekFrom},
9 marker::PhantomData,
10};
11use url::Url;
12
13mod token;
14mod token_stream;
15
16/// Binary reader for reading data from types that implement ReadBytesExt.
17///
18/// This is a generic struct that can wrap any type implementing ReadBytesExt,
19/// providing binary data reading functionality.
20#[derive(Debug)]
21pub struct BinaryReader<R, E> {
22 reader: R,
23 position: u64,
24 endian: PhantomData<E>,
25 errors: Vec<GaiaError>,
26}
27
28impl<R: Read, E> Read for BinaryReader<R, E> {
29 fn read(&mut self, buffer: &mut [u8]) -> std::io::Result<usize> {
30 let bytes_read = self.reader.read(buffer)?;
31 self.position += bytes_read as u64;
32 Ok(bytes_read)
33 }
34}
35
36impl<R: Seek, E> Seek for BinaryReader<R, E> {
37 fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
38 let new_position = self.reader.seek(pos)?;
39 self.position = new_position;
40 Ok(new_position)
41 }
42}
43
44impl<R, E> BinaryReader<R, E> {
45 /// Create a new binary reader.
46 ///
47 /// # Arguments
48 /// * `reader` - The data source to read from.
49 ///
50 /// # Returns
51 /// A new BinaryReader instance.
52 pub fn new(reader: R) -> Self {
53 Self { reader, position: 0, endian: Default::default(), errors: vec![] }
54 }
55
56 /// Get the current reading position.
57 ///
58 /// # Returns
59 /// The current byte offset position.
60 pub fn get_position(&self) -> u64 {
61 self.position
62 }
63
64 /// Set the reading position.
65 ///
66 /// Note: This function will return an error if the underlying reader does not support Seek operations.
67 ///
68 /// # Arguments
69 /// * `position` - The new reading position.
70 ///
71 /// # Returns
72 /// The operation result containing the new position.
73 pub fn set_position(&mut self, position: u64) -> Result<u64, GaiaError>
74 where
75 R: Seek,
76 {
77 self.reader.seek(SeekFrom::Start(position))?;
78 self.position = position;
79 Ok(position)
80 }
81 /// Finish reading and return the results with diagnostics.
82 pub fn finish(self) -> GaiaDiagnostics<R> {
83 GaiaDiagnostics { result: Ok(self.reader), diagnostics: self.errors }
84 }
85
86 /// Get and clear the currently accumulated error list.
87 /// This method returns all errors encountered during reading and resets the internal error storage.
88 pub fn take_errors(&mut self) -> Vec<GaiaError> {
89 std::mem::take(&mut self.errors)
90 }
91}
92
93impl<R: ReadBytesExt, E: ByteOrder> BinaryReader<R, E> {
94 /// Read a u8 value.
95 ///
96 /// # Returns
97 /// The read u8 value or an IO error.
98 pub fn read_u8(&mut self) -> std::io::Result<u8> {
99 let value = self.reader.read_u8()?;
100 self.position += 1;
101 Ok(value)
102 }
103
104 /// Read a u16 value.
105 ///
106 /// # Returns
107 /// The read u16 value or an IO error.
108 pub fn read_u16(&mut self) -> std::io::Result<u16> {
109 let value = self.reader.read_u16::<E>()?;
110 self.position += 2;
111 Ok(value)
112 }
113
114 /// Read an i16 value.
115 ///
116 /// # Returns
117 /// The read i16 value or an IO error.
118 pub fn read_i16(&mut self) -> std::io::Result<i16> {
119 let value = self.reader.read_i16::<E>()?;
120 self.position += 2;
121 Ok(value)
122 }
123
124 /// Read a u32 value.
125 ///
126 /// # Returns
127 /// The read u32 value or an IO error.
128 pub fn read_u32(&mut self) -> std::io::Result<u32> {
129 let value = self.reader.read_u32::<E>()?;
130 self.position += 4;
131 Ok(value)
132 }
133
134 /// Read a u64 value.
135 ///
136 /// # Returns
137 /// The read u64 value or an IO error.
138 pub fn read_u64(&mut self) -> std::io::Result<u64> {
139 let value = self.reader.read_u64::<E>()?;
140 self.position += 8;
141 Ok(value)
142 }
143
144 /// Read an i32 value.
145 ///
146 /// # Returns
147 /// The read i32 value or an IO error.
148 pub fn read_i32(&mut self) -> std::io::Result<i32> {
149 let value = self.reader.read_i32::<E>()?;
150 self.position += 4;
151 Ok(value)
152 }
153
154 /// Read an i64 value.
155 ///
156 /// # Returns
157 /// The read i64 value or an IO error.
158 pub fn read_i64(&mut self) -> std::io::Result<i64> {
159 let value = self.reader.read_i64::<E>()?;
160 self.position += 8;
161 Ok(value)
162 }
163
164 /// Read an f32 value.
165 ///
166 /// # Returns
167 /// The read f32 value or an IO error.
168 pub fn read_f32(&mut self) -> std::io::Result<f32> {
169 let value = self.reader.read_f32::<E>()?;
170 self.position += 4;
171 Ok(value)
172 }
173
174 /// Read an f64 value.
175 ///
176 /// # Returns
177 /// The read f64 value or an IO error.
178 pub fn read_f64(&mut self) -> std::io::Result<f64> {
179 let value = self.reader.read_f64::<E>()?;
180 self.position += 8;
181 Ok(value)
182 }
183
184 /// Read a byte array of specified length.
185 ///
186 /// # Arguments
187 /// * `len` - The number of bytes to read.
188 ///
189 /// # Returns
190 /// The read byte array or an IO error.
191 pub fn read_bytes(&mut self, len: usize) -> std::io::Result<Vec<u8>> {
192 let mut buf = vec![0u8; len];
193 self.reader.read_exact(&mut buf)?;
194 self.position += len as u64;
195 Ok(buf)
196 }
197
198 /// Read a fixed-length byte array.
199 ///
200 /// # Returns
201 /// Returns the read byte array or an IO error.
202 pub fn read_array<const N: usize>(&mut self) -> std::io::Result<[u8; N]> {
203 let mut buf = [0u8; N];
204 self.reader.read_exact(&mut buf)?;
205 self.position += N as u64;
206 Ok(buf)
207 }
208
209 /// Skip the specified number of bytes.
210 ///
211 /// Note: This function will return an error if the underlying reader does not support Seek operations.
212 ///
213 /// # Arguments
214 /// * `count` - The number of bytes to skip.
215 ///
216 /// # Returns
217 /// Returns the operation result.
218 pub fn skip(&mut self, count: u64) -> std::io::Result<u64>
219 where
220 R: Seek,
221 {
222 let new_pos = self.reader.seek(SeekFrom::Current(count as i64))?;
223 self.position = new_pos;
224 Ok(new_pos)
225 }
226
227 /// Read a LEB128 encoded unsigned 32-bit integer.
228 ///
229 /// # Returns
230 /// Returns the read u32 value or an IO error.
231 pub fn read_u32_leb128(&mut self) -> std::io::Result<u32> {
232 let mut result = 0u32;
233 let mut shift = 0;
234
235 loop {
236 let byte = self.read_u8()?;
237 result |= ((byte & 0x7F) as u32) << shift;
238
239 if byte & 0x80 == 0 {
240 break;
241 }
242
243 shift += 7;
244 if shift >= 32 {
245 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for u32"));
246 }
247 }
248
249 Ok(result)
250 }
251
252 /// Read a LEB128 encoded signed 32-bit integer.
253 ///
254 /// # Returns
255 /// Returns the read i32 value or an IO error.
256 pub fn read_i32_leb128(&mut self) -> std::io::Result<i32> {
257 let mut result = 0i32;
258 let mut shift = 0;
259 let mut byte;
260
261 loop {
262 byte = self.read_u8()?;
263 result |= ((byte & 0x7F) as i32) << shift;
264 shift += 7;
265
266 if byte & 0x80 == 0 {
267 break;
268 }
269
270 if shift >= 32 {
271 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for i32"));
272 }
273 }
274
275 // Sign extension
276 if shift < 32 && (byte & 0x40) != 0 {
277 result |= !0 << shift;
278 }
279
280 Ok(result)
281 }
282
283 /// Read a LEB128 encoded signed 64-bit integer.
284 ///
285 /// # Returns
286 /// Returns the read i64 value or an IO error.
287 pub fn read_i64_leb128(&mut self) -> std::io::Result<i64> {
288 let mut result = 0i64;
289 let mut shift = 0;
290 let mut byte;
291
292 loop {
293 byte = self.read_u8()?;
294 result |= ((byte & 0x7F) as i64) << shift;
295 shift += 7;
296
297 if byte & 0x80 == 0 {
298 break;
299 }
300
301 if shift >= 64 {
302 return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for i64"));
303 }
304 }
305
306 // Sign extension
307 if shift < 64 && (byte & 0x40) != 0 {
308 result |= !0 << shift;
309 }
310
311 Ok(result)
312 }
313}
314
315impl<R, E> BinaryReader<R, E> {
316 /// Calculate the byte length of a LEB128 encoded value (static method).
317 ///
318 /// # Arguments
319 /// * `value` - The value to calculate the length for.
320 ///
321 /// # Returns
322 /// Returns the encoded byte length.
323 pub fn leb128_size(mut value: u32) -> u32 {
324 let mut size = 0;
325 loop {
326 value >>= 7;
327 size += 1;
328 if value == 0 {
329 break;
330 }
331 }
332 size
333 }
334}
335
336/// Source code position information, representing the position of code in a source file.
337///
338/// This struct is used to track source code position information, including line number, column number, etc.
339#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
340pub struct SourcePosition {
341 /// Line number, starting from 1.
342 ///
343 /// Represents the line number of the current position, the first line is 1.
344 pub line: u32,
345 /// Column number, starting from 1.
346 ///
347 /// Represents the column number of the current position, the first column is 1.
348 pub column: u32,
349 /// Byte offset, starting from 0.
350 ///
351 /// Represents the byte offset from the start of the file to the current position.
352 pub offset: usize,
353 /// Length, representing the number of bytes covered by this position.
354 ///
355 /// Usually used to represent the length of a token or symbol.
356 pub length: usize,
357}
358
359/// Source location, including file URL and position information.
360///
361/// This struct extends SourcePosition by adding file URL information,
362/// allowing it to represent the position of code in a specific file.
363#[derive(Clone, Debug, Serialize, Deserialize)]
364pub struct SourceLocation {
365 /// Line number, starting from 1.
366 ///
367 /// Represents the line number of the current position.
368 pub line: u32,
369 /// Column number, starting from 1.
370 ///
371 /// Represents the column number of the current position.
372 pub column: u32,
373 /// The URL of the source file, optional.
374 ///
375 /// If present, represents the URL or path of the file containing the code.
376 /// Can be a filesystem path or a network URL.
377 pub url: Option<Url>,
378}
379
380impl Default for SourceLocation {
381 fn default() -> Self {
382 Self { line: 1, column: 1, url: None }
383 }
384}