Skip to main content

gaia_types/reader/
mod.rs

1#![doc = include_str!("readme.md")]
2
3pub use self::{token::Token, token_stream::TokenStream};
4use crate::{GaiaDiagnostics, GaiaError};
5use byteorder::{ByteOrder, ReadBytesExt};
6use serde::{Deserialize, Serialize};
7use std::{
8    io::{Read, Seek, SeekFrom},
9    marker::PhantomData,
10};
11use url::Url;
12
13mod token;
14mod token_stream;
15
16/// Binary reader for reading data from types that implement ReadBytesExt.
17///
18/// This is a generic struct that can wrap any type implementing ReadBytesExt,
19/// providing binary data reading functionality.
20#[derive(Debug)]
21pub struct BinaryReader<R, E> {
22    reader: R,
23    position: u64,
24    endian: PhantomData<E>,
25    errors: Vec<GaiaError>,
26}
27
28impl<R: Read, E> Read for BinaryReader<R, E> {
29    fn read(&mut self, buffer: &mut [u8]) -> std::io::Result<usize> {
30        let bytes_read = self.reader.read(buffer)?;
31        self.position += bytes_read as u64;
32        Ok(bytes_read)
33    }
34}
35
36impl<R: Seek, E> Seek for BinaryReader<R, E> {
37    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
38        let new_position = self.reader.seek(pos)?;
39        self.position = new_position;
40        Ok(new_position)
41    }
42}
43
44impl<R, E> BinaryReader<R, E> {
45    /// Create a new binary reader.
46    ///
47    /// # Arguments
48    /// * `reader` - The data source to read from.
49    ///
50    /// # Returns
51    /// A new BinaryReader instance.
52    pub fn new(reader: R) -> Self {
53        Self { reader, position: 0, endian: Default::default(), errors: vec![] }
54    }
55
56    /// Get the current reading position.
57    ///
58    /// # Returns
59    /// The current byte offset position.
60    pub fn get_position(&self) -> u64 {
61        self.position
62    }
63
64    /// Set the reading position.
65    ///
66    /// Note: This function will return an error if the underlying reader does not support Seek operations.
67    ///
68    /// # Arguments
69    /// * `position` - The new reading position.
70    ///
71    /// # Returns
72    /// The operation result containing the new position.
73    pub fn set_position(&mut self, position: u64) -> Result<u64, GaiaError>
74    where
75        R: Seek,
76    {
77        self.reader.seek(SeekFrom::Start(position))?;
78        self.position = position;
79        Ok(position)
80    }
81    /// Finish reading and return the results with diagnostics.
82    pub fn finish(self) -> GaiaDiagnostics<R> {
83        GaiaDiagnostics { result: Ok(self.reader), diagnostics: self.errors }
84    }
85
86    /// Get and clear the currently accumulated error list.
87    /// This method returns all errors encountered during reading and resets the internal error storage.
88    pub fn take_errors(&mut self) -> Vec<GaiaError> {
89        std::mem::take(&mut self.errors)
90    }
91}
92
93impl<R: ReadBytesExt, E: ByteOrder> BinaryReader<R, E> {
94    /// Read a u8 value.
95    ///
96    /// # Returns
97    /// The read u8 value or an IO error.
98    pub fn read_u8(&mut self) -> std::io::Result<u8> {
99        let value = self.reader.read_u8()?;
100        self.position += 1;
101        Ok(value)
102    }
103
104    /// Read a u16 value.
105    ///
106    /// # Returns
107    /// The read u16 value or an IO error.
108    pub fn read_u16(&mut self) -> std::io::Result<u16> {
109        let value = self.reader.read_u16::<E>()?;
110        self.position += 2;
111        Ok(value)
112    }
113
114    /// Read an i16 value.
115    ///
116    /// # Returns
117    /// The read i16 value or an IO error.
118    pub fn read_i16(&mut self) -> std::io::Result<i16> {
119        let value = self.reader.read_i16::<E>()?;
120        self.position += 2;
121        Ok(value)
122    }
123
124    /// Read a u32 value.
125    ///
126    /// # Returns
127    /// The read u32 value or an IO error.
128    pub fn read_u32(&mut self) -> std::io::Result<u32> {
129        let value = self.reader.read_u32::<E>()?;
130        self.position += 4;
131        Ok(value)
132    }
133
134    /// Read a u64 value.
135    ///
136    /// # Returns
137    /// The read u64 value or an IO error.
138    pub fn read_u64(&mut self) -> std::io::Result<u64> {
139        let value = self.reader.read_u64::<E>()?;
140        self.position += 8;
141        Ok(value)
142    }
143
144    /// Read an i32 value.
145    ///
146    /// # Returns
147    /// The read i32 value or an IO error.
148    pub fn read_i32(&mut self) -> std::io::Result<i32> {
149        let value = self.reader.read_i32::<E>()?;
150        self.position += 4;
151        Ok(value)
152    }
153
154    /// Read an i64 value.
155    ///
156    /// # Returns
157    /// The read i64 value or an IO error.
158    pub fn read_i64(&mut self) -> std::io::Result<i64> {
159        let value = self.reader.read_i64::<E>()?;
160        self.position += 8;
161        Ok(value)
162    }
163
164    /// Read an f32 value.
165    ///
166    /// # Returns
167    /// The read f32 value or an IO error.
168    pub fn read_f32(&mut self) -> std::io::Result<f32> {
169        let value = self.reader.read_f32::<E>()?;
170        self.position += 4;
171        Ok(value)
172    }
173
174    /// Read an f64 value.
175    ///
176    /// # Returns
177    /// The read f64 value or an IO error.
178    pub fn read_f64(&mut self) -> std::io::Result<f64> {
179        let value = self.reader.read_f64::<E>()?;
180        self.position += 8;
181        Ok(value)
182    }
183
184    /// Read a byte array of specified length.
185    ///
186    /// # Arguments
187    /// * `len` - The number of bytes to read.
188    ///
189    /// # Returns
190    /// The read byte array or an IO error.
191    pub fn read_bytes(&mut self, len: usize) -> std::io::Result<Vec<u8>> {
192        let mut buf = vec![0u8; len];
193        self.reader.read_exact(&mut buf)?;
194        self.position += len as u64;
195        Ok(buf)
196    }
197
198    /// Read a fixed-length byte array.
199    ///
200    /// # Returns
201    /// Returns the read byte array or an IO error.
202    pub fn read_array<const N: usize>(&mut self) -> std::io::Result<[u8; N]> {
203        let mut buf = [0u8; N];
204        self.reader.read_exact(&mut buf)?;
205        self.position += N as u64;
206        Ok(buf)
207    }
208
209    /// Skip the specified number of bytes.
210    ///
211    /// Note: This function will return an error if the underlying reader does not support Seek operations.
212    ///
213    /// # Arguments
214    /// * `count` - The number of bytes to skip.
215    ///
216    /// # Returns
217    /// Returns the operation result.
218    pub fn skip(&mut self, count: u64) -> std::io::Result<u64>
219    where
220        R: Seek,
221    {
222        let new_pos = self.reader.seek(SeekFrom::Current(count as i64))?;
223        self.position = new_pos;
224        Ok(new_pos)
225    }
226
227    /// Read a LEB128 encoded unsigned 32-bit integer.
228    ///
229    /// # Returns
230    /// Returns the read u32 value or an IO error.
231    pub fn read_u32_leb128(&mut self) -> std::io::Result<u32> {
232        let mut result = 0u32;
233        let mut shift = 0;
234
235        loop {
236            let byte = self.read_u8()?;
237            result |= ((byte & 0x7F) as u32) << shift;
238
239            if byte & 0x80 == 0 {
240                break;
241            }
242
243            shift += 7;
244            if shift >= 32 {
245                return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for u32"));
246            }
247        }
248
249        Ok(result)
250    }
251
252    /// Read a LEB128 encoded signed 32-bit integer.
253    ///
254    /// # Returns
255    /// Returns the read i32 value or an IO error.
256    pub fn read_i32_leb128(&mut self) -> std::io::Result<i32> {
257        let mut result = 0i32;
258        let mut shift = 0;
259        let mut byte;
260
261        loop {
262            byte = self.read_u8()?;
263            result |= ((byte & 0x7F) as i32) << shift;
264            shift += 7;
265
266            if byte & 0x80 == 0 {
267                break;
268            }
269
270            if shift >= 32 {
271                return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for i32"));
272            }
273        }
274
275        // Sign extension
276        if shift < 32 && (byte & 0x40) != 0 {
277            result |= !0 << shift;
278        }
279
280        Ok(result)
281    }
282
283    /// Read a LEB128 encoded signed 64-bit integer.
284    ///
285    /// # Returns
286    /// Returns the read i64 value or an IO error.
287    pub fn read_i64_leb128(&mut self) -> std::io::Result<i64> {
288        let mut result = 0i64;
289        let mut shift = 0;
290        let mut byte;
291
292        loop {
293            byte = self.read_u8()?;
294            result |= ((byte & 0x7F) as i64) << shift;
295            shift += 7;
296
297            if byte & 0x80 == 0 {
298                break;
299            }
300
301            if shift >= 64 {
302                return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "LEB128 value too large for i64"));
303            }
304        }
305
306        // Sign extension
307        if shift < 64 && (byte & 0x40) != 0 {
308            result |= !0 << shift;
309        }
310
311        Ok(result)
312    }
313}
314
315impl<R, E> BinaryReader<R, E> {
316    /// Calculate the byte length of a LEB128 encoded value (static method).
317    ///
318    /// # Arguments
319    /// * `value` - The value to calculate the length for.
320    ///
321    /// # Returns
322    /// Returns the encoded byte length.
323    pub fn leb128_size(mut value: u32) -> u32 {
324        let mut size = 0;
325        loop {
326            value >>= 7;
327            size += 1;
328            if value == 0 {
329                break;
330            }
331        }
332        size
333    }
334}
335
336/// Source code position information, representing the position of code in a source file.
337///
338/// This struct is used to track source code position information, including line number, column number, etc.
339#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
340pub struct SourcePosition {
341    /// Line number, starting from 1.
342    ///
343    /// Represents the line number of the current position, the first line is 1.
344    pub line: u32,
345    /// Column number, starting from 1.
346    ///
347    /// Represents the column number of the current position, the first column is 1.
348    pub column: u32,
349    /// Byte offset, starting from 0.
350    ///
351    /// Represents the byte offset from the start of the file to the current position.
352    pub offset: usize,
353    /// Length, representing the number of bytes covered by this position.
354    ///
355    /// Usually used to represent the length of a token or symbol.
356    pub length: usize,
357}
358
359/// Source location, including file URL and position information.
360///
361/// This struct extends SourcePosition by adding file URL information,
362/// allowing it to represent the position of code in a specific file.
363#[derive(Clone, Debug, Serialize, Deserialize)]
364pub struct SourceLocation {
365    /// Line number, starting from 1.
366    ///
367    /// Represents the line number of the current position.
368    pub line: u32,
369    /// Column number, starting from 1.
370    ///
371    /// Represents the column number of the current position.
372    pub column: u32,
373    /// The URL of the source file, optional.
374    ///
375    /// If present, represents the URL or path of the file containing the code.
376    /// Can be a filesystem path or a network URL.
377    pub url: Option<Url>,
378}
379
380impl Default for SourceLocation {
381    fn default() -> Self {
382        Self { line: 1, column: 1, url: None }
383    }
384}