orthrus_core/
data.rs

1//! Endian-aware manipulation for data streams.
2//!
3//! This crate contains several types that allow you to read and write data with a specific endianness.
4//! * [`DataCursor`] is for data where it owns the byte slice directly, such as in-memory files.
5//! * [`DataCursorRef`] is for borrowed data and allows for reading.
6//! * [`DataCursorMut`] is for borrowed mutable data and allows both reading and writing.
7//! * [`DataStream`] allows for any stream that supports [`Read`]/[`Write`]/[`Seek`].
8//!
9//! Additionally, this provides several traits to allow for a more modular integration.
10//! * [`IntoDataStream`] allows you to convert into the above types in a generic way.
11//! * [`ReadExt`] provides for endian-aware reading.
12//! * [`WriteExt`] provides for endian-aware writing.
13//! * [`SeekExt`] provides for optional seeking, if `ReadExt` and `WriteExt` are not enough.
14
15use core::mem::MaybeUninit;
16use core::ops::{Deref, DerefMut};
17use std::fs::File;
18use std::io::{BufReader, Cursor, Empty};
19use std::sync::Arc;
20
21use snafu::prelude::*;
22
23#[cfg(feature = "alloc")]
24extern crate alloc;
25#[cfg(feature = "alloc")]
26use alloc::borrow::Cow;
27#[cfg(feature = "std")]
28use std::{
29    io::{ErrorKind, Read, Seek, SeekFrom, Write},
30    path::Path,
31};
32
33#[derive(Debug, Snafu)]
34pub enum Utf8ErrorSource {
35    #[snafu(display("Invalid UTF-8 sequence"))]
36    Slice { source: core::str::Utf8Error },
37    #[snafu(display("Invalid UTF-8 sequence"))]
38    String { source: alloc::string::FromUtf8Error },
39}
40
41/// Error conditions for when reading/writing data.
42#[derive(Debug, Snafu)]
43#[non_exhaustive]
44pub enum DataError {
45    /// Thrown if reading/writing tries to go out of bounds.
46    #[snafu(display("Tried to read out-of-bounds"))]
47    EndOfFile,
48
49    /// Thrown if UTF-8 validation fails when trying to convert a string.
50    #[snafu(display("{source}"))]
51    InvalidString { source: Utf8ErrorSource },
52
53    /// Thrown when an I/O operation fails on a [`DataStream`].
54    #[cfg(feature = "std")]
55    #[snafu(display("I/O error: {source}"))]
56    Io { source: std::io::Error },
57}
58
59impl From<core::str::Utf8Error> for DataError {
60    #[inline]
61    fn from(source: core::str::Utf8Error) -> Self {
62        DataError::InvalidString { source: Utf8ErrorSource::Slice { source } }
63    }
64}
65
66impl From<alloc::string::FromUtf8Error> for DataError {
67    #[inline]
68    fn from(source: alloc::string::FromUtf8Error) -> Self {
69        DataError::InvalidString { source: Utf8ErrorSource::String { source } }
70    }
71}
72
/// Byte order used when multi-byte values are read from or written to a data source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Endian {
    /// Least significant byte first.
    Little,
    /// Most significant byte first.
    Big,
}
79
80impl Default for Endian {
81    #[inline]
82    fn default() -> Self {
83        #[cfg(target_endian = "little")]
84        {
85            Self::Little
86        }
87        #[cfg(target_endian = "big")]
88        {
89            Self::Big
90        }
91    }
92}
93
94/// Trait for types that support endian-aware operations.
95pub trait EndianExt {
96    /// Returns the current endianness.
97    fn endian(&self) -> Endian;
98
99    /// Sets the endianness.
100    fn set_endian(&mut self, endian: Endian);
101}
102
103/// Trait for types that support seeking operations.
104pub trait SeekExt {
105    /// Returns the current position.
106    fn position(&mut self) -> Result<u64, DataError>;
107
108    /// Sets the current position.
109    ///
110    /// # Errors
111    /// Returns an error if the position cannot be set.
112    fn set_position(&mut self, position: u64) -> Result<u64, DataError>;
113
114    /// Returns the total length of the data.
115    ///
116    /// # Errors
117    /// Returns an error if unable to determine the length of the stream.
118    fn len(&mut self) -> Result<u64, DataError>;
119
120    /// Returns `true` if the remaining data is empty.
121    ///
122    /// # Errors
123    /// Returns an error if unable to determine either the length of the stream or the position inside it.
124    fn is_empty(&mut self) -> Result<bool, DataError>;
125}
126
127/// Trait for types that support reading operations.
128pub trait ReadExt: EndianExt {
129    /// Reads exactly N bytes from the current stream.
130    ///
131    /// # Errors
132    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
133    fn read_exact<const N: usize>(&mut self) -> Result<[u8; N], DataError>;
134
135    /// Attempts to fill the buffer with data.
136    ///
137    /// # Errors
138    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
139    fn read_length(&mut self, buffer: &mut [u8]) -> Result<usize, DataError>;
140
141    /// Reads a slice of the given length from the current position.
142    ///
143    /// # Errors
144    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
145    #[cfg(not(feature = "alloc"))]
146    fn read_slice(&mut self, length: usize) -> Result<&[u8], DataError>;
147
148    /// Reads a slice of the given length from the current position.
149    ///
150    /// # Errors
151    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
152    #[cfg(feature = "alloc")]
153    fn read_slice(&mut self, length: usize) -> Result<Cow<[u8]>, DataError>;
154
155    /// Reads a UTF-8 encoded string of the given length from the current position.
156    ///
157    /// # Errors
158    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
159    /// Returns [`InvalidStr`](Error::InvalidStr) if the bytes are not valid UTF-8.
160    #[inline]
161    #[cfg(not(feature = "alloc"))]
162    fn read_string(&mut self, length: usize) -> Result<&str, DataError> {
163        let slice = self.read_slice(length)?;
164        core::str::from_utf8(slice).context(InvalidStrSnafu)
165    }
166
167    /// Returns the remaining data from the current position.
168    ///
169    /// # Errors
170    /// Returns an error if the remaining data cannot be read.
171    #[cfg(not(feature = "alloc"))]
172    fn remaining_slice(&mut self) -> Result<&[u8], DataError>;
173
174    /// Returns the remaining data from the current position.
175    ///
176    /// # Errors
177    /// Returns an error if the remaining data cannot be read.
178    #[cfg(feature = "alloc")]
179    fn remaining_slice(&mut self) -> Result<Cow<[u8]>, DataError>;
180
181    /// Reads a UTF-8 encoded string of the given length from the current position.
182    ///
183    /// # Errors
184    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
185    /// Returns [`InvalidStr`](Error::InvalidStr) if the bytes are not valid UTF-8.
186    #[inline]
187    #[cfg(feature = "alloc")]
188    fn read_string(&mut self, length: usize) -> Result<Cow<str>, DataError> {
189        let slice = self.read_slice(length)?;
190        match slice {
191            Cow::Borrowed(bytes) => Ok(Cow::Borrowed(core::str::from_utf8(bytes)?)),
192            Cow::Owned(bytes) => Ok(Cow::Owned(String::from_utf8(bytes)?)),
193        }
194    }
195
196    /// Reads an unsigned 8-bit integer.
197    ///
198    /// # Errors
199    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
200    #[inline]
201    fn read_u8(&mut self) -> Result<u8, DataError> {
202        Ok(self.read_exact::<1>()?[0])
203    }
204
205    /// Reads a signed 8-bit integer.
206    ///
207    /// # Errors
208    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
209    #[inline]
210    fn read_i8(&mut self) -> Result<i8, DataError> {
211        Ok(self.read_u8()? as i8)
212    }
213
214    /// Reads an unsigned 16-bit integer.
215    ///
216    /// # Errors
217    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
218    #[inline]
219    fn read_u16(&mut self) -> Result<u16, DataError> {
220        let bytes = self.read_exact()?;
221        Ok(match self.endian() {
222            Endian::Little => u16::from_le_bytes(bytes),
223            Endian::Big => u16::from_be_bytes(bytes),
224        })
225    }
226
227    /// Reads a signed 16-bit integer.
228    ///
229    /// # Errors
230    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
231    #[inline]
232    fn read_i16(&mut self) -> Result<i16, DataError> {
233        Ok(self.read_u16()? as i16)
234    }
235
236    /// Reads an unsigned 32-bit integer.
237    ///
238    /// # Errors
239    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
240    #[inline]
241    fn read_u32(&mut self) -> Result<u32, DataError> {
242        let bytes = self.read_exact()?;
243        Ok(match self.endian() {
244            Endian::Little => u32::from_le_bytes(bytes),
245            Endian::Big => u32::from_be_bytes(bytes),
246        })
247    }
248
249    /// Reads a signed 32-bit integer.
250    ///
251    /// # Errors
252    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
253    #[inline]
254    fn read_i32(&mut self) -> Result<i32, DataError> {
255        Ok(self.read_u32()? as i32)
256    }
257
258    /// Reads an unsigned 64-bit integer.
259    ///
260    /// # Errors
261    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
262    #[inline]
263    fn read_u64(&mut self) -> Result<u64, DataError> {
264        let bytes = self.read_exact()?;
265        Ok(match self.endian() {
266            Endian::Little => u64::from_le_bytes(bytes),
267            Endian::Big => u64::from_be_bytes(bytes),
268        })
269    }
270
271    /// Reads a signed 64-bit integer.
272    ///
273    /// # Errors
274    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
275    #[inline]
276    fn read_i64(&mut self) -> Result<i64, DataError> {
277        Ok(self.read_u64()? as i64)
278    }
279
280    /// Reads a 32-bit floating point number.
281    ///
282    /// # Errors
283    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
284    #[inline]
285    fn read_f32(&mut self) -> Result<f32, DataError> {
286        let bytes = self.read_exact()?;
287        Ok(match self.endian() {
288            Endian::Little => f32::from_le_bytes(bytes),
289            Endian::Big => f32::from_be_bytes(bytes),
290        })
291    }
292
293    /// Reads a 64-bit floating point number.
294    ///
295    /// # Errors
296    /// Returns [`EndOfFile`](Error::EndOfFile) if trying to read out of bounds.
297    #[inline]
298    fn read_f64(&mut self) -> Result<f64, DataError> {
299        let bytes = self.read_exact()?;
300        Ok(match self.endian() {
301            Endian::Little => f64::from_le_bytes(bytes),
302            Endian::Big => f64::from_be_bytes(bytes),
303        })
304    }
305}
306
307/// Trait for types that support writing operations.
308pub trait WriteExt: EndianExt {
309    /// Writes exactly N bytes to the current stream.
310    ///
311    /// # Errors
312    /// Returns an error if the write operation fails.
313    fn write_exact<const N: usize>(&mut self, bytes: &[u8; N]) -> Result<(), DataError>;
314
315    /// Writes an unsigned 8-bit integer.
316    ///
317    /// # Errors
318    /// Returns an error if the write operation fails.
319    #[inline]
320    fn write_u8(&mut self, value: u8) -> Result<(), DataError> {
321        self.write_exact(&[value])
322    }
323
324    /// Writes a signed 8-bit integer.
325    ///
326    /// # Errors
327    /// Returns an error if the write operation fails.
328    #[inline]
329    fn write_i8(&mut self, value: i8) -> Result<(), DataError> {
330        self.write_u8(value as u8)
331    }
332
333    /// Writes an unsigned 16-bit integer.
334    ///
335    /// # Errors
336    /// Returns an error if the write operation fails.
337    #[inline]
338    fn write_u16(&mut self, value: u16) -> Result<(), DataError> {
339        let bytes = match self.endian() {
340            Endian::Little => value.to_le_bytes(),
341            Endian::Big => value.to_be_bytes(),
342        };
343        self.write_exact(&bytes)
344    }
345
346    /// Writes a signed 16-bit integer.
347    ///
348    /// # Errors
349    /// Returns an error if the write operation fails.
350    #[inline]
351    fn write_i16(&mut self, value: i16) -> Result<(), DataError> {
352        self.write_u16(value as u16)
353    }
354
355    /// Writes an unsigned 32-bit integer.
356    ///
357    /// # Errors
358    /// Returns an error if the write operation fails.
359    #[inline]
360    fn write_u32(&mut self, value: u32) -> Result<(), DataError> {
361        let bytes = match self.endian() {
362            Endian::Little => value.to_le_bytes(),
363            Endian::Big => value.to_be_bytes(),
364        };
365        self.write_exact(&bytes)
366    }
367
368    /// Writes a signed 32-bit integer.
369    ///
370    /// # Errors
371    /// Returns an error if the write operation fails.
372    #[inline]
373    fn write_i32(&mut self, value: i32) -> Result<(), DataError> {
374        self.write_u32(value as u32)
375    }
376
377    /// Writes an unsigned 64-bit integer.
378    ///
379    /// # Errors
380    /// Returns an error if the write operation fails.
381    #[inline]
382    fn write_u64(&mut self, value: u64) -> Result<(), DataError> {
383        let bytes = match self.endian() {
384            Endian::Little => value.to_le_bytes(),
385            Endian::Big => value.to_be_bytes(),
386        };
387        self.write_exact(&bytes)
388    }
389
390    /// Writes a signed 64-bit integer.
391    ///
392    /// # Errors
393    /// Returns an error if the write operation fails.
394    #[inline]
395    fn write_i64(&mut self, value: i64) -> Result<(), DataError> {
396        self.write_u64(value as u64)
397    }
398
399    /// Writes a 32-bit floating point number.
400    ///
401    /// # Errors
402    /// Returns an error if the write operation fails.
403    #[inline]
404    fn write_f32(&mut self, value: f32) -> Result<(), DataError> {
405        let bytes = match self.endian() {
406            Endian::Little => value.to_le_bytes(),
407            Endian::Big => value.to_be_bytes(),
408        };
409        self.write_exact(&bytes)
410    }
411
412    /// Writes a 64-bit floating point number.
413    ///
414    /// # Errors
415    /// Returns an error if the write operation fails.
416    #[inline]
417    fn write_f64(&mut self, value: f64) -> Result<(), DataError> {
418        let bytes = match self.endian() {
419            Endian::Little => value.to_le_bytes(),
420            Endian::Big => value.to_be_bytes(),
421        };
422        self.write_exact(&bytes)
423    }
424}
425
426/// An owned, in-memory file that allows endian-aware read and write.
427///
428/// This is architected to assume a fixed length, and is `no_std` compatible.
429#[derive(Debug)]
430pub struct DataCursor {
431    data: Box<[u8]>,
432    position: usize,
433    endian: Endian,
434}
435
436impl DataCursor {
437    /// Creates a new `DataCursor` with the given data and endianness.
438    #[inline]
439    pub fn new<I: Into<Box<[u8]>>>(data: I, endian: Endian) -> Self {
440        Self { data: data.into(), position: 0, endian }
441    }
442
443    /// Creates a new `DataCursor` with the given path and endianness.
444    ///
445    /// # Errors
446    /// Returns an error if the file does not exist or is unable to be opened.
447    #[cfg(feature = "std")]
448    #[inline]
449    pub fn from_path<P: AsRef<Path>>(path: P, endian: Endian) -> std::io::Result<Self> {
450        Ok(Self::new(std::fs::read(path)?, endian))
451    }
452
453    /// Consumes the `DataCursor` and returns the underlying data.
454    #[inline]
455    #[must_use]
456    pub fn into_inner(self) -> Box<[u8]> {
457        self.data
458    }
459
460    /// Shrinks the underlying data to the new length and returns the modified `DataCursor`.
461    #[inline]
462    #[must_use]
463    pub fn shrink_to(mut self, new_len: usize) -> Self {
464        // If the user tries to expand, just keep the current length.
465        if new_len < self.data.len() {
466            // Otherwise, modify the current buffer to drop all data past the desired length.
467            self.data = self.data[..new_len].into();
468            // Make sure our new position is within the bounds!
469            if self.position > new_len {
470                self.position = new_len;
471            }
472        }
473        self
474    }
475
476    /// Copies data from this `DataCursor` to another mutable slice.
477    #[inline]
478    pub fn copy_data_to(&self, other: &mut [u8]) {
479        let len = self.data.len().min(other.len());
480        // SAFETY: We have a valid length, other cannot overlap self since there's no way to acquire a mutable
481        // reference, and we will always have a valid alignment.
482        unsafe {
483            core::ptr::copy_nonoverlapping(self.data.as_ptr(), other.as_mut_ptr(), len);
484        }
485    }
486
487    /// Copies data within the `DataCursor` from one range to another position.
488    ///
489    /// Due to the way that Yaz0 and Yay0 compression work, if this function is used to copy overlapping
490    /// sections, the initial value will repeat itself. If you don't need this behavior, consider using a more
491    /// normal memcpy.
492    ///
493    /// # Example
494    /// ```
495    /// # use orthrus_core::prelude::*;
496    /// let mut cursor = DataCursor::new(vec![1, 2, 3, 4, 5].into_boxed_slice(), Endian::Little);
497    /// cursor.copy_within(1..4, 2).unwrap();
498    /// assert_eq!(&cursor.into_inner()[..], &[1, 2, 2, 2, 2]);
499    /// ```
500    ///
501    /// # Errors
502    /// Returns [`EndOfFile`](Error::EndOfFile) if either the source range or the destination range would be
503    /// out of bounds.
504    #[inline]
505    pub fn copy_within(&mut self, src: core::ops::Range<usize>, dest: usize) -> Result<(), DataError> {
506        let length = src.end.saturating_sub(src.start);
507        ensure!(
508            src.end <= self.data.len() && dest.saturating_add(length) <= self.data.len(),
509            EndOfFileSnafu
510        );
511
512        if src.contains(&dest) {
513            for i in 0..length {
514                // SAFETY: We want specific behavior if the ranges overlap, due to how Yaz0 compression works.
515                // Both ranges are within bounds and have a valid alignment.
516                unsafe {
517                    *self.data.as_mut_ptr().add(dest.saturating_add(i)) =
518                        *self.data.as_ptr().add(src.start.saturating_add(i));
519                }
520            }
521        } else {
522            // SAFETY: Both ranges are within bounds, do not overlap, and have a valid alignment.
523            unsafe {
524                core::ptr::copy_nonoverlapping(
525                    self.data.as_ptr().add(src.start),
526                    self.data.as_mut_ptr().add(dest),
527                    length,
528                );
529            }
530        }
531        Ok(())
532    }
533}
534
535impl EndianExt for DataCursor {
536    #[inline]
537    fn endian(&self) -> Endian {
538        self.endian
539    }
540
541    #[inline]
542    fn set_endian(&mut self, endian: Endian) {
543        self.endian = endian;
544    }
545}
546
547impl SeekExt for DataCursor {
548    #[inline]
549    fn position(&mut self) -> Result<u64, DataError> {
550        Ok(self.position as u64)
551    }
552
553    #[inline]
554    fn set_position(&mut self, position: u64) -> Result<u64, DataError> {
555        let pos = core::cmp::min(position, self.data.len() as u64);
556        self.position = pos as usize;
557        Ok(pos)
558    }
559
560    #[inline]
561    fn len(&mut self) -> Result<u64, DataError> {
562        Ok(self.data.len() as u64)
563    }
564
565    #[inline]
566    fn is_empty(&mut self) -> Result<bool, DataError> {
567        Ok(self.len()? - self.position()? == 0)
568    }
569}
570
571impl ReadExt for DataCursor {
572    #[inline]
573    fn read_exact<const N: usize>(&mut self) -> Result<[u8; N], DataError> {
574        ensure!(self.position.saturating_add(N) <= self.data.len(), EndOfFileSnafu);
575
576        let mut result: MaybeUninit<[u8; N]> = MaybeUninit::uninit();
577        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment. We use
578        // MaybeUninit here to skip some overhead when we immediately overwrite it with new data.
579        unsafe {
580            core::ptr::copy_nonoverlapping(
581                self.data.as_ptr().add(self.position),
582                result.as_mut_ptr().cast(),
583                N,
584            );
585        }
586        self.position = self.position.saturating_add(N);
587        // SAFETY: We've initialized this data, so this is safe.
588        Ok(unsafe { result.assume_init() })
589    }
590
591    #[inline]
592    fn read_length(&mut self, buffer: &mut [u8]) -> Result<usize, DataError> {
593        let length = buffer.len().min(self.data.len().saturating_sub(self.position));
594
595        // SAFETY: We're within the bounds of both `buf` and `self.data`, and will always have a valid
596        // alignment. There is no way to get a mutable reference to the inner data, so buffer cannot overlap.
597        unsafe {
598            let src_ptr = self.data.as_ptr().add(self.position);
599            core::ptr::copy_nonoverlapping(src_ptr, buffer.as_mut_ptr(), length);
600        }
601        self.position = self.position.saturating_add(length);
602        Ok(length)
603    }
604
605    #[inline]
606    #[cfg(not(feature = "alloc"))]
607    fn read_slice(&mut self, length: usize) -> Result<&[u8], DataError> {
608        ensure!(
609            self.position.saturating_add(length) <= self.data.len(),
610            EndOfFileSnafu
611        );
612
613        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
614        let result = unsafe {
615            let ptr = self.data.as_ptr().add(self.position);
616            core::slice::from_raw_parts(ptr, length)
617        };
618        self.position += length;
619        Ok(result)
620    }
621
622    #[inline]
623    #[cfg(feature = "alloc")]
624    fn read_slice(&mut self, length: usize) -> Result<Cow<[u8]>, DataError> {
625        ensure!(
626            self.position.saturating_add(length) <= self.data.len(),
627            EndOfFileSnafu
628        );
629
630        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
631        let result = unsafe {
632            let ptr = self.data.as_ptr().add(self.position);
633            core::slice::from_raw_parts(ptr, length)
634        };
635        self.position = self.position.saturating_add(length);
636        Ok(Cow::Borrowed(result))
637    }
638
639    #[inline]
640    #[cfg(not(feature = "alloc"))]
641    fn remaining_slice(&mut self) -> Result<&[u8], DataError> {
642        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
643        let result = unsafe {
644            let ptr = self.data.as_ptr().add(self.position);
645            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
646        };
647        self.position = self.data.len();
648        Ok(result)
649    }
650
651    #[inline]
652    #[cfg(feature = "alloc")]
653    fn remaining_slice(&mut self) -> Result<Cow<[u8]>, DataError> {
654        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
655        let result = unsafe {
656            let ptr = self.data.as_ptr().add(self.position);
657            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
658        };
659        self.position = self.data.len();
660        Ok(Cow::Borrowed(result))
661    }
662}
663
664impl WriteExt for DataCursor {
665    #[inline]
666    fn write_exact<const N: usize>(&mut self, bytes: &[u8; N]) -> Result<(), DataError> {
667        ensure!(self.position.saturating_add(N) <= self.data.len(), EndOfFileSnafu);
668
669        // SAFETY: We're within the bounds of `self.data`, `bytes` will always be valid, and we'll always have
670        // a valid alignment.
671        unsafe {
672            let dst_ptr = self.data.as_mut_ptr().add(self.position);
673            core::ptr::copy_nonoverlapping(bytes.as_ptr(), dst_ptr, N);
674        }
675        self.position = self.position.saturating_add(N);
676        Ok(())
677    }
678}
679
680impl From<Box<[u8]>> for DataCursor {
681    #[inline]
682    fn from(value: Box<[u8]>) -> Self {
683        Self { data: value, position: 0, endian: Endian::default() }
684    }
685}
686
/// Converts a byte vector into a boxed slice and wraps it, starting at position zero with the
/// default endianness.
#[cfg(feature = "std")]
impl From<Vec<u8>> for DataCursor {
    #[inline]
    fn from(value: Vec<u8>) -> Self {
        let boxed = value.into_boxed_slice();
        Self::new(boxed, Endian::default())
    }
}
694
695impl Deref for DataCursor {
696    type Target = [u8];
697
698    #[inline]
699    fn deref(&self) -> &Self::Target {
700        &self.data
701    }
702}
703
704impl DerefMut for DataCursor {
705    #[inline]
706    fn deref_mut(&mut self) -> &mut Self::Target {
707        &mut self.data
708    }
709}
710
711impl AsMut<[u8]> for DataCursor {
712    #[inline]
713    fn as_mut(&mut self) -> &mut [u8] {
714        &mut self.data
715    }
716}
717
718/// A borrowed, in-memory file that allows endian-aware read.
719///
720/// This is architected to assume a fixed length, and is `no_std` compatible.
721#[derive(Debug)]
722pub struct DataCursorRef<'a> {
723    data: &'a [u8],
724    position: usize,
725    endian: Endian,
726}
727
728impl<'a> DataCursorRef<'a> {
729    /// Creates a new `DataCursorRef` with the given data and endianness.
730    #[inline]
731    #[must_use]
732    pub const fn new(data: &'a [u8], endian: Endian) -> Self {
733        Self { data, position: 0, endian }
734    }
735
736    /// Consumes the `DataCursorRef` and returns the underlying data.
737    #[inline]
738    #[must_use]
739    pub const fn into_inner(self) -> &'a [u8] {
740        self.data
741    }
742
743    /// Copies data from this `DataCursorRef` to a mutable slice.
744    #[inline]
745    pub fn copy_data_to(&self, other: &mut [u8]) {
746        let len = self.data.len().min(other.len());
747        // SAFETY: We have a valid length, other cannot overlap self since there's no way to acquire a mutable
748        // reference, and we will always have a valid alignment.
749        unsafe {
750            core::ptr::copy_nonoverlapping(self.data.as_ptr(), other.as_mut_ptr(), len);
751        }
752    }
753}
754
755impl EndianExt for DataCursorRef<'_> {
756    #[inline]
757    fn endian(&self) -> Endian {
758        self.endian
759    }
760
761    #[inline]
762    fn set_endian(&mut self, endian: Endian) {
763        self.endian = endian;
764    }
765}
766
767impl SeekExt for DataCursorRef<'_> {
768    #[inline]
769    fn position(&mut self) -> Result<u64, DataError> {
770        Ok(self.position as u64)
771    }
772
773    #[inline]
774    fn set_position(&mut self, position: u64) -> Result<u64, DataError> {
775        let pos = core::cmp::min(position, self.data.len() as u64);
776        self.position = pos as usize;
777        Ok(pos)
778    }
779
780    #[inline]
781    fn len(&mut self) -> Result<u64, DataError> {
782        Ok(self.data.len() as u64)
783    }
784
785    #[inline]
786    fn is_empty(&mut self) -> Result<bool, DataError> {
787        Ok(self.len()? - self.position()? == 0)
788    }
789}
790
791impl ReadExt for DataCursorRef<'_> {
792    #[inline]
793    fn read_exact<const N: usize>(&mut self) -> Result<[u8; N], DataError> {
794        ensure!(self.position.saturating_add(N) <= self.data.len(), EndOfFileSnafu);
795
796        let mut result: MaybeUninit<[u8; N]> = MaybeUninit::uninit();
797        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment. We use
798        // MaybeUninit here to skip some overhead when we immediately overwrite it with new data.
799        unsafe {
800            core::ptr::copy_nonoverlapping(
801                self.data.as_ptr().add(self.position),
802                result.as_mut_ptr().cast(),
803                N,
804            );
805        }
806        self.position = self.position.saturating_add(N);
807        // SAFETY: We've initialized this with data, so it's safe.
808        Ok(unsafe { result.assume_init() })
809    }
810
811    #[inline]
812    fn read_length(&mut self, buffer: &mut [u8]) -> Result<usize, DataError> {
813        let length = buffer.len().min(self.data.len().saturating_sub(self.position));
814
815        // SAFETY: We're within the bounds of both `buf` and `self.data`, and will always have a valid
816        // alignment. There is no way to get a mutable reference to the inner data, so buffer cannot overlap.
817        unsafe {
818            let src_ptr = self.data.as_ptr().add(self.position);
819            core::ptr::copy_nonoverlapping(src_ptr, buffer.as_mut_ptr(), length);
820        }
821        self.position = self.position.saturating_add(length);
822        Ok(length)
823    }
824
825    #[inline]
826    #[cfg(not(feature = "alloc"))]
827    fn read_slice(&mut self, length: usize) -> Result<&[u8], DataError> {
828        ensure!(
829            self.position.saturating_add(length) <= self.data.len(),
830            EndOfFileSnafu
831        );
832
833        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
834        let result = unsafe {
835            let ptr = self.data.as_ptr().add(self.position);
836            core::slice::from_raw_parts(ptr, length)
837        };
838        self.position += length;
839        Ok(result)
840    }
841
842    #[inline]
843    #[cfg(feature = "alloc")]
844    fn read_slice(&mut self, length: usize) -> Result<Cow<[u8]>, DataError> {
845        ensure!(
846            self.position.saturating_add(length) <= self.data.len(),
847            EndOfFileSnafu
848        );
849
850        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
851        let result = unsafe {
852            let ptr = self.data.as_ptr().add(self.position);
853            core::slice::from_raw_parts(ptr, length)
854        };
855        self.position = self.position.saturating_add(length);
856        Ok(Cow::Borrowed(result))
857    }
858
859    #[inline]
860    #[cfg(not(feature = "alloc"))]
861    fn remaining_slice(&mut self) -> Result<&[u8], DataError> {
862        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
863        let result = unsafe {
864            let ptr = self.data.as_ptr().add(self.position);
865            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
866        };
867        self.position = self.data.len();
868        Ok(result)
869    }
870
871    #[inline]
872    #[cfg(feature = "alloc")]
873    fn remaining_slice(&mut self) -> Result<Cow<[u8]>, DataError> {
874        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
875        let result = unsafe {
876            let ptr = self.data.as_ptr().add(self.position);
877            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
878        };
879        self.position = self.data.len();
880        Ok(Cow::Borrowed(result))
881    }
882}
883
884impl Deref for DataCursorRef<'_> {
885    type Target = [u8];
886
887    #[inline]
888    fn deref(&self) -> &Self::Target {
889        self.data
890    }
891}
892
893/// A mutable, in-memory file that allows endian-aware read and write.
894///
895/// This is architected to assume a fixed length, and is `no_std` compatible.
896#[derive(Debug)]
897pub struct DataCursorMut<'a> {
898    data: &'a mut [u8],
899    position: usize,
900    endian: Endian,
901}
902
903impl<'a> DataCursorMut<'a> {
904    /// Creates a new `DataCursorMut` with the given data and endianness.
905    #[inline]
906    pub fn new(data: &'a mut [u8], endian: Endian) -> Self {
907        Self { data, position: 0, endian }
908    }
909
910    /// Consumes the `DataCursorMut` and returns the underlying data.
911    #[inline]
912    #[must_use]
913    pub fn into_inner(self) -> &'a mut [u8] {
914        self.data
915    }
916
917    /// Copies data from this `DataCursorMut` to another mutable slice.
918    #[inline]
919    pub fn copy_data_to(&self, other: &mut [u8]) {
920        let len = self.data.len().min(other.len());
921        // SAFETY: We're within bounds of both slices, and they don't overlap.
922        unsafe {
923            core::ptr::copy_nonoverlapping(self.data.as_ptr(), other.as_mut_ptr(), len);
924        }
925    }
926
927    /// Copies data within the `DataCursorMut` from one range to another position.
928    ///
929    /// Due to the way that Yaz0 and Yay0 compression work, if this function is used to copy overlapping
930    /// sections, the initial value will repeat itself. If you don't need this behavior, consider using a more
931    /// normal memcpy.
932    ///
933    /// # Example
934    /// ```
935    /// # use orthrus_core::prelude::*;
936    /// let mut data = [1, 2, 3, 4, 5];
937    /// let mut cursor = DataCursorMut::new(&mut data, Endian::Little);
938    /// cursor.copy_within(1..4, 2).unwrap();
939    /// assert_eq!(&cursor.into_inner()[..], &[1, 2, 2, 2, 2]);
940    /// ```
941    ///
942    /// # Errors
943    /// Returns [`EndOfFile`](Error::EndOfFile) if either the source range or the destination range would be
944    /// out of bounds.
945    #[inline]
946    pub fn copy_within(&mut self, src: core::ops::Range<usize>, dest: usize) -> Result<(), DataError> {
947        let length = src.end.saturating_sub(src.start);
948        ensure!(
949            src.end <= self.data.len() && dest.saturating_add(length) <= self.data.len(),
950            EndOfFileSnafu
951        );
952
953        if src.contains(&dest) {
954            for i in 0..length {
955                // SAFETY: We want specific behavior if they do overlap, due to how Yaz0 compression works.
956                // Both ranges are within bounds and have a valid alignment.
957                unsafe {
958                    *self.data.as_mut_ptr().add(dest.saturating_add(i)) =
959                        *self.data.as_ptr().add(src.start.saturating_add(i));
960                }
961            }
962        } else {
963            // SAFETY: Both ranges are within bounds, do not overlap, and have a valid alignment.
964            unsafe {
965                core::ptr::copy_nonoverlapping(
966                    self.data.as_ptr().add(src.start),
967                    self.data.as_mut_ptr().add(dest),
968                    length,
969                );
970            }
971        }
972        Ok(())
973    }
974}
975
976impl EndianExt for DataCursorMut<'_> {
977    #[inline]
978    fn endian(&self) -> Endian {
979        self.endian
980    }
981
982    #[inline]
983    fn set_endian(&mut self, endian: Endian) {
984        self.endian = endian;
985    }
986}
987
988impl SeekExt for DataCursorMut<'_> {
989    #[inline]
990    fn position(&mut self) -> Result<u64, DataError> {
991        Ok(self.position as u64)
992    }
993
994    #[inline]
995    fn set_position(&mut self, position: u64) -> Result<u64, DataError> {
996        let pos = core::cmp::min(position, self.data.len() as u64);
997        self.position = pos as usize;
998        Ok(pos)
999    }
1000
1001    #[inline]
1002    fn len(&mut self) -> Result<u64, DataError> {
1003        Ok(self.data.len() as u64)
1004    }
1005
1006    #[inline]
1007    fn is_empty(&mut self) -> Result<bool, DataError> {
1008        Ok(self.len()? - self.position()? == 0)
1009    }
1010}
1011
1012impl ReadExt for DataCursorMut<'_> {
1013    #[inline]
1014    fn read_exact<const N: usize>(&mut self) -> Result<[u8; N], DataError> {
1015        ensure!(self.position.saturating_add(N) <= self.data.len(), EndOfFileSnafu);
1016
1017        let mut result: MaybeUninit<[u8; N]> = MaybeUninit::uninit();
1018        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment. We use
1019        // MaybeUninit here to skip some overhead when we immediately overwrite it with new data.
1020        unsafe {
1021            core::ptr::copy_nonoverlapping(
1022                self.data.as_ptr().add(self.position),
1023                result.as_mut_ptr().cast(),
1024                N,
1025            );
1026        }
1027        self.position = self.position.saturating_add(N);
1028        // SAFETY: We've initialized this with data, so it's safe.
1029        Ok(unsafe { result.assume_init() })
1030    }
1031
1032    #[inline]
1033    fn read_length(&mut self, buffer: &mut [u8]) -> Result<usize, DataError> {
1034        let length = buffer.len().min(self.data.len().saturating_sub(self.position));
1035
1036        // SAFETY: We're within the bounds of both `buf` and `self.data`, and will always have a valid
1037        // alignment. There is no way to get a mutable reference to the inner data, so buffer cannot overlap.
1038        unsafe {
1039            let src_ptr = self.data.as_ptr().add(self.position);
1040            core::ptr::copy_nonoverlapping(src_ptr, buffer.as_mut_ptr(), length);
1041        }
1042        self.position = self.position.saturating_add(length);
1043        Ok(length)
1044    }
1045
1046    #[inline]
1047    #[cfg(not(feature = "alloc"))]
1048    fn read_slice(&mut self, length: usize) -> Result<&[u8], DataError> {
1049        ensure!(
1050            self.position.saturating_add(length) <= self.data.len(),
1051            EndOfFileSnafu
1052        );
1053
1054        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
1055        let result = unsafe {
1056            let ptr = self.data.as_ptr().add(self.position);
1057            core::slice::from_raw_parts(ptr, length)
1058        };
1059        self.position += length;
1060        Ok(result)
1061    }
1062
1063    #[inline]
1064    #[cfg(feature = "alloc")]
1065    fn read_slice(&mut self, length: usize) -> Result<Cow<[u8]>, DataError> {
1066        ensure!(
1067            self.position.saturating_add(length) <= self.data.len(),
1068            EndOfFileSnafu
1069        );
1070
1071        // SAFETY: We're within bounds of `self.data` and will always have a valid alignment.
1072        let result = unsafe {
1073            let ptr = self.data.as_ptr().add(self.position);
1074            core::slice::from_raw_parts(ptr, length)
1075        };
1076        self.position = self.position.saturating_add(length);
1077        Ok(Cow::Borrowed(result))
1078    }
1079
1080    #[inline]
1081    #[cfg(not(feature = "alloc"))]
1082    fn remaining_slice(&mut self) -> Result<&[u8], DataError> {
1083        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
1084        let result = unsafe {
1085            let ptr = self.data.as_ptr().add(self.position);
1086            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
1087        };
1088        self.position = self.data.len();
1089        Ok(result)
1090    }
1091
1092    #[inline]
1093    #[cfg(feature = "alloc")]
1094    fn remaining_slice(&mut self) -> Result<Cow<[u8]>, DataError> {
1095        // SAFETY: We're within bounds since we're reading to the end, and will always have a valid alignment.
1096        let result = unsafe {
1097            let ptr = self.data.as_ptr().add(self.position);
1098            core::slice::from_raw_parts(ptr, self.data.len().saturating_sub(self.position))
1099        };
1100        self.position = self.data.len();
1101        Ok(Cow::Borrowed(result))
1102    }
1103}
1104
1105impl WriteExt for DataCursorMut<'_> {
1106    #[inline]
1107    fn write_exact<const N: usize>(&mut self, bytes: &[u8; N]) -> Result<(), DataError> {
1108        ensure!(self.position.saturating_add(N) <= self.data.len(), EndOfFileSnafu);
1109
1110        // SAFETY: We're within the bounds of `self.data`, `bytes` will always be valid, and we'll always have
1111        // a valid alignment.
1112        unsafe {
1113            let dst_ptr = self.data.as_mut_ptr().add(self.position);
1114            core::ptr::copy_nonoverlapping(bytes.as_ptr(), dst_ptr, N);
1115        }
1116        self.position = self.position.saturating_add(N);
1117        Ok(())
1118    }
1119}
1120
1121impl Deref for DataCursorMut<'_> {
1122    type Target = [u8];
1123
1124    #[inline]
1125    fn deref(&self) -> &Self::Target {
1126        self.data
1127    }
1128}
1129
1130impl DerefMut for DataCursorMut<'_> {
1131    #[inline]
1132    fn deref_mut(&mut self) -> &mut Self::Target {
1133        self.data
1134    }
1135}
1136
1137impl AsMut<[u8]> for DataCursorMut<'_> {
1138    #[inline]
1139    fn as_mut(&mut self) -> &mut [u8] {
1140        self.data
1141    }
1142}
1143
1144/// A stream that allows endian-aware read and write.
1145///
1146/// This struct is generic over any type `T` that implements some combination of
1147/// `Read`, `Write`, and `Seek`. Methods are conditionally available based on
1148/// the traits implemented by `T`.
1149#[derive(Debug)]
1150pub struct DataStream<T> {
1151    inner: T,
1152    endian: Endian,
1153}
1154
1155impl<T> DataStream<T> {
1156    /// Creates a new `DataStream` with the given inner stream and endianness.
1157    #[inline]
1158    pub const fn new(inner: T, endian: Endian) -> Self {
1159        Self { inner, endian }
1160    }
1161}
1162
1163impl<T> EndianExt for DataStream<T> {
1164    #[inline]
1165    fn endian(&self) -> Endian {
1166        self.endian
1167    }
1168
1169    #[inline]
1170    fn set_endian(&mut self, endian: Endian) {
1171        self.endian = endian;
1172    }
1173}
1174
1175impl<T: Seek> SeekExt for DataStream<T> {
1176    #[inline]
1177    fn position(&mut self) -> Result<u64, DataError> {
1178        self.inner.stream_position().context(IoSnafu)
1179    }
1180
1181    #[inline]
1182    fn set_position(&mut self, position: u64) -> Result<u64, DataError> {
1183        self.inner.seek(SeekFrom::Start(position)).context(IoSnafu)
1184    }
1185
1186    /// Returns the total length of the data.
1187    ///
1188    /// Note that this can be an expensive operation due to seeking. You should instead use something like
1189    /// [`std::fs::Metadata::len`].
1190    ///
1191    /// # Errors
1192    /// Returns an error if unable to determine the length of the stream.
1193    #[inline]
1194    fn len(&mut self) -> Result<u64, DataError> {
1195        let old_pos = self.stream_position().context(IoSnafu)?;
1196        let len = self.seek(SeekFrom::End(0)).context(IoSnafu)?;
1197
1198        // Avoid seeking a third time when we were already at the end of the
1199        // stream. The branch is usually way cheaper than a seek operation.
1200        if old_pos != len {
1201            self.seek(SeekFrom::Start(old_pos)).context(IoSnafu)?;
1202        }
1203
1204        Ok(len)
1205    }
1206
1207    /// Returns `true` if the remaining data is empty.
1208    ///
1209    /// Note that this can be an expensive operation due to seeking.
1210    ///
1211    /// # Errors
1212    /// Returns an error if unable to determine either the length of the stream or the position inside it.
1213    #[inline]
1214    fn is_empty(&mut self) -> Result<bool, DataError> {
1215        let old_pos = self.stream_position().context(IoSnafu)?;
1216        let len = self.seek(SeekFrom::End(0)).context(IoSnafu)?;
1217
1218        // Avoid seeking a third time when we were already at the end of the
1219        // stream. The branch is usually way cheaper than a seek operation.
1220        if old_pos != len {
1221            self.seek(SeekFrom::Start(old_pos)).context(IoSnafu)?;
1222        }
1223
1224        Ok((len - old_pos) == 0)
1225    }
1226}
1227
1228impl<T: Read> ReadExt for DataStream<T> {
1229    #[inline]
1230    fn read_exact<const N: usize>(&mut self) -> Result<[u8; N], DataError> {
1231        let mut buffer = [0u8; N];
1232        self.inner.read_exact(&mut buffer).context(IoSnafu)?;
1233        Ok(buffer)
1234    }
1235
1236    #[inline]
1237    fn read_length(&mut self, buffer: &mut [u8]) -> Result<usize, DataError> {
1238        match self.inner.read_exact(buffer) {
1239            Ok(()) => Ok(buffer.len()),
1240            Err(e) if e.kind() == ErrorKind::UnexpectedEof => self.inner.read(buffer).context(IoSnafu),
1241            Err(e) => Err(DataError::Io { source: e }),
1242        }
1243    }
1244
1245    #[inline]
1246    fn read_slice(&mut self, length: usize) -> Result<Cow<[u8]>, DataError> {
1247        let mut buffer = vec![0u8; length];
1248        self.inner.read_exact(&mut buffer).context(IoSnafu)?;
1249        Ok(Cow::Owned(buffer))
1250    }
1251
1252    #[inline]
1253    fn remaining_slice(&mut self) -> Result<Cow<[u8]>, DataError> {
1254        let mut buffer = Vec::new();
1255        self.inner.read_to_end(&mut buffer).context(IoSnafu)?;
1256        Ok(Cow::Owned(buffer))
1257    }
1258}
1259
1260impl<T: Write> WriteExt for DataStream<T> {
1261    #[inline]
1262    fn write_exact<const N: usize>(&mut self, bytes: &[u8; N]) -> Result<(), DataError> {
1263        self.inner.write_all(bytes).context(IoSnafu)
1264    }
1265}
1266
1267impl<T> Deref for DataStream<T> {
1268    type Target = T;
1269
1270    #[inline]
1271    fn deref(&self) -> &Self::Target {
1272        &self.inner
1273    }
1274}
1275
1276impl<T> DerefMut for DataStream<T> {
1277    #[inline]
1278    fn deref_mut(&mut self) -> &mut Self::Target {
1279        &mut self.inner
1280    }
1281}
1282
1283// TODO: these are a placeholder solution until specialization is stabilized
1284// https://github.com/rust-lang/rust/issues/31844
1285/// Trait to convert data types into an endian-aware stream.
1286///
1287/// # Example
1288/// ```
1289/// # use orthrus_core::prelude::*;
1290/// fn parse_data<T: IntoDataStream>(input: T) {
1291///     let mut data = input.into_stream(Endian::Little);
1292/// }
1293/// ```
1294pub trait IntoDataStream {
1295    type Reader: ReadExt + SeekExt;
1296
1297    fn into_stream(self, endian: Endian) -> Self::Reader;
1298}
1299
1300impl IntoDataStream for Box<[u8]> {
1301    type Reader = DataCursor;
1302
1303    fn into_stream(self, endian: Endian) -> Self::Reader {
1304        DataCursor::new(self, endian)
1305    }
1306}
1307
1308impl<'a> IntoDataStream for &'a [u8] {
1309    type Reader = DataCursorRef<'a>;
1310
1311    fn into_stream(self, endian: Endian) -> Self::Reader {
1312        DataCursorRef::new(self, endian)
1313    }
1314}
1315
1316impl<'a> IntoDataStream for &'a mut [u8] {
1317    type Reader = DataCursorMut<'a>;
1318
1319    fn into_stream(self, endian: Endian) -> Self::Reader {
1320        DataCursorMut::new(self, endian)
1321    }
1322}
1323
1324impl IntoDataStream for &File {
1325    type Reader = DataStream<Self>;
1326
1327    fn into_stream(self, endian: Endian) -> Self::Reader {
1328        DataStream::new(self, endian)
1329    }
1330}
1331
1332impl IntoDataStream for File {
1333    type Reader = DataStream<Self>;
1334
1335    fn into_stream(self, endian: Endian) -> Self::Reader {
1336        DataStream::new(self, endian)
1337    }
1338}
1339
1340impl IntoDataStream for Arc<File> {
1341    type Reader = DataStream<Self>;
1342
1343    fn into_stream(self, endian: Endian) -> Self::Reader {
1344        DataStream::new(self, endian)
1345    }
1346}
1347
1348impl IntoDataStream for Empty {
1349    type Reader = DataStream<Self>;
1350
1351    fn into_stream(self, endian: Endian) -> Self::Reader {
1352        DataStream::new(self, endian)
1353    }
1354}
1355
1356impl<R: Read + Seek> IntoDataStream for Box<R> {
1357    type Reader = DataStream<Self>;
1358
1359    fn into_stream(self, endian: Endian) -> Self::Reader {
1360        DataStream::new(self, endian)
1361    }
1362}
1363
1364impl<R: Read + Seek> IntoDataStream for BufReader<R> {
1365    type Reader = DataStream<Self>;
1366
1367    fn into_stream(self, endian: Endian) -> Self::Reader {
1368        DataStream::new(self, endian)
1369    }
1370}
1371
1372impl<T: AsRef<[u8]>> IntoDataStream for Cursor<T> {
1373    type Reader = DataStream<Self>;
1374
1375    fn into_stream(self, endian: Endian) -> Self::Reader {
1376        DataStream::new(self, endian)
1377    }
1378}