binrw/
file_ptr.rs

1//! Type definitions and helpers for handling indirection within a file.
2//!
3//! # Best practices
4//!
5//! Indirections that are not collections (e.g. a single offset to a global file
6//! header) can use `FilePtr` to immediately read the offset and then parse the
7//! pointed-to value. However, using `FilePtr` inside a collection is
8//! inefficient because it seeks to and reads each pointed-to value immediately
9//! after the offset is read. In these cases, it is faster to read the offset
10//! table into a collection (e.g. `Vec<u32>`) and then either pass it to
11//! [`parse_from_iter`] or write a function that is called to lazily load values
12//! as needed.
13//!
14//! ## Using `parse_from_iter` to read an offset table
15//!
16//! ### With relative offsets
17//!
18//! In this example, the offsets in the offset table start counting from the
19//! beginning of the values section, and are in a random order.
20//!
21//! Since the values section exists immediately after the offset table, no
22//! seeking is required before reading the values.
23//!
24//! Since the offsets are in a random order, the position of the stream must be
25//! returned to a known state using `restore_position` on the values field.
26//! Then, `seek_before` is used on the next field to skip past the values data
27//! and continue reading the rest of the object.
28//!
29//! ```
30//! # use binrw::{args, BinRead, BinReaderExt, io::{Cursor, SeekFrom}};
31//! use binrw::file_ptr::parse_from_iter;
32//!
33//! #[derive(BinRead)]
34//! #[br(big)]
35//! struct Object {
36//!     count: u16,
37//!     #[br(args { count: count.into() })]
38//!     offsets: Vec<u16>,
39//!     #[br(parse_with = parse_from_iter(offsets.iter().copied()), restore_position)]
40//!     values: Vec<u8>,
41//!     #[br(seek_before(SeekFrom::Current(count.into())))]
42//!     extra: u16,
43//! }
44//!
45//! # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04\xff\xff");
46//! # let x = Object::read(&mut x).unwrap();
47//! # assert_eq!(x.values, &[4, 3]);
48//! # assert_eq!(x.extra, 0xffff);
49//! ```
50//!
51//! ### With absolute offsets
52//!
53//! In this example, the offsets in the offset table start from the beginning of
54//! the file, and are in sequential order.
55//!
56//! Since the offsets start from the beginning of the file, it is necessary to
57//! use `seek_before` to reposition the stream to the beginning of the file
58//! before reading the values.
59//!
60//! Since the offsets are in order, no seeking is required after the values are
61//! read, since the stream will already be pointed at the end of the values
62//! section.
63//!
64//! ```
65//! # use binrw::{args, BinRead, BinReaderExt, io::{Cursor, SeekFrom}};
66//! use binrw::file_ptr::parse_from_iter;
67//!
68//! #[derive(BinRead)]
69//! #[br(big)]
70//! struct Object {
71//!     count: u16,
72//!     #[br(args { count: count.into() })]
73//!     offsets: Vec<u16>,
74//!     #[br(
75//!         parse_with = parse_from_iter(offsets.iter().copied()),
76//!         seek_before(SeekFrom::Start(0))
77//!     )]
78//!     values: Vec<u8>,
79//!     extra: u16,
80//! }
81//!
82//! # let mut x = Cursor::new(b"\0\x02\0\x06\0\x07\x04\x03\xff\xff");
83//! # let x = Object::read(&mut x).unwrap();
84//! # assert_eq!(x.values, &[4, 3]);
85//! # assert_eq!(x.extra, 0xffff);
86//! ```
87//!
88//! ## Using a function to lazily load values
89//!
90//! In this example, only the offset table is parsed. Values pointed to by the
91//! offset table are loaded on demand by calling `Object::get` as needed at
92//! runtime.
93//!
94//! ```
95//! # use binrw::{args, BinRead, BinResult, BinReaderExt, helpers::until_eof, io::{Cursor, Read, Seek, SeekFrom}};
96//!
97//! #[derive(BinRead)]
98//! # #[derive(Debug, Eq, PartialEq)]
99//! #[br(big)]
100//! struct Item(u8);
101//!
102//! #[derive(BinRead)]
103//! #[br(big, stream = s)]
104//! struct Object {
105//!     count: u16,
106//!     #[br(args { count: count.into() })]
107//!     offsets: Vec<u16>,
108//!     #[br(try_calc = s.stream_position())]
109//!     data_offset: u64,
110//! }
111//!
112//! impl Object {
113//!     pub fn get<R: Read + Seek>(&self, source: &mut R, index: usize) -> Option<BinResult<Item>> {
114//!         self.offsets.get(index).map(|offset| {
115//!             let offset = self.data_offset + u64::from(*offset);
116//!             source.seek(SeekFrom::Start(offset))?;
117//!             Item::read(source)
118//!         })
119//!     }
120//! }
121//!
122//! # let mut s = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
123//! # let x = Object::read(&mut s).unwrap();
124//! # assert!(matches!(x.get(&mut s, 0), Some(Ok(Item(4)))));
125//! # assert!(matches!(x.get(&mut s, 1), Some(Ok(Item(3)))));
126//! # assert!(matches!(x.get(&mut s, 2), None));
127//! ```
128
129use crate::NamedArgs;
130use crate::{
131    io::{Read, Seek, SeekFrom},
132    BinRead, BinResult, Endian,
133};
134use core::num::{
135    NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroU128, NonZeroU16,
136    NonZeroU32, NonZeroU64, NonZeroU8,
137};
138use core::ops::{Deref, DerefMut};
139
140/// A type alias for [`FilePtr`] with 8-bit offsets.
141pub type FilePtr8<T> = FilePtr<u8, T>;
142/// A type alias for [`FilePtr`] with 16-bit offsets.
143pub type FilePtr16<T> = FilePtr<u16, T>;
144/// A type alias for [`FilePtr`] with 32-bit offsets.
145pub type FilePtr32<T> = FilePtr<u32, T>;
146/// A type alias for [`FilePtr`] with 64-bit offsets.
147pub type FilePtr64<T> = FilePtr<u64, T>;
148/// A type alias for [`FilePtr`] with 128-bit offsets.
149pub type FilePtr128<T> = FilePtr<u128, T>;
150
151/// A type alias for [`FilePtr`] with non-zero 8-bit offsets.
152pub type NonZeroFilePtr8<T> = FilePtr<NonZeroU8, T>;
153/// A type alias for [`FilePtr`] with non-zero 16-bit offsets.
154pub type NonZeroFilePtr16<T> = FilePtr<NonZeroU16, T>;
155/// A type alias for [`FilePtr`] with non-zero 32-bit offsets.
156pub type NonZeroFilePtr32<T> = FilePtr<NonZeroU32, T>;
157/// A type alias for [`FilePtr`] with non-zero 64-bit offsets.
158pub type NonZeroFilePtr64<T> = FilePtr<NonZeroU64, T>;
159/// A type alias for [`FilePtr`] with non-zero 128-bit offsets.
160pub type NonZeroFilePtr128<T> = FilePtr<NonZeroU128, T>;
161
162/// A wrapper type which represents a layer of indirection within a file.
163///
164/// The pointer type `Ptr` is an offset to a value within the data stream, and
165/// the value type `T` is the value at that offset. [Dereferencing] a `FilePtr`
166/// yields the pointed-to value. When deriving `BinRead`, the
167/// [offset](crate::docs::attribute#offset) directive can be used to adjust the
168/// offset before the pointed-to value is read.
169///
170/// `FilePtr` is not efficient when reading offset tables; see the
171/// [module documentation](binrw::file_ptr) for more information.
172///
173/// [Dereferencing]: core::ops::Deref
174///
175/// # Examples
176///
177/// ```
178/// # use binrw::{prelude::*, io::Cursor, FilePtr};
179/// #
180/// #[derive(BinRead)]
181/// struct Test {
182///     indirect_value: FilePtr<u32, u8>
183/// }
184///
185/// let test: Test = Cursor::new(b"\0\0\0\x08\0\0\0\0\xff").read_be().unwrap();
186/// assert_eq!(test.indirect_value.ptr, 8);
187/// assert_eq!(*test.indirect_value, 0xFF);
188/// ```
189///
190/// Example data mapped out:
191///
192/// ```hex
193///           [pointer]           [value]
194/// 00000000: 0000 0008 0000 0000 ff                   ............
195/// ```
196#[derive(Debug, Eq)]
197pub struct FilePtr<Ptr: IntoSeekFrom, T> {
198    /// The raw offset to the value.
199    pub ptr: Ptr,
200
201    /// The pointed-to value.
202    pub value: T,
203}
204
205impl<Ptr, Value> BinRead for FilePtr<Ptr, Value>
206where
207    Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
208    Value: BinRead,
209{
210    type Args<'a> = FilePtrArgs<Value::Args<'a>>;
211
212    fn read_options<R: Read + Seek>(
213        reader: &mut R,
214        endian: Endian,
215        args: Self::Args<'_>,
216    ) -> BinResult<Self> {
217        let ptr = Ptr::read_options(reader, endian, ())?;
218        let value = Self::read_value(ptr, Value::read_options, reader, endian, args)?;
219        Ok(FilePtr { ptr, value })
220    }
221}
222
223impl<Ptr, Value> FilePtr<Ptr, Value>
224where
225    Ptr: IntoSeekFrom,
226{
227    /// Reads an offset, then seeks to and parses the pointed-to value using the
228    /// [`BinRead`] implementation for `Value`. Returns the pointed-to value.
229    ///
230    /// # Errors
231    ///
232    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
233    #[binrw::parser(reader, endian)]
234    pub fn parse<Args>(args: FilePtrArgs<Args>, ...) -> BinResult<Value>
235    where
236        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
237        Value: for<'a> BinRead<Args<'a> = Args>,
238    {
239        Self::read_options(reader, endian, args).map(Self::into_inner)
240    }
241
242    /// Creates a parser that reads an offset, then seeks to and parses the
243    /// pointed-to value using the given `parser` function. Returns the
244    /// pointed-to value.
245    ///
246    /// # Errors
247    ///
248    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// # use binrw::{helpers::read_u24, prelude::*};
254    /// use binrw::FilePtr16;
255    ///
256    /// #[derive(BinRead)]
257    /// struct Test {
258    ///     #[br(parse_with = FilePtr16::parse_with(read_u24))]
259    ///     value: u32
260    /// }
261    ///
262    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x07\x0f\x10");
263    /// let result = Test::read_le(&mut data).unwrap();
264    /// assert_eq!(result.value, 0x100f07);
265    /// ```
266    pub fn parse_with<R, F, Args>(
267        parser: F,
268    ) -> impl Fn(&mut R, Endian, FilePtrArgs<Args>) -> BinResult<Value>
269    where
270        R: Read + Seek,
271        F: Fn(&mut R, Endian, Args) -> BinResult<Value>,
272        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
273    {
274        let parser = Self::with(parser);
275        move |reader, endian, args| parser(reader, endian, args).map(Self::into_inner)
276    }
277
278    /// Creates a parser that reads an offset, then seeks to and parses the
279    /// pointed-to value using the given `parser` function. Returns a
280    /// [`FilePtr`] containing the offset and value.
281    ///
282    /// # Errors
283    ///
284    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// # use binrw::{helpers::read_u24, prelude::*};
290    /// use binrw::FilePtr16;
291    ///
292    /// #[derive(BinRead)]
293    /// struct Test {
294    ///     #[br(parse_with = FilePtr16::with(read_u24))]
295    ///     value: FilePtr16<u32>
296    /// }
297    ///
298    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x07\x0f\x10");
299    /// let result = Test::read_le(&mut data).unwrap();
300    /// assert_eq!(result.value.ptr, 2);
301    /// assert_eq!(result.value.value, 0x100f07);
302    /// ```
303    pub fn with<R, F, Args>(
304        parser: F,
305    ) -> impl Fn(&mut R, Endian, FilePtrArgs<Args>) -> BinResult<Self>
306    where
307        R: Read + Seek,
308        F: Fn(&mut R, Endian, Args) -> BinResult<Value>,
309        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
310    {
311        move |reader, endian, args| {
312            let ptr = Ptr::read_options(reader, endian, ())?;
313            let value = Self::read_value(ptr, &parser, reader, endian, args)?;
314            Ok(Self { ptr, value })
315        }
316    }
317
318    /// Consumes this object, returning the pointed-to value.
319    pub fn into_inner(self) -> Value {
320        self.value
321    }
322
323    fn read_value<R, Parser, Args>(
324        ptr: Ptr,
325        parser: Parser,
326        reader: &mut R,
327        endian: Endian,
328        args: FilePtrArgs<Args>,
329    ) -> BinResult<Value>
330    where
331        R: Read + Seek,
332        Parser: FnOnce(&mut R, Endian, Args) -> BinResult<Value>,
333    {
334        let relative_to = args.offset;
335        let before = reader.stream_position()?;
336        reader.seek(SeekFrom::Start(relative_to))?;
337        reader.seek(ptr.into_seek_from())?;
338        let value = parser(reader, endian, args.inner);
339        reader.seek(SeekFrom::Start(before))?;
340        value
341    }
342}
343
344impl<Ptr, Value> Deref for FilePtr<Ptr, Value>
345where
346    Ptr: IntoSeekFrom,
347{
348    type Target = Value;
349
350    /// Dereferences the value stored by `FilePtr`.
351    ///
352    /// # Examples
353    ///
354    /// ```
355    /// # use binrw::{prelude::*};
356    /// use binrw::FilePtr16;
357    ///
358    /// #[derive(BinRead)]
359    /// struct Test {
360    ///     value: FilePtr16<u16>
361    /// }
362    ///
363    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x01\x00");
364    /// let result = Test::read_le(&mut data).unwrap();
365    /// assert_eq!(result.value.ptr, 2);
366    /// assert_eq!(result.value.value, 1);
367    /// assert_eq!(*result.value, 1);
368    /// ```
369    fn deref(&self) -> &Self::Target {
370        &self.value
371    }
372}
373
374impl<Ptr, Value> DerefMut for FilePtr<Ptr, Value>
375where
376    Ptr: IntoSeekFrom,
377{
378    /// Mutably dereferences the value stored by `FilePtr`.
379    ///
380    /// # Examples
381    ///
382    /// ```
383    /// # use binrw::{prelude::*};
384    /// use binrw::FilePtr16;
385    ///
386    /// #[derive(BinRead)]
387    /// struct Test {
388    ///     value: FilePtr16<u16>
389    /// }
390    ///
391    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x01\x00");
392    /// let mut result = Test::read_le(&mut data).unwrap();
393    /// assert_eq!(result.value.ptr, 2);
394    /// assert_eq!(result.value.value, 1);
395    /// *result.value = 42;
396    /// assert_eq!(result.value.value, 42);
397    /// ```
398    fn deref_mut(&mut self) -> &mut Value {
399        &mut self.value
400    }
401}
402
403impl<Ptr, Value> PartialEq<FilePtr<Ptr, Value>> for FilePtr<Ptr, Value>
404where
405    Ptr: IntoSeekFrom,
406    Value: PartialEq,
407{
408    fn eq(&self, other: &Self) -> bool {
409        self.value == other.value
410    }
411}
412
413/// Creates a parser that reads a collection of values from an iterator of
414/// file offsets using the [`BinRead`] implementation of `Value`.
415///
416/// Offsets are treated as relative to the position of the reader when
417/// parsing begins. Use the [`seek_before`] directive to reposition the
418/// stream in this case.
419///
420/// See the [module documentation](binrw::file_ptr) for more information on how
421/// use `parse_from_iter`.
422///
423/// [`seek_before`]: crate::docs::attribute#padding-and-alignment
424///
425/// # Examples
426///
427/// ```
428/// # use binrw::{args, BinRead, BinReaderExt, io::Cursor};
429/// #[derive(BinRead)]
430/// #[br(big)]
431/// struct Header {
432///     count: u16,
433///
434///     #[br(args { count: count.into() })]
435///     offsets: Vec<u16>,
436/// }
437///
438/// #[derive(BinRead)]
439/// #[br(big)]
440/// struct Object {
441///     header: Header,
442///     #[br(parse_with = binrw::file_ptr::parse_from_iter(header.offsets.iter().copied()))]
443///     values: Vec<u8>,
444/// }
445///
446/// # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
447/// # let x = Object::read(&mut x).unwrap();
448/// # assert_eq!(x.values, &[4, 3]);
449/// ```
450pub fn parse_from_iter<Ptr, Value, Ret, Args, It, Reader>(
451    it: It,
452) -> impl FnOnce(&mut Reader, Endian, Args) -> BinResult<Ret>
453where
454    Ptr: IntoSeekFrom,
455    Value: for<'a> BinRead<Args<'a> = Args>,
456    Ret: FromIterator<Value>,
457    Args: Clone,
458    It: IntoIterator<Item = Ptr>,
459    Reader: Read + Seek,
460{
461    parse_from_iter_with(it, Value::read_options)
462}
463
464/// Creates a parser that reads a collection of values from an iterator of
465/// file offsets using the given `parser` function.
466///
467/// Offsets are treated as relative to the position of the reader when
468/// parsing begins. Use the [`seek_before`] directive to reposition the
469/// stream in this case.
470///
471/// See the [module documentation](binrw::file_ptr) for more information on how
472/// to use `parse_from_iter_with`.
473///
474/// [`seek_before`]: crate::docs::attribute#padding-and-alignment
475///
476/// # Examples
477///
478/// ```
479/// # use binrw::{args, BinRead, BinReaderExt, io::Cursor};
480/// #[derive(BinRead)]
481/// #[br(big)]
482/// struct Header {
483///     count: u16,
484///
485///     #[br(args { count: count.into() })]
486///     offsets: Vec<u16>,
487/// }
488///
489/// # #[derive(Debug, Eq, PartialEq)]
490/// struct Item(u8);
491///
492/// #[derive(BinRead)]
493/// #[br(big)]
494/// struct Object {
495///     header: Header,
496///     #[br(parse_with = binrw::file_ptr::parse_from_iter_with(header.offsets.iter().copied(), |reader, endian, args| {
497///        u8::read_options(reader, endian, args).map(Item)
498///     }))]
499///     values: Vec<Item>,
500/// }
501///
502/// # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
503/// # let x = Object::read(&mut x).unwrap();
504/// # assert_eq!(x.values, &[Item(4), Item(3)]);
505/// ```
506pub fn parse_from_iter_with<Ptr, Value, Ret, Args, It, F, Reader>(
507    it: It,
508    parser: F,
509) -> impl FnOnce(&mut Reader, Endian, Args) -> BinResult<Ret>
510where
511    Ptr: IntoSeekFrom,
512    Ret: FromIterator<Value>,
513    Args: Clone,
514    It: IntoIterator<Item = Ptr>,
515    F: Fn(&mut Reader, Endian, Args) -> BinResult<Value>,
516    Reader: Read + Seek,
517{
518    move |reader, endian, args| {
519        let base_pos = reader.stream_position()?;
520        it.into_iter()
521            .map(move |ptr| {
522                // Avoid unnecessary seeks:
523                // 1. Unnecessary seeking backwards to the base position
524                //    will cause forward-only readers to fail always even if
525                //    the offsets are ordered;
526                // 2. Seeks that change the position when it does not need
527                //    to change may unnecessarily flush a buffered reader
528                //    cache.
529                match ptr.into_seek_from() {
530                    seek @ SeekFrom::Current(offset) => {
531                        if let Some(new_pos) = base_pos.checked_add_signed(offset) {
532                            if new_pos != reader.stream_position()? {
533                                reader.seek(SeekFrom::Start(new_pos))?;
534                            }
535                        } else {
536                            reader.seek(seek)?;
537                        }
538                    }
539                    seek => {
540                        reader.seek(seek)?;
541                    }
542                }
543
544                parser(reader, endian, args.clone())
545            })
546            .collect()
547    }
548}
549
/// A trait to convert from an integer into [`SeekFrom::Current`].
///
/// The implementations for the primitive integer types never panic: a value
/// that does not fit in an `i64` is clamped to `i64::MIN` or `i64::MAX`. Such
/// an offset cannot refer to a real position in a stream, so the seek or the
/// read that follows is expected to fail with an ordinary error.
pub trait IntoSeekFrom: Copy {
    /// Converts the value.
    fn into_seek_from(self) -> SeekFrom;
}

macro_rules! impl_into_seek_from {
    ($($t:ty),*) => {
        $(
            impl IntoSeekFrom for $t {
                fn into_seek_from(self) -> SeekFrom {
                    // Offsets come straight from file data, so an
                    // out-of-range value must not panic. Clamp towards the
                    // sign of the value instead.
                    let offset = i64::try_from(self)
                        .unwrap_or(if self > 0 { i64::MAX } else { i64::MIN });
                    SeekFrom::Current(offset)
                }
            }
        )*
    };
}

impl_into_seek_from!(i8, i16, i32, i64, i128, u8, u16, u32, u64, u128);
569
570macro_rules! impl_into_seek_from_for_non_zero {
571    ($($t:ty),*) => {
572        $(
573            impl IntoSeekFrom for $t {
574                fn into_seek_from(self) -> SeekFrom {
575                    self.get().into_seek_from()
576                }
577            }
578        )*
579    };
580}
581
582impl_into_seek_from_for_non_zero!(
583    NonZeroI128,
584    NonZeroI16,
585    NonZeroI32,
586    NonZeroI64,
587    NonZeroI8,
588    NonZeroU128,
589    NonZeroU16,
590    NonZeroU32,
591    NonZeroU64,
592    NonZeroU8
593);
594
/// Named arguments for the [`BinRead::read_options()`] implementation of [`FilePtr`].
///
/// The `offset` field defaults to `0` when it is not given.
///
/// The `inner` field can be omitted completely if the inner type doesn't
/// require arguments, in which case a default value will be used.
#[derive(Clone, Default, NamedArgs)]
pub struct FilePtrArgs<Inner> {
    /// An absolute offset added to the [`FilePtr::ptr`](crate::FilePtr::ptr)
    /// offset before reading the pointed-to value.
    ///
    /// The reader is first moved to this position (counted from the start of
    /// the stream) and the pointer is then applied relative to it.
    #[named_args(default = 0)]
    pub offset: u64,

    /// The [arguments](crate::BinRead::Args) for the inner type, passed
    /// through unchanged to the parser of the pointed-to value.
    #[named_args(try_optional)]
    pub inner: Inner,
}