Skip to main content

binrw/
file_ptr.rs

1//! Type definitions and helpers for handling indirection within a file.
2//!
3//! # Best practices
4//!
5//! Indirections that are not collections (e.g. a single offset to a global file
6//! header) can use `FilePtr` to immediately read the offset and then parse the
7//! pointed-to value. However, using `FilePtr` inside a collection is
8//! inefficient because it seeks to and reads each pointed-to value immediately
9//! after the offset is read. In these cases, it is faster to read the offset
10//! table into a collection (e.g. `Vec<u32>`) and then either pass it to
11//! [`parse_from_iter`] or write a function that is called to lazily load values
12//! as needed.
13//!
14//! ## Using `parse_from_iter` to read an offset table
15//!
16//! ### With relative offsets
17//!
18//! In this example, the offsets in the offset table start counting from the
19//! beginning of the values section, and are in a random order.
20//!
21//! Since the values section exists immediately after the offset table, no
22//! seeking is required before reading the values.
23//!
24//! Since the offsets are in a random order, the position of the stream must be
25//! returned to a known state using `restore_position` on the values field.
26//! Then, `seek_before` is used on the next field to skip past the values data
27//! and continue reading the rest of the object.
28//!
29//! ```
30//! # use binrw::{args, BinRead, BinReaderExt, io::{Cursor, SeekFrom}};
31//! use binrw::file_ptr::parse_from_iter;
32//!
33//! #[derive(BinRead)]
34//! #[br(big)]
35//! struct Object {
36//!     count: u16,
37//!     #[br(args { count: count.into() })]
38//!     offsets: Vec<u16>,
39//!     #[br(parse_with = parse_from_iter(offsets.iter().copied()), restore_position)]
40//!     values: Vec<u8>,
41//!     #[br(seek_before(SeekFrom::Current(count.into())))]
42//!     extra: u16,
43//! }
44//!
45//! # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04\xff\xff");
46//! # let x = Object::read(&mut x).unwrap();
47//! # assert_eq!(x.values, &[4, 3]);
48//! # assert_eq!(x.extra, 0xffff);
49//! ```
50//!
51//! ### With absolute offsets
52//!
53//! In this example, the offsets in the offset table start from the beginning of
54//! the file, and are in sequential order.
55//!
56//! Since the offsets start from the beginning of the file, it is necessary to
57//! use `seek_before` to reposition the stream to the beginning of the file
58//! before reading the values.
59//!
60//! Since the offsets are in order, no seeking is required after the values are
61//! read, since the stream will already be pointed at the end of the values
62//! section.
63//!
64//! ```
65//! # use binrw::{args, BinRead, BinReaderExt, io::{Cursor, SeekFrom}};
66//! use binrw::file_ptr::parse_from_iter;
67//!
68//! #[derive(BinRead)]
69//! #[br(big)]
70//! struct Object {
71//!     count: u16,
72//!     #[br(args { count: count.into() })]
73//!     offsets: Vec<u16>,
74//!     #[br(
75//!         parse_with = parse_from_iter(offsets.iter().copied()),
76//!         seek_before(SeekFrom::Start(0))
77//!     )]
78//!     values: Vec<u8>,
79//!     extra: u16,
80//! }
81//!
82//! # let mut x = Cursor::new(b"\0\x02\0\x06\0\x07\x04\x03\xff\xff");
83//! # let x = Object::read(&mut x).unwrap();
84//! # assert_eq!(x.values, &[4, 3]);
85//! # assert_eq!(x.extra, 0xffff);
86//! ```
87//!
88//! ## Using a function to lazily load values
89//!
90//! In this example, only the offset table is parsed. Items pointed to by the
91//! offset table are loaded on demand from the original stream by passing it to
92//! `Object::get` along with the index of the desired item.
93//!
94//! ```
95//! # use binrw::{args, BinRead, BinResult, BinReaderExt, helpers::until_eof, io::{Cursor, Read, Seek, SeekFrom}};
96//!
97//! #[derive(BinRead)]
98//! # #[derive(Debug, Eq, PartialEq)]
99//! #[br(big)]
100//! struct Item(u8);
101//!
102//! #[derive(BinRead)]
103//! #[br(big, stream = s)]
104//! struct Object {
105//!     count: u16,
106//!     #[br(args { count: count.into() })]
107//!     offsets: Vec<u16>,
108//!     #[br(try_calc = s.stream_position())]
109//!     data_offset: u64,
110//! }
111//!
112//! impl Object {
113//!     pub fn get<Reader>(&self, source: &mut Reader, index: usize) -> Option<BinResult<Item>>
114//!     where Reader: Read + Seek
115//!     {
116//!         self.offsets.get(index).map(|offset| {
117//!             let offset = self.data_offset + u64::from(*offset);
118//!             source.seek(SeekFrom::Start(offset))?;
119//!             Item::read(source)
120//!         })
121//!     }
122//! }
123//!
124//! # let mut s = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
125//! # let x = Object::read(&mut s).unwrap();
126//! # assert!(matches!(x.get(&mut s, 0), Some(Ok(Item(4)))));
127//! # assert!(matches!(x.get(&mut s, 1), Some(Ok(Item(3)))));
128//! # assert!(matches!(x.get(&mut s, 2), None));
129//! ```
130
131use crate::NamedArgs;
132use crate::{
133    io::{Read, Seek, SeekFrom},
134    BinRead, BinResult, Endian,
135};
136use core::num::{
137    NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroU128, NonZeroU16,
138    NonZeroU32, NonZeroU64, NonZeroU8,
139};
140use core::ops::{Deref, DerefMut};
141
142/// A type alias for [`FilePtr`] whose offset is read as a `u8` (8-bit offsets).
143pub type FilePtr8<T> = FilePtr<u8, T>;
144/// A type alias for [`FilePtr`] whose offset is read as a `u16` (16-bit offsets).
145pub type FilePtr16<T> = FilePtr<u16, T>;
146/// A type alias for [`FilePtr`] whose offset is read as a `u32` (32-bit offsets).
147pub type FilePtr32<T> = FilePtr<u32, T>;
148/// A type alias for [`FilePtr`] whose offset is read as a `u64` (64-bit offsets).
149pub type FilePtr64<T> = FilePtr<u64, T>;
150/// A type alias for [`FilePtr`] whose offset is read as a `u128` (128-bit offsets).
151pub type FilePtr128<T> = FilePtr<u128, T>;
152
153/// A type alias for [`FilePtr`] whose offset is read as a `NonZeroU8` (non-zero 8-bit offsets).
154pub type NonZeroFilePtr8<T> = FilePtr<NonZeroU8, T>;
155/// A type alias for [`FilePtr`] whose offset is read as a `NonZeroU16` (non-zero 16-bit offsets).
156pub type NonZeroFilePtr16<T> = FilePtr<NonZeroU16, T>;
157/// A type alias for [`FilePtr`] whose offset is read as a `NonZeroU32` (non-zero 32-bit offsets).
158pub type NonZeroFilePtr32<T> = FilePtr<NonZeroU32, T>;
159/// A type alias for [`FilePtr`] whose offset is read as a `NonZeroU64` (non-zero 64-bit offsets).
160pub type NonZeroFilePtr64<T> = FilePtr<NonZeroU64, T>;
161/// A type alias for [`FilePtr`] whose offset is read as a `NonZeroU128` (non-zero 128-bit offsets).
162pub type NonZeroFilePtr128<T> = FilePtr<NonZeroU128, T>;
163
164/// A wrapper type which represents a layer of indirection within a file.
165///
166/// The pointer type `Ptr` is an offset to a value within the data stream, and
167/// the value type `T` is the value at that offset. [Dereferencing] a `FilePtr`
168/// yields the pointed-to value. When deriving `BinRead`, the
169/// [offset](crate::docs::attribute#offset) directive can be used to adjust the
170/// offset before the pointed-to value is read.
171///
172/// `FilePtr` is not efficient when reading offset tables; see the
173/// [module documentation](binrw::file_ptr) for more information.
174///
175/// [Dereferencing]: core::ops::Deref
176///
177/// # Examples
178///
179/// ```
180/// # use binrw::{prelude::*, io::Cursor, FilePtr};
181/// #
182/// #[derive(BinRead)]
183/// struct Test {
184///     indirect_value: FilePtr<u32, u8>
185/// }
186///
187/// let test: Test = Cursor::new(b"\0\0\0\x08\0\0\0\0\xff").read_be().unwrap();
188/// assert_eq!(test.indirect_value.ptr, 8);
189/// assert_eq!(*test.indirect_value, 0xFF);
190/// ```
191///
192/// Example data mapped out:
193///
194/// ```hex
195///           [pointer]           [value]
196/// 00000000: 0000 0008 0000 0000 ff                   .........
197/// ```
198#[derive(Debug, Eq)]
199pub struct FilePtr<Ptr: IntoSeekFrom, T> {
200    /// The raw offset exactly as it was read from the stream.
201    pub ptr: Ptr,
202
203    /// The value that was parsed from the location `ptr` points to.
204    pub value: T,
205}
206
207impl<Ptr, Value> BinRead for FilePtr<Ptr, Value>
208where
209    Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
210    Value: BinRead,
211{
212    type Args<'a> = FilePtrArgs<Value::Args<'a>>;
213
214    fn read_options<R: Read + Seek>(
215        reader: &mut R,
216        endian: Endian,
217        args: Self::Args<'_>,
218    ) -> BinResult<Self> {
219        let ptr = Ptr::read_options(reader, endian, ())?;
220        let value = Self::read_value(ptr, Value::read_options, reader, endian, args)?;
221        Ok(FilePtr { ptr, value })
222    }
223}
224
225impl<Ptr, Value> FilePtr<Ptr, Value>
226where
227    Ptr: IntoSeekFrom,
228{
229    /// Reads an offset, then seeks to and parses the pointed-to value using the
230    /// [`BinRead`] implementation for `Value`. Returns the pointed-to value.
231    ///
232    /// # Errors
233    ///
234    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
235    #[binrw::parser(reader, endian)]
236    pub fn parse<Args>(args: FilePtrArgs<Args>, _: ...) -> BinResult<Value>
237    where
238        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
239        Value: for<'a> BinRead<Args<'a> = Args>,
240    {
241        Self::read_options(reader, endian, args).map(Self::into_inner)
242    }
243
244    /// Creates a parser that reads an offset, then seeks to and parses the
245    /// pointed-to value using the given `parser` function. Returns the
246    /// pointed-to value.
247    ///
248    /// # Errors
249    ///
250    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
251    ///
252    /// # Examples
253    ///
254    /// ```
255    /// # use binrw::{helpers::read_u24, prelude::*};
256    /// use binrw::FilePtr16;
257    ///
258    /// #[derive(BinRead)]
259    /// struct Test {
260    ///     #[br(parse_with = FilePtr16::parse_with(read_u24))]
261    ///     value: u32
262    /// }
263    ///
264    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x07\x0f\x10");
265    /// let result = Test::read_le(&mut data).unwrap();
266    /// assert_eq!(result.value, 0x100f07);
267    /// ```
268    pub fn parse_with<R, F, Args>(
269        parser: F,
270    ) -> impl Fn(&mut R, Endian, FilePtrArgs<Args>) -> BinResult<Value>
271    where
272        R: Read + Seek,
273        F: Fn(&mut R, Endian, Args) -> BinResult<Value>,
274        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
275    {
276        let parser = Self::with(parser);
277        move |reader, endian, args| parser(reader, endian, args).map(Self::into_inner)
278    }
279
280    /// Creates a parser that reads an offset, then seeks to and parses the
281    /// pointed-to value using the given `parser` function. Returns a
282    /// [`FilePtr`] containing the offset and value.
283    ///
284    /// # Errors
285    ///
286    /// If reading fails, an [`Error`](crate::Error) variant will be returned.
287    ///
288    /// # Examples
289    ///
290    /// ```
291    /// # use binrw::{helpers::read_u24, prelude::*};
292    /// use binrw::FilePtr16;
293    ///
294    /// #[derive(BinRead)]
295    /// struct Test {
296    ///     #[br(parse_with = FilePtr16::with(read_u24))]
297    ///     value: FilePtr16<u32>
298    /// }
299    ///
300    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x07\x0f\x10");
301    /// let result = Test::read_le(&mut data).unwrap();
302    /// assert_eq!(result.value.ptr, 2);
303    /// assert_eq!(result.value.value, 0x100f07);
304    /// ```
305    pub fn with<R, F, Args>(
306        parser: F,
307    ) -> impl Fn(&mut R, Endian, FilePtrArgs<Args>) -> BinResult<Self>
308    where
309        R: Read + Seek,
310        F: Fn(&mut R, Endian, Args) -> BinResult<Value>,
311        Ptr: for<'a> BinRead<Args<'a> = ()> + IntoSeekFrom,
312    {
313        move |reader, endian, args| {
314            let ptr = Ptr::read_options(reader, endian, ())?;
315            let value = Self::read_value(ptr, &parser, reader, endian, args)?;
316            Ok(Self { ptr, value })
317        }
318    }
319
320    /// Consumes this object, returning the pointed-to value.
321    pub fn into_inner(self) -> Value {
322        self.value
323    }
324
325    fn read_value<R, Parser, Args>(
326        ptr: Ptr,
327        parser: Parser,
328        reader: &mut R,
329        endian: Endian,
330        args: FilePtrArgs<Args>,
331    ) -> BinResult<Value>
332    where
333        R: Read + Seek,
334        Parser: FnOnce(&mut R, Endian, Args) -> BinResult<Value>,
335    {
336        let relative_to = args.offset;
337        let before = reader.stream_position()?;
338        reader.seek(SeekFrom::Start(relative_to))?;
339        reader.seek(ptr.into_seek_from())?;
340        let value = parser(reader, endian, args.inner);
341        reader.seek(SeekFrom::Start(before))?;
342        value
343    }
344}
345
346impl<Ptr, Value> Deref for FilePtr<Ptr, Value>
347where
348    Ptr: IntoSeekFrom,
349{
350    type Target = Value;
351
352    /// Dereferences the value stored by `FilePtr`.
353    ///
354    /// # Examples
355    ///
356    /// ```
357    /// # use binrw::{prelude::*};
358    /// use binrw::FilePtr16;
359    ///
360    /// #[derive(BinRead)]
361    /// struct Test {
362    ///     value: FilePtr16<u16>
363    /// }
364    ///
365    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x01\x00");
366    /// let result = Test::read_le(&mut data).unwrap();
367    /// assert_eq!(result.value.ptr, 2);
368    /// assert_eq!(result.value.value, 1);
369    /// assert_eq!(*result.value, 1);
370    /// ```
371    fn deref(&self) -> &Self::Target {
372        &self.value
373    }
374}
375
376impl<Ptr, Value> DerefMut for FilePtr<Ptr, Value>
377where
378    Ptr: IntoSeekFrom,
379{
380    /// Mutably dereferences the value stored by `FilePtr`.
381    ///
382    /// # Examples
383    ///
384    /// ```
385    /// # use binrw::{prelude::*};
386    /// use binrw::FilePtr16;
387    ///
388    /// #[derive(BinRead)]
389    /// struct Test {
390    ///     value: FilePtr16<u16>
391    /// }
392    ///
393    /// let mut data = binrw::io::Cursor::new(b"\x02\x00\x01\x00");
394    /// let mut result = Test::read_le(&mut data).unwrap();
395    /// assert_eq!(result.value.ptr, 2);
396    /// assert_eq!(result.value.value, 1);
397    /// *result.value = 42;
398    /// assert_eq!(result.value.value, 42);
399    /// ```
400    fn deref_mut(&mut self) -> &mut Value {
401        &mut self.value
402    }
403}
404
405impl<Ptr, Value> PartialEq<FilePtr<Ptr, Value>> for FilePtr<Ptr, Value>
406where
407    Ptr: IntoSeekFrom,
408    Value: PartialEq,
409{
410    fn eq(&self, other: &Self) -> bool {
411        self.value == other.value
412    }
413}
414
415/// Creates a parser that reads a collection of values from an iterator of
416/// file offsets using the [`BinRead`] implementation of `Value`.
417///
418/// Offsets are treated as relative to the position of the reader when
419/// parsing begins. Use the [`seek_before`] directive to reposition the
420/// stream in this case.
421///
422/// See the [module documentation](binrw::file_ptr) for more information on how
423/// use `parse_from_iter`.
424///
425/// [`seek_before`]: crate::docs::attribute#padding-and-alignment
426///
427/// # Examples
428///
429/// ```
430/// # use binrw::{args, BinRead, BinReaderExt, io::Cursor};
431/// #[derive(BinRead)]
432/// #[br(big)]
433/// struct Header {
434///     count: u16,
435///
436///     #[br(args { count: count.into() })]
437///     offsets: Vec<u16>,
438/// }
439///
440/// #[derive(BinRead)]
441/// #[br(big)]
442/// struct Object {
443///     header: Header,
444///     #[br(parse_with = binrw::file_ptr::parse_from_iter(header.offsets.iter().copied()))]
445///     values: Vec<u8>,
446/// }
447///
448/// # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
449/// # let x = Object::read(&mut x).unwrap();
450/// # assert_eq!(x.values, &[4, 3]);
451/// ```
452pub fn parse_from_iter<Ptr, Value, Ret, Args, It, Reader>(
453    it: It,
454) -> impl FnOnce(&mut Reader, Endian, Args) -> BinResult<Ret>
455where
456    Ptr: IntoSeekFrom,
457    Value: for<'a> BinRead<Args<'a> = Args>,
458    Ret: FromIterator<Value>,
459    Args: Clone,
460    It: IntoIterator<Item = Ptr>,
461    Reader: Read + Seek,
462{
463    parse_from_iter_with(it, Value::read_options)
464}
465
466/// Creates a parser that reads a collection of values from an iterator of
467/// file offsets using the given `parser` function.
468///
469/// Offsets are treated as relative to the position of the reader when
470/// parsing begins. Use the [`seek_before`] directive to reposition the
471/// stream in this case.
472///
473/// See the [module documentation](binrw::file_ptr) for more information on how
474/// to use `parse_from_iter_with`.
475///
476/// [`seek_before`]: crate::docs::attribute#padding-and-alignment
477///
478/// # Examples
479///
480/// ```
481/// # use binrw::{args, BinRead, BinReaderExt, io::Cursor};
482/// #[derive(BinRead)]
483/// #[br(big)]
484/// struct Header {
485///     count: u16,
486///
487///     #[br(args { count: count.into() })]
488///     offsets: Vec<u16>,
489/// }
490///
491/// # #[derive(Debug, Eq, PartialEq)]
492/// struct Item(u8);
493///
494/// #[derive(BinRead)]
495/// #[br(big)]
496/// struct Object {
497///     header: Header,
498///     #[br(parse_with = binrw::file_ptr::parse_from_iter_with(header.offsets.iter().copied(), |reader, endian, args| {
499///        u8::read_options(reader, endian, args).map(Item)
500///     }))]
501///     values: Vec<Item>,
502/// }
503///
504/// # let mut x = Cursor::new(b"\0\x02\0\x01\0\0\x03\x04");
505/// # let x = Object::read(&mut x).unwrap();
506/// # assert_eq!(x.values, &[Item(4), Item(3)]);
507/// ```
508pub fn parse_from_iter_with<Ptr, Value, Ret, Args, It, F, Reader>(
509    it: It,
510    parser: F,
511) -> impl FnOnce(&mut Reader, Endian, Args) -> BinResult<Ret>
512where
513    Ptr: IntoSeekFrom,
514    Ret: FromIterator<Value>,
515    Args: Clone,
516    It: IntoIterator<Item = Ptr>,
517    F: Fn(&mut Reader, Endian, Args) -> BinResult<Value>,
518    Reader: Read + Seek,
519{
520    move |reader, endian, args| {
521        let base_pos = reader.stream_position()?;
522        it.into_iter()
523            .map(move |ptr| {
524                // Avoid unnecessary seeks:
525                // 1. Unnecessary seeking backwards to the base position
526                //    will cause forward-only readers to fail always even if
527                //    the offsets are ordered;
528                // 2. Seeks that change the position when it does not need
529                //    to change may unnecessarily flush a buffered reader
530                //    cache.
531                match ptr.into_seek_from() {
532                    seek @ SeekFrom::Current(offset) => {
533                        if let Some(new_pos) = base_pos.checked_add_signed(offset) {
534                            if new_pos != reader.stream_position()? {
535                                reader.seek(SeekFrom::Start(new_pos))?;
536                            }
537                        } else {
538                            reader.seek(seek)?;
539                        }
540                    }
541                    seek => {
542                        reader.seek(seek)?;
543                    }
544                }
545
546                parser(reader, endian, args.clone())
547            })
548            .collect()
549    }
550}
551
552/// A trait to convert from an integer offset type into a [`SeekFrom`]; the integer implementations in this module produce [`SeekFrom::Current`].
553pub trait IntoSeekFrom: Copy {
554    /// Converts `self` into the [`SeekFrom`] that is handed to the reader's `seek` to reach the pointed-to data.
555    fn into_seek_from(self) -> SeekFrom;
556}
557
558macro_rules! impl_into_seek_from {
559    ($($t:ty),*) => {
560        $(
561            impl IntoSeekFrom for $t {
562                fn into_seek_from(self) -> SeekFrom {
563                    SeekFrom::Current(TryInto::try_into(self).unwrap())
564                }
565            }
566        )*
567    };
568}
569
570impl_into_seek_from!(i8, i16, i32, i64, i128, u8, u16, u32, u64, u128);
571
572macro_rules! impl_into_seek_from_for_non_zero {
573    ($($t:ty),*) => {
574        $(
575            impl IntoSeekFrom for $t {
576                fn into_seek_from(self) -> SeekFrom {
577                    self.get().into_seek_from()
578                }
579            }
580        )*
581    };
582}
583
584impl_into_seek_from_for_non_zero!(
585    NonZeroI128,
586    NonZeroI16,
587    NonZeroI32,
588    NonZeroI64,
589    NonZeroI8,
590    NonZeroU128,
591    NonZeroU16,
592    NonZeroU32,
593    NonZeroU64,
594    NonZeroU8
595);
596
597/// Named arguments for the [`BinRead::read_options()`] implementation of [`FilePtr`].
598///
599/// The `inner` field can be omitted completely if the inner type doesn’t
600/// require arguments, in which case a default value will be used.
601#[derive(Clone, Default, NamedArgs)]
602pub struct FilePtrArgs<Inner> {
603    /// An absolute offset added to the [`FilePtr::ptr`](crate::FilePtr::ptr)
604    /// offset before reading the pointed-to value. Defaults to `0`.
605    #[named_args(default = 0)]
606    pub offset: u64,
607
608    /// The [arguments](crate::BinRead::Args) passed on to the parser of the pointed-to (inner) value.
609    #[named_args(try_optional)]
610    pub inner: Inner,
611}