ms_pdb/
modi.rs

1//! Reads data from Module Info (`modi`) streams.
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/ModiStream.html>
//! * [`MODI_60_Persist` in `dbi.h`](https://github.com/microsoft/microsoft-pdb/blob/master/PDB/dbi/dbi.h)
6
7use crate::dbi::ModuleInfoFixed;
8use crate::utils::vec::replace_range_copy;
9use crate::StreamData;
10use crate::{dbi::ModuleInfo, syms::SymIter};
11use anyhow::{anyhow, bail, Result};
12use ms_codeview::parser::Parser;
13use std::mem::size_of;
14use std::ops::Range;
15use sync_file::ReadAt;
16use tracing::{debug, warn};
17use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, LE, U32};
18
/// Header of the Module Symbols substream, located at stream offset 0 of a Module Stream.
///
/// The header consists of a single little-endian `u32` signature. Because all fields are
/// unaligned byte-order-aware types, this struct can be read directly from stream bytes.
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
pub struct ModuleSymbolsHeader {
    /// Indicates the version of the module symbol stream. Use the `CV_SIGNATURE_*` constants.
    /// The expected value is `CV_SIGNATURE_C13`.
    pub signature: U32<LE>,
}
28
/// Size in bytes of [`ModuleSymbolsHeader`]. Symbol records begin at this offset within the
/// Module Symbols substream.
const MODULE_SYMBOLS_HEADER_LEN: usize = 4;
// Compile-time check that the constant stays in sync with the struct layout.
static_assertions::const_assert_eq!(size_of::<ModuleSymbolsHeader>(), MODULE_SYMBOLS_HEADER_LEN);
31
/// CodeView signature value for the C6 format. The actual signature is >64K.
pub const CV_SIGNATURE_C6: u32 = 0;
/// CodeView signature value for the C7 format. This is the first explicit signature.
pub const CV_SIGNATURE_C7: u32 = 1;
/// CodeView signature value for C11 (vc5.x), which uses 32-bit types.
pub const CV_SIGNATURE_C11: u32 = 2;
/// CodeView signature value for C13 (vc7.x), which uses zero-terminated names.
/// This is the value that [`ModuleSymbolsHeader::signature`] is expected to hold.
pub const CV_SIGNATURE_C13: u32 = 4;
/// First reserved signature value. All signatures from 5 to 64K are reserved.
pub const CV_SIGNATURE_RESERVED: u32 = 5;
42
43impl<F: ReadAt> crate::Pdb<F> {
44    /// Reads a Module Info stream. The caller must provide a [`ModuleInfo`] structure, which comes
45    /// from the DBI Stream.  Use [`crate::dbi::read_dbi_stream`] to enumerate [`ModuleInfo`] values.
46    ///
47    /// If the Module Info record has a NIL stream, then this function returns `Ok(None)`.
48    pub fn read_module_stream(
49        &self,
50        mod_info: &ModuleInfo,
51    ) -> Result<Option<ModiStreamData<StreamData>>, anyhow::Error> {
52        let Some(stream) = mod_info.stream() else {
53            return Ok(None);
54        };
55
56        let stream_data = self.read_stream(stream)?;
57        Ok(Some(ModiStreamData::new(stream_data, mod_info.header())?))
58    }
59}
60
/// Contains the stream data for a Module Info stream, together with the sizes of its
/// substreams.
///
/// The substreams are laid out back to back in this order: module symbols, C11 line data,
/// C13 line data, then (optionally) a `u32` size field followed by the Global Refs.
#[allow(missing_docs)]
pub struct ModiStreamData<Data> {
    /// The contents of the stream.
    pub stream_data: Data,
    /// Size in bytes of the module symbols substream, which starts at stream offset 0.
    /// This size includes the 4-byte CodeView signature.
    pub sym_byte_size: u32,
    /// Size in bytes of the C11 substream, which immediately follows the module symbols.
    pub c11_byte_size: u32,
    /// Size in bytes of the C13 Line Data substream, which immediately follows the C11 substream.
    pub c13_byte_size: u32,
    /// Size in bytes of the Global Refs, not counting the `u32` size field that precedes them.
    /// This is 0 when the stream has no Global Refs, or when the stored size was `0xffff_ffff`.
    pub global_refs_size: u32,
}
71
72impl<Data: AsRef<[u8]>> ModiStreamData<Data> {
73    /// Initializes a new `ModiStreamData`. This validates the byte sizes of the substreams,
74    /// which are specified in the [`ModuleInfo`] structure, not within the Module Stream itself.
75    pub fn new(stream_data: Data, module: &ModuleInfoFixed) -> anyhow::Result<Self> {
76        let stream_bytes: &[u8] = stream_data.as_ref();
77
78        // Validate the byte sizes against the size of the stream data.
79        let sym_byte_size = module.sym_byte_size.get();
80        let c11_byte_size = module.c11_byte_size.get();
81        let c13_byte_size = module.c13_byte_size.get();
82
83        let mut p = Parser::new(stream_bytes);
84
85        p.skip(sym_byte_size as usize).map_err(|_| {
86            anyhow!("Module info has a sym_byte_size that exceeds the size of the stream.")
87        })?;
88        p.skip(c11_byte_size as usize).map_err(|_| {
89            anyhow!("Module info has a c11_byte_size that exceeds the size of the stream.")
90        })?;
91        p.skip(c13_byte_size as usize).map_err(|_| {
92            anyhow!("Module info has a c13_byte_size that exceeds the size of the stream.")
93        })?;
94
95        let mut global_refs_size;
96        if !p.is_empty() {
97            global_refs_size = p
98                .u32()
99                .map_err(|_| anyhow!("Failed to decode global_refs_size. There are {} bytes after the module symbols substream.", p.len()))?;
100
101            if global_refs_size == 0xffff_ffff {
102                warn!("Module has global_refs_size = 0xffff_ffff");
103                global_refs_size = 0;
104            } else {
105                p.skip(global_refs_size as usize)
106                .map_err(|_| anyhow!("Failed to decode global_refs substream. global_refs_size = 0x{:x}, but there are only 0x{:x} bytes left.",
107                global_refs_size,
108                p.len()
109            ))?;
110            }
111
112            if !p.is_empty() {
113                debug!(stream_len = p.len(), "Module stream has extra bytes at end");
114            }
115        } else {
116            global_refs_size = 0;
117        }
118
119        Ok(Self {
120            stream_data,
121            sym_byte_size,
122            c11_byte_size,
123            c13_byte_size,
124            global_refs_size,
125        })
126    }
127
128    /// Returns an iterator for the symbol data for this module.
129    pub fn iter_syms(&self) -> SymIter<'_> {
130        if let Ok(sym_data) = self.sym_data() {
131            SymIter::new(sym_data)
132        } else {
133            SymIter::new(&[])
134        }
135    }
136
137    fn nested_slice(&self, range: Range<usize>) -> Result<&[u8]> {
138        if let Some(b) = self.stream_data.as_ref().get(range) {
139            Ok(b)
140        } else {
141            bail!("Range within module stream is invalid")
142        }
143    }
144
145    fn nested_slice_mut(&mut self, range: Range<usize>) -> Result<&mut [u8]>
146    where
147        Data: AsMut<[u8]>,
148    {
149        if let Some(b) = self.stream_data.as_mut().get_mut(range) {
150            Ok(b)
151        } else {
152            bail!("Range within module stream is invalid")
153        }
154    }
155
156    /// Returns a reference to the encoded symbol data for this module.
157    ///
158    /// This _does not_ include the CodeView signature.
159    pub fn sym_data(&self) -> Result<&[u8]> {
160        self.nested_slice(MODULE_SYMBOLS_HEADER_LEN..self.sym_byte_size as usize)
161    }
162
163    /// Returns a mutable reference to the encoded symbol data for this module.
164    ///
165    /// This _does not_ include the CodeView signature.
166    pub fn sym_data_mut(&mut self) -> Result<&mut [u8]>
167    where
168        Data: AsMut<[u8]>,
169    {
170        self.nested_slice_mut(MODULE_SYMBOLS_HEADER_LEN..self.sym_byte_size as usize)
171    }
172
173    /// Returns a reference to the encoded symbol data for this module.
174    ///
175    /// This _does_ include the CodeView signature.
176    pub fn full_sym_data(&self) -> Result<&[u8]> {
177        self.nested_slice(0..self.sym_byte_size as usize)
178    }
179
180    /// Returns a mutable reference to the encoded symbol data for this module.
181    ///
182    /// This _does_ include the CodeView signature.
183    pub fn full_sym_data_mut(&mut self) -> Result<&mut [u8]>
184    where
185        Data: AsMut<[u8]>,
186    {
187        self.nested_slice_mut(0..self.sym_byte_size as usize)
188    }
189
190    /// Returns the byte range of the C13 Line Data within this Module Information Stream.
191    pub fn c13_line_data_range(&self) -> Range<usize> {
192        if self.c13_byte_size == 0 {
193            return 0..0;
194        }
195
196        let start = self.sym_byte_size as usize + self.c11_byte_size as usize;
197        start..start + self.c13_byte_size as usize
198    }
199
200    /// Returns the byte data for the C13 line data.
201    pub fn c13_line_data_bytes(&self) -> &[u8] {
202        if self.c13_byte_size == 0 {
203            return &[];
204        }
205
206        // The range has already been validated.
207        let stream_data: &[u8] = self.stream_data.as_ref();
208        let range = self.c13_line_data_range();
209        &stream_data[range]
210    }
211
212    /// Returns a mutable reference to the byte data for the C13 Line Data.
213    pub fn c13_line_data_bytes_mut(&mut self) -> &mut [u8]
214    where
215        Data: AsMut<[u8]>,
216    {
217        if self.c13_byte_size == 0 {
218            return &mut [];
219        }
220
221        // The range has already been validated.
222        let range = self.c13_line_data_range();
223        let stream_data: &mut [u8] = self.stream_data.as_mut();
224        &mut stream_data[range]
225    }
226
227    /// Returns an object which can decode the C13 Line Data.
228    pub fn c13_line_data(&self) -> crate::lines::LineData<'_> {
229        crate::lines::LineData::new(self.c13_line_data_bytes())
230    }
231
232    /// Returns an object which can decode and modify the C13 Line Data.
233    pub fn c13_line_data_mut(&mut self) -> crate::lines::LineDataMut<'_>
234    where
235        Data: AsMut<[u8]>,
236    {
237        crate::lines::LineDataMut::new(self.c13_line_data_bytes_mut())
238    }
239
240    /// Gets the byte range within the stream data for the global refs
241    pub fn global_refs_range(&self) -> Range<usize> {
242        if self.global_refs_size == 0 {
243            return 0..0;
244        }
245
246        // The Global Refs start after the C13 line data.
247        // This offset was validated in Self::new().
248        // The size_of::<u32>() is for the global_refs_size field itself.
249        let global_refs_offset = self.sym_byte_size as usize
250            + self.c11_byte_size as usize
251            + self.c13_byte_size as usize
252            + size_of::<U32<LE>>();
253        global_refs_offset..global_refs_offset + self.global_refs_size as usize
254    }
255
256    /// Returns a reference to the global refs stored in this Module Stream.
257    ///
258    /// Each value in the returned slice is a byte offset into the Global Symbol Stream of
259    /// a global symbol that this module references.
260    pub fn global_refs(&self) -> Result<&[U32<LE>]> {
261        let range = self.global_refs_range();
262        let stream_data: &[u8] = self.stream_data.as_ref();
263        if let Some(global_refs_bytes) = stream_data.get(range) {
264            if let Ok(global_refs) = FromBytes::ref_from_bytes(global_refs_bytes) {
265                Ok(global_refs)
266            } else {
267                bail!("Invalid size for global refs")
268            }
269        } else {
270            bail!("Invalid range for global refs")
271        }
272    }
273
274    /// Returns a mutable reference to the global refs stored in this Module Stream.
275    pub fn global_refs_mut(&mut self) -> Result<&mut [U32<LE>]>
276    where
277        Data: AsMut<[u8]>,
278    {
279        let range = self.global_refs_range();
280        let stream_data: &mut [u8] = self.stream_data.as_mut();
281        if let Some(global_refs_bytes) = stream_data.get_mut(range) {
282            if let Ok(global_refs) = FromBytes::mut_from_bytes(global_refs_bytes) {
283                Ok(global_refs)
284            } else {
285                bail!("Invalid size for global refs")
286            }
287        } else {
288            bail!("Invalid range for global refs")
289        }
290    }
291}
292
293impl ModiStreamData<Vec<u8>> {
294    /// Replace the symbol data for this module.  `new_sym_data` includes the CodeView signature.
295    pub fn replace_sym_data(&mut self, new_sym_data: &[u8]) {
296        if new_sym_data.len() == self.sym_byte_size as usize {
297            self.stream_data[..new_sym_data.len()].copy_from_slice(new_sym_data);
298        } else {
299            replace_range_copy(
300                &mut self.stream_data,
301                0,
302                self.sym_byte_size as usize,
303                new_sym_data,
304            );
305            self.sym_byte_size = new_sym_data.len() as u32;
306        }
307    }
308
309    /// Remove the Global Refs section, if present.
310    pub fn truncate_global_refs(&mut self) {
311        if self.global_refs_size == 0 {
312            return;
313        }
314
315        let global_refs_offset =
316            self.sym_byte_size as usize + self.c11_byte_size as usize + self.c13_byte_size as usize;
317
318        self.stream_data.truncate(global_refs_offset);
319        self.global_refs_size = 0;
320    }
321}