Skip to main content

ms_pdb/
modi.rs

1//! Reads data from Module Info (`modi`) streams.
2//!
3//! # References
4//! * <https://llvm.org/docs/PDB/ModiStream.html>
//! * [`MODI_60_Persist` in `dbi.h`](https://github.com/microsoft/microsoft-pdb/blob/master/PDB/dbi/dbi.h)
6
7use crate::StreamData;
8use crate::dbi::ModuleInfoFixed;
9use crate::utils::vec::replace_range_copy;
10use crate::{dbi::ModuleInfo, syms::SymIter};
11use anyhow::{Result, anyhow, bail};
12use ms_codeview::parser::Parser;
13use std::mem::size_of;
14use std::ops::Range;
15use sync_file::ReadAt;
16use tracing::{debug, warn};
17use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, LE, U32, Unaligned};
18
19/// The Module Symbols substream begins with this header. It is located at stream offset 0 in the
20/// Module Stream.
21#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
22#[repr(C)]
23pub struct ModuleSymbolsHeader {
24    /// Indicates the version of the module symbol stream. Use the `CV_SIGNATURE_*` constants.
25    /// The expected value is `CV_SIGNATURE_C13`.
26    pub signature: U32<LE>,
27}
28
29const MODULE_SYMBOLS_HEADER_LEN: usize = 4;
30static_assertions::const_assert_eq!(size_of::<ModuleSymbolsHeader>(), MODULE_SYMBOLS_HEADER_LEN);
31
/// C6 format. There is no explicit signature; the actual value found is greater than 64K.
pub const CV_SIGNATURE_C6: u32 = 0;
/// C7 format: the first format with an explicit signature.
pub const CV_SIGNATURE_C7: u32 = 1;
/// C11 format (vc5.x), which uses 32-bit types.
pub const CV_SIGNATURE_C11: u32 = 2;
/// C13 format (vc7.x), which uses zero-terminated names.
pub const CV_SIGNATURE_C13: u32 = 4;
/// Start of the reserved range: every signature from 5 up to 64K is reserved.
pub const CV_SIGNATURE_RESERVED: u32 = 5;
42
43impl<F: ReadAt> crate::Pdb<F> {
44    /// Reads a Module Info stream. The caller must provide a [`ModuleInfo`] structure, which comes
45    /// from the DBI Stream.  Use [`crate::dbi::read_dbi_stream`] to enumerate [`ModuleInfo`] values.
46    ///
47    /// If the Module Info record has a NIL stream, then this function returns `Ok(None)`.
48    pub fn read_module_stream(
49        &self,
50        mod_info: &ModuleInfo,
51    ) -> Result<Option<ModiStreamData<StreamData>>, anyhow::Error> {
52        let Some(stream) = mod_info.stream() else {
53            return Ok(None);
54        };
55
56        let stream_data = self.read_stream(stream)?;
57        Ok(Some(ModiStreamData::new(stream_data, mod_info.header())?))
58    }
59
60    /// Reads the symbol data for a specific module.
61    ///
62    /// The returned buffer will contain a 4-byte header. The caller can use `SymIter::for_module_syms`
63    /// on the returned buffer.
64    ///
65    /// If the given module does not have a module stream, then this function will return `Ok`
66    /// with a zero-length buffer, so callers should be prepared to deal with the presence
67    /// _or absence_ of the 4-byte header.
68    pub fn read_module_symbols(&self, module: &ModuleInfo) -> Result<Vec<u32>> {
69        let len_u32 = module.sym_size() as usize / 4;
70        if len_u32 < 4 {
71            return Ok(Vec::new());
72        }
73
74        let Some(module_stream) = module.stream() else {
75            return Ok(Vec::new());
76        };
77
78        let mut syms: Vec<u32> = vec![0; len_u32];
79        let sr = self.get_stream_reader(module_stream)?;
80        sr.read_exact_at(syms.as_mut_bytes(), 0)?;
81        Ok(syms)
82    }
83}
84
/// Holds the bytes of a Module Info stream along with the sizes of the substreams inside it.
///
/// The substreams are laid out back to back: symbols, C11 line data, C13 line data, and then
/// an optional global refs section introduced by a 4-byte size field.
#[allow(missing_docs)]
pub struct ModiStreamData<Data> {
    /// The contents of the stream.
    pub stream_data: Data,
    /// Size in bytes of the symbols substream, counted from stream offset 0 and including the
    /// 4-byte CodeView signature.
    pub sym_byte_size: u32,
    /// Size in bytes of the C11 line data, which follows the symbols substream.
    pub c11_byte_size: u32,
    /// Size in bytes of the C13 line data, which follows the C11 line data.
    pub c13_byte_size: u32,
    /// Size in bytes of the global refs payload, not counting its own 4-byte size field.
    /// Zero when there are no global refs.
    pub global_refs_size: u32,
}
95
96impl<Data: AsRef<[u8]>> ModiStreamData<Data> {
97    /// Initializes a new `ModiStreamData`. This validates the byte sizes of the substreams,
98    /// which are specified in the [`ModuleInfo`] structure, not within the Module Stream itself.
99    pub fn new(stream_data: Data, module: &ModuleInfoFixed) -> anyhow::Result<Self> {
100        let stream_bytes: &[u8] = stream_data.as_ref();
101
102        // Validate the byte sizes against the size of the stream data.
103        let sym_byte_size = module.sym_byte_size.get();
104        let c11_byte_size = module.c11_byte_size.get();
105        let c13_byte_size = module.c13_byte_size.get();
106
107        let mut p = Parser::new(stream_bytes);
108
109        p.skip(sym_byte_size as usize).map_err(|_| {
110            anyhow!("Module info has a sym_byte_size that exceeds the size of the stream.")
111        })?;
112        p.skip(c11_byte_size as usize).map_err(|_| {
113            anyhow!("Module info has a c11_byte_size that exceeds the size of the stream.")
114        })?;
115        p.skip(c13_byte_size as usize).map_err(|_| {
116            anyhow!("Module info has a c13_byte_size that exceeds the size of the stream.")
117        })?;
118
119        let mut global_refs_size;
120        if !p.is_empty() {
121            global_refs_size = p
122                .u32()
123                .map_err(|_| anyhow!("Failed to decode global_refs_size. There are {} bytes after the module symbols substream.", p.len()))?;
124
125            if global_refs_size == 0xffff_ffff {
126                warn!("Module has global_refs_size = 0xffff_ffff");
127                global_refs_size = 0;
128            } else {
129                p.skip(global_refs_size as usize)
130                .map_err(|_| anyhow!("Failed to decode global_refs substream. global_refs_size = 0x{:x}, but there are only 0x{:x} bytes left.",
131                global_refs_size,
132                p.len()
133            ))?;
134            }
135
136            if !p.is_empty() {
137                debug!(stream_len = p.len(), "Module stream has extra bytes at end");
138            }
139        } else {
140            global_refs_size = 0;
141        }
142
143        Ok(Self {
144            stream_data,
145            sym_byte_size,
146            c11_byte_size,
147            c13_byte_size,
148            global_refs_size,
149        })
150    }
151
152    /// Returns an iterator for the symbol data for this module.
153    pub fn iter_syms(&self) -> SymIter<'_> {
154        if let Ok(sym_data) = self.sym_data() {
155            SymIter::new(sym_data)
156        } else {
157            SymIter::new(&[])
158        }
159    }
160
161    fn nested_slice(&self, range: Range<usize>) -> Result<&[u8]> {
162        if let Some(b) = self.stream_data.as_ref().get(range) {
163            Ok(b)
164        } else {
165            bail!("Range within module stream is invalid")
166        }
167    }
168
169    fn nested_slice_mut(&mut self, range: Range<usize>) -> Result<&mut [u8]>
170    where
171        Data: AsMut<[u8]>,
172    {
173        if let Some(b) = self.stream_data.as_mut().get_mut(range) {
174            Ok(b)
175        } else {
176            bail!("Range within module stream is invalid")
177        }
178    }
179
180    /// Returns a reference to the encoded symbol data for this module.
181    ///
182    /// This _does not_ include the CodeView signature.
183    pub fn sym_data(&self) -> Result<&[u8]> {
184        self.nested_slice(MODULE_SYMBOLS_HEADER_LEN..self.sym_byte_size as usize)
185    }
186
187    /// Returns a mutable reference to the encoded symbol data for this module.
188    ///
189    /// This _does not_ include the CodeView signature.
190    pub fn sym_data_mut(&mut self) -> Result<&mut [u8]>
191    where
192        Data: AsMut<[u8]>,
193    {
194        self.nested_slice_mut(MODULE_SYMBOLS_HEADER_LEN..self.sym_byte_size as usize)
195    }
196
197    /// Returns a reference to the encoded symbol data for this module.
198    ///
199    /// This _does_ include the CodeView signature.
200    pub fn full_sym_data(&self) -> Result<&[u8]> {
201        self.nested_slice(0..self.sym_byte_size as usize)
202    }
203
204    /// Returns a mutable reference to the encoded symbol data for this module.
205    ///
206    /// This _does_ include the CodeView signature.
207    pub fn full_sym_data_mut(&mut self) -> Result<&mut [u8]>
208    where
209        Data: AsMut<[u8]>,
210    {
211        self.nested_slice_mut(0..self.sym_byte_size as usize)
212    }
213
214    /// Returns the byte range of the C13 Line Data within this Module Information Stream.
215    pub fn c13_line_data_range(&self) -> Range<usize> {
216        if self.c13_byte_size == 0 {
217            return 0..0;
218        }
219
220        let start = self.sym_byte_size as usize + self.c11_byte_size as usize;
221        start..start + self.c13_byte_size as usize
222    }
223
224    /// Returns the byte data for the C13 line data.
225    pub fn c13_line_data_bytes(&self) -> &[u8] {
226        if self.c13_byte_size == 0 {
227            return &[];
228        }
229
230        // The range has already been validated.
231        let stream_data: &[u8] = self.stream_data.as_ref();
232        let range = self.c13_line_data_range();
233        &stream_data[range]
234    }
235
236    /// Returns a mutable reference to the byte data for the C13 Line Data.
237    pub fn c13_line_data_bytes_mut(&mut self) -> &mut [u8]
238    where
239        Data: AsMut<[u8]>,
240    {
241        if self.c13_byte_size == 0 {
242            return &mut [];
243        }
244
245        // The range has already been validated.
246        let range = self.c13_line_data_range();
247        let stream_data: &mut [u8] = self.stream_data.as_mut();
248        &mut stream_data[range]
249    }
250
251    /// Returns an object which can decode the C13 Line Data.
252    pub fn c13_line_data(&self) -> crate::lines::LineData<'_> {
253        crate::lines::LineData::new(self.c13_line_data_bytes())
254    }
255
256    /// Returns an object which can decode and modify the C13 Line Data.
257    pub fn c13_line_data_mut(&mut self) -> crate::lines::LineDataMut<'_>
258    where
259        Data: AsMut<[u8]>,
260    {
261        crate::lines::LineDataMut::new(self.c13_line_data_bytes_mut())
262    }
263
264    /// Gets the byte range within the stream data for the global refs
265    pub fn global_refs_range(&self) -> Range<usize> {
266        if self.global_refs_size == 0 {
267            return 0..0;
268        }
269
270        // The Global Refs start after the C13 line data.
271        // This offset was validated in Self::new().
272        // The size_of::<u32>() is for the global_refs_size field itself.
273        let global_refs_offset = self.sym_byte_size as usize
274            + self.c11_byte_size as usize
275            + self.c13_byte_size as usize
276            + size_of::<U32<LE>>();
277        global_refs_offset..global_refs_offset + self.global_refs_size as usize
278    }
279
280    /// Returns a reference to the global refs stored in this Module Stream.
281    ///
282    /// Each value in the returned slice is a byte offset into the Global Symbol Stream of
283    /// a global symbol that this module references.
284    pub fn global_refs(&self) -> Result<&[U32<LE>]> {
285        let range = self.global_refs_range();
286        let stream_data: &[u8] = self.stream_data.as_ref();
287        if let Some(global_refs_bytes) = stream_data.get(range) {
288            if let Ok(global_refs) = FromBytes::ref_from_bytes(global_refs_bytes) {
289                Ok(global_refs)
290            } else {
291                bail!("Invalid size for global refs")
292            }
293        } else {
294            bail!("Invalid range for global refs")
295        }
296    }
297
298    /// Returns a mutable reference to the global refs stored in this Module Stream.
299    pub fn global_refs_mut(&mut self) -> Result<&mut [U32<LE>]>
300    where
301        Data: AsMut<[u8]>,
302    {
303        let range = self.global_refs_range();
304        let stream_data: &mut [u8] = self.stream_data.as_mut();
305        if let Some(global_refs_bytes) = stream_data.get_mut(range) {
306            if let Ok(global_refs) = FromBytes::mut_from_bytes(global_refs_bytes) {
307                Ok(global_refs)
308            } else {
309                bail!("Invalid size for global refs")
310            }
311        } else {
312            bail!("Invalid range for global refs")
313        }
314    }
315}
316
317impl ModiStreamData<Vec<u8>> {
318    /// Replace the symbol data for this module.  `new_sym_data` includes the CodeView signature.
319    pub fn replace_sym_data(&mut self, new_sym_data: &[u8]) {
320        if new_sym_data.len() == self.sym_byte_size as usize {
321            self.stream_data[..new_sym_data.len()].copy_from_slice(new_sym_data);
322        } else {
323            replace_range_copy(
324                &mut self.stream_data,
325                0,
326                self.sym_byte_size as usize,
327                new_sym_data,
328            );
329            self.sym_byte_size = new_sym_data.len() as u32;
330        }
331    }
332
333    /// Remove the Global Refs section, if present.
334    pub fn truncate_global_refs(&mut self) {
335        if self.global_refs_size == 0 {
336            return;
337        }
338
339        let global_refs_offset =
340            self.sym_byte_size as usize + self.c11_byte_size as usize + self.c13_byte_size as usize;
341
342        self.stream_data.truncate(global_refs_offset);
343        self.global_refs_size = 0;
344    }
345}