Skip to main content

ms_pdb/
lines.rs

1//! Decodes line information found in Module Streams.
2//!
3//! # References
4//! * [/ZH (Hash algorithm for calculation of file checksum in debug info)](https://learn.microsoft.com/en-us/cpp/build/reference/zh?view=msvc-170)
5
6mod checksum;
7mod subsection;
8
9pub use checksum::*;
10pub use subsection::*;
11
12use crate::codeview::syms::OffsetSegment;
13use crate::names::NameIndex;
14use anyhow::{Context, bail};
15use ms_codeview::parser::{Parser, ParserError, ParserMut};
16use ms_codeview::{HasRestLen, IteratorWithRangesExt};
17use std::mem::{size_of, take};
18use tracing::{trace, warn};
19use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, LE, U16, U32, Unaligned};
20
/// Identifies the kind of a subsection found in C13 Line Data.
///
/// The known values are exposed as associated constants (for example
/// `SubsectionKind::LINES`). Any other `u32` is still representable, and is shown by the `Debug`
/// impl as its raw hexadecimal value.
///
/// See `cvinfo.h`, `DEBUG_S_SUBSECTION_TYPE`.
#[derive(Copy, Clone, Eq, PartialEq)]
#[repr(transparent)]
pub struct SubsectionKind(pub u32);

// Declares the known subsection kinds as associated constants of `SubsectionKind` and generates
// a `Debug` impl that prints the constant's name for each known value.
macro_rules! subsections {
    ($( $(#[$attr:meta])* $kind:ident = $code:expr; )*) => {
        impl SubsectionKind {
            $(
                $(#[$attr])*
                #[allow(missing_docs)]
                pub const $kind: SubsectionKind = SubsectionKind($code);
            )*
        }

        impl std::fmt::Debug for SubsectionKind {
            fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
                match *self {
                    $( SubsectionKind::$kind => fmt.write_str(stringify!($kind)), )*
                    // Unrecognized kinds are printed as their raw value, in hex.
                    _ => write!(fmt, "??(0x{:x})", self.0),
                }
            }
        }
    }
}

subsections! {
    SYMBOLS = 0xf1;
    /// Contains C13 Line Data
    LINES = 0xf2;
    STRING_TABLE = 0xf3;
    /// Contains file checksums and pointers to file names. For a given module, there should be
    /// at most one `FILE_CHECKSUMS` subsection.
    FILE_CHECKSUMS = 0xf4;

    FRAMEDATA = 0xf5;
    INLINEELINES = 0xf6;
    CROSSSCOPEIMPORTS = 0xf7;
    CROSSSCOPEEXPORTS = 0xf8;

    IL_LINES = 0xf9;
    FUNC_MDTOKEN_MAP = 0xfa;
    TYPE_MDTOKEN_MAP = 0xfb;
    MERGED_ASSEMBLYINPUT = 0xfc;

    COFF_SYMBOL_RVA = 0xfd;
}
71
72/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
73/// substream.
74pub struct LineData<'a> {
75    bytes: &'a [u8],
76}
77
78impl<'a> LineData<'a> {
79    /// Use this to create a new decoder for the C13 line data. Usually, you want to pass the
80    /// result of calling `ModiStreamData::c13_line_data_bytes()` to this function.
81    pub fn new(bytes: &'a [u8]) -> Self {
82        Self { bytes }
83    }
84
85    /// Iterates subsections
86    pub fn subsections(&self) -> SubsectionIter<'a> {
87        SubsectionIter::new(self.bytes)
88    }
89
90    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
91    pub fn find_checksums_bytes(&self) -> Option<&'a [u8]> {
92        for subsection in self.subsections() {
93            if subsection.kind == SubsectionKind::FILE_CHECKSUMS {
94                return Some(subsection.data);
95            }
96        }
97        None
98    }
99
100    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
101    pub fn find_checksums(&self) -> Option<FileChecksumsSubsection<'a>> {
102        let subsection_bytes = self.find_checksums_bytes()?;
103        Some(FileChecksumsSubsection::new(subsection_bytes))
104    }
105
106    /// Iterates the `NameIndex` values that appear in this Line Data section.
107    ///
108    /// This may iterate the same `NameIndex` value more than once.
109    pub fn iter_name_index<F>(&self, mut f: F) -> anyhow::Result<()>
110    where
111        F: FnMut(NameIndex),
112    {
113        if let Some(checksums) = self.find_checksums() {
114            for subsection in self.subsections() {
115                match subsection.kind {
116                    SubsectionKind::LINES => {
117                        let lines_subsection = LinesSubsection::parse(subsection.data)?;
118                        for block in lines_subsection.blocks() {
119                            let file = checksums.get_file(block.header.file_index.get())?;
120                            let ni = file.header.name.get();
121                            f(NameIndex(ni));
122                        }
123                    }
124                    _ => {}
125                }
126            }
127        } else {
128            for subsection in self.subsections() {
129                match subsection.kind {
130                    SubsectionKind::LINES => {
131                        bail!(
132                            "This C13 Line Data substream contains LINES subsections, but does not contain a FILE_CHECKSUMS subsection."
133                        );
134                    }
135                    _ => {}
136                }
137            }
138        };
139
140        Ok(())
141    }
142}
143
144/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
145/// substream.
146pub struct LineDataMut<'a> {
147    bytes: &'a mut [u8],
148}
149
150impl<'a> LineDataMut<'a> {
151    /// Initializes a new `LineDataMut`. This does not validate the contents of the data.
152    pub fn new(bytes: &'a mut [u8]) -> Self {
153        Self { bytes }
154    }
155
156    /// Iterates subsections, with mutable access.
157    pub fn subsections_mut(&mut self) -> SubsectionIterMut<'_> {
158        SubsectionIterMut::new(self.bytes)
159    }
160
161    /// Iterates through all of the name indexes stored within this Line Data.
162    /// Remaps all entries using `f` as the remapping function.
163    ///
164    /// `NameIndex` values are found in the `FILE_CHECKSUMS` debug subsections. However, it is not
165    /// possible to directly enumerate the entries stored within a `FILE_CHECKSUMS` subsection,
166    /// because they are not at guaranteed positions. There may be gaps.
167    ///
168    /// To find the `NameIndex` values within each `FILE_CHECKSUMS` debug subsection, we first scan
169    /// the `LINES` subsections that point to them, and use a `HashSet` to avoid modifying the
170    /// same `NameIndex` more than once.
171    pub fn remap_name_indexes<F>(&mut self, name_remapping: F) -> anyhow::Result<()>
172    where
173        F: Fn(NameIndex) -> anyhow::Result<NameIndex>,
174    {
175        for subsection in self.subsections_mut() {
176            match subsection.kind {
177                SubsectionKind::FILE_CHECKSUMS => {
178                    let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
179                    for checksum in checksums.iter_mut() {
180                        // This `name_offset` value points into the Names stream (/names).
181                        let old_name = NameIndex(checksum.header.name.get());
182                        let new_name = name_remapping(old_name)
183                            .with_context(|| format!("old_name: {old_name}"))?;
184                        checksum.header.name = U32::new(new_name.0);
185                    }
186                }
187
188                _ => {}
189            }
190        }
191
192        Ok(())
193    }
194}
195
196/// Represents one contribution. Each contribution consists of a sequence of variable-length
197/// blocks.
198///
199/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
200/// followed by a sequence of blocks. Each block is a variable-length record.
201pub struct LinesSubsection<'a> {
202    /// The fixed-size header of the `Lines` subsection.
203    pub contribution: &'a Contribution,
204    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
205    /// and a set of mappings from instruction offsets to line numbers within that source file.
206    pub blocks_data: &'a [u8],
207}
208
209impl<'a> LinesSubsection<'a> {
210    /// Parses the contribution header and prepares for iteration of blocks.
211    pub fn parse(bytes: &'a [u8]) -> Result<Self, ParserError> {
212        let mut p = Parser::new(bytes);
213        Ok(Self {
214            contribution: p.get()?,
215            blocks_data: p.into_rest(),
216        })
217    }
218
219    /// Iterates through the line number blocks.
220    pub fn blocks(&self) -> IterBlocks<'a> {
221        IterBlocks {
222            bytes: self.blocks_data,
223            have_columns: self.contribution.have_columns(),
224        }
225    }
226}
227
228/// Represents one contribution. Each contribution consists of a sequence of variable-length
229/// blocks.
230///
231/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
232/// followed by a sequence of blocks. Each block is a variable-length record.
233pub struct LinesSubsectionMut<'a> {
234    /// The fixed-size header of the `Lines` subsection.
235    pub contribution: &'a mut Contribution,
236    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
237    /// and a set of mappings from instruction offsets to line numbers within that source file.
238    pub blocks_data: &'a mut [u8],
239}
240
241impl<'a> LinesSubsectionMut<'a> {
242    /// Parses the contribution header and prepares for iteration of blocks.
243    pub fn parse(bytes: &'a mut [u8]) -> Result<Self, ParserError> {
244        let mut p = ParserMut::new(bytes);
245        Ok(Self {
246            contribution: p.get_mut()?,
247            blocks_data: p.into_rest(),
248        })
249    }
250
251    /// Iterates through the line number blocks.
252    pub fn blocks(&self) -> IterBlocks<'_> {
253        IterBlocks {
254            bytes: self.blocks_data,
255            have_columns: self.contribution.have_columns(),
256        }
257    }
258
259    /// Iterates through the line number blocks, with mutable access.
260    pub fn blocks_mut(&mut self) -> IterBlocksMut<'_> {
261        IterBlocksMut {
262            bytes: self.blocks_data,
263            have_columns: self.contribution.have_columns(),
264        }
265    }
266}
267
268/// Iterator state for `LinesSubsection::blocks`.
269pub struct IterBlocks<'a> {
270    bytes: &'a [u8],
271    have_columns: bool,
272}
273
274impl<'a> HasRestLen for IterBlocks<'a> {
275    fn rest_len(&self) -> usize {
276        self.bytes.len()
277    }
278}
279
280impl<'a> Iterator for IterBlocks<'a> {
281    type Item = Block<'a>;
282
283    fn next(&mut self) -> Option<Self::Item> {
284        if self.bytes.is_empty() {
285            return None;
286        }
287
288        let mut p = Parser::new(self.bytes);
289        let Ok(header) = p.get::<BlockHeader>() else {
290            warn!("failed to read BlockHeader");
291            return None;
292        };
293
294        let block_size: usize = header.block_size.get() as usize;
295        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
296            warn!("invalid block; block_size is less than size of block header");
297            return None;
298        };
299
300        trace!(
301            file_index = header.file_index.get(),
302            num_lines = header.num_lines.get(),
303            block_size = header.block_size.get(),
304            data_len,
305            "block header"
306        );
307
308        let Ok(data) = p.bytes(data_len) else {
309            warn!(
310                needed_bytes = data_len,
311                have_bytes = p.len(),
312                "invalid block: need more bytes for block contents"
313            );
314            return None;
315        };
316
317        self.bytes = p.into_rest();
318        Some(Block {
319            header,
320            data,
321            have_columns: self.have_columns,
322        })
323    }
324}
325
326/// Iterator state for `LinesSubsection::blocks`.
327pub struct IterBlocksMut<'a> {
328    bytes: &'a mut [u8],
329    have_columns: bool,
330}
331
332impl<'a> HasRestLen for IterBlocksMut<'a> {
333    fn rest_len(&self) -> usize {
334        self.bytes.len()
335    }
336}
337
338impl<'a> Iterator for IterBlocksMut<'a> {
339    type Item = BlockMut<'a>;
340
341    fn next(&mut self) -> Option<Self::Item> {
342        if self.bytes.is_empty() {
343            return None;
344        }
345
346        let mut p = ParserMut::new(take(&mut self.bytes));
347        let Ok(header) = p.get_mut::<BlockHeader>() else {
348            warn!("failed to read BlockHeader");
349            return None;
350        };
351
352        let block_size: usize = header.block_size.get() as usize;
353        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
354            warn!("invalid block; block_size is less than size of block header");
355            return None;
356        };
357
358        trace!(
359            "block header: file_index = {}, num_lines = {}, block_size = {}, data_len = {}",
360            header.file_index.get(),
361            header.num_lines.get(),
362            header.block_size.get(),
363            data_len
364        );
365
366        let Ok(data) = p.bytes_mut(data_len) else {
367            warn!(
368                "invalid block: need {} bytes for block contents, only have {}",
369                data_len,
370                p.len()
371            );
372            return None;
373        };
374
375        self.bytes = p.into_rest();
376        Some(BlockMut {
377            header,
378            data,
379            have_columns: self.have_columns,
380        })
381    }
382}
383
384/// One block of line data. Each block has a header which points to a source file. All of the line
385/// locations within the block point to line numbers (and potentially column numbers) within that
386/// source file.
387pub struct Block<'a> {
388    /// Fixed-size header for the block.
389    pub header: &'a BlockHeader,
390    /// If `true`, then this block has column numbers as well as line numbers.
391    pub have_columns: bool,
392    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
393    /// specified by `header.num_lines`.
394    pub data: &'a [u8],
395}
396
397impl<'a> Block<'a> {
398    /// Gets the line records for this block.
399    pub fn lines(&self) -> &'a [LineRecord] {
400        let num_lines = self.header.num_lines.get() as usize;
401        if let Ok((lines, _)) = <[LineRecord]>::ref_from_prefix_with_elems(self.data, num_lines) {
402            lines
403        } else {
404            warn!("failed to get lines_data for a block; wrong size");
405            &[]
406        }
407    }
408
409    /// Gets the column records for this block, if it has any.
410    pub fn columns(&self) -> Option<&'a [ColumnRecord]> {
411        if !self.have_columns {
412            return None;
413        }
414
415        let num_lines = self.header.num_lines.get() as usize;
416        let lines_size = num_lines * size_of::<LineRecord>();
417        let Some(column_data) = self.data.get(lines_size..) else {
418            warn!("failed to get column data for a block; wrong size");
419            return None;
420        };
421
422        let Ok((columns, _)) = <[ColumnRecord]>::ref_from_prefix_with_elems(column_data, num_lines)
423        else {
424            warn!("failed to get column data for a block; byte size is wrong");
425            return None;
426        };
427
428        Some(columns)
429    }
430}
431
/// One block of line data, with mutable access. Each block has a header which points to a source
/// file. All of the line locations within the block point to line numbers (and potentially column
/// numbers) within that source file.
///
/// Values of this type are produced by `IterBlocksMut`.
pub struct BlockMut<'a> {
    /// Fixed-size header for the block.
    pub header: &'a mut BlockHeader,
    /// If `true`, then this block has column numbers as well as line numbers.
    pub have_columns: bool,
    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
    /// specified by `header.num_lines`.
    pub data: &'a mut [u8],
}
444
445/// A single line record
446///
447/// See `CV_Line_t` in `cvinfo.h`
448#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Clone)]
449#[repr(C)]
450pub struct LineRecord {
451    /// The byte offset from the start of this contribution (in the instruction stream, not the
452    /// Lines Data) for this line
453    pub offset: U32<LE>,
454
455    /// Encodes three bit-fields
456    ///
457    /// * Bits 0-23 are `line_num_start`. This is the 1-based starting line number within the source
458    ///   file of this line record.
459    /// * Bits 24-30 are `delta_line_end`. It specifies a value to add to line_num_start to find the
460    ///   ending line. If this value is zero, then this line record encodes only a single line, not
461    ///   a span of lines.
462    /// * Bit 31 is the `statement` bit field. If set to 1, it indicates that this line record describes a statement.
463    pub flags: U32<LE>,
464}
465
466impl LineRecord {
467    /// The line number of this location. This value is 1-based.
468    pub fn line_num_start(&self) -> u32 {
469        self.flags.get() & 0x00_ff_ff_ff
470    }
471
472    /// If non-zero, then this indicates the delta in bytes within the source file from the start
473    /// of the source location to the end of the source location.
474    pub fn delta_line_end(&self) -> u8 {
475        ((self.flags.get() >> 24) & 0x7f) as u8
476    }
477
478    /// True if this location points to a statement.
479    pub fn statement(&self) -> bool {
480        (self.flags.get() >> 31) != 0
481    }
482}
483
484impl std::fmt::Debug for LineRecord {
485    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
486        write!(fmt, "+{} L{}", self.offset.get(), self.line_num_start())?;
487
488        let delta_line_end = self.delta_line_end();
489        if delta_line_end != 0 {
490            write!(fmt, "..+{delta_line_end}")?;
491        }
492
493        if self.statement() {
494            write!(fmt, " S")?;
495        }
496
497        Ok(())
498    }
499}
500
/// A single column record
///
/// When a block has columns, its column records are stored after its line records, and
/// `Block::columns` reads as many column records as there are line records.
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
pub struct ColumnRecord {
    /// byte offset in a source line
    // NOTE(review): presumably where the location starts within the line -- confirm.
    pub start_offset: U16<LE>,
    /// byte offset in a source line
    // NOTE(review): presumably where the location ends within the line -- confirm.
    pub end_offset: U16<LE>,
}
510
/// Fixed-size header found at the start of each `LINES` subsection.
///
/// `LinesSubsection::parse` reads this structure from the front of the subsection; the bytes
/// that follow it hold the blocks.
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
#[allow(missing_docs)]
pub struct Contribution {
    /// The offset part of the `segment:offset` value returned by `offset_segment()`.
    pub offset: U32<LE>,
    /// The segment part of the `segment:offset` value returned by `offset_segment()`.
    pub segment: U16<LE>,
    /// Bit flags. `CV_LINES_HAVE_COLUMNS` is tested by `have_columns()`.
    pub flags: U16<LE>,
    // NOTE(review): presumably the size in bytes of the code range that this contribution
    // covers -- nothing in this file reads it; confirm against `cvinfo.h`.
    pub size: U32<LE>,
    // Followed by a sequence of block records. Each block is variable-length and begins with
    // BlockHeader.
}
522
523impl Contribution {
524    /// Indicates whether this block (contribution) also has column numbers.
525    pub fn have_columns(&self) -> bool {
526        (self.flags.get() & CV_LINES_HAVE_COLUMNS) != 0
527    }
528
529    /// Get the `segment:offset` of this contribution.
530    pub fn offset_segment(&self) -> OffsetSegment {
531        OffsetSegment {
532            offset: self.offset,
533            segment: self.segment,
534        }
535    }
536}
537
538/// Bit flag for `Contribution::flags` field
539pub const CV_LINES_HAVE_COLUMNS: u16 = 0x0001;
540
// Pairs a `Contribution` header with the raw bytes of the blocks that follow it.
//
// NOTE(review): nothing in the visible part of this file constructs or reads this type, and it
// has the same shape as `LinesSubsection` -- confirm whether it is still needed.
#[allow(missing_docs)]
pub struct LinesEntry<'a> {
    pub header: &'a Contribution,
    pub blocks: &'a [u8],
}
546
/// Header for a variable-length Block record.
///
/// Each block contains a sequence of line records, and optionally column records.
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
pub struct BlockHeader {
    /// The byte offset into the file checksums subsection for this file.
    pub file_index: U32<LE>,
    /// The number of `LineRecord` entries that immediately follow this structure. Also, if the
    /// contribution header indicates that the contribution has column values, this specifies
    /// the number of column records that follow the line records.
    pub num_lines: U32<LE>,
    /// Size of the data for this block. This value includes the size of the block header itself,
    /// so the minimum value is 12.
    pub block_size: U32<LE>,
    // Followed by [u8; block_size - 12]. This data contains [LineRecord; num_lines], optionally
    // followed by [ColumnRecord; num_lines].
}
565
566/// Updates a C13 Line Data substream after NameIndex values have been updated and after
567/// file lists for a given module have been rearranged (sorted).
568pub fn fixup_c13_line_data(
569    file_permutation: &[u32], // maps new-->old for files within a module
570    sorted_names: &crate::names::NameIndexMapping,
571    c13_line_data: &mut crate::lines::LineDataMut<'_>,
572) -> anyhow::Result<()> {
573    // maps old --> new, for the file_index values in DEBUG_S_LINES blocks
574    let mut checksum_files_mapping: Vec<(u32, u32)> = Vec::with_capacity(file_permutation.len());
575
576    for subsection in c13_line_data.subsections_mut() {
577        match subsection.kind {
578            SubsectionKind::FILE_CHECKSUMS => {
579                let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
580                let mut checksum_ranges = Vec::with_capacity(file_permutation.len());
581                for (checksum_range, checksum) in checksums.iter_mut().with_ranges() {
582                    // This `name_offset` value points into the Names stream (/names).
583                    let old_name = NameIndex(checksum.header.name.get());
584                    let new_name = sorted_names
585                        .map_old_to_new(old_name)
586                        .with_context(|| format!("old_name: {old_name}"))?;
587                    checksum.header.name = U32::new(new_name.0);
588                    checksum_ranges.push(checksum_range);
589                }
590
591                // Next, we are going to rearrange the FileChecksum records within this
592                // section, using the permutation that was generated in dbi::sources::sort_sources().
593
594                let mut new_checksums: Vec<u8> = Vec::with_capacity(subsection.data.len());
595                for &old_file_index in file_permutation.iter() {
596                    let old_range = checksum_ranges[old_file_index as usize].clone();
597                    checksum_files_mapping
598                        .push((old_range.start as u32, new_checksums.len() as u32));
599                    let old_checksum_data = &subsection.data[old_range];
600                    new_checksums.extend_from_slice(old_checksum_data);
601                }
602                checksum_files_mapping.sort_unstable();
603
604                assert_eq!(new_checksums.len(), subsection.data.len());
605                subsection.data.copy_from_slice(&new_checksums);
606            }
607
608            _ => {}
609        }
610    }
611
612    // There is a data-flow dependency (on checksum_files_mapping) between these two loops; the
613    // loops cannot be combined. The first loop builds checksum_files_mapping; the second loop
614    // reads from it.
615
616    for subsection in c13_line_data.subsections_mut() {
617        match subsection.kind {
618            SubsectionKind::LINES => {
619                // We need to rewrite the file_index values within each line block.
620                let mut lines = LinesSubsectionMut::parse(subsection.data)?;
621                for block in lines.blocks_mut() {
622                    let old_file_index = block.header.file_index.get();
623                    match checksum_files_mapping
624                        .binary_search_by_key(&old_file_index, |&(old, _new)| old)
625                    {
626                        Ok(i) => {
627                            let (_old, new) = checksum_files_mapping[i];
628                            block.header.file_index = U32::new(new);
629                        }
630                        Err(_) => {
631                            bail!(
632                                "DEBUG_S_LINES section contains invalid file index: {old_file_index}"
633                            );
634                        }
635                    }
636                }
637            }
638
639            _ => {}
640        }
641    }
642
643    Ok(())
644}
645
/// This special line number is part of the "Just My Code" MSVC compiler feature.
///
/// Debuggers that implement the "Just My Code" feature look for this constant when handling
/// "Step Into" requests. If the user asks to "step into" a function call, the debugger will look
/// up the line number of the start of the function. If the line number is `JMC_LINE_NO_STEP_INTO`,
/// then the debugger will _not_ step into the function. Instead, it will step over it.
///
/// This is useful for implementations of standard library functions, like
/// `std::vector<T>::size()`. Often calls to such functions are embedded in complex statements,
/// and the user wants to debug other parts of the complex statement, not the `size()` call.
///
/// # References
/// * <https://learn.microsoft.com/en-us/cpp/build/reference/jmc?view=msvc-170>
/// * <https://learn.microsoft.com/en-us/visualstudio/debugger/just-my-code>
pub const JMC_LINE_NO_STEP_INTO: u32 = 0x00f0_0f00;

/// This special line number is part of the "Just My Code" MSVC compiler feature.
pub const JMC_LINE_FEE_FEE: u32 = 0x00fe_efee;

/// Returns true if `line` is one of the special line numbers (`JMC_LINE_NO_STEP_INTO` or
/// `JMC_LINE_FEE_FEE`) that are used by the "Just My Code" MSVC compiler feature.
pub fn is_jmc_line(line: u32) -> bool {
    matches!(line, JMC_LINE_NO_STEP_INTO | JMC_LINE_FEE_FEE)
}