ms_pdb/
lines.rs

1//! Decodes line information found in Module Streams.
2//!
3//! # References
4//! * [/ZH (Hash algorithm for calculation of file checksum in debug info)](https://learn.microsoft.com/en-us/cpp/build/reference/zh?view=msvc-170)
5
6mod checksum;
7mod subsection;
8
9pub use checksum::*;
10pub use subsection::*;
11
12use crate::names::NameIndex;
13use crate::parser::{Parser, ParserError, ParserMut};
14use crate::utils::iter::{HasRestLen, IteratorWithRangesExt};
15use anyhow::{bail, Context};
16use std::mem::{size_of, take};
17use tracing::{trace, warn};
18use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, LE, U16, U32};
19
/// Identifies the kind of a subsection found in C13 Line Data.
///
/// This is a transparent wrapper over the raw `u32` kind value, so values that do not match any
/// of the associated constants are still representable.
///
/// See `cvinfo.h`, `DEBUG_S_SUBSECTION_TYPE`.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct SubsectionKind(pub u32);

// Declares the well-known `SubsectionKind` values as associated constants and generates a
// `Debug` impl that prints the constant's name for known values, or `??(0x..)` otherwise.
macro_rules! subsections {
    ($( $(#[$attr:meta])* $name:ident = $value:expr; )*) => {
        impl SubsectionKind {
            $(
                $(#[$attr])*
                #[allow(missing_docs)]
                pub const $name: SubsectionKind = SubsectionKind($value);
            )*
        }

        impl std::fmt::Debug for SubsectionKind {
            fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
                // Every declared kind, paired with the name of its constant.
                const KNOWN: &[(SubsectionKind, &str)] = &[
                    $( (SubsectionKind::$name, stringify!($name)), )*
                ];

                match KNOWN.iter().find(|(kind, _)| kind == self) {
                    Some((_, name)) => fmt.write_str(name),
                    None => write!(fmt, "??(0x{:x})", self.0),
                }
            }
        }
    }
}

subsections! {
    SYMBOLS = 0xf1;
    /// Contains C13 Line Data
    LINES = 0xf2;
    STRING_TABLE = 0xf3;
    /// Contains file checksums and pointers to file names. For a given module, there should be
    /// at most one `FILE_CHECKSUMS` subsection.
    FILE_CHECKSUMS = 0xf4;

    FRAMEDATA = 0xF5;
    INLINEELINES = 0xF6;
    CROSSSCOPEIMPORTS = 0xF7;
    CROSSSCOPEEXPORTS = 0xF8;

    IL_LINES = 0xF9;
    FUNC_MDTOKEN_MAP = 0xFA;
    TYPE_MDTOKEN_MAP = 0xFB;
    MERGED_ASSEMBLYINPUT = 0xFC;

    COFF_SYMBOL_RVA = 0xFD;
}
70
71/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
72/// substream.
73pub struct LineData<'a> {
74    bytes: &'a [u8],
75}
76
77impl<'a> LineData<'a> {
78    /// Use this to create a new decoder for the C13 line data. Usually, you want to pass the
79    /// result of calling `ModiStreamData::c13_line_data_bytes()` to this function.
80    pub fn new(bytes: &'a [u8]) -> Self {
81        Self { bytes }
82    }
83
84    /// Iterates subsections
85    pub fn subsections(&self) -> SubsectionIter<'a> {
86        SubsectionIter::new(self.bytes)
87    }
88
89    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
90    pub fn find_checksums_bytes(&self) -> Option<&'a [u8]> {
91        for subsection in self.subsections() {
92            if subsection.kind == SubsectionKind::FILE_CHECKSUMS {
93                return Some(subsection.data);
94            }
95        }
96        None
97    }
98
99    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
100    pub fn find_checksums(&self) -> Option<FileChecksumsSubsection<'a>> {
101        let subsection_bytes = self.find_checksums_bytes()?;
102        Some(FileChecksumsSubsection::new(subsection_bytes))
103    }
104
105    /// Iterates the `NameIndex` values that appear in this Line Data section.
106    ///
107    /// This may iterate the same `NameIndex` value more than once.
108    pub fn iter_name_index<F>(&self, mut f: F) -> anyhow::Result<()>
109    where
110        F: FnMut(NameIndex),
111    {
112        if let Some(checksums) = self.find_checksums() {
113            for subsection in self.subsections() {
114                match subsection.kind {
115                    SubsectionKind::LINES => {
116                        let lines_subsection = LinesSubsection::parse(subsection.data)?;
117                        for block in lines_subsection.blocks() {
118                            let file = checksums.get_file(block.header.file_index.get())?;
119                            let ni = file.header.name.get();
120                            f(NameIndex(ni));
121                        }
122                    }
123                    _ => {}
124                }
125            }
126        } else {
127            for subsection in self.subsections() {
128                match subsection.kind {
129                    SubsectionKind::LINES => {
130                        bail!("This C13 Line Data substream contains LINES subsections, but does not contain a FILE_CHECKSUMS subsection.");
131                    }
132                    _ => {}
133                }
134            }
135        };
136
137        Ok(())
138    }
139}
140
141/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
142/// substream.
143pub struct LineDataMut<'a> {
144    bytes: &'a mut [u8],
145}
146
147impl<'a> LineDataMut<'a> {
148    /// Initializes a new `LineDataMut`. This does not validate the contents of the data.
149    pub fn new(bytes: &'a mut [u8]) -> Self {
150        Self { bytes }
151    }
152
153    /// Iterates subsections, with mutable access.
154    pub fn subsections_mut(&mut self) -> SubsectionIterMut<'_> {
155        SubsectionIterMut::new(self.bytes)
156    }
157
158    /// Iterates through all of the name indexes stored within this Line Data.
159    /// Remaps all entries using `f` as the remapping function.
160    ///
161    /// `NameIndex` values are found in the `FILE_CHECKSUMS` debug subsections. However, it is not
162    /// possible to directly enumerate the entries stored within a `FILE_CHECKSUMS` subsection,
163    /// because they are not at guaranteed positions. There may be gaps.
164    ///
165    /// To find the `NameIndex` values within each `FILE_CHECKSUMS` debug subsection, we first scan
166    /// the `LINES` subsections that point to them, and use a `HashSet` to avoid modifying the
167    /// same `NameIndex` more than once.
168    pub fn remap_name_indexes<F>(&mut self, name_remapping: F) -> anyhow::Result<()>
169    where
170        F: Fn(NameIndex) -> anyhow::Result<NameIndex>,
171    {
172        for subsection in self.subsections_mut() {
173            match subsection.kind {
174                SubsectionKind::FILE_CHECKSUMS => {
175                    let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
176                    for checksum in checksums.iter_mut() {
177                        // This `name_offset` value points into the Names stream (/names).
178                        let old_name = NameIndex(checksum.header.name.get());
179                        let new_name = name_remapping(old_name)
180                            .with_context(|| format!("old_name: {old_name}"))?;
181                        checksum.header.name = U32::new(new_name.0);
182                    }
183                }
184
185                _ => {}
186            }
187        }
188
189        Ok(())
190    }
191}
192
193/// Represents one contribution. Each contribution consists of a sequence of variable-length
194/// blocks.
195///
196/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
197/// followed by a sequence of blocks. Each block is a variable-length record.
198pub struct LinesSubsection<'a> {
199    /// The fixed-size header of the `Lines` subsection.
200    pub contribution: &'a Contribution,
201    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
202    /// and a set of mappings from instruction offsets to line numbers within that source file.
203    pub blocks_data: &'a [u8],
204}
205
206impl<'a> LinesSubsection<'a> {
207    /// Parses the contribution header and prepares for iteration of blocks.
208    pub fn parse(bytes: &'a [u8]) -> Result<Self, ParserError> {
209        let mut p = Parser::new(bytes);
210        Ok(Self {
211            contribution: p.get()?,
212            blocks_data: p.into_rest(),
213        })
214    }
215
216    /// Iterates through the line number blocks.
217    pub fn blocks(&self) -> IterBlocks<'a> {
218        IterBlocks {
219            bytes: self.blocks_data,
220            have_columns: self.contribution.have_columns(),
221        }
222    }
223}
224
225/// Represents one contribution. Each contribution consists of a sequence of variable-length
226/// blocks.
227///
228/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
229/// followed by a sequence of blocks. Each block is a variable-length record.
230pub struct LinesSubsectionMut<'a> {
231    /// The fixed-size header of the `Lines` subsection.
232    pub contribution: &'a mut Contribution,
233    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
234    /// and a set of mappings from instruction offsets to line numbers within that source file.
235    pub blocks_data: &'a mut [u8],
236}
237
238impl<'a> LinesSubsectionMut<'a> {
239    /// Parses the contribution header and prepares for iteration of blocks.
240    pub fn parse(bytes: &'a mut [u8]) -> Result<Self, ParserError> {
241        let mut p = ParserMut::new(bytes);
242        Ok(Self {
243            contribution: p.get_mut()?,
244            blocks_data: p.into_rest(),
245        })
246    }
247
248    /// Iterates through the line number blocks.
249    pub fn blocks(&self) -> IterBlocks<'_> {
250        IterBlocks {
251            bytes: self.blocks_data,
252            have_columns: self.contribution.have_columns(),
253        }
254    }
255
256    /// Iterates through the line number blocks, with mutable access.
257    pub fn blocks_mut(&mut self) -> IterBlocksMut<'_> {
258        IterBlocksMut {
259            bytes: self.blocks_data,
260            have_columns: self.contribution.have_columns(),
261        }
262    }
263}
264
265/// Iterator state for `LinesSubsection::blocks`.
266pub struct IterBlocks<'a> {
267    bytes: &'a [u8],
268    have_columns: bool,
269}
270
271impl<'a> HasRestLen for IterBlocks<'a> {
272    fn rest_len(&self) -> usize {
273        self.bytes.len()
274    }
275}
276
277impl<'a> Iterator for IterBlocks<'a> {
278    type Item = Block<'a>;
279
280    fn next(&mut self) -> Option<Self::Item> {
281        if self.bytes.is_empty() {
282            return None;
283        }
284
285        let mut p = Parser::new(self.bytes);
286        let Ok(header) = p.get::<BlockHeader>() else {
287            warn!("failed to read BlockHeader");
288            return None;
289        };
290
291        let block_size: usize = header.block_size.get() as usize;
292        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
293            warn!("invalid block; block_size is less than size of block header");
294            return None;
295        };
296
297        trace!(
298            file_index = header.file_index.get(),
299            num_lines = header.num_lines.get(),
300            block_size = header.block_size.get(),
301            data_len,
302            "block header"
303        );
304
305        let Ok(data) = p.bytes(data_len) else {
306            warn!(
307                needed_bytes = data_len,
308                have_bytes = p.len(),
309                "invalid block: need more bytes for block contents"
310            );
311            return None;
312        };
313
314        self.bytes = p.into_rest();
315        Some(Block {
316            header,
317            data,
318            have_columns: self.have_columns,
319        })
320    }
321}
322
323/// Iterator state for `LinesSubsection::blocks`.
324pub struct IterBlocksMut<'a> {
325    bytes: &'a mut [u8],
326    have_columns: bool,
327}
328
329impl<'a> HasRestLen for IterBlocksMut<'a> {
330    fn rest_len(&self) -> usize {
331        self.bytes.len()
332    }
333}
334
335impl<'a> Iterator for IterBlocksMut<'a> {
336    type Item = BlockMut<'a>;
337
338    fn next(&mut self) -> Option<Self::Item> {
339        if self.bytes.is_empty() {
340            return None;
341        }
342
343        let mut p = ParserMut::new(take(&mut self.bytes));
344        let Ok(header) = p.get_mut::<BlockHeader>() else {
345            warn!("failed to read BlockHeader");
346            return None;
347        };
348
349        let block_size: usize = header.block_size.get() as usize;
350        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
351            warn!("invalid block; block_size is less than size of block header");
352            return None;
353        };
354
355        trace!(
356            "block header: file_index = {}, num_lines = {}, block_size = {}, data_len = {}",
357            header.file_index.get(),
358            header.num_lines.get(),
359            header.block_size.get(),
360            data_len
361        );
362
363        let Ok(data) = p.bytes_mut(data_len) else {
364            warn!(
365                "invalid block: need {} bytes for block contents, only have {}",
366                data_len,
367                p.len()
368            );
369            return None;
370        };
371
372        self.bytes = p.into_rest();
373        Some(BlockMut {
374            header,
375            data,
376            have_columns: self.have_columns,
377        })
378    }
379}
380
381/// One block of line data. Each block has a header which points to a source file. All of the line
382/// locations within the block point to line numbers (and potentially column numbers) within that
383/// source file.
384pub struct Block<'a> {
385    /// Fixed-size header for the block.
386    pub header: &'a BlockHeader,
387    /// If `true`, then this block has column numbers as well as line numbers.
388    pub have_columns: bool,
389    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
390    /// specified by `header.num_lines`.
391    pub data: &'a [u8],
392}
393
394impl<'a> Block<'a> {
395    /// Gets the line records for this block.
396    pub fn lines(&self) -> &'a [LineRecord] {
397        let num_lines = self.header.num_lines.get() as usize;
398        if let Ok((lines, _)) = <[LineRecord]>::ref_from_prefix_with_elems(self.data, num_lines) {
399            lines
400        } else {
401            warn!("failed to get lines_data for a block; wrong size");
402            &[]
403        }
404    }
405
406    /// Gets the column records for this block, if it has any.
407    pub fn columns(&self) -> Option<&'a [ColumnRecord]> {
408        if !self.have_columns {
409            return None;
410        }
411
412        let num_lines = self.header.num_lines.get() as usize;
413        let lines_size = num_lines * size_of::<LineRecord>();
414        let Some(column_data) = self.data.get(lines_size..) else {
415            warn!("failed to get column data for a block; wrong size");
416            return None;
417        };
418
419        let Ok((columns, _)) = <[ColumnRecord]>::ref_from_prefix_with_elems(column_data, num_lines)
420        else {
421            warn!("failed to get column data for a block; byte size is wrong");
422            return None;
423        };
424
425        Some(columns)
426    }
427}
428
429/// One block of line data. Each block has a header which points to a source file. All of the line
430/// locations within the block point to line numbers (and potentially column numbers) within that
431/// source file.
432pub struct BlockMut<'a> {
433    /// Fixed-size header for the block.
434    pub header: &'a mut BlockHeader,
435    /// If `true`, then this block has column numbers as well as line numbers.
436    pub have_columns: bool,
437    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
438    /// specified by `header.num_lines`.
439    pub data: &'a mut [u8],
440}
441
442/// A single line record
443///
444/// See `CV_Line_t` in `cvinfo.h`
445#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Clone)]
446#[repr(C)]
447pub struct LineRecord {
448    /// The byte offset from the start of this contribution (in the instruction stream, not the
449    /// Lines Data) for this line
450    pub offset: U32<LE>,
451
452    /// Encodes three bit-fields
453    ///
454    /// * Bits 0-23 are `line_num_start`. This is the 1-based starting line number within the source
455    ///   file of this line record.
456    /// * Bits 24-30 are `delta_line_end`. It specifies a value to add to line_num_start to find the
457    ///   ending line. If this value is zero, then this line record encodes only a single line, not
458    ///   a span of lines.
459    /// * Bit 31 is the `statement` bit field. If set to 1, it indicates that this line record describes a statement.
460    pub flags: U32<LE>,
461}
462
463impl LineRecord {
464    /// The line number of this location. This value is 1-based.
465    pub fn line_num_start(&self) -> u32 {
466        self.flags.get() & 0x00_ff_ff_ff
467    }
468
469    /// If non-zero, then this indicates the delta in bytes within the source file from the start
470    /// of the source location to the end of the source location.
471    pub fn delta_line_end(&self) -> u8 {
472        ((self.flags.get() >> 24) & 0x7f) as u8
473    }
474
475    /// True if this location points to a statement.
476    pub fn statement(&self) -> bool {
477        (self.flags.get() >> 31) != 0
478    }
479}
480
481impl std::fmt::Debug for LineRecord {
482    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
483        write!(fmt, "+{} L{}", self.offset.get(), self.line_num_start())?;
484
485        let delta_line_end = self.delta_line_end();
486        if delta_line_end != 0 {
487            write!(fmt, "..+{}", delta_line_end)?;
488        }
489
490        if self.statement() {
491            write!(fmt, " S")?;
492        }
493
494        Ok(())
495    }
496}
497
498/// A single column record
499#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
500#[repr(C)]
501pub struct ColumnRecord {
502    /// byte offset in a source line
503    pub start_offset: U16<LE>,
504    /// byte offset in a source line
505    pub end_offset: U16<LE>,
506}
507
508#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
509#[repr(C)]
510#[allow(missing_docs)]
511pub struct Contribution {
512    pub contribution_offset: U32<LE>,
513    pub contribution_segment: U16<LE>,
514    pub flags: U16<LE>,
515    pub contribution_size: U32<LE>,
516    // Followed by a sequence of block records. Each block is variable-length and begins with
517    // BlockHeader.
518}
519
520impl Contribution {
521    /// Indicates whether this block (contribution) also has column numbers.
522    pub fn have_columns(&self) -> bool {
523        (self.flags.get() & CV_LINES_HAVE_COLUMNS) != 0
524    }
525}
526
527/// Bit flag for `Contribution::flags` field
528pub const CV_LINES_HAVE_COLUMNS: u16 = 0x0001;
529
530#[allow(missing_docs)]
531pub struct LinesEntry<'a> {
532    pub header: &'a Contribution,
533    pub blocks: &'a [u8],
534}
535
536/// Header for a variable-length Block record.
537///
538/// Each block contains a sequence of line records, and optionally column records.
539#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
540#[repr(C)]
541pub struct BlockHeader {
542    /// The byte offset into the file checksums subsection for this file.
543    pub file_index: U32<LE>,
544    /// The number of `LineRecord` entries that immediately follow this structure. Also, if the
545    /// contribution header indicates that the contribution has column values, this specifies
546    /// the number of column records that follow the file records.
547    pub num_lines: U32<LE>,
548    /// Size of the data for this block. This value includes the size of the block header itself,
549    /// so the minimum value value is 12.
550    pub block_size: U32<LE>,
551    // Followed by [u8; block_size - 12]. This data contains [LineRecord; num_lines], optionally
552    // followed by [ColumnRecord; num_lines].
553}
554
555/// Updates a C13 Line Data substream after NameIndex values have been updated and after
556/// file lists for a given module have been rearranged (sorted).
557pub fn fixup_c13_line_data(
558    file_permutation: &[u32], // maps new-->old for files within a module
559    sorted_names: &crate::names::NameIndexMapping,
560    c13_line_data: &mut crate::lines::LineDataMut<'_>,
561) -> anyhow::Result<()> {
562    // maps old --> new, for the file_index values in DEBUG_S_LINES blocks
563    let mut checksum_files_mapping: Vec<(u32, u32)> = Vec::with_capacity(file_permutation.len());
564
565    for subsection in c13_line_data.subsections_mut() {
566        match subsection.kind {
567            SubsectionKind::FILE_CHECKSUMS => {
568                let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
569                let mut checksum_ranges = Vec::with_capacity(file_permutation.len());
570                for (checksum_range, checksum) in checksums.iter_mut().with_ranges() {
571                    // This `name_offset` value points into the Names stream (/names).
572                    let old_name = NameIndex(checksum.header.name.get());
573                    let new_name = sorted_names
574                        .map_old_to_new(old_name)
575                        .with_context(|| format!("old_name: {old_name}"))?;
576                    checksum.header.name = U32::new(new_name.0);
577                    checksum_ranges.push(checksum_range);
578                }
579
580                // Next, we are going to rearrange the FileChecksum records within this
581                // section, using the permutation that was generated in dbi::sources::sort_sources().
582
583                let mut new_checksums: Vec<u8> = Vec::with_capacity(subsection.data.len());
584                for &old_file_index in file_permutation.iter() {
585                    let old_range = checksum_ranges[old_file_index as usize].clone();
586                    checksum_files_mapping
587                        .push((old_range.start as u32, new_checksums.len() as u32));
588                    let old_checksum_data = &subsection.data[old_range];
589                    new_checksums.extend_from_slice(old_checksum_data);
590                }
591                checksum_files_mapping.sort_unstable();
592
593                assert_eq!(new_checksums.len(), subsection.data.len());
594                subsection.data.copy_from_slice(&new_checksums);
595            }
596
597            _ => {}
598        }
599    }
600
601    // There is a data-flow dependency (on checksum_files_mapping) between these two loops; the
602    // loops cannot be combined. The first loop builds checksum_files_mapping; the second loop
603    // reads from it.
604
605    for subsection in c13_line_data.subsections_mut() {
606        match subsection.kind {
607            SubsectionKind::LINES => {
608                // We need to rewrite the file_index values within each line block.
609                let mut lines = LinesSubsectionMut::parse(subsection.data)?;
610                for block in lines.blocks_mut() {
611                    let old_file_index = block.header.file_index.get();
612                    match checksum_files_mapping
613                        .binary_search_by_key(&old_file_index, |&(old, _new)| old)
614                    {
615                        Ok(i) => {
616                            let (_old, new) = checksum_files_mapping[i];
617                            block.header.file_index = U32::new(new);
618                        }
619                        Err(_) => {
620                            bail!("DEBUG_S_LINES section contains invalid file index: {old_file_index}");
621                        }
622                    }
623                }
624            }
625
626            _ => {}
627        }
628    }
629
630    Ok(())
631}
632
/// This special line number is part of the "Just My Code" MSVC compiler feature.
///
/// Debuggers that implement the "Just My Code" feature look for this constant when handling
/// "Step Into" requests. If the user asks to "step into" a function call, the debugger will look
/// up the line number of the start of the function. If the line number is `JMC_LINE_NO_STEP_INTO`,
/// then the debugger will _not_ step into the function. Instead, it will step over it.
///
/// This is useful for implementations of standard library functions, like
/// `std::vector<T>::size()`. Often calls to such functions are embedded in complex statements,
/// and the user wants to debug other parts of the complex statement, not the `size()` call.
///
/// # References
/// * <https://learn.microsoft.com/en-us/cpp/build/reference/jmc?view=msvc-170>
/// * <https://learn.microsoft.com/en-us/visualstudio/debugger/just-my-code>
pub const JMC_LINE_NO_STEP_INTO: u32 = 0x00f0_0f00;

/// This special line number is part of the "Just My Code" MSVC compiler feature.
pub const JMC_LINE_FEE_FEE: u32 = 0x00fe_efee;

/// Returns true if `line` is one of the special line numbers (`JMC_LINE_NO_STEP_INTO` or
/// `JMC_LINE_FEE_FEE`) used by the "Just My Code" MSVC compiler feature.
pub fn is_jmc_line(line: u32) -> bool {
    matches!(line, JMC_LINE_NO_STEP_INTO | JMC_LINE_FEE_FEE)
}