ms_pdb/
lines.rs

1//! Decodes line information found in Module Streams.
2//!
3//! # References
4//! * [/ZH (Hash algorithm for calculation of file checksum in debug info)](https://learn.microsoft.com/en-us/cpp/build/reference/zh?view=msvc-170)
5
6mod checksum;
7mod subsection;
8
9pub use checksum::*;
10pub use subsection::*;
11
12use crate::codeview::syms::OffsetSegment;
13use crate::names::NameIndex;
14use anyhow::{bail, Context};
15use ms_codeview::parser::{Parser, ParserError, ParserMut};
16use ms_codeview::{HasRestLen, IteratorWithRangesExt};
17use std::mem::{size_of, take};
18use tracing::{trace, warn};
19use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, LE, U16, U32};
20
/// Identifies the kind of a subsection found in C13 Line Data.
///
/// This is a transparent wrapper around the raw `u32` kind value. Well-known kinds are exposed
/// as associated constants, e.g. `SubsectionKind::LINES`.
///
/// See `cvinfo.h`, `DEBUG_S_SUBSECTION_TYPE`.
#[derive(Copy, Clone, Eq, PartialEq)]
#[repr(transparent)]
pub struct SubsectionKind(pub u32);

// Declares the well-known `SubsectionKind` constants and generates the `Debug` impl, which
// prints the symbolic name of a known kind and `??(0x...)` for any other value.
macro_rules! subsections {
    ($( $(#[$attr:meta])* $name:ident = $value:expr; )*) => {
        impl SubsectionKind {
            $(
                $(#[$attr])*
                #[allow(missing_docs)]
                pub const $name: SubsectionKind = SubsectionKind($value);
            )*
        }

        impl std::fmt::Debug for SubsectionKind {
            fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
                match *self {
                    $( SubsectionKind::$name => fmt.write_str(stringify!($name)), )*
                    unknown => write!(fmt, "??(0x{:x})", unknown.0),
                }
            }
        }
    }
}

subsections! {
    SYMBOLS = 0xf1;
    /// Contains C13 Line Data
    LINES = 0xf2;
    STRING_TABLE = 0xf3;
    /// Contains file checksums and pointers to file names. For a given module, there should be
    /// at most one `FILE_CHECKSUMS` subsection.
    FILE_CHECKSUMS = 0xf4;

    FRAMEDATA = 0xf5;
    INLINEELINES = 0xf6;
    CROSSSCOPEIMPORTS = 0xf7;
    CROSSSCOPEEXPORTS = 0xf8;

    IL_LINES = 0xf9;
    FUNC_MDTOKEN_MAP = 0xfa;
    TYPE_MDTOKEN_MAP = 0xfb;
    MERGED_ASSEMBLYINPUT = 0xfc;

    COFF_SYMBOL_RVA = 0xfd;
}
71
72/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
73/// substream.
74pub struct LineData<'a> {
75    bytes: &'a [u8],
76}
77
78impl<'a> LineData<'a> {
79    /// Use this to create a new decoder for the C13 line data. Usually, you want to pass the
80    /// result of calling `ModiStreamData::c13_line_data_bytes()` to this function.
81    pub fn new(bytes: &'a [u8]) -> Self {
82        Self { bytes }
83    }
84
85    /// Iterates subsections
86    pub fn subsections(&self) -> SubsectionIter<'a> {
87        SubsectionIter::new(self.bytes)
88    }
89
90    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
91    pub fn find_checksums_bytes(&self) -> Option<&'a [u8]> {
92        for subsection in self.subsections() {
93            if subsection.kind == SubsectionKind::FILE_CHECKSUMS {
94                return Some(subsection.data);
95            }
96        }
97        None
98    }
99
100    /// Finds the `FILE_CHECKSUMS` subsection. There should only be one.
101    pub fn find_checksums(&self) -> Option<FileChecksumsSubsection<'a>> {
102        let subsection_bytes = self.find_checksums_bytes()?;
103        Some(FileChecksumsSubsection::new(subsection_bytes))
104    }
105
106    /// Iterates the `NameIndex` values that appear in this Line Data section.
107    ///
108    /// This may iterate the same `NameIndex` value more than once.
109    pub fn iter_name_index<F>(&self, mut f: F) -> anyhow::Result<()>
110    where
111        F: FnMut(NameIndex),
112    {
113        if let Some(checksums) = self.find_checksums() {
114            for subsection in self.subsections() {
115                match subsection.kind {
116                    SubsectionKind::LINES => {
117                        let lines_subsection = LinesSubsection::parse(subsection.data)?;
118                        for block in lines_subsection.blocks() {
119                            let file = checksums.get_file(block.header.file_index.get())?;
120                            let ni = file.header.name.get();
121                            f(NameIndex(ni));
122                        }
123                    }
124                    _ => {}
125                }
126            }
127        } else {
128            for subsection in self.subsections() {
129                match subsection.kind {
130                    SubsectionKind::LINES => {
131                        bail!("This C13 Line Data substream contains LINES subsections, but does not contain a FILE_CHECKSUMS subsection.");
132                    }
133                    _ => {}
134                }
135            }
136        };
137
138        Ok(())
139    }
140}
141
142/// Enables decoding of the line data stored in a Module Stream. This decodes the "C13 line data"
143/// substream.
144pub struct LineDataMut<'a> {
145    bytes: &'a mut [u8],
146}
147
148impl<'a> LineDataMut<'a> {
149    /// Initializes a new `LineDataMut`. This does not validate the contents of the data.
150    pub fn new(bytes: &'a mut [u8]) -> Self {
151        Self { bytes }
152    }
153
154    /// Iterates subsections, with mutable access.
155    pub fn subsections_mut(&mut self) -> SubsectionIterMut<'_> {
156        SubsectionIterMut::new(self.bytes)
157    }
158
159    /// Iterates through all of the name indexes stored within this Line Data.
160    /// Remaps all entries using `f` as the remapping function.
161    ///
162    /// `NameIndex` values are found in the `FILE_CHECKSUMS` debug subsections. However, it is not
163    /// possible to directly enumerate the entries stored within a `FILE_CHECKSUMS` subsection,
164    /// because they are not at guaranteed positions. There may be gaps.
165    ///
166    /// To find the `NameIndex` values within each `FILE_CHECKSUMS` debug subsection, we first scan
167    /// the `LINES` subsections that point to them, and use a `HashSet` to avoid modifying the
168    /// same `NameIndex` more than once.
169    pub fn remap_name_indexes<F>(&mut self, name_remapping: F) -> anyhow::Result<()>
170    where
171        F: Fn(NameIndex) -> anyhow::Result<NameIndex>,
172    {
173        for subsection in self.subsections_mut() {
174            match subsection.kind {
175                SubsectionKind::FILE_CHECKSUMS => {
176                    let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
177                    for checksum in checksums.iter_mut() {
178                        // This `name_offset` value points into the Names stream (/names).
179                        let old_name = NameIndex(checksum.header.name.get());
180                        let new_name = name_remapping(old_name)
181                            .with_context(|| format!("old_name: {old_name}"))?;
182                        checksum.header.name = U32::new(new_name.0);
183                    }
184                }
185
186                _ => {}
187            }
188        }
189
190        Ok(())
191    }
192}
193
194/// Represents one contribution. Each contribution consists of a sequence of variable-length
195/// blocks.
196///
197/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
198/// followed by a sequence of blocks. Each block is a variable-length record.
199pub struct LinesSubsection<'a> {
200    /// The fixed-size header of the `Lines` subsection.
201    pub contribution: &'a Contribution,
202    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
203    /// and a set of mappings from instruction offsets to line numbers within that source file.
204    pub blocks_data: &'a [u8],
205}
206
207impl<'a> LinesSubsection<'a> {
208    /// Parses the contribution header and prepares for iteration of blocks.
209    pub fn parse(bytes: &'a [u8]) -> Result<Self, ParserError> {
210        let mut p = Parser::new(bytes);
211        Ok(Self {
212            contribution: p.get()?,
213            blocks_data: p.into_rest(),
214        })
215    }
216
217    /// Iterates through the line number blocks.
218    pub fn blocks(&self) -> IterBlocks<'a> {
219        IterBlocks {
220            bytes: self.blocks_data,
221            have_columns: self.contribution.have_columns(),
222        }
223    }
224}
225
226/// Represents one contribution. Each contribution consists of a sequence of variable-length
227/// blocks.
228///
229/// Each `LINES` subsection represents one "contribution", which has a `ContributionHeader`,
230/// followed by a sequence of blocks. Each block is a variable-length record.
231pub struct LinesSubsectionMut<'a> {
232    /// The fixed-size header of the `Lines` subsection.
233    pub contribution: &'a mut Contribution,
234    /// Contains a sequence of variable-sized "blocks". Each block specifies a source file
235    /// and a set of mappings from instruction offsets to line numbers within that source file.
236    pub blocks_data: &'a mut [u8],
237}
238
239impl<'a> LinesSubsectionMut<'a> {
240    /// Parses the contribution header and prepares for iteration of blocks.
241    pub fn parse(bytes: &'a mut [u8]) -> Result<Self, ParserError> {
242        let mut p = ParserMut::new(bytes);
243        Ok(Self {
244            contribution: p.get_mut()?,
245            blocks_data: p.into_rest(),
246        })
247    }
248
249    /// Iterates through the line number blocks.
250    pub fn blocks(&self) -> IterBlocks<'_> {
251        IterBlocks {
252            bytes: self.blocks_data,
253            have_columns: self.contribution.have_columns(),
254        }
255    }
256
257    /// Iterates through the line number blocks, with mutable access.
258    pub fn blocks_mut(&mut self) -> IterBlocksMut<'_> {
259        IterBlocksMut {
260            bytes: self.blocks_data,
261            have_columns: self.contribution.have_columns(),
262        }
263    }
264}
265
266/// Iterator state for `LinesSubsection::blocks`.
267pub struct IterBlocks<'a> {
268    bytes: &'a [u8],
269    have_columns: bool,
270}
271
272impl<'a> HasRestLen for IterBlocks<'a> {
273    fn rest_len(&self) -> usize {
274        self.bytes.len()
275    }
276}
277
278impl<'a> Iterator for IterBlocks<'a> {
279    type Item = Block<'a>;
280
281    fn next(&mut self) -> Option<Self::Item> {
282        if self.bytes.is_empty() {
283            return None;
284        }
285
286        let mut p = Parser::new(self.bytes);
287        let Ok(header) = p.get::<BlockHeader>() else {
288            warn!("failed to read BlockHeader");
289            return None;
290        };
291
292        let block_size: usize = header.block_size.get() as usize;
293        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
294            warn!("invalid block; block_size is less than size of block header");
295            return None;
296        };
297
298        trace!(
299            file_index = header.file_index.get(),
300            num_lines = header.num_lines.get(),
301            block_size = header.block_size.get(),
302            data_len,
303            "block header"
304        );
305
306        let Ok(data) = p.bytes(data_len) else {
307            warn!(
308                needed_bytes = data_len,
309                have_bytes = p.len(),
310                "invalid block: need more bytes for block contents"
311            );
312            return None;
313        };
314
315        self.bytes = p.into_rest();
316        Some(Block {
317            header,
318            data,
319            have_columns: self.have_columns,
320        })
321    }
322}
323
324/// Iterator state for `LinesSubsection::blocks`.
325pub struct IterBlocksMut<'a> {
326    bytes: &'a mut [u8],
327    have_columns: bool,
328}
329
330impl<'a> HasRestLen for IterBlocksMut<'a> {
331    fn rest_len(&self) -> usize {
332        self.bytes.len()
333    }
334}
335
336impl<'a> Iterator for IterBlocksMut<'a> {
337    type Item = BlockMut<'a>;
338
339    fn next(&mut self) -> Option<Self::Item> {
340        if self.bytes.is_empty() {
341            return None;
342        }
343
344        let mut p = ParserMut::new(take(&mut self.bytes));
345        let Ok(header) = p.get_mut::<BlockHeader>() else {
346            warn!("failed to read BlockHeader");
347            return None;
348        };
349
350        let block_size: usize = header.block_size.get() as usize;
351        let Some(data_len) = block_size.checked_sub(size_of::<BlockHeader>()) else {
352            warn!("invalid block; block_size is less than size of block header");
353            return None;
354        };
355
356        trace!(
357            "block header: file_index = {}, num_lines = {}, block_size = {}, data_len = {}",
358            header.file_index.get(),
359            header.num_lines.get(),
360            header.block_size.get(),
361            data_len
362        );
363
364        let Ok(data) = p.bytes_mut(data_len) else {
365            warn!(
366                "invalid block: need {} bytes for block contents, only have {}",
367                data_len,
368                p.len()
369            );
370            return None;
371        };
372
373        self.bytes = p.into_rest();
374        Some(BlockMut {
375            header,
376            data,
377            have_columns: self.have_columns,
378        })
379    }
380}
381
382/// One block of line data. Each block has a header which points to a source file. All of the line
383/// locations within the block point to line numbers (and potentially column numbers) within that
384/// source file.
385pub struct Block<'a> {
386    /// Fixed-size header for the block.
387    pub header: &'a BlockHeader,
388    /// If `true`, then this block has column numbers as well as line numbers.
389    pub have_columns: bool,
390    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
391    /// specified by `header.num_lines`.
392    pub data: &'a [u8],
393}
394
395impl<'a> Block<'a> {
396    /// Gets the line records for this block.
397    pub fn lines(&self) -> &'a [LineRecord] {
398        let num_lines = self.header.num_lines.get() as usize;
399        if let Ok((lines, _)) = <[LineRecord]>::ref_from_prefix_with_elems(self.data, num_lines) {
400            lines
401        } else {
402            warn!("failed to get lines_data for a block; wrong size");
403            &[]
404        }
405    }
406
407    /// Gets the column records for this block, if it has any.
408    pub fn columns(&self) -> Option<&'a [ColumnRecord]> {
409        if !self.have_columns {
410            return None;
411        }
412
413        let num_lines = self.header.num_lines.get() as usize;
414        let lines_size = num_lines * size_of::<LineRecord>();
415        let Some(column_data) = self.data.get(lines_size..) else {
416            warn!("failed to get column data for a block; wrong size");
417            return None;
418        };
419
420        let Ok((columns, _)) = <[ColumnRecord]>::ref_from_prefix_with_elems(column_data, num_lines)
421        else {
422            warn!("failed to get column data for a block; byte size is wrong");
423            return None;
424        };
425
426        Some(columns)
427    }
428}
429
/// One block of line data, with mutable access. This is the item type yielded by
/// `IterBlocksMut`.
///
/// Each block has a header which points to a source file. All of the line locations within the
/// block point to line numbers (and potentially column numbers) within that source file.
pub struct BlockMut<'a> {
    /// Fixed-size header for the block.
    pub header: &'a mut BlockHeader,
    /// If `true`, then this block has column numbers as well as line numbers.
    pub have_columns: bool,
    /// Contains the encoded line numbers, followed by column numbers. The number of entries is
    /// specified by `header.num_lines`. This does not include the bytes of the header itself.
    pub data: &'a mut [u8],
}
442
443/// A single line record
444///
445/// See `CV_Line_t` in `cvinfo.h`
446#[derive(IntoBytes, FromBytes, KnownLayout, Immutable, Unaligned, Clone)]
447#[repr(C)]
448pub struct LineRecord {
449    /// The byte offset from the start of this contribution (in the instruction stream, not the
450    /// Lines Data) for this line
451    pub offset: U32<LE>,
452
453    /// Encodes three bit-fields
454    ///
455    /// * Bits 0-23 are `line_num_start`. This is the 1-based starting line number within the source
456    ///   file of this line record.
457    /// * Bits 24-30 are `delta_line_end`. It specifies a value to add to line_num_start to find the
458    ///   ending line. If this value is zero, then this line record encodes only a single line, not
459    ///   a span of lines.
460    /// * Bit 31 is the `statement` bit field. If set to 1, it indicates that this line record describes a statement.
461    pub flags: U32<LE>,
462}
463
464impl LineRecord {
465    /// The line number of this location. This value is 1-based.
466    pub fn line_num_start(&self) -> u32 {
467        self.flags.get() & 0x00_ff_ff_ff
468    }
469
470    /// If non-zero, then this indicates the delta in bytes within the source file from the start
471    /// of the source location to the end of the source location.
472    pub fn delta_line_end(&self) -> u8 {
473        ((self.flags.get() >> 24) & 0x7f) as u8
474    }
475
476    /// True if this location points to a statement.
477    pub fn statement(&self) -> bool {
478        (self.flags.get() >> 31) != 0
479    }
480}
481
482impl std::fmt::Debug for LineRecord {
483    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
484        write!(fmt, "+{} L{}", self.offset.get(), self.line_num_start())?;
485
486        let delta_line_end = self.delta_line_end();
487        if delta_line_end != 0 {
488            write!(fmt, "..+{}", delta_line_end)?;
489        }
490
491        if self.statement() {
492            write!(fmt, " S")?;
493        }
494
495        Ok(())
496    }
497}
498
/// A single column record.
///
/// When a contribution has column data, each block stores `num_lines` of these records after
/// its `num_lines` line records (see `Block::columns`).
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
pub struct ColumnRecord {
    /// Starting byte offset in a source line.
    pub start_offset: U16<LE>,
    /// Ending byte offset in a source line.
    // NOTE(review): whether the end offset is inclusive or exclusive is not visible here.
    pub end_offset: U16<LE>,
}
508
509#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
510#[repr(C)]
511#[allow(missing_docs)]
512pub struct Contribution {
513    pub offset: U32<LE>,
514    pub segment: U16<LE>,
515    pub flags: U16<LE>,
516    pub size: U32<LE>,
517    // Followed by a sequence of block records. Each block is variable-length and begins with
518    // BlockHeader.
519}
520
521impl Contribution {
522    /// Indicates whether this block (contribution) also has column numbers.
523    pub fn have_columns(&self) -> bool {
524        (self.flags.get() & CV_LINES_HAVE_COLUMNS) != 0
525    }
526
527    /// Get the `segment:offset` of this contribution.
528    pub fn offset_segment(&self) -> OffsetSegment {
529        OffsetSegment {
530            offset: self.offset,
531            segment: self.segment,
532        }
533    }
534}
535
536/// Bit flag for `Contribution::flags` field
537pub const CV_LINES_HAVE_COLUMNS: u16 = 0x0001;
538
/// Pairs a borrowed `Contribution` header with a borrowed byte slice of blocks.
// NOTE(review): nothing in this part of the file constructs or reads `LinesEntry`; it has the
// same shape as `LinesSubsection`. Confirm whether it is still needed.
#[allow(missing_docs)]
pub struct LinesEntry<'a> {
    pub header: &'a Contribution,
    pub blocks: &'a [u8],
}
544
/// Header for a variable-length Block record.
///
/// Each block contains a sequence of line records, and optionally column records.
#[derive(IntoBytes, FromBytes, Immutable, KnownLayout, Unaligned)]
#[repr(C)]
pub struct BlockHeader {
    /// The byte offset into the file checksums subsection for this file.
    pub file_index: U32<LE>,
    /// The number of `LineRecord` entries that immediately follow this structure. Also, if the
    /// contribution header indicates that the contribution has column values, this specifies
    /// the number of column records that follow the line records.
    pub num_lines: U32<LE>,
    /// Size of the data for this block. This value includes the size of the block header itself,
    /// so the minimum value is 12.
    pub block_size: U32<LE>,
    // Followed by [u8; block_size - 12]. This data contains [LineRecord; num_lines], optionally
    // followed by [ColumnRecord; num_lines].
}
563
564/// Updates a C13 Line Data substream after NameIndex values have been updated and after
565/// file lists for a given module have been rearranged (sorted).
566pub fn fixup_c13_line_data(
567    file_permutation: &[u32], // maps new-->old for files within a module
568    sorted_names: &crate::names::NameIndexMapping,
569    c13_line_data: &mut crate::lines::LineDataMut<'_>,
570) -> anyhow::Result<()> {
571    // maps old --> new, for the file_index values in DEBUG_S_LINES blocks
572    let mut checksum_files_mapping: Vec<(u32, u32)> = Vec::with_capacity(file_permutation.len());
573
574    for subsection in c13_line_data.subsections_mut() {
575        match subsection.kind {
576            SubsectionKind::FILE_CHECKSUMS => {
577                let mut checksums = FileChecksumsSubsectionMut::new(subsection.data);
578                let mut checksum_ranges = Vec::with_capacity(file_permutation.len());
579                for (checksum_range, checksum) in checksums.iter_mut().with_ranges() {
580                    // This `name_offset` value points into the Names stream (/names).
581                    let old_name = NameIndex(checksum.header.name.get());
582                    let new_name = sorted_names
583                        .map_old_to_new(old_name)
584                        .with_context(|| format!("old_name: {old_name}"))?;
585                    checksum.header.name = U32::new(new_name.0);
586                    checksum_ranges.push(checksum_range);
587                }
588
589                // Next, we are going to rearrange the FileChecksum records within this
590                // section, using the permutation that was generated in dbi::sources::sort_sources().
591
592                let mut new_checksums: Vec<u8> = Vec::with_capacity(subsection.data.len());
593                for &old_file_index in file_permutation.iter() {
594                    let old_range = checksum_ranges[old_file_index as usize].clone();
595                    checksum_files_mapping
596                        .push((old_range.start as u32, new_checksums.len() as u32));
597                    let old_checksum_data = &subsection.data[old_range];
598                    new_checksums.extend_from_slice(old_checksum_data);
599                }
600                checksum_files_mapping.sort_unstable();
601
602                assert_eq!(new_checksums.len(), subsection.data.len());
603                subsection.data.copy_from_slice(&new_checksums);
604            }
605
606            _ => {}
607        }
608    }
609
610    // There is a data-flow dependency (on checksum_files_mapping) between these two loops; the
611    // loops cannot be combined. The first loop builds checksum_files_mapping; the second loop
612    // reads from it.
613
614    for subsection in c13_line_data.subsections_mut() {
615        match subsection.kind {
616            SubsectionKind::LINES => {
617                // We need to rewrite the file_index values within each line block.
618                let mut lines = LinesSubsectionMut::parse(subsection.data)?;
619                for block in lines.blocks_mut() {
620                    let old_file_index = block.header.file_index.get();
621                    match checksum_files_mapping
622                        .binary_search_by_key(&old_file_index, |&(old, _new)| old)
623                    {
624                        Ok(i) => {
625                            let (_old, new) = checksum_files_mapping[i];
626                            block.header.file_index = U32::new(new);
627                        }
628                        Err(_) => {
629                            bail!("DEBUG_S_LINES section contains invalid file index: {old_file_index}");
630                        }
631                    }
632                }
633            }
634
635            _ => {}
636        }
637    }
638
639    Ok(())
640}
641
/// Special line number used by the "Just My Code" (JMC) feature of the MSVC compiler to mark a
/// function that debuggers should not step into.
///
/// A debugger that implements "Just My Code" checks for this constant when it handles a
/// "Step Into" request: it looks up the line number of the start of the called function, and
/// if that line number is `JMC_LINE_NO_STEP_INTO`, it steps _over_ the call instead of
/// stepping into it.
///
/// This is useful for implementations of standard library functions, like
/// `std::vector<T>::size()`. Calls to such functions are often embedded in complex statements,
/// and the user wants to debug the other parts of the statement, not the `size()` call.
///
/// # References
/// * <https://learn.microsoft.com/en-us/cpp/build/reference/jmc?view=msvc-170>
/// * <https://learn.microsoft.com/en-us/visualstudio/debugger/just-my-code>
pub const JMC_LINE_NO_STEP_INTO: u32 = 0x00f0_0f00;

/// This special line number is part of the "Just My Code" MSVC compiler feature.
pub const JMC_LINE_FEE_FEE: u32 = 0x00fe_efee;

/// Returns true if `line` is one of the special line numbers used by the "Just My Code" MSVC
/// compiler feature (`JMC_LINE_NO_STEP_INTO` or `JMC_LINE_FEE_FEE`).
pub fn is_jmc_line(line: u32) -> bool {
    matches!(line, JMC_LINE_NO_STEP_INTO | JMC_LINE_FEE_FEE)
}