probe_rs_debug/
source_instructions.rs

1use super::{
2    ColumnType, DebugError, DebugInfo, GimliReader, canonical_path_eq,
3    unit_info::{self, UnitInfo},
4};
5use gimli::LineSequence;
6use serde::Serialize;
7use std::{
8    fmt::{Debug, Formatter},
9    num::NonZeroU64,
10    ops::Range,
11};
12use typed_path::{TypedPath, TypedPathBuf};
13
14/// A verified breakpoint represents an instruction address, and the source location that it corresponds to it,
15/// for locations in the target binary that comply with the DWARF standard terminology for "recommended breakpoint location".
16/// This typically refers to instructions that are not part of the prologue or epilogue, and are part of the user code,
17/// or are the final instruction in a sequence, before the processor begins the epilogue code.
18/// The `probe-rs` debugger uses this information to identify valid halt locations for breakpoints and stepping.
19#[derive(Clone, Debug)]
20pub struct VerifiedBreakpoint {
21    /// The address in target memory, where the breakpoint can be set.
22    pub address: u64,
23    /// If the breakpoint request was for a specific source location, then this field will contain the resolved source location.
24    pub source_location: SourceLocation,
25}
26
27impl VerifiedBreakpoint {
28    /// Return the first valid breakpoint location of the statement that is greater than OR equal to `address`.
29    /// e.g., if the `address` is the current program counter, then the return value will be the next valid halt address
30    /// in the current sequence.
31    pub(crate) fn for_address(
32        debug_info: &DebugInfo,
33        address: u64,
34    ) -> Result<VerifiedBreakpoint, DebugError> {
35        let instruction_sequence = InstructionSequence::from_address(debug_info, address)?;
36
37        // Cycle through various degrees of matching, to find the most relevant source location.
38        if let Some(verified_breakpoint) = match_address(&instruction_sequence, address, debug_info)
39        {
40            tracing::debug!(
41                "Found valid breakpoint for address: {:#010x} : {verified_breakpoint:?}",
42                &address
43            );
44            return Ok(verified_breakpoint);
45        }
46        // If we get here, we have not found a valid breakpoint location.
47        let message = format!(
48            "Could not identify a valid breakpoint for address: {address:#010x}. Please consider using instruction level stepping."
49        );
50        Err(DebugError::WarnAndContinue { message })
51    }
52
53    /// Identifying the breakpoint location for a specific location (path, line, colunmn) is a bit more complex,
54    /// compared to the `for_address()` method, due to a few factors:
55    /// - The correct program instructions, may be in any of the compilation units of the current program.
56    /// - The debug information may not contain data for the "specific source" location requested:
57    ///   - DWARFv5 standard, section 6.2, allows omissions based on certain conditions. In this case,
58    ///     we need to find the closest "relevant" source location that has valid debug information.
59    ///   - The requested location may not be a valid source location, e.g. when the
60    ///     debug information has been optimized away. In this case we will return an appropriate error.
61    ///
62    /// #### The logic used to find the "most relevant" source location is as follows:
63    /// 1. Filter  [`UnitInfo`], by using [`gimli::LineProgramHeader`] to match units that include
64    ///    the requested path.
65    /// 2. For each matching compilation unit, get the [`gimli::LineProgram`] and
66    ///    [`Vec<LineSequence>`][LineSequence].
67    /// 3. Filter the [`Vec<LineSequence>`][LineSequence] entries to only include sequences that match the requested path.
68    /// 3. Convert remaining [`LineSequence`], to [`InstructionSequence`].
69    /// 4. Return the first [`InstructionSequence`] that contains the requested source location.
70    ///    1. This may be an exact match on file/line/column, or,
71    ///    2. Failing an exact match, a match on file/line only.
72    ///    3. Failing that, a match on file only, where the line number is the "next" available instruction,
73    ///       on the next available line of the specified file.
74    pub(crate) fn for_source_location(
75        debug_info: &DebugInfo,
76        path: TypedPath,
77        line: u64,
78        column: Option<u64>,
79    ) -> Result<Self, DebugError> {
80        for program_unit in &debug_info.unit_infos {
81            let Some(ref line_program) = program_unit.unit.line_program else {
82                // Not all compilation units need to have debug line information, so we skip those.
83                continue;
84            };
85
86            let mut num_files = line_program.header().file_names().len();
87
88            // For DWARF version 5, the current compilation file is included in the file names, with index 0.
89            //
90            // For earlier versions, the current compilation file is not included in the file names, but index 0 still refers to it.
91            // To get the correct number of files, we have to add 1 here.
92            if program_unit.unit.header.version() <= 4 {
93                num_files += 1;
94            }
95
96            // There can be multiple file indices which match, due to the inclusion of the current compilation file with index 0.
97            //
98            // At least for DWARF 4 there are cases where the current compilation file is also included in the file names with
99            // a non-zero index.
100            let matching_file_indices: Vec<_> = (0..num_files)
101                .filter_map(|file_index| {
102                    let file_index = file_index as u64;
103
104                    debug_info
105                        .get_path(&program_unit.unit, file_index)
106                        .and_then(|combined_path: TypedPathBuf| {
107                            if canonical_path_eq(path, combined_path.to_path()) {
108                                tracing::debug!(
109                                    "Found matching file index: {file_index} for path: {path}",
110                                    file_index = file_index,
111                                    path = path.display()
112                                );
113                                Some(file_index)
114                            } else {
115                                None
116                            }
117                        })
118                })
119                .collect();
120
121            if matching_file_indices.is_empty() {
122                continue;
123            }
124
125            let Ok((complete_line_program, line_sequences)) = line_program.clone().sequences()
126            else {
127                tracing::debug!("Failed to get line sequences for line program");
128                continue;
129            };
130
131            for line_sequence in line_sequences {
132                let instruction_sequence = InstructionSequence::from_line_sequence(
133                    debug_info,
134                    program_unit,
135                    &complete_line_program,
136                    &line_sequence,
137                );
138
139                for matching_file_index in &matching_file_indices {
140                    // Cycle through various degrees of matching, to find the most relevant source location.
141                    if let Some(verified_breakpoint) = match_file_line_column(
142                        &instruction_sequence,
143                        *matching_file_index,
144                        line,
145                        column,
146                        debug_info,
147                        program_unit,
148                    ) {
149                        return Ok(verified_breakpoint);
150                    }
151
152                    if let Some(verified_breakpoint) = match_file_line_first_available_column(
153                        &instruction_sequence,
154                        *matching_file_index,
155                        line,
156                        debug_info,
157                        program_unit,
158                    ) {
159                        return Ok(verified_breakpoint);
160                    }
161                }
162            }
163        }
164        // If we get here, we have not found a valid breakpoint location.
165        Err(DebugError::Other(format!(
166            "No valid breakpoint information found for file: {}, line: {line:?}, column: {column:?}",
167            path.display()
168        )))
169    }
170}
171
172/// Find the valid halt instruction location that is equal to, or greater than, the address.
173fn match_address(
174    instruction_sequence: &InstructionSequence<'_>,
175    address: u64,
176    debug_info: &DebugInfo,
177) -> Option<VerifiedBreakpoint> {
178    if instruction_sequence.address_range.contains(&address) {
179        let instruction_location =
180            instruction_sequence
181                .instructions
182                .iter()
183                .find(|instruction_location| {
184                    instruction_location.instruction_type == InstructionType::HaltLocation
185                        && instruction_location.address >= address
186                })?;
187
188        let source_location = SourceLocation::from_instruction_location(
189            debug_info,
190            instruction_sequence.program_unit,
191            instruction_location,
192        )?;
193
194        Some(VerifiedBreakpoint {
195            address: instruction_location.address,
196            source_location,
197        })
198    } else {
199        None
200    }
201}
202
203/// Find the valid halt instruction location that matches the file, line and column.
204fn match_file_line_column(
205    instruction_sequence: &InstructionSequence<'_>,
206    matching_file_index: u64,
207    line: u64,
208    column: Option<u64>,
209    debug_info: &DebugInfo,
210    program_unit: &UnitInfo,
211) -> Option<VerifiedBreakpoint> {
212    let instruction_location =
213        instruction_sequence
214            .instructions
215            .iter()
216            .find(|instruction_location| {
217                instruction_location.instruction_type == InstructionType::HaltLocation
218                    && matching_file_index == instruction_location.file_index
219                    && NonZeroU64::new(line) == instruction_location.line
220                    && column
221                        .map(ColumnType::Column)
222                        .is_some_and(|col| col == instruction_location.column)
223            })?;
224
225    let source_location =
226        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;
227
228    Some(VerifiedBreakpoint {
229        address: instruction_location.address,
230        source_location,
231    })
232}
233
234/// Find the first valid halt instruction location that matches the file and line, ignoring column.
235fn match_file_line_first_available_column(
236    instruction_sequence: &InstructionSequence<'_>,
237    matching_file_index: u64,
238    line: u64,
239    debug_info: &DebugInfo,
240    program_unit: &UnitInfo,
241) -> Option<VerifiedBreakpoint> {
242    let instruction_location =
243        instruction_sequence
244            .instructions
245            .iter()
246            .find(|instruction_location| {
247                instruction_location.instruction_type == InstructionType::HaltLocation
248                    && matching_file_index == instruction_location.file_index
249                    && NonZeroU64::new(line) == instruction_location.line
250            })?;
251
252    let source_location =
253        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;
254
255    Some(VerifiedBreakpoint {
256        address: instruction_location.address,
257        source_location,
258    })
259}
260
261fn serialize_typed_path<S>(path: &TypedPathBuf, serializer: S) -> Result<S::Ok, S::Error>
262where
263    S: serde::Serializer,
264{
265    serializer.serialize_str(&path.to_string_lossy())
266}
267
268/// A specific location in source code.
269/// Each unique line, column, file and directory combination is a unique source location.
270#[derive(Clone, PartialEq, Eq, Serialize)]
271pub struct SourceLocation {
272    /// The path to the source file
273    #[serde(serialize_with = "serialize_typed_path")]
274    pub path: TypedPathBuf,
275    /// The line number in the source file with zero based indexing.
276    pub line: Option<u64>,
277    /// The column number in the source file.
278    pub column: Option<ColumnType>,
279    /// The address of the source location.
280    pub address: Option<u64>,
281}
282
283impl Debug for SourceLocation {
284    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285        write!(
286            f,
287            "{}:{:?}:{:?}",
288            self.path.to_path().display(),
289            self.line,
290            self.column
291        )
292    }
293}
294
295impl SourceLocation {
296    /// Resolve debug information for a [`InstructionLocation`] and create a [`SourceLocation`].
297    fn from_instruction_location(
298        debug_info: &DebugInfo,
299        program_unit: &unit_info::UnitInfo,
300        instruction_location: &InstructionLocation,
301    ) -> Option<SourceLocation> {
302        debug_info
303            .find_file_and_directory(&program_unit.unit, instruction_location.file_index)
304            .map(|path| SourceLocation {
305                line: instruction_location.line.map(std::num::NonZeroU64::get),
306                column: Some(instruction_location.column),
307                path,
308                address: Some(instruction_location.address),
309            })
310    }
311
312    /// Get the file name of the source file
313    pub fn file_name(&self) -> Option<String> {
314        self.path
315            .file_name()
316            .map(|name| String::from_utf8_lossy(name).to_string())
317    }
318}
319
320/// Keep track of all the instruction locations required to satisfy the operations of [`SteppingMode`][s].
321/// This is a list of target instructions, belonging to a [`gimli::LineSequence`],
322/// and filters it to only user code instructions (no prologue code, and no non-statement instructions),
323/// so that we are left only with what DWARF terms as "recommended breakpoint location".
324///
325/// [s]: crate::debug::debug_step::SteppingMode
326struct InstructionSequence<'debug_info> {
327    /// The `address_range.start` is the starting address of the program counter for which this sequence is valid,
328    /// and allows us to identify target instruction locations where the program counter lies inside the prologue.
329    /// The `address_range.end` is the first address that is not covered by this sequence within the line number program,
330    /// and allows us to identify when stepping over a instruction location would result in leaving a sequence.
331    /// - This is typically the instruction address of the first instruction in the next sequence,
332    ///   which may also be the first instruction in a new function.
333    address_range: Range<u64>,
334    // NOTE: Use Vec as a container, because we will have relatively few statements per sequence, and we need to maintain the order.
335    instructions: Vec<InstructionLocation>,
336    // The following private fields are required to resolve the source location information for
337    // each instruction location.
338    debug_info: &'debug_info DebugInfo,
339    program_unit: &'debug_info UnitInfo,
340}
341
342impl Debug for InstructionSequence<'_> {
343    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
344        writeln!(
345            f,
346            "Instruction Sequence with address range: {:#010x} - {:#010x}",
347            self.address_range.start, self.address_range.end
348        )?;
349        for instruction_location in &self.instructions {
350            writeln!(
351                f,
352                "\t{instruction_location:?} - {}",
353                self.debug_info
354                    .get_path(&self.program_unit.unit, instruction_location.file_index)
355                    .map(|file_path| file_path.to_string_lossy().to_string())
356                    .unwrap_or("<unknown file>".to_string())
357            )?;
358        }
359        Ok(())
360    }
361}
362
363impl<'debug_info> InstructionSequence<'debug_info> {
364    /// Extract all the instruction locations, belonging to the active sequence (i.e. the sequence that contains the `address`).
365    fn from_address(
366        debug_info: &'debug_info DebugInfo,
367        program_counter: u64,
368    ) -> Result<Self, DebugError> {
369        let program_unit = debug_info.compile_unit_info(program_counter)?;
370        let (offset, address_size) = if let Some(line_program) =
371            program_unit.unit.line_program.clone()
372        {
373            (
374                line_program.header().offset(),
375                line_program.header().address_size(),
376            )
377        } else {
378            let message = "The specified source location does not have any line_program information available. Please consider using instruction level stepping.".to_string();
379            return Err(DebugError::WarnAndContinue { message });
380        };
381
382        // Get the sequences of rows from the CompleteLineProgram at the given program_counter.
383        let incomplete_line_program =
384            debug_info
385                .debug_line_section
386                .program(offset, address_size, None, None)?;
387        let (complete_line_program, line_sequences) = incomplete_line_program.sequences()?;
388
389        // Get the sequence of rows that belongs to the program_counter.
390        let Some(line_sequence) = line_sequences.iter().find(|line_sequence| {
391            line_sequence.start <= program_counter && program_counter < line_sequence.end
392        }) else {
393            let message = "The specified source location does not have any line information available. Please consider using instruction level stepping.".to_string();
394            return Err(DebugError::WarnAndContinue { message });
395        };
396        let instruction_sequence = Self::from_line_sequence(
397            debug_info,
398            program_unit,
399            &complete_line_program,
400            line_sequence,
401        );
402
403        if instruction_sequence.len() == 0 {
404            let message = "Could not find valid instruction locations for this address. Consider using instruction level stepping.".to_string();
405            Err(DebugError::WarnAndContinue { message })
406        } else {
407            tracing::trace!(
408                "Instruction location for pc={:#010x}\n{:?}",
409                program_counter,
410                instruction_sequence
411            );
412            Ok(instruction_sequence)
413        }
414    }
415
416    /// Build [`InstructionSequence`] from a [`gimli::LineSequence`], with all the markers we need to determine valid halt locations.
417    fn from_line_sequence(
418        debug_info: &'debug_info DebugInfo,
419        program_unit: &'debug_info UnitInfo,
420        complete_line_program: &gimli::CompleteLineProgram<GimliReader>,
421        line_sequence: &LineSequence<GimliReader>,
422    ) -> Self {
423        let program_language = program_unit.get_language();
424        let mut sequence_rows = complete_line_program.resume_from(line_sequence);
425
426        // We have enough information to create the InstructionSequence.
427        let mut instruction_sequence = InstructionSequence {
428            address_range: line_sequence.start..line_sequence.end,
429            instructions: Vec::new(),
430            debug_info,
431            program_unit,
432        };
433        let mut prologue_completed = false;
434        let mut previous_row: Option<gimli::LineRow> = None;
435        while let Ok(Some((_, row))) = sequence_rows.next_row() {
436            // Don't do anything until we are at least at the prologue_end() of a function.
437            if row.prologue_end() {
438                prologue_completed = true;
439            }
440
441            // For GNU C, it is known that the `DW_LNS_set_prologue_end` is not set, so we employ the same heuristic as GDB to determine when the prologue is complete.
442            // For other C compilers in the C99/11/17 standard, they will either set the `DW_LNS_set_prologue_end` or they will trigger this heuristic also.
443            // See https://gcc.gnu.org/legacy-ml/gcc-patches/2011-03/msg02106.html
444            if !prologue_completed
445                && matches!(
446                    program_language,
447                    gimli::DW_LANG_C99 | gimli::DW_LANG_C11 | gimli::DW_LANG_C17
448                )
449                && let Some(prev_row) = previous_row
450                && (row.end_sequence()
451                    || (row.is_stmt()
452                        && (row.file_index() == prev_row.file_index()
453                            && (row.line() != prev_row.line() || row.line().is_none()))))
454            {
455                prologue_completed = true;
456            }
457
458            if !prologue_completed {
459                log_row_eval(line_sequence, row, "  inside prologue>");
460            } else {
461                log_row_eval(line_sequence, row, "  after prologue>");
462            }
463
464            // The end of the sequence is not a valid halt location,
465            // nor is it a valid instruction in the current sequence.
466            if row.end_sequence() {
467                break;
468            }
469
470            instruction_sequence.add(prologue_completed, row, previous_row.as_ref());
471            previous_row = Some(*row);
472        }
473        instruction_sequence
474    }
475
476    /// Add a instruction location to the list.
477    fn add(
478        &mut self,
479        prologue_completed: bool,
480        row: &gimli::LineRow,
481        previous_row: Option<&gimli::LineRow>,
482    ) {
483        // Workaround the line number issue (if recorded as 0 in the DWARF, then gimli reports it as None).
484        // For debug purposes, it makes more sense to be the same as the previous line, which almost always
485        // has the same file index and column value.
486        // This prevents the debugger from jumping to the top of the file unexpectedly.
487        let mut instruction_line = row.line();
488        if let Some(prev_row) = previous_row
489            && row.line().is_none()
490            && prev_row.line().is_some()
491            && row.file_index() == prev_row.file_index()
492            && prev_row.column() == row.column()
493        {
494            instruction_line = prev_row.line();
495        }
496
497        let instruction_location = InstructionLocation {
498            address: row.address(),
499            file_index: row.file_index(),
500            line: instruction_line,
501            column: row.column().into(),
502            instruction_type: if !prologue_completed {
503                InstructionType::Prologue
504            } else if row.epilogue_begin() || row.is_stmt() {
505                InstructionType::HaltLocation
506            } else {
507                InstructionType::Unspecified
508            },
509        };
510
511        self.instructions.push(instruction_location);
512    }
513
514    /// Get the number of instruction locations in the list.
515    fn len(&self) -> usize {
516        self.instructions.len()
517    }
518}
519
/// The type of instruction, as defined by [`gimli::LineRow`] attributes and relative position in the sequence.
#[derive(Debug, Clone, Copy, PartialEq)]
enum InstructionType {
    /// An instruction that precedes the end of the prologue.
    /// These are tracked because their source lines signal function signatures,
    /// even though they are not valid halt locations.
    Prologue,
    /// A DWARF defined "recommended breakpoint location":
    /// an instruction after the prologue that is marked with `is_stmt` or `epilogue_begin`.
    HaltLocation,
    /// Any instruction after the prologue that is marked with neither `is_stmt` nor `epilogue_begin`.
    Unspecified,
}
533
534#[derive(Clone, Copy)]
535/// - A [`InstructionLocation`] filters and maps [`gimli::LineRow`] entries to be used for determining valid halt points.
536///   - Each [`InstructionLocation`] maps to a single machine instruction on target.
537///   - For establishing valid halt locations (breakpoint or stepping), we are only interested,
538///     in the [`InstructionLocation`]'s that represent DWARF defined `statements`,
539///     which are not part of the prologue or epilogue.
540/// - A line of code in a source file may contain multiple instruction locations, in which case
541///   a new [`InstructionLocation`] with unique `column` is created.
542/// - A [`InstructionSequence`] is a series of contiguous [`InstructionLocation`]'s.
543struct InstructionLocation {
544    address: u64,
545    file_index: u64,
546    line: Option<NonZeroU64>,
547    column: ColumnType,
548    instruction_type: InstructionType,
549}
550
551impl Debug for InstructionLocation {
552    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
553        write!(
554            f,
555            "Instruction @ {:010x}, on line={:04}  col={:05}  f={:02}, type={:?}",
556            self.address,
557            match self.line {
558                Some(line) => line.get(),
559                None => 0,
560            },
561            match self.column {
562                ColumnType::LeftEdge => 0,
563                ColumnType::Column(column) => column,
564            },
565            self.file_index,
566            self.instruction_type,
567        )
568    }
569}
570
571/// Helper function to avoid code duplication when logging of information during row evaluation.
572fn log_row_eval(
573    active_sequence: &LineSequence<super::GimliReader>,
574    row: &gimli::LineRow,
575    status: &str,
576) {
577    tracing::trace!(
578        "Sequence: line={:04} col={:05} f={:02} stmt={:5} ep={:5} es={:5} eb={:5} : {:#010X}<={:#010X}<{:#010X} : {}",
579        match row.line() {
580            Some(line) => line.get(),
581            None => 0,
582        },
583        match row.column() {
584            gimli::ColumnType::LeftEdge => 0,
585            gimli::ColumnType::Column(column) => column.get(),
586        },
587        row.file_index(),
588        row.is_stmt(),
589        row.prologue_end(),
590        row.end_sequence(),
591        row.epilogue_begin(),
592        active_sequence.start,
593        row.address(),
594        active_sequence.end,
595        status
596    );
597}