probe_rs_debug/
source_instructions.rs

1use super::{
2    ColumnType, DebugError, DebugInfo, GimliReader, canonical_path_eq,
3    unit_info::{self, UnitInfo},
4};
5use gimli::LineSequence;
6use serde::Serialize;
7use std::{
8    fmt::{Debug, Formatter},
9    num::NonZeroU64,
10    ops::Range,
11};
12use typed_path::{TypedPath, TypedPathBuf};
13
/// A verified breakpoint pairs an instruction address with the source location that it corresponds to,
/// for locations in the target binary that comply with the DWARF standard terminology for "recommended breakpoint location".
/// This typically refers to instructions that are not part of the prologue or epilogue, and are part of the user code,
/// or are the final instruction in a sequence, before the processor begins the epilogue code.
/// The `probe-rs` debugger uses this information to identify valid halt locations for breakpoints and stepping.
#[derive(Clone, Debug)]
pub struct VerifiedBreakpoint {
    /// The address in target memory, where the breakpoint can be set.
    pub address: u64,
    /// The source location that was resolved for `address`.
    /// If the breakpoint request was for a specific source location, then this field contains the location
    /// that was actually matched, which may differ from the request (e.g. a different column on the same line).
    pub source_location: SourceLocation,
}
26
27impl VerifiedBreakpoint {
28    /// Return the first valid breakpoint location of the statement that is greater than OR equal to `address`.
29    /// e.g., if the `address` is the current program counter, then the return value will be the next valid halt address
30    /// in the current sequence.
31    pub(crate) fn for_address(
32        debug_info: &DebugInfo,
33        address: u64,
34    ) -> Result<VerifiedBreakpoint, DebugError> {
35        let instruction_sequence = InstructionSequence::from_address(debug_info, address)?;
36
37        // Cycle through various degrees of matching, to find the most relevant source location.
38        if let Some(verified_breakpoint) = match_address(&instruction_sequence, address, debug_info)
39        {
40            tracing::debug!(
41                "Found valid breakpoint for address: {:#010x} : {verified_breakpoint:?}",
42                &address
43            );
44            return Ok(verified_breakpoint);
45        }
46        // If we get here, we have not found a valid breakpoint location.
47        let message = format!(
48            "Could not identify a valid breakpoint for address: {address:#010x}. Please consider using instruction level stepping."
49        );
50        Err(DebugError::WarnAndContinue { message })
51    }
52
53    /// Identifying the breakpoint location for a specific location (path, line, colunmn) is a bit more complex,
54    /// compared to the `for_address()` method, due to a few factors:
55    /// - The correct program instructions, may be in any of the compilation units of the current program.
56    /// - The debug information may not contain data for the "specific source" location requested:
57    ///   - DWARFv5 standard, section 6.2, allows omissions based on certain conditions. In this case,
58    ///     we need to find the closest "relevant" source location that has valid debug information.
59    ///   - The requested location may not be a valid source location, e.g. when the
60    ///     debug information has been optimized away. In this case we will return an appropriate error.
61    ///
62    /// #### The logic used to find the "most relevant" source location is as follows:
63    /// 1. Filter  [`UnitInfo`], by using [`gimli::LineProgramHeader`] to match units that include
64    ///    the requested path.
65    /// 2. For each matching compilation unit, get the [`gimli::LineProgram`] and
66    ///    [`Vec<LineSequence>`][LineSequence].
67    /// 3. Filter the [`Vec<LineSequence>`][LineSequence] entries to only include sequences that match the requested path.
68    /// 3. Convert remaining [`LineSequence`], to [`InstructionSequence`].
69    /// 4. Return the first [`InstructionSequence`] that contains the requested source location.
70    ///    1. This may be an exact match on file/line/column, or,
71    ///    2. Failing an exact match, a match on file/line only.
72    ///    3. Failing that, a match on file only, where the line number is the "next" available instruction,
73    ///       on the next available line of the specified file.
74    pub(crate) fn for_source_location(
75        debug_info: &DebugInfo,
76        path: TypedPath,
77        line: u64,
78        column: Option<u64>,
79    ) -> Result<Self, DebugError> {
80        for program_unit in &debug_info.unit_infos {
81            let Some(ref line_program) = program_unit.unit.line_program else {
82                // Not all compilation units need to have debug line information, so we skip those.
83                continue;
84            };
85
86            let mut num_files = line_program.header().file_names().len();
87
88            // For DWARF version 5, the current compilation file is included in the file names, with index 0.
89            //
90            // For earlier versions, the current compilation file is not included in the file names, but index 0 still refers to it.
91            // To get the correct number of files, we have to add 1 here.
92            if program_unit.unit.header.version() <= 4 {
93                num_files += 1;
94            }
95
96            // There can be multiple file indices which match, due to the inclusion of the current compilation file with index 0.
97            //
98            // At least for DWARF 4 there are cases where the current compilation file is also included in the file names with
99            // a non-zero index.
100            let matching_file_indices: Vec<_> = (0..num_files)
101                .filter_map(|file_index| {
102                    let file_index = file_index as u64;
103
104                    debug_info
105                        .get_path(&program_unit.unit, file_index)
106                        .and_then(|combined_path: TypedPathBuf| {
107                            if canonical_path_eq(path, combined_path.to_path()) {
108                                tracing::debug!(
109                                    "Found matching file index: {file_index} for path: {path}",
110                                    file_index = file_index,
111                                    path = path.display()
112                                );
113                                Some(file_index)
114                            } else {
115                                None
116                            }
117                        })
118                })
119                .collect();
120
121            if matching_file_indices.is_empty() {
122                continue;
123            }
124
125            let Ok((complete_line_program, line_sequences)) = line_program.clone().sequences()
126            else {
127                tracing::debug!("Failed to get line sequences for line program");
128                continue;
129            };
130
131            for line_sequence in line_sequences {
132                let instruction_sequence = InstructionSequence::from_line_sequence(
133                    debug_info,
134                    program_unit,
135                    &complete_line_program,
136                    &line_sequence,
137                );
138
139                for matching_file_index in &matching_file_indices {
140                    // Cycle through various degrees of matching, to find the most relevant source location.
141                    if let Some(verified_breakpoint) = match_file_line_column(
142                        &instruction_sequence,
143                        *matching_file_index,
144                        line,
145                        column,
146                        debug_info,
147                        program_unit,
148                    ) {
149                        return Ok(verified_breakpoint);
150                    }
151
152                    if let Some(verified_breakpoint) = match_file_line_first_available_column(
153                        &instruction_sequence,
154                        *matching_file_index,
155                        line,
156                        debug_info,
157                        program_unit,
158                    ) {
159                        return Ok(verified_breakpoint);
160                    }
161                }
162            }
163        }
164        // If we get here, we have not found a valid breakpoint location.
165        Err(DebugError::Other(format!(
166            "No valid breakpoint information found for file: {}, line: {line:?}, column: {column:?}",
167            path.display()
168        )))
169    }
170}
171
172/// Find the valid halt instruction location that is equal to, or greater than, the address.
173fn match_address(
174    instruction_sequence: &InstructionSequence<'_>,
175    address: u64,
176    debug_info: &DebugInfo,
177) -> Option<VerifiedBreakpoint> {
178    if instruction_sequence.address_range.contains(&address) {
179        let instruction_location =
180            instruction_sequence
181                .instructions
182                .iter()
183                .find(|instruction_location| {
184                    instruction_location.instruction_type == InstructionType::HaltLocation
185                        && instruction_location.address >= address
186                })?;
187
188        let source_location = SourceLocation::from_instruction_location(
189            debug_info,
190            instruction_sequence.program_unit,
191            instruction_location,
192        )?;
193
194        Some(VerifiedBreakpoint {
195            address: instruction_location.address,
196            source_location,
197        })
198    } else {
199        None
200    }
201}
202
203/// Find the valid halt instruction location that matches the file, line and column.
204fn match_file_line_column(
205    instruction_sequence: &InstructionSequence<'_>,
206    matching_file_index: u64,
207    line: u64,
208    column: Option<u64>,
209    debug_info: &DebugInfo,
210    program_unit: &UnitInfo,
211) -> Option<VerifiedBreakpoint> {
212    let instruction_location =
213        instruction_sequence
214            .instructions
215            .iter()
216            .find(|instruction_location| {
217                instruction_location.instruction_type == InstructionType::HaltLocation
218                    && matching_file_index == instruction_location.file_index
219                    && NonZeroU64::new(line) == instruction_location.line
220                    && column
221                        .map(ColumnType::Column)
222                        .is_some_and(|col| col == instruction_location.column)
223            })?;
224
225    let source_location =
226        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;
227
228    Some(VerifiedBreakpoint {
229        address: instruction_location.address,
230        source_location,
231    })
232}
233
234/// Find the first valid halt instruction location that matches the file and line, ignoring column.
235fn match_file_line_first_available_column(
236    instruction_sequence: &InstructionSequence<'_>,
237    matching_file_index: u64,
238    line: u64,
239    debug_info: &DebugInfo,
240    program_unit: &UnitInfo,
241) -> Option<VerifiedBreakpoint> {
242    let instruction_location =
243        instruction_sequence
244            .instructions
245            .iter()
246            .find(|instruction_location| {
247                instruction_location.instruction_type == InstructionType::HaltLocation
248                    && matching_file_index == instruction_location.file_index
249                    && NonZeroU64::new(line) == instruction_location.line
250            })?;
251
252    let source_location =
253        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;
254
255    Some(VerifiedBreakpoint {
256        address: instruction_location.address,
257        source_location,
258    })
259}
260
261fn serialize_typed_path<S>(path: &TypedPathBuf, serializer: S) -> Result<S::Ok, S::Error>
262where
263    S: serde::Serializer,
264{
265    serializer.serialize_str(&path.to_string_lossy())
266}
267
/// A specific location in source code.
/// Each unique line, column, file and directory combination is a unique source location.
#[derive(Clone, PartialEq, Eq, Serialize)]
pub struct SourceLocation {
    /// The path to the source file. Serialized as a string by `serialize_typed_path`.
    #[serde(serialize_with = "serialize_typed_path")]
    pub path: TypedPathBuf,
    /// The line number in the source file, or `None` if no line is known.
    ///
    /// NOTE(review): this was previously documented as using zero based indexing, but values
    /// created by `from_instruction_location` originate from a `NonZeroU64` and are therefore
    /// never 0 — confirm the intended indexing against other constructors of this type.
    pub line: Option<u64>,
    /// The column number in the source file.
    pub column: Option<ColumnType>,
    /// The address of the source location.
    pub address: Option<u64>,
}
282
283impl Debug for SourceLocation {
284    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285        write!(
286            f,
287            "{}:{:?}:{:?}",
288            self.path.to_path().display(),
289            self.line,
290            self.column
291        )
292    }
293}
294
295impl SourceLocation {
296    /// Resolve debug information for a [`InstructionLocation`] and create a [`SourceLocation`].
297    fn from_instruction_location(
298        debug_info: &DebugInfo,
299        program_unit: &unit_info::UnitInfo,
300        instruction_location: &InstructionLocation,
301    ) -> Option<SourceLocation> {
302        debug_info
303            .find_file_and_directory(&program_unit.unit, instruction_location.file_index)
304            .map(|path| SourceLocation {
305                line: instruction_location.line.map(std::num::NonZeroU64::get),
306                column: Some(instruction_location.column),
307                path,
308                address: Some(instruction_location.address),
309            })
310    }
311
312    /// Get the file name of the source file
313    pub fn file_name(&self) -> Option<String> {
314        self.path
315            .file_name()
316            .map(|name| String::from_utf8_lossy(name).to_string())
317    }
318}
319
/// Keep track of all the instruction locations required to satisfy the operations of [`SteppingMode`][s].
/// This is a list of target instructions, belonging to a [`gimli::LineSequence`].
/// Each instruction is tagged with an [`InstructionType`], so that callers can select only the
/// user code statements (no prologue code, and no non-statement instructions),
/// i.e. what DWARF terms a "recommended breakpoint location".
///
/// [s]: crate::debug::debug_step::SteppingMode
struct InstructionSequence<'debug_info> {
    /// The `address_range.start` is the starting address of the program counter for which this sequence is valid,
    /// and allows us to identify target instruction locations where the program counter lies inside the prologue.
    /// The `address_range.end` is the first address that is not covered by this sequence within the line number program,
    /// and allows us to identify when stepping over an instruction location would result in leaving a sequence.
    /// - This is typically the instruction address of the first instruction in the next sequence,
    ///   which may also be the first instruction in a new function.
    address_range: Range<u64>,
    // NOTE: Use Vec as a container, because we will have relatively few statements per sequence, and we need to maintain the order.
    // Entries are pushed in the order in which the rows of the line sequence are read.
    instructions: Vec<InstructionLocation>,
    // The following private fields are required to resolve the source location information for
    // each instruction location.
    debug_info: &'debug_info DebugInfo,
    program_unit: &'debug_info UnitInfo,
}
341
342impl Debug for InstructionSequence<'_> {
343    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
344        writeln!(
345            f,
346            "Instruction Sequence with address range: {:#010x} - {:#010x}",
347            self.address_range.start, self.address_range.end
348        )?;
349        for instruction_location in &self.instructions {
350            writeln!(
351                f,
352                "\t{instruction_location:?} - {}",
353                self.debug_info
354                    .get_path(&self.program_unit.unit, instruction_location.file_index)
355                    .map(|file_path| file_path.to_string_lossy().to_string())
356                    .unwrap_or("<unknown file>".to_string())
357            )?;
358        }
359        Ok(())
360    }
361}
362
363impl<'debug_info> InstructionSequence<'debug_info> {
364    /// Extract all the instruction locations, belonging to the active sequence (i.e. the sequence that contains the `address`).
365    fn from_address(
366        debug_info: &'debug_info DebugInfo,
367        program_counter: u64,
368    ) -> Result<Self, DebugError> {
369        let program_unit = debug_info.compile_unit_info(program_counter)?;
370        let (offset, address_size) = if let Some(line_program) =
371            program_unit.unit.line_program.clone()
372        {
373            (
374                line_program.header().offset(),
375                line_program.header().address_size(),
376            )
377        } else {
378            let message = "The specified source location does not have any line_program information available. Please consider using instruction level stepping.".to_string();
379            return Err(DebugError::WarnAndContinue { message });
380        };
381
382        // Get the sequences of rows from the CompleteLineProgram at the given program_counter.
383        let incomplete_line_program =
384            debug_info
385                .debug_line_section
386                .program(offset, address_size, None, None)?;
387        let (complete_line_program, line_sequences) = incomplete_line_program.sequences()?;
388
389        // Get the sequence of rows that belongs to the program_counter.
390        let Some(line_sequence) = line_sequences.iter().find(|line_sequence| {
391            line_sequence.start <= program_counter && program_counter < line_sequence.end
392        }) else {
393            let message = "The specified source location does not have any line information available. Please consider using instruction level stepping.".to_string();
394            return Err(DebugError::WarnAndContinue { message });
395        };
396        let instruction_sequence = Self::from_line_sequence(
397            debug_info,
398            program_unit,
399            &complete_line_program,
400            line_sequence,
401        );
402
403        if instruction_sequence.len() == 0 {
404            let message = "Could not find valid instruction locations for this address. Consider using instruction level stepping.".to_string();
405            Err(DebugError::WarnAndContinue { message })
406        } else {
407            tracing::trace!(
408                "Instruction location for pc={:#010x}\n{:?}",
409                program_counter,
410                instruction_sequence
411            );
412            Ok(instruction_sequence)
413        }
414    }
415
416    /// Build [`InstructionSequence`] from a [`gimli::LineSequence`], with all the markers we need to determine valid halt locations.
417    fn from_line_sequence(
418        debug_info: &'debug_info DebugInfo,
419        program_unit: &'debug_info UnitInfo,
420        complete_line_program: &gimli::CompleteLineProgram<GimliReader>,
421        line_sequence: &LineSequence<GimliReader>,
422    ) -> Self {
423        let program_language = program_unit.get_language();
424        let mut sequence_rows = complete_line_program.resume_from(line_sequence);
425
426        // We have enough information to create the InstructionSequence.
427        let mut instruction_sequence = InstructionSequence {
428            address_range: line_sequence.start..line_sequence.end,
429            instructions: Vec::new(),
430            debug_info,
431            program_unit,
432        };
433        let mut prologue_completed = false;
434        let mut previous_row: Option<gimli::LineRow> = None;
435        while let Ok(Some((_, row))) = sequence_rows.next_row() {
436            // Don't do anything until we are at least at the prologue_end() of a function.
437            if row.prologue_end() {
438                prologue_completed = true;
439            }
440
441            // For GNU C, it is known that the `DW_LNS_set_prologue_end` is not set, so we employ the same heuristic as GDB to determine when the prologue is complete.
442            // For other C compilers in the C99/11/17 standard, they will either set the `DW_LNS_set_prologue_end` or they will trigger this heuristic also.
443            // See https://gcc.gnu.org/legacy-ml/gcc-patches/2011-03/msg02106.html
444            if !prologue_completed
445                && matches!(
446                    program_language,
447                    gimli::DW_LANG_C99 | gimli::DW_LANG_C11 | gimli::DW_LANG_C17
448                )
449            {
450                if let Some(prev_row) = previous_row {
451                    if row.end_sequence()
452                        || (row.is_stmt()
453                            && (row.file_index() == prev_row.file_index()
454                                && (row.line() != prev_row.line() || row.line().is_none())))
455                    {
456                        prologue_completed = true;
457                    }
458                }
459            }
460
461            if !prologue_completed {
462                log_row_eval(line_sequence, row, "  inside prologue>");
463            } else {
464                log_row_eval(line_sequence, row, "  after prologue>");
465            }
466
467            // The end of the sequence is not a valid halt location,
468            // nor is it a valid instruction in the current sequence.
469            if row.end_sequence() {
470                break;
471            }
472
473            instruction_sequence.add(prologue_completed, row, previous_row.as_ref());
474            previous_row = Some(*row);
475        }
476        instruction_sequence
477    }
478
479    /// Add a instruction location to the list.
480    fn add(
481        &mut self,
482        prologue_completed: bool,
483        row: &gimli::LineRow,
484        previous_row: Option<&gimli::LineRow>,
485    ) {
486        // Workaround the line number issue (if recorded as 0 in the DWARF, then gimli reports it as None).
487        // For debug purposes, it makes more sense to be the same as the previous line, which almost always
488        // has the same file index and column value.
489        // This prevents the debugger from jumping to the top of the file unexpectedly.
490        let mut instruction_line = row.line();
491        if let Some(prev_row) = previous_row {
492            if row.line().is_none()
493                && prev_row.line().is_some()
494                && row.file_index() == prev_row.file_index()
495                && prev_row.column() == row.column()
496            {
497                instruction_line = prev_row.line();
498            }
499        }
500
501        let instruction_location = InstructionLocation {
502            address: row.address(),
503            file_index: row.file_index(),
504            line: instruction_line,
505            column: row.column().into(),
506            instruction_type: if !prologue_completed {
507                InstructionType::Prologue
508            } else if row.epilogue_begin() || row.is_stmt() {
509                InstructionType::HaltLocation
510            } else {
511                InstructionType::Unspecified
512            },
513        };
514
515        self.instructions.push(instruction_location);
516    }
517
518    /// Get the number of instruction locations in the list.
519    fn len(&self) -> usize {
520        self.instructions.len()
521    }
522}
523
#[derive(Debug, Clone, Copy, PartialEq)]
/// The type of instruction, as defined by [`gimli::LineRow`] attributes and relative position in the sequence.
enum InstructionType {
    /// We need to keep track of source lines that signal function signatures,
    /// even if their program lines are not valid halt locations.
    /// Assigned to every row that is read before the prologue is considered complete.
    Prologue,
    /// DWARF defined "recommended breakpoint location",
    /// typically marked with `is_stmt` or `epilogue_begin`.
    /// Only assigned once the prologue is complete.
    HaltLocation,
    /// Any other instruction that is not part of the prologue or epilogue, and is not a statement,
    /// is considered to be an unspecified instruction type.
    Unspecified,
}
537
#[derive(Clone, Copy)]
/// - A [`InstructionLocation`] filters and maps [`gimli::LineRow`] entries to be used for determining valid halt points.
///   - Each [`InstructionLocation`] maps to a single machine instruction on target.
///   - For establishing valid halt locations (breakpoint or stepping), we are only interested,
///     in the [`InstructionLocation`]'s that represent DWARF defined `statements`,
///     which are not part of the prologue or epilogue.
/// - A line of code in a source file may contain multiple instruction locations, in which case
///   a new [`InstructionLocation`] with unique `column` is created.
/// - A [`InstructionSequence`] is a series of contiguous [`InstructionLocation`]'s.
struct InstructionLocation {
    /// The address of the line row this location was created from.
    address: u64,
    /// The file index of the line row, used to resolve the source file path within its unit.
    file_index: u64,
    /// The source line. `None` when the row has no line, and the line of the previous row
    /// could not be used in its place.
    line: Option<NonZeroU64>,
    /// The column of the line row.
    column: ColumnType,
    /// Whether this location is prologue code, a valid halt location, or neither.
    instruction_type: InstructionType,
}
554
555impl Debug for InstructionLocation {
556    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
557        write!(
558            f,
559            "Instruction @ {:010x}, on line={:04}  col={:05}  f={:02}, type={:?}",
560            self.address,
561            match self.line {
562                Some(line) => line.get(),
563                None => 0,
564            },
565            match self.column {
566                ColumnType::LeftEdge => 0,
567                ColumnType::Column(column) => column,
568            },
569            self.file_index,
570            self.instruction_type,
571        )
572    }
573}
574
575/// Helper function to avoid code duplication when logging of information during row evaluation.
576fn log_row_eval(
577    active_sequence: &LineSequence<super::GimliReader>,
578    row: &gimli::LineRow,
579    status: &str,
580) {
581    tracing::trace!(
582        "Sequence: line={:04} col={:05} f={:02} stmt={:5} ep={:5} es={:5} eb={:5} : {:#010X}<={:#010X}<{:#010X} : {}",
583        match row.line() {
584            Some(line) => line.get(),
585            None => 0,
586        },
587        match row.column() {
588            gimli::ColumnType::LeftEdge => 0,
589            gimli::ColumnType::Column(column) => column.get(),
590        },
591        row.file_index(),
592        row.is_stmt(),
593        row.prologue_end(),
594        row.end_sequence(),
595        row.epilogue_begin(),
596        active_sequence.start,
597        row.address(),
598        active_sequence.end,
599        status
600    );
601}