1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
//! Heuristics for correcting instruction pointers based on the CPU architecture.

use crate::types::{Arch, CpuFamily};

const SIGILL: u32 = 4;
const SIGBUS: u32 = 10;
const SIGSEGV: u32 = 11;

/// Helper to work with instruction addresses.
///
/// The most useful function is `InstructionInfo::caller_address` which applies
/// some heuristics to determine the call site of a function call based on the
/// return address. See `InstructionInfo::caller_address` for more information.
///
/// See <https://goo.gl/g17EAn> for detailed information on this topic.
pub struct InstructionInfo {
    /// The address of the instruction we want to use as a base.
    pub addr: u64,
    /// The architecture we are dealing with.
    pub arch: Arch,
    /// This is true if the frame is the cause of the crash.
    pub crashing_frame: bool,
    /// If a signal is known that triggers the crash, it can be stored here.
    pub signal: Option<u32>,
    /// The optional value of the IP register.
    pub ip_reg: Option<u64>,
}

impl InstructionInfo {
    /// Tries to resolve the start address of the current instruction.
    ///
    /// For architectures without fixed alignment (such as Intel with variable
    /// instruction lengths), this will return the same address. Otherwise, the
    /// address is aligned to the architecture's instruction alignment.
    pub fn aligned_address(&self) -> u64 {
        if let Some(alignment) = self.arch.instruction_alignment() {
            self.addr - (self.addr % alignment)
        } else {
            self.addr
        }
    }

    /// Return the previous instruction to the current one if we can
    /// determine this for the current architecture.
    /// Returns the instruction preceding the current one.
    ///
    /// For known architectures, this will return the start address of the
    /// instruction immediately before the current one in the machine code.
    /// This is likely the instruction that was just executed or that called
    /// a function returning at the current address.
    ///
    /// For unknown architectures or those using variable instruction size, the
    /// exact start address cannot be determined. Instead, an address *within*
    /// the preceding instruction will be returned. For this reason, the return
    /// value of this function should be considered an upper bound.
    pub fn previous_address(&self) -> u64 {
        let instruction_size = self.arch.instruction_alignment().unwrap_or(1);

        // In MIPS, the return address apparently often points two instructions after the the
        // previous program counter. On other architectures, just subtract one instruction.
        let pc_offset = match self.arch.cpu_family() {
            CpuFamily::Mips32 | CpuFamily::Mips64 => 2 * instruction_size,
            _ => instruction_size,
        };

        self.aligned_address() - pc_offset
    }

    /// Returns whether the application attempted to jump to an invalid,
    /// privileged or misaligned address. This indicates, that certain
    /// adjustments should be made on the caller instruction address.
    pub fn is_crash_signal(&self) -> bool {
        match self.signal {
            Some(SIGILL) | Some(SIGBUS) | Some(SIGSEGV) => true,
            _ => false,
        }
    }

    /// Determines whether the given address should be adjusted to resolve the
    /// call site of a stack frame.
    ///
    /// This generally applies to all frames except the crashing / suspended
    /// frame. However, if the process crashed with an illegal instruction,
    /// even the top-most frame needs to be adjusted to account for the signal
    /// handler.
    pub fn should_adjust_caller(&self) -> bool {
        // All frames other than the crashing frame (or suspended frame for
        // other threads) report the return address. This address (generally)
        // points to the instruction after the function call. Therefore, we
        // need to adjust the caller address for these frames.
        if !self.crashing_frame {
            return true;
        }

        // KSCrash applies a heuristic to remove the signal handler frame from
        // the top of the stack trace, if the crash was caused by certain
        // signals. However, that means that the top-most frame contains a
        // return address just like any other and needs to be adjusted.
        if let Some(ip) = self.ip_reg {
            if ip != self.addr && self.is_crash_signal() {
                return true;
            }
        }

        // The crashing frame usually contains the actual register contents,
        // which points to the exact instruction that crashed and must not be
        // adjusted.
        false
    }

    /// Determines the address of the call site based on a return address.
    ///
    /// In the top most frame (often referred to as context frame), this is the
    /// value of the instruction pointer register. In all other frames, the
    /// return address is generally one instruction after the jump / call.
    ///
    /// This function actually resolves an address _within_ the call instruction
    /// rather than its beginning. Also, in some cases the top most frame has
    /// been modified by certain signal handlers or return optimizations. A set
    /// of heuristics tries to recover this for well-known cases.
    pub fn caller_address(&self) -> u64 {
        if self.should_adjust_caller() {
            self.previous_address()
        } else {
            self.aligned_address()
        }

        // NOTE: Currently, we only provide stack traces from KSCrash and
        // Breakpad. Both already apply a set of heuristics while stackwalking
        // in order to fix return addresses. It seems that no further heuristics
        // are necessary at the moment.
    }
}