1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
//! [`EmbeddedProfiler`] implementation based on [`DWT`].
//!
//! This profiler depends on the [`DWT`] hardware which is not available on cortex-M0.
//! The profiler's resolution is the same as the core clock. The cycle count clock is
//! free-running, so overflows are likely if you have long running functions to profile.
//! To mitigate this, one can use the `extended` feature, which extends the resolution of
//! the counter from [`u32`] to [`u64`] using the [`DebugMonitor`] exception. It is set
//! to expire just before overflow, so you can expect an exception to fire every 2**32
//! clock cycles.
//!
//! Snapshots are logged using [`log::info!`], so having a logger installed is required
//! if you want to use [`embedded_profiling::log_snapshot`] or functions that call it
//! (like [`embedded_profiling::profile_function`]).
//!
//! ## Example Usage
//!
//!```no_run
//! # use cortex_m::peripheral::Peripherals as CorePeripherals;
//! # const CORE_FREQ: u32 = 120_000_000;
//! let mut core = CorePeripherals::take().unwrap();
//! // (...)
//! let dwt_profiler = cortex_m::singleton!(: ep_dwt::DwtProfiler::<CORE_FREQ> =
//! ep_dwt::DwtProfiler::<CORE_FREQ>::new(&mut core.DCB, core.DWT, CORE_FREQ))
//! .unwrap();
//! unsafe {
//! embedded_profiling::set_profiler(dwt_profiler).unwrap();
//! }
//! // (...)
//! embedded_profiling::profile("print_profile", || println!("Hello, world"));
//! ```
//!
//! [`DWT`]: cortex_m::peripheral::DWT
//! [`DebugMonitor`]: `cortex_m::peripheral::scb::Exception::DebugMonitor`
#![cfg_attr(not(test), no_std)]
use embedded_profiling::{EPContainer, EPInstant, EPSnapshot, EmbeddedProfiler};
use cortex_m::peripheral::{DCB, DWT};
#[cfg(feature = "extended")]
use core::sync::atomic::{AtomicU32, Ordering};
#[cfg(feature = "extended")]
use cortex_m_rt::exception;
#[cfg(feature = "extended")]
/// Number of `cyccnt` cycle count overflows observed so far.
///
/// Incremented by the `DebugMonitor` exception handler and combined with the raw
/// 32-bit cycle count when the clock is read, extending the timer to 64 bit.
static ROLLOVER_COUNT: AtomicU32 = AtomicU32::new(0);
#[cfg(feature = "extended")]
// Extended mode widens the 32-bit cycle count with the rollover counter, which only
// works if the profiling container is a u64. Fail the build if it is anything else.
static_assertions::assert_type_eq_all!(EPContainer, u64);
/// DWT trace unit implementing [`EmbeddedProfiler`].
///
/// The frequency of the [`DWT`] is encoded using the parameter `FREQ`, given in Hz
/// (cycles per second); it is used to convert cycle counts to microsecond ticks.
pub struct DwtProfiler<const FREQ: u32> {
    /// Owned [`DWT`] peripheral whose `cyccnt` register is read for every timestamp.
    dwt: DWT,
}
impl<const FREQ: u32> DwtProfiler<FREQ> {
/// Enable the [`DWT`] cycle counter and wrap it in a new [`EmbeddedProfiler`].
///
/// * `dcb` - debug control block, used to switch on tracing (and, with the `extended`
///   feature, the DebugMonitor exception).
/// * `dwt` - the DWT peripheral; ownership moves into the returned profiler.
/// * `sysclk` - the core clock in Hz as reported at runtime, e.g. by the HAL's clock
///   generation function, so the real speed and the declared speed can be compared.
///
/// # Panics
/// Panics if the compile time constant `FREQ` does not match the runtime provided `sysclk`.
pub fn new(dcb: &mut DCB, mut dwt: DWT, sysclk: u32) -> Self {
    assert!(FREQ == sysclk);

    // The DWT block is only clocked once tracing is enabled in the DCB.
    dcb.enable_trace();

    #[cfg(feature = "extended")]
    {
        // DEMCR bit 16 (MON_EN): lets DebugMonitor exceptions fire so overflows can be tracked.
        const DEMCR_MON_EN: u32 = 1 << 16;
        // SAFETY: read-modify-write that only ORs in a single enable bit and keeps every
        // other DEMCR bit exactly as it was read.
        unsafe {
            dcb.demcr.modify(|demcr| demcr | DEMCR_MON_EN);
        }
    }

    DWT::unlock();

    // Start counting from a known zero point, then let the counter run freely.
    // SAFETY: writes a plain count value to the cycle counter register of the DWT
    // instance this function owns.
    unsafe { dwt.cyccnt.write(0) };
    dwt.enable_cycle_counter();

    Self { dwt }
}
/// Greatest common divisor of `u` and `v`, computed with the binary GCD algorithm.
///
/// Written with only shifts, subtraction and comparisons so it can be a `const fn`.
/// `gcd(n, 0)` and `gcd(0, n)` both return `n` (so `gcd(0, 0)` is `0`).
const fn gcd(mut u: EPContainer, mut v: EPContainer) -> EPContainer {
    // gcd(n, 0) = gcd(0, n) = n
    if u == 0 {
        return v;
    }
    if v == 0 {
        return u;
    }

    // The largest power of two dividing both inputs is part of the result; the lowest
    // set bit of `u | v` sits at min(tz(u), tz(v)). Remember it and make both odd.
    let common_shift = (u | v).trailing_zeros();
    u >>= u.trailing_zeros();
    v >>= v.trailing_zeros();

    // Invariant: `u` and `v` are both odd here.
    while u != v {
        // Keep the smaller value in `u` (manual swap: must stay const-evaluable).
        if u > v {
            let smaller = v;
            v = u;
            u = smaller;
        }
        // gcd(u, v) = gcd(u, v - u); the difference of two odd numbers is even and,
        // because u != v, non-zero, so stripping its factors of two restores the invariant.
        v -= u;
        v >>= v.trailing_zeros();
    }

    // u == v is the odd part of the gcd; put back the shared power of two.
    u << common_shift
}
/// Lowest-terms `(numerator, denominator)` of `1_000_000 / FREQ`.
///
/// Multiplying a cycle count by this fraction converts it from core clock cycles to
/// 1µs ticks; reducing it first keeps the intermediate values as small as possible.
pub(crate) const fn reduced_fraction() -> (EPContainer, EPContainer) {
    const TICKS_PER_SECOND: EPContainer = 1_000_000;
    let cycles_per_second = FREQ as EPContainer;
    let divisor = Self::gcd(TICKS_PER_SECOND, cycles_per_second);
    (TICKS_PER_SECOND / divisor, cycles_per_second / divisor)
}
}
impl<const FREQ: u32> EmbeddedProfiler for DwtProfiler<FREQ> {
    /// Read the DWT cycle counter and convert it to an [`EPInstant`] in 1µs ticks.
    ///
    /// With the `extended` feature the number of counter rollovers recorded in
    /// `ROLLOVER_COUNT` is folded in, giving a 64-bit cycle count.
    fn read_clock(&self) -> EPInstant {
        // `count` is only mutated when the `extended` feature is enabled.
        #[allow(unused_mut)]
        let mut count = self.dwt.cyccnt.read() as EPContainer;

        #[cfg(feature = "extended")]
        {
            // The 32-bit counter wraps every 2^32 cycles (not `u32::MAX` = 2^32 - 1), so
            // each recorded rollover contributes exactly 1 << 32 cycles.
            // NOTE(review): the counter and the rollover count are read separately; a wrap
            // landing between the two reads would be missed for this sample — confirm
            // whether that window matters for callers.
            count += (ROLLOVER_COUNT.load(Ordering::Relaxed) as EPContainer) << 32;
        }

        // Convert cycles to ticks: floor(count * num / denom), evaluated as
        //   (count / denom) * num + ((count % denom) * num) / denom
        // which is the same value, but the intermediate product is bounded by
        // `denom * num` instead of `count * num`, so large counts no longer overflow
        // before the division.
        let (red_num, red_denom) = Self::reduced_fraction();
        let whole = count / red_denom * red_num;
        let partial = count % red_denom * red_num / red_denom;
        EPInstant::from_ticks(whole + partial)
    }

    /// Log the snapshot at info level via the `log` crate.
    fn log_snapshot(&self, snapshot: &EPSnapshot) {
        log::info!("{}", snapshot);
    }
}
// DebugMonitor exception handler, only built in `extended` mode: every invocation is
// counted as one overflow of the 32-bit cycle counter.
#[cfg(feature = "extended")]
#[exception]
// The handler is named after the exception it serves, hence the non-snake-case name.
#[allow(non_snake_case)]
fn DebugMonitor() {
    // Record one more rollover; the total is read back when the clock is sampled.
    ROLLOVER_COUNT.fetch_add(1, Ordering::Relaxed);
}
