tsc_trace/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::cell::{Cell, RefCell};
4use std::io::{Result, Write};
5use std::arch::asm;
6
/// Capacity in number of traces per thread.
///
/// The value is selected by Cargo features:
///
/// * `off` forces the capacity to `0`, regardless of any other feature.
/// * `capacity_64_million`, `capacity_32_million`, `capacity_16_million`,
///   `capacity_8_million` and `capacity_1_million` select the matching size.
/// * With no capacity feature enabled the default is one million traces.
///
/// Cargo features are additive, so more than one capacity feature can end up
/// enabled at once (for example when two crates in the same build each ask
/// for a different size). A single definition with a priority order keeps
/// that situation compiling; the largest requested capacity wins.
pub const TSC_TRACE_CAPACITY: usize = if cfg!(feature = "off") {
    0
} else if cfg!(feature = "capacity_64_million") {
    64_000_000
} else if cfg!(feature = "capacity_32_million") {
    32_000_000
} else if cfg!(feature = "capacity_16_million") {
    16_000_000
} else if cfg!(feature = "capacity_8_million") {
    8_000_000
} else {
    // `capacity_1_million` and "no capacity feature at all" both mean 1M.
    1_000_000
};

/// Number of `u64` slots in the per-thread storage: every trace occupies
/// three consecutive slots (tag, start, stop).
const CAPACITY: usize = TSC_TRACE_CAPACITY * 3;
43
44#[cfg(feature = "const_array")]
45thread_local! {
46    static TSC_TRACE_SPANS: RefCell<[u64; CAPACITY]> = const { RefCell::new([0; CAPACITY]) };
47    static TSC_TRACE_INDEX: Cell<usize> = const { Cell::new(0) };
48}
49
50#[cfg(not(feature = "const_array"))]
51thread_local! {
52    static TSC_TRACE_SPANS: RefCell<Vec<u64>> = RefCell::new(Vec::with_capacity(CAPACITY));
53    static TSC_TRACE_INDEX: Cell<usize> = const { Cell::new(0) };
54}
55
56/// Writes the current thread's array of traces in the format:
57///
58/// tag,start_rdtsc,stop_rdtsc,stop_minus_start\n
59///
60/// Stops writing once it encounters a stop_rdtsc of zero,
61/// assuming that's an unused portion of the array
62pub fn write_traces_csv(writer: &mut impl Write) -> Result<()> {
63    let mut res = Ok(());
64    TSC_TRACE_SPANS.with(|spans| {
65        let spans = spans.borrow();
66        for chunk in spans.chunks_exact(3) {
67            if let &[tag, start, stop] = chunk {
68                if stop == 0 {
69                    break;
70                }
71                if let e @ Err(_) = writeln!(writer, "{tag},{start},{stop},{}", stop - start) {
72                    res = e;
73                    break;
74                }
75            }
76        }
77    });
78    res
79}
80
81/// Writes the current thread's array of traces in a binary format.
82/// This is, in order:
83///
84/// tag: u64
85/// start_rdtsc: u64
86/// stop_rdtsc: u64
87///
88/// There are no delimiters between each field or between traces.
89/// Assumes little-endian since this library only works for x86.
90/// Unlike print_csv, the difference between stop and start is not calculated.
91/// Writes the entire array, even zeroed / unused portions.
92///
93/// This is suitable for import to Clickhouse via format RowBinary
94/// <https://clickhouse.com/docs/en/interfaces/formats#rowbinary>
95pub fn write_traces_binary(writer: &mut impl Write) -> Result<()> {
96    let mut res = Ok(());
97    TSC_TRACE_SPANS.with(|spans| {
98        let spans = spans.borrow();
99        let bytes: &[u8] = bytemuck::cast_slice(&*spans);
100        if let e @ Err(_) = writer.write_all(&bytes) {
101            res = e;
102        }
103    });
104    res
105}
106
/// Returns the processor's timestamp counter (32-bit x86 build).
///
/// When the `"lfence"` feature is enabled, an `lfence` instruction is issued
/// immediately before and immediately after the counter read.
#[inline(always)]
#[cfg(target_arch = "x86")]
pub fn rdtsc() -> u64 {
    use std::arch::x86 as arch;

    // SAFETY (all blocks below): the intrinsics take no arguments and no
    // pointers. NOTE(review): assumes the target CPU implements RDTSC, and
    // LFENCE when the feature is on — confirm for unusual x86 targets.
    #[cfg(feature = "lfence")]
    unsafe {
        arch::_mm_lfence();
    }
    let ticks = unsafe { arch::_rdtsc() };
    #[cfg(feature = "lfence")]
    unsafe {
        arch::_mm_lfence();
    }
    ticks
}
123
/// Returns the processor's timestamp counter (x86_64 build).
///
/// When the `"lfence"` feature is enabled, an `lfence` instruction is issued
/// immediately before and immediately after the counter read.
#[inline(always)]
#[cfg(target_arch = "x86_64")]
pub fn rdtsc() -> u64 {
    use std::arch::x86_64 as arch;

    // SAFETY (all blocks below): the intrinsics take no arguments and no
    // pointers. NOTE(review): assumes the target CPU implements RDTSC, and
    // LFENCE when the feature is on — confirm for unusual targets.
    #[cfg(feature = "lfence")]
    unsafe {
        arch::_mm_lfence();
    }
    let ticks = unsafe { arch::_rdtsc() };
    #[cfg(feature = "lfence")]
    unsafe {
        arch::_mm_lfence();
    }
    ticks
}
140
/// Workaround for ARM chips. Does not actually use rdtsc, as it is only supported on x86.
///
/// Reads the `cntvct_el0` system register with an `mrs` instruction and
/// returns its value. The `"lfence"` feature has no effect on this
/// implementation.
#[inline(always)]
#[cfg(target_arch = "aarch64")]
pub fn rdtsc() -> u64 {
    let ticks: u64;
    // SAFETY: the instruction only writes the output register chosen by the
    // compiler; it does not access memory, the stack, or the condition flags.
    // NOTE(review): assumes `cntvct_el0` is readable at the privilege level
    // this code runs at — confirm for the target platform.
    unsafe {
        asm!(
            // Let the compiler pick the destination register instead of
            // pinning `x0`, which matters because this is inlined everywhere.
            "mrs {ticks}, cntvct_el0",
            ticks = out(reg) ticks,
            options(nomem, nostack, preserves_flags),
        );
    }
    ticks
}
154
/// Placeholder for targets other than x86, x86_64 and aarch64.
///
/// # Panics
///
/// Always panics: no counter implementation exists for these targets.
#[cfg(not(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
)))]
pub fn rdtsc() -> u64 {
    unimplemented!("x86 or x86_64 needed for rdtsc, aarch64 needed for workaround")
}
159
160/// This struct must be public so that the trace_span! macro can make an instance of it in your code.
161/// Don't rely on any details of it, use the macro instead.
162pub struct TraceSpan {
163    tag: u64,
164    start: u64,
165}
166
167impl TraceSpan {
168    /// Do not call this, use the trace_span! macro instead.
169    pub fn new(tag: u64) -> Self {
170        TraceSpan {
171            tag,
172            start: rdtsc(),
173        }
174    }
175}
176
177impl Drop for TraceSpan {
178    fn drop(&mut self) {
179        let stop = rdtsc();
180        _insert_trace(self.tag, self.start, stop);
181    }
182}
183
184/// Must be public for use by the insert_trace! macro.
185/// Use that macro instead, don't use this directly.
186#[inline(always)]
187pub fn _insert_trace(tag: u64, start: u64, stop: u64) {
188    TSC_TRACE_INDEX.with(|index| {
189        let mut i = index.get();
190        if i >= CAPACITY {
191            i = 0;
192        }
193
194        #[cfg(feature = "const_array")]
195        TSC_TRACE_SPANS.with(|spans| {
196            let mut spans = spans.borrow_mut();
197            spans[i] = tag;
198            spans[i + 1] = start;
199            spans[i + 2] = stop;
200            i += 3;
201        });
202
203        #[cfg(not(feature = "const_array"))]
204        TSC_TRACE_SPANS.with(|spans| {
205            let mut spans = spans.borrow_mut();
206            if spans.len() >= CAPACITY {
207                spans[i] = tag;
208                spans[i + 1] = start;
209                spans[i + 2] = stop;
210                i += 3;
211            } else {
212                spans.push(tag);
213                spans.push(start);
214                spans.push(stop);
215                i += 3;
216            }
217        });
218
219        index.set(i);
220    })
221}
222
/// `trace_span!(tag)` Starts a trace span with the given u64 tag that ends at the end of this scope.
/// Creates a local variable named _tsc_trace_span, so don't use that name yourself.
///
/// The expansion refers to `TraceSpan` through `$crate`, so the caller does
/// not need to import it.
#[macro_export]
#[cfg(not(feature = "off"))]
macro_rules! trace_span {
    ($e:expr) => {
        let _tsc_trace_span = $crate::TraceSpan::new(($e) as u64);
    };
}

/// `trace_span!(tag)` with the `off` feature enabled: expands to nothing,
/// so the tag expression is not evaluated and no span is recorded.
#[macro_export]
#[cfg(feature = "off")]
macro_rules! trace_span {
    ($e:expr) => {};
}
238
/// `insert_trace!(tag, start, stop)`
/// Takes any 3 arbitrary expressions that `as u64` works on,
/// immediately inserts them into the thread local array as if they were a single trace.
///
/// The expansion refers to `_insert_trace` through `$crate`, so the caller
/// does not need to import it.
#[macro_export]
#[cfg(not(feature = "off"))]
macro_rules! insert_trace {
    ($a:expr, $b:expr, $c:expr) => {
        $crate::_insert_trace(($a) as u64, ($b) as u64, ($c) as u64);
    };
}

/// `insert_trace!(tag, start, stop)` with the `off` feature enabled: expands
/// to nothing, so the three expressions are not evaluated.
#[macro_export]
#[cfg(feature = "off")]
macro_rules! insert_trace {
    ($a:expr, $b:expr, $c:expr) => {};
}
254}