perfcnt/linux/
mod.rs

1//! A wrapper around perf_event open (http://lxr.free-electrons.com/source/tools/perf/design.txt)
2
3use std::fmt;
4use std::fs::File;
5use std::io;
6use std::io::{Error, Read};
7use std::mem;
8use std::os::unix::io::FromRawFd;
9use std::ptr;
10use std::slice;
11use std::str;
12
13use libc::{pid_t, strlen, MAP_SHARED};
14use mmap;
15use x86::*;
16
17#[allow(dead_code, non_camel_case_types)]
18mod hw_breakpoint;
19#[allow(dead_code, non_camel_case_types)]
20mod perf_event;
21
22pub mod parser;
23pub mod perf_file;
24pub mod perf_format;
25
26use self::perf_format::{EventAttrFlags, ReadFormatFlags, SampleFormatFlags};
27
28use crate::AbstractPerfCounter;
29use x86::perfcnt::intel::{EventDescription, Tuple};
30
/// Syscall number handed to `syscall!` by the `ioctl` wrapper below.
/// NOTE(review): 16 is the x86-64 number for `ioctl`; other architectures differ.
const IOCTL: usize = 16;
/// Syscall number handed to `syscall!` by the `perf_event_open` wrapper below.
/// NOTE(review): 298 is the x86-64 number for `perf_event_open`; other architectures differ.
const PERF_EVENT_OPEN: usize = 298;
33
34fn perf_event_open(
35    hw_event: &perf_format::EventAttr,
36    pid: perf_event::__kernel_pid_t,
37    cpu: ::libc::c_int,
38    group_fd: ::libc::c_int,
39    flags: ::libc::c_int,
40) -> isize {
41    unsafe {
42        syscall!(
43            PERF_EVENT_OPEN,
44            hw_event as *const perf_format::EventAttr as usize,
45            pid,
46            cpu,
47            group_fd,
48            flags
49        ) as isize
50    }
51}
52
53fn ioctl(fd: ::libc::c_int, request: u64, value: ::libc::c_int) -> isize {
54    unsafe { syscall!(IOCTL, fd, request, value) as isize }
55}
56
/// Builder that collects the arguments of a `perf_event_open` call.
///
/// `finish` forwards the fields to the system call as `hw_event`, `pid`, `cpu`,
/// `group_fd` and `flags`.
pub struct PerfCounterBuilderLinux {
    /// Passed as `group_fd`; -1 by default, changed with `set_group`.
    group: isize,
    /// Passed as `pid`; 0 by default, `for_all_pids` sets it to -1.
    pid: pid_t,
    /// Passed as `cpu`; -1 by default, which is also what `on_all_cpus` selects.
    cpu: isize,
    /// Bit set passed as `flags` (see `set_flag_fd_output` / `set_flag_pid_cgroup`).
    flags: i32,
    /// Event attributes passed to the kernel by reference.
    attrs: perf_format::EventAttr,
}
64
65impl Default for PerfCounterBuilderLinux {
66    fn default() -> PerfCounterBuilderLinux {
67        PerfCounterBuilderLinux {
68            group: -1,
69            pid: 0,
70            cpu: -1,
71            flags: 0,
72            attrs: Default::default(),
73        }
74    }
75}
76
/// Generic hardware events of the Linux perf interface.
///
/// Every discriminant is the matching `perf_event::PERF_COUNT_HW_*` constant;
/// `PerfCounterBuilderLinux::from_hardware_event` stores it as the event `config`.
#[derive(Debug, Clone, Copy)]
pub enum HardwareEventType {
    /// Total cycles.  Be wary of what happens during CPU frequency scaling.
    CPUCycles = perf_event::PERF_COUNT_HW_CPU_CYCLES as isize,

    /// Retired instructions.  Be careful, these can be affected by various issues, most notably
    /// hardware interrupt counts.
    Instructions = perf_event::PERF_COUNT_HW_INSTRUCTIONS as isize,

    /// Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending
    /// on your CPU. This may include prefetches and coherency messages; again this depends on the
    /// design of your CPU.
    CacheReferences = perf_event::PERF_COUNT_HW_CACHE_REFERENCES as isize,

    /// Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in
    /// conjunction with the `CacheReferences` event to calculate cache miss rates.
    CacheMisses = perf_event::PERF_COUNT_HW_CACHE_MISSES as isize,

    /// Retired branch instructions.  Prior to Linux 2.6.34, this used the wrong event on AMD
    /// processors.
    BranchInstructions = perf_event::PERF_COUNT_HW_BRANCH_INSTRUCTIONS as isize,

    /// Mispredicted branch instructions.
    BranchMisses = perf_event::PERF_COUNT_HW_BRANCH_MISSES as isize,

    /// Bus cycles, which can be different from total cycles.
    BusCycles = perf_event::PERF_COUNT_HW_BUS_CYCLES as isize,

    /// Stalled cycles during issue. (Since Linux 3.0)
    StalledCyclesFrontend = perf_event::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND as isize,

    /// Stalled cycles during retirement. (Since Linux 3.0)
    StalledCyclesBackend = perf_event::PERF_COUNT_HW_STALLED_CYCLES_BACKEND as isize,

    /// Total cycles; not affected by CPU frequency scaling. (Since Linux 3.3)
    RefCPUCycles = perf_event::PERF_COUNT_HW_REF_CPU_CYCLES as isize,
}
114
/// Software events of the Linux perf interface.
///
/// Every discriminant is the matching `perf_event::PERF_COUNT_SW_*` constant;
/// `PerfCounterBuilderLinux::from_software_event` stores it as the event `config`.
#[derive(Debug, Clone, Copy)]
pub enum SoftwareEventType {
    /// This reports the CPU clock, a high-resolution per-CPU timer.
    CpuClock = perf_event::PERF_COUNT_SW_CPU_CLOCK as isize,

    /// This reports a clock count specific to the task that is running.
    TaskClock = perf_event::PERF_COUNT_SW_TASK_CLOCK as isize,

    /// This reports the number of page faults.
    PageFaults = perf_event::PERF_COUNT_SW_PAGE_FAULTS as isize,

    /// This counts context switches.
    ///
    /// Until Linux 2.6.34, these were all reported as user-space events; since then
    /// they are reported as happening in the kernel.
    ContextSwitches = perf_event::PERF_COUNT_SW_CONTEXT_SWITCHES as isize,

    /// This reports the number of times the process has migrated to a new CPU.
    CpuMigrations = perf_event::PERF_COUNT_SW_CPU_MIGRATIONS as isize,

    /// This counts the number of minor page faults.  These did not require disk I/O to handle.
    PageFaultsMin = perf_event::PERF_COUNT_SW_PAGE_FAULTS_MIN as isize,

    /// This counts the number of major page faults.  These required disk I/O to handle.
    PageFaultsMaj = perf_event::PERF_COUNT_SW_PAGE_FAULTS_MAJ as isize,

    /// This counts the number of alignment faults.
    ///
    /// These happen when unaligned memory accesses happen; the kernel
    /// can handle these but it reduces performance. This happens only on some architectures (never on x86).
    ///
    /// (Since Linux 2.6.33)
    AlignmentFaults = perf_event::PERF_COUNT_SW_ALIGNMENT_FAULTS as isize,

    /// This counts the number of emulation faults.  The kernel sometimes traps on unimplemented
    /// instructions and emulates them for user space.  This can negatively impact performance.
    ///
    /// (Since Linux 2.6.33)
    EmulationFaults = perf_event::PERF_COUNT_SW_EMULATION_FAULTS as isize,
}
155
/// Selects which cache a hardware cache event refers to.
///
/// `PerfCounterBuilderLinux::from_cache_event` places this value in the lowest bits of the
/// event `config` (it is OR-ed in without a shift).
#[derive(Debug, Clone, Copy)]
pub enum CacheId {
    /// For measuring Level 1 Data Cache
    L1D = perf_event::PERF_COUNT_HW_CACHE_L1D as isize,

    /// For measuring Level 1 Instruction Cache
    L1I = perf_event::PERF_COUNT_HW_CACHE_L1I as isize,

    /// For measuring Last-Level Cache
    LL = perf_event::PERF_COUNT_HW_CACHE_LL as isize,

    /// For measuring the Data TLB
    DTLB = perf_event::PERF_COUNT_HW_CACHE_DTLB as isize,

    /// For measuring the Instruction TLB
    ITLB = perf_event::PERF_COUNT_HW_CACHE_ITLB as isize,

    /// For measuring the branch prediction unit
    BPU = perf_event::PERF_COUNT_HW_CACHE_BPU as isize,

    /// For measuring local memory accesses
    ///
    /// (Since Linux 3.0)
    NODE = perf_event::PERF_COUNT_HW_CACHE_NODE as isize,
}
181
/// Selects the kind of cache operation a hardware cache event counts.
///
/// `PerfCounterBuilderLinux::from_cache_event` shifts this value left by 8 bits inside the
/// event `config`.
#[derive(Debug, Clone, Copy)]
pub enum CacheOpId {
    /// For read accesses
    Read = perf_event::PERF_COUNT_HW_CACHE_OP_READ as isize,

    /// For write accesses
    Write = perf_event::PERF_COUNT_HW_CACHE_OP_WRITE as isize,

    /// For prefetch accesses
    Prefetch = perf_event::PERF_COUNT_HW_CACHE_OP_PREFETCH as isize,
}
193
/// Selects which outcome of a cache operation a hardware cache event counts.
///
/// `PerfCounterBuilderLinux::from_cache_event` shifts this value left by 16 bits inside the
/// event `config`.
#[derive(Debug, Clone, Copy)]
pub enum CacheOpResultId {
    /// To measure accesses.
    Access = perf_event::PERF_COUNT_HW_CACHE_RESULT_ACCESS as isize,

    /// To measure misses.
    Miss = perf_event::PERF_COUNT_HW_CACHE_RESULT_MISS as isize,
}
202
203impl PerfCounterBuilderLinux {
204    /// Instantiate a generic performance counter for hardware events as defined by the Linux interface.
205    pub fn from_hardware_event(event: HardwareEventType) -> PerfCounterBuilderLinux {
206        let mut pc: PerfCounterBuilderLinux = Default::default();
207
208        pc.attrs.attr_type = perf_event::PERF_TYPE_HARDWARE;
209        pc.attrs.config = event as u64;
210        pc
211    }
212
213    /// Instantiate a generic performance counter for software events as defined by the Linux interface.
214    pub fn from_software_event(event: SoftwareEventType) -> PerfCounterBuilderLinux {
215        let mut pc: PerfCounterBuilderLinux = Default::default();
216
217        pc.attrs.attr_type = perf_event::PERF_TYPE_SOFTWARE;
218        pc.attrs.config = event as u64;
219        pc
220    }
221
222    /// Instantiate a generic performance counter for software events as defined by the Linux interface.
223    pub fn from_cache_event(
224        cache_id: CacheId,
225        cache_op_id: CacheOpId,
226        cache_op_result_id: CacheOpResultId,
227    ) -> PerfCounterBuilderLinux {
228        let mut pc: PerfCounterBuilderLinux = Default::default();
229
230        pc.attrs.attr_type = perf_event::PERF_TYPE_HW_CACHE;
231        pc.attrs.config =
232            (cache_id as u64) | (cache_op_id as u64) << 8 | (cache_op_result_id as u64) << 16;
233        pc
234    }
235
236    //pub fn from_breakpoint_event() -> PerfCounterBuilderLinux {
237    // NYI
238    //}
239
240    /// Instantiate a H/W performance counter using a hardware event as described in Intels SDM.
241    pub fn from_intel_event_description(counter: &EventDescription) -> PerfCounterBuilderLinux {
242        let mut pc: PerfCounterBuilderLinux = Default::default();
243        let mut config: u64 = 0;
244
245        match counter.event_code {
246            Tuple::One(code) => config |= (code as u64) << 0,
247            Tuple::Two(_, _) => unreachable!(), // NYI
248        };
249        match counter.umask {
250            Tuple::One(code) => config |= (code as u64) << 8,
251            Tuple::Two(_, _) => unreachable!(), // NYI
252        };
253        config |= (counter.counter_mask as u64) << 24;
254
255        if counter.edge_detect {
256            config |= 1 << 18;
257        }
258        if counter.any_thread {
259            config |= 1 << 21;
260        }
261        if counter.invert {
262            config |= 1 << 23;
263        }
264
265        pc.attrs.attr_type = perf_event::PERF_TYPE_RAW;
266        pc.attrs.config = config;
267        pc
268    }
269
270    /// Set counter group.
271    pub fn set_group<'a>(&'a mut self, group_fd: isize) -> &'a mut PerfCounterBuilderLinux {
272        self.group = group_fd;
273        self
274    }
275
276    /// Sets PERF_FLAG_FD_OUTPUT
277    ///
278    /// This flag re-routes the output from an event to the group leader.
279    pub fn set_flag_fd_output<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
280        self.flags |= 0x02; //PERF_FLAG_FD_OUTPUT;
281        self
282    }
283
284    /// Sets PERF_FLAG_PID_CGROUP
285    ///
286    /// This flag activates per-container system-wide monitoring.  A
287    /// container is an abstraction that isolates a set of resources for
288    /// finer grain control (CPUs, memory, etc.).   In  this  mode,  the
289    /// event  is  measured  only if the thread running on the monitored
290    /// CPU belongs to the designated container (cgroup).
291    pub fn set_flag_pid_cgroup<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
292        self.flags |= 0x04; //PERF_FLAG_PID_CGROUP;
293        self
294    }
295
296    /// Add a sample period.
297    pub fn set_sample_period<'a>(&'a mut self, period: u64) -> &'a mut PerfCounterBuilderLinux {
298        self.attrs.sample_period_freq = period;
299        self
300    }
301
302    /// Add a sample frequency.
303    pub fn set_sample_frequency<'a>(
304        &'a mut self,
305        frequency: u64,
306    ) -> &'a mut PerfCounterBuilderLinux {
307        self.attrs.sample_period_freq = frequency;
308        self.attrs.settings.insert(EventAttrFlags::EVENT_ATTR_FREQ);
309        self
310    }
311
312    /// The counter starts out disabled.
313    pub fn disable<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
314        self.attrs
315            .settings
316            .insert(EventAttrFlags::EVENT_ATTR_DISABLED);
317        self
318    }
319
320    /// This counter should count events of child tasks as well as the task specified.
321    pub fn inherit<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
322        self.attrs
323            .settings
324            .insert(EventAttrFlags::EVENT_ATTR_INHERIT);
325        self
326    }
327
328    /// The pinned bit specifies that the counter should always be on the CPU if at all possible.
329    /// It applies only to  hardware counters and only to group leaders.
330    pub fn pinned<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
331        self.attrs
332            .settings
333            .insert(EventAttrFlags::EVENT_ATTR_PINNED);
334        self
335    }
336
337    /// The counter is exclusive i.e., when this counter's group is on the CPU,
338    /// it should be the only group using the CPU's counters.
339    pub fn exclusive<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
340        self.attrs
341            .settings
342            .insert(EventAttrFlags::EVENT_ATTR_EXCLUSIVE);
343        self
344    }
345
346    /// The counter excludes events that happen in user space.
347    pub fn exclude_user<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
348        self.attrs
349            .settings
350            .insert(EventAttrFlags::EVENT_ATTR_EXCLUDE_USER);
351        self
352    }
353
354    /// The counter excludes events that happen in the kernel.
355    pub fn exclude_kernel<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
356        self.attrs
357            .settings
358            .insert(EventAttrFlags::EVENT_ATTR_EXCLUDE_KERNEL);
359        self
360    }
361
362    /// The counter excludes events that happen in the hypervisor.
363    pub fn exclude_hv<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
364        self.attrs
365            .settings
366            .insert(EventAttrFlags::EVENT_ATTR_EXCLUDE_HV);
367        self
368    }
369
370    /// The counter doesn't count when the CPU is idle.
371    pub fn exclude_idle<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
372        self.attrs
373            .settings
374            .insert(EventAttrFlags::EVENT_ATTR_EXCLUDE_IDLE);
375        self
376    }
377
378    /// Enables recording of exec mmap events.
379    pub fn enable_mmap<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
380        self.attrs.settings.insert(EventAttrFlags::EVENT_ATTR_MMAP);
381        self
382    }
383
384    /// The counter will save event counts on context switch for inherited tasks.
385    /// This is meaningful only if the inherit field is set.
386    pub fn inherit_stat<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
387        self.attrs
388            .settings
389            .insert(EventAttrFlags::EVENT_ATTR_INHERIT_STAT);
390        self
391    }
392
393    /// The counter is automatically enabled after a call to exec.
394    pub fn enable_on_exec<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
395        self.attrs
396            .settings
397            .insert(EventAttrFlags::EVENT_ATTR_ENABLE_ON_EXEC);
398        self
399    }
400
401    /// fork/exit notifications are included in the ring buffer.
402    pub fn enable_task_notification<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
403        self.attrs.settings.insert(EventAttrFlags::EVENT_ATTR_TASK);
404        self
405    }
406
407    /// The counter has  a  sampling  interrupt happen when we cross the wakeup_watermark
408    /// boundary.  Otherwise interrupts happen after wakeup_events samples.
409    pub fn enable_watermark<'a>(
410        &'a mut self,
411        watermark_events: u32,
412    ) -> &'a mut PerfCounterBuilderLinux {
413        self.attrs
414            .settings
415            .insert(EventAttrFlags::EVENT_ATTR_WATERMARK);
416        self.attrs.wakeup_events_watermark = watermark_events;
417        self
418    }
419
420    /// Sampled IP counter can have arbitrary skid.
421    pub fn set_ip_sample_arbitrary_skid<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
422        self.attrs
423            .settings
424            .insert(EventAttrFlags::EVENT_ATTR_SAMPLE_IP_ARBITRARY_SKID);
425        self
426    }
427
428    /// Sampled IP counter requested to have constant skid.
429    pub fn set_ip_sample_constant_skid<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
430        self.attrs
431            .settings
432            .insert(EventAttrFlags::EVENT_ATTR_SAMPLE_IP_CONSTANT_SKID);
433        self
434    }
435
436    /// Sampled IP counter requested to have 0 skid.
437    pub fn set_ip_sample_req_zero_skid<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
438        self.attrs
439            .settings
440            .insert(EventAttrFlags::EVENT_ATTR_SAMPLE_IP_REQ_ZERO_SKID);
441        self
442    }
443
444    /// The counterpart of enable_mmap, but enables including data mmap events in the ring-buffer.
445    pub fn enable_mmap_data<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
446        self.attrs
447            .settings
448            .insert(EventAttrFlags::EVENT_ATTR_MMAP_DATA);
449        self
450    }
451
452    /// Sampled IP counter must have 0 skid.
453    pub fn set_ip_sample_zero_skid<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
454        self.attrs
455            .settings
456            .insert(EventAttrFlags::EVENT_ATTR_SAMPLE_IP_ZERO_SKID);
457        self
458    }
459
460    /// Adds the 64-bit time_enabled field.  This can be used to calculate estimated totals if the PMU is overcommitted
461    /// and multiplexing is happening.
462    pub fn enable_read_format_time_enabled<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
463        self.attrs
464            .read_format
465            .insert(ReadFormatFlags::FORMAT_TOTAL_TIME_ENABLED);
466        self
467    }
468
469    /// Adds the 64-bit time_running field.  This can be used to calculate estimated totals if the PMU is  overcommitted
470    /// and  multiplexing is happening.
471    pub fn enable_read_format_time_running<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
472        self.attrs
473            .read_format
474            .insert(ReadFormatFlags::FORMAT_TOTAL_TIME_RUNNING);
475        self
476    }
477
478    /// Adds a 64-bit unique value that corresponds to the event group.
479    pub fn enable_read_format_id<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
480        self.attrs.read_format.insert(ReadFormatFlags::FORMAT_ID);
481        self
482    }
483
484    /// Allows all counter values in an event group to be read with one read.
485    pub fn enable_read_format_group<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
486        self.attrs.read_format.insert(ReadFormatFlags::FORMAT_GROUP);
487        self
488    }
489
490    pub fn enable_sampling_ip<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
491        self.attrs
492            .sample_type
493            .insert(SampleFormatFlags::PERF_SAMPLE_IP);
494        self
495    }
496
497    pub fn enable_sampling_tid<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
498        self.attrs
499            .sample_type
500            .insert(SampleFormatFlags::PERF_SAMPLE_TID);
501        self
502    }
503
504    pub fn enable_sampling_time<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
505        self.attrs
506            .sample_type
507            .insert(SampleFormatFlags::PERF_SAMPLE_TIME);
508        self
509    }
510
511    pub fn enable_sampling_addr<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
512        self.attrs
513            .sample_type
514            .insert(SampleFormatFlags::PERF_SAMPLE_ADDR);
515        self
516    }
517
518    pub fn enable_sampling_read<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
519        self.attrs
520            .sample_type
521            .insert(SampleFormatFlags::PERF_SAMPLE_READ);
522        self
523    }
524
525    pub fn enable_sampling_callchain<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
526        self.attrs
527            .sample_type
528            .insert(SampleFormatFlags::PERF_SAMPLE_CALLCHAIN);
529        self
530    }
531
532    pub fn enable_sampling_sample_id<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
533        self.attrs
534            .sample_type
535            .insert(SampleFormatFlags::PERF_SAMPLE_ID);
536        self
537    }
538
539    pub fn enable_sampling_cpu<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
540        self.attrs
541            .sample_type
542            .insert(SampleFormatFlags::PERF_SAMPLE_CPU);
543        self
544    }
545
546    pub fn enable_sampling_period<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
547        self.attrs
548            .sample_type
549            .insert(SampleFormatFlags::PERF_SAMPLE_PERIOD);
550        self
551    }
552
553    pub fn enable_sampling_stream_id<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
554        self.attrs
555            .sample_type
556            .insert(SampleFormatFlags::PERF_SAMPLE_STREAM_ID);
557        self
558    }
559
560    pub fn enable_sampling_raw<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
561        self.attrs
562            .sample_type
563            .insert(SampleFormatFlags::PERF_SAMPLE_RAW);
564        self
565    }
566
567    pub fn enable_sampling_branch_stack<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
568        self.attrs
569            .sample_type
570            .insert(SampleFormatFlags::PERF_SAMPLE_BRANCH_STACK);
571        self
572    }
573
574    pub fn enable_sampling_regs_user<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
575        self.attrs
576            .sample_type
577            .insert(SampleFormatFlags::PERF_SAMPLE_REGS_USER);
578        self
579    }
580
581    pub fn enable_sampling_stack_user<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
582        self.attrs
583            .sample_type
584            .insert(SampleFormatFlags::PERF_SAMPLE_STACK_USER);
585        self
586    }
587
588    pub fn enable_sampling_sample_weight<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
589        self.attrs
590            .sample_type
591            .insert(SampleFormatFlags::PERF_SAMPLE_WEIGHT);
592        self
593    }
594
595    pub fn enable_sampling_data_src<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
596        self.attrs
597            .sample_type
598            .insert(SampleFormatFlags::PERF_SAMPLE_DATA_SRC);
599        self
600    }
601
602    pub fn enable_sampling_identifier<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
603        self.attrs
604            .sample_type
605            .insert(SampleFormatFlags::PERF_SAMPLE_IDENTIFIER);
606        self
607    }
608
609    pub fn enable_sampling_transaction<'a>(&'a mut self) -> &'a PerfCounterBuilderLinux {
610        self.attrs
611            .sample_type
612            .insert(SampleFormatFlags::PERF_SAMPLE_TRANSACTION);
613        self
614    }
615
616    /// Measure for all PIDs on the core.
617    pub fn for_all_pids<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
618        self.pid = -1;
619        self
620    }
621
622    /// Measure for a specific PID.
623    pub fn for_pid<'a>(&'a mut self, pid: i32) -> &'a mut PerfCounterBuilderLinux {
624        self.pid = pid;
625        self
626    }
627
628    /// Pin counter to CPU.
629    pub fn on_cpu<'a>(&'a mut self, cpu: isize) -> &'a mut PerfCounterBuilderLinux {
630        self.cpu = cpu;
631        self
632    }
633
634    /// Measure on all CPUs.
635    pub fn on_all_cpus<'a>(&'a mut self) -> &'a mut PerfCounterBuilderLinux {
636        self.cpu = -1;
637        self
638    }
639
640    pub fn finish_sampling_counter(&self) -> Result<PerfCounter, io::Error> {
641        let fd = perf_event_open(
642            &self.attrs,
643            self.pid,
644            self.cpu as i32,
645            self.group as i32,
646            self.flags,
647        ) as ::libc::c_int;
648        if fd < 0 {
649            return Err(Error::from_raw_os_error(-fd));
650        }
651
652        Ok(PerfCounter {
653            fd,
654            file: unsafe { File::from_raw_fd(fd) },
655            attributes: self.attrs,
656        })
657    }
658
659    /// Instantiate the performance counter.
660    pub fn finish(&self) -> Result<PerfCounter, io::Error> {
661        let fd = perf_event_open(
662            &self.attrs,
663            self.pid,
664            self.cpu as i32,
665            self.group as i32,
666            self.flags,
667        ) as ::libc::c_int;
668        if fd < 0 {
669            return Err(Error::from_raw_os_error(-fd));
670        }
671
672        Ok(PerfCounter {
673            fd,
674            file: unsafe { File::from_raw_fd(fd) },
675            attributes: self.attrs,
676        })
677    }
678}
679
/// Result of reading a counter's file descriptor (see `PerfCounter::read_fd`).
///
/// The struct is `#[repr(C)]` and consists of four consecutive `u64` fields.
#[repr(C)]
#[derive(Default, Debug)]
pub struct FileReadFormat {
    /// The value of the event
    pub value: u64,
    /// if PERF_FORMAT_TOTAL_TIME_ENABLED
    pub time_enabled: u64,
    /// if PERF_FORMAT_TOTAL_TIME_RUNNING
    pub time_running: u64,
    /// if PERF_FORMAT_ID
    pub id: u64,
}
692
693impl FileReadFormat {
694    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> FileReadFormat {
695        let value: u64 = read(ptr, 0);
696        let time_enabled: u64 = read(ptr, 8);
697        let time_running: u64 = read(ptr, 16);
698        let id: u64 = read(ptr, 24);
699
700        FileReadFormat {
701            value,
702            time_enabled,
703            time_running,
704            id,
705        }
706    }
707}
708
/// Layout of a control page with C representation.
///
/// The fields before `data_head` add up to 1024 bytes (64 bytes of fields plus
/// 120 reserved `u64` words), so `data_head` sits at byte offset 1024.
/// NOTE(review): presumably mirrors the kernel's `perf_event_mmap_page` — confirm against
/// the perf headers.
#[repr(C)]
pub struct MMAPPage {
    /// version number of this structure
    version: u32,
    /// lowest version this is compat with
    compat_version: u32,
    /// seqlock for synchronization
    lock: u32,
    /// hardware counter identifier
    index: u32,
    /// add to hardware counter value
    offset: i64,
    /// time event active
    time_enabled: u64,
    /// time event on CPU
    time_running: u64,
    capabilities: u64,
    pmc_width: u16,
    time_shift: u16,
    time_mult: u32,
    time_offset: u64,
    /// Pad to 1k
    reserved: [u64; 120],
    /// head in the data section
    data_head: u64,
    /// user-space written tail
    data_tail: u64,
}
737
738impl fmt::Debug for MMAPPage {
739    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
740        write!(f, "MMAPPage {{ version: {} compat_version: {} lock: {} index: {} offset: {} time_enabled: {} time_running: {} capabilities: {} pmc_width: {} time_shift: {} time_mult: {}  time_offset: {} data_head: {} data_tail: {} }}",
741            self.version, self.compat_version, self.lock,
742            self.index, self.offset, self.time_enabled, self.time_running,
743            self.capabilities, self.pmc_width, self.time_shift, self.time_mult,
744            self.time_offset, self.data_head, self.data_tail)
745    }
746}
747
/// An opened performance counter, as produced by `PerfCounterBuilderLinux::finish`.
pub struct PerfCounter {
    /// Raw descriptor returned by `perf_event_open`; used for the `ioctl` calls.
    fd: ::libc::c_int,
    /// `File` created from the same descriptor (`File::from_raw_fd(fd)`); used for reads.
    file: File,
    /// Copy of the attributes the counter was opened with.
    attributes: perf_format::EventAttr,
}
753
754impl PerfCounter {
755    /// Read the file descriptor and parse the return format.
756    pub fn read_fd(&mut self) -> Result<FileReadFormat, io::Error> {
757        unsafe {
758            let mut value: FileReadFormat = Default::default();
759            let ptr = mem::transmute::<&mut FileReadFormat, &mut u8>(&mut value);
760            let slice = slice::from_raw_parts_mut::<u8>(ptr, mem::size_of::<FileReadFormat>());
761            self.file.read_exact(slice)?;
762            Ok(value)
763        }
764    }
765}
766
767impl<'a> AbstractPerfCounter for PerfCounter {
768    fn reset(&self) -> Result<(), io::Error> {
769        let ret = ioctl(self.fd, perf_event::PERF_EVENT_IOC_RESET, 0);
770        if ret == -1 {
771            return Err(Error::last_os_error());
772        }
773        Ok(())
774    }
775
776    fn start(&self) -> Result<(), io::Error> {
777        let ret = ioctl(self.fd, perf_event::PERF_EVENT_IOC_ENABLE, 0);
778        if ret == -1 {
779            return Err(Error::last_os_error());
780        }
781        Ok(())
782    }
783
784    fn stop(&self) -> Result<(), io::Error> {
785        let ret = ioctl(self.fd, perf_event::PERF_EVENT_IOC_DISABLE, 0);
786        if ret == -1 {
787            return Err(Error::last_os_error());
788        }
789        Ok(())
790    }
791
792    fn read(&mut self) -> Result<u64, io::Error> {
793        let value: FileReadFormat = self.read_fd()?;
794        return Ok(value.value);
795    }
796}
797
/// A performance counter combined with a memory mapping.
///
/// NOTE(review): presumably the mapping is the perf ring buffer of `pc` and `events_size`
/// the size of its data area — the code using these fields is not part of this excerpt.
pub struct SamplingPerfCounter {
    /// The underlying counter.
    pc: PerfCounter,
    /// Memory mapping owned by this counter.
    map: mmap::MemoryMap,
    events_size: usize,
}
803
/// Reads a value of type `U` located `offset` bytes after `ptr`.
///
/// The value is copied out with an unaligned read, so `ptr + offset` does not have to
/// satisfy the alignment of `U`.
///
/// # Safety
///
/// `ptr + offset` must stay inside the allocation `ptr` points into, and
/// `size_of::<U>()` bytes starting there must be readable and form a valid `U`.
unsafe fn read<U: Copy>(ptr: *const u8, offset: isize) -> U {
    ptr::read_unaligned(ptr.offset(offset).cast::<U>())
}
808
809/*
810enum EventHeaderMisc {
811
812    /// Unknown CPU mode.
813    CPUMODE_UNKNOWN
814
815    /// Sample happened in the kernel.
816    KERNEL
817
818    /// Sample happened in user code.
819    USER
820
821    /// Sample happened in the hypervisor.
822    HYPERVISOR
823
824    /// Sample happened in the guest kernel.
825    GUEST_KERNEL
826
827    /// Sample happened in guest user code.
828    GUEST_USER
829
830
831    In addition, one of the following bits can be set:
832    MMAP_DATA
833           This is set when the mapping is not executable; otherwise the mapping is executable.
834
835    EXACT_IP
836           This indicates that the content of PERF_SAMPLE_IP points to the actual instruction  that  triggered  the  event.
837           See also perf_event_attr.precise_ip.
838
839    EXT_RESERVED
840           This indicates there is extended data available (currently not used).
841
842}*/
843
/// Header found at the start of every record decoded in this file.
///
/// `EventHeader::copy_from_raw_ptr` reads the three fields from byte offsets 0, 4 and 6.
#[derive(Default, Debug)]
struct EventHeader {
    /// Record type, read from byte offset 0.
    event_type: u32,
    /// Additional flags, read from byte offset 4.
    misc: u16,
    /// Read from byte offset 6. NOTE(review): presumably the size of the whole record.
    size: u16,
}
850
851impl EventHeader {
852    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> EventHeader {
853        let event_type: u32 = read(ptr, 0);
854        let misc: u16 = read(ptr, 4);
855        let size: u16 = read(ptr, 6);
856        EventHeader {
857            event_type,
858            misc,
859            size,
860        }
861    }
862}
863
/// The MMAP events record the PROT_EXEC mappings so that we can correlate user-space IPs to code.
///
/// `MMAPRecord::copy_from_raw_ptr` decodes the fields from the byte offsets noted below.
#[repr(C)]
#[derive(Debug)]
pub struct MMAPRecord {
    /// Record header (bytes 0..8).
    header: EventHeader,
    /// Read from byte offset 8.
    pid: u32,
    /// Read from byte offset 12.
    tid: u32,
    /// Read from byte offset 16.
    addr: u64,
    /// Read from byte offset 24.
    len: u64,
    /// Read from byte offset 32.
    pgoff: u64,
    /// NUL-terminated string starting at byte offset 40, copied into an owned `String`.
    filename: String,
}
876
877impl MMAPRecord {
878    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> MMAPRecord {
879        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
880        let pid: u32 = read(ptr, 8);
881        let tid: u32 = read(ptr, 12);
882        let addr: u64 = read(ptr, 16);
883        let len: u64 = read(ptr, 24);
884        let pgoff: u64 = read(ptr, 32);
885        let filename = {
886            let str_start = ptr.offset(40);
887            let strlen_ptr = mem::transmute::<*const u8, &i8>(str_start);
888            let length = strlen(strlen_ptr) as usize;
889            let slice = slice::from_raw_parts(str_start, length);
890            String::from(str::from_utf8(slice).unwrap())
891        };
892
893        MMAPRecord {
894            header,
895            pid,
896            tid,
897            addr,
898            len,
899            pgoff,
900            filename,
901        }
902    }
903}
904
/// This record indicates when events are lost.
///
/// `LostRecord::copy_from_raw_ptr` reads `id` and `lost` from byte offsets 8 and 16.
#[derive(Debug)]
pub struct LostRecord {
    /// Record header (bytes 0..8).
    header: EventHeader,
    /// Unique event ID of the samples that were lost.
    id: u64,
    /// The number of events that were lost.
    lost: u64,
}
914
915impl LostRecord {
916    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> LostRecord {
917        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
918        let id: u64 = read(ptr, 8);
919        let lost: u64 = read(ptr, 16);
920
921        LostRecord {
922            header,
923            id,
924            lost,
925        }
926    }
927}
928
/// This record indicates a change in the process name.
///
/// `CommRecord::copy_from_raw_ptr` decodes the fields from the byte offsets noted below.
#[derive(Debug)]
pub struct CommRecord {
    /// Record header (bytes 0..8).
    header: EventHeader,
    /// Read from byte offset 8.
    pid: u32,
    /// Read from byte offset 12.
    tid: u32,
    /// NUL-terminated string starting at byte offset 16, copied into an owned `String`.
    comm: String,
}
937
938impl CommRecord {
939    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> CommRecord {
940        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
941        let pid: u32 = read(ptr, 8);
942        let tid: u32 = read(ptr, 12);
943
944        let comm = {
945            let str_start = ptr.offset(16);
946            let strlen_ptr = mem::transmute::<*const u8, &i8>(str_start);
947            let length = strlen(strlen_ptr) as usize;
948            let slice = slice::from_raw_parts(str_start, length);
949            String::from(str::from_utf8(slice).unwrap())
950        };
951        CommRecord {
952            header,
953            pid,
954            tid,
955            comm,
956        }
957    }
958}
959
960/// This record indicates a process exit event.
961#[derive(Debug)]
962pub struct ExitRecord {
963    header: EventHeader,
964    pid: u32,
965    ppid: u32,
966    tid: u32,
967    ptid: u32,
968    time: u64,
969}
970
971impl ExitRecord {
972    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> ExitRecord {
973        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
974        let pid: u32 = read(ptr, 8);
975        let ppid: u32 = read(ptr, 12);
976        let tid: u32 = read(ptr, 16);
977        let ptid: u32 = read(ptr, 20);
978        let time: u64 = read(ptr, 24);
979
980        ExitRecord {
981            header,
982            pid,
983            ppid,
984            tid,
985            ptid,
986            time,
987        }
988    }
989}
990
991/// This record indicates a throttle/unthrottle event.
992#[repr(C)]
993#[derive(Debug)]
994pub struct ThrottleRecord {
995    header: EventHeader,
996    time: u64,
997    id: u64,
998    stream_id: u64,
999}
1000
1001impl ThrottleRecord {
1002    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> ThrottleRecord {
1003        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
1004        let time: u64 = read(ptr, 8);
1005        let id: u64 = read(ptr, 16);
1006        let stream_id: u64 = read(ptr, 24);
1007
1008        ThrottleRecord {
1009            header,
1010            time,
1011            id,
1012            stream_id,
1013        }
1014    }
1015}
1016
1017/// This record indicates a fork event.
1018#[derive(Debug)]
1019pub struct ForkRecord {
1020    header: EventHeader,
1021    pid: u32,
1022    ppid: u32,
1023    tid: u32,
1024    ptid: u32,
1025    time: u64,
1026}
1027
1028impl ForkRecord {
1029    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> ForkRecord {
1030        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
1031        let pid: u32 = read(ptr, 8);
1032        let ppid: u32 = read(ptr, 12);
1033        let tid: u32 = read(ptr, 16);
1034        let ptid: u32 = read(ptr, 20);
1035        let time: u64 = read(ptr, 24);
1036
1037        ForkRecord {
1038            header,
1039            pid,
1040            ppid,
1041            tid,
1042            ptid,
1043            time,
1044        }
1045    }
1046}
1047
1048/// This record indicates a read event.
1049#[repr(C)]
1050#[derive(Debug)]
1051pub struct ReadRecord {
1052    header: EventHeader,
1053    pid: u32,
1054    tid: u32,
1055    value: FileReadFormat, // TODO with PERF_FORMAT_GROUP: values: Vec<FileReadFormat>
1056}
1057
1058impl ReadRecord {
1059    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> ReadRecord {
1060        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
1061        let pid: u32 = read(ptr, 8);
1062        let tid: u32 = read(ptr, 12);
1063        let frf: FileReadFormat = FileReadFormat::copy_from_raw_ptr(ptr.offset(16));
1064
1065        ReadRecord {
1066            header,
1067            pid,
1068            tid,
1069            value: frf,
1070        }
1071    }
1072}
1073
/// One entry of the branch stack that can accompany a sample
/// (see the `lbr` field of `SampleRecord`).
#[derive(Debug)]
struct BranchEntry {
    /// Address the branch was taken from.
    pub from: u64,
    /// Address the branch went to.
    pub to: u64,
    /// Raw flag bits attached to the branch entry.
    flags: u64,
}
1080
1081/// This record indicates a sample.
1082#[derive(Debug)]
1083pub struct SampleRecord {
1084    header: EventHeader,
1085    /// if PERF_SAMPLE_IP
1086    ip: u64,
1087    /// if PERF_SAMPLE_TID
1088    pid: u32,
1089    /// if PERF_SAMPLE_TID
1090    tid: u32,
1091    /// if PERF_SAMPLE_TIME
1092    time: u64,
1093    /// if PERF_SAMPLE_ADDR
1094    addr: u64,
1095    /// if PERF_SAMPLE_ID
1096    id: u64,
1097    /// if PERF_SAMPLE_STREAM_ID
1098    stream_id: u64,
1099    /// if PERF_SAMPLE_CPU
1100    cpu: u32,
1101    /// if PERF_SAMPLE_CPU
1102    res: u32,
1103    /// if PERF_SAMPLE_PERIOD
1104    period: u64,
1105
1106    /// if PERF_SAMPLE_READ
1107    /// # TODO
1108    /// FILE GROUP FORMAT is different...
1109    v: FileReadFormat,
1110
1111    //u64   nr;         /* if PERF_SAMPLE_CALLCHAIN */
1112    //u64   ips[nr];    /* if PERF_SAMPLE_CALLCHAIN */
1113    ips: Vec<u64>,
1114
1115    /// u32   size;       /* if PERF_SAMPLE_RAW */
1116    /// char  data[size]; /* if PERF_SAMPLE_RAW */
1117    raw_sample: Vec<u8>,
1118
1119    /// u64   bnr;        /* if PERF_SAMPLE_BRANCH_STACK */
1120    /// struct perf_branch_entry lbr[bnr];
1121    lbr: Vec<BranchEntry>,
1122
1123    /// u64   abi;        /* if PERF_SAMPLE_REGS_USER */
1124    abi: u64,
1125
1126    ///  u64   regs[weight(mask)];
1127    /// if PERF_SAMPLE_REGS_USER
1128    regs: Vec<u64>,
1129
1130    /// u64   size;       /* if PERF_SAMPLE_STACK_USER */
1131    /// char  data[size]; /* if PERF_SAMPLE_STACK_USER */
1132    user_stack: Vec<u8>,
1133
1134    /// u64   dyn_size;   /* if PERF_SAMPLE_STACK_USER */
1135    dyn_size: u64,
1136    /// u64   weight;     /* if PERF_SAMPLE_WEIGHT */
1137    weight: u64,
1138    /// u64   data_src;   /* if PERF_SAMPLE_DATA_SRC */
1139    data_str: u64,
1140}
1141
1142impl SampleRecord {
1143    unsafe fn copy_from_raw_ptr(ptr: *const u8) -> SampleRecord {
1144        let header: EventHeader = EventHeader::copy_from_raw_ptr(ptr);
1145        let ip: u64 = read(ptr, 8);
1146        let pid: u32 = read(ptr, 16);
1147        let tid: u32 = read(ptr, 20);
1148        let time: u64 = read(ptr, 24);
1149        let addr: u64 = read(ptr, 32);
1150        let id: u64 = read(ptr, 40);
1151        let stream_id: u64 = read(ptr, 48);
1152        let cpu: u32 = read(ptr, 52);
1153        let res: u32 = read(ptr, 56);
1154        let period: u64 = read(ptr, 64);
1155
1156        // TODO:
1157        let v: FileReadFormat = FileReadFormat::copy_from_raw_ptr(ptr.offset(72));
1158        let ips: Vec<u64> = Vec::new();
1159        let raw_sample: Vec<u8> = Vec::new();
1160        let lbr: Vec<BranchEntry> = Vec::new();
1161        let abi: u64 = 0;
1162        let regs: Vec<u64> = Vec::new();
1163        let user_stack: Vec<u8> = Vec::new();
1164        let dyn_size: u64 = 0;
1165        let weight: u64 = 0;
1166        let data_str: u64 = 0;
1167
1168        SampleRecord {
1169            header,
1170            ip,
1171            pid,
1172            tid,
1173            time,
1174            addr,
1175            id,
1176            stream_id,
1177            cpu,
1178            res,
1179            period,
1180            v,
1181            ips,
1182            raw_sample,
1183            lbr,
1184            abi,
1185            regs,
1186            user_stack,
1187            dyn_size,
1188            weight,
1189            data_str,
1190        }
1191    }
1192}
1193
1194#[derive(Debug)]
1195pub enum Event {
1196    MMAP(MMAPRecord),
1197    Lost(LostRecord),
1198    Comm(CommRecord),
1199    Exit(ExitRecord),
1200    Throttle(ThrottleRecord),
1201    Unthrottle(ThrottleRecord),
1202    Fork(ForkRecord),
1203    Read(ReadRecord),
1204    Sample(SampleRecord),
1205}
1206
1207impl Iterator for SamplingPerfCounter {
1208    type Item = Event;
1209
1210    /// Iterate over the event buffer.
1211    ///
1212    /// We copy and transform the events for two reasons:
1213    ///  * The exposed C struct layout would be difficult to read with request.
1214    ///  * We need to advance the tail pointer to make space for new events.
1215    fn next(&mut self) -> Option<Event> {
1216        if self.header().data_tail < self.header().data_head {
1217            let offset: isize = (self.header().data_tail as usize % self.events_size) as isize;
1218
1219            let mut bytes_read = 0;
1220            let event_ptr = unsafe { self.events().offset(offset) };
1221            let event: EventHeader = unsafe { EventHeader::copy_from_raw_ptr(event_ptr) };
1222            bytes_read += mem::size_of::<EventHeader>() as u64;
1223
1224            let record = match event.event_type {
1225                perf_event::PERF_RECORD_MMAP => {
1226                    let record: MMAPRecord = unsafe { MMAPRecord::copy_from_raw_ptr(event_ptr) };
1227                    Some(Event::MMAP(record))
1228                }
1229                perf_event::PERF_RECORD_LOST => {
1230                    let record: LostRecord = unsafe { LostRecord::copy_from_raw_ptr(event_ptr) };
1231                    Some(Event::Lost(record))
1232                }
1233                perf_event::PERF_RECORD_COMM => {
1234                    let record: CommRecord = unsafe { CommRecord::copy_from_raw_ptr(event_ptr) };
1235                    Some(Event::Comm(record))
1236                }
1237                perf_event::PERF_RECORD_EXIT => {
1238                    let record: ExitRecord = unsafe { ExitRecord::copy_from_raw_ptr(event_ptr) };
1239                    Some(Event::Exit(record))
1240                }
1241                perf_event::PERF_RECORD_THROTTLE => {
1242                    let record: ThrottleRecord =
1243                        unsafe { ThrottleRecord::copy_from_raw_ptr(event_ptr) };
1244                    Some(Event::Throttle(record))
1245                }
1246                perf_event::PERF_RECORD_UNTHROTTLE => {
1247                    let record: ThrottleRecord =
1248                        unsafe { ThrottleRecord::copy_from_raw_ptr(event_ptr) };
1249                    Some(Event::Unthrottle(record))
1250                }
1251                perf_event::PERF_RECORD_FORK => {
1252                    let record: ForkRecord = unsafe { ForkRecord::copy_from_raw_ptr(event_ptr) };
1253                    Some(Event::Fork(record))
1254                }
1255                perf_event::PERF_RECORD_READ => {
1256                    let record: ReadRecord = unsafe { ReadRecord::copy_from_raw_ptr(event_ptr) };
1257                    Some(Event::Read(record))
1258                }
1259                perf_event::PERF_RECORD_SAMPLE => {
1260                    let record: SampleRecord =
1261                        unsafe { SampleRecord::copy_from_raw_ptr(event_ptr) };
1262                    Some(Event::Sample(record))
1263                }
1264                perf_event::PERF_RECORD_MMAP2 => {
1265                    // XXX: Not described in the man page?
1266                    unreachable!();
1267                }
1268                _ => {
1269                    panic!("Unknown type!");
1270                }
1271            };
1272
1273            //bytes_read += size;
1274
1275            let header = self.mut_header();
1276            header.data_tail = bytes_read;
1277
1278            record
1279        } else {
1280            None
1281        }
1282    }
1283}
1284
1285impl SamplingPerfCounter {
1286    pub fn new(pc: PerfCounter) -> SamplingPerfCounter {
1287        let size = (1 + 16) * 4096;
1288        let res: mmap::MemoryMap = mmap::MemoryMap::new(
1289            size,
1290            &[
1291                mmap::MapOption::MapFd(pc.fd),
1292                mmap::MapOption::MapOffset(0),
1293                mmap::MapOption::MapNonStandardFlags(MAP_SHARED),
1294                mmap::MapOption::MapReadable,
1295            ],
1296        )
1297        .unwrap();
1298
1299        SamplingPerfCounter {
1300            pc,
1301            map: res,
1302            events_size: 16 * 4096,
1303        }
1304    }
1305
1306    fn header(&self) -> &MMAPPage {
1307        unsafe { mem::transmute::<*mut u8, &MMAPPage>(self.map.data()) }
1308    }
1309
1310    fn mut_header(&mut self) -> &mut MMAPPage {
1311        unsafe { mem::transmute::<*mut u8, &mut MMAPPage>(self.map.data()) }
1312    }
1313
1314    fn events(&self) -> *const u8 {
1315        unsafe { self.map.data().offset(4096) }
1316    }
1317
1318    pub fn print(&mut self) {
1319        let event: Event = self.next().unwrap();
1320        println!("{:?}", event);
1321        match event {
1322            Event::MMAP(a) => println!("{:?}", a.filename),
1323            Event::Lost(a) => println!("{:?}", a),
1324            Event::Comm(a) => println!("{:?}", a),
1325            Event::Exit(a) => println!("{:?}", a),
1326            Event::Throttle(a) => println!("{:?}", a),
1327            Event::Unthrottle(a) => println!("{:?}", a),
1328            Event::Fork(a) => println!("{:?}", a),
1329            Event::Read(a) => println!("{:?}", a),
1330            Event::Sample(a) => println!("{:?}", a),
1331        }
1332    }
1333}