linux_perf_event_reader/
event_record.rs

1use crate::raw_data::RawData;
2use crate::utils::HexValue;
3use crate::{
4    constants, CommonData, CpuMode, Endianness, RecordIdParseInfo, RecordParseInfo, RecordType,
5    SampleRecord,
6};
7use byteorder::{BigEndian, ByteOrder, LittleEndian};
8use std::fmt;
9
10/// Get the ID from an event record, if the sample format includes SampleFormat::IDENTIFIER.
11///
12/// This can be used if it is not known which `perf_event_attr` describes this record,
13/// but only if all potential attrs include `PERF_SAMPLE_IDENTIFIER`.
14/// Once the record's ID is known, this ID can be mapped to the right attr,
15/// and then the information from the attr can be used to parse the rest of this record.
16pub fn get_record_identifier<T: ByteOrder>(
17    record_type: RecordType,
18    mut data: RawData,
19    sample_id_all: bool,
20) -> Option<u64> {
21    if record_type.is_user_type() {
22        None
23    } else if record_type == RecordType::SAMPLE {
24        // if IDENTIFIER is set, every SAMPLE record starts with the event ID.
25        data.read_u64::<T>().ok()
26    } else if sample_id_all {
27        // if IDENTIFIER and SAMPLE_ID_ALL are set, every non-SAMPLE record ends with the event ID.
28        let id_offset_from_start = data.len().checked_sub(8)?;
29        data.skip(id_offset_from_start).ok()?;
30        data.read_u64::<T>().ok()
31    } else {
32        None
33    }
34}
35
36/// Get the ID from an event record, with the help of `RecordIdParseInfo`.
37///
38/// This can be used if it is not known which `perf_event_attr` describes this record,
39/// but only if all potential attrs have the same `RecordIdParseInfo`.
40/// Once the record's ID is known, this ID can be mapped to the right attr,
41/// and then the information from the attr can be used to parse the rest of this record.
42pub fn get_record_id<T: ByteOrder>(
43    record_type: RecordType,
44    mut data: RawData,
45    parse_info: &RecordIdParseInfo,
46) -> Option<u64> {
47    if record_type.is_user_type() {
48        return None;
49    }
50
51    if record_type == RecordType::SAMPLE {
52        if let Some(id_offset_from_start) = parse_info.sample_record_id_offset_from_start {
53            data.skip(id_offset_from_start as usize).ok()?;
54            data.read_u64::<T>().ok()
55        } else {
56            None
57        }
58    } else if let Some(id_offset_from_end) = parse_info.nonsample_record_id_offset_from_end {
59        let id_offset_from_start = data.len().checked_sub(id_offset_from_end as usize)?;
60        data.skip(id_offset_from_start).ok()?;
61        data.read_u64::<T>().ok()
62    } else {
63        None
64    }
65}
66
67/// Get the timestamp from an event record, with the help of `RecordParseInfo`.
68///
69/// This can be used for record sorting, without having to wrap the record into
70/// a `RawRecord`.o
71pub fn get_record_timestamp<T: ByteOrder>(
72    record_type: RecordType,
73    mut data: RawData,
74    parse_info: &RecordParseInfo,
75) -> Option<u64> {
76    if record_type.is_user_type() {
77        return None;
78    }
79
80    if record_type == RecordType::SAMPLE {
81        if let Some(time_offset_from_start) = parse_info.sample_record_time_offset_from_start {
82            data.skip(time_offset_from_start as usize).ok()?;
83            data.read_u64::<T>().ok()
84        } else {
85            None
86        }
87    } else if let Some(time_offset_from_end) = parse_info.nonsample_record_time_offset_from_end {
88        let time_offset_from_start = data.len().checked_sub(time_offset_from_end as usize)?;
89        data.skip(time_offset_from_start).ok()?;
90        data.read_u64::<T>().ok()
91    } else {
92        None
93    }
94}
95
/// A fully parsed event record.
///
/// Produced by `RawEventRecord::parse`. Record types without a dedicated
/// variant are handed back unparsed as [`EventRecord::Raw`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::large_enum_variant)]
#[non_exhaustive]
pub enum EventRecord<'a> {
    /// Parsed from a `RecordType::SAMPLE` record.
    Sample(SampleRecord<'a>),
    /// Parsed from a `RecordType::COMM` record.
    Comm(CommOrExecRecord<'a>),
    /// Parsed from a `RecordType::EXIT` record.
    Exit(ForkOrExitRecord),
    /// Parsed from a `RecordType::FORK` record.
    Fork(ForkOrExitRecord),
    /// Parsed from a `RecordType::MMAP` record.
    Mmap(MmapRecord<'a>),
    /// Parsed from a `RecordType::MMAP2` record.
    Mmap2(Mmap2Record<'a>),
    /// Parsed from a `RecordType::LOST` record.
    Lost(LostRecord),
    /// Parsed from a `RecordType::THROTTLE` record.
    Throttle(ThrottleRecord),
    /// Parsed from a `RecordType::UNTHROTTLE` record.
    Unthrottle(ThrottleRecord),
    /// Parsed from a `RecordType::SWITCH` or `RecordType::SWITCH_CPU_WIDE` record.
    ContextSwitch(ContextSwitchRecord),
    /// Any other record type, kept in its unparsed form.
    Raw(RawEventRecord<'a>),
}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub struct ForkOrExitRecord {
116    pub pid: i32,
117    pub ppid: i32,
118    pub tid: i32,
119    pub ptid: i32,
120    pub timestamp: u64,
121}
122
123impl ForkOrExitRecord {
124    pub fn parse<T: ByteOrder>(data: RawData) -> Result<Self, std::io::Error> {
125        let mut cur = data;
126
127        let pid = cur.read_i32::<T>()?;
128        let ppid = cur.read_i32::<T>()?;
129        let tid = cur.read_i32::<T>()?;
130        let ptid = cur.read_i32::<T>()?;
131        let timestamp = cur.read_u64::<T>()?;
132
133        Ok(Self {
134            pid,
135            ppid,
136            tid,
137            ptid,
138            timestamp,
139        })
140    }
141}
142
143#[derive(Clone, PartialEq, Eq)]
144pub struct CommOrExecRecord<'a> {
145    pub pid: i32,
146    pub tid: i32,
147    pub name: RawData<'a>,
148    pub is_execve: bool,
149}
150
151impl<'a> CommOrExecRecord<'a> {
152    pub fn parse<T: ByteOrder>(data: RawData<'a>, misc: u16) -> Result<Self, std::io::Error> {
153        let mut cur = data;
154        let pid = cur.read_i32::<T>()?;
155        let tid = cur.read_i32::<T>()?;
156        let name = cur.read_string().unwrap_or(cur); // TODO: return error if no string terminator found
157
158        // TODO: Maybe feature-gate this on 3.16+
159        let is_execve = misc & constants::PERF_RECORD_MISC_COMM_EXEC != 0;
160
161        Ok(Self {
162            pid,
163            tid,
164            name,
165            is_execve,
166        })
167    }
168}
169
170impl fmt::Debug for CommOrExecRecord<'_> {
171    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
172        use std::str;
173
174        let mut map = fmt.debug_map();
175        map.entry(&"pid", &self.pid).entry(&"tid", &self.tid);
176
177        if let Ok(string) = str::from_utf8(&self.name.as_slice()) {
178            map.entry(&"name", &string);
179        } else {
180            map.entry(&"name", &self.name);
181        }
182
183        map.entry(&"is_execve", &self.is_execve);
184        map.finish()
185    }
186}
187
188/// These aren't emitted by the kernel any more - the kernel uses MMAP2 events
189/// these days.
190/// However, `perf record` still emits synthetic MMAP events (not MMAP2!) for
191/// the kernel image. So if you want to symbolicate kernel addresses you still
192/// need to process these.
193/// The kernel image MMAP events have pid -1.
194#[derive(Clone, PartialEq, Eq)]
195pub struct MmapRecord<'a> {
196    pub pid: i32,
197    pub tid: i32,
198    pub address: u64,
199    pub length: u64,
200    pub page_offset: u64,
201    pub is_executable: bool,
202    pub cpu_mode: CpuMode,
203    pub path: RawData<'a>,
204}
205
206impl<'a> MmapRecord<'a> {
207    pub fn parse<T: ByteOrder>(data: RawData<'a>, misc: u16) -> Result<Self, std::io::Error> {
208        let mut cur = data;
209
210        // struct {
211        //   struct perf_event_header header;
212        //
213        //   u32 pid, tid;
214        //   u64 addr;
215        //   u64 len;
216        //   u64 pgoff;
217        //   char filename[];
218        //   struct sample_id sample_id;
219        // };
220
221        let pid = cur.read_i32::<T>()?;
222        let tid = cur.read_i32::<T>()?;
223        let address = cur.read_u64::<T>()?;
224        let length = cur.read_u64::<T>()?;
225        let page_offset = cur.read_u64::<T>()?;
226        let path = cur.read_string().unwrap_or(cur); // TODO: return error if no string terminator found
227        let is_executable = misc & constants::PERF_RECORD_MISC_MMAP_DATA == 0;
228
229        Ok(MmapRecord {
230            pid,
231            tid,
232            address,
233            length,
234            page_offset,
235            is_executable,
236            cpu_mode: CpuMode::from_misc(misc),
237            path,
238        })
239    }
240}
241
242impl fmt::Debug for MmapRecord<'_> {
243    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
244        fmt.debug_map()
245            .entry(&"pid", &self.pid)
246            .entry(&"tid", &self.tid)
247            .entry(&"address", &HexValue(self.address))
248            .entry(&"length", &HexValue(self.length))
249            .entry(&"page_offset", &HexValue(self.page_offset))
250            .entry(&"cpu_mode", &self.cpu_mode)
251            .entry(&"path", &&*String::from_utf8_lossy(&self.path.as_slice()))
252            .finish()
253    }
254}
255
/// Identifies the file behind an MMAP2 record.
///
/// `Mmap2Record::parse` picks the variant based on whether
/// `PERF_RECORD_MISC_MMAP_BUILD_ID` is set in the record's `misc` value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Mmap2FileId {
    /// Used when the build ID flag is not set.
    InodeAndVersion(Mmap2InodeAndVersion),
    /// Used when the build ID flag is set. Holds the build ID bytes, truncated
    /// to the length stated in the record.
    BuildId(Vec<u8>),
}
261
262#[derive(Clone, PartialEq, Eq)]
263pub struct Mmap2Record<'a> {
264    pub pid: i32,
265    pub tid: i32,
266    pub address: u64,
267    pub length: u64,
268    pub page_offset: u64,
269    pub file_id: Mmap2FileId,
270    pub protection: u32,
271    pub flags: u32,
272    pub cpu_mode: CpuMode,
273    pub path: RawData<'a>,
274}
275
276impl<'a> Mmap2Record<'a> {
277    pub fn parse<T: ByteOrder>(data: RawData<'a>, misc: u16) -> Result<Self, std::io::Error> {
278        let mut cur = data;
279
280        let pid = cur.read_i32::<T>()?;
281        let tid = cur.read_i32::<T>()?;
282        let address = cur.read_u64::<T>()?;
283        let length = cur.read_u64::<T>()?;
284        let page_offset = cur.read_u64::<T>()?;
285        let file_id = if misc & constants::PERF_RECORD_MISC_MMAP_BUILD_ID != 0 {
286            let build_id_len = cur.read_u8()?;
287            assert!(build_id_len <= 20);
288            let _align = cur.read_u8()?;
289            let _align = cur.read_u16::<T>()?;
290            let mut build_id_bytes = [0; 20];
291            cur.read_exact(&mut build_id_bytes)?;
292            Mmap2FileId::BuildId(build_id_bytes[..build_id_len as usize].to_owned())
293        } else {
294            let major = cur.read_u32::<T>()?;
295            let minor = cur.read_u32::<T>()?;
296            let inode = cur.read_u64::<T>()?;
297            let inode_generation = cur.read_u64::<T>()?;
298            Mmap2FileId::InodeAndVersion(Mmap2InodeAndVersion {
299                major,
300                minor,
301                inode,
302                inode_generation,
303            })
304        };
305        let protection = cur.read_u32::<T>()?;
306        let flags = cur.read_u32::<T>()?;
307        let path = cur.read_string().unwrap_or(cur); // TODO: return error if no string terminator found
308
309        Ok(Mmap2Record {
310            pid,
311            tid,
312            address,
313            length,
314            page_offset,
315            file_id,
316            protection,
317            flags,
318            cpu_mode: CpuMode::from_misc(misc),
319            path,
320        })
321    }
322}
323
324impl fmt::Debug for Mmap2Record<'_> {
325    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
326        fmt.debug_map()
327            .entry(&"pid", &self.pid)
328            .entry(&"tid", &self.tid)
329            .entry(&"address", &HexValue(self.address))
330            .entry(&"length", &HexValue(self.length))
331            .entry(&"page_offset", &HexValue(self.page_offset))
332            // .entry(&"major", &self.major)
333            // .entry(&"minor", &self.minor)
334            // .entry(&"inode", &self.inode)
335            // .entry(&"inode_generation", &self.inode_generation)
336            .entry(&"protection", &HexValue(self.protection as _))
337            .entry(&"flags", &HexValue(self.flags as _))
338            .entry(&"cpu_mode", &self.cpu_mode)
339            .entry(&"path", &&*String::from_utf8_lossy(&self.path.as_slice()))
340            .finish()
341    }
342}
343
/// File identification of an MMAP2 record that carries no build ID.
///
/// Filled in by `Mmap2Record::parse` when `PERF_RECORD_MISC_MMAP_BUILD_ID` is
/// not set in the record's `misc` value. The fields are declared in the order
/// in which they are read from the record body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Mmap2InodeAndVersion {
    // NOTE(review): presumably the major ID of the underlying device - confirm
    // against the `perf_event_open(2)` description of PERF_RECORD_MMAP2.
    pub major: u32,
    // NOTE(review): presumably the minor ID of the underlying device - confirm.
    pub minor: u32,
    pub inode: u64,
    pub inode_generation: u64,
}
351
352#[derive(Debug, Clone, PartialEq, Eq)]
353pub struct LostRecord {
354    pub id: u64,
355    pub count: u64,
356}
357
358impl LostRecord {
359    pub fn parse<T: ByteOrder>(data: RawData) -> Result<Self, std::io::Error> {
360        let mut cur = data;
361
362        let id = cur.read_u64::<T>()?;
363        let count = cur.read_u64::<T>()?;
364        Ok(LostRecord { id, count })
365    }
366}
367
368#[derive(Debug, Clone, PartialEq, Eq)]
369pub struct ThrottleRecord {
370    pub id: u64,
371    pub timestamp: u64,
372}
373
374impl ThrottleRecord {
375    pub fn parse<T: ByteOrder>(data: RawData) -> Result<Self, std::io::Error> {
376        let mut cur = data;
377
378        let timestamp = cur.read_u64::<T>()?;
379        let id = cur.read_u64::<T>()?;
380        Ok(ThrottleRecord { id, timestamp })
381    }
382}
383
384#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
385pub enum ContextSwitchRecord {
386    In {
387        prev_pid: Option<i32>,
388        prev_tid: Option<i32>,
389    },
390    Out {
391        next_pid: Option<i32>,
392        next_tid: Option<i32>,
393        preempted: TaskWasPreempted,
394    },
395}
396
397impl ContextSwitchRecord {
398    pub fn from_misc(misc: u16) -> Self {
399        Self::from_misc_pid_tid(misc, None, None)
400    }
401
402    pub fn parse_cpu_wide<T: ByteOrder>(data: RawData, misc: u16) -> Result<Self, std::io::Error> {
403        let mut cur = data;
404
405        let pid = cur.read_i32::<T>()?;
406        let tid = cur.read_i32::<T>()?;
407        Ok(Self::from_misc_pid_tid(misc, Some(pid), Some(tid)))
408    }
409
410    pub fn from_misc_pid_tid(misc: u16, pid: Option<i32>, tid: Option<i32>) -> Self {
411        let is_out = misc & constants::PERF_RECORD_MISC_SWITCH_OUT != 0;
412        if is_out {
413            let is_out_preempt = misc & constants::PERF_RECORD_MISC_SWITCH_OUT_PREEMPT != 0;
414            ContextSwitchRecord::Out {
415                next_pid: pid,
416                next_tid: tid,
417                preempted: if is_out_preempt {
418                    TaskWasPreempted::Yes
419                } else {
420                    TaskWasPreempted::No
421                },
422            }
423        } else {
424            ContextSwitchRecord::In {
425                prev_pid: pid,
426                prev_tid: tid,
427            }
428        }
429    }
430}
431
/// Whether a task was in the `TASK_RUNNING` state when it was switched
/// away from.
///
/// This helps in understanding whether a workload is CPU or IO bound.
///
/// `ContextSwitchRecord::from_misc_pid_tid` derives this value from the
/// `PERF_RECORD_MISC_SWITCH_OUT_PREEMPT` bit of a record's `misc` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TaskWasPreempted {
    /// When switching out, the task was in the `TASK_RUNNING` state.
    Yes,
    /// When switching out, the task was in a non-running state.
    No,
}
443
444/// An unparsed event record.
445///
446/// This can be converted into a parsed record by calling `.parse()`.
447///
448/// The raw record also provides access to "common data" like the ID, timestamp,
449/// tid etc., i.e. the information that was requested with [`SampleFormat`](crate::SampleFormat) and
450/// [`AttrFlags::SAMPLE_ID_ALL`](crate::AttrFlags::SAMPLE_ID_ALL).
451#[derive(Clone, PartialEq, Eq)]
452pub struct RawEventRecord<'a> {
453    /// The record type. Must be a builtin type, i.e. not a user type.
454    pub record_type: RecordType,
455    /// The `misc` value on this record.
456    pub misc: u16,
457    /// The raw bytes in the body of this record.
458    pub data: RawData<'a>,
459    /// The parse info from our corresponding evnt's attr.
460    pub parse_info: RecordParseInfo,
461}
462
463impl<'a> RawEventRecord<'a> {
464    /// Create a new `RawEventRecord`. Must only be called if `record_type.is_builtin_type()` is `true`.
465    pub fn new(
466        record_type: RecordType,
467        misc: u16,
468        data: RawData<'a>,
469        parse_info: RecordParseInfo,
470    ) -> Self {
471        Self {
472            record_type,
473            misc,
474            data,
475            parse_info,
476        }
477    }
478
479    /// Parse "common data" on this record, see [`CommonData`].
480    ///
481    /// The available information is determined by the event attr, specifically
482    /// by the requested [`SampleFormat`](crate::SampleFormat) and by the
483    /// presence of the [`AttrFlags::SAMPLE_ID_ALL`](crate::AttrFlags::SAMPLE_ID_ALL)
484    /// flag: The `SampleFormat` determines the available fields, and the
485    /// `SAMPLE_ID_ALL` flag determines the record types on which these fields
486    /// are available. If `SAMPLE_ID_ALL` is set, the requested fields are
487    /// available on all records, otherwise only on sample records
488    /// ([`RecordType::SAMPLE`]).
489    pub fn common_data(&self) -> Result<CommonData, std::io::Error> {
490        if self.record_type.is_user_type() {
491            return Ok(Default::default());
492        }
493
494        if self.record_type == RecordType::SAMPLE {
495            CommonData::parse_sample(self.data, &self.parse_info)
496        } else {
497            CommonData::parse_nonsample(self.data, &self.parse_info)
498        }
499    }
500
501    /// The record timestamp, if available.
502    pub fn timestamp(&self) -> Option<u64> {
503        match self.parse_info.endian {
504            Endianness::LittleEndian => self.timestamp_impl::<LittleEndian>(),
505            Endianness::BigEndian => self.timestamp_impl::<BigEndian>(),
506        }
507    }
508
509    fn timestamp_impl<T: ByteOrder>(&self) -> Option<u64> {
510        get_record_timestamp::<T>(self.record_type, self.data, &self.parse_info)
511    }
512
513    /// The ID, if available.
514    pub fn id(&self) -> Option<u64> {
515        match self.parse_info.endian {
516            Endianness::LittleEndian => self.id_impl::<LittleEndian>(),
517            Endianness::BigEndian => self.id_impl::<BigEndian>(),
518        }
519    }
520
521    fn id_impl<T: ByteOrder>(&self) -> Option<u64> {
522        get_record_id::<T>(self.record_type, self.data, &self.parse_info.id_parse_info)
523    }
524
525    /// Parses this raw record into an [`EventRecord`].
526    pub fn parse(&self) -> Result<EventRecord<'a>, std::io::Error> {
527        match self.parse_info.endian {
528            Endianness::LittleEndian => self.parse_impl::<LittleEndian>(),
529            Endianness::BigEndian => self.parse_impl::<BigEndian>(),
530        }
531    }
532
533    fn parse_impl<T: ByteOrder>(&self) -> Result<EventRecord<'a>, std::io::Error> {
534        let parse_info = &self.parse_info;
535        let event = match self.record_type {
536            // Kernel built-in record types
537            RecordType::MMAP => EventRecord::Mmap(MmapRecord::parse::<T>(self.data, self.misc)?),
538            RecordType::LOST => EventRecord::Lost(LostRecord::parse::<T>(self.data)?),
539            RecordType::COMM => {
540                EventRecord::Comm(CommOrExecRecord::parse::<T>(self.data, self.misc)?)
541            }
542            RecordType::EXIT => EventRecord::Exit(ForkOrExitRecord::parse::<T>(self.data)?),
543            RecordType::THROTTLE => EventRecord::Throttle(ThrottleRecord::parse::<T>(self.data)?),
544            RecordType::UNTHROTTLE => {
545                EventRecord::Unthrottle(ThrottleRecord::parse::<T>(self.data)?)
546            }
547            RecordType::FORK => EventRecord::Fork(ForkOrExitRecord::parse::<T>(self.data)?),
548            // READ
549            RecordType::SAMPLE => {
550                EventRecord::Sample(SampleRecord::parse::<T>(self.data, self.misc, parse_info)?)
551            }
552            RecordType::MMAP2 => EventRecord::Mmap2(Mmap2Record::parse::<T>(self.data, self.misc)?),
553            // AUX
554            // ITRACE_START
555            // LOST_SAMPLES
556            RecordType::SWITCH => {
557                EventRecord::ContextSwitch(ContextSwitchRecord::from_misc(self.misc))
558            }
559            RecordType::SWITCH_CPU_WIDE => EventRecord::ContextSwitch(
560                ContextSwitchRecord::parse_cpu_wide::<T>(self.data, self.misc)?,
561            ),
562            // NAMESPACES
563            // KSYMBOL
564            // BPF_EVENT
565            // CGROUP
566            // TEXT_POKE
567            // AUX_OUTPUT_HW_ID
568            _ => EventRecord::Raw(self.clone()),
569        };
570        Ok(event)
571    }
572}
573
574impl fmt::Debug for RawEventRecord<'_> {
575    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
576        fmt.debug_map()
577            .entry(&"record_type", &self.record_type)
578            .entry(&"misc", &self.misc)
579            .entry(&"data.len", &self.data.len())
580            .finish()
581    }
582}