linux_perf_data/
file_reader.rs

1use byteorder::{BigEndian, ByteOrder, LittleEndian};
2use linear_map::LinearMap;
3use linux_perf_event_reader::{
4    get_record_id, get_record_identifier, get_record_timestamp, AttrFlags, Endianness,
5    PerfEventHeader, RawData, RawEventRecord, RecordIdParseInfo, RecordParseInfo, RecordType,
6    SampleFormat,
7};
8
9use std::collections::{HashMap, VecDeque};
10use std::io::{Cursor, Read, Seek, SeekFrom};
11
12use super::error::{Error, ReadError};
13use super::feature_sections::AttributeDescription;
14use super::features::Feature;
15use super::header::PerfHeader;
16use super::perf_file::PerfFile;
17use super::record::{PerfFileRecord, RawUserRecord, UserRecordType};
18use super::section::PerfFileSection;
19use super::simpleperf;
20use super::sorter::Sorter;
21
22/// A parser for the perf.data file format.
///
/// [`PerfFileReader::parse_file`] reads the file header, the feature sections and the
/// event attributes up front and hands them back in `perf_file`. The records of the data
/// section are not read at that point; they are pulled incrementally through `record_iter`.
23///
24/// # Example
25///
26/// ```
27/// use linux_perf_data::{AttributeDescription, PerfFileReader, PerfFileRecord};
28///
29/// # fn wrapper() -> Result<(), linux_perf_data::Error> {
30/// let file = std::fs::File::open("perf.data")?;
31/// let reader = std::io::BufReader::new(file);
32/// let PerfFileReader { mut perf_file, mut record_iter } = PerfFileReader::parse_file(reader)?;
33/// let event_names: Vec<_> =
34///     perf_file.event_attributes().iter().filter_map(AttributeDescription::name).collect();
35/// println!("perf events: {}", event_names.join(", "));
36///
37/// while let Some(record) = record_iter.next_record(&mut perf_file)? {
38///     match record {
39///         PerfFileRecord::EventRecord { attr_index, record } => {
40///             let record_type = record.record_type;
41///             let parsed_record = record.parse()?;
42///             println!("{:?} for event {}: {:?}", record_type, attr_index, parsed_record);
43///         }
44///         PerfFileRecord::UserRecord(record) => {
45///             let record_type = record.record_type;
46///             let parsed_record = record.parse()?;
47///             println!("{:?}: {:?}", record_type, parsed_record);
48///         }
49///     }
50/// }
51/// # Ok(())
52/// # }
53/// ```
54pub struct PerfFileReader<R: Read> {
    /// File-level metadata gathered while parsing: the endianness, the feature set, the
    /// raw bytes of every feature section, and the event attribute descriptions.
55    pub perf_file: PerfFile,
    /// Reader for the records of the data section. When returned from `parse_file` the
    /// underlying reader is positioned at the start of the data section.
56    pub record_iter: PerfRecordIter<R>,
57}
58
59impl<C: Read + Seek> PerfFileReader<C> {
60    pub fn parse_file(mut cursor: C) -> Result<Self, Error> {
61        let header = PerfHeader::parse(&mut cursor)?;
62        match &header.magic {
63            b"PERFILE2" => {
64                Self::parse_file_impl::<LittleEndian>(cursor, header, Endianness::LittleEndian)
65            }
66            b"2ELIFREP" => {
67                Self::parse_file_impl::<BigEndian>(cursor, header, Endianness::BigEndian)
68            }
69            _ => Err(Error::UnrecognizedMagicValue(header.magic)),
70        }
71    }
72
73    fn parse_file_impl<T>(
74        mut cursor: C,
75        header: PerfHeader,
76        endian: Endianness,
77    ) -> Result<Self, Error>
78    where
79        T: ByteOrder,
80    {
81        // Read the section information for each feature, starting just after the data section.
82        let feature_pos = header.data_section.offset + header.data_section.size;
83        cursor.seek(SeekFrom::Start(feature_pos))?;
84        let mut feature_sections_info = Vec::new();
85        for feature in header.features.iter() {
86            let section = PerfFileSection::parse::<_, T>(&mut cursor)?;
87            feature_sections_info.push((feature, section));
88        }
89
90        let mut feature_sections = LinearMap::new();
91        for (feature, section) in feature_sections_info {
92            let offset = section.offset;
93            let size = usize::try_from(section.size).map_err(|_| Error::SectionSizeTooBig)?;
94            let mut data = vec![0; size];
95            cursor.seek(SeekFrom::Start(offset))?;
96            cursor.read_exact(&mut data)?;
97            feature_sections.insert(feature, data);
98        }
99
100        let attributes =
101            if let Some(event_desc_section) = feature_sections.get(&Feature::EVENT_DESC) {
102                AttributeDescription::parse_event_desc_section::<_, T>(Cursor::new(
103                    &event_desc_section[..],
104                ))?
105            } else if header.event_types_section.size != 0 {
106                AttributeDescription::parse_event_types_section::<_, T>(
107                    &mut cursor,
108                    &header.event_types_section,
109                    header.attr_size,
110                )?
111            } else if let Some(simpleperf_meta_info) =
112                feature_sections.get(&Feature::SIMPLEPERF_META_INFO)
113            {
114                let info_map = simpleperf::parse_meta_info_map(&simpleperf_meta_info[..])?;
115                let event_types = simpleperf::get_event_types(&info_map)
116                    .ok_or(Error::NoEventTypesInSimpleperfMetaInfo)?;
117                AttributeDescription::parse_simpleperf_attr_section::<_, T>(
118                    &mut cursor,
119                    &header.attr_section,
120                    header.attr_size,
121                    &event_types,
122                )?
123            } else {
124                AttributeDescription::parse_attr_section::<_, T>(
125                    &mut cursor,
126                    &header.attr_section,
127                    header.attr_size,
128                )?
129            };
130
131        let mut event_id_to_attr_index = HashMap::new();
132        for (attr_index, AttributeDescription { event_ids, .. }) in attributes.iter().enumerate() {
133            for event_id in event_ids {
134                event_id_to_attr_index.insert(*event_id, attr_index);
135            }
136        }
137
138        let parse_infos: Vec<_> = attributes
139            .iter()
140            .map(|attr| RecordParseInfo::new(&attr.attr, endian))
141            .collect();
142
143        let first_attr = attributes.first().ok_or(Error::NoAttributes)?;
144
145        let first_has_sample_id_all = first_attr.attr.flags.contains(AttrFlags::SAMPLE_ID_ALL);
146        let (first_parse_info, remaining_parse_infos) = parse_infos.split_first().unwrap();
147
148        let id_parse_infos = if remaining_parse_infos.is_empty() {
149            IdParseInfos::OnlyOneEvent
150        } else if remaining_parse_infos
151            .iter()
152            .all(|parse_info| parse_info.id_parse_info == first_parse_info.id_parse_info)
153        {
154            IdParseInfos::Same(first_parse_info.id_parse_info)
155        } else {
156            // Make sure that all attributes have IDENTIFIER and the same SAMPLE_ID_ALL setting.
157            // Otherwise we won't be able to know which attr a record belongs to; we need to know
158            // the record's ID for that, and we can only read the ID if it's in the same location
159            // regardless of attr.
160            // In theory we could make the requirements weaker, and take the record type into
161            // account for disambiguation. For example, if there are two events, but one of them
162            // only creates SAMPLE records and the other only non-SAMPLE records, we don't
163            // necessarily need IDENTIFIER in order to be able to read the record ID.
164            for (attr_index, AttributeDescription { attr, .. }) in attributes.iter().enumerate() {
165                if !attr.sample_format.contains(SampleFormat::IDENTIFIER) {
166                    return Err(Error::NoIdentifierDespiteMultiEvent(attr_index));
167                }
168                if attr.flags.contains(AttrFlags::SAMPLE_ID_ALL) != first_has_sample_id_all {
169                    return Err(Error::InconsistentSampleIdAllWithMultiEvent(attr_index));
170                }
171            }
172
173            IdParseInfos::PerAttribute(first_has_sample_id_all)
174        };
175
176        // Move the cursor to the start of the data section so that we can start
177        // reading records from it.
178        cursor.seek(SeekFrom::Start(header.data_section.offset))?;
179
180        let perf_file = PerfFile {
181            endian,
182            features: header.features,
183            feature_sections,
184            attributes,
185        };
186
187        let record_iter = PerfRecordIter {
188            reader: cursor,
189            endian,
190            id_parse_infos,
191            parse_infos,
192            event_id_to_attr_index,
193            read_offset: 0,
194            record_data_len: header.data_section.size,
195            sorter: Sorter::new(),
196            buffers_for_recycling: VecDeque::new(),
197            current_event_body: Vec::new(),
198        };
199
200        Ok(Self {
201            perf_file,
202            record_iter,
203        })
204    }
205}
206
207/// An iterator which incrementally reads and sorts the records from a perf.data file.
///
/// Records are pulled one at a time with `next_record`. Internally, record bodies are read
/// into byte buffers, queued in `sorter`, and handed out once the sorter releases them.
208pub struct PerfRecordIter<R: Read> {
    /// The underlying reader; records are read sequentially from its current position.
209    reader: R,
    /// Byte order of the file; selects the little- or big-endian decoding path.
210    endian: Endianness,
    /// Number of bytes of the data section consumed so far, counted by summing the
    /// `size` field of every record header that has been read. Starts at 0.
211    read_offset: u64,
    /// Total size of the data section in bytes; reading stops once `read_offset` reaches it.
212    record_data_len: u64,
    /// Backing storage of the record most recently returned to the caller. The returned
    /// record's raw data refers to this buffer.
213    current_event_body: Vec<u8>,
    /// Describes how the event ID of a record is located, so that the record can be
    /// attributed to one of the entries in `parse_infos`.
214    id_parse_infos: IdParseInfos,
215    /// Guaranteed to have at least one element
    /// (one entry per event attribute, in attribute order).
216    parse_infos: Vec<RecordParseInfo>,
    /// Maps an event ID to the index of the attribute that declared it.
217    event_id_to_attr_index: HashMap<u64, usize>,
    /// Holds records that have been read but not yet returned, keyed by `RecordSortKey`.
218    sorter: Sorter<RecordSortKey, PendingRecord>,
    /// Buffers of records that were already returned; they are reused for reading the
    /// bodies of later records instead of allocating new ones.
219    buffers_for_recycling: VecDeque<Vec<u8>>,
220}
221
222impl<R: Read> PerfRecordIter<R> {
223    /// Iterates the records in this file. The records are emitted in the
224    /// correct order, i.e. sorted by time.
225    ///
226    /// `next_record` does some internal buffering so that the sort order can
227    /// be guaranteed. This buffering takes advantage of `FINISHED_ROUND`
228    /// records so that we don't buffer more records than necessary.
229    pub fn next_record(
230        &mut self,
231        _perf_file: &mut PerfFile,
232    ) -> Result<Option<PerfFileRecord>, Error> {
233        if !self.sorter.has_more() {
234            self.read_next_round()?;
235        }
236        if let Some(pending_record) = self.sorter.get_next() {
237            let record = self.convert_pending_record(pending_record);
238            return Ok(Some(record));
239        }
240        Ok(None)
241    }
242
243    /// Reads events into self.sorter until a FINISHED_ROUND record is found
244    /// and self.sorter is non-empty, or until we've run out of records to read.
245    fn read_next_round(&mut self) -> Result<(), Error> {
246        if self.endian == Endianness::LittleEndian {
247            self.read_next_round_impl::<byteorder::LittleEndian>()
248        } else {
249            self.read_next_round_impl::<byteorder::BigEndian>()
250        }
251    }
252
253    /// Reads events into self.sorter until a FINISHED_ROUND record is found
254    /// and self.sorter is non-empty, or until we've run out of records to read.
255    fn read_next_round_impl<T: ByteOrder>(&mut self) -> Result<(), Error> {
256        while self.read_offset < self.record_data_len {
257            let offset = self.read_offset;
258            let header = PerfEventHeader::parse::<_, T>(&mut self.reader)?;
259            let size = header.size as usize;
260            if size < PerfEventHeader::STRUCT_SIZE {
261                return Err(Error::InvalidPerfEventSize);
262            }
263            self.read_offset += u64::from(header.size);
264
265            if UserRecordType::try_from(RecordType(header.type_))
266                == Some(UserRecordType::PERF_FINISHED_ROUND)
267            {
268                self.sorter.finish_round();
269                if self.sorter.has_more() {
270                    // The sorter is non-empty. We're done.
271                    return Ok(());
272                }
273
274                // Keep going so that we never exit the loop with sorter
275                // being empty, unless we've truly run out of data to read.
276                continue;
277            }
278
279            let event_body_len = size - PerfEventHeader::STRUCT_SIZE;
280            let mut buffer = self.buffers_for_recycling.pop_front().unwrap_or_default();
281            buffer.resize(event_body_len, 0);
282            self.reader
283                .read_exact(&mut buffer)
284                .map_err(|_| ReadError::PerfEventData)?;
285
286            let data = RawData::from(&buffer[..]);
287
288            let record_type = RecordType(header.type_);
289            let (attr_index, timestamp) = if record_type.is_builtin_type() {
290                let attr_index = match &self.id_parse_infos {
291                    IdParseInfos::OnlyOneEvent => 0,
292                    IdParseInfos::Same(id_parse_info) => {
293                        get_record_id::<T>(record_type, data, id_parse_info)
294                            .and_then(|id| self.event_id_to_attr_index.get(&id).cloned())
295                            .unwrap_or(0)
296                    }
297                    IdParseInfos::PerAttribute(sample_id_all) => {
298                        // We have IDENTIFIER (guaranteed by PerAttribute).
299                        get_record_identifier::<T>(record_type, data, *sample_id_all)
300                            .and_then(|id| self.event_id_to_attr_index.get(&id).cloned())
301                            .unwrap_or(0)
302                    }
303                };
304                let parse_info = self.parse_infos[attr_index];
305                let timestamp = get_record_timestamp::<T>(record_type, data, &parse_info);
306                (Some(attr_index), timestamp)
307            } else {
308                // user type
309                (None, None)
310            };
311
312            let sort_key = RecordSortKey { timestamp, offset };
313            let misc = header.misc;
314            let pending_record = PendingRecord {
315                record_type,
316                misc,
317                buffer,
318                attr_index,
319            };
320            self.sorter.insert_unordered(sort_key, pending_record);
321        }
322
323        // Everything has been read.
324        self.sorter.finish();
325
326        Ok(())
327    }
328
329    /// Converts pending_record into an RawRecord which references the data in self.current_event_body.
330    fn convert_pending_record(&mut self, pending_record: PendingRecord) -> PerfFileRecord {
331        let PendingRecord {
332            record_type,
333            misc,
334            buffer,
335            attr_index,
336            ..
337        } = pending_record;
338        let prev_buffer = std::mem::replace(&mut self.current_event_body, buffer);
339        self.buffers_for_recycling.push_back(prev_buffer);
340
341        let data = RawData::from(&self.current_event_body[..]);
342
343        if let Some(record_type) = UserRecordType::try_from(record_type) {
344            let endian = self.endian;
345            PerfFileRecord::UserRecord(RawUserRecord {
346                record_type,
347                misc,
348                data,
349                endian,
350            })
351        } else {
352            let attr_index = attr_index.unwrap();
353            let parse_info = self.parse_infos[attr_index];
354            let record = RawEventRecord {
355                record_type,
356                misc,
357                data,
358                parse_info,
359            };
360            PerfFileRecord::EventRecord { attr_index, record }
361        }
362    }
363}
364
/// A record that has been read from the data section and is waiting in the sorter
/// until it can be returned to the caller.
365#[derive(Clone, Debug, PartialEq, Eq)]
366struct PendingRecord {
    /// Record type taken from the `type_` field of the record header.
367    record_type: RecordType,
    /// The `misc` field of the record header, passed through unchanged.
368    misc: u16,
    /// The record body, i.e. the record's bytes without the header.
369    buffer: Vec<u8>,
    /// Index of the attribute this record belongs to. `Some` for builtin record types,
    /// `None` for all other records.
370    attr_index: Option<usize>,
371}
372
/// Key under which a pending record is inserted into the sorter.
///
/// The ordering is derived, so keys compare by `timestamp` first and by `offset` second;
/// the field order is therefore significant. A `None` timestamp orders before any `Some`.
373#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
374struct RecordSortKey {
    /// Timestamp extracted from the record, if one could be read. Always `None` for
    /// records that are not of a builtin type.
375    timestamp: Option<u64>,
    /// Byte offset of the record's header relative to the start of the data section.
    /// Breaks ties between equal timestamps in favor of file order.
376    offset: u64,
377}
378
/// Describes how the event ID of a record can be located, which is what allows a record
/// to be matched to the attribute (event) it belongs to.
379#[derive(Debug, Clone)]
380enum IdParseInfos {
381    /// There is only one event.
    /// Every record is attributed to attribute index 0 without reading an ID.
382    OnlyOneEvent,
383    /// There are multiple events, but all events are parsed the same way.
    /// Holds the ID parse info of the first attribute, which equals that of all others.
384    Same(RecordIdParseInfo),
385    /// All elements are guaranteed to have [`SampleFormat::IDENTIFIER`] set in `attr.sample_format`.
386    /// The inner element indicates sample_id_all.
    /// (The `SAMPLE_ID_ALL` flag is verified to be identical across all attributes.)
387    PerAttribute(bool),
388}