use linux_perf_event_reader::{Endianness, RawData};
use std::io::{Read, Seek};

use super::buffered_reader::BufferedReader;
use super::error::JitDumpError;
use super::header::JitDumpHeader;
use super::read_exact::ReadExactOrUntilEof;
use super::record::{JitDumpRawRecord, JitDumpRecordHeader, JitDumpRecordType};

/// Parses a jitdump file and allows iterating over records.
///
/// This reader works with complete jitdump files as well as with partial files
/// which are still being written to. This makes it useful in live-profiling
/// settings.
///
/// The records refer to memory owned by the reader, to minimize copies.
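///
/// # Example
///
/// A minimal sketch of iterating over a complete file; the file name and the
/// public `record_type`/`timestamp` fields on the returned record are
/// assumptions:
///
/// ```ignore
/// use std::fs::File;
///
/// let file = File::open("jit-12345.dump")?;
/// let mut reader = JitDumpReader::new(file)?;
/// while let Some(record) = reader.next_record()? {
///     println!("{:?} at t={}", record.record_type, record.timestamp);
/// }
/// ```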
#[derive(Debug, Clone)]
pub struct JitDumpReader<R: Read> {
    reader: BufferedReader<R>,
    header: JitDumpHeader,
    endian: Endianness,
    pending_record_header: Option<JitDumpRecordHeader>,
    current_record_start_offset: u64,
}

impl<R: Read> JitDumpReader<R> {
    /// Create a new `JitDumpReader`. `JitDumpReader` does its own buffering, so
    /// there is no need to wrap a [`File`](std::fs::File) in a
    /// [`BufReader`](std::io::BufReader).
    pub fn new(reader: R) -> Result<Self, JitDumpError> {
        Self::new_with_buffer_size(reader, 4 * 1024)
    }

    /// Create a new `JitDumpReader`, with a manually-specified buffer chunk size.
    pub fn new_with_buffer_size(mut reader: R, buffer_size: usize) -> Result<Self, JitDumpError> {
        let mut buf = vec![0; buffer_size];
        let first_data_len = reader
            .read_exact_or_until_eof(&mut buf)
            .map_err(JitDumpError::Io)?;

        let first_data = &buf[..first_data_len];
        let header = JitDumpHeader::parse(RawData::Single(first_data))?;
        let total_header_size = header.total_size;
        // The magic is the ASCII string "JiTD", written as a u32 in the file's
        // native endian. Read as raw bytes, a little-endian file therefore
        // starts with b"DTiJ" and a big-endian file with b"JiTD".
        let endian = match &header.magic {
            b"DTiJ" => Endianness::LittleEndian,
            b"JiTD" => Endianness::BigEndian,
            _ => panic!("invalid jitdump magic bytes: {:?}", header.magic),
        };

        Ok(Self {
            reader: BufferedReader::new_with_partially_read_buffer(
                reader,
                buf,
                total_header_size as usize,
                first_data_len,
            ),
            header,
            endian,
            pending_record_header: None,
            current_record_start_offset: total_header_size as u64,
        })
    }

    /// The file header.
    pub fn header(&self) -> &JitDumpHeader {
        &self.header
    }

    /// The file endian.
    pub fn endian(&self) -> Endianness {
        self.endian
    }

    /// Returns the header of the next record, or `Ok(None)` if not enough
    /// bytes for a full record header are available yet.
    pub fn next_record_header(&mut self) -> Result<Option<JitDumpRecordHeader>, std::io::Error> {
        if self.pending_record_header.is_none() {
            if let Some(record_header_bytes) =
                self.reader.consume_data(JitDumpRecordHeader::SIZE)?
            {
                self.pending_record_header =
                    Some(JitDumpRecordHeader::parse(self.endian, record_header_bytes).unwrap());
            }
        }
        Ok(self.pending_record_header.clone())
    }

    /// Returns the timestamp of the next record.
    ///
    /// When operating on partial files, `None` means that not enough bytes for the header
    /// of the next record are available. `Some` means that we have enough bytes for the
    /// header but we may not have enough bytes to get the entire record.
    ///
    /// If `next_record_timestamp` returns `Ok(Some(...))`, the next call to `next_record()`
    /// can still return `Ok(None)`!
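    ///
    /// A sketch of draining records up to a cutoff timestamp (`cutoff` and
    /// `process` are hypothetical):
    ///
    /// ```ignore
    /// while let Some(timestamp) = reader.next_record_timestamp()? {
    ///     if timestamp >= cutoff {
    ///         break;
    ///     }
    ///     match reader.next_record()? {
    ///         Some(record) => process(record),
    ///         // Header was available, but the record body isn't complete yet.
    ///         None => break,
    ///     }
    /// }
    /// ```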
    pub fn next_record_timestamp(&mut self) -> Result<Option<u64>, std::io::Error> {
        Ok(self.next_record_header()?.map(|r| r.timestamp))
    }

    /// Returns the record type of the next record.
    pub fn next_record_type(&mut self) -> Result<Option<JitDumpRecordType>, std::io::Error> {
        Ok(self.next_record_header()?.map(|r| r.record_type))
    }

    /// Returns the file offset at which the next record (specifically its record header) starts.
    pub fn next_record_offset(&self) -> u64 {
        self.current_record_start_offset
    }

    /// Returns the next record.
    ///
    /// When operating on partial files, this will return `Ok(None)` if the entire record is
    /// not available yet. Future calls to `next_record` may return `Ok(Some)` if the
    /// data has become available in the meantime, because they will call `read` on `R` again.
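    ///
    /// A sketch of tailing a file that is still being written to; the polling
    /// interval is arbitrary and `process` is hypothetical:
    ///
    /// ```ignore
    /// loop {
    ///     match reader.next_record()? {
    ///         Some(record) => process(record),
    ///         None => std::thread::sleep(std::time::Duration::from_millis(100)),
    ///     }
    /// }
    /// ```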
    pub fn next_record(&mut self) -> Result<Option<JitDumpRawRecord>, std::io::Error> {
        let record_size = match self.next_record_header()? {
            Some(header) => header.total_size,
            None => return Ok(None),
        };
        // Guard against a corrupt record whose total_size is smaller than the header.
        let Some(body_size) = (record_size as usize).checked_sub(JitDumpRecordHeader::SIZE) else {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "record total_size smaller than the record header",
            ));
        };

        match self.reader.consume_data(body_size)? {
            Some(record_body_data) => {
                let record_header = self.pending_record_header.take().unwrap();
                let start_offset = self.current_record_start_offset;
                self.current_record_start_offset += record_size as u64;
                Ok(Some(JitDumpRawRecord {
                    endian: self.endian,
                    start_offset,
                    record_size,
                    record_type: record_header.record_type,
                    timestamp: record_header.timestamp,
                    body: record_body_data,
                }))
            }
            None => Ok(None),
        }
    }
}

impl<R: Read + Seek> JitDumpReader<R> {
    /// Skip the upcoming record. If this returns `true`, the record has been skipped.
    /// If it returns `false`, the reader could not seek far enough to skip the entire
    /// record (for example because this is a partial file which has not been fully
    /// written yet), and the next record remains unchanged from before the call to
    /// `skip_next_record`.
    ///
    /// You may want to call this if you've called `next_record_type` and have
    /// determined that you're not interested in the upcoming record. It saves having
    /// to read the full record into a contiguous slice of memory.
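    ///
    /// A sketch of keeping only one record type; the `JIT_CODE_LOAD` constant
    /// and `process` are assumptions, adjust to the real
    /// [`JitDumpRecordType`] API:
    ///
    /// ```ignore
    /// while let Some(record_type) = reader.next_record_type()? {
    ///     if record_type != JitDumpRecordType::JIT_CODE_LOAD {
    ///         if !reader.skip_next_record()? {
    ///             break; // The rest of the record hasn't been written yet.
    ///         }
    ///         continue;
    ///     }
    ///     match reader.next_record()? {
    ///         Some(record) => process(record),
    ///         None => break,
    ///     }
    /// }
    /// ```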
    pub fn skip_next_record(&mut self) -> Result<bool, std::io::Error> {
        let record_size = match self.next_record_header()? {
            Some(record_header) => record_header.total_size,
            None => return Ok(false),
        };
        // Guard against a corrupt record whose total_size is smaller than the header.
        let Some(body_size) = (record_size as usize).checked_sub(JitDumpRecordHeader::SIZE) else {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "record total_size smaller than the record header",
            ));
        };

        self.reader.skip_bytes(body_size)?;
        self.pending_record_header.take();
        self.current_record_start_offset += record_size as u64;
        Ok(true)
    }
}