perf_tools/
pprof.rs

1// Copyright (C) 2022 The Perf-tools Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12// implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use chrono::{offset::LocalResult, DateTime, Local, NaiveDateTime, TimeZone};
17use lazy_static::lazy_static;
18use prost::Message;
19use regex::Regex;
20use std::collections::HashMap;
21use std::io;
22use std::time::Duration;
23
24pub mod pb {
25    include!(concat!(env!("OUT_DIR"), "/perftools.profiles.rs"));
26}
27
/// A single frame of a captured call stack.
///
/// Frames are compared and hashed field by field so that identical stacks can
/// be aggregated in a `HashMap`.
#[derive(PartialEq, Eq, Hash)]
struct Stack {
    /// Program counter of the frame, parsed from a hexadecimal address.
    pc: u64,
    /// Symbol text that follows the address on the stack line.
    func: String,
    /// Last space-separated token of the stack line.
    module: String,
}
34
35#[derive(PartialEq, Hash, std::cmp::Eq)]
36struct Sample {
37    stacks: Vec<Stack>,
38}
39
40struct PerfReader {
41    sample: HashMap<Sample, u64>,
42    captured_time: DateTime<Local>,
43    duration: Duration,
44    freq: u64,
45}
46
/// Builder for [`PprofConverter`].
///
/// It currently carries no options; create it with `Default::default()` and
/// call `build`.
#[derive(Default)]
pub struct PprofConverterBuilder {}
49
50impl PprofConverterBuilder {
51    pub fn build(&mut self) -> PprofConverter {
52        PprofConverter::new()
53    }
54}
55
56impl PerfReader {
57    fn new<R>(mut reader: R) -> io::Result<Self>
58    where
59        R: io::BufRead,
60    {
61        let mut buf = Vec::new();
62        let mut is_event_line = true;
63        let mut sample = HashMap::default();
64        let mut header = Vec::new();
65        let mut stack = Vec::new();
66        let mut start_usec = 0;
67        let mut end_usec = 0;
68
69        lazy_static! {
70            static ref RE: Regex = Regex::new(r"\S+\s+\d+\s+(\d+)\.(\d+)").unwrap();
71        }
72
73        loop {
74            buf.clear();
75            if let Ok(n) = reader.read_until(b'\n', &mut buf) {
76                if n == 0 {
77                    break;
78                }
79                let line = String::from_utf8_lossy(&buf);
80                if line.starts_with('#') {
81                    header.push(line.trim().to_string());
82                    continue;
83                }
84                let line = line.trim();
85                if line.is_empty() {
86                    // return one stack
87                    is_event_line = true;
88                    if !stack.is_empty() {
89                        let count = sample
90                            .entry(Sample {
91                                stacks: stack.split_off(0),
92                            })
93                            .or_insert(0);
94                        *count += 1;
95                    }
96                    continue;
97                }
98                if is_event_line {
99                    // event line
100                    if let Some(caps) = RE.captures(line) {
101                        let sec: u64 = caps.get(1).unwrap().as_str().parse().unwrap();
102                        let usec: u64 = caps.get(2).unwrap().as_str().parse().unwrap();
103                        if sample.is_empty() {
104                            start_usec = sec * 1_000_000 + usec;
105                        } else {
106                            end_usec = sec * 1_000_000 + usec;
107                        }
108                    }
109
110                    is_event_line = false;
111                    continue;
112                } else {
113                    // stack line
114                    let line = line.splitn(2, ' ').collect::<Vec<&str>>();
115                    if let Ok(pc) = u64::from_str_radix(line[0], 16) {
116                        let line = line[1].rsplitn(2, ' ').collect::<Vec<&str>>();
117                        stack.push(Stack {
118                            pc,
119                            func: line[1].to_string(),
120                            module: line[0].to_string(),
121                        });
122                    }
123                }
124            } else {
125                break;
126            }
127        }
128
129        if end_usec == 0 {
130            return Err(io::Error::new(io::ErrorKind::Other, "can't find duration"));
131        }
132
133        let (captured_time, freq) = PerfReader::verify_header(&header)?;
134
135        Ok(PerfReader {
136            sample,
137            captured_time,
138            duration: Duration::from_micros(end_usec - start_usec),
139            freq,
140        })
141    }
142
143    fn verify_header(header: &[String]) -> io::Result<(DateTime<Local>, u64)> {
144        let mut dt = None;
145        let mut freq = 0;
146
147        for h in header {
148            // sample_freq } = 997
149            let re = Regex::new(r"sample_freq\s+}\s+=\s+(\d+)").unwrap();
150            // captured on    : Thu Mar 10 10:45:19 2022
151            if h.contains("captured on") {
152                let line = h.splitn(2, ':').collect::<Vec<&str>>();
153                if line.len() == 2 {
154                    if let Ok(time) = NaiveDateTime::parse_from_str(line[1].trim(), "%c") {
155                        dt = if let LocalResult::Single(t) = Local.from_local_datetime(&time) {
156                            Some(t)
157                        } else {
158                            None
159                        };
160                    }
161                }
162            } else if let Some(caps) = re.captures(h) {
163                if let Some(v) = caps.get(1) {
164                    freq = v
165                        .as_str()
166                        .parse()
167                        .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e)))?;
168                }
169            }
170        }
171        let captured_time = dt.ok_or_else(|| {
172            io::Error::new(
173                io::ErrorKind::Other,
174                "captured time isn't found in the header",
175            )
176        })?;
177        if freq == 0 {}
178
179        Ok((captured_time, freq))
180    }
181}
182
183pub struct PprofConverter {
184    str_map: HashMap<String, u64>,
185
186    location: LocationId,
187    function: FunctionId,
188}
189
/// Allocator and lookup table for function ids.
struct FunctionId {
    /// Most recently assigned function id.
    next_id: u64,
    /// Function name -> (function id, string-table index of the name).
    map: HashMap<String, (u64, u64)>,
}
194
/// Allocator and lookup table for location ids.
struct LocationId {
    /// Most recently assigned location id.
    next_id: u64,
    /// Address -> (location id, function id).
    map: HashMap<u64, (u64, u64)>,
}
199
200impl PprofConverter {
201    fn new() -> Self {
202        let mut str_map: HashMap<String, u64> = HashMap::default();
203        for (i, s) in vec!["", "samples", "count", "cpu", "nanoseconds"]
204            .iter()
205            .enumerate()
206        {
207            str_map.insert(s.to_string(), i as u64);
208        }
209
210        PprofConverter {
211            str_map,
212            location: LocationId {
213                next_id: 0,
214                map: HashMap::default(),
215            },
216            function: FunctionId {
217                next_id: 0,
218                map: HashMap::default(),
219            },
220        }
221    }
222
223    fn location_id(&mut self, addr: u64, name: &str) -> u64 {
224        let loc_id = self.location.map.entry(addr).or_insert_with(|| {
225            self.location.next_id += 1;
226            let func_id = self
227                .function
228                .map
229                .entry(name.to_string())
230                .or_insert_with(|| {
231                    let s = self.str_map.len() as u64;
232                    let str_id = self.str_map.entry(name.to_string()).or_insert(s);
233                    self.function.next_id += 1;
234                    (self.function.next_id, *str_id)
235                });
236            (self.location.next_id, func_id.0)
237        });
238        loc_id.0
239    }
240
241    fn finish<R, W>(&mut self, reader: R, mut writer: W) -> io::Result<()>
242    where
243        R: io::BufRead,
244        W: io::Write,
245    {
246        let perf = PerfReader::new(reader)?;
247        let sample: Vec<pb::Sample> = perf
248            .sample
249            .iter()
250            .map(|(s, count)| pb::Sample {
251                location_id: s
252                    .stacks
253                    .iter()
254                    .map(|s| self.location_id(s.pc, &s.func))
255                    .collect(),
256                value: vec![
257                    *count as i64,
258                    *count as i64 * 1_000_000_000 / perf.freq as i64,
259                ],
260                label: Vec::new(),
261            })
262            .collect();
263
264        let mut function: Vec<pb::Function> = self
265            .function
266            .map
267            .iter()
268            .map(|(_, v)| pb::Function {
269                id: v.0,
270                name: v.1 as i64,
271                ..Default::default()
272            })
273            .collect();
274        function.sort_by(|a, b| a.id.cmp(&b.id));
275
276        let mut string_table: Vec<(String, u64)> =
277            self.str_map.iter().map(|(k, v)| (k.clone(), *v)).collect();
278        string_table.sort_by(|a, b| a.1.cmp(&b.1));
279
280        let mut location: Vec<pb::Location> = self
281            .location
282            .map
283            .iter()
284            .map(|(k, v)| pb::Location {
285                id: v.0,
286                address: *k,
287                line: vec![pb::Line {
288                    function_id: v.1,
289                    line: 0,
290                }],
291                ..Default::default()
292            })
293            .collect();
294        location.sort_by(|a, b| a.id.cmp(&b.id));
295
296        let mut content = Vec::new();
297        pb::Profile {
298            sample_type: vec![
299                pb::ValueType { r#type: 1, unit: 2 },
300                pb::ValueType { r#type: 3, unit: 4 },
301            ],
302            sample,
303            location,
304            function,
305            time_nanos: perf.captured_time.timestamp_nanos(),
306            duration_nanos: perf.duration.as_nanos() as i64,
307            string_table: string_table.into_iter().map(|(k, _)| k).collect(),
308            period: 1_000_000_000 / perf.freq as i64,
309            period_type: Some(pb::ValueType { r#type: 3, unit: 4 }),
310            ..pb::Profile::default()
311        }
312        .encode(&mut content)
313        .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e)))?;
314        writer.write_all(&content)
315    }
316
317    pub fn from_reader<R, W>(&mut self, reader: R, writer: W) -> io::Result<()>
318    where
319        R: io::BufRead,
320        W: io::Write,
321    {
322        self.finish(reader, writer)
323    }
324}