iai_callgrind_runner/runner/tool/
logfile_parser.rs

1use std::path::{Path, PathBuf};
2
3use anyhow::{Context, Result};
4use itertools::Itertools;
5use lazy_static::lazy_static;
6use log::debug;
7use regex::Regex;
8
9use super::error_metric_parser::ErrorMetricLogfileParser;
10use super::generic_parser::GenericLogfileParser;
11use super::{ToolOutputPath, ValgrindTool};
12use crate::error::Error;
13use crate::runner::dhat::logfile_parser::DhatLogfileParser;
14use crate::runner::summary::{
15    SegmentDetails, ToolMetricSummary, ToolMetrics, ToolRun, ToolRunSegment,
16};
17use crate::util::EitherOrBoth;
18
19// The different regex have to consider --time-stamp=yes
20lazy_static! {
21    pub static ref EXTRACT_FIELDS_RE: Regex = regex::Regex::new(
22        r"^\s*(==|--)([0-9:.]+\s+)?[0-9]+(==|--)\s*(?<key>.*?)\s*:\s*(?<value>.*)\s*$"
23    )
24    .expect("Regex should compile");
25    pub static ref EMPTY_LINE_RE: Regex =
26        regex::Regex::new(r"^\s*(==|--)([0-9:.]+\s+)?[0-9]+(==|--)\s*$")
27            .expect("Regex should compile");
28    pub static ref STRIP_PREFIX_RE: Regex =
29        regex::Regex::new(r"^\s*(==|--)([0-9:.]+\s+)?[0-9]+(==|--) (?<rest>.*)$")
30            .expect("Regex should compile");
31    static ref EXTRACT_PID_RE: Regex =
32        regex::Regex::new(r"^\s*(==|--)([0-9:.]+\s+)?(?<pid>[0-9]+)(==|--).*")
33            .expect("Regex should compile");
34}
35
36pub trait LogfileParser {
37    fn parse_single(&self, path: PathBuf) -> Result<Logfile>;
38    fn parse(&self, output_path: &ToolOutputPath) -> Result<Vec<Logfile>> {
39        let log_path = output_path.to_log_output();
40        debug!("{}: Parsing log file '{}'", output_path.tool.id(), log_path);
41
42        let mut logfiles = vec![];
43        let Ok(paths) = log_path.real_paths() else {
44            return Ok(vec![]);
45        };
46
47        for path in paths {
48            let logfile = self.parse_single(path)?;
49            logfiles.push(logfile);
50        }
51
52        logfiles.sort_by_key(|x| x.header.pid);
53        Ok(logfiles)
54    }
55}
56
/// The header of a log file as produced by [`parse_header`]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Header {
    /// The value of the `Command` header field
    pub command: String,
    /// The pid extracted from the prefix of the first line of the log file
    pub pid: i32,
    /// The value of the `Parent PID` header field if the header contains one
    pub parent_pid: Option<i32>,
}
63
64#[derive(Debug, Clone, PartialEq)]
65pub struct Logfile {
66    pub path: PathBuf,
67    pub header: Header,
68    pub details: Vec<String>,
69    pub metrics: ToolMetrics,
70}
71
72#[derive(Debug, Clone, PartialEq)]
73pub struct LogfileSummary {
74    pub logfile: EitherOrBoth<Logfile>,
75    pub metrics_summary: ToolMetricSummary,
76}
77
78impl From<Logfile> for SegmentDetails {
79    fn from(value: Logfile) -> Self {
80        Self {
81            command: value.header.command,
82            pid: value.header.pid,
83            parent_pid: value.header.parent_pid,
84            details: (!value.details.is_empty()).then(|| value.details.join("\n")),
85            path: value.path,
86            part: None,
87            thread: None,
88        }
89    }
90}
91
92// Logfiles are separated per process but not per threads by any tool
93impl From<EitherOrBoth<Vec<Logfile>>> for ToolRun {
94    fn from(logfiles: EitherOrBoth<Vec<Logfile>>) -> Self {
95        let mut total: Option<ToolMetricSummary> = None;
96
97        let segments: Vec<ToolRunSegment> = match logfiles {
98            EitherOrBoth::Left(new) => new
99                .into_iter()
100                .map(|logfile| {
101                    let metrics_summary = ToolMetricSummary::from_new_metrics(&logfile.metrics);
102                    if let Some(entry) = total.as_mut() {
103                        entry.add_mut(&metrics_summary);
104                    } else {
105                        total = Some(metrics_summary.clone());
106                    }
107
108                    ToolRunSegment {
109                        details: EitherOrBoth::Left(logfile.into()),
110                        metrics_summary,
111                    }
112                })
113                .collect(),
114            EitherOrBoth::Right(old) => old
115                .into_iter()
116                .map(|logfile| {
117                    let metrics_summary = ToolMetricSummary::from_old_metrics(&logfile.metrics);
118                    if let Some(entry) = total.as_mut() {
119                        entry.add_mut(&metrics_summary);
120                    } else {
121                        total = Some(metrics_summary.clone());
122                    }
123
124                    ToolRunSegment {
125                        details: EitherOrBoth::Right(logfile.into()),
126                        metrics_summary,
127                    }
128                })
129                .collect(),
130            EitherOrBoth::Both(new, old) => new
131                .into_iter()
132                .zip_longest(old)
133                .map(|either_or_both| match either_or_both {
134                    itertools::EitherOrBoth::Both(new, old) => {
135                        let metrics_summary = ToolMetricSummary::try_from_new_and_old_metrics(
136                            &new.metrics,
137                            &old.metrics,
138                        )
139                        .expect("The cost kinds should match");
140
141                        if let Some(entry) = total.as_mut() {
142                            entry.add_mut(&metrics_summary);
143                        } else {
144                            total = Some(metrics_summary.clone());
145                        }
146
147                        ToolRunSegment {
148                            details: EitherOrBoth::Both(new.into(), old.into()),
149                            metrics_summary,
150                        }
151                    }
152                    itertools::EitherOrBoth::Left(new) => {
153                        let metrics_summary = ToolMetricSummary::from_new_metrics(&new.metrics);
154                        if let Some(entry) = total.as_mut() {
155                            entry.add_mut(&metrics_summary);
156                        } else {
157                            total = Some(metrics_summary.clone());
158                        }
159
160                        ToolRunSegment {
161                            details: EitherOrBoth::Left(new.into()),
162                            metrics_summary,
163                        }
164                    }
165                    itertools::EitherOrBoth::Right(old) => {
166                        let metrics_summary = ToolMetricSummary::from_old_metrics(&old.metrics);
167                        if let Some(entry) = total.as_mut() {
168                            entry.add_mut(&metrics_summary);
169                        } else {
170                            total = Some(metrics_summary.clone());
171                        }
172
173                        ToolRunSegment {
174                            details: EitherOrBoth::Right(old.into()),
175                            metrics_summary,
176                        }
177                    }
178                })
179                .collect(),
180        };
181
182        Self {
183            segments,
184            total: total.expect("A total should be present"),
185        }
186    }
187}
188
189pub fn extract_pid(line: &str) -> Result<i32> {
190    EXTRACT_PID_RE
191        .captures(line.trim())
192        .context("Log output should not be malformed")?
193        .name("pid")
194        .context("Log output should contain pid")?
195        .as_str()
196        .parse::<i32>()
197        .context("Pid should be valid")
198}
199
200/// Parse the logfile header
201///
202/// The logfile header is the same for all tools
203pub fn parse_header(path: &Path, mut lines: impl Iterator<Item = String>) -> Result<Header> {
204    let next = lines.next();
205
206    let (pid, next) = if let Some(next) = next {
207        (extract_pid(&next)?, next)
208    } else {
209        return Err(Error::ParseError((path.to_owned(), "Empty file".to_owned())).into());
210    };
211
212    let mut parent_pid = None;
213    let mut command = None;
214    for line in std::iter::once(next).chain(lines) {
215        if EMPTY_LINE_RE.is_match(&line) {
216            // The header is separated from the body by at least one empty line. The first
217            // empty line is removed from the iterator.
218            break;
219        } else if let Some(caps) = EXTRACT_FIELDS_RE.captures(&line) {
220            let key = caps.name("key").unwrap().as_str();
221
222            // These unwraps are safe. If there is a key, there is also a value present
223            match key.to_ascii_lowercase().as_str() {
224                "command" => {
225                    let value = caps.name("value").unwrap().as_str();
226                    command = Some(value.to_owned());
227                }
228                "parent pid" => {
229                    let value = caps.name("value").unwrap().as_str().to_owned();
230                    parent_pid = Some(
231                        value
232                            .as_str()
233                            .parse::<i32>()
234                            .context("Failed parsing log file: Parent pid should be valid")?,
235                    );
236                }
237                _ => {
238                    // Ignore other header lines
239                }
240            }
241        } else {
242            // Some malformed header line which we ignore
243        }
244    }
245
246    Ok(Header {
247        command: command.with_context(|| {
248            format!(
249                "Error parsing header of logfile '{}': A command should be present",
250                path.display()
251            )
252        })?,
253        pid,
254        parent_pid,
255    })
256}
257
258pub fn parser_factory(tool: ValgrindTool, root_dir: PathBuf) -> Box<dyn LogfileParser> {
259    match tool {
260        ValgrindTool::DHAT => Box::new(DhatLogfileParser { root_dir }),
261        ValgrindTool::Memcheck | ValgrindTool::DRD | ValgrindTool::Helgrind => {
262            Box::new(ErrorMetricLogfileParser { root_dir })
263        }
264        _ => Box::new(GenericLogfileParser { root_dir }),
265    }
266}