inferno 0.11.21

Rust port of the FlameGraph performance profiling tool suite
Documentation
use std::{cmp::Ordering, io};

use log::warn;

use crate::collapse::{common::Occurrences, Collapse};

static START_LINE: &str = "Level,Function Name,Number of Calls,Elapsed Inclusive Time %,Elapsed Exclusive Time %,Avg Elapsed Inclusive Time,Avg Elapsed Exclusive Time,Module Name,";

/// A stack collapser for the output of the Visual Studio built in profiler.
#[derive(Default)]
pub struct Folder {
    /// Function entries on the stack in this entry thus far.
    stack: Vec<(String, usize)>,
}

impl Collapse for Folder {
    fn collapse<R, W>(&mut self, mut reader: R, writer: W) -> io::Result<()>
    where
        R: std::io::BufRead,
        W: std::io::Write,
    {
        // Skip the header
        let mut line = Vec::new();
        if reader.read_until(b'\n', &mut line)? == 0 {
            warn!("File ended before start of call graph");
            return Ok(());
        };

        let header = String::from_utf8_lossy(&line).to_string();
        if !line_matches_start_line(&header) {
            return invalid_data_error!(
                "Expected first line to be header line\n    {}\nbut instead got\n    {}",
                START_LINE,
                header
            );
        }

        // Process the data
        let mut occurences = Occurrences::new(1);
        loop {
            line.clear();
            if reader.read_until(b'\n', &mut line)? == 0 {
                break;
            }
            let l = String::from_utf8_lossy(&line);
            let line = l.trim_end();
            if line.is_empty() {
                continue;
            } else {
                self.on_line(line, &mut occurences)?;
            }
        }

        self.write_stack(&mut occurences);

        // Write the results
        occurences.write_and_clear(writer)?;

        // Reset the state
        self.stack.clear();
        Ok(())
    }

    fn is_applicable(&mut self, input: &str) -> Option<bool> {
        let line = input
            .lines()
            .next()
            .expect("there is always at least one line (even if empty)");

        Some(line_matches_start_line(line))
    }
}

impl Folder {
    // Parse lines with values in the order as specified by `START_LINE`, comma delimited.
    // Level,Function Name,Number of Calls,...
    // 6,"System.String.IsNullOrEmpty(string)",4,0.00,0.00,0.00,0.00,"mscorlib.dll",
    fn on_line(&mut self, line: &str, occurences: &mut Occurrences) -> io::Result<()> {
        let (depth, remainder) = get_next_number(line)?;

        if remainder.is_empty() {
            return invalid_data_error!("Missing function name in line:\n{}", line);
        }

        // Function names are always wrapped in quotes. By trimming the leading double quote, we
        // know that the next double quote is the double quote closing the function name. Splitting
        // on this double quote, we get the function name and the remainder of the line.
        let split = if let Some(remainder) = remainder.strip_prefix('"') {
            remainder.split_once('"')
        } else {
            return invalid_data_error!("Unable to parse function name from line:\n{}", line);
        };

        if let Some((function_name, remainder)) = split {
            let (number_of_calls, _) = get_next_number(remainder)?;

            let prev_depth = self.stack.len();
            // There are 3 separate cases to handle regarding the depth:
            // 1. prev_depth + 1 == depth -> a new function is called, we only need to
            //    store the function name and the number of times it is called from the
            //    outer function
            // 2. prev_depth == depth -> the previous function call was a leaf node, so we
            //    need to save the current stack and replace the top node with our node
            //    call
            // 3. prev_depth > depth -> the previous function call was a leaf node, so we
            //    need to save the current stack and than we need to pop the top nodes
            //    until the top node is our parent (i.e. the function which called us)
            match prev_depth.cmp(&depth) {
                // Case 1
                Ordering::Less => {
                    assert_eq!(prev_depth + 1, depth);
                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
                // Case 2
                Ordering::Equal => {
                    self.write_stack(occurences);
                    self.stack.pop();
                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
                // Case 3
                Ordering::Greater => {
                    // The Visual Studio profiler outputs the number of times a function is called.
                    //
                    // Let's say we have a function `A()` which is called 500 times, and which
                    // calls a function `B()` 300 times. If we didn't do anything special here,
                    // this would result in `A()` being assigned 800 samples, giving the impression
                    // that `A()` only calls `B()` less than 50% of the time, while in fact it is
                    // called more than 50% of the time.
                    //
                    // To prevent this from happening, we instead subtract the number of calls from
                    // the previous node (in this case `B()`) from the current node (in this case
                    // `A()`. This leaves `A()` with the correct number of 500 samples.
                    //
                    // The top node is always written, but while walking down the stack, if the
                    // previous number of calls is equal to the current number of calls, we don't
                    // want to write the current top node, because that would duplicate the number
                    // of samples for the current node.
                    let mut prev_number_of_calls = 0;
                    for _ in 0..(prev_depth - depth + 1) {
                        if prev_number_of_calls != self.stack.last().unwrap().1 {
                            self.write_stack(occurences);
                        }
                        prev_number_of_calls = self.stack.pop().unwrap().1;

                        if self.stack.is_empty() {
                            break;
                        }

                        let last = self.stack.len() - 1;
                        let number_of_calls = &self.stack[last].1;
                        if prev_number_of_calls < *number_of_calls {
                            self.stack[last].1 -= prev_number_of_calls;
                        }
                    }

                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
            }
        } else {
            return invalid_data_error!("Unable to parse function name from line:\n{}", line);
        }

        Ok(())
    }

    // Store the current stack in `occurences`
    fn write_stack(&self, occurrences: &mut Occurrences) {
        if let Some(nsamples) = self.stack.last().map(|(_, n)| *n).filter(|n| *n > 0) {
            let functions: Vec<_> = self.stack.iter().map(|(f, _)| &f[..]).collect();
            occurrences.insert(functions.join(";"), nsamples);
        }
    }
}

/// Gets the number from the start of the line. This can either be a number <1000, in which case the
/// line doesn't contain double quotes, or the number can be >1000, in which case the line does
/// contain double quotes. In both cases `line` may start with a leading comma, which will be
/// ignored.
///
/// ### Example inputs
/// - Number <1000: `471,91.25,18.39,401.92,81.02,"Raytracer.exe",`
/// - Number >1000: `"2,893,824",54.37,4.21,0.04,0.00,"Raytracer.exe",`
fn get_next_number(line: &str) -> io::Result<(usize, &str)> {
    // Trim the leading comma, if any
    let line = line.strip_prefix(',').unwrap_or(line);

    // If the number is >1000, it is wrapped in quotes, so we need to remove those, and make sure
    // that we also remove the leading comma from the remainder after we are done parsing the
    // number.
    // If the number is <1000, we remove the leading comma from the remainder here and we don't
    // have to do anything after parsing the number.
    let mut remove_leading_comma = false;
    let field = if let Some(line) = line.strip_prefix('"') {
        remove_leading_comma = true;
        line.split_once('"')
    } else {
        line.split_once(',').or(Some((line, "")))
    };

    // Parse the number
    if let Some((num, mut remainder)) = field {
        let mut initial = true;
        let mut current_group_count = 0;
        let mut n = 0;
        let mut separator = None;
        for c in num.chars() {
            if c.is_ascii_digit() {
                if current_group_count > 2 {
                    return invalid_data_error!("Missing thousands separator in number '{}'", num);
                }

                n *= 10;
                n += c as u32 - '0' as u32;

                current_group_count += 1;
                continue;
            }

            if c == ',' || c == '.' || c == ' ' {
                if !initial && current_group_count < 3 {
                    return invalid_data_error!("Missing thousands separator in number '{}'", num);
                }

                match separator {
                    Some(sep) if sep != c => {
                        // Ensure that we always use the same separator, this takes care of
                        // handling floating point numbers (which aren't valid here), because those
                        // would use at least 2 different separators.
                        return invalid_data_error!("Unable to parse integer from '{}'", num);
                    }
                    Some(_) => {}
                    None => separator = Some(c),
                }

                initial = false;
                current_group_count = 0;
                continue;
            }

            return invalid_data_error!("Unable to parse integer from '{}'", num);
        }

        if remove_leading_comma {
            // `remainder` still has a leading comma, because the number is >1000. We need to
            // remove it so we are consistent regardless of whether the number was wrapped in
            // double quotes or not.
            remainder = remainder.strip_prefix(',').unwrap_or(remainder);
        }

        return Ok((n as usize, remainder));
    }

    invalid_data_error!("Invalid number in line:\n{}", line)
}

/// Some files may start with the <U+FEFF> character (zero width no-break space). This
/// causes the call to `starts_with` to return false, which in this case isn't what we want.
/// As this character has no influence on the rest of the file, we can safely ignore it.
fn line_matches_start_line(line: &str) -> bool {
    line.trim()
        .trim_start_matches('\u{feff}')
        .starts_with(START_LINE)
}

#[cfg(test)]
mod tests {
    use super::get_next_number;

    #[test]
    fn get_next_number_default() {
        let result = get_next_number(r#"471,91.25,18.39,401.92,81.02,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 471);
    }

    #[test]
    fn get_next_number_with_leading_comma() {
        let result = get_next_number(r#",471,91.25,18.39,401.92,81.02,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 471);
    }

    #[test]
    fn get_next_number_with_thousands_sep() {
        let result = get_next_number(r#""2,893,824",54.37,4.21,0.04,0.00,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 2_893_824);
    }

    #[test]
    fn get_next_number_missing_thousands_seps() {
        assert!(get_next_number(r#""2893824",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_missing_thousands_sep() {
        assert!(get_next_number(r#""28,93824",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_with_float() {
        assert!(get_next_number(r#""2,893.82",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_with_text_input() {
        assert!(get_next_number(r#""text",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }
}