1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
use std::{cmp::Ordering, io};

use log::warn;

use crate::collapse::{common::Occurrences, Collapse};

static START_LINE: &str = "Level,Function Name,Number of Calls,Elapsed Inclusive Time %,Elapsed Exclusive Time %,Avg Elapsed Inclusive Time,Avg Elapsed Exclusive Time,Module Name,";

/// A stack collapser for the output of the Visual Studio built in profiler.
#[derive(Default)]
pub struct Folder {
    /// Function entries on the stack in this entry thus far.
    stack: Vec<(String, usize)>,
}

impl Collapse for Folder {
    fn collapse<R, W>(&mut self, mut reader: R, writer: W) -> io::Result<()>
    where
        R: std::io::BufRead,
        W: std::io::Write,
    {
        // Skip the header
        let mut line = Vec::new();
        if reader.read_until(b'\n', &mut line)? == 0 {
            warn!("File ended before start of call graph");
            return Ok(());
        };

        let header = String::from_utf8_lossy(&line).to_string();
        if !line_matches_start_line(&header) {
            return invalid_data_error!(
                "Expected first line to be header line\n    {}\nbut instead got\n    {}",
                START_LINE,
                header
            );
        }

        // Process the data
        let mut occurences = Occurrences::new(1);
        loop {
            line.clear();
            if reader.read_until(b'\n', &mut line)? == 0 {
                break;
            }
            let l = String::from_utf8_lossy(&line);
            let line = l.trim_end();
            if line.is_empty() {
                continue;
            } else {
                self.on_line(line, &mut occurences)?;
            }
        }

        self.write_stack(&mut occurences);

        // Write the results
        occurences.write_and_clear(writer)?;

        // Reset the state
        self.stack.clear();
        Ok(())
    }

    fn is_applicable(&mut self, input: &str) -> Option<bool> {
        let line = input
            .lines()
            .next()
            .expect("there is always at least one line (even if empty)");

        Some(line_matches_start_line(line))
    }
}

impl Folder {
    // Parse lines with values in the order as specified by `START_LINE`, comma delimited.
    // Level,Function Name,Number of Calls,...
    // 6,"System.String.IsNullOrEmpty(string)",4,0.00,0.00,0.00,0.00,"mscorlib.dll",
    fn on_line(&mut self, line: &str, occurences: &mut Occurrences) -> io::Result<()> {
        let (depth, remainder) = get_next_number(line)?;

        if remainder.is_empty() {
            return invalid_data_error!("Missing function name in line:\n{}", line);
        }

        // Function names are always wrapped in quotes. By trimming the leading double quote, we
        // know that the next double quote is the double quote closing the function name. Splitting
        // on this double quote, we get the function name and the remainder of the line.
        let split = if let Some(remainder) = remainder.strip_prefix('"') {
            remainder.split_once('"')
        } else {
            return invalid_data_error!("Unable to parse function name from line:\n{}", line);
        };

        if let Some((function_name, remainder)) = split {
            let (number_of_calls, _) = get_next_number(remainder)?;

            let prev_depth = self.stack.len();
            // There are 3 separate cases to handle regarding the depth:
            // 1. prev_depth + 1 == depth -> a new function is called, we only need to
            //    store the function name and the number of times it is called from the
            //    outer function
            // 2. prev_depth == depth -> the previous function call was a leaf node, so we
            //    need to save the current stack and replace the top node with our node
            //    call
            // 3. prev_depth > depth -> the previous function call was a leaf node, so we
            //    need to save the current stack and than we need to pop the top nodes
            //    until the top node is our parent (i.e. the function which called us)
            match prev_depth.cmp(&depth) {
                // Case 1
                Ordering::Less => {
                    assert_eq!(prev_depth + 1, depth);
                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
                // Case 2
                Ordering::Equal => {
                    self.write_stack(occurences);
                    self.stack.pop();
                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
                // Case 3
                Ordering::Greater => {
                    // The Visual Studio profiler outputs the number of times a function is called.
                    //
                    // Let's say we have a function `A()` which is called 500 times, and which
                    // calls a function `B()` 300 times. If we didn't do anything special here,
                    // this would result in `A()` being assigned 800 samples, giving the impression
                    // that `A()` only calls `B()` less than 50% of the time, while in fact it is
                    // called more than 50% of the time.
                    //
                    // To prevent this from happening, we instead subtract the number of calls from
                    // the previous node (in this case `B()`) from the current node (in this case
                    // `A()`. This leaves `A()` with the correct number of 500 samples.
                    //
                    // The top node is always written, but while walking down the stack, if the
                    // previous number of calls is equal to the current number of calls, we don't
                    // want to write the current top node, because that would duplicate the number
                    // of samples for the current node.
                    let mut prev_number_of_calls = 0;
                    for _ in 0..(prev_depth - depth + 1) {
                        if prev_number_of_calls != self.stack.last().unwrap().1 {
                            self.write_stack(occurences);
                        }
                        prev_number_of_calls = self.stack.pop().unwrap().1;

                        if self.stack.is_empty() {
                            break;
                        }

                        let last = self.stack.len() - 1;
                        let number_of_calls = &self.stack[last].1;
                        if prev_number_of_calls < *number_of_calls {
                            self.stack[last].1 -= prev_number_of_calls;
                        }
                    }

                    self.stack
                        .push((function_name.to_string(), number_of_calls));
                }
            }
        } else {
            return invalid_data_error!("Unable to parse function name from line:\n{}", line);
        }

        Ok(())
    }

    // Store the current stack in `occurences`
    fn write_stack(&self, occurrences: &mut Occurrences) {
        if let Some(nsamples) = self.stack.last().map(|(_, n)| *n).filter(|n| *n > 0) {
            let functions: Vec<_> = self.stack.iter().map(|(f, _)| &f[..]).collect();
            occurrences.insert(functions.join(";"), nsamples);
        }
    }
}

/// Gets the number from the start of the line. This can either be a number <1000, in which case the
/// line doesn't contain double quotes, or the number can be >1000, in which case the line does
/// contain double quotes. In both cases `line` may start with a leading comma, which will be
/// ignored.
///
/// ### Example inputs
/// - Number <1000: `471,91.25,18.39,401.92,81.02,"Raytracer.exe",`
/// - Number >1000: `"2,893,824",54.37,4.21,0.04,0.00,"Raytracer.exe",`
fn get_next_number(line: &str) -> io::Result<(usize, &str)> {
    // Trim the leading comma, if any
    let line = line.strip_prefix(',').unwrap_or(line);

    // If the number is >1000, it is wrapped in quotes, so we need to remove those, and make sure
    // that we also remove the leading comma from the remainder after we are done parsing the
    // number.
    // If the number is <1000, we remove the leading comma from the remainder here and we don't
    // have to do anything after parsing the number.
    let mut remove_leading_comma = false;
    let field = if let Some(line) = line.strip_prefix('"') {
        remove_leading_comma = true;
        line.split_once('"')
    } else {
        line.split_once(',').or(Some((line, "")))
    };

    // Parse the number
    if let Some((num, mut remainder)) = field {
        let mut initial = true;
        let mut current_group_count = 0;
        let mut n = 0;
        let mut separator = None;
        for c in num.chars() {
            if c.is_ascii_digit() {
                if current_group_count > 2 {
                    return invalid_data_error!("Missing thousands separator in number '{}'", num);
                }

                n *= 10;
                n += c as u32 - '0' as u32;

                current_group_count += 1;
                continue;
            }

            if c == ',' || c == '.' || c == ' ' {
                if !initial && current_group_count < 3 {
                    return invalid_data_error!("Missing thousands separator in number '{}'", num);
                }

                match separator {
                    Some(sep) if sep != c => {
                        // Ensure that we always use the same separator, this takes care of
                        // handling floating point numbers (which aren't valid here), because those
                        // would use at least 2 different separators.
                        return invalid_data_error!("Unable to parse integer from '{}'", num);
                    }
                    Some(_) => {}
                    None => separator = Some(c),
                }

                initial = false;
                current_group_count = 0;
                continue;
            }

            return invalid_data_error!("Unable to parse integer from '{}'", num);
        }

        if remove_leading_comma {
            // `remainder` still has a leading comma, because the number is >1000. We need to
            // remove it so we are consistent regardless of whether the number was wrapped in
            // double quotes or not.
            remainder = remainder.strip_prefix(',').unwrap_or(remainder);
        }

        return Ok((n as usize, remainder));
    }

    invalid_data_error!("Invalid number in line:\n{}", line)
}

/// Some files may start with the <U+FEFF> character (zero width no-break space). This
/// causes the call to `starts_with` to return false, which in this case isn't what we want.
/// As this character has no influence on the rest of the file, we can safely ignore it.
fn line_matches_start_line(line: &str) -> bool {
    line.trim()
        .trim_start_matches('\u{feff}')
        .starts_with(START_LINE)
}

#[cfg(test)]
mod tests {
    use super::get_next_number;

    #[test]
    fn get_next_number_default() {
        let result = get_next_number(r#"471,91.25,18.39,401.92,81.02,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 471);
    }

    #[test]
    fn get_next_number_with_leading_comma() {
        let result = get_next_number(r#",471,91.25,18.39,401.92,81.02,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 471);
    }

    #[test]
    fn get_next_number_with_thousands_sep() {
        let result = get_next_number(r#""2,893,824",54.37,4.21,0.04,0.00,"Raytracer.exe","#);
        assert!(result.is_ok());

        let (result, _) = result.unwrap();
        assert_eq!(result, 2_893_824);
    }

    #[test]
    fn get_next_number_missing_thousands_seps() {
        assert!(get_next_number(r#""2893824",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_missing_thousands_sep() {
        assert!(get_next_number(r#""28,93824",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_with_float() {
        assert!(get_next_number(r#""2,893.82",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }

    #[test]
    fn get_next_number_with_text_input() {
        assert!(get_next_number(r#""text",54.37,4.21,0.04,0.00,"Raytracer.exe","#).is_err());
    }
}