libdd_profiling/
api.rs

1// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use libdd_profiling_protobuf::prost_impls;
5use std::ops::{Add, Sub};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
/// Pairs the name of a measured quantity with the unit it is expressed in.
///
/// Both strings are borrowed for `'a`; the struct owns no data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ValueType<'a> {
    /// Name of the value type.
    pub r#type: &'a str,
    /// Unit in which values of this type are expressed.
    pub unit: &'a str,
}

impl<'a> ValueType<'a> {
    /// Builds a [`ValueType`] from its type name and unit.
    ///
    /// Usable in `const` contexts.
    #[inline(always)]
    pub const fn new(r#type: &'a str, unit: &'a str) -> Self {
        ValueType { r#type, unit }
    }
}
20
21#[derive(Copy, Clone, Debug, Eq, PartialEq)]
22pub struct Period<'a> {
23    pub r#type: ValueType<'a>,
24    pub value: i64,
25}
26
/// Numeric identifier used in place of a borrowed string by the `StringId*`
/// types in this module.
///
/// The layout is `#[repr(C)]`: a single `u32`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(C)]
pub struct ManagedStringId {
    /// Raw id value.
    pub value: u32,
}

impl ManagedStringId {
    /// Returns the id whose value is `0` (the same value as `Default`).
    pub const fn empty() -> Self {
        Self { value: 0 }
    }

    /// Wraps a raw `u32` as a [`ManagedStringId`].
    pub const fn new(value: u32) -> Self {
        Self { value }
    }
}
42
/// Describes one memory mapping of a binary or shared object, with its
/// strings borrowed for `'a`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Mapping<'a> {
    /// Address at which the binary (or DLL) is loaded into memory.
    pub memory_start: u64,

    /// Limit of the address range occupied by this mapping.
    pub memory_limit: u64,

    /// Offset in the binary that corresponds to the first mapped address.
    pub file_offset: u64,

    /// Object this entry is loaded from: a filename on disk for the main
    /// binary and shared libraries, or a virtual abstraction such as
    /// "[vdso]".
    pub filename: &'a str,

    /// String that uniquely identifies a particular program version with
    /// high probability. For binaries generated by GNU tools this could be
    /// the contents of the `.note.gnu.build-id` field.
    pub build_id: &'a str,
}
64
65#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
66// Same as Mapping, but using StringIds
67pub struct StringIdMapping {
68    pub memory_start: u64,
69    pub memory_limit: u64,
70    pub file_offset: u64,
71    pub filename: ManagedStringId,
72    pub build_id: ManagedStringId,
73}
74
/// Identifies a function by its names and source file, with all strings
/// borrowed for `'a`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Function<'a> {
    /// Human-readable name of the function, if available.
    pub name: &'a str,

    /// Name of the function as identified by the system; for instance a
    /// C++ mangled name.
    pub system_name: &'a str,

    /// Source file containing the function.
    pub filename: &'a str,
}
87
88#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
89// Same as Function, but using StringIds
90pub struct StringIdFunction {
91    pub name: ManagedStringId,
92    pub system_name: ManagedStringId,
93    pub filename: ManagedStringId,
94}
95
96#[derive(Clone, Debug, Eq, PartialEq)]
97pub struct Line<'a> {
98    /// The corresponding profile.Function for this line.
99    pub function: Function<'a>,
100
101    /// Line number in source code.
102    pub line: i64,
103}
104
105#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
106pub struct Location<'a> {
107    pub mapping: Mapping<'a>,
108    pub function: Function<'a>,
109
110    /// The instruction address for this location, if available.  It
111    /// should be within [Mapping.memory_start...Mapping.memory_limit]
112    /// for the corresponding mapping. A non-leaf address may be in the
113    /// middle of a call instruction. It is up to display tools to find
114    /// the beginning of the instruction if necessary.
115    pub address: u64,
116    pub line: i64,
117}
118
119#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
120// Same as Location, but using StringIds
121pub struct StringIdLocation {
122    pub mapping: StringIdMapping,
123    pub function: StringIdFunction,
124    pub address: u64,
125    pub line: i64,
126}
127
/// A key with either a string value or a numeric value (optionally with a
/// unit), all strings borrowed for `'a`.
///
/// The field order is significant: the derived ordering compares `key`,
/// then `str`, then `num`, then `num_unit`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Label<'a> {
    /// Name of the label.
    pub key: &'a str,

    /// String value. At most one of `str` and `num` must be present.
    pub str: &'a str,
    /// Numeric value. At most one of `str` and `num` must be present.
    pub num: i64,

    /// Units of `num`; should only be present when `num` is present.
    ///
    /// An arbitrary string (for example, "requests") can be used as a custom
    /// count unit. If no unit is specified, a consumer may apply heuristics
    /// to deduce one. Consumers may also interpret units like "bytes" and
    /// "kilobytes" as memory units and units like "seconds" and
    /// "nanoseconds" as time units, and apply appropriate unit conversions.
    pub num_unit: &'a str,
}
145
146#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
147// Same as Label, but using StringIds
148pub struct StringIdLabel {
149    pub key: ManagedStringId,
150
151    /// At most one of the following must be present
152    pub str: ManagedStringId,
153    pub num: i64,
154
155    /// Should only be present when num is present.
156    pub num_unit: ManagedStringId,
157}
158
159impl Label<'_> {
160    pub fn uses_at_most_one_of_str_and_num(&self) -> bool {
161        self.str.is_empty() || (self.num == 0 && self.num_unit.is_empty())
162    }
163}
164
165#[derive(Clone, Debug, Eq, PartialEq)]
166pub struct Sample<'a> {
167    /// The leaf is at locations[0].
168    pub locations: Vec<Location<'a>>,
169
170    /// The type and unit of each value is defined by the corresponding
171    /// entry in Profile.sample_type. All samples must have the same
172    /// number of values, the same as the length of Profile.sample_type.
173    /// When aggregating multiple samples into a single sample, the
174    /// result has a list of values that is the element-wise sum of the
175    /// lists of the originals.
176    pub values: &'a [i64],
177
178    /// label includes additional context for this sample. It can include
179    /// things like a thread id, allocation size, etc
180    pub labels: Vec<Label<'a>>,
181}
182
183#[derive(Clone, Debug, Eq, PartialEq)]
184// Same as Sample, but using StringIds
185pub struct StringIdSample<'a> {
186    pub locations: Vec<StringIdLocation>,
187    pub values: &'a [i64],
188    pub labels: Vec<StringIdLabel>,
189}
190
/// Parameters for one of the supported upscaling methods.
///
/// Use [`UpscalingInfo::check_validity`] to validate the parameters against
/// the number of values in a sample.
#[derive(Debug)]
#[cfg_attr(test, derive(bolero::generator::TypeGenerator))]
pub enum UpscalingInfo {
    /// Poisson upscaling where both the sum and the count are read from the
    /// sample's values.
    Poisson {
        /// Offset of the sum in the profile values type array.
        sum_value_offset: usize,
        /// Offset of the count in the profile values type array.
        count_value_offset: usize,
        /// Sampling distance; must be non-zero to be valid.
        sampling_distance: u64,
    },
    /// Poisson upscaling where the count is given directly rather than
    /// being one of the sample's values.
    PoissonNonSampleTypeCount {
        /// Offset of the sum in the profile values type array.
        sum_value_offset: usize,
        /// The count itself; must be non-zero to be valid.
        count_value: u64,
        /// Sampling distance; must be non-zero to be valid.
        sampling_distance: u64,
    },
    /// Upscaling by a scale factor.
    Proportional {
        /// The scale factor.
        scale: f64,
    },
}
210
211impl std::fmt::Display for UpscalingInfo {
212    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
213        match self {
214            UpscalingInfo::Poisson {
215                sum_value_offset,
216                count_value_offset,
217                sampling_distance,
218            } => write!(
219                f,
220                "Poisson = sum_value_offset: {sum_value_offset}, count_value_offset: {count_value_offset}, sampling_distance: {sampling_distance}"
221            ),
222            UpscalingInfo::PoissonNonSampleTypeCount {
223                sum_value_offset,
224                count_value,
225                sampling_distance,
226            } => write!(
227                f,
228                "Poisson = sum_value_offset: {sum_value_offset}, count_value: {count_value}, sampling_distance: {sampling_distance}",
229            ),
230            UpscalingInfo::Proportional { scale } => {
231                write!(f, "Proportional = scale: {scale}")
232            }
233        }
234    }
235}
236
237impl UpscalingInfo {
238    pub fn check_validity(&self, number_of_values: usize) -> anyhow::Result<()> {
239        match self {
240            UpscalingInfo::Poisson {
241                sum_value_offset,
242                count_value_offset,
243                sampling_distance,
244            } => {
245                anyhow::ensure!(
246                    sum_value_offset < &number_of_values && count_value_offset < &number_of_values,
247                    "sum_value_offset {sum_value_offset} and count_value_offset {count_value_offset} must be strictly less than {number_of_values}"
248                );
249                anyhow::ensure!(
250                    sampling_distance != &0,
251                    "sampling_distance {sampling_distance} must be greater than 0"
252                )
253            }
254            UpscalingInfo::PoissonNonSampleTypeCount {
255                sum_value_offset,
256                count_value,
257                sampling_distance,
258            } => {
259                anyhow::ensure!(
260                    sum_value_offset < &number_of_values,
261                    "sum_value_offset {sum_value_offset} must be strictly less than {number_of_values}"
262                );
263                anyhow::ensure!(
264                    count_value != &0,
265                    "count_value {count_value} must be greater than 0"
266                );
267                anyhow::ensure!(
268                    sampling_distance != &0,
269                    "sampling_distance {sampling_distance} must be greater than 0"
270                )
271            }
272            UpscalingInfo::Proportional { scale: _ } => (),
273        }
274        anyhow::Ok(())
275    }
276}
277
278pub struct Profile<'a> {
279    pub duration: Duration,
280    pub period: Option<(i64, ValueType<'a>)>,
281    pub sample_types: Vec<ValueType<'a>>,
282    pub samples: Vec<Sample<'a>>,
283    pub start_time: SystemTime,
284}
285
286fn string_table_fetch(pprof: &prost_impls::Profile, id: i64) -> anyhow::Result<&String> {
287    pprof
288        .string_table
289        .get(id as u64 as usize)
290        .ok_or_else(|| anyhow::anyhow!("String {id} was not found."))
291}
292
293fn mapping_fetch(pprof: &prost_impls::Profile, id: u64) -> anyhow::Result<Mapping<'_>> {
294    if id == 0 {
295        return Ok(Mapping::default());
296    }
297
298    match pprof.mappings.iter().find(|item| item.id == id) {
299        Some(mapping) => Ok(Mapping {
300            memory_start: mapping.memory_start,
301            memory_limit: mapping.memory_limit,
302            file_offset: mapping.file_offset,
303            filename: string_table_fetch(pprof, mapping.filename)?,
304            build_id: string_table_fetch(pprof, mapping.build_id)?,
305        }),
306        None => anyhow::bail!("Mapping {id} was not found."),
307    }
308}
309
310fn function_fetch(pprof: &prost_impls::Profile, id: u64) -> anyhow::Result<Function<'_>> {
311    if id == 0 {
312        return Ok(Function::default());
313    }
314
315    match pprof.functions.iter().find(|item| item.id == id) {
316        Some(function) => Ok(Function {
317            name: string_table_fetch(pprof, function.name)?,
318            system_name: string_table_fetch(pprof, function.system_name)?,
319            filename: string_table_fetch(pprof, function.filename)?,
320        }),
321        None => anyhow::bail!("Function {id} was not found."),
322    }
323}
324
325fn location_fetch(pprof: &prost_impls::Profile, id: u64) -> anyhow::Result<Location<'_>> {
326    if id == 0 {
327        return Ok(Location::default());
328    }
329
330    match pprof.locations.iter().find(|item| item.id == id) {
331        Some(location) => {
332            anyhow::ensure!(!location.is_folded, "expected Location to not be folded");
333            anyhow::ensure!(
334                location.lines.len() == 1,
335                "expected Location to have exactly 1 Line"
336            );
337            // Safety: guarded by len check above.
338            let line = unsafe { location.lines.get_unchecked(0) };
339            let function = function_fetch(pprof, line.function_id)?;
340
341            Ok(Location {
342                mapping: mapping_fetch(pprof, location.mapping_id)?,
343                function,
344                address: location.address,
345                line: line.line,
346            })
347        }
348        None => anyhow::bail!("Location {id} was not found."),
349    }
350}
351
352fn locations_fetch<'a>(
353    pprof: &'a prost_impls::Profile,
354    ids: &'a [u64],
355) -> anyhow::Result<Vec<Location<'a>>> {
356    let mut locations = Vec::with_capacity(ids.len());
357    for id in ids {
358        let location = location_fetch(pprof, *id)?;
359        locations.push(location);
360    }
361    Ok(locations)
362}
363
364impl<'a> TryFrom<&'a prost_impls::Profile> for Profile<'a> {
365    type Error = anyhow::Error;
366
367    fn try_from(pprof: &'a prost_impls::Profile) -> Result<Self, Self::Error> {
368        assert!(pprof.duration_nanos >= 0);
369        let duration = Duration::from_nanos(pprof.duration_nanos as u64);
370        let start_time = if pprof.time_nanos.is_negative() {
371            UNIX_EPOCH.sub(Duration::from_nanos(pprof.time_nanos.unsigned_abs()))
372        } else {
373            UNIX_EPOCH.add(Duration::from_nanos(pprof.time_nanos as u64))
374        };
375
376        let period = match pprof.period_type {
377            Some(t) => {
378                let r#type = ValueType::new(
379                    string_table_fetch(pprof, t.r#type)?,
380                    string_table_fetch(pprof, t.unit)?,
381                );
382                Some((pprof.period, r#type))
383            }
384            None => None,
385        };
386
387        let mut sample_types = Vec::with_capacity(pprof.samples.len());
388        for t in pprof.sample_types.iter() {
389            sample_types.push(ValueType::new(
390                string_table_fetch(pprof, t.r#type)?,
391                string_table_fetch(pprof, t.unit)?,
392            ));
393        }
394
395        let mut samples = Vec::with_capacity(pprof.samples.len());
396        for sample in pprof.samples.iter() {
397            let locations = locations_fetch(pprof, &sample.location_ids)?;
398
399            let mut labels = Vec::with_capacity(sample.labels.len());
400            for label in sample.labels.iter() {
401                labels.push(Label {
402                    key: string_table_fetch(pprof, label.key)?,
403                    str: string_table_fetch(pprof, label.str)?,
404                    num: label.num,
405                    num_unit: string_table_fetch(pprof, label.num_unit)?,
406                })
407            }
408            let sample = Sample {
409                locations,
410                values: &sample.values,
411                labels,
412            };
413            samples.push(sample);
414        }
415
416        Ok(Profile {
417            duration,
418            period,
419            sample_types,
420            samples,
421            start_time,
422        })
423    }
424}
425
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `Label::uses_at_most_one_of_str_and_num` over a table of
    /// labels, each paired with the expected verdict.
    #[test]
    fn label_uses_at_most_one_of_str_and_num() {
        let cases = [
            // can't use num_unit with str
            (
                Label {
                    key: "name",
                    str: "levi",
                    num: 0,
                    num_unit: "name",
                },
                false,
            ),
            // can't use num with str
            (
                Label {
                    key: "name",
                    str: "levi",
                    num: 10,
                    num_unit: "",
                },
                false,
            ),
            // str only
            (
                Label {
                    key: "name",
                    str: "levi",
                    num: 0,
                    num_unit: "",
                },
                true,
            ),
            // neither str nor num
            (
                Label {
                    key: "process_id",
                    str: "",
                    num: 0,
                    num_unit: "",
                },
                true,
            ),
            // num without a unit
            (
                Label {
                    key: "local root span id",
                    str: "",
                    num: 10901,
                    num_unit: "",
                },
                true,
            ),
            // num with a unit
            (
                Label {
                    key: "duration",
                    str: "",
                    num: 12345,
                    num_unit: "nanoseconds",
                },
                true,
            ),
        ];

        for (label, expected) in cases {
            assert_eq!(
                label.uses_at_most_one_of_str_and_num(),
                expected,
                "unexpected verdict for {label:?}"
            );
        }
    }
}