libdd_profiling/internal/profile/
mod.rs

1// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4#[cfg(test)]
5mod fuzz_tests;
6
7pub mod interning_api;
8
9use self::api::UpscalingInfo;
10use super::*;
11use crate::api;
12use crate::api::ManagedStringId;
13use crate::collections::identifiable::*;
14use crate::collections::string_storage::{CachedProfileId, ManagedStringStorage};
15use crate::collections::string_table::{self, StringTable};
16use crate::iter::{IntoLendingIterator, LendingIterator};
17use crate::profiles::{Compressor, DefaultProfileCodec};
18use anyhow::Context;
19use interning_api::Generation;
20use libdd_profiling_protobuf::{self as protobuf, Record, Value, NO_OPT_ZERO, OPT_ZERO};
21use std::borrow::Cow;
22use std::collections::HashMap;
23use std::io;
24use std::sync::atomic::AtomicU64;
25use std::sync::{Arc, Mutex};
26use std::time::{Duration, SystemTime};
27
/// An in-memory profile being aggregated. It can later be serialized with
/// [`Profile::serialize_into_compressed_pprof`].
///
/// Strings, functions, mappings, locations, labels, label sets and stack
/// traces are deduplicated into the tables below; recorded samples refer to
/// label sets and stack traces by id.
pub struct Profile {
    /// When profiles are reset, the sample-types need to be preserved. This
    /// maintains them in a way that does not depend on the string table. It
    /// is an `Option` so that it can be taken from the old profile and moved
    /// into the new one.
    owned_sample_types: Option<Box<[owned_types::ValueType]>>,
    /// When profiles are reset, the period needs to be preserved. This
    /// stores it in a way that does not depend on the string table.
    owned_period: Option<owned_types::Period>,
    /// NOTE(review): presumably tracks samples that are currently in flight;
    /// `reset_and_return_previous` refuses to rotate unless `sample_block()`
    /// reports zero. Confirm the exact relationship to this counter.
    active_samples: AtomicU64,
    /// Endpoint data: the local-root-span-id to endpoint mapping filled by
    /// `add_endpoint`, and the stats filled by `add_endpoint_count`.
    endpoints: Endpoints,
    /// Deduplicated functions.
    functions: FxIndexSet<Function>,
    /// Value handed out by `get_generation`; a fresh `Generation` is created
    /// for every new profile.
    generation: interning_api::Generation,
    /// Deduplicated individual labels.
    labels: FxIndexSet<Label>,
    /// Deduplicated sets of label ids.
    label_sets: FxIndexSet<LabelSet>,
    /// Deduplicated locations.
    locations: FxIndexSet<Location>,
    /// Deduplicated mappings.
    mappings: FxIndexSet<Mapping>,
    /// Recorded samples: each entry pairs a `Sample` (label set and stack
    /// trace) with an optional timestamp and its values.
    observations: Observations,
    /// Period emitted at serialization time, if any.
    /// NOTE(review): presumably the string-table-backed form of
    /// `owned_period` -- confirm.
    period: Option<(i64, ValueType)>,
    /// Sample types emitted at serialization time; every added sample must
    /// provide exactly this many values.
    sample_types: Box<[ValueType]>,
    /// Deduplicated stack traces (lists of location ids).
    stack_traces: FxIndexSet<StackTrace>,
    /// Start of the profile. Set to the current time on construction and
    /// overridable through `set_start_time`.
    start_time: SystemTime,
    /// The profile's string table.
    strings: StringTable,
    /// Optional shared managed string storage. It must be present to use the
    /// `ManagedStringId`-based APIs.
    string_storage: Option<Arc<Mutex<ManagedStringStorage>>>,
    /// Assigned lazily by `resolve` the first time a non-empty managed string
    /// is resolved; a new profile always starts with `None`.
    string_storage_cached_profile_id: Option<CachedProfileId>,
    /// Interned id of "end_timestamp_ns", the key of the label that carries
    /// a sample's timestamp during serialization.
    timestamp_key: StringId,
    /// Rules registered with `add_upscaling_rule`, applied to sample values
    /// during serialization.
    upscaling_rules: UpscalingRules,
}
56
/// The outcome of serializing a [`Profile`].
pub struct EncodedProfile {
    /// Start time of the profile that was serialized.
    pub start: SystemTime,
    /// End time of the profile: the caller-provided end time, or the time of
    /// serialization when none was given.
    pub end: SystemTime,
    /// The serialized bytes. `Profile::serialize_into_compressed_pprof` fills
    /// this with the compressor's output.
    pub buffer: Vec<u8>,
    /// Endpoint statistics moved out of the profile during serialization.
    pub endpoints_stats: ProfiledEndpointsStats,
}
63
64impl EncodedProfile {
65    pub fn test_instance() -> anyhow::Result<Self> {
66        use std::io::Read;
67
68        fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
69            let mut file = std::fs::File::open(path)?;
70            let metadata = file.metadata()?;
71            let mut buffer = Vec::with_capacity(metadata.len() as usize);
72            file.read_to_end(&mut buffer)?;
73
74            Ok(buffer)
75        }
76
77        let small_pprof_name = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/profile.pprof");
78        let buffer = open(small_pprof_name).map_err(|e| anyhow::anyhow!("{e}"))?;
79        let start = SystemTime::UNIX_EPOCH
80            .checked_add(Duration::from_nanos(12000000034))
81            .context("Translating time failed")?;
82        let end = SystemTime::UNIX_EPOCH
83            .checked_add(Duration::from_nanos(56000000078))
84            .context("Translating time failed")?;
85        let endpoints_stats = ProfiledEndpointsStats::default();
86        Ok(EncodedProfile {
87            start,
88            end,
89            buffer,
90            endpoints_stats,
91        })
92    }
93}
94
95/// Public API
96impl Profile {
    /// Compression level handed to the compressor in
    /// [`Profile::serialize_into_compressed_pprof`].
    ///
    /// When testing on some profiles that can't be shared publicly,
    /// level 1 provided better compressed files while taking less or equal
    /// time compared to lz4.
    pub const COMPRESSION_LEVEL: i32 = 1;
101
102    /// Add the endpoint data to the endpoint mappings.
103    /// The `endpoint` string will be interned.
104    pub fn add_endpoint(
105        &mut self,
106        local_root_span_id: u64,
107        endpoint: Cow<str>,
108    ) -> anyhow::Result<()> {
109        let interned_endpoint = self.try_intern(endpoint.as_ref())?;
110
111        let mappings = &mut self.endpoints.mappings;
112        mappings.try_reserve(1)?;
113        mappings.insert(local_root_span_id, interned_endpoint);
114        Ok(())
115    }
116
117    pub fn add_endpoint_count(&mut self, endpoint: Cow<str>, value: i64) -> anyhow::Result<()> {
118        self.endpoints
119            .stats
120            .add_endpoint_count(endpoint.into_owned(), value);
121        Ok(())
122    }
123
124    pub fn try_add_sample(
125        &mut self,
126        sample: api::Sample,
127        timestamp: Option<Timestamp>,
128    ) -> anyhow::Result<()> {
129        #[cfg(debug_assertions)]
130        {
131            self.validate_sample_labels(&sample)?;
132        }
133
134        let labels = {
135            let mut lbls = Vec::new();
136            lbls.try_reserve_exact(sample.labels.len())?;
137            for label in &sample.labels {
138                let key = self.try_intern(label.key)?;
139                let internal_label = if !label.str.is_empty() {
140                    let str = self.try_intern(label.str)?;
141                    Label::str(key, str)
142                } else {
143                    let num = label.num;
144                    let num_unit = self.try_intern(label.num_unit)?;
145                    Label::num(key, num, num_unit)
146                };
147
148                let id = self.labels.try_dedup(internal_label)?;
149                lbls.push(id);
150            }
151            lbls.into_boxed_slice()
152        };
153
154        let mut locations = Vec::new();
155        locations
156            .try_reserve_exact(sample.locations.len())
157            .context("failed to reserve memory for sample locations")?;
158        for location in &sample.locations {
159            locations.push(self.try_add_location(location)?);
160        }
161
162        self.try_add_sample_internal(sample.values, labels, locations, timestamp)
163    }
164
165    pub fn add_string_id_sample(
166        &mut self,
167        sample: api::StringIdSample,
168        timestamp: Option<Timestamp>,
169    ) -> anyhow::Result<()> {
170        anyhow::ensure!(
171            self.string_storage.is_some(),
172            "Current sample makes use of ManagedStringIds but profile was not created using a string table"
173        );
174
175        self.validate_string_id_sample_labels(&sample)?;
176
177        let labels = sample
178            .labels
179            .iter()
180            .map(|label| -> anyhow::Result<LabelId> {
181                let key = self.resolve(label.key)?;
182                let internal_label = if label.str != ManagedStringId::empty() {
183                    let str = self.resolve(label.str)?;
184                    Label::str(key, str)
185                } else {
186                    let num = label.num;
187                    let num_unit = self.resolve(label.num_unit)?;
188                    Label::num(key, num, num_unit)
189                };
190
191                self.labels.try_dedup(internal_label)
192            })
193            .collect::<Result<Box<[_]>, _>>()?;
194
195        let mut locations = Vec::new();
196        locations.try_reserve_exact(sample.locations.len())?;
197        for location in &sample.locations {
198            locations.push(self.add_string_id_location(location)?);
199        }
200
201        self.try_add_sample_internal(sample.values, labels, locations, timestamp)
202    }
203
204    fn try_add_sample_internal(
205        &mut self,
206        values: &[i64],
207        labels: Box<[LabelId]>,
208        locations: Vec<LocationId>,
209        timestamp: Option<Timestamp>,
210    ) -> anyhow::Result<()> {
211        anyhow::ensure!(
212            values.len() == self.sample_types.len(),
213            "expected {} sample types, but sample had {} sample types",
214            self.sample_types.len(),
215            values.len(),
216        );
217
218        let labels = self.label_sets.try_dedup(LabelSet::new(labels))?;
219
220        let stacktrace = self.try_add_stacktrace(locations)?;
221        self.observations
222            .add(Sample::new(labels, stacktrace), timestamp, values)?;
223        Ok(())
224    }
225
226    pub fn add_upscaling_rule(
227        &mut self,
228        offset_values: &[usize],
229        label_name: &str,
230        label_value: &str,
231        upscaling_info: UpscalingInfo,
232    ) -> anyhow::Result<()> {
233        let label_name_id = self.try_intern(label_name)?;
234        let label_value_id = self.try_intern(label_value)?;
235        self.upscaling_rules.add(
236            offset_values,
237            (label_name, label_name_id),
238            (label_value, label_value_id),
239            upscaling_info,
240            self.sample_types.len(),
241        )?;
242
243        Ok(())
244    }
245
246    pub fn get_generation(&self) -> anyhow::Result<Generation> {
247        Ok(self.generation)
248    }
249
250    pub fn resolve(&mut self, id: ManagedStringId) -> anyhow::Result<StringId> {
251        let non_empty_string_id = if let Some(valid_id) = NonZeroU32::new(id.value) {
252            valid_id
253        } else {
254            return Ok(StringId::ZERO); // Both string tables use zero for the empty string
255        };
256
257        let string_storage = self.string_storage
258            .as_ref()
259            // Safety: We always get here through a direct or indirect call to add_string_id_sample,
260            // which already ensured that the string storage exists.
261            .ok_or_else(|| anyhow::anyhow!("Current sample makes use of ManagedStringIds but profile was not created using a string table"))?;
262
263        let mut write_locked_storage = string_storage
264            .lock()
265            .map_err(|_| anyhow::anyhow!("string storage lock was poisoned"))?;
266
267        let cached_profile_id = match self.string_storage_cached_profile_id.as_ref() {
268            Some(cached_profile_id) => cached_profile_id,
269            None => {
270                let new_id = write_locked_storage.next_cached_profile_id()?;
271                self.string_storage_cached_profile_id.get_or_insert(new_id)
272            }
273        };
274
275        write_locked_storage.get_seq_num(non_empty_string_id, &mut self.strings, cached_profile_id)
276    }
277
278    /// This is used heavily enough in tests to make a helper.
279    #[cfg(test)]
280    pub fn new(sample_types: &[api::ValueType], period: Option<api::Period>) -> Self {
281        #[allow(clippy::unwrap_used)]
282        Self::try_new(sample_types, period).unwrap()
283    }
284
285    /// Tries to create a profile with the given `period`.
286    /// Initializes the string table to hold:
287    ///  - "" (the empty string)
288    ///  - "local root span id"
289    ///  - "trace endpoint"
290    ///
291    /// All other fields are default.
292    pub fn try_new(
293        sample_types: &[api::ValueType],
294        period: Option<api::Period>,
295    ) -> io::Result<Self> {
296        Self::try_new_internal(
297            Self::backup_period(period),
298            Self::backup_sample_types(sample_types),
299            None,
300        )
301    }
302
303    #[inline]
304    pub fn try_with_string_storage(
305        sample_types: &[api::ValueType],
306        period: Option<api::Period>,
307        string_storage: Arc<Mutex<ManagedStringStorage>>,
308    ) -> io::Result<Self> {
309        Self::try_new_internal(
310            Self::backup_period(period),
311            Self::backup_sample_types(sample_types),
312            Some(string_storage),
313        )
314    }
315
316    /// Resets all data except the sample types and period.
317    /// Returns the previous Profile on success.
318    #[inline]
319    pub fn reset_and_return_previous(&mut self) -> anyhow::Result<Profile> {
320        let current_active_samples = self.sample_block()?;
321        anyhow::ensure!(
322            current_active_samples == 0,
323            "Can't rotate the profile, there are still active samples. Drain them and try again."
324        );
325
326        let mut profile = Profile::try_new_internal(
327            self.owned_period.take(),
328            self.owned_sample_types.take(),
329            self.string_storage.clone(),
330        )
331        .context("failed to initialize new profile")?;
332
333        std::mem::swap(&mut *self, &mut profile);
334        Ok(profile)
335    }
336
337    /// Serialize the aggregated profile, adding the end time and duration.
338    /// # Arguments
339    /// * `end_time` - Optional end time of the profile. Passing None will use the current time.
340    /// * `duration` - Optional duration of the profile. Passing None will try to calculate the
341    ///   duration based on the end time minus the start time, but under anomalous conditions this
342    ///   may fail as system clocks can be adjusted. The programmer may also accidentally pass an
343    ///   earlier time. The duration will be set to zero these cases.
344    pub fn serialize_into_compressed_pprof(
345        self,
346        end_time: Option<SystemTime>,
347        duration: Option<Duration>,
348    ) -> anyhow::Result<EncodedProfile> {
349        // On 2023-08-23, we analyzed the uploaded tarball size per language.
350        // These tarballs include 1 or more profiles, but for most languages
351        // using libdatadog (all?) there is only 1 profile, so this is a good
352        // proxy for the compressed, final size of the profiles.
353        // We found that for all languages using libdatadog, the average
354        // tarball was at least 18 KiB. This initial size of 32KiB should
355        // definitely outperform starting at zero for time consumed, allocator
356        // pressure, and allocator fragmentation.
357        const INITIAL_PPROF_BUFFER_SIZE: usize = 32 * 1024;
358
359        // 2025-10-16: a profile larger than 10 MiB will be skipped, but a
360        // higher limit is accepted for upload. A limit of 16 MiB allows us to
361        // be a little bit decoupled from the exact limit so that if the
362        // backend decides to accept larger pprofs, clients don't have to be
363        // recompiled. But setting a much higher limit would be wasteful.
364        const MAX_PROFILE_SIZE: usize = 16 * 1024 * 1024;
365
366        let mut compressor = Compressor::<DefaultProfileCodec>::try_new(
367            INITIAL_PPROF_BUFFER_SIZE,
368            MAX_PROFILE_SIZE,
369            Self::COMPRESSION_LEVEL,
370        )
371        .context("failed to create compressor")?;
372
373        let mut encoded_profile = self.encode(&mut compressor, end_time, duration)?;
374        encoded_profile.buffer = compressor.finish()?;
375        Ok(encoded_profile)
376    }
377
    /// Encodes the profile. Note that the buffer will be empty. The caller
    /// needs to flush/finish the writer, then fill/replace the buffer.
    ///
    /// The profile is consumed and written to `writer` as a sequence of
    /// protobuf records: samples first, then sample types, mappings,
    /// locations, functions, strings, and finally the start time, duration
    /// and (if set) the period.
    ///
    /// # Arguments
    /// * `writer` - Destination of the encoded records.
    /// * `end_time` - End of the profile; the current time is used when `None`.
    /// * `duration` - Duration of the profile; when `None` it is computed as `end - start`, falling
    ///   back to zero if that computation fails.
    ///
    /// # Errors
    /// Fails if an id cannot be resolved, if a location id is out of range,
    /// if upscaling fails, or if writing to `writer` fails.
    fn encode<W: io::Write>(
        mut self,
        writer: &mut W,
        end_time: Option<SystemTime>,
        duration: Option<Duration>,
    ) -> anyhow::Result<EncodedProfile> {
        let end = end_time.unwrap_or_else(SystemTime::now);
        let start = self.start_time;
        // Move the stats out now so they can be returned after `self` has
        // been taken apart below.
        let endpoints_stats = std::mem::take(&mut self.endpoints.stats);
        // Nanoseconds are clamped to i64::MAX before the narrowing cast.
        let duration_nanos = duration
            .unwrap_or_else(|| {
                end.duration_since(start).unwrap_or({
                    // Let's not throw away the whole profile just because the clocks were wrong.
                    // todo: log that the clock went backward (or programmer mistake).
                    Duration::ZERO
                })
            })
            .as_nanos()
            .min(i64::MAX as u128) as i64;

        // For every label set, materialize its labels once (plus the endpoint
        // label, if one applies) so the sample loop below can index into it.
        let mut extended_label_sets: Vec<Vec<Label>> = Vec::with_capacity(self.label_sets.len());

        for label_set in std::mem::take(&mut self.label_sets) {
            let endpoint_label = self.get_endpoint_for_label_set(&label_set)?;
            // Leave one space for the timestamp if needed
            let mut labels = Vec::with_capacity(
                label_set.len() + 1 + if endpoint_label.is_some() { 1 } else { 0 },
            );
            for l in label_set.iter() {
                labels.push(*self.get_label(*l)?);
            }
            if let Some(endpoint_label) = endpoint_label {
                labels.push(endpoint_label);
            }
            extended_label_sets.push(labels);
        }

        let iter = std::mem::take(&mut self.observations).try_into_iter()?;
        for (sample, timestamp, mut values) in iter {
            let labels = &mut extended_label_sets[sample.labels.to_raw_id()];
            let location_ids: Vec<_> = self
                .get_stacktrace(sample.stacktrace)?
                .locations
                .iter()
                .map(Id::to_raw_id)
                .collect();
            self.check_location_ids_are_valid(&location_ids, self.locations.len())?;
            self.upscaling_rules.upscale_values(&mut values, labels)?;

            // Use the extra slot in the labels vector to store the timestamp without any reallocs.
            if let Some(ts) = timestamp {
                labels.push(Label::num(self.timestamp_key, ts.get(), StringId::ZERO))
            }
            let pprof_labels: Vec<_> = labels.iter().map(protobuf::Label::from).collect();
            // The timestamp label belongs to this sample only; remove it so
            // the shared label vector is unchanged for the next sample.
            if timestamp.is_some() {
                labels.pop();
            }

            let item = protobuf::Sample {
                location_ids: Record::from(location_ids.as_slice()),
                values: Record::from(values.as_slice()),
                // SAFETY: converting &[Label] to &[Field<Label,..>] which is
                // safe, because Field is repr(transparent).
                // NOTE(review): the comment above says `Field`, but the cast
                // targets `Record`; confirm `Record` is `#[repr(transparent)]`.
                labels: unsafe {
                    &*(pprof_labels.as_slice() as *const [protobuf::Label]
                        as *const [Record<protobuf::Label, 3, NO_OPT_ZERO>])
                },
            };

            Record::<_, 2, NO_OPT_ZERO>::from(item).encode(writer)?;
        }

        // `Sample`s must be emitted before `SampleTypes` since we consume
        // fields as we convert (using `into_iter`).  This allows Rust to
        // release memory faster, reducing our peak RSS, but means that we
        // must process fields in dependency order, regardless of the numeric
        // field index in the `pprof` protobuf.
        // It is valid to emit protobuf fields out of order. See example in:
        // https://protobuf.dev/programming-guides/encoding/#optional
        //
        // In this case, we use `sample_types` during upscaling of `samples`,
        // so we must serialize `Sample` before `SampleType`.
        for sample_type in self.sample_types.iter() {
            Record::<_, 1, NO_OPT_ZERO>::from(*sample_type).encode(writer)?;
        }

        // Mapping, location and function ids are emitted as offset + 1.
        for (offset, item) in self.mappings.into_iter().enumerate() {
            let mapping = protobuf::Mapping {
                id: Record::from((offset + 1) as u64),
                memory_start: Record::from(item.memory_start),
                memory_limit: Record::from(item.memory_limit),
                file_offset: Record::from(item.file_offset),
                filename: Record::from(item.filename),
                build_id: Record::from(item.build_id),
            };
            Record::<_, 3, NO_OPT_ZERO>::from(mapping).encode(writer)?;
        }

        for (offset, item) in self.locations.into_iter().enumerate() {
            let location = protobuf::Location {
                id: Record::from((offset + 1) as u64),
                // A location without a mapping is written with mapping id 0.
                mapping_id: Record::from(item.mapping_id.map(MappingId::into_raw_id).unwrap_or(0)),
                address: Record::from(item.address),
                line: Record::from(protobuf::Line {
                    function_id: Record::from(item.function_id.into_raw_id()),
                    lineno: Record::from(item.line),
                }),
            };
            Record::<_, 4, NO_OPT_ZERO>::from(location).encode(writer)?;
        }

        for (offset, item) in self.functions.into_iter().enumerate() {
            let function = protobuf::Function {
                id: Record::from((offset + 1) as u64),
                name: Record::from(item.name),
                system_name: Record::from(item.system_name),
                filename: Record::from(item.filename),
            };
            Record::<_, 5, NO_OPT_ZERO>::from(function).encode(writer)?;
        }

        let mut lender = self.strings.into_lending_iter();
        while let Some(item) = lender.next() {
            Record::<_, 6, NO_OPT_ZERO>::from(item).encode(writer)?;
        }

        // A start time before the Unix epoch is written as 0; otherwise the
        // nanoseconds are clamped to i64::MAX.
        let time_nanos = self
            .start_time
            .duration_since(SystemTime::UNIX_EPOCH)
            .map_or(0, |duration| {
                duration.as_nanos().min(i64::MAX as u128) as i64
            });

        Record::<_, 9, OPT_ZERO>::from(time_nanos).encode(writer)?;
        Record::<_, 10, OPT_ZERO>::from(duration_nanos).encode(writer)?;

        if let Some((period, period_type)) = self.period {
            Record::<_, 11, OPT_ZERO>::from(period_type).encode(writer)?;
            Record::<_, 12, OPT_ZERO>::from(period).encode(writer)?;
        };

        Ok(EncodedProfile {
            start,
            end,
            buffer: Vec::new(),
            endpoints_stats,
        })
    }
528
529    pub fn set_start_time(&mut self, start_time: SystemTime) -> anyhow::Result<()> {
530        self.start_time = start_time;
531        Ok(())
532    }
533
534    pub fn with_start_time(mut self, start_time: SystemTime) -> anyhow::Result<Self> {
535        self.set_start_time(start_time)?;
536        Ok(self)
537    }
538
539    /// In incident 35390 (JIRA PROF-11456) we observed invalid location_ids being present in
540    /// emitted profiles. We're doing extra checks here so that if we see incorrect ids again,
541    /// we are 100% sure they were not introduced prior to this stage.
542    fn check_location_ids_are_valid(&self, location_ids: &[u64], len: usize) -> anyhow::Result<()> {
543        let len: u64 = u64::try_from(len)?;
544        for id in location_ids.iter() {
545            let id = *id;
546            // Location ids start from 1, that's why they're <= len instead of < len
547            anyhow::ensure!(
548                id > 0 && id <= len,
549                "invalid location id found during serialization {:?}, len was {:?}",
550                id,
551                len
552            )
553        }
554        Ok(())
555    }
556}
557
558/// Private helper functions
559impl Profile {
560    fn try_add_function(&mut self, function: &api::Function) -> anyhow::Result<FunctionId> {
561        let name = self.try_intern(function.name)?;
562        let system_name = self.try_intern(function.system_name)?;
563        let filename = self.try_intern(function.filename)?;
564
565        self.functions.try_dedup(Function {
566            name,
567            system_name,
568            filename,
569        })
570    }
571
572    fn add_string_id_function(
573        &mut self,
574        function: &api::StringIdFunction,
575    ) -> anyhow::Result<FunctionId> {
576        let name = self.resolve(function.name)?;
577        let system_name = self.resolve(function.system_name)?;
578        let filename = self.resolve(function.filename)?;
579
580        self.functions.try_dedup(Function {
581            name,
582            system_name,
583            filename,
584        })
585    }
586
587    fn try_add_location(&mut self, location: &api::Location) -> anyhow::Result<LocationId> {
588        let mapping_id = self.try_add_mapping(&location.mapping)?;
589        let function_id = self.try_add_function(&location.function)?;
590        self.locations.checked_dedup(Location {
591            mapping_id,
592            function_id,
593            address: location.address,
594            line: location.line,
595        })
596    }
597
598    fn add_string_id_location(
599        &mut self,
600        location: &api::StringIdLocation,
601    ) -> anyhow::Result<LocationId> {
602        let mapping_id = self.add_string_id_mapping(&location.mapping)?;
603        let function_id = self.add_string_id_function(&location.function)?;
604        self.locations.checked_dedup(Location {
605            mapping_id,
606            function_id,
607            address: location.address,
608            line: location.line,
609        })
610    }
611
612    fn try_add_mapping(&mut self, mapping: &api::Mapping) -> anyhow::Result<Option<MappingId>> {
613        #[inline]
614        fn is_zero_mapping(mapping: &api::Mapping) -> bool {
615            // - PHP, Python, and Ruby use a mapping only as required.
616            // - .NET uses only the filename.
617            // - The native profiler uses all fields.
618            // We strike a balance for optimizing for the dynamic languages
619            // and the others by mixing branches and branchless programming.
620            let filename = mapping.filename.len();
621            let build_id = mapping.build_id.len();
622            if 0 != (filename | build_id) {
623                return false;
624            }
625
626            let memory_start = mapping.memory_start;
627            let memory_limit = mapping.memory_limit;
628            let file_offset = mapping.file_offset;
629            0 == (memory_start | memory_limit | file_offset)
630        }
631
632        if is_zero_mapping(mapping) {
633            return Ok(None);
634        }
635
636        let filename = self.try_intern(mapping.filename)?;
637        let build_id = self.try_intern(mapping.build_id)?;
638
639        let id = self.mappings.try_dedup(Mapping {
640            memory_start: mapping.memory_start,
641            memory_limit: mapping.memory_limit,
642            file_offset: mapping.file_offset,
643            filename,
644            build_id,
645        })?;
646        Ok(Some(id))
647    }
648
649    fn add_string_id_mapping(
650        &mut self,
651        mapping: &api::StringIdMapping,
652    ) -> anyhow::Result<Option<MappingId>> {
653        #[inline]
654        fn is_zero_mapping(mapping: &api::StringIdMapping) -> bool {
655            // See the other is_zero_mapping for more info, but only Ruby is
656            // using this API at the moment, so we optimize for the whole
657            // thing being a zero representation.
658            let memory_start = mapping.memory_start;
659            let memory_limit = mapping.memory_limit;
660            let file_offset = mapping.file_offset;
661            let strings = (mapping.filename.value | mapping.build_id.value) as u64;
662            0 == (memory_start | memory_limit | file_offset | strings)
663        }
664
665        if is_zero_mapping(mapping) {
666            return Ok(None);
667        }
668
669        let filename = self.resolve(mapping.filename)?;
670        let build_id = self.resolve(mapping.build_id)?;
671
672        let id = self.mappings.try_dedup(Mapping {
673            memory_start: mapping.memory_start,
674            memory_limit: mapping.memory_limit,
675            file_offset: mapping.file_offset,
676            filename,
677            build_id,
678        })?;
679        Ok(Some(id))
680    }
681
682    fn try_add_stacktrace(&mut self, locations: Vec<LocationId>) -> anyhow::Result<StackTraceId> {
683        self.stack_traces.try_dedup(StackTrace { locations })
684    }
685
686    #[inline]
687    fn backup_period(src: Option<api::Period>) -> Option<owned_types::Period> {
688        src.as_ref().map(owned_types::Period::from)
689    }
690
691    #[inline]
692    fn backup_sample_types(src: &[api::ValueType]) -> Option<Box<[owned_types::ValueType]>> {
693        Some(src.iter().map(owned_types::ValueType::from).collect())
694    }
695
696    /// Fetches the endpoint information for the label. There may be errors,
697    /// but there may also be no endpoint information for a given endpoint.
698    /// Hence, the return type of Result<Option<_>, _>.
699    fn get_endpoint_for_label(&self, label: &Label) -> anyhow::Result<Option<Label>> {
700        anyhow::ensure!(
701            label.get_key() == self.endpoints.local_root_span_id_label,
702            "bug: get_endpoint_for_label should only be called on labels with the key \"local root span id\""
703        );
704
705        anyhow::ensure!(
706            label.has_num_value(),
707            "the local root span id label value must be sent as a number, not a string, given {:?}",
708            label
709        );
710
711        let local_root_span_id = if let LabelValue::Num { num, .. } = label.get_value() {
712            // Safety: the value is an u64, but pprof only has signed values, so we
713            // transmute it; the backend does the same.
714            #[allow(
715                unknown_lints,
716                unnecessary_transmutes,
717                reason = "i64::cast_unsigned requires MSRV 1.87.0"
718            )]
719            unsafe {
720                std::mem::transmute::<i64, u64>(*num)
721            }
722        } else {
723            return Err(anyhow::format_err!("the local root span id label value must be sent as a number, not a string, given {:?}",
724            label));
725        };
726
727        Ok(self
728            .endpoints
729            .mappings
730            .get(&local_root_span_id)
731            .map(|v| Label::str(self.endpoints.endpoint_label, *v)))
732    }
733
734    fn get_endpoint_for_label_set(&self, label_set: &LabelSet) -> anyhow::Result<Option<Label>> {
735        if let Some(label) = label_set.iter().find_map(|id| {
736            if let Ok(label) = self.get_label(*id) {
737                if label.get_key() == self.endpoints.local_root_span_id_label {
738                    return Some(label);
739                }
740            }
741            None
742        }) {
743            self.get_endpoint_for_label(label)
744        } else {
745            Ok(None)
746        }
747    }
748
749    fn get_label(&self, id: LabelId) -> anyhow::Result<&Label> {
750        self.labels
751            .get_index(id.to_offset())
752            .context("LabelId to have a valid interned index")
753    }
754
755    #[allow(dead_code)]
756    fn get_label_set(&self, id: LabelSetId) -> anyhow::Result<&LabelSet> {
757        self.label_sets
758            .get_index(id.to_offset())
759            .context("LabelSetId to have a valid interned index")
760    }
761
762    fn get_stacktrace(&self, st: StackTraceId) -> anyhow::Result<&StackTrace> {
763        self.stack_traces
764            .get_index(st.to_raw_id())
765            .with_context(|| format!("StackTraceId {st:?} to exist in profile"))
766    }
767
768    /// Interns the `str` as a string, returning the id in the string table.
769    /// The empty string is guaranteed to have an id of [StringId::ZERO].
770    #[inline]
771    fn intern(&mut self, item: &str) -> StringId {
772        self.strings.intern(item)
773    }
774
775    /// Interns the `str` as a string, returning the id in the string table.
776    /// The empty string is guaranteed to have an id of [StringId::ZERO].
777    #[inline]
778    fn try_intern(&mut self, item: &str) -> Result<StringId, string_table::Error> {
779        self.strings.try_intern(item)
780    }
781
782    /// Creates a profile from the period, sample types, and start time using
783    /// the owned values.
784    fn try_new_internal(
785        owned_period: Option<owned_types::Period>,
786        owned_sample_types: Option<Box<[owned_types::ValueType]>>,
787        string_storage: Option<Arc<Mutex<ManagedStringStorage>>>,
788    ) -> io::Result<Self> {
789        let start_time = SystemTime::now();
790        let mut profile = Self {
791            owned_period,
792            owned_sample_types,
793            active_samples: Default::default(),
794            endpoints: Default::default(),
795            functions: Default::default(),
796            generation: Generation::new(),
797
798            labels: Default::default(),
799            label_sets: Default::default(),
800            locations: Default::default(),
801            mappings: Default::default(),
802            observations: Default::default(),
803            period: None,
804            sample_types: Box::new([]),
805            stack_traces: Default::default(),
806            start_time,
807            strings: Default::default(),
808            string_storage,
809            string_storage_cached_profile_id: None, /* Never reuse an id! See comments on
810                                                     * CachedProfileId for why. */
811            timestamp_key: Default::default(),
812            upscaling_rules: Default::default(),
813        };
814
815        let _id = profile.intern("");
816        debug_assert!(_id == StringId::ZERO);
817
818        profile.endpoints.local_root_span_id_label = profile.intern("local root span id");
819        profile.endpoints.endpoint_label = profile.intern("trace endpoint");
820        profile.timestamp_key = profile.intern("end_timestamp_ns");
821
822        // Break "cannot borrow `*self` as mutable because it is also borrowed
823        // as immutable" by moving it out, borrowing it, and putting it back.
824        let owned_sample_types = profile.owned_sample_types.take();
825        profile.sample_types = match &owned_sample_types {
826            None => Box::new([]),
827            Some(sample_types) => sample_types
828                .iter()
829                .map(|sample_type| ValueType {
830                    r#type: Record::from(profile.intern(&sample_type.typ)),
831                    unit: Record::from(profile.intern(&sample_type.unit)),
832                })
833                .collect(),
834        };
835        profile.owned_sample_types = owned_sample_types;
836
837        // Break "cannot borrow `*self` as mutable because it is also borrowed
838        // as immutable" by moving it out, borrowing it, and putting it back.
839        let owned_period = profile.owned_period.take();
840        if let Some(owned_types::Period { value, typ }) = &owned_period {
841            profile.period = Some((
842                *value,
843                ValueType {
844                    r#type: Record::from(profile.intern(&typ.typ)),
845                    unit: Record::from(profile.intern(&typ.unit)),
846                },
847            ));
848        };
849        profile.owned_period = owned_period;
850
851        profile.observations = Observations::try_new(profile.sample_types.len())?;
852        Ok(profile)
853    }
854
855    #[cfg(debug_assertions)]
856    fn validate_sample_labels(&mut self, sample: &api::Sample) -> anyhow::Result<()> {
857        let mut seen: HashMap<&str, &api::Label> = HashMap::new();
858
859        for label in sample.labels.iter() {
860            if let Some(duplicate) = seen.insert(label.key, label) {
861                anyhow::bail!("Duplicate label on sample: {duplicate:?} {label:?}");
862            }
863
864            if label.key == "local root span id" {
865                anyhow::ensure!(
866                    label.str.is_empty() && label.num != 0,
867                    "Invalid \"local root span id\" label: {label:?}"
868                );
869            }
870
871            anyhow::ensure!(
872                label.key != "end_timestamp_ns",
873                "Timestamp should not be passed as a label {label:?}"
874            );
875        }
876        Ok(())
877    }
878
879    fn validate_string_id_sample_labels(
880        &mut self,
881        sample: &api::StringIdSample,
882    ) -> anyhow::Result<()> {
883        let mut seen: HashMap<ManagedStringId, &api::StringIdLabel> = HashMap::new();
884
885        for label in sample.labels.iter() {
886            if let Some(duplicate) = seen.insert(label.key, label) {
887                anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label);
888            }
889
890            let key_id: StringId = self.resolve(label.key)?;
891
892            if key_id == self.endpoints.local_root_span_id_label {
893                anyhow::ensure!(
894                    label.str != ManagedStringId::empty() && label.num != 0,
895                    "Invalid \"local root span id\" label: {:?}",
896                    label
897                );
898            }
899
900            anyhow::ensure!(
901                key_id != self.timestamp_key,
902                "Timestamp should not be passed as a label {:?}",
903                label
904            );
905        }
906        Ok(())
907    }
908}
909
910/// For testing and debugging purposes
911impl Profile {
912    #[cfg(test)]
913    fn interned_strings_count(&self) -> usize {
914        self.strings.len()
915    }
916
917    // Ideally, these would be [cgf(test)]. But its used in other module's test
918    // code, which would break if we did so. We could try to do something with
919    // a test "feature", but this naming scheme is sufficient for now.
920    pub fn only_for_testing_num_aggregated_samples(&self) -> usize {
921        self.observations.aggregated_samples_count()
922    }
923
924    pub fn only_for_testing_num_timestamped_samples(&self) -> usize {
925        self.observations.timestamped_samples_count()
926    }
927}
928
929#[cfg(test)]
930mod api_tests {
931    use super::*;
932    use crate::pprof::test_utils::{roundtrip_to_pprof, sorted_samples, string_table_fetch};
933    use libdd_profiling_protobuf::prost_impls;
934
935    #[test]
936    fn interning() {
937        let sample_types = [api::ValueType::new("samples", "count")];
938        let mut profiles = Profile::new(&sample_types, None);
939
940        let expected_id = StringId::from_offset(profiles.interned_strings_count());
941
942        let string = "a";
943        let id1 = profiles.intern(string);
944        let id2 = profiles.intern(string);
945
946        assert_eq!(id1, id2);
947        assert_eq!(id1, expected_id);
948    }
949
950    #[test]
951    fn api() {
952        let sample_types = [
953            api::ValueType::new("samples", "count"),
954            api::ValueType::new("wall-time", "nanoseconds"),
955        ];
956
957        let mapping = api::Mapping {
958            filename: "php",
959            ..Default::default()
960        };
961
962        let index = api::Function {
963            filename: "index.php",
964            ..Default::default()
965        };
966
967        let locations = vec![
968            api::Location {
969                mapping,
970                function: api::Function {
971                    name: "phpinfo",
972                    system_name: "phpinfo",
973                    filename: "index.php",
974                },
975                ..Default::default()
976            },
977            api::Location {
978                mapping,
979                function: index,
980                line: 3,
981                ..Default::default()
982            },
983        ];
984
985        let mut profile = Profile::new(&sample_types, None);
986        assert_eq!(profile.only_for_testing_num_aggregated_samples(), 0);
987
988        profile
989            .try_add_sample(
990                api::Sample {
991                    locations,
992                    values: &[1, 10000],
993                    labels: vec![],
994                },
995                None,
996            )
997            .expect("add to succeed");
998
999        assert_eq!(profile.only_for_testing_num_aggregated_samples(), 1);
1000    }
1001
1002    fn provide_distinct_locations() -> Profile {
1003        let sample_types = [api::ValueType::new("samples", "count")];
1004
1005        let mapping = api::Mapping {
1006            filename: "php",
1007            ..Default::default()
1008        };
1009
1010        let main_locations = vec![api::Location {
1011            mapping,
1012            function: api::Function {
1013                name: "{main}",
1014                system_name: "{main}",
1015                filename: "index.php",
1016            },
1017            ..Default::default()
1018        }];
1019        let test_locations = vec![api::Location {
1020            mapping,
1021            function: api::Function {
1022                name: "test",
1023                system_name: "test",
1024                filename: "index.php",
1025            },
1026            ..Default::default()
1027        }];
1028        let timestamp_locations = vec![api::Location {
1029            mapping,
1030            function: api::Function {
1031                name: "test",
1032                system_name: "test",
1033                filename: "index.php",
1034            },
1035            ..Default::default()
1036        }];
1037
1038        let values = &[1];
1039        let labels = vec![api::Label {
1040            key: "pid",
1041            num: 101,
1042            ..Default::default()
1043        }];
1044
1045        let main_sample = api::Sample {
1046            locations: main_locations,
1047            values,
1048            labels: labels.clone(),
1049        };
1050
1051        let test_sample = api::Sample {
1052            locations: test_locations,
1053            values,
1054            labels: labels.clone(),
1055        };
1056
1057        let timestamp_sample = api::Sample {
1058            locations: timestamp_locations,
1059            values,
1060            labels,
1061        };
1062
1063        let mut profile = Profile::new(&sample_types, None);
1064        assert_eq!(profile.only_for_testing_num_aggregated_samples(), 0);
1065
1066        profile
1067            .try_add_sample(main_sample, None)
1068            .expect("profile to not be full");
1069        assert_eq!(profile.only_for_testing_num_aggregated_samples(), 1);
1070
1071        profile
1072            .try_add_sample(test_sample, None)
1073            .expect("profile to not be full");
1074        assert_eq!(profile.only_for_testing_num_aggregated_samples(), 2);
1075
1076        assert_eq!(profile.only_for_testing_num_timestamped_samples(), 0);
1077        profile
1078            .try_add_sample(timestamp_sample, Timestamp::new(42))
1079            .expect("profile to not be full");
1080        assert_eq!(profile.only_for_testing_num_timestamped_samples(), 1);
1081        profile
1082    }
1083
1084    #[test]
1085    fn impl_from_profile_for_pprof_profile() {
1086        let locations = provide_distinct_locations();
1087        let profile = roundtrip_to_pprof(locations).unwrap();
1088
1089        assert_eq!(profile.samples.len(), 3);
1090        assert_eq!(profile.mappings.len(), 1);
1091        assert_eq!(profile.locations.len(), 2); // one of them dedups
1092        assert_eq!(profile.functions.len(), 2);
1093
1094        for (index, mapping) in profile.mappings.iter().enumerate() {
1095            assert_eq!(
1096                (index + 1) as u64,
1097                mapping.id,
1098                "id {id} didn't match offset {offset} for {mapping:#?}",
1099                id = mapping.id,
1100                offset = index + 1
1101            );
1102        }
1103
1104        for (index, location) in profile.locations.iter().enumerate() {
1105            assert_eq!((index + 1) as u64, location.id);
1106        }
1107
1108        for (index, function) in profile.functions.iter().enumerate() {
1109            assert_eq!((index + 1) as u64, function.id);
1110        }
1111        let samples = sorted_samples(&profile);
1112
1113        let sample = samples.first().expect("index 0 to exist");
1114        assert_eq!(sample.labels.len(), 1);
1115        let label = sample.labels.first().expect("index 0 to exist");
1116        let actual = api::Label {
1117            key: string_table_fetch(&profile, label.key),
1118            str: string_table_fetch(&profile, label.str),
1119            num: label.num,
1120            num_unit: string_table_fetch(&profile, label.num_unit),
1121        };
1122        let expected = api::Label {
1123            key: "pid",
1124            str: "",
1125            num: 101,
1126            num_unit: "",
1127        };
1128        assert_eq!(expected, actual);
1129
1130        let sample = samples.get(2).expect("index 2 to exist");
1131        assert_eq!(sample.labels.len(), 2);
1132        let label = sample.labels.first().expect("index 0 to exist");
1133        let actual = api::Label {
1134            key: string_table_fetch(&profile, label.key),
1135            str: string_table_fetch(&profile, label.str),
1136            num: label.num,
1137            num_unit: string_table_fetch(&profile, label.num_unit),
1138        };
1139        let expected = api::Label {
1140            key: "pid",
1141            str: "",
1142            num: 101,
1143            num_unit: "",
1144        };
1145        assert_eq!(expected, actual);
1146
1147        let label = sample.labels.get(1).expect("index 1 to exist");
1148        let actual = api::Label {
1149            key: string_table_fetch(&profile, label.key),
1150            str: string_table_fetch(&profile, label.str),
1151            num: label.num,
1152            num_unit: string_table_fetch(&profile, label.num_unit),
1153        };
1154        let expected = api::Label {
1155            key: "end_timestamp_ns",
1156            str: "",
1157            num: 42,
1158            num_unit: "",
1159        };
1160        assert_eq!(expected, actual);
1161        let key = string_table_fetch(&profile, label.key);
1162        let str = string_table_fetch(&profile, label.str);
1163        let num_unit = string_table_fetch(&profile, label.num_unit);
1164        assert_eq!(key, "end_timestamp_ns");
1165        assert_eq!(label.num, 42);
1166        assert_eq!(str, "");
1167        assert_eq!(num_unit, "");
1168    }
1169
1170    #[test]
1171    fn reset() {
1172        let mut profile = provide_distinct_locations();
1173        /* This set of asserts is to make sure it's a non-empty profile that we
1174         * are working with so that we can test that reset works.
1175         */
1176        assert!(!profile.functions.is_empty());
1177        assert!(!profile.labels.is_empty());
1178        assert!(!profile.label_sets.is_empty());
1179        assert!(!profile.locations.is_empty());
1180        assert!(!profile.mappings.is_empty());
1181        assert!(!profile.observations.is_empty());
1182        assert!(!profile.sample_types.as_ref().is_empty());
1183        assert!(profile.period.is_none());
1184        assert!(profile.endpoints.mappings.is_empty());
1185        assert!(profile.endpoints.stats.is_empty());
1186
1187        let prev = profile
1188            .reset_and_return_previous()
1189            .expect("reset to succeed");
1190
1191        // These should all be empty now
1192        assert!(profile.functions.is_empty());
1193        assert!(profile.labels.is_empty());
1194        assert!(profile.label_sets.is_empty());
1195        assert!(profile.locations.is_empty());
1196        assert!(profile.mappings.is_empty());
1197        assert!(profile.observations.is_empty());
1198        assert!(profile.endpoints.mappings.is_empty());
1199        assert!(profile.endpoints.stats.is_empty());
1200        assert!(profile.upscaling_rules.is_empty());
1201
1202        assert_eq!(profile.period, prev.period);
1203        assert_eq!(profile.sample_types, prev.sample_types);
1204
1205        // The string table should have at least the empty string.
1206        assert!(profile.strings.len() > 0);
1207    }
1208
1209    #[test]
1210    fn reset_period() {
1211        /* The previous test (reset) checked quite a few properties already, so
1212         * this one will focus only on the period.
1213         */
1214        let sample_types = [api::ValueType::new("wall-time", "nanoseconds")];
1215        let period = api::Period {
1216            r#type: sample_types[0],
1217            value: 10_000_000,
1218        };
1219        let mut profile = Profile::new(&sample_types, Some(period));
1220
1221        let prev = profile
1222            .reset_and_return_previous()
1223            .expect("reset to succeed");
1224
1225        // Resolve the string values to check that they match (their string
1226        // table offsets may not match).
1227        let mut strings: Vec<Box<str>> = Vec::with_capacity(profile.strings.len());
1228        let mut strings_iter = profile.strings.into_lending_iter();
1229        while let Some(item) = strings_iter.next() {
1230            strings.push(Box::from(item));
1231        }
1232
1233        for (value, period_type) in [profile.period.unwrap(), prev.period.unwrap()] {
1234            assert_eq!(value, period.value);
1235            let r#type: &str = &strings[usize::from(period_type.r#type.value)];
1236            let unit: &str = &strings[usize::from(period_type.unit.value)];
1237            assert_eq!(r#type, period.r#type.r#type);
1238            assert_eq!(unit, period.r#type.unit);
1239        }
1240    }
1241
1242    #[test]
1243    fn adding_local_root_span_id_with_string_value_fails() {
1244        let sample_types = [api::ValueType::new("wall-time", "nanoseconds")];
1245
1246        let mut profile: Profile = Profile::new(&sample_types, None);
1247
1248        let id_label = api::Label {
1249            key: "local root span id",
1250            str: "10", // bad value, should use .num instead for local root span id
1251            num: 0,
1252            num_unit: "",
1253        };
1254
1255        let sample = api::Sample {
1256            locations: vec![],
1257            values: &[1, 10000],
1258            labels: vec![id_label],
1259        };
1260
1261        assert!(profile.try_add_sample(sample, None).is_err());
1262    }
1263
1264    #[test]
1265    fn lazy_endpoints() -> anyhow::Result<()> {
1266        let sample_types = [
1267            api::ValueType::new("samples", "count"),
1268            api::ValueType::new("wall-time", "nanoseconds"),
1269        ];
1270
1271        let mut profile: Profile = Profile::new(&sample_types, None);
1272
1273        let id_label = api::Label {
1274            key: "local root span id",
1275            str: "",
1276            num: 10,
1277            num_unit: "",
1278        };
1279
1280        let id2_label = api::Label {
1281            key: "local root span id",
1282            str: "",
1283            num: 11,
1284            num_unit: "",
1285        };
1286
1287        let other_label = api::Label {
1288            key: "other",
1289            str: "test",
1290            num: 0,
1291            num_unit: "",
1292        };
1293
1294        let sample1 = api::Sample {
1295            locations: vec![],
1296            values: &[1, 10000],
1297            labels: vec![id_label, other_label],
1298        };
1299
1300        let sample2 = api::Sample {
1301            locations: vec![],
1302            values: &[1, 10000],
1303            labels: vec![id2_label, other_label],
1304        };
1305
1306        profile
1307            .try_add_sample(sample1, None)
1308            .expect("add to success");
1309
1310        profile
1311            .try_add_sample(sample2, None)
1312            .expect("add to success");
1313
1314        profile.add_endpoint(10, Cow::from("my endpoint"))?;
1315
1316        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1317        assert_eq!(serialized_profile.samples.len(), 2);
1318        let samples = sorted_samples(&serialized_profile);
1319
1320        let s1 = samples.first().expect("sample");
1321
1322        // The trace endpoint label should be added to the first sample
1323        assert_eq!(s1.labels.len(), 3);
1324
1325        let l1 = s1.labels.first().expect("label");
1326
1327        assert_eq!(
1328            string_table_fetch(&serialized_profile, l1.key),
1329            "local root span id"
1330        );
1331        assert_eq!(l1.num, 10);
1332
1333        let l2 = s1.labels.get(1).expect("label");
1334
1335        assert_eq!(string_table_fetch(&serialized_profile, l2.key), "other");
1336        assert_eq!(string_table_fetch(&serialized_profile, l2.str), "test");
1337
1338        let l3 = s1.labels.get(2).expect("label");
1339
1340        assert_eq!(
1341            string_table_fetch(&serialized_profile, l3.key),
1342            "trace endpoint"
1343        );
1344        assert_eq!(
1345            string_table_fetch(&serialized_profile, l3.str),
1346            "my endpoint"
1347        );
1348
1349        let s2 = samples.get(1).expect("sample");
1350
1351        // The trace endpoint label shouldn't be added to second sample because the span id doesn't
1352        // match
1353        assert_eq!(s2.labels.len(), 2);
1354        Ok(())
1355    }
1356
1357    #[test]
1358    fn endpoint_counts_empty_test() {
1359        let sample_types = [
1360            api::ValueType::new("samples", "count"),
1361            api::ValueType::new("wall-time", "nanoseconds"),
1362        ];
1363
1364        let profile: Profile = Profile::new(&sample_types, None);
1365
1366        let encoded_profile = profile
1367            .serialize_into_compressed_pprof(None, None)
1368            .expect("Unable to encode/serialize the profile");
1369
1370        let endpoints_stats = encoded_profile.endpoints_stats;
1371        assert!(endpoints_stats.is_empty());
1372    }
1373
1374    #[test]
1375    fn endpoint_counts_test() -> anyhow::Result<()> {
1376        let sample_types = [
1377            api::ValueType::new("samples", "count"),
1378            api::ValueType::new("wall-time", "nanoseconds"),
1379        ];
1380
1381        let mut profile: Profile = Profile::new(&sample_types, None);
1382
1383        let one_endpoint = "my endpoint";
1384        profile.add_endpoint_count(Cow::from(one_endpoint), 1)?;
1385        profile.add_endpoint_count(Cow::from(one_endpoint), 1)?;
1386
1387        let second_endpoint = "other endpoint";
1388        profile.add_endpoint_count(Cow::from(second_endpoint), 1)?;
1389
1390        let encoded_profile = profile
1391            .serialize_into_compressed_pprof(None, None)
1392            .expect("Unable to encode/serialize the profile");
1393
1394        let endpoints_stats = encoded_profile.endpoints_stats;
1395
1396        let mut count: HashMap<String, i64> = HashMap::new();
1397        count.insert(one_endpoint.to_string(), 2);
1398        count.insert(second_endpoint.to_string(), 1);
1399
1400        let expected_endpoints_stats = ProfiledEndpointsStats::from(count);
1401
1402        assert_eq!(endpoints_stats, expected_endpoints_stats);
1403        Ok(())
1404    }
1405
1406    #[test]
1407    fn local_root_span_id_label_cannot_occur_more_than_once() {
1408        let sample_types = [api::ValueType::new("wall-time", "nanoseconds")];
1409
1410        let mut profile: Profile = Profile::new(&sample_types, None);
1411
1412        let labels = vec![
1413            api::Label {
1414                key: "local root span id",
1415                str: "",
1416                num: 5738080760940355267_i64,
1417                num_unit: "",
1418            },
1419            api::Label {
1420                key: "local root span id",
1421                str: "",
1422                num: 8182855815056056749_i64,
1423                num_unit: "",
1424            },
1425        ];
1426
1427        let sample = api::Sample {
1428            locations: vec![],
1429            values: &[10000],
1430            labels,
1431        };
1432
1433        profile.try_add_sample(sample, None).unwrap_err();
1434    }
1435
1436    #[test]
1437    fn test_no_upscaling_if_no_rules() {
1438        let sample_types = vec![
1439            api::ValueType::new("samples", "count"),
1440            api::ValueType::new("wall-time", "nanoseconds"),
1441        ];
1442
1443        let mut profile: Profile = Profile::new(&sample_types, None);
1444
1445        let id_label = api::Label {
1446            key: "my label",
1447            str: "coco",
1448            num: 0,
1449            num_unit: "",
1450        };
1451
1452        let sample1 = api::Sample {
1453            locations: vec![],
1454            values: &[1, 10000],
1455            labels: vec![id_label],
1456        };
1457
1458        profile
1459            .try_add_sample(sample1, None)
1460            .expect("add to success");
1461
1462        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1463
1464        assert_eq!(serialized_profile.samples.len(), 1);
1465        let first = serialized_profile.samples.first().expect("one sample");
1466
1467        assert_eq!(first.values[0], 1);
1468        assert_eq!(first.values[1], 10000);
1469    }
1470
1471    fn create_samples_types() -> Vec<api::ValueType<'static>> {
1472        vec![
1473            api::ValueType::new("samples", "count"),
1474            api::ValueType::new("wall-time", "nanoseconds"),
1475            api::ValueType::new("cpu-time", "nanoseconds"),
1476        ]
1477    }
1478
1479    fn create_label(key: &'static str, str: &'static str) -> api::Label<'static> {
1480        api::Label {
1481            key,
1482            str,
1483            num: 0,
1484            num_unit: "",
1485        }
1486    }
1487
1488    #[test]
1489    fn test_upscaling_by_value_a_zero_value() {
1490        let sample_types = create_samples_types();
1491
1492        let mut profile = Profile::new(&sample_types, None);
1493
1494        let sample1 = api::Sample {
1495            locations: vec![],
1496            values: &[0, 10000, 42],
1497            labels: vec![],
1498        };
1499
1500        profile
1501            .try_add_sample(sample1, None)
1502            .expect("add to success");
1503
1504        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1505        let values_offset = vec![0];
1506        profile
1507            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1508            .expect("Rule added");
1509
1510        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1511
1512        assert_eq!(serialized_profile.samples.len(), 1);
1513        let first = serialized_profile.samples.first().expect("one sample");
1514
1515        assert_eq!(first.values, vec![0, 10000, 42]);
1516    }
1517
1518    #[test]
1519    fn test_upscaling_by_value_on_one_value() {
1520        let sample_types = create_samples_types();
1521
1522        let mut profile: Profile = Profile::new(&sample_types, None);
1523
1524        let sample1 = api::Sample {
1525            locations: vec![],
1526            values: &[1, 10000, 42],
1527            labels: vec![],
1528        };
1529
1530        profile
1531            .try_add_sample(sample1, None)
1532            .expect("add to success");
1533
1534        let upscaling_info = UpscalingInfo::Proportional { scale: 2.7 };
1535        let values_offset = vec![0];
1536        profile
1537            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1538            .expect("Rule added");
1539
1540        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1541
1542        assert_eq!(serialized_profile.samples.len(), 1);
1543        let first = serialized_profile.samples.first().expect("one sample");
1544
1545        assert_eq!(first.values, vec![3, 10000, 42]);
1546    }
1547
1548    #[test]
1549    fn test_upscaling_by_value_on_one_value_with_poisson() {
1550        let sample_types = create_samples_types();
1551
1552        let mut profile = Profile::new(&sample_types, None);
1553
1554        let sample1 = api::Sample {
1555            locations: vec![],
1556            values: &[1, 16, 29],
1557            labels: vec![],
1558        };
1559
1560        profile
1561            .try_add_sample(sample1, None)
1562            .expect("add to success");
1563
1564        let upscaling_info = UpscalingInfo::Poisson {
1565            sum_value_offset: 1,
1566            count_value_offset: 2,
1567            sampling_distance: 10,
1568        };
1569        let values_offset: Vec<usize> = vec![1];
1570        profile
1571            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1572            .expect("Rule added");
1573
1574        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1575
1576        assert_eq!(serialized_profile.samples.len(), 1);
1577        let first = serialized_profile.samples.first().expect("one sample");
1578
1579        assert_eq!(first.values, vec![1, 298, 29]);
1580    }
1581
1582    #[test]
1583    fn test_upscaling_by_value_on_one_value_with_poisson_count() {
1584        let sample_types = create_samples_types();
1585
1586        let mut profile = Profile::new(&sample_types, None);
1587
1588        let sample1 = api::Sample {
1589            locations: vec![],
1590            values: &[1, 16, 29],
1591            labels: vec![],
1592        };
1593
1594        profile
1595            .try_add_sample(sample1, None)
1596            .expect("add to success");
1597
1598        let upscaling_info = UpscalingInfo::PoissonNonSampleTypeCount {
1599            sum_value_offset: 1,
1600            count_value: 29,
1601            sampling_distance: 10,
1602        };
1603        let values_offset: Vec<usize> = vec![1];
1604        profile
1605            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1606            .expect("Rule added");
1607
1608        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1609
1610        assert_eq!(serialized_profile.samples.len(), 1);
1611        let first = serialized_profile.samples.first().expect("one sample");
1612
1613        assert_eq!(first.values, vec![1, 298, 29]);
1614    }
1615
1616    #[test]
1617    fn test_upscaling_by_value_on_zero_value_with_poisson() {
1618        let sample_types = create_samples_types();
1619
1620        let mut profile = Profile::new(&sample_types, None);
1621
1622        let sample1 = api::Sample {
1623            locations: vec![],
1624            values: &[1, 16, 0],
1625            labels: vec![],
1626        };
1627
1628        profile
1629            .try_add_sample(sample1, None)
1630            .expect("add to success");
1631
1632        let upscaling_info = UpscalingInfo::Poisson {
1633            sum_value_offset: 1,
1634            count_value_offset: 2,
1635            sampling_distance: 10,
1636        };
1637        let values_offset: Vec<usize> = vec![1];
1638        profile
1639            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1640            .expect("Rule added");
1641
1642        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1643
1644        assert_eq!(serialized_profile.samples.len(), 1);
1645        let first = serialized_profile.samples.first().expect("one sample");
1646
1647        assert_eq!(first.values, vec![1, 16, 0]);
1648    }
1649
1650    #[test]
1651    fn test_cannot_add_a_rule_with_invalid_poisson_info() {
1652        let sample_types = create_samples_types();
1653
1654        let mut profile: Profile = Profile::new(&sample_types, None);
1655
1656        let sample1 = api::Sample {
1657            locations: vec![],
1658            values: &[1, 16, 0],
1659            labels: vec![],
1660        };
1661
1662        profile
1663            .try_add_sample(sample1, None)
1664            .expect("add to success");
1665
1666        // invalid sampling_distance value
1667        let upscaling_info = UpscalingInfo::Poisson {
1668            sum_value_offset: 1,
1669            count_value_offset: 2,
1670            sampling_distance: 0,
1671        };
1672
1673        let values_offset: Vec<usize> = vec![1];
1674        profile
1675            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1676            .expect_err("Cannot add a rule if sampling_distance is equal to 0");
1677
1678        // x value is greater than the number of value types
1679        let upscaling_info2 = UpscalingInfo::Poisson {
1680            sum_value_offset: 42,
1681            count_value_offset: 2,
1682            sampling_distance: 10,
1683        };
1684        profile
1685            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info2)
1686            .expect_err("Cannot add a rule if the offset x is invalid");
1687
1688        // y value is greater than the number of value types
1689        let upscaling_info3 = UpscalingInfo::Poisson {
1690            sum_value_offset: 1,
1691            count_value_offset: 42,
1692            sampling_distance: 10,
1693        };
1694        profile
1695            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info3)
1696            .expect_err("Cannot add a rule if the offset y is invalid");
1697    }
1698
1699    #[test]
1700    fn test_upscaling_by_value_on_two_values() {
1701        let sample_types = create_samples_types();
1702
1703        let mut profile: Profile = Profile::new(&sample_types, None);
1704
1705        let sample1 = api::Sample {
1706            locations: vec![],
1707            values: &[1, 10000, 21],
1708            labels: vec![],
1709        };
1710
1711        let mapping = api::Mapping {
1712            filename: "php",
1713            ..Default::default()
1714        };
1715
1716        let main_locations = vec![api::Location {
1717            mapping,
1718            function: api::Function {
1719                name: "{main}",
1720                system_name: "{main}",
1721                filename: "index.php",
1722            },
1723            address: 0,
1724            line: 0,
1725        }];
1726
1727        let sample2 = api::Sample {
1728            locations: main_locations,
1729            values: &[5, 24, 99],
1730            labels: vec![],
1731        };
1732
1733        profile
1734            .try_add_sample(sample1, None)
1735            .expect("add to success");
1736        profile
1737            .try_add_sample(sample2, None)
1738            .expect("add to success");
1739
1740        // upscale the first value and the last one
1741        let values_offset: Vec<usize> = vec![0, 2];
1742
1743        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1744        profile
1745            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1746            .expect("Rule added");
1747
1748        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1749        let samples = sorted_samples(&serialized_profile);
1750        let first = samples.first().expect("first sample");
1751
1752        assert_eq!(first.values, vec![2, 10000, 42]);
1753
1754        let second = samples.get(1).expect("second sample");
1755
1756        assert_eq!(second.values, vec![10, 24, 198]);
1757    }
1758
1759    #[test]
1760    fn test_upscaling_by_value_on_two_value_with_two_rules() {
1761        let sample_types = create_samples_types();
1762
1763        let mut profile: Profile = Profile::new(&sample_types, None);
1764
1765        let sample1 = api::Sample {
1766            locations: vec![],
1767            values: &[1, 10000, 21],
1768            labels: vec![],
1769        };
1770
1771        let mapping = api::Mapping {
1772            filename: "php",
1773            ..Default::default()
1774        };
1775
1776        let main_locations = vec![api::Location {
1777            mapping,
1778            function: api::Function {
1779                name: "{main}",
1780                system_name: "{main}",
1781                filename: "index.php",
1782            },
1783            ..Default::default()
1784        }];
1785
1786        let sample2 = api::Sample {
1787            locations: main_locations,
1788            values: &[5, 24, 99],
1789            labels: vec![],
1790        };
1791
1792        profile
1793            .try_add_sample(sample1, None)
1794            .expect("add to success");
1795        profile
1796            .try_add_sample(sample2, None)
1797            .expect("add to success");
1798
1799        let mut values_offset: Vec<usize> = vec![0];
1800
1801        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1802        profile
1803            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info)
1804            .expect("Rule added");
1805
1806        // add another byvaluerule on the 3rd offset
1807        values_offset.clear();
1808        values_offset.push(2);
1809
1810        let upscaling_info2 = UpscalingInfo::Proportional { scale: 5.0 };
1811
1812        profile
1813            .add_upscaling_rule(values_offset.as_slice(), "", "", upscaling_info2)
1814            .expect("Rule added");
1815
1816        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1817        let samples = sorted_samples(&serialized_profile);
1818        let first = samples.first().expect("first sample");
1819
1820        assert_eq!(first.values, vec![2, 10000, 105]);
1821
1822        let second = samples.get(1).expect("second sample");
1823
1824        assert_eq!(second.values, vec![10, 24, 495]);
1825    }
1826
1827    #[test]
1828    fn test_no_upscaling_by_label_if_no_match() {
1829        let sample_types = create_samples_types();
1830
1831        let mut profile: Profile = Profile::new(&sample_types, None);
1832
1833        let id_label = create_label("my_label", "coco");
1834
1835        let sample1 = api::Sample {
1836            locations: vec![],
1837            values: &[1, 10000, 42],
1838            labels: vec![id_label],
1839        };
1840
1841        profile
1842            .try_add_sample(sample1, None)
1843            .expect("add to success");
1844
1845        let values_offset: Vec<usize> = vec![0];
1846
1847        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1848        profile
1849            .add_upscaling_rule(
1850                values_offset.as_slice(),
1851                "my label",
1852                "foobar",
1853                upscaling_info,
1854            )
1855            .expect("Rule added");
1856
1857        let upscaling_info2 = UpscalingInfo::Proportional { scale: 2.0 };
1858        profile
1859            .add_upscaling_rule(
1860                values_offset.as_slice(),
1861                "my other label",
1862                "coco",
1863                upscaling_info2,
1864            )
1865            .expect("Rule added");
1866
1867        let upscaling_info3 = UpscalingInfo::Proportional { scale: 2.0 };
1868        profile
1869            .add_upscaling_rule(
1870                values_offset.as_slice(),
1871                "my other label",
1872                "foobar",
1873                upscaling_info3,
1874            )
1875            .expect("Rule added");
1876
1877        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1878
1879        assert_eq!(serialized_profile.samples.len(), 1);
1880        let first = serialized_profile.samples.first().expect("one sample");
1881
1882        assert_eq!(first.values, vec![1, 10000, 42]);
1883    }
1884
1885    #[test]
1886    fn test_upscaling_by_label_on_one_value() {
1887        let sample_types = create_samples_types();
1888
1889        let mut profile: Profile = Profile::new(&sample_types, None);
1890
1891        let id_label = create_label("my label", "coco");
1892
1893        let sample1 = api::Sample {
1894            locations: vec![],
1895            values: &[1, 10000, 42],
1896            labels: vec![id_label],
1897        };
1898
1899        profile
1900            .try_add_sample(sample1, None)
1901            .expect("add to success");
1902
1903        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1904        let values_offset: Vec<usize> = vec![0];
1905        profile
1906            .add_upscaling_rule(
1907                values_offset.as_slice(),
1908                id_label.key,
1909                id_label.str,
1910                upscaling_info,
1911            )
1912            .expect("Rule added");
1913
1914        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1915
1916        assert_eq!(serialized_profile.samples.len(), 1);
1917        let first = serialized_profile.samples.first().expect("one sample");
1918
1919        assert_eq!(first.values, vec![2, 10000, 42]);
1920    }
1921
1922    #[test]
1923    fn test_upscaling_by_label_on_only_sample_out_of_two() {
1924        let sample_types = create_samples_types();
1925
1926        let mut profile: Profile = Profile::new(&sample_types, None);
1927
1928        let id_label = create_label("my label", "coco");
1929
1930        let sample1 = api::Sample {
1931            locations: vec![],
1932            values: &[1, 10000, 42],
1933            labels: vec![id_label],
1934        };
1935
1936        let mapping = api::Mapping {
1937            filename: "php",
1938            ..Default::default()
1939        };
1940
1941        let main_locations = vec![api::Location {
1942            mapping,
1943            function: api::Function {
1944                name: "{main}",
1945                system_name: "{main}",
1946                filename: "index.php",
1947            },
1948            ..Default::default()
1949        }];
1950
1951        let sample2 = api::Sample {
1952            locations: main_locations,
1953            values: &[5, 24, 99],
1954            labels: vec![],
1955        };
1956
1957        profile
1958            .try_add_sample(sample1, None)
1959            .expect("add to success");
1960        profile
1961            .try_add_sample(sample2, None)
1962            .expect("add to success");
1963
1964        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
1965        let values_offset: Vec<usize> = vec![0];
1966        profile
1967            .add_upscaling_rule(
1968                values_offset.as_slice(),
1969                id_label.key,
1970                id_label.str,
1971                upscaling_info,
1972            )
1973            .expect("Rule added");
1974
1975        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
1976        let samples = sorted_samples(&serialized_profile);
1977
1978        let first = samples.first().expect("one sample");
1979
1980        assert_eq!(first.values, vec![2, 10000, 42]);
1981
1982        let second = samples.get(1).expect("one sample");
1983
1984        assert_eq!(second.values, vec![5, 24, 99]);
1985    }
1986
1987    #[test]
1988    fn test_upscaling_by_label_with_two_different_rules_on_two_different_sample() {
1989        let sample_types = create_samples_types();
1990
1991        let mut profile: Profile = Profile::new(&sample_types, None);
1992
1993        let id_no_match_label = create_label("another label", "do not care");
1994
1995        let id_label = create_label("my label", "coco");
1996
1997        let sample1 = api::Sample {
1998            locations: vec![],
1999            values: &[1, 10000, 42],
2000            labels: vec![id_label, id_no_match_label],
2001        };
2002
2003        let mapping = api::Mapping {
2004            filename: "php",
2005            ..Default::default()
2006        };
2007
2008        let main_locations = vec![api::Location {
2009            mapping,
2010            function: api::Function {
2011                name: "{main}",
2012                system_name: "{main}",
2013                filename: "index.php",
2014            },
2015            ..Default::default()
2016        }];
2017
2018        let id_label2 = api::Label {
2019            key: "my other label",
2020            str: "foobar",
2021            num: 10,
2022            num_unit: "",
2023        };
2024
2025        let sample2 = api::Sample {
2026            locations: main_locations,
2027            values: &[5, 24, 99],
2028            labels: vec![id_no_match_label, id_label2],
2029        };
2030
2031        profile
2032            .try_add_sample(sample1, None)
2033            .expect("add to success");
2034        profile
2035            .try_add_sample(sample2, None)
2036            .expect("add to success");
2037
2038        // add rule for the first sample on the 1st value
2039        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2040        let mut values_offset: Vec<usize> = vec![0];
2041        profile
2042            .add_upscaling_rule(
2043                values_offset.as_slice(),
2044                id_label.key,
2045                id_label.str,
2046                upscaling_info,
2047            )
2048            .expect("Rule added");
2049
2050        // add rule for the second sample on the 3rd value
2051        let upscaling_info2 = UpscalingInfo::Proportional { scale: 10.0 };
2052        values_offset.clear();
2053        values_offset.push(2);
2054        profile
2055            .add_upscaling_rule(
2056                values_offset.as_slice(),
2057                id_label2.key,
2058                id_label2.str,
2059                upscaling_info2,
2060            )
2061            .expect("Rule added");
2062
2063        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
2064        let samples = sorted_samples(&serialized_profile);
2065        let first = samples.first().expect("one sample");
2066
2067        assert_eq!(first.values, vec![2, 10000, 42]);
2068
2069        let second = samples.get(1).expect("one sample");
2070
2071        assert_eq!(second.values, vec![5, 24, 990]);
2072    }
2073
2074    #[test]
2075    fn test_upscaling_by_label_on_two_values() {
2076        let sample_types = create_samples_types();
2077
2078        let mut profile: Profile = Profile::new(&sample_types, None);
2079
2080        let id_label = create_label("my label", "coco");
2081
2082        let sample1 = api::Sample {
2083            locations: vec![],
2084            values: &[1, 10000, 42],
2085            labels: vec![id_label],
2086        };
2087
2088        profile
2089            .try_add_sample(sample1, None)
2090            .expect("add to success");
2091
2092        // upscale samples and wall-time values
2093        let values_offset: Vec<usize> = vec![0, 1];
2094
2095        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2096        profile
2097            .add_upscaling_rule(
2098                values_offset.as_slice(),
2099                id_label.key,
2100                id_label.str,
2101                upscaling_info,
2102            )
2103            .expect("Rule added");
2104
2105        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
2106
2107        assert_eq!(serialized_profile.samples.len(), 1);
2108        let first = serialized_profile.samples.first().expect("one sample");
2109
2110        assert_eq!(first.values, vec![2, 20000, 42]);
2111    }
2112    #[test]
2113    fn test_upscaling_by_value_and_by_label_different_values() {
2114        let sample_types = create_samples_types();
2115
2116        let mut profile: Profile = Profile::new(&sample_types, None);
2117
2118        let id_label = create_label("my label", "coco");
2119
2120        let sample1 = api::Sample {
2121            locations: vec![],
2122            values: &[1, 10000, 42],
2123            labels: vec![id_label],
2124        };
2125
2126        profile
2127            .try_add_sample(sample1, None)
2128            .expect("add to success");
2129
2130        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2131        let mut value_offsets: Vec<usize> = vec![0];
2132        profile
2133            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info)
2134            .expect("Rule added");
2135
2136        // a bylabel rule on the third offset
2137        let upscaling_info2 = UpscalingInfo::Proportional { scale: 5.0 };
2138        value_offsets.clear();
2139        value_offsets.push(2);
2140        profile
2141            .add_upscaling_rule(
2142                value_offsets.as_slice(),
2143                id_label.key,
2144                id_label.str,
2145                upscaling_info2,
2146            )
2147            .expect("Rule added");
2148
2149        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
2150
2151        assert_eq!(serialized_profile.samples.len(), 1);
2152        let first = serialized_profile.samples.first().expect("one sample");
2153
2154        assert_eq!(first.values, vec![2, 10000, 210]);
2155    }
2156
2157    #[test]
2158    fn test_add_same_byvalue_rule_twice() {
2159        let sample_types = create_samples_types();
2160
2161        let mut profile: Profile = Profile::new(&sample_types, None);
2162
2163        // adding same offsets
2164        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2165        let mut value_offsets: Vec<usize> = vec![0, 2];
2166        profile
2167            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info)
2168            .expect("Rule added");
2169
2170        let upscaling_info2 = UpscalingInfo::Proportional { scale: 2.0 };
2171        profile
2172            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info2)
2173            .expect_err("Duplicated rules");
2174
2175        // adding offsets with overlap on 2
2176        value_offsets.clear();
2177        value_offsets.push(2);
2178        value_offsets.push(1);
2179        let upscaling_info3 = UpscalingInfo::Proportional { scale: 2.0 };
2180        profile
2181            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info3)
2182            .expect_err("Duplicated rules");
2183
2184        // same offsets in different order
2185        value_offsets.clear();
2186        value_offsets.push(2);
2187        value_offsets.push(0);
2188        let upscaling_info4 = UpscalingInfo::Proportional { scale: 2.0 };
2189        profile
2190            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info4)
2191            .expect_err("Duplicated rules");
2192    }
2193
2194    #[test]
2195    fn test_add_two_bylabel_rules_with_overlap_on_values() {
2196        let sample_types = create_samples_types();
2197
2198        let mut profile: Profile = Profile::new(&sample_types, None);
2199
2200        // adding same offsets
2201        let mut value_offsets: Vec<usize> = vec![0, 2];
2202        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2203        profile
2204            .add_upscaling_rule(value_offsets.as_slice(), "my label", "coco", upscaling_info)
2205            .expect("Rule added");
2206        let upscaling_info2 = UpscalingInfo::Proportional { scale: 2.0 };
2207        profile
2208            .add_upscaling_rule(
2209                value_offsets.as_slice(),
2210                "my label",
2211                "coco",
2212                upscaling_info2,
2213            )
2214            .expect_err("Duplicated rules");
2215
2216        // adding offsets with overlap on 2
2217        value_offsets.clear();
2218        value_offsets.append(&mut vec![2, 1]);
2219        let upscaling_info3 = UpscalingInfo::Proportional { scale: 2.0 };
2220        profile
2221            .add_upscaling_rule(
2222                value_offsets.as_slice(),
2223                "my label",
2224                "coco",
2225                upscaling_info3,
2226            )
2227            .expect_err("Duplicated rules");
2228
2229        // same offsets in different order
2230        value_offsets.clear();
2231        value_offsets.push(2);
2232        value_offsets.push(0);
2233        let upscaling_info4 = UpscalingInfo::Proportional { scale: 2.0 };
2234        profile
2235            .add_upscaling_rule(
2236                value_offsets.as_slice(),
2237                "my label",
2238                "coco",
2239                upscaling_info4,
2240            )
2241            .expect_err("Duplicated rules");
2242    }
2243
2244    #[test]
2245    fn test_fail_if_bylabel_rule_and_by_value_rule_with_overlap_on_values() {
2246        let sample_types = create_samples_types();
2247
2248        let mut profile: Profile = Profile::new(&sample_types, None);
2249
2250        // adding same offsets
2251        let mut value_offsets: Vec<usize> = vec![0, 2];
2252        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2253
2254        // add by value rule
2255        profile
2256            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info)
2257            .expect("Rule added");
2258
2259        // add by-label rule
2260        let upscaling_info2 = UpscalingInfo::Proportional { scale: 2.0 };
2261        profile
2262            .add_upscaling_rule(
2263                value_offsets.as_slice(),
2264                "my label",
2265                "coco",
2266                upscaling_info2,
2267            )
2268            .expect_err("Duplicated rules");
2269
2270        // adding offsets with overlap on 2
2271        value_offsets.clear();
2272        value_offsets.append(&mut vec![2, 1]);
2273        let upscaling_info3 = UpscalingInfo::Proportional { scale: 2.0 };
2274        profile
2275            .add_upscaling_rule(
2276                value_offsets.as_slice(),
2277                "my label",
2278                "coco",
2279                upscaling_info3,
2280            )
2281            .expect_err("Duplicated rules");
2282
2283        // same offsets in different order
2284        value_offsets.clear();
2285        value_offsets.push(2);
2286        value_offsets.push(0);
2287        let upscaling_info4 = UpscalingInfo::Proportional { scale: 2.0 };
2288        profile
2289            .add_upscaling_rule(
2290                value_offsets.as_slice(),
2291                "my label",
2292                "coco",
2293                upscaling_info4,
2294            )
2295            .expect_err("Duplicated rules");
2296    }
2297
2298    #[test]
2299    fn test_add_rule_with_offset_out_of_bound() {
2300        let sample_types = create_samples_types();
2301
2302        let mut profile: Profile = Profile::new(&sample_types, None);
2303
2304        // adding same offsets
2305        let by_value_offsets: Vec<usize> = vec![0, 4];
2306        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2307        profile
2308            .add_upscaling_rule(
2309                by_value_offsets.as_slice(),
2310                "my label",
2311                "coco",
2312                upscaling_info,
2313            )
2314            .expect_err("Invalid offset");
2315    }
2316
2317    #[test]
2318    fn test_add_rule_with_offset_out_of_bound_poisson_function() {
2319        let sample_types = create_samples_types();
2320
2321        let mut profile: Profile = Profile::new(&sample_types, None);
2322
2323        // adding same offsets
2324        let by_value_offsets: Vec<usize> = vec![0, 4];
2325        let upscaling_info = UpscalingInfo::Poisson {
2326            sum_value_offset: 1,
2327            count_value_offset: 100,
2328            sampling_distance: 1,
2329        };
2330        profile
2331            .add_upscaling_rule(
2332                by_value_offsets.as_slice(),
2333                "my label",
2334                "coco",
2335                upscaling_info,
2336            )
2337            .expect_err("Invalid offset");
2338    }
2339
2340    #[test]
2341    fn test_add_rule_with_offset_out_of_bound_poisson_function2() {
2342        let sample_types = create_samples_types();
2343
2344        let mut profile: Profile = Profile::new(&sample_types, None);
2345
2346        // adding same offsets
2347        let by_value_offsets: Vec<usize> = vec![0, 4];
2348        let upscaling_info = UpscalingInfo::Poisson {
2349            sum_value_offset: 100,
2350            count_value_offset: 1,
2351            sampling_distance: 1,
2352        };
2353        profile
2354            .add_upscaling_rule(
2355                by_value_offsets.as_slice(),
2356                "my label",
2357                "coco",
2358                upscaling_info,
2359            )
2360            .expect_err("Invalid offset");
2361    }
2362
2363    #[test]
2364    fn test_add_rule_with_offset_out_of_bound_poisson_function3() {
2365        let sample_types = create_samples_types();
2366
2367        let mut profile: Profile = Profile::new(&sample_types, None);
2368
2369        // adding same offsets
2370        let by_value_offsets: Vec<usize> = vec![0, 4];
2371        let upscaling_info = UpscalingInfo::Poisson {
2372            sum_value_offset: 1100,
2373            count_value_offset: 100,
2374            sampling_distance: 1,
2375        };
2376        profile
2377            .add_upscaling_rule(
2378                by_value_offsets.as_slice(),
2379                "my label",
2380                "coco",
2381                upscaling_info,
2382            )
2383            .expect_err("Invalid offset");
2384    }
2385
2386    #[test]
2387    fn test_fails_when_adding_byvalue_rule_colliding_on_offset_with_existing_bylabel_rule() {
2388        let sample_types = create_samples_types();
2389
2390        let mut profile: Profile = Profile::new(&sample_types, None);
2391
2392        let id_label = create_label("my label", "coco");
2393
2394        let sample1 = api::Sample {
2395            locations: vec![],
2396            values: &[1, 10000, 42],
2397            labels: vec![id_label],
2398        };
2399
2400        profile
2401            .try_add_sample(sample1, None)
2402            .expect("add to success");
2403
2404        let mut value_offsets: Vec<usize> = vec![0, 1];
2405        // Add by-label rule first
2406        let upscaling_info2 = UpscalingInfo::Proportional { scale: 2.0 };
2407        profile
2408            .add_upscaling_rule(
2409                value_offsets.as_slice(),
2410                id_label.key,
2411                id_label.str,
2412                upscaling_info2,
2413            )
2414            .expect("Rule added");
2415
2416        // add by-value rule
2417        let upscaling_info = UpscalingInfo::Proportional { scale: 2.0 };
2418        value_offsets.clear();
2419        value_offsets.push(0);
2420        profile
2421            .add_upscaling_rule(value_offsets.as_slice(), "", "", upscaling_info)
2422            .expect_err("Rule added");
2423    }
2424
2425    #[test]
2426    fn local_root_span_id_label_as_i64() -> anyhow::Result<()> {
2427        let sample_types = vec![
2428            api::ValueType {
2429                r#type: "samples",
2430                unit: "count",
2431            },
2432            api::ValueType {
2433                r#type: "wall-time",
2434                unit: "nanoseconds",
2435            },
2436        ];
2437
2438        let mut profile = Profile::new(&sample_types, None);
2439
2440        let id_label = api::Label {
2441            key: "local root span id",
2442            str: "",
2443            num: 10,
2444            num_unit: "",
2445        };
2446
2447        let large_span_id = u64::MAX;
2448        // Safety: an u64 can fit into an i64, and we're testing that it's not mis-handled.
2449        #[allow(
2450            unknown_lints,
2451            unnecessary_transmutes,
2452            reason = "u64::cast_signed requires MSRV 1.87.0"
2453        )]
2454        let large_num: i64 = unsafe { std::mem::transmute(large_span_id) };
2455
2456        let id2_label = api::Label {
2457            key: "local root span id",
2458            str: "",
2459            num: large_num,
2460            num_unit: "",
2461        };
2462
2463        let sample1 = api::Sample {
2464            locations: vec![],
2465            values: &[1, 10000],
2466            labels: vec![id_label],
2467        };
2468
2469        let sample2 = api::Sample {
2470            locations: vec![],
2471            values: &[1, 10000],
2472            labels: vec![id2_label],
2473        };
2474
2475        profile
2476            .try_add_sample(sample1, None)
2477            .expect("add to success");
2478        profile
2479            .try_add_sample(sample2, None)
2480            .expect("add to success");
2481
2482        profile.add_endpoint(10, Cow::from("endpoint 10"))?;
2483        profile.add_endpoint(large_span_id, Cow::from("large endpoint"))?;
2484
2485        let serialized_profile = roundtrip_to_pprof(profile).unwrap();
2486        assert_eq!(serialized_profile.samples.len(), 2);
2487
2488        // Find common label strings in the string table.
2489        let locate_string = |string: &str| -> i64 {
2490            // The table is supposed to be unique, so we shouldn't have to worry about duplicates.
2491            serialized_profile
2492                .string_table
2493                .iter()
2494                .enumerate()
2495                .find_map(|(offset, str)| {
2496                    if str == string {
2497                        Some(offset as i64)
2498                    } else {
2499                        None
2500                    }
2501                })
2502                .unwrap()
2503        };
2504
2505        let local_root_span_id = locate_string("local root span id");
2506        let trace_endpoint = locate_string("trace endpoint");
2507
2508        // Set up the expected labels per sample
2509        let expected_labels = [
2510            [
2511                prost_impls::Label {
2512                    key: local_root_span_id,
2513                    num: large_num,
2514                    ..Default::default()
2515                },
2516                prost_impls::Label {
2517                    key: trace_endpoint,
2518                    str: locate_string("large endpoint"),
2519                    ..Default::default()
2520                },
2521            ],
2522            [
2523                prost_impls::Label {
2524                    key: local_root_span_id,
2525                    num: 10,
2526                    ..Default::default()
2527                },
2528                prost_impls::Label {
2529                    key: trace_endpoint,
2530                    str: locate_string("endpoint 10"),
2531                    ..Default::default()
2532                },
2533            ],
2534        ];
2535
2536        // Finally, match the labels.
2537        for (sample, labels) in sorted_samples(&serialized_profile)
2538            .iter()
2539            .zip(expected_labels.iter())
2540        {
2541            assert_eq!(sample.labels, labels);
2542        }
2543        Ok(())
2544    }
2545
2546    #[test]
2547    fn test_regression_managed_string_table_correctly_maps_ids() {
2548        let storage = Arc::new(Mutex::new(ManagedStringStorage::new()));
2549        let hello_id: u32;
2550        let world_id: u32;
2551
2552        {
2553            let mut storage_guard = storage.lock().unwrap();
2554            hello_id = storage_guard.intern("hello").unwrap();
2555            world_id = storage_guard.intern("world").unwrap();
2556        }
2557
2558        let sample_types = [api::ValueType::new("samples", "count")];
2559        let mut profile =
2560            Profile::try_with_string_storage(&sample_types, None, storage.clone()).unwrap();
2561
2562        let location = api::StringIdLocation {
2563            function: api::StringIdFunction {
2564                name: api::ManagedStringId { value: hello_id },
2565                filename: api::ManagedStringId { value: world_id },
2566                ..Default::default()
2567            },
2568            ..Default::default()
2569        };
2570
2571        let sample = api::StringIdSample {
2572            locations: vec![location],
2573            values: &[1],
2574            labels: vec![],
2575        };
2576
2577        profile.add_string_id_sample(sample.clone(), None).unwrap();
2578        profile.add_string_id_sample(sample.clone(), None).unwrap();
2579
2580        let pprof_first_profile =
2581            roundtrip_to_pprof(profile.reset_and_return_previous().unwrap()).unwrap();
2582
2583        assert!(pprof_first_profile
2584            .string_table
2585            .iter()
2586            .any(|s| s == "hello"));
2587        assert!(pprof_first_profile
2588            .string_table
2589            .iter()
2590            .any(|s| s == "world"));
2591
2592        // If the cache invalidation on the managed string table is working correctly, these strings
2593        // get correctly re-added to the profile's string table
2594
2595        profile.add_string_id_sample(sample.clone(), None).unwrap();
2596        profile.add_string_id_sample(sample.clone(), None).unwrap();
2597        let pprof_second_profile = roundtrip_to_pprof(profile).unwrap();
2598
2599        assert!(pprof_second_profile
2600            .string_table
2601            .iter()
2602            .any(|s| s == "hello"));
2603        assert!(pprof_second_profile
2604            .string_table
2605            .iter()
2606            .any(|s| s == "world"));
2607    }
2608}