Skip to main content

libdd_profiling_protobuf/
lib.rs

1// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4#![cfg_attr(not(test), deny(clippy::panic))]
5#![cfg_attr(not(test), deny(clippy::unwrap_used))]
6#![cfg_attr(not(test), deny(clippy::expect_used))]
7#![cfg_attr(not(test), deny(clippy::unimplemented))]
8
9//! This crate implements Protobuf encoders for [`profiles`] which write to a
10//! [`Write`]. It has encoders for:
11//!
12//! - [Function]
13//! - [Label]
14//! - [Location] and [Line]
15//! - [Mapping]
16//! - [Sample]
17//! - [ValueType]
18//!
19//! There is no encoder for a Profile message. It would require borrowing a
20//! lot of data, which becomes unwieldy. It also isn't very compatible with
21//! writing a streaming serializer to lower peak memory usage.
22//!
23//! Encoding often happens one byte at a time, so a buffered writer should
24//! probably be used.
25//!
26//! Indices into the string table are represented by [StringOffset], which uses
27//! a 32-bit number. ID fields are still 64-bit, so the user can control their
28//! values, potentially using a 64-bit address for its value.
29//!
30//! The types are generally `#[repr(C)]` so they can be used in FFI one day.
31//!
32//! Here is a condensed reference for the parts of protobuf used by profiles:
33//!
34//! ```reference
35//! message    := (tag value)*
36//! tag        := (field << 3) bit-or wire_type;
37//!                 encoded as uint32 varint
38//! value      := varint      for wire_type == VARINT,
39//!               len-prefix  for wire_type == LEN,
40//! varint     := int64 | uint64
41//! len-prefix := size (message | string | packed);
42//!                 size encoded as int32 varint
43//! string     := valid UTF-8 string;
44//!                 max 2GB of bytes
45//! packed     := varint*
46//!                 consecutive values of the type specified in `.proto`
47//! ```
48//!
49//! A [`Record`] represents a [`Tag`] and [`Value`] pair, where the
50//! [`WireType`] comes from [`Value::WIRE_TYPE`].
51//!
52//! Protos must be smaller than 2 GiB when encoded. Many proto implementations
53//! will refuse to encode or decode messages that exceed this limit.
54//!
55//! [`profiles`]: https://github.com/google/pprof/blob/main/proto/profile.proto
56
57mod function;
58mod label;
59mod location;
60mod mapping;
61mod sample;
62mod string;
63mod value_type;
64mod varint;
65
66#[cfg(feature = "prost_impls")]
67pub mod prost_impls;
68
69pub use function::*;
70pub use label::*;
71pub use location::*;
72pub use mapping::*;
73pub use sample::*;
74pub use string::*;
75pub use value_type::*;
76
77use std::fmt::{Debug, Formatter};
78use std::io::{self, Write};
79
/// A record is responsible for encoding the field number, wire type and
/// payload. The wire type tells the parser how big the payload after it is.
/// For more details, refer to the [Condensed Reference Card].
///
/// The parameters are:
///
/// - `P`: the payload, which must implement [`Value`].
/// - `F`: the protobuf field number.
/// - `O`: whether to apply the zero-sized optimization, in which a payload
///   equal to `P::default()` is not encoded at all. See [`OPT_ZERO`] and
///   [`NO_OPT_ZERO`].
///
/// Most of the time, it shouldn't matter if the optimization is applied.
/// However, if something is part of a repeated field, then applying the
/// optimization would change the number of elements in the array.
///
/// [Condensed Reference Card]: https://protobuf.dev/programming-guides/encoding/#cheat-sheet
#[derive(Copy, Clone, Default, Eq, PartialEq, Hash)]
#[repr(transparent)]
#[cfg_attr(feature = "bolero", derive(bolero::generator::TypeGenerator))]
pub struct Record<P: Value, const F: u32, const O: bool> {
    /// The value of the record. This is pub because of a quirk in Rust's
    /// orphan rules which prevent implementing `From<Record<P, ...>> for P`.
    pub value: P,
}
99
/// Represents the wire type for the in-wire protobuf encoding. There are more
/// types than are represented here; these are just the ones used in profiles.
/// See [Message Structure] for more documentation.
///
/// The discriminants are the wire type numbers themselves, so a variant can
/// be cast to an integer and combined with a field number to form a tag.
///
/// [Message Structure]: https://protobuf.dev/programming-guides/encoding/#structure
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum WireType {
    /// The payload is a single varint.
    Varint = 0,
    /// The payload is a varint size followed by that many bytes, which hold
    /// a message, a string, or packed values.
    LengthDelimited = 2,
}
111
/// A value (or payload) is stored differently depending on the wire_type. In
/// profiles, there are two types of payloads: varints and len-prefixed types.
///
/// # Safety
///
/// The [`Default`] implementation _must_ provide all zero values.
pub unsafe trait Value: Default + Eq {
    /// The wire type this value uses.
    const WIRE_TYPE: WireType;

    /// The number of bytes it takes to encode this value. Do not include the
    /// number of bytes it takes to encode the length-prefix as a varint. For
    /// example, using this snippet of the reference:
    ///
    /// ```reference
    /// len-prefix := size (message | string | packed);
    ///                size encoded as int32 varint
    /// ```
    ///
    /// Calculate the number of bytes for `(message | string | packed)` only.
    ///
    /// For a varint, returns between 1 and 10 bytes for the number of bytes
    /// used to encode the varint.
    ///
    /// Returns u64 rather than u31 to avoid a lot of overflow checking.
    fn proto_len(&self) -> u64;

    /// Encode the value to the in-wire protobuf format. For length-delimited
    /// types, do not include the length-prefix; see [`Value::proto_len`] for
    /// more details.
    ///
    /// Encoding often happens one byte at a time, so a buffered writer should
    /// probably be used.
    fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()>;
}
147
/// Intended to be provided to a [`Record`] to mean that it _should_ optimize
/// for a value of zero: a record whose value equals its type's default is
/// skipped entirely when encoding and contributes zero bytes to the length.
/// See also [`NO_OPT_ZERO`].
pub const OPT_ZERO: bool = true;

/// Intended to be provided to a [`Record`] to mean that it shouldn't optimize
/// for a value of zero. Should be used on fields that should not be zero, such
/// as `Mapping.id` and for Records which hold arrays, since that would cause
/// the length of the decoded array to change, which is unexpected. Things
/// like sample types shouldn't get optimized away, since they get used
/// element-wise and dropping one would break the pairing. See also
/// [`OPT_ZERO`].
pub const NO_OPT_ZERO: bool = false;
159
160impl<P: Value, const F: u32, const O: bool> From<P> for Record<P, F, O> {
161    fn from(value: P) -> Self {
162        Record { value }
163    }
164}
165
166/// # Safety
167/// The Default implementation will return all zero-representations.
168unsafe impl<P: Value, const F: u32, const O: bool> Value for Record<P, F, O> {
169    const WIRE_TYPE: WireType = P::WIRE_TYPE;
170
171    fn proto_len(&self) -> u64 {
172        if O && self.value == P::default() {
173            return 0;
174        }
175        let proto_len = self.value.proto_len();
176        let len = if P::WIRE_TYPE == WireType::LengthDelimited {
177            proto_len.proto_len()
178        } else {
179            0
180        };
181        let tag = Tag::new(F, P::WIRE_TYPE).proto_len();
182        tag + len + proto_len
183    }
184
185    fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
186        if O && self.value == P::default() {
187            return Ok(());
188        }
189        Tag::new(F, P::WIRE_TYPE).encode(writer)?;
190        if P::WIRE_TYPE == WireType::LengthDelimited {
191            varint::encode(self.value.proto_len(), writer)?;
192        }
193        self.value.encode(writer)
194    }
195}
196
197impl<P: Debug + Value, const F: u32, const O: bool> Debug for Record<P, F, O> {
198    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
199        f.debug_struct("Record")
200            .field("value", &self.value)
201            .field("number", &F)
202            .field("optimize_for_zero", &O)
203            .finish()
204    }
205}
206
/// The smallest possible protobuf field number.
const MIN_FIELD: u32 = 1;

/// The largest possible protobuf field number, `2^29 - 1`. A field number is
/// shifted left by three bits to make room for the wire type, so it has to
/// fit in the remaining 29 bits of a 32-bit tag.
const MAX_FIELD: u32 = (1 << 29) - 1;
212
/// A tag is a combination of a wire_type, stored in the least significant
/// three bits, and the field number that is defined in the .proto file.
///
/// The common comparison, hashing and debug traits are derived so tags can
/// be printed in diagnostics and compared in assertions.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Tag(u32);
217
218impl Tag {
219    #[cfg_attr(debug_assertions, track_caller)]
220    #[inline]
221    pub const fn new(field: u32, wire_type: WireType) -> Self {
222        debug_assert!(field >= MIN_FIELD && field <= MAX_FIELD);
223        Self((field << 3) | wire_type as u32)
224    }
225
226    #[inline]
227    pub fn proto_len(self) -> u64 {
228        varint::proto_len(self.0 as u64)
229    }
230
231    #[inline]
232    pub fn encode<W: Write>(self, writer: &mut W) -> io::Result<()> {
233        varint::encode(self.0 as u64, writer)
234    }
235}
236
237/// # Safety
238/// The Default implementation will return all zero-representations.
239unsafe impl<T: Value> Value for &'_ [T] {
240    const WIRE_TYPE: WireType = WireType::LengthDelimited;
241
242    fn proto_len(&self) -> u64 {
243        self.iter().map(Value::proto_len).sum()
244    }
245
246    fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
247        for value in self.iter() {
248            value.encode(writer)?;
249        }
250        Ok(())
251    }
252}