libdd_profiling_protobuf/lib.rs
1// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4#![cfg_attr(not(test), deny(clippy::panic))]
5#![cfg_attr(not(test), deny(clippy::unwrap_used))]
6#![cfg_attr(not(test), deny(clippy::expect_used))]
7#![cfg_attr(not(test), deny(clippy::unimplemented))]
8
9//! This crate implements Protobuf encoders for [`profiles`] which write to a
10//! [`Write`]. It has encoders for:
11//!
12//! - [Function]
13//! - [Label]
14//! - [Location] and [Line]
15//! - [Mapping]
16//! - [Sample]
17//! - [ValueType]
18//!
19//! There is no encoder for a Profile message. It would require borrowing a
20//! lot of data, which becomes unwieldy. It also isn't very compatible with
21//! writing a streaming serializer to lower peak memory usage.
22//!
23//! Encoding often happens one byte at a time, so a buffered writer should
24//! probably be used.
25//!
//! Indices into the string table are represented by [StringOffset], which uses
//! a 32-bit number. ID fields are still 64-bit, so the user can control their
//! values, potentially using 64-bit addresses as the IDs.
29//!
30//! The types are generally `#[repr(C)]` so they can be used in FFI one day.
31//!
32//! Here is a condensed reference for the parts of protobuf used by profiles:
33//!
34//! ```reference
35//! message := (tag value)*
36//! tag := (field << 3) bit-or wire_type;
37//! encoded as uint32 varint
38//! value := varint for wire_type == VARINT,
39//! len-prefix for wire_type == LEN,
40//! varint := int64 | uint64
41//! len-prefix := size (message | string | packed);
42//! size encoded as int32 varint
43//! string := valid UTF-8 string;
44//! max 2GB of bytes
45//! packed := varint*
46//! consecutive values of the type specified in `.proto`
47//! ```
48//!
49//! A [`Record`] represents a [`Tag`] and [`Value`] pair, where the
50//! [`WireType`] comes from [`Value::WIRE_TYPE`].
51//!
52//! Protos must be smaller than 2 GiB when encoded. Many proto implementations
53//! will refuse to encode or decode messages that exceed this limit.
54//!
55//! [`profiles`]: https://github.com/google/pprof/blob/main/proto/profile.proto
56
57mod function;
58mod label;
59mod location;
60mod mapping;
61mod sample;
62mod string;
63mod value_type;
64mod varint;
65
66#[cfg(feature = "prost_impls")]
67pub mod prost_impls;
68
69pub use function::*;
70pub use label::*;
71pub use location::*;
72pub use mapping::*;
73pub use sample::*;
74pub use string::*;
75pub use value_type::*;
76
77use std::fmt::{Debug, Formatter};
78use std::io::{self, Write};
79
80/// A record is responsible for encoding the field number, wire type and
81/// payload. The wire type tells the parser how big the payload after it is.
82/// For more details, refer to the [Condensed Reference Card].
83///
84/// The `P` is the payload, the `F` is the field number, and `O` is whether to
85/// apply the zero-sized optimization or not. Most of the time, it shouldn't
86/// matter if the optimization is applied. However, if something is part of
87/// a repeated field, then applying the optimization would change the number
88/// of elements in the array.
89///
90/// [Condensed Reference Card]: https://protobuf.dev/programming-guides/encoding/#cheat-sheet
91#[derive(Copy, Clone, Default, Eq, PartialEq, Hash)]
92#[repr(transparent)]
93#[cfg_attr(feature = "bolero", derive(bolero::generator::TypeGenerator))]
94pub struct Record<P: Value, const F: u32, const O: bool> {
95 /// The value of the record. This is pub because of a quirk in Rust's
96 /// orphan rules which prevent implementing `From<Record<P,...> for P`.
97 pub value: P,
98}
99
/// The wire type of a field in the in-wire protobuf encoding. Protobuf
/// defines more wire types than appear here; only the ones that profiles
/// use are listed. The discriminant of each variant is the number stored in
/// the tag. See [Message Structure] for more documentation.
///
/// [Message Structure]: https://protobuf.dev/programming-guides/encoding/#structure
#[repr(u8)]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum WireType {
    /// The payload is a varint.
    Varint = 0,
    /// The payload is a length prefix followed by that many bytes.
    LengthDelimited = 2,
}
111
112/// A value (or payload) is stored differently depending on the wire_type. In
113/// profiles, there two types of payloads: varints and len-prefixed types.
114///
115/// # Safety
116///
117/// The [`Default`] implementation _must_ provide all zero values.
118pub unsafe trait Value: Default + Eq {
119 /// The wire type this value uses.
120 const WIRE_TYPE: WireType;
121
122 /// The number of bytes it takes to encode this value. Do not include the
123 /// number of bytes it takes to encode the length-prefix as a varint. For
124 /// example, using this snippet of the reference:
125 ///
126 /// ```reference
127 /// len-prefix := size (message | string | packed);
128 /// size encoded as int32 varint
129 /// ```
130 ///
131 /// Calculate the number of bytes for `(message | string | packed)` only.
132 ///
133 /// For a varint, returns between 1 and 10 bytes for the number of bytes
134 /// used to encode the varint.
135 ///
136 /// Returns u64 rather than u31 to avoid a lot of overflow checking.
137 fn proto_len(&self) -> u64;
138
139 /// Encode the value to the in-wire protobuf format. For length-delimited
140 /// types, do not include the length-prefix; see [`Value::proto_len`] for
141 /// more details.
142 ///
143 /// Encoding often happens one byte at a time, so a buffered writer should
144 /// probably be used.
145 fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()>;
146}
147
/// Pass this as the `O` parameter of a [`Record`] to say that a zero value
/// _should_ be optimized away. See also [`NO_OPT_ZERO`].
pub const OPT_ZERO: bool = true;

/// Pass this as the `O` parameter of a [`Record`] to say that a zero value
/// should not be optimized away. Use it for fields that should not be zero,
/// such as `Mapping.id`, and for Records which hold arrays, because dropping
/// an element would unexpectedly change the length of the decoded array.
/// Sample types are an example: they are used element-wise, so optimizing
/// one away would break the pairing.
pub const NO_OPT_ZERO: bool = !OPT_ZERO;
159
160impl<P: Value, const F: u32, const O: bool> From<P> for Record<P, F, O> {
161 fn from(value: P) -> Self {
162 Record { value }
163 }
164}
165
166/// # Safety
167/// The Default implementation will return all zero-representations.
168unsafe impl<P: Value, const F: u32, const O: bool> Value for Record<P, F, O> {
169 const WIRE_TYPE: WireType = P::WIRE_TYPE;
170
171 fn proto_len(&self) -> u64 {
172 if O && self.value == P::default() {
173 return 0;
174 }
175 let proto_len = self.value.proto_len();
176 let len = if P::WIRE_TYPE == WireType::LengthDelimited {
177 proto_len.proto_len()
178 } else {
179 0
180 };
181 let tag = Tag::new(F, P::WIRE_TYPE).proto_len();
182 tag + len + proto_len
183 }
184
185 fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
186 if O && self.value == P::default() {
187 return Ok(());
188 }
189 Tag::new(F, P::WIRE_TYPE).encode(writer)?;
190 if P::WIRE_TYPE == WireType::LengthDelimited {
191 varint::encode(self.value.proto_len(), writer)?;
192 }
193 self.value.encode(writer)
194 }
195}
196
197impl<P: Debug + Value, const F: u32, const O: bool> Debug for Record<P, F, O> {
198 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
199 f.debug_struct("Record")
200 .field("value", &self.value)
201 .field("number", &F)
202 .field("optimize_for_zero", &O)
203 .finish()
204 }
205}
206
/// The smallest possible protobuf field number.
const MIN_FIELD: u32 = 1;

/// The largest possible protobuf field number, which is `2^29 - 1`: a tag
/// is a `u32` whose low three bits hold the wire type, leaving 29 bits for
/// the field number.
const MAX_FIELD: u32 = u32::MAX >> 3;
212
/// A tag combines a wire type, stored in the least significant three bits,
/// with the field number that is defined in the .proto file, stored in the
/// bits above it.
///
/// `Debug` is derived so that a tag can be inspected in logs and assertion
/// messages like the other public types of this crate.
#[derive(Copy, Clone, Debug)]
pub struct Tag(u32);
217
218impl Tag {
219 #[cfg_attr(debug_assertions, track_caller)]
220 #[inline]
221 pub const fn new(field: u32, wire_type: WireType) -> Self {
222 debug_assert!(field >= MIN_FIELD && field <= MAX_FIELD);
223 Self((field << 3) | wire_type as u32)
224 }
225
226 #[inline]
227 pub fn proto_len(self) -> u64 {
228 varint::proto_len(self.0 as u64)
229 }
230
231 #[inline]
232 pub fn encode<W: Write>(self, writer: &mut W) -> io::Result<()> {
233 varint::encode(self.0 as u64, writer)
234 }
235}
236
237/// # Safety
238/// The Default implementation will return all zero-representations.
239unsafe impl<T: Value> Value for &'_ [T] {
240 const WIRE_TYPE: WireType = WireType::LengthDelimited;
241
242 fn proto_len(&self) -> u64 {
243 self.iter().map(Value::proto_len).sum()
244 }
245
246 fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> {
247 for value in self.iter() {
248 value.encode(writer)?;
249 }
250 Ok(())
251 }
252}