opentelemetry_datadog/exporter/
intern.rs1use indexmap::set::IndexSet;
2use opentelemetry::{StringValue, Value};
3use rmp::encode::{RmpWrite, ValueWriteError};
4use std::{
5 cell::RefCell,
6 hash::{BuildHasherDefault, Hash},
7};
8
// Hasher used by the interner's `IndexSet`, chosen at compile time through Cargo features.
// When the `intern-ahash` feature is enabled, `ahash::AHasher` is used (it wins even if
// `intern-std` is enabled as well).
#[cfg(feature = "intern-ahash")]
type InternHasher = ahash::AHasher;

// When only `intern-std` is enabled, fall back to the standard library's default hasher.
// NOTE: if neither feature is enabled, no `InternHasher` alias is defined in this file.
#[cfg(all(feature = "intern-std", not(feature = "intern-ahash")))]
type InternHasher = std::collections::hash_map::DefaultHasher;
14
15#[derive(PartialEq)]
16pub(crate) enum InternValue<'a> {
17 RegularString(&'a str),
18 OpenTelemetryValue(&'a Value),
19}
20
21impl Hash for InternValue<'_> {
22 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
23 match &self {
24 InternValue::RegularString(s) => s.hash(state),
25 InternValue::OpenTelemetryValue(v) => match v {
26 Value::Bool(x) => x.hash(state),
27 Value::I64(x) => x.hash(state),
28 Value::String(x) => x.hash(state),
29 Value::F64(x) => x.to_bits().hash(state),
30 Value::Array(a) => match a {
31 opentelemetry::Array::Bool(x) => x.hash(state),
32 opentelemetry::Array::I64(x) => x.hash(state),
33 opentelemetry::Array::F64(floats) => {
34 for f in floats {
35 f.to_bits().hash(state);
36 }
37 }
38 opentelemetry::Array::String(x) => x.hash(state),
39 &_ => {}
40 },
41 &_ => {}
42 },
43 }
44 }
45}
46
47impl Eq for InternValue<'_> {}
48
// Text fragments used when rendering values as string literals.
// Textual forms written for boolean values.
const BOOLEAN_TRUE: &str = "true";
const BOOLEAN_FALSE: &str = "false";
// Delimiters used when rendering an array into the reusable byte buffer.
const LEFT_SQUARE_BRACKET: u8 = b'[';
const RIGHT_SQUARE_BRACKET: u8 = b']';
const COMMA: u8 = b',';
// Quote placed on both sides of a string element inside a rendered array.
const DOUBLE_QUOTE: u8 = b'"';
// Rendering written for empty arrays and for values of unrecognised kinds.
const EMPTY_ARRAY: &str = "[]";
56
57trait WriteAsLiteral {
58 fn write_to(&self, buffer: &mut Vec<u8>);
59}
60
61impl WriteAsLiteral for bool {
62 fn write_to(&self, buffer: &mut Vec<u8>) {
63 buffer.extend_from_slice(if *self { BOOLEAN_TRUE } else { BOOLEAN_FALSE }.as_bytes());
64 }
65}
66
67impl WriteAsLiteral for i64 {
68 fn write_to(&self, buffer: &mut Vec<u8>) {
69 buffer.extend_from_slice(itoa::Buffer::new().format(*self).as_bytes());
70 }
71}
72
73impl WriteAsLiteral for f64 {
74 fn write_to(&self, buffer: &mut Vec<u8>) {
75 buffer.extend_from_slice(ryu::Buffer::new().format(*self).as_bytes());
76 }
77}
78
79impl WriteAsLiteral for StringValue {
80 fn write_to(&self, buffer: &mut Vec<u8>) {
81 buffer.push(DOUBLE_QUOTE);
82 buffer.extend_from_slice(self.as_str().as_bytes());
83 buffer.push(DOUBLE_QUOTE);
84 }
85}
86
87impl InternValue<'_> {
88 pub(crate) fn write_as_str<W: RmpWrite>(
89 &self,
90 payload: &mut W,
91 reusable_buffer: &mut Vec<u8>,
92 ) -> Result<(), ValueWriteError<W::Error>> {
93 match self {
94 InternValue::RegularString(x) => rmp::encode::write_str(payload, x),
95 InternValue::OpenTelemetryValue(v) => match v {
96 Value::Bool(x) => {
97 rmp::encode::write_str(payload, if *x { BOOLEAN_TRUE } else { BOOLEAN_FALSE })
98 }
99 Value::I64(x) => rmp::encode::write_str(payload, itoa::Buffer::new().format(*x)),
100 Value::F64(x) => rmp::encode::write_str(payload, ryu::Buffer::new().format(*x)),
101 Value::String(x) => rmp::encode::write_str(payload, x.as_ref()),
102 Value::Array(array) => match array {
103 opentelemetry::Array::Bool(x) => {
104 Self::write_generic_array(payload, reusable_buffer, x)
105 }
106 opentelemetry::Array::I64(x) => {
107 Self::write_generic_array(payload, reusable_buffer, x)
108 }
109 opentelemetry::Array::F64(x) => {
110 Self::write_generic_array(payload, reusable_buffer, x)
111 }
112 opentelemetry::Array::String(x) => {
113 Self::write_generic_array(payload, reusable_buffer, x)
114 }
115 _ => Self::write_empty_array(payload),
116 },
117 _ => Self::write_empty_array(payload),
118 },
119 }
120 }
121
122 fn write_empty_array<W: RmpWrite>(payload: &mut W) -> Result<(), ValueWriteError<W::Error>> {
123 rmp::encode::write_str(payload, EMPTY_ARRAY)
124 }
125
126 fn write_buffer_as_string<W: RmpWrite>(
127 payload: &mut W,
128 reusable_buffer: &[u8],
129 ) -> Result<(), ValueWriteError<W::Error>> {
130 rmp::encode::write_str_len(payload, reusable_buffer.len() as u32)?;
131 payload
132 .write_bytes(reusable_buffer)
133 .map_err(ValueWriteError::InvalidDataWrite)
134 }
135
136 fn write_generic_array<W: RmpWrite, T: WriteAsLiteral>(
137 payload: &mut W,
138 reusable_buffer: &mut Vec<u8>,
139 array: &[T],
140 ) -> Result<(), ValueWriteError<W::Error>> {
141 if array.is_empty() {
142 return Self::write_empty_array(payload);
143 }
144
145 reusable_buffer.clear();
146 reusable_buffer.push(LEFT_SQUARE_BRACKET);
147
148 array[0].write_to(reusable_buffer);
149
150 for value in array[1..].iter() {
151 reusable_buffer.push(COMMA);
152 value.write_to(reusable_buffer);
153 }
154
155 reusable_buffer.push(RIGHT_SQUARE_BRACKET);
156
157 Self::write_buffer_as_string(payload, reusable_buffer)
158 }
159}
160
161pub(crate) struct StringInterner<'a> {
162 data: IndexSet<InternValue<'a>, BuildHasherDefault<InternHasher>>,
163}
164
165impl<'a> StringInterner<'a> {
166 pub(crate) fn new() -> StringInterner<'a> {
167 StringInterner {
168 data: IndexSet::with_capacity_and_hasher(128, BuildHasherDefault::default()),
169 }
170 }
171
172 pub(crate) fn intern(&mut self, data: &'a str) -> u32 {
173 if let Some(idx) = self.data.get_index_of(&InternValue::RegularString(data)) {
174 return idx as u32;
175 }
176 self.data.insert_full(InternValue::RegularString(data)).0 as u32
177 }
178
179 pub(crate) fn intern_value(&mut self, data: &'a Value) -> u32 {
180 if let Some(idx) = self
181 .data
182 .get_index_of(&InternValue::OpenTelemetryValue(data))
183 {
184 return idx as u32;
185 }
186 self.data
187 .insert_full(InternValue::OpenTelemetryValue(data))
188 .0 as u32
189 }
190
191 pub(crate) fn write_dictionary<W: RmpWrite>(
192 &self,
193 payload: &mut W,
194 ) -> Result<(), ValueWriteError<W::Error>> {
195 thread_local! {
196 static BUFFER: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(4096));
197 }
198
199 BUFFER.with(|cell| {
200 let reusable_buffer = &mut cell.borrow_mut();
201 rmp::encode::write_array_len(payload, self.data.len() as u32)?;
202 for data in self.data.iter() {
203 data.write_as_str(payload, reusable_buffer)?;
204 }
205
206 Ok(())
207 })
208 }
209}
210
#[cfg(test)]
mod tests {
    use opentelemetry::Array;

    use super::*;

    /// Equal strings share an index; new strings get the next free index.
    #[test]
    fn test_intern() {
        let owned_a = String::from("a");
        let (b, c) = ("b", "c");

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern(owned_a.as_str()),
            interner.intern(b),
            interner.intern(c),
            interner.intern(owned_a.as_str()),
            interner.intern(c),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2]);
    }

    /// Boolean values and strings are interned side by side.
    #[test]
    fn test_intern_bool() {
        let yes = Value::Bool(true);
        let no = Value::Bool(false);
        let text = "c";

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&yes),
            interner.intern_value(&no),
            interner.intern(text),
            interner.intern_value(&yes),
            interner.intern(text),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2]);
    }

    /// A separately constructed but equal integer maps to the existing index.
    #[test]
    fn test_intern_i64() {
        let positive = Value::I64(1234567890);
        let negative = Value::I64(-1234567890);
        let text = "c";
        let positive_again = Value::I64(1234567890);

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&positive),
            interner.intern_value(&negative),
            interner.intern(text),
            interner.intern_value(&positive),
            interner.intern(text),
            interner.intern_value(&positive_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 0]);
    }

    /// A separately constructed but equal float maps to the existing index.
    #[test]
    fn test_intern_f64() {
        let positive = Value::F64(123456.7890);
        let negative = Value::F64(-1234567.890);
        let text = "c";
        let negative_again = Value::F64(-1234567.890);

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&positive),
            interner.intern_value(&negative),
            interner.intern(text),
            interner.intern_value(&positive),
            interner.intern(text),
            interner.intern_value(&negative_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 1]);
    }

    /// Element order matters for arrays; the empty array is its own entry.
    #[test]
    fn test_intern_array_of_booleans() {
        let forward = Value::Array(Array::Bool(vec![true, false]));
        let reversed = Value::Array(Array::Bool(vec![false, true]));
        let text = "c";
        let empty = Value::Array(Array::Bool(vec![]));
        let reversed_again = Value::Array(Array::Bool(vec![false, true]));

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&forward),
            interner.intern_value(&reversed),
            interner.intern(text),
            interner.intern_value(&forward),
            interner.intern(text),
            interner.intern_value(&empty),
            interner.intern_value(&reversed_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 3, 1]);
    }

    /// Element order matters for arrays; the empty array is its own entry.
    #[test]
    fn test_intern_array_of_i64() {
        let forward = Value::Array(Array::I64(vec![123, -123]));
        let reversed = Value::Array(Array::I64(vec![-123, 123]));
        let text = "c";
        let empty = Value::Array(Array::I64(vec![]));
        let reversed_again = Value::Array(Array::I64(vec![-123, 123]));

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&forward),
            interner.intern_value(&reversed),
            interner.intern(text),
            interner.intern_value(&forward),
            interner.intern(text),
            interner.intern_value(&empty),
            interner.intern_value(&reversed_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 3, 1]);
    }

    /// Element order matters for arrays; the empty array is its own entry.
    #[test]
    fn test_intern_array_of_f64() {
        let (x, y) = (123.0f64, 0f64);

        let forward = Value::Array(Array::F64(vec![x, y]));
        let reversed = Value::Array(Array::F64(vec![y, x]));
        let text = "c";
        let empty = Value::Array(Array::F64(vec![]));
        let reversed_again = Value::Array(Array::F64(vec![y, x]));

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&forward),
            interner.intern_value(&reversed),
            interner.intern(text),
            interner.intern_value(&forward),
            interner.intern(text),
            interner.intern_value(&empty),
            interner.intern_value(&reversed_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 3, 1]);
    }

    /// Element order matters for arrays; the empty array is its own entry.
    #[test]
    fn test_intern_array_of_string() {
        let (x, y) = ("a", "b");

        let forward = Value::Array(Array::String(vec![
            StringValue::from(x),
            StringValue::from(y),
        ]));
        let reversed = Value::Array(Array::String(vec![
            StringValue::from(y),
            StringValue::from(x),
        ]));
        let text = "c";
        let empty = Value::Array(Array::String(vec![]));
        let reversed_again = Value::Array(Array::String(vec![
            StringValue::from(y),
            StringValue::from(x),
        ]));

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&forward),
            interner.intern_value(&reversed),
            interner.intern(text),
            interner.intern_value(&forward),
            interner.intern(text),
            interner.intern_value(&empty),
            interner.intern_value(&reversed_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 3, 1]);
    }

    #[test]
    fn test_write_boolean_literal() {
        let mut rendered: Vec<u8> = Vec::new();
        true.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), b"true");

        rendered.clear();
        false.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), b"false");
    }

    #[test]
    fn test_write_i64_literal() {
        let mut rendered: Vec<u8> = Vec::new();
        1234567890i64.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), b"1234567890");

        rendered.clear();
        (-1234567890i64).write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), b"-1234567890");
    }

    #[test]
    fn test_write_f64_literal() {
        let positive = 12345.678f64;
        let negative = -12345.678f64;
        let mut rendered: Vec<u8> = Vec::new();

        positive.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), format!("{positive}").as_bytes());

        rendered.clear();
        negative.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), format!("{negative}").as_bytes());
    }

    #[test]
    fn test_write_string_literal() {
        let filled = StringValue::from("abc");
        let blank = StringValue::from("");
        let mut rendered: Vec<u8> = Vec::new();

        filled.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), format!("\"{filled}\"").as_bytes());

        rendered.clear();
        blank.write_to(&mut rendered);
        assert_eq!(rendered.as_slice(), format!("\"{blank}\"").as_bytes());
    }

    /// Encodes `value` with `write_as_str` and, for OpenTelemetry values, checks the bytes
    /// against encoding `Value::as_str()` directly. Plain strings are encoded but not
    /// compared.
    fn test_encoding_intern_value(value: InternValue<'_>) {
        let mut scratch = Vec::new();
        let mut actual: Vec<u8> = Vec::new();
        value.write_as_str(&mut actual, &mut scratch).unwrap();

        if let InternValue::OpenTelemetryValue(otel_value) = value {
            let mut expected: Vec<u8> = Vec::new();
            rmp::encode::write_str(&mut expected, otel_value.as_str().as_ref()).unwrap();

            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn test_encode_boolean() {
        for sample in [Value::Bool(true), Value::Bool(false)] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&sample));
        }
    }

    #[test]
    fn test_encode_i64() {
        for sample in [Value::I64(123), Value::I64(0), Value::I64(-123)] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&sample));
        }
    }

    #[test]
    fn test_encode_f64() {
        for sample in [Value::F64(123.456f64), Value::F64(-123.456f64)] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&sample));
        }
    }
}