opentelemetry_datadog/exporter/
intern.rs

1use indexmap::set::IndexSet;
2use opentelemetry::{StringValue, Value};
3use rmp::encode::{RmpWrite, ValueWriteError};
4use std::{
5    cell::RefCell,
6    hash::{BuildHasherDefault, Hash},
7};
8
// `InternHasher` is the hasher used by the interner's set. It is picked at compile time from
// Cargo features; when neither feature below is enabled, no alias is defined at all.

/// Standard-library hasher, used only when `intern-std` is enabled and `intern-ahash` is not.
#[cfg(all(not(feature = "intern-ahash"), feature = "intern-std"))]
type InternHasher = std::collections::hash_map::DefaultHasher;

/// `ahash` hasher, used whenever `intern-ahash` is enabled (it wins over `intern-std`).
#[cfg(feature = "intern-ahash")]
type InternHasher = ahash::AHasher;
14
15#[derive(PartialEq)]
16pub(crate) enum InternValue<'a> {
17    RegularString(&'a str),
18    OpenTelemetryValue(&'a Value),
19}
20
21impl Hash for InternValue<'_> {
22    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
23        match &self {
24            InternValue::RegularString(s) => s.hash(state),
25            InternValue::OpenTelemetryValue(v) => match v {
26                Value::Bool(x) => x.hash(state),
27                Value::I64(x) => x.hash(state),
28                Value::String(x) => x.hash(state),
29                Value::F64(x) => x.to_bits().hash(state),
30                Value::Array(a) => match a {
31                    opentelemetry::Array::Bool(x) => x.hash(state),
32                    opentelemetry::Array::I64(x) => x.hash(state),
33                    opentelemetry::Array::F64(floats) => {
34                        for f in floats {
35                            f.to_bits().hash(state);
36                        }
37                    }
38                    opentelemetry::Array::String(x) => x.hash(state),
39                    &_ => {}
40                },
41                &_ => {}
42            },
43        }
44    }
45}
46
47impl Eq for InternValue<'_> {}
48
// Building blocks for rendering values as text.

/// Text written for an array with no elements (and for unsupported values).
const EMPTY_ARRAY: &str = "[]";
/// Opens a rendered array.
const LEFT_SQUARE_BRACKET: u8 = b'[';
/// Closes a rendered array.
const RIGHT_SQUARE_BRACKET: u8 = b']';
/// Separates consecutive array elements.
const COMMA: u8 = b',';
/// Wraps each string element of a rendered array.
const DOUBLE_QUOTE: u8 = b'"';
/// Text written for the boolean `true`.
const BOOLEAN_TRUE: &str = "true";
/// Text written for the boolean `false`.
const BOOLEAN_FALSE: &str = "false";
56
57trait WriteAsLiteral {
58    fn write_to(&self, buffer: &mut Vec<u8>);
59}
60
61impl WriteAsLiteral for bool {
62    fn write_to(&self, buffer: &mut Vec<u8>) {
63        buffer.extend_from_slice(if *self { BOOLEAN_TRUE } else { BOOLEAN_FALSE }.as_bytes());
64    }
65}
66
67impl WriteAsLiteral for i64 {
68    fn write_to(&self, buffer: &mut Vec<u8>) {
69        buffer.extend_from_slice(itoa::Buffer::new().format(*self).as_bytes());
70    }
71}
72
73impl WriteAsLiteral for f64 {
74    fn write_to(&self, buffer: &mut Vec<u8>) {
75        buffer.extend_from_slice(ryu::Buffer::new().format(*self).as_bytes());
76    }
77}
78
79impl WriteAsLiteral for StringValue {
80    fn write_to(&self, buffer: &mut Vec<u8>) {
81        buffer.push(DOUBLE_QUOTE);
82        buffer.extend_from_slice(self.as_str().as_bytes());
83        buffer.push(DOUBLE_QUOTE);
84    }
85}
86
87impl InternValue<'_> {
88    pub(crate) fn write_as_str<W: RmpWrite>(
89        &self,
90        payload: &mut W,
91        reusable_buffer: &mut Vec<u8>,
92    ) -> Result<(), ValueWriteError<W::Error>> {
93        match self {
94            InternValue::RegularString(x) => rmp::encode::write_str(payload, x),
95            InternValue::OpenTelemetryValue(v) => match v {
96                Value::Bool(x) => {
97                    rmp::encode::write_str(payload, if *x { BOOLEAN_TRUE } else { BOOLEAN_FALSE })
98                }
99                Value::I64(x) => rmp::encode::write_str(payload, itoa::Buffer::new().format(*x)),
100                Value::F64(x) => rmp::encode::write_str(payload, ryu::Buffer::new().format(*x)),
101                Value::String(x) => rmp::encode::write_str(payload, x.as_ref()),
102                Value::Array(array) => match array {
103                    opentelemetry::Array::Bool(x) => {
104                        Self::write_generic_array(payload, reusable_buffer, x)
105                    }
106                    opentelemetry::Array::I64(x) => {
107                        Self::write_generic_array(payload, reusable_buffer, x)
108                    }
109                    opentelemetry::Array::F64(x) => {
110                        Self::write_generic_array(payload, reusable_buffer, x)
111                    }
112                    opentelemetry::Array::String(x) => {
113                        Self::write_generic_array(payload, reusable_buffer, x)
114                    }
115                    _ => Self::write_empty_array(payload),
116                },
117                _ => Self::write_empty_array(payload),
118            },
119        }
120    }
121
122    fn write_empty_array<W: RmpWrite>(payload: &mut W) -> Result<(), ValueWriteError<W::Error>> {
123        rmp::encode::write_str(payload, EMPTY_ARRAY)
124    }
125
126    fn write_buffer_as_string<W: RmpWrite>(
127        payload: &mut W,
128        reusable_buffer: &[u8],
129    ) -> Result<(), ValueWriteError<W::Error>> {
130        rmp::encode::write_str_len(payload, reusable_buffer.len() as u32)?;
131        payload
132            .write_bytes(reusable_buffer)
133            .map_err(ValueWriteError::InvalidDataWrite)
134    }
135
136    fn write_generic_array<W: RmpWrite, T: WriteAsLiteral>(
137        payload: &mut W,
138        reusable_buffer: &mut Vec<u8>,
139        array: &[T],
140    ) -> Result<(), ValueWriteError<W::Error>> {
141        if array.is_empty() {
142            return Self::write_empty_array(payload);
143        }
144
145        reusable_buffer.clear();
146        reusable_buffer.push(LEFT_SQUARE_BRACKET);
147
148        array[0].write_to(reusable_buffer);
149
150        for value in array[1..].iter() {
151            reusable_buffer.push(COMMA);
152            value.write_to(reusable_buffer);
153        }
154
155        reusable_buffer.push(RIGHT_SQUARE_BRACKET);
156
157        Self::write_buffer_as_string(payload, reusable_buffer)
158    }
159}
160
161pub(crate) struct StringInterner<'a> {
162    data: IndexSet<InternValue<'a>, BuildHasherDefault<InternHasher>>,
163}
164
165impl<'a> StringInterner<'a> {
166    pub(crate) fn new() -> StringInterner<'a> {
167        StringInterner {
168            data: IndexSet::with_capacity_and_hasher(128, BuildHasherDefault::default()),
169        }
170    }
171
172    pub(crate) fn intern(&mut self, data: &'a str) -> u32 {
173        if let Some(idx) = self.data.get_index_of(&InternValue::RegularString(data)) {
174            return idx as u32;
175        }
176        self.data.insert_full(InternValue::RegularString(data)).0 as u32
177    }
178
179    pub(crate) fn intern_value(&mut self, data: &'a Value) -> u32 {
180        if let Some(idx) = self
181            .data
182            .get_index_of(&InternValue::OpenTelemetryValue(data))
183        {
184            return idx as u32;
185        }
186        self.data
187            .insert_full(InternValue::OpenTelemetryValue(data))
188            .0 as u32
189    }
190
191    pub(crate) fn write_dictionary<W: RmpWrite>(
192        &self,
193        payload: &mut W,
194    ) -> Result<(), ValueWriteError<W::Error>> {
195        thread_local! {
196            static BUFFER: RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(4096));
197        }
198
199        BUFFER.with(|cell| {
200            let reusable_buffer = &mut cell.borrow_mut();
201            rmp::encode::write_array_len(payload, self.data.len() as u32)?;
202            for data in self.data.iter() {
203                data.write_as_str(payload, reusable_buffer)?;
204            }
205
206            Ok(())
207        })
208    }
209}
210
#[cfg(test)]
mod tests {
    use opentelemetry::Array;

    use super::*;

    /// Renders `value` through `WriteAsLiteral` into a fresh buffer and returns the bytes.
    fn literal_of(value: &impl WriteAsLiteral) -> Vec<u8> {
        let mut rendered = Vec::new();
        value.write_to(&mut rendered);
        rendered
    }

    /// Scenario shared by the array tests: two distinct arrays, a plain string, repeats of
    /// the first array and the string, an empty array, and finally a fresh array equal to
    /// the second one. Repeats must map to the index of their first occurrence.
    fn assert_array_interning(first: Value, second: Value, empty: Value, second_again: Value) {
        let c = "c";

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&first),
            interner.intern_value(&second),
            interner.intern(c),
            interner.intern_value(&first),
            interner.intern(c),
            interner.intern_value(&empty),
            interner.intern_value(&second_again),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2, 3, 1]);
    }

    #[test]
    fn test_intern() {
        let a = "a".to_string();
        let (b, c) = ("b", "c");

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern(a.as_str()),
            interner.intern(b),
            interner.intern(c),
            interner.intern(a.as_str()),
            interner.intern(c),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2]);
    }

    #[test]
    fn test_intern_bool() {
        let a = Value::Bool(true);
        let b = Value::Bool(false);
        let c = "c";

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&a),
            interner.intern_value(&b),
            interner.intern(c),
            interner.intern_value(&a),
            interner.intern(c),
        ];

        assert_eq!(indices, [0, 1, 2, 0, 2]);
    }

    #[test]
    fn test_intern_i64() {
        let a = Value::I64(1234567890);
        let b = Value::I64(-1234567890);
        let c = "c";
        let d = Value::I64(1234567890);

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&a),
            interner.intern_value(&b),
            interner.intern(c),
            interner.intern_value(&a),
            interner.intern(c),
            interner.intern_value(&d),
        ];

        // `d` is a separate object equal to `a`, so it shares index 0.
        assert_eq!(indices, [0, 1, 2, 0, 2, 0]);
    }

    #[test]
    fn test_intern_f64() {
        let a = Value::F64(123456.7890);
        let b = Value::F64(-1234567.890);
        let c = "c";
        let d = Value::F64(-1234567.890);

        let mut interner = StringInterner::new();
        let indices = [
            interner.intern_value(&a),
            interner.intern_value(&b),
            interner.intern(c),
            interner.intern_value(&a),
            interner.intern(c),
            interner.intern_value(&d),
        ];

        // `d` is a separate object equal to `b`, so it shares index 1.
        assert_eq!(indices, [0, 1, 2, 0, 2, 1]);
    }

    #[test]
    fn test_intern_array_of_booleans() {
        assert_array_interning(
            Value::Array(Array::Bool(vec![true, false])),
            Value::Array(Array::Bool(vec![false, true])),
            Value::Array(Array::Bool(vec![])),
            Value::Array(Array::Bool(vec![false, true])),
        );
    }

    #[test]
    fn test_intern_array_of_i64() {
        assert_array_interning(
            Value::Array(Array::I64(vec![123, -123])),
            Value::Array(Array::I64(vec![-123, 123])),
            Value::Array(Array::I64(vec![])),
            Value::Array(Array::I64(vec![-123, 123])),
        );
    }

    #[test]
    fn test_intern_array_of_f64() {
        let f1 = 123.0f64;
        let f2 = 0f64;

        assert_array_interning(
            Value::Array(Array::F64(vec![f1, f2])),
            Value::Array(Array::F64(vec![f2, f1])),
            Value::Array(Array::F64(vec![])),
            Value::Array(Array::F64(vec![f2, f1])),
        );
    }

    #[test]
    fn test_intern_array_of_string() {
        let s1 = "a";
        let s2 = "b";

        assert_array_interning(
            Value::Array(Array::String(vec![
                StringValue::from(s1),
                StringValue::from(s2),
            ])),
            Value::Array(Array::String(vec![
                StringValue::from(s2),
                StringValue::from(s1),
            ])),
            Value::Array(Array::String(vec![])),
            Value::Array(Array::String(vec![
                StringValue::from(s2),
                StringValue::from(s1),
            ])),
        );
    }

    #[test]
    fn test_write_boolean_literal() {
        assert_eq!(literal_of(&true).as_slice(), b"true");
        assert_eq!(literal_of(&false).as_slice(), b"false");
    }

    #[test]
    fn test_write_i64_literal() {
        assert_eq!(literal_of(&1234567890i64).as_slice(), b"1234567890");
        assert_eq!(literal_of(&-1234567890i64).as_slice(), b"-1234567890");
    }

    #[test]
    fn test_write_f64_literal() {
        for value in [12345.678f64, -12345.678f64] {
            // The rendered literal must agree with the standard `Display` output.
            assert_eq!(literal_of(&value), value.to_string().into_bytes());
        }
    }

    #[test]
    fn test_write_string_literal() {
        for value in [StringValue::from("abc"), StringValue::from("")] {
            assert_eq!(literal_of(&value), format!("\"{value}\"").into_bytes());
        }
    }

    /// Checks that an OpenTelemetry value is encoded exactly like its `as_str()` rendering.
    /// For any other kind of `InternValue` this only checks that encoding succeeds.
    fn test_encoding_intern_value(value: InternValue<'_>) {
        let mut scratch = vec![];
        let mut actual: Vec<u8> = vec![];
        value.write_as_str(&mut actual, &mut scratch).unwrap();

        if let InternValue::OpenTelemetryValue(inner) = value {
            let mut expected: Vec<u8> = vec![];
            rmp::encode::write_str(&mut expected, inner.as_str().as_ref()).unwrap();

            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn test_encode_boolean() {
        for flag in [true, false] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::Bool(flag)));
        }
    }

    #[test]
    fn test_encode_i64() {
        for number in [123, 0, -123] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::I64(number)));
        }
    }

    #[test]
    fn test_encode_f64() {
        for number in [123.456f64, -123.456f64] {
            test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::F64(number)));
        }
    }
}