Skip to main content

libdd_trace_normalization/
normalizer.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::normalize_utils;
5use libdd_trace_protobuf::pb;
6
/// Span metric key under which older tracers report the sampling priority.
const TAG_SAMPLING_PRIORITY: &str = "_sampling_priority_v1";
/// Span meta key under which older tracers report the trace origin.
const TAG_ORIGIN: &str = "_dd.origin";

/// Sampling priority values that a trace chunk's `priority` field is compared against.
///
/// The discriminants are the wire values: cast a variant with `as i32` to compare it with
/// `pb::TraceChunk::priority`.
// NOTE(review): `dead_code` is allowed presumably because not every variant is constructed
// inside this crate — confirm before removing the attribute.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SamplerPriority {
    AutoDrop = 0,
    AutoKeep = 1,
    UserKeep = 2,
    /// Sentinel meaning "priority not populated"; `normalize_chunk` only fills in the chunk
    /// priority when it currently equals this value.
    None = i8::MIN as isize,
}
18
19pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> {
20    anyhow::ensure!(s.trace_id != 0, "TraceID is zero (reason:trace_id_zero)");
21    anyhow::ensure!(s.span_id != 0, "SpanID is zero (reason:span_id_zero)");
22
23    // TODO: component2name: check for a feature flag to determine the component tag to become the
24    // span name https://github.com/DataDog/datadog-agent/blob/dc88d14851354cada1d15265220a39dce8840dcc/pkg/trace/agent/normalizer.go#L64
25
26    normalize_utils::normalize_service(&mut s.service);
27    normalize_utils::normalize_name(&mut s.name);
28    normalize_utils::normalize_resource(&mut s.resource, &s.name);
29    normalize_utils::normalize_parent_id(&mut s.parent_id, s.trace_id, s.span_id);
30    normalize_utils::normalize_span_start_duration(&mut s.start, &mut s.duration);
31    normalize_utils::normalize_span_type(&mut s.r#type);
32
33    if let Some(env_tag) = s.meta.get_mut("env") {
34        normalize_utils::normalize_tag(env_tag);
35    }
36
37    if let Some(code) = s.meta.get("http.status_code") {
38        if !is_valid_status_code(code) {
39            s.meta.remove("http.status_code");
40        }
41    };
42
43    Ok(())
44}
45
/// Reports whether `sc` parses as an integer in the HTTP status code range `100..=599`.
///
/// Anything that does not parse as an `i64` (empty string, surrounding whitespace, decimals,
/// out-of-range magnitudes, arbitrary text) is not a valid status code.
pub(crate) fn is_valid_status_code(sc: &str) -> bool {
    matches!(sc.parse::<i64>(), Ok(100..=599))
}
52
53/// normalize_trace takes a trace and
54/// * returns an error if there is a trace ID discrepancy between 2 spans
55/// * returns an error if at least one span cannot be normalized
56pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> {
57    let first_trace_id = match trace.first() {
58        Some(first_span) => first_span.trace_id,
59        None => anyhow::bail!("Normalize Trace Error: Trace is empty"),
60    };
61
62    for span in trace {
63        if span.trace_id != first_trace_id {
64            anyhow::bail!(format!(
65                "Normalize Trace Error: Trace has foreign span: {:?}",
66                span
67            ));
68        }
69        normalize_span(span)?;
70    }
71    Ok(())
72}
73
74/// normalize_chunk takes a trace chunk and
75/// * populates origin field if it wasn't populated
76/// * populates priority field if it wasn't populated the root span is used to populate these
77///   fields, and it's index in TraceChunk spans vec must be passed.
78pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> anyhow::Result<()> {
79    // check if priority is not populated
80    let root_span = match chunk.spans.get(root_span_index) {
81        Some(span) => span,
82        None => {
83            anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans")
84        }
85    };
86
87    if chunk.priority == SamplerPriority::None as i32 {
88        // Older tracers set sampling priority in the root span.
89        if let Some(root_span_priority) = root_span.metrics.get(TAG_SAMPLING_PRIORITY) {
90            chunk.priority = *root_span_priority as i32;
91        } else {
92            for span in &chunk.spans {
93                if let Some(priority) = span.metrics.get(TAG_SAMPLING_PRIORITY) {
94                    chunk.priority = *priority as i32;
95                    break;
96                }
97            }
98        }
99    }
100    // check if origin is not populated
101    if chunk.origin.is_empty() {
102        if let Some(origin) = root_span.meta.get(TAG_ORIGIN) {
103            // Older tracers set origin in the root span.
104            chunk.origin = origin.to_string();
105        }
106    }
107    Ok(())
108}
109
/// Unit tests for span, trace and chunk normalization.
#[cfg(test)]
mod tests {
    use crate::normalize_utils;
    use crate::normalize_utils::{DEFAULT_SPAN_NAME, MAX_TYPE_LEN};
    use crate::normalizer;
    use libdd_trace_protobuf::pb;
    use rand::Rng;
    use std::collections::HashMap;
    use std::time::SystemTime;

    /// Builds a span that `normalize_span` accepts and leaves unchanged.
    fn new_test_span() -> pb::Span {
        let mut rng = rand::thread_rng();

        pb::Span {
            duration: 10000000,
            error: 0,
            resource: "GET /some/raclette".to_string(),
            service: "django".to_string(),
            name: "django.controller".to_string(),
            // Setting the low bit keeps the random ID non-zero; a zero span ID is rejected by
            // `normalize_span` and would make unrelated tests fail spuriously.
            span_id: rng.gen::<u64>() | 1,
            start: 1448466874000000000,
            trace_id: 424242,
            meta: HashMap::from([
                ("user".to_string(), "leo".to_string()),
                ("pool".to_string(), "fondue".to_string()),
            ]),
            metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]),
            parent_id: 1111,
            r#type: "http".to_string(),
            meta_struct: HashMap::new(),
            span_links: vec![],
            span_events: vec![],
        }
    }

    /// Wraps `span` in a chunk with priority 1 and an empty origin.
    fn new_test_chunk_with_span(span: pb::Span) -> pb::TraceChunk {
        pb::TraceChunk {
            priority: 1,
            origin: "".to_string(),
            spans: vec![span],
            tags: HashMap::new(),
            dropped_trace: false,
        }
    }

    #[test]
    fn test_normalize_name_passes() {
        let mut test_span = new_test_span();
        let before_name = test_span.name.clone();
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_name, test_span.name);
    }

    #[test]
    fn test_normalize_empty_name() {
        let mut test_span = new_test_span();
        test_span.name = "".to_string();
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.name, DEFAULT_SPAN_NAME);
    }

    #[test]
    fn test_normalize_long_name() {
        let mut test_span = new_test_span();
        test_span.name = "CAMEMBERT".repeat(100);
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.name.len(), normalize_utils::MAX_NAME_LEN);
    }

    #[test]
    fn test_normalize_name_no_alphanumeric() {
        let mut test_span = new_test_span();
        test_span.name = "/".to_string();
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.name, DEFAULT_SPAN_NAME);
    }

    #[test]
    fn test_normalize_name_for_metrics() {
        let expected_names = HashMap::from([
            (
                "pylons.controller".to_string(),
                "pylons.controller".to_string(),
            ),
            (
                "trace-api.request".to_string(),
                "trace_api.request".to_string(),
            ),
        ]);

        let mut test_span = new_test_span();
        for (name, expected_name) in expected_names {
            test_span.name = name;
            assert!(normalizer::normalize_span(&mut test_span).is_ok());
            assert_eq!(test_span.name, expected_name);
        }
    }

    #[test]
    fn test_normalize_resource_passes() {
        let mut test_span = new_test_span();
        let before_resource = test_span.resource.clone();
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_resource, test_span.resource);
    }

    #[test]
    fn test_normalize_empty_resource() {
        let mut test_span = new_test_span();
        test_span.resource = "".to_string();
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.resource, test_span.name);
    }

    #[test]
    fn test_normalize_trace_id_passes() {
        let mut test_span = new_test_span();
        let before_trace_id = test_span.trace_id;
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_trace_id, test_span.trace_id);
    }

    #[test]
    fn test_normalize_no_trace_id() {
        let mut test_span = new_test_span();
        test_span.trace_id = 0;
        assert!(normalizer::normalize_span(&mut test_span).is_err());
    }

    // TODO: Add a unit test for testing Component2Name, one that is
    //       implemented within the normalize function.
    // Until then this test is a placeholder: it only checks that normalization succeeds and
    // leaves the trace ID untouched.
    #[test]
    fn test_normalize_component_to_name() {
        let mut test_span = new_test_span();
        let before_trace_id = test_span.trace_id;
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_trace_id, test_span.trace_id);
    }

    #[test]
    fn test_normalize_span_id_passes() {
        let mut test_span = new_test_span();
        let before_span_id = test_span.span_id;
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_span_id, test_span.span_id);
    }

    #[test]
    fn test_normalize_no_span_id() {
        let mut test_span = new_test_span();
        test_span.span_id = 0;
        assert!(normalizer::normalize_span(&mut test_span).is_err());
    }

    #[test]
    fn test_normalize_start_passes() {
        let mut test_span = new_test_span();
        let before_start = test_span.start;
        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_start, test_span.start);
    }

    /// Current time as nanoseconds since the Unix epoch.
    fn get_current_time() -> i64 {
        SystemTime::UNIX_EPOCH.elapsed().unwrap().as_nanos() as i64
    }

    #[test]
    fn test_normalize_start_too_small() {
        let mut test_span = new_test_span();

        test_span.start = 42;
        let min_start = get_current_time() - test_span.duration;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert!(test_span.start >= min_start);
        assert!(test_span.start <= get_current_time());
    }

    #[test]
    fn test_normalize_start_too_small_with_large_duration() {
        let mut test_span = new_test_span();

        test_span.start = 42;
        test_span.duration = get_current_time() * 2;
        let min_start = get_current_time();

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        // start should have been reset to current time
        assert!(test_span.start >= min_start);
        assert!(test_span.start <= get_current_time());
    }

    #[test]
    fn test_normalize_duration_passes() {
        let mut test_span = new_test_span();
        let before_duration = test_span.duration;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_duration, test_span.duration);
    }

    #[test]
    fn test_normalize_empty_duration() {
        let mut test_span = new_test_span();
        test_span.duration = 0;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.duration, 0);
    }

    #[test]
    fn test_normalize_negative_duration() {
        let mut test_span = new_test_span();
        test_span.duration = -50;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.duration, 0);
    }

    #[test]
    fn test_normalize_large_duration() {
        let mut test_span = new_test_span();
        test_span.duration = i64::MAX;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.duration, 0);
    }

    #[test]
    fn test_normalize_error_passes() {
        let mut test_span = new_test_span();
        let before_error = test_span.error;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_error, test_span.error);
    }

    #[test]
    fn test_normalize_metrics_passes() {
        let mut test_span = new_test_span();
        let before_metrics = test_span.metrics.clone();

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_metrics, test_span.metrics);
    }

    #[test]
    fn test_normalize_meta_passes() {
        let mut test_span = new_test_span();
        let before_meta = test_span.meta.clone();

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_meta, test_span.meta);
    }

    #[test]
    fn test_normalize_parent_id_passes() {
        let mut test_span = new_test_span();
        let before_parent_id = test_span.parent_id;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_parent_id, test_span.parent_id);
    }

    #[test]
    fn test_normalize_type_passes() {
        let mut test_span = new_test_span();
        let before_type = test_span.r#type.clone();

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(before_type, test_span.r#type);
    }

    #[test]
    fn test_normalize_type_too_long() {
        let mut test_span = new_test_span();
        test_span.r#type = "sql".repeat(1000);

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.r#type.len(), MAX_TYPE_LEN);
    }

    #[test]
    fn test_normalize_service_tag() {
        let mut test_span = new_test_span();
        test_span.service = "retargeting(api-Staging ".to_string();

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.service, "retargeting_api-staging");
    }

    #[test]
    fn test_normalize_env() {
        let mut test_span = new_test_span();
        test_span
            .meta
            .insert("env".to_string(), "DEVELOPMENT".to_string());

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!("development", test_span.meta.get("env").unwrap());
    }

    #[test]
    fn test_normalize_invalid_http_status_code_is_removed() {
        for invalid in ["99", "600", "not-a-code", ""] {
            let mut test_span = new_test_span();
            test_span
                .meta
                .insert("http.status_code".to_string(), invalid.to_string());

            assert!(normalizer::normalize_span(&mut test_span).is_ok());
            assert!(
                !test_span.meta.contains_key("http.status_code"),
                "status code {invalid:?} should have been removed"
            );
        }
    }

    #[test]
    fn test_normalize_valid_http_status_code_is_kept() {
        let mut test_span = new_test_span();
        test_span
            .meta
            .insert("http.status_code".to_string(), "200".to_string());

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(
            test_span.meta.get("http.status_code").map(String::as_str),
            Some("200")
        );
    }

    #[test]
    fn test_special_zipkin_root_span() {
        let mut test_span = new_test_span();
        test_span.parent_id = 42;
        test_span.trace_id = 42;
        test_span.span_id = 42;

        let before_trace_id = test_span.trace_id;
        let before_span_id = test_span.span_id;

        assert!(normalizer::normalize_span(&mut test_span).is_ok());
        assert_eq!(test_span.parent_id, 0);
        assert_eq!(test_span.trace_id, before_trace_id);
        assert_eq!(test_span.span_id, before_span_id);
    }

    #[test]
    fn test_normalize_trace_empty() {
        let mut trace: Vec<pb::Span> = vec![];
        let result = normalizer::normalize_trace(&mut trace);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Normalize Trace Error: Trace is empty"));
    }

    #[test]
    fn test_normalize_trace_trace_id_mismatch() {
        let mut span_1 = new_test_span();
        let mut span_2 = new_test_span();
        span_1.trace_id = 1;
        span_2.trace_id = 2;

        let mut trace = vec![span_1, span_2];
        let result = normalizer::normalize_trace(&mut trace);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Normalize Trace Error: Trace has foreign span"));
    }

    #[test]
    fn test_normalize_trace_invalid_span_name() {
        let span_1 = new_test_span();
        let mut span_2 = new_test_span();
        span_2.name = "".to_string(); // will be normalized

        let mut trace = vec![span_1, span_2];
        assert!(normalizer::normalize_trace(&mut trace).is_ok());
        assert_eq!(trace[1].name, DEFAULT_SPAN_NAME);
    }

    #[test]
    fn test_normalize_trace() {
        let mut span_1 = new_test_span();
        let mut span_2 = new_test_span();
        // Fixed, distinct, non-zero IDs: incrementing a random ID could overflow.
        span_1.span_id = 1;
        span_2.span_id = 2;

        let mut trace = vec![span_1, span_2];
        assert!(normalizer::normalize_trace(&mut trace).is_ok());
    }

    #[test]
    fn test_is_valid_status_code() {
        assert!(normalizer::is_valid_status_code("100"));
        assert!(normalizer::is_valid_status_code("599"));
        assert!(!normalizer::is_valid_status_code("99"));
        assert!(!normalizer::is_valid_status_code("600"));
        assert!(!normalizer::is_valid_status_code("Invalid status code"));
    }

    #[test]
    fn test_normalize_chunk_populating_origin() {
        let mut root = new_test_span();
        root.meta
            .insert(normalizer::TAG_ORIGIN.to_string(), "rum".to_string());

        let mut chunk = new_test_chunk_with_span(root);
        chunk.origin = "".to_string();
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!("rum".to_string(), chunk.origin);
    }

    #[test]
    fn test_normalize_chunk_not_populating_origin() {
        let mut root = new_test_span();
        root.meta
            .insert(normalizer::TAG_ORIGIN.to_string(), "rum".to_string());

        let mut chunk = new_test_chunk_with_span(root);
        chunk.origin = "lambda".to_string();
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!("lambda".to_string(), chunk.origin);
    }

    #[test]
    fn test_normalize_chunk_populating_sampling_priority() {
        let mut root = new_test_span();
        root.metrics.insert(
            normalizer::TAG_SAMPLING_PRIORITY.to_string(),
            normalizer::SamplerPriority::UserKeep as i32 as f64,
        );

        let mut chunk = new_test_chunk_with_span(root);
        chunk.priority = normalizer::SamplerPriority::None as i32;
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!(normalizer::SamplerPriority::UserKeep as i32, chunk.priority);
    }

    #[test]
    fn test_normalize_chunk_not_populating_sampling_priority() {
        let mut root = new_test_span();
        root.metrics.insert(
            normalizer::TAG_SAMPLING_PRIORITY.to_string(),
            normalizer::SamplerPriority::UserKeep as i32 as f64,
        );

        let mut chunk = new_test_chunk_with_span(root);
        chunk.priority = normalizer::SamplerPriority::AutoDrop as i32;
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!(normalizer::SamplerPriority::AutoDrop as i32, chunk.priority);
    }

    #[test]
    fn test_normalize_chunk_priority_stays_unset_without_tag() {
        // No span carries the sampling priority metric, so there is nothing to inherit.
        let mut chunk = new_test_chunk_with_span(new_test_span());
        chunk.priority = normalizer::SamplerPriority::None as i32;
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!(normalizer::SamplerPriority::None as i32, chunk.priority);
    }

    #[test]
    fn test_normalize_chunk_invalid_root_span() {
        let mut chunk = new_test_chunk_with_span(new_test_span());

        let result = normalizer::normalize_chunk(&mut chunk, 1);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().to_string(),
            "Normalize Chunk Error: root_span_index > length of trace chunk spans"
        );
    }

    #[test]
    fn test_normalize_populate_priority_from_any_span() {
        let mut chunk = new_test_chunk_with_span(new_test_span());
        chunk.priority = normalizer::SamplerPriority::None as i32;
        chunk.spans = vec![new_test_span(), new_test_span(), new_test_span()];
        chunk.spans[1].metrics.insert(
            normalizer::TAG_SAMPLING_PRIORITY.to_string(),
            normalizer::SamplerPriority::UserKeep as i32 as f64,
        );
        assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
        assert_eq!(normalizer::SamplerPriority::UserKeep as i32, chunk.priority);
    }
}