libdd_trace_normalization/
normalize_utils.rs1use std::time::SystemTime;
5
/// Limit handed to `truncate_utf8` when normalizing a span type.
pub(crate) const MAX_TYPE_LEN: usize = 100;

/// 2000-01-01T00:00:00Z expressed in nanoseconds since the Unix epoch
/// (946 684 800 seconds). Span starts below this value are replaced.
const YEAR_2000_NANOSEC_TS: i64 = 946_684_800 * 1_000_000_000;

/// Span name substituted when normalization leaves the name empty.
pub(crate) const DEFAULT_SPAN_NAME: &str = "unnamed_operation";

/// Service name substituted when normalization leaves the service empty.
pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service";

/// Limit handed to `truncate_utf8` when normalizing a span name.
pub(crate) const MAX_NAME_LEN: usize = 100;

/// Limit handed to `truncate_utf8` when normalizing a service name.
const MAX_SERVICE_LEN: usize = 100;

/// Maximum number of code points `normalize_tag` emits; it also stops once
/// twice this many bytes have been written.
const MAX_TAG_LEN: usize = 200;
23
24pub fn normalize_service(svc: &mut String) {
26 truncate_utf8(svc, MAX_SERVICE_LEN);
27 normalize_tag(svc);
28 if svc.is_empty() {
29 DEFAULT_SERVICE_NAME.clone_into(svc);
30 }
31}
32
33pub fn normalize_name(name: &mut String) {
35 truncate_utf8(name, MAX_NAME_LEN);
36 normalize_metric_name(name);
37 if name.is_empty() {
38 DEFAULT_SPAN_NAME.clone_into(name);
39 }
40}
41
/// Falls back to the span `name` when `resource` is empty; a non-empty resource is left
/// untouched.
// `&mut String` is kept (and the lint silenced) so the signature stays as callers expect.
#[allow(clippy::ptr_arg)]
pub fn normalize_resource(resource: &mut String, name: &str) {
    if !resource.is_empty() {
        return;
    }
    // `resource` is empty, so appending `name` makes it an exact copy of `name`.
    resource.push_str(name);
}
48
49pub fn normalize_span_type(span_type: &mut String) {
50 truncate_utf8(span_type, MAX_TYPE_LEN);
51}
52
53pub fn normalize_span_start_duration(start: &mut i64, duration: &mut i64) {
54 if *duration < 0 {
57 *duration = 0;
58 }
59 if *duration > i64::MAX - *start {
60 *duration = 0;
61 }
62
63 if *start < YEAR_2000_NANOSEC_TS {
64 let now = SystemTime::UNIX_EPOCH.elapsed().map_or_else(
65 |e| -(e.duration().as_nanos() as i64),
66 |t| t.as_nanos() as i64,
67 );
68 *start = now - *duration;
69 if *start < 0 {
70 *start = now;
71 }
72 }
73}
74
/// Resets `parent_id` to `0` when it is equal to both `trace_id` and `span_id`; in every
/// other case it is left unchanged.
// NOTE(review): presumably this pattern identifies a root span reported with a
// self-referencing parent — confirm against the tracer/agent conventions.
pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
    let current = *parent_id;
    if (trace_id, span_id) == (current, current) {
        *parent_id = 0;
    }
}
85
86pub fn normalize_tag(tag: &mut String) {
87 let bytes = unsafe { tag.as_mut_vec() };
89 if bytes.is_empty() {
90 return;
91 }
92 let mut read_cursor = 0;
93 let mut write_cursor = 0;
94 let mut is_in_illegal_span = true;
95 let mut codepoints_written = 0;
96
97 loop {
98 if read_cursor >= bytes.len()
99 || write_cursor >= 2 * MAX_TAG_LEN
100 || codepoints_written >= MAX_TAG_LEN
101 {
102 break;
103 }
104
105 let b = bytes[read_cursor];
106 match b {
108 b'a'..=b'z' | b':' => {
109 bytes[write_cursor] = b;
110 is_in_illegal_span = false;
111 write_cursor += 1;
112 codepoints_written += 1;
113 read_cursor += 1;
114 continue;
115 }
116 b'A'..=b'Z' => {
117 bytes[write_cursor] = b - b'A' + b'a';
118 is_in_illegal_span = false;
119 write_cursor += 1;
120 codepoints_written += 1;
121 read_cursor += 1;
122 continue;
123 }
124 b'0'..=b'9' | b'.' | b'/' | b'-' => {
125 if write_cursor != 0 {
126 bytes[write_cursor] = b;
127 is_in_illegal_span = false;
128 write_cursor += 1;
129 codepoints_written += 1;
130 }
131 read_cursor += 1;
132 continue;
133 }
134 b'_' if !is_in_illegal_span => {
135 if write_cursor != 0 {
136 bytes[write_cursor] = b;
137 is_in_illegal_span = true;
138 write_cursor += 1;
139 codepoints_written += 1;
140 }
141 read_cursor += 1;
142 continue;
143 }
144 0x00..=0x7F if !is_in_illegal_span => {
146 bytes[write_cursor] = b'_';
147 is_in_illegal_span = true;
148 write_cursor += 1;
149 codepoints_written += 1;
150 read_cursor += 1;
151 continue;
152 }
153 0x00..=0x7F if is_in_illegal_span => {
154 read_cursor += 1;
155 continue;
156 }
157 _ => {}
158 }
159
160 let mut c = {
162 let mut it = bytes[read_cursor..].iter();
163 #[allow(clippy::unwrap_used)]
166 std::char::from_u32(crate::utf8_helpers::next_code_point(&mut it).unwrap()).unwrap()
167 };
168 let mut len_utf8 = c.len_utf8();
169 read_cursor += len_utf8;
170
171 if c.is_lowercase() {
172 c.encode_utf8(&mut bytes[write_cursor..write_cursor + len_utf8]);
173 is_in_illegal_span = false;
174 write_cursor += len_utf8;
175 codepoints_written += 1;
176 continue;
177 }
178 if c.is_uppercase() {
179 if let Some(lower) = c.to_lowercase().next() {
182 if lower.len_utf8() <= len_utf8 {
183 c = lower;
184 len_utf8 = c.len_utf8();
185 }
186 }
187 }
188
189 if c.is_alphabetic() {
194 c.encode_utf8(&mut bytes[write_cursor..write_cursor + len_utf8]);
195 is_in_illegal_span = false;
196 write_cursor += len_utf8;
197 codepoints_written += 1;
198 } else if c.is_numeric() {
199 if write_cursor != 0 {
200 c.encode_utf8(&mut bytes[write_cursor..write_cursor + len_utf8]);
201 is_in_illegal_span = false;
202 write_cursor += len_utf8;
203 codepoints_written += 1;
204 }
205 } else if !is_in_illegal_span {
206 bytes[write_cursor] = b'_';
207 is_in_illegal_span = true;
208 write_cursor += 1;
209 codepoints_written += 1;
210 }
211 }
212 if is_in_illegal_span && write_cursor > 0 {
214 write_cursor -= 1;
215 }
216 bytes.truncate(write_cursor);
217}
218
/// Rewrites `name` in place into a metric-style identifier.
///
/// * Everything before the first ASCII letter is dropped; if there is no ASCII letter
///   the result is the empty string.
/// * ASCII letters and digits are kept as they are.
/// * `_` and `.` are kept when they directly follow a letter or digit.
/// * A `.` that follows a `_` replaces that underscore.
/// * Any other byte becomes `_`, and every byte that would follow an already written
///   `_` or `.` (other than the case above) is dropped, so separators never repeat.
/// * A trailing `_` is removed.
///
/// The write position only advances when a byte is actually stored, so skipped input
/// bytes (including the continuation bytes of multi-byte characters) never reach the
/// output.
fn normalize_metric_name(name: &mut String) {
    // SAFETY: only ASCII bytes are stored into the buffer, every index below the final
    // `write_cursor` has been overwritten by one of those stores, and the buffer is cut
    // down to that prefix (or cleared) before returning, so `name` holds valid UTF-8
    // whenever the borrow ends.
    let bytes = unsafe { name.as_mut_vec() };

    // The output must start with an ASCII letter; without one nothing is left.
    let Some(first_alpha) = bytes.iter().position(u8::is_ascii_alphabetic) else {
        bytes.clear();
        return;
    };

    let mut write_cursor = 0;
    let mut last_written = 0u8;
    for read_cursor in first_alpha..bytes.len() {
        let out = match (bytes[read_cursor], last_written) {
            (b, _) if b.is_ascii_alphanumeric() => b,
            (b'.', b'_') => {
                // Overwrite the underscore written just before this period. The first
                // output byte is always a letter, so `write_cursor` is at least 2 here.
                write_cursor -= 1;
                b'.'
            }
            // Never emit anything right after a separator: skip without advancing the
            // write position.
            (_, b'_' | b'.') => continue,
            (b @ (b'_' | b'.'), _) => b,
            _ => b'_',
        };
        bytes[write_cursor] = out;
        write_cursor += 1;
        last_written = out;
    }

    // Drop a trailing underscore.
    if last_written == b'_' {
        write_cursor -= 1;
    }
    bytes.truncate(write_cursor);
}
274
275pub(crate) fn truncate_utf8(s: &mut String, limit: usize) {
279 let boundary = crate::utf8_helpers::floor_char_boundary(s, limit);
280 s.truncate(boundary);
281}
282
#[cfg(test)]
mod tests {

    use super::*;
    use duplicate::duplicate_item;

    // Table-driven cases for `normalize_name`. Each row supplies a test function name,
    // an input and the expected normalized value; `duplicate_item` substitutes them into
    // the template function below.
    #[duplicate_item(
        test_name input expected;
        [test_normalize_empty_string] [""] ["unnamed_operation"];
        [test_normalize_valid_string] ["good"] ["good"];
        [test_normalize_long_string] ["Too-Long-.".repeat(20).as_str()] ["Too_Long.".repeat(10)];
        [test_normalize_dash_string] ["bad-name"] ["bad_name"];
        [test_normalize_invalid_string] ["&***"] ["unnamed_operation"];
        [test_normalize_invalid_prefix] ["&&&&&&&_test-name-"] ["test_name"];
    )]
    #[test]
    fn test_name() {
        let mut val = input.to_owned();
        normalize_name(&mut val);
        assert_eq!(val, expected);
    }

    // Table-driven cases for `normalize_service`: empty input falls back to the default
    // service name, over-long input is cut, and invalid characters become underscores.
    #[duplicate_item(
        test_name input expected;
        [test_normalize_empty_service] [""] ["unnamed-service"];
        [test_normalize_valid_service] ["good"] ["good"];
        [test_normalize_long_service] ["Too$Long$.".repeat(20).as_str()] ["too_long_.".repeat(10)];
        [test_normalize_dash_service] ["bad&service"] ["bad_service"];
    )]
    #[test]
    fn test_name() {
        let mut val = input.to_owned();
        normalize_service(&mut val);
        assert_eq!(val, expected);
    }

    // Table-driven cases for `normalize_tag`, covering lowercasing, separator collapsing,
    // leading/trailing trimming, non-ASCII input and the length limits.
    #[duplicate_item(
        test_name input expected;
        [test_normalize_tag_1] ["#test_starting_hash"] ["test_starting_hash"];
        [test_normalize_tag_2] ["TestCAPSandSuch"] ["testcapsandsuch"];
        [test_normalize_tag_3] ["Test Conversion Of Weird !@#$%^&**() Characters"] ["test_conversion_of_weird_characters"];
        [test_normalize_tag_4] ["$#weird_starting"] ["weird_starting"];
        [test_normalize_tag_5] ["allowed:c0l0ns"] ["allowed:c0l0ns"];
        [test_normalize_tag_6] ["1love"] ["love"];
        [test_normalize_tag_7] ["ünicöde"] ["ünicöde"];
        [test_normalize_tag_8] ["ünicöde:metäl"] ["ünicöde:metäl"];
        [test_normalize_tag_9] ["Data🐨dog🐶 繋がっ⛰てて"] ["data_dog_繋がっ_てて"];
        [test_normalize_tag_10] [" spaces "] ["spaces"];
        [test_normalize_tag_11] [" #hashtag!@#spaces #__<># "] ["hashtag_spaces"];
        [test_normalize_tag_12] [":testing"] [":testing"];
        [test_normalize_tag_13] ["_foo"] ["foo"];
        [test_normalize_tag_14] [":::test"] [":::test"];
        [test_normalize_tag_15] ["contiguous_____underscores"] ["contiguous_underscores"];
        [test_normalize_tag_16] ["foo_"] ["foo"];
        // U+017F (long s) is lowercase already and must pass through unchanged.
        [test_normalize_tag_17] ["\u{017F}odd_\u{017F}case\u{017F}"] ["\u{017F}odd_\u{017F}case\u{017F}"];
        [test_normalize_tag_18] [""] [""];
        [test_normalize_tag_19] [" "] [""];
        [test_normalize_tag_20] ["ok"] ["ok"];
        [test_normalize_tag_21] ["™Ö™Ö™™Ö™"] ["ö_ö_ö"];
        [test_normalize_tag_22] ["AlsO:ök"] ["also:ök"];
        [test_normalize_tag_23] [":still_ok"] [":still_ok"];
        [test_normalize_tag_24] ["___trim"] ["trim"];
        [test_normalize_tag_25] ["12.:trim@"] [":trim"];
        [test_normalize_tag_26] ["12.:trim@@"] [":trim"];
        [test_normalize_tag_27] ["fun:ky__tag/1"] ["fun:ky_tag/1"];
        [test_normalize_tag_28] ["fun:ky@tag/2"] ["fun:ky_tag/2"];
        [test_normalize_tag_29] ["fun:ky@@@tag/3"] ["fun:ky_tag/3"];
        [test_normalize_tag_30] ["tag:1/2.3"] ["tag:1/2.3"];
        [test_normalize_tag_31] ["---fun:k####y_ta@#g/1_@@#"]["fun:k_y_ta_g/1"];
        [test_normalize_tag_32] ["AlsO:œ#@ö))œk"] ["also:œ_ö_œk"];
        // Output is capped at 200 code points.
        [test_normalize_tag_33] ["a".repeat(888).as_str()] ["a".repeat(200)];
        [test_normalize_tag_34] [("a".to_owned() + &"🐶".repeat(799)).as_str()] ["a"];
        [test_normalize_tag_35] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"];
        [test_normalize_tag_36] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string()).as_str()] ["a"];
        [test_normalize_tag_37] [("a".to_string() + &char::REPLACEMENT_CHARACTER.to_string() + &char::REPLACEMENT_CHARACTER.to_string() + "b").as_str()] ["a_b"];
        // The cap is reached in the middle of the input: the separator and one more
        // digit still fit, the rest is discarded.
        [test_normalize_tag_38]
            ["A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000"]
            ["a00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_0"]
        ;
    )]
    #[test]
    fn test_name() {
        let mut v = input.to_owned();
        normalize_tag(&mut v);
        assert_eq!(v, expected)
    }
}