libdd_common/tag.rs
1// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use serde::{Deserialize, Serialize};
5use std::borrow::Cow;
6use std::fmt::{Debug, Display, Formatter};
7
8pub use static_assertions::{const_assert, const_assert_ne};
9
10#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
11#[serde(transparent)]
12pub struct Tag {
13 /// Many tags are made from literal strings, such as:
14 /// - "language:native"
15 /// - "src_library:libdatadog"
16 /// - "type:timeout"
17 ///
18 /// So being able to save allocations is nice.
19 value: Cow<'static, str>,
20}
21
22impl Tag {
23 /// Used by the `tag!` macro. Not meant to be used directly, please use
24 /// the macro instead.
25 /// # Safety
26 /// Do not use directly, use through the `tag!` macro which enforces the
27 /// safety invariants at compile time.
28 pub const unsafe fn from_static_unchecked(value: &'static str) -> Self {
29 Self {
30 value: Cow::Borrowed(value),
31 }
32 }
33}
34
/// Creates a tag from a key and value known at compile-time, joined as
/// `key:value`.
///
/// Compilation fails if the tag is known to be invalid: the value must not be
/// empty, the tag must start with an ASCII letter, and the whole tag must be
/// at most 200 bytes long. A tag that compiles may still be invalid, because
/// not all tag validation is currently done client-side. If the key or value
/// aren't known at compile-time, then use [Tag::new].
// todo: what's a good way to keep these in-sync with Tag::from_value?
// This can be a little more strict because it's compile-time evaluated.
// https://docs.datadoghq.com/getting_started/tagging/#define-tags
#[macro_export]
macro_rules! tag {
    ($key:expr, $val:expr) => {{
        // Tags come in "value" or "key:value" format. This pattern always
        // produces the key:value format, which means the value should not be
        // empty.
        // todo: the implementation here differs subtly from Tag::from_value,
        // which checks that the whole thing doesn't end with a colon.
        $crate::tag::const_assert!(!$val.is_empty());

        const COMBINED: &str = $crate::const_format::concatcp!($key, ":", $val);

        // Tags must start with a letter. This is more restrictive than is
        // required (could be a unicode alphabetic char) and can be lifted
        // if it's causing problems.
        $crate::tag::const_assert!(COMBINED.as_bytes()[0].is_ascii_alphabetic());

        // Tags can be up to 200 characters long and support Unicode letters
        // (which includes most character sets, including languages such as
        // Japanese).
        // Presently, engineers interpreted this to be 200 bytes, not unicode
        // characters. However, if the 200th character is unicode, it's
        // allowed to spill over due to a historical bug. For now, we'll
        // ignore this and hard-code 200 bytes.
        $crate::tag::const_assert!(COMBINED.len() <= 200);

        // SAFETY: `from_static_unchecked` requires that its invariants are
        // enforced at compile time by this macro; the const assertions above
        // are those checks.
        // The `let` gives `#[allow]` a statement to attach to; the allow
        // presumably covers expansion inside a caller's own `unsafe` block.
        #[allow(unused_unsafe)]
        let tag = unsafe { $crate::tag::Tag::from_static_unchecked(COMBINED) };
        tag
    }};
}
72
73impl Debug for Tag {
74 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75 f.debug_struct("Tag").field("value", &self.value).finish()
76 }
77}
78
79impl AsRef<str> for Tag {
80 fn as_ref(&self) -> &str {
81 self.value.as_ref()
82 }
83}
84
85// Any type which implements Display automatically has to_string.
86impl Display for Tag {
87 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
88 write!(f, "{}", self.value)
89 }
90}
91
92impl Tag {
93 /// Validates a tag.
94 fn from_value<'a, IntoCow>(chunk: IntoCow) -> anyhow::Result<Self>
95 where
96 IntoCow: Into<Cow<'a, str>>,
97 {
98 let chunk = chunk.into();
99
100 /* The docs have various rules, which we are choosing not to enforce:
101 * https://docs.datadoghq.com/getting_started/tagging/#defining-tags
102 * The reason is that if tracing and profiling disagree on what valid
103 * tags are, then the user experience is degraded.
104 * So... we mostly just pass it along and handle it in the backend.
105 * However, we do enforce some rules around the colon, because they
106 * are likely to be errors (such as passed in empty string).
107 */
108
109 anyhow::ensure!(!chunk.is_empty(), "tag is empty");
110
111 let mut chars = chunk.chars();
112 anyhow::ensure!(
113 chars.next() != Some(':'),
114 "tag '{chunk}' begins with a colon"
115 );
116 anyhow::ensure!(chars.last() != Some(':'), "tag '{chunk}' ends with a colon");
117
118 let value = Cow::Owned(chunk.into_owned());
119 Ok(Tag { value })
120 }
121
122 /// Creates a tag from a key and value. It's preferred to use the `tag!`
123 /// macro when the key and value are both known at compile-time.
124 pub fn new<K, V>(key: K, value: V) -> anyhow::Result<Self>
125 where
126 K: AsRef<str>,
127 V: AsRef<str>,
128 {
129 let key = key.as_ref();
130 let value = value.as_ref();
131
132 Tag::from_value(format!("{key}:{value}"))
133 }
134}
135
136/// Parse a string of tags typically provided by environment variables
137/// The tags are expected to be either space or comma separated:
138/// "key1:value1,key2:value2"
139/// "key1:value1 key2:value2"
140/// Tag names and values are required and may not be empty.
141///
142/// Returns a tuple of the correctly parsed tags and an optional error message
143/// describing issues encountered during parsing.
144pub fn parse_tags(str: &str) -> (Vec<Tag>, Option<String>) {
145 let chunks = str
146 .split(&[',', ' '][..])
147 .filter(|str| !str.is_empty())
148 .map(Tag::from_value);
149
150 let mut tags = vec![];
151 let mut error_message = String::new();
152 for result in chunks {
153 match result {
154 Ok(tag) => tags.push(tag),
155 Err(err) => {
156 if error_message.is_empty() {
157 error_message += "Errors while parsing tags: ";
158 } else {
159 error_message += ", ";
160 }
161 error_message += &err.to_string();
162 }
163 }
164 }
165
166 let error_message = if error_message.is_empty() {
167 None
168 } else {
169 Some(error_message)
170 };
171 (tags, error_message)
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_send() {
        // fails to compile if false
        fn is_send<T: Send>(_t: T) -> bool {
            true
        }
        assert!(is_send(tag!("src_library", "libdatadog")));
    }

    #[test]
    fn test_empty_key() {
        let _ = Tag::new("", "woof").expect_err("empty key is not allowed");
    }

    #[test]
    fn test_empty_value() {
        let _ = Tag::new("key1", "").expect_err("empty value is an error");
    }

    #[test]
    fn test_bad_utf8() {
        // 0b1111_0xxx is the start of a 4-byte sequence, but there aren't any
        // more chars, so it will get converted into the utf8 replacement
        // character. This results in a string with an "a" and a replacement
        // char, so it should be an error (no valid chars). However, we don't
        // enforce many things about tags yet client-side, so we let it slide.
        let bytes = &[b'a', 0b1111_0111];
        let key = String::from_utf8_lossy(bytes);
        let t = Tag::new(key, "value").unwrap();
        assert_eq!("a\u{FFFD}:value", t.to_string());
    }

    #[test]
    fn test_value_has_colon() {
        let result = Tag::new("env", "staging:east").expect("values can have colons");
        assert_eq!("env:staging:east", result.to_string());

        let result = tag!("env", "staging:east");
        assert_eq!("env:staging:east", result.to_string());
    }

    #[test]
    fn test_suspicious_tags() {
        // Based on tag rules, these should all fail. However, there is a risk
        // that profile tags will then differ or cause failures compared to
        // trace tags. These require cross-team, cross-language collaboration.
        let cases = [
            ("_begins_with_non-letter".to_string(), "value"),
            ("the-tag-length-is-over-200-characters".repeat(6), "value"),
        ];

        for (key, value) in cases {
            // Again, these should fail, but it's not implemented yet
            assert!(Tag::new(key, value).is_ok());
        }
    }

    #[test]
    fn test_missing_colon_parsing() {
        let tag = Tag::from_value("tag").unwrap();
        assert_eq!("tag", tag.to_string());
    }

    #[test]
    fn test_leading_colon_parsing() {
        let _ = Tag::from_value(":tag").expect_err("Cannot start with a colon");
    }

    #[test]
    fn test_tailing_colon_parsing() {
        let _ = Tag::from_value("tag:").expect_err("Cannot end with a colon");
    }

    #[test]
    fn test_tags_parsing() {
        let cases = [
            ("", vec![]),
            (",", vec![]),
            (" , ", vec![]),
            // Testing that values can contain colons
            (
                "env:staging:east,location:nyc:ny",
                vec![
                    Tag::new("env", "staging:east").unwrap(),
                    Tag::new("location", "nyc:ny").unwrap(),
                ],
            ),
            // Testing value format (no key)
            ("value", vec![Tag::from_value("value").unwrap()]),
            (
                "state:utah,state:idaho",
                vec![
                    Tag::new("state", "utah").unwrap(),
                    Tag::new("state", "idaho").unwrap(),
                ],
            ),
            (
                "key1:value1 key2:value2 key3:value3",
                vec![
                    Tag::new("key1", "value1").unwrap(),
                    Tag::new("key2", "value2").unwrap(),
                    Tag::new("key3", "value3").unwrap(),
                ],
            ),
            (
                // Testing consecutive separators being collapsed
                "key1:value1, key2:value2 ,key3:value3 , key4:value4",
                vec![
                    Tag::new("key1", "value1").unwrap(),
                    Tag::new("key2", "value2").unwrap(),
                    Tag::new("key3", "value3").unwrap(),
                    Tag::new("key4", "value4").unwrap(),
                ],
            ),
        ];

        for (input, expected) in cases {
            let (actual, error_message) = parse_tags(input);
            assert_eq!(expected, actual);
            assert!(error_message.is_none());
        }
    }

    #[test]
    fn test_tags_parsing_reports_errors() {
        // Invalid chunks are reported in input order and do not prevent the
        // valid chunks from being returned.
        let (tags, error_message) = parse_tags("key1:,:value2,key3:value3");

        assert_eq!(vec![Tag::new("key3", "value3").unwrap()], tags);

        let expected = "Errors while parsing tags: tag 'key1:' ends with a colon, \
                        tag ':value2' begins with a colon";
        assert_eq!(Some(expected), error_message.as_deref());
    }
}
302}