libdd_common/
tag.rs

1// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use serde::{Deserialize, Serialize};
5use std::borrow::Cow;
6use std::fmt::{Debug, Display, Formatter};
7
8pub use static_assertions::{const_assert, const_assert_ne};
9
10#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
11#[serde(transparent)]
12pub struct Tag {
13    /// Many tags are made from literal strings, such as:
14    ///  - "language:native"
15    ///  - "src_library:libdatadog"
16    ///  - "type:timeout"
17    ///
18    /// So being able to save allocations is nice.
19    value: Cow<'static, str>,
20}
21
22impl Tag {
23    /// Used by the `tag!` macro. Not meant to be used directly, please use
24    /// the macro instead.
25    /// # Safety
26    /// Do not use directly, use through the `tag!` macro which enforces the
27    /// safety invariants at compile time.
28    pub const unsafe fn from_static_unchecked(value: &'static str) -> Self {
29        Self {
30            value: Cow::Borrowed(value),
31        }
32    }
33}
34
/// Creates a tag from a key and value known at compile-time, and fails to
/// compile if it's known to be invalid (it may still emit an invalid tag, not
/// all tag validation is currently done client-side). If the key or value
/// aren't known at compile-time, then use [Tag::new].
///
/// The following is checked at compile time:
///  - the value is not empty,
///  - the combined `key:value` text starts with an ASCII letter (which also
///    rules out an empty key),
///  - the combined text does not end with a colon,
///  - the combined text is at most 200 bytes long.
// todo: what's a good way to keep these in-sync with Tag::from_value?
// This can be a little more strict because it's compile-time evaluated.
// https://docs.datadoghq.com/getting_started/tagging/#define-tags
#[macro_export]
macro_rules! tag {
    ($key:expr, $val:expr) => {{
        // Keys come in "value" or "key:value" format. This pattern is always
        // the key:value format, which means the value should not be empty.
        $crate::tag::const_assert!(!$val.is_empty());

        const COMBINED: &str = $crate::const_format::concatcp!($key, ":", $val);

        // Tags must start with a letter. This is more restrictive than is
        // required (could be a unicode alphabetic char) and can be lifted
        // if it's causing problems. Indexing byte 0 is always in bounds
        // because COMBINED contains at least the ":" separator.
        $crate::tag::const_assert!(COMBINED.as_bytes()[0].is_ascii_alphabetic());

        // Tag::from_value rejects any tag that ends with a colon, so reject
        // it here as well; otherwise a value such as "east:" would produce a
        // tag at compile time that the runtime constructor refuses.
        $crate::tag::const_assert!(COMBINED.as_bytes()[COMBINED.len() - 1] != b':');

        // Tags can be up to 200 characters long and support Unicode letters
        // (which includes most character sets, including languages such as
        // Japanese).
        // Presently, engineers interpreted this to be 200 bytes, not unicode
        // characters. However, if the 200th character is unicode, it's
        // allowed to spill over due to a historical bug. For now, we'll
        // ignore this and hard-code 200 bytes.
        $crate::tag::const_assert!(COMBINED.as_bytes().len() <= 200);

        // `unused_unsafe` is allowed so that expanding the macro inside a
        // caller's own `unsafe` block does not produce a warning. The
        // attribute sits on a `let` statement because that is a position
        // where attributes are accepted.
        // SAFETY: COMBINED passed every compile-time assertion above, which
        // is the contract `from_static_unchecked` asks its callers to uphold.
        #[allow(unused_unsafe)]
        let tag = unsafe { $crate::tag::Tag::from_static_unchecked(COMBINED) };
        tag
    }};
}
72
73impl Debug for Tag {
74    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75        f.debug_struct("Tag").field("value", &self.value).finish()
76    }
77}
78
79impl AsRef<str> for Tag {
80    fn as_ref(&self) -> &str {
81        self.value.as_ref()
82    }
83}
84
85// Any type which implements Display automatically has to_string.
86impl Display for Tag {
87    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
88        write!(f, "{}", self.value)
89    }
90}
91
92impl Tag {
93    /// Validates a tag.
94    fn from_value<'a, IntoCow>(chunk: IntoCow) -> anyhow::Result<Self>
95    where
96        IntoCow: Into<Cow<'a, str>>,
97    {
98        let chunk = chunk.into();
99
100        /* The docs have various rules, which we are choosing not to enforce:
101         * https://docs.datadoghq.com/getting_started/tagging/#defining-tags
102         * The reason is that if tracing and profiling disagree on what valid
103         * tags are, then the user experience is degraded.
104         * So... we mostly just pass it along and handle it in the backend.
105         * However, we do enforce some rules around the colon, because they
106         * are likely to be errors (such as passed in empty string).
107         */
108
109        anyhow::ensure!(!chunk.is_empty(), "tag is empty");
110
111        let mut chars = chunk.chars();
112        anyhow::ensure!(
113            chars.next() != Some(':'),
114            "tag '{chunk}' begins with a colon"
115        );
116        anyhow::ensure!(chars.last() != Some(':'), "tag '{chunk}' ends with a colon");
117
118        let value = Cow::Owned(chunk.into_owned());
119        Ok(Tag { value })
120    }
121
122    /// Creates a tag from a key and value. It's preferred to use the `tag!`
123    /// macro when the key and value are both known at compile-time.
124    pub fn new<K, V>(key: K, value: V) -> anyhow::Result<Self>
125    where
126        K: AsRef<str>,
127        V: AsRef<str>,
128    {
129        let key = key.as_ref();
130        let value = value.as_ref();
131
132        Tag::from_value(format!("{key}:{value}"))
133    }
134}
135
136/// Parse a string of tags typically provided by environment variables
137/// The tags are expected to be either space or comma separated:
138///     "key1:value1,key2:value2"
139///     "key1:value1 key2:value2"
140/// Tag names and values are required and may not be empty.
141///
142/// Returns a tuple of the correctly parsed tags and an optional error message
143/// describing issues encountered during parsing.
144pub fn parse_tags(str: &str) -> (Vec<Tag>, Option<String>) {
145    let chunks = str
146        .split(&[',', ' '][..])
147        .filter(|str| !str.is_empty())
148        .map(Tag::from_value);
149
150    let mut tags = vec![];
151    let mut error_message = String::new();
152    for result in chunks {
153        match result {
154            Ok(tag) => tags.push(tag),
155            Err(err) => {
156                if error_message.is_empty() {
157                    error_message += "Errors while parsing tags: ";
158                } else {
159                    error_message += ", ";
160                }
161                error_message += &err.to_string();
162            }
163        }
164    }
165
166    let error_message = if error_message.is_empty() {
167        None
168    } else {
169        Some(error_message)
170    };
171    (tags, error_message)
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// `Tag` must be usable across threads.
    #[test]
    fn test_is_send() {
        // fails to compile if false
        fn is_send<T: Send>(_t: T) -> bool {
            true
        }
        assert!(is_send(tag!("src_library", "libdatadog")));
    }

    #[test]
    fn test_empty_key() {
        let _ = Tag::new("", "woof").expect_err("empty key is not allowed");
    }

    #[test]
    fn test_empty_value() {
        let _ = Tag::new("key1", "").expect_err("empty value is an error");
    }

    #[test]
    fn test_bad_utf8() {
        // 0b1111_0xxx is the start of a 4-byte sequence, but there aren't any
        // more chars, so it will get converted into the utf8 replacement
        // character. This results in a string with an "a" and a replacement
        // char, so it should be an error (no valid chars). However, we don't
        // enforce many things about tags yet client-side, so we let it slide.
        let bytes = &[b'a', 0b1111_0111];
        let key = String::from_utf8_lossy(bytes);
        let t = Tag::new(key, "value").unwrap();
        assert_eq!("a\u{FFFD}:value", t.to_string());
    }

    #[test]
    fn test_value_has_colon() {
        let result = Tag::new("env", "staging:east").expect("values can have colons");
        assert_eq!("env:staging:east", result.to_string());

        let result = tag!("env", "staging:east");
        assert_eq!("env:staging:east", result.to_string());
    }

    #[test]
    fn test_suspicious_tags() {
        // Based on tag rules, these should all fail. However, there is a risk
        // that profile tags will then differ or cause failures compared to
        // trace tags. These require cross-team, cross-language collaboration.
        let cases = [
            ("_begins_with_non-letter".to_string(), "value"),
            ("the-tag-length-is-over-200-characters".repeat(6), "value"),
        ];

        for (key, value) in cases {
            let result = Tag::new(&key, value);
            // Again, these should fail, but it's not implemented yet
            assert!(result.is_ok(), "expected key {key:?} to be accepted");
        }
    }

    #[test]
    fn test_missing_colon_parsing() {
        let tag = Tag::from_value("tag").unwrap();
        assert_eq!("tag", tag.to_string());
    }

    #[test]
    fn test_leading_colon_parsing() {
        let _ = Tag::from_value(":tag").expect_err("Cannot start with a colon");
    }

    #[test]
    fn test_tailing_colon_parsing() {
        let _ = Tag::from_value("tag:").expect_err("Cannot end with a colon");
    }

    #[test]
    fn test_tags_parsing() {
        let cases = [
            ("", vec![]),
            (",", vec![]),
            (" , ", vec![]),
            // Testing that values can contain colons
            (
                "env:staging:east,location:nyc:ny",
                vec![
                    Tag::new("env", "staging:east").unwrap(),
                    Tag::new("location", "nyc:ny").unwrap(),
                ],
            ),
            // Testing value format (no key)
            ("value", vec![Tag::from_value("value").unwrap()]),
            (
                "state:utah,state:idaho",
                vec![
                    Tag::new("state", "utah").unwrap(),
                    Tag::new("state", "idaho").unwrap(),
                ],
            ),
            (
                "key1:value1 key2:value2 key3:value3",
                vec![
                    Tag::new("key1", "value1").unwrap(),
                    Tag::new("key2", "value2").unwrap(),
                    Tag::new("key3", "value3").unwrap(),
                ],
            ),
            (
                // Testing consecutive separators being collapsed
                "key1:value1, key2:value2 ,key3:value3 , key4:value4",
                vec![
                    Tag::new("key1", "value1").unwrap(),
                    Tag::new("key2", "value2").unwrap(),
                    Tag::new("key3", "value3").unwrap(),
                    Tag::new("key4", "value4").unwrap(),
                ],
            ),
        ];

        for (input, expected) in cases {
            let (actual, error_message) = parse_tags(input);
            // Name the input so a failing row of the table is easy to find.
            assert_eq!(expected, actual, "unexpected tags for input {input:?}");
            assert!(
                error_message.is_none(),
                "unexpected error for input {input:?}: {error_message:?}"
            );
        }
    }

    /// Invalid pieces are reported in one combined message while the valid
    /// pieces around them are still returned.
    #[test]
    fn test_tags_parsing_errors() {
        let (tags, error_message) = parse_tags(":bad,good:tag,worse:");

        assert_eq!(vec![Tag::new("good", "tag").unwrap()], tags);
        assert_eq!(
            Some(
                "Errors while parsing tags: tag ':bad' begins with a colon, \
                 tag 'worse:' ends with a colon"
            ),
            error_message.as_deref()
        );
    }
}