Skip to main content

libdd_trace_obfuscation/
obfuscate.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use libdd_trace_protobuf::pb::{
5    self, attribute_any_value::AttributeAnyValueType,
6    attribute_array_value::AttributeArrayValueType,
7};
8
9use crate::{
10    credit_cards::is_card_number,
11    http::obfuscate_url_string,
12    json::JsonObfuscator,
13    memcached::obfuscate_memcached_string,
14    obfuscation_config::ObfuscationConfig,
15    redis::{obfuscate_redis_string, quantize_redis_string, remove_all_redis_args},
16    replacer::replace_span_tags,
17    sql::{DbmsKind, SqlObfuscationMode},
18};
19
// Credit card obfuscation.

/// Meta key whose value is checked for a credit card number on every span.
const TAG_CARD_NUMBER: &str = "card.number";

// HTTP spans.

/// Meta key holding the URL of `web` / `http` spans.
const TAG_HTTPURL: &str = "http.url";

// SQL-like spans.

/// Meta key that receives the obfuscated query of `sql` / `cassandra` spans.
const TAG_SQLQUERY: &str = "sql.query";
/// Meta key read to pick the DBMS flavour used when obfuscating SQL.
const TAG_DBMS: &str = "db.type";

// Key/value stores.

/// Meta key holding the raw command of `redis` spans.
const TAG_REDIS_RAW_COMMAND: &str = "redis.raw_command";
/// Meta key holding the raw command of `valkey` spans.
const TAG_VALKEY_RAW_COMMAND: &str = "valkey.raw_command";
/// Meta key holding the command of `memcached` spans.
const TAG_MEMCACHED_COMMAND: &str = "memcached.command";

// JSON payloads.

/// Meta key holding the query of `mongodb` spans.
const TAG_MONGO_DBQUERY: &str = "mongodb.query";
/// Meta key holding the request body of `elasticsearch` spans.
const TAG_ELASTIC_BODY: &str = "elasticsearch.body";
/// Meta key holding the request body of `opensearch` spans.
const TAG_OPEN_SEARCH_BODY: &str = "opensearch.body";
40
41/// Obfuscate a resource name for client-side stats (Version 1).
42///
43/// Applies the same resource transformations as `obfuscate_span`, but only for span types whose
44/// resource names are modified:
45/// - `"sql"`, `"cassandra"`: SQL obfuscation
46/// - `"redis"`, `"valkey"`: Redis quantization (command names only)
47///
48/// Returns `Some(obfuscated)` if the resource was modified, `None` if no obfuscation was needed.
49#[must_use]
50pub fn obfuscate_resource_for_stats(
51    span_type: &str,
52    resource: &str,
53    dbms_hint: Option<&str>,
54    sql_obfuscation_mode: SqlObfuscationMode,
55) -> Option<String> {
56    match span_type {
57        "sql" | "cassandra" if !resource.is_empty() => {
58            let dbms: DbmsKind = dbms_hint
59                .and_then(|d| d.try_into().ok())
60                .unwrap_or_default();
61            let config = &crate::sql::SqlObfuscateConfig {
62                obfuscation_mode: sql_obfuscation_mode,
63                ..Default::default()
64            };
65            Some(crate::sql::obfuscate_sql(resource, config, dbms))
66        }
67        "redis" | "valkey" => Some(quantize_redis_string(resource)),
68        _ => None,
69    }
70}
71
72/// `obfuscate_span` goes through `span` fields and applies obfuscation on it
73// TODO(APMSP-2764): return parsing errors in a vec to log them ?
74pub fn obfuscate_span(span: &mut pb::Span, config: &ObfuscationConfig) {
75    for span_event in &mut span.span_events {
76        obfuscate_span_event(span_event, config);
77    }
78
79    if let Some(credit_card) = span.meta.get_mut(TAG_CARD_NUMBER) {
80        if config.credit_cards.enabled && is_card_number(&credit_card, config.credit_cards.luhn) {
81            *credit_card = "?".to_string();
82        }
83    }
84    match span.r#type.as_str() {
85        "web" | "http" if !span.meta.is_empty() => {
86            if let Some(url) = span.meta.get_mut(TAG_HTTPURL) {
87                *url = obfuscate_url_string(
88                    url,
89                    config.http.remove_query_string,
90                    config.http.remove_paths_with_digits,
91                );
92            }
93        }
94        "memcached" if config.memcached.enabled => {
95            if let Some(cmd) = span.meta.get_mut(TAG_MEMCACHED_COMMAND) {
96                if config.memcached.keep_command {
97                    *cmd = obfuscate_memcached_string(cmd);
98                } else {
99                    *cmd = String::new();
100                }
101            }
102        }
103        "redis" => {
104            span.resource = quantize_redis_string(&span.resource);
105            if config.redis.enabled && !span.meta.is_empty() {
106                if let Some(redis_cmd) = span.meta.get_mut(TAG_REDIS_RAW_COMMAND) {
107                    if config.redis.remove_all_args {
108                        *redis_cmd = remove_all_redis_args(redis_cmd);
109                    } else {
110                        *redis_cmd = obfuscate_redis_string(redis_cmd);
111                    }
112                }
113            }
114        }
115        "valkey" => {
116            span.resource = quantize_redis_string(&span.resource);
117            if config.valkey.enabled && !span.meta.is_empty() {
118                if let Some(valkey_cmd) = span.meta.get_mut(TAG_VALKEY_RAW_COMMAND) {
119                    if config.valkey.remove_all_args {
120                        *valkey_cmd = remove_all_redis_args(valkey_cmd);
121                    } else {
122                        *valkey_cmd = obfuscate_redis_string(valkey_cmd);
123                    }
124                }
125            }
126        }
127        "sql" | "cassandra" if !span.resource.is_empty() => {
128            let dbms: DbmsKind = span
129                .meta
130                .get(TAG_DBMS)
131                .map(String::as_str)
132                .and_then(|dbms| TryInto::try_into(dbms).ok())
133                .unwrap_or_default();
134            let obfuscated_query = crate::sql::obfuscate_sql(&span.resource, &config.sql, dbms);
135            span.resource.clone_from(&obfuscated_query);
136            span.meta.insert(TAG_SQLQUERY.to_owned(), obfuscated_query);
137        }
138        "elasticsearch" if config.elasticsearch.enabled => {
139            if let Some(elastic_query) = span.meta.get_mut(TAG_ELASTIC_BODY) {
140                // FIXME(APMSP-2673): optimization opportunity here: keep the obfuscators cached to
141                // avoid having clones and re-hashsing strings when putting them in
142                // HashSets
143                let (res, _err) =
144                    JsonObfuscator::new(config.elasticsearch.clone()).obfuscate(elastic_query);
145                *elastic_query = res;
146            }
147        }
148        "opensearch" if config.opensearch.enabled => {
149            if let Some(opensearch_query) = span.meta.get_mut(TAG_OPEN_SEARCH_BODY) {
150                // FIXME(APMSP-2673): optimization opportunity here: keep the obfuscators cached to
151                // avoid having clones and re-hashsing strings when putting them in
152                // HashSets
153                let (res, _err) =
154                    JsonObfuscator::new(config.opensearch.clone()).obfuscate(opensearch_query);
155                *opensearch_query = res;
156            }
157        }
158        "mongodb" if config.mongodb.enabled => {
159            if let Some(mongodb_query) = span.meta.get_mut(TAG_MONGO_DBQUERY) {
160                // FIXME(APMSP-2673): optimization opportunity here: keep the obfuscators cached to
161                // avoid having clones and re-hashsing strings when putting them in
162                // HashSets
163                let (res, _err) =
164                    JsonObfuscator::new(config.mongodb.clone()).obfuscate(mongodb_query);
165
166                *mongodb_query = res;
167            }
168        }
169
170        _ => {}
171    }
172    if let Some(tag_replace_rules) = &config.tag_replace_rules {
173        replace_span_tags(span, tag_replace_rules, &mut String::new());
174    }
175}
176
177pub fn obfuscate_span_event(event: &mut pb::SpanEvent, config: &ObfuscationConfig) {
178    if config.credit_cards.enabled {
179        for (k, v) in &mut event.attributes {
180            if !should_obfuscate_cc_key(k, config) {
181                continue;
182            }
183            let str_value = match v.r#type() {
184                pb::attribute_any_value::AttributeAnyValueType::StringValue => {
185                    v.string_value.clone()
186                }
187                pb::attribute_any_value::AttributeAnyValueType::BoolValue => continue, /* Booleans can't be credit cards */
188                pb::attribute_any_value::AttributeAnyValueType::IntValue => v.int_value.to_string(),
189                pb::attribute_any_value::AttributeAnyValueType::DoubleValue => {
190                    v.double_value.to_string()
191                }
192                pb::attribute_any_value::AttributeAnyValueType::ArrayValue => {
193                    if let Some(array_value) = v.array_value.as_mut() {
194                        obfuscate_attribute_array(array_value, config);
195                    }
196                    continue;
197                }
198            };
199            if is_card_number(&str_value, config.credit_cards.luhn) {
200                v.string_value = "?".to_string();
201                v.r#type = AttributeAnyValueType::StringValue.into();
202            }
203        }
204    }
205}
206
207fn obfuscate_attribute_array(v: &mut pb::AttributeArray, config: &ObfuscationConfig) {
208    for elt in &mut v.values {
209        let string_value = match elt.r#type() {
210            pb::attribute_array_value::AttributeArrayValueType::StringValue => {
211                elt.string_value.clone()
212            }
213            pb::attribute_array_value::AttributeArrayValueType::BoolValue => continue, /* Booleans can't be credit cards */
214            pb::attribute_array_value::AttributeArrayValueType::IntValue => {
215                elt.int_value.to_string()
216            }
217            pb::attribute_array_value::AttributeArrayValueType::DoubleValue => {
218                elt.double_value.to_string()
219            }
220        };
221        if is_card_number(&string_value, config.credit_cards.luhn) {
222            elt.string_value = "?".to_string();
223            elt.r#type = AttributeArrayValueType::StringValue.into();
224        }
225    }
226}
227
228/// `should_obfuscate_cc_key` returns true if the value for the given key should be obfuscated
229/// This is used to skip known safe attributes and specifically configured safe tags
230fn should_obfuscate_cc_key(key: &str, config: &ObfuscationConfig) -> bool {
231    match key {
232	     | "_sample_rate"
233		 | "_sampling_priority_v1"
234		 | "account_id"
235		 | "aws_account"
236		 | "error"
237		 | "error.msg"
238		 | "error.type"
239		 | "error.stack"
240		 | "env"
241		 | "graphql.field"
242		 | "graphql.query"
243		 | "graphql.type"
244		 | "graphql.operation.name"
245		 | "grpc.code"
246		 | "grpc.method"
247		 | "grpc.request"
248		 | "http.status_code"
249		 | "http.method"
250		 | "runtime-id"
251		 | "out.host"
252		 | "out.port"
253		 | "sampling.priority"
254		 | "span.type"
255		 | "span.name"
256		 | "service.name"
257		 | "service"
258		 | "sql.query"
259		 | "version"
260		  // Data Job Monitoring tags - these values are frequently similar to credit card numbers
261		 | "databricks_job_id"
262		 | "databricks_job_run_id"
263		 | "databricks_task_run_id"
264		 | "config.spark_app_startTime"
265		 | "config.spark_databricks_job_parentRunId" =>
266		{return false;}
267		_=> {}
268	}
269    if key.starts_with('_') {
270        return false;
271    }
272    if config.credit_cards.keep_values.contains(key) {
273        return false;
274    }
275    true
276}
277
#[cfg(test)]
mod tests {
    use super::{obfuscate_resource_for_stats, obfuscate_span, pb};
    use crate::{obfuscation_config, replacer};
    use libdd_trace_utils::test_utils;

    /// Calls `obfuscate_resource_for_stats` with no DBMS hint and the default SQL mode.
    fn obfuscate_stats(span_type: &str, resource: &str) -> Option<String> {
        let mode = crate::sql::SqlObfuscationMode::default();
        obfuscate_resource_for_stats(span_type, resource, None, mode)
    }

    /// Builds the standard test span with one extra meta tag.
    fn span_with_tag(key: &str, value: &str) -> pb::Span {
        let mut span = test_utils::create_test_span(111, 222, 0, 1, true);
        span.meta.insert(key.to_string(), value.to_string());
        span
    }

    /// Builds a `redis` test span carrying the raw command shared by the redis tests.
    fn redis_span() -> pb::Span {
        let mut span = span_with_tag("redis.raw_command", "GEOADD key longitude latitude member");
        span.r#type = "redis".to_string();
        span
    }

    /// Builds a config with redis obfuscation enabled and the given `remove_all_args` flag.
    fn redis_config(remove_all_args: bool) -> obfuscation_config::ObfuscationConfig {
        obfuscation_config::ObfuscationConfig {
            redis: obfuscation_config::RedisConfig {
                enabled: true,
                remove_all_args,
            },
            ..Default::default()
        }
    }

    #[test]
    fn test_obfuscate_resource_for_stats_sql() {
        assert_eq!(
            obfuscate_stats("sql", "SELECT * FROM users WHERE id = 42").as_deref(),
            Some("SELECT * FROM users WHERE id = ?")
        );
    }

    #[test]
    fn test_obfuscate_resource_for_stats_cassandra() {
        assert_eq!(
            obfuscate_stats("cassandra", "SELECT * FROM table1 WHERE id = 42").as_deref(),
            Some("SELECT * FROM table1 WHERE id = ?")
        );
    }

    #[test]
    fn test_obfuscate_resource_for_stats_redis() {
        // quantize_redis_string extracts command names
        assert_eq!(
            obfuscate_stats("redis", "SET mykey myvalue\nGET mykey").as_deref(),
            Some("SET GET")
        );
    }

    #[test]
    fn test_obfuscate_resource_for_stats_valkey() {
        assert_eq!(
            obfuscate_stats("valkey", "SET mykey myvalue\nGET mykey").as_deref(),
            Some("SET GET")
        );
    }

    #[test]
    fn test_obfuscate_resource_for_stats_no_match() {
        let untouched = [
            ("http", "/api/users"),
            ("web", "/api/users"),
            ("grpc", "MyService/MyMethod"),
        ];
        for (span_type, resource) in untouched {
            assert!(obfuscate_stats(span_type, resource).is_none());
        }
    }

    #[test]
    fn test_obfuscate_resource_for_stats_empty_sql() {
        assert_eq!(obfuscate_stats("sql", ""), None);
    }

    #[test]
    fn test_obfuscates_span_url_strings() {
        let mut span = span_with_tag(
            "http.url",
            "http://foo.com/id/123/page/q?search=bar&page=2",
        );
        span.r#type = "http".to_string();

        let obf_config = obfuscation_config::ObfuscationConfig {
            http: obfuscation_config::HttpConfig {
                remove_query_string: true,
                remove_paths_with_digits: true,
            },
            ..Default::default()
        };
        obfuscate_span(&mut span, &obf_config);

        assert_eq!(span.meta["http.url"], "http://foo.com/id/?/page/q?");
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_replace_span_tags() {
        let mut span = span_with_tag("custom.tag", "/foo/bar/foo");

        let rules = replacer::parse_rules_from_string(
            r#"[{"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"}]"#,
        )
        .unwrap();
        let obf_config = obfuscation_config::ObfuscationConfig {
            tag_replace_rules: Some(rules),
            ..Default::default()
        };
        obfuscate_span(&mut span, &obf_config);

        assert_eq!(span.meta["custom.tag"], "/foo/bar/extra");
    }

    #[test]
    fn obfuscate_all_redis_args() {
        let mut span = redis_span();
        obfuscate_span(&mut span, &redis_config(true));
        assert_eq!(span.meta["redis.raw_command"], "GEOADD ?");
    }

    #[test]
    fn obfuscate_redis_raw_query() {
        let mut span = redis_span();
        obfuscate_span(&mut span, &redis_config(false));
        assert_eq!(
            span.meta["redis.raw_command"],
            "GEOADD key longitude latitude ?"
        );
    }
}