Skip to main content

tga/classify/sources/
datadog.rs

1//! Datadog deployment-event classification source.
2//!
3//! Why: when a commit SHA is referenced by a Datadog deployment event, the
4//! work type is unambiguously `devops` (or the operator-configured category).
5//! Deployment evidence is a very strong signal (0.95 confidence) — stronger
6//! than any commit-message heuristic.
7//!
//! What: extracts SHA-like tokens from commit messages (any lowercase hex
//! run of 7–40 chars, which covers abbreviated SHAs as well as full 40-char
//! ones), then queries the Datadog Events API
10//! (`GET /api/v1/events?sources=deployment&tags=commit:<sha>`) to see if
11//! the commit was associated with a deployment event. When found, returns
12//! an [`super::ExternalSignal`] at the configured confidence.
13//!
14//! Note: Datadog API calls require both an API key (`DD-API-KEY`) and an
15//! application key (`DD-APPLICATION-KEY`) in HTTP headers.
16//!
17//! Test: see `tests::extract_commit_shas_*` for extractor coverage and
18//! `tests::fetch_and_classify_via_wiremock` for the HTTP path.
19
20use std::collections::HashMap;
21
22use regex::Regex;
23use serde::{Deserialize, Serialize};
24use tracing::{debug, warn};
25
26use super::{DatadogSourceConfig, ExternalSignal};
27
/// Default confidence for Datadog deployment-event signals.
///
/// Why: deployment evidence is highly authoritative — if Datadog recorded a
/// deployment event for this commit, the work is clearly `devops`.
/// What: 0.95 — above the JIRA/Linear standard (0.92) but below Tier-0
/// manual overrides (1.0). [`check_shas_batch`] falls back to this value
/// only when `DatadogSourceConfig::confidence` is `None`.
/// Test: `tests::fetch_and_classify_via_wiremock` asserts the confidence
/// carried by an emitted signal, using an explicitly configured 0.95.
/// NOTE(review): no test in this module exercises the `None` fallback for a
/// found deployment event — consider adding one.
pub const DATADOG_DEFAULT_CONFIDENCE: f64 = 0.95;
36
37/// Regex matching a full (40-char) or short (7–12 char) Git SHA.
38///
39/// Why: developers sometimes paste SHAs into commit messages; the Datadog
40/// query correlates on the commit SHA.
41/// What: matches word-boundary-guarded lowercase hex strings of 7–40 chars.
42/// Test: covered by `tests::extract_commit_shas_*`.
43fn sha_regex() -> Regex {
44    Regex::new(r"\b([0-9a-f]{7,40})\b").expect("static regex is valid")
45}
46
47/// Extract all plausible Git commit SHAs from a commit message.
48///
49/// Why: Datadog deployment events are keyed by commit SHA; extracting SHAs
50/// from the message is the join key between the commit and the event.
51/// What: returns a `Vec<String>` of unique SHA-like substrings (7–40 hex
52/// chars), in left-to-right order. The extractor is intentionally broad —
53/// false-positives are cheap to discard (the API returns no event).
54/// Test: covered by `tests::extract_commit_shas_full` and
55/// `tests::extract_commit_shas_short`.
56pub fn extract_commit_shas(message: &str) -> Vec<String> {
57    let re = sha_regex();
58    let mut seen = std::collections::HashSet::new();
59    let mut out = Vec::new();
60    for cap in re.captures_iter(message) {
61        if let Some(sha_m) = cap.get(1) {
62            let sha = sha_m.as_str().to_string();
63            if seen.insert(sha.clone()) {
64                out.push(sha);
65            }
66        }
67    }
68    out
69}
70
/// A Datadog event as returned by the Events API.
///
/// Why: we only need to know whether the event list is non-empty (indicating
/// at least one deployment event matched the tag query).
/// What: a minimal serde struct over the `GET /api/v1/events` response
/// envelope. When `events` is non-empty, the commit has a deployment record.
/// Because of `#[serde(default)]`, a body without an `events` key
/// deserializes to an empty list instead of failing.
/// Test: deserialized on the HTTP path exercised by
/// `tests::fetch_and_classify_via_wiremock` and
/// `tests::no_deployment_event_yields_none_signal`.
#[derive(Debug, Deserialize, Serialize)]
pub struct DatadogEventsResponse {
    /// List of events matching the query. Non-empty = deployment found.
    /// Defaults to an empty list when the key is absent from the response.
    #[serde(default)]
    pub events: Vec<DatadogEvent>,
}
84
/// A single Datadog event (minimal fields).
///
/// Why: the lookup in this file only checks whether any event came back, so
/// the element type just has to deserialize leniently.
/// What: every field is optional on the wire — a missing `id` becomes `None`,
/// and a missing `title` or `tags` falls back to its empty default.
/// Test: deserialized by `tests::fetch_and_classify_via_wiremock`.
#[derive(Debug, Deserialize, Serialize)]
pub struct DatadogEvent {
    /// Event ID, kept as a raw JSON value so that either a JSON number or a
    /// JSON string deserializes; `None` when the field is absent.
    pub id: Option<serde_json::Value>,
    /// Event title (e.g. `"Deployment"`).
    #[serde(default)]
    pub title: String,
    /// Tags attached to this event (e.g. `"commit:abc1234"`).
    #[serde(default)]
    pub tags: Vec<String>,
}
97
98/// Check whether a commit SHA has a matching deployment event.
99///
100/// Why: the API call is the join between the commit SHA and the Datadog
101/// deployment record; isolating it here allows mock-HTTP testing.
102/// What: queries `GET /api/v1/events?sources=deployment&tags=commit:<sha>`
103/// with the configured API and application keys. Returns `true` when the
104/// response contains at least one event.
105/// Test: integration-tested via wiremock.
106pub async fn has_deployment_event(
107    client: &reqwest::Client,
108    config: &DatadogSourceConfig,
109    sha: &str,
110    api_base_override: Option<&str>,
111) -> bool {
112    let api_key = match std::env::var(&config.api_key_env) {
113        Ok(k) if !k.is_empty() => k,
114        _ => {
115            warn!(
116                api_key_env = %config.api_key_env,
117                "Datadog API key env var `{}` is not set — skipping Datadog lookups",
118                config.api_key_env,
119            );
120            return false;
121        }
122    };
123
124    let app_key = match std::env::var(&config.app_key_env) {
125        Ok(k) if !k.is_empty() => k,
126        _ => {
127            warn!(
128                app_key_env = %config.app_key_env,
129                "Datadog app key env var `{}` is not set — skipping Datadog lookups",
130                config.app_key_env,
131            );
132            return false;
133        }
134    };
135
136    let site = config.dd_site.as_deref().unwrap_or("datadoghq.com");
137    let base = api_base_override
138        .map(|u| u.to_string())
139        .unwrap_or_else(|| format!("https://api.{site}"));
140
141    // Build the events query. We filter by `sources=deployment` and tag the
142    // commit SHA as `commit:<sha>`. The API requires a time window; we use
143    // a wide window (now − 1 year) to catch historical deployments.
144    let now = std::time::SystemTime::now()
145        .duration_since(std::time::UNIX_EPOCH)
146        .map(|d| d.as_secs())
147        .unwrap_or(0);
148    let start = now.saturating_sub(365 * 24 * 3600);
149
150    let url = format!(
151        "{base}/api/v1/events?sources=deployment&tags=commit:{sha}&start={start}&end={now}"
152    );
153
154    let resp = match client
155        .get(&url)
156        .header("DD-API-KEY", &api_key)
157        .header("DD-APPLICATION-KEY", &app_key)
158        .send()
159        .await
160    {
161        Ok(r) => r,
162        Err(e) => {
163            warn!(sha, error = %e, "Datadog Events API request failed; skipping");
164            return false;
165        }
166    };
167
168    if !resp.status().is_success() {
169        warn!(
170            sha,
171            status = %resp.status(),
172            "Datadog Events API returned non-success status; skipping"
173        );
174        return false;
175    }
176
177    match resp.json::<DatadogEventsResponse>().await {
178        Ok(r) => {
179            let found = !r.events.is_empty();
180            debug!(sha, found, "Datadog deployment query complete");
181            found
182        }
183        Err(e) => {
184            warn!(sha, error = %e, "failed to parse Datadog Events response; skipping");
185            false
186        }
187    }
188}
189
190/// Check a batch of SHAs for deployment events.
191///
192/// Why: a commit message may contain multiple SHA references; checking
193/// each unique one minimises redundant API calls.
194/// What: deduplicates `shas`, queries each, and returns a map from SHA
195/// to `Option<ExternalSignal>`.
196/// Test: covered by resolver integration tests.
197pub async fn check_shas_batch(
198    client: &reqwest::Client,
199    config: &DatadogSourceConfig,
200    shas: &[String],
201    api_base_override: Option<&str>,
202) -> HashMap<String, Option<ExternalSignal>> {
203    let mut out = HashMap::new();
204    for sha in shas {
205        if out.contains_key(sha) {
206            continue;
207        }
208        let found = has_deployment_event(client, config, sha, api_base_override).await;
209        let signal = if found {
210            let confidence = config.confidence.unwrap_or(DATADOG_DEFAULT_CONFIDENCE);
211            Some(ExternalSignal {
212                category: config.default_category.clone(),
213                confidence,
214                source: format!("datadog:deployment:{sha}"),
215            })
216        } else {
217            None
218        };
219        out.insert(sha.clone(), signal);
220    }
221    out
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Why: full 40-char SHAs are the most common form in commit messages
    /// (e.g. `"cherry-pick from abc1234..."`).
    /// What: assert extraction of a full 40-char SHA.
    /// Test: pure regex, no HTTP.
    #[test]
    fn extract_commit_shas_full() {
        let shas = extract_commit_shas("cherry-pick from a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"); // pragma: allowlist secret
        assert_eq!(shas, vec!["a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"]); // pragma: allowlist secret
    }

    /// Why: short SHAs (7–12 chars) are common in manual cherry-pick
    /// references and deployment notes.
    /// What: assert extraction of a short 7-char SHA.
    /// Test: pure regex, no HTTP.
    #[test]
    fn extract_commit_shas_short() {
        let shas = extract_commit_shas("deploy: abc1234 to production");
        assert_eq!(shas, vec!["abc1234"]);
    }

    /// Why: deduplication must prevent the same SHA from triggering multiple
    /// API calls.
    /// What: assert multi-SHA messages are deduplicated, keeping the order of
    /// first appearance.
    /// Test: pure regex, no HTTP.
    #[test]
    fn extract_commit_shas_dedup() {
        let shas = extract_commit_shas("reverts abc1234 and abc1234 again, plus def5678");
        assert_eq!(shas, vec!["abc1234", "def5678"]);
    }

    /// Why: messages without SHAs should yield an empty vec so we don't
    /// make unnecessary API calls.
    /// What: assert empty result on plain commit messages.
    /// Test: pure regex, no HTTP.
    #[test]
    fn extract_commit_shas_plain_message_yields_empty() {
        // "add" is hex-only but just 3 chars (below the 7-char minimum);
        // every other word in the message contains a non-hex letter.
        assert!(extract_commit_shas("feat: add login flow").is_empty());
    }

    /// Why: `DatadogSourceConfig` must round-trip through YAML deserialization
    /// with `deny_unknown_fields` so config typos surface at load time.
    /// What: deserialize a full `type: datadog` source config and assert fields.
    /// Test: pure deserialization, no HTTP.
    #[test]
    fn datadog_source_config_deserializes() {
        use crate::classify::sources::SourceConfig;
        let yaml = r#"
type: datadog
api_key_env: DATADOG_API_KEY
app_key_env: DATADOG_APP_KEY
dd_site: datadoghq.com
service: my-service
default_category: devops
confidence: 0.95
"#;
        let cfg: SourceConfig = serde_yaml::from_str(yaml).expect("deserialize");
        match cfg {
            SourceConfig::Datadog(d) => {
                assert_eq!(d.api_key_env, "DATADOG_API_KEY"); // pragma: allowlist secret
                assert_eq!(d.app_key_env, "DATADOG_APP_KEY"); // pragma: allowlist secret
                assert_eq!(d.dd_site.as_deref(), Some("datadoghq.com"));
                assert_eq!(d.service.as_deref(), Some("my-service"));
                assert_eq!(d.default_category, "devops");
                assert!(d
                    .confidence
                    .map(|c| (c - 0.95_f64).abs() < f64::EPSILON)
                    .unwrap_or(false));
            }
            other => panic!("expected Datadog variant, got {other:?}"),
        }
    }

    /// Why: `deny_unknown_fields` on `DatadogSourceConfig` must reject YAML
    /// typos with a parse error.
    /// What: attempt to deserialize with an unknown field and assert `Err`.
    /// Test: pure deserialization, no HTTP.
    #[test]
    fn datadog_source_config_unknown_field_is_rejected() {
        let yaml = r#"
type: datadog
api_key_env: DATADOG_API_KEY
app_key_env: DATADOG_APP_KEY
default_category: devops
unknown_field: oops
"#;
        let result: Result<crate::classify::sources::SourceConfig, _> = serde_yaml::from_str(yaml);
        assert!(result.is_err(), "unknown field must be rejected");
    }

    /// Why: wiremock integration test — verifies the full HTTP path including
    /// DD-API-KEY and DD-APPLICATION-KEY headers.
    /// What: mock the Datadog Events API returning a deployment event; assert
    /// `has_deployment_event` returns true and the signal is correct.
    /// Test: wiremock mock of Datadog Events API.
    #[tokio::test]
    async fn fetch_and_classify_via_wiremock() {
        use wiremock::matchers::{header, method, path_regex};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        let server = MockServer::start().await;

        let body = serde_json::json!({
            "events": [
                {
                    "id": 12345,
                    "title": "Deployment",
                    "tags": ["commit:abc1234", "env:production"]
                }
            ]
        });

        // The mock only answers when both key headers carry the expected
        // values, so a successful lookup also proves the headers were sent.
        Mock::given(method("GET"))
            .and(path_regex(r"/api/v1/events.*"))
            .and(header("DD-API-KEY", "test-api-key"))
            .and(header("DD-APPLICATION-KEY", "test-app-key"))
            .respond_with(ResponseTemplate::new(200).set_body_json(body))
            .mount(&server)
            .await;

        // SAFETY (review): these variable names are used by this test only;
        // assumes no other thread reads or writes the process environment
        // concurrently — TODO confirm under the parallel test runner.
        unsafe { std::env::set_var("DD_API_KEY_WT", "test-api-key") }; // pragma: allowlist secret
        unsafe { std::env::set_var("DD_APP_KEY_WT", "test-app-key") }; // pragma: allowlist secret

        let config = DatadogSourceConfig {
            api_key_env: "DD_API_KEY_WT".to_string(), // pragma: allowlist secret
            app_key_env: "DD_APP_KEY_WT".to_string(), // pragma: allowlist secret
            dd_site: Some("datadoghq.com".to_string()),
            service: Some("my-service".to_string()),
            default_category: "devops".to_string(),
            confidence: Some(0.95),
        };

        let client = reqwest::Client::new();
        let found = has_deployment_event(&client, &config, "abc1234", Some(&server.uri())).await;
        assert!(found, "deployment event should be found");

        // Now verify the batch helper produces the right signal.
        let map = check_shas_batch(
            &client,
            &config,
            &["abc1234".to_string()],
            Some(&server.uri()),
        )
        .await;
        let signal = map.get("abc1234").and_then(|s| s.as_ref()).expect("signal");
        assert_eq!(signal.category, "devops");
        assert!(
            (signal.confidence - 0.95_f64).abs() < f64::EPSILON,
            "confidence should be 0.95"
        );
        assert!(signal.source.contains("abc1234"));

        // SAFETY (review): same assumption as for `set_var` above. Note that
        // this cleanup is skipped if an assertion above panics.
        unsafe { std::env::remove_var("DD_API_KEY_WT") };
        unsafe { std::env::remove_var("DD_APP_KEY_WT") };
    }

    /// Why: when no deployment event is found the batch helper must return
    /// `None` so the pipeline falls through to commit-message rules.
    /// What: mock an empty events list; assert signal is None.
    /// Test: wiremock mock of Datadog Events API.
    #[tokio::test]
    async fn no_deployment_event_yields_none_signal() {
        use wiremock::matchers::{method, path_regex};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        let server = MockServer::start().await;

        let body = serde_json::json!({"events": []});
        Mock::given(method("GET"))
            .and(path_regex(r"/api/v1/events.*"))
            .respond_with(ResponseTemplate::new(200).set_body_json(body))
            .mount(&server)
            .await;

        // SAFETY (review): these variable names are used by this test only;
        // assumes no other thread reads or writes the process environment
        // concurrently — TODO confirm under the parallel test runner.
        unsafe { std::env::set_var("DD_API_KEY_EMPTY", "test-api-key") }; // pragma: allowlist secret
        unsafe { std::env::set_var("DD_APP_KEY_EMPTY", "test-app-key") }; // pragma: allowlist secret

        let config = DatadogSourceConfig {
            api_key_env: "DD_API_KEY_EMPTY".to_string(), // pragma: allowlist secret
            app_key_env: "DD_APP_KEY_EMPTY".to_string(), // pragma: allowlist secret
            dd_site: None,
            service: None,
            default_category: "devops".to_string(),
            confidence: None,
        };

        let client = reqwest::Client::new();
        let map = check_shas_batch(
            &client,
            &config,
            &["deadbeef".to_string()],
            Some(&server.uri()),
        )
        .await;
        let signal = map.get("deadbeef").expect("key present");
        assert!(
            signal.is_none(),
            "no events should yield None signal, got {signal:?}"
        );

        // SAFETY (review): same assumption as for `set_var` above. Note that
        // this cleanup is skipped if an assertion above panics.
        unsafe { std::env::remove_var("DD_API_KEY_EMPTY") };
        unsafe { std::env::remove_var("DD_APP_KEY_EMPTY") };
    }
}