tga 1.3.0

Developer productivity analytics — git commit collection, classification, and reporting
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
//! Per-run external source resolver with in-memory caching.
//!
//! Why: a single `tga classify` run over 15k commits may reference hundreds
//! of unique JIRA/GitHub tickets. Re-fetching the same ticket for every commit
//! that mentions it would flood the external APIs with duplicate requests and
//! be slow. The resolver caches every external lookup for the lifetime of one
//! classification run (in-memory; never persisted to disk).
//!
//! What: [`ExternalSourceResolver`] wraps a `reqwest::Client` and a per-source
//! cache. [`ExternalSourceResolver::resolve`] accepts a commit message, extracts
//! ticket keys, checks the cache, fetches only misses, and returns the
//! highest-priority [`super::ExternalSignal`] found across all configured sources.
//!
//! Test: covered by `tests::resolver_caches_jira_result_across_calls` and the
//! mock-HTTP integration tests.

use std::collections::HashMap;
use std::sync::Mutex;

use tracing::debug;

use super::{
    github_issues::{self, GitHubRef},
    jira, ExternalSignal, SourceConfig,
};

/// In-memory cache for JIRA lookups (`key → Option<ExternalSignal>`).
///
/// A key that is present with a `None` value has already been looked up and
/// produced no signal; the resolver treats it as cached and does not fetch it
/// again.
type JiraCache = HashMap<String, Option<ExternalSignal>>;
/// In-memory cache for GitHub Issues lookups (`"owner/repo#N" → Option<ExternalSignal>`).
///
/// Keys are built as `"{repo}#{number}"`, where `repo` is the repo named by the
/// reference itself when it has one and the source's configured repo otherwise.
type GithubCache = HashMap<String, Option<ExternalSignal>>;

/// Per-source internal state.
///
/// One value is built per configured source by [`ExternalSourceResolver::new`].
/// Each variant owns a clone of its config and its own lookup cache; the cache
/// sits behind a `Mutex` so it can be filled through a shared reference.
enum SourceState {
    /// State for a JIRA source.
    Jira {
        /// Settings for this source, cloned from the `SourceConfig::Jira` entry.
        config: super::JiraSourceConfig,
        /// Lookup results for this source, keyed by ticket key (see [`JiraCache`]).
        cache: Mutex<JiraCache>,
        /// Test-only base URL override (points at a wiremock server).
        base_url_override: Option<String>,
    },
    /// State for a GitHub Issues source.
    GithubIssues {
        /// Settings for this source, cloned from the `SourceConfig::GithubIssues` entry.
        config: super::GithubIssuesSourceConfig,
        /// Lookup results for this source (see [`GithubCache`] for the key format).
        cache: Mutex<GithubCache>,
        /// Test-only API base URL override.
        api_base_override: Option<String>,
    },
}

/// Per-run resolver that dispatches commit messages to configured external
/// sources and caches the results.
///
/// Why: concentrating the dispatch, caching, and priority logic here keeps the
/// pipeline free of HTTP concerns and makes the resolver trivially testable
/// via the override setters.
/// What: holds one [`SourceState`] per configured source (stored in the order
/// the configs were passed to [`Self::new`]) and a shared `reqwest::Client`.
/// [`Self::resolve`] returns the first non-`None` signal found across those
/// sources; the priority model in issue #260 ranks JIRA sources ahead of
/// GitHub sources.
/// Test: see `tests::*` in this module; end-to-end integration is in
/// `pipeline::tests`.
pub struct ExternalSourceResolver {
    /// HTTP client shared by the fetches of every source.
    client: reqwest::Client,
    /// One entry per configured source; the test-only override setters index
    /// into this list.
    sources: Vec<SourceState>,
}

impl ExternalSourceResolver {
    /// Build a resolver from a slice of [`SourceConfig`]s.
    ///
    /// Why: the pipeline constructs the resolver once per run from the config;
    /// construction is cheap (no HTTP calls).
    /// What: builds one [`SourceState`] per config entry, sharing a single
    /// `reqwest::Client` across all sources.
    /// Test: see `tests::resolver_builds_from_empty_sources`.
    pub fn new(sources: &[SourceConfig]) -> Self {
        let client = reqwest::Client::new();
        let states = sources
            .iter()
            .map(|cfg| match cfg {
                SourceConfig::Jira(j) => SourceState::Jira {
                    config: j.clone(),
                    cache: Mutex::new(HashMap::new()),
                    base_url_override: None,
                },
                SourceConfig::GithubIssues(g) => SourceState::GithubIssues {
                    config: g.clone(),
                    cache: Mutex::new(HashMap::new()),
                    api_base_override: None,
                },
            })
            .collect();
        Self {
            client,
            sources: states,
        }
    }

    /// Resolve a commit message against all configured sources.
    ///
    /// Why: the pipeline calls this once per commit; having a single entry
    /// point that walks all sources in priority order avoids duplicating the
    /// dispatch logic.
    /// What: extracts JIRA keys and GitHub refs from `message`; for each
    /// source in order (JIRA before GitHub), checks the cache and fetches
    /// misses; returns the first non-`None` signal found. Returns `None` if
    /// no source matched.
    /// Test: covered by `tests::resolver_returns_jira_signal_when_configured`
    /// and `tests::resolver_returns_none_when_no_keys`.
    pub async fn resolve(&self, message: &str) -> Option<ExternalSignal> {
        for state in &self.sources {
            if let Some(signal) = self.resolve_source(message, state).await {
                return Some(signal);
            }
        }
        None
    }

    async fn resolve_source(&self, message: &str, state: &SourceState) -> Option<ExternalSignal> {
        match state {
            SourceState::Jira {
                config,
                cache,
                base_url_override,
            } => {
                let keys = jira::extract_jira_keys(message);
                if keys.is_empty() {
                    return None;
                }
                // Filter by project_keys if configured.
                let filtered: Vec<String> = if config.project_keys.is_empty() {
                    keys
                } else {
                    keys.into_iter()
                        .filter(|k| {
                            config
                                .project_keys
                                .iter()
                                .any(|pk| k.starts_with(&format!("{pk}-")))
                        })
                        .collect()
                };
                if filtered.is_empty() {
                    return None;
                }

                // Separate cached from uncached.
                let (cached_hits, misses): (Vec<_>, Vec<_>) = {
                    let guard = cache.lock().expect("jira cache lock");
                    filtered
                        .iter()
                        .partition(|k| guard.contains_key(k.as_str()))
                };

                // Return immediately if a cached hit has a signal.
                {
                    let guard = cache.lock().expect("jira cache lock");
                    for k in &cached_hits {
                        if let Some(Some(sig)) = guard.get(k.as_str()) {
                            debug!(key = k.as_str(), "jira cache hit");
                            return Some(sig.clone());
                        }
                    }
                }

                // Fetch misses.
                let fetched = jira::fetch_issues_batch(
                    &self.client,
                    config,
                    &misses.iter().map(|s| s.to_string()).collect::<Vec<_>>(),
                    base_url_override.as_deref(),
                )
                .await;

                // Populate cache.
                {
                    let mut guard = cache.lock().expect("jira cache lock");
                    for (k, sig) in &fetched {
                        guard.insert(k.clone(), sig.clone());
                    }
                }

                // Return first hit from freshly-fetched results.
                for k in &misses {
                    if let Some(Some(sig)) = fetched.get(k.as_str()) {
                        return Some(sig.clone());
                    }
                }
                None
            }

            SourceState::GithubIssues {
                config,
                cache,
                api_base_override,
            } => {
                let refs: Vec<GitHubRef> = github_issues::extract_github_refs(message);
                if refs.is_empty() {
                    return None;
                }

                // Check cache first.
                {
                    let guard = cache.lock().expect("github cache lock");
                    for gh_ref in &refs {
                        let repo = gh_ref.repo.as_deref().unwrap_or(&config.repo);
                        let key = format!("{repo}#{}", gh_ref.number);
                        if let Some(Some(sig)) = guard.get(&key) {
                            debug!(cache_key = %key, "github cache hit");
                            return Some(sig.clone());
                        }
                    }
                }

                // Fetch misses.
                let fetched = github_issues::fetch_issues_batch(
                    &self.client,
                    config,
                    &refs,
                    api_base_override.as_deref(),
                )
                .await;

                // Populate cache.
                {
                    let mut guard = cache.lock().expect("github cache lock");
                    for (k, sig) in &fetched {
                        guard.insert(k.clone(), sig.clone());
                    }
                }

                // Return first hit.
                for gh_ref in &refs {
                    let repo = gh_ref.repo.as_deref().unwrap_or(&config.repo);
                    let key = format!("{repo}#{}", gh_ref.number);
                    if let Some(Some(sig)) = fetched.get(&key) {
                        return Some(sig.clone());
                    }
                }
                None
            }
        }
    }

    /// Point the JIRA source at index `idx` at a different base URL.
    ///
    /// Why: wiremock servers in the integration tests listen on random ports,
    /// so tests need a way to inject the mock URL without touching the config
    /// struct.
    /// What: sets `base_url_override` on the source at `idx` (0-based). When
    /// `idx` is out of range or the source there is not a JIRA source, the
    /// resolver is returned unchanged.
    /// Test: used by all JIRA integration tests.
    #[cfg(test)]
    pub fn with_jira_base_url(mut self, idx: usize, url: String) -> Self {
        let Some(SourceState::Jira {
            base_url_override, ..
        }) = self.sources.get_mut(idx)
        else {
            return self;
        };
        *base_url_override = Some(url);
        self
    }

    /// Point the GitHub Issues source at index `idx` at a different API base URL.
    ///
    /// Why: same as `with_jira_base_url`, but for the GitHub integration tests.
    /// What: sets `api_base_override` on the source at `idx` (0-based). When
    /// `idx` is out of range or the source there is not a GitHub Issues
    /// source, the resolver is returned unchanged.
    /// Test: used by all GitHub integration tests.
    #[cfg(test)]
    pub fn with_github_api_base(mut self, idx: usize, url: String) -> Self {
        let Some(SourceState::GithubIssues {
            api_base_override, ..
        }) = self.sources.get_mut(idx)
        else {
            return self;
        };
        *api_base_override = Some(url);
        self
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    use super::*;
    use crate::classify::sources::{
        GithubIssuesSourceConfig, JiraFieldMappings, JiraSourceConfig, SourceConfig,
    };

    /// Why: the resolver must work with no sources configured (e.g. no
    /// `sources:` block in the rules file) and return `None` without
    /// panicking.
    /// What: assert `resolve` on an empty resolver returns `None`.
    /// Test: no HTTP, pure unit.
    #[tokio::test]
    async fn resolver_builds_from_empty_sources() {
        let resolver = ExternalSourceResolver::new(&[]);
        assert!(resolver.resolve("feat: add login").await.is_none());
    }

    /// Why: a commit message without any ticket key must resolve to `None`
    /// cleanly and must not cause HTTP traffic.
    /// What: configure one JIRA source and resolve a message that contains no
    /// ticket key.
    /// Test: no HTTP (no keys → no fetch).
    #[tokio::test]
    async fn resolver_returns_none_for_messages_without_keys() {
        let source = SourceConfig::Jira(JiraSourceConfig {
            base_url: "https://acme.atlassian.net".to_string(),
            token_env: "JIRA_API_TOKEN".to_string(),
            username: None,
            email_env: None,
            project_keys: vec!["PROJ".to_string()],
            field_mappings: JiraFieldMappings::default(),
        });
        let resolver = ExternalSourceResolver::new(&[source]);

        let outcome = resolver.resolve("feat: add login flow").await;

        assert!(outcome.is_none(), "no keys → no signal");
    }

    /// Why: JIRA keys found in a commit message must be routed to the JIRA
    /// source and come back as the mapped category.
    /// What: serve a JIRA issue of type `Bug` from a wiremock server, map
    /// `Bug → bug_fix`, and assert the resolved signal carries that category.
    /// Test: requires `wiremock` dev-dep; mocks one JIRA HTTP call.
    #[tokio::test]
    async fn resolver_returns_jira_signal_for_bug_issue_type() {
        let mock_jira = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/rest/api/3/issue/PROJ-1234"))
            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
                "key": "PROJ-1234",
                "fields": {
                    "issuetype": {"name": "Bug"},
                    "labels": [],
                    "components": []
                }
            })))
            .mount(&mock_jira)
            .await;

        // SAFETY: this variable name is used by this test only.
        // NOTE(review): other tests in the same process may access the
        // environment concurrently — confirm that is acceptable here.
        unsafe { std::env::set_var("JIRA_API_TOKEN_TEST_BUG", "test-token") };

        let source = SourceConfig::Jira(JiraSourceConfig {
            base_url: mock_jira.uri(),
            token_env: "JIRA_API_TOKEN_TEST_BUG".to_string(),
            username: None,
            email_env: None,
            project_keys: Vec::new(),
            field_mappings: JiraFieldMappings {
                issue_type: HashMap::from([("Bug".to_string(), "bug_fix".to_string())]),
                labels: HashMap::new(),
                components: HashMap::new(),
            },
        });
        let resolver =
            ExternalSourceResolver::new(&[source]).with_jira_base_url(0, mock_jira.uri());

        let signal = resolver
            .resolve("PROJ-1234 fix null pointer")
            .await
            .expect("should have signal");
        assert_eq!(signal.category, "bug_fix");
        assert!(signal.source.contains("issue_type"));

        // SAFETY: same reasoning as for the `set_var` call above.
        unsafe { std::env::remove_var("JIRA_API_TOKEN_TEST_BUG") };
    }

    /// Why: several commits that mention the same ticket key must cost one
    /// HTTP call in total, not one per commit.
    /// What: mount a JIRA mock that expects exactly one request, then resolve
    /// two messages carrying the same key. A second request reaching the
    /// server violates the mock's expectation and fails the test.
    /// Test: wiremock with `expect(1)`.
    #[tokio::test]
    async fn resolver_caches_jira_result_across_calls() {
        let mock_jira = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/rest/api/3/issue/PROJ-99"))
            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
                "key": "PROJ-99",
                "fields": {
                    "issuetype": {"name": "Story"},
                    "labels": [],
                    "components": []
                }
            })))
            // A single request is allowed; the second resolve must be served
            // from the cache.
            .expect(1)
            .mount(&mock_jira)
            .await;

        // SAFETY: this variable name is used by this test only.
        // NOTE(review): other tests in the same process may access the
        // environment concurrently — confirm that is acceptable here.
        unsafe { std::env::set_var("JIRA_API_TOKEN_CACHE_TEST", "test-token") };

        let source = SourceConfig::Jira(JiraSourceConfig {
            base_url: mock_jira.uri(),
            token_env: "JIRA_API_TOKEN_CACHE_TEST".to_string(),
            username: None,
            email_env: None,
            project_keys: Vec::new(),
            field_mappings: JiraFieldMappings {
                issue_type: HashMap::from([("Story".to_string(), "new_feature".to_string())]),
                labels: HashMap::new(),
                components: HashMap::new(),
            },
        });
        let resolver =
            ExternalSourceResolver::new(&[source]).with_jira_base_url(0, mock_jira.uri());

        // First resolve goes to the mock server.
        let first = resolver.resolve("PROJ-99 add widget").await;
        assert!(first.is_some());

        // Second resolve has to come from the cache (see `expect(1)` above).
        let second = resolver.resolve("PROJ-99 related commit").await;
        assert_eq!(first, second);

        // SAFETY: same reasoning as for the `set_var` call above.
        unsafe { std::env::remove_var("JIRA_API_TOKEN_CACHE_TEST") };
    }

    /// Why: with the JIRA token env var unset, the resolver must answer `None`
    /// rather than panic or send unauthenticated requests.
    /// What: configure a JIRA source whose token env var is removed up front,
    /// resolve a message containing a key, and assert `None`.
    /// Test: no HTTP expected (token check happens before fetch).
    #[tokio::test]
    async fn resolver_skips_jira_when_token_unset() {
        // Make sure the variable is absent for this test.
        // SAFETY: this variable name is used by this test only.
        // NOTE(review): other tests in the same process may access the
        // environment concurrently — confirm that is acceptable here.
        unsafe { std::env::remove_var("JIRA_TOKEN_DEFINITELY_NOT_SET_XYZ") };

        let source = SourceConfig::Jira(JiraSourceConfig {
            base_url: "https://acme.atlassian.net".to_string(),
            token_env: "JIRA_TOKEN_DEFINITELY_NOT_SET_XYZ".to_string(),
            username: None,
            email_env: None,
            project_keys: Vec::new(),
            field_mappings: JiraFieldMappings::default(),
        });
        let resolver = ExternalSourceResolver::new(&[source]);

        let outcome = resolver.resolve("PROJ-1234 update").await;

        assert!(outcome.is_none(), "missing token must yield None, not panic");
    }

    /// Why: labels on a GitHub issue must be translated into categories by
    /// the GitHub Issues source.
    /// What: serve a `bug`-labelled issue from a wiremock server, map
    /// `bug → bug_fix`, and assert the resolver returns `bug_fix`.
    /// Test: wiremock mock of GitHub Issues REST v3.
    #[tokio::test]
    async fn resolver_returns_github_signal_for_bug_label() {
        let mock_github = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/repos/acme/widgets/issues/42"))
            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
                "number": 42,
                "labels": [{"name": "bug"}, {"name": "help wanted"}]
            })))
            .mount(&mock_github)
            .await;

        // SAFETY: this variable name is used by this test only.
        // NOTE(review): other tests in the same process may access the
        // environment concurrently — confirm that is acceptable here.
        unsafe { std::env::set_var("GITHUB_TOKEN_TEST_BUG", "test-token") };

        let source = SourceConfig::GithubIssues(GithubIssuesSourceConfig {
            repo: "acme/widgets".to_string(),
            token_env: "GITHUB_TOKEN_TEST_BUG".to_string(),
            label_mappings: HashMap::from([("bug".to_string(), "bug_fix".to_string())]),
        });
        let resolver =
            ExternalSourceResolver::new(&[source]).with_github_api_base(0, mock_github.uri());

        let signal = resolver
            .resolve("fix: closes #42")
            .await
            .expect("should have signal");
        assert_eq!(signal.category, "bug_fix");
        assert!(signal.source.contains("bug"));

        // SAFETY: same reasoning as for the `set_var` call above.
        unsafe { std::env::remove_var("GITHUB_TOKEN_TEST_BUG") };
    }
}