tga 2.8.1

Developer productivity analytics — git commit collection, classification, and reporting
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
//! End-to-end classification pipeline: read DB → classify → write back.

use std::collections::HashMap;

use futures::stream::StreamExt;
use rusqlite::params;
use tracing::{info, warn};

use crate::classify::classifier::{ClassificationEngine, ClassificationEngineConfig};
use crate::classify::errors::Result;
use crate::classify::rules::default_rules;
use crate::classify::sources::ExternalSourceResolver;
use crate::classify::tiers::bedrock::DEFAULT_BEDROCK_MODEL;
use crate::classify::tiers::llm::ANTHROPIC_DEFAULT_MODEL;
use crate::classify::tiers::ClassificationResult;
use crate::core::config::{Config, LlmSource};
use crate::core::db::Database;
use crate::core::models::ClassificationMethod;

/// Default minimum coverage threshold (percent) below which the pipeline
/// emits a warning.
///
/// Used by `ClassificationPipeline::min_coverage_pct` as the fallback when
/// the config carries no `classification` section to supply an override.
/// (The former `#[allow(dead_code)]` was dropped: the constant is referenced
/// unconditionally, so the suppression only hid a future regression.)
const DEFAULT_MIN_COVERAGE_PCT: f64 = 20.0;

/// Aggregate statistics from a single pipeline run.
///
/// Why: callers (CLI, tests) need a uniform shape describing how many
/// commits were classified and via which tier; coverage breakdowns let
/// reports surface gaps per repository.
/// What: counters per-tier (`by_method`), per-category (`by_category`),
/// and per-repo coverage. Returned by [`ClassificationPipeline::run`]; when
/// a run finds no candidate commits the all-zero `Default` value is returned.
/// Test: covered by `tests::pipeline_runs_against_in_memory_db` and
/// `pipeline_force_reclassifies_rows`.
#[derive(Debug, Clone, Default)]
pub struct ClassificationStats {
    /// Total commits processed.
    pub total_commits: usize,
    /// Commits that received a non-uncategorized verdict.
    pub classified: usize,
    /// Count of verdicts per tier (`"exact_rule"`, `"regex_rule"`, ...).
    pub by_method: HashMap<String, usize>,
    /// Count of verdicts per category.
    pub by_category: HashMap<String, usize>,
    /// Overall classification coverage as a percentage (0–100).
    ///
    /// Defined as `classified / total_commits * 100`. Zero when
    /// `total_commits == 0`.
    pub coverage_pct: f64,
    /// Per-repository coverage, keyed by repository name. Each value is a
    /// [`RepoCoverage`] carrying the raw counts as well as the percentage.
    pub coverage_by_repo: HashMap<String, RepoCoverage>,
}

/// Per-repository coverage breakdown.
///
/// Why: a global coverage number hides the case where one repo classifies
/// at 95% and another at 5%; surfacing per-repo coverage lets operators
/// drill in.
/// What: total commits, classified count, and percentage (0–100). Stored as
/// the value type of `ClassificationStats::coverage_by_repo`.
/// Test: covered by classification pipeline integration tests.
#[derive(Debug, Clone, Default)]
pub struct RepoCoverage {
    /// Total commits seen for this repository.
    pub total: usize,
    /// Commits with a non-`"uncategorized"` verdict.
    pub classified: usize,
    /// `classified / total * 100`, i.e. a percentage in the 0–100 range.
    ///
    /// NOTE(review): the value for `total == 0` is decided where this struct
    /// is filled in (not in this declaration) — confirm it is `0.0`, not NaN.
    pub coverage_pct: f64,
}

/// Stage-2 pipeline: classify every unclassified commit currently in the DB.
///
/// Why: classification touches multiple tiers (rules / regex / fuzzy / LLM)
/// and the engine config / taxonomy / JIRA mappings all live in `Config`;
/// concentrating orchestration here keeps the binary's `commands/classify.rs`
/// thin.
/// What: holds the validated [`Config`] plus toggles for `force` re-classify,
/// `since`/`until` date bounds, and `repos` filter. Built via [`Self::new`] +
/// builder methods ([`Self::with_force`], [`Self::with_since`],
/// [`Self::with_until`], [`Self::with_repos`]).
/// Test: covered by `classify::tests::pipeline_runs_against_in_memory_db`.
pub struct ClassificationPipeline {
    /// Application configuration. This pipeline reads its `classification`,
    /// `llm`, and `jira` sections.
    config: Config,
    /// When `true`, re-classify commits that already carry a verdict.
    ///
    /// Defaults to `false` (skip-if-classified). See [`Self::with_force`].
    force: bool,
    /// Optional lower bound on `commits.timestamp` (ISO8601: `YYYY-MM-DD`).
    ///
    /// Documented contract: only consulted when `force == true`; without
    /// force the default "missing-verdict" filter is the only selector.
    /// NOTE(review): the pipeline forwards this value to the candidate query
    /// regardless of `force`, so that contract is enforced (if at all) by the
    /// query layer — confirm there. See [`Self::with_since`].
    since: Option<String>,
    /// Optional upper bound on `commits.timestamp` (ISO8601: `YYYY-MM-DD`).
    ///
    /// Scopes re-classification to commits on or before this date.
    /// See [`Self::with_until`].
    until: Option<String>,
    /// Optional repository filter: only classify commits from these repos.
    ///
    /// When non-empty, only commits whose `repository` column matches one of
    /// the listed names are considered. An empty list (the default) means
    /// "no filter". See [`Self::with_repos`].
    repos: Vec<String>,
}

impl ClassificationPipeline {
    /// Create a pipeline for `config` with every optional knob switched off.
    ///
    /// Why: the common case is "fill in missing verdicts", so a fresh
    /// pipeline never re-classifies and applies no filters; operators opt in
    /// to anything else through the `with_*` builders.
    /// What: takes ownership of the config and starts with `force = false`,
    /// no `since` / `until` bound, and an empty `repos` filter.
    /// Test: covered by `tests::pipeline_constructs_with_default_config`
    /// in `collect::tests` and the pipeline integration tests.
    pub fn new(config: Config) -> Self {
        Self {
            // Filters: all disabled until a builder sets them.
            repos: Vec::new(),
            until: None,
            since: None,
            // Skip-if-classified is the default mode.
            force: false,
            config,
        }
    }

    /// Re-classify commits even if they already have a `classification_id`.
    ///
    /// Why: when the rule set is updated (or a JIRA project mapping is
    /// added), operators need to retroactively apply the new rules to
    /// historical data. Without this, the pipeline skips classified rows
    /// and the new rules never fire on them. Issue #205.
    /// What: flips the read query from "WHERE classification_id IS NULL" to
    /// "any commit", and the write-back replaces the existing
    /// `classifications` row in place (no orphan rows).
    /// Test: see `pipeline_force_reclassifies_rows` in this module.
    pub fn with_force(mut self, force: bool) -> Self {
        self.force = force;
        self
    }

    /// Bound `--force` rewrites to commits whose `timestamp` is on or after
    /// the given ISO8601 date.
    ///
    /// Why: full-corpus rewrites are expensive; the common case for the
    /// retroactive flow is "apply the new rules to the last quarter".
    /// What: stores the string verbatim; the read query appends a
    /// `timestamp >= ?` predicate when set. No-op when `force` is `false`.
    /// Test: covered by the same integration test as `with_force`.
    pub fn with_since(mut self, since: Option<String>) -> Self {
        self.since = since;
        self
    }

    /// Bound classification to commits on or before the given ISO8601 date.
    ///
    /// Why: complements `with_since` to allow a bounded window like
    /// `--since 2026-01-01 --until 2026-03-31` without touching commits
    /// outside that quarter.
    /// What: stores the string; the read query appends a `timestamp <= ?`
    /// predicate when set.
    /// Test: covered by pipeline integration tests that exercise date windows.
    pub fn with_until(mut self, until: Option<String>) -> Self {
        self.until = until;
        self
    }

    /// Restrict classification to commits from specific repositories.
    ///
    /// Why: the `--repos` filter lets operators classify only a slice of the
    /// DB (e.g. one service) without running across the full corpus.
    /// What: when non-empty, adds a `WHERE repository IN (…)` clause to the
    /// candidate commit query.
    /// Test: see `tests::classify_repos_filter_*` in this module.
    pub fn with_repos(mut self, repos: Vec<String>) -> Self {
        self.repos = repos;
        self
    }

    /// Build the [`ClassificationEngine`] from this pipeline's config.
    ///
    /// Why: both [`Self::run`] and [`Self::backfill_complexity`] need an
    /// identically-configured engine; keeping the rule-merge and
    /// config-mapping logic in one place prevents the two from drifting.
    /// What:
    /// 1. Loads the rule set: the built-in [`default_rules`] when no
    ///    `classification.rules_files` are configured, otherwise the merged
    ///    user files — layered on top of the defaults (custom rules win by
    ///    `id`) when the merged set has `extend_defaults` set.
    /// 2. Maps `Config` → `ClassificationEngineConfig`, taxonomy and JIRA
    ///    project mappings, and constructs the engine without the DB-backed
    ///    override tier and without an LLM tier.
    /// 3. If the LLM tier is requested, initialises the classifier and
    ///    attaches it. A top-level `llm:` section takes precedence over the
    ///    legacy `classification.*` fields and is self-enabling; the legacy
    ///    path logs a deprecation warning when its provider/key fields are
    ///    in use.
    /// Test: exercised indirectly by the pipeline integration tests.
    ///
    /// # Errors
    ///
    /// Returns an error if rules fail to load/compile, if the LLM provider
    /// fails to initialize, or if the LLM tier is enabled but no credential
    /// can be resolved.
    async fn build_engine(&self) -> Result<ClassificationEngine> {
        // Both sections are consulted repeatedly below; `Option<&_>` is
        // `Copy`, so borrow them once up front.
        let classification = self.config.classification.as_ref();
        let llm_section = self.config.llm.as_ref();

        // User-supplied rule files (single or multiple, #445 batch C) are
        // loaded and merged in order via `load_rules_multi`; the last file's
        // `extend_defaults` flag wins.
        let rule_paths: Vec<&std::path::Path> = classification
            .map(|c| c.rules_files.iter().map(|p| p.as_path()).collect())
            .unwrap_or_default();
        let ruleset = if rule_paths.is_empty() {
            default_rules()
        } else {
            let custom = crate::classify::rules::load_rules_multi(&rule_paths)?;
            if custom.extend_defaults {
                // Layer custom rules over the defaults: drop every default
                // whose id is redefined, then append the custom rules.
                let mut merged = default_rules();
                let overridden: std::collections::HashSet<String> =
                    custom.rules.iter().map(|r| r.id.clone()).collect();
                merged.rules.retain(|r| !overridden.contains(&r.id));
                merged.rules.extend(custom.rules);
                merged
            } else {
                custom
            }
        };

        // Is the LLM tier requested?
        //
        // Precedence (highest first):
        // 1. Top-level `llm:` section (new, preferred): its mere presence
        //    SELF-ENABLES the tier — `classification.use_llm: true` is not
        //    required, and an explicit `use_llm: false` does NOT suppress it.
        //    To disable the tier temporarily, remove or comment out `llm:`.
        // 2. Legacy `classification.use_llm: true`, honored only when no
        //    `llm:` section is present.
        let use_llm = llm_section.is_some() || matches!(classification, Some(c) if c.use_llm);

        let engine_cfg = match classification {
            Some(c) => ClassificationEngineConfig {
                use_llm: c.use_llm,
                llm_model: c.llm_model.as_deref().unwrap_or("gpt-4o-mini").into(),
                llm_provider: c.llm_provider.clone(),
                openrouter_api_key: c.openrouter_api_key.clone(),
                confidence_threshold: c.confidence_threshold,
                weighted_sum: c.weighted_sum.clone(),
            },
            None => ClassificationEngineConfig::default(),
        };

        let custom_taxonomy = classification
            .map(|c| c.custom_categories.clone())
            .unwrap_or_default();

        let jira = self.config.jira.as_ref();
        let jira_mappings = jira
            .map(|j| j.jira_project_mappings.clone())
            .unwrap_or_default();
        let jira_confidence = jira.and_then(|j| j.jira_project_mapping_confidence);

        // The engine is first built with the LLM tier switched off; the tier
        // is attached afterwards because its construction may need async SDK
        // initialisation (Bedrock).
        let mut engine = ClassificationEngine::with_taxonomy_mappings_and_confidence(
            ruleset,
            ClassificationEngineConfig {
                use_llm: false,
                ..engine_cfg.clone()
            },
            custom_taxonomy,
            jira_mappings,
            jira_confidence,
            // Override-tier DB wiring is deferred: rusqlite::Connection is
            // not Send + Sync, so plumbing the live connection through the
            // Rayon batch would require a redesign. The override tier is
            // still constructible via `with_taxonomy_and_mappings` for
            // single-threaded callers and tests.
            None,
        )?;

        if !use_llm {
            return Ok(engine);
        }

        // Wire the LLM tier, preferring the `llm:` section.
        let llm_classifier = match llm_section {
            Some(llm_cfg) => {
                // Model resolution order:
                //  1. Explicit `llm.model` in the `llm:` section.
                //  2. Legacy `classification.llm_model` (migration compat).
                //  3. Source-aware default.
                //
                // Why a per-source default: `gpt-4o-mini` as a universal
                // fallback yields invalid-model errors on `bedrock` and
                // `anthropic-api`, since each provider only accepts model
                // ids from its own namespace.
                let source_default = match llm_cfg.source {
                    LlmSource::Bedrock => DEFAULT_BEDROCK_MODEL,
                    LlmSource::AnthropicApi => ANTHROPIC_DEFAULT_MODEL,
                    LlmSource::Openrouter => "gpt-4o-mini",
                };
                let legacy_model = classification.and_then(|c| c.llm_model.as_deref());
                let model = llm_cfg
                    .model
                    .as_deref()
                    .or(legacy_model)
                    .unwrap_or(source_default);

                crate::classify::tiers::llm::LlmClassifier::from_llm_config(llm_cfg, model)
                    .await
                    .map_err(|e| {
                        crate::classify::errors::ClassifyError::Config(format!(
                            "LLM provider init failed (llm: section): {e}"
                        ))
                    })?
            }
            None => {
                // Legacy path (`classification.llm_provider` etc.). Emit one
                // deprecation warning so existing configs get a clear
                // upgrade path.
                let uses_deprecated_fields = matches!(
                    classification,
                    Some(c) if c.openrouter_api_key.is_some() || c.llm_provider != "auto"
                );
                if uses_deprecated_fields {
                    warn!(
                        "classification.openrouter_api_key / classification.llm_provider \
                             are deprecated. Migrate to the top-level `llm:` section: \
                             'llm:\\n  source: openrouter\\n  api_key_env: OPENROUTER_API_KEY'"
                    );
                }

                crate::classify::tiers::llm::LlmClassifier::from_provider_async(
                    &engine_cfg.llm_provider,
                    &engine_cfg.llm_model,
                    engine_cfg.openrouter_api_key.clone(),
                )
                .await
                .map_err(|e| {
                    crate::classify::errors::ClassifyError::Config(format!(
                        "LLM provider init failed: {e}"
                    ))
                })?
            }
        };

        // Fail-loudly guard: the tier is enabled but no credential resolved.
        // Bail out here, before the caller writes any DB rows, and name the
        // specific env var when an `llm:` section tells us which one it is.
        if !llm_classifier.has_api_key() {
            let var_hint = llm_section
                .map(|l| format!(" ('{}')", l.api_key_env))
                .unwrap_or_default();
            return Err(crate::classify::errors::ClassifyError::Config(format!(
                "LLM tier is enabled but no API key or credentials could be resolved. \
                 Ensure the environment variable{var_hint} named by llm.api_key_env \
                 is set and non-empty (for openrouter/anthropic-api), or that valid \
                 AWS credentials are present in the credential chain (for bedrock). \
                 No database writes will occur."
            )));
        }

        engine.attach_llm(llm_classifier);
        Ok(engine)
    }

    /// Decide whether this run needs an [`ExternalSourceResolver`] and, if
    /// so, construct one from the configured sources.
    ///
    /// Why: the resolver is optional — teams without JIRA/GitHub, or offline
    /// CI runs, should pay no overhead for it. Centralising the decision
    /// here keeps `run` free of config probing.
    /// What: returns `None` when there is no `classification` section, when
    /// `classification.no_external` is `true`, or when
    /// `classification.sources` is empty; otherwise returns a fresh resolver
    /// built over the sources slice.
    /// Test: exercised by the pipeline integration tests via `run`.
    fn build_resolver(&self) -> Option<ExternalSourceResolver> {
        // No `classification` section means no sources are configured.
        let classification = self.config.classification.as_ref()?;
        if classification.no_external || classification.sources.is_empty() {
            return None;
        }
        Some(ExternalSourceResolver::new(
            classification.sources.as_slice(),
        ))
    }

    /// Execute the pipeline against `db`.
    ///
    /// Workflow:
    /// 1. Build the [`ClassificationEngine`] from config (rules + LLM tier).
    /// 2. Optionally build an [`ExternalSourceResolver`] from the configured
    ///    `classification.sources`.
    /// 3. Hand both to [`Self::run_with_engine_and_resolver`], which reads
    ///    the candidate commits (by default only rows lacking a verdict;
    ///    widened by `force`), runs the tier cascade and optional LLM
    ///    fallback, and writes the verdicts back.
    ///
    /// # Errors
    ///
    /// Returns an error if engine construction fails (rule loading, LLM
    /// provider init, unresolved LLM credentials) or if the DB read or
    /// write-back fails.
    pub async fn run(&self, db: &mut Database) -> Result<ClassificationStats> {
        // Engine construction is async so Bedrock credential init can run;
        // a failure here aborts before the resolver is even considered.
        let engine = self.build_engine().await?;
        self.run_with_engine_and_resolver(db, engine, self.build_resolver())
            .await
    }

    /// Run the classification pipeline using a caller-supplied engine.
    ///
    /// Why: tests need to inject an engine wired to a mock LLM endpoint;
    /// [`Self::run`] builds the engine itself and delegates here.
    /// What: identical to [`Self::run`] but skips engine construction.
    /// Test: the complexity-write integration test calls this directly.
    ///
    /// # Errors
    ///
    /// Returns an error if DB queries or write-back fail.
    #[allow(dead_code)]
    pub(crate) async fn run_with_engine(
        &self,
        db: &mut Database,
        engine: ClassificationEngine,
    ) -> Result<ClassificationStats> {
        self.run_with_engine_and_resolver(db, engine, None).await
    }

    /// Run with a caller-supplied engine and optional external resolver.
    ///
    /// Why: tests need to inject both a mock LLM engine and a mock external
    /// resolver independently; this overload allows both injections at once.
    /// What: the innermost execution entry point; all other `run*` variants
    /// delegate here. Steps, in order:
    /// 1. Read candidate commits (honouring `force` / `since` / `until` /
    ///    `repos`); return default (all-zero) stats if there are none.
    /// 2. Read manual overrides, batch-classify every commit with the
    ///    engine, then overwrite the batch verdict wherever an override
    ///    exists.
    /// 3. If a resolver was supplied, let external ticket signals replace
    ///    the verdict of every commit that has no override.
    /// 4. If the engine's config has `use_llm` set, send every verdict at or
    ///    below the fallback threshold to the LLM and adopt the LLM verdict
    ///    only when its confidence is strictly higher.
    /// 5. Write results back, compute and persist coverage, and report it.
    /// Test: used by resolver integration tests.
    ///
    /// # Errors
    ///
    /// Returns an error if DB queries or write-back fail.
    pub(crate) async fn run_with_engine_and_resolver(
        &self,
        db: &mut Database,
        engine: ClassificationEngine,
        resolver: Option<ExternalSourceResolver>,
    ) -> Result<ClassificationStats> {
        // 1. Read candidate commits. The default flow returns only the
        //    rows that lack a verdict; `--force` widens this to every row
        //    (optionally bounded by `--since`/`--until`/`--repos`).
        let commits = super::pipeline_db::read_candidate_commits(
            db,
            self.force,
            self.since.as_deref(),
            self.until.as_deref(),
            &self.repos,
        )?;
        let total = commits.len();
        info!(
            total,
            force = self.force,
            since = ?self.since,
            until = ?self.until,
            repos = ?self.repos,
            "starting classification"
        );

        // Nothing to classify: return zeroed stats and skip every later
        // step, including the write-back and coverage reporting.
        if commits.is_empty() {
            return Ok(ClassificationStats::default());
        }

        // 2a. Tier 0 (override) pre-pass — done serially against the live
        //     DB connection. The lookup is keyed by `commit.id`.
        let overrides = super::pipeline_db::read_overrides(db, &commits)?;

        // 2b. Engine batch classification (tiers 1–3). Note that every
        //     commit is classified here, including those with an override;
        //     the override simply replaces the batch verdict below.
        let pairs: Vec<(&str, bool)> = commits
            .iter()
            .map(|c| (c.message.as_str(), c.is_merge))
            .collect();
        let mut results = engine.classify_batch(&pairs);

        // Apply Tier-0 manual overrides (highest precedence). `results` is
        // indexed in lockstep with `commits`.
        for (idx, commit) in commits.iter().enumerate() {
            if let Some(r) = overrides.get(&commit.id) {
                results[idx] = r.clone();
            }
        }

        // 3. Tier 0.5: external sources (JIRA / GitHub Issues).
        //
        // Why: external ticket-type signals are more authoritative than
        // commit-message heuristics but must still defer to manual overrides
        // (Tier 0). We run the resolver serially (network I/O bound) for
        // commits that do not already have a Tier-0 override verdict.
        // The resolver caches results in-memory so the same ticket is only
        // fetched once per run, keeping the HTTP budget proportional to the
        // number of *unique* referenced tickets — not the number of commits.
        if let Some(res) = &resolver {
            let pb = super::pipeline_db::make_progress(commits.len() as u64, "External sources");
            for (idx, commit) in commits.iter().enumerate() {
                // Skip commits already resolved by Tier 0 (manual override).
                if overrides.contains_key(&commit.id) {
                    pb.inc(1);
                    continue;
                }
                // A signal unconditionally replaces the batch verdict; no
                // confidence comparison is made at this tier.
                if let Some(signal) = res.resolve(&commit.message).await {
                    let top_level = engine.taxonomy().resolve(&signal.category);
                    results[idx] = ClassificationResult {
                        category: signal.category,
                        subcategory: None,
                        top_level,
                        confidence: signal.confidence,
                        method: ClassificationMethod::ExternalSource,
                        ticket_id:
                            crate::classify::tiers::regex_tier::RegexMatcher::extract_ticket_id(
                                &commit.message,
                            ),
                        complexity: None,
                    };
                }
                pb.inc(1);
            }
            pb.finish_and_clear();
        }

        // 4. LLM fallback (async, bounded-concurrency) for entries whose
        //    verdict confidence is at or below `llm_fallback_threshold`. The
        //    default threshold is `0.65` (1.3.0+), which routes low-confidence
        //    deterministic verdicts (fuzzy 0.40/0.60, weighted-sum below 0.65)
        //    through the LLM when `use_llm: true`.
        //
        //    Fan-out is bounded by `llm_fallback_concurrency` via
        //    `buffer_unordered`, which yields ~order-of-magnitude wall-clock
        //    savings on large corpora compared to a serial `for ... .await`.
        //    We collect (commit_idx, new_result) pairs first, then write them
        //    back, so the borrow checker doesn't see mutable refs into
        //    `results` while futures are in flight.
        if engine.config().use_llm {
            // Single startup-time diagnostic when the LLM tier is on but no
            // credential is reachable. Without this, the fallback would emit
            // a warn-per-commit ("did not improve confidence") that obscures
            // the real misconfiguration.
            if matches!(engine.llm_has_api_key(), Some(false)) {
                warn!(
                    "LLM tier enabled but no API key resolved \
                     (OPENAI_API_KEY / OPENROUTER_API_KEY unset); \
                     fallback will short-circuit silently"
                );
            }

            // Both knobs fall back to built-in defaults when the config has
            // no `classification` section.
            let fallback_threshold = self
                .config
                .classification
                .as_ref()
                .map(|c| c.llm_fallback_threshold)
                .unwrap_or(0.65);
            // A configured concurrency of 0 is clamped up to 1.
            let concurrency = self
                .config
                .classification
                .as_ref()
                .map(|c| c.llm_fallback_concurrency.max(1))
                .unwrap_or(8);

            // Pre-collect (idx, message, is_merge, original_confidence) for
            // every commit that needs an LLM call. The original verdict
            // itself stays in `results[idx]`; only its confidence is carried
            // along for the write-back guard. `is_merge` is carried but not
            // used by the LLM call below.
            //
            // NOTE(review): this filter looks only at confidence, so a Tier-0
            // override or external-source verdict at or below the threshold
            // is also sent to the LLM and can be replaced by a more confident
            // LLM verdict — confirm that is intended.
            let pending: Vec<(usize, String, bool, f64)> = commits
                .iter()
                .enumerate()
                .filter_map(|(idx, commit)| {
                    if results[idx].confidence <= fallback_threshold {
                        Some((
                            idx,
                            commit.message.clone(),
                            commit.is_merge,
                            results[idx].confidence,
                        ))
                    } else {
                        None
                    }
                })
                .collect();

            let pb = super::pipeline_db::make_progress(pending.len() as u64, "LLM fallback");
            // Shared references are taken up front so each `async move`
            // block copies the reference instead of moving the owner.
            let engine_ref = &engine;
            let pb_ref = &pb;
            // Results arrive in completion order, not submission order, which
            // is why each item carries its own `idx`.
            let new_results: Vec<(usize, ClassificationResult, f64)> =
                futures::stream::iter(pending.into_iter().map(
                    |(idx, message, _is_merge, original_conf)| async move {
                        // Direct LLM dispatch — calling `engine_ref.classify`
                        // here would re-run `classify_sync` first and short-
                        // circuit on the same low-confidence tier-1-3 verdict
                        // that triggered the fallback, so the LLM tier would
                        // never be reached (issue #99).
                        let r = engine_ref
                            .llm_classify_only(&message)
                            .await
                            .unwrap_or_else(ClassificationResult::unclassified);
                        pb_ref.inc(1);
                        (idx, r, original_conf)
                    },
                ))
                .buffer_unordered(concurrency)
                .collect()
                .await;
            pb.finish_and_clear();

            // Overwrite-guard: only adopt the LLM verdict if it strictly
            // improves confidence over the original. Otherwise keep the
            // existing verdict, so an LLM call that produced nothing (mapped
            // to `ClassificationResult::unclassified` above) cannot replace
            // a usable verdict. Successful-but-weaker and failed calls are
            // treated uniformly by this guard.
            for (idx, r, original_conf) in new_results {
                if r.confidence > original_conf {
                    results[idx] = r;
                } else {
                    warn!(
                        commit_idx = idx,
                        original_conf,
                        new_conf = r.confidence,
                        "LLM fallback did not improve confidence; keeping original verdict"
                    );
                }
            }
        }

        // 5. Write back + coverage bookkeeping.
        // `checkpoint_every` defaults to 0 without a `classification`
        // section. NOTE(review): presumably 0 disables checkpointing —
        // confirm in `pipeline_db::write_results`.
        let checkpoint_every = self
            .config
            .classification
            .as_ref()
            .map(|c| c.checkpoint_every)
            .unwrap_or(0);
        let mut stats =
            super::pipeline_db::write_results(db, &commits, &results, checkpoint_every)?;
        super::pipeline_db::compute_coverage(&mut stats);
        super::pipeline_db::persist_repository_status(db, &stats)?;
        super::pipeline_db::report_coverage(&stats, self.min_coverage_pct());
        info!(
            total = stats.total_commits,
            classified = stats.classified,
            coverage_pct = stats.coverage_pct,
            "classification complete"
        );
        Ok(stats)
    }

    /// Coverage percentage below which this pipeline warns about low coverage.
    ///
    /// Reads `min_coverage_pct` from the optional `classification` config
    /// section; when that section is absent, falls back to
    /// `DEFAULT_MIN_COVERAGE_PCT`.
    fn min_coverage_pct(&self) -> f64 {
        let classification = self.config.classification.as_ref();
        classification.map_or(DEFAULT_MIN_COVERAGE_PCT, |section| section.min_coverage_pct)
    }

    /// Fill in `complexity` for already-classified commits that lack a score.
    ///
    /// Why: classifications written before complexity scoring existed (or
    /// produced by a non-LLM tier) carry `complexity IS NULL`. Backfilling
    /// lets a corpus pick up scores incrementally while the stored
    /// category/confidence/method verdict stays untouched.
    /// What: builds the classification engine via `build_engine` and hands
    /// it to [`Self::backfill_complexity_with_engine`], which selects the
    /// candidate rows (`complexity IS NULL`, `method != 'exact_rule'`), asks
    /// the LLM for a score per commit, and updates only the `complexity`
    /// column. The returned count is the one that function reports.
    /// Test: see `tests/` — seed one NULL row and one scored row, run the
    /// backfill, then check the NULL row is filled and the scored row is
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if engine construction or DB access fails.
    pub async fn backfill_complexity(&self, db: &mut Database) -> Result<usize> {
        // Engine construction is the only step that needs `self`; the
        // backfill itself lives in the engine-agnostic associated function.
        let llm_engine = self.build_engine().await?;
        Self::backfill_complexity_with_engine(db, &llm_engine).await
    }

    /// Backfill complexity using a caller-supplied engine.
    ///
    /// Why: tests inject an engine wired to a mock LLM endpoint;
    /// [`Self::backfill_complexity`] builds the engine and delegates here.
    /// What: the engine-agnostic core of the backfill. It runs in two phases
    /// so that no database transaction is held open while waiting on the LLM:
    ///
    /// 1. Ask the LLM for a complexity score for every candidate row and
    ///    collect the scores in memory.
    /// 2. Write all collected scores in a single short transaction that
    ///    touches only the `complexity` column.
    ///
    /// Candidates for which the LLM yields no score are logged and left
    /// `NULL`. Returns the number of rows for which an `UPDATE` was issued.
    /// Test: the backfill integration test calls this directly.
    ///
    /// # Errors
    ///
    /// Returns an error if DB access fails. In that case the write
    /// transaction is dropped without being committed.
    pub(crate) async fn backfill_complexity_with_engine(
        db: &mut Database,
        engine: &ClassificationEngine,
    ) -> Result<usize> {
        // Collect candidate rows. Rows produced by the `exact_rule` tier are
        // excluded by the candidate query — they were never LLM-eligible.
        let candidates = super::pipeline_db::read_complexity_backfill_candidates(db)?;
        let total = candidates.len();
        info!(total, "starting complexity backfill");
        if candidates.is_empty() {
            return Ok(0);
        }

        // Phase 1: LLM calls only. No transaction or prepared statement is
        // alive across these awaits, so the database is not kept inside a
        // write transaction for the duration of the network round-trips.
        let pb = super::pipeline_db::make_progress(total as u64, "Complexity backfill");
        let mut scored = Vec::with_capacity(total);
        for cand in &candidates {
            let verdict = engine.llm_classify_only(&cand.message).await;
            match verdict.and_then(|r| r.complexity) {
                Some(score) => scored.push((cand, score)),
                None => {
                    warn!(
                        commit_sha = %cand.commit_sha,
                        "LLM returned no complexity score; leaving NULL"
                    );
                }
            }
            pb.inc(1);
        }
        // Clear the bar before touching the DB so an early `?` return in the
        // write phase cannot leave a stale progress bar on screen.
        pb.finish_and_clear();

        // Phase 2: one short transaction covering only the UPDATEs.
        {
            let conn = db.connection_mut();
            let tx = conn.transaction().map_err(crate::core::TgaError::from)?;
            {
                // Scoped so the statement's borrow of `tx` ends before commit.
                let mut update_stmt = tx
                    .prepare("UPDATE classifications SET complexity = ?1 WHERE id = ?2")
                    .map_err(crate::core::TgaError::from)?;
                for &(cand, score) in &scored {
                    update_stmt
                        .execute(params![score as i64, cand.classification_id])
                        .map_err(crate::core::TgaError::from)?;
                    info!(
                        commit_sha = %cand.commit_sha,
                        score,
                        "backfilled complexity"
                    );
                }
            }
            tx.commit().map_err(crate::core::TgaError::from)?;
        }

        let updated = scored.len();
        info!(updated, total, "complexity backfill complete");
        Ok(updated)
    }
}