use std::collections::HashMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::classify::taxonomy::SubcategoryDef;
use crate::core::errors::{Result, TgaError};
pub mod aliases;
pub mod azdo;
pub mod validator;
pub use aliases::{AliasFile, DeveloperAliasEntry};
pub use azdo::AzureDevOpsConfig;
pub use validator::{ConfigError, ConfigValidator};
/// Root configuration document; `Config::load` parses it from YAML.
///
/// Every serialized field carries `#[serde(default)]`, so any key may be left
/// out of the input and the field then takes its type's `Default` value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Config {
/// Configured repositories; `Config::validate` rejects an empty list.
#[serde(default)]
pub repositories: Vec<RepositoryConfig>,
/// Team roster; its members are the fallback source for `resolved_aliases`.
#[serde(default)]
pub team: Option<TeamConfig>,
/// Output settings, when the section is present.
#[serde(default)]
pub output: Option<OutputConfig>,
/// Classification settings, when the section is present.
#[serde(default)]
pub classification: Option<ClassificationConfig>,
/// GitHub settings; its `ticket_regex` is compiled once by `Config::load`.
#[serde(default)]
pub github: Option<GithubConfig>,
/// Bitbucket settings, when the section is present.
#[serde(default)]
pub bitbucket: Option<BitbucketConfig>,
/// Jira settings; its `ticket_regex` is compiled once by `Config::load`.
#[serde(default)]
pub jira: Option<JiraConfig>,
/// Linear settings; its `ticket_regex` is compiled once by `Config::load`.
#[serde(default)]
pub linear: Option<LinearConfig>,
/// Wrapper section that holds the Azure DevOps settings
/// (see `azure_devops_config`).
#[serde(default)]
pub pm: Option<PmConfig>,
/// `dora` section; its regex fields are compiled once by `Config::load`.
#[serde(default)]
pub dora: Option<DoraConfig>,
/// Not optional: when the key is missing, `ReachabilityConfig::default()`
/// is used.
#[serde(default)]
pub reachability: ReachabilityConfig,
/// Free-form string; not interpreted by this type.
#[serde(default)]
pub version: Option<String>,
/// Free-form string; not interpreted by this type.
#[serde(default)]
pub profile: Option<String>,
/// Inline map from a name to its alias list; the starting point for
/// `resolved_alias_map`.
#[serde(default)]
pub developer_aliases: HashMap<String, Vec<String>>,
/// Path to an external alias file. `resolved_alias_map` expands a leading
/// `~`, resolves a relative path against the config file's directory, and
/// lets the file's entries overwrite same-named inline ones.
#[serde(default)]
pub aliases_file: Option<String>,
/// Analysis settings, when the section is present.
#[serde(default)]
pub analysis: Option<AnalysisConfig>,
/// Cache settings, when the section is present.
#[serde(default)]
pub cache: Option<CacheConfig>,
/// Never read from or written to the serialized form (`serde(skip)`).
/// `Config::load` sets it to the expanded path the file was read from.
#[serde(skip)]
pub source_path: Option<PathBuf>,
}
/// Type of the optional `analysis` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnalysisConfig {
/// ML categorization settings; `None` when the key is absent.
#[serde(default)]
pub ml_categorization: Option<MlCategorizationConfig>,
}
/// Type of `AnalysisConfig::ml_categorization`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MlCategorizationConfig {
/// `false` when the key is omitted.
#[serde(default)]
pub enabled: bool,
/// Model identifier; stored as given, not interpreted by this type.
#[serde(default)]
pub model: Option<String>,
}
/// Type of the optional `cache` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CacheConfig {
/// Cache directory, when configured.
/// NOTE(review): nothing here passes this through `expand_path`; confirm
/// whether consumers expand a leading `~` themselves.
#[serde(default)]
pub directory: Option<PathBuf>,
}
/// One entry of `Config::repositories`.
///
/// `path` is the only key that must be present when deserializing; every
/// other field may be omitted.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RepositoryConfig {
/// Repository location. `Config::validate` rejects an empty path.
pub path: PathBuf,
/// Optional name for the repository.
#[serde(default)]
pub name: Option<String>,
/// Optional branch name.
#[serde(default)]
pub branch: Option<String>,
/// Stored as a raw string; this type does not parse or validate it.
#[serde(default)]
pub since_date: Option<String>,
/// Stored as a raw string; this type does not parse or validate it.
#[serde(default)]
pub until_date: Option<String>,
/// Also accepted under the key `owner` when deserializing.
#[serde(default, alias = "owner")]
pub org: Option<String>,
/// `false` when the key is omitted.
#[serde(default)]
pub head_only: bool,
/// NOTE(review): presumably a fetch timeout in seconds — confirm against
/// the code that consumes it.
#[serde(default)]
pub fetch_timeout_secs: Option<u64>,
}
/// Type of the optional `team` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TeamConfig {
/// Team members. `Config::resolved_aliases` falls back to each member's
/// `name` -> `aliases` pair when no developer alias map is available.
#[serde(default)]
pub members: Vec<TeamMember>,
/// String-to-string alias map.
/// NOTE(review): the direction (alias -> canonical or the reverse) is not
/// visible here — confirm against the consumer.
#[serde(default)]
pub aliases: HashMap<String, String>,
/// Optional domain string; not interpreted by this type.
#[serde(default)]
pub canonical_domain: Option<String>,
}
/// One entry of `TeamConfig::members`.
///
/// `name` and `email` must be present when deserializing; `aliases` may be
/// omitted and then defaults to an empty list.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TeamMember {
/// Used as the map key by the `Config::resolved_aliases` fallback.
pub name: String,
/// Member e-mail address.
pub email: String,
/// Used as the map value by the `Config::resolved_aliases` fallback.
#[serde(default)]
pub aliases: Vec<String>,
}
/// Type of the optional `output` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OutputConfig {
/// Single format name.
/// NOTE(review): how this interacts with `formats` is not visible here.
#[serde(default)]
pub format: Option<String>,
/// Also accepted under the key `output_path` when deserializing.
#[serde(default, alias = "output_path")]
pub directory: Option<PathBuf>,
/// List of format names; empty when the key is omitted.
#[serde(default)]
pub formats: Vec<String>,
/// `false` when the key is omitted.
#[serde(default)]
pub include_unclassified: bool,
/// `false` when the key is omitted.
#[serde(default)]
pub include_merges: bool,
/// `false` when the key is omitted.
#[serde(default)]
pub include_files: bool,
}
/// Type of the optional `classification` field on `Config`.
///
/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`),
/// so a misspelled key such as `rules_path` is an error rather than being
/// silently ignored. `Default` is implemented by hand so that it agrees with
/// the per-field serde defaults below.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ClassificationConfig {
/// Optional path to a rules file.
#[serde(default)]
pub rules_file: Option<PathBuf>,
/// `false` when the key is omitted.
#[serde(default)]
pub use_llm: bool,
/// Model identifier; stored as given.
#[serde(default)]
pub llm_model: Option<String>,
/// `"auto"` when the key is omitted.
#[serde(default = "default_llm_provider")]
pub llm_provider: String,
/// NOTE(review): presumably a credential; this type derives `Debug` and
/// `Serialize`, so it can end up in logs or dumps — confirm that is acceptable.
#[serde(default)]
pub openrouter_api_key: Option<String>,
/// `0.7` when the key is omitted.
#[serde(default = "default_confidence_threshold")]
pub confidence_threshold: f64,
/// Extra subcategory definitions; empty when the key is omitted.
#[serde(default)]
pub custom_categories: Vec<SubcategoryDef>,
/// `20.0` when the key is omitted.
#[serde(default = "default_min_coverage_pct")]
pub min_coverage_pct: f64,
/// `0.65` when the key is omitted.
#[serde(default = "default_llm_fallback_threshold")]
pub llm_fallback_threshold: f64,
/// Falls back to `WeightedSumConfig`'s own default when the key is omitted.
#[serde(default)]
pub weighted_sum: crate::classify::tiers::weighted_sum::WeightedSumConfig,
/// `8` when the key is omitted.
#[serde(default = "default_llm_fallback_concurrency")]
pub llm_fallback_concurrency: usize,
/// `false` when the key is omitted.
#[serde(default)]
pub no_external: bool,
/// `0` when the key is omitted.
/// NOTE(review): `0` presumably means "no checkpointing" — confirm.
#[serde(default)]
pub checkpoint_every: usize,
/// Source definitions; empty when the key is omitted.
#[serde(default)]
pub sources: Vec<crate::classify::sources::SourceConfig>,
}
/// Serde fallback for `ClassificationConfig::confidence_threshold`.
fn default_confidence_threshold() -> f64 {
    const CONFIDENCE_THRESHOLD: f64 = 0.7;
    CONFIDENCE_THRESHOLD
}
/// Serde fallback for `ClassificationConfig::min_coverage_pct`.
fn default_min_coverage_pct() -> f64 {
    const MIN_COVERAGE_PCT: f64 = 20.0;
    MIN_COVERAGE_PCT
}
/// Serde fallback for `ClassificationConfig::llm_provider`.
fn default_llm_provider() -> String {
    String::from("auto")
}
/// Serde fallback for `ClassificationConfig::llm_fallback_concurrency`.
fn default_llm_fallback_concurrency() -> usize {
    const FALLBACK_CONCURRENCY: usize = 8;
    FALLBACK_CONCURRENCY
}
/// Serde fallback for `ClassificationConfig::llm_fallback_threshold`.
fn default_llm_fallback_threshold() -> f64 {
    const FALLBACK_THRESHOLD: f64 = 0.65;
    FALLBACK_THRESHOLD
}
/// Hand-written so that `ClassificationConfig::default()` yields exactly the
/// values serde substitutes for omitted keys.
impl Default for ClassificationConfig {
    fn default() -> Self {
        // Fields are listed in declaration order to make it easy to spot a
        // field that was added to the struct but forgotten here.
        Self {
            rules_file: None,
            use_llm: false,
            llm_model: None,
            llm_provider: default_llm_provider(),
            openrouter_api_key: None,
            confidence_threshold: default_confidence_threshold(),
            custom_categories: Vec::new(),
            min_coverage_pct: default_min_coverage_pct(),
            llm_fallback_threshold: default_llm_fallback_threshold(),
            weighted_sum: crate::classify::tiers::weighted_sum::WeightedSumConfig::default(),
            llm_fallback_concurrency: default_llm_fallback_concurrency(),
            no_external: false,
            checkpoint_every: 0,
            sources: Vec::new(),
        }
    }
}
/// Serde fallback for boolean options that are switched on unless the config
/// says otherwise.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Type of the `reachability` field on `Config`.
///
/// `Default` is implemented by hand so that it agrees with the per-field
/// serde defaults below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReachabilityConfig {
/// `true` when the key is omitted.
#[serde(default = "default_true")]
pub track_tags: bool,
/// `true` when the key is omitted.
#[serde(default = "default_true")]
pub track_release_branches: bool,
/// When omitted: `release/*`, `hotfix/*`, `chore/release-*`, `v*`.
/// NOTE(review): these look like glob patterns — confirm the matcher used
/// by the consumer.
#[serde(default = "default_release_branch_patterns")]
pub release_branch_patterns: Vec<String>,
}
/// Serde fallback for `ReachabilityConfig::release_branch_patterns`.
fn default_release_branch_patterns() -> Vec<String> {
    const PATTERNS: [&str; 4] = ["release/*", "hotfix/*", "chore/release-*", "v*"];
    PATTERNS.iter().map(|pattern| pattern.to_string()).collect()
}
/// Hand-written so that `ReachabilityConfig::default()` yields the same
/// values serde substitutes for omitted keys.
impl Default for ReachabilityConfig {
    fn default() -> Self {
        let release_branch_patterns = default_release_branch_patterns();
        Self {
            release_branch_patterns,
            track_release_branches: true,
            track_tags: true,
        }
    }
}
/// Type of the optional `linear` field on `Config`.
///
/// `Default` is implemented by hand rather than derived: a derived impl would
/// set `fetch_on_reference` to `false`, while serde fills an omitted key with
/// `true`. The two ways of getting "the defaults" must agree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinearConfig {
    /// NOTE(review): presumably a credential; this type derives `Debug` and
    /// `Serialize` — confirm it is never logged or dumped verbatim.
    #[serde(default)]
    pub api_key: Option<String>,
    /// Team keys; empty when the key is omitted.
    #[serde(default)]
    pub team_keys: Vec<String>,
    /// `true` when the key is omitted.
    #[serde(default = "default_true")]
    pub fetch_on_reference: bool,
    /// Optional regex; `Config::load` rejects the config when it does not
    /// compile.
    #[serde(default)]
    pub ticket_regex: Option<String>,
}

impl Default for LinearConfig {
    fn default() -> Self {
        Self {
            api_key: None,
            team_keys: Vec::new(),
            // Keep in sync with the `default_true` serde default on the field.
            fetch_on_reference: true,
            ticket_regex: None,
        }
    }
}
/// Type of the optional `pm` field on `Config`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PmConfig {
/// Azure DevOps settings; reachable through `Config::azure_devops_config`.
#[serde(default)]
pub azure_devops: Option<AzureDevOpsConfig>,
}
/// Type of the optional `dora` field on `Config`.
///
/// `Default` is implemented by hand rather than derived: a derived impl would
/// leave `deployment_source`, `deployment_tag_pattern` and `production_branch`
/// as empty strings, while serde fills omitted keys with the values returned
/// by the `default_*` functions below. The two must agree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DoraConfig {
    /// `"git_tags"` when the key is omitted.
    #[serde(default = "default_deployment_source")]
    pub deployment_source: String,
    /// Regex; `Config::load` rejects the config when it does not compile.
    /// When omitted, matches an optional `v`, three dot-separated numbers and
    /// an optional `-suffix`.
    #[serde(default = "default_deployment_tag_pattern")]
    pub deployment_tag_pattern: String,
    /// `"main"` when the key is omitted.
    #[serde(default = "default_production_branch")]
    pub production_branch: String,
    /// Optional workflow name; not interpreted by this type.
    #[serde(default)]
    pub deployment_workflow: Option<String>,
    /// Failure signal definitions; empty when the key is omitted.
    #[serde(default)]
    pub failure_signals: Vec<FailureSignal>,
    /// Optional directory path.
    /// NOTE(review): presumably holds Datadog exports — confirm.
    #[serde(default)]
    pub datadog_dir: Option<PathBuf>,
}

impl Default for DoraConfig {
    fn default() -> Self {
        Self {
            deployment_source: default_deployment_source(),
            deployment_tag_pattern: default_deployment_tag_pattern(),
            production_branch: default_production_branch(),
            deployment_workflow: None,
            failure_signals: Vec::new(),
            datadog_dir: None,
        }
    }
}

/// Default for `DoraConfig::deployment_source`.
fn default_deployment_source() -> String {
    "git_tags".to_string()
}

/// Default for `DoraConfig::deployment_tag_pattern`.
fn default_deployment_tag_pattern() -> String {
    r"^v?[0-9]+\.[0-9]+\.[0-9]+(-[A-Za-z0-9.\-]+)?$".to_string()
}

/// Default for `DoraConfig::production_branch`.
fn default_production_branch() -> String {
    "main".to_string()
}
/// One entry of `DoraConfig::failure_signals`.
///
/// `Default` is implemented by hand rather than derived: a derived impl would
/// set `within_hours` to `0`, while serde fills an omitted key with `48`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FailureSignal {
    /// Optional work-type name; not interpreted by this type.
    #[serde(default)]
    pub work_type: Option<String>,
    /// Optional branch name; not interpreted by this type.
    #[serde(default)]
    pub on_branch: Option<String>,
    /// Optional regex; `Config::load` rejects the config when it does not
    /// compile.
    #[serde(default)]
    pub commit_message_pattern: Option<String>,
    /// `48` when the key is omitted.
    #[serde(default = "default_failure_window_hours")]
    pub within_hours: u32,
}

impl Default for FailureSignal {
    fn default() -> Self {
        Self {
            work_type: None,
            on_branch: None,
            commit_message_pattern: None,
            within_hours: default_failure_window_hours(),
        }
    }
}

/// Default for `FailureSignal::within_hours`.
fn default_failure_window_hours() -> u32 {
    48
}
/// Type of the optional `github` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GithubConfig {
/// NOTE(review): presumably a credential; this type derives `Debug` and
/// `Serialize` — confirm it is never logged or dumped verbatim.
#[serde(default)]
pub token: Option<String>,
/// Optional organisation name.
#[serde(default)]
pub org: Option<String>,
/// Optional repository name.
#[serde(default)]
pub repo: Option<String>,
/// `false` when the key is omitted.
#[serde(default)]
pub fetch_prs: bool,
/// Optional regex; `Config::load` rejects the config when it does not
/// compile.
#[serde(default)]
pub ticket_regex: Option<String>,
}
/// Type of the optional `bitbucket` field on `Config`.
///
/// NOTE(review): `app_password` and `token` are presumably credentials; this
/// type derives `Debug` and `Serialize` — confirm they are never logged or
/// dumped verbatim.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BitbucketConfig {
/// Optional user name.
#[serde(default)]
pub username: Option<String>,
/// Optional app password.
#[serde(default)]
pub app_password: Option<String>,
/// Optional token.
/// NOTE(review): precedence between `token` and `username`/`app_password`
/// is not visible here.
#[serde(default)]
pub token: Option<String>,
/// Optional workspace name.
#[serde(default)]
pub workspace: Option<String>,
/// Optional repository slug.
#[serde(default)]
pub repo_slug: Option<String>,
/// `false` when the key is omitted.
#[serde(default)]
pub fetch_prs: bool,
/// Optional API base URL override; not validated by this type.
#[serde(default)]
pub api_base_url: Option<String>,
}
/// Type of the optional `jira` field on `Config`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct JiraConfig {
/// Optional URL string; not validated by this type.
#[serde(default)]
pub url: Option<String>,
/// Optional user name.
#[serde(default)]
pub username: Option<String>,
/// NOTE(review): presumably a credential; this type derives `Debug` and
/// `Serialize` — confirm it is never logged or dumped verbatim.
#[serde(default)]
pub token: Option<String>,
/// Optional project key.
#[serde(default)]
pub project_key: Option<String>,
/// Also accepted under the singular key `jira_project_mapping` when
/// deserializing. Empty when omitted.
#[serde(default, alias = "jira_project_mapping")]
pub jira_project_mappings: HashMap<String, String>,
/// Optional confidence value; no range check is applied by this type.
#[serde(default)]
pub jira_project_mapping_confidence: Option<f64>,
/// Optional regex; `Config::load` rejects the config when it does not
/// compile.
#[serde(default)]
pub ticket_regex: Option<String>,
}
/// Expands a leading `~` component of `path` to the directory named by the
/// `HOME` environment variable.
///
/// Only a first component that is exactly `~` is recognised (`~`, `~/rest`);
/// `~user/...` and tildes elsewhere in the path are left alone. The path is
/// returned unchanged when it does not start with `~`, or when `HOME` is unset
/// or empty.
///
/// The check works on path components rather than on a UTF-8 string, so
/// non-UTF-8 paths are expanded too, and repeated separators after the tilde
/// (`~//x`) cannot turn the remainder into an absolute path that would replace
/// the home directory when joined.
pub fn expand_path(path: &Path) -> PathBuf {
    let rest = match path.strip_prefix("~") {
        Ok(rest) => rest,
        Err(_) => return path.to_path_buf(),
    };
    // An empty HOME would silently turn `~/x` into the cwd-relative `x`, so it
    // is treated the same as an unset one.
    let home = match std::env::var_os("HOME").filter(|home| !home.is_empty()) {
        Some(home) => PathBuf::from(home),
        None => return path.to_path_buf(),
    };
    if rest.as_os_str().is_empty() {
        // Bare `~`: joining an empty path would append a trailing separator.
        home
    } else {
        home.join(rest)
    }
}
impl Config {
    /// Reads the YAML file at `path` and returns the parsed [`Config`].
    ///
    /// A leading `~` in `path` is expanded with [`expand_path`]. The expanded
    /// path is stored in `source_path` so that `config_dir` can later resolve
    /// paths relative to the config file.
    ///
    /// # Errors
    /// Fails when the file cannot be read, when the YAML does not deserialize
    /// into `Config`, or when any configured regex pattern does not compile.
    pub fn load(path: &Path) -> Result<Config> {
        let resolved = expand_path(path);
        tracing::debug!(path = %resolved.display(), "loading config");
        let text = std::fs::read_to_string(&resolved)?;
        let mut cfg: Config = serde_yaml::from_str(&text)?;
        cfg.source_path = Some(resolved);
        cfg.validate_ticket_regexes()?;
        Ok(cfg)
    }

    /// Compiles every regex-valued setting once so that a malformed pattern is
    /// reported at load time, naming the offending field.
    ///
    /// Covers `ticket_regex` in the jira, github, linear and `pm.azure_devops`
    /// sections, plus `dora.deployment_tag_pattern` and each
    /// `dora.failure_signals[i].commit_message_pattern`.
    ///
    /// # Errors
    /// Returns `TgaError::ConfigError` for the first pattern that fails to
    /// compile.
    fn validate_ticket_regexes(&self) -> Result<()> {
        // `field` is the full dotted path of the setting. The helper used to
        // append ".ticket_regex" to every label, which produced misleading
        // names such as "dora.deployment_tag_pattern.ticket_regex".
        fn check(field: &str, pattern: Option<&str>) -> Result<()> {
            if let Some(p) = pattern {
                regex::Regex::new(p).map_err(|e| {
                    TgaError::ConfigError(format!(
                        "{field} is not a valid regular expression: {e}"
                    ))
                })?;
            }
            Ok(())
        }

        if let Some(jira) = &self.jira {
            check("jira.ticket_regex", jira.ticket_regex.as_deref())?;
        }
        if let Some(gh) = &self.github {
            check("github.ticket_regex", gh.ticket_regex.as_deref())?;
        }
        if let Some(linear) = &self.linear {
            check("linear.ticket_regex", linear.ticket_regex.as_deref())?;
        }
        if let Some(adc) = self.azure_devops_config() {
            check(
                "pm.azure_devops.ticket_regex",
                Some(adc.ticket_regex.as_str()),
            )?;
        }
        if let Some(dora) = &self.dora {
            check(
                "dora.deployment_tag_pattern",
                Some(dora.deployment_tag_pattern.as_str()),
            )?;
            for (i, sig) in dora.failure_signals.iter().enumerate() {
                let field = format!("dora.failure_signals[{i}].commit_message_pattern");
                check(&field, sig.commit_message_pattern.as_deref())?;
            }
        }
        Ok(())
    }

    /// Directory containing the file this config was loaded from.
    ///
    /// `None` when the config was not produced by [`Config::load`] (no
    /// `source_path`) or when the source path has no parent.
    pub fn config_dir(&self) -> Option<&Path> {
        self.source_path.as_deref().and_then(|p| p.parent())
    }

    /// Best-effort alias lookup that never fails.
    ///
    /// Returns the map from [`Config::resolved_alias_map`] when that succeeds
    /// and is non-empty. Otherwise falls back to `name -> aliases` taken from
    /// `team.members`, or to an empty map when there is no `team` section.
    pub fn resolved_aliases(&self) -> HashMap<String, Vec<String>> {
        match self.resolved_alias_map(self.config_dir()) {
            Ok(map) if !map.is_empty() => return map,
            Ok(_) => {}
            Err(err) => {
                // Still best-effort, but no longer silent: a broken
                // `aliases_file` should be visible to the operator.
                // NOTE(review): on this path the inline `developer_aliases`
                // are dropped too, not only the external file — confirm that
                // is intended.
                tracing::warn!(
                    error = %err,
                    "failed to resolve developer aliases; falling back to team members"
                );
            }
        }
        self.team
            .as_ref()
            .map(|team| {
                team.members
                    .iter()
                    .map(|m| (m.name.clone(), m.aliases.clone()))
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Merges the inline `developer_aliases` with the entries of
    /// `aliases_file`, if one is configured.
    ///
    /// The file path gets `~` expansion; a relative path is resolved against
    /// `config_dir` when that is `Some`, and used as-is otherwise. Entries from
    /// the file replace inline entries with the same name.
    ///
    /// # Errors
    /// Returns `TgaError::ConfigError` when the alias file cannot be loaded.
    pub fn resolved_alias_map(
        &self,
        config_dir: Option<&Path>,
    ) -> Result<HashMap<String, Vec<String>>> {
        let mut merged = self.developer_aliases.clone();
        if let Some(rel) = &self.aliases_file {
            let expanded = expand_path(Path::new(rel));
            let resolved = match config_dir {
                Some(dir) if !expanded.is_absolute() => dir.join(expanded),
                _ => expanded,
            };
            let external = AliasFile::load(&resolved).map_err(|e| {
                TgaError::ConfigError(format!(
                    "failed to load aliases_file {}: {e}",
                    resolved.display()
                ))
            })?;
            // `extend` overwrites existing keys, same as repeated `insert`.
            merged.extend(external.to_alias_map());
        }
        Ok(merged)
    }

    /// Shortcut for `pm.azure_devops`; `None` when either level is absent.
    pub fn azure_devops_config(&self) -> Option<&AzureDevOpsConfig> {
        self.pm.as_ref().and_then(|p| p.azure_devops.as_ref())
    }

    /// Checks the constraints that deserialization alone cannot express.
    ///
    /// # Errors
    /// Returns `TgaError::ValidationError` when no repository is configured or
    /// when a repository has an empty `path`, and propagates any error from
    /// the Azure DevOps section's own `validate`.
    pub fn validate(&self) -> Result<()> {
        if self.repositories.is_empty() {
            return Err(TgaError::ValidationError(
                "at least one repository must be configured".into(),
            ));
        }
        if self
            .repositories
            .iter()
            .any(|repo| repo.path.as_os_str().is_empty())
        {
            return Err(TgaError::ValidationError(
                "repository.path must not be empty".into(),
            ));
        }
        if let Some(azdo) = self.azure_devops_config() {
            azdo.validate()?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Config` whose only populated section is `pm.azure_devops`,
    /// using `pattern` as that section's `ticket_regex`.
    fn cfg_with_ado_regex(pattern: &str) -> Config {
        let azure_devops = AzureDevOpsConfig {
            organization_url: "https://dev.azure.com/myorg".into(),
            pat: "secret".into(),
            project: Some("MyProject".into()),
            projects: vec![],
            ticket_regex: pattern.into(),
            team_keys: vec![],
            fetch_on_reference: true,
            fetch_prs: false,
        };
        Config {
            pm: Some(PmConfig {
                azure_devops: Some(azure_devops),
            }),
            ..Config::default()
        }
    }

    /// A well-formed Azure DevOps ticket regex passes load-time validation.
    #[test]
    fn validate_ticket_regexes_accepts_valid_ado_pattern() {
        let outcome = cfg_with_ado_regex(r"#(\d{4,8})\b").validate_ticket_regexes();
        outcome.expect("valid ADO regex accepted");
    }

    /// A malformed Azure DevOps ticket regex is rejected, and the error names
    /// both the section and the field.
    #[test]
    fn validate_ticket_regexes_rejects_bad_ado_pattern() {
        let msg = cfg_with_ado_regex("[unclosed")
            .validate_ticket_regexes()
            .expect_err("malformed ADO ticket_regex must be rejected")
            .to_string();
        assert!(
            msg.contains("pm.azure_devops"),
            "error should name the section: {msg}"
        );
        assert!(
            msg.contains("ticket_regex"),
            "error should name the field: {msg}"
        );
    }

    /// `deny_unknown_fields` turns a misspelled key into a hard error.
    #[test]
    fn classification_config_unknown_field_is_rejected() {
        let yaml = "rules_path: ./my-rules.yaml\nuse_llm: false\n";
        let parsed = serde_yaml::from_str::<ClassificationConfig>(yaml);
        assert!(
            parsed.is_err(),
            "ClassificationConfig with unknown `rules_path:` must be rejected"
        );
    }
}