tga_core/config/mod.rs
1//! Configuration types deserialized from YAML.
2//!
3//! The full configuration schema is documented in
4//! `docs/requirements/configuration.md`. This module implements the practical
5//! subset needed by the pipeline; unknown YAML keys are ignored (forward
6//! compatible) so newer config files can be loaded by older binaries without
7//! a hard failure.
8//!
9//! Paths support tilde-expansion (`~`, `~/foo`) via [`expand_path`].
10//!
11//! # Example
12//!
13//! ```no_run
14//! use std::path::Path;
15//! use tga_core::config::Config;
16//!
17//! let cfg = Config::load(Path::new("config.yaml")).expect("load");
18//! println!("repos: {}", cfg.repositories.len());
19//! ```
20
21use std::collections::HashMap;
22use std::path::{Path, PathBuf};
23
24use serde::{Deserialize, Serialize};
25
26use crate::errors::{Result, TgaError};
27
28/// Top-level configuration root.
29///
30/// Mirrors the YAML schema from the Python predecessor. All top-level
31/// sections are optional except `repositories`, which must contain at
32/// least one entry to be useful.
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct Config {
35 /// Repositories to analyze.
36 #[serde(default)]
37 pub repositories: Vec<RepositoryConfig>,
38
39 /// Team / member roster and aliases.
40 #[serde(default)]
41 pub team: Option<TeamConfig>,
42
43 /// Output destination and format flags.
44 #[serde(default)]
45 pub output: Option<OutputConfig>,
46
47 /// Classification cascade settings.
48 #[serde(default)]
49 pub classification: Option<ClassificationConfig>,
50
51 /// GitHub API credentials and scope.
52 #[serde(default)]
53 pub github: Option<GithubConfig>,
54
55 /// JIRA API credentials and scope.
56 #[serde(default)]
57 pub jira: Option<JiraConfig>,
58
59 /// Schema version string (e.g. `"1.0"`).
60 ///
61 /// Stored for forward compatibility with the Python predecessor's YAML
62 /// format. Not enforced by the Rust loader — present so files written
63 /// for the Python tool deserialize cleanly.
64 #[serde(default)]
65 pub version: Option<String>,
66
67 /// Named profile (e.g. `"balanced"`).
68 ///
69 /// Stored for forward compatibility with the Python predecessor. Not
70 /// currently consumed by the Rust pipeline.
71 #[serde(default)]
72 pub profile: Option<String>,
73
74 /// Python-compatible flat alias map: canonical name → list of email
75 /// addresses or login aliases.
76 ///
77 /// When non-empty, takes precedence over [`TeamConfig::members`] for
78 /// identity resolution (see [`Config::resolved_aliases`]).
79 #[serde(default)]
80 pub developer_aliases: HashMap<String, Vec<String>>,
81
82 /// Analysis settings (ML categorization, etc.).
83 ///
84 /// Parsed for forward compatibility; individual sub-features gate their
85 /// own behavior on its presence.
86 #[serde(default)]
87 pub analysis: Option<AnalysisConfig>,
88
89 /// Cache directory and related settings.
90 #[serde(default)]
91 pub cache: Option<CacheConfig>,
92}
93
94/// Analysis pipeline configuration (forward-compat with Python schema).
95#[derive(Debug, Clone, Default, Serialize, Deserialize)]
96pub struct AnalysisConfig {
97 /// ML-based commit categorization settings.
98 #[serde(default)]
99 pub ml_categorization: Option<MlCategorizationConfig>,
100}
101
102/// ML categorization toggle and model selection.
103#[derive(Debug, Clone, Default, Serialize, Deserialize)]
104pub struct MlCategorizationConfig {
105 /// Whether ML categorization is enabled.
106 #[serde(default)]
107 pub enabled: bool,
108
109 /// Optional model identifier.
110 #[serde(default)]
111 pub model: Option<String>,
112}
113
114/// Cache layer configuration.
115#[derive(Debug, Clone, Default, Serialize, Deserialize)]
116pub struct CacheConfig {
117 /// Filesystem directory used for cached artifacts. Supports `~` expansion.
118 #[serde(default)]
119 pub directory: Option<PathBuf>,
120}
121
122/// A single repository to collect commits from.
123#[derive(Debug, Clone, Default, Serialize, Deserialize)]
124pub struct RepositoryConfig {
125 /// Local filesystem path to the repository (supports `~` expansion).
126 pub path: PathBuf,
127
128 /// Display name used in reports. Falls back to the directory basename.
129 #[serde(default)]
130 pub name: Option<String>,
131
132 /// Branch override; if `None`, the default branch is auto-detected.
133 #[serde(default)]
134 pub branch: Option<String>,
135
136 /// Inclusive start date for commit collection (ISO 8601).
137 #[serde(default)]
138 pub since_date: Option<String>,
139
140 /// Inclusive end date for commit collection (ISO 8601).
141 #[serde(default)]
142 pub until_date: Option<String>,
143}
144
145/// Team roster and identity aliases.
146#[derive(Debug, Clone, Default, Serialize, Deserialize)]
147pub struct TeamConfig {
148 /// Canonical team members.
149 #[serde(default)]
150 pub members: Vec<TeamMember>,
151
152 /// Free-form aliases map: alias → canonical name.
153 #[serde(default)]
154 pub aliases: HashMap<String, String>,
155}
156
157/// A canonical team member with optional alias list.
158#[derive(Debug, Clone, Default, Serialize, Deserialize)]
159pub struct TeamMember {
160 /// Canonical display name.
161 pub name: String,
162
163 /// Primary email address (canonical).
164 pub email: String,
165
166 /// Alternative names/emails that map to this member.
167 #[serde(default)]
168 pub aliases: Vec<String>,
169}
170
171/// Output / reporting configuration.
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
173pub struct OutputConfig {
174 /// Single output format identifier (`csv`, `json`, `markdown`).
175 ///
176 /// Retained for backward compatibility; prefer [`OutputConfig::formats`].
177 #[serde(default)]
178 pub format: Option<String>,
179
180 /// Destination directory for reports.
181 ///
182 /// Accepts both `directory` (Python-compat) and `output_path` (legacy
183 /// Rust) keys in the YAML.
184 #[serde(default, alias = "output_path")]
185 pub directory: Option<PathBuf>,
186
187 /// Output format list (e.g. `["csv", "markdown"]`).
188 #[serde(default)]
189 pub formats: Vec<String>,
190
191 /// Include unclassified commits in output.
192 #[serde(default)]
193 pub include_unclassified: bool,
194
195 /// Include merge commits in output.
196 #[serde(default)]
197 pub include_merges: bool,
198
199 /// Include file-level details in output.
200 #[serde(default)]
201 pub include_files: bool,
202}
203
204/// Classification cascade configuration.
205#[derive(Debug, Clone, Default, Serialize, Deserialize)]
206pub struct ClassificationConfig {
207 /// Path to user-supplied rules YAML/JSON.
208 #[serde(default)]
209 pub rules_file: Option<PathBuf>,
210
211 /// Whether to engage the LLM fallback tier.
212 #[serde(default)]
213 pub use_llm: bool,
214
215 /// LLM model identifier (provider-specific).
216 #[serde(default)]
217 pub llm_model: Option<String>,
218
219 /// Minimum confidence required to accept a classification.
220 #[serde(default = "default_confidence_threshold")]
221 pub confidence_threshold: f64,
222}
223
224fn default_confidence_threshold() -> f64 {
225 0.7
226}
227
228/// GitHub API integration settings.
229#[derive(Debug, Clone, Default, Serialize, Deserialize)]
230pub struct GithubConfig {
231 /// Personal access token (often sourced from `GITHUB_TOKEN`).
232 #[serde(default)]
233 pub token: Option<String>,
234
235 /// Organization slug for org-wide queries.
236 #[serde(default)]
237 pub org: Option<String>,
238
239 /// Single-repository slug (`owner/name`).
240 #[serde(default)]
241 pub repo: Option<String>,
242
243 /// Whether to fetch pull request metadata.
244 #[serde(default)]
245 pub fetch_prs: bool,
246}
247
248/// JIRA Cloud / Server integration settings.
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250pub struct JiraConfig {
251 /// Base URL of the JIRA instance.
252 #[serde(default)]
253 pub url: Option<String>,
254
255 /// API username (typically an email address for Cloud).
256 #[serde(default)]
257 pub username: Option<String>,
258
259 /// API token.
260 #[serde(default)]
261 pub token: Option<String>,
262
263 /// Project key for filtering issues (e.g. `API`).
264 #[serde(default)]
265 pub project_key: Option<String>,
266}
267
/// Expand a leading `~` in a path to the current user's home directory.
///
/// The home directory is read from the `HOME` environment variable. Only a
/// path whose *first component* is exactly `~` is expanded (`~`, `~/foo`);
/// forms such as `~user/foo` or `dir/~/x` are returned unchanged.
///
/// The path is also returned unchanged when `HOME` is unset or empty, so a
/// missing home directory never turns `~/foo` into the relative path `foo`.
///
/// Matching is done on path components rather than on the string form, which
/// means:
///
/// - paths that are not valid UTF-8 are still expanded, and
/// - redundant separators (`~//foo`) cannot make the remainder absolute and
///   thereby escape the home directory when it is joined.
pub fn expand_path(path: &Path) -> PathBuf {
    // `strip_prefix` compares whole components, so `~user` does not match.
    let rest = match path.strip_prefix("~") {
        Ok(rest) => rest,
        Err(_) => return path.to_path_buf(),
    };

    // An empty HOME is treated the same as an unset one.
    let home = match std::env::var_os("HOME").filter(|h| !h.is_empty()) {
        Some(home) => PathBuf::from(home),
        None => return path.to_path_buf(),
    };

    if rest.as_os_str().is_empty() {
        // Bare `~`: return the home directory itself, without a trailing separator.
        home
    } else {
        home.join(rest)
    }
}
289
290impl Config {
291 /// Load a YAML configuration from disk.
292 ///
293 /// # Errors
294 ///
295 /// - [`TgaError::IoError`] if the file cannot be read.
296 /// - [`TgaError::SerdeYamlError`] if YAML parsing fails.
297 pub fn load(path: &Path) -> Result<Config> {
298 let resolved = expand_path(path);
299 tracing::debug!(path = %resolved.display(), "loading config");
300 let text = std::fs::read_to_string(&resolved)?;
301 let cfg: Config = serde_yaml::from_str(&text)?;
302 Ok(cfg)
303 }
304
305 /// Resolve identity aliases from either the Python-compatible
306 /// [`Config::developer_aliases`] map or from [`TeamConfig::members`].
307 ///
308 /// `developer_aliases` (when non-empty) takes precedence. The returned
309 /// map is keyed by canonical name; values are the list of email
310 /// addresses or login aliases that should resolve to that name.
311 pub fn resolved_aliases(&self) -> HashMap<String, Vec<String>> {
312 if !self.developer_aliases.is_empty() {
313 self.developer_aliases.clone()
314 } else if let Some(team) = &self.team {
315 team.members
316 .iter()
317 .map(|m| (m.name.clone(), m.aliases.clone()))
318 .collect()
319 } else {
320 HashMap::new()
321 }
322 }
323
324 /// Validate cross-field invariants of the config.
325 ///
326 /// # Errors
327 ///
328 /// Returns [`TgaError::ValidationError`] if any invariant is violated.
329 pub fn validate(&self) -> Result<()> {
330 if self.repositories.is_empty() {
331 return Err(TgaError::ValidationError(
332 "at least one repository must be configured".into(),
333 ));
334 }
335 for r in &self.repositories {
336 if r.path.as_os_str().is_empty() {
337 return Err(TgaError::ValidationError(
338 "repository.path must not be empty".into(),
339 ));
340 }
341 }
342 Ok(())
343 }
344}