Skip to main content

tga_core/config/
mod.rs

//! Configuration types deserialized from YAML.
//!
//! The full configuration schema is documented in
//! `docs/requirements/configuration.md`. This module implements only the
//! practical subset the pipeline needs. Unknown YAML keys are ignored, which
//! keeps the loader forward compatible: a config file written for a newer
//! release still loads in an older binary instead of failing hard.
//!
//! Paths support tilde expansion (`~`, `~/foo`) via [`expand_path`].
//!
//! # Example
//!
//! ```no_run
//! use std::path::Path;
//! use tga_core::config::Config;
//!
//! let cfg = Config::load(Path::new("config.yaml")).expect("load");
//! println!("repos: {}", cfg.repositories.len());
//! ```
20
21use std::collections::HashMap;
22use std::path::{Path, PathBuf};
23
24use serde::{Deserialize, Serialize};
25
26use crate::errors::{Result, TgaError};
27
28/// Top-level configuration root.
29///
30/// Mirrors the YAML schema from the Python predecessor. All top-level
31/// sections are optional except `repositories`, which must contain at
32/// least one entry to be useful.
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct Config {
35    /// Repositories to analyze.
36    #[serde(default)]
37    pub repositories: Vec<RepositoryConfig>,
38
39    /// Team / member roster and aliases.
40    #[serde(default)]
41    pub team: Option<TeamConfig>,
42
43    /// Output destination and format flags.
44    #[serde(default)]
45    pub output: Option<OutputConfig>,
46
47    /// Classification cascade settings.
48    #[serde(default)]
49    pub classification: Option<ClassificationConfig>,
50
51    /// GitHub API credentials and scope.
52    #[serde(default)]
53    pub github: Option<GithubConfig>,
54
55    /// JIRA API credentials and scope.
56    #[serde(default)]
57    pub jira: Option<JiraConfig>,
58
59    /// Schema version string (e.g. `"1.0"`).
60    ///
61    /// Stored for forward compatibility with the Python predecessor's YAML
62    /// format. Not enforced by the Rust loader — present so files written
63    /// for the Python tool deserialize cleanly.
64    #[serde(default)]
65    pub version: Option<String>,
66
67    /// Named profile (e.g. `"balanced"`).
68    ///
69    /// Stored for forward compatibility with the Python predecessor. Not
70    /// currently consumed by the Rust pipeline.
71    #[serde(default)]
72    pub profile: Option<String>,
73
74    /// Python-compatible flat alias map: canonical name → list of email
75    /// addresses or login aliases.
76    ///
77    /// When non-empty, takes precedence over [`TeamConfig::members`] for
78    /// identity resolution (see [`Config::resolved_aliases`]).
79    #[serde(default)]
80    pub developer_aliases: HashMap<String, Vec<String>>,
81
82    /// Analysis settings (ML categorization, etc.).
83    ///
84    /// Parsed for forward compatibility; individual sub-features gate their
85    /// own behavior on its presence.
86    #[serde(default)]
87    pub analysis: Option<AnalysisConfig>,
88
89    /// Cache directory and related settings.
90    #[serde(default)]
91    pub cache: Option<CacheConfig>,
92}
93
94/// Analysis pipeline configuration (forward-compat with Python schema).
95#[derive(Debug, Clone, Default, Serialize, Deserialize)]
96pub struct AnalysisConfig {
97    /// ML-based commit categorization settings.
98    #[serde(default)]
99    pub ml_categorization: Option<MlCategorizationConfig>,
100}
101
102/// ML categorization toggle and model selection.
103#[derive(Debug, Clone, Default, Serialize, Deserialize)]
104pub struct MlCategorizationConfig {
105    /// Whether ML categorization is enabled.
106    #[serde(default)]
107    pub enabled: bool,
108
109    /// Optional model identifier.
110    #[serde(default)]
111    pub model: Option<String>,
112}
113
114/// Cache layer configuration.
115#[derive(Debug, Clone, Default, Serialize, Deserialize)]
116pub struct CacheConfig {
117    /// Filesystem directory used for cached artifacts. Supports `~` expansion.
118    #[serde(default)]
119    pub directory: Option<PathBuf>,
120}
121
122/// A single repository to collect commits from.
123#[derive(Debug, Clone, Default, Serialize, Deserialize)]
124pub struct RepositoryConfig {
125    /// Local filesystem path to the repository (supports `~` expansion).
126    pub path: PathBuf,
127
128    /// Display name used in reports. Falls back to the directory basename.
129    #[serde(default)]
130    pub name: Option<String>,
131
132    /// Branch override; if `None`, the default branch is auto-detected.
133    #[serde(default)]
134    pub branch: Option<String>,
135
136    /// Inclusive start date for commit collection (ISO 8601).
137    #[serde(default)]
138    pub since_date: Option<String>,
139
140    /// Inclusive end date for commit collection (ISO 8601).
141    #[serde(default)]
142    pub until_date: Option<String>,
143}
144
145/// Team roster and identity aliases.
146#[derive(Debug, Clone, Default, Serialize, Deserialize)]
147pub struct TeamConfig {
148    /// Canonical team members.
149    #[serde(default)]
150    pub members: Vec<TeamMember>,
151
152    /// Free-form aliases map: alias → canonical name.
153    #[serde(default)]
154    pub aliases: HashMap<String, String>,
155}
156
157/// A canonical team member with optional alias list.
158#[derive(Debug, Clone, Default, Serialize, Deserialize)]
159pub struct TeamMember {
160    /// Canonical display name.
161    pub name: String,
162
163    /// Primary email address (canonical).
164    pub email: String,
165
166    /// Alternative names/emails that map to this member.
167    #[serde(default)]
168    pub aliases: Vec<String>,
169}
170
171/// Output / reporting configuration.
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
173pub struct OutputConfig {
174    /// Single output format identifier (`csv`, `json`, `markdown`).
175    ///
176    /// Retained for backward compatibility; prefer [`OutputConfig::formats`].
177    #[serde(default)]
178    pub format: Option<String>,
179
180    /// Destination directory for reports.
181    ///
182    /// Accepts both `directory` (Python-compat) and `output_path` (legacy
183    /// Rust) keys in the YAML.
184    #[serde(default, alias = "output_path")]
185    pub directory: Option<PathBuf>,
186
187    /// Output format list (e.g. `["csv", "markdown"]`).
188    #[serde(default)]
189    pub formats: Vec<String>,
190
191    /// Include unclassified commits in output.
192    #[serde(default)]
193    pub include_unclassified: bool,
194
195    /// Include merge commits in output.
196    #[serde(default)]
197    pub include_merges: bool,
198
199    /// Include file-level details in output.
200    #[serde(default)]
201    pub include_files: bool,
202}
203
204/// Classification cascade configuration.
205#[derive(Debug, Clone, Default, Serialize, Deserialize)]
206pub struct ClassificationConfig {
207    /// Path to user-supplied rules YAML/JSON.
208    #[serde(default)]
209    pub rules_file: Option<PathBuf>,
210
211    /// Whether to engage the LLM fallback tier.
212    #[serde(default)]
213    pub use_llm: bool,
214
215    /// LLM model identifier (provider-specific).
216    #[serde(default)]
217    pub llm_model: Option<String>,
218
219    /// Minimum confidence required to accept a classification.
220    #[serde(default = "default_confidence_threshold")]
221    pub confidence_threshold: f64,
222}
223
224fn default_confidence_threshold() -> f64 {
225    0.7
226}
227
228/// GitHub API integration settings.
229#[derive(Debug, Clone, Default, Serialize, Deserialize)]
230pub struct GithubConfig {
231    /// Personal access token (often sourced from `GITHUB_TOKEN`).
232    #[serde(default)]
233    pub token: Option<String>,
234
235    /// Organization slug for org-wide queries.
236    #[serde(default)]
237    pub org: Option<String>,
238
239    /// Single-repository slug (`owner/name`).
240    #[serde(default)]
241    pub repo: Option<String>,
242
243    /// Whether to fetch pull request metadata.
244    #[serde(default)]
245    pub fetch_prs: bool,
246}
247
248/// JIRA Cloud / Server integration settings.
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250pub struct JiraConfig {
251    /// Base URL of the JIRA instance.
252    #[serde(default)]
253    pub url: Option<String>,
254
255    /// API username (typically an email address for Cloud).
256    #[serde(default)]
257    pub username: Option<String>,
258
259    /// API token.
260    #[serde(default)]
261    pub token: Option<String>,
262
263    /// Project key for filtering issues (e.g. `API`).
264    #[serde(default)]
265    pub project_key: Option<String>,
266}
267
/// Expand a leading `~` in a path to the current user's home directory.
///
/// Only a path whose *first component* is exactly `~` is expanded (`~`,
/// `~/foo`). Anything else — `~user/foo`, `foo/~`, absolute paths, the empty
/// path — is returned unchanged.
///
/// The home directory is read from the `HOME` environment variable. If it is
/// unset or empty, the home directory is considered undeterminable and the
/// path is returned unchanged.
///
/// Matching is done on path components rather than on the string form, so:
///
/// - repeated separators after the tilde (`~//foo`) still resolve *inside*
///   the home directory instead of collapsing to the absolute path `/foo`;
/// - paths that are not valid UTF-8 are expanded as well.
pub fn expand_path(path: &Path) -> PathBuf {
    // `Path::strip_prefix` compares whole components, so `~foo` does not
    // match and the remainder is always a relative path.
    let rest = match path.strip_prefix("~") {
        Ok(rest) => rest,
        Err(_) => return path.to_path_buf(),
    };

    // An empty `HOME` would otherwise turn `~/foo` into the relative `foo`.
    let home = match std::env::var_os("HOME") {
        Some(home) if !home.is_empty() => PathBuf::from(home),
        _ => return path.to_path_buf(),
    };

    if rest.as_os_str().is_empty() {
        // Bare `~`: avoid `join("")`, which would append a trailing separator.
        home
    } else {
        home.join(rest)
    }
}
289
290impl Config {
291    /// Load a YAML configuration from disk.
292    ///
293    /// # Errors
294    ///
295    /// - [`TgaError::IoError`] if the file cannot be read.
296    /// - [`TgaError::SerdeYamlError`] if YAML parsing fails.
297    pub fn load(path: &Path) -> Result<Config> {
298        let resolved = expand_path(path);
299        tracing::debug!(path = %resolved.display(), "loading config");
300        let text = std::fs::read_to_string(&resolved)?;
301        let cfg: Config = serde_yaml::from_str(&text)?;
302        Ok(cfg)
303    }
304
305    /// Resolve identity aliases from either the Python-compatible
306    /// [`Config::developer_aliases`] map or from [`TeamConfig::members`].
307    ///
308    /// `developer_aliases` (when non-empty) takes precedence. The returned
309    /// map is keyed by canonical name; values are the list of email
310    /// addresses or login aliases that should resolve to that name.
311    pub fn resolved_aliases(&self) -> HashMap<String, Vec<String>> {
312        if !self.developer_aliases.is_empty() {
313            self.developer_aliases.clone()
314        } else if let Some(team) = &self.team {
315            team.members
316                .iter()
317                .map(|m| (m.name.clone(), m.aliases.clone()))
318                .collect()
319        } else {
320            HashMap::new()
321        }
322    }
323
324    /// Validate cross-field invariants of the config.
325    ///
326    /// # Errors
327    ///
328    /// Returns [`TgaError::ValidationError`] if any invariant is violated.
329    pub fn validate(&self) -> Result<()> {
330        if self.repositories.is_empty() {
331            return Err(TgaError::ValidationError(
332                "at least one repository must be configured".into(),
333            ));
334        }
335        for r in &self.repositories {
336            if r.path.as_os_str().is_empty() {
337                return Err(TgaError::ValidationError(
338                    "repository.path must not be empty".into(),
339                ));
340            }
341        }
342        Ok(())
343    }
344}