Skip to main content

tga/core/models/
mod.rs

//! Domain models corresponding to the v1 database schema.
//!
//! These structs (and the small enums used for typed columns) are the
//! in-memory representation of rows in the core tables. They are
//! intentionally serialization-friendly via `serde` so that they can be
//! emitted as JSON in reports without an intermediate DTO layer.
7
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11/// A single commit observed in a repository.
12///
13/// Why: rows in the `commits` SQLite table need a typed in-memory
14/// counterpart that both extractors and aggregators can share.
15/// What: maps 1:1 onto the v1 `commits` schema. `Serialize`/`Deserialize`
16/// derives let report formatters emit it as JSON without a DTO layer.
17/// Test: covered indirectly by every test that inserts into the
18/// `commits` table (see `core::tests::database_opens_with_wal_and_migrations_apply`).
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct Commit {
21    /// Primary key (database-assigned).
22    pub id: i64,
23
24    /// Full git OID (hex).
25    pub sha: String,
26
27    /// Foreign key into [`Author`].
28    pub author_id: Option<i64>,
29
30    /// Author display name as recorded in the commit.
31    pub author_name: String,
32
33    /// Author email as recorded in the commit.
34    pub author_email: String,
35
36    /// Author timestamp (UTC).
37    pub timestamp: DateTime<Utc>,
38
39    /// Commit message body (raw).
40    pub message: String,
41
42    /// Repository identifier (path or canonical name).
43    pub repository: String,
44
45    /// Number of files changed.
46    pub files_changed: u32,
47
48    /// Lines added.
49    pub insertions: u32,
50
51    /// Lines deleted.
52    pub deletions: u32,
53
54    /// Foreign key into [`Classification`], if classified.
55    pub classification_id: Option<i64>,
56
57    /// Confidence assigned by the classifier (0.0–1.0).
58    pub confidence: Option<f64>,
59
60    /// True for merge commits (parents > 1).
61    pub is_merge: bool,
62
63    /// True if the commit message references a known ticket system
64    /// (JIRA/Linear-style `PROJ-123`, GitHub `fixes #123`, or bare `#123`).
65    ///
66    /// Computed at extraction time by [`crate::collect::ticket::is_ticketed`]
67    /// and persisted on the `commits` row.
68    pub ticketed: bool,
69}
70
71/// A canonical author / developer identity.
72///
73/// Why: the same physical developer often commits under multiple
74/// `(name, email)` pairs; the `authors` table holds one row per resolved
75/// identity so reports collapse them.
76/// What: maps to the `authors` v1 schema with the alias list stored as a
77/// JSON-encoded string.
78/// Test: covered by `collect::identity::resolver` tests that exercise the
79/// upsert path.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct Author {
82    /// Primary key (database-assigned).
83    pub id: i64,
84
85    /// Canonical display name.
86    pub canonical_name: String,
87
88    /// Canonical email address.
89    pub canonical_email: String,
90
91    /// JSON-encoded array of alias strings (names or emails).
92    pub aliases: String,
93}
94
95/// A classification verdict produced by the cascade.
96///
97/// Why: classifications are stored once per unique outcome and referenced
98/// by `commits.classification_id`, so the same `(category, subcategory,
99/// method)` triple is not duplicated per commit.
100/// What: maps to the `classifications` v1 schema; `method` records which
101/// cascade tier produced the verdict.
102/// Test: covered by `classify::pipeline` tests that exercise full-cascade
103/// runs against an in-memory DB.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct Classification {
106    /// Primary key (database-assigned).
107    pub id: i64,
108
109    /// Top-level category (e.g. `feature`, `bugfix`, `chore`).
110    pub category: String,
111
112    /// Optional finer-grained label.
113    pub subcategory: Option<String>,
114
115    /// Associated ticket identifier (e.g. `API-123`), if any.
116    pub ticket_id: Option<String>,
117
118    /// Confidence in this verdict (0.0–1.0).
119    pub confidence: f64,
120
121    /// Which tier of the cascade produced this verdict.
122    pub method: ClassificationMethod,
123}
124
125/// File-level change record attached to a commit.
126///
127/// Why: per-file change data feeds the "files churned" and
128/// "complexity" metrics; the per-file granularity must survive
129/// round-tripping through SQLite.
130/// What: maps to the `files` v1 schema with a typed `change_type`
131/// (added / modified / deleted / renamed).
132/// Test: covered indirectly by the git-extractor tests
133/// (`collect::git::extractor`).
134#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct FileChange {
136    /// Primary key (database-assigned).
137    pub id: i64,
138
139    /// Foreign key into [`Commit`].
140    pub commit_id: i64,
141
142    /// Relative path within the repository.
143    pub path: String,
144
145    /// Type of change.
146    pub change_type: ChangeType,
147
148    /// Lines added in this file.
149    pub insertions: u32,
150
151    /// Lines deleted in this file.
152    pub deletions: u32,
153}
154
155/// A pull request record (typically GitHub).
156///
157/// Why: PR data drives the velocity / DORA lead-time / cycle-time
158/// metrics; storing the full PR row lets us recompute those metrics
159/// without re-fetching from the provider.
160/// What: maps to the `pull_requests` v1 schema. Provider-specific PR
161/// numbering means the `(provider, pr_number, repository)` triple is
162/// the persistence-level unique identity.
163/// Test: covered by `collect::github::client` and
164/// `collect::bitbucket::client` tests that round-trip PR data through
165/// the DB.
166#[derive(Debug, Clone, Serialize, Deserialize)]
167pub struct PullRequest {
168    /// Primary key (database-assigned).
169    pub id: i64,
170
171    /// PR number within its repository.
172    pub pr_number: u64,
173
174    /// Repository this PR belongs to. Together with `provider` and
175    /// `pr_number` this forms the persistence-level unique identity of a
176    /// PR. GitHub assigns `pr_number` per-repository (so #1 in repo A is
177    /// not the same PR as #1 in repo B); without this field the
178    /// `(provider, pr_number)` unique index from migration v10 silently
179    /// dropped cross-repo collisions during multi-repo collection (#88).
180    ///
181    /// Format is provider-specific:
182    /// - GitHub: `"owner/repo"` (e.g. `"acme/widgets"`)
183    /// - Bitbucket: `"workspace/repo_slug"`
184    /// - Azure DevOps: `"project"` (PRs are project-scoped)
185    pub repository: String,
186
187    /// PR title.
188    pub title: String,
189
190    /// Author login.
191    pub author: String,
192
193    /// Lifecycle state.
194    pub state: PrState,
195
196    /// PR creation timestamp (UTC).
197    pub created_at: DateTime<Utc>,
198
199    /// Merge timestamp, if merged.
200    pub merged_at: Option<DateTime<Utc>>,
201
202    /// JSON-encoded array of commit SHAs in the PR.
203    pub commit_shas: String,
204}
205
206/// Cascade tier that produced a classification.
207///
208/// Why: knowing which tier of the four-tier cascade produced a verdict
209/// lets analytics tools surface low-confidence verdicts (e.g. the
210/// catch-all routes through `LlmFallback` when LLM is enabled).
211/// What: enum tagged with snake_case string values for DB persistence.
212/// Test: covered by `classify::tests::engine_classify_batch_does_not_panic`
213/// which asserts the cascade reports the correct tier.
214#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
215#[serde(rename_all = "snake_case")]
216pub enum ClassificationMethod {
217    /// Matched a deterministic exact rule.
218    ExactRule,
219    /// Matched a regex rule.
220    RegexRule,
221    /// Matched via fuzzy similarity.
222    FuzzyMatch,
223    /// Assigned by an LLM fallback.
224    LlmFallback,
225    /// Set manually by a user override.
226    Manual,
227    /// Derived from an external ticket source (JIRA issue type or GitHub
228    /// Issues label). Added for issue #260.
229    ExternalSource,
230    /// Composed from multiple weak signals via a weighted-sum model.
231    ///
232    /// Tier 2.5 — sits between the regex tier (Tier 2) and the fuzzy tier
233    /// (Tier 3). Blends keyword-density, ticket-prefix presence, message-length
234    /// bucket, merge indicator, and file-path signals into per-category scores.
235    /// Added for issue #270.
236    WeightedSum,
237    /// Derived from the catch-all rule (lowest-priority, confidence 0.3).
238    ///
239    /// Why: distinguishes the explicit catch-all from other fuzzy verdicts so
240    /// downstream consumers can filter "true unknowns" separately.
241    /// Added for issue #445 batch C.
242    CatchAll,
243    /// Applied by the `repo_categories` fallback tier (Tier 5, #445 batch C).
244    ///
245    /// Why: lets callers distinguish a confident classification from a
246    /// repo-default assignment, enabling metric-level filtering.
247    RepoCategoryFallback,
248}
249
250impl ClassificationMethod {
251    /// Stable string representation used for DB storage.
252    ///
253    /// Why: rusqlite needs a `&str` to bind to the `method` column; the
254    /// values must stay stable across releases so existing rows continue
255    /// to round-trip correctly.
256    /// What: returns the lowercase snake_case label for each variant.
257    /// Test: covered indirectly by every classification test that reads
258    /// or writes the `classifications` table.
259    pub fn as_str(&self) -> &'static str {
260        match self {
261            ClassificationMethod::ExactRule => "exact_rule",
262            ClassificationMethod::RegexRule => "regex_rule",
263            ClassificationMethod::FuzzyMatch => "fuzzy_match",
264            ClassificationMethod::LlmFallback => "llm_fallback",
265            ClassificationMethod::Manual => "manual",
266            ClassificationMethod::ExternalSource => "external_source",
267            ClassificationMethod::WeightedSum => "weighted_sum",
268            ClassificationMethod::CatchAll => "catch_all",
269            ClassificationMethod::RepoCategoryFallback => "repo_category_fallback",
270        }
271    }
272}
273
274/// File change kind for [`FileChange`].
275///
276/// Why: distinguishing add / modify / delete / rename lets reports
277/// separate "new code" from "code shuffled around" without re-parsing
278/// the git diff.
279/// What: 4-variant enum with snake_case strings for DB persistence.
280/// Test: covered by `collect::git::diff` extractor tests.
281#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
282#[serde(rename_all = "snake_case")]
283pub enum ChangeType {
284    /// File was added.
285    Added,
286    /// File contents were modified.
287    Modified,
288    /// File was deleted.
289    Deleted,
290    /// File was renamed (and possibly modified).
291    Renamed,
292}
293
294impl ChangeType {
295    /// Stable string representation used for DB storage.
296    ///
297    /// Why: see [`ClassificationMethod::as_str`] — same persistence
298    /// invariant applies.
299    /// What: returns the snake_case label for each variant.
300    /// Test: covered by `collect::git::diff` tests.
301    pub fn as_str(&self) -> &'static str {
302        match self {
303            ChangeType::Added => "added",
304            ChangeType::Modified => "modified",
305            ChangeType::Deleted => "deleted",
306            ChangeType::Renamed => "renamed",
307        }
308    }
309}
310
311/// Lifecycle state of a [`PullRequest`].
312///
313/// Why: cycle-time and DORA lead-time only apply to merged PRs;
314/// surfacing the state lets reports filter without joining extra tables.
315/// What: open / closed / merged tri-state with snake_case persistence.
316/// Test: covered by `collect::github::client` round-trip tests.
317#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
318#[serde(rename_all = "snake_case")]
319pub enum PrState {
320    /// PR is open.
321    Open,
322    /// PR was closed without merging.
323    Closed,
324    /// PR was merged.
325    Merged,
326}
327
328impl PrState {
329    /// Stable string representation used for DB storage.
330    ///
331    /// Why: see [`ClassificationMethod::as_str`] — same persistence
332    /// invariant applies.
333    /// What: returns the snake_case label for each variant.
334    /// Test: covered by `collect::github::client` round-trip tests.
335    pub fn as_str(&self) -> &'static str {
336        match self {
337            PrState::Open => "open",
338            PrState::Closed => "closed",
339            PrState::Merged => "merged",
340        }
341    }
342}