// tga/core/models/mod.rs
//! Domain models corresponding to the v1 database schema.
//!
//! These structs are the in-memory representation of rows in the core
//! tables. They are intentionally serialization-friendly via `serde` so
//! that they can be emitted as JSON in reports without an intermediate
//! DTO layer.
7
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11/// A single commit observed in a repository.
12///
13/// Why: rows in the `commits` SQLite table need a typed in-memory
14/// counterpart that both extractors and aggregators can share.
15/// What: maps 1:1 onto the v1 `commits` schema. `Serialize`/`Deserialize`
16/// derives let report formatters emit it as JSON without a DTO layer.
17/// Test: covered indirectly by every test that inserts into the
18/// `commits` table (see `core::tests::database_opens_with_wal_and_migrations_apply`).
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct Commit {
21 /// Primary key (database-assigned).
22 pub id: i64,
23
24 /// Full git OID (hex).
25 pub sha: String,
26
27 /// Foreign key into [`Author`].
28 pub author_id: Option<i64>,
29
30 /// Author display name as recorded in the commit.
31 pub author_name: String,
32
33 /// Author email as recorded in the commit.
34 pub author_email: String,
35
36 /// Author timestamp (UTC).
37 pub timestamp: DateTime<Utc>,
38
39 /// Commit message body (raw).
40 pub message: String,
41
42 /// Repository identifier (path or canonical name).
43 pub repository: String,
44
45 /// Number of files changed.
46 pub files_changed: u32,
47
48 /// Lines added.
49 pub insertions: u32,
50
51 /// Lines deleted.
52 pub deletions: u32,
53
54 /// Foreign key into [`Classification`], if classified.
55 pub classification_id: Option<i64>,
56
57 /// Confidence assigned by the classifier (0.0–1.0).
58 pub confidence: Option<f64>,
59
60 /// True for merge commits (parents > 1).
61 pub is_merge: bool,
62
63 /// True if the commit message references a known ticket system
64 /// (JIRA/Linear-style `PROJ-123`, GitHub `fixes #123`, or bare `#123`).
65 ///
66 /// Computed at extraction time by [`crate::collect::ticket::is_ticketed`]
67 /// and persisted on the `commits` row.
68 pub ticketed: bool,
69}
70
71/// A canonical author / developer identity.
72///
73/// Why: the same physical developer often commits under multiple
74/// `(name, email)` pairs; the `authors` table holds one row per resolved
75/// identity so reports collapse them.
76/// What: maps to the `authors` v1 schema with the alias list stored as a
77/// JSON-encoded string.
78/// Test: covered by `collect::identity::resolver` tests that exercise the
79/// upsert path.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct Author {
82 /// Primary key (database-assigned).
83 pub id: i64,
84
85 /// Canonical display name.
86 pub canonical_name: String,
87
88 /// Canonical email address.
89 pub canonical_email: String,
90
91 /// JSON-encoded array of alias strings (names or emails).
92 pub aliases: String,
93}
94
95/// A classification verdict produced by the cascade.
96///
97/// Why: classifications are stored once per unique outcome and referenced
98/// by `commits.classification_id`, so the same `(category, subcategory,
99/// method)` triple is not duplicated per commit.
100/// What: maps to the `classifications` v1 schema; `method` records which
101/// cascade tier produced the verdict.
102/// Test: covered by `classify::pipeline` tests that exercise full-cascade
103/// runs against an in-memory DB.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct Classification {
106 /// Primary key (database-assigned).
107 pub id: i64,
108
109 /// Top-level category (e.g. `feature`, `bugfix`, `chore`).
110 pub category: String,
111
112 /// Optional finer-grained label.
113 pub subcategory: Option<String>,
114
115 /// Associated ticket identifier (e.g. `API-123`), if any.
116 pub ticket_id: Option<String>,
117
118 /// Confidence in this verdict (0.0–1.0).
119 pub confidence: f64,
120
121 /// Which tier of the cascade produced this verdict.
122 pub method: ClassificationMethod,
123}
124
125/// File-level change record attached to a commit.
126///
127/// Why: per-file change data feeds the "files churned" and
128/// "complexity" metrics; the per-file granularity must survive
129/// round-tripping through SQLite.
130/// What: maps to the `files` v1 schema with a typed `change_type`
131/// (added / modified / deleted / renamed).
132/// Test: covered indirectly by the git-extractor tests
133/// (`collect::git::extractor`).
134#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct FileChange {
136 /// Primary key (database-assigned).
137 pub id: i64,
138
139 /// Foreign key into [`Commit`].
140 pub commit_id: i64,
141
142 /// Relative path within the repository.
143 pub path: String,
144
145 /// Type of change.
146 pub change_type: ChangeType,
147
148 /// Lines added in this file.
149 pub insertions: u32,
150
151 /// Lines deleted in this file.
152 pub deletions: u32,
153}
154
155/// A pull request record (typically GitHub).
156///
157/// Why: PR data drives the velocity / DORA lead-time / cycle-time
158/// metrics; storing the full PR row lets us recompute those metrics
159/// without re-fetching from the provider.
160/// What: maps to the `pull_requests` v1 schema. Provider-specific PR
161/// numbering means the `(provider, pr_number, repository)` triple is
162/// the persistence-level unique identity.
163/// Test: covered by `collect::github::client` and
164/// `collect::bitbucket::client` tests that round-trip PR data through
165/// the DB.
166#[derive(Debug, Clone, Serialize, Deserialize)]
167pub struct PullRequest {
168 /// Primary key (database-assigned).
169 pub id: i64,
170
171 /// PR number within its repository.
172 pub pr_number: u64,
173
174 /// Repository this PR belongs to. Together with `provider` and
175 /// `pr_number` this forms the persistence-level unique identity of a
176 /// PR. GitHub assigns `pr_number` per-repository (so #1 in repo A is
177 /// not the same PR as #1 in repo B); without this field the
178 /// `(provider, pr_number)` unique index from migration v10 silently
179 /// dropped cross-repo collisions during multi-repo collection (#88).
180 ///
181 /// Format is provider-specific:
182 /// - GitHub: `"owner/repo"` (e.g. `"acme/widgets"`)
183 /// - Bitbucket: `"workspace/repo_slug"`
184 /// - Azure DevOps: `"project"` (PRs are project-scoped)
185 pub repository: String,
186
187 /// PR title.
188 pub title: String,
189
190 /// Author login.
191 pub author: String,
192
193 /// Lifecycle state.
194 pub state: PrState,
195
196 /// PR creation timestamp (UTC).
197 pub created_at: DateTime<Utc>,
198
199 /// Merge timestamp, if merged.
200 pub merged_at: Option<DateTime<Utc>>,
201
202 /// JSON-encoded array of commit SHAs in the PR.
203 pub commit_shas: String,
204}
205
206/// Cascade tier that produced a classification.
207///
208/// Why: knowing which tier of the four-tier cascade produced a verdict
209/// lets analytics tools surface low-confidence verdicts (e.g. the
210/// catch-all routes through `LlmFallback` when LLM is enabled).
211/// What: enum tagged with snake_case string values for DB persistence.
212/// Test: covered by `classify::tests::engine_classify_batch_does_not_panic`
213/// which asserts the cascade reports the correct tier.
214#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
215#[serde(rename_all = "snake_case")]
216pub enum ClassificationMethod {
217 /// Matched a deterministic exact rule.
218 ExactRule,
219 /// Matched a regex rule.
220 RegexRule,
221 /// Matched via fuzzy similarity.
222 FuzzyMatch,
223 /// Assigned by an LLM fallback.
224 LlmFallback,
225 /// Set manually by a user override.
226 Manual,
227 /// Derived from an external ticket source (JIRA issue type or GitHub
228 /// Issues label). Added for issue #260.
229 ExternalSource,
230 /// Composed from multiple weak signals via a weighted-sum model.
231 ///
232 /// Tier 2.5 — sits between the regex tier (Tier 2) and the fuzzy tier
233 /// (Tier 3). Blends keyword-density, ticket-prefix presence, message-length
234 /// bucket, merge indicator, and file-path signals into per-category scores.
235 /// Added for issue #270.
236 WeightedSum,
237 /// Derived from the catch-all rule (lowest-priority, confidence 0.3).
238 ///
239 /// Why: distinguishes the explicit catch-all from other fuzzy verdicts so
240 /// downstream consumers can filter "true unknowns" separately.
241 /// Added for issue #445 batch C.
242 CatchAll,
243 /// Applied by the `repo_categories` fallback tier (Tier 5, #445 batch C).
244 ///
245 /// Why: lets callers distinguish a confident classification from a
246 /// repo-default assignment, enabling metric-level filtering.
247 RepoCategoryFallback,
248}
249
250impl ClassificationMethod {
251 /// Stable string representation used for DB storage.
252 ///
253 /// Why: rusqlite needs a `&str` to bind to the `method` column; the
254 /// values must stay stable across releases so existing rows continue
255 /// to round-trip correctly.
256 /// What: returns the lowercase snake_case label for each variant.
257 /// Test: covered indirectly by every classification test that reads
258 /// or writes the `classifications` table.
259 pub fn as_str(&self) -> &'static str {
260 match self {
261 ClassificationMethod::ExactRule => "exact_rule",
262 ClassificationMethod::RegexRule => "regex_rule",
263 ClassificationMethod::FuzzyMatch => "fuzzy_match",
264 ClassificationMethod::LlmFallback => "llm_fallback",
265 ClassificationMethod::Manual => "manual",
266 ClassificationMethod::ExternalSource => "external_source",
267 ClassificationMethod::WeightedSum => "weighted_sum",
268 ClassificationMethod::CatchAll => "catch_all",
269 ClassificationMethod::RepoCategoryFallback => "repo_category_fallback",
270 }
271 }
272}
273
274/// File change kind for [`FileChange`].
275///
276/// Why: distinguishing add / modify / delete / rename lets reports
277/// separate "new code" from "code shuffled around" without re-parsing
278/// the git diff.
279/// What: 4-variant enum with snake_case strings for DB persistence.
280/// Test: covered by `collect::git::diff` extractor tests.
281#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
282#[serde(rename_all = "snake_case")]
283pub enum ChangeType {
284 /// File was added.
285 Added,
286 /// File contents were modified.
287 Modified,
288 /// File was deleted.
289 Deleted,
290 /// File was renamed (and possibly modified).
291 Renamed,
292}
293
294impl ChangeType {
295 /// Stable string representation used for DB storage.
296 ///
297 /// Why: see [`ClassificationMethod::as_str`] — same persistence
298 /// invariant applies.
299 /// What: returns the snake_case label for each variant.
300 /// Test: covered by `collect::git::diff` tests.
301 pub fn as_str(&self) -> &'static str {
302 match self {
303 ChangeType::Added => "added",
304 ChangeType::Modified => "modified",
305 ChangeType::Deleted => "deleted",
306 ChangeType::Renamed => "renamed",
307 }
308 }
309}
310
311/// Lifecycle state of a [`PullRequest`].
312///
313/// Why: cycle-time and DORA lead-time only apply to merged PRs;
314/// surfacing the state lets reports filter without joining extra tables.
315/// What: open / closed / merged tri-state with snake_case persistence.
316/// Test: covered by `collect::github::client` round-trip tests.
317#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
318#[serde(rename_all = "snake_case")]
319pub enum PrState {
320 /// PR is open.
321 Open,
322 /// PR was closed without merging.
323 Closed,
324 /// PR was merged.
325 Merged,
326}
327
328impl PrState {
329 /// Stable string representation used for DB storage.
330 ///
331 /// Why: see [`ClassificationMethod::as_str`] — same persistence
332 /// invariant applies.
333 /// What: returns the snake_case label for each variant.
334 /// Test: covered by `collect::github::client` round-trip tests.
335 pub fn as_str(&self) -> &'static str {
336 match self {
337 PrState::Open => "open",
338 PrState::Closed => "closed",
339 PrState::Merged => "merged",
340 }
341 }
342}