trusty-review 0.4.0

LLM-backed code review service — reviews GitHub PRs and unified diffs via AWS Bedrock or OpenRouter
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
//! Tests for the diff sampler.
//!
//! Why: the diff sampler touches real git repos and an in-memory SQLite DB;
//! isolated tests here keep `sampler.rs` free of test scaffolding.
//! What: exercises stratification, truncation, missing-repo skipping, real
//! diff fetching, max_diffs capping, and config path resolution.
//! Test: all tests are self-contained; they use tempfile repos and in-memory DBs.

#[cfg(test)]
mod diff_sampler_tests {
    use std::collections::HashMap;
    use std::path::Path;
    use std::path::PathBuf;

    use rusqlite::params;
    use tga::core::db::Database;

    use crate::profile::diff_sampler::MAX_DIFF_CHARS;
    use crate::profile::diff_sampler::config::{DEFAULT_MAX_DIFFS, DiffSamplerConfig};
    use crate::profile::diff_sampler::sampler::{
        CommitRecord, sample_diffs_for_batches, stratify_and_select, truncate_diff,
    };
    use crate::profile::types::period::PeriodBatch;

    // ── DB seed helpers ───────────────────────────────────────────────────────

    fn seed_author(db: &Database, name: &str, email: &str) -> i64 {
        db.connection()
            .execute(
                "INSERT INTO authors (canonical_name, canonical_email, aliases) \
                 VALUES (?1, ?2, '[]')",
                params![name, email],
            )
            .expect("insert author");
        db.connection().last_insert_rowid()
    }

    fn seed_commit_with_category_effort(
        db: &Database,
        sha: &str,
        author_id: i64,
        repository: &str,
        timestamp: &str,
        category: Option<&str>,
        effort: Option<&str>,
    ) {
        let cls_id: Option<i64> = if let Some(cat) = category {
            db.connection()
                .execute(
                    "INSERT OR IGNORE INTO classifications (id, category, confidence, method) \
                     VALUES (NULL, ?1, 0.9, 'rule')",
                    params![cat],
                )
                .expect("insert classification");
            let id: i64 = db
                .connection()
                .query_row(
                    "SELECT id FROM classifications WHERE category = ?1 LIMIT 1",
                    params![cat],
                    |r| r.get(0),
                )
                .expect("get cls id");
            Some(id)
        } else {
            None
        };

        db.connection()
            .execute(
                "INSERT INTO commits (sha, author_id, author_name, author_email, \
                 timestamp, message, repository, insertions, deletions, classification_id) \
                 VALUES (?1, ?2, 'n', 'e', ?3, ?1, ?4, 5, 2, ?5)",
                params![sha, author_id, timestamp, repository, cls_id],
            )
            .expect("insert commit");

        if let Some(sz) = effort {
            db.connection()
                .execute(
                    "INSERT INTO fact_commit_effort \
                     (sha, repository, size, score, loc, files, test_loc, tests_factor, computed_at) \
                     VALUES (?1, ?2, ?3, 1.0, 10, 1, 0, 1.0, 0)",
                    params![sha, repository, sz],
                )
                .expect("insert effort");
        }
    }

    // ── Git repo helpers ──────────────────────────────────────────────────────

    fn make_repo_with_initial_commit(dir: &Path, filename: &str, content: &str) -> String {
        let repo = git2::Repository::init(dir).expect("init repo");
        let mut config = repo.config().expect("config");
        config.set_str("user.name", "Test User").expect("set name");
        config
            .set_str("user.email", "test@example.com")
            .expect("set email");

        let file_path = dir.join(filename);
        std::fs::write(&file_path, content).expect("write file");

        let mut index = repo.index().expect("index");
        index.add_path(Path::new(filename)).expect("add path");
        index.write().expect("write index");

        let tree_id = index.write_tree().expect("write tree");
        let tree = repo.find_tree(tree_id).expect("find tree");
        let sig = git2::Signature::now("Test User", "test@example.com").expect("sig");
        let oid = repo
            .commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
            .expect("initial commit");
        oid.to_string()
    }

    fn add_commit(dir: &Path, filename: &str, new_content: &str) -> String {
        let repo = git2::Repository::open(dir).expect("open repo");

        let file_path = dir.join(filename);
        std::fs::write(&file_path, new_content).expect("write file");

        let mut index = repo.index().expect("index");
        index.add_path(Path::new(filename)).expect("add path");
        index.write().expect("write index");

        let tree_id = index.write_tree().expect("write tree");
        let tree = repo.find_tree(tree_id).expect("find tree");
        let sig = git2::Signature::now("Test User", "test@example.com").expect("sig");
        let head = repo.head().expect("head").peel_to_commit().expect("peel");
        let oid = repo
            .commit(
                Some("HEAD"),
                &sig,
                &sig,
                "Follow-up commit",
                &tree,
                &[&head],
            )
            .expect("follow-up commit");
        oid.to_string()
    }

    // ── Tests ─────────────────────────────────────────────────────────────────

    /// Why: the sampler must pick at least one bugfix and one feature commit
    /// when both categories are present.
    /// What: seeds 5 commits across categories, samples with max_diffs=3,
    /// asserts ≥1 of each priority category.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_stratification() {
        let commits = vec![
            CommitRecord {
                sha: "f1".to_string(),
                repository: "r".to_string(),
                message: "feat 1".to_string(),
                category: Some("feature".to_string()),
                effort: Some("S".to_string()),
                effort_rank: 2,
            },
            CommitRecord {
                sha: "f2".to_string(),
                repository: "r".to_string(),
                message: "feat 2".to_string(),
                category: Some("feature".to_string()),
                effort: Some("M".to_string()),
                effort_rank: 3,
            },
            CommitRecord {
                sha: "b1".to_string(),
                repository: "r".to_string(),
                message: "bugfix 1".to_string(),
                category: Some("bugfix".to_string()),
                effort: Some("XS".to_string()),
                effort_rank: 1,
            },
            CommitRecord {
                sha: "b2".to_string(),
                repository: "r".to_string(),
                message: "bugfix 2".to_string(),
                category: Some("bugfix".to_string()),
                effort: Some("S".to_string()),
                effort_rank: 2,
            },
            CommitRecord {
                sha: "r1".to_string(),
                repository: "r".to_string(),
                message: "refactor 1".to_string(),
                category: Some("refactor".to_string()),
                effort: Some("L".to_string()),
                effort_rank: 4,
            },
        ];

        let selected = stratify_and_select(&commits, 3);
        assert_eq!(selected.len(), 3);

        let cats: Vec<Option<&str>> = selected.iter().map(|c| c.category.as_deref()).collect();
        assert!(
            cats.contains(&Some("bugfix")),
            "should include a bugfix: {cats:?}"
        );
        assert!(
            cats.contains(&Some("feature")),
            "should include a feature: {cats:?}"
        );
    }

    /// Why: the sampler must fall back to descending-effort selection when
    /// no priority categories are present.
    /// What: seeds commits with only `chore` category, asserts highest-effort
    /// commit is first in the selection.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_falls_back_to_effort_ordering() {
        let commits = vec![
            CommitRecord {
                sha: "c1".to_string(),
                repository: "r".to_string(),
                message: "chore small".to_string(),
                category: Some("chore".to_string()),
                effort: Some("XS".to_string()),
                effort_rank: 1,
            },
            CommitRecord {
                sha: "c2".to_string(),
                repository: "r".to_string(),
                message: "chore large".to_string(),
                category: Some("chore".to_string()),
                effort: Some("XL".to_string()),
                effort_rank: 5,
            },
            CommitRecord {
                sha: "c3".to_string(),
                repository: "r".to_string(),
                message: "chore medium".to_string(),
                category: Some("chore".to_string()),
                effort: Some("M".to_string()),
                effort_rank: 3,
            },
        ];

        let selected = stratify_and_select(&commits, 2);
        assert_eq!(selected.len(), 2);
        assert_eq!(selected[0].sha, "c2");
    }

    /// Why: `truncate_diff` must produce output no longer than MAX_DIFF_CHARS
    /// (plus the truncation marker) and append the marker string.
    /// What: creates a string longer than MAX_DIFF_CHARS, calls truncate_diff,
    /// asserts length constraint and marker presence.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_truncates_long_diff() {
        let big = "x".repeat(MAX_DIFF_CHARS + 5000);
        let result = truncate_diff(&big);
        assert!(
            result.contains("[... diff truncated"),
            "must contain truncation marker"
        );
        let content_chars = result.chars().count();
        assert!(
            content_chars <= MAX_DIFF_CHARS + 60,
            "truncated diff too long: {content_chars}"
        );
    }

    /// Why: a diff shorter than MAX_DIFF_CHARS must be returned unchanged.
    /// What: passes a short diff, asserts it equals the input.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_short_diff_unchanged() {
        let short = "+fn hello() { println!(\"hi\"); }";
        assert_eq!(truncate_diff(short), short);
    }

    /// Why: commits in repos that are not locally available must be silently
    /// skipped without aborting the profile run.
    /// What: seeds one commit in the DB with a repo path that doesn't exist,
    /// calls `sample_diffs_for_batches`, asserts no diffs were added and no
    /// error was returned.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_skips_missing_repo() {
        let db = Database::open_in_memory().expect("open");
        let aid = seed_author(&db, "Alice", "alice@example.com");
        seed_commit_with_category_effort(
            &db,
            "sha_missing_repo",
            aid,
            "nonexistent-repo",
            "2024-01-08T00:00:00Z",
            Some("feature"),
            Some("M"),
        );

        let stats = tga::report::period_trends::query_author_period_trends(
            &db,
            "alice@example.com",
            4,
            None,
            None,
        )
        .expect("query trends");
        assert!(!stats.is_empty(), "should have at least one period");

        let mut batches: Vec<PeriodBatch> =
            stats.into_iter().map(PeriodBatch::from_stats).collect();

        let config = DiffSamplerConfig {
            max_diffs: 3,
            repo_paths: {
                let mut m = HashMap::new();
                m.insert(
                    "nonexistent-repo".to_string(),
                    PathBuf::from("/tmp/this-path-absolutely-does-not-exist-trusty-review"),
                );
                m
            },
            repos_root: None,
        };

        sample_diffs_for_batches(&mut batches, &db, "alice@example.com", &config)
            .expect("sample_diffs must not error on missing repo");

        let total_diffs: usize = batches.iter().map(|b| b.sampled_diffs.len()).sum();
        assert_eq!(
            total_diffs, 0,
            "no diffs should be collected for missing repos"
        );
    }

    /// Why: when the repo is available locally, diffs must be fetched and
    /// attached to the batch.
    /// What: creates a real temp git repo, seeds a commit in the DB with its
    /// SHA and path, calls `sample_diffs_for_batches`, asserts a diff was added.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_fetches_real_diff() {
        let tmp = tempfile::TempDir::new().expect("tempdir");
        let repo_path = tmp.path().to_path_buf();

        make_repo_with_initial_commit(tmp.path(), "hello.txt", "hello world\n");
        let sha = add_commit(tmp.path(), "hello.txt", "hello universe\n");

        let db = Database::open_in_memory().expect("open");
        let aid = seed_author(&db, "Alice", "alice@example.com");

        let repo_name = "test-repo";
        seed_commit_with_category_effort(
            &db,
            &sha,
            aid,
            repo_name,
            "2024-01-08T00:00:00Z",
            Some("feature"),
            Some("S"),
        );

        let stats = tga::report::period_trends::query_author_period_trends(
            &db,
            "alice@example.com",
            4,
            None,
            None,
        )
        .expect("query trends");
        assert!(!stats.is_empty());

        let mut batches: Vec<PeriodBatch> =
            stats.into_iter().map(PeriodBatch::from_stats).collect();

        let config = DiffSamplerConfig {
            max_diffs: 3,
            repo_paths: {
                let mut m = HashMap::new();
                m.insert(repo_name.to_string(), repo_path);
                m
            },
            repos_root: None,
        };

        sample_diffs_for_batches(&mut batches, &db, "alice@example.com", &config)
            .expect("sample_diffs");

        let total_diffs: usize = batches.iter().map(|b| b.sampled_diffs.len()).sum();
        assert_eq!(total_diffs, 1, "one diff should have been sampled");

        let diff = &batches[0].sampled_diffs[0];
        assert_eq!(diff.sha, sha);
        assert!(
            diff.diff_text.contains("+hello universe"),
            "diff text must contain the added line"
        );
        assert_eq!(diff.category, Some("feature".to_string()));
    }

    /// Why: max_diffs must cap the number of sampled diffs per period.
    /// What: seeds 5 commits, sets max_diffs=2, asserts at most 2 diffs per batch.
    /// Test: this test itself.
    #[test]
    fn diff_sampler_respects_max_diffs() {
        let tmp = tempfile::TempDir::new().expect("tempdir");
        let repo_path = tmp.path().to_path_buf();

        make_repo_with_initial_commit(tmp.path(), "f.txt", "v0\n");
        let mut shas = Vec::new();
        for i in 1..=5 {
            let sha = add_commit(tmp.path(), "f.txt", &format!("v{i}\n"));
            shas.push(sha);
        }

        let db = Database::open_in_memory().expect("open");
        let aid = seed_author(&db, "Alice", "alice@example.com");
        let repo_name = "myrepo";

        for (i, sha) in shas.iter().enumerate() {
            seed_commit_with_category_effort(
                &db,
                sha,
                aid,
                repo_name,
                &format!("2024-01-{:02}T00:00:00Z", i + 8),
                Some("feature"),
                None,
            );
        }

        let stats = tga::report::period_trends::query_author_period_trends(
            &db,
            "alice@example.com",
            4,
            None,
            None,
        )
        .expect("query trends");
        let mut batches: Vec<PeriodBatch> =
            stats.into_iter().map(PeriodBatch::from_stats).collect();

        let config = DiffSamplerConfig {
            max_diffs: 2,
            repo_paths: {
                let mut m = HashMap::new();
                m.insert(repo_name.to_string(), repo_path);
                m
            },
            repos_root: None,
        };

        sample_diffs_for_batches(&mut batches, &db, "alice@example.com", &config)
            .expect("sample_diffs");

        for batch in &batches {
            assert!(
                batch.sampled_diffs.len() <= 2,
                "max_diffs=2 must cap sampled diffs per period, got {}",
                batch.sampled_diffs.len()
            );
        }
    }

    /// Why: `DiffSamplerConfig::repo_path` must prefer the explicit map entry
    /// over the repos_root.
    /// What: sets both repo_paths and repos_root, queries a repo in repo_paths,
    /// asserts the explicit path is returned.
    /// Test: this test itself.
    #[test]
    fn config_repo_path_resolution() {
        let config = DiffSamplerConfig {
            repos_root: Some(PathBuf::from("/repos")),
            repo_paths: {
                let mut m = HashMap::new();
                m.insert("acme".to_string(), PathBuf::from("/explicit/acme"));
                m
            },
            max_diffs: DEFAULT_MAX_DIFFS,
        };

        assert_eq!(
            config.repo_path("acme"),
            Some(PathBuf::from("/explicit/acme")),
            "explicit entry must win over repos_root"
        );
        assert_eq!(
            config.repo_path("other"),
            Some(PathBuf::from("/repos/other")),
            "repos_root fallback must work"
        );
        assert_eq!(
            DiffSamplerConfig::default().repo_path("anything"),
            None,
            "no config → None"
        );
    }
}