1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
//! Configuration types for the diff sampler.
//!
//! Why: callers need to tune sampling parameters (number of diffs, repo paths)
//! without changing function signatures; this module isolates the config type
//! so it can be imported independently.
//! What: defines `DiffSamplerConfig` and its resolution helpers; also defines
//! `DEFAULT_MAX_DIFFS` and `MAX_DIFF_CHARS` constants.
//! Test: `config_repo_path_resolution` in the parent `tests` module.
use HashMap;
use PathBuf;
/// Upper bound on the diff text retained for any single sampled commit.
///
/// Why: the LLM context window is limited, so one oversized commit must not
/// be able to use up the whole profile budget on its own.
/// What: 20,000 UTF-8 characters (roughly 5–10K tokens). This cap is applied
/// at the profile layer, on top of the separate, lower-level tga
/// `DIFF_BYTE_CAP` (200 KiB) that `diff_for_commit` enforces itself.
/// Test: `tests::diff_sampler_truncates_long_diff`.
pub const MAX_DIFF_CHARS: usize = 20_000;
/// Default cap on how many diffs are sampled for one period batch.
///
/// Why: without a cap, a period containing many commits would send all of
/// their diffs to the LLM in a single request.
/// What: 5 — sufficient for qualitative coverage while keeping token usage
/// modest. Callers can override it through `DiffSamplerConfig::max_diffs`.
/// Test: `tests::diff_sampler_respects_max_diffs`.
pub const DEFAULT_MAX_DIFFS: usize = 5;
/// Configuration for the diff sampler.
///
/// Why: callers need to tune sampling parameters (number of diffs, repo paths)
/// without changing function signatures.
/// What: holds the maximum number of diffs per period and the map from
/// repository name to local filesystem path used by `diff_for_commit`.
/// Test: `DiffSamplerConfig::default()` is exercised by all sampler tests.