llm_git/
config.rs

1use std::path::{Path, PathBuf};
2
3use indexmap::IndexMap;
4use serde::Deserialize;
5
6use crate::{
7   error::{CommitGenError, Result},
8   types::{
9      CategoryConfig, TypeConfig, default_categories, default_classifier_hint, default_types,
10   },
11};
12
13#[derive(Debug, Clone, Deserialize)]
14#[serde(default)]
15pub struct CommitConfig {
16   pub api_base_url: String,
17
18   /// Optional API key for authentication (overridden by `LLM_GIT_API_KEY` env
19   /// var)
20   pub api_key: Option<String>,
21
22   /// HTTP request timeout in seconds
23   pub request_timeout_secs: u64,
24
25   /// HTTP connection timeout in seconds
26   pub connect_timeout_secs: u64,
27
28   /// Maximum rounds for compose mode multi-commit generation
29   pub compose_max_rounds: usize,
30
31   pub summary_guideline:       usize,
32   pub summary_soft_limit:      usize,
33   pub summary_hard_limit:      usize,
34   pub max_retries:             u32,
35   pub initial_backoff_ms:      u64,
36   pub max_diff_length:         usize,
37   pub max_diff_tokens:         usize,
38   pub wide_change_threshold:   f32,
39   pub temperature:             f32,
40   pub analysis_model:          String,
41   pub summary_model:           String,
42   pub excluded_files:          Vec<String>,
43   pub low_priority_extensions: Vec<String>,
44
45   /// Maximum token budget for commit message detail points (approx 4
46   /// chars/token)
47   pub max_detail_tokens: usize,
48
49   /// Prompt variant for analysis phase (e.g., "default")
50   #[serde(default = "default_analysis_prompt_variant")]
51   pub analysis_prompt_variant: String,
52
53   /// Prompt variant for summary phase (e.g., "default")
54   #[serde(default = "default_summary_prompt_variant")]
55   pub summary_prompt_variant: String,
56
57   /// Enable abstract summaries for wide changes (cross-cutting refactors)
58   #[serde(default = "default_wide_change_abstract")]
59   pub wide_change_abstract: bool,
60
61   /// Exclude old commit message from context in commit mode (rewrite mode uses
62   /// this)
63   #[serde(default = "default_exclude_old_message")]
64   pub exclude_old_message: bool,
65
66   /// GPG sign commits by default (can be overridden by --sign CLI flag)
67   #[serde(default = "default_gpg_sign")]
68   pub gpg_sign: bool,
69
70   /// Commit types with descriptions for AI prompts (order = priority)
71   #[serde(default = "default_types")]
72   pub types: IndexMap<String, TypeConfig>,
73
74   /// Global hint for cross-type disambiguation
75   #[serde(default = "default_classifier_hint")]
76   pub classifier_hint: String,
77
78   /// Changelog categories with matching rules (order = render order)
79   #[serde(default = "default_categories")]
80   pub categories: Vec<CategoryConfig>,
81
82   /// Enable automatic changelog updates (default: true)
83   #[serde(default = "default_changelog_enabled")]
84   pub changelog_enabled: bool,
85
86   /// Enable map-reduce for large diffs (default: true)
87   #[serde(default = "default_map_reduce_enabled")]
88   pub map_reduce_enabled: bool,
89
90   /// Token threshold for triggering map-reduce (default: 30000 tokens)
91   #[serde(default = "default_map_reduce_threshold")]
92   pub map_reduce_threshold: usize,
93
94   /// Loaded analysis prompt (not in config file)
95   #[serde(skip)]
96   pub analysis_prompt: String,
97
98   /// Loaded summary prompt (not in config file)
99   #[serde(skip)]
100   pub summary_prompt: String,
101}
102
/// Serde default for `CommitConfig::analysis_prompt_variant`: the `"default"`
/// variant.
fn default_analysis_prompt_variant() -> String {
   String::from("default")
}
106
/// Serde default for `CommitConfig::summary_prompt_variant`: the `"default"`
/// variant.
fn default_summary_prompt_variant() -> String {
   String::from("default")
}
110
/// Serde default for `CommitConfig::wide_change_abstract`: enabled.
const fn default_wide_change_abstract() -> bool { true }
114
/// Serde default for `CommitConfig::exclude_old_message`: enabled.
const fn default_exclude_old_message() -> bool { true }
118
/// Serde default for `CommitConfig::gpg_sign`: signing is off unless requested.
const fn default_gpg_sign() -> bool { false }
122
/// Serde default for `CommitConfig::changelog_enabled`: enabled.
const fn default_changelog_enabled() -> bool { true }
126
/// Serde default for `CommitConfig::map_reduce_enabled`: enabled.
const fn default_map_reduce_enabled() -> bool { true }
130
/// Serde default for `CommitConfig::map_reduce_threshold`.
///
/// 30k tokens, which is roughly 120k characters.
const fn default_map_reduce_threshold() -> usize { 30_000 }
134
135impl Default for CommitConfig {
136   fn default() -> Self {
137      Self {
138         api_base_url:            "http://localhost:4000".to_string(),
139         api_key:                 None,
140         request_timeout_secs:    120,
141         connect_timeout_secs:    30,
142         compose_max_rounds:      5,
143         summary_guideline:       72,
144         summary_soft_limit:      96,
145         summary_hard_limit:      128,
146         max_retries:             3,
147         initial_backoff_ms:      1000,
148         max_diff_length:         100000, // Increased to handle larger refactors better
149         max_diff_tokens:         25000,  // ~100K chars = 25K tokens (4 chars/token estimate)
150         wide_change_threshold:   0.50,
151         temperature:             0.2, // Low temperature for consistent structured output
152         analysis_model:          "claude-sonnet-4.5".to_string(),
153         summary_model:           "claude-haiku-4-5".to_string(),
154         excluded_files:          vec![
155            "Cargo.lock".to_string(),
156            "package-lock.json".to_string(),
157            "yarn.lock".to_string(),
158            "pnpm-lock.yaml".to_string(),
159            "composer.lock".to_string(),
160            "Gemfile.lock".to_string(),
161            "poetry.lock".to_string(),
162            "flake.lock".to_string(),
163            ".gitignore".to_string(),
164         ],
165         low_priority_extensions: vec![
166            ".lock".to_string(),
167            ".sum".to_string(),
168            ".toml".to_string(),
169            ".yaml".to_string(),
170            ".yml".to_string(),
171            ".json".to_string(),
172            ".md".to_string(),
173            ".txt".to_string(),
174            ".log".to_string(),
175            ".tmp".to_string(),
176            ".bak".to_string(),
177         ],
178         max_detail_tokens:       200,
179         analysis_prompt_variant: default_analysis_prompt_variant(),
180         summary_prompt_variant:  default_summary_prompt_variant(),
181         wide_change_abstract:    default_wide_change_abstract(),
182         exclude_old_message:     default_exclude_old_message(),
183         gpg_sign:                default_gpg_sign(),
184         types:                   default_types(),
185         classifier_hint:         default_classifier_hint(),
186         categories:              default_categories(),
187         changelog_enabled:       default_changelog_enabled(),
188         map_reduce_enabled:      default_map_reduce_enabled(),
189         map_reduce_threshold:    default_map_reduce_threshold(),
190         analysis_prompt:         String::new(),
191         summary_prompt:          String::new(),
192      }
193   }
194}
195
196impl CommitConfig {
197   /// Load config from default location (~/.config/llm-git/config.toml)
198   /// Falls back to Default if file doesn't exist or can't determine home
199   /// directory Environment variables override config file values:
200   /// - `LLM_GIT_API_URL` overrides `api_base_url`
201   /// - `LLM_GIT_API_KEY` overrides `api_key`
202   pub fn load() -> Result<Self> {
203      let config_path = if let Ok(custom_path) = std::env::var("LLM_GIT_CONFIG") {
204         PathBuf::from(custom_path)
205      } else {
206         Self::default_config_path().unwrap_or_else(|_| PathBuf::new())
207      };
208
209      let mut config = if config_path.exists() {
210         Self::from_file(&config_path)?
211      } else {
212         Self::default()
213      };
214
215      // Apply environment variable overrides
216      Self::apply_env_overrides(&mut config);
217
218      config.load_prompts()?;
219      Ok(config)
220   }
221
222   /// Apply environment variable overrides to config
223   fn apply_env_overrides(config: &mut Self) {
224      if let Ok(api_url) = std::env::var("LLM_GIT_API_URL") {
225         config.api_base_url = api_url;
226      }
227
228      if let Ok(api_key) = std::env::var("LLM_GIT_API_KEY") {
229         config.api_key = Some(api_key);
230      }
231   }
232
233   /// Load config from specific file
234   pub fn from_file(path: &Path) -> Result<Self> {
235      let contents = std::fs::read_to_string(path)
236         .map_err(|e| CommitGenError::Other(format!("Failed to read config: {e}")))?;
237      let mut config: Self = toml::from_str(&contents)
238         .map_err(|e| CommitGenError::Other(format!("Failed to parse config: {e}")))?;
239
240      // Apply environment variable overrides
241      Self::apply_env_overrides(&mut config);
242
243      config.load_prompts()?;
244      Ok(config)
245   }
246
247   /// Load prompts - templates are now loaded dynamically via Tera
248   /// This method ensures prompts are initialized
249   fn load_prompts(&mut self) -> Result<()> {
250      // Ensure prompts directory exists and embedded templates are unpacked
251      crate::templates::ensure_prompts_dir()?;
252
253      // Templates loaded dynamically at render time
254      self.analysis_prompt = String::new();
255      self.summary_prompt = String::new();
256      Ok(())
257   }
258
259   /// Get default config path (platform-safe)
260   /// Tries HOME (Unix/Linux/macOS) then USERPROFILE (Windows)
261   pub fn default_config_path() -> Result<PathBuf> {
262      // Try HOME first (Unix/Linux/macOS)
263      if let Ok(home) = std::env::var("HOME") {
264         return Ok(PathBuf::from(home).join(".config/llm-git/config.toml"));
265      }
266
267      // Try USERPROFILE on Windows
268      if let Ok(home) = std::env::var("USERPROFILE") {
269         return Ok(PathBuf::from(home).join(".config/llm-git/config.toml"));
270      }
271
272      Err(CommitGenError::Other("No home directory found (tried HOME and USERPROFILE)".to_string()))
273   }
274}
275
/// Past-tense verbs accepted in commit messages.
///
/// The rows are only a layout aid; the order of the entries is unchanged.
pub const PAST_TENSE_VERBS: &[&str] = &[
   "added", "fixed", "updated", "refactored",
   "removed", "replaced", "improved", "implemented",
   "migrated", "renamed", "moved", "merged",
   "split", "extracted", "restructured", "reorganized",
   "consolidated", "simplified", "optimized", "documented",
   "tested", "changed", "introduced", "deprecated",
   "deleted", "corrected", "enhanced", "reverted",
];
307
/// Prompt template for the analysis phase.
///
/// Asks the model to classify a change as a conventional commit type, pick an
/// optional scope, extract issue references and write detail points.
///
/// Placeholders appearing in the text: `{stat}`, `{scope_candidates}` and
/// `{diff}`.
///
/// NOTE(review): per the `allow` reason below, the copy that is actually used
/// lives in src/api/prompts.rs; how the placeholders get substituted is not
/// visible from this file.
#[allow(dead_code, reason = "Defined in src/api/prompts.rs where it is used")]
pub const CONVENTIONAL_ANALYSIS_PROMPT: &str = r#"
Analyze git changes and classify as a conventional commit with detail points.

OVERVIEW OF CHANGES:
```
{stat}
```

COMMIT TYPE (choose one):
- feat: New public API, function, or user-facing capability (even with refactoring)
- fix: Bug fix or correction
- refactor: Code restructuring with SAME behavior (no new capability)
- docs: Documentation-only changes
- test: Test additions/modifications
- chore: Tooling, dependencies, maintenance (no production code)
- style: Formatting, whitespace (no logic change)
- perf: Performance optimization
- build: Build system, dependencies (Cargo.toml, package.json)
- ci: CI/CD configuration (.github/workflows, etc)
- revert: Reverts a previous commit

TYPE CLASSIFICATION (CRITICAL):
✓ feat: New public functions, API endpoints, features, capabilities users can invoke
  - "Added TLS support with new builder API" → feat (new capability)
  - "Implemented JSON-LD iterator traits" → feat (new API surface)
✗ refactor: ONLY when behavior unchanged
  - "Replaced polling with event model" → feat if new behavior; refactor if same output
  - "Migrated from HTTP to gRPC" → feat (protocol change affects behavior)
  - "Renamed internal functions" → refactor (no user-visible change)

RULE: Be neutral between feat and refactor. Feat requires NEW capability/behavior. Refactor requires PROOF of unchanged behavior.

CRITICAL REFACTOR vs FEAT DISTINCTION:
When deciding between 'feat' and 'refactor', ask: "Can users observe different behavior?"

- refactor: Same external behavior, different internal structure
  ✗ "Migrated HTTP client to async" → feat (behavior change: now async)
  ✓ "Reorganized HTTP client modules" → refactor (no behavior change)

- feat: New behavior users can observe/invoke
  ✓ "Added async HTTP client support" → feat (new capability)
  ✓ "Implemented TLS transport layer" → feat (new feature)
  ✓ "Migrated from polling to event-driven model" → feat (observable change)

GUIDELINE: If the diff adds new public APIs, changes protocols, or enables new capabilities → feat
If the diff just reorganizes code without changing what it does → refactor

OTHER HEURISTICS:
- Commit message starts with "Revert" → revert
- Bug keywords, test fixes → fix
- Only .md/doc comments → docs
- Only test files → test
- Lock files, configs, .gitignore → chore
- Only formatting → style
- Optimization (proven faster) → perf
- Build scripts, dependency updates → build
- CI config files → ci

SCOPE EXTRACTION (optional):
SCOPE SUGGESTIONS (derived from changed files with line-count weights): {scope_candidates}
- You may use a suggested scope above, infer a more specific two-segment scope (e.g., core/utime), or omit when changes are broad
- Scopes MUST reflect actual directories from the diff, not invented names
- Use slash-separated paths (e.g., core/utime) when changes focus on a specific submodule
- Omit scope when: multi-component changes, cross-cutting concerns, or unclear focus
- Special cases (even if not suggested): "toolchain", "deps", "config"
- Format: lowercase alphanumeric with `/`, `-`, or `_` only (max 2 segments)

ISSUE REFERENCE EXTRACTION:
- Extract issue numbers from context (e.g. #123, GH-456)
- Return as array of strings or empty array if none

DETAIL REQUIREMENTS (0-6 items, prefer 3-4):
1. Past-tense verb ONLY: added, fixed, updated, refactored, removed, replaced,
   improved, implemented, migrated, renamed, moved, merged, split, extracted,
   restructured, reorganized, consolidated, simplified, optimized
2. End with period
3. Balance WHAT changed with WHY/HOW (not just "what")
4. Abstraction levels (prefer higher):
   - Level 3 (BEST): Architectural impact, user-facing change, performance gain
     "Replaced polling with event-driven model for 10x throughput."
   - Level 2 (GOOD): Component changes, API surface
     "Consolidated three HTTP builders into unified API."
   - Level 1 (AVOID): Low-level details, renames
     "Renamed workspacePath to locate." ❌
5. Group ≥3 similar changes: "Updated 5 test files for new API." not 5 bullets
6. Prioritize: user-visible > performance/security > architecture > internal refactoring
7. Empty array if no supporting details needed

EXCLUDE FROM DETAILS:
- Import/use statements
- Whitespace/formatting/indentation
- Trivial renames (unless part of larger API change)
- Debug prints/temporary logging
- Comment changes (unless substantial docs)
- File moves without modification
- Single-line tweaks/typo fixes
- Internal implementation details invisible to users

WRITING RULES:
- Plain sentences only (bullets/numbering added during formatting)
- Short, direct (120 chars max per detail)
- Precise nouns (module/file/API names)
- Group related changes
- Include why or how validated when meaningful:
  Added retry logic to handle transient network failures.
  Migrated to async I/O to unblock event loop.
- Avoid meta phrases (This commit, Updated code, etc)

DETAILED DIFF:
```diff
{diff}
```"#;
421
/// Prompt template for the summary phase.
///
/// Asks the model to draft the description part of a conventional commit
/// summary (the text after `type(scope): `).
///
/// Placeholders appearing in the text: `{type}`, `{scope}`, `{details}`,
/// `{stat}` and `{chars}`.
///
/// NOTE(review): per the `allow` reason below, the copy that is actually used
/// lives in src/api/prompts.rs; how the placeholders get substituted is not
/// visible from this file.
#[allow(dead_code, reason = "Defined in src/api/prompts.rs where it is used")]
pub const SUMMARY_PROMPT_TEMPLATE: &str = r#"
Draft a conventional commit summary (WITHOUT type/scope prefix).

COMMIT TYPE: {type}
SCOPE: {scope}

DETAIL POINTS:
{details}

DIFF STAT:
```
{stat}
```

SUMMARY REQUIREMENTS:
1. Output ONLY the description part (after "type(scope): ")
2. Maximum {chars} characters
3. First word MUST be one of these past-tense verbs:
   added, fixed, updated, removed, replaced, improved, implemented,
   migrated, renamed, moved, merged, split, extracted, simplified,
   optimized, documented, tested, changed, introduced, deprecated,
   deleted, corrected, enhanced, restructured, reorganized, consolidated,
   reverted
4. Focus on primary change (single concept if scope is specific)
5. NO trailing period (conventional commits style)
6. NO leading adjectives before verb

FORBIDDEN PATTERNS:
- DO NOT repeat the commit type "{type}" in the summary
- If type is "refactor", use: restructured, reorganized, migrated, simplified,
  consolidated, extracted (NOT "refactored")
- NO filler words: "comprehensive", "improved", "enhanced", "various", "several"
- NO "and" conjunctions cramming multiple unrelated concepts

GOOD EXAMPLES (type in parens):
- (feat) "added TLS support with mutual authentication"
- (refactor) "migrated HTTP transport to unified builder API"
- (fix) "corrected race condition in connection pool"
- (perf) "optimized batch processing to reduce allocations"

BAD EXAMPLES:
- (refactor) "refactor TLS configuration" ❌ (repeats type)
- (feat) "add comprehensive support for..." ❌ (filler word)
- (chore) "update deps and improve build" ❌ (multiple concepts)

FULL FORMAT WILL BE: {type}({scope}): <your summary>

BEFORE RESPONDING:
✓ Summary ≤{chars} chars
✓ Starts lowercase
✓ First word is past-tense verb from list above
✓ Does NOT repeat type "{type}"
✓ NO trailing period
✓ NO filler words
✓ Single focused concept
✓ Aligns with detail points and diff stat
✓ Specific (names subsystem/artifact)
"#;