llm_git/
changelog.rs

//! Changelog maintenance for git commits
//!
//! This module auto-detects CHANGELOG.md files and generates entries
//! for staged changes, grouped by changelog boundary.
//!
//! Uses a single LLM call per changelog that sees existing entries
//! for style matching and deduplication.
8
9use std::{
10   collections::HashMap,
11   path::{Path, PathBuf},
12   process::Command,
13   thread,
14   time::Duration,
15};
16
17use serde::Deserialize;
18
19use crate::{
20   config::CommitConfig,
21   diff::smart_truncate_diff,
22   error::{CommitGenError, Result},
23   patch::stage_files,
24   templates,
25   tokens::create_token_counter,
26   types::{ChangelogBoundary, ChangelogCategory, UnreleasedSection},
27};
28
29/// Response from the changelog generation LLM call
30#[derive(Debug, Deserialize)]
31struct ChangelogResponse {
32   entries: HashMap<String, Vec<String>>,
33}
34
35/// Run the changelog maintenance flow
36///
37/// 1. Get staged files (excluding CHANGELOG.md files)
38/// 2. Detect changelog boundaries
39/// 3. For each boundary: generate entries via LLM, write to changelog
40/// 4. Stage modified changelogs
41pub fn run_changelog_flow(args: &crate::types::Args, config: &CommitConfig) -> Result<()> {
42   let token_counter = create_token_counter(config);
43
44   // Get list of staged files
45   let staged_files = get_staged_files(&args.dir)?;
46   if staged_files.is_empty() {
47      return Ok(());
48   }
49
50   // Filter out CHANGELOG.md files (don't analyze changelog changes as changes)
51   let non_changelog_files: Vec<_> = staged_files
52      .iter()
53      .filter(|f| !f.to_lowercase().ends_with("changelog.md"))
54      .cloned()
55      .collect();
56
57   if non_changelog_files.is_empty() {
58      return Ok(());
59   }
60
61   // Find all changelogs in repo
62   let changelogs = find_changelogs(&args.dir)?;
63   if changelogs.is_empty() {
64      // No changelogs found, skip silently
65      return Ok(());
66   }
67
68   // Detect boundaries
69   let boundaries = detect_boundaries(&non_changelog_files, &changelogs, &args.dir);
70   if boundaries.is_empty() {
71      return Ok(());
72   }
73
74   println!("{}", crate::style::info(&format!("Updating {} changelog(s)...", boundaries.len())));
75
76   let mut modified_changelogs = Vec::new();
77
78   for boundary in boundaries {
79      // Get diff and stat for this boundary's files
80      let diff = get_diff_for_files(&boundary.files, &args.dir)?;
81      let stat = get_stat_for_files(&boundary.files, &args.dir)?;
82
83      if diff.is_empty() {
84         continue;
85      }
86
87      // Truncate if needed
88      let diff = if diff.len() > config.max_diff_length {
89         smart_truncate_diff(&diff, config.max_diff_length, config, &token_counter)
90      } else {
91         diff
92      };
93
94      // Parse existing [Unreleased] section for context
95      let changelog_content = std::fs::read_to_string(&boundary.changelog_path).map_err(|e| {
96         CommitGenError::ChangelogParseError {
97            path:   boundary.changelog_path.display().to_string(),
98            reason: e.to_string(),
99         }
100      })?;
101
102      let unreleased = match parse_unreleased_section(&changelog_content, &boundary.changelog_path)
103      {
104         Ok(u) => u,
105         Err(CommitGenError::NoUnreleasedSection { path }) => {
106            eprintln!(
107               "{} No [Unreleased] section in {}, skipping changelog update",
108               crate::style::icons::WARNING,
109               path
110            );
111            continue;
112         },
113         Err(e) => return Err(e),
114      };
115
116      // Check if this is a package-scoped changelog (not root)
117      let is_package_changelog = boundary
118         .changelog_path
119         .parent()
120         .is_some_and(|p| p != Path::new(&args.dir) && p != Path::new("."));
121
122      // Format existing entries for LLM context
123      let existing_entries = format_existing_entries(&unreleased);
124
125      // Generate entries via LLM
126      let new_entries = match generate_changelog_entries(
127         &boundary.changelog_path,
128         is_package_changelog,
129         &stat,
130         &diff,
131         existing_entries.as_deref(),
132         config,
133      ) {
134         Ok(entries) => entries,
135         Err(e) => {
136            eprintln!(
137               "{}",
138               crate::style::warning(&format!("Failed to generate changelog entries: {e}"))
139            );
140            continue;
141         },
142      };
143
144      if new_entries.is_empty() {
145         continue;
146      }
147
148      // Save changelog debug output if requested
149      if let Some(debug_dir) = &args.debug_output {
150         let _ = std::fs::create_dir_all(debug_dir);
151         let changelog_json: HashMap<String, Vec<String>> = new_entries
152            .iter()
153            .map(|(cat, entries)| (cat.as_str().to_string(), entries.clone()))
154            .collect();
155         if let Ok(json_str) = serde_json::to_string_pretty(&changelog_json) {
156            let _ = std::fs::write(debug_dir.join("changelog.json"), json_str);
157         }
158      }
159
160      // Write entries to changelog
161      let updated = write_entries(&changelog_content, &unreleased, &new_entries);
162      std::fs::write(&boundary.changelog_path, updated).map_err(|e| {
163         CommitGenError::ChangelogParseError {
164            path:   boundary.changelog_path.display().to_string(),
165            reason: format!("Failed to write: {e}"),
166         }
167      })?;
168
169      let entry_count: usize = new_entries.values().map(|v| v.len()).sum();
170      modified_changelogs.push(boundary.changelog_path.display().to_string());
171      println!(
172         "{}  Added {} entries to {}",
173         crate::style::icons::SUCCESS,
174         entry_count,
175         boundary.changelog_path.display()
176      );
177   }
178
179   // Stage modified changelogs
180   if !modified_changelogs.is_empty() {
181      stage_files(&modified_changelogs, &args.dir)?;
182   }
183
184   Ok(())
185}
186
187/// Generate changelog entries via LLM
188fn generate_changelog_entries(
189   changelog_path: &Path,
190   is_package_changelog: bool,
191   stat: &str,
192   diff: &str,
193   existing_entries: Option<&str>,
194   config: &CommitConfig,
195) -> Result<HashMap<ChangelogCategory, Vec<String>>> {
196   let prompt = templates::render_changelog_prompt(
197      "default",
198      &changelog_path.display().to_string(),
199      is_package_changelog,
200      stat,
201      diff,
202      existing_entries,
203   )?;
204
205   let response = call_changelog_api(&prompt, config)?;
206
207   // Convert string keys to ChangelogCategory
208   let mut result = HashMap::new();
209   for (key, entries) in response.entries {
210      if entries.is_empty() {
211         continue;
212      }
213      let category = ChangelogCategory::from_name(&key);
214      result.insert(category, entries);
215   }
216
217   Ok(result)
218}
219
220/// Call the LLM API for changelog generation
221fn call_changelog_api(prompt: &str, config: &CommitConfig) -> Result<ChangelogResponse> {
222   let client = reqwest::blocking::Client::builder()
223      .timeout(Duration::from_secs(config.request_timeout_secs))
224      .connect_timeout(Duration::from_secs(config.connect_timeout_secs))
225      .build()
226      .expect("Failed to build HTTP client");
227
228   let model = config.analysis_model.clone();
229
230   let mut attempt = 0;
231   loop {
232      attempt += 1;
233
234      let request_body = serde_json::json!({
235         "model": model,
236         "max_tokens": 2000,
237         "temperature": config.temperature,
238         "messages": [{
239            "role": "user",
240            "content": prompt
241         }]
242      });
243
244      let mut request_builder = client
245         .post(format!("{}/chat/completions", config.api_base_url))
246         .header("content-type", "application/json");
247
248      if let Some(api_key) = &config.api_key {
249         request_builder = request_builder.header("Authorization", format!("Bearer {api_key}"));
250      }
251
252      let response = request_builder
253         .json(&request_body)
254         .send()
255         .map_err(CommitGenError::HttpError)?;
256
257      let status = response.status();
258
259      if status.is_server_error() {
260         if attempt < config.max_retries {
261            let backoff_ms = config.initial_backoff_ms * (1 << (attempt - 1));
262            eprintln!(
263               "{}",
264               crate::style::warning(&format!(
265                  "Server error {status}, retry {attempt}/{} after {backoff_ms}ms...",
266                  config.max_retries
267               ))
268            );
269            thread::sleep(Duration::from_millis(backoff_ms));
270            continue;
271         }
272         let error_text = response
273            .text()
274            .unwrap_or_else(|_| "Unknown error".to_string());
275         return Err(CommitGenError::ApiError { status: status.as_u16(), body: error_text });
276      }
277
278      if !status.is_success() {
279         let error_text = response
280            .text()
281            .unwrap_or_else(|_| "Unknown error".to_string());
282         return Err(CommitGenError::ApiError { status: status.as_u16(), body: error_text });
283      }
284
285      let api_response: serde_json::Value = response.json().map_err(CommitGenError::HttpError)?;
286
287      // Extract content from response
288      let content = api_response["choices"][0]["message"]["content"]
289         .as_str()
290         .ok_or_else(|| CommitGenError::Other("No content in API response".to_string()))?;
291
292      // Parse JSON from content (may be wrapped in markdown code blocks)
293      let json_str = extract_json_from_content(content);
294
295      let changelog_response: ChangelogResponse = serde_json::from_str(&json_str).map_err(|e| {
296         CommitGenError::Other(format!(
297            "Failed to parse changelog response: {e}. Content was: {}",
298            json_str.chars().take(500).collect::<String>()
299         ))
300      })?;
301
302      return Ok(changelog_response);
303   }
304}
305
/// Extract the JSON payload from LLM output that may wrap it in markdown.
///
/// Tried in order, first match wins:
/// 1. the body of the first closed ```` ```json ```` fenced block
/// 2. the body of the first closed generic fenced block (the rest of the
///    opening fence line is treated as a language tag and skipped)
/// 3. the span from the first `{` to the last `}`
/// 4. the trimmed input unchanged
///
/// Never panics: if the last `}` precedes the first `{` there is no object
/// span, and the trimmed input is returned as-is.
fn extract_json_from_content(content: &str) -> String {
   const FENCE: &str = "```";
   const JSON_FENCE: &str = "```json";

   let trimmed = content.trim();

   // Explicitly tagged JSON block
   if let Some(start) = trimmed.find(JSON_FENCE) {
      let after_marker = &trimmed[start + JSON_FENCE.len()..];
      if let Some(end) = after_marker.find(FENCE) {
         return after_marker[..end].trim().to_string();
      }
   }

   // Generic code block
   if let Some(start) = trimmed.find(FENCE) {
      let after_marker = &trimmed[start + FENCE.len()..];
      // Skip the optional language identifier on the fence line
      let body_start = after_marker.find('\n').map_or(0, |i| i + 1);
      let body = &after_marker[body_start..];
      if let Some(end) = body.find(FENCE) {
         return body[..end].trim().to_string();
      }
   }

   // Raw JSON object; only valid when the closing brace follows the opening
   // one, otherwise the inclusive slice would be out of order.
   if let (Some(start), Some(end)) = (trimmed.find('{'), trimmed.rfind('}')) {
      if start < end {
         return trimmed[start..=end].to_string();
      }
   }

   trimmed.to_string()
}
338
339/// Format existing entries for LLM context
340fn format_existing_entries(unreleased: &UnreleasedSection) -> Option<String> {
341   if unreleased.entries.is_empty() {
342      return None;
343   }
344
345   let mut lines = Vec::new();
346   for category in ChangelogCategory::render_order() {
347      if let Some(entries) = unreleased.entries.get(category) {
348         if entries.is_empty() {
349            continue;
350         }
351         lines.push(format!("### {}", category.as_str()));
352         for entry in entries {
353            lines.push(entry.clone());
354         }
355         lines.push(String::new());
356      }
357   }
358
359   if lines.is_empty() {
360      None
361   } else {
362      Some(lines.join("\n"))
363   }
364}
365
366/// Get list of staged files
367fn get_staged_files(dir: &str) -> Result<Vec<String>> {
368   let output = Command::new("git")
369      .args(["diff", "--cached", "--name-only"])
370      .current_dir(dir)
371      .output()
372      .map_err(|e| CommitGenError::GitError(format!("Failed to get staged files: {e}")))?;
373
374   if !output.status.success() {
375      let stderr = String::from_utf8_lossy(&output.stderr);
376      return Err(CommitGenError::GitError(format!(
377         "git diff --cached --name-only failed: {stderr}"
378      )));
379   }
380
381   let files: Vec<String> = String::from_utf8_lossy(&output.stdout)
382      .lines()
383      .filter(|s| !s.is_empty())
384      .map(String::from)
385      .collect();
386
387   Ok(files)
388}
389
390/// Find all CHANGELOG.md files in the repo
391fn find_changelogs(dir: &str) -> Result<Vec<PathBuf>> {
392   let output = Command::new("git")
393      .args(["ls-files", "--full-name", "**/CHANGELOG.md", "CHANGELOG.md"])
394      .current_dir(dir)
395      .output()
396      .map_err(|e| CommitGenError::GitError(format!("Failed to find changelogs: {e}")))?;
397
398   // git ls-files returns empty if no matches, which is fine
399   let files: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
400      .lines()
401      .filter(|s| !s.is_empty())
402      .map(|s| PathBuf::from(dir).join(s))
403      .collect();
404
405   Ok(files)
406}
407
408/// Detect changelog boundaries for files
409fn detect_boundaries(
410   files: &[String],
411   changelogs: &[PathBuf],
412   dir: &str,
413) -> Vec<ChangelogBoundary> {
414   let mut file_to_changelog: HashMap<String, PathBuf> = HashMap::new();
415
416   // Build a map of directory path (relative) -> changelog
417   // e.g., "packages/core" -> "packages/core/CHANGELOG.md"
418   //       "" (empty) -> "CHANGELOG.md" (root)
419   let mut dir_to_changelog: HashMap<String, PathBuf> = HashMap::new();
420   let mut root_changelog: Option<PathBuf> = None;
421
422   for changelog in changelogs {
423      // Get the relative path from repo root
424      let rel_path = changelog
425         .strip_prefix(dir)
426         .unwrap_or(changelog)
427         .to_string_lossy();
428
429      // Parent directory of the changelog
430      if let Some(parent) = Path::new(&*rel_path).parent() {
431         let parent_str = parent.to_string_lossy().to_string();
432         if parent_str.is_empty() || parent_str == "." {
433            root_changelog = Some(changelog.clone());
434         } else {
435            dir_to_changelog.insert(parent_str, changelog.clone());
436         }
437      }
438   }
439
440   for file in files {
441      // Walk up from file's directory to find matching changelog
442      let mut current_path = Path::new(file)
443         .parent()
444         .map(|p| p.to_string_lossy().to_string());
445      let mut found = false;
446
447      while let Some(ref dir_path) = current_path {
448         if let Some(changelog) = dir_to_changelog.get(dir_path) {
449            file_to_changelog.insert(file.clone(), changelog.clone());
450            found = true;
451            break;
452         }
453
454         // Move up one directory
455         let path = Path::new(dir_path);
456         current_path = path.parent().and_then(|p| {
457            let s = p.to_string_lossy().to_string();
458            if s.is_empty() { None } else { Some(s) }
459         });
460      }
461
462      // Fallback to root changelog
463      if !found && let Some(ref root) = root_changelog {
464         file_to_changelog.insert(file.clone(), root.clone());
465      }
466      // If no root changelog, file is skipped
467   }
468
469   // Group files by changelog
470   let mut changelog_to_files: HashMap<PathBuf, Vec<String>> = HashMap::new();
471   for (file, changelog) in file_to_changelog {
472      changelog_to_files.entry(changelog).or_default().push(file);
473   }
474
475   // Build boundaries
476   let boundaries: Vec<ChangelogBoundary> = changelog_to_files
477      .into_iter()
478      .map(|(changelog_path, files)| ChangelogBoundary {
479         changelog_path,
480         files,
481         diff: String::new(), // Filled later
482         stat: String::new(), // Filled later
483      })
484      .collect();
485
486   boundaries
487}
488
489/// Get diff for specific files
490fn get_diff_for_files(files: &[String], dir: &str) -> Result<String> {
491   if files.is_empty() {
492      return Ok(String::new());
493   }
494
495   let output = Command::new("git")
496      .args(["diff", "--cached", "--"])
497      .args(files)
498      .current_dir(dir)
499      .output()
500      .map_err(|e| CommitGenError::GitError(format!("Failed to get diff for files: {e}")))?;
501
502   Ok(String::from_utf8_lossy(&output.stdout).to_string())
503}
504
505/// Get stat for specific files
506fn get_stat_for_files(files: &[String], dir: &str) -> Result<String> {
507   if files.is_empty() {
508      return Ok(String::new());
509   }
510
511   let output = Command::new("git")
512      .args(["diff", "--cached", "--stat", "--"])
513      .args(files)
514      .current_dir(dir)
515      .output()
516      .map_err(|e| CommitGenError::GitError(format!("Failed to get stat for files: {e}")))?;
517
518   Ok(String::from_utf8_lossy(&output.stdout).to_string())
519}
520
521/// Parse the [Unreleased] section from changelog content
522fn parse_unreleased_section(content: &str, path: &Path) -> Result<UnreleasedSection> {
523   let lines: Vec<&str> = content.lines().collect();
524
525   // Find [Unreleased] header
526   let header_line = lines
527      .iter()
528      .position(|l| {
529         let trimmed = l.trim().to_lowercase();
530         trimmed.contains("[unreleased]") || trimmed == "## unreleased"
531      })
532      .ok_or_else(|| CommitGenError::NoUnreleasedSection { path: path.display().to_string() })?;
533
534   // Find end of unreleased section (next version header or EOF)
535   let end_line = lines
536      .iter()
537      .skip(header_line + 1)
538      .position(|l| {
539         let trimmed = l.trim();
540         // Look for version headers like ## [1.0.0] or ## 1.0.0
541         trimmed.starts_with("## [") && trimmed.contains(']')
542            || (trimmed.starts_with("## ")
543               && trimmed.chars().nth(3).is_some_and(|c| c.is_ascii_digit()))
544      })
545      .map_or(lines.len(), |pos| header_line + 1 + pos);
546
547   // Parse existing entries
548   let mut entries: HashMap<ChangelogCategory, Vec<String>> = HashMap::new();
549   let mut current_category: Option<ChangelogCategory> = None;
550
551   for line in &lines[header_line + 1..end_line] {
552      let trimmed = line.trim();
553
554      // Check for category headers
555      if trimmed.starts_with("### ") {
556         let cat_name = trimmed.trim_start_matches("### ").trim();
557         current_category = match cat_name.to_lowercase().as_str() {
558            "added" => Some(ChangelogCategory::Added),
559            "changed" => Some(ChangelogCategory::Changed),
560            "fixed" => Some(ChangelogCategory::Fixed),
561            "deprecated" => Some(ChangelogCategory::Deprecated),
562            "removed" => Some(ChangelogCategory::Removed),
563            "security" => Some(ChangelogCategory::Security),
564            "breaking changes" | "breaking" => Some(ChangelogCategory::Breaking),
565            _ => None,
566         };
567      } else if let Some(cat) = current_category {
568         // Collect entry lines
569         if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
570            entries.entry(cat).or_default().push(trimmed.to_string());
571         }
572      }
573   }
574
575   Ok(UnreleasedSection { header_line, end_line, entries })
576}
577
578/// Write entries to changelog content
579fn write_entries(
580   content: &str,
581   unreleased: &UnreleasedSection,
582   new_entries: &HashMap<ChangelogCategory, Vec<String>>,
583) -> String {
584   let lines: Vec<&str> = content.lines().collect();
585
586   // Build new content
587   let mut result = Vec::new();
588
589   // Copy lines up to and including [Unreleased] header
590   result.extend(
591      lines[..=unreleased.header_line]
592         .iter()
593         .map(|s| s.to_string()),
594   );
595
596   // Add blank line after header if not present
597   if unreleased.header_line + 1 < lines.len() && !lines[unreleased.header_line + 1].is_empty() {
598      result.push(String::new());
599   }
600
601   // Write categories in order
602   for category in ChangelogCategory::render_order() {
603      let new_in_category = new_entries.get(category);
604      let existing_in_category = unreleased.entries.get(category);
605
606      let has_new = new_in_category.is_some_and(|v| !v.is_empty());
607      let has_existing = existing_in_category.is_some_and(|v| !v.is_empty());
608
609      if !has_new && !has_existing {
610         continue;
611      }
612
613      result.push(format!("### {}", category.as_str()));
614      result.push(String::new());
615
616      // New entries first
617      if let Some(entries) = new_in_category {
618         for entry in entries {
619            // Ensure entry starts with "- "
620            if entry.starts_with("- ") || entry.starts_with("* ") {
621               result.push(entry.clone());
622            } else {
623               result.push(format!("- {entry}"));
624            }
625         }
626      }
627
628      // Then existing entries
629      if let Some(entries) = existing_in_category {
630         for entry in entries {
631            result.push(entry.clone());
632         }
633      }
634
635      result.push(String::new());
636   }
637
638   // Copy remaining lines (after [Unreleased] section)
639   if unreleased.end_line < lines.len() {
640      result.extend(lines[unreleased.end_line..].iter().map(|s| s.to_string()));
641   }
642
643   result.join("\n")
644}
645
#[cfg(test)]
mod tests {
   use super::*;

   /// Payload expected back from the extraction tests that use an `Added`
   /// entry.
   const ADDED_JSON: &str = r#"{"entries": {"Added": ["entry 1"]}}"#;

   /// Bare JSON is returned untouched.
   #[test]
   fn test_extract_json_from_content_raw() {
      assert_eq!(extract_json_from_content(ADDED_JSON), ADDED_JSON);
   }

   /// A ```json fenced block is unwrapped and surrounding prose dropped.
   #[test]
   fn test_extract_json_from_content_code_block() {
      let content = r#"Here's the changelog:

```json
{"entries": {"Added": ["entry 1"]}}
```

That's all!"#;
      assert_eq!(extract_json_from_content(content), ADDED_JSON);
   }

   /// A fence without a language tag is unwrapped as well.
   #[test]
   fn test_extract_json_from_content_generic_block() {
      let content = r#"```
{"entries": {"Fixed": ["bug fix"]}}
```"#;
      let expected = r#"{"entries": {"Fixed": ["bug fix"]}}"#;
      assert_eq!(extract_json_from_content(content), expected);
   }

   /// Section bounds and per-category entry counts are picked up.
   #[test]
   fn test_parse_unreleased_section() {
      let content = r"# Changelog

## [Unreleased]

### Added

- Feature one
- Feature two

### Fixed

- Bug fix

## [1.0.0] - 2024-01-01

### Added

- Initial release
";

      let section = parse_unreleased_section(content, Path::new("CHANGELOG.md")).unwrap();
      assert_eq!(section.header_line, 2);
      // Index 13 is the "## [1.0.0] - 2024-01-01" line
      assert_eq!(section.end_line, 13);

      let added = section.entries.get(&ChangelogCategory::Added).map(Vec::len);
      assert_eq!(added, Some(2));

      let fixed = section.entries.get(&ChangelogCategory::Fixed).map(Vec::len);
      assert_eq!(fixed, Some(1));
   }

   /// Headings and entries of every populated category are rendered.
   #[test]
   fn test_format_existing_entries() {
      let entries = HashMap::from([
         (ChangelogCategory::Added, vec![
            "- Feature one".to_string(),
            "- Feature two".to_string(),
         ]),
         (ChangelogCategory::Fixed, vec!["- Bug fix".to_string()]),
      ]);
      let unreleased = UnreleasedSection { header_line: 0, end_line: 10, entries };

      let formatted = format_existing_entries(&unreleased).unwrap();
      for needle in ["### Added", "- Feature one", "### Fixed", "- Bug fix"] {
         assert!(formatted.contains(needle));
      }
   }

   /// An empty section yields no context at all.
   #[test]
   fn test_format_existing_entries_empty() {
      let unreleased =
         UnreleasedSection { header_line: 0, end_line: 10, entries: HashMap::new() };

      assert!(format_existing_entries(&unreleased).is_none());
   }
}