1use std::path::Path;
7
8use futures::stream::{self, StreamExt};
9use serde::{Deserialize, Serialize};
10
11use crate::{
12 api::retry_api_call,
13 config::{CommitConfig, ResolvedApiMode},
14 diff::{FileDiff, parse_diff, reconstruct_diff},
15 error::{CommitGenError, Result},
16 templates,
17 tokens::TokenCounter,
18 types::ConventionalAnalysis,
19};
20
/// Observations collected for a single file during the map phase.
///
/// A slice of these is serialized to JSON and embedded in the reduce prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileObservation {
    /// Path of the file the observations refer to.
    pub file: String,
    /// Observation strings describing what changed in the file.
    pub observations: Vec<String>,
    /// Added-line count; the map-phase code visible in this file always sets it to 0.
    pub additions: usize,
    /// Deleted-line count; the map-phase code visible in this file always sets it to 0.
    pub deletions: usize,
}
29
/// Number of non-excluded files at or above which `should_use_map_reduce` selects map-reduce.
const MIN_FILES_FOR_MAP_REDUCE: usize = 4;
32
/// Per-file token estimate above which map-reduce is selected and the file's diff is truncated
/// before being sent to the model in the map phase.
const MAX_FILE_TOKENS: usize = 50_000;
36
37pub fn should_use_map_reduce(diff: &str, config: &CommitConfig, counter: &TokenCounter) -> bool {
42 if !config.map_reduce_enabled {
43 return false;
44 }
45
46 let files = parse_diff(diff);
47 let file_count = files
48 .iter()
49 .filter(|f| {
50 !config
51 .excluded_files
52 .iter()
53 .any(|ex| f.filename.ends_with(ex))
54 })
55 .count();
56
57 file_count >= MIN_FILES_FOR_MAP_REDUCE
59 || files
60 .iter()
61 .any(|f| f.token_estimate(counter) > MAX_FILE_TOKENS)
62}
63
/// Maximum number of sibling files listed individually by `generate_context_header`.
const MAX_CONTEXT_FILES: usize = 20;
66
67fn generate_context_header(files: &[FileDiff], current_file: &str) -> String {
69 if files.len() > 100 {
71 return format!("(Large commit with {} total files)", files.len());
72 }
73
74 let mut lines = vec!["OTHER FILES IN THIS CHANGE:".to_string()];
75
76 let other_files: Vec<_> = files
77 .iter()
78 .filter(|f| f.filename != current_file)
79 .collect();
80
81 let total_other = other_files.len();
82
83 let to_show: Vec<&FileDiff> = if total_other > MAX_CONTEXT_FILES {
85 let mut sorted = other_files;
86 sorted.sort_by_key(|f| std::cmp::Reverse(f.additions + f.deletions));
87 sorted.truncate(MAX_CONTEXT_FILES);
88 sorted
89 } else {
90 other_files
91 };
92
93 for file in &to_show {
94 let line_count = file.additions + file.deletions;
95 let description = infer_file_description(&file.filename, &file.content);
96 lines.push(format!("- {} ({} lines): {}", file.filename, line_count, description));
97 }
98
99 if to_show.len() < total_other {
100 lines.push(format!("... and {} more files", total_other - to_show.len()));
101 }
102
103 if lines.len() == 1 {
104 return String::new(); }
106
107 lines.join("\n")
108}
109
/// Guesses a short human-readable label for a file from its name and, failing that, its content.
///
/// Checks run in a fixed priority order and the first match wins: filename substrings and
/// extensions first (test, markdown, configuration, error, type, module root, entry point), then
/// keywords found in `content`. Falls back to `"source code"`.
fn infer_file_description(filename: &str, content: &str) -> &'static str {
    let lowered = filename.to_lowercase();
    let extension = Path::new(filename).extension();

    // Small predicates so the priority table below reads top to bottom.
    let name_has = |needle: &str| lowered.contains(needle);
    let name_ends_with = |suffixes: &[&str]| suffixes.iter().any(|&s| lowered.ends_with(s));
    let has_extension = |candidates: &[&str]| {
        extension.is_some_and(|ext| candidates.iter().any(|&c| ext.eq_ignore_ascii_case(c)))
    };
    let content_has = |needles: &[&str]| needles.iter().any(|&n| content.contains(n));

    if name_has("test") {
        "test file"
    } else if has_extension(&["md"]) {
        "documentation"
    } else if name_has("config") || has_extension(&["toml", "yaml", "yml"]) {
        "configuration"
    } else if name_has("error") {
        "error definitions"
    } else if name_has("type") {
        "type definitions"
    } else if name_ends_with(&["mod.rs", "lib.rs"]) {
        "module exports"
    } else if name_ends_with(&["main.rs", "main.go", "main.py"]) {
        "entry point"
    } else if content_has(&["impl ", "fn "]) {
        // Content-based guesses only apply when the filename gave no hint.
        "implementation"
    } else if content_has(&["struct ", "enum "]) {
        "type definitions"
    } else if content_has(&["async ", "await"]) {
        "async code"
    } else {
        "source code"
    }
}
162
163async fn map_phase(
165 files: &[FileDiff],
166 model_name: &str,
167 config: &CommitConfig,
168 counter: &TokenCounter,
169) -> Result<Vec<FileObservation>> {
170 let observations: Vec<Result<FileObservation>> = stream::iter(files.iter())
172 .map(|file| async {
173 if file.is_binary {
174 return Ok(FileObservation {
175 file: file.filename.clone(),
176 observations: vec!["Binary file changed.".to_string()],
177 additions: 0,
178 deletions: 0,
179 });
180 }
181
182 let context_header = generate_context_header(files, &file.filename);
183 let mut file_clone = file.clone();
185 let file_tokens = file_clone.token_estimate(counter);
186 if file_tokens > MAX_FILE_TOKENS {
187 let target_size = MAX_FILE_TOKENS * 4; file_clone.truncate(target_size);
189 eprintln!(
190 " {} truncated {} ({} \u{2192} {} tokens)",
191 crate::style::icons::WARNING,
192 file.filename,
193 file_tokens,
194 file_clone.token_estimate(counter)
195 );
196 }
197
198 let file_diff = reconstruct_diff(&[file_clone]);
199
200 map_single_file(&file.filename, &file_diff, &context_header, model_name, config).await
201 })
202 .buffer_unordered(8)
203 .collect()
204 .await;
205
206 observations.into_iter().collect()
208}
209
/// Asks the model for observations about one file's diff, wrapped in `retry_api_call`.
///
/// Renders the "default" map prompt from `filename`, `file_diff` and `context_header`, sends it
/// using the API mode resolved for `model_name`, and parses the reply into a
/// [`FileObservation`]. `additions` and `deletions` are always set to 0 here.
///
/// The closure handed to `retry_api_call` returns `Ok((flag, value))`. `flag` is `true` on the
/// paths whose log messages say "retrying" (server errors, empty bodies/content) — presumably
/// the retry signal; confirm against `retry_api_call`.
///
/// # Errors
/// Returns `CommitGenError::ApiError` for non-success statuses that are not server errors, and
/// `CommitGenError::Other` when the response cannot be parsed or contains no observation. Errors
/// from prompt rendering and `timed_send` are propagated with `?`.
async fn map_single_file(
    filename: &str,
    file_diff: &str,
    context_header: &str,
    model_name: &str,
    config: &CommitConfig,
) -> Result<FileObservation> {
    retry_api_call(config, async move || {
        let client = crate::api::get_client(config);

        let tool = build_observation_tool();

        let parts = templates::render_map_prompt("default", filename, file_diff, context_header)?;
        let mode = config.resolved_api_mode(model_name);

        // Step 1: send the request in the resolved API flavor and obtain the raw response body.
        let response_text = match mode {
            ResolvedApiMode::ChatCompletions => {
                let request = build_api_request(
                    config,
                    model_name,
                    config.temperature,
                    vec![tool],
                    &parts.system,
                    &parts.user,
                    "map",
                    "default",
                );

                let mut request_builder = client
                    .post(format!("{}/chat/completions", config.api_base_url))
                    .header("content-type", "application/json");

                if let Some(api_key) = &config.api_key {
                    request_builder =
                        request_builder.header("Authorization", format!("Bearer {api_key}"));
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/map", model_name).await?;

                // Server errors are logged and reported with the flag set instead of an `Err`.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
            ResolvedApiMode::AnthropicMessages => {
                let prompt_caching = anthropic_prompt_caching_enabled(config);
                // Same name, description and schema as `build_observation_tool`, in the
                // Anthropic tool shape.
                let mut tools = vec![AnthropicTool {
                    name: "create_file_observation".to_string(),
                    description: "Extract observations from a single file's changes".to_string(),
                    input_schema: serde_json::json!({
                        "type": "object",
                        "properties": {
                            "observations": {
                                "type": "array",
                                "description": "List of factual observations about what changed in this file",
                                "items": {"type": "string"}
                            }
                        },
                        "required": ["observations"]
                    }),
                    cache_control: None,
                }];
                cache_last_anthropic_tool(&mut tools, prompt_caching);

                let request = AnthropicRequest {
                    model: model_name.to_string(),
                    max_tokens: 1500,
                    temperature: config.temperature,
                    system: anthropic_system_content(&parts.system, prompt_caching),
                    tools,
                    // Force the model to answer through the observation tool.
                    tool_choice: Some(AnthropicToolChoice {
                        choice_type: "tool".to_string(),
                        name: "create_file_observation".to_string(),
                    }),
                    messages: vec![AnthropicMessage {
                        role: "user".to_string(),
                        content: vec![anthropic_text_content(parts.user, false)],
                    }],
                };

                let mut request_builder = append_anthropic_cache_beta_header(
                    client
                        .post(anthropic_messages_url(&config.api_base_url))
                        .header("content-type", "application/json")
                        .header("anthropic-version", "2023-06-01"),
                    prompt_caching,
                );

                if let Some(api_key) = &config.api_key {
                    request_builder = request_builder.header("x-api-key", api_key);
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/map", model_name).await?;

                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
        };

        if response_text.trim().is_empty() {
            crate::style::warn("Model returned empty response body for observation; retrying.");
            return Ok((true, None));
        }

        // Step 2: parse the body according to the API flavor that produced it.
        match mode {
            ResolvedApiMode::ChatCompletions => {
                let api_response: ApiResponse = serde_json::from_str(&response_text).map_err(|e| {
                    CommitGenError::Other(format!(
                        "Failed to parse observation response JSON: {e}. Response body: {}",
                        response_snippet(&response_text, 500)
                    ))
                })?;

                if api_response.choices.is_empty() {
                    return Err(CommitGenError::Other(
                        "API returned empty response for file observation".to_string(),
                    ));
                }

                let message = &api_response.choices[0].message;

                // Preferred path: the first tool call, when its name ends with the expected
                // tool name.
                if !message.tool_calls.is_empty() {
                    let tool_call = &message.tool_calls[0];
                    if tool_call.function.name.ends_with("create_file_observation") {
                        let args = &tool_call.function.arguments;
                        if args.is_empty() {
                            return Err(CommitGenError::Other(
                                "Model returned empty function arguments for observation".to_string(),
                            ));
                        }

                        let obs: FileObservationResponse = serde_json::from_str(args).map_err(|e| {
                            CommitGenError::Other(format!("Failed to parse observation response: {e}"))
                        })?;

                        return Ok((
                            false,
                            Some(FileObservation {
                                file: filename.to_string(),
                                observations: obs.observations,
                                additions: 0,
                                deletions: 0,
                            }),
                        ));
                    }
                }

                // Fallback: treat the plain message content as the JSON payload.
                if let Some(content) = &message.content {
                    if content.trim().is_empty() {
                        crate::style::warn("Model returned empty content for observation; retrying.");
                        return Ok((true, None));
                    }
                    let obs: FileObservationResponse =
                        serde_json::from_str(content.trim()).map_err(|e| {
                            CommitGenError::Other(format!(
                                "Failed to parse observation content JSON: {e}. Content: {}",
                                response_snippet(content, 500)
                            ))
                        })?;
                    return Ok((
                        false,
                        Some(FileObservation {
                            file: filename.to_string(),
                            observations: obs.observations,
                            additions: 0,
                            deletions: 0,
                        }),
                    ));
                }

                Err(CommitGenError::Other("No observation found in API response".to_string()))
            },
            ResolvedApiMode::AnthropicMessages => {
                let (tool_input, text_content, stop_reason) =
                    extract_anthropic_content(&response_text, "create_file_observation")?;

                if let Some(input) = tool_input {
                    // `observations` may be an array of strings or one string that needs
                    // splitting; anything else counts as empty.
                    let mut observations = match input.get("observations") {
                        Some(serde_json::Value::Array(arr)) => arr
                            .iter()
                            .filter_map(|v| v.as_str().map(str::to_string))
                            .collect::<Vec<_>>(),
                        Some(serde_json::Value::String(s)) => parse_string_to_observations(s),
                        _ => Vec::new(),
                    };

                    // Empty tool input: try the text blocks, then a generic fallback when the
                    // model stopped at max_tokens, otherwise continue with no observations.
                    if observations.is_empty() {
                        let text_observations = parse_observations_from_text(&text_content);
                        if !text_observations.is_empty() {
                            observations = text_observations;
                        } else if stop_reason.as_deref() == Some("max_tokens") {
                            crate::style::warn(
                                "Anthropic stopped at max_tokens with empty observations; using fallback \
                                 observation.",
                            );
                            let fallback_target = Path::new(filename)
                                .file_name()
                                .and_then(|name| name.to_str())
                                .unwrap_or(filename);
                            observations = vec![format!("Updated {fallback_target}.")];
                        } else {
                            crate::style::warn(
                                "Model returned empty observation tool input; continuing with no \
                                 observations.",
                            );
                        }
                    }

                    return Ok((
                        false,
                        Some(FileObservation {
                            file: filename.to_string(),
                            observations,
                            additions: 0,
                            deletions: 0,
                        }),
                    ));
                }

                // No tool_use block for our tool: fall back to the text content.
                if text_content.trim().is_empty() {
                    crate::style::warn("Model returned empty content for observation; retrying.");
                    return Ok((true, None));
                }

                let obs: FileObservationResponse =
                    serde_json::from_str(text_content.trim()).map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse observation content JSON: {e}. Content: {}",
                            response_snippet(&text_content, 500)
                        ))
                    })?;
                Ok((
                    false,
                    Some(FileObservation {
                        file: filename.to_string(),
                        observations: obs.observations,
                        additions: 0,
                        deletions: 0,
                    }),
                ))
            },
        }
    }).await
}
484
/// Reduce phase: synthesizes per-file observations into one [`ConventionalAnalysis`].
///
/// Serializes `observations` to pretty JSON, renders the "default" reduce prompt together with
/// `stat`, `scope_candidates` and the configured type descriptions, sends it using the API mode
/// resolved for `model_name`, and parses the reply. The whole exchange runs inside
/// `retry_api_call`.
///
/// The closure returns `Ok((flag, value))`. `flag` is `true` on the paths whose log messages say
/// "retrying" (server errors, empty bodies/content) — presumably the retry signal; confirm
/// against `retry_api_call`.
///
/// # Errors
/// Returns `CommitGenError::ApiError` for non-success statuses that are not server errors, and
/// `CommitGenError::Other` when the response cannot be parsed or contains no analysis. Errors
/// from prompt rendering and `timed_send` are propagated with `?`.
pub async fn reduce_phase(
    observations: &[FileObservation],
    stat: &str,
    scope_candidates: &str,
    model_name: &str,
    config: &CommitConfig,
) -> Result<ConventionalAnalysis> {
    retry_api_call(config, async move || {
        let client = crate::api::get_client(config);

        // The allowed commit types come from the configuration's `types` keys.
        let type_enum: Vec<&str> = config.types.keys().map(|s| s.as_str()).collect();

        let tool = build_analysis_tool(&type_enum);

        // A serialization failure degrades to an empty JSON array instead of aborting.
        let observations_json =
            serde_json::to_string_pretty(observations).unwrap_or_else(|_| "[]".to_string());

        let types_description = crate::api::format_types_description(config);
        let parts = templates::render_reduce_prompt(
            "default",
            &observations_json,
            stat,
            scope_candidates,
            Some(&types_description),
        )?;
        let mode = config.resolved_api_mode(model_name);

        // Step 1: send the request in the resolved API flavor and obtain the raw response body.
        let response_text = match mode {
            ResolvedApiMode::ChatCompletions => {
                let request = build_api_request(
                    config,
                    model_name,
                    config.temperature,
                    vec![tool],
                    &parts.system,
                    &parts.user,
                    "reduce",
                    "default",
                );

                let mut request_builder = client
                    .post(format!("{}/chat/completions", config.api_base_url))
                    .header("content-type", "application/json");

                if let Some(api_key) = &config.api_key {
                    request_builder =
                        request_builder.header("Authorization", format!("Bearer {api_key}"));
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/reduce", model_name).await?;

                // Server errors are logged and reported with the flag set instead of an `Err`.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
            ResolvedApiMode::AnthropicMessages => {
                let prompt_caching = anthropic_prompt_caching_enabled(config);
                // Same properties as `build_analysis_tool`, in the Anthropic tool shape (the
                // tool description text differs).
                let mut tools = vec![AnthropicTool {
                    name: "create_conventional_analysis".to_string(),
                    description: "Analyze changes and classify as conventional commit with type, \
                                  scope, details, and metadata"
                        .to_string(),
                    input_schema: serde_json::json!({
                        "type": "object",
                        "properties": {
                            "type": {
                                "type": "string",
                                "enum": type_enum,
                                "description": "Commit type based on change classification"
                            },
                            "scope": {
                                "type": "string",
                                "description": "Optional scope (module/component). Omit if unclear or multi-component."
                            },
                            "details": {
                                "type": "array",
                                "description": "Array of 0-6 detail items with changelog metadata.",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "text": {
                                            "type": "string",
                                            "description": "Detail about change, starting with past-tense verb, ending with period"
                                        },
                                        "changelog_category": {
                                            "type": "string",
                                            "enum": ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security"],
                                            "description": "Changelog category if user-visible. Omit for internal changes."
                                        },
                                        "user_visible": {
                                            "type": "boolean",
                                            "description": "True if this change affects users/API and should appear in changelog"
                                        }
                                    },
                                    "required": ["text", "user_visible"]
                                }
                            },
                            "issue_refs": {
                                "type": "array",
                                "description": "Issue numbers from context (e.g., ['#123', '#456']). Empty if none.",
                                "items": {
                                    "type": "string"
                                }
                            }
                        },
                        "required": ["type", "details", "issue_refs"]
                    }),
                    cache_control: None,
                }];
                cache_last_anthropic_tool(&mut tools, prompt_caching);

                let request = AnthropicRequest {
                    model: model_name.to_string(),
                    max_tokens: 1500,
                    temperature: config.temperature,
                    system: anthropic_system_content(&parts.system, prompt_caching),
                    tools,
                    // Force the model to answer through the analysis tool.
                    tool_choice: Some(AnthropicToolChoice {
                        choice_type: "tool".to_string(),
                        name: "create_conventional_analysis".to_string(),
                    }),
                    messages: vec![AnthropicMessage {
                        role: "user".to_string(),
                        content: vec![anthropic_text_content(parts.user, false)],
                    }],
                };

                let mut request_builder = append_anthropic_cache_beta_header(
                    client
                        .post(anthropic_messages_url(&config.api_base_url))
                        .header("content-type", "application/json")
                        .header("anthropic-version", "2023-06-01"),
                    prompt_caching,
                );

                if let Some(api_key) = &config.api_key {
                    request_builder = request_builder.header("x-api-key", api_key);
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/reduce", model_name).await?;

                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
        };

        if response_text.trim().is_empty() {
            crate::style::warn("Model returned empty response body for synthesis; retrying.");
            return Ok((true, None));
        }

        // Step 2: parse the body according to the API flavor that produced it.
        match mode {
            ResolvedApiMode::ChatCompletions => {
                let api_response: ApiResponse = serde_json::from_str(&response_text).map_err(|e| {
                    CommitGenError::Other(format!(
                        "Failed to parse synthesis response JSON: {e}. Response body: {}",
                        response_snippet(&response_text, 500)
                    ))
                })?;

                if api_response.choices.is_empty() {
                    return Err(CommitGenError::Other(
                        "API returned empty response for synthesis".to_string(),
                    ));
                }

                let message = &api_response.choices[0].message;

                // Preferred path: the first tool call, when its name ends with the expected
                // tool name.
                if !message.tool_calls.is_empty() {
                    let tool_call = &message.tool_calls[0];
                    if tool_call
                        .function
                        .name
                        .ends_with("create_conventional_analysis")
                    {
                        let args = &tool_call.function.arguments;
                        if args.is_empty() {
                            return Err(CommitGenError::Other(
                                "Model returned empty function arguments for synthesis".to_string(),
                            ));
                        }

                        let analysis: ConventionalAnalysis = serde_json::from_str(args).map_err(|e| {
                            CommitGenError::Other(format!("Failed to parse synthesis response: {e}"))
                        })?;

                        return Ok((false, Some(analysis)));
                    }
                }

                // Fallback: treat the plain message content as the JSON payload.
                if let Some(content) = &message.content {
                    if content.trim().is_empty() {
                        crate::style::warn("Model returned empty content for synthesis; retrying.");
                        return Ok((true, None));
                    }
                    let analysis: ConventionalAnalysis =
                        serde_json::from_str(content.trim()).map_err(|e| {
                            CommitGenError::Other(format!(
                                "Failed to parse synthesis content JSON: {e}. Content: {}",
                                response_snippet(content, 500)
                            ))
                        })?;
                    return Ok((false, Some(analysis)));
                }

                Err(CommitGenError::Other("No analysis found in synthesis response".to_string()))
            },
            ResolvedApiMode::AnthropicMessages => {
                let (tool_input, text_content, stop_reason) =
                    extract_anthropic_content(&response_text, "create_conventional_analysis")?;

                if let Some(input) = tool_input {
                    let analysis: ConventionalAnalysis = serde_json::from_value(input).map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse synthesis tool input: {e}. Response body: {}",
                            response_snippet(&response_text, 500)
                        ))
                    })?;
                    return Ok((false, Some(analysis)));
                }

                // No tool_use block for our tool: fall back to the text content. Both empty
                // cases return the same value and differ only in the warning text.
                if text_content.trim().is_empty() {
                    if stop_reason.as_deref() == Some("max_tokens") {
                        crate::style::warn(
                            "Anthropic stopped at max_tokens with empty synthesis; retrying.",
                        );
                        return Ok((true, None));
                    }
                    crate::style::warn("Model returned empty content for synthesis; retrying.");
                    return Ok((true, None));
                }

                let analysis: ConventionalAnalysis = serde_json::from_str(text_content.trim())
                    .map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse synthesis content JSON: {e}. Content: {}",
                            response_snippet(&text_content, 500)
                        ))
                    })?;
                Ok((false, Some(analysis)))
            },
        }
    }).await
}
761
762pub async fn run_map_reduce(
764 diff: &str,
765 stat: &str,
766 scope_candidates: &str,
767 model_name: &str,
768 config: &CommitConfig,
769 counter: &TokenCounter,
770) -> Result<ConventionalAnalysis> {
771 let mut files = parse_diff(diff);
772
773 files.retain(|f| {
775 !config
776 .excluded_files
777 .iter()
778 .any(|excluded| f.filename.ends_with(excluded))
779 });
780
781 if files.is_empty() {
782 return Err(CommitGenError::Other(
783 "No relevant files to analyze after filtering".to_string(),
784 ));
785 }
786
787 let file_count = files.len();
788 crate::style::print_info(&format!("Running map-reduce on {file_count} files..."));
789
790 let observations = map_phase(&files, model_name, config, counter).await?;
792
793 reduce_phase(&observations, stat, scope_candidates, model_name, config).await
795}
796
/// Returns a trimmed excerpt of `body`, at most `limit` bytes long, for use in error messages.
///
/// - Empty or whitespace-only input yields the placeholder `"<empty response body>"`.
/// - Longer input is cut and `"..."` is appended. The cut is moved back to the nearest UTF-8
///   character boundary, so multi-byte characters never cause a panic.
fn response_snippet(body: &str, limit: usize) -> String {
    let trimmed = body.trim();
    if trimmed.is_empty() {
        return "<empty response body>".to_string();
    }
    if trimmed.len() <= limit {
        return trimmed.to_string();
    }

    // `limit` is a byte offset; slicing inside a multi-byte character would panic, so back up
    // to the previous boundary. Offset 0 is always a boundary, so the loop terminates.
    let mut cut = limit;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }

    let mut snippet = String::with_capacity(cut + 3);
    snippet.push_str(&trimmed[..cut]);
    snippet.push_str("...");
    snippet
}
812
813fn parse_observations_from_text(text: &str) -> Vec<String> {
814 let trimmed = text.trim();
815 if trimmed.is_empty() {
816 return Vec::new();
817 }
818
819 if let Ok(obs) = serde_json::from_str::<FileObservationResponse>(trimmed) {
820 return obs.observations;
821 }
822
823 trimmed
824 .lines()
825 .map(str::trim)
826 .filter(|line| !line.is_empty())
827 .map(|line| {
828 line
829 .strip_prefix("- ")
830 .or_else(|| line.strip_prefix("* "))
831 .unwrap_or(line)
832 .trim()
833 })
834 .filter(|line| !line.is_empty())
835 .map(str::to_string)
836 .collect()
837}
838
/// Builds the Anthropic Messages endpoint URL from a configured base URL.
///
/// Trailing slashes are ignored, and a base that already ends in `/v1` is not given a second
/// `/v1` segment.
fn anthropic_messages_url(base_url: &str) -> String {
    let without_slashes = base_url.trim_end_matches('/');
    // Normalize to the part before any trailing "/v1", then append the full path once.
    let root = without_slashes
        .strip_suffix("/v1")
        .unwrap_or(without_slashes);
    format!("{root}/v1/messages")
}
847
848fn prompt_cache_control() -> PromptCacheControl {
849 PromptCacheControl { control_type: "ephemeral".to_string() }
850}
851
852fn anthropic_prompt_caching_enabled(config: &CommitConfig) -> bool {
853 config.api_base_url.to_lowercase().contains("anthropic.com")
854}
855
856fn append_anthropic_cache_beta_header(
857 request_builder: reqwest::RequestBuilder,
858 enable_cache: bool,
859) -> reqwest::RequestBuilder {
860 if enable_cache {
861 request_builder.header("anthropic-beta", "prompt-caching-2024-07-31")
862 } else {
863 request_builder
864 }
865}
866
867fn anthropic_text_content(text: String, cache: bool) -> AnthropicContent {
868 AnthropicContent {
869 content_type: "text".to_string(),
870 text,
871 cache_control: cache.then(prompt_cache_control),
872 }
873}
874
875fn anthropic_system_content(system_prompt: &str, cache: bool) -> Option<Vec<AnthropicContent>> {
876 if system_prompt.trim().is_empty() {
877 None
878 } else {
879 Some(vec![anthropic_text_content(system_prompt.to_string(), cache)])
880 }
881}
882
883fn cache_last_anthropic_tool(tools: &mut [AnthropicTool], cache: bool) {
884 if cache && let Some(last) = tools.last_mut() {
885 last.cache_control = Some(prompt_cache_control());
886 }
887}
888
889fn extract_anthropic_content(
890 response_text: &str,
891 tool_name: &str,
892) -> Result<(Option<serde_json::Value>, String, Option<String>)> {
893 let value: serde_json::Value = serde_json::from_str(response_text).map_err(|e| {
894 CommitGenError::Other(format!(
895 "Failed to parse Anthropic response JSON: {e}. Response body: {}",
896 response_snippet(response_text, 500)
897 ))
898 })?;
899
900 let stop_reason = value
901 .get("stop_reason")
902 .and_then(|v| v.as_str())
903 .map(str::to_string);
904
905 let mut tool_input: Option<serde_json::Value> = None;
906 let mut text_parts = Vec::new();
907
908 if let Some(content) = value.get("content").and_then(|v| v.as_array()) {
909 for item in content {
910 let item_type = item.get("type").and_then(|v| v.as_str()).unwrap_or("");
911 match item_type {
912 "tool_use" => {
913 let name = item.get("name").and_then(|v| v.as_str()).unwrap_or("");
914 if name == tool_name
915 && let Some(input) = item.get("input")
916 {
917 tool_input = Some(input.clone());
918 }
919 },
920 "text" => {
921 if let Some(text) = item.get("text").and_then(|v| v.as_str()) {
922 text_parts.push(text.to_string());
923 }
924 },
925 _ => {},
926 }
927 }
928 }
929
930 Ok((tool_input, text_parts.join("\n"), stop_reason))
931}
932
/// One chat message (role plus text) in an `ApiRequest`.
#[derive(Debug, Serialize)]
struct Message {
    /// Message role; this file uses "system" and "user".
    role: String,
    /// Message text.
    content: String,
}
938
/// Parameter schema of a function tool.
#[derive(Debug, Serialize, Deserialize)]
struct FunctionParameters {
    /// Serialized as `type`; the builders in this file set it to "object".
    #[serde(rename = "type")]
    param_type: String,
    /// JSON description of each property.
    properties: serde_json::Value,
    /// Names of the required properties.
    required: Vec<String>,
}
946
/// Function definition carried inside a `Tool`.
#[derive(Debug, Serialize, Deserialize)]
struct Function {
    /// Function name the model is asked to call.
    name: String,
    /// Human-readable description of the function.
    description: String,
    /// Schema of the function's arguments.
    parameters: FunctionParameters,
}
953
/// Tool entry of an `ApiRequest`.
#[derive(Debug, Serialize, Deserialize)]
struct Tool {
    /// Serialized as `type`; the builders in this file set it to "function".
    #[serde(rename = "type")]
    tool_type: String,
    /// The function this tool exposes.
    function: Function,
}
960
/// Request body posted to the `/chat/completions` endpoint.
#[derive(Debug, Serialize)]
struct ApiRequest {
    /// Model identifier.
    model: String,
    /// Upper bound on generated tokens.
    max_tokens: u32,
    /// Sampling temperature.
    temperature: f32,
    /// Tools offered to the model.
    tools: Vec<Tool>,
    /// Forced tool selection; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<serde_json::Value>,
    /// Prompt cache key; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    prompt_cache_key: Option<String>,
    /// Conversation messages, in order.
    messages: Vec<Message>,
}
973
/// Request body posted to the Anthropic Messages endpoint.
#[derive(Debug, Serialize)]
struct AnthropicRequest {
    /// Model identifier.
    model: String,
    /// Upper bound on generated tokens.
    max_tokens: u32,
    /// Sampling temperature.
    temperature: f32,
    /// System prompt blocks; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<Vec<AnthropicContent>>,
    /// Tools offered to the model.
    tools: Vec<AnthropicTool>,
    /// Forced tool selection; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<AnthropicToolChoice>,
    /// Conversation messages, in order.
    messages: Vec<AnthropicMessage>,
}
986
/// Cache-control marker attached to Anthropic tools and content blocks.
#[derive(Debug, Clone, Serialize)]
struct PromptCacheControl {
    /// Serialized as `type`; `prompt_cache_control` sets it to "ephemeral".
    #[serde(rename = "type")]
    control_type: String,
}
992
/// Tool definition in the Anthropic request format.
#[derive(Debug, Serialize)]
struct AnthropicTool {
    /// Tool name.
    name: String,
    /// Human-readable description of the tool.
    description: String,
    /// JSON schema of the tool input.
    input_schema: serde_json::Value,
    /// Optional cache marker; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<PromptCacheControl>,
}
1001
/// Tool selection directive in the Anthropic request format.
#[derive(Debug, Serialize)]
struct AnthropicToolChoice {
    /// Serialized as `type`; this file sets it to "tool".
    #[serde(rename = "type")]
    choice_type: String,
    /// Name of the tool the model must use.
    name: String,
}
1008
/// One message in an `AnthropicRequest`.
#[derive(Debug, Serialize)]
struct AnthropicMessage {
    /// Message role; this file only sends "user".
    role: String,
    /// Content blocks that make up the message.
    content: Vec<AnthropicContent>,
}
1014
/// Content block used for Anthropic system prompts and messages.
#[derive(Debug, Clone, Serialize)]
struct AnthropicContent {
    /// Serialized as `type`; `anthropic_text_content` sets it to "text".
    #[serde(rename = "type")]
    content_type: String,
    /// Block text.
    text: String,
    /// Optional cache marker; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<PromptCacheControl>,
}
1023
/// A tool call found in a chat-completions response message.
#[derive(Debug, Deserialize)]
struct ToolCall {
    /// The function invocation requested by the model.
    function: FunctionCall,
}
1028
/// Name and raw arguments of a function invocation returned by the model.
#[derive(Debug, Deserialize)]
struct FunctionCall {
    /// Name of the called function.
    name: String,
    /// Arguments as a string; callers in this file parse it as JSON.
    arguments: String,
}
1034
/// One entry of `ApiResponse::choices`.
#[derive(Debug, Deserialize)]
struct Choice {
    /// The message produced for this choice.
    message: ResponseMessage,
}
1039
/// Message part of a chat-completions response.
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    /// Tool calls made by the model; defaults to empty when the field is absent.
    #[serde(default)]
    tool_calls: Vec<ToolCall>,
    /// Plain text content; defaults to `None` when the field is absent.
    #[serde(default)]
    content: Option<String>,
}
1047
/// Top-level chat-completions response; only `choices` is read.
#[derive(Debug, Deserialize)]
struct ApiResponse {
    /// Candidate completions; callers in this file use the first one.
    choices: Vec<Choice>,
}
1052
/// Payload the model returns for the `create_file_observation` tool.
#[derive(Debug, Deserialize)]
struct FileObservationResponse {
    /// Observations; accepted as a JSON array of strings or as a single string, which
    /// `deserialize_observations` splits into items.
    #[serde(deserialize_with = "deserialize_observations")]
    observations: Vec<String>,
}
1058
1059fn deserialize_observations<'de, D>(deserializer: D) -> std::result::Result<Vec<String>, D::Error>
1062where
1063 D: serde::Deserializer<'de>,
1064{
1065 use std::fmt;
1066
1067 use serde::de::{self, Visitor};
1068
1069 struct ObservationsVisitor;
1070
1071 impl<'de> Visitor<'de> for ObservationsVisitor {
1072 type Value = Vec<String>;
1073
1074 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1075 formatter.write_str("an array of strings, a JSON array string, or a bullet-point string")
1076 }
1077
1078 fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
1079 where
1080 A: de::SeqAccess<'de>,
1081 {
1082 let mut vec = Vec::new();
1083 while let Some(item) = seq.next_element::<String>()? {
1084 vec.push(item);
1085 }
1086 Ok(vec)
1087 }
1088
1089 fn visit_str<E>(self, s: &str) -> std::result::Result<Self::Value, E>
1090 where
1091 E: de::Error,
1092 {
1093 Ok(parse_string_to_observations(s))
1094 }
1095 }
1096
1097 deserializer.deserialize_any(ObservationsVisitor)
1098}
1099
1100fn parse_string_to_observations(s: &str) -> Vec<String> {
1103 let trimmed = s.trim();
1104 if trimmed.is_empty() {
1105 return Vec::new();
1106 }
1107
1108 if trimmed.starts_with('[')
1110 && let Ok(arr) = serde_json::from_str::<Vec<String>>(trimmed)
1111 {
1112 return arr;
1113 }
1114
1115 trimmed
1117 .lines()
1118 .map(str::trim)
1119 .filter(|line| !line.is_empty())
1120 .map(|line| {
1121 line
1122 .strip_prefix("- ")
1123 .or_else(|| line.strip_prefix("* "))
1124 .or_else(|| line.strip_prefix("• "))
1125 .unwrap_or(line)
1126 .trim()
1127 .to_string()
1128 })
1129 .filter(|line| !line.is_empty())
1130 .collect()
1131}
1132
1133fn build_observation_tool() -> Tool {
1134 Tool {
1135 tool_type: "function".to_string(),
1136 function: Function {
1137 name: "create_file_observation".to_string(),
1138 description: "Extract observations from a single file's changes".to_string(),
1139 parameters: FunctionParameters {
1140 param_type: "object".to_string(),
1141 properties: serde_json::json!({
1142 "observations": {
1143 "type": "array",
1144 "description": "List of factual observations about what changed in this file",
1145 "items": {
1146 "type": "string"
1147 }
1148 }
1149 }),
1150 required: vec!["observations".to_string()],
1151 },
1152 },
1153 }
1154}
1155
1156fn build_analysis_tool(type_enum: &[&str]) -> Tool {
1157 Tool {
1158 tool_type: "function".to_string(),
1159 function: Function {
1160 name: "create_conventional_analysis".to_string(),
1161 description: "Synthesize observations into conventional commit analysis".to_string(),
1162 parameters: FunctionParameters {
1163 param_type: "object".to_string(),
1164 properties: serde_json::json!({
1165 "type": {
1166 "type": "string",
1167 "enum": type_enum,
1168 "description": "Commit type based on combined changes"
1169 },
1170 "scope": {
1171 "type": "string",
1172 "description": "Optional scope (module/component). Omit if unclear or multi-component."
1173 },
1174 "details": {
1175 "type": "array",
1176 "description": "Array of 0-6 detail items with changelog metadata.",
1177 "items": {
1178 "type": "object",
1179 "properties": {
1180 "text": {
1181 "type": "string",
1182 "description": "Detail about change, starting with past-tense verb, ending with period"
1183 },
1184 "changelog_category": {
1185 "type": "string",
1186 "enum": ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security"],
1187 "description": "Changelog category if user-visible. Omit for internal changes."
1188 },
1189 "user_visible": {
1190 "type": "boolean",
1191 "description": "True if this change affects users/API and should appear in changelog"
1192 }
1193 },
1194 "required": ["text", "user_visible"]
1195 }
1196 },
1197 "issue_refs": {
1198 "type": "array",
1199 "description": "Issue numbers from context (e.g., ['#123', '#456']). Empty if none.",
1200 "items": {
1201 "type": "string"
1202 }
1203 }
1204 }),
1205 required: vec!["type".to_string(), "details".to_string(), "issue_refs".to_string()],
1206 },
1207 },
1208 }
1209}
1210
1211#[allow(
1212 clippy::too_many_arguments,
1213 reason = "needs model/tool/prompt metadata to build deterministic cacheable request"
1214)]
1215fn build_api_request(
1216 config: &CommitConfig,
1217 model: &str,
1218 temperature: f32,
1219 tools: Vec<Tool>,
1220 system: &str,
1221 user: &str,
1222 prompt_family: &str,
1223 prompt_variant: &str,
1224) -> ApiRequest {
1225 let tool_name = tools.first().map(|t| t.function.name.clone());
1226
1227 let mut messages = Vec::new();
1228 if !system.is_empty() {
1229 messages.push(Message { role: "system".to_string(), content: system.to_string() });
1230 }
1231 messages.push(Message { role: "user".to_string(), content: user.to_string() });
1232
1233 let prompt_cache_key =
1234 crate::api::openai_prompt_cache_key(config, model, prompt_family, prompt_variant, system);
1235
1236 ApiRequest {
1237 model: model.to_string(),
1238 max_tokens: 1500,
1239 temperature,
1240 tool_choice: tool_name
1241 .map(|name| serde_json::json!({ "type": "function", "function": { "name": name } })),
1242 prompt_cache_key,
1243 tools,
1244 messages,
1245 }
1246}
1247
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokens::TokenCounter;

    /// Builds the token counter shared by the map-reduce decision tests.
    fn test_counter() -> TokenCounter {
        TokenCounter::new("http://localhost:4000", None, "claude-sonnet-4.5")
    }

    /// With map-reduce disabled in the config, the decision is `false` even
    /// for a diff that touches four files.
    #[test]
    fn test_should_use_map_reduce_disabled() {
        let config = CommitConfig { map_reduce_enabled: false, ..Default::default() };
        let counter = test_counter();
        let diff = r"diff --git a/a.rs b/a.rs
@@ -0,0 +1 @@
+a
diff --git a/b.rs b/b.rs
@@ -0,0 +1 @@
+b
diff --git a/c.rs b/c.rs
@@ -0,0 +1 @@
+c
diff --git a/d.rs b/d.rs
@@ -0,0 +1 @@
+d";
        assert!(!should_use_map_reduce(diff, &config, &counter));
    }

    /// A two-file diff with the default config does not trigger map-reduce.
    #[test]
    fn test_should_use_map_reduce_few_files() {
        let config = CommitConfig::default();
        let counter = test_counter();
        let diff = r"diff --git a/a.rs b/a.rs
@@ -0,0 +1 @@
+a
diff --git a/b.rs b/b.rs
@@ -0,0 +1 @@
+b";
        assert!(!should_use_map_reduce(diff, &config, &counter));
    }

    /// A five-file diff with the default config triggers map-reduce.
    #[test]
    fn test_should_use_map_reduce_many_files() {
        let config = CommitConfig::default();
        let counter = test_counter();
        // Every header uses the standard `a/<path> b/<path>` form so that each
        // of the five entries is a well-formed file section.
        let diff = r"diff --git a/a.rs b/a.rs
@@ -0,0 +1 @@
+a
diff --git a/b.rs b/b.rs
@@ -0,0 +1 @@
+b
diff --git a/c.rs b/c.rs
@@ -0,0 +1 @@
+c
diff --git a/d.rs b/d.rs
@@ -0,0 +1 @@
+d
diff --git a/e.rs b/e.rs
@@ -0,0 +1 @@
+e";
        assert!(should_use_map_reduce(diff, &config, &counter));
    }

    /// When the current file is the only file in the change, the context
    /// header is empty.
    #[test]
    fn test_generate_context_header_empty() {
        let files = vec![FileDiff {
            filename: "only.rs".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 10,
            deletions: 5,
            is_binary: false,
        }];
        let header = generate_context_header(&files, "only.rs");
        assert!(header.is_empty());
    }

    /// The context header lists the other files in the change and leaves out
    /// the file currently being analysed.
    #[test]
    fn test_generate_context_header_multiple() {
        let files = vec![
            FileDiff {
                filename: "src/main.rs".to_string(),
                header: String::new(),
                content: "fn main() {}".to_string(),
                additions: 10,
                deletions: 5,
                is_binary: false,
            },
            FileDiff {
                filename: "src/lib.rs".to_string(),
                header: String::new(),
                content: "mod test;".to_string(),
                additions: 3,
                deletions: 1,
                is_binary: false,
            },
            FileDiff {
                filename: "tests/test.rs".to_string(),
                header: String::new(),
                content: "#[test]".to_string(),
                additions: 20,
                deletions: 0,
                is_binary: false,
            },
        ];

        let header = generate_context_header(&files, "src/main.rs");
        assert!(header.contains("OTHER FILES IN THIS CHANGE:"));
        assert!(header.contains("src/lib.rs"));
        assert!(header.contains("tests/test.rs"));
        assert!(!header.contains("src/main.rs"));
    }

    /// Pins the description inferred for a range of file names and contents.
    #[test]
    fn test_infer_file_description() {
        assert_eq!(infer_file_description("src/test_utils.rs", ""), "test file");
        assert_eq!(infer_file_description("README.md", ""), "documentation");
        assert_eq!(infer_file_description("config.toml", ""), "configuration");
        assert_eq!(infer_file_description("src/error.rs", ""), "error definitions");
        assert_eq!(infer_file_description("src/types.rs", ""), "type definitions");
        assert_eq!(infer_file_description("src/mod.rs", ""), "module exports");
        assert_eq!(infer_file_description("src/main.rs", ""), "entry point");
        assert_eq!(infer_file_description("src/api.rs", "fn call()"), "implementation");
        assert_eq!(infer_file_description("src/models.rs", "struct Foo"), "type definitions");
        assert_eq!(infer_file_description("src/unknown.xyz", ""), "source code");
    }

    /// A string holding a JSON array is split into its elements.
    #[test]
    fn test_parse_string_to_observations_json_array() {
        let input = r#"["item one", "item two", "item three"]"#;
        let result = parse_string_to_observations(input);
        assert_eq!(result, vec!["item one", "item two", "item three"]);
    }

    /// Lines prefixed with `- ` become one observation each, without the
    /// bullet.
    #[test]
    fn test_parse_string_to_observations_bullet_points() {
        let input = "- added new function\n- fixed bug in parser\n- updated tests";
        let result = parse_string_to_observations(input);
        assert_eq!(result, vec!["added new function", "fixed bug in parser", "updated tests"]);
    }

    /// Lines prefixed with `* ` are handled like dash bullets.
    #[test]
    fn test_parse_string_to_observations_asterisk_bullets() {
        let input = "* first change\n* second change";
        let result = parse_string_to_observations(input);
        assert_eq!(result, vec!["first change", "second change"]);
    }

    /// Empty and whitespace-only input yields no observations.
    #[test]
    fn test_parse_string_to_observations_empty() {
        assert!(parse_string_to_observations("").is_empty());
        assert!(parse_string_to_observations(" ").is_empty());
    }

    /// `observations` given as a real JSON array deserializes as-is.
    #[test]
    fn test_deserialize_observations_array() {
        let json = r#"{"observations": ["a", "b", "c"]}"#;
        let result: FileObservationResponse = serde_json::from_str(json).unwrap();
        assert_eq!(result.observations, vec!["a", "b", "c"]);
    }

    /// `observations` given as a string containing a JSON array is unpacked
    /// into its elements.
    #[test]
    fn test_deserialize_observations_stringified_array() {
        let json = r#"{"observations": "[\"a\", \"b\", \"c\"]"}"#;
        let result: FileObservationResponse = serde_json::from_str(json).unwrap();
        assert_eq!(result.observations, vec!["a", "b", "c"]);
    }

    /// `observations` given as a bulleted string is split into one entry per
    /// bullet.
    #[test]
    fn test_deserialize_observations_bullet_string() {
        let json = r#"{"observations": "- updated function\n- fixed bug"}"#;
        let result: FileObservationResponse = serde_json::from_str(json).unwrap();
        assert_eq!(result.observations, vec!["updated function", "fixed bug"]);
    }
}