1use std::path::Path;
7
8use futures::stream::{self, StreamExt};
9use serde::{Deserialize, Serialize};
10
11use crate::{
12 api::retry_api_call,
13 config::{CommitConfig, ResolvedApiMode},
14 diff::{FileDiff, parse_diff, reconstruct_diff},
15 error::{CommitGenError, Result},
16 templates,
17 tokens::TokenCounter,
18 types::ConventionalAnalysis,
19};
20
/// Result of the map phase for one file: the model's observations about that file's changes.
///
/// Serialized to JSON by `reduce_phase` when it builds the reduce prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileObservation {
    /// Name of the file the observations refer to.
    pub file: String,
    /// Free-text observations about what changed in the file.
    pub observations: Vec<String>,
    /// Count of added lines. NOTE(review): the constructors visible in this file
    /// always set this to 0 — confirm whether a caller is expected to fill it in.
    pub additions: usize,
    /// Count of deleted lines. NOTE(review): same caveat as `additions`.
    pub deletions: usize,
}
29
/// Number of non-excluded files at or above which `should_use_map_reduce` returns `true`.
const MIN_FILES_FOR_MAP_REDUCE: usize = 4;

/// Per-file token-estimate threshold: `should_use_map_reduce` returns `true` when a file
/// exceeds it, and `map_phase` truncates files above it before sending them to the model.
const MAX_FILE_TOKENS: usize = 50_000;
36
37pub fn should_use_map_reduce(diff: &str, config: &CommitConfig, counter: &TokenCounter) -> bool {
42 if !config.map_reduce_enabled {
43 return false;
44 }
45
46 let files = parse_diff(diff);
47 let file_count = files
48 .iter()
49 .filter(|f| {
50 !config
51 .excluded_files
52 .iter()
53 .any(|ex| f.filename.ends_with(ex))
54 })
55 .count();
56
57 file_count >= MIN_FILES_FOR_MAP_REDUCE
59 || files
60 .iter()
61 .any(|f| f.token_estimate(counter) > MAX_FILE_TOKENS)
62}
63
/// Maximum number of sibling files that `generate_context_header` lists individually.
const MAX_CONTEXT_FILES: usize = 20;
66
67fn generate_context_header(files: &[FileDiff], current_file: &str) -> String {
69 if files.len() > 100 {
71 return format!("(Large commit with {} total files)", files.len());
72 }
73
74 let mut lines = vec!["OTHER FILES IN THIS CHANGE:".to_string()];
75
76 let other_files: Vec<_> = files
77 .iter()
78 .filter(|f| f.filename != current_file)
79 .collect();
80
81 let total_other = other_files.len();
82
83 let to_show: Vec<&FileDiff> = if total_other > MAX_CONTEXT_FILES {
85 let mut sorted = other_files;
86 sorted.sort_by_key(|f| std::cmp::Reverse(f.additions + f.deletions));
87 sorted.truncate(MAX_CONTEXT_FILES);
88 sorted
89 } else {
90 other_files
91 };
92
93 for file in &to_show {
94 let line_count = file.additions + file.deletions;
95 let description = infer_file_description(&file.filename, &file.content);
96 lines.push(format!("- {} ({} lines): {}", file.filename, line_count, description));
97 }
98
99 if to_show.len() < total_other {
100 lines.push(format!("... and {} more files", total_other - to_show.len()));
101 }
102
103 if lines.len() == 1 {
104 return String::new(); }
106
107 lines.join("\n")
108}
109
/// Guesses a short human-readable label for a file from its name and, failing that,
/// from its content.
///
/// Filename rules are checked first, in this order: test file, documentation (`.md`),
/// configuration (`config` in the name or a `.toml`/`.yaml`/`.yml` extension), error
/// definitions, type definitions, module exports (`mod.rs`/`lib.rs`), entry point
/// (`main.rs`/`main.go`/`main.py`). Name matching is case-insensitive. If none match,
/// `content` is scanned for a few keywords; the final fallback is `"source code"`.
fn infer_file_description(filename: &str, content: &str) -> &'static str {
    let lowered = filename.to_lowercase();
    let extension = Path::new(filename).extension();

    let has_ext = |wanted: &str| extension.is_some_and(|e| e.eq_ignore_ascii_case(wanted));
    let name_has = |needle: &str| lowered.contains(needle);
    let name_ends = |suffix: &str| lowered.ends_with(suffix);
    let body_has = |needle: &str| content.contains(needle);

    // The first matching rule wins, so the order of the branches is significant.
    if name_has("test") {
        "test file"
    } else if has_ext("md") {
        "documentation"
    } else if name_has("config") || has_ext("toml") || has_ext("yaml") || has_ext("yml") {
        "configuration"
    } else if name_has("error") {
        "error definitions"
    } else if name_has("type") {
        "type definitions"
    } else if name_ends("mod.rs") || name_ends("lib.rs") {
        "module exports"
    } else if name_ends("main.rs") || name_ends("main.go") || name_ends("main.py") {
        "entry point"
    } else if body_has("impl ") || body_has("fn ") {
        "implementation"
    } else if body_has("struct ") || body_has("enum ") {
        "type definitions"
    } else if body_has("async ") || body_has("await") {
        "async code"
    } else {
        "source code"
    }
}
162
163async fn map_phase(
165 files: &[FileDiff],
166 model_name: &str,
167 config: &CommitConfig,
168 counter: &TokenCounter,
169) -> Result<Vec<FileObservation>> {
170 let observations: Vec<Result<FileObservation>> = stream::iter(files.iter())
172 .map(|file| async {
173 if file.is_binary {
174 return Ok(FileObservation {
175 file: file.filename.clone(),
176 observations: vec!["Binary file changed.".to_string()],
177 additions: 0,
178 deletions: 0,
179 });
180 }
181
182 let context_header = generate_context_header(files, &file.filename);
183 let mut file_clone = file.clone();
185 let file_tokens = file_clone.token_estimate(counter);
186 if file_tokens > MAX_FILE_TOKENS {
187 let target_size = MAX_FILE_TOKENS * 4; file_clone.truncate(target_size);
189 eprintln!(
190 " {} truncated {} ({} \u{2192} {} tokens)",
191 crate::style::icons::WARNING,
192 file.filename,
193 file_tokens,
194 file_clone.token_estimate(counter)
195 );
196 }
197
198 let file_diff = reconstruct_diff(&[file_clone]);
199
200 map_single_file(&file.filename, &file_diff, &context_header, model_name, config).await
201 })
202 .buffer_unordered(8)
203 .collect()
204 .await;
205
206 observations.into_iter().collect()
208}
209
/// Asks the model for observations about a single file's diff and parses the reply.
///
/// The whole exchange runs inside `retry_api_call`. The closure returns
/// `Ok((retry, value))`; every place that returns `Ok((true, None))` is accompanied by a
/// "retrying"/server-error message, so the flag presumably asks `retry_api_call` to try
/// again (its implementation is not visible here — confirm).
///
/// # Parameters
/// - `filename`: name stored in the resulting observation and passed to the prompt template.
/// - `file_diff`: diff text for this file, passed to the prompt template.
/// - `context_header`: summary of the other files in the change, passed to the prompt template.
/// - `model_name`: model identifier; also selects the API mode via `config.resolved_api_mode`.
/// - `config`: supplies the HTTP client, base URL, API key and temperature.
///
/// # Returns
/// A [`FileObservation`] whose `additions` and `deletions` are always `0` here.
///
/// # Errors
/// - prompt rendering or sending the request fails (propagated with `?`);
/// - the HTTP status is neither success nor a server error (`CommitGenError::ApiError`);
/// - the response cannot be parsed, or contains no usable observation
///   (`CommitGenError::Other`).
async fn map_single_file(
    filename: &str,
    file_diff: &str,
    context_header: &str,
    model_name: &str,
    config: &CommitConfig,
) -> Result<FileObservation> {
    retry_api_call(config, async move || {
        let client = crate::api::get_client(config);

        let tool = build_observation_tool();

        let parts = templates::render_map_prompt("default", filename, file_diff, context_header)?;
        let mode = config.resolved_api_mode(model_name);

        // Step 1: send the request in the wire format of the resolved API mode and
        // obtain the raw response body.
        let response_text = match mode {
            ResolvedApiMode::ChatCompletions => {
                let request = build_api_request(
                    model_name,
                    config.temperature,
                    vec![tool],
                    &parts.system,
                    &parts.user,
                );

                let mut request_builder = client
                    .post(format!("{}/chat/completions", config.api_base_url))
                    .header("content-type", "application/json");

                if let Some(api_key) = &config.api_key {
                    request_builder =
                        request_builder.header("Authorization", format!("Bearer {api_key}"));
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/map", model_name).await?;

                // Server errors are reported and signalled with the retry flag.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                // Any other non-success status is a hard error.
                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
            ResolvedApiMode::AnthropicMessages => {
                // The tool schema is spelled out inline here; it uses the same name and
                // description strings as `build_observation_tool`.
                let request = AnthropicRequest {
                    model: model_name.to_string(),
                    max_tokens: 1500,
                    temperature: config.temperature,
                    system: if parts.system.is_empty() {
                        None
                    } else {
                        Some(parts.system.clone())
                    },
                    tools: vec![AnthropicTool {
                        name: "create_file_observation".to_string(),
                        description: "Extract observations from a single file's changes".to_string(),
                        input_schema: serde_json::json!({
                            "type": "object",
                            "properties": {
                                "observations": {
                                    "type": "array",
                                    "description": "List of factual observations about what changed in this file",
                                    "items": {"type": "string"}
                                }
                            },
                            "required": ["observations"]
                        }),
                    }],
                    // Names the observation tool explicitly as the tool choice.
                    tool_choice: Some(AnthropicToolChoice {
                        choice_type: "tool".to_string(),
                        name: "create_file_observation".to_string(),
                    }),
                    messages: vec![AnthropicMessage {
                        role: "user".to_string(),
                        content: vec![AnthropicContent {
                            content_type: "text".to_string(),
                            text: parts.user,
                        }],
                    }],
                };

                let mut request_builder = client
                    .post(anthropic_messages_url(&config.api_base_url))
                    .header("content-type", "application/json")
                    .header("anthropic-version", "2023-06-01");

                if let Some(api_key) = &config.api_key {
                    request_builder = request_builder.header("x-api-key", api_key);
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/map", model_name).await?;

                // Same status handling as the chat-completions branch.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
        };

        if response_text.trim().is_empty() {
            crate::style::warn("Model returned empty response body for observation; retrying.");
            return Ok((true, None));
        }

        // Step 2: parse the body according to the API mode.
        match mode {
            ResolvedApiMode::ChatCompletions => {
                let api_response: ApiResponse = serde_json::from_str(&response_text).map_err(|e| {
                    CommitGenError::Other(format!(
                        "Failed to parse observation response JSON: {e}. Response body: {}",
                        response_snippet(&response_text, 500)
                    ))
                })?;

                if api_response.choices.is_empty() {
                    return Err(CommitGenError::Other(
                        "API returned empty response for file observation".to_string(),
                    ));
                }

                // Only the first choice is considered.
                let message = &api_response.choices[0].message;

                // Preferred path: the first tool call, if it targets the observation tool.
                if !message.tool_calls.is_empty() {
                    let tool_call = &message.tool_calls[0];
                    // `ends_with` also accepts names that carry a prefix before the tool name.
                    if tool_call.function.name.ends_with("create_file_observation") {
                        let args = &tool_call.function.arguments;
                        if args.is_empty() {
                            return Err(CommitGenError::Other(
                                "Model returned empty function arguments for observation".to_string(),
                            ));
                        }

                        let obs: FileObservationResponse = serde_json::from_str(args).map_err(|e| {
                            CommitGenError::Other(format!("Failed to parse observation response: {e}"))
                        })?;

                        return Ok((
                            false,
                            Some(FileObservation {
                                file: filename.to_string(),
                                observations: obs.observations,
                                additions: 0, deletions: 0,
                            }),
                        ));
                    }
                }

                // Fallback: treat the message content as the JSON observation payload.
                if let Some(content) = &message.content {
                    if content.trim().is_empty() {
                        crate::style::warn("Model returned empty content for observation; retrying.");
                        return Ok((true, None));
                    }
                    let obs: FileObservationResponse =
                        serde_json::from_str(content.trim()).map_err(|e| {
                            CommitGenError::Other(format!(
                                "Failed to parse observation content JSON: {e}. Content: {}",
                                response_snippet(content, 500)
                            ))
                        })?;
                    return Ok((
                        false,
                        Some(FileObservation {
                            file: filename.to_string(),
                            observations: obs.observations,
                            additions: 0,
                            deletions: 0,
                        }),
                    ));
                }

                Err(CommitGenError::Other("No observation found in API response".to_string()))
            },
            ResolvedApiMode::AnthropicMessages => {
                let (tool_input, text_content, stop_reason) =
                    extract_anthropic_content(&response_text, "create_file_observation")?;

                if let Some(input) = tool_input {
                    // `observations` may arrive as an array of strings or as a single string;
                    // anything else is treated as "no observations".
                    let mut observations = match input.get("observations") {
                        Some(serde_json::Value::Array(arr)) => arr
                            .iter()
                            .filter_map(|v| v.as_str().map(str::to_string))
                            .collect::<Vec<_>>(),
                        Some(serde_json::Value::String(s)) => parse_string_to_observations(s),
                        _ => Vec::new(),
                    };

                    // Empty tool input: try the text blocks, then a generic fallback when the
                    // reply stopped at `max_tokens`; otherwise continue with no observations.
                    if observations.is_empty() {
                        let text_observations = parse_observations_from_text(&text_content);
                        if !text_observations.is_empty() {
                            observations = text_observations;
                        } else if stop_reason.as_deref() == Some("max_tokens") {
                            crate::style::warn(
                                "Anthropic stopped at max_tokens with empty observations; using fallback \
                                 observation.",
                            );
                            // Use just the final path component in the fallback sentence.
                            let fallback_target = Path::new(filename)
                                .file_name()
                                .and_then(|name| name.to_str())
                                .unwrap_or(filename);
                            observations = vec![format!("Updated {fallback_target}.")];
                        } else {
                            crate::style::warn(
                                "Model returned empty observation tool input; continuing with no \
                                 observations.",
                            );
                        }
                    }

                    return Ok((
                        false,
                        Some(FileObservation {
                            file: filename.to_string(),
                            observations,
                            additions: 0,
                            deletions: 0,
                        }),
                    ));
                }

                // No matching tool block: fall back to parsing the text blocks as JSON.
                if text_content.trim().is_empty() {
                    crate::style::warn("Model returned empty content for observation; retrying.");
                    return Ok((true, None));
                }

                let obs: FileObservationResponse =
                    serde_json::from_str(text_content.trim()).map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse observation content JSON: {e}. Content: {}",
                            response_snippet(&text_content, 500)
                        ))
                    })?;
                Ok((
                    false,
                    Some(FileObservation {
                        file: filename.to_string(),
                        observations: obs.observations,
                        additions: 0,
                        deletions: 0,
                    }),
                ))
            },
        }
    }).await
}
480
/// Reduce phase: asks the model to synthesize per-file observations into a single
/// [`ConventionalAnalysis`].
///
/// The whole exchange runs inside `retry_api_call`. The closure returns
/// `Ok((retry, value))`; every place that returns `Ok((true, None))` is accompanied by a
/// "retrying"/server-error message, so the flag presumably asks `retry_api_call` to try
/// again (its implementation is not visible here — confirm).
///
/// # Parameters
/// - `observations`: map-phase results, serialized to pretty JSON for the prompt.
/// - `stat`, `scope_candidates`: passed through to the reduce prompt template.
/// - `model_name`: model identifier; also selects the API mode via `config.resolved_api_mode`.
/// - `config`: supplies the HTTP client, base URL, API key, temperature and the allowed
///   commit types (`config.types` keys become the `type` enum of the tool schema).
///
/// # Errors
/// - prompt rendering or sending the request fails (propagated with `?`);
/// - the HTTP status is neither success nor a server error (`CommitGenError::ApiError`);
/// - the response cannot be parsed, or contains no analysis (`CommitGenError::Other`).
pub async fn reduce_phase(
    observations: &[FileObservation],
    stat: &str,
    scope_candidates: &str,
    model_name: &str,
    config: &CommitConfig,
) -> Result<ConventionalAnalysis> {
    retry_api_call(config, async move || {
        let client = crate::api::get_client(config);

        // Allowed commit types, taken from the keys of `config.types`.
        let type_enum: Vec<&str> = config.types.keys().map(|s| s.as_str()).collect();

        let tool = build_analysis_tool(&type_enum);

        // If serialization fails, the prompt receives an empty JSON array instead.
        let observations_json =
            serde_json::to_string_pretty(observations).unwrap_or_else(|_| "[]".to_string());

        let types_description = crate::api::format_types_description(config);
        let parts = templates::render_reduce_prompt(
            "default",
            &observations_json,
            stat,
            scope_candidates,
            Some(&types_description),
        )?;
        let mode = config.resolved_api_mode(model_name);

        // Step 1: send the request in the wire format of the resolved API mode and
        // obtain the raw response body.
        let response_text = match mode {
            ResolvedApiMode::ChatCompletions => {
                let request = build_api_request(
                    model_name,
                    config.temperature,
                    vec![tool],
                    &parts.system,
                    &parts.user,
                );

                let mut request_builder = client
                    .post(format!("{}/chat/completions", config.api_base_url))
                    .header("content-type", "application/json");

                if let Some(api_key) = &config.api_key {
                    request_builder =
                        request_builder.header("Authorization", format!("Bearer {api_key}"));
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/reduce", model_name).await?;

                // Server errors are reported and signalled with the retry flag.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                // Any other non-success status is a hard error.
                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
            ResolvedApiMode::AnthropicMessages => {
                // The tool schema is spelled out inline here with the same properties and
                // required list as `build_analysis_tool`.
                let request = AnthropicRequest {
                    model: model_name.to_string(),
                    max_tokens: 1500,
                    temperature: config.temperature,
                    system: if parts.system.is_empty() {
                        None
                    } else {
                        Some(parts.system.clone())
                    },
                    tools: vec![AnthropicTool {
                        name: "create_conventional_analysis".to_string(),
                        description: "Analyze changes and classify as conventional commit with type, \
                                      scope, details, and metadata"
                            .to_string(),
                        input_schema: serde_json::json!({
                            "type": "object",
                            "properties": {
                                "type": {
                                    "type": "string",
                                    "enum": type_enum,
                                    "description": "Commit type based on change classification"
                                },
                                "scope": {
                                    "type": "string",
                                    "description": "Optional scope (module/component). Omit if unclear or multi-component."
                                },
                                "details": {
                                    "type": "array",
                                    "description": "Array of 0-6 detail items with changelog metadata.",
                                    "items": {
                                        "type": "object",
                                        "properties": {
                                            "text": {
                                                "type": "string",
                                                "description": "Detail about change, starting with past-tense verb, ending with period"
                                            },
                                            "changelog_category": {
                                                "type": "string",
                                                "enum": ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security"],
                                                "description": "Changelog category if user-visible. Omit for internal changes."
                                            },
                                            "user_visible": {
                                                "type": "boolean",
                                                "description": "True if this change affects users/API and should appear in changelog"
                                            }
                                        },
                                        "required": ["text", "user_visible"]
                                    }
                                },
                                "issue_refs": {
                                    "type": "array",
                                    "description": "Issue numbers from context (e.g., ['#123', '#456']). Empty if none.",
                                    "items": {
                                        "type": "string"
                                    }
                                }
                            },
                            "required": ["type", "details", "issue_refs"]
                        }),
                    }],
                    // Names the analysis tool explicitly as the tool choice.
                    tool_choice: Some(AnthropicToolChoice {
                        choice_type: "tool".to_string(),
                        name: "create_conventional_analysis".to_string(),
                    }),
                    messages: vec![AnthropicMessage {
                        role: "user".to_string(),
                        content: vec![AnthropicContent {
                            content_type: "text".to_string(),
                            text: parts.user,
                        }],
                    }],
                };

                let mut request_builder = client
                    .post(anthropic_messages_url(&config.api_base_url))
                    .header("content-type", "application/json")
                    .header("anthropic-version", "2023-06-01");

                if let Some(api_key) = &config.api_key {
                    request_builder = request_builder.header("x-api-key", api_key);
                }

                let (status, response_text) =
                    crate::api::timed_send(request_builder.json(&request), "map-reduce/reduce", model_name).await?;

                // Same status handling as the chat-completions branch.
                if status.is_server_error() {
                    eprintln!(
                        "{}",
                        crate::style::error(&format!("Server error {status}: {response_text}"))
                    );
                    return Ok((true, None));
                }

                if !status.is_success() {
                    return Err(CommitGenError::ApiError {
                        status: status.as_u16(),
                        body: response_text,
                    });
                }

                response_text
            },
        };

        if response_text.trim().is_empty() {
            crate::style::warn("Model returned empty response body for synthesis; retrying.");
            return Ok((true, None));
        }

        // Step 2: parse the body according to the API mode.
        match mode {
            ResolvedApiMode::ChatCompletions => {
                let api_response: ApiResponse = serde_json::from_str(&response_text).map_err(|e| {
                    CommitGenError::Other(format!(
                        "Failed to parse synthesis response JSON: {e}. Response body: {}",
                        response_snippet(&response_text, 500)
                    ))
                })?;

                if api_response.choices.is_empty() {
                    return Err(CommitGenError::Other(
                        "API returned empty response for synthesis".to_string(),
                    ));
                }

                // Only the first choice is considered.
                let message = &api_response.choices[0].message;

                // Preferred path: the first tool call, if it targets the analysis tool.
                if !message.tool_calls.is_empty() {
                    let tool_call = &message.tool_calls[0];
                    // `ends_with` also accepts names that carry a prefix before the tool name.
                    if tool_call
                        .function
                        .name
                        .ends_with("create_conventional_analysis")
                    {
                        let args = &tool_call.function.arguments;
                        if args.is_empty() {
                            return Err(CommitGenError::Other(
                                "Model returned empty function arguments for synthesis".to_string(),
                            ));
                        }

                        let analysis: ConventionalAnalysis = serde_json::from_str(args).map_err(|e| {
                            CommitGenError::Other(format!("Failed to parse synthesis response: {e}"))
                        })?;

                        return Ok((false, Some(analysis)));
                    }
                }

                // Fallback: treat the message content as the JSON analysis payload.
                if let Some(content) = &message.content {
                    if content.trim().is_empty() {
                        crate::style::warn("Model returned empty content for synthesis; retrying.");
                        return Ok((true, None));
                    }
                    let analysis: ConventionalAnalysis =
                        serde_json::from_str(content.trim()).map_err(|e| {
                            CommitGenError::Other(format!(
                                "Failed to parse synthesis content JSON: {e}. Content: {}",
                                response_snippet(content, 500)
                            ))
                        })?;
                    return Ok((false, Some(analysis)));
                }

                Err(CommitGenError::Other("No analysis found in synthesis response".to_string()))
            },
            ResolvedApiMode::AnthropicMessages => {
                let (tool_input, text_content, stop_reason) =
                    extract_anthropic_content(&response_text, "create_conventional_analysis")?;

                // Preferred path: the tool input deserializes directly into the analysis.
                if let Some(input) = tool_input {
                    let analysis: ConventionalAnalysis = serde_json::from_value(input).map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse synthesis tool input: {e}. Response body: {}",
                            response_snippet(&response_text, 500)
                        ))
                    })?;
                    return Ok((false, Some(analysis)));
                }

                // Both branches below signal a retry; they differ only in the warning text.
                if text_content.trim().is_empty() {
                    if stop_reason.as_deref() == Some("max_tokens") {
                        crate::style::warn(
                            "Anthropic stopped at max_tokens with empty synthesis; retrying.",
                        );
                        return Ok((true, None));
                    }
                    crate::style::warn("Model returned empty content for synthesis; retrying.");
                    return Ok((true, None));
                }

                // Fallback: parse the concatenated text blocks as the JSON analysis payload.
                let analysis: ConventionalAnalysis = serde_json::from_str(text_content.trim())
                    .map_err(|e| {
                        CommitGenError::Other(format!(
                            "Failed to parse synthesis content JSON: {e}. Content: {}",
                            response_snippet(&text_content, 500)
                        ))
                    })?;
                Ok((false, Some(analysis)))
            },
        }
    }).await
}
753
754pub async fn run_map_reduce(
756 diff: &str,
757 stat: &str,
758 scope_candidates: &str,
759 model_name: &str,
760 config: &CommitConfig,
761 counter: &TokenCounter,
762) -> Result<ConventionalAnalysis> {
763 let mut files = parse_diff(diff);
764
765 files.retain(|f| {
767 !config
768 .excluded_files
769 .iter()
770 .any(|excluded| f.filename.ends_with(excluded))
771 });
772
773 if files.is_empty() {
774 return Err(CommitGenError::Other(
775 "No relevant files to analyze after filtering".to_string(),
776 ));
777 }
778
779 let file_count = files.len();
780 crate::style::print_info(&format!("Running map-reduce on {file_count} files..."));
781
782 let observations = map_phase(&files, model_name, config, counter).await?;
784
785 reduce_phase(&observations, stat, scope_candidates, model_name, config).await
787}
788
/// Returns a trimmed, length-limited excerpt of `body` for use in error messages.
///
/// - An empty `body` yields the placeholder `"<empty response body>"`.
/// - Otherwise the text is trimmed; if it is longer than `limit` bytes it is cut and
///   `"..."` is appended.
///
/// The cut never splits a multi-byte UTF-8 character: when `limit` falls inside one, the
/// excerpt is shortened to the previous character boundary. (`String::truncate` panics in
/// that situation, which would turn an error report into a crash.)
fn response_snippet(body: &str, limit: usize) -> String {
    if body.is_empty() {
        return "<empty response body>".to_string();
    }

    let trimmed = body.trim();
    if trimmed.len() <= limit {
        return trimmed.to_string();
    }

    // Walk back to the nearest char boundary; index 0 is always a boundary, so this ends.
    let mut end = limit;
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }

    let mut snippet = String::with_capacity(end + 3);
    snippet.push_str(&trimmed[..end]);
    snippet.push_str("...");
    snippet
}
804
805fn parse_observations_from_text(text: &str) -> Vec<String> {
806 let trimmed = text.trim();
807 if trimmed.is_empty() {
808 return Vec::new();
809 }
810
811 if let Ok(obs) = serde_json::from_str::<FileObservationResponse>(trimmed) {
812 return obs.observations;
813 }
814
815 trimmed
816 .lines()
817 .map(str::trim)
818 .filter(|line| !line.is_empty())
819 .map(|line| {
820 line
821 .strip_prefix("- ")
822 .or_else(|| line.strip_prefix("* "))
823 .unwrap_or(line)
824 .trim()
825 })
826 .filter(|line| !line.is_empty())
827 .map(str::to_string)
828 .collect()
829}
830
/// Builds the Messages endpoint URL from a configured base URL.
///
/// Trailing slashes are ignored. A base that already ends in `/v1` only gets `/messages`
/// appended; any other base gets `/v1/messages`.
fn anthropic_messages_url(base_url: &str) -> String {
    let base = base_url.trim_end_matches('/');
    let path = if base.ends_with("/v1") {
        "/messages"
    } else {
        "/v1/messages"
    };
    [base, path].concat()
}
839
840fn extract_anthropic_content(
841 response_text: &str,
842 tool_name: &str,
843) -> Result<(Option<serde_json::Value>, String, Option<String>)> {
844 let value: serde_json::Value = serde_json::from_str(response_text).map_err(|e| {
845 CommitGenError::Other(format!(
846 "Failed to parse Anthropic response JSON: {e}. Response body: {}",
847 response_snippet(response_text, 500)
848 ))
849 })?;
850
851 let stop_reason = value
852 .get("stop_reason")
853 .and_then(|v| v.as_str())
854 .map(str::to_string);
855
856 let mut tool_input: Option<serde_json::Value> = None;
857 let mut text_parts = Vec::new();
858
859 if let Some(content) = value.get("content").and_then(|v| v.as_array()) {
860 for item in content {
861 let item_type = item.get("type").and_then(|v| v.as_str()).unwrap_or("");
862 match item_type {
863 "tool_use" => {
864 let name = item.get("name").and_then(|v| v.as_str()).unwrap_or("");
865 if name == tool_name
866 && let Some(input) = item.get("input")
867 {
868 tool_input = Some(input.clone());
869 }
870 },
871 "text" => {
872 if let Some(text) = item.get("text").and_then(|v| v.as_str()) {
873 text_parts.push(text.to_string());
874 }
875 },
876 _ => {},
877 }
878 }
879 }
880
881 Ok((tool_input, text_parts.join("\n"), stop_reason))
882}
883
/// One chat message in an [`ApiRequest`]; `build_api_request` creates these with the
/// roles `"system"` and `"user"`.
#[derive(Debug, Serialize)]
struct Message {
    /// Role of the message author.
    role: String,
    /// Message text.
    content: String,
}
889
/// JSON-schema style description of a tool function's parameters.
#[derive(Debug, Serialize, Deserialize)]
struct FunctionParameters {
    /// Schema type, serialized under the key `type`; the builders in this file use `"object"`.
    #[serde(rename = "type")]
    param_type: String,
    /// Schema of the individual properties, as raw JSON.
    properties: serde_json::Value,
    /// Names of the properties that must be present.
    required: Vec<String>,
}
897
/// Function definition carried by a [`Tool`].
#[derive(Debug, Serialize, Deserialize)]
struct Function {
    /// Function name; `build_api_request` also uses it for the request's `tool_choice`.
    name: String,
    /// Human-readable description of what the function is for.
    description: String,
    /// Parameter schema.
    parameters: FunctionParameters,
}
904
/// Tool entry of a chat-completions request ([`ApiRequest::tools`]).
#[derive(Debug, Serialize, Deserialize)]
struct Tool {
    /// Tool kind, serialized under the key `type`; the builders in this file use `"function"`.
    #[serde(rename = "type")]
    tool_type: String,
    /// The function this tool exposes.
    function: Function,
}
911
/// Request body posted to `{api_base_url}/chat/completions`; built by `build_api_request`.
#[derive(Debug, Serialize)]
struct ApiRequest {
    /// Model identifier.
    model: String,
    /// Upper bound on generated tokens.
    max_tokens: u32,
    /// Sampling temperature.
    temperature: f32,
    /// Tools offered to the model.
    tools: Vec<Tool>,
    /// Optional tool selection; left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<serde_json::Value>,
    /// Conversation messages.
    messages: Vec<Message>,
}
922
/// Request body posted to the URL produced by `anthropic_messages_url`.
#[derive(Debug, Serialize)]
struct AnthropicRequest {
    /// Model identifier.
    model: String,
    /// Upper bound on generated tokens.
    max_tokens: u32,
    /// Sampling temperature.
    temperature: f32,
    /// Optional system prompt; left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
    /// Tools offered to the model.
    tools: Vec<AnthropicTool>,
    /// Optional tool selection; left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<AnthropicToolChoice>,
    /// Conversation messages.
    messages: Vec<AnthropicMessage>,
}
935
/// Tool definition inside an [`AnthropicRequest`].
#[derive(Debug, Serialize)]
struct AnthropicTool {
    /// Tool name, matched against `tool_use` blocks by `extract_anthropic_content`.
    name: String,
    /// Human-readable description of what the tool is for.
    description: String,
    /// JSON schema of the tool's input, as raw JSON.
    input_schema: serde_json::Value,
}
942
/// Tool selection inside an [`AnthropicRequest`].
#[derive(Debug, Serialize)]
struct AnthropicToolChoice {
    /// Selection kind, serialized under the key `type`; callers in this file use `"tool"`.
    #[serde(rename = "type")]
    choice_type: String,
    /// Name of the selected tool.
    name: String,
}
949
/// One message of an [`AnthropicRequest`]; callers in this file send a single `"user"` message.
#[derive(Debug, Serialize)]
struct AnthropicMessage {
    /// Role of the message author.
    role: String,
    /// Content blocks that make up the message.
    content: Vec<AnthropicContent>,
}
955
/// One content block of an [`AnthropicMessage`].
#[derive(Debug, Serialize)]
struct AnthropicContent {
    /// Block kind, serialized under the key `type`; callers in this file use `"text"`.
    #[serde(rename = "type")]
    content_type: String,
    /// Text carried by the block.
    text: String,
}
962
/// A tool call found in a chat-completions response message.
#[derive(Debug, Deserialize)]
struct ToolCall {
    /// The function the model chose to call, with its arguments.
    function: FunctionCall,
}
967
/// Function name and arguments of a [`ToolCall`].
#[derive(Debug, Deserialize)]
struct FunctionCall {
    /// Name of the called function; callers match it with `ends_with`.
    name: String,
    /// Arguments as a string; callers parse it as JSON with `serde_json::from_str`.
    arguments: String,
}
973
/// One entry of [`ApiResponse::choices`]; only the first entry is read in this file.
#[derive(Debug, Deserialize)]
struct Choice {
    /// The message produced for this choice.
    message: ResponseMessage,
}
978
/// Message of a chat-completions [`Choice`]. Both fields fall back to their default
/// (empty list / `None`) when absent from the JSON.
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    /// Tool calls requested by the model; empty when the field is missing.
    #[serde(default)]
    tool_calls: Vec<ToolCall>,
    /// Plain message content; `None` when the field is missing.
    #[serde(default)]
    content: Option<String>,
}
986
/// Top-level shape of a chat-completions response, reduced to the part this file reads.
#[derive(Debug, Deserialize)]
struct ApiResponse {
    /// Candidate completions returned by the API.
    choices: Vec<Choice>,
}
991
/// Payload the model is expected to return for a single file in the map phase.
#[derive(Debug, Deserialize)]
struct FileObservationResponse {
    /// Observations about the file; parsed by `deserialize_observations`, which accepts
    /// either a sequence of strings or a single string.
    #[serde(deserialize_with = "deserialize_observations")]
    observations: Vec<String>,
}
997
998fn deserialize_observations<'de, D>(deserializer: D) -> std::result::Result<Vec<String>, D::Error>
1001where
1002 D: serde::Deserializer<'de>,
1003{
1004 use std::fmt;
1005
1006 use serde::de::{self, Visitor};
1007
1008 struct ObservationsVisitor;
1009
1010 impl<'de> Visitor<'de> for ObservationsVisitor {
1011 type Value = Vec<String>;
1012
1013 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1014 formatter.write_str("an array of strings, a JSON array string, or a bullet-point string")
1015 }
1016
1017 fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
1018 where
1019 A: de::SeqAccess<'de>,
1020 {
1021 let mut vec = Vec::new();
1022 while let Some(item) = seq.next_element::<String>()? {
1023 vec.push(item);
1024 }
1025 Ok(vec)
1026 }
1027
1028 fn visit_str<E>(self, s: &str) -> std::result::Result<Self::Value, E>
1029 where
1030 E: de::Error,
1031 {
1032 Ok(parse_string_to_observations(s))
1033 }
1034 }
1035
1036 deserializer.deserialize_any(ObservationsVisitor)
1037}
1038
1039fn parse_string_to_observations(s: &str) -> Vec<String> {
1042 let trimmed = s.trim();
1043 if trimmed.is_empty() {
1044 return Vec::new();
1045 }
1046
1047 if trimmed.starts_with('[')
1049 && let Ok(arr) = serde_json::from_str::<Vec<String>>(trimmed)
1050 {
1051 return arr;
1052 }
1053
1054 trimmed
1056 .lines()
1057 .map(str::trim)
1058 .filter(|line| !line.is_empty())
1059 .map(|line| {
1060 line
1061 .strip_prefix("- ")
1062 .or_else(|| line.strip_prefix("* "))
1063 .or_else(|| line.strip_prefix("• "))
1064 .unwrap_or(line)
1065 .trim()
1066 .to_string()
1067 })
1068 .filter(|line| !line.is_empty())
1069 .collect()
1070}
1071
1072fn build_observation_tool() -> Tool {
1073 Tool {
1074 tool_type: "function".to_string(),
1075 function: Function {
1076 name: "create_file_observation".to_string(),
1077 description: "Extract observations from a single file's changes".to_string(),
1078 parameters: FunctionParameters {
1079 param_type: "object".to_string(),
1080 properties: serde_json::json!({
1081 "observations": {
1082 "type": "array",
1083 "description": "List of factual observations about what changed in this file",
1084 "items": {
1085 "type": "string"
1086 }
1087 }
1088 }),
1089 required: vec!["observations".to_string()],
1090 },
1091 },
1092 }
1093}
1094
1095fn build_analysis_tool(type_enum: &[&str]) -> Tool {
1096 Tool {
1097 tool_type: "function".to_string(),
1098 function: Function {
1099 name: "create_conventional_analysis".to_string(),
1100 description: "Synthesize observations into conventional commit analysis".to_string(),
1101 parameters: FunctionParameters {
1102 param_type: "object".to_string(),
1103 properties: serde_json::json!({
1104 "type": {
1105 "type": "string",
1106 "enum": type_enum,
1107 "description": "Commit type based on combined changes"
1108 },
1109 "scope": {
1110 "type": "string",
1111 "description": "Optional scope (module/component). Omit if unclear or multi-component."
1112 },
1113 "details": {
1114 "type": "array",
1115 "description": "Array of 0-6 detail items with changelog metadata.",
1116 "items": {
1117 "type": "object",
1118 "properties": {
1119 "text": {
1120 "type": "string",
1121 "description": "Detail about change, starting with past-tense verb, ending with period"
1122 },
1123 "changelog_category": {
1124 "type": "string",
1125 "enum": ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security"],
1126 "description": "Changelog category if user-visible. Omit for internal changes."
1127 },
1128 "user_visible": {
1129 "type": "boolean",
1130 "description": "True if this change affects users/API and should appear in changelog"
1131 }
1132 },
1133 "required": ["text", "user_visible"]
1134 }
1135 },
1136 "issue_refs": {
1137 "type": "array",
1138 "description": "Issue numbers from context (e.g., ['#123', '#456']). Empty if none.",
1139 "items": {
1140 "type": "string"
1141 }
1142 }
1143 }),
1144 required: vec!["type".to_string(), "details".to_string(), "issue_refs".to_string()],
1145 },
1146 },
1147 }
1148}
1149
1150fn build_api_request(
1151 model: &str,
1152 temperature: f32,
1153 tools: Vec<Tool>,
1154 system: &str,
1155 user: &str,
1156) -> ApiRequest {
1157 let tool_name = tools.first().map(|t| t.function.name.clone());
1158
1159 let mut messages = Vec::new();
1160 if !system.is_empty() {
1161 messages.push(Message { role: "system".to_string(), content: system.to_string() });
1162 }
1163 messages.push(Message { role: "user".to_string(), content: user.to_string() });
1164
1165 ApiRequest {
1166 model: model.to_string(),
1167 max_tokens: 1500,
1168 temperature,
1169 tool_choice: tool_name
1170 .map(|name| serde_json::json!({ "type": "function", "function": { "name": name } })),
1171 tools,
1172 messages,
1173 }
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178 use super::*;
1179 use crate::tokens::TokenCounter;
1180
1181 fn test_counter() -> TokenCounter {
1182 TokenCounter::new("http://localhost:4000", None, "claude-sonnet-4.5")
1183 }
1184
1185 #[test]
1186 fn test_should_use_map_reduce_disabled() {
1187 let config = CommitConfig { map_reduce_enabled: false, ..Default::default() };
1188 let counter = test_counter();
1189 let diff = r"diff --git a/a.rs b/a.rs
1191@@ -0,0 +1 @@
1192+a
1193diff --git a/b.rs b/b.rs
1194@@ -0,0 +1 @@
1195+b
1196diff --git a/c.rs b/c.rs
1197@@ -0,0 +1 @@
1198+c
1199diff --git a/d.rs b/d.rs
1200@@ -0,0 +1 @@
1201+d";
1202 assert!(!should_use_map_reduce(diff, &config, &counter));
1203 }
1204
1205 #[test]
1206 fn test_should_use_map_reduce_few_files() {
1207 let config = CommitConfig::default();
1208 let counter = test_counter();
1209 let diff = r"diff --git a/a.rs b/a.rs
1211@@ -0,0 +1 @@
1212+a
1213diff --git a/b.rs b/b.rs
1214@@ -0,0 +1 @@
1215+b";
1216 assert!(!should_use_map_reduce(diff, &config, &counter));
1217 }
1218
1219 #[test]
1220 fn test_should_use_map_reduce_many_files() {
1221 let config = CommitConfig::default();
1222 let counter = test_counter();
1223 let diff = r"diff --git a/a.rs b/a.rs
1225@@ -0,0 +1 @@
1226+a
1227diff --git a/b.rs b/b.rs
1228@@ -0,0 +1 @@
1229+b
1230diff --git a/c.rs b/c.rs
1231@@ -0,0 +1 @@
1232+c
1233diff --git a/d.rs d/d.rs
1234@@ -0,0 +1 @@
1235+d
1236diff --git a/e.rs b/e.rs
1237@@ -0,0 +1 @@
1238+e";
1239 assert!(should_use_map_reduce(diff, &config, &counter));
1240 }
1241
1242 #[test]
1243 fn test_generate_context_header_empty() {
1244 let files = vec![FileDiff {
1245 filename: "only.rs".to_string(),
1246 header: String::new(),
1247 content: String::new(),
1248 additions: 10,
1249 deletions: 5,
1250 is_binary: false,
1251 }];
1252 let header = generate_context_header(&files, "only.rs");
1253 assert!(header.is_empty());
1254 }
1255
1256 #[test]
1257 fn test_generate_context_header_multiple() {
1258 let files = vec![
1259 FileDiff {
1260 filename: "src/main.rs".to_string(),
1261 header: String::new(),
1262 content: "fn main() {}".to_string(),
1263 additions: 10,
1264 deletions: 5,
1265 is_binary: false,
1266 },
1267 FileDiff {
1268 filename: "src/lib.rs".to_string(),
1269 header: String::new(),
1270 content: "mod test;".to_string(),
1271 additions: 3,
1272 deletions: 1,
1273 is_binary: false,
1274 },
1275 FileDiff {
1276 filename: "tests/test.rs".to_string(),
1277 header: String::new(),
1278 content: "#[test]".to_string(),
1279 additions: 20,
1280 deletions: 0,
1281 is_binary: false,
1282 },
1283 ];
1284
1285 let header = generate_context_header(&files, "src/main.rs");
1286 assert!(header.contains("OTHER FILES IN THIS CHANGE:"));
1287 assert!(header.contains("src/lib.rs"));
1288 assert!(header.contains("tests/test.rs"));
1289 assert!(!header.contains("src/main.rs")); }
1291
1292 #[test]
1293 fn test_infer_file_description() {
1294 assert_eq!(infer_file_description("src/test_utils.rs", ""), "test file");
1295 assert_eq!(infer_file_description("README.md", ""), "documentation");
1296 assert_eq!(infer_file_description("config.toml", ""), "configuration");
1297 assert_eq!(infer_file_description("src/error.rs", ""), "error definitions");
1298 assert_eq!(infer_file_description("src/types.rs", ""), "type definitions");
1299 assert_eq!(infer_file_description("src/mod.rs", ""), "module exports");
1300 assert_eq!(infer_file_description("src/main.rs", ""), "entry point");
1301 assert_eq!(infer_file_description("src/api.rs", "fn call()"), "implementation");
1302 assert_eq!(infer_file_description("src/models.rs", "struct Foo"), "type definitions");
1303 assert_eq!(infer_file_description("src/unknown.xyz", ""), "source code");
1304 }
1305
1306 #[test]
1307 fn test_parse_string_to_observations_json_array() {
1308 let input = r#"["item one", "item two", "item three"]"#;
1309 let result = parse_string_to_observations(input);
1310 assert_eq!(result, vec!["item one", "item two", "item three"]);
1311 }
1312
1313 #[test]
1314 fn test_parse_string_to_observations_bullet_points() {
1315 let input = "- added new function\n- fixed bug in parser\n- updated tests";
1316 let result = parse_string_to_observations(input);
1317 assert_eq!(result, vec!["added new function", "fixed bug in parser", "updated tests"]);
1318 }
1319
1320 #[test]
1321 fn test_parse_string_to_observations_asterisk_bullets() {
1322 let input = "* first change\n* second change";
1323 let result = parse_string_to_observations(input);
1324 assert_eq!(result, vec!["first change", "second change"]);
1325 }
1326
1327 #[test]
1328 fn test_parse_string_to_observations_empty() {
1329 assert!(parse_string_to_observations("").is_empty());
1330 assert!(parse_string_to_observations(" ").is_empty());
1331 }
1332
1333 #[test]
1334 fn test_deserialize_observations_array() {
1335 let json = r#"{"observations": ["a", "b", "c"]}"#;
1336 let result: FileObservationResponse = serde_json::from_str(json).unwrap();
1337 assert_eq!(result.observations, vec!["a", "b", "c"]);
1338 }
1339
1340 #[test]
1341 fn test_deserialize_observations_stringified_array() {
1342 let json = r#"{"observations": "[\"a\", \"b\", \"c\"]"}"#;
1343 let result: FileObservationResponse = serde_json::from_str(json).unwrap();
1344 assert_eq!(result.observations, vec!["a", "b", "c"]);
1345 }
1346
1347 #[test]
1348 fn test_deserialize_observations_bullet_string() {
1349 let json = r#"{"observations": "- updated function\n- fixed bug"}"#;
1350 let result: FileObservationResponse = serde_json::from_str(json).unwrap();
1351 assert_eq!(result.observations, vec!["updated function", "fixed bug"]);
1352 }
1353}