1use std::{
2 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
3 fmt::Write,
4 fs,
5 path::{Path, PathBuf},
6};
7
8use futures::stream::{self, StreamExt};
9use serde::{Deserialize, Serialize};
10
11use crate::{
12 api::{
13 AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
14 generate_summary_from_analysis, run_oneshot, strict_json_schema,
15 },
16 compose_types::{
17 ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
18 ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
19 },
20 config::CommitConfig,
21 error::{CommitGenError, Result},
22 git::{get_compose_diff, get_compose_stat, get_git_dir, get_head_hash, git_commit},
23 map_reduce::{FileObservation, observe_diff_files, should_use_map_reduce},
24 normalization::{format_commit_message, post_process_commit_message},
25 patch::{
26 StageResult, build_compose_snapshot, create_executable_group_patch, reset_staging,
27 stage_executable_group,
28 },
29 style, templates,
30 tokens::{TokenCounter, create_token_counter},
31 types::{Args, CommitType, ConventionalCommit, Scope},
32 validation::validate_commit_message,
33};
34
/// Observations rendered per file when the snapshot summary is not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and stored alongside cached plans.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// NOTE(review): presumably the sampling temperature for planner requests;
/// its use is outside this section.
const COMPOSE_PLANNER_TEMPERATURE: f32 = 0.0;

// Snapshot sizes above which the summary is compacted (medium) and, at the
// large tier, planning also switches from file IDs to area IDs.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;

// Limits used while splitting files into path-based planning areas: a bucket
// stops splitting once it fits both size limits or the depth limit is reached.
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;

// NOTE(review): the constants below are consumed outside this section; the
// names suggest monolith-fallback thresholds, per-request binding limits, and
// message-generation parallelism.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
52
53#[derive(Debug, Deserialize, Serialize)]
54struct ComposeIntentResponse {
55 groups: Vec<ComposeIntentGroup>,
56}
57
58#[derive(Debug, Deserialize, Serialize)]
59struct ComposeBindingResponse {
60 assignments: Vec<ComposeBindingAssignment>,
61}
62
63#[derive(Debug, Serialize, Deserialize)]
64struct ComposeCachedPlan {
65 schema_version: String,
66 cache_key: String,
67 plan: ComposeExecutablePlan,
68}
69
/// A file together with its candidate groups and hunks.
///
/// NOTE(review): consumers are outside this section; the name suggests a file
/// whose hunks could belong to more than one group.
#[derive(Clone, Debug)]
struct AmbiguousFileBinding {
    /// Identifier of the file.
    file_id: String,
    /// Path of the file.
    path: String,
    /// IDs of the groups that are candidates for this file.
    candidate_group_ids: Vec<String>,
    /// IDs of the hunks in this file.
    hunk_ids: Vec<String>,
}
77
/// Candidate groups recorded for a single hunk.
///
/// NOTE(review): consumers are outside this section.
#[derive(Clone, Debug)]
struct AmbiguousHunkContext {
    /// IDs of the groups that are candidates for the hunk.
    candidate_group_ids: Vec<String>,
}
82
/// Map from a string key to an ordered set of string IDs.
///
/// NOTE(review): presumably hunk ID -> assigned group IDs; confirm against the
/// code that populates it (outside this section).
type HunkAssignments = HashMap<String, BTreeSet<String>>;
84
/// Result of evaluating a set of bindings: what was assigned and what was not.
///
/// NOTE(review): producers and consumers are outside this section; key and
/// value meanings should be confirmed there.
#[derive(Debug)]
struct BindingEvaluation {
    /// Assigned entries, keyed by string ID.
    assigned: HashMap<String, Vec<String>>,
    /// IDs that could not be resolved.
    unresolved: Vec<String>,
}
90
/// Per-file rendering limits for the snapshot summary.
#[derive(Clone, Copy, Debug)]
struct SnapshotSummaryBudget {
    /// Maximum number of observations rendered per file.
    max_observations_per_file: usize,
    /// Maximum number of hunks rendered per file; `None` renders every hunk.
    max_hunks_per_file: Option<usize>,
}
96
/// Granularity of the targets offered to the planner.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PlanningMode {
    /// One target per file.
    File,
    /// One target per path-based area, which may cover several files.
    Area,
}
102
/// A unit the planner can assign to a group: a single file or a path area.
#[derive(Clone, Debug)]
struct PlanningTarget {
    /// Identifier shown to the planner (a file ID, or `A001`-style for areas).
    target_id: String,
    /// Human-readable label (the file path, or the area's path label).
    label: String,
    /// IDs of the files this target covers.
    file_ids: Vec<String>,
    /// Total number of hunks across the covered files.
    hunk_count: usize,
    /// Total added lines across the covered files.
    additions: usize,
    /// Total deleted lines across the covered files.
    deletions: usize,
}
112
113#[derive(Debug, Clone)]
114struct PlanningIndex {
115 mode: PlanningMode,
116 targets: Vec<PlanningTarget>,
117 aliases: HashMap<String, String>,
118}
119
/// A labelled set of files produced while partitioning a snapshot into areas.
#[derive(Clone, Debug)]
struct PlanningBucket {
    /// Label derived from the paths of the bucket's files.
    label: String,
    /// IDs of the files in the bucket.
    file_ids: Vec<String>,
}
125
126impl PlanningIndex {
127 fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
128 let mut expanded = Vec::new();
129 let mut seen_file_ids = HashSet::new();
130
131 for target_id in target_ids {
132 if let Some(target) = self
133 .targets
134 .iter()
135 .find(|candidate| candidate.target_id == *target_id)
136 {
137 for file_id in &target.file_ids {
138 if seen_file_ids.insert(file_id.clone()) {
139 expanded.push(file_id.clone());
140 }
141 }
142 }
143 }
144
145 expanded
146 }
147}
148
149impl SnapshotSummaryBudget {
150 const fn is_compacted(self) -> bool {
151 self.max_hunks_per_file.is_some()
152 }
153}
154
/// Reports whether `path` names a dependency manifest or lock file.
///
/// The final path component is compared (case-sensitively) against a list of
/// well-known manifest names; failing that, any file whose extension is
/// `lock` or `lockb` (ASCII case-insensitive) also qualifies.
fn is_dependency_manifest(path: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        "Cargo.toml",
        "Cargo.lock",
        "package.json",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "build.gradle",
        "build.gradle.kts",
        "gradle.properties",
        "pom.xml",
    ];

    // Paths without a UTF-8 final component cannot match anything.
    let name = match Path::new(path).file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => return false,
    };

    if KNOWN_MANIFESTS.iter().any(|known| *known == name) {
        return true;
    }

    match Path::new(name).extension() {
        Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
        None => false,
    }
}
194
195fn save_debug_artifact<T: Serialize>(
196 debug_dir: Option<&Path>,
197 filename: &str,
198 value: &T,
199) -> Result<()> {
200 let Some(debug_dir) = debug_dir else {
201 return Ok(());
202 };
203
204 fs::create_dir_all(debug_dir)?;
205 let path = debug_dir.join(filename);
206 let json = serde_json::to_string_pretty(value)?;
207 fs::write(path, json)?;
208 Ok(())
209}
210
/// Hashes `input` with 64-bit FNV-1a and returns 16 lowercase hex digits.
fn fnv1a_64(input: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = input
        .bytes()
        .fold(OFFSET_BASIS, |state, byte| (state ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
219
220fn compose_plan_cache_key(
221 snapshot: &ComposeSnapshot,
222 max_commits: usize,
223 analysis_model: &str,
224) -> String {
225 fnv1a_64(&format!(
226 "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
227 snapshot.diff, snapshot.stat
228 ))
229}
230
231fn compose_plan_cache_path(
232 dir: &str,
233 snapshot: &ComposeSnapshot,
234 max_commits: usize,
235 analysis_model: &str,
236) -> Result<PathBuf> {
237 let git_dir = get_git_dir(dir)?;
238 Ok(git_dir.join("llm-git").join(format!(
239 "compose-plan-{}.json",
240 compose_plan_cache_key(snapshot, max_commits, analysis_model)
241 )))
242}
243
244fn load_cached_plan(
245 dir: &str,
246 snapshot: &ComposeSnapshot,
247 max_commits: usize,
248 analysis_model: &str,
249) -> Result<Option<ComposeExecutablePlan>> {
250 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
251 if !cache_path.exists() {
252 return Ok(None);
253 }
254
255 let content = match fs::read_to_string(&cache_path) {
256 Ok(content) => content,
257 Err(err) => {
258 eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
259 return Ok(None);
260 },
261 };
262 let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
263 Ok(cached) => cached,
264 Err(err) => {
265 eprintln!(
266 "{}",
267 style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
268 );
269 let _ = fs::remove_file(&cache_path);
270 return Ok(None);
271 },
272 };
273 let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
274
275 if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
276 return Ok(None);
277 }
278 if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
279 eprintln!(
280 "{}",
281 style::warning(&format!(
282 "Discarding cached compose plan (no longer valid for current snapshot): {err}"
283 ))
284 );
285 let _ = fs::remove_file(&cache_path);
286 return Ok(None);
287 }
288 Ok(Some(cached.plan))
289}
290
291fn save_cached_plan(
292 dir: &str,
293 snapshot: &ComposeSnapshot,
294 max_commits: usize,
295 analysis_model: &str,
296 plan: &ComposeExecutablePlan,
297) -> Result<()> {
298 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
299 if let Some(parent) = cache_path.parent() {
300 fs::create_dir_all(parent)?;
301 }
302
303 let cached = ComposeCachedPlan {
304 schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
305 cache_key: compose_plan_cache_key(snapshot, max_commits, analysis_model),
306 plan: plan.clone(),
307 };
308 fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
309 Ok(())
310}
311
/// Formats a line span for display.
///
/// Returns `"0"` for an empty span, the start line alone for a single line,
/// and `"start-end"` (inclusive) otherwise.
fn format_line_range(start: usize, count: usize) -> String {
    if count == 0 {
        return "0".to_string();
    }
    if count == 1 {
        return start.to_string();
    }

    let end = start + count - 1;
    format!("{start}-{end}")
}
319
320const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
321 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
322 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
323 {
324 SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
325 } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
326 || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
327 {
328 SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
329 } else {
330 SnapshotSummaryBudget {
331 max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
332 max_hunks_per_file: None,
333 }
334 }
335}
336
/// Picks up to `max_samples` evenly spread indices from `0..count`.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned, so the result never
///   exceeds the requested maximum.
/// * When `max_samples` is 1, only index 0 is returned.
/// * Otherwise the first and last index are always included and the rest are
///   spaced evenly between them, in ascending order without repeats.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
    if count <= max_samples {
        return (0..count).collect();
    }

    match max_samples {
        0 => return Vec::new(),
        1 => return vec![0],
        _ => {},
    }

    // From here on `count > max_samples >= 2`, so `max_samples - 1 >= 1`.
    let last = count - 1;
    let mut positions: Vec<usize> = (0..max_samples)
        .map(|slot| slot * last / (max_samples - 1))
        .collect();
    // The sequence is non-decreasing, so `dedup` removes any repeats.
    positions.dedup();
    positions
}
356
357fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
358 match budget.max_hunks_per_file {
359 None => file.hunk_ids.iter().map(String::as_str).collect(),
360 Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
361 .into_iter()
362 .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
363 .collect(),
364 }
365}
366
367fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
368 let budget = snapshot_summary_budget(snapshot);
369 let observations_by_file: HashMap<&str, Vec<&str>> = observations
370 .iter()
371 .map(|observation| {
372 (
373 observation.file.as_str(),
374 observation
375 .observations
376 .iter()
377 .map(String::as_str)
378 .take(budget.max_observations_per_file)
379 .collect(),
380 )
381 })
382 .collect();
383
384 let mut out = String::new();
385 if budget.is_compacted() {
386 let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
387 writeln!(
388 out,
389 "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
390 representative hunks and {} observation(s) per file",
391 budget.max_observations_per_file
392 )
393 .unwrap();
394 }
395
396 for file in &snapshot.files {
397 writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
398 if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
399 for observation in file_observations {
400 writeln!(out, " observation: {observation}").unwrap();
401 }
402 }
403
404 let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
405 for hunk_id in &rendered_hunk_ids {
406 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
407 if hunk.synthetic {
408 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
409 } else {
410 writeln!(
411 out,
412 " - {} old:{} new:{} :: {}",
413 hunk.hunk_id,
414 format_line_range(hunk.old_start, hunk.old_count),
415 format_line_range(hunk.new_start, hunk.new_count),
416 hunk.snippet
417 )
418 .unwrap();
419 }
420 }
421 }
422
423 let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
424 if omitted_hunks > 0 {
425 writeln!(out, " ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
426 }
427 }
428
429 out
430}
431
432const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
433 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
434 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
435 {
436 PlanningMode::Area
437 } else {
438 PlanningMode::File
439 }
440}
441
/// Counts the `/`-separated segments of `path`.
///
/// Empty segments count too, so the result is always at least 1.
fn path_depth(path: &str) -> usize {
    // Segment count is one more than the number of separators.
    path.matches('/').count() + 1
}
445
/// Returns the first `depth` `/`-separated segments of `path`, rejoined.
///
/// A depth of 0 yields an empty string; a depth beyond the number of
/// segments yields the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
    let leading: Vec<&str> = path.split('/').take(depth).collect();
    leading.join("/")
}
455
/// Returns the longest run of leading `/`-separated segments shared by every
/// path.
///
/// An empty slice yields an empty string; a single path is returned whole.
fn common_path_prefix(paths: &[String]) -> String {
    let mut remaining = paths.iter();
    let mut shared: Vec<&str> = match remaining.next() {
        Some(first) => first.split('/').collect(),
        None => return String::new(),
    };

    for other in remaining {
        let keep = shared
            .iter()
            .zip(other.split('/'))
            .take_while(|pair| *pair.0 == pair.1)
            .count();
        shared.truncate(keep);

        // Nothing in common any more; later paths cannot bring it back.
        if shared.is_empty() {
            break;
        }
    }

    shared.join("/")
}
477
478fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
479 file_ids
480 .iter()
481 .filter_map(|file_id| snapshot.file_by_id(file_id))
482 .map(|file| file.hunk_ids.len())
483 .sum()
484}
485
486fn group_file_ids_by_prefix(
487 snapshot: &ComposeSnapshot,
488 file_ids: &[String],
489 depth: usize,
490) -> BTreeMap<String, Vec<String>> {
491 let mut groups = BTreeMap::new();
492
493 for file_id in file_ids {
494 if let Some(file) = snapshot.file_by_id(file_id) {
495 groups
496 .entry(prefix_at_depth(&file.path, depth))
497 .or_insert_with(Vec::new)
498 .push(file_id.clone());
499 }
500 }
501
502 groups
503}
504
505fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
506 let paths: Vec<String> = file_ids
507 .iter()
508 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
509 .collect();
510
511 let common_prefix = common_path_prefix(&paths);
512 if common_prefix.is_empty() {
513 paths.first().cloned().unwrap_or_else(|| "misc".to_string())
514 } else {
515 common_prefix
516 }
517}
518
519fn collect_planning_buckets(
520 snapshot: &ComposeSnapshot,
521 file_ids: &[String],
522 depth: usize,
523) -> Vec<PlanningBucket> {
524 let file_count = file_ids.len();
525 let hunk_count = bucket_hunk_count(snapshot, file_ids);
526 let max_path_depth = file_ids
527 .iter()
528 .filter_map(|file_id| snapshot.file_by_id(file_id))
529 .map(|file| path_depth(&file.path))
530 .max()
531 .unwrap_or(depth);
532
533 let should_stop =
534 file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
535 if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
536 return vec![PlanningBucket {
537 label: planning_bucket_label(snapshot, file_ids),
538 file_ids: file_ids.to_vec(),
539 }];
540 }
541
542 let next_depth = depth + 1;
543 let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
544 if groups.len() <= 1 {
545 return collect_planning_buckets(snapshot, file_ids, next_depth);
546 }
547
548 groups
549 .into_values()
550 .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
551 .collect()
552}
553
554fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
555 let all_file_ids: Vec<String> = snapshot
556 .files
557 .iter()
558 .map(|file| file.file_id.clone())
559 .collect();
560 let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
561
562 buckets
563 .into_iter()
564 .enumerate()
565 .map(|(idx, bucket)| {
566 let mut additions = 0_usize;
567 let mut deletions = 0_usize;
568 let mut hunk_count = 0_usize;
569
570 for file_id in &bucket.file_ids {
571 if let Some(file) = snapshot.file_by_id(file_id) {
572 additions = additions.saturating_add(file.additions);
573 deletions = deletions.saturating_add(file.deletions);
574 hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
575 }
576 }
577
578 PlanningTarget {
579 target_id: format!("A{:03}", idx + 1),
580 label: bucket.label,
581 file_ids: bucket.file_ids,
582 hunk_count,
583 additions,
584 deletions,
585 }
586 })
587 .collect()
588}
589
590fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
591 snapshot
592 .files
593 .iter()
594 .map(|file| PlanningTarget {
595 target_id: file.file_id.clone(),
596 label: file.path.clone(),
597 file_ids: vec![file.file_id.clone()],
598 hunk_count: file.hunk_ids.len(),
599 additions: file.additions,
600 deletions: file.deletions,
601 })
602 .collect()
603}
604
605fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
606 let mode = planning_mode_for_snapshot(snapshot);
607 let targets = match mode {
608 PlanningMode::File => build_file_planning_targets(snapshot),
609 PlanningMode::Area => build_area_planning_targets(snapshot),
610 };
611
612 let aliases = targets
613 .iter()
614 .flat_map(|target| {
615 let normalized_label = normalize_file_reference(&target.label);
616 [
617 (target.target_id.clone(), target.target_id.clone()),
618 (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
619 (normalized_label, target.target_id.clone()),
620 ]
621 })
622 .collect();
623
624 PlanningIndex { mode, targets, aliases }
625}
626
627fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
628 sample_positions(target.file_ids.len(), 4)
629 .into_iter()
630 .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
631 .collect()
632}
633
634fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
635 let hunk_ids: Vec<&String> = target
636 .file_ids
637 .iter()
638 .filter_map(|file_id| snapshot.file_by_id(file_id))
639 .flat_map(|file| file.hunk_ids.iter())
640 .collect();
641
642 sample_positions(hunk_ids.len(), 4)
643 .into_iter()
644 .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
645 .collect()
646}
647
648fn render_planning_stat(index: &PlanningIndex) -> String {
649 let mut out = String::new();
650
651 match index.mode {
652 PlanningMode::File => {
653 writeln!(out, "# planning over individual file IDs").unwrap();
654 },
655 PlanningMode::Area => {
656 writeln!(
657 out,
658 "# planning over {} area IDs spanning {} files",
659 index.targets.len(),
660 index
661 .targets
662 .iter()
663 .flat_map(|target| target.file_ids.iter())
664 .collect::<HashSet<_>>()
665 .len()
666 )
667 .unwrap();
668 },
669 }
670
671 for target in &index.targets {
672 writeln!(
673 out,
674 "{} {} | {} files | {} hunks | +{}/-{}",
675 target.target_id,
676 target.label,
677 target.file_ids.len(),
678 target.hunk_count,
679 target.additions,
680 target.deletions
681 )
682 .unwrap();
683 }
684
685 out
686}
687
688fn render_planning_snapshot_summary(
689 snapshot: &ComposeSnapshot,
690 observations: &[FileObservation],
691 index: &PlanningIndex,
692) -> String {
693 if index.mode == PlanningMode::File {
694 return render_snapshot_summary(snapshot, observations);
695 }
696
697 let observations_by_file: HashMap<&str, Vec<&str>> = observations
698 .iter()
699 .map(|observation| {
700 (
701 observation.file.as_str(),
702 observation
703 .observations
704 .iter()
705 .map(String::as_str)
706 .take(1)
707 .collect(),
708 )
709 })
710 .collect();
711
712 let mut out = String::new();
713 writeln!(
714 out,
715 "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
716 )
717 .unwrap();
718
719 for target in &index.targets {
720 writeln!(
721 out,
722 "- {} {} ({} files, {} hunks, +{}/-{})",
723 target.target_id,
724 target.label,
725 target.file_ids.len(),
726 target.hunk_count,
727 target.additions,
728 target.deletions
729 )
730 .unwrap();
731
732 let sample_file_ids = sample_file_ids_for_target(target);
733 if !sample_file_ids.is_empty() {
734 let sample_files: Vec<String> = sample_file_ids
735 .iter()
736 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
737 .collect();
738 writeln!(out, " files: {}", sample_files.join(", ")).unwrap();
739 let omitted = target.file_ids.len().saturating_sub(sample_files.len());
740 if omitted > 0 {
741 writeln!(out, " ... {omitted} more files omitted from {}", target.target_id).unwrap();
742 }
743 }
744
745 let mut rendered_observations = 0_usize;
746 for file_id in &target.file_ids {
747 let Some(file) = snapshot.file_by_id(file_id) else {
748 continue;
749 };
750 let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
751 continue;
752 };
753
754 for observation in file_observations {
755 writeln!(out, " observation: {observation}").unwrap();
756 rendered_observations += 1;
757 if rendered_observations >= 2 {
758 break;
759 }
760 }
761
762 if rendered_observations >= 2 {
763 break;
764 }
765 }
766
767 for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
768 if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
769 if hunk.synthetic {
770 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
771 } else {
772 writeln!(
773 out,
774 " - {} old:{} new:{} :: {}",
775 hunk.hunk_id,
776 format_line_range(hunk.old_start, hunk.old_count),
777 format_line_range(hunk.new_start, hunk.new_count),
778 hunk.snippet
779 )
780 .unwrap();
781 }
782 }
783 }
784 }
785
786 out
787}
788
789fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
790 match index.mode {
791 PlanningMode::File => format!(
792 "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
793 snapshot.files.len()
794 ),
795 PlanningMode::Area => format!(
796 "Area IDs only. Each target may expand to multiple files by shared path prefix. \
797 Coverage: {} areas spanning {} files.",
798 index.targets.len(),
799 snapshot.files.len()
800 ),
801 }
802}
803
804fn render_planning_notes(index: &PlanningIndex) -> String {
805 match index.mode {
806 PlanningMode::File => {
807 "Use only the provided file IDs and keep the grouping conservative.".to_string()
808 },
809 PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
810 planning areas. Split along independent subsystems or workstreams \
811 when the areas point at unrelated changes."
812 .to_string(),
813 }
814}
815
816fn render_split_bias(index: &PlanningIndex) -> String {
817 match index.mode {
818 PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
819 PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
820 one broad group if nearly every area clearly belongs to the same \
821 atomic change."
822 .to_string(),
823 }
824}
825
826fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
827 let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
828
829 strict_json_schema(
830 serde_json::json!({
831 "groups": {
832 "type": "array",
833 "items": {
834 "type": "object",
835 "properties": {
836 "group_id": {
837 "type": "string",
838 "description": "Stable identifier like G1, G2, G3"
839 },
840 "file_ids": {
841 "type": "array",
842 "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
843 "items": { "type": "string" }
844 },
845 "type": {
846 "type": "string",
847 "enum": type_enum,
848 "description": "Conventional commit type for this group"
849 },
850 "scope": {
851 "type": "string",
852 "description": "Optional scope (module/component). Omit if broad."
853 },
854 "rationale": {
855 "type": "string",
856 "description": "Brief explanation of the logical change"
857 },
858 "dependencies": {
859 "type": "array",
860 "description": "Group IDs this group depends on",
861 "items": { "type": "string" }
862 }
863 },
864 "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
865 "additionalProperties": false
866 }
867 }
868 }),
869 &["groups"],
870 )
871}
872
873fn build_binding_schema() -> serde_json::Value {
874 strict_json_schema(
875 serde_json::json!({
876 "assignments": {
877 "type": "array",
878 "items": {
879 "type": "object",
880 "properties": {
881 "group_id": { "type": "string" },
882 "hunk_ids": {
883 "type": "array",
884 "items": { "type": "string" }
885 }
886 },
887 "required": ["group_id", "hunk_ids"],
888 "additionalProperties": false
889 }
890 }
891 }),
892 &["assignments"],
893 )
894}
895
896fn compute_dependency_order<T, FId, FDeps>(
897 groups: &[T],
898 group_id: FId,
899 dependencies: FDeps,
900) -> Result<Vec<usize>>
901where
902 FId: Fn(&T) -> &str,
903 FDeps: Fn(&T) -> &[String],
904{
905 let mut index_by_id = HashMap::new();
906 for (idx, group) in groups.iter().enumerate() {
907 let id = group_id(group);
908 if id.trim().is_empty() {
909 return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
910 }
911 if index_by_id.insert(id.to_string(), idx).is_some() {
912 return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
913 }
914 }
915
916 let mut in_degree = vec![0_usize; groups.len()];
917 let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
918
919 for (idx, group) in groups.iter().enumerate() {
920 for dependency in dependencies(group) {
921 let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
922 CommitGenError::Other(format!(
923 "Group {} depends on unknown group_id '{}'",
924 group_id(group),
925 dependency
926 ))
927 })?;
928 if dependency_idx == idx {
929 return Err(CommitGenError::Other(format!(
930 "Group {} depends on itself",
931 group_id(group)
932 )));
933 }
934
935 adjacency[dependency_idx].push(idx);
936 in_degree[idx] += 1;
937 }
938 }
939
940 let mut queue: Vec<usize> = (0..groups.len())
941 .filter(|idx| in_degree[*idx] == 0)
942 .collect();
943 let mut order = Vec::with_capacity(groups.len());
944
945 while let Some(node) = queue.pop() {
946 order.push(node);
947 for neighbor in &adjacency[node] {
948 in_degree[*neighbor] -= 1;
949 if in_degree[*neighbor] == 0 {
950 queue.push(*neighbor);
951 }
952 }
953 }
954
955 if order.len() != groups.len() {
956 return Err(CommitGenError::Other(
957 "Circular dependency detected in compose groups".to_string(),
958 ));
959 }
960
961 Ok(order)
962}
963
/// Cleans up a loosely formatted file or ID reference.
///
/// Strips surrounding whitespace, leading quote characters (`` ` ``, `"`,
/// `'`), any trailing mix of quote characters and `,`/`;`, and leading `./`
/// prefixes.
///
/// Trailing quotes and punctuation are removed together so that a reference
/// such as `"src/a.rs",` does not keep a dangling closing quote, which
/// happened when quotes were stripped before the trailing punctuation.
fn normalize_file_reference(raw_file_ref: &str) -> String {
    fn is_quote(ch: char) -> bool {
        matches!(ch, '`' | '"' | '\'')
    }

    raw_file_ref
        .trim()
        .trim_start_matches(is_quote)
        .trim_end_matches(|ch: char| is_quote(ch) || matches!(ch, ',' | ';'))
        .trim_start_matches("./")
        .to_string()
}
972
/// Extracts distinct lowercase keywords from free-form text.
///
/// The text is split on every character that is not ASCII alphanumeric.
/// Words shorter than three characters and common stop words are dropped;
/// the remaining words are returned lowercased, each once, in order of first
/// appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "and",
        "for",
        "the",
        "with",
        "from",
        "into",
        "after",
        "before",
        "over",
        "under",
        "plus",
        "across",
        "update",
        "updated",
        "refactor",
        "refactored",
        "changes",
        "change",
        "logical",
        "group",
        "groups",
        "commit",
        "commits",
    ];

    let mut seen = HashSet::new();

    text.split(|ch: char| !ch.is_ascii_alphanumeric())
        // Every piece is pure ASCII, so byte length equals character count.
        .filter(|word| word.len() >= 3)
        .map(str::to_ascii_lowercase)
        .filter(|word| !STOP_WORDS.contains(&word.as_str()))
        // `insert` returns false for repeats, keeping only first occurrences.
        .filter(|word| seen.insert(word.clone()))
        .collect()
}
1024
1025fn extract_group_id_candidate(raw: &str) -> Option<String> {
1026 let normalized = normalize_file_reference(raw);
1027 let uppercase = normalized.to_ascii_uppercase();
1028
1029 if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1030 return Some(format!("G{uppercase}"));
1031 }
1032
1033 if let Some(rest) = uppercase.strip_prefix('G')
1034 && !rest.is_empty()
1035 && rest.chars().all(|ch| ch.is_ascii_digit())
1036 {
1037 return Some(format!("G{rest}"));
1038 }
1039
1040 let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1041 let compact = uppercase
1042 .chars()
1043 .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1044 .collect::<String>();
1045 if compact.starts_with("GROUP") && !digits.is_empty() {
1046 return Some(format!("G{digits}"));
1047 }
1048
1049 None
1050}
1051
/// Coarse classification of a changed file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeFileCategory {
    /// File flagged as binary.
    Binary,
    /// Dependency manifest or lock file.
    Dependency,
    /// Documentation file.
    Docs,
    /// Test file.
    Test,
    /// Configuration file.
    Config,
    /// Source code file.
    Source,
    /// Anything that matches none of the other categories.
    Other,
}
1062
1063fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1064 if file.is_binary {
1065 return ComposeFileCategory::Binary;
1066 }
1067
1068 if is_dependency_manifest(&file.path) {
1069 return ComposeFileCategory::Dependency;
1070 }
1071
1072 let path = file.path.to_ascii_lowercase();
1073 let file_name = Path::new(&path)
1074 .file_name()
1075 .and_then(|name| name.to_str())
1076 .unwrap_or_default();
1077 let extension = Path::new(&path)
1078 .extension()
1079 .and_then(|ext| ext.to_str())
1080 .unwrap_or_default();
1081
1082 if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1083 return ComposeFileCategory::Docs;
1084 }
1085
1086 if path.contains("/tests/")
1087 || path.starts_with("tests/")
1088 || file_name.contains("test")
1089 || file_name.contains("spec")
1090 {
1091 return ComposeFileCategory::Test;
1092 }
1093
1094 if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1095 return ComposeFileCategory::Config;
1096 }
1097
1098 if matches!(
1099 extension,
1100 "rs"
1101 | "py"
1102 | "js"
1103 | "jsx"
1104 | "ts"
1105 | "tsx"
1106 | "go"
1107 | "java"
1108 | "kt"
1109 | "c"
1110 | "cc"
1111 | "cpp"
1112 | "h"
1113 | "hpp"
1114 | "cs"
1115 | "rb"
1116 | "php"
1117 | "swift"
1118 | "scala"
1119 | "m"
1120 | "mm"
1121 ) {
1122 return ComposeFileCategory::Source;
1123 }
1124
1125 ComposeFileCategory::Other
1126}
1127
/// Counts how many leading `/`-separated segments two paths have in common.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
    let mut shared = 0;
    for (left_segment, right_segment) in left.split('/').zip(right.split('/')) {
        if left_segment != right_segment {
            break;
        }
        shared += 1;
    }
    shared
}
1135
1136fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1137 let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1138
1139 if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1140 score += 40;
1141 }
1142
1143 if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1144 score += 12;
1145 }
1146
1147 if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1148 score += 18;
1149 }
1150
1151 score
1152}
1153
1154fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1155 match (compose_file_category(file), group.commit_type.as_str()) {
1156 (ComposeFileCategory::Docs, "docs") => 25,
1157 (ComposeFileCategory::Test, "test") => 25,
1158 (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1159 (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1160 (ComposeFileCategory::Source, "feat" | "fix" | "refactor" | "perf") => 10,
1161 _ => 0,
1162 }
1163}
1164
1165fn best_group_for_missing_file(
1166 snapshot: &ComposeSnapshot,
1167 groups: &[ComposeIntentGroup],
1168 missing_file: &ComposeFile,
1169) -> usize {
1170 let mut best_group_idx = 0;
1171 let mut best_score = i32::MIN;
1172 let mut best_group_size = usize::MAX;
1173
1174 for (group_idx, group) in groups.iter().enumerate() {
1175 let similarity = group
1176 .file_ids
1177 .iter()
1178 .filter_map(|file_id| snapshot.file_by_id(file_id))
1179 .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1180 .max()
1181 .unwrap_or_default();
1182 let score = similarity + group_type_bonus(missing_file, group);
1183 let group_size = group.file_ids.len();
1184
1185 if score > best_score || (score == best_score && group_size < best_group_size) {
1186 best_group_idx = group_idx;
1187 best_score = score;
1188 best_group_size = group_size;
1189 }
1190 }
1191
1192 best_group_idx
1193}
1194
1195fn normalize_dependency_reference(
1196 raw_dependency: &str,
1197 known_group_ids: &HashSet<String>,
1198) -> Option<String> {
1199 let normalized = normalize_file_reference(raw_dependency);
1200 if normalized.is_empty() {
1201 return None;
1202 }
1203
1204 if known_group_ids.contains(&normalized) {
1205 return Some(normalized);
1206 }
1207
1208 let uppercase = normalized.to_ascii_uppercase();
1209 if known_group_ids.contains(&uppercase) {
1210 return Some(uppercase);
1211 }
1212
1213 let candidate = extract_group_id_candidate(&normalized)?;
1214 known_group_ids.contains(&candidate).then_some(candidate)
1215}
1216
1217fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1218 let label = target.label.to_ascii_lowercase();
1219 let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1220 let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1221
1222 if let Some(scope) = &group.scope {
1223 let scope = scope.as_str().to_ascii_lowercase();
1224 if label.contains(&scope) || workstream.contains(&scope) {
1225 score += 140;
1226 }
1227
1228 for segment in scope.split('/') {
1229 if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1230 score += 45;
1231 }
1232 }
1233 }
1234
1235 for token in planning_text_tokens(&group.rationale) {
1236 if label.contains(&token) || workstream.contains(&token) {
1237 score += 16;
1238 }
1239 }
1240
1241 match group.commit_type.as_str() {
1242 "ci" if target.label.starts_with(".github/") => score += 120,
1243 "docs"
1244 if target.label.starts_with("docs/")
1245 || Path::new(&target.label)
1246 .extension()
1247 .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1248 {
1249 score += 80;
1250 },
1251 "build" | "chore"
1252 if target.label.contains("Cargo")
1253 || target.label.contains("package")
1254 || target.label.contains("lock")
1255 || target.label.contains("tsconfig")
1256 || target.label.contains("biome")
1257 || target.label.contains("bun") =>
1258 {
1259 score += 55;
1260 },
1261 _ => {},
1262 }
1263
1264 score
1265}
1266
1267fn seed_group_targets(
1268 groups: &[ComposeIntentGroup],
1269 planning_index: &PlanningIndex,
1270 group_targets: &mut [Vec<String>],
1271 repair_notes: &mut Vec<String>,
1272) {
1273 let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1274
1275 for (group_idx, group) in groups.iter().enumerate() {
1276 if !group_targets[group_idx].is_empty() {
1277 continue;
1278 }
1279
1280 let fallback_target = planning_index
1281 .targets
1282 .iter()
1283 .max_by_key(|target| {
1284 let mut score = planning_target_match_score(target, group);
1285 if !claimed_target_ids.contains(&target.target_id) {
1286 score += 60;
1287 }
1288 (score, target.hunk_count, target.file_ids.len())
1289 })
1290 .or_else(|| planning_index.targets.first());
1291
1292 let Some(fallback_target) = fallback_target else {
1293 continue;
1294 };
1295
1296 group_targets[group_idx].push(fallback_target.target_id.clone());
1297 claimed_target_ids.insert(fallback_target.target_id.clone());
1298 repair_notes.push(format!(
1299 "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1300 group.group_id, fallback_target.target_id, fallback_target.label
1301 ));
1302 }
1303}
1304
1305fn normalize_intent_plan(
1306 snapshot: &ComposeSnapshot,
1307 planning_index: &PlanningIndex,
1308 mut groups: Vec<ComposeIntentGroup>,
1309) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
1310 if groups.is_empty() {
1311 return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
1312 }
1313
1314 let known_target_ids: HashSet<&str> = planning_index
1315 .targets
1316 .iter()
1317 .map(|target| target.target_id.as_str())
1318 .collect();
1319 let mut repair_notes = Vec::new();
1320 let mut covered_file_ids = HashSet::new();
1321 let mut normalized_group_targets = Vec::with_capacity(groups.len());
1322
1323 for group in &groups {
1324 if group.file_ids.is_empty() {
1325 repair_notes.push(format!(
1326 "Compose planner left {} without planning targets; assigning targets heuristically",
1327 group.group_id
1328 ));
1329 }
1330
1331 let mut normalized_target_ids = Vec::new();
1332 let mut seen_target_ids = HashSet::new();
1333 for raw_target_ref in &group.file_ids {
1334 let normalized_ref = normalize_file_reference(raw_target_ref);
1335 let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
1336 normalized_ref.clone()
1337 } else {
1338 let uppercase_ref = normalized_ref.to_ascii_uppercase();
1339 if known_target_ids.contains(uppercase_ref.as_str()) {
1340 uppercase_ref
1341 } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
1342 if raw_target_ref != target_id {
1343 repair_notes.push(format!(
1344 "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
1345 ));
1346 }
1347 target_id.clone()
1348 } else {
1349 repair_notes.push(format!(
1350 "Dropped unknown planning target '{}' from {}",
1351 raw_target_ref, group.group_id
1352 ));
1353 continue;
1354 }
1355 };
1356
1357 if seen_target_ids.insert(canonical_target_id.clone()) {
1358 normalized_target_ids.push(canonical_target_id);
1359 }
1360 }
1361
1362 normalized_group_targets.push(normalized_target_ids);
1363 }
1364
1365 seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);
1366
1367 let known_group_ids: HashSet<String> =
1368 groups.iter().map(|group| group.group_id.clone()).collect();
1369 for group in &mut groups {
1370 let mut normalized_dependencies = Vec::new();
1371 let mut seen_dependencies = HashSet::new();
1372
1373 for raw_dependency in &group.dependencies {
1374 let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
1375 else {
1376 repair_notes.push(format!(
1377 "Dropped unknown dependency '{}' from {}",
1378 raw_dependency, group.group_id
1379 ));
1380 continue;
1381 };
1382
1383 if dependency == group.group_id {
1384 repair_notes.push(format!(
1385 "Dropped self-dependency '{}' from {}",
1386 raw_dependency, group.group_id
1387 ));
1388 continue;
1389 }
1390
1391 if seen_dependencies.insert(dependency.clone()) {
1392 if raw_dependency != &dependency {
1393 repair_notes.push(format!(
1394 "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
1395 ));
1396 }
1397 normalized_dependencies.push(dependency);
1398 }
1399 }
1400
1401 group.dependencies = normalized_dependencies;
1402 }
1403
1404 for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
1405 let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
1406 for file_id in &expanded_file_ids {
1407 covered_file_ids.insert(file_id.clone());
1408 }
1409 group.file_ids = expanded_file_ids;
1410 }
1411
1412 for file in &snapshot.files {
1413 if covered_file_ids.contains(file.file_id.as_str()) {
1414 continue;
1415 }
1416
1417 let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
1418 let target_group = &mut groups[target_group_idx];
1419 target_group.file_ids.push(file.file_id.clone());
1420 covered_file_ids.insert(file.file_id.clone());
1421 repair_notes.push(format!(
1422 "Compose planner omitted {} ({}); assigned it to {}",
1423 file.file_id, file.path, target_group.group_id
1424 ));
1425 }
1426
1427 Ok((groups, repair_notes))
1428}
1429
/// Derives the workstream key for a path-like `label`.
///
/// Empty segments are ignored. The key is normally the first segment; for the container roots
/// `.github`, `apps`, `packages`, `crates`, `services`, `libs`, and `pass` it is the first two
/// segments joined by `/` (or just the root when there is no second segment). A label with no
/// non-empty segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
    let mut segments = label.split('/').filter(|segment| !segment.is_empty());
    let Some(first) = segments.next() else {
        return label.to_string();
    };

    let keeps_second_segment = matches!(
        first,
        ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
    );

    match segments.next() {
        Some(second) if keeps_second_segment => format!("{first}/{second}"),
        _ => first.to_string(),
    }
}
1451
1452fn workstream_display_name(label: &str) -> String {
1453 let key = workstream_key_for_label(label);
1454 match key.as_str() {
1455 ".github/workflows" => "CI workflows".to_string(),
1456 ".github" => "GitHub automation".to_string(),
1457 _ => key
1458 .split('/')
1459 .next_back()
1460 .map(|segment| segment.replace(['_', '-'], " "))
1461 .unwrap_or(key),
1462 }
1463}
1464
/// Turns free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII alphanumerics are lowercased and kept. Runs of `-`, `_`, `/`, `.`, and spaces collapse
/// into a single `-` (never at the start). All other characters are dropped without affecting
/// separator state. Trailing dashes are trimmed. Returns `None` when nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
    let mut slug = String::new();
    let mut ends_with_separator = false;

    for ch in raw.trim().chars() {
        match ch {
            _ if ch.is_ascii_alphanumeric() => {
                slug.push(ch.to_ascii_lowercase());
                ends_with_separator = false;
            },
            '-' | '_' | '/' | '.' | ' ' if !slug.is_empty() && !ends_with_separator => {
                slug.push('-');
                ends_with_separator = true;
            },
            _ => {},
        }
    }

    let slug = slug.trim_matches('-');
    if slug.is_empty() {
        None
    } else {
        Some(slug.to_string())
    }
}
1483
1484fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1485 let key = workstream_key_for_label(label);
1486 let candidate = key
1487 .split('/')
1488 .next_back()
1489 .and_then(sanitize_scope_fragment)?;
1490 Scope::new(candidate).ok()
1491}
1492
1493fn fallback_rationale_for_labels(labels: &[String]) -> String {
1494 if labels.len() == 1 {
1495 let label = labels[0].as_str();
1496 let display = workstream_display_name(label);
1497 if label.starts_with("apps/") {
1498 return format!("{display} application updates");
1499 }
1500 if label.starts_with("packages/") {
1501 return format!("{display} package updates");
1502 }
1503 if label.starts_with("crates/") {
1504 return format!("{display} crate updates");
1505 }
1506 if label.starts_with(".github/") || label == ".github" {
1507 return format!("{display} updates");
1508 }
1509 return format!("{display} updates");
1510 }
1511
1512 let display_labels: Vec<String> = labels
1513 .iter()
1514 .take(3)
1515 .map(|label| workstream_display_name(label))
1516 .collect();
1517 format!("cross-cutting updates for {}", display_labels.join(", "))
1518}
1519
1520fn fallback_commit_type_for_group(
1521 snapshot: &ComposeSnapshot,
1522 labels: &[String],
1523 file_ids: &[String],
1524) -> Result<CommitType> {
1525 if labels
1526 .iter()
1527 .any(|label| label == ".github" || label.starts_with(".github/"))
1528 {
1529 return CommitType::new("ci");
1530 }
1531
1532 let files: Vec<&ComposeFile> = file_ids
1533 .iter()
1534 .filter_map(|file_id| snapshot.file_by_id(file_id))
1535 .collect();
1536 let all_docs = !files.is_empty()
1537 && files
1538 .iter()
1539 .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1540 if all_docs {
1541 return CommitType::new("docs");
1542 }
1543
1544 let all_tests = !files.is_empty()
1545 && files
1546 .iter()
1547 .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1548 if all_tests {
1549 return CommitType::new("test");
1550 }
1551
1552 let all_dependencies =
1553 !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1554 if all_dependencies {
1555 return CommitType::new("build");
1556 }
1557
1558 let all_config = !files.is_empty()
1559 && files.iter().all(|file| {
1560 matches!(
1561 compose_file_category(file),
1562 ComposeFileCategory::Config | ComposeFileCategory::Dependency
1563 )
1564 });
1565 if all_config {
1566 return CommitType::new("chore");
1567 }
1568
1569 CommitType::new("refactor")
1570}
1571
1572fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1573 snapshot
1574 .files
1575 .iter()
1576 .filter(|file| file_ids.contains(&file.file_id))
1577 .map(|file| file.file_id.clone())
1578 .collect()
1579}
1580
1581fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1582 if groups.is_empty() {
1583 return false;
1584 }
1585
1586 let largest_group = groups
1587 .iter()
1588 .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1589 .max()
1590 .unwrap_or_default();
1591
1592 groups.len() == 1
1593 || (groups.len() <= 2
1594 && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1595}
1596
1597fn should_force_large_patch_fallback(
1598 snapshot: &ComposeSnapshot,
1599 planning_index: &PlanningIndex,
1600 groups: &[ComposeIntentGroup],
1601 max_commits: usize,
1602) -> bool {
1603 if max_commits <= 1
1604 || planning_index.mode != PlanningMode::Area
1605 || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1606 || !is_monolithic_intent_plan(snapshot, groups)
1607 {
1608 return false;
1609 }
1610
1611 let workstream_count = planning_index
1612 .targets
1613 .iter()
1614 .map(|target| workstream_key_for_label(&target.label))
1615 .collect::<HashSet<_>>()
1616 .len();
1617
1618 workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1619}
1620
1621fn build_large_patch_fallback_groups(
1622 snapshot: &ComposeSnapshot,
1623 planning_index: &PlanningIndex,
1624 max_commits: usize,
1625) -> Result<Vec<ComposeIntentGroup>> {
1626 #[derive(Debug, Clone)]
1627 struct WorkstreamGroup {
1628 label: String,
1629 file_ids: HashSet<String>,
1630 weight: usize,
1631 }
1632
1633 #[derive(Debug, Clone)]
1634 struct FallbackBin {
1635 labels: Vec<String>,
1636 file_ids: HashSet<String>,
1637 total_weight: usize,
1638 }
1639
1640 let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1641 for target in &planning_index.targets {
1642 let key = workstream_key_for_label(&target.label);
1643 let entry = workstreams
1644 .entry(key.clone())
1645 .or_insert_with(|| WorkstreamGroup {
1646 label: key,
1647 file_ids: HashSet::new(),
1648 weight: 0,
1649 });
1650
1651 for file_id in &target.file_ids {
1652 entry.file_ids.insert(file_id.clone());
1653 }
1654 entry.weight = entry
1655 .weight
1656 .saturating_add(target.hunk_count.max(target.file_ids.len()));
1657 }
1658
1659 let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1660 workstreams.sort_by(|left, right| {
1661 right
1662 .weight
1663 .cmp(&left.weight)
1664 .then_with(|| left.label.cmp(&right.label))
1665 });
1666
1667 let bin_count = max_commits.min(workstreams.len());
1668 let mut bins: Vec<FallbackBin> = Vec::new();
1669 for workstream in workstreams {
1670 if bins.len() < bin_count {
1671 bins.push(FallbackBin {
1672 labels: vec![workstream.label],
1673 file_ids: workstream.file_ids,
1674 total_weight: workstream.weight,
1675 });
1676 continue;
1677 }
1678
1679 let Some((target_idx, _)) = bins
1680 .iter()
1681 .enumerate()
1682 .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1683 else {
1684 continue;
1685 };
1686
1687 let target_bin = &mut bins[target_idx];
1688 target_bin.labels.push(workstream.label);
1689 target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1690 target_bin.file_ids.extend(workstream.file_ids);
1691 }
1692
1693 let mut groups = Vec::new();
1694 for (idx, bin) in bins.into_iter().enumerate() {
1695 let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1696 let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1697 let scope = (bin.labels.len() == 1)
1698 .then(|| fallback_scope_for_label(&bin.labels[0]))
1699 .flatten();
1700 let rationale = fallback_rationale_for_labels(&bin.labels);
1701
1702 groups.push(ComposeIntentGroup {
1703 group_id: format!("G{}", idx + 1),
1704 commit_type,
1705 scope,
1706 file_ids: ordered_ids,
1707 rationale,
1708 dependencies: Vec::new(),
1709 });
1710 }
1711
1712 Ok(groups)
1713}
1714
1715async fn analyze_compose_intent(
1716 snapshot: &ComposeSnapshot,
1717 observations: &[FileObservation],
1718 config: &CommitConfig,
1719 max_commits: usize,
1720 debug_dir: Option<&Path>,
1721) -> Result<ComposeIntentPlan> {
1722 let planning_index = build_planning_index(snapshot);
1723 let stat_summary = render_planning_stat(&planning_index);
1724 let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1725 let planning_targets = render_planning_targets(&planning_index, snapshot);
1726 let planning_notes = render_planning_notes(&planning_index);
1727 let split_bias = render_split_bias(&planning_index);
1728 let schema = build_intent_schema(config);
1729 let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1730 variant: "default",
1731 max_commits,
1732 stat: &stat_summary,
1733 snapshot_summary: &snapshot_summary,
1734 planning_targets: &planning_targets,
1735 planning_notes: &planning_notes,
1736 split_bias: &split_bias,
1737 })?;
1738
1739 let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1740 operation: "compose/intent",
1741 model: &config.analysis_model,
1742 max_tokens: 3000,
1743 temperature: COMPOSE_PLANNER_TEMPERATURE,
1744 prompt_family: "compose-intent",
1745 prompt_variant: "default",
1746 system_prompt: &parts.system,
1747 user_prompt: &parts.user,
1748 tool_name: "create_compose_intent_plan",
1749 tool_description: "Plan logical commit groups over the provided planning target IDs",
1750 schema: &schema,
1751 debug: debug_dir.map(|dir| OneShotDebug {
1752 dir: Some(dir),
1753 prefix: None,
1754 name: "compose_intent",
1755 }),
1756 cacheable: true,
1757 })
1758 .await?;
1759
1760 let (mut groups, repair_notes) =
1761 normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1762 for note in &repair_notes {
1763 eprintln!("{}", style::warning(note));
1764 }
1765 if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1766 eprintln!(
1767 "{}",
1768 style::warning(
1769 "Compose intent collapsed into a monolithic large-patch group; falling back to \
1770 path-based workstream splits."
1771 )
1772 );
1773 groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1774 }
1775 let dependency_order =
1776 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1777
1778 Ok(ComposeIntentPlan { groups, dependency_order })
1779}
1780
1781fn should_collect_compose_observations(
1782 snapshot: &ComposeSnapshot,
1783 config: &CommitConfig,
1784 counter: &TokenCounter,
1785) -> bool {
1786 planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1787 && should_use_map_reduce(&snapshot.diff, config, counter)
1788}
1789
1790fn auto_assign_hunks(
1791 snapshot: &ComposeSnapshot,
1792 intent_plan: &ComposeIntentPlan,
1793) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1794 let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1795 for group in &intent_plan.groups {
1796 for file_id in &group.file_ids {
1797 groups_by_file
1798 .entry(file_id.as_str())
1799 .or_default()
1800 .push(group.group_id.as_str());
1801 }
1802 }
1803
1804 let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1805 .groups
1806 .iter()
1807 .map(|group| (group.group_id.clone(), BTreeSet::new()))
1808 .collect();
1809 let mut ambiguous = Vec::new();
1810
1811 for file in &snapshot.files {
1812 let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1813 return Err(CommitGenError::Other(format!(
1814 "No compose group claimed file {} ({})",
1815 file.file_id, file.path
1816 )));
1817 };
1818
1819 if candidate_group_ids.len() == 1 {
1820 let group_id = candidate_group_ids[0];
1821 let entry = assigned
1822 .get_mut(group_id)
1823 .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1824 for hunk_id in &file.hunk_ids {
1825 entry.insert(hunk_id.clone());
1826 }
1827 } else {
1828 ambiguous.push(AmbiguousFileBinding {
1829 file_id: file.file_id.clone(),
1830 path: file.path.clone(),
1831 candidate_group_ids: candidate_group_ids
1832 .iter()
1833 .map(|group_id| (*group_id).to_string())
1834 .collect(),
1835 hunk_ids: file.hunk_ids.clone(),
1836 });
1837 }
1838 }
1839
1840 Ok((assigned, ambiguous))
1841}
1842
1843fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1844 let mut out = String::new();
1845 for group in groups {
1846 let scope = group
1847 .scope
1848 .as_ref()
1849 .map(|scope| format!("({})", scope.as_str()))
1850 .unwrap_or_default();
1851 writeln!(
1852 out,
1853 "- {} [{}{}] {}",
1854 group.group_id,
1855 group.commit_type.as_str(),
1856 scope,
1857 group.rationale
1858 )
1859 .unwrap();
1860 }
1861
1862 out
1863}
1864
1865fn render_binding_ambiguous_files(
1866 snapshot: &ComposeSnapshot,
1867 ambiguous_files: &[AmbiguousFileBinding],
1868) -> String {
1869 let mut out = String::new();
1870 for ambiguous_file in ambiguous_files {
1871 writeln!(
1872 out,
1873 "- {} {} candidates: {}",
1874 ambiguous_file.file_id,
1875 ambiguous_file.path,
1876 ambiguous_file.candidate_group_ids.join(", ")
1877 )
1878 .unwrap();
1879
1880 for hunk_id in &ambiguous_file.hunk_ids {
1881 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1882 if hunk.synthetic {
1883 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1884 } else {
1885 writeln!(
1886 out,
1887 " - {} old:{} new:{} :: {}",
1888 hunk.hunk_id,
1889 format_line_range(hunk.old_start, hunk.old_count),
1890 format_line_range(hunk.new_start, hunk.new_count),
1891 hunk.snippet
1892 )
1893 .unwrap();
1894 }
1895 }
1896 }
1897 }
1898
1899 out
1900}
1901
1902async fn request_binding(
1903 snapshot: &ComposeSnapshot,
1904 groups: &[ComposeIntentGroup],
1905 ambiguous_files: &[AmbiguousFileBinding],
1906 config: &CommitConfig,
1907 debug_dir: Option<&Path>,
1908 debug_name: &str,
1909) -> Result<Vec<ComposeBindingAssignment>> {
1910 let schema = build_binding_schema();
1911 let groups_text = render_binding_groups(groups);
1912 let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1913 let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1914 variant: "default",
1915 groups: &groups_text,
1916 ambiguous_files: &ambiguous_files_text,
1917 })?;
1918 let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1919 operation: "compose/bind",
1920 model: &config.analysis_model,
1921 max_tokens: 2500,
1922 temperature: COMPOSE_PLANNER_TEMPERATURE,
1923 prompt_family: "compose-bind",
1924 prompt_variant: "default",
1925 system_prompt: &parts.system,
1926 user_prompt: &parts.user,
1927 tool_name: "bind_compose_hunks",
1928 tool_description: "Assign hunk IDs to existing compose groups",
1929 schema: &schema,
1930 debug: debug_dir.map(|dir| OneShotDebug {
1931 dir: Some(dir),
1932 prefix: None,
1933 name: debug_name,
1934 }),
1935 cacheable: true,
1936 })
1937 .await?;
1938
1939 Ok(response.output.assignments)
1940}
1941
1942fn ambiguous_hunk_context(
1943 ambiguous_files: &[AmbiguousFileBinding],
1944) -> HashMap<String, AmbiguousHunkContext> {
1945 let mut context = HashMap::new();
1946 for ambiguous_file in ambiguous_files {
1947 for hunk_id in &ambiguous_file.hunk_ids {
1948 context.insert(hunk_id.clone(), AmbiguousHunkContext {
1949 candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
1950 });
1951 }
1952 }
1953 context
1954}
1955
1956fn evaluate_binding(
1957 assignments: &[ComposeBindingAssignment],
1958 hunk_context: &HashMap<String, AmbiguousHunkContext>,
1959 valid_group_ids: &HashSet<&str>,
1960 snapshot: &ComposeSnapshot,
1961) -> BindingEvaluation {
1962 let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
1963
1964 for assignment in assignments {
1965 if !valid_group_ids.contains(assignment.group_id.as_str()) {
1966 continue;
1967 }
1968
1969 let mut seen_in_group = HashSet::new();
1970 for hunk_id in &assignment.hunk_ids {
1971 if !seen_in_group.insert(hunk_id.as_str()) {
1972 continue;
1973 }
1974
1975 let Some(context) = hunk_context.get(hunk_id) else {
1976 continue;
1977 };
1978
1979 if !context
1980 .candidate_group_ids
1981 .iter()
1982 .any(|candidate| candidate == &assignment.group_id)
1983 {
1984 continue;
1985 }
1986
1987 match assigned_hunk_to_group.get(hunk_id) {
1988 None => {
1989 assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
1990 },
1991 Some(existing_group) if existing_group == &assignment.group_id => {},
1992 Some(_) => {
1993 assigned_hunk_to_group.remove(hunk_id);
1994 },
1995 }
1996 }
1997 }
1998
1999 let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2000 for (hunk_id, group_id) in assigned_hunk_to_group {
2001 assigned_by_group.entry(group_id).or_default().push(hunk_id);
2002 }
2003
2004 for hunk_ids in assigned_by_group.values_mut() {
2005 let ordered: Vec<String> = snapshot
2006 .hunks
2007 .iter()
2008 .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2009 .map(|hunk| hunk.hunk_id.clone())
2010 .collect();
2011 *hunk_ids = ordered;
2012 }
2013
2014 let unresolved = snapshot
2015 .hunks
2016 .iter()
2017 .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2018 .filter(|hunk| {
2019 !assigned_by_group.values().any(|assigned_hunks| {
2020 assigned_hunks
2021 .iter()
2022 .any(|assigned| assigned == &hunk.hunk_id)
2023 })
2024 })
2025 .map(|hunk| hunk.hunk_id.clone())
2026 .collect();
2027
2028 BindingEvaluation { assigned: assigned_by_group, unresolved }
2029}
2030
2031fn filter_ambiguous_files(
2032 ambiguous_files: &[AmbiguousFileBinding],
2033 hunk_ids: &[String],
2034) -> Vec<AmbiguousFileBinding> {
2035 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2036
2037 ambiguous_files
2038 .iter()
2039 .filter_map(|file| {
2040 let matching_hunks: Vec<String> = file
2041 .hunk_ids
2042 .iter()
2043 .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2044 .cloned()
2045 .collect();
2046
2047 (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2048 file_id: file.file_id.clone(),
2049 path: file.path.clone(),
2050 candidate_group_ids: file.candidate_group_ids.clone(),
2051 hunk_ids: matching_hunks,
2052 })
2053 })
2054 .collect()
2055}
2056
2057fn chunk_ambiguous_files(
2058 ambiguous_files: &[AmbiguousFileBinding],
2059) -> Vec<Vec<AmbiguousFileBinding>> {
2060 if ambiguous_files.is_empty() {
2061 return Vec::new();
2062 }
2063
2064 let mut batches = Vec::new();
2065 let mut current_batch = Vec::new();
2066 let mut current_hunk_count = 0_usize;
2067
2068 for file in ambiguous_files {
2069 let file_hunk_count = file.hunk_ids.len();
2070 let should_split = !current_batch.is_empty()
2071 && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2072 || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2073
2074 if should_split {
2075 batches.push(current_batch);
2076 current_batch = Vec::new();
2077 current_hunk_count = 0;
2078 }
2079
2080 current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2081 current_batch.push(file.clone());
2082 }
2083
2084 if !current_batch.is_empty() {
2085 batches.push(current_batch);
2086 }
2087
2088 batches
2089}
2090
2091fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2092 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2093
2094 snapshot
2095 .hunks
2096 .iter()
2097 .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2098 .map(|hunk| hunk.hunk_id.clone())
2099 .collect()
2100}
2101
2102fn fallback_group_for_hunk(
2103 hunk_id: &str,
2104 ambiguous_files: &[AmbiguousFileBinding],
2105 group_rank: &HashMap<&str, usize>,
2106) -> Option<String> {
2107 ambiguous_files.iter().find_map(|file| {
2108 file
2109 .hunk_ids
2110 .iter()
2111 .any(|candidate| candidate == hunk_id)
2112 .then(|| {
2113 file
2114 .candidate_group_ids
2115 .iter()
2116 .min_by_key(|group_id| {
2117 group_rank
2118 .get(group_id.as_str())
2119 .copied()
2120 .unwrap_or(usize::MAX)
2121 })
2122 .cloned()
2123 })
2124 })?
2125}
2126
2127fn assign_unresolved_hunks(
2128 unresolved_hunks: &[String],
2129 assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2130 ambiguous_files: &[AmbiguousFileBinding],
2131 group_rank: &HashMap<&str, usize>,
2132) {
2133 for hunk_id in unresolved_hunks {
2134 if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2135 && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2136 {
2137 group_hunks.insert(hunk_id.clone());
2138 }
2139 }
2140}
2141
2142fn normalize_group_type(
2143 snapshot: &ComposeSnapshot,
2144 file_ids: &[String],
2145 original_type: &CommitType,
2146) -> Result<CommitType> {
2147 let dependency_only = !file_ids.is_empty()
2148 && file_ids.iter().all(|file_id| {
2149 snapshot
2150 .file_by_id(file_id)
2151 .is_some_and(|file| is_dependency_manifest(&file.path))
2152 });
2153
2154 if dependency_only && original_type.as_str() != "build" {
2155 CommitType::new("build")
2156 } else {
2157 Ok(original_type.clone())
2158 }
2159}
2160
2161fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2162 snapshot
2163 .files
2164 .iter()
2165 .filter(|file| {
2166 hunk_ids
2167 .iter()
2168 .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2169 })
2170 .map(|file| file.file_id.clone())
2171 .collect()
2172}
2173
2174fn build_redirects(
2175 intent_plan: &ComposeIntentPlan,
2176 executable_groups: &[ComposeExecutableGroup],
2177 group_rank: &HashMap<&str, usize>,
2178) -> HashMap<String, String> {
2179 let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2180 .iter()
2181 .filter(|group| !group.hunk_ids.is_empty())
2182 .map(|group| (group.group_id.as_str(), group))
2183 .collect();
2184
2185 let mut redirects = HashMap::new();
2186 for group in &intent_plan.groups {
2187 if surviving_groups.contains_key(group.group_id.as_str()) {
2188 continue;
2189 }
2190
2191 let redirect = executable_groups
2192 .iter()
2193 .filter(|candidate| candidate.group_id != group.group_id)
2194 .filter(|candidate| {
2195 candidate.file_ids.iter().any(|file_id| {
2196 group
2197 .file_ids
2198 .iter()
2199 .any(|candidate_id| candidate_id == file_id)
2200 })
2201 })
2202 .min_by_key(|candidate| {
2203 group_rank
2204 .get(candidate.group_id.as_str())
2205 .copied()
2206 .unwrap_or(usize::MAX)
2207 })
2208 .map(|candidate| candidate.group_id.clone());
2209
2210 if let Some(redirect) = redirect {
2211 redirects.insert(group.group_id.clone(), redirect);
2212 }
2213 }
2214
2215 redirects
2216}
2217
/// Follows `redirects` from `group_id` until reaching an id that has no redirect.
///
/// Each id is expanded at most once: if the chain loops back to an id that was already
/// expanded, traversal stops and that id is returned, so cyclic redirect tables terminate.
/// An id with no redirect is returned unchanged.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
    let mut current: &str = group_id;
    let mut visited: HashSet<&str> = HashSet::new();

    while let Some(next) = redirects.get(current) {
        // A repeated id means we have walked a full cycle; stop instead of looping forever.
        if !visited.insert(current) {
            break;
        }
        current = next.as_str();
    }

    current.to_string()
}
2231
2232fn prune_empty_groups(
2233 groups: Vec<ComposeExecutableGroup>,
2234 redirects: &HashMap<String, String>,
2235) -> Result<ComposeExecutablePlan> {
2236 let surviving_ids: HashSet<String> = groups
2237 .iter()
2238 .filter(|group| !group.hunk_ids.is_empty())
2239 .map(|group| group.group_id.clone())
2240 .collect();
2241
2242 let mut surviving_groups = Vec::new();
2243 for mut group in groups {
2244 if group.hunk_ids.is_empty() {
2245 continue;
2246 }
2247
2248 let mut rewritten_dependencies = Vec::new();
2249 for dependency in &group.dependencies {
2250 let rewritten = resolve_redirect(dependency, redirects);
2251 if rewritten != group.group_id
2252 && surviving_ids.contains(&rewritten)
2253 && !rewritten_dependencies
2254 .iter()
2255 .any(|existing| existing == &rewritten)
2256 {
2257 rewritten_dependencies.push(rewritten);
2258 }
2259 }
2260
2261 group.dependencies = rewritten_dependencies;
2262 surviving_groups.push(group);
2263 }
2264
2265 let dependency_order = compute_dependency_order(
2266 &surviving_groups,
2267 |group| &group.group_id,
2268 |group| &group.dependencies,
2269 )?;
2270 Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2271}
2272
2273fn finalize_executable_plan(
2274 snapshot: &ComposeSnapshot,
2275 intent_plan: &ComposeIntentPlan,
2276 assigned_by_group: HashMap<String, BTreeSet<String>>,
2277) -> Result<ComposeExecutablePlan> {
2278 let group_rank: HashMap<&str, usize> = intent_plan
2279 .dependency_order
2280 .iter()
2281 .enumerate()
2282 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2283 .collect();
2284
2285 let mut executable_groups = Vec::new();
2286 for group in &intent_plan.groups {
2287 let hunk_ids: Vec<String> = snapshot
2288 .hunks
2289 .iter()
2290 .filter(|hunk| {
2291 assigned_by_group
2292 .get(&group.group_id)
2293 .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2294 })
2295 .map(|hunk| hunk.hunk_id.clone())
2296 .collect();
2297
2298 let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2299 let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2300 executable_groups.push(ComposeExecutableGroup {
2301 group_id: group.group_id.clone(),
2302 commit_type,
2303 scope: group.scope.clone(),
2304 file_ids,
2305 rationale: group.rationale.clone(),
2306 dependencies: group.dependencies.clone(),
2307 hunk_ids,
2308 });
2309 }
2310
2311 let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2312 prune_empty_groups(executable_groups, &redirects)
2313}
2314
2315fn validate_executable_plan(
2316 snapshot: &ComposeSnapshot,
2317 plan: &ComposeExecutablePlan,
2318) -> Result<()> {
2319 if plan.groups.is_empty() {
2320 return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2321 }
2322
2323 let known_hunks: HashSet<&str> = snapshot
2324 .hunks
2325 .iter()
2326 .map(|hunk| hunk.hunk_id.as_str())
2327 .collect();
2328 let known_files: HashSet<&str> = snapshot
2329 .files
2330 .iter()
2331 .map(|file| file.file_id.as_str())
2332 .collect();
2333 let mut coverage = HashMap::<String, String>::new();
2334
2335 for group in &plan.groups {
2336 if group.hunk_ids.is_empty() {
2337 return Err(CommitGenError::Other(format!(
2338 "Compose group {} ended up empty after binding",
2339 group.group_id
2340 )));
2341 }
2342
2343 for file_id in &group.file_ids {
2344 if !known_files.contains(file_id.as_str()) {
2345 return Err(CommitGenError::Other(format!(
2346 "Compose group {} references unknown file_id {}",
2347 group.group_id, file_id
2348 )));
2349 }
2350 }
2351
2352 for hunk_id in &group.hunk_ids {
2353 if !known_hunks.contains(hunk_id.as_str()) {
2354 return Err(CommitGenError::Other(format!(
2355 "Compose group {} references unknown hunk_id {}",
2356 group.group_id, hunk_id
2357 )));
2358 }
2359
2360 if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2361 return Err(CommitGenError::Other(format!(
2362 "Hunk {} was assigned to both {} and {}",
2363 hunk_id, existing_group, group.group_id
2364 )));
2365 }
2366 }
2367 }
2368
2369 let missing_hunks: Vec<String> = snapshot
2370 .hunks
2371 .iter()
2372 .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2373 .map(|hunk| hunk.hunk_id.clone())
2374 .collect();
2375 if !missing_hunks.is_empty() {
2376 return Err(CommitGenError::Other(format!(
2377 "Compose plan left hunks unassigned: {}",
2378 missing_hunks.join(", ")
2379 )));
2380 }
2381
2382 let dependency_order =
2383 compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2384 if dependency_order != plan.dependency_order {
2385 return Err(CommitGenError::Other(
2386 "Compose dependency order does not match recomputed order".to_string(),
2387 ));
2388 }
2389
2390 Ok(())
2391}
2392
2393async fn bind_compose_plan(
2394 snapshot: &ComposeSnapshot,
2395 intent_plan: &ComposeIntentPlan,
2396 config: &CommitConfig,
2397 debug_dir: Option<&Path>,
2398) -> Result<ComposeExecutablePlan> {
2399 let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2400
2401 if !ambiguous_files.is_empty() {
2402 let valid_group_ids: HashSet<&str> = intent_plan
2403 .groups
2404 .iter()
2405 .map(|group| group.group_id.as_str())
2406 .collect();
2407 let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2408 let mut unresolved = Vec::new();
2409
2410 for (batch_idx, batch) in binding_batches.iter().enumerate() {
2411 let hunk_context = ambiguous_hunk_context(batch);
2412 let debug_name = if binding_batches.len() == 1 {
2413 "compose_bind".to_string()
2414 } else {
2415 format!("compose_bind_{:02}", batch_idx + 1)
2416 };
2417 let assignments =
2418 request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2419 .await?;
2420 let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2421 for (group_id, hunk_ids) in evaluation.assigned {
2422 let entry = assigned_by_group.entry(group_id).or_default();
2423 for hunk_id in hunk_ids {
2424 entry.insert(hunk_id);
2425 }
2426 }
2427 unresolved.extend(evaluation.unresolved);
2428 }
2429
2430 let group_rank: HashMap<&str, usize> = intent_plan
2431 .dependency_order
2432 .iter()
2433 .enumerate()
2434 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2435 .collect();
2436
2437 let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2438 if !unresolved.is_empty() {
2439 let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2440 let repair_batches = chunk_ambiguous_files(&unresolved_files);
2441 let mut repair_unresolved = Vec::new();
2442
2443 for (batch_idx, batch) in repair_batches.iter().enumerate() {
2444 let debug_name = if repair_batches.len() == 1 {
2445 "compose_bind_repair".to_string()
2446 } else {
2447 format!("compose_bind_repair_{:02}", batch_idx + 1)
2448 };
2449 let repair_assignments = request_binding(
2450 snapshot,
2451 &intent_plan.groups,
2452 batch,
2453 config,
2454 debug_dir,
2455 &debug_name,
2456 )
2457 .await?;
2458 let repair_context = ambiguous_hunk_context(batch);
2459 let repair =
2460 evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2461 for (group_id, hunk_ids) in repair.assigned {
2462 let entry = assigned_by_group.entry(group_id).or_default();
2463 for hunk_id in hunk_ids {
2464 entry.insert(hunk_id);
2465 }
2466 }
2467
2468 repair_unresolved.extend(repair.unresolved);
2469 }
2470 unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2471
2472 if !unresolved.is_empty() {
2473 assign_unresolved_hunks(
2474 &unresolved,
2475 &mut assigned_by_group,
2476 &ambiguous_files,
2477 &group_rank,
2478 );
2479 }
2480 }
2481 }
2482
2483 let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2484 validate_executable_plan(snapshot, &plan)?;
2485 Ok(plan)
2486}
2487
2488fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2489 println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2490 for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2491 let group = &plan.groups[group_idx];
2492 let scope = group
2493 .scope
2494 .as_ref()
2495 .map(|scope| format!("({})", style::scope(scope.as_str())))
2496 .unwrap_or_default();
2497
2498 println!(
2499 "\n{}. {} [{}{}] {}",
2500 display_idx + 1,
2501 style::bold(&group.group_id),
2502 style::commit_type(group.commit_type.as_str()),
2503 scope,
2504 group.rationale
2505 );
2506
2507 println!(" Files:");
2508 for file_id in &group.file_ids {
2509 if let Some(file) = snapshot.file_by_id(file_id) {
2510 let selected_hunk_ids: Vec<&str> = group
2511 .hunk_ids
2512 .iter()
2513 .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2514 .map(String::as_str)
2515 .collect();
2516 let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2517 "all hunks".to_string()
2518 } else {
2519 selected_hunk_ids.join(", ")
2520 };
2521 println!(" - {} {} ({selection})", file.file_id, file.path);
2522 }
2523 }
2524
2525 if !group.dependencies.is_empty() {
2526 println!(" Depends on: {}", group.dependencies.join(", "));
2527 }
2528 }
2529}
2530
2531pub async fn execute_compose(
2532 snapshot: &ComposeSnapshot,
2533 plan: &ComposeExecutablePlan,
2534 config: &CommitConfig,
2535 args: &Args,
2536) -> Result<Vec<String>> {
2537 let dir = &args.dir;
2538 let mut commit_hashes = Vec::new();
2539 let total = plan.dependency_order.len();
2540
2541 println!("{}", style::info("Resetting staging area..."));
2542 reset_staging(dir)?;
2543
2544 let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2548 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2549 let group = &plan.groups[group_idx];
2550 println!(
2551 " {}",
2552 style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total,))
2553 );
2554 let group_patch = create_executable_group_patch(snapshot, group)?;
2555 group_diff_stats.push((group_patch.diff, group_patch.stat));
2556 }
2557
2558 println!(
2562 "{}",
2563 style::info(&format!(
2564 "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2565 COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2566 ))
2567 );
2568
2569 let prepared_messages: Vec<(Vec<String>, crate::types::CommitSummary)> =
2570 stream::iter(plan.dependency_order.iter().enumerate())
2571 .map(|(idx, &group_idx)| {
2572 let group = &plan.groups[group_idx];
2573 let (diff, stat) = &group_diff_stats[idx];
2574 let debug_prefix = format!("compose-{}", idx + 1);
2575 async move {
2576 let ctx = AnalysisContext {
2577 user_context: Some(&group.rationale),
2578 recent_commits: None,
2579 common_scopes: None,
2580 project_context: None,
2581 debug_output: args.debug_output.as_deref(),
2582 debug_prefix: Some(&debug_prefix),
2583 };
2584 let analysis = generate_conventional_analysis(
2585 stat,
2586 diff,
2587 &config.analysis_model,
2588 "",
2589 &ctx,
2590 config,
2591 )
2592 .await?;
2593 let body = analysis.body_texts();
2594 let summary = generate_summary_from_analysis(
2595 stat,
2596 group.commit_type.as_str(),
2597 group.scope.as_ref().map(|scope| scope.as_str()),
2598 &body,
2599 Some(&group.rationale),
2600 config,
2601 args.debug_output.as_deref(),
2602 Some(&debug_prefix),
2603 )
2604 .await?;
2605 Ok::<_, CommitGenError>((body, summary))
2606 }
2607 })
2608 .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2609 .collect::<Vec<_>>()
2610 .await
2611 .into_iter()
2612 .collect::<Result<Vec<_>>>()?;
2613
2614 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2617 let group = &plan.groups[group_idx];
2618
2619 println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2620 println!(" Type: {}", style::commit_type(group.commit_type.as_str()));
2621 if let Some(scope) = &group.scope {
2622 println!(" Scope: {}", style::scope(scope.as_str()));
2623 }
2624 let paths: Vec<String> = group
2625 .file_ids
2626 .iter()
2627 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2628 .collect();
2629 println!(" Files: {}", paths.join(", "));
2630
2631 let stage_result = stage_executable_group(snapshot, group, dir)?;
2632 if stage_result != StageResult::Staged {
2633 eprintln!(
2634 " {}",
2635 style::warning(&format!(
2636 "Skipping {} because its planned patch is already applied ({stage_result:?})",
2637 group.group_id
2638 ))
2639 );
2640 continue;
2641 }
2642
2643 let (analysis_body, summary) = prepared_messages[idx].clone();
2644 let mut commit = ConventionalCommit {
2645 commit_type: group.commit_type.clone(),
2646 scope: group.scope.clone(),
2647 summary,
2648 body: analysis_body,
2649 footers: vec![],
2650 };
2651 post_process_commit_message(&mut commit, config);
2652
2653 if let Err(err) = validate_commit_message(&commit, config) {
2654 eprintln!(
2655 " {}",
2656 style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2657 );
2658 }
2659
2660 let formatted_message = format_commit_message(&commit);
2661 println!(
2662 " Message:\n{}",
2663 formatted_message
2664 .lines()
2665 .take(3)
2666 .collect::<Vec<_>>()
2667 .join("\n")
2668 );
2669
2670 if !args.compose_preview {
2671 let sign = args.sign || config.gpg_sign;
2672 let signoff = args.signoff || config.signoff;
2673 git_commit(&formatted_message, false, dir, sign, signoff, args.skip_hooks, false)?;
2674 let hash = get_head_hash(dir)?;
2675 commit_hashes.push(hash);
2676
2677 if args.compose_test_after_each {
2678 println!(" {}", style::info("Running tests..."));
2679 let status = std::process::Command::new("cargo")
2680 .arg("test")
2681 .current_dir(dir)
2682 .status();
2683
2684 if let Ok(status) = status {
2685 if !status.success() {
2686 return Err(CommitGenError::Other(format!(
2687 "Tests failed after commit {} ({})",
2688 idx + 1,
2689 group.group_id
2690 )));
2691 }
2692 println!(" {}", style::success(&format!("{} Tests passed", style::icons::SUCCESS)));
2693 }
2694 }
2695 }
2696 }
2697
2698 Ok(commit_hashes)
2699}
2700
2701pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2702 let max_rounds = config.compose_max_rounds;
2703
2704 for round in 1..=max_rounds {
2705 if round > 1 {
2706 println!(
2707 "\n{}",
2708 style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2709 );
2710 } else {
2711 println!("{}", style::section_header("Compose Mode", 80));
2712 }
2713 println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2714
2715 run_compose_round(args, config, round).await?;
2716
2717 if args.compose_preview {
2718 break;
2719 }
2720
2721 match get_compose_diff(&args.dir) {
2722 Err(CommitGenError::NoChanges { .. }) => {
2723 println!(
2724 "\n{}",
2725 style::success(&format!(
2726 "{} All changes committed successfully",
2727 style::icons::SUCCESS
2728 ))
2729 );
2730 break;
2731 },
2732 Err(err) => return Err(err),
2733 Ok(remaining_diff) => {
2734 eprintln!(
2735 "\n{}",
2736 style::warning(&format!(
2737 "{} Uncommitted changes remain after round {round}",
2738 style::icons::WARNING
2739 ))
2740 );
2741 eprintln!("{remaining_diff}");
2742 },
2743 }
2744
2745 if round < max_rounds {
2746 eprintln!("{}", style::info("Starting another compose round..."));
2747 } else {
2748 eprintln!(
2749 "{}",
2750 style::warning(&format!(
2751 "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2752 ))
2753 );
2754 }
2755 }
2756
2757 Ok(())
2758}
2759
2760async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2761 let diff = get_compose_diff(&args.dir)?;
2762 let stat = get_compose_stat(&args.dir)?;
2763 let snapshot = build_compose_snapshot(&diff, &stat)?;
2764
2765 if let Some(debug_dir) = args.debug_output.as_deref() {
2766 save_debug_artifact(
2767 Some(debug_dir),
2768 &format!("compose_round_{round}_snapshot.json"),
2769 &snapshot,
2770 )?;
2771 }
2772
2773 let token_counter = create_token_counter(config);
2774 let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
2775 println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
2776 observe_diff_files(&snapshot.diff, &config.analysis_model, config, &token_counter).await?
2777 } else {
2778 if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
2779 && should_use_map_reduce(&snapshot.diff, config, &token_counter)
2780 {
2781 println!(
2782 "{}",
2783 style::info(
2784 "Skipping per-file observations for very large compose snapshot; using area-level \
2785 planning instead."
2786 )
2787 );
2788 }
2789 Vec::new()
2790 };
2791
2792 if let Some(debug_dir) = args.debug_output.as_deref()
2793 && !observations.is_empty()
2794 {
2795 save_debug_artifact(
2796 Some(debug_dir),
2797 &format!("compose_round_{round}_observations.json"),
2798 &observations,
2799 )?;
2800 }
2801
2802 let max_commits = args.compose_max_commits.unwrap_or(20);
2803 let executable_plan = if let Some(cached_plan) =
2804 load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
2805 {
2806 println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
2807 cached_plan
2808 } else {
2809 println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
2810 let intent_plan = analyze_compose_intent(
2811 &snapshot,
2812 &observations,
2813 config,
2814 max_commits,
2815 args.debug_output.as_deref(),
2816 )
2817 .await?;
2818
2819 if let Some(debug_dir) = args.debug_output.as_deref() {
2820 save_debug_artifact(
2821 Some(debug_dir),
2822 &format!("compose_round_{round}_intent_plan.json"),
2823 &intent_plan,
2824 )?;
2825 }
2826
2827 println!("{}", style::info("Binding hunks to groups..."));
2828 let plan =
2829 bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
2830 save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
2831 plan
2832 };
2833
2834 if let Some(debug_dir) = args.debug_output.as_deref() {
2835 save_debug_artifact(
2836 Some(debug_dir),
2837 &format!("compose_round_{round}_executable_plan.json"),
2838 &executable_plan,
2839 )?;
2840 }
2841
2842 print_executable_plan(&snapshot, &executable_plan);
2843
2844 if args.compose_preview {
2845 println!(
2846 "\n{}",
2847 style::success(&format!(
2848 "{} Preview complete (use --compose without --compose-preview to execute)",
2849 style::icons::SUCCESS
2850 ))
2851 );
2852 return Ok(());
2853 }
2854
2855 println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
2856 let hashes = execute_compose(&snapshot, &executable_plan, config, args).await?;
2857 println!(
2858 "{}",
2859 style::success(&format!(
2860 "{} Round {round}: Created {} commit(s)",
2861 style::icons::SUCCESS,
2862 hashes.len()
2863 ))
2864 );
2865 Ok(())
2866}
2867
2868#[cfg(test)]
2869mod tests {
2870 use std::fmt::Write;
2871
2872 use super::*;
2873 use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
2874
2875 fn shared_file_diff() -> (&'static str, &'static str) {
2876 (
2877 r#"diff --git a/src/lib.rs b/src/lib.rs
2878index 1111111..2222222 100644
2879--- a/src/lib.rs
2880+++ b/src/lib.rs
2881@@ -1,3 +1,3 @@
2882-fn alpha() {
2883+fn alpha_changed() {
2884 println!("alpha");
2885 }
2886@@ -12,3 +12,3 @@
2887-fn beta() {
2888+fn beta_changed() {
2889 println!("beta");
2890 }
2891diff --git a/tests/lib.rs b/tests/lib.rs
2892index 3333333..4444444 100644
2893--- a/tests/lib.rs
2894+++ b/tests/lib.rs
2895@@ -1,3 +1,4 @@
2896 fn test_it() {
2897+ assert!(true);
2898 }
2899"#,
2900 " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
2901 )
2902 }
2903
2904 fn build_test_snapshot() -> ComposeSnapshot {
2905 let (diff, stat) = shared_file_diff();
2906 build_compose_snapshot(diff, stat).unwrap()
2907 }
2908
2909 fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
2910 let mut diff = String::new();
2911
2912 for file_idx in 0..file_count {
2913 let path = format!("src/module_{file_idx:03}.rs");
2914 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
2915 diff.push_str("index 1111111..2222222 100644\n");
2916 writeln!(diff, "--- a/{path}").unwrap();
2917 writeln!(diff, "+++ b/{path}").unwrap();
2918
2919 for hunk_idx in 0..hunks_per_file {
2920 let line_no = (hunk_idx * 4) + 1;
2921 writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
2922 writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
2923 writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
2924 }
2925 }
2926
2927 build_compose_snapshot(&diff, "").unwrap()
2928 }
2929
2930 fn build_multi_area_snapshot() -> ComposeSnapshot {
2931 let mut diff = String::new();
2932 let areas = [
2933 ("apps/frontend/src/server", 72),
2934 ("packages/model/src/models", 54),
2935 ("apps/daemon/src/worker", 43),
2936 (".github/workflows", 16),
2937 ];
2938
2939 for (prefix, count) in areas {
2940 for file_idx in 0..count {
2941 let path = format!("{prefix}/file_{file_idx:03}.rs");
2942 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
2943 diff.push_str("index 1111111..2222222 100644\n");
2944 writeln!(diff, "--- a/{path}").unwrap();
2945 writeln!(diff, "+++ b/{path}").unwrap();
2946 diff.push_str("@@ -1,1 +1,1 @@\n");
2947 writeln!(diff, "-old_{file_idx}").unwrap();
2948 writeln!(diff, "+new_{file_idx}").unwrap();
2949 }
2950 }
2951
2952 build_compose_snapshot(&diff, "").unwrap()
2953 }
2954
2955 fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
2956 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
2957 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
2958 let groups = vec![
2959 ComposeIntentGroup {
2960 group_id: "G1".to_string(),
2961 commit_type: CommitType::new("refactor").unwrap(),
2962 scope: None,
2963 file_ids: vec![source_file.file_id.clone(), test_file.file_id.clone()],
2964 rationale: "implementation group".to_string(),
2965 dependencies: vec![],
2966 },
2967 ComposeIntentGroup {
2968 group_id: "G2".to_string(),
2969 commit_type: CommitType::new("refactor").unwrap(),
2970 scope: None,
2971 file_ids: vec![source_file.file_id.clone()],
2972 rationale: "shared file follow-up".to_string(),
2973 dependencies: vec!["G1".to_string()],
2974 },
2975 ];
2976 let dependency_order =
2977 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
2978 .unwrap();
2979 ComposeIntentPlan { groups, dependency_order }
2980 }
2981
2982 #[test]
2983 fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
2984 let snapshot = build_test_snapshot();
2985 let intent_plan = build_shared_intent_plan(&snapshot);
2986 let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
2987
2988 assert_eq!(ambiguous.len(), 1);
2989 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
2990 let assigned_to_g1 = assigned.get("G1").unwrap();
2991 assert!(
2992 test_file
2993 .hunk_ids
2994 .iter()
2995 .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
2996 "uniquely owned file should be auto-assigned"
2997 );
2998 }
2999
3000 #[test]
3001 fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3002 let snapshot = build_test_snapshot();
3003 let intent_plan = build_shared_intent_plan(&snapshot);
3004 let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3005 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3006 let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3007 let valid_group_ids: HashSet<&str> = intent_plan
3008 .groups
3009 .iter()
3010 .map(|group| group.group_id.as_str())
3011 .collect();
3012
3013 let evaluation = evaluate_binding(
3014 &[
3015 ComposeBindingAssignment {
3016 group_id: "G1".to_string(),
3017 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3018 },
3019 ComposeBindingAssignment {
3020 group_id: "G2".to_string(),
3021 hunk_ids: vec![source_file.hunk_ids[1].clone()],
3022 },
3023 ],
3024 &hunk_context,
3025 &valid_group_ids,
3026 &snapshot,
3027 );
3028
3029 for (group_id, hunk_ids) in evaluation.assigned {
3030 let entry = assigned.entry(group_id).or_default();
3031 for hunk_id in hunk_ids {
3032 entry.insert(hunk_id);
3033 }
3034 }
3035
3036 let group_rank: HashMap<&str, usize> = intent_plan
3037 .dependency_order
3038 .iter()
3039 .enumerate()
3040 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3041 .collect();
3042 assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3043
3044 let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3045 assert_eq!(executable_plan.groups.len(), 1);
3046 assert_eq!(executable_plan.groups[0].group_id, "G1");
3047 assert!(
3048 source_file
3049 .hunk_ids
3050 .iter()
3051 .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3052 "fallback should keep every hunk from the shared file in the surviving group"
3053 );
3054 }
3055
3056 #[test]
3057 fn test_validate_executable_plan_rejects_overlap() {
3058 let snapshot = build_test_snapshot();
3059 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3060 let executable_plan = ComposeExecutablePlan {
3061 groups: vec![
3062 ComposeExecutableGroup {
3063 group_id: "G1".to_string(),
3064 commit_type: CommitType::new("refactor").unwrap(),
3065 scope: None,
3066 file_ids: vec![source_file.file_id.clone()],
3067 rationale: "group one".to_string(),
3068 dependencies: vec![],
3069 hunk_ids: vec![source_file.hunk_ids[0].clone()],
3070 },
3071 ComposeExecutableGroup {
3072 group_id: "G2".to_string(),
3073 commit_type: CommitType::new("refactor").unwrap(),
3074 scope: None,
3075 file_ids: vec![source_file.file_id.clone()],
3076 rationale: "group two".to_string(),
3077 dependencies: vec![],
3078 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3079 },
3080 ],
3081 dependency_order: vec![0, 1],
3082 };
3083
3084 let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3085 assert!(err.to_string().contains("assigned to both"));
3086 }
3087
3088 #[test]
3089 fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3090 let snapshot = build_test_snapshot();
3091 let planning_index = build_planning_index(&snapshot);
3092 let groups = vec![ComposeIntentGroup {
3093 group_id: "G1".to_string(),
3094 commit_type: CommitType::new("refactor").unwrap(),
3095 scope: None,
3096 file_ids: vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3097 rationale: "normalize file references".to_string(),
3098 dependencies: vec![],
3099 }];
3100
3101 let (normalized_groups, repair_notes) =
3102 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3103
3104 assert_eq!(normalized_groups.len(), 1);
3105 assert_eq!(
3106 normalized_groups[0].file_ids,
3107 snapshot
3108 .files
3109 .iter()
3110 .map(|file| file.file_id.clone())
3111 .collect::<Vec<_>>()
3112 );
3113 assert_eq!(repair_notes.len(), 2);
3114 }
3115
3116 #[test]
3117 fn test_normalize_intent_plan_repairs_missing_files() {
3118 let snapshot = build_test_snapshot();
3119 let planning_index = build_planning_index(&snapshot);
3120 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3121 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3122 let groups = vec![ComposeIntentGroup {
3123 group_id: "G1".to_string(),
3124 commit_type: CommitType::new("refactor").unwrap(),
3125 scope: None,
3126 file_ids: vec![source_file.file_id.clone()],
3127 rationale: "partial coverage".to_string(),
3128 dependencies: vec![],
3129 }];
3130
3131 let (normalized_groups, repair_notes) =
3132 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3133
3134 assert_eq!(normalized_groups.len(), 1);
3135 assert!(
3136 normalized_groups[0].file_ids.contains(&source_file.file_id),
3137 "existing file assignment should be preserved"
3138 );
3139 assert!(
3140 normalized_groups[0].file_ids.contains(&test_file.file_id),
3141 "missing files should be assigned to an existing group"
3142 );
3143 assert_eq!(repair_notes.len(), 1);
3144 assert!(repair_notes[0].contains(&test_file.file_id));
3145 }
3146
3147 #[test]
3148 fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3149 let snapshot = build_multi_area_snapshot();
3150 let planning_index = build_planning_index(&snapshot);
3151 let frontend_target = planning_index
3152 .targets
3153 .iter()
3154 .find(|target| target.label.starts_with("apps/frontend"))
3155 .unwrap();
3156 let model_target = planning_index
3157 .targets
3158 .iter()
3159 .find(|target| target.label.starts_with("packages/model"))
3160 .unwrap();
3161 let groups = vec![
3162 ComposeIntentGroup {
3163 group_id: "G1".to_string(),
3164 commit_type: CommitType::new("refactor").unwrap(),
3165 scope: Scope::new("apps/frontend").ok(),
3166 file_ids: vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3167 rationale: "frontend platform updates".to_string(),
3168 dependencies: vec!["group 2".to_string(), "G1".to_string()],
3169 },
3170 ComposeIntentGroup {
3171 group_id: "G2".to_string(),
3172 commit_type: CommitType::new("refactor").unwrap(),
3173 scope: Scope::new("packages/model").ok(),
3174 file_ids: vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3175 rationale: "model storage updates".to_string(),
3176 dependencies: vec!["F5".to_string()],
3177 },
3178 ];
3179
3180 let (normalized_groups, repair_notes) =
3181 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3182
3183 assert_eq!(normalized_groups.len(), 2);
3184 assert!(
3185 normalized_groups[0]
3186 .file_ids
3187 .iter()
3188 .all(|file_id| file_id.starts_with('F'))
3189 );
3190 assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3191 assert!(normalized_groups[1].dependencies.is_empty());
3192 assert!(
3193 repair_notes
3194 .iter()
3195 .any(|note| note.contains("Dropped unknown planning target"))
3196 );
3197 assert!(
3198 repair_notes
3199 .iter()
3200 .any(|note| note.contains("Dropped self-dependency"))
3201 );
3202 assert!(
3203 repair_notes
3204 .iter()
3205 .any(|note| note.contains("Mapped compose planner dependency"))
3206 );
3207 assert!(
3208 repair_notes
3209 .iter()
3210 .any(|note| note.contains("Dropped unknown dependency"))
3211 );
3212 }
3213
3214 #[test]
3215 fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3216 let snapshot = build_test_snapshot();
3217 let summary = render_snapshot_summary(&snapshot, &[]);
3218 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3219
3220 assert!(!summary.contains("# snapshot compacted"));
3221 for hunk_id in &source_file.hunk_ids {
3222 assert!(summary.contains(hunk_id));
3223 }
3224 }
3225
3226 #[test]
3227 fn test_render_snapshot_summary_compacts_large_snapshot() {
3228 let snapshot = build_large_snapshot(160, 4);
3229 let summary = render_snapshot_summary(&snapshot, &[]);
3230
3231 assert!(summary.contains("# snapshot compacted"));
3232 assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3233 assert!(summary.contains("F001-H001"));
3234 assert!(summary.contains("F001-H004"));
3235 assert!(!summary.contains("F001-H002"));
3236 assert!(!summary.contains("F001-H003"));
3237 assert!(summary.contains("... 2 more hunks omitted from F001"));
3238 }
3239
3240 #[test]
3241 fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3242 let snapshot = build_multi_area_snapshot();
3243 let planning_index = build_planning_index(&snapshot);
3244
3245 assert_eq!(planning_index.mode, PlanningMode::Area);
3246 assert!(planning_index.targets.len() < snapshot.files.len());
3247 assert!(
3248 planning_index
3249 .targets
3250 .iter()
3251 .any(|target| target.label.starts_with("apps/frontend"))
3252 );
3253 assert!(
3254 render_planning_stat(&planning_index).contains("planning over"),
3255 "planning stat should explain the area mode"
3256 );
3257 }
3258
/// Feeds `normalize_intent_plan` two groups whose `file_ids` hold planning
/// target labels (the first and second half of the index's targets) instead of
/// concrete file IDs.
///
/// The normalized plan must keep both groups, contain only IDs starting with
/// `F`, produce at least one repair note, and reference exactly as many
/// distinct file IDs as the snapshot has files.
#[test]
fn test_normalize_intent_plan_expands_area_targets() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    // Split the target labels at the midpoint: one half per group.
    let labels: Vec<String> = planning_index
        .targets
        .iter()
        .map(|target| target.label.clone())
        .collect();
    let (leading_labels, trailing_labels) = labels.split_at(labels.len() / 2);

    // Both groups differ only in ID, targets, and rationale.
    let refactor_group = |group_id: &str, file_ids: &[String], rationale: &str| {
        ComposeIntentGroup {
            group_id: group_id.to_string(),
            commit_type: CommitType::new("refactor").unwrap(),
            scope: None,
            file_ids: file_ids.to_vec(),
            rationale: rationale.to_string(),
            dependencies: Vec::new(),
        }
    };
    let groups = vec![
        refactor_group("G1", leading_labels, "frontend and model"),
        refactor_group("G2", trailing_labels, "daemon and ci"),
    ];

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();

    assert_eq!(normalized_groups.len(), 2);

    let expanded_ids: Vec<_> = normalized_groups
        .iter()
        .flat_map(|group| group.file_ids.iter())
        .collect();
    assert!(
        expanded_ids.iter().all(|file_id| file_id.starts_with('F')),
        "area targets should expand back to concrete file IDs"
    );
    assert!(!repair_notes.is_empty());

    // Counting distinct IDs checks the expansion against the snapshot's file count.
    let distinct_ids: HashSet<_> = expanded_ids.into_iter().collect();
    assert_eq!(distinct_ids.len(), snapshot.files.len());
}
3316
/// Builds a single "repo-wide refactor" group that owns every file of the
/// multi-area fixture and checks the large-patch fallback path.
///
/// `should_force_large_patch_fallback` must accept that plan, and
/// `build_large_patch_fallback_groups` must return at least three groups that
/// together reference as many distinct file IDs as the snapshot has files,
/// with at least one rationale mentioning "frontend".
#[test]
fn test_large_patch_fallback_splits_monolithic_area_plan() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    let every_file_id: Vec<String> = snapshot
        .files
        .iter()
        .map(|file| file.file_id.clone())
        .collect();
    let monolithic_plan = [ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: every_file_id,
        rationale: "repo-wide refactor".to_string(),
        dependencies: Vec::new(),
    }];

    let needs_fallback =
        should_force_large_patch_fallback(&snapshot, &planning_index, &monolithic_plan, 6);
    assert!(needs_fallback);

    let fallback_groups =
        build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
    let group_count = fallback_groups.len();
    assert!(group_count >= 3);

    let covered_ids: HashSet<_> = fallback_groups
        .iter()
        .flat_map(|group| group.file_ids.iter())
        .collect();
    assert_eq!(covered_ids.len(), snapshot.files.len());

    let keeps_frontend_identity = fallback_groups
        .iter()
        .any(|group| group.rationale.contains("frontend"));
    assert!(
        keeps_frontend_identity,
        "fallback should preserve workstream identity"
    );
}
3359
/// For the snapshot from `build_large_snapshot(160, 4)` under the default
/// config, the diff on its own satisfies `should_use_map_reduce`, yet
/// `should_collect_compose_observations` is expected to return `false`.
// NOTE(review): per the test name this is presumably because such a snapshot
// is planned in area mode — confirm against `should_collect_compose_observations`.
#[test]
fn test_should_collect_compose_observations_skips_area_mode() {
    let config = CommitConfig::default();
    let counter = create_token_counter(&config);
    let snapshot = build_large_snapshot(160, 4);

    let diff_uses_map_reduce = should_use_map_reduce(&snapshot.diff, &config, &counter);
    assert!(diff_uses_map_reduce);

    let collects_observations =
        should_collect_compose_observations(&snapshot, &config, &counter);
    assert!(!collects_observations);
}
3369
/// Checks how `chunk_ambiguous_files` batches three ambiguous files carrying
/// 70, 60, and 10 hunks.
///
/// The first two files together hold 130 hunks, which is above
/// `MAX_BIND_HUNKS_PER_REQUEST`, so the test expects two batches: the first
/// file alone, then the remaining two. No hunk may be lost (140 in total) and
/// every batch must respect both per-request limits.
#[test]
fn test_chunk_ambiguous_files_splits_large_binding_request() {
    // Fixture builder: hunk IDs follow the `<file_id>-H<NNN>` pattern, 1-based.
    let binding = |file_id: &str, path: &str, candidates: [&str; 2], hunk_count: usize| {
        AmbiguousFileBinding {
            file_id: file_id.to_string(),
            path: path.to_string(),
            candidate_group_ids: candidates.iter().map(|&id| id.to_string()).collect(),
            hunk_ids: (1..=hunk_count)
                .map(|idx| format!("{file_id}-H{idx:03}"))
                .collect(),
        }
    };
    let ambiguous_files = vec![
        binding("F001", "src/alpha.rs", ["G1", "G2"], 70),
        binding("F002", "src/beta.rs", ["G1", "G3"], 60),
        binding("F003", "src/gamma.rs", ["G2", "G3"], 10),
    ];

    let batches = chunk_ambiguous_files(&ambiguous_files);
    let batch_sizes: Vec<usize> = batches.iter().map(|batch| batch.len()).collect();
    let hunks_per_batch: Vec<usize> = batches
        .iter()
        .map(|batch| batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>())
        .collect();

    // Two batches, holding one and two files respectively.
    assert_eq!(batch_sizes, vec![1, 2]);
    assert_eq!(hunks_per_batch.iter().sum::<usize>(), 140);

    for (file_count, hunk_count) in batch_sizes.iter().zip(&hunks_per_batch) {
        assert!(
            *file_count <= MAX_BIND_FILES_PER_REQUEST
                && *hunk_count <= MAX_BIND_HUNKS_PER_REQUEST
        );
    }
}
3410}