Skip to main content

llm_git/
compose.rs

1use std::{
2   collections::{BTreeMap, BTreeSet, HashMap, HashSet},
3   fmt::Write,
4   fs,
5   path::{Path, PathBuf},
6};
7
8use futures::stream::{self, StreamExt};
9use serde::{Deserialize, Serialize};
10
11use crate::{
12   api::{
13      AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
14      generate_summary_from_analysis, run_oneshot, strict_json_schema,
15   },
16   compose_types::{
17      ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
18      ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
19   },
20   config::CommitConfig,
21   error::{CommitGenError, Result},
22   git::{get_compose_diff, get_compose_stat, get_git_dir, get_head_hash, git_commit},
23   map_reduce::{FileObservation, observe_diff_files, should_use_map_reduce},
24   normalization::{format_commit_message, post_process_commit_message},
25   patch::{
26      StageResult, build_compose_snapshot, create_executable_group_patch, reset_staging,
27      stage_executable_group,
28   },
29   style, templates,
30   tokens::{TokenCounter, create_token_counter},
31   types::{Args, CommitType, ConventionalCommit, Scope},
32   validation::validate_commit_message,
33};
34
/// Default cap on observations rendered per file when the snapshot summary is
/// not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag written into cached plans and mixed into the plan cache key.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
// NOTE(review): presumably the sampling temperature for planner requests; the
// use site is outside the visible part of the file — confirm.
const COMPOSE_PLANNER_TEMPERATURE: f32 = 0.0;
/// File count above which the snapshot summary uses the medium (compacted)
/// budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Hunk count above which the snapshot summary uses the medium (compacted)
/// budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// File count above which the summary uses the tightest budget and planning
/// switches to area mode.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Hunk count above which the summary uses the tightest budget and planning
/// switches to area mode.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// Area bucketing stops splitting a bucket once it holds at most this many
/// files (and also fits the hunk cap below).
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// Area bucketing stops splitting a bucket once it holds at most this many
/// hunks (and also fits the file cap above).
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path depth at which area bucketing stops splitting regardless of size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the two monolith-fallback thresholds are consumed outside the
// visible part of the file; their exact meaning should be confirmed there.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
// NOTE(review): presumably per-request limits for the hunk-binding step; the
// use sites are not visible here — confirm.
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
/// Maximum number of commit messages to generate concurrently during
/// `execute_compose`. Matches the per-file fan-out used in `map_reduce`.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
52
/// Serde wrapper around a list of [`ComposeIntentGroup`]s.
///
/// The single `groups` field appears to mirror the top-level `groups` array
/// described by `build_intent_schema` — confirm at the request call site.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
   groups: Vec<ComposeIntentGroup>,
}
57
/// Serde wrapper around a list of [`ComposeBindingAssignment`]s.
///
/// The single `assignments` field appears to mirror the top-level
/// `assignments` array described by `build_binding_schema` — confirm at the
/// request call site.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
   assignments: Vec<ComposeBindingAssignment>,
}
62
/// On-disk representation of a cached executable plan.
///
/// Written by `save_cached_plan` and read back by `load_cached_plan`, which
/// only accepts an entry whose `schema_version` and `cache_key` match the
/// values computed for the current snapshot.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
   // Value of `COMPOSE_PLAN_SCHEMA_VERSION` at the time the entry was written.
   schema_version: String,
   // Result of `compose_plan_cache_key` for the snapshot the plan was built from.
   cache_key:      String,
   // The cached plan itself.
   plan:           ComposeExecutablePlan,
}
69
/// A file together with the group IDs that are candidates for it and the
/// hunk IDs involved.
// NOTE(review): construction and use are outside the visible part of the file;
// presumably this describes a file whose hunks could belong to several
// groups and still need binding — confirm.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
   file_id:             String,
   path:                String,
   candidate_group_ids: Vec<String>,
   hunk_ids:            Vec<String>,
}
77
/// Candidate group IDs associated with a single hunk.
// NOTE(review): use sites are not visible here; presumably looked up per hunk
// while resolving ambiguous bindings — confirm.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
   candidate_group_ids: Vec<String>,
}
82
/// Map from a string key to an ordered, de-duplicated set of strings.
// NOTE(review): the name suggests hunk ID -> set of group IDs, but the use
// sites are outside the visible part of the file — confirm the direction.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
84
/// Outcome of evaluating a set of bindings: what was assigned and what is
/// still unresolved.
// NOTE(review): producers and consumers are not visible here; presumably
// `assigned` maps a group ID to its hunk IDs and `unresolved` lists hunk IDs
// without a group — confirm.
#[derive(Debug)]
struct BindingEvaluation {
   assigned:   HashMap<String, Vec<String>>,
   unresolved: Vec<String>,
}
90
/// Limits applied when rendering the snapshot summary.
///
/// Chosen by `snapshot_summary_budget` from the snapshot's file and hunk
/// counts.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
   // Upper bound on observations rendered for each file.
   max_observations_per_file: usize,
   // Upper bound on hunks rendered for each file; `None` renders every hunk.
   max_hunks_per_file:        Option<usize>,
}
96
/// Granularity of the planning targets built for a snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
   // One target per file (see `build_file_planning_targets`).
   File,
   // Targets are path-based buckets of files (see `build_area_planning_targets`).
   Area,
}
102
/// A unit the planner can reference: either a single file or an area
/// covering several files.
#[derive(Debug, Clone)]
struct PlanningTarget {
   // The file ID in file mode, or a generated `A001`-style ID in area mode.
   target_id:  String,
   // The file path in file mode, or the bucket label in area mode.
   label:      String,
   // File IDs covered by this target.
   file_ids:   Vec<String>,
   // Total number of hunks across the covered files.
   hunk_count: usize,
   // Sum of the covered files' `additions`.
   additions:  usize,
   // Sum of the covered files' `deletions`.
   deletions:  usize,
}
112
/// Planning targets for a snapshot plus a lookup table of accepted spellings.
///
/// Built by `build_planning_index`.
#[derive(Debug, Clone)]
struct PlanningIndex {
   // Whether targets are individual files or path-based areas.
   mode:    PlanningMode,
   // All planning targets, in construction order.
   targets: Vec<PlanningTarget>,
   // Maps each target ID, its ASCII-uppercased form, and its normalized label
   // to the canonical target ID.
   aliases: HashMap<String, String>,
}
119
/// A labelled set of file IDs produced by `collect_planning_buckets`; each
/// bucket becomes one area planning target.
#[derive(Debug, Clone)]
struct PlanningBucket {
   // Label computed by `planning_bucket_label` for the bucket's files.
   label:    String,
   // File IDs that belong to the bucket.
   file_ids: Vec<String>,
}
125
126impl PlanningIndex {
127   fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
128      let mut expanded = Vec::new();
129      let mut seen_file_ids = HashSet::new();
130
131      for target_id in target_ids {
132         if let Some(target) = self
133            .targets
134            .iter()
135            .find(|candidate| candidate.target_id == *target_id)
136         {
137            for file_id in &target.file_ids {
138               if seen_file_ids.insert(file_id.clone()) {
139                  expanded.push(file_id.clone());
140               }
141            }
142         }
143      }
144
145      expanded
146   }
147}
148
149impl SnapshotSummaryBudget {
150   const fn is_compacted(self) -> bool {
151      self.max_hunks_per_file.is_some()
152   }
153}
154
/// Returns `true` when `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it is one of
/// the well-known manifest names (compared case-sensitively) or when its
/// extension is `lock` or `lockb` (compared ASCII case-insensitively).
fn is_dependency_manifest(path: &str) -> bool {
   const KNOWN_FILE_NAMES: &[&str] = &[
      "Cargo.toml",
      "Cargo.lock",
      "package.json",
      "package-lock.json",
      "pnpm-lock.yaml",
      "yarn.lock",
      "bun.lock",
      "bun.lockb",
      "go.mod",
      "go.sum",
      "requirements.txt",
      "Pipfile",
      "Pipfile.lock",
      "pyproject.toml",
      "Gemfile",
      "Gemfile.lock",
      "composer.json",
      "composer.lock",
      "build.gradle",
      "build.gradle.kts",
      "gradle.properties",
      "pom.xml",
   ];

   match Path::new(path).file_name().and_then(|name| name.to_str()) {
      // No final component (e.g. empty path or `..`) or not valid UTF-8.
      None => false,
      Some(name) if KNOWN_FILE_NAMES.iter().any(|known| *known == name) => true,
      Some(name) => match Path::new(name).extension() {
         Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
         None => false,
      },
   }
}
194
195fn save_debug_artifact<T: Serialize>(
196   debug_dir: Option<&Path>,
197   filename: &str,
198   value: &T,
199) -> Result<()> {
200   let Some(debug_dir) = debug_dir else {
201      return Ok(());
202   };
203
204   fs::create_dir_all(debug_dir)?;
205   let path = debug_dir.join(filename);
206   let json = serde_json::to_string_pretty(value)?;
207   fs::write(path, json)?;
208   Ok(())
209}
210
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a and returns the
/// digest as 16 lowercase hexadecimal digits.
fn fnv1a_64(input: &str) -> String {
   const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
   const PRIME: u64 = 0x0100_0000_01b3;

   // XOR the byte in first, then multiply (the "1a" ordering).
   let digest = input
      .bytes()
      .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

   format!("{digest:016x}")
}
219
220fn compose_plan_cache_key(
221   snapshot: &ComposeSnapshot,
222   max_commits: usize,
223   analysis_model: &str,
224) -> String {
225   fnv1a_64(&format!(
226      "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
227      snapshot.diff, snapshot.stat
228   ))
229}
230
231fn compose_plan_cache_path(
232   dir: &str,
233   snapshot: &ComposeSnapshot,
234   max_commits: usize,
235   analysis_model: &str,
236) -> Result<PathBuf> {
237   let git_dir = get_git_dir(dir)?;
238   Ok(git_dir.join("llm-git").join(format!(
239      "compose-plan-{}.json",
240      compose_plan_cache_key(snapshot, max_commits, analysis_model)
241   )))
242}
243
244fn load_cached_plan(
245   dir: &str,
246   snapshot: &ComposeSnapshot,
247   max_commits: usize,
248   analysis_model: &str,
249) -> Result<Option<ComposeExecutablePlan>> {
250   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
251   if !cache_path.exists() {
252      return Ok(None);
253   }
254
255   let content = match fs::read_to_string(&cache_path) {
256      Ok(content) => content,
257      Err(err) => {
258         eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
259         return Ok(None);
260      },
261   };
262   let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
263      Ok(cached) => cached,
264      Err(err) => {
265         eprintln!(
266            "{}",
267            style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
268         );
269         let _ = fs::remove_file(&cache_path);
270         return Ok(None);
271      },
272   };
273   let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
274
275   if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
276      return Ok(None);
277   }
278   if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
279      eprintln!(
280         "{}",
281         style::warning(&format!(
282            "Discarding cached compose plan (no longer valid for current snapshot): {err}"
283         ))
284      );
285      let _ = fs::remove_file(&cache_path);
286      return Ok(None);
287   }
288   Ok(Some(cached.plan))
289}
290
291fn save_cached_plan(
292   dir: &str,
293   snapshot: &ComposeSnapshot,
294   max_commits: usize,
295   analysis_model: &str,
296   plan: &ComposeExecutablePlan,
297) -> Result<()> {
298   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
299   if let Some(parent) = cache_path.parent() {
300      fs::create_dir_all(parent)?;
301   }
302
303   let cached = ComposeCachedPlan {
304      schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
305      cache_key:      compose_plan_cache_key(snapshot, max_commits, analysis_model),
306      plan:           plan.clone(),
307   };
308   fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
309   Ok(())
310}
311
/// Formats a line span that begins at `start` and covers `count` lines.
///
/// An empty span renders as `"0"`, a single line as its number, and a longer
/// span as the inclusive range `first-last`.
fn format_line_range(start: usize, count: usize) -> String {
   if count == 0 {
      return "0".to_string();
   }
   if count == 1 {
      return start.to_string();
   }

   let last = start + count - 1;
   format!("{start}-{last}")
}
319
320const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
321   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
322      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
323   {
324      SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
325   } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
326      || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
327   {
328      SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
329   } else {
330      SnapshotSummaryBudget {
331         max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
332         max_hunks_per_file:        None,
333      }
334   }
335}
336
/// Picks at most `max_samples` indices out of `0..count`, spread evenly and
/// in ascending order.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned (previously a lone `0` was
///   returned, exceeding the requested maximum).
/// * When `max_samples` is 1, only the first index is returned.
/// * Otherwise the first and last indices are always included and the rest
///   are spaced proportionally between them.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
   if count <= max_samples {
      return (0..count).collect();
   }

   match max_samples {
      0 => Vec::new(),
      1 => vec![0],
      _ => {
         // Here `count > max_samples >= 2`, so `last >= max_samples`.
         let last = count - 1;
         let mut positions = Vec::with_capacity(max_samples);
         for slot in 0..max_samples {
            let position = slot * last / (max_samples - 1);
            // Guard against emitting the same index twice in a row.
            if positions.last().copied() != Some(position) {
               positions.push(position);
            }
         }
         positions
      },
   }
}
356
357fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
358   match budget.max_hunks_per_file {
359      None => file.hunk_ids.iter().map(String::as_str).collect(),
360      Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
361         .into_iter()
362         .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
363         .collect(),
364   }
365}
366
367fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
368   let budget = snapshot_summary_budget(snapshot);
369   let observations_by_file: HashMap<&str, Vec<&str>> = observations
370      .iter()
371      .map(|observation| {
372         (
373            observation.file.as_str(),
374            observation
375               .observations
376               .iter()
377               .map(String::as_str)
378               .take(budget.max_observations_per_file)
379               .collect(),
380         )
381      })
382      .collect();
383
384   let mut out = String::new();
385   if budget.is_compacted() {
386      let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
387      writeln!(
388         out,
389         "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
390          representative hunks and {} observation(s) per file",
391         budget.max_observations_per_file
392      )
393      .unwrap();
394   }
395
396   for file in &snapshot.files {
397      writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
398      if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
399         for observation in file_observations {
400            writeln!(out, "  observation: {observation}").unwrap();
401         }
402      }
403
404      let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
405      for hunk_id in &rendered_hunk_ids {
406         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
407            if hunk.synthetic {
408               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
409            } else {
410               writeln!(
411                  out,
412                  "  - {} old:{} new:{} :: {}",
413                  hunk.hunk_id,
414                  format_line_range(hunk.old_start, hunk.old_count),
415                  format_line_range(hunk.new_start, hunk.new_count),
416                  hunk.snippet
417               )
418               .unwrap();
419            }
420         }
421      }
422
423      let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
424      if omitted_hunks > 0 {
425         writeln!(out, "  ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
426      }
427   }
428
429   out
430}
431
432const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
433   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
434      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
435   {
436      PlanningMode::Area
437   } else {
438      PlanningMode::File
439   }
440}
441
/// Counts the `/`-separated segments of `path` (always at least 1, since an
/// empty string is one empty segment).
fn path_depth(path: &str) -> usize {
   // N separators delimit N + 1 segments.
   path.matches('/').count() + 1
}
445
/// Returns the first `depth` `/`-separated segments of `path`, re-joined with
/// `/`. A depth of 0 gives an empty string; a depth beyond the number of
/// segments gives the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
   let leading_segments: Vec<&str> = path.split('/').take(depth).collect();
   leading_segments.join("/")
}
455
/// Returns the longest run of leading `/`-separated segments shared by every
/// path in `paths`, joined with `/`.
///
/// Comparison is per whole segment, so `a/b` and `a/bc` share only `a`. An
/// empty slice, or paths with no shared first segment, give an empty string.
fn common_path_prefix(paths: &[String]) -> String {
   let Some((first, rest)) = paths.split_first() else {
      return String::new();
   };

   let mut shared: Vec<&str> = first.split('/').collect();
   for other in rest {
      if shared.is_empty() {
         // Nothing left to narrow down.
         break;
      }
      let matching = shared
         .iter()
         .zip(other.split('/'))
         .take_while(|pair| *pair.0 == pair.1)
         .count();
      shared.truncate(matching);
   }

   shared.join("/")
}
477
478fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
479   file_ids
480      .iter()
481      .filter_map(|file_id| snapshot.file_by_id(file_id))
482      .map(|file| file.hunk_ids.len())
483      .sum()
484}
485
486fn group_file_ids_by_prefix(
487   snapshot: &ComposeSnapshot,
488   file_ids: &[String],
489   depth: usize,
490) -> BTreeMap<String, Vec<String>> {
491   let mut groups = BTreeMap::new();
492
493   for file_id in file_ids {
494      if let Some(file) = snapshot.file_by_id(file_id) {
495         groups
496            .entry(prefix_at_depth(&file.path, depth))
497            .or_insert_with(Vec::new)
498            .push(file_id.clone());
499      }
500   }
501
502   groups
503}
504
505fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
506   let paths: Vec<String> = file_ids
507      .iter()
508      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
509      .collect();
510
511   let common_prefix = common_path_prefix(&paths);
512   if common_prefix.is_empty() {
513      paths.first().cloned().unwrap_or_else(|| "misc".to_string())
514   } else {
515      common_prefix
516   }
517}
518
519fn collect_planning_buckets(
520   snapshot: &ComposeSnapshot,
521   file_ids: &[String],
522   depth: usize,
523) -> Vec<PlanningBucket> {
524   let file_count = file_ids.len();
525   let hunk_count = bucket_hunk_count(snapshot, file_ids);
526   let max_path_depth = file_ids
527      .iter()
528      .filter_map(|file_id| snapshot.file_by_id(file_id))
529      .map(|file| path_depth(&file.path))
530      .max()
531      .unwrap_or(depth);
532
533   let should_stop =
534      file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
535   if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
536      return vec![PlanningBucket {
537         label:    planning_bucket_label(snapshot, file_ids),
538         file_ids: file_ids.to_vec(),
539      }];
540   }
541
542   let next_depth = depth + 1;
543   let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
544   if groups.len() <= 1 {
545      return collect_planning_buckets(snapshot, file_ids, next_depth);
546   }
547
548   groups
549      .into_values()
550      .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
551      .collect()
552}
553
554fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
555   let all_file_ids: Vec<String> = snapshot
556      .files
557      .iter()
558      .map(|file| file.file_id.clone())
559      .collect();
560   let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
561
562   buckets
563      .into_iter()
564      .enumerate()
565      .map(|(idx, bucket)| {
566         let mut additions = 0_usize;
567         let mut deletions = 0_usize;
568         let mut hunk_count = 0_usize;
569
570         for file_id in &bucket.file_ids {
571            if let Some(file) = snapshot.file_by_id(file_id) {
572               additions = additions.saturating_add(file.additions);
573               deletions = deletions.saturating_add(file.deletions);
574               hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
575            }
576         }
577
578         PlanningTarget {
579            target_id: format!("A{:03}", idx + 1),
580            label: bucket.label,
581            file_ids: bucket.file_ids,
582            hunk_count,
583            additions,
584            deletions,
585         }
586      })
587      .collect()
588}
589
590fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
591   snapshot
592      .files
593      .iter()
594      .map(|file| PlanningTarget {
595         target_id:  file.file_id.clone(),
596         label:      file.path.clone(),
597         file_ids:   vec![file.file_id.clone()],
598         hunk_count: file.hunk_ids.len(),
599         additions:  file.additions,
600         deletions:  file.deletions,
601      })
602      .collect()
603}
604
605fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
606   let mode = planning_mode_for_snapshot(snapshot);
607   let targets = match mode {
608      PlanningMode::File => build_file_planning_targets(snapshot),
609      PlanningMode::Area => build_area_planning_targets(snapshot),
610   };
611
612   let aliases = targets
613      .iter()
614      .flat_map(|target| {
615         let normalized_label = normalize_file_reference(&target.label);
616         [
617            (target.target_id.clone(), target.target_id.clone()),
618            (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
619            (normalized_label, target.target_id.clone()),
620         ]
621      })
622      .collect();
623
624   PlanningIndex { mode, targets, aliases }
625}
626
627fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
628   sample_positions(target.file_ids.len(), 4)
629      .into_iter()
630      .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
631      .collect()
632}
633
634fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
635   let hunk_ids: Vec<&String> = target
636      .file_ids
637      .iter()
638      .filter_map(|file_id| snapshot.file_by_id(file_id))
639      .flat_map(|file| file.hunk_ids.iter())
640      .collect();
641
642   sample_positions(hunk_ids.len(), 4)
643      .into_iter()
644      .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
645      .collect()
646}
647
648fn render_planning_stat(index: &PlanningIndex) -> String {
649   let mut out = String::new();
650
651   match index.mode {
652      PlanningMode::File => {
653         writeln!(out, "# planning over individual file IDs").unwrap();
654      },
655      PlanningMode::Area => {
656         writeln!(
657            out,
658            "# planning over {} area IDs spanning {} files",
659            index.targets.len(),
660            index
661               .targets
662               .iter()
663               .flat_map(|target| target.file_ids.iter())
664               .collect::<HashSet<_>>()
665               .len()
666         )
667         .unwrap();
668      },
669   }
670
671   for target in &index.targets {
672      writeln!(
673         out,
674         "{} {} | {} files | {} hunks | +{}/-{}",
675         target.target_id,
676         target.label,
677         target.file_ids.len(),
678         target.hunk_count,
679         target.additions,
680         target.deletions
681      )
682      .unwrap();
683   }
684
685   out
686}
687
688fn render_planning_snapshot_summary(
689   snapshot: &ComposeSnapshot,
690   observations: &[FileObservation],
691   index: &PlanningIndex,
692) -> String {
693   if index.mode == PlanningMode::File {
694      return render_snapshot_summary(snapshot, observations);
695   }
696
697   let observations_by_file: HashMap<&str, Vec<&str>> = observations
698      .iter()
699      .map(|observation| {
700         (
701            observation.file.as_str(),
702            observation
703               .observations
704               .iter()
705               .map(String::as_str)
706               .take(1)
707               .collect(),
708         )
709      })
710      .collect();
711
712   let mut out = String::new();
713   writeln!(
714      out,
715      "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
716   )
717   .unwrap();
718
719   for target in &index.targets {
720      writeln!(
721         out,
722         "- {} {} ({} files, {} hunks, +{}/-{})",
723         target.target_id,
724         target.label,
725         target.file_ids.len(),
726         target.hunk_count,
727         target.additions,
728         target.deletions
729      )
730      .unwrap();
731
732      let sample_file_ids = sample_file_ids_for_target(target);
733      if !sample_file_ids.is_empty() {
734         let sample_files: Vec<String> = sample_file_ids
735            .iter()
736            .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
737            .collect();
738         writeln!(out, "  files: {}", sample_files.join(", ")).unwrap();
739         let omitted = target.file_ids.len().saturating_sub(sample_files.len());
740         if omitted > 0 {
741            writeln!(out, "  ... {omitted} more files omitted from {}", target.target_id).unwrap();
742         }
743      }
744
745      let mut rendered_observations = 0_usize;
746      for file_id in &target.file_ids {
747         let Some(file) = snapshot.file_by_id(file_id) else {
748            continue;
749         };
750         let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
751            continue;
752         };
753
754         for observation in file_observations {
755            writeln!(out, "  observation: {observation}").unwrap();
756            rendered_observations += 1;
757            if rendered_observations >= 2 {
758               break;
759            }
760         }
761
762         if rendered_observations >= 2 {
763            break;
764         }
765      }
766
767      for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
768         if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
769            if hunk.synthetic {
770               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
771            } else {
772               writeln!(
773                  out,
774                  "  - {} old:{} new:{} :: {}",
775                  hunk.hunk_id,
776                  format_line_range(hunk.old_start, hunk.old_count),
777                  format_line_range(hunk.new_start, hunk.new_count),
778                  hunk.snippet
779               )
780               .unwrap();
781            }
782         }
783      }
784   }
785
786   out
787}
788
789fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
790   match index.mode {
791      PlanningMode::File => format!(
792         "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
793         snapshot.files.len()
794      ),
795      PlanningMode::Area => format!(
796         "Area IDs only. Each target may expand to multiple files by shared path prefix. \
797          Coverage: {} areas spanning {} files.",
798         index.targets.len(),
799         snapshot.files.len()
800      ),
801   }
802}
803
804fn render_planning_notes(index: &PlanningIndex) -> String {
805   match index.mode {
806      PlanningMode::File => {
807         "Use only the provided file IDs and keep the grouping conservative.".to_string()
808      },
809      PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
810                             planning areas. Split along independent subsystems or workstreams \
811                             when the areas point at unrelated changes."
812         .to_string(),
813   }
814}
815
816fn render_split_bias(index: &PlanningIndex) -> String {
817   match index.mode {
818      PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
819      PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
820                             one broad group if nearly every area clearly belongs to the same \
821                             atomic change."
822         .to_string(),
823   }
824}
825
826fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
827   let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
828
829   strict_json_schema(
830      serde_json::json!({
831         "groups": {
832            "type": "array",
833            "items": {
834               "type": "object",
835               "properties": {
836                  "group_id": {
837                     "type": "string",
838                     "description": "Stable identifier like G1, G2, G3"
839                  },
840                  "file_ids": {
841                     "type": "array",
842                     "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
843                     "items": { "type": "string" }
844                  },
845                  "type": {
846                     "type": "string",
847                     "enum": type_enum,
848                     "description": "Conventional commit type for this group"
849                  },
850                  "scope": {
851                     "type": "string",
852                     "description": "Optional scope (module/component). Omit if broad."
853                  },
854                  "rationale": {
855                     "type": "string",
856                     "description": "Brief explanation of the logical change"
857                  },
858                  "dependencies": {
859                     "type": "array",
860                     "description": "Group IDs this group depends on",
861                     "items": { "type": "string" }
862                  }
863               },
864               "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
865               "additionalProperties": false
866            }
867         }
868      }),
869      &["groups"],
870   )
871}
872
873fn build_binding_schema() -> serde_json::Value {
874   strict_json_schema(
875      serde_json::json!({
876         "assignments": {
877            "type": "array",
878            "items": {
879               "type": "object",
880               "properties": {
881                  "group_id": { "type": "string" },
882                  "hunk_ids": {
883                     "type": "array",
884                     "items": { "type": "string" }
885                  }
886               },
887               "required": ["group_id", "hunk_ids"],
888               "additionalProperties": false
889            }
890         }
891      }),
892      &["assignments"],
893   )
894}
895
896fn compute_dependency_order<T, FId, FDeps>(
897   groups: &[T],
898   group_id: FId,
899   dependencies: FDeps,
900) -> Result<Vec<usize>>
901where
902   FId: Fn(&T) -> &str,
903   FDeps: Fn(&T) -> &[String],
904{
905   let mut index_by_id = HashMap::new();
906   for (idx, group) in groups.iter().enumerate() {
907      let id = group_id(group);
908      if id.trim().is_empty() {
909         return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
910      }
911      if index_by_id.insert(id.to_string(), idx).is_some() {
912         return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
913      }
914   }
915
916   let mut in_degree = vec![0_usize; groups.len()];
917   let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
918
919   for (idx, group) in groups.iter().enumerate() {
920      for dependency in dependencies(group) {
921         let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
922            CommitGenError::Other(format!(
923               "Group {} depends on unknown group_id '{}'",
924               group_id(group),
925               dependency
926            ))
927         })?;
928         if dependency_idx == idx {
929            return Err(CommitGenError::Other(format!(
930               "Group {} depends on itself",
931               group_id(group)
932            )));
933         }
934
935         adjacency[dependency_idx].push(idx);
936         in_degree[idx] += 1;
937      }
938   }
939
940   let mut queue: Vec<usize> = (0..groups.len())
941      .filter(|idx| in_degree[*idx] == 0)
942      .collect();
943   let mut order = Vec::with_capacity(groups.len());
944
945   while let Some(node) = queue.pop() {
946      order.push(node);
947      for neighbor in &adjacency[node] {
948         in_degree[*neighbor] -= 1;
949         if in_degree[*neighbor] == 0 {
950            queue.push(*neighbor);
951         }
952      }
953   }
954
955   if order.len() != groups.len() {
956      return Err(CommitGenError::Other(
957         "Circular dependency detected in compose groups".to_string(),
958      ));
959   }
960
961   Ok(order)
962}
963
/// Cleans up a file or target reference as it may appear in model output.
///
/// Surrounding whitespace is removed, then leading quote characters
/// (backtick, `"`, `'`), then any trailing mix of quote characters, commas,
/// and semicolons, and finally leading `./` prefixes.
///
/// Trailing quotes and punctuation are stripped together so that a reference
/// such as `"src/a.rs",` normalizes to `src/a.rs`; stripping them in two
/// separate passes left the closing quote behind.
fn normalize_file_reference(raw_file_ref: &str) -> String {
   const QUOTES: [char; 3] = ['`', '"', '\''];

   raw_file_ref
      .trim()
      .trim_start_matches(QUOTES)
      .trim_end_matches(|ch: char| QUOTES.contains(&ch) || matches!(ch, ',' | ';'))
      .trim_start_matches("./")
      .to_string()
}
972
/// Extracts distinct lowercase keywords from `text`.
///
/// A keyword is a maximal run of ASCII letters and digits that is at least
/// three characters long and is not one of the stop words. Keywords are
/// returned in order of first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
   const STOP_WORDS: &[&str] = &[
      "and",
      "for",
      "the",
      "with",
      "from",
      "into",
      "after",
      "before",
      "over",
      "under",
      "plus",
      "across",
      "update",
      "updated",
      "refactor",
      "refactored",
      "changes",
      "change",
      "logical",
      "group",
      "groups",
      "commit",
      "commits",
   ];

   let mut already_seen: HashSet<String> = HashSet::new();
   let mut tokens = Vec::new();

   // Every non-alphanumeric character (including non-ASCII) is a separator.
   for word in text.split(|ch: char| !ch.is_ascii_alphanumeric()) {
      if word.len() < 3 {
         continue;
      }
      let lowered = word.to_ascii_lowercase();
      if STOP_WORDS.contains(&lowered.as_str()) {
         continue;
      }
      if already_seen.insert(lowered.clone()) {
         tokens.push(lowered);
      }
   }

   tokens
}
1024
1025fn extract_group_id_candidate(raw: &str) -> Option<String> {
1026   let normalized = normalize_file_reference(raw);
1027   let uppercase = normalized.to_ascii_uppercase();
1028
1029   if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1030      return Some(format!("G{uppercase}"));
1031   }
1032
1033   if let Some(rest) = uppercase.strip_prefix('G')
1034      && !rest.is_empty()
1035      && rest.chars().all(|ch| ch.is_ascii_digit())
1036   {
1037      return Some(format!("G{rest}"));
1038   }
1039
1040   let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1041   let compact = uppercase
1042      .chars()
1043      .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1044      .collect::<String>();
1045   if compact.starts_with("GROUP") && !digits.is_empty() {
1046      return Some(format!("G{digits}"));
1047   }
1048
1049   None
1050}
1051
/// Coarse classification of a changed file, produced by
/// `compose_file_category` and used when scoring how well files and commit
/// groups fit together.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeFileCategory {
   /// The file is flagged as binary.
   Binary,
   /// The file is a dependency manifest.
   Dependency,
   /// Markdown or README documentation.
   Docs,
   /// The file lives under a `tests/` directory or has a test/spec-like name.
   Test,
   /// A configuration file, recognized by its extension.
   Config,
   /// Program source code, recognized by its extension.
   Source,
   /// Anything that matched none of the other categories.
   Other,
}
1062
1063fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1064   if file.is_binary {
1065      return ComposeFileCategory::Binary;
1066   }
1067
1068   if is_dependency_manifest(&file.path) {
1069      return ComposeFileCategory::Dependency;
1070   }
1071
1072   let path = file.path.to_ascii_lowercase();
1073   let file_name = Path::new(&path)
1074      .file_name()
1075      .and_then(|name| name.to_str())
1076      .unwrap_or_default();
1077   let extension = Path::new(&path)
1078      .extension()
1079      .and_then(|ext| ext.to_str())
1080      .unwrap_or_default();
1081
1082   if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1083      return ComposeFileCategory::Docs;
1084   }
1085
1086   if path.contains("/tests/")
1087      || path.starts_with("tests/")
1088      || file_name.contains("test")
1089      || file_name.contains("spec")
1090   {
1091      return ComposeFileCategory::Test;
1092   }
1093
1094   if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1095      return ComposeFileCategory::Config;
1096   }
1097
1098   if matches!(
1099      extension,
1100      "rs"
1101         | "py"
1102         | "js"
1103         | "jsx"
1104         | "ts"
1105         | "tsx"
1106         | "go"
1107         | "java"
1108         | "kt"
1109         | "c"
1110         | "cc"
1111         | "cpp"
1112         | "h"
1113         | "hpp"
1114         | "cs"
1115         | "rb"
1116         | "php"
1117         | "swift"
1118         | "scala"
1119         | "m"
1120         | "mm"
1121   ) {
1122      return ComposeFileCategory::Source;
1123   }
1124
1125   ComposeFileCategory::Other
1126}
1127
/// Counts how many leading `/`-separated segments two paths have in common.
///
/// Counting stops at the first differing segment or when either path runs out
/// of segments. The last (file name) segment takes part like any other, so two
/// identical paths match on every segment.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
   let mut right_segments = right.split('/');
   let mut depth = 0;

   for left_segment in left.split('/') {
      match right_segments.next() {
         Some(right_segment) if right_segment == left_segment => depth += 1,
         _ => break,
      }
   }

   depth
}
1135
1136fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1137   let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1138
1139   if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1140      score += 40;
1141   }
1142
1143   if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1144      score += 12;
1145   }
1146
1147   if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1148      score += 18;
1149   }
1150
1151   score
1152}
1153
1154fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1155   match (compose_file_category(file), group.commit_type.as_str()) {
1156      (ComposeFileCategory::Docs, "docs") => 25,
1157      (ComposeFileCategory::Test, "test") => 25,
1158      (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1159      (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1160      (ComposeFileCategory::Source, "feat" | "fix" | "refactor" | "perf") => 10,
1161      _ => 0,
1162   }
1163}
1164
1165fn best_group_for_missing_file(
1166   snapshot: &ComposeSnapshot,
1167   groups: &[ComposeIntentGroup],
1168   missing_file: &ComposeFile,
1169) -> usize {
1170   let mut best_group_idx = 0;
1171   let mut best_score = i32::MIN;
1172   let mut best_group_size = usize::MAX;
1173
1174   for (group_idx, group) in groups.iter().enumerate() {
1175      let similarity = group
1176         .file_ids
1177         .iter()
1178         .filter_map(|file_id| snapshot.file_by_id(file_id))
1179         .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1180         .max()
1181         .unwrap_or_default();
1182      let score = similarity + group_type_bonus(missing_file, group);
1183      let group_size = group.file_ids.len();
1184
1185      if score > best_score || (score == best_score && group_size < best_group_size) {
1186         best_group_idx = group_idx;
1187         best_score = score;
1188         best_group_size = group_size;
1189      }
1190   }
1191
1192   best_group_idx
1193}
1194
1195fn normalize_dependency_reference(
1196   raw_dependency: &str,
1197   known_group_ids: &HashSet<String>,
1198) -> Option<String> {
1199   let normalized = normalize_file_reference(raw_dependency);
1200   if normalized.is_empty() {
1201      return None;
1202   }
1203
1204   if known_group_ids.contains(&normalized) {
1205      return Some(normalized);
1206   }
1207
1208   let uppercase = normalized.to_ascii_uppercase();
1209   if known_group_ids.contains(&uppercase) {
1210      return Some(uppercase);
1211   }
1212
1213   let candidate = extract_group_id_candidate(&normalized)?;
1214   known_group_ids.contains(&candidate).then_some(candidate)
1215}
1216
1217fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1218   let label = target.label.to_ascii_lowercase();
1219   let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1220   let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1221
1222   if let Some(scope) = &group.scope {
1223      let scope = scope.as_str().to_ascii_lowercase();
1224      if label.contains(&scope) || workstream.contains(&scope) {
1225         score += 140;
1226      }
1227
1228      for segment in scope.split('/') {
1229         if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1230            score += 45;
1231         }
1232      }
1233   }
1234
1235   for token in planning_text_tokens(&group.rationale) {
1236      if label.contains(&token) || workstream.contains(&token) {
1237         score += 16;
1238      }
1239   }
1240
1241   match group.commit_type.as_str() {
1242      "ci" if target.label.starts_with(".github/") => score += 120,
1243      "docs"
1244         if target.label.starts_with("docs/")
1245            || Path::new(&target.label)
1246               .extension()
1247               .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1248      {
1249         score += 80;
1250      },
1251      "build" | "chore"
1252         if target.label.contains("Cargo")
1253            || target.label.contains("package")
1254            || target.label.contains("lock")
1255            || target.label.contains("tsconfig")
1256            || target.label.contains("biome")
1257            || target.label.contains("bun") =>
1258      {
1259         score += 55;
1260      },
1261      _ => {},
1262   }
1263
1264   score
1265}
1266
1267fn seed_group_targets(
1268   groups: &[ComposeIntentGroup],
1269   planning_index: &PlanningIndex,
1270   group_targets: &mut [Vec<String>],
1271   repair_notes: &mut Vec<String>,
1272) {
1273   let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1274
1275   for (group_idx, group) in groups.iter().enumerate() {
1276      if !group_targets[group_idx].is_empty() {
1277         continue;
1278      }
1279
1280      let fallback_target = planning_index
1281         .targets
1282         .iter()
1283         .max_by_key(|target| {
1284            let mut score = planning_target_match_score(target, group);
1285            if !claimed_target_ids.contains(&target.target_id) {
1286               score += 60;
1287            }
1288            (score, target.hunk_count, target.file_ids.len())
1289         })
1290         .or_else(|| planning_index.targets.first());
1291
1292      let Some(fallback_target) = fallback_target else {
1293         continue;
1294      };
1295
1296      group_targets[group_idx].push(fallback_target.target_id.clone());
1297      claimed_target_ids.insert(fallback_target.target_id.clone());
1298      repair_notes.push(format!(
1299         "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1300         group.group_id, fallback_target.target_id, fallback_target.label
1301      ));
1302   }
1303}
1304
1305fn normalize_intent_plan(
1306   snapshot: &ComposeSnapshot,
1307   planning_index: &PlanningIndex,
1308   mut groups: Vec<ComposeIntentGroup>,
1309) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
1310   if groups.is_empty() {
1311      return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
1312   }
1313
1314   let known_target_ids: HashSet<&str> = planning_index
1315      .targets
1316      .iter()
1317      .map(|target| target.target_id.as_str())
1318      .collect();
1319   let mut repair_notes = Vec::new();
1320   let mut covered_file_ids = HashSet::new();
1321   let mut normalized_group_targets = Vec::with_capacity(groups.len());
1322
1323   for group in &groups {
1324      if group.file_ids.is_empty() {
1325         repair_notes.push(format!(
1326            "Compose planner left {} without planning targets; assigning targets heuristically",
1327            group.group_id
1328         ));
1329      }
1330
1331      let mut normalized_target_ids = Vec::new();
1332      let mut seen_target_ids = HashSet::new();
1333      for raw_target_ref in &group.file_ids {
1334         let normalized_ref = normalize_file_reference(raw_target_ref);
1335         let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
1336            normalized_ref.clone()
1337         } else {
1338            let uppercase_ref = normalized_ref.to_ascii_uppercase();
1339            if known_target_ids.contains(uppercase_ref.as_str()) {
1340               uppercase_ref
1341            } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
1342               if raw_target_ref != target_id {
1343                  repair_notes.push(format!(
1344                     "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
1345                  ));
1346               }
1347               target_id.clone()
1348            } else {
1349               repair_notes.push(format!(
1350                  "Dropped unknown planning target '{}' from {}",
1351                  raw_target_ref, group.group_id
1352               ));
1353               continue;
1354            }
1355         };
1356
1357         if seen_target_ids.insert(canonical_target_id.clone()) {
1358            normalized_target_ids.push(canonical_target_id);
1359         }
1360      }
1361
1362      normalized_group_targets.push(normalized_target_ids);
1363   }
1364
1365   seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);
1366
1367   let known_group_ids: HashSet<String> =
1368      groups.iter().map(|group| group.group_id.clone()).collect();
1369   for group in &mut groups {
1370      let mut normalized_dependencies = Vec::new();
1371      let mut seen_dependencies = HashSet::new();
1372
1373      for raw_dependency in &group.dependencies {
1374         let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
1375         else {
1376            repair_notes.push(format!(
1377               "Dropped unknown dependency '{}' from {}",
1378               raw_dependency, group.group_id
1379            ));
1380            continue;
1381         };
1382
1383         if dependency == group.group_id {
1384            repair_notes.push(format!(
1385               "Dropped self-dependency '{}' from {}",
1386               raw_dependency, group.group_id
1387            ));
1388            continue;
1389         }
1390
1391         if seen_dependencies.insert(dependency.clone()) {
1392            if raw_dependency != &dependency {
1393               repair_notes.push(format!(
1394                  "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
1395               ));
1396            }
1397            normalized_dependencies.push(dependency);
1398         }
1399      }
1400
1401      group.dependencies = normalized_dependencies;
1402   }
1403
1404   for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
1405      let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
1406      for file_id in &expanded_file_ids {
1407         covered_file_ids.insert(file_id.clone());
1408      }
1409      group.file_ids = expanded_file_ids;
1410   }
1411
1412   for file in &snapshot.files {
1413      if covered_file_ids.contains(file.file_id.as_str()) {
1414         continue;
1415      }
1416
1417      let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
1418      let target_group = &mut groups[target_group_idx];
1419      target_group.file_ids.push(file.file_id.clone());
1420      covered_file_ids.insert(file.file_id.clone());
1421      repair_notes.push(format!(
1422         "Compose planner omitted {} ({}); assigned it to {}",
1423         file.file_id, file.path, target_group.group_id
1424      ));
1425   }
1426
1427   Ok((groups, repair_notes))
1428}
1429
/// Derives the workstream key that a target label belongs to.
///
/// The key is normally the first non-empty `/` segment of the label. For
/// labels under `.github`, `apps`, `packages`, `crates`, `services`, `libs` or
/// `pass`, the key also includes the second non-empty segment when there is
/// one (e.g. `apps/web`). A label without any non-empty segment is returned
/// unchanged.
fn workstream_key_for_label(label: &str) -> String {
   let mut segments = label.split('/').filter(|segment| !segment.is_empty());

   let Some(root) = segments.next() else {
      return label.to_string();
   };

   let keeps_second_segment = matches!(
      root,
      ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
   );

   match segments.next() {
      Some(child) if keeps_second_segment => format!("{root}/{child}"),
      _ => root.to_string(),
   }
}
1451
1452fn workstream_display_name(label: &str) -> String {
1453   let key = workstream_key_for_label(label);
1454   match key.as_str() {
1455      ".github/workflows" => "CI workflows".to_string(),
1456      ".github" => "GitHub automation".to_string(),
1457      _ => key
1458         .split('/')
1459         .next_back()
1460         .map(|segment| segment.replace(['_', '-'], " "))
1461         .unwrap_or(key),
1462   }
1463}
1464
/// Turns arbitrary text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces between kept characters collapse into a single `-`. Every other
/// character is dropped without acting as a separator. Returns `None` when
/// nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
   let is_separator = |ch: char| matches!(ch, '-' | '_' | '/' | '.' | ' ');

   // First discard characters that are neither content nor separators, so
   // they cannot split a word or introduce an extra dash.
   let relevant: String = raw
      .trim()
      .chars()
      .filter(|&ch| ch.is_ascii_alphanumeric() || is_separator(ch))
      .collect();

   let words: Vec<String> = relevant
      .split(is_separator)
      .filter(|word| !word.is_empty())
      .map(str::to_ascii_lowercase)
      .collect();

   if words.is_empty() {
      None
   } else {
      Some(words.join("-"))
   }
}
1483
1484fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1485   let key = workstream_key_for_label(label);
1486   let candidate = key
1487      .split('/')
1488      .next_back()
1489      .and_then(sanitize_scope_fragment)?;
1490   Scope::new(candidate).ok()
1491}
1492
1493fn fallback_rationale_for_labels(labels: &[String]) -> String {
1494   if labels.len() == 1 {
1495      let label = labels[0].as_str();
1496      let display = workstream_display_name(label);
1497      if label.starts_with("apps/") {
1498         return format!("{display} application updates");
1499      }
1500      if label.starts_with("packages/") {
1501         return format!("{display} package updates");
1502      }
1503      if label.starts_with("crates/") {
1504         return format!("{display} crate updates");
1505      }
1506      if label.starts_with(".github/") || label == ".github" {
1507         return format!("{display} updates");
1508      }
1509      return format!("{display} updates");
1510   }
1511
1512   let display_labels: Vec<String> = labels
1513      .iter()
1514      .take(3)
1515      .map(|label| workstream_display_name(label))
1516      .collect();
1517   format!("cross-cutting updates for {}", display_labels.join(", "))
1518}
1519
1520fn fallback_commit_type_for_group(
1521   snapshot: &ComposeSnapshot,
1522   labels: &[String],
1523   file_ids: &[String],
1524) -> Result<CommitType> {
1525   if labels
1526      .iter()
1527      .any(|label| label == ".github" || label.starts_with(".github/"))
1528   {
1529      return CommitType::new("ci");
1530   }
1531
1532   let files: Vec<&ComposeFile> = file_ids
1533      .iter()
1534      .filter_map(|file_id| snapshot.file_by_id(file_id))
1535      .collect();
1536   let all_docs = !files.is_empty()
1537      && files
1538         .iter()
1539         .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1540   if all_docs {
1541      return CommitType::new("docs");
1542   }
1543
1544   let all_tests = !files.is_empty()
1545      && files
1546         .iter()
1547         .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1548   if all_tests {
1549      return CommitType::new("test");
1550   }
1551
1552   let all_dependencies =
1553      !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1554   if all_dependencies {
1555      return CommitType::new("build");
1556   }
1557
1558   let all_config = !files.is_empty()
1559      && files.iter().all(|file| {
1560         matches!(
1561            compose_file_category(file),
1562            ComposeFileCategory::Config | ComposeFileCategory::Dependency
1563         )
1564      });
1565   if all_config {
1566      return CommitType::new("chore");
1567   }
1568
1569   CommitType::new("refactor")
1570}
1571
1572fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1573   snapshot
1574      .files
1575      .iter()
1576      .filter(|file| file_ids.contains(&file.file_id))
1577      .map(|file| file.file_id.clone())
1578      .collect()
1579}
1580
1581fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1582   if groups.is_empty() {
1583      return false;
1584   }
1585
1586   let largest_group = groups
1587      .iter()
1588      .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1589      .max()
1590      .unwrap_or_default();
1591
1592   groups.len() == 1
1593      || (groups.len() <= 2
1594         && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1595}
1596
1597fn should_force_large_patch_fallback(
1598   snapshot: &ComposeSnapshot,
1599   planning_index: &PlanningIndex,
1600   groups: &[ComposeIntentGroup],
1601   max_commits: usize,
1602) -> bool {
1603   if max_commits <= 1
1604      || planning_index.mode != PlanningMode::Area
1605      || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1606      || !is_monolithic_intent_plan(snapshot, groups)
1607   {
1608      return false;
1609   }
1610
1611   let workstream_count = planning_index
1612      .targets
1613      .iter()
1614      .map(|target| workstream_key_for_label(&target.label))
1615      .collect::<HashSet<_>>()
1616      .len();
1617
1618   workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1619}
1620
1621fn build_large_patch_fallback_groups(
1622   snapshot: &ComposeSnapshot,
1623   planning_index: &PlanningIndex,
1624   max_commits: usize,
1625) -> Result<Vec<ComposeIntentGroup>> {
1626   #[derive(Debug, Clone)]
1627   struct WorkstreamGroup {
1628      label:    String,
1629      file_ids: HashSet<String>,
1630      weight:   usize,
1631   }
1632
1633   #[derive(Debug, Clone)]
1634   struct FallbackBin {
1635      labels:       Vec<String>,
1636      file_ids:     HashSet<String>,
1637      total_weight: usize,
1638   }
1639
1640   let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1641   for target in &planning_index.targets {
1642      let key = workstream_key_for_label(&target.label);
1643      let entry = workstreams
1644         .entry(key.clone())
1645         .or_insert_with(|| WorkstreamGroup {
1646            label:    key,
1647            file_ids: HashSet::new(),
1648            weight:   0,
1649         });
1650
1651      for file_id in &target.file_ids {
1652         entry.file_ids.insert(file_id.clone());
1653      }
1654      entry.weight = entry
1655         .weight
1656         .saturating_add(target.hunk_count.max(target.file_ids.len()));
1657   }
1658
1659   let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1660   workstreams.sort_by(|left, right| {
1661      right
1662         .weight
1663         .cmp(&left.weight)
1664         .then_with(|| left.label.cmp(&right.label))
1665   });
1666
1667   let bin_count = max_commits.min(workstreams.len());
1668   let mut bins: Vec<FallbackBin> = Vec::new();
1669   for workstream in workstreams {
1670      if bins.len() < bin_count {
1671         bins.push(FallbackBin {
1672            labels:       vec![workstream.label],
1673            file_ids:     workstream.file_ids,
1674            total_weight: workstream.weight,
1675         });
1676         continue;
1677      }
1678
1679      let Some((target_idx, _)) = bins
1680         .iter()
1681         .enumerate()
1682         .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1683      else {
1684         continue;
1685      };
1686
1687      let target_bin = &mut bins[target_idx];
1688      target_bin.labels.push(workstream.label);
1689      target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1690      target_bin.file_ids.extend(workstream.file_ids);
1691   }
1692
1693   let mut groups = Vec::new();
1694   for (idx, bin) in bins.into_iter().enumerate() {
1695      let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1696      let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1697      let scope = (bin.labels.len() == 1)
1698         .then(|| fallback_scope_for_label(&bin.labels[0]))
1699         .flatten();
1700      let rationale = fallback_rationale_for_labels(&bin.labels);
1701
1702      groups.push(ComposeIntentGroup {
1703         group_id: format!("G{}", idx + 1),
1704         commit_type,
1705         scope,
1706         file_ids: ordered_ids,
1707         rationale,
1708         dependencies: Vec::new(),
1709      });
1710   }
1711
1712   Ok(groups)
1713}
1714
1715async fn analyze_compose_intent(
1716   snapshot: &ComposeSnapshot,
1717   observations: &[FileObservation],
1718   config: &CommitConfig,
1719   max_commits: usize,
1720   debug_dir: Option<&Path>,
1721) -> Result<ComposeIntentPlan> {
1722   let planning_index = build_planning_index(snapshot);
1723   let stat_summary = render_planning_stat(&planning_index);
1724   let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1725   let planning_targets = render_planning_targets(&planning_index, snapshot);
1726   let planning_notes = render_planning_notes(&planning_index);
1727   let split_bias = render_split_bias(&planning_index);
1728   let schema = build_intent_schema(config);
1729   let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1730      variant: "default",
1731      max_commits,
1732      stat: &stat_summary,
1733      snapshot_summary: &snapshot_summary,
1734      planning_targets: &planning_targets,
1735      planning_notes: &planning_notes,
1736      split_bias: &split_bias,
1737   })?;
1738
1739   let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1740      operation:        "compose/intent",
1741      model:            &config.analysis_model,
1742      max_tokens:       3000,
1743      temperature:      COMPOSE_PLANNER_TEMPERATURE,
1744      prompt_family:    "compose-intent",
1745      prompt_variant:   "default",
1746      system_prompt:    &parts.system,
1747      user_prompt:      &parts.user,
1748      tool_name:        "create_compose_intent_plan",
1749      tool_description: "Plan logical commit groups over the provided planning target IDs",
1750      schema:           &schema,
1751      debug:            debug_dir.map(|dir| OneShotDebug {
1752         dir:    Some(dir),
1753         prefix: None,
1754         name:   "compose_intent",
1755      }),
1756      cacheable:        true,
1757   })
1758   .await?;
1759
1760   let (mut groups, repair_notes) =
1761      normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1762   for note in &repair_notes {
1763      eprintln!("{}", style::warning(note));
1764   }
1765   if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1766      eprintln!(
1767         "{}",
1768         style::warning(
1769            "Compose intent collapsed into a monolithic large-patch group; falling back to \
1770             path-based workstream splits."
1771         )
1772      );
1773      groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1774   }
1775   let dependency_order =
1776      compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1777
1778   Ok(ComposeIntentPlan { groups, dependency_order })
1779}
1780
1781fn should_collect_compose_observations(
1782   snapshot: &ComposeSnapshot,
1783   config: &CommitConfig,
1784   counter: &TokenCounter,
1785) -> bool {
1786   planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1787      && should_use_map_reduce(&snapshot.diff, config, counter)
1788}
1789
1790fn auto_assign_hunks(
1791   snapshot: &ComposeSnapshot,
1792   intent_plan: &ComposeIntentPlan,
1793) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1794   let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1795   for group in &intent_plan.groups {
1796      for file_id in &group.file_ids {
1797         groups_by_file
1798            .entry(file_id.as_str())
1799            .or_default()
1800            .push(group.group_id.as_str());
1801      }
1802   }
1803
1804   let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1805      .groups
1806      .iter()
1807      .map(|group| (group.group_id.clone(), BTreeSet::new()))
1808      .collect();
1809   let mut ambiguous = Vec::new();
1810
1811   for file in &snapshot.files {
1812      let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1813         return Err(CommitGenError::Other(format!(
1814            "No compose group claimed file {} ({})",
1815            file.file_id, file.path
1816         )));
1817      };
1818
1819      if candidate_group_ids.len() == 1 {
1820         let group_id = candidate_group_ids[0];
1821         let entry = assigned
1822            .get_mut(group_id)
1823            .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1824         for hunk_id in &file.hunk_ids {
1825            entry.insert(hunk_id.clone());
1826         }
1827      } else {
1828         ambiguous.push(AmbiguousFileBinding {
1829            file_id:             file.file_id.clone(),
1830            path:                file.path.clone(),
1831            candidate_group_ids: candidate_group_ids
1832               .iter()
1833               .map(|group_id| (*group_id).to_string())
1834               .collect(),
1835            hunk_ids:            file.hunk_ids.clone(),
1836         });
1837      }
1838   }
1839
1840   Ok((assigned, ambiguous))
1841}
1842
1843fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1844   let mut out = String::new();
1845   for group in groups {
1846      let scope = group
1847         .scope
1848         .as_ref()
1849         .map(|scope| format!("({})", scope.as_str()))
1850         .unwrap_or_default();
1851      writeln!(
1852         out,
1853         "- {} [{}{}] {}",
1854         group.group_id,
1855         group.commit_type.as_str(),
1856         scope,
1857         group.rationale
1858      )
1859      .unwrap();
1860   }
1861
1862   out
1863}
1864
1865fn render_binding_ambiguous_files(
1866   snapshot: &ComposeSnapshot,
1867   ambiguous_files: &[AmbiguousFileBinding],
1868) -> String {
1869   let mut out = String::new();
1870   for ambiguous_file in ambiguous_files {
1871      writeln!(
1872         out,
1873         "- {} {} candidates: {}",
1874         ambiguous_file.file_id,
1875         ambiguous_file.path,
1876         ambiguous_file.candidate_group_ids.join(", ")
1877      )
1878      .unwrap();
1879
1880      for hunk_id in &ambiguous_file.hunk_ids {
1881         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1882            if hunk.synthetic {
1883               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1884            } else {
1885               writeln!(
1886                  out,
1887                  "  - {} old:{} new:{} :: {}",
1888                  hunk.hunk_id,
1889                  format_line_range(hunk.old_start, hunk.old_count),
1890                  format_line_range(hunk.new_start, hunk.new_count),
1891                  hunk.snippet
1892               )
1893               .unwrap();
1894            }
1895         }
1896      }
1897   }
1898
1899   out
1900}
1901
1902async fn request_binding(
1903   snapshot: &ComposeSnapshot,
1904   groups: &[ComposeIntentGroup],
1905   ambiguous_files: &[AmbiguousFileBinding],
1906   config: &CommitConfig,
1907   debug_dir: Option<&Path>,
1908   debug_name: &str,
1909) -> Result<Vec<ComposeBindingAssignment>> {
1910   let schema = build_binding_schema();
1911   let groups_text = render_binding_groups(groups);
1912   let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1913   let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1914      variant:         "default",
1915      groups:          &groups_text,
1916      ambiguous_files: &ambiguous_files_text,
1917   })?;
1918   let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1919      operation:        "compose/bind",
1920      model:            &config.analysis_model,
1921      max_tokens:       2500,
1922      temperature:      COMPOSE_PLANNER_TEMPERATURE,
1923      prompt_family:    "compose-bind",
1924      prompt_variant:   "default",
1925      system_prompt:    &parts.system,
1926      user_prompt:      &parts.user,
1927      tool_name:        "bind_compose_hunks",
1928      tool_description: "Assign hunk IDs to existing compose groups",
1929      schema:           &schema,
1930      debug:            debug_dir.map(|dir| OneShotDebug {
1931         dir:    Some(dir),
1932         prefix: None,
1933         name:   debug_name,
1934      }),
1935      cacheable:        true,
1936   })
1937   .await?;
1938
1939   Ok(response.output.assignments)
1940}
1941
1942fn ambiguous_hunk_context(
1943   ambiguous_files: &[AmbiguousFileBinding],
1944) -> HashMap<String, AmbiguousHunkContext> {
1945   let mut context = HashMap::new();
1946   for ambiguous_file in ambiguous_files {
1947      for hunk_id in &ambiguous_file.hunk_ids {
1948         context.insert(hunk_id.clone(), AmbiguousHunkContext {
1949            candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
1950         });
1951      }
1952   }
1953   context
1954}
1955
1956fn evaluate_binding(
1957   assignments: &[ComposeBindingAssignment],
1958   hunk_context: &HashMap<String, AmbiguousHunkContext>,
1959   valid_group_ids: &HashSet<&str>,
1960   snapshot: &ComposeSnapshot,
1961) -> BindingEvaluation {
1962   let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
1963
1964   for assignment in assignments {
1965      if !valid_group_ids.contains(assignment.group_id.as_str()) {
1966         continue;
1967      }
1968
1969      let mut seen_in_group = HashSet::new();
1970      for hunk_id in &assignment.hunk_ids {
1971         if !seen_in_group.insert(hunk_id.as_str()) {
1972            continue;
1973         }
1974
1975         let Some(context) = hunk_context.get(hunk_id) else {
1976            continue;
1977         };
1978
1979         if !context
1980            .candidate_group_ids
1981            .iter()
1982            .any(|candidate| candidate == &assignment.group_id)
1983         {
1984            continue;
1985         }
1986
1987         match assigned_hunk_to_group.get(hunk_id) {
1988            None => {
1989               assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
1990            },
1991            Some(existing_group) if existing_group == &assignment.group_id => {},
1992            Some(_) => {
1993               assigned_hunk_to_group.remove(hunk_id);
1994            },
1995         }
1996      }
1997   }
1998
1999   let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2000   for (hunk_id, group_id) in assigned_hunk_to_group {
2001      assigned_by_group.entry(group_id).or_default().push(hunk_id);
2002   }
2003
2004   for hunk_ids in assigned_by_group.values_mut() {
2005      let ordered: Vec<String> = snapshot
2006         .hunks
2007         .iter()
2008         .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2009         .map(|hunk| hunk.hunk_id.clone())
2010         .collect();
2011      *hunk_ids = ordered;
2012   }
2013
2014   let unresolved = snapshot
2015      .hunks
2016      .iter()
2017      .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2018      .filter(|hunk| {
2019         !assigned_by_group.values().any(|assigned_hunks| {
2020            assigned_hunks
2021               .iter()
2022               .any(|assigned| assigned == &hunk.hunk_id)
2023         })
2024      })
2025      .map(|hunk| hunk.hunk_id.clone())
2026      .collect();
2027
2028   BindingEvaluation { assigned: assigned_by_group, unresolved }
2029}
2030
2031fn filter_ambiguous_files(
2032   ambiguous_files: &[AmbiguousFileBinding],
2033   hunk_ids: &[String],
2034) -> Vec<AmbiguousFileBinding> {
2035   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2036
2037   ambiguous_files
2038      .iter()
2039      .filter_map(|file| {
2040         let matching_hunks: Vec<String> = file
2041            .hunk_ids
2042            .iter()
2043            .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2044            .cloned()
2045            .collect();
2046
2047         (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2048            file_id:             file.file_id.clone(),
2049            path:                file.path.clone(),
2050            candidate_group_ids: file.candidate_group_ids.clone(),
2051            hunk_ids:            matching_hunks,
2052         })
2053      })
2054      .collect()
2055}
2056
2057fn chunk_ambiguous_files(
2058   ambiguous_files: &[AmbiguousFileBinding],
2059) -> Vec<Vec<AmbiguousFileBinding>> {
2060   if ambiguous_files.is_empty() {
2061      return Vec::new();
2062   }
2063
2064   let mut batches = Vec::new();
2065   let mut current_batch = Vec::new();
2066   let mut current_hunk_count = 0_usize;
2067
2068   for file in ambiguous_files {
2069      let file_hunk_count = file.hunk_ids.len();
2070      let should_split = !current_batch.is_empty()
2071         && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2072            || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2073
2074      if should_split {
2075         batches.push(current_batch);
2076         current_batch = Vec::new();
2077         current_hunk_count = 0;
2078      }
2079
2080      current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2081      current_batch.push(file.clone());
2082   }
2083
2084   if !current_batch.is_empty() {
2085      batches.push(current_batch);
2086   }
2087
2088   batches
2089}
2090
2091fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2092   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2093
2094   snapshot
2095      .hunks
2096      .iter()
2097      .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2098      .map(|hunk| hunk.hunk_id.clone())
2099      .collect()
2100}
2101
2102fn fallback_group_for_hunk(
2103   hunk_id: &str,
2104   ambiguous_files: &[AmbiguousFileBinding],
2105   group_rank: &HashMap<&str, usize>,
2106) -> Option<String> {
2107   ambiguous_files.iter().find_map(|file| {
2108      file
2109         .hunk_ids
2110         .iter()
2111         .any(|candidate| candidate == hunk_id)
2112         .then(|| {
2113            file
2114               .candidate_group_ids
2115               .iter()
2116               .min_by_key(|group_id| {
2117                  group_rank
2118                     .get(group_id.as_str())
2119                     .copied()
2120                     .unwrap_or(usize::MAX)
2121               })
2122               .cloned()
2123         })
2124   })?
2125}
2126
2127fn assign_unresolved_hunks(
2128   unresolved_hunks: &[String],
2129   assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2130   ambiguous_files: &[AmbiguousFileBinding],
2131   group_rank: &HashMap<&str, usize>,
2132) {
2133   for hunk_id in unresolved_hunks {
2134      if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2135         && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2136      {
2137         group_hunks.insert(hunk_id.clone());
2138      }
2139   }
2140}
2141
2142fn normalize_group_type(
2143   snapshot: &ComposeSnapshot,
2144   file_ids: &[String],
2145   original_type: &CommitType,
2146) -> Result<CommitType> {
2147   let dependency_only = !file_ids.is_empty()
2148      && file_ids.iter().all(|file_id| {
2149         snapshot
2150            .file_by_id(file_id)
2151            .is_some_and(|file| is_dependency_manifest(&file.path))
2152      });
2153
2154   if dependency_only && original_type.as_str() != "build" {
2155      CommitType::new("build")
2156   } else {
2157      Ok(original_type.clone())
2158   }
2159}
2160
2161fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2162   snapshot
2163      .files
2164      .iter()
2165      .filter(|file| {
2166         hunk_ids
2167            .iter()
2168            .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2169      })
2170      .map(|file| file.file_id.clone())
2171      .collect()
2172}
2173
2174fn build_redirects(
2175   intent_plan: &ComposeIntentPlan,
2176   executable_groups: &[ComposeExecutableGroup],
2177   group_rank: &HashMap<&str, usize>,
2178) -> HashMap<String, String> {
2179   let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2180      .iter()
2181      .filter(|group| !group.hunk_ids.is_empty())
2182      .map(|group| (group.group_id.as_str(), group))
2183      .collect();
2184
2185   let mut redirects = HashMap::new();
2186   for group in &intent_plan.groups {
2187      if surviving_groups.contains_key(group.group_id.as_str()) {
2188         continue;
2189      }
2190
2191      let redirect = executable_groups
2192         .iter()
2193         .filter(|candidate| candidate.group_id != group.group_id)
2194         .filter(|candidate| {
2195            candidate.file_ids.iter().any(|file_id| {
2196               group
2197                  .file_ids
2198                  .iter()
2199                  .any(|candidate_id| candidate_id == file_id)
2200            })
2201         })
2202         .min_by_key(|candidate| {
2203            group_rank
2204               .get(candidate.group_id.as_str())
2205               .copied()
2206               .unwrap_or(usize::MAX)
2207         })
2208         .map(|candidate| candidate.group_id.clone());
2209
2210      if let Some(redirect) = redirect {
2211         redirects.insert(group.group_id.clone(), redirect);
2212      }
2213   }
2214
2215   redirects
2216}
2217
/// Follows `redirects` from `group_id` until reaching an ID that has no redirect.
///
/// Cycles are tolerated: the walk stops the first time it is about to leave an ID it
/// has already left before, and that ID is returned.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
   let mut visited: HashSet<&str> = HashSet::new();
   let mut cursor: &str = group_id;

   loop {
      let Some(target) = redirects.get(cursor) else {
         break;
      };
      // A repeated departure point means the redirects loop back on themselves.
      if !visited.insert(cursor) {
         break;
      }
      cursor = target.as_str();
   }

   cursor.to_string()
}
2231
2232fn prune_empty_groups(
2233   groups: Vec<ComposeExecutableGroup>,
2234   redirects: &HashMap<String, String>,
2235) -> Result<ComposeExecutablePlan> {
2236   let surviving_ids: HashSet<String> = groups
2237      .iter()
2238      .filter(|group| !group.hunk_ids.is_empty())
2239      .map(|group| group.group_id.clone())
2240      .collect();
2241
2242   let mut surviving_groups = Vec::new();
2243   for mut group in groups {
2244      if group.hunk_ids.is_empty() {
2245         continue;
2246      }
2247
2248      let mut rewritten_dependencies = Vec::new();
2249      for dependency in &group.dependencies {
2250         let rewritten = resolve_redirect(dependency, redirects);
2251         if rewritten != group.group_id
2252            && surviving_ids.contains(&rewritten)
2253            && !rewritten_dependencies
2254               .iter()
2255               .any(|existing| existing == &rewritten)
2256         {
2257            rewritten_dependencies.push(rewritten);
2258         }
2259      }
2260
2261      group.dependencies = rewritten_dependencies;
2262      surviving_groups.push(group);
2263   }
2264
2265   let dependency_order = compute_dependency_order(
2266      &surviving_groups,
2267      |group| &group.group_id,
2268      |group| &group.dependencies,
2269   )?;
2270   Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2271}
2272
2273fn finalize_executable_plan(
2274   snapshot: &ComposeSnapshot,
2275   intent_plan: &ComposeIntentPlan,
2276   assigned_by_group: HashMap<String, BTreeSet<String>>,
2277) -> Result<ComposeExecutablePlan> {
2278   let group_rank: HashMap<&str, usize> = intent_plan
2279      .dependency_order
2280      .iter()
2281      .enumerate()
2282      .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2283      .collect();
2284
2285   let mut executable_groups = Vec::new();
2286   for group in &intent_plan.groups {
2287      let hunk_ids: Vec<String> = snapshot
2288         .hunks
2289         .iter()
2290         .filter(|hunk| {
2291            assigned_by_group
2292               .get(&group.group_id)
2293               .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2294         })
2295         .map(|hunk| hunk.hunk_id.clone())
2296         .collect();
2297
2298      let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2299      let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2300      executable_groups.push(ComposeExecutableGroup {
2301         group_id: group.group_id.clone(),
2302         commit_type,
2303         scope: group.scope.clone(),
2304         file_ids,
2305         rationale: group.rationale.clone(),
2306         dependencies: group.dependencies.clone(),
2307         hunk_ids,
2308      });
2309   }
2310
2311   let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2312   prune_empty_groups(executable_groups, &redirects)
2313}
2314
2315fn validate_executable_plan(
2316   snapshot: &ComposeSnapshot,
2317   plan: &ComposeExecutablePlan,
2318) -> Result<()> {
2319   if plan.groups.is_empty() {
2320      return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2321   }
2322
2323   let known_hunks: HashSet<&str> = snapshot
2324      .hunks
2325      .iter()
2326      .map(|hunk| hunk.hunk_id.as_str())
2327      .collect();
2328   let known_files: HashSet<&str> = snapshot
2329      .files
2330      .iter()
2331      .map(|file| file.file_id.as_str())
2332      .collect();
2333   let mut coverage = HashMap::<String, String>::new();
2334
2335   for group in &plan.groups {
2336      if group.hunk_ids.is_empty() {
2337         return Err(CommitGenError::Other(format!(
2338            "Compose group {} ended up empty after binding",
2339            group.group_id
2340         )));
2341      }
2342
2343      for file_id in &group.file_ids {
2344         if !known_files.contains(file_id.as_str()) {
2345            return Err(CommitGenError::Other(format!(
2346               "Compose group {} references unknown file_id {}",
2347               group.group_id, file_id
2348            )));
2349         }
2350      }
2351
2352      for hunk_id in &group.hunk_ids {
2353         if !known_hunks.contains(hunk_id.as_str()) {
2354            return Err(CommitGenError::Other(format!(
2355               "Compose group {} references unknown hunk_id {}",
2356               group.group_id, hunk_id
2357            )));
2358         }
2359
2360         if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2361            return Err(CommitGenError::Other(format!(
2362               "Hunk {} was assigned to both {} and {}",
2363               hunk_id, existing_group, group.group_id
2364            )));
2365         }
2366      }
2367   }
2368
2369   let missing_hunks: Vec<String> = snapshot
2370      .hunks
2371      .iter()
2372      .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2373      .map(|hunk| hunk.hunk_id.clone())
2374      .collect();
2375   if !missing_hunks.is_empty() {
2376      return Err(CommitGenError::Other(format!(
2377         "Compose plan left hunks unassigned: {}",
2378         missing_hunks.join(", ")
2379      )));
2380   }
2381
2382   let dependency_order =
2383      compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2384   if dependency_order != plan.dependency_order {
2385      return Err(CommitGenError::Other(
2386         "Compose dependency order does not match recomputed order".to_string(),
2387      ));
2388   }
2389
2390   Ok(())
2391}
2392
2393async fn bind_compose_plan(
2394   snapshot: &ComposeSnapshot,
2395   intent_plan: &ComposeIntentPlan,
2396   config: &CommitConfig,
2397   debug_dir: Option<&Path>,
2398) -> Result<ComposeExecutablePlan> {
2399   let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2400
2401   if !ambiguous_files.is_empty() {
2402      let valid_group_ids: HashSet<&str> = intent_plan
2403         .groups
2404         .iter()
2405         .map(|group| group.group_id.as_str())
2406         .collect();
2407      let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2408      let mut unresolved = Vec::new();
2409
2410      for (batch_idx, batch) in binding_batches.iter().enumerate() {
2411         let hunk_context = ambiguous_hunk_context(batch);
2412         let debug_name = if binding_batches.len() == 1 {
2413            "compose_bind".to_string()
2414         } else {
2415            format!("compose_bind_{:02}", batch_idx + 1)
2416         };
2417         let assignments =
2418            request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2419               .await?;
2420         let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2421         for (group_id, hunk_ids) in evaluation.assigned {
2422            let entry = assigned_by_group.entry(group_id).or_default();
2423            for hunk_id in hunk_ids {
2424               entry.insert(hunk_id);
2425            }
2426         }
2427         unresolved.extend(evaluation.unresolved);
2428      }
2429
2430      let group_rank: HashMap<&str, usize> = intent_plan
2431         .dependency_order
2432         .iter()
2433         .enumerate()
2434         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2435         .collect();
2436
2437      let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2438      if !unresolved.is_empty() {
2439         let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2440         let repair_batches = chunk_ambiguous_files(&unresolved_files);
2441         let mut repair_unresolved = Vec::new();
2442
2443         for (batch_idx, batch) in repair_batches.iter().enumerate() {
2444            let debug_name = if repair_batches.len() == 1 {
2445               "compose_bind_repair".to_string()
2446            } else {
2447               format!("compose_bind_repair_{:02}", batch_idx + 1)
2448            };
2449            let repair_assignments = request_binding(
2450               snapshot,
2451               &intent_plan.groups,
2452               batch,
2453               config,
2454               debug_dir,
2455               &debug_name,
2456            )
2457            .await?;
2458            let repair_context = ambiguous_hunk_context(batch);
2459            let repair =
2460               evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2461            for (group_id, hunk_ids) in repair.assigned {
2462               let entry = assigned_by_group.entry(group_id).or_default();
2463               for hunk_id in hunk_ids {
2464                  entry.insert(hunk_id);
2465               }
2466            }
2467
2468            repair_unresolved.extend(repair.unresolved);
2469         }
2470         unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2471
2472         if !unresolved.is_empty() {
2473            assign_unresolved_hunks(
2474               &unresolved,
2475               &mut assigned_by_group,
2476               &ambiguous_files,
2477               &group_rank,
2478            );
2479         }
2480      }
2481   }
2482
2483   let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2484   validate_executable_plan(snapshot, &plan)?;
2485   Ok(plan)
2486}
2487
2488fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2489   println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2490   for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2491      let group = &plan.groups[group_idx];
2492      let scope = group
2493         .scope
2494         .as_ref()
2495         .map(|scope| format!("({})", style::scope(scope.as_str())))
2496         .unwrap_or_default();
2497
2498      println!(
2499         "\n{}. {} [{}{}] {}",
2500         display_idx + 1,
2501         style::bold(&group.group_id),
2502         style::commit_type(group.commit_type.as_str()),
2503         scope,
2504         group.rationale
2505      );
2506
2507      println!("   Files:");
2508      for file_id in &group.file_ids {
2509         if let Some(file) = snapshot.file_by_id(file_id) {
2510            let selected_hunk_ids: Vec<&str> = group
2511               .hunk_ids
2512               .iter()
2513               .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2514               .map(String::as_str)
2515               .collect();
2516            let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2517               "all hunks".to_string()
2518            } else {
2519               selected_hunk_ids.join(", ")
2520            };
2521            println!("     - {} {} ({selection})", file.file_id, file.path);
2522         }
2523      }
2524
2525      if !group.dependencies.is_empty() {
2526         println!("   Depends on: {}", group.dependencies.join(", "));
2527      }
2528   }
2529}
2530
2531pub async fn execute_compose(
2532   snapshot: &ComposeSnapshot,
2533   plan: &ComposeExecutablePlan,
2534   config: &CommitConfig,
2535   args: &Args,
2536) -> Result<Vec<String>> {
2537   let dir = &args.dir;
2538   let mut commit_hashes = Vec::new();
2539   let total = plan.dependency_order.len();
2540
2541   println!("{}", style::info("Resetting staging area..."));
2542   reset_staging(dir)?;
2543
2544   // Phase 1: derive each group's diff/stat from the immutable compose snapshot.
2545   // This avoids mutating the index while commit messages are prepared and keeps
2546   // later worktree edits out of already-planned commits.
2547   let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2548   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2549      let group = &plan.groups[group_idx];
2550      println!(
2551         "  {}",
2552         style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total,))
2553      );
2554      let group_patch = create_executable_group_patch(snapshot, group)?;
2555      group_diff_stats.push((group_patch.diff, group_patch.stat));
2556   }
2557
2558   // Phase 2: generate commit messages concurrently. Both LLM calls per group
2559   // (analysis + summary) run inside a single async task so the slower of the
2560   // two does not block other groups from progressing.
2561   println!(
2562      "{}",
2563      style::info(&format!(
2564         "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2565         COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2566      ))
2567   );
2568
2569   let prepared_messages: Vec<(Vec<String>, crate::types::CommitSummary)> =
2570      stream::iter(plan.dependency_order.iter().enumerate())
2571         .map(|(idx, &group_idx)| {
2572            let group = &plan.groups[group_idx];
2573            let (diff, stat) = &group_diff_stats[idx];
2574            let debug_prefix = format!("compose-{}", idx + 1);
2575            async move {
2576               let ctx = AnalysisContext {
2577                  user_context:    Some(&group.rationale),
2578                  recent_commits:  None,
2579                  common_scopes:   None,
2580                  project_context: None,
2581                  debug_output:    args.debug_output.as_deref(),
2582                  debug_prefix:    Some(&debug_prefix),
2583               };
2584               let analysis = generate_conventional_analysis(
2585                  stat,
2586                  diff,
2587                  &config.analysis_model,
2588                  "",
2589                  &ctx,
2590                  config,
2591               )
2592               .await?;
2593               let body = analysis.body_texts();
2594               let summary = generate_summary_from_analysis(
2595                  stat,
2596                  group.commit_type.as_str(),
2597                  group.scope.as_ref().map(|scope| scope.as_str()),
2598                  &body,
2599                  Some(&group.rationale),
2600                  config,
2601                  args.debug_output.as_deref(),
2602                  Some(&debug_prefix),
2603               )
2604               .await?;
2605               Ok::<_, CommitGenError>((body, summary))
2606            }
2607         })
2608         .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2609         .collect::<Vec<_>>()
2610         .await
2611         .into_iter()
2612         .collect::<Result<Vec<_>>>()?;
2613
2614   // Phase 3: sequential commit loop. Re-stage each group (cheap git ops) and
2615   // commit using the message we generated in phase 2.
2616   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2617      let group = &plan.groups[group_idx];
2618
2619      println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2620      println!("  Type: {}", style::commit_type(group.commit_type.as_str()));
2621      if let Some(scope) = &group.scope {
2622         println!("  Scope: {}", style::scope(scope.as_str()));
2623      }
2624      let paths: Vec<String> = group
2625         .file_ids
2626         .iter()
2627         .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2628         .collect();
2629      println!("  Files: {}", paths.join(", "));
2630
2631      let stage_result = stage_executable_group(snapshot, group, dir)?;
2632      if stage_result != StageResult::Staged {
2633         eprintln!(
2634            "  {}",
2635            style::warning(&format!(
2636               "Skipping {} because its planned patch is already applied ({stage_result:?})",
2637               group.group_id
2638            ))
2639         );
2640         continue;
2641      }
2642
2643      let (analysis_body, summary) = prepared_messages[idx].clone();
2644      let mut commit = ConventionalCommit {
2645         commit_type: group.commit_type.clone(),
2646         scope: group.scope.clone(),
2647         summary,
2648         body: analysis_body,
2649         footers: vec![],
2650      };
2651      post_process_commit_message(&mut commit, config);
2652
2653      if let Err(err) = validate_commit_message(&commit, config) {
2654         eprintln!(
2655            "  {}",
2656            style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2657         );
2658      }
2659
2660      let formatted_message = format_commit_message(&commit);
2661      println!(
2662         "  Message:\n{}",
2663         formatted_message
2664            .lines()
2665            .take(3)
2666            .collect::<Vec<_>>()
2667            .join("\n")
2668      );
2669
2670      if !args.compose_preview {
2671         let sign = args.sign || config.gpg_sign;
2672         let signoff = args.signoff || config.signoff;
2673         git_commit(&formatted_message, false, dir, sign, signoff, args.skip_hooks, false)?;
2674         let hash = get_head_hash(dir)?;
2675         commit_hashes.push(hash);
2676
2677         if args.compose_test_after_each {
2678            println!("  {}", style::info("Running tests..."));
2679            let status = std::process::Command::new("cargo")
2680               .arg("test")
2681               .current_dir(dir)
2682               .status();
2683
2684            if let Ok(status) = status {
2685               if !status.success() {
2686                  return Err(CommitGenError::Other(format!(
2687                     "Tests failed after commit {} ({})",
2688                     idx + 1,
2689                     group.group_id
2690                  )));
2691               }
2692               println!("  {}", style::success(&format!("{} Tests passed", style::icons::SUCCESS)));
2693            }
2694         }
2695      }
2696   }
2697
2698   Ok(commit_hashes)
2699}
2700
2701pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2702   let max_rounds = config.compose_max_rounds;
2703
2704   for round in 1..=max_rounds {
2705      if round > 1 {
2706         println!(
2707            "\n{}",
2708            style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2709         );
2710      } else {
2711         println!("{}", style::section_header("Compose Mode", 80));
2712      }
2713      println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2714
2715      run_compose_round(args, config, round).await?;
2716
2717      if args.compose_preview {
2718         break;
2719      }
2720
2721      match get_compose_diff(&args.dir) {
2722         Err(CommitGenError::NoChanges { .. }) => {
2723            println!(
2724               "\n{}",
2725               style::success(&format!(
2726                  "{} All changes committed successfully",
2727                  style::icons::SUCCESS
2728               ))
2729            );
2730            break;
2731         },
2732         Err(err) => return Err(err),
2733         Ok(remaining_diff) => {
2734            eprintln!(
2735               "\n{}",
2736               style::warning(&format!(
2737                  "{} Uncommitted changes remain after round {round}",
2738                  style::icons::WARNING
2739               ))
2740            );
2741            eprintln!("{remaining_diff}");
2742         },
2743      }
2744
2745      if round < max_rounds {
2746         eprintln!("{}", style::info("Starting another compose round..."));
2747      } else {
2748         eprintln!(
2749            "{}",
2750            style::warning(&format!(
2751               "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2752            ))
2753         );
2754      }
2755   }
2756
2757   Ok(())
2758}
2759
2760async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2761   let diff = get_compose_diff(&args.dir)?;
2762   let stat = get_compose_stat(&args.dir)?;
2763   let snapshot = build_compose_snapshot(&diff, &stat)?;
2764
2765   if let Some(debug_dir) = args.debug_output.as_deref() {
2766      save_debug_artifact(
2767         Some(debug_dir),
2768         &format!("compose_round_{round}_snapshot.json"),
2769         &snapshot,
2770      )?;
2771   }
2772
2773   let token_counter = create_token_counter(config);
2774   let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
2775      println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
2776      observe_diff_files(&snapshot.diff, &config.analysis_model, config, &token_counter).await?
2777   } else {
2778      if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
2779         && should_use_map_reduce(&snapshot.diff, config, &token_counter)
2780      {
2781         println!(
2782            "{}",
2783            style::info(
2784               "Skipping per-file observations for very large compose snapshot; using area-level \
2785                planning instead."
2786            )
2787         );
2788      }
2789      Vec::new()
2790   };
2791
2792   if let Some(debug_dir) = args.debug_output.as_deref()
2793      && !observations.is_empty()
2794   {
2795      save_debug_artifact(
2796         Some(debug_dir),
2797         &format!("compose_round_{round}_observations.json"),
2798         &observations,
2799      )?;
2800   }
2801
2802   let max_commits = args.compose_max_commits.unwrap_or(20);
2803   let executable_plan = if let Some(cached_plan) =
2804      load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
2805   {
2806      println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
2807      cached_plan
2808   } else {
2809      println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
2810      let intent_plan = analyze_compose_intent(
2811         &snapshot,
2812         &observations,
2813         config,
2814         max_commits,
2815         args.debug_output.as_deref(),
2816      )
2817      .await?;
2818
2819      if let Some(debug_dir) = args.debug_output.as_deref() {
2820         save_debug_artifact(
2821            Some(debug_dir),
2822            &format!("compose_round_{round}_intent_plan.json"),
2823            &intent_plan,
2824         )?;
2825      }
2826
2827      println!("{}", style::info("Binding hunks to groups..."));
2828      let plan =
2829         bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
2830      save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
2831      plan
2832   };
2833
2834   if let Some(debug_dir) = args.debug_output.as_deref() {
2835      save_debug_artifact(
2836         Some(debug_dir),
2837         &format!("compose_round_{round}_executable_plan.json"),
2838         &executable_plan,
2839      )?;
2840   }
2841
2842   print_executable_plan(&snapshot, &executable_plan);
2843
2844   if args.compose_preview {
2845      println!(
2846         "\n{}",
2847         style::success(&format!(
2848            "{} Preview complete (use --compose without --compose-preview to execute)",
2849            style::icons::SUCCESS
2850         ))
2851      );
2852      return Ok(());
2853   }
2854
2855   println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
2856   let hashes = execute_compose(&snapshot, &executable_plan, config, args).await?;
2857   println!(
2858      "{}",
2859      style::success(&format!(
2860         "{} Round {round}: Created {} commit(s)",
2861         style::icons::SUCCESS,
2862         hashes.len()
2863      ))
2864   );
2865   Ok(())
2866}
2867
2868#[cfg(test)]
2869mod tests {
2870   use std::fmt::Write;
2871
2872   use super::*;
2873   use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
2874
2875   fn shared_file_diff() -> (&'static str, &'static str) {
2876      (
2877         r#"diff --git a/src/lib.rs b/src/lib.rs
2878index 1111111..2222222 100644
2879--- a/src/lib.rs
2880+++ b/src/lib.rs
2881@@ -1,3 +1,3 @@
2882-fn alpha() {
2883+fn alpha_changed() {
2884     println!("alpha");
2885 }
2886@@ -12,3 +12,3 @@
2887-fn beta() {
2888+fn beta_changed() {
2889     println!("beta");
2890 }
2891diff --git a/tests/lib.rs b/tests/lib.rs
2892index 3333333..4444444 100644
2893--- a/tests/lib.rs
2894+++ b/tests/lib.rs
2895@@ -1,3 +1,4 @@
2896 fn test_it() {
2897+    assert!(true);
2898 }
2899"#,
2900         " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
2901      )
2902   }
2903
2904   fn build_test_snapshot() -> ComposeSnapshot {
2905      let (diff, stat) = shared_file_diff();
2906      build_compose_snapshot(diff, stat).unwrap()
2907   }
2908
2909   fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
2910      let mut diff = String::new();
2911
2912      for file_idx in 0..file_count {
2913         let path = format!("src/module_{file_idx:03}.rs");
2914         writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
2915         diff.push_str("index 1111111..2222222 100644\n");
2916         writeln!(diff, "--- a/{path}").unwrap();
2917         writeln!(diff, "+++ b/{path}").unwrap();
2918
2919         for hunk_idx in 0..hunks_per_file {
2920            let line_no = (hunk_idx * 4) + 1;
2921            writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
2922            writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
2923            writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
2924         }
2925      }
2926
2927      build_compose_snapshot(&diff, "").unwrap()
2928   }
2929
2930   fn build_multi_area_snapshot() -> ComposeSnapshot {
2931      let mut diff = String::new();
2932      let areas = [
2933         ("apps/frontend/src/server", 72),
2934         ("packages/model/src/models", 54),
2935         ("apps/daemon/src/worker", 43),
2936         (".github/workflows", 16),
2937      ];
2938
2939      for (prefix, count) in areas {
2940         for file_idx in 0..count {
2941            let path = format!("{prefix}/file_{file_idx:03}.rs");
2942            writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
2943            diff.push_str("index 1111111..2222222 100644\n");
2944            writeln!(diff, "--- a/{path}").unwrap();
2945            writeln!(diff, "+++ b/{path}").unwrap();
2946            diff.push_str("@@ -1,1 +1,1 @@\n");
2947            writeln!(diff, "-old_{file_idx}").unwrap();
2948            writeln!(diff, "+new_{file_idx}").unwrap();
2949         }
2950      }
2951
2952      build_compose_snapshot(&diff, "").unwrap()
2953   }
2954
2955   fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
2956      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
2957      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
2958      let groups = vec![
2959         ComposeIntentGroup {
2960            group_id:     "G1".to_string(),
2961            commit_type:  CommitType::new("refactor").unwrap(),
2962            scope:        None,
2963            file_ids:     vec![source_file.file_id.clone(), test_file.file_id.clone()],
2964            rationale:    "implementation group".to_string(),
2965            dependencies: vec![],
2966         },
2967         ComposeIntentGroup {
2968            group_id:     "G2".to_string(),
2969            commit_type:  CommitType::new("refactor").unwrap(),
2970            scope:        None,
2971            file_ids:     vec![source_file.file_id.clone()],
2972            rationale:    "shared file follow-up".to_string(),
2973            dependencies: vec!["G1".to_string()],
2974         },
2975      ];
2976      let dependency_order =
2977         compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
2978            .unwrap();
2979      ComposeIntentPlan { groups, dependency_order }
2980   }
2981
2982   #[test]
2983   fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
2984      let snapshot = build_test_snapshot();
2985      let intent_plan = build_shared_intent_plan(&snapshot);
2986      let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
2987
2988      assert_eq!(ambiguous.len(), 1);
2989      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
2990      let assigned_to_g1 = assigned.get("G1").unwrap();
2991      assert!(
2992         test_file
2993            .hunk_ids
2994            .iter()
2995            .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
2996         "uniquely owned file should be auto-assigned"
2997      );
2998   }
2999
3000   #[test]
3001   fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3002      let snapshot = build_test_snapshot();
3003      let intent_plan = build_shared_intent_plan(&snapshot);
3004      let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3005      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3006      let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3007      let valid_group_ids: HashSet<&str> = intent_plan
3008         .groups
3009         .iter()
3010         .map(|group| group.group_id.as_str())
3011         .collect();
3012
3013      let evaluation = evaluate_binding(
3014         &[
3015            ComposeBindingAssignment {
3016               group_id: "G1".to_string(),
3017               hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3018            },
3019            ComposeBindingAssignment {
3020               group_id: "G2".to_string(),
3021               hunk_ids: vec![source_file.hunk_ids[1].clone()],
3022            },
3023         ],
3024         &hunk_context,
3025         &valid_group_ids,
3026         &snapshot,
3027      );
3028
3029      for (group_id, hunk_ids) in evaluation.assigned {
3030         let entry = assigned.entry(group_id).or_default();
3031         for hunk_id in hunk_ids {
3032            entry.insert(hunk_id);
3033         }
3034      }
3035
3036      let group_rank: HashMap<&str, usize> = intent_plan
3037         .dependency_order
3038         .iter()
3039         .enumerate()
3040         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3041         .collect();
3042      assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3043
3044      let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3045      assert_eq!(executable_plan.groups.len(), 1);
3046      assert_eq!(executable_plan.groups[0].group_id, "G1");
3047      assert!(
3048         source_file
3049            .hunk_ids
3050            .iter()
3051            .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3052         "fallback should keep every hunk from the shared file in the surviving group"
3053      );
3054   }
3055
3056   #[test]
3057   fn test_validate_executable_plan_rejects_overlap() {
3058      let snapshot = build_test_snapshot();
3059      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3060      let executable_plan = ComposeExecutablePlan {
3061         groups:           vec![
3062            ComposeExecutableGroup {
3063               group_id:     "G1".to_string(),
3064               commit_type:  CommitType::new("refactor").unwrap(),
3065               scope:        None,
3066               file_ids:     vec![source_file.file_id.clone()],
3067               rationale:    "group one".to_string(),
3068               dependencies: vec![],
3069               hunk_ids:     vec![source_file.hunk_ids[0].clone()],
3070            },
3071            ComposeExecutableGroup {
3072               group_id:     "G2".to_string(),
3073               commit_type:  CommitType::new("refactor").unwrap(),
3074               scope:        None,
3075               file_ids:     vec![source_file.file_id.clone()],
3076               rationale:    "group two".to_string(),
3077               dependencies: vec![],
3078               hunk_ids:     vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3079            },
3080         ],
3081         dependency_order: vec![0, 1],
3082      };
3083
3084      let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3085      assert!(err.to_string().contains("assigned to both"));
3086   }
3087
3088   #[test]
3089   fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3090      let snapshot = build_test_snapshot();
3091      let planning_index = build_planning_index(&snapshot);
3092      let groups = vec![ComposeIntentGroup {
3093         group_id:     "G1".to_string(),
3094         commit_type:  CommitType::new("refactor").unwrap(),
3095         scope:        None,
3096         file_ids:     vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3097         rationale:    "normalize file references".to_string(),
3098         dependencies: vec![],
3099      }];
3100
3101      let (normalized_groups, repair_notes) =
3102         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3103
3104      assert_eq!(normalized_groups.len(), 1);
3105      assert_eq!(
3106         normalized_groups[0].file_ids,
3107         snapshot
3108            .files
3109            .iter()
3110            .map(|file| file.file_id.clone())
3111            .collect::<Vec<_>>()
3112      );
3113      assert_eq!(repair_notes.len(), 2);
3114   }
3115
3116   #[test]
3117   fn test_normalize_intent_plan_repairs_missing_files() {
3118      let snapshot = build_test_snapshot();
3119      let planning_index = build_planning_index(&snapshot);
3120      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3121      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3122      let groups = vec![ComposeIntentGroup {
3123         group_id:     "G1".to_string(),
3124         commit_type:  CommitType::new("refactor").unwrap(),
3125         scope:        None,
3126         file_ids:     vec![source_file.file_id.clone()],
3127         rationale:    "partial coverage".to_string(),
3128         dependencies: vec![],
3129      }];
3130
3131      let (normalized_groups, repair_notes) =
3132         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3133
3134      assert_eq!(normalized_groups.len(), 1);
3135      assert!(
3136         normalized_groups[0].file_ids.contains(&source_file.file_id),
3137         "existing file assignment should be preserved"
3138      );
3139      assert!(
3140         normalized_groups[0].file_ids.contains(&test_file.file_id),
3141         "missing files should be assigned to an existing group"
3142      );
3143      assert_eq!(repair_notes.len(), 1);
3144      assert!(repair_notes[0].contains(&test_file.file_id));
3145   }
3146
3147   #[test]
3148   fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3149      let snapshot = build_multi_area_snapshot();
3150      let planning_index = build_planning_index(&snapshot);
3151      let frontend_target = planning_index
3152         .targets
3153         .iter()
3154         .find(|target| target.label.starts_with("apps/frontend"))
3155         .unwrap();
3156      let model_target = planning_index
3157         .targets
3158         .iter()
3159         .find(|target| target.label.starts_with("packages/model"))
3160         .unwrap();
3161      let groups = vec![
3162         ComposeIntentGroup {
3163            group_id:     "G1".to_string(),
3164            commit_type:  CommitType::new("refactor").unwrap(),
3165            scope:        Scope::new("apps/frontend").ok(),
3166            file_ids:     vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3167            rationale:    "frontend platform updates".to_string(),
3168            dependencies: vec!["group 2".to_string(), "G1".to_string()],
3169         },
3170         ComposeIntentGroup {
3171            group_id:     "G2".to_string(),
3172            commit_type:  CommitType::new("refactor").unwrap(),
3173            scope:        Scope::new("packages/model").ok(),
3174            file_ids:     vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3175            rationale:    "model storage updates".to_string(),
3176            dependencies: vec!["F5".to_string()],
3177         },
3178      ];
3179
3180      let (normalized_groups, repair_notes) =
3181         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3182
3183      assert_eq!(normalized_groups.len(), 2);
3184      assert!(
3185         normalized_groups[0]
3186            .file_ids
3187            .iter()
3188            .all(|file_id| file_id.starts_with('F'))
3189      );
3190      assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3191      assert!(normalized_groups[1].dependencies.is_empty());
3192      assert!(
3193         repair_notes
3194            .iter()
3195            .any(|note| note.contains("Dropped unknown planning target"))
3196      );
3197      assert!(
3198         repair_notes
3199            .iter()
3200            .any(|note| note.contains("Dropped self-dependency"))
3201      );
3202      assert!(
3203         repair_notes
3204            .iter()
3205            .any(|note| note.contains("Mapped compose planner dependency"))
3206      );
3207      assert!(
3208         repair_notes
3209            .iter()
3210            .any(|note| note.contains("Dropped unknown dependency"))
3211      );
3212   }
3213
3214   #[test]
3215   fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3216      let snapshot = build_test_snapshot();
3217      let summary = render_snapshot_summary(&snapshot, &[]);
3218      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3219
3220      assert!(!summary.contains("# snapshot compacted"));
3221      for hunk_id in &source_file.hunk_ids {
3222         assert!(summary.contains(hunk_id));
3223      }
3224   }
3225
3226   #[test]
3227   fn test_render_snapshot_summary_compacts_large_snapshot() {
3228      let snapshot = build_large_snapshot(160, 4);
3229      let summary = render_snapshot_summary(&snapshot, &[]);
3230
3231      assert!(summary.contains("# snapshot compacted"));
3232      assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3233      assert!(summary.contains("F001-H001"));
3234      assert!(summary.contains("F001-H004"));
3235      assert!(!summary.contains("F001-H002"));
3236      assert!(!summary.contains("F001-H003"));
3237      assert!(summary.contains("... 2 more hunks omitted from F001"));
3238   }
3239
3240   #[test]
3241   fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3242      let snapshot = build_multi_area_snapshot();
3243      let planning_index = build_planning_index(&snapshot);
3244
3245      assert_eq!(planning_index.mode, PlanningMode::Area);
3246      assert!(planning_index.targets.len() < snapshot.files.len());
3247      assert!(
3248         planning_index
3249            .targets
3250            .iter()
3251            .any(|target| target.label.starts_with("apps/frontend"))
3252      );
3253      assert!(
3254         render_planning_stat(&planning_index).contains("planning over"),
3255         "planning stat should explain the area mode"
3256      );
3257   }
3258
3259   #[test]
3260   fn test_normalize_intent_plan_expands_area_targets() {
3261      let snapshot = build_multi_area_snapshot();
3262      let planning_index = build_planning_index(&snapshot);
3263      let midpoint = planning_index.targets.len() / 2;
3264      let first_group_targets: Vec<String> = planning_index
3265         .targets
3266         .iter()
3267         .take(midpoint)
3268         .map(|target| target.label.clone())
3269         .collect();
3270      let second_group_targets: Vec<String> = planning_index
3271         .targets
3272         .iter()
3273         .skip(midpoint)
3274         .map(|target| target.label.clone())
3275         .collect();
3276      let groups = vec![
3277         ComposeIntentGroup {
3278            group_id:     "G1".to_string(),
3279            commit_type:  CommitType::new("refactor").unwrap(),
3280            scope:        None,
3281            file_ids:     first_group_targets,
3282            rationale:    "frontend and model".to_string(),
3283            dependencies: vec![],
3284         },
3285         ComposeIntentGroup {
3286            group_id:     "G2".to_string(),
3287            commit_type:  CommitType::new("refactor").unwrap(),
3288            scope:        None,
3289            file_ids:     second_group_targets,
3290            rationale:    "daemon and ci".to_string(),
3291            dependencies: vec![],
3292         },
3293      ];
3294
3295      let (normalized_groups, repair_notes) =
3296         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3297
3298      assert_eq!(normalized_groups.len(), 2);
3299      assert!(
3300         normalized_groups
3301            .iter()
3302            .flat_map(|group| group.file_ids.iter())
3303            .all(|file_id| file_id.starts_with('F')),
3304         "area targets should expand back to concrete file IDs"
3305      );
3306      assert!(!repair_notes.is_empty());
3307      assert_eq!(
3308         normalized_groups
3309            .iter()
3310            .flat_map(|group| group.file_ids.iter())
3311            .collect::<HashSet<_>>()
3312            .len(),
3313         snapshot.files.len()
3314      );
3315   }
3316
3317   #[test]
3318   fn test_large_patch_fallback_splits_monolithic_area_plan() {
3319      let snapshot = build_multi_area_snapshot();
3320      let planning_index = build_planning_index(&snapshot);
3321      let monolithic_group = ComposeIntentGroup {
3322         group_id:     "G1".to_string(),
3323         commit_type:  CommitType::new("refactor").unwrap(),
3324         scope:        None,
3325         file_ids:     snapshot
3326            .files
3327            .iter()
3328            .map(|file| file.file_id.clone())
3329            .collect(),
3330         rationale:    "repo-wide refactor".to_string(),
3331         dependencies: vec![],
3332      };
3333
3334      assert!(should_force_large_patch_fallback(
3335         &snapshot,
3336         &planning_index,
3337         &[monolithic_group],
3338         6
3339      ));
3340
3341      let fallback_groups =
3342         build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3343      assert!(fallback_groups.len() >= 3);
3344      assert_eq!(
3345         fallback_groups
3346            .iter()
3347            .flat_map(|group| group.file_ids.iter())
3348            .collect::<HashSet<_>>()
3349            .len(),
3350         snapshot.files.len()
3351      );
3352      assert!(
3353         fallback_groups
3354            .iter()
3355            .any(|group| group.rationale.contains("frontend")),
3356         "fallback should preserve workstream identity"
3357      );
3358   }
3359
3360   #[test]
3361   fn test_should_collect_compose_observations_skips_area_mode() {
3362      let snapshot = build_large_snapshot(160, 4);
3363      let config = CommitConfig::default();
3364      let counter = create_token_counter(&config);
3365
3366      assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3367      assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3368   }
3369
3370   #[test]
3371   fn test_chunk_ambiguous_files_splits_large_binding_request() {
3372      let ambiguous_files = vec![
3373         AmbiguousFileBinding {
3374            file_id:             "F001".to_string(),
3375            path:                "src/alpha.rs".to_string(),
3376            candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3377            hunk_ids:            (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3378         },
3379         AmbiguousFileBinding {
3380            file_id:             "F002".to_string(),
3381            path:                "src/beta.rs".to_string(),
3382            candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3383            hunk_ids:            (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3384         },
3385         AmbiguousFileBinding {
3386            file_id:             "F003".to_string(),
3387            path:                "src/gamma.rs".to_string(),
3388            candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3389            hunk_ids:            (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3390         },
3391      ];
3392
3393      let batches = chunk_ambiguous_files(&ambiguous_files);
3394      let total_hunks: usize = batches
3395         .iter()
3396         .flatten()
3397         .map(|file| file.hunk_ids.len())
3398         .sum();
3399
3400      assert_eq!(batches.len(), 2);
3401      assert_eq!(batches[0].len(), 1);
3402      assert_eq!(batches[1].len(), 2);
3403      assert_eq!(total_hunks, 140);
3404      assert!(batches.iter().all(|batch| {
3405         batch.len() <= MAX_BIND_FILES_PER_REQUEST
3406            && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3407               <= MAX_BIND_HUNKS_PER_REQUEST
3408      }));
3409   }
3410}