Skip to main content

llm_git/
compose.rs

1use std::{
2   borrow::Cow,
3   collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4   fmt::Write,
5   fs,
6   path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13   api::{
14      AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15      generate_summary_from_analysis, run_oneshot, strict_json_schema,
16   },
17   compose_types::{
18      ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19      ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20   },
21   config::CommitConfig,
22   diff::smart_truncate_diff,
23   error::{CommitGenError, Result},
24   git::{
25      TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref,
26      get_compose_diff_with_config, get_compose_stat, get_git_dir, get_head_hash,
27      read_tree_into_index, reset_mixed_to, reset_paths_to, update_ref_checked, write_index_tree,
28      write_real_index_tree,
29   },
30   map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
31   normalization::{format_commit_message, post_process_commit_message},
32   patch::{
33      StageResult, build_compose_snapshot, create_executable_group_patch,
34      force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
35      stage_executable_group_in_index,
36   },
37   style, templates,
38   tokens::{TokenCounter, create_token_counter},
39   types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
40   validation::validate_commit_message,
41};
42
/// Observation cap per file used by the uncompacted snapshot summary budget.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag hashed into the plan cache key and stored in every cached plan;
/// a cached plan with a different tag is ignored.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// Snapshots with more files than this use the medium (compacted) summary budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Snapshots with more hunks than this use the medium (compacted) summary budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// Snapshots with more files than this use the large summary budget and
/// area-based planning.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Snapshots with more hunks than this use the large summary budget and
/// area-based planning.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// A planning bucket is not split further once it holds at most this many files
/// (and also satisfies the hunk limit below).
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A planning bucket is not split further once it holds at most this many hunks
/// (and also satisfies the file limit above).
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path depth at which bucket splitting stops regardless of bucket size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the four constants below are not referenced in this part of the
// file; their names suggest monolith-fallback thresholds and per-request binding
// limits — confirm against their call sites.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
/// Maximum number of commit messages to generate concurrently during
/// `execute_compose`. Matches the per-file fan-out used in `map_reduce`.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
59
/// Repository state recorded by `capture_compose_base_state`: the HEAD hash,
/// the current HEAD ref, and the tree written from the real index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComposeBaseState {
   // Value returned by `get_head_hash`.
   head_hash:  String,
   // Value returned by `current_head_ref`.
   head_ref:   String,
   // Value returned by `write_real_index_tree`.
   index_tree: String,
}
66
67#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
68pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
69   Ok(ComposeBaseState {
70      head_hash:  get_head_hash(dir)?,
71      head_ref:   current_head_ref(dir)?,
72      index_tree: write_real_index_tree(dir)?,
73   })
74}
75
/// How a diff is prepared for analysis, as selected by
/// `compose_analysis_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeAnalysisStrategy {
   // Chosen when the diff fits both the length and token limits.
   Direct,
   // Chosen when the diff exceeds the configured length or token limit.
   SmartTruncate,
   // Chosen when `should_use_map_reduce` approves the diff.
   MapReduce,
}
82
83fn compose_analysis_strategy(
84   diff: &str,
85   config: &CommitConfig,
86   counter: &TokenCounter,
87) -> ComposeAnalysisStrategy {
88   if should_use_map_reduce(diff, config, counter) {
89      return ComposeAnalysisStrategy::MapReduce;
90   }
91
92   let diff_tokens = counter.count_sync(diff);
93   if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
94      return ComposeAnalysisStrategy::SmartTruncate;
95   }
96
97   ComposeAnalysisStrategy::Direct
98}
99
100fn compose_truncation_length(config: &CommitConfig) -> usize {
101   config
102      .max_diff_length
103      .min(config.max_diff_tokens.saturating_mul(4))
104      .max(1)
105}
106
/// Serde payload carrying a list of intent groups under the `groups` key.
// NOTE(review): presumably the parsed response for the schema produced by
// `build_intent_schema` — confirm at the call site.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
   groups: Vec<ComposeIntentGroup>,
}
111
/// Serde payload carrying a list of binding assignments under the
/// `assignments` key.
// NOTE(review): presumably the parsed response for the schema produced by
// `build_binding_schema` — confirm at the call site.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
   assignments: Vec<ComposeBindingAssignment>,
}
116
/// On-disk representation of a cached executable plan, written by
/// `save_cached_plan` and read back by `load_cached_plan`.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
   // Compared against `COMPOSE_PLAN_SCHEMA_VERSION` when loading.
   schema_version: String,
   // Compared against the freshly computed `compose_plan_cache_key` when loading.
   cache_key:      String,
   // The plan itself; re-validated against the current snapshot when loading.
   plan:           ComposeExecutablePlan,
}
123
// NOTE(review): usage is outside this part of the file. Field names suggest a
// file whose hunks could belong to several candidate groups — confirm.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
   file_id:             String,
   path:                String,
   candidate_group_ids: Vec<String>,
   hunk_ids:            Vec<String>,
}
131
// NOTE(review): usage is outside this part of the file. Presumably the set of
// groups a single ambiguous hunk may be assigned to — confirm.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
   candidate_group_ids: Vec<String>,
}
136
// Map from a string key to an ordered set of strings.
// NOTE(review): which side is the hunk ID and which the group ID is not visible
// in this part of the file — confirm against the users of this alias.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
138
// NOTE(review): usage is outside this part of the file. Presumably the outcome
// of evaluating a binding response: resolved assignments plus the IDs that
// remain unresolved — confirm.
#[derive(Debug)]
struct BindingEvaluation {
   assigned:   HashMap<String, Vec<String>>,
   unresolved: Vec<String>,
}
144
/// Limits applied when rendering a snapshot summary; produced by
/// `snapshot_summary_budget`.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
   // Maximum number of observations rendered per file.
   max_observations_per_file: usize,
   // Maximum number of hunks rendered per file; `None` renders every hunk.
   max_hunks_per_file:        Option<usize>,
}
150
/// Granularity of planning targets, chosen by `planning_mode_for_snapshot`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
   // One planning target per file.
   File,
   // Planning targets are path-based areas that may contain several files.
   Area,
}
156
/// A unit the planner can reference: either a single file or a path-based area.
#[derive(Debug, Clone)]
struct PlanningTarget {
   // The file ID in file mode; `A001`, `A002`, ... in area mode.
   target_id:  String,
   // The file path in file mode; the bucket label in area mode.
   label:      String,
   // File IDs covered by this target.
   file_ids:   Vec<String>,
   // Total number of hunks across the covered files.
   hunk_count: usize,
   // Sum of the covered files' `additions`.
   additions:  usize,
   // Sum of the covered files' `deletions`.
   deletions:  usize,
}
166
/// Planning targets for a snapshot together with alias lookups; built by
/// `build_planning_index`.
#[derive(Debug, Clone)]
struct PlanningIndex {
   // Whether targets are individual files or path-based areas.
   mode:    PlanningMode,
   // All planning targets, in construction order.
   targets: Vec<PlanningTarget>,
   // Maps each target ID, its ASCII-uppercased form, and its normalized label
   // to the canonical target ID.
   aliases: HashMap<String, String>,
}
173
/// A group of files produced by `collect_planning_buckets`, later turned into
/// an area planning target.
#[derive(Debug, Clone)]
struct PlanningBucket {
   // Label computed by `planning_bucket_label`.
   label:    String,
   // File IDs that belong to the bucket.
   file_ids: Vec<String>,
}
179
180impl PlanningIndex {
181   fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
182      let mut expanded = Vec::new();
183      let mut seen_file_ids = HashSet::new();
184
185      for target_id in target_ids {
186         if let Some(target) = self
187            .targets
188            .iter()
189            .find(|candidate| candidate.target_id == *target_id)
190         {
191            for file_id in &target.file_ids {
192               if seen_file_ids.insert(file_id.clone()) {
193                  expanded.push(file_id.clone());
194               }
195            }
196         }
197      }
198
199      expanded
200   }
201}
202
203impl SnapshotSummaryBudget {
204   const fn is_compacted(self) -> bool {
205      self.max_hunks_per_file.is_some()
206   }
207}
208
/// Reports whether `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it is one of
/// the known manifest names or when its extension is `lock` / `lockb`
/// (ASCII case-insensitive). Paths without a UTF-8 file name never match.
fn is_dependency_manifest(path: &str) -> bool {
   const KNOWN_MANIFESTS: &[&str] = &[
      "Cargo.toml",
      "Cargo.lock",
      "package.json",
      "package-lock.json",
      "pnpm-lock.yaml",
      "yarn.lock",
      "bun.lock",
      "bun.lockb",
      "go.mod",
      "go.sum",
      "requirements.txt",
      "Pipfile",
      "Pipfile.lock",
      "pyproject.toml",
      "Gemfile",
      "Gemfile.lock",
      "composer.json",
      "composer.lock",
      "build.gradle",
      "build.gradle.kts",
      "gradle.properties",
      "pom.xml",
   ];

   let name = match Path::new(path).file_name().and_then(|name| name.to_str()) {
      Some(name) => name,
      None => return false,
   };

   let is_known_name = KNOWN_MANIFESTS.iter().any(|known| *known == name);
   let has_lock_extension = || {
      Path::new(name).extension().is_some_and(|extension| {
         extension.eq_ignore_ascii_case("lock") || extension.eq_ignore_ascii_case("lockb")
      })
   };

   is_known_name || has_lock_extension()
}
248
249fn save_debug_artifact<T: Serialize>(
250   debug_dir: Option<&Path>,
251   filename: &str,
252   value: &T,
253) -> Result<()> {
254   let Some(debug_dir) = debug_dir else {
255      return Ok(());
256   };
257
258   fs::create_dir_all(debug_dir)?;
259   let path = debug_dir.join(filename);
260   let json = serde_json::to_string_pretty(value)?;
261   fs::write(path, json)?;
262   Ok(())
263}
264
/// Hashes `input` with 64-bit FNV-1a and returns the digest as 16 lowercase
/// hexadecimal digits.
fn fnv1a_64(input: &str) -> String {
   const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
   const PRIME: u64 = 0x100000001b3;

   let digest = input
      .bytes()
      .fold(OFFSET_BASIS, |state, byte| (state ^ u64::from(byte)).wrapping_mul(PRIME));

   format!("{digest:016x}")
}
273
274fn compose_plan_cache_key(
275   snapshot: &ComposeSnapshot,
276   max_commits: usize,
277   analysis_model: &str,
278) -> String {
279   fnv1a_64(&format!(
280      "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
281      snapshot.diff, snapshot.stat
282   ))
283}
284
285fn compose_plan_cache_path(
286   dir: &str,
287   snapshot: &ComposeSnapshot,
288   max_commits: usize,
289   analysis_model: &str,
290) -> Result<PathBuf> {
291   let git_dir = get_git_dir(dir)?;
292   Ok(git_dir.join("llm-git").join(format!(
293      "compose-plan-{}.json",
294      compose_plan_cache_key(snapshot, max_commits, analysis_model)
295   )))
296}
297
298fn load_cached_plan(
299   dir: &str,
300   snapshot: &ComposeSnapshot,
301   max_commits: usize,
302   analysis_model: &str,
303) -> Result<Option<ComposeExecutablePlan>> {
304   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
305   if !cache_path.exists() {
306      return Ok(None);
307   }
308
309   let content = match fs::read_to_string(&cache_path) {
310      Ok(content) => content,
311      Err(err) => {
312         eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
313         return Ok(None);
314      },
315   };
316   let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
317      Ok(cached) => cached,
318      Err(err) => {
319         eprintln!(
320            "{}",
321            style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
322         );
323         let _ = fs::remove_file(&cache_path);
324         return Ok(None);
325      },
326   };
327   let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
328
329   if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
330      return Ok(None);
331   }
332   if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
333      eprintln!(
334         "{}",
335         style::warning(&format!(
336            "Discarding cached compose plan (no longer valid for current snapshot): {err}"
337         ))
338      );
339      let _ = fs::remove_file(&cache_path);
340      return Ok(None);
341   }
342   Ok(Some(cached.plan))
343}
344
345fn save_cached_plan(
346   dir: &str,
347   snapshot: &ComposeSnapshot,
348   max_commits: usize,
349   analysis_model: &str,
350   plan: &ComposeExecutablePlan,
351) -> Result<()> {
352   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
353   if let Some(parent) = cache_path.parent() {
354      fs::create_dir_all(parent)?;
355   }
356
357   let cached = ComposeCachedPlan {
358      schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
359      cache_key:      compose_plan_cache_key(snapshot, max_commits, analysis_model),
360      plan:           plan.clone(),
361   };
362   fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
363   Ok(())
364}
365
/// Formats a line range for display: `"0"` for an empty range, the start line
/// for a single line, and `start-end` (inclusive) otherwise.
fn format_line_range(start: usize, count: usize) -> String {
   if count == 0 {
      return "0".to_string();
   }
   if count == 1 {
      return start.to_string();
   }

   let end = start + count - 1;
   format!("{start}-{end}")
}
373
374const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
375   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
376      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
377   {
378      SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
379   } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
380      || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
381   {
382      SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
383   } else {
384      SnapshotSummaryBudget {
385         max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
386         max_hunks_per_file:        None,
387      }
388   }
389}
390
/// Picks at most `max_samples` evenly spread indices out of `0..count`.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned (previously index 0 leaked
///   through even though no samples were requested).
/// * When `max_samples` is 1, only index 0 is returned.
/// * Otherwise the first and last indices are always included and the rest are
///   spaced evenly between them, in ascending order without duplicates.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
   if count <= max_samples {
      return (0..count).collect();
   }

   match max_samples {
      0 => return Vec::new(),
      1 => return vec![0],
      _ => {},
   }

   let last = count - 1;
   let mut positions = Vec::with_capacity(max_samples);
   for slot in 0..max_samples {
      let position = slot * last / (max_samples - 1);
      // Integer division can map neighbouring slots to the same index.
      if positions.last().copied() != Some(position) {
         positions.push(position);
      }
   }
   positions
}
410
411fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
412   match budget.max_hunks_per_file {
413      None => file.hunk_ids.iter().map(String::as_str).collect(),
414      Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
415         .into_iter()
416         .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
417         .collect(),
418   }
419}
420
421fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
422   let budget = snapshot_summary_budget(snapshot);
423   let observations_by_file: HashMap<&str, Vec<&str>> = observations
424      .iter()
425      .map(|observation| {
426         (
427            observation.file.as_str(),
428            observation
429               .observations
430               .iter()
431               .map(String::as_str)
432               .take(budget.max_observations_per_file)
433               .collect(),
434         )
435      })
436      .collect();
437
438   let mut out = String::new();
439   if budget.is_compacted() {
440      let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
441      writeln!(
442         out,
443         "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
444          representative hunks and {} observation(s) per file",
445         budget.max_observations_per_file
446      )
447      .unwrap();
448   }
449
450   for file in &snapshot.files {
451      writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
452      if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
453         for observation in file_observations {
454            writeln!(out, "  observation: {observation}").unwrap();
455         }
456      }
457
458      let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
459      for hunk_id in &rendered_hunk_ids {
460         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
461            if hunk.synthetic {
462               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
463            } else {
464               writeln!(
465                  out,
466                  "  - {} old:{} new:{} :: {}",
467                  hunk.hunk_id,
468                  format_line_range(hunk.old_start, hunk.old_count),
469                  format_line_range(hunk.new_start, hunk.new_count),
470                  hunk.snippet
471               )
472               .unwrap();
473            }
474         }
475      }
476
477      let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
478      if omitted_hunks > 0 {
479         writeln!(out, "  ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
480      }
481   }
482
483   out
484}
485
486const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
487   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
488      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
489   {
490      PlanningMode::Area
491   } else {
492      PlanningMode::File
493   }
494}
495
/// Counts the `/`-separated segments of `path`; an empty path counts as one
/// segment.
fn path_depth(path: &str) -> usize {
   let separators = path.matches('/').count();
   separators + 1
}
499
/// Returns the first `depth` `/`-separated segments of `path`, joined by `/`.
///
/// A depth of 0 yields an empty string; a depth beyond the number of segments
/// yields the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
   let leading: Vec<&str> = path.split('/').take(depth).collect();
   leading.join("/")
}
509
/// Returns the longest run of leading `/`-separated segments shared by all
/// `paths`, joined by `/`.
///
/// An empty slice or paths without a shared first segment yield an empty
/// string; a single path is returned unchanged.
fn common_path_prefix(paths: &[String]) -> String {
   let mut remaining = paths.iter();
   let Some(first) = remaining.next() else {
      return String::new();
   };

   let mut shared: Vec<&str> = first.split('/').collect();
   for other in remaining {
      if shared.is_empty() {
         break;
      }

      let matching = shared
         .iter()
         .zip(other.split('/'))
         .take_while(|(ours, theirs)| **ours == *theirs)
         .count();
      shared.truncate(matching);
   }

   shared.join("/")
}
531
532fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
533   file_ids
534      .iter()
535      .filter_map(|file_id| snapshot.file_by_id(file_id))
536      .map(|file| file.hunk_ids.len())
537      .sum()
538}
539
540fn group_file_ids_by_prefix(
541   snapshot: &ComposeSnapshot,
542   file_ids: &[String],
543   depth: usize,
544) -> BTreeMap<String, Vec<String>> {
545   let mut groups = BTreeMap::new();
546
547   for file_id in file_ids {
548      if let Some(file) = snapshot.file_by_id(file_id) {
549         groups
550            .entry(prefix_at_depth(&file.path, depth))
551            .or_insert_with(Vec::new)
552            .push(file_id.clone());
553      }
554   }
555
556   groups
557}
558
559fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
560   let paths: Vec<String> = file_ids
561      .iter()
562      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
563      .collect();
564
565   let common_prefix = common_path_prefix(&paths);
566   if common_prefix.is_empty() {
567      paths.first().cloned().unwrap_or_else(|| "misc".to_string())
568   } else {
569      common_prefix
570   }
571}
572
573fn collect_planning_buckets(
574   snapshot: &ComposeSnapshot,
575   file_ids: &[String],
576   depth: usize,
577) -> Vec<PlanningBucket> {
578   let file_count = file_ids.len();
579   let hunk_count = bucket_hunk_count(snapshot, file_ids);
580   let max_path_depth = file_ids
581      .iter()
582      .filter_map(|file_id| snapshot.file_by_id(file_id))
583      .map(|file| path_depth(&file.path))
584      .max()
585      .unwrap_or(depth);
586
587   let should_stop =
588      file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
589   if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
590      return vec![PlanningBucket {
591         label:    planning_bucket_label(snapshot, file_ids),
592         file_ids: file_ids.to_vec(),
593      }];
594   }
595
596   let next_depth = depth + 1;
597   let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
598   if groups.len() <= 1 {
599      return collect_planning_buckets(snapshot, file_ids, next_depth);
600   }
601
602   groups
603      .into_values()
604      .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
605      .collect()
606}
607
608fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
609   let all_file_ids: Vec<String> = snapshot
610      .files
611      .iter()
612      .map(|file| file.file_id.clone())
613      .collect();
614   let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
615
616   buckets
617      .into_iter()
618      .enumerate()
619      .map(|(idx, bucket)| {
620         let mut additions = 0_usize;
621         let mut deletions = 0_usize;
622         let mut hunk_count = 0_usize;
623
624         for file_id in &bucket.file_ids {
625            if let Some(file) = snapshot.file_by_id(file_id) {
626               additions = additions.saturating_add(file.additions);
627               deletions = deletions.saturating_add(file.deletions);
628               hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
629            }
630         }
631
632         PlanningTarget {
633            target_id: format!("A{:03}", idx + 1),
634            label: bucket.label,
635            file_ids: bucket.file_ids,
636            hunk_count,
637            additions,
638            deletions,
639         }
640      })
641      .collect()
642}
643
644fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
645   snapshot
646      .files
647      .iter()
648      .map(|file| PlanningTarget {
649         target_id:  file.file_id.clone(),
650         label:      file.path.clone(),
651         file_ids:   vec![file.file_id.clone()],
652         hunk_count: file.hunk_ids.len(),
653         additions:  file.additions,
654         deletions:  file.deletions,
655      })
656      .collect()
657}
658
659fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
660   let mode = planning_mode_for_snapshot(snapshot);
661   let targets = match mode {
662      PlanningMode::File => build_file_planning_targets(snapshot),
663      PlanningMode::Area => build_area_planning_targets(snapshot),
664   };
665
666   let aliases = targets
667      .iter()
668      .flat_map(|target| {
669         let normalized_label = normalize_file_reference(&target.label);
670         [
671            (target.target_id.clone(), target.target_id.clone()),
672            (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
673            (normalized_label, target.target_id.clone()),
674         ]
675      })
676      .collect();
677
678   PlanningIndex { mode, targets, aliases }
679}
680
681fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
682   sample_positions(target.file_ids.len(), 4)
683      .into_iter()
684      .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
685      .collect()
686}
687
688fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
689   let hunk_ids: Vec<&String> = target
690      .file_ids
691      .iter()
692      .filter_map(|file_id| snapshot.file_by_id(file_id))
693      .flat_map(|file| file.hunk_ids.iter())
694      .collect();
695
696   sample_positions(hunk_ids.len(), 4)
697      .into_iter()
698      .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
699      .collect()
700}
701
702fn render_planning_stat(index: &PlanningIndex) -> String {
703   let mut out = String::new();
704
705   match index.mode {
706      PlanningMode::File => {
707         writeln!(out, "# planning over individual file IDs").unwrap();
708      },
709      PlanningMode::Area => {
710         writeln!(
711            out,
712            "# planning over {} area IDs spanning {} files",
713            index.targets.len(),
714            index
715               .targets
716               .iter()
717               .flat_map(|target| target.file_ids.iter())
718               .collect::<HashSet<_>>()
719               .len()
720         )
721         .unwrap();
722      },
723   }
724
725   for target in &index.targets {
726      writeln!(
727         out,
728         "{} {} | {} files | {} hunks | +{}/-{}",
729         target.target_id,
730         target.label,
731         target.file_ids.len(),
732         target.hunk_count,
733         target.additions,
734         target.deletions
735      )
736      .unwrap();
737   }
738
739   out
740}
741
742fn render_planning_snapshot_summary(
743   snapshot: &ComposeSnapshot,
744   observations: &[FileObservation],
745   index: &PlanningIndex,
746) -> String {
747   if index.mode == PlanningMode::File {
748      return render_snapshot_summary(snapshot, observations);
749   }
750
751   let observations_by_file: HashMap<&str, Vec<&str>> = observations
752      .iter()
753      .map(|observation| {
754         (
755            observation.file.as_str(),
756            observation
757               .observations
758               .iter()
759               .map(String::as_str)
760               .take(1)
761               .collect(),
762         )
763      })
764      .collect();
765
766   let mut out = String::new();
767   writeln!(
768      out,
769      "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
770   )
771   .unwrap();
772
773   for target in &index.targets {
774      writeln!(
775         out,
776         "- {} {} ({} files, {} hunks, +{}/-{})",
777         target.target_id,
778         target.label,
779         target.file_ids.len(),
780         target.hunk_count,
781         target.additions,
782         target.deletions
783      )
784      .unwrap();
785
786      let sample_file_ids = sample_file_ids_for_target(target);
787      if !sample_file_ids.is_empty() {
788         let sample_files: Vec<String> = sample_file_ids
789            .iter()
790            .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
791            .collect();
792         writeln!(out, "  files: {}", sample_files.join(", ")).unwrap();
793         let omitted = target.file_ids.len().saturating_sub(sample_files.len());
794         if omitted > 0 {
795            writeln!(out, "  ... {omitted} more files omitted from {}", target.target_id).unwrap();
796         }
797      }
798
799      let mut rendered_observations = 0_usize;
800      for file_id in &target.file_ids {
801         let Some(file) = snapshot.file_by_id(file_id) else {
802            continue;
803         };
804         let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
805            continue;
806         };
807
808         for observation in file_observations {
809            writeln!(out, "  observation: {observation}").unwrap();
810            rendered_observations += 1;
811            if rendered_observations >= 2 {
812               break;
813            }
814         }
815
816         if rendered_observations >= 2 {
817            break;
818         }
819      }
820
821      for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
822         if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
823            if hunk.synthetic {
824               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
825            } else {
826               writeln!(
827                  out,
828                  "  - {} old:{} new:{} :: {}",
829                  hunk.hunk_id,
830                  format_line_range(hunk.old_start, hunk.old_count),
831                  format_line_range(hunk.new_start, hunk.new_count),
832                  hunk.snippet
833               )
834               .unwrap();
835            }
836         }
837      }
838   }
839
840   out
841}
842
843fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
844   match index.mode {
845      PlanningMode::File => format!(
846         "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
847         snapshot.files.len()
848      ),
849      PlanningMode::Area => format!(
850         "Area IDs only. Each target may expand to multiple files by shared path prefix. \
851          Coverage: {} areas spanning {} files.",
852         index.targets.len(),
853         snapshot.files.len()
854      ),
855   }
856}
857
858fn render_planning_notes(index: &PlanningIndex) -> String {
859   match index.mode {
860      PlanningMode::File => {
861         "Use only the provided file IDs and keep the grouping conservative.".to_string()
862      },
863      PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
864                             planning areas. Split along independent subsystems or workstreams \
865                             when the areas point at unrelated changes."
866         .to_string(),
867   }
868}
869
870fn render_split_bias(index: &PlanningIndex) -> String {
871   match index.mode {
872      PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
873      PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
874                             one broad group if nearly every area clearly belongs to the same \
875                             atomic change."
876         .to_string(),
877   }
878}
879
880fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
881   let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
882
883   strict_json_schema(
884      serde_json::json!({
885         "groups": {
886            "type": "array",
887            "items": {
888               "type": "object",
889               "properties": {
890                  "group_id": {
891                     "type": "string",
892                     "description": "Stable identifier like G1, G2, G3"
893                  },
894                  "file_ids": {
895                     "type": "array",
896                     "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
897                     "items": { "type": "string" }
898                  },
899                  "type": {
900                     "type": "string",
901                     "enum": type_enum,
902                     "description": "Conventional commit type for this group"
903                  },
904                  "scope": {
905                     "type": "string",
906                     "description": "Optional scope (module/component). Omit if broad."
907                  },
908                  "rationale": {
909                     "type": "string",
910                     "description": "Brief explanation of the logical change"
911                  },
912                  "dependencies": {
913                     "type": "array",
914                     "description": "Group IDs this group depends on",
915                     "items": { "type": "string" }
916                  }
917               },
918               "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
919               "additionalProperties": false
920            }
921         }
922      }),
923      &["groups"],
924   )
925}
926
927fn build_binding_schema() -> serde_json::Value {
928   strict_json_schema(
929      serde_json::json!({
930         "assignments": {
931            "type": "array",
932            "items": {
933               "type": "object",
934               "properties": {
935                  "group_id": { "type": "string" },
936                  "hunk_ids": {
937                     "type": "array",
938                     "items": { "type": "string" }
939                  }
940               },
941               "required": ["group_id", "hunk_ids"],
942               "additionalProperties": false
943            }
944         }
945      }),
946      &["assignments"],
947   )
948}
949
950fn compute_dependency_order<T, FId, FDeps>(
951   groups: &[T],
952   group_id: FId,
953   dependencies: FDeps,
954) -> Result<Vec<usize>>
955where
956   FId: Fn(&T) -> &str,
957   FDeps: Fn(&T) -> &[String],
958{
959   let mut index_by_id = HashMap::new();
960   for (idx, group) in groups.iter().enumerate() {
961      let id = group_id(group);
962      if id.trim().is_empty() {
963         return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
964      }
965      if index_by_id.insert(id.to_string(), idx).is_some() {
966         return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
967      }
968   }
969
970   let mut in_degree = vec![0_usize; groups.len()];
971   let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
972
973   for (idx, group) in groups.iter().enumerate() {
974      for dependency in dependencies(group) {
975         let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
976            CommitGenError::Other(format!(
977               "Group {} depends on unknown group_id '{}'",
978               group_id(group),
979               dependency
980            ))
981         })?;
982         if dependency_idx == idx {
983            return Err(CommitGenError::Other(format!(
984               "Group {} depends on itself",
985               group_id(group)
986            )));
987         }
988
989         adjacency[dependency_idx].push(idx);
990         in_degree[idx] += 1;
991      }
992   }
993
994   let mut queue: Vec<usize> = (0..groups.len())
995      .filter(|idx| in_degree[*idx] == 0)
996      .collect();
997   let mut order = Vec::with_capacity(groups.len());
998
999   while let Some(node) = queue.pop() {
1000      order.push(node);
1001      for neighbor in &adjacency[node] {
1002         in_degree[*neighbor] -= 1;
1003         if in_degree[*neighbor] == 0 {
1004            queue.push(*neighbor);
1005         }
1006      }
1007   }
1008
1009   if order.len() != groups.len() {
1010      return Err(CommitGenError::Other(
1011         "Circular dependency detected in compose groups".to_string(),
1012      ));
1013   }
1014
1015   Ok(order)
1016}
1017
/// Cleans up a file, target, or group reference as written by the model.
///
/// Removes surrounding whitespace and quote characters (backtick, double and
/// single quote), trailing `,`/`;` separators, and any leading `./` prefixes.
///
/// Decorations are stripped regardless of the order in which they are nested,
/// so `"src/a.rs",` and `' src/a.rs '` both normalize to `src/a.rs`. Leading
/// separators and interior characters are left untouched.
fn normalize_file_reference(raw_file_ref: &str) -> String {
   fn is_quote(ch: char) -> bool {
      matches!(ch, '`' | '"' | '\'')
   }

   raw_file_ref
      .trim_start_matches(|ch: char| ch.is_whitespace() || is_quote(ch))
      // Trailing separators may sit inside or outside the closing quote, so
      // quotes, separators, and whitespace are stripped together.
      .trim_end_matches(|ch: char| ch.is_whitespace() || is_quote(ch) || matches!(ch, ',' | ';'))
      .trim_start_matches("./")
      .to_string()
}
1026
/// Extracts distinctive lowercase keywords from free-form planning text.
///
/// The text is split on every character that is not ASCII alphanumeric. Words
/// shorter than three characters and generic planning vocabulary (the stop
/// words below) are discarded. Each surviving word is returned once, in order
/// of first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
   const STOP_WORDS: &[&str] = &[
      "and",
      "for",
      "the",
      "with",
      "from",
      "into",
      "after",
      "before",
      "over",
      "under",
      "plus",
      "across",
      "update",
      "updated",
      "refactor",
      "refactored",
      "changes",
      "change",
      "logical",
      "group",
      "groups",
      "commit",
      "commits",
   ];
   const MIN_TOKEN_LEN: usize = 3;

   // Remembers which words were already emitted so duplicates are skipped.
   let mut emitted: HashSet<String> = HashSet::new();

   text
      .split(|ch: char| !ch.is_ascii_alphanumeric())
      .filter(|word| word.len() >= MIN_TOKEN_LEN)
      .map(str::to_ascii_lowercase)
      .filter(|word| !STOP_WORDS.contains(&word.as_str()))
      .filter(|word| emitted.insert(word.clone()))
      .collect()
}
1078
1079fn extract_group_id_candidate(raw: &str) -> Option<String> {
1080   let normalized = normalize_file_reference(raw);
1081   let uppercase = normalized.to_ascii_uppercase();
1082
1083   if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1084      return Some(format!("G{uppercase}"));
1085   }
1086
1087   if let Some(rest) = uppercase.strip_prefix('G')
1088      && !rest.is_empty()
1089      && rest.chars().all(|ch| ch.is_ascii_digit())
1090   {
1091      return Some(format!("G{rest}"));
1092   }
1093
1094   let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1095   let compact = uppercase
1096      .chars()
1097      .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1098      .collect::<String>();
1099   if compact.starts_with("GROUP") && !digits.is_empty() {
1100      return Some(format!("G{digits}"));
1101   }
1102
1103   None
1104}
1105
/// Coarse classification of a changed file, as assigned by
/// `compose_file_category`. The rules are checked in the order the variants
/// are listed below, except that `Prompt` is tested before `Docs`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeFileCategory {
   /// The file is flagged as binary.
   Binary,
   /// The path is recognized by `is_dependency_manifest`.
   Dependency,
   /// Markdown (`.md`) or README files.
   Docs,
   /// The lowercased path contains "prompt" or "system".
   Prompt,
   /// The path lies under a `tests/` directory, or the file name contains
   /// "test" or "spec".
   Test,
   /// Configuration-style extensions (`toml`, `yaml`, `yml`, `json`, `ini`,
   /// `cfg`, `conf`, `env`).
   Config,
   /// Known programming-language source extensions (for example `rs`, `py`,
   /// `ts`, `go`).
   Source,
   /// Anything that matches none of the rules above.
   Other,
}
1117
1118fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1119   if file.is_binary {
1120      return ComposeFileCategory::Binary;
1121   }
1122
1123   if is_dependency_manifest(&file.path) {
1124      return ComposeFileCategory::Dependency;
1125   }
1126
1127   let filename_lower = file.path.to_ascii_lowercase();
1128   let file_name = Path::new(&filename_lower)
1129      .file_name()
1130      .and_then(|name| name.to_str())
1131      .unwrap_or_default();
1132   let extension = Path::new(&filename_lower)
1133      .extension()
1134      .and_then(|ext| ext.to_str())
1135      .unwrap_or_default();
1136
1137   if filename_lower.contains("prompt") || filename_lower.contains("system") {
1138      return ComposeFileCategory::Prompt;
1139   }
1140
1141   if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1142      return ComposeFileCategory::Docs;
1143   }
1144
1145   if filename_lower.contains("/tests/")
1146      || filename_lower.starts_with("tests/")
1147      || file_name.contains("test")
1148      || file_name.contains("spec")
1149   {
1150      return ComposeFileCategory::Test;
1151   }
1152
1153   if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1154      return ComposeFileCategory::Config;
1155   }
1156
1157   if matches!(
1158      extension,
1159      "rs"
1160         | "py"
1161         | "js"
1162         | "jsx"
1163         | "ts"
1164         | "tsx"
1165         | "go"
1166         | "java"
1167         | "kt"
1168         | "c"
1169         | "cc"
1170         | "cpp"
1171         | "h"
1172         | "hpp"
1173         | "cs"
1174         | "rb"
1175         | "php"
1176         | "swift"
1177         | "scala"
1178         | "m"
1179         | "mm"
1180   ) {
1181      return ComposeFileCategory::Source;
1182   }
1183
1184   ComposeFileCategory::Other
1185}
1186
/// Counts how many leading `/`-separated segments two paths have in common.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
   let mut right_segments = right.split('/');
   let mut depth = 0;

   for left_segment in left.split('/') {
      match right_segments.next() {
         Some(right_segment) if right_segment == left_segment => depth += 1,
         _ => break,
      }
   }

   depth
}
1194
1195fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1196   let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1197
1198   if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1199      score += 40;
1200   }
1201
1202   if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1203      score += 12;
1204   }
1205
1206   if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1207      score += 18;
1208   }
1209
1210   score
1211}
1212
1213fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1214   match (compose_file_category(file), group.commit_type.as_str()) {
1215      (ComposeFileCategory::Docs, "docs") => 25,
1216      (ComposeFileCategory::Test, "test") => 25,
1217      (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1218      (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1219      (
1220         ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1221         "feat" | "fix" | "refactor" | "perf",
1222      ) => 10,
1223      _ => 0,
1224   }
1225}
1226
1227fn best_group_for_missing_file(
1228   snapshot: &ComposeSnapshot,
1229   groups: &[ComposeIntentGroup],
1230   missing_file: &ComposeFile,
1231) -> usize {
1232   let mut best_group_idx = 0;
1233   let mut best_score = i32::MIN;
1234   let mut best_group_size = usize::MAX;
1235
1236   for (group_idx, group) in groups.iter().enumerate() {
1237      let similarity = group
1238         .file_ids
1239         .iter()
1240         .filter_map(|file_id| snapshot.file_by_id(file_id))
1241         .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1242         .max()
1243         .unwrap_or_default();
1244      let score = similarity + group_type_bonus(missing_file, group);
1245      let group_size = group.file_ids.len();
1246
1247      if score > best_score || (score == best_score && group_size < best_group_size) {
1248         best_group_idx = group_idx;
1249         best_score = score;
1250         best_group_size = group_size;
1251      }
1252   }
1253
1254   best_group_idx
1255}
1256
1257fn normalize_dependency_reference(
1258   raw_dependency: &str,
1259   known_group_ids: &HashSet<String>,
1260) -> Option<String> {
1261   let normalized = normalize_file_reference(raw_dependency);
1262   if normalized.is_empty() {
1263      return None;
1264   }
1265
1266   if known_group_ids.contains(&normalized) {
1267      return Some(normalized);
1268   }
1269
1270   let uppercase = normalized.to_ascii_uppercase();
1271   if known_group_ids.contains(&uppercase) {
1272      return Some(uppercase);
1273   }
1274
1275   let candidate = extract_group_id_candidate(&normalized)?;
1276   known_group_ids.contains(&candidate).then_some(candidate)
1277}
1278
1279fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1280   let label = target.label.to_ascii_lowercase();
1281   let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1282   let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1283
1284   if let Some(scope) = &group.scope {
1285      let scope = scope.as_str().to_ascii_lowercase();
1286      if label.contains(&scope) || workstream.contains(&scope) {
1287         score += 140;
1288      }
1289
1290      for segment in scope.split('/') {
1291         if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1292            score += 45;
1293         }
1294      }
1295   }
1296
1297   for token in planning_text_tokens(&group.rationale) {
1298      if label.contains(&token) || workstream.contains(&token) {
1299         score += 16;
1300      }
1301   }
1302
1303   match group.commit_type.as_str() {
1304      "ci" if target.label.starts_with(".github/") => score += 120,
1305      "docs"
1306         if target.label.starts_with("docs/")
1307            || Path::new(&target.label)
1308               .extension()
1309               .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1310      {
1311         score += 80;
1312      },
1313      "build" | "chore"
1314         if target.label.contains("Cargo")
1315            || target.label.contains("package")
1316            || target.label.contains("lock")
1317            || target.label.contains("tsconfig")
1318            || target.label.contains("biome")
1319            || target.label.contains("bun") =>
1320      {
1321         score += 55;
1322      },
1323      _ => {},
1324   }
1325
1326   score
1327}
1328
1329fn seed_group_targets(
1330   groups: &[ComposeIntentGroup],
1331   planning_index: &PlanningIndex,
1332   group_targets: &mut [Vec<String>],
1333   repair_notes: &mut Vec<String>,
1334) {
1335   let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1336
1337   for (group_idx, group) in groups.iter().enumerate() {
1338      if !group_targets[group_idx].is_empty() {
1339         continue;
1340      }
1341
1342      let fallback_target = planning_index
1343         .targets
1344         .iter()
1345         .max_by_key(|target| {
1346            let mut score = planning_target_match_score(target, group);
1347            if !claimed_target_ids.contains(&target.target_id) {
1348               score += 60;
1349            }
1350            (score, target.hunk_count, target.file_ids.len())
1351         })
1352         .or_else(|| planning_index.targets.first());
1353
1354      let Some(fallback_target) = fallback_target else {
1355         continue;
1356      };
1357
1358      group_targets[group_idx].push(fallback_target.target_id.clone());
1359      claimed_target_ids.insert(fallback_target.target_id.clone());
1360      repair_notes.push(format!(
1361         "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1362         group.group_id, fallback_target.target_id, fallback_target.label
1363      ));
1364   }
1365}
1366
/// Validates and repairs the raw groups returned by the compose intent
/// planner.
///
/// On entry each group's `file_ids` holds the planner's planning-target
/// references; on return it holds the file IDs those targets expand to. The
/// steps are:
/// 1. resolve every target reference to a known target ID (verbatim,
///    uppercased, or through the planning index aliases) and drop the rest,
/// 2. seed groups that ended up without targets via `seed_group_targets`,
/// 3. resolve, de-duplicate, and prune each group's dependencies,
/// 4. expand target IDs into file IDs,
/// 5. assign every snapshot file no group covers to the best-fitting group.
///
/// Returns the repaired groups together with human-readable notes describing
/// each repair, in the order the repairs were made.
///
/// # Errors
///
/// Returns `CommitGenError::Other` when the planner produced no groups.
fn normalize_intent_plan(
   snapshot: &ComposeSnapshot,
   planning_index: &PlanningIndex,
   mut groups: Vec<ComposeIntentGroup>,
) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
   if groups.is_empty() {
      return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
   }

   let known_target_ids: HashSet<&str> = planning_index
      .targets
      .iter()
      .map(|target| target.target_id.as_str())
      .collect();
   let mut repair_notes = Vec::new();
   let mut covered_file_ids = HashSet::new();
   // Parallel to `groups`: the resolved target IDs of each group.
   let mut normalized_group_targets = Vec::with_capacity(groups.len());

   // Step 1: resolve the planner's target references group by group.
   for group in &groups {
      if group.file_ids.is_empty() {
         repair_notes.push(format!(
            "Compose planner left {} without planning targets; assigning targets heuristically",
            group.group_id
         ));
      }

      let mut normalized_target_ids = Vec::new();
      let mut seen_target_ids = HashSet::new();
      for raw_target_ref in &group.file_ids {
         let normalized_ref = normalize_file_reference(raw_target_ref);
         // Resolution order: exact ID, uppercased ID, then alias lookup.
         let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
            normalized_ref.clone()
         } else {
            let uppercase_ref = normalized_ref.to_ascii_uppercase();
            if known_target_ids.contains(uppercase_ref.as_str()) {
               uppercase_ref
            } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
               if raw_target_ref != target_id {
                  repair_notes.push(format!(
                     "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
                  ));
               }
               target_id.clone()
            } else {
               repair_notes.push(format!(
                  "Dropped unknown planning target '{}' from {}",
                  raw_target_ref, group.group_id
               ));
               continue;
            }
         };

         // Keep the first occurrence of each target within a group.
         if seen_target_ids.insert(canonical_target_id.clone()) {
            normalized_target_ids.push(canonical_target_id);
         }
      }

      normalized_group_targets.push(normalized_target_ids);
   }

   // Step 2: groups whose references were all missing or invalid get a seed.
   seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);

   // Step 3: normalize dependencies against the set of group IDs.
   let known_group_ids: HashSet<String> =
      groups.iter().map(|group| group.group_id.clone()).collect();
   for group in &mut groups {
      let mut normalized_dependencies = Vec::new();
      let mut seen_dependencies = HashSet::new();

      for raw_dependency in &group.dependencies {
         let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
         else {
            repair_notes.push(format!(
               "Dropped unknown dependency '{}' from {}",
               raw_dependency, group.group_id
            ));
            continue;
         };

         if dependency == group.group_id {
            repair_notes.push(format!(
               "Dropped self-dependency '{}' from {}",
               raw_dependency, group.group_id
            ));
            continue;
         }

         // Duplicates are dropped silently; a note is added only when the
         // kept spelling differs from what the planner wrote.
         if seen_dependencies.insert(dependency.clone()) {
            if raw_dependency != &dependency {
               repair_notes.push(format!(
                  "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
               ));
            }
            normalized_dependencies.push(dependency);
         }
      }

      group.dependencies = normalized_dependencies;
   }

   // Step 4: from here on `file_ids` holds file IDs instead of target refs.
   for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
      let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
      for file_id in &expanded_file_ids {
         covered_file_ids.insert(file_id.clone());
      }
      group.file_ids = expanded_file_ids;
   }

   // Step 5: every snapshot file must belong to at least one group.
   for file in &snapshot.files {
      if covered_file_ids.contains(file.file_id.as_str()) {
         continue;
      }

      let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
      let target_group = &mut groups[target_group_idx];
      target_group.file_ids.push(file.file_id.clone());
      covered_file_ids.insert(file.file_id.clone());
      repair_notes.push(format!(
         "Compose planner omitted {} ({}); assigned it to {}",
         file.file_id, file.path, target_group.group_id
      ));
   }

   Ok((groups, repair_notes))
}
1491
/// Derives the workstream key that groups related planning-target labels.
///
/// Empty path segments are ignored. For labels rooted at `.github` or one of
/// the container directories (`apps`, `packages`, `crates`, `services`,
/// `libs`, `pass`) the key is the first two segments; for everything else it
/// is the first segment alone. A label without any non-empty segment is
/// returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
   let mut segments = label.split('/').filter(|segment| !segment.is_empty());
   let Some(root) = segments.next() else {
      return label.to_string();
   };

   let keeps_child = matches!(
      root,
      ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
   );

   match segments.next() {
      Some(child) if keeps_child => format!("{root}/{child}"),
      _ => root.to_string(),
   }
}
1513
1514fn workstream_display_name(label: &str) -> String {
1515   let key = workstream_key_for_label(label);
1516   match key.as_str() {
1517      ".github/workflows" => "CI workflows".to_string(),
1518      ".github" => "GitHub automation".to_string(),
1519      _ => key
1520         .split('/')
1521         .next_back()
1522         .map(|segment| segment.replace(['_', '-'], " "))
1523         .unwrap_or(key),
1524   }
1525}
1526
/// Converts arbitrary text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces collapse into a single `-`; every other character is dropped.
/// The result never starts or ends with `-`. Returns `None` when nothing
/// usable remains.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
   let mut slug = String::new();

   for ch in raw.trim().chars() {
      match ch {
         _ if ch.is_ascii_alphanumeric() => slug.push(ch.to_ascii_lowercase()),
         '-' | '_' | '/' | '.' | ' ' => {
            // Never lead with a dash and never emit two dashes in a row.
            if !slug.is_empty() && !slug.ends_with('-') {
               slug.push('-');
            }
         },
         _ => {},
      }
   }

   // At most one dash can be left dangling at the end.
   if slug.ends_with('-') {
      slug.pop();
   }

   if slug.is_empty() { None } else { Some(slug) }
}
1545
1546fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1547   let key = workstream_key_for_label(label);
1548   let candidate = key
1549      .split('/')
1550      .next_back()
1551      .and_then(sanitize_scope_fragment)?;
1552   Scope::new(candidate).ok()
1553}
1554
1555fn fallback_rationale_for_labels(labels: &[String]) -> String {
1556   if labels.len() == 1 {
1557      let label = labels[0].as_str();
1558      let display = workstream_display_name(label);
1559      if label.starts_with("apps/") {
1560         return format!("{display} application updates");
1561      }
1562      if label.starts_with("packages/") {
1563         return format!("{display} package updates");
1564      }
1565      if label.starts_with("crates/") {
1566         return format!("{display} crate updates");
1567      }
1568      if label.starts_with(".github/") || label == ".github" {
1569         return format!("{display} updates");
1570      }
1571      return format!("{display} updates");
1572   }
1573
1574   let display_labels: Vec<String> = labels
1575      .iter()
1576      .take(3)
1577      .map(|label| workstream_display_name(label))
1578      .collect();
1579   format!("cross-cutting updates for {}", display_labels.join(", "))
1580}
1581
1582fn fallback_commit_type_for_group(
1583   snapshot: &ComposeSnapshot,
1584   labels: &[String],
1585   file_ids: &[String],
1586) -> Result<CommitType> {
1587   if labels
1588      .iter()
1589      .any(|label| label == ".github" || label.starts_with(".github/"))
1590   {
1591      return CommitType::new("ci");
1592   }
1593
1594   let files: Vec<&ComposeFile> = file_ids
1595      .iter()
1596      .filter_map(|file_id| snapshot.file_by_id(file_id))
1597      .collect();
1598   let all_docs = !files.is_empty()
1599      && files
1600         .iter()
1601         .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1602   if all_docs {
1603      return CommitType::new("docs");
1604   }
1605
1606   let all_tests = !files.is_empty()
1607      && files
1608         .iter()
1609         .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1610   if all_tests {
1611      return CommitType::new("test");
1612   }
1613
1614   let all_dependencies =
1615      !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1616   if all_dependencies {
1617      return CommitType::new("build");
1618   }
1619
1620   let all_config = !files.is_empty()
1621      && files.iter().all(|file| {
1622         matches!(
1623            compose_file_category(file),
1624            ComposeFileCategory::Config | ComposeFileCategory::Dependency
1625         )
1626      });
1627   if all_config {
1628      return CommitType::new("chore");
1629   }
1630
1631   CommitType::new("refactor")
1632}
1633
1634fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1635   snapshot
1636      .files
1637      .iter()
1638      .filter(|file| file_ids.contains(&file.file_id))
1639      .map(|file| file.file_id.clone())
1640      .collect()
1641}
1642
1643fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1644   if groups.is_empty() {
1645      return false;
1646   }
1647
1648   let largest_group = groups
1649      .iter()
1650      .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1651      .max()
1652      .unwrap_or_default();
1653
1654   groups.len() == 1
1655      || (groups.len() <= 2
1656         && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1657}
1658
1659fn should_force_large_patch_fallback(
1660   snapshot: &ComposeSnapshot,
1661   planning_index: &PlanningIndex,
1662   groups: &[ComposeIntentGroup],
1663   max_commits: usize,
1664) -> bool {
1665   if max_commits <= 1
1666      || planning_index.mode != PlanningMode::Area
1667      || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1668      || !is_monolithic_intent_plan(snapshot, groups)
1669   {
1670      return false;
1671   }
1672
1673   let workstream_count = planning_index
1674      .targets
1675      .iter()
1676      .map(|target| workstream_key_for_label(&target.label))
1677      .collect::<HashSet<_>>()
1678      .len();
1679
1680   workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1681}
1682
1683fn build_large_patch_fallback_groups(
1684   snapshot: &ComposeSnapshot,
1685   planning_index: &PlanningIndex,
1686   max_commits: usize,
1687) -> Result<Vec<ComposeIntentGroup>> {
1688   #[derive(Debug, Clone)]
1689   struct WorkstreamGroup {
1690      label:    String,
1691      file_ids: HashSet<String>,
1692      weight:   usize,
1693   }
1694
1695   #[derive(Debug, Clone)]
1696   struct FallbackBin {
1697      labels:       Vec<String>,
1698      file_ids:     HashSet<String>,
1699      total_weight: usize,
1700   }
1701
1702   let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1703   for target in &planning_index.targets {
1704      let key = workstream_key_for_label(&target.label);
1705      let entry = workstreams
1706         .entry(key.clone())
1707         .or_insert_with(|| WorkstreamGroup {
1708            label:    key,
1709            file_ids: HashSet::new(),
1710            weight:   0,
1711         });
1712
1713      for file_id in &target.file_ids {
1714         entry.file_ids.insert(file_id.clone());
1715      }
1716      entry.weight = entry
1717         .weight
1718         .saturating_add(target.hunk_count.max(target.file_ids.len()));
1719   }
1720
1721   let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1722   workstreams.sort_by(|left, right| {
1723      right
1724         .weight
1725         .cmp(&left.weight)
1726         .then_with(|| left.label.cmp(&right.label))
1727   });
1728
1729   let bin_count = max_commits.min(workstreams.len());
1730   let mut bins: Vec<FallbackBin> = Vec::new();
1731   for workstream in workstreams {
1732      if bins.len() < bin_count {
1733         bins.push(FallbackBin {
1734            labels:       vec![workstream.label],
1735            file_ids:     workstream.file_ids,
1736            total_weight: workstream.weight,
1737         });
1738         continue;
1739      }
1740
1741      let Some((target_idx, _)) = bins
1742         .iter()
1743         .enumerate()
1744         .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1745      else {
1746         continue;
1747      };
1748
1749      let target_bin = &mut bins[target_idx];
1750      target_bin.labels.push(workstream.label);
1751      target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1752      target_bin.file_ids.extend(workstream.file_ids);
1753   }
1754
1755   let mut groups = Vec::new();
1756   for (idx, bin) in bins.into_iter().enumerate() {
1757      let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1758      let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1759      let scope = (bin.labels.len() == 1)
1760         .then(|| fallback_scope_for_label(&bin.labels[0]))
1761         .flatten();
1762      let rationale = fallback_rationale_for_labels(&bin.labels);
1763
1764      groups.push(ComposeIntentGroup {
1765         group_id: format!("G{}", idx + 1),
1766         commit_type,
1767         scope,
1768         file_ids: ordered_ids,
1769         rationale,
1770         dependencies: Vec::new(),
1771      });
1772   }
1773
1774   Ok(groups)
1775}
1776
1777#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
1778async fn analyze_compose_intent(
1779   snapshot: &ComposeSnapshot,
1780   observations: &[FileObservation],
1781   config: &CommitConfig,
1782   max_commits: usize,
1783   debug_dir: Option<&Path>,
1784) -> Result<ComposeIntentPlan> {
1785   let planning_index = build_planning_index(snapshot);
1786   let stat_summary = render_planning_stat(&planning_index);
1787   let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1788   let planning_targets = render_planning_targets(&planning_index, snapshot);
1789   let planning_notes = render_planning_notes(&planning_index);
1790   let split_bias = render_split_bias(&planning_index);
1791   let schema = build_intent_schema(config);
1792   let variant = if config.markdown_output {
1793      "markdown"
1794   } else {
1795      "default"
1796   };
1797   let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1798      variant,
1799      max_commits,
1800      stat: &stat_summary,
1801      snapshot_summary: &snapshot_summary,
1802      planning_targets: &planning_targets,
1803      planning_notes: &planning_notes,
1804      split_bias: &split_bias,
1805   })?;
1806
1807   let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1808      operation:        "compose/intent",
1809      model:            &config.analysis_model,
1810      prompt_family:    "compose-intent",
1811      prompt_variant:   variant,
1812      system_prompt:    &parts.system,
1813      user_prompt:      &parts.user,
1814      tool_name:        "create_compose_intent_plan",
1815      tool_description: "Plan logical commit groups over the provided planning target IDs",
1816      schema:           &schema,
1817      progress_label:   Some("compose intent planner"),
1818      debug:            debug_dir.map(|dir| OneShotDebug {
1819         dir:    Some(dir),
1820         prefix: None,
1821         name:   "compose_intent",
1822      }),
1823      cacheable:        true,
1824   })
1825   .await?;
1826
1827   let (mut groups, repair_notes) =
1828      normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1829   for note in &repair_notes {
1830      eprintln!("{}", style::warning(note));
1831   }
1832   if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1833      eprintln!(
1834         "{}",
1835         style::warning(
1836            "Compose intent collapsed into a monolithic large-patch group; falling back to \
1837             path-based workstream splits."
1838         )
1839      );
1840      groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1841   }
1842   let dependency_order =
1843      compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1844
1845   Ok(ComposeIntentPlan { groups, dependency_order })
1846}
1847
1848#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1849fn should_collect_compose_observations(
1850   snapshot: &ComposeSnapshot,
1851   config: &CommitConfig,
1852   counter: &TokenCounter,
1853) -> bool {
1854   planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1855      && should_use_map_reduce(&snapshot.diff, config, counter)
1856}
1857
1858#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1859fn auto_assign_hunks(
1860   snapshot: &ComposeSnapshot,
1861   intent_plan: &ComposeIntentPlan,
1862) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1863   let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1864   for group in &intent_plan.groups {
1865      for file_id in &group.file_ids {
1866         groups_by_file
1867            .entry(file_id.as_str())
1868            .or_default()
1869            .push(group.group_id.as_str());
1870      }
1871   }
1872
1873   let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1874      .groups
1875      .iter()
1876      .map(|group| (group.group_id.clone(), BTreeSet::new()))
1877      .collect();
1878   let mut ambiguous = Vec::new();
1879
1880   for file in &snapshot.files {
1881      let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1882         return Err(CommitGenError::Other(format!(
1883            "No compose group claimed file {} ({})",
1884            file.file_id, file.path
1885         )));
1886      };
1887
1888      if candidate_group_ids.len() == 1 {
1889         let group_id = candidate_group_ids[0];
1890         let entry = assigned
1891            .get_mut(group_id)
1892            .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1893         for hunk_id in &file.hunk_ids {
1894            entry.insert(hunk_id.clone());
1895         }
1896      } else {
1897         ambiguous.push(AmbiguousFileBinding {
1898            file_id:             file.file_id.clone(),
1899            path:                file.path.clone(),
1900            candidate_group_ids: candidate_group_ids
1901               .iter()
1902               .map(|group_id| (*group_id).to_string())
1903               .collect(),
1904            hunk_ids:            file.hunk_ids.clone(),
1905         });
1906      }
1907   }
1908
1909   Ok((assigned, ambiguous))
1910}
1911
1912fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1913   let mut out = String::new();
1914   for group in groups {
1915      let scope = group
1916         .scope
1917         .as_ref()
1918         .map(|scope| format!("({})", scope.as_str()))
1919         .unwrap_or_default();
1920      writeln!(
1921         out,
1922         "- {} [{}{}] {}",
1923         group.group_id,
1924         group.commit_type.as_str(),
1925         scope,
1926         group.rationale
1927      )
1928      .unwrap();
1929   }
1930
1931   out
1932}
1933
1934fn render_binding_ambiguous_files(
1935   snapshot: &ComposeSnapshot,
1936   ambiguous_files: &[AmbiguousFileBinding],
1937) -> String {
1938   let mut out = String::new();
1939   for ambiguous_file in ambiguous_files {
1940      writeln!(
1941         out,
1942         "- {} {} candidates: {}",
1943         ambiguous_file.file_id,
1944         ambiguous_file.path,
1945         ambiguous_file.candidate_group_ids.join(", ")
1946      )
1947      .unwrap();
1948
1949      for hunk_id in &ambiguous_file.hunk_ids {
1950         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1951            if hunk.synthetic {
1952               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1953            } else {
1954               writeln!(
1955                  out,
1956                  "  - {} old:{} new:{} :: {}",
1957                  hunk.hunk_id,
1958                  format_line_range(hunk.old_start, hunk.old_count),
1959                  format_line_range(hunk.new_start, hunk.new_count),
1960                  hunk.snippet
1961               )
1962               .unwrap();
1963            }
1964         }
1965      }
1966   }
1967
1968   out
1969}
1970
1971async fn request_binding(
1972   snapshot: &ComposeSnapshot,
1973   groups: &[ComposeIntentGroup],
1974   ambiguous_files: &[AmbiguousFileBinding],
1975   config: &CommitConfig,
1976   debug_dir: Option<&Path>,
1977   debug_name: &str,
1978) -> Result<Vec<ComposeBindingAssignment>> {
1979   let schema = build_binding_schema();
1980   let groups_text = render_binding_groups(groups);
1981   let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1982   let variant = if config.markdown_output {
1983      "markdown"
1984   } else {
1985      "default"
1986   };
1987   let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1988      variant,
1989      groups: &groups_text,
1990      ambiguous_files: &ambiguous_files_text,
1991   })?;
1992   let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1993      operation:        "compose/bind",
1994      model:            &config.analysis_model,
1995      prompt_family:    "compose-bind",
1996      prompt_variant:   variant,
1997      system_prompt:    &parts.system,
1998      user_prompt:      &parts.user,
1999      tool_name:        "bind_compose_hunks",
2000      tool_description: "Assign hunk IDs to existing compose groups",
2001      schema:           &schema,
2002      progress_label:   Some("compose hunk binder"),
2003      debug:            debug_dir.map(|dir| OneShotDebug {
2004         dir:    Some(dir),
2005         prefix: None,
2006         name:   debug_name,
2007      }),
2008      cacheable:        true,
2009   })
2010   .await?;
2011
2012   Ok(response.output.assignments)
2013}
2014
2015fn ambiguous_hunk_context(
2016   ambiguous_files: &[AmbiguousFileBinding],
2017) -> HashMap<String, AmbiguousHunkContext> {
2018   let mut context = HashMap::new();
2019   for ambiguous_file in ambiguous_files {
2020      for hunk_id in &ambiguous_file.hunk_ids {
2021         context.insert(hunk_id.clone(), AmbiguousHunkContext {
2022            candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2023         });
2024      }
2025   }
2026   context
2027}
2028
2029fn evaluate_binding(
2030   assignments: &[ComposeBindingAssignment],
2031   hunk_context: &HashMap<String, AmbiguousHunkContext>,
2032   valid_group_ids: &HashSet<&str>,
2033   snapshot: &ComposeSnapshot,
2034) -> BindingEvaluation {
2035   let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2036
2037   for assignment in assignments {
2038      if !valid_group_ids.contains(assignment.group_id.as_str()) {
2039         continue;
2040      }
2041
2042      let mut seen_in_group = HashSet::new();
2043      for hunk_id in &assignment.hunk_ids {
2044         if !seen_in_group.insert(hunk_id.as_str()) {
2045            continue;
2046         }
2047
2048         let Some(context) = hunk_context.get(hunk_id) else {
2049            continue;
2050         };
2051
2052         if !context
2053            .candidate_group_ids
2054            .iter()
2055            .any(|candidate| candidate == &assignment.group_id)
2056         {
2057            continue;
2058         }
2059
2060         match assigned_hunk_to_group.get(hunk_id) {
2061            None => {
2062               assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2063            },
2064            Some(existing_group) if existing_group == &assignment.group_id => {},
2065            Some(_) => {
2066               assigned_hunk_to_group.remove(hunk_id);
2067            },
2068         }
2069      }
2070   }
2071
2072   let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2073   for (hunk_id, group_id) in assigned_hunk_to_group {
2074      assigned_by_group.entry(group_id).or_default().push(hunk_id);
2075   }
2076
2077   for hunk_ids in assigned_by_group.values_mut() {
2078      let ordered: Vec<String> = snapshot
2079         .hunks
2080         .iter()
2081         .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2082         .map(|hunk| hunk.hunk_id.clone())
2083         .collect();
2084      *hunk_ids = ordered;
2085   }
2086
2087   let unresolved = snapshot
2088      .hunks
2089      .iter()
2090      .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2091      .filter(|hunk| {
2092         !assigned_by_group.values().any(|assigned_hunks| {
2093            assigned_hunks
2094               .iter()
2095               .any(|assigned| assigned == &hunk.hunk_id)
2096         })
2097      })
2098      .map(|hunk| hunk.hunk_id.clone())
2099      .collect();
2100
2101   BindingEvaluation { assigned: assigned_by_group, unresolved }
2102}
2103
2104fn filter_ambiguous_files(
2105   ambiguous_files: &[AmbiguousFileBinding],
2106   hunk_ids: &[String],
2107) -> Vec<AmbiguousFileBinding> {
2108   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2109
2110   ambiguous_files
2111      .iter()
2112      .filter_map(|file| {
2113         let matching_hunks: Vec<String> = file
2114            .hunk_ids
2115            .iter()
2116            .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2117            .cloned()
2118            .collect();
2119
2120         (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2121            file_id:             file.file_id.clone(),
2122            path:                file.path.clone(),
2123            candidate_group_ids: file.candidate_group_ids.clone(),
2124            hunk_ids:            matching_hunks,
2125         })
2126      })
2127      .collect()
2128}
2129
2130fn chunk_ambiguous_files(
2131   ambiguous_files: &[AmbiguousFileBinding],
2132) -> Vec<Vec<AmbiguousFileBinding>> {
2133   if ambiguous_files.is_empty() {
2134      return Vec::new();
2135   }
2136
2137   let mut batches = Vec::new();
2138   let mut current_batch = Vec::new();
2139   let mut current_hunk_count = 0_usize;
2140
2141   for file in ambiguous_files {
2142      let file_hunk_count = file.hunk_ids.len();
2143      let should_split = !current_batch.is_empty()
2144         && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2145            || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2146
2147      if should_split {
2148         batches.push(current_batch);
2149         current_batch = Vec::new();
2150         current_hunk_count = 0;
2151      }
2152
2153      current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2154      current_batch.push(file.clone());
2155   }
2156
2157   if !current_batch.is_empty() {
2158      batches.push(current_batch);
2159   }
2160
2161   batches
2162}
2163
2164fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2165   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2166
2167   snapshot
2168      .hunks
2169      .iter()
2170      .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2171      .map(|hunk| hunk.hunk_id.clone())
2172      .collect()
2173}
2174
2175fn fallback_group_for_hunk(
2176   hunk_id: &str,
2177   ambiguous_files: &[AmbiguousFileBinding],
2178   group_rank: &HashMap<&str, usize>,
2179) -> Option<String> {
2180   ambiguous_files.iter().find_map(|file| {
2181      file
2182         .hunk_ids
2183         .iter()
2184         .any(|candidate| candidate == hunk_id)
2185         .then(|| {
2186            file
2187               .candidate_group_ids
2188               .iter()
2189               .min_by_key(|group_id| {
2190                  group_rank
2191                     .get(group_id.as_str())
2192                     .copied()
2193                     .unwrap_or(usize::MAX)
2194               })
2195               .cloned()
2196         })
2197   })?
2198}
2199
2200fn assign_unresolved_hunks(
2201   unresolved_hunks: &[String],
2202   assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2203   ambiguous_files: &[AmbiguousFileBinding],
2204   group_rank: &HashMap<&str, usize>,
2205) {
2206   for hunk_id in unresolved_hunks {
2207      if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2208         && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2209      {
2210         group_hunks.insert(hunk_id.clone());
2211      }
2212   }
2213}
2214
2215fn normalize_group_type(
2216   snapshot: &ComposeSnapshot,
2217   file_ids: &[String],
2218   original_type: &CommitType,
2219) -> Result<CommitType> {
2220   let dependency_only = !file_ids.is_empty()
2221      && file_ids.iter().all(|file_id| {
2222         snapshot
2223            .file_by_id(file_id)
2224            .is_some_and(|file| is_dependency_manifest(&file.path))
2225      });
2226
2227   if dependency_only && original_type.as_str() != "build" {
2228      CommitType::new("build")
2229   } else {
2230      Ok(original_type.clone())
2231   }
2232}
2233
2234fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2235   snapshot
2236      .files
2237      .iter()
2238      .filter(|file| {
2239         hunk_ids
2240            .iter()
2241            .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2242      })
2243      .map(|file| file.file_id.clone())
2244      .collect()
2245}
2246
2247fn build_redirects(
2248   intent_plan: &ComposeIntentPlan,
2249   executable_groups: &[ComposeExecutableGroup],
2250   group_rank: &HashMap<&str, usize>,
2251) -> HashMap<String, String> {
2252   let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2253      .iter()
2254      .filter(|group| !group.hunk_ids.is_empty())
2255      .map(|group| (group.group_id.as_str(), group))
2256      .collect();
2257
2258   let mut redirects = HashMap::new();
2259   for group in &intent_plan.groups {
2260      if surviving_groups.contains_key(group.group_id.as_str()) {
2261         continue;
2262      }
2263
2264      let redirect = executable_groups
2265         .iter()
2266         .filter(|candidate| candidate.group_id != group.group_id)
2267         .filter(|candidate| {
2268            candidate.file_ids.iter().any(|file_id| {
2269               group
2270                  .file_ids
2271                  .iter()
2272                  .any(|candidate_id| candidate_id == file_id)
2273            })
2274         })
2275         .min_by_key(|candidate| {
2276            group_rank
2277               .get(candidate.group_id.as_str())
2278               .copied()
2279               .unwrap_or(usize::MAX)
2280         })
2281         .map(|candidate| candidate.group_id.clone());
2282
2283      if let Some(redirect) = redirect {
2284         redirects.insert(group.group_id.clone(), redirect);
2285      }
2286   }
2287
2288   redirects
2289}
2290
/// Follows the redirect chain starting at `group_id` and returns the id it
/// ends on.
///
/// An id without a redirect resolves to itself. If the chain loops, the walk
/// stops at the first id it reaches a second time and returns that id.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
   let mut visited: HashSet<&str> = HashSet::new();
   let mut cursor: &str = group_id;

   loop {
      let Some(target) = redirects.get(cursor) else {
         break;
      };
      // Arriving at an id already expanded means the chain is cyclic.
      if !visited.insert(cursor) {
         break;
      }
      cursor = target.as_str();
   }

   cursor.to_string()
}
2304
2305fn prune_empty_groups(
2306   groups: Vec<ComposeExecutableGroup>,
2307   redirects: &HashMap<String, String>,
2308) -> Result<ComposeExecutablePlan> {
2309   let surviving_ids: HashSet<String> = groups
2310      .iter()
2311      .filter(|group| !group.hunk_ids.is_empty())
2312      .map(|group| group.group_id.clone())
2313      .collect();
2314
2315   let mut surviving_groups = Vec::new();
2316   for mut group in groups {
2317      if group.hunk_ids.is_empty() {
2318         continue;
2319      }
2320
2321      let mut rewritten_dependencies = Vec::new();
2322      for dependency in &group.dependencies {
2323         let rewritten = resolve_redirect(dependency, redirects);
2324         if rewritten != group.group_id
2325            && surviving_ids.contains(&rewritten)
2326            && !rewritten_dependencies
2327               .iter()
2328               .any(|existing| existing == &rewritten)
2329         {
2330            rewritten_dependencies.push(rewritten);
2331         }
2332      }
2333
2334      group.dependencies = rewritten_dependencies;
2335      surviving_groups.push(group);
2336   }
2337
2338   let dependency_order = compute_dependency_order(
2339      &surviving_groups,
2340      |group| &group.group_id,
2341      |group| &group.dependencies,
2342   )?;
2343   Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2344}
2345
2346fn finalize_executable_plan(
2347   snapshot: &ComposeSnapshot,
2348   intent_plan: &ComposeIntentPlan,
2349   assigned_by_group: HashMap<String, BTreeSet<String>>,
2350) -> Result<ComposeExecutablePlan> {
2351   let group_rank: HashMap<&str, usize> = intent_plan
2352      .dependency_order
2353      .iter()
2354      .enumerate()
2355      .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2356      .collect();
2357
2358   let mut executable_groups = Vec::new();
2359   for group in &intent_plan.groups {
2360      let hunk_ids: Vec<String> = snapshot
2361         .hunks
2362         .iter()
2363         .filter(|hunk| {
2364            assigned_by_group
2365               .get(&group.group_id)
2366               .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2367         })
2368         .map(|hunk| hunk.hunk_id.clone())
2369         .collect();
2370
2371      let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2372      let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2373      executable_groups.push(ComposeExecutableGroup {
2374         group_id: group.group_id.clone(),
2375         commit_type,
2376         scope: group.scope.clone(),
2377         file_ids,
2378         rationale: group.rationale.clone(),
2379         dependencies: group.dependencies.clone(),
2380         hunk_ids,
2381      });
2382   }
2383
2384   let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2385   prune_empty_groups(executable_groups, &redirects)
2386}
2387
2388fn validate_executable_plan(
2389   snapshot: &ComposeSnapshot,
2390   plan: &ComposeExecutablePlan,
2391) -> Result<()> {
2392   if plan.groups.is_empty() {
2393      return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2394   }
2395
2396   let known_hunks: HashSet<&str> = snapshot
2397      .hunks
2398      .iter()
2399      .map(|hunk| hunk.hunk_id.as_str())
2400      .collect();
2401   let known_files: HashSet<&str> = snapshot
2402      .files
2403      .iter()
2404      .map(|file| file.file_id.as_str())
2405      .collect();
2406   let mut coverage = HashMap::<String, String>::new();
2407
2408   for group in &plan.groups {
2409      if group.hunk_ids.is_empty() {
2410         return Err(CommitGenError::Other(format!(
2411            "Compose group {} ended up empty after binding",
2412            group.group_id
2413         )));
2414      }
2415
2416      for file_id in &group.file_ids {
2417         if !known_files.contains(file_id.as_str()) {
2418            return Err(CommitGenError::Other(format!(
2419               "Compose group {} references unknown file_id {}",
2420               group.group_id, file_id
2421            )));
2422         }
2423      }
2424
2425      for hunk_id in &group.hunk_ids {
2426         if !known_hunks.contains(hunk_id.as_str()) {
2427            return Err(CommitGenError::Other(format!(
2428               "Compose group {} references unknown hunk_id {}",
2429               group.group_id, hunk_id
2430            )));
2431         }
2432
2433         if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2434            return Err(CommitGenError::Other(format!(
2435               "Hunk {} was assigned to both {} and {}",
2436               hunk_id, existing_group, group.group_id
2437            )));
2438         }
2439      }
2440   }
2441
2442   let missing_hunks: Vec<String> = snapshot
2443      .hunks
2444      .iter()
2445      .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2446      .map(|hunk| hunk.hunk_id.clone())
2447      .collect();
2448   if !missing_hunks.is_empty() {
2449      return Err(CommitGenError::Other(format!(
2450         "Compose plan left hunks unassigned: {}",
2451         missing_hunks.join(", ")
2452      )));
2453   }
2454
2455   let dependency_order =
2456      compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2457   if dependency_order != plan.dependency_order {
2458      return Err(CommitGenError::Other(
2459         "Compose dependency order does not match recomputed order".to_string(),
2460      ));
2461   }
2462
2463   Ok(())
2464}
2465
2466#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2467async fn bind_compose_plan(
2468   snapshot: &ComposeSnapshot,
2469   intent_plan: &ComposeIntentPlan,
2470   config: &CommitConfig,
2471   debug_dir: Option<&Path>,
2472) -> Result<ComposeExecutablePlan> {
2473   let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2474
2475   if !ambiguous_files.is_empty() {
2476      let valid_group_ids: HashSet<&str> = intent_plan
2477         .groups
2478         .iter()
2479         .map(|group| group.group_id.as_str())
2480         .collect();
2481      let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2482      let mut unresolved = Vec::new();
2483
2484      for (batch_idx, batch) in binding_batches.iter().enumerate() {
2485         let hunk_context = ambiguous_hunk_context(batch);
2486         let debug_name = if binding_batches.len() == 1 {
2487            "compose_bind".to_string()
2488         } else {
2489            format!("compose_bind_{:02}", batch_idx + 1)
2490         };
2491         let assignments =
2492            request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2493               .await?;
2494         let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2495         for (group_id, hunk_ids) in evaluation.assigned {
2496            let entry = assigned_by_group.entry(group_id).or_default();
2497            for hunk_id in hunk_ids {
2498               entry.insert(hunk_id);
2499            }
2500         }
2501         unresolved.extend(evaluation.unresolved);
2502      }
2503
2504      let group_rank: HashMap<&str, usize> = intent_plan
2505         .dependency_order
2506         .iter()
2507         .enumerate()
2508         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2509         .collect();
2510
2511      let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2512      if !unresolved.is_empty() {
2513         let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2514         let repair_batches = chunk_ambiguous_files(&unresolved_files);
2515         let mut repair_unresolved = Vec::new();
2516
2517         for (batch_idx, batch) in repair_batches.iter().enumerate() {
2518            let debug_name = if repair_batches.len() == 1 {
2519               "compose_bind_repair".to_string()
2520            } else {
2521               format!("compose_bind_repair_{:02}", batch_idx + 1)
2522            };
2523            let repair_assignments = request_binding(
2524               snapshot,
2525               &intent_plan.groups,
2526               batch,
2527               config,
2528               debug_dir,
2529               &debug_name,
2530            )
2531            .await?;
2532            let repair_context = ambiguous_hunk_context(batch);
2533            let repair =
2534               evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2535            for (group_id, hunk_ids) in repair.assigned {
2536               let entry = assigned_by_group.entry(group_id).or_default();
2537               for hunk_id in hunk_ids {
2538                  entry.insert(hunk_id);
2539               }
2540            }
2541
2542            repair_unresolved.extend(repair.unresolved);
2543         }
2544         unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2545
2546         if !unresolved.is_empty() {
2547            assign_unresolved_hunks(
2548               &unresolved,
2549               &mut assigned_by_group,
2550               &ambiguous_files,
2551               &group_rank,
2552            );
2553         }
2554      }
2555   }
2556
2557   let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2558   validate_executable_plan(snapshot, &plan)?;
2559   Ok(plan)
2560}
2561
2562fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2563   println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2564   for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2565      let group = &plan.groups[group_idx];
2566      let scope = group
2567         .scope
2568         .as_ref()
2569         .map(|scope| format!("({})", style::scope(scope.as_str())))
2570         .unwrap_or_default();
2571
2572      println!(
2573         "\n{}. {} [{}{}] {}",
2574         display_idx + 1,
2575         style::bold(&group.group_id),
2576         style::commit_type(group.commit_type.as_str()),
2577         scope,
2578         group.rationale
2579      );
2580
2581      println!("   Files:");
2582      for file_id in &group.file_ids {
2583         if let Some(file) = snapshot.file_by_id(file_id) {
2584            let selected_hunk_ids: Vec<&str> = group
2585               .hunk_ids
2586               .iter()
2587               .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2588               .map(String::as_str)
2589               .collect();
2590            let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2591               "all hunks".to_string()
2592            } else {
2593               selected_hunk_ids.join(", ")
2594            };
2595            println!("     - {} {} ({selection})", file.file_id, file.path);
2596         }
2597      }
2598
2599      if !group.dependencies.is_empty() {
2600         println!("   Depends on: {}", group.dependencies.join(", "));
2601      }
2602   }
2603}
2604
2605#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2606async fn generate_compose_group_analysis(
2607   stat: &str,
2608   diff: &str,
2609   group: &ComposeExecutableGroup,
2610   config: &CommitConfig,
2611   args: &Args,
2612   debug_prefix: &str,
2613   counter: &TokenCounter,
2614) -> Result<ConventionalAnalysis> {
2615   match compose_analysis_strategy(diff, config, counter) {
2616      ComposeAnalysisStrategy::MapReduce => {
2617         println!(
2618            "  {}",
2619            style::info(&format!(
2620               "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2621               group.group_id
2622            ))
2623         );
2624         run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2625      },
2626      strategy => {
2627         let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2628            eprintln!(
2629               "  {}",
2630               style::warning(&format!(
2631                  "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2632                  group.group_id
2633               ))
2634            );
2635            Cow::Owned(smart_truncate_diff(
2636               diff,
2637               compose_truncation_length(config),
2638               config,
2639               counter,
2640            ))
2641         } else {
2642            Cow::Borrowed(diff)
2643         };
2644
2645         let ctx = AnalysisContext {
2646            user_context:    Some(&group.rationale),
2647            recent_commits:  None,
2648            common_scopes:   None,
2649            project_context: None,
2650            debug_output:    args.debug_output.as_deref(),
2651            debug_prefix:    Some(debug_prefix),
2652         };
2653
2654         generate_conventional_analysis(
2655            stat,
2656            analysis_diff.as_ref(),
2657            &config.analysis_model,
2658            "",
2659            &ctx,
2660            config,
2661         )
2662         .await
2663      },
2664   }
2665}
2666
2667fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2668   let files: Vec<&str> = group
2669      .file_ids
2670      .iter()
2671      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2672      .collect();
2673
2674   if files.is_empty() {
2675      "no files resolved".to_string()
2676   } else {
2677      files.join(", ")
2678   }
2679}
2680
2681/// Hunk ids for `file_id` planned by every group up to and including the group
2682/// at `position` in the dependency order. Used to reconstruct a file's intended
2683/// index content at a given commit from its base, independent of apply order.
2684fn cumulative_file_hunk_ids(
2685   plan: &ComposeExecutablePlan,
2686   position: usize,
2687   snapshot: &ComposeSnapshot,
2688   file_id: &str,
2689) -> Vec<String> {
2690   let mut hunk_ids = Vec::new();
2691   for &group_idx in plan.dependency_order.iter().take(position + 1) {
2692      let Some(group) = plan.groups.get(group_idx) else {
2693         continue;
2694      };
2695      for hunk_id in &group.hunk_ids {
2696         if snapshot
2697            .hunk_by_id(hunk_id)
2698            .is_some_and(|hunk| hunk.file_id == file_id)
2699         {
2700            hunk_ids.push(hunk_id.clone());
2701         }
2702      }
2703   }
2704   hunk_ids
2705}
2706
2707#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2708pub async fn execute_compose(
2709   snapshot: &ComposeSnapshot,
2710   plan: &ComposeExecutablePlan,
2711   config: &CommitConfig,
2712   args: &Args,
2713   base_state: &ComposeBaseState,
2714) -> Result<Vec<String>> {
2715   let total = plan.dependency_order.len();
2716
2717   // Phase 1: derive each group's diff/stat from the immutable compose snapshot.
2718   // This avoids mutating the index while commit messages are prepared and keeps
2719   // later worktree edits out of already-planned commits.
2720   let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2721   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2722      let group = &plan.groups[group_idx];
2723      println!(
2724         "  {}",
2725         style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2726      );
2727      let group_patch = create_executable_group_patch(snapshot, group)?;
2728      group_diff_stats.push((group_patch.diff, group_patch.stat));
2729   }
2730
2731   // Phase 2: generate commit messages concurrently. Both LLM calls per group
2732   // (analysis + summary) run inside a single async task so the slower of the
2733   // two does not block other groups from progressing.
2734   println!(
2735      "{}",
2736      style::info(&format!(
2737         "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2738         COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2739      ))
2740   );
2741
2742   let token_counter = create_token_counter(config);
2743   let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2744      stream::iter(plan.dependency_order.iter().enumerate())
2745         .map(|(idx, &group_idx)| {
2746            let group = &plan.groups[group_idx];
2747            let (diff, stat) = &group_diff_stats[idx];
2748            let debug_prefix = format!("compose-{}", idx + 1);
2749            let token_counter = &token_counter;
2750            async move {
2751               let result = async {
2752                  let analysis = generate_compose_group_analysis(
2753                     stat,
2754                     diff,
2755                     group,
2756                     config,
2757                     args,
2758                     &debug_prefix,
2759                     token_counter,
2760                  )
2761                  .await?;
2762                  let body = analysis.body_texts();
2763                  let summary = generate_summary_from_analysis(
2764                     stat,
2765                     group.commit_type.as_str(),
2766                     group.scope.as_ref().map(|scope| scope.as_str()),
2767                     &body,
2768                     Some(&group.rationale),
2769                     config,
2770                     args.debug_output.as_deref(),
2771                     Some(&debug_prefix),
2772                  )
2773                  .await?;
2774                  Ok::<_, CommitGenError>((body, summary))
2775               }
2776               .await;
2777
2778               result.map_err(|source| CommitGenError::ComposeMessageError {
2779                  group_id: group.group_id.clone(),
2780                  files:    compose_group_file_list(snapshot, group),
2781                  source:   Box::new(source),
2782               })
2783            }
2784         })
2785         .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2786         .collect::<Vec<_>>()
2787         .await
2788         .into_iter()
2789         .collect::<Result<Vec<_>>>()?;
2790
2791   execute_compose_with_prepared_messages(
2792      snapshot,
2793      plan,
2794      config,
2795      args,
2796      base_state,
2797      prepared_messages,
2798   )
2799}
2800
2801#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2802fn execute_compose_with_prepared_messages(
2803   snapshot: &ComposeSnapshot,
2804   plan: &ComposeExecutablePlan,
2805   config: &CommitConfig,
2806   args: &Args,
2807   base_state: &ComposeBaseState,
2808   prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2809) -> Result<Vec<String>> {
2810   let dir = &args.dir;
2811   let total = plan.dependency_order.len();
2812   if args.compose_preview {
2813      return Ok(Vec::new());
2814   }
2815
2816   let index = TempGitIndex::new(dir)?;
2817   read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2818
2819   let mut commit_hashes = Vec::new();
2820   let mut parent_hash = base_state.head_hash.clone();
2821
2822   // Phase 3: sequential commit-object loop. Re-stage each group into an
2823   // isolated temporary index, then create commit objects parented in memory.
2824   // The real branch and index are not updated until every group succeeds.
2825   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2826      let group = &plan.groups[group_idx];
2827
2828      println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2829      println!("  Type: {}", style::commit_type(group.commit_type.as_str()));
2830      if let Some(scope) = &group.scope {
2831         println!("  Scope: {}", style::scope(scope.as_str()));
2832      }
2833      let paths: Vec<String> = group
2834         .file_ids
2835         .iter()
2836         .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2837         .collect();
2838      println!("  Files: {}", paths.join(", "));
2839
2840      let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2841      let mut staged_anything = outcome.result == StageResult::Staged;
2842
2843      // Any file whose planned patch no longer applies against the temporary
2844      // index is reconstructed from the immutable snapshot base and cumulative
2845      // hunk selection. The real index and worktree are never touched here.
2846      for skipped in &outcome.skipped {
2847         let Some(file) = snapshot.file_by_path(&skipped.path) else {
2848            continue;
2849         };
2850         let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2851         force_stage_file_from_base_in_index(
2852            snapshot,
2853            &file.file_id,
2854            &cumulative,
2855            dir,
2856            index.path(),
2857         )?;
2858         staged_anything = true;
2859         eprintln!(
2860            "  {}",
2861            style::info(&format!(
2862               "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2863               skipped.path
2864            ))
2865         );
2866      }
2867
2868      if !staged_anything {
2869         eprintln!(
2870            "  {}",
2871            style::warning(&format!(
2872               "Skipping commit {}: its planned patch is already applied ({:?})",
2873               group.group_id, outcome.result
2874            ))
2875         );
2876         continue;
2877      }
2878
2879      let (analysis_body, summary) = prepared_messages[idx].clone();
2880      let mut commit = ConventionalCommit {
2881         commit_type: group.commit_type.clone(),
2882         scope: group.scope.clone(),
2883         summary,
2884         body: analysis_body,
2885         footers: vec![],
2886      };
2887      post_process_commit_message(&mut commit, config);
2888
2889      if let Err(err) = validate_commit_message(&commit, config) {
2890         eprintln!(
2891            "  {}",
2892            style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2893         );
2894      }
2895
2896      let mut formatted_message = format_commit_message(&commit);
2897      if args.signoff || config.signoff {
2898         formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2899      }
2900      println!(
2901         "  Message:\n{}",
2902         formatted_message
2903            .lines()
2904            .take(3)
2905            .collect::<Vec<_>>()
2906            .join("\n")
2907      );
2908
2909      let tree = write_index_tree(index.path(), dir)?;
2910      let sign = args.sign || config.gpg_sign;
2911      let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2912      parent_hash.clone_from(&hash);
2913      commit_hashes.push(hash);
2914
2915      if args.compose_test_after_each {
2916         return Err(CommitGenError::Other(
2917            "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2918         ));
2919      }
2920   }
2921
2922   if commit_hashes.is_empty() {
2923      return Ok(commit_hashes);
2924   }
2925
2926   update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2927
2928   let current_index_tree = write_real_index_tree(dir)?;
2929   if current_index_tree == base_state.index_tree {
2930      reset_mixed_to(&parent_hash, dir)?;
2931   } else {
2932      // Someone staged while compose ran. The commits contain only pinned
2933      // snapshot content, so just refresh the index entries for the paths
2934      // compose committed and leave the drifted staging intact.
2935      println!(
2936         "{}",
2937         style::warning("Index changed during compose; preserving newly staged changes")
2938      );
2939      let paths: Vec<String> = snapshot
2940         .files
2941         .iter()
2942         .map(|file| file.path.clone())
2943         .collect();
2944      reset_paths_to(&parent_hash, &paths, dir)?;
2945   }
2946
2947   Ok(commit_hashes)
2948}
2949
2950#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2951pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2952   let max_rounds = config.compose_max_rounds;
2953
2954   for round in 1..=max_rounds {
2955      if round > 1 {
2956         println!(
2957            "\n{}",
2958            style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2959         );
2960      } else {
2961         println!("{}", style::section_header("Compose Mode", 80));
2962      }
2963      println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2964
2965      run_compose_round(args, config, round).await?;
2966
2967      if args.compose_preview {
2968         break;
2969      }
2970      match get_compose_diff_with_config(&args.dir, config) {
2971         Err(CommitGenError::NoChanges { .. }) => {
2972            println!(
2973               "\n{}",
2974               style::success(&format!(
2975                  "{} All changes committed successfully",
2976                  style::icons::SUCCESS
2977               ))
2978            );
2979            break;
2980         },
2981         Err(err) => return Err(err),
2982         Ok(remaining_diff) => {
2983            eprintln!(
2984               "\n{}",
2985               style::warning(&format!(
2986                  "{} Uncommitted changes remain after round {round}",
2987                  style::icons::WARNING
2988               ))
2989            );
2990            eprintln!("{remaining_diff}");
2991         },
2992      }
2993
2994      if round < max_rounds {
2995         eprintln!("{}", style::info("Starting another compose round..."));
2996      } else {
2997         eprintln!(
2998            "{}",
2999            style::warning(&format!(
3000               "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
3001            ))
3002         );
3003      }
3004   }
3005
3006   Ok(())
3007}
3008
3009#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
3010async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
3011   let base_state = capture_compose_base_state(&args.dir)?;
3012   let diff = get_compose_diff_with_config(&args.dir, config)?;
3013   let stat = get_compose_stat(&args.dir)?;
3014   let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3015   // Freeze every file's on-disk content into the odb before any LLM call:
3016   // staging later reads these pins, never the live worktree, so edits made
3017   // while compose runs cannot leak into its commits.
3018   pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3019   let snapshot = snapshot;
3020
3021   if let Some(debug_dir) = args.debug_output.as_deref() {
3022      save_debug_artifact(
3023         Some(debug_dir),
3024         &format!("compose_round_{round}_snapshot.json"),
3025         &snapshot,
3026      )?;
3027   }
3028
3029   let token_counter = create_token_counter(config);
3030   let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3031      println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3032      observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3033   } else {
3034      if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3035         && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3036      {
3037         println!(
3038            "{}",
3039            style::info(
3040               "Skipping per-file observations for very large compose snapshot; using area-level \
3041                planning instead."
3042            )
3043         );
3044      }
3045      Vec::new()
3046   };
3047
3048   if let Some(debug_dir) = args.debug_output.as_deref()
3049      && !observations.is_empty()
3050   {
3051      save_debug_artifact(
3052         Some(debug_dir),
3053         &format!("compose_round_{round}_observations.json"),
3054         &observations,
3055      )?;
3056   }
3057
3058   let max_commits = args.compose_max_commits.unwrap_or(20);
3059   let executable_plan = if let Some(cached_plan) =
3060      load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3061   {
3062      println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3063      cached_plan
3064   } else {
3065      println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3066      let intent_plan = analyze_compose_intent(
3067         &snapshot,
3068         &observations,
3069         config,
3070         max_commits,
3071         args.debug_output.as_deref(),
3072      )
3073      .await?;
3074
3075      if let Some(debug_dir) = args.debug_output.as_deref() {
3076         save_debug_artifact(
3077            Some(debug_dir),
3078            &format!("compose_round_{round}_intent_plan.json"),
3079            &intent_plan,
3080         )?;
3081      }
3082
3083      println!("{}", style::info("Binding hunks to groups..."));
3084      let plan =
3085         bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3086      save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3087      plan
3088   };
3089
3090   if let Some(debug_dir) = args.debug_output.as_deref() {
3091      save_debug_artifact(
3092         Some(debug_dir),
3093         &format!("compose_round_{round}_executable_plan.json"),
3094         &executable_plan,
3095      )?;
3096   }
3097
3098   print_executable_plan(&snapshot, &executable_plan);
3099
3100   if args.compose_preview {
3101      println!(
3102         "\n{}",
3103         style::success(&format!(
3104            "{} Preview complete (use --compose without --compose-preview to execute)",
3105            style::icons::SUCCESS
3106         ))
3107      );
3108      return Ok(());
3109   }
3110
3111   println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3112   let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3113   println!(
3114      "{}",
3115      style::success(&format!(
3116         "{} Round {round}: Created {} commit(s)",
3117         style::icons::SUCCESS,
3118         hashes.len()
3119      ))
3120   );
3121   Ok(())
3122}
3123
3124#[cfg(test)]
3125mod tests {
3126   use std::{fmt::Write, fs};
3127
3128   use tempfile::TempDir;
3129
3130   use super::*;
3131   use crate::{
3132      config::CommitConfig, git::get_compose_diff, patch::build_compose_snapshot, types::CommitType,
3133   };
3134
3135   fn shared_file_diff() -> (&'static str, &'static str) {
3136      (
3137         r#"diff --git a/src/lib.rs b/src/lib.rs
3138index 1111111..2222222 100644
3139--- a/src/lib.rs
3140+++ b/src/lib.rs
3141@@ -1,3 +1,3 @@
3142-fn alpha() {
3143+fn alpha_changed() {
3144     println!("alpha");
3145 }
3146@@ -12,3 +12,3 @@
3147-fn beta() {
3148+fn beta_changed() {
3149     println!("beta");
3150 }
3151diff --git a/tests/lib.rs b/tests/lib.rs
3152index 3333333..4444444 100644
3153--- a/tests/lib.rs
3154+++ b/tests/lib.rs
3155@@ -1,3 +1,4 @@
3156 fn test_it() {
3157+    assert!(true);
3158 }
3159"#,
3160         " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3161      )
3162   }
3163
3164   fn build_test_snapshot() -> ComposeSnapshot {
3165      let (diff, stat) = shared_file_diff();
3166      build_compose_snapshot(diff, stat).unwrap()
3167   }
3168
3169   fn write_file(dir: &TempDir, path: &str, contents: &str) {
3170      let full_path = dir.path().join(path);
3171      if let Some(parent) = full_path.parent() {
3172         fs::create_dir_all(parent).unwrap();
3173      }
3174      fs::write(full_path, contents).unwrap();
3175   }
3176
3177   fn run_git(dir: &TempDir, args: &[&str]) -> String {
3178      let output = crate::git::git_command()
3179         .args(args)
3180         .current_dir(dir.path())
3181         .output()
3182         .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3183
3184      assert!(
3185         output.status.success(),
3186         "git {:?} failed: stdout={} stderr={}",
3187         args,
3188         String::from_utf8_lossy(&output.stdout),
3189         String::from_utf8_lossy(&output.stderr)
3190      );
3191
3192      String::from_utf8_lossy(&output.stdout).to_string()
3193   }
3194
3195   fn init_repo() -> TempDir {
3196      let dir = TempDir::new().unwrap();
3197      run_git(&dir, &["init"]);
3198      run_git(&dir, &["config", "user.name", "Compose Test"]);
3199      run_git(&dir, &["config", "user.email", "compose@test.local"]);
3200      run_git(&dir, &["config", "commit.gpgsign", "false"]);
3201      dir
3202   }
3203
3204   fn commit_all(dir: &TempDir, message: &str) {
3205      run_git(dir, &["add", "."]);
3206      run_git(dir, &["commit", "-m", message]);
3207   }
3208
3209   fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3210      (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3211   }
3212
3213   #[test]
3214   fn test_compose_file_category_treats_prompts_as_functional_source() {
3215      let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3216index 1111111..2222222 100644
3217--- a/prompts/analysis/default.md
3218+++ b/prompts/analysis/default.md
3219@@ -1,1 +1,1 @@
3220-old prompt
3221+new prompt
3222diff --git a/system/analysis/default.md b/system/analysis/default.md
3223index 5555555..6666666 100644
3224--- a/system/analysis/default.md
3225+++ b/system/analysis/default.md
3226@@ -1,1 +1,1 @@
3227-old system
3228+new system
3229diff --git a/README.md b/README.md
3230index 3333333..4444444 100644
3231--- a/README.md
3232+++ b/README.md
3233@@ -1,1 +1,1 @@
3234-old docs
3235+new docs
3236";
3237      let snapshot = build_compose_snapshot(diff, "").unwrap();
3238      let prompt_file = snapshot
3239         .file_by_path("prompts/analysis/default.md")
3240         .unwrap();
3241      let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3242      let readme_file = snapshot.file_by_path("README.md").unwrap();
3243
3244      assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3245      assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3246      assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3247
3248      let feat_group = ComposeIntentGroup {
3249         group_id:     "G1".to_string(),
3250         commit_type:  CommitType::new("feat").unwrap(),
3251         scope:        None,
3252         file_ids:     vec![prompt_file.file_id.clone()],
3253         rationale:    "prompt behavior change".to_string(),
3254         dependencies: vec![],
3255      };
3256      assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3257
3258      let fallback_type =
3259         fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3260            .unwrap();
3261      assert_eq!(fallback_type.as_str(), "refactor");
3262   }
3263
3264   fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3265      let mut diff = String::new();
3266
3267      for file_idx in 0..file_count {
3268         let path = format!("src/module_{file_idx:03}.rs");
3269         writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3270         diff.push_str("index 1111111..2222222 100644\n");
3271         writeln!(diff, "--- a/{path}").unwrap();
3272         writeln!(diff, "+++ b/{path}").unwrap();
3273
3274         for hunk_idx in 0..hunks_per_file {
3275            let line_no = (hunk_idx * 4) + 1;
3276            writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3277            writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3278            writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3279         }
3280      }
3281
3282      build_compose_snapshot(&diff, "").unwrap()
3283   }
3284
3285   fn build_multi_area_snapshot() -> ComposeSnapshot {
3286      let mut diff = String::new();
3287      let areas = [
3288         ("apps/frontend/src/server", 72),
3289         ("packages/model/src/models", 54),
3290         ("apps/daemon/src/worker", 43),
3291         (".github/workflows", 16),
3292      ];
3293
3294      for (prefix, count) in areas {
3295         for file_idx in 0..count {
3296            let path = format!("{prefix}/file_{file_idx:03}.rs");
3297            writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3298            diff.push_str("index 1111111..2222222 100644\n");
3299            writeln!(diff, "--- a/{path}").unwrap();
3300            writeln!(diff, "+++ b/{path}").unwrap();
3301            diff.push_str("@@ -1,1 +1,1 @@\n");
3302            writeln!(diff, "-old_{file_idx}").unwrap();
3303            writeln!(diff, "+new_{file_idx}").unwrap();
3304         }
3305      }
3306
3307      build_compose_snapshot(&diff, "").unwrap()
3308   }
3309
3310   fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3311      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3312      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3313      let groups = vec![
3314         ComposeIntentGroup {
3315            group_id:     "G1".to_string(),
3316            commit_type:  CommitType::new("refactor").unwrap(),
3317            scope:        None,
3318            file_ids:     vec![source_file.file_id.clone(), test_file.file_id.clone()],
3319            rationale:    "implementation group".to_string(),
3320            dependencies: vec![],
3321         },
3322         ComposeIntentGroup {
3323            group_id:     "G2".to_string(),
3324            commit_type:  CommitType::new("refactor").unwrap(),
3325            scope:        None,
3326            file_ids:     vec![source_file.file_id.clone()],
3327            rationale:    "shared file follow-up".to_string(),
3328            dependencies: vec!["G1".to_string()],
3329         },
3330      ];
3331      let dependency_order =
3332         compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3333            .unwrap();
3334      ComposeIntentPlan { groups, dependency_order }
3335   }
3336
3337   #[test]
3338   fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3339      let dir = init_repo();
3340      write_file(&dir, "src/a.rs", "fn a() {}\n");
3341      write_file(&dir, "src/b.rs", "fn b() {}\n");
3342      commit_all(&dir, "initial");
3343      write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3344      write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3345
3346      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3347      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3348      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3349      let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3350      let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3351      let plan = ComposeExecutablePlan {
3352         groups:           vec![
3353            ComposeExecutableGroup {
3354               group_id:     "G1".to_string(),
3355               commit_type:  CommitType::new("refactor").unwrap(),
3356               scope:        None,
3357               file_ids:     vec![a_file.file_id.clone()],
3358               rationale:    "change a".to_string(),
3359               dependencies: vec![],
3360               hunk_ids:     a_file.hunk_ids.clone(),
3361            },
3362            ComposeExecutableGroup {
3363               group_id:     "G2".to_string(),
3364               commit_type:  CommitType::new("refactor").unwrap(),
3365               scope:        None,
3366               file_ids:     vec![b_file.file_id.clone()],
3367               rationale:    "change b".to_string(),
3368               dependencies: vec!["G1".to_string()],
3369               hunk_ids:     b_file.hunk_ids.clone(),
3370            },
3371         ],
3372         dependency_order: vec![0, 1],
3373      };
3374      let config = CommitConfig::default();
3375      let args = Args {
3376         dir: dir.path().to_string_lossy().to_string(),
3377         compose: true,
3378         ..Default::default()
3379      };
3380      let base_state = capture_compose_base_state(&args.dir).unwrap();
3381
3382      let hashes = execute_compose_with_prepared_messages(
3383         &snapshot,
3384         &plan,
3385         &config,
3386         &args,
3387         &base_state,
3388         vec![canned_message("change a"), canned_message("change b")],
3389      )
3390      .unwrap();
3391
3392      assert_eq!(hashes.len(), 2);
3393      assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3394      assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3395   }
3396
3397   #[test]
3398   fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3399      let dir = init_repo();
3400      write_file(&dir, "src/lib.rs", "old\n");
3401      write_file(&dir, "sentinel.txt", "base\n");
3402      commit_all(&dir, "initial");
3403      let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3404
3405      // A real change so the snapshot is valid.
3406      write_file(&dir, "src/lib.rs", "changed\n");
3407
3408      // A pre-existing staged change that MUST survive a failed compose run.
3409      write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3410      run_git(&dir, &["add", "sentinel.txt"]);
3411      let staged_before = run_git(&dir, &["diff", "--cached"]);
3412      assert!(staged_before.contains("staged sentinel"));
3413
3414      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3415      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3416      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3417      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3418      // The plan references a hunk id that does not exist, so staging fails
3419      // before any commit object is created or any ref is updated.
3420      let plan = ComposeExecutablePlan {
3421         groups:           vec![ComposeExecutableGroup {
3422            group_id:     "G1".to_string(),
3423            commit_type:  CommitType::new("fix").unwrap(),
3424            scope:        None,
3425            file_ids:     vec![source_file.file_id.clone()],
3426            rationale:    "unstageable group".to_string(),
3427            dependencies: vec![],
3428            hunk_ids:     vec!["F999-H001".to_string()],
3429         }],
3430         dependency_order: vec![0],
3431      };
3432      let config = CommitConfig::default();
3433      let args = Args {
3434         dir: dir.path().to_string_lossy().to_string(),
3435         compose: true,
3436         ..Default::default()
3437      };
3438      let base_state = capture_compose_base_state(&args.dir).unwrap();
3439
3440      let err = execute_compose_with_prepared_messages(
3441         &snapshot,
3442         &plan,
3443         &config,
3444         &args,
3445         &base_state,
3446         vec![canned_message("unstageable group")],
3447      )
3448      .unwrap_err();
3449
3450      assert!(err.to_string().contains("unknown hunk id"));
3451      assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3452      assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3453   }
3454
3455   #[test]
3456   fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3457      let snapshot = build_test_snapshot();
3458      let intent_plan = build_shared_intent_plan(&snapshot);
3459      let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3460
3461      assert_eq!(ambiguous.len(), 1);
3462      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3463      let assigned_to_g1 = assigned.get("G1").unwrap();
3464      assert!(
3465         test_file
3466            .hunk_ids
3467            .iter()
3468            .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3469         "uniquely owned file should be auto-assigned"
3470      );
3471   }
3472
3473   #[test]
3474   fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3475      let snapshot = build_test_snapshot();
3476      let intent_plan = build_shared_intent_plan(&snapshot);
3477      let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3478      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3479      let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3480      let valid_group_ids: HashSet<&str> = intent_plan
3481         .groups
3482         .iter()
3483         .map(|group| group.group_id.as_str())
3484         .collect();
3485
3486      let evaluation = evaluate_binding(
3487         &[
3488            ComposeBindingAssignment {
3489               group_id: "G1".to_string(),
3490               hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3491            },
3492            ComposeBindingAssignment {
3493               group_id: "G2".to_string(),
3494               hunk_ids: vec![source_file.hunk_ids[1].clone()],
3495            },
3496         ],
3497         &hunk_context,
3498         &valid_group_ids,
3499         &snapshot,
3500      );
3501
3502      for (group_id, hunk_ids) in evaluation.assigned {
3503         let entry = assigned.entry(group_id).or_default();
3504         for hunk_id in hunk_ids {
3505            entry.insert(hunk_id);
3506         }
3507      }
3508
3509      let group_rank: HashMap<&str, usize> = intent_plan
3510         .dependency_order
3511         .iter()
3512         .enumerate()
3513         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3514         .collect();
3515      assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3516
3517      let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3518      assert_eq!(executable_plan.groups.len(), 1);
3519      assert_eq!(executable_plan.groups[0].group_id, "G1");
3520      assert!(
3521         source_file
3522            .hunk_ids
3523            .iter()
3524            .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3525         "fallback should keep every hunk from the shared file in the surviving group"
3526      );
3527   }
3528
3529   #[test]
3530   fn test_validate_executable_plan_rejects_overlap() {
3531      let snapshot = build_test_snapshot();
3532      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3533      let executable_plan = ComposeExecutablePlan {
3534         groups:           vec![
3535            ComposeExecutableGroup {
3536               group_id:     "G1".to_string(),
3537               commit_type:  CommitType::new("refactor").unwrap(),
3538               scope:        None,
3539               file_ids:     vec![source_file.file_id.clone()],
3540               rationale:    "group one".to_string(),
3541               dependencies: vec![],
3542               hunk_ids:     vec![source_file.hunk_ids[0].clone()],
3543            },
3544            ComposeExecutableGroup {
3545               group_id:     "G2".to_string(),
3546               commit_type:  CommitType::new("refactor").unwrap(),
3547               scope:        None,
3548               file_ids:     vec![source_file.file_id.clone()],
3549               rationale:    "group two".to_string(),
3550               dependencies: vec![],
3551               hunk_ids:     vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3552            },
3553         ],
3554         dependency_order: vec![0, 1],
3555      };
3556
3557      let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3558      assert!(err.to_string().contains("assigned to both"));
3559   }
3560
3561   #[test]
3562   fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3563      let snapshot = build_test_snapshot();
3564      let planning_index = build_planning_index(&snapshot);
3565      let groups = vec![ComposeIntentGroup {
3566         group_id:     "G1".to_string(),
3567         commit_type:  CommitType::new("refactor").unwrap(),
3568         scope:        None,
3569         file_ids:     vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3570         rationale:    "normalize file references".to_string(),
3571         dependencies: vec![],
3572      }];
3573
3574      let (normalized_groups, repair_notes) =
3575         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3576
3577      assert_eq!(normalized_groups.len(), 1);
3578      assert_eq!(
3579         normalized_groups[0].file_ids,
3580         snapshot
3581            .files
3582            .iter()
3583            .map(|file| file.file_id.clone())
3584            .collect::<Vec<_>>()
3585      );
3586      assert_eq!(repair_notes.len(), 2);
3587   }
3588
3589   #[test]
3590   fn test_normalize_intent_plan_repairs_missing_files() {
3591      let snapshot = build_test_snapshot();
3592      let planning_index = build_planning_index(&snapshot);
3593      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3594      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3595      let groups = vec![ComposeIntentGroup {
3596         group_id:     "G1".to_string(),
3597         commit_type:  CommitType::new("refactor").unwrap(),
3598         scope:        None,
3599         file_ids:     vec![source_file.file_id.clone()],
3600         rationale:    "partial coverage".to_string(),
3601         dependencies: vec![],
3602      }];
3603
3604      let (normalized_groups, repair_notes) =
3605         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3606
3607      assert_eq!(normalized_groups.len(), 1);
3608      assert!(
3609         normalized_groups[0].file_ids.contains(&source_file.file_id),
3610         "existing file assignment should be preserved"
3611      );
3612      assert!(
3613         normalized_groups[0].file_ids.contains(&test_file.file_id),
3614         "missing files should be assigned to an existing group"
3615      );
3616      assert_eq!(repair_notes.len(), 1);
3617      assert!(repair_notes[0].contains(&test_file.file_id));
3618   }
3619
3620   #[test]
3621   fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3622      let snapshot = build_multi_area_snapshot();
3623      let planning_index = build_planning_index(&snapshot);
3624      let frontend_target = planning_index
3625         .targets
3626         .iter()
3627         .find(|target| target.label.starts_with("apps/frontend"))
3628         .unwrap();
3629      let model_target = planning_index
3630         .targets
3631         .iter()
3632         .find(|target| target.label.starts_with("packages/model"))
3633         .unwrap();
3634      let groups = vec![
3635         ComposeIntentGroup {
3636            group_id:     "G1".to_string(),
3637            commit_type:  CommitType::new("refactor").unwrap(),
3638            scope:        Scope::new("apps/frontend").ok(),
3639            file_ids:     vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3640            rationale:    "frontend platform updates".to_string(),
3641            dependencies: vec!["group 2".to_string(), "G1".to_string()],
3642         },
3643         ComposeIntentGroup {
3644            group_id:     "G2".to_string(),
3645            commit_type:  CommitType::new("refactor").unwrap(),
3646            scope:        Scope::new("packages/model").ok(),
3647            file_ids:     vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3648            rationale:    "model storage updates".to_string(),
3649            dependencies: vec!["F5".to_string()],
3650         },
3651      ];
3652
3653      let (normalized_groups, repair_notes) =
3654         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3655
3656      assert_eq!(normalized_groups.len(), 2);
3657      assert!(
3658         normalized_groups[0]
3659            .file_ids
3660            .iter()
3661            .all(|file_id| file_id.starts_with('F'))
3662      );
3663      assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3664      assert!(normalized_groups[1].dependencies.is_empty());
3665      assert!(
3666         repair_notes
3667            .iter()
3668            .any(|note| note.contains("Dropped unknown planning target"))
3669      );
3670      assert!(
3671         repair_notes
3672            .iter()
3673            .any(|note| note.contains("Dropped self-dependency"))
3674      );
3675      assert!(
3676         repair_notes
3677            .iter()
3678            .any(|note| note.contains("Mapped compose planner dependency"))
3679      );
3680      assert!(
3681         repair_notes
3682            .iter()
3683            .any(|note| note.contains("Dropped unknown dependency"))
3684      );
3685   }
3686
3687   #[test]
3688   fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3689      let snapshot = build_test_snapshot();
3690      let summary = render_snapshot_summary(&snapshot, &[]);
3691      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3692
3693      assert!(!summary.contains("# snapshot compacted"));
3694      for hunk_id in &source_file.hunk_ids {
3695         assert!(summary.contains(hunk_id));
3696      }
3697   }
3698
3699   #[test]
3700   fn test_render_snapshot_summary_compacts_large_snapshot() {
3701      let snapshot = build_large_snapshot(160, 4);
3702      let summary = render_snapshot_summary(&snapshot, &[]);
3703
3704      assert!(summary.contains("# snapshot compacted"));
3705      assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3706      assert!(summary.contains("F001-H001"));
3707      assert!(summary.contains("F001-H004"));
3708      assert!(!summary.contains("F001-H002"));
3709      assert!(!summary.contains("F001-H003"));
3710      assert!(summary.contains("... 2 more hunks omitted from F001"));
3711   }
3712
3713   #[test]
3714   fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3715      let snapshot = build_multi_area_snapshot();
3716      let planning_index = build_planning_index(&snapshot);
3717
3718      assert_eq!(planning_index.mode, PlanningMode::Area);
3719      assert!(planning_index.targets.len() < snapshot.files.len());
3720      assert!(
3721         planning_index
3722            .targets
3723            .iter()
3724            .any(|target| target.label.starts_with("apps/frontend"))
3725      );
3726      assert!(
3727         render_planning_stat(&planning_index).contains("planning over"),
3728         "planning stat should explain the area mode"
3729      );
3730   }
3731
3732   #[test]
3733   fn test_normalize_intent_plan_expands_area_targets() {
3734      let snapshot = build_multi_area_snapshot();
3735      let planning_index = build_planning_index(&snapshot);
3736      let midpoint = planning_index.targets.len() / 2;
3737      let first_group_targets: Vec<String> = planning_index
3738         .targets
3739         .iter()
3740         .take(midpoint)
3741         .map(|target| target.label.clone())
3742         .collect();
3743      let second_group_targets: Vec<String> = planning_index
3744         .targets
3745         .iter()
3746         .skip(midpoint)
3747         .map(|target| target.label.clone())
3748         .collect();
3749      let groups = vec![
3750         ComposeIntentGroup {
3751            group_id:     "G1".to_string(),
3752            commit_type:  CommitType::new("refactor").unwrap(),
3753            scope:        None,
3754            file_ids:     first_group_targets,
3755            rationale:    "frontend and model".to_string(),
3756            dependencies: vec![],
3757         },
3758         ComposeIntentGroup {
3759            group_id:     "G2".to_string(),
3760            commit_type:  CommitType::new("refactor").unwrap(),
3761            scope:        None,
3762            file_ids:     second_group_targets,
3763            rationale:    "daemon and ci".to_string(),
3764            dependencies: vec![],
3765         },
3766      ];
3767
3768      let (normalized_groups, repair_notes) =
3769         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3770
3771      assert_eq!(normalized_groups.len(), 2);
3772      assert!(
3773         normalized_groups
3774            .iter()
3775            .flat_map(|group| group.file_ids.iter())
3776            .all(|file_id| file_id.starts_with('F')),
3777         "area targets should expand back to concrete file IDs"
3778      );
3779      assert!(!repair_notes.is_empty());
3780      assert_eq!(
3781         normalized_groups
3782            .iter()
3783            .flat_map(|group| group.file_ids.iter())
3784            .collect::<HashSet<_>>()
3785            .len(),
3786         snapshot.files.len()
3787      );
3788   }
3789
3790   #[test]
3791   fn test_large_patch_fallback_splits_monolithic_area_plan() {
3792      let snapshot = build_multi_area_snapshot();
3793      let planning_index = build_planning_index(&snapshot);
3794      let monolithic_group = ComposeIntentGroup {
3795         group_id:     "G1".to_string(),
3796         commit_type:  CommitType::new("refactor").unwrap(),
3797         scope:        None,
3798         file_ids:     snapshot
3799            .files
3800            .iter()
3801            .map(|file| file.file_id.clone())
3802            .collect(),
3803         rationale:    "repo-wide refactor".to_string(),
3804         dependencies: vec![],
3805      };
3806
3807      assert!(should_force_large_patch_fallback(
3808         &snapshot,
3809         &planning_index,
3810         &[monolithic_group],
3811         6
3812      ));
3813
3814      let fallback_groups =
3815         build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3816      assert!(fallback_groups.len() >= 3);
3817      assert_eq!(
3818         fallback_groups
3819            .iter()
3820            .flat_map(|group| group.file_ids.iter())
3821            .collect::<HashSet<_>>()
3822            .len(),
3823         snapshot.files.len()
3824      );
3825      assert!(
3826         fallback_groups
3827            .iter()
3828            .any(|group| group.rationale.contains("frontend")),
3829         "fallback should preserve workstream identity"
3830      );
3831   }
3832
3833   #[test]
3834   fn test_should_collect_compose_observations_skips_area_mode() {
3835      let snapshot = build_large_snapshot(160, 4);
3836      let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
3837      let counter = create_token_counter(&config);
3838
3839      assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3840      assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3841   }
3842
3843   #[test]
3844   fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
3845      let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
3846      let counter = create_token_counter(&config);
3847      let payload = "a".repeat(200);
3848      let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");
3849
3850      assert_eq!(
3851         compose_analysis_strategy(&diff, &config, &counter),
3852         ComposeAnalysisStrategy::MapReduce
3853      );
3854   }
3855
3856   #[test]
3857   fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
3858      let config = CommitConfig {
3859         map_reduce_enabled: false,
3860         max_diff_tokens: 1,
3861         max_diff_length: 10_000,
3862         ..Default::default()
3863      };
3864      let counter = create_token_counter(&config);
3865      assert_eq!(compose_truncation_length(&config), 4);
3866
3867      assert_eq!(
3868         compose_analysis_strategy(
3869            "diff --git a/models.json b/models.json\n+large",
3870            &config,
3871            &counter
3872         ),
3873         ComposeAnalysisStrategy::SmartTruncate
3874      );
3875   }
3876
3877   #[test]
3878   fn test_compose_analysis_strategy_keeps_small_group_direct() {
3879      let config = CommitConfig {
3880         map_reduce_threshold: 1_000,
3881         max_diff_tokens: 1_000,
3882         max_diff_length: 10_000,
3883         ..Default::default()
3884      };
3885      let counter = create_token_counter(&config);
3886
3887      assert_eq!(
3888         compose_analysis_strategy("diff --git a/a.rs b/a.rs\n+a", &config, &counter),
3889         ComposeAnalysisStrategy::Direct
3890      );
3891   }
3892
3893   #[test]
3894   fn test_chunk_ambiguous_files_splits_large_binding_request() {
3895      let ambiguous_files = vec![
3896         AmbiguousFileBinding {
3897            file_id:             "F001".to_string(),
3898            path:                "src/alpha.rs".to_string(),
3899            candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3900            hunk_ids:            (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3901         },
3902         AmbiguousFileBinding {
3903            file_id:             "F002".to_string(),
3904            path:                "src/beta.rs".to_string(),
3905            candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3906            hunk_ids:            (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3907         },
3908         AmbiguousFileBinding {
3909            file_id:             "F003".to_string(),
3910            path:                "src/gamma.rs".to_string(),
3911            candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3912            hunk_ids:            (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3913         },
3914      ];
3915
3916      let batches = chunk_ambiguous_files(&ambiguous_files);
3917      let total_hunks: usize = batches
3918         .iter()
3919         .flatten()
3920         .map(|file| file.hunk_ids.len())
3921         .sum();
3922
3923      assert_eq!(batches.len(), 2);
3924      assert_eq!(batches[0].len(), 1);
3925      assert_eq!(batches[1].len(), 2);
3926      assert_eq!(total_hunks, 140);
3927      assert!(batches.iter().all(|batch| {
3928         batch.len() <= MAX_BIND_FILES_PER_REQUEST
3929            && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3930               <= MAX_BIND_HUNKS_PER_REQUEST
3931      }));
3932   }
3933}