Skip to main content

llm_git/
compose.rs

1use std::{
2   borrow::Cow,
3   collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4   fmt::Write,
5   fs,
6   path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13   api::{
14      AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15      generate_summary_from_analysis, run_oneshot, strict_json_schema,
16   },
17   compose_types::{
18      ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19      ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20   },
21   config::CommitConfig,
22   diff::smart_truncate_diff,
23   error::{CommitGenError, Result},
24   git::{
25      TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26      get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27      update_ref_checked, write_index_tree, write_real_index_tree,
28   },
29   map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30   normalization::{format_commit_message, post_process_commit_message},
31   patch::{
32      StageResult, build_compose_snapshot, create_executable_group_patch,
33      force_stage_file_from_base_in_index, stage_executable_group_in_index,
34   },
35   style, templates,
36   tokens::{TokenCounter, create_token_counter},
37   types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
38   validation::validate_commit_message,
39};
40
/// Default cap on observations rendered per file in the snapshot summary,
/// used when the snapshot is small enough that no compaction applies.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and stored alongside cached
/// plans; a cached plan carrying a different tag is ignored on load.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
// NOTE(review): presumably the sampling temperature for planner requests;
// its use is not visible in this part of the file — confirm.
const COMPOSE_PLANNER_TEMPERATURE: f32 = 0.0;
/// File count above which the snapshot summary switches to the medium
/// compaction budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Hunk count above which the snapshot summary switches to the medium
/// compaction budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// File count above which the summary uses the tightest budget and planning
/// switches from per-file targets to path-based areas.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Hunk count above which the summary uses the tightest budget and planning
/// switches from per-file targets to path-based areas.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// A planning area stops being subdivided once it holds at most this many
/// files (and also satisfies the hunk limit below).
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A planning area stops being subdivided once it holds at most this many
/// hunks (and also satisfies the file limit above).
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Maximum path-prefix depth explored while subdividing planning areas.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the four limits below are not referenced in this part of the
// file; their names suggest monolith-fallback heuristics and per-request
// binding batch sizes — confirm against their call sites.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
/// Maximum number of commit messages to generate concurrently during
/// `execute_compose`. Matches the per-file fan-out used in `map_reduce`.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// Repository state recorded by [`capture_compose_base_state`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComposeBaseState {
   /// Value returned by `get_head_hash` at capture time.
   head_hash:  String,
   /// Value returned by `current_head_ref` at capture time.
   head_ref:   String,
   /// Value returned by `write_real_index_tree` at capture time.
   index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68   Ok(ComposeBaseState {
69      head_hash:  get_head_hash(dir)?,
70      head_ref:   current_head_ref(dir)?,
71      index_tree: write_real_index_tree(dir)?,
72   })
73}
74
/// How a diff is prepared for analysis; chosen by `compose_analysis_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeAnalysisStrategy {
   /// The diff is within both the byte and token limits.
   Direct,
   /// The diff exceeds the byte or token limit but map-reduce was not
   /// selected.
   SmartTruncate,
   /// `should_use_map_reduce` selected the map-reduce path.
   MapReduce,
}
81
82fn compose_analysis_strategy(
83   diff: &str,
84   config: &CommitConfig,
85   counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87   if should_use_map_reduce(diff, config, counter) {
88      return ComposeAnalysisStrategy::MapReduce;
89   }
90
91   let diff_tokens = counter.count_sync(diff);
92   if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93      return ComposeAnalysisStrategy::SmartTruncate;
94   }
95
96   ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100   config
101      .max_diff_length
102      .min(config.max_diff_tokens.saturating_mul(4))
103      .max(1)
104}
105
/// Serde envelope holding a list of intent groups under the `groups` key
/// (the same top-level key produced by `build_intent_schema`).
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
   /// Intent groups carried by the response.
   groups: Vec<ComposeIntentGroup>,
}
110
/// Serde envelope holding binding assignments under the `assignments` key
/// (the same top-level key produced by `build_binding_schema`).
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
   /// Binding assignments carried by the response.
   assignments: Vec<ComposeBindingAssignment>,
}
115
/// On-disk representation of a cached executable plan, written by
/// `save_cached_plan` and read back by `load_cached_plan`.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
   /// `COMPOSE_PLAN_SCHEMA_VERSION` at the time the plan was saved.
   schema_version: String,
   /// Result of `compose_plan_cache_key` for the inputs that produced the plan.
   cache_key:      String,
   /// The cached plan itself.
   plan:           ComposeExecutablePlan,
}
122
// NOTE(review): this type is not used in the visible part of the file; the
// field notes below are inferred from names only — confirm against callers.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
   // Presumably the snapshot file ID of the ambiguous file.
   file_id:             String,
   // Presumably the file's path.
   path:                String,
   // Presumably the IDs of the groups that could own the file.
   candidate_group_ids: Vec<String>,
   // Presumably the IDs of the file's hunks that need binding.
   hunk_ids:            Vec<String>,
}
130
// NOTE(review): this type is not used in the visible part of the file;
// presumably it records which groups are candidates for one hunk — confirm.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
   candidate_group_ids: Vec<String>,
}
135
// Map from a string key to an ordered set of strings.
// NOTE(review): which side is the hunk ID and which the group ID is not
// visible in this part of the file — confirm against the binding code.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
// NOTE(review): this type is not used in the visible part of the file; the
// field notes below are inferred from names only — confirm against callers.
#[derive(Debug)]
struct BindingEvaluation {
   // Presumably the resolved assignments, keyed by an ID string.
   assigned:   HashMap<String, Vec<String>>,
   // Presumably the IDs that could not be resolved.
   unresolved: Vec<String>,
}
143
/// Per-file rendering limits for the snapshot summary, produced by
/// `snapshot_summary_budget`.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
   /// Maximum number of observations rendered for each file.
   max_observations_per_file: usize,
   /// Maximum number of hunks rendered for each file; `None` renders all.
   max_hunks_per_file:        Option<usize>,
}
149
/// Granularity of planning targets, chosen by `planning_mode_for_snapshot`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
   /// One planning target per snapshot file.
   File,
   /// Planning targets are path-based areas that may span several files.
   Area,
}
155
/// One unit the planner can assign to a group: a single file or a
/// path-based area.
#[derive(Debug, Clone)]
struct PlanningTarget {
   /// The file ID in file mode, or a generated `A001`-style ID in area mode.
   target_id:  String,
   /// The file path in file mode, or the area's bucket label in area mode.
   label:      String,
   /// Snapshot file IDs covered by this target.
   file_ids:   Vec<String>,
   /// Total number of hunks across the covered files.
   hunk_count: usize,
   /// Sum of the covered files' `additions`.
   additions:  usize,
   /// Sum of the covered files' `deletions`.
   deletions:  usize,
}
165
/// Planning targets for a snapshot plus a lookup table of accepted spellings,
/// built by `build_planning_index`.
#[derive(Debug, Clone)]
struct PlanningIndex {
   /// Whether targets are individual files or path-based areas.
   mode:    PlanningMode,
   /// All planning targets, in the order they were built.
   targets: Vec<PlanningTarget>,
   /// Maps each target ID, its ASCII-uppercased form, and its normalized
   /// label to the canonical target ID.
   aliases: HashMap<String, String>,
}
172
/// Intermediate grouping of files produced by `collect_planning_buckets`
/// before it is turned into a [`PlanningTarget`].
#[derive(Debug, Clone)]
struct PlanningBucket {
   /// Label computed by `planning_bucket_label` for the bucket's files.
   label:    String,
   /// Snapshot file IDs in the bucket.
   file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180   fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181      let mut expanded = Vec::new();
182      let mut seen_file_ids = HashSet::new();
183
184      for target_id in target_ids {
185         if let Some(target) = self
186            .targets
187            .iter()
188            .find(|candidate| candidate.target_id == *target_id)
189         {
190            for file_id in &target.file_ids {
191               if seen_file_ids.insert(file_id.clone()) {
192                  expanded.push(file_id.clone());
193               }
194            }
195         }
196      }
197
198      expanded
199   }
200}
201
202impl SnapshotSummaryBudget {
203   const fn is_compacted(self) -> bool {
204      self.max_hunks_per_file.is_some()
205   }
206}
207
/// Returns `true` when `path` names a dependency manifest or lockfile.
///
/// Only the final path component is inspected. It matches when it is one of
/// the well-known manifest names, or when its extension is `lock` or `lockb`
/// (ASCII case-insensitive). Paths without a UTF-8 file name never match.
fn is_dependency_manifest(path: &str) -> bool {
   let Some(file_name) = Path::new(path).file_name().and_then(|name| name.to_str()) else {
      return false;
   };

   let is_known_manifest = matches!(
      file_name,
      "Cargo.toml"
         | "Cargo.lock"
         | "package.json"
         | "package-lock.json"
         | "pnpm-lock.yaml"
         | "yarn.lock"
         | "bun.lock"
         | "bun.lockb"
         | "go.mod"
         | "go.sum"
         | "requirements.txt"
         | "Pipfile"
         | "Pipfile.lock"
         | "pyproject.toml"
         | "Gemfile"
         | "Gemfile.lock"
         | "composer.json"
         | "composer.lock"
         | "build.gradle"
         | "build.gradle.kts"
         | "gradle.properties"
         | "pom.xml"
   );
   if is_known_manifest {
      return true;
   }

   match Path::new(file_name).extension() {
      Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
      None => false,
   }
}
247
248fn save_debug_artifact<T: Serialize>(
249   debug_dir: Option<&Path>,
250   filename: &str,
251   value: &T,
252) -> Result<()> {
253   let Some(debug_dir) = debug_dir else {
254      return Ok(());
255   };
256
257   fs::create_dir_all(debug_dir)?;
258   let path = debug_dir.join(filename);
259   let json = serde_json::to_string_pretty(value)?;
260   fs::write(path, json)?;
261   Ok(())
262}
263
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a and returns the
/// digest as 16 lowercase, zero-padded hex digits.
fn fnv1a_64(input: &str) -> String {
   const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
   const PRIME: u64 = 0x0000_0100_0000_01b3;

   // FNV-1a: XOR the byte in first, then multiply by the prime.
   let digest = input
      .bytes()
      .fold(OFFSET_BASIS, |state, byte| (state ^ u64::from(byte)).wrapping_mul(PRIME));

   format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274   snapshot: &ComposeSnapshot,
275   max_commits: usize,
276   analysis_model: &str,
277) -> String {
278   fnv1a_64(&format!(
279      "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280      snapshot.diff, snapshot.stat
281   ))
282}
283
284fn compose_plan_cache_path(
285   dir: &str,
286   snapshot: &ComposeSnapshot,
287   max_commits: usize,
288   analysis_model: &str,
289) -> Result<PathBuf> {
290   let git_dir = get_git_dir(dir)?;
291   Ok(git_dir.join("llm-git").join(format!(
292      "compose-plan-{}.json",
293      compose_plan_cache_key(snapshot, max_commits, analysis_model)
294   )))
295}
296
297fn load_cached_plan(
298   dir: &str,
299   snapshot: &ComposeSnapshot,
300   max_commits: usize,
301   analysis_model: &str,
302) -> Result<Option<ComposeExecutablePlan>> {
303   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
304   if !cache_path.exists() {
305      return Ok(None);
306   }
307
308   let content = match fs::read_to_string(&cache_path) {
309      Ok(content) => content,
310      Err(err) => {
311         eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
312         return Ok(None);
313      },
314   };
315   let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
316      Ok(cached) => cached,
317      Err(err) => {
318         eprintln!(
319            "{}",
320            style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
321         );
322         let _ = fs::remove_file(&cache_path);
323         return Ok(None);
324      },
325   };
326   let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
327
328   if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
329      return Ok(None);
330   }
331   if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
332      eprintln!(
333         "{}",
334         style::warning(&format!(
335            "Discarding cached compose plan (no longer valid for current snapshot): {err}"
336         ))
337      );
338      let _ = fs::remove_file(&cache_path);
339      return Ok(None);
340   }
341   Ok(Some(cached.plan))
342}
343
344fn save_cached_plan(
345   dir: &str,
346   snapshot: &ComposeSnapshot,
347   max_commits: usize,
348   analysis_model: &str,
349   plan: &ComposeExecutablePlan,
350) -> Result<()> {
351   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352   if let Some(parent) = cache_path.parent() {
353      fs::create_dir_all(parent)?;
354   }
355
356   let cached = ComposeCachedPlan {
357      schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358      cache_key:      compose_plan_cache_key(snapshot, max_commits, analysis_model),
359      plan:           plan.clone(),
360   };
361   fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362   Ok(())
363}
364
/// Formats a line range given its first line and length.
///
/// A zero-length range renders as `"0"`, a single line as its number, and
/// anything longer as an inclusive `start-end` span.
fn format_line_range(start: usize, count: usize) -> String {
   if count == 0 {
      return "0".to_string();
   }
   if count == 1 {
      return start.to_string();
   }

   let end = start + count - 1;
   format!("{start}-{end}")
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376   {
377      SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378   } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379      || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380   {
381      SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382   } else {
383      SnapshotSummaryBudget {
384         max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385         max_hunks_per_file:        None,
386      }
387   }
388}
389
/// Picks up to `max_samples` indices from `0..count`, spread evenly and in
/// ascending order.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned.
/// * When `max_samples` is 1, only index 0 is returned.
/// * Otherwise the first and last indices are always included and the rest
///   are spaced evenly between them.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
   if count <= max_samples {
      return (0..count).collect();
   }

   match max_samples {
      // A budget of zero must yield no samples, never a single one.
      0 => Vec::new(),
      1 => vec![0],
      _ => {
         let last = count - 1;
         let intervals = max_samples - 1;
         // `count > max_samples` makes the stride `last / intervals` exceed 1,
         // so consecutive positions are strictly increasing and need no
         // de-duplication.
         (0..max_samples)
            .map(|slot| slot * last / intervals)
            .collect()
      },
   }
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411   match budget.max_hunks_per_file {
412      None => file.hunk_ids.iter().map(String::as_str).collect(),
413      Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414         .into_iter()
415         .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416         .collect(),
417   }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421   let budget = snapshot_summary_budget(snapshot);
422   let observations_by_file: HashMap<&str, Vec<&str>> = observations
423      .iter()
424      .map(|observation| {
425         (
426            observation.file.as_str(),
427            observation
428               .observations
429               .iter()
430               .map(String::as_str)
431               .take(budget.max_observations_per_file)
432               .collect(),
433         )
434      })
435      .collect();
436
437   let mut out = String::new();
438   if budget.is_compacted() {
439      let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440      writeln!(
441         out,
442         "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443          representative hunks and {} observation(s) per file",
444         budget.max_observations_per_file
445      )
446      .unwrap();
447   }
448
449   for file in &snapshot.files {
450      writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451      if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452         for observation in file_observations {
453            writeln!(out, "  observation: {observation}").unwrap();
454         }
455      }
456
457      let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458      for hunk_id in &rendered_hunk_ids {
459         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460            if hunk.synthetic {
461               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462            } else {
463               writeln!(
464                  out,
465                  "  - {} old:{} new:{} :: {}",
466                  hunk.hunk_id,
467                  format_line_range(hunk.old_start, hunk.old_count),
468                  format_line_range(hunk.new_start, hunk.new_count),
469                  hunk.snippet
470               )
471               .unwrap();
472            }
473         }
474      }
475
476      let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477      if omitted_hunks > 0 {
478         writeln!(out, "  ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479      }
480   }
481
482   out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488   {
489      PlanningMode::Area
490   } else {
491      PlanningMode::File
492   }
493}
494
/// Counts the `/`-separated segments of `path`; an empty path counts as one
/// segment.
fn path_depth(path: &str) -> usize {
   // Segments are always one more than the separators between them.
   path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, re-joined
/// with `/`. A depth of 0 gives an empty string; a depth beyond the number
/// of segments gives the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
   let leading: Vec<&str> = path.split('/').take(depth).collect();
   leading.join("/")
}
508
/// Returns the longest run of leading `/`-separated segments shared by all
/// `paths`, joined with `/`.
///
/// An empty slice, or paths with no shared first segment, give an empty
/// string. A single path is returned unchanged.
fn common_path_prefix(paths: &[String]) -> String {
   let Some((head, rest)) = paths.split_first() else {
      return String::new();
   };

   let mut shared: Vec<&str> = head.split('/').collect();
   for candidate in rest {
      // Nothing left to narrow once the shared prefix is empty.
      if shared.is_empty() {
         break;
      }

      let matching = shared
         .iter()
         .zip(candidate.split('/'))
         .take_while(|(ours, theirs)| **ours == *theirs)
         .count();
      shared.truncate(matching);
   }

   shared.join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532   file_ids
533      .iter()
534      .filter_map(|file_id| snapshot.file_by_id(file_id))
535      .map(|file| file.hunk_ids.len())
536      .sum()
537}
538
539fn group_file_ids_by_prefix(
540   snapshot: &ComposeSnapshot,
541   file_ids: &[String],
542   depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544   let mut groups = BTreeMap::new();
545
546   for file_id in file_ids {
547      if let Some(file) = snapshot.file_by_id(file_id) {
548         groups
549            .entry(prefix_at_depth(&file.path, depth))
550            .or_insert_with(Vec::new)
551            .push(file_id.clone());
552      }
553   }
554
555   groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559   let paths: Vec<String> = file_ids
560      .iter()
561      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562      .collect();
563
564   let common_prefix = common_path_prefix(&paths);
565   if common_prefix.is_empty() {
566      paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567   } else {
568      common_prefix
569   }
570}
571
572fn collect_planning_buckets(
573   snapshot: &ComposeSnapshot,
574   file_ids: &[String],
575   depth: usize,
576) -> Vec<PlanningBucket> {
577   let file_count = file_ids.len();
578   let hunk_count = bucket_hunk_count(snapshot, file_ids);
579   let max_path_depth = file_ids
580      .iter()
581      .filter_map(|file_id| snapshot.file_by_id(file_id))
582      .map(|file| path_depth(&file.path))
583      .max()
584      .unwrap_or(depth);
585
586   let should_stop =
587      file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588   if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589      return vec![PlanningBucket {
590         label:    planning_bucket_label(snapshot, file_ids),
591         file_ids: file_ids.to_vec(),
592      }];
593   }
594
595   let next_depth = depth + 1;
596   let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597   if groups.len() <= 1 {
598      return collect_planning_buckets(snapshot, file_ids, next_depth);
599   }
600
601   groups
602      .into_values()
603      .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604      .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608   let all_file_ids: Vec<String> = snapshot
609      .files
610      .iter()
611      .map(|file| file.file_id.clone())
612      .collect();
613   let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615   buckets
616      .into_iter()
617      .enumerate()
618      .map(|(idx, bucket)| {
619         let mut additions = 0_usize;
620         let mut deletions = 0_usize;
621         let mut hunk_count = 0_usize;
622
623         for file_id in &bucket.file_ids {
624            if let Some(file) = snapshot.file_by_id(file_id) {
625               additions = additions.saturating_add(file.additions);
626               deletions = deletions.saturating_add(file.deletions);
627               hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628            }
629         }
630
631         PlanningTarget {
632            target_id: format!("A{:03}", idx + 1),
633            label: bucket.label,
634            file_ids: bucket.file_ids,
635            hunk_count,
636            additions,
637            deletions,
638         }
639      })
640      .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644   snapshot
645      .files
646      .iter()
647      .map(|file| PlanningTarget {
648         target_id:  file.file_id.clone(),
649         label:      file.path.clone(),
650         file_ids:   vec![file.file_id.clone()],
651         hunk_count: file.hunk_ids.len(),
652         additions:  file.additions,
653         deletions:  file.deletions,
654      })
655      .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659   let mode = planning_mode_for_snapshot(snapshot);
660   let targets = match mode {
661      PlanningMode::File => build_file_planning_targets(snapshot),
662      PlanningMode::Area => build_area_planning_targets(snapshot),
663   };
664
665   let aliases = targets
666      .iter()
667      .flat_map(|target| {
668         let normalized_label = normalize_file_reference(&target.label);
669         [
670            (target.target_id.clone(), target.target_id.clone()),
671            (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672            (normalized_label, target.target_id.clone()),
673         ]
674      })
675      .collect();
676
677   PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681   sample_positions(target.file_ids.len(), 4)
682      .into_iter()
683      .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684      .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688   let hunk_ids: Vec<&String> = target
689      .file_ids
690      .iter()
691      .filter_map(|file_id| snapshot.file_by_id(file_id))
692      .flat_map(|file| file.hunk_ids.iter())
693      .collect();
694
695   sample_positions(hunk_ids.len(), 4)
696      .into_iter()
697      .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698      .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702   let mut out = String::new();
703
704   match index.mode {
705      PlanningMode::File => {
706         writeln!(out, "# planning over individual file IDs").unwrap();
707      },
708      PlanningMode::Area => {
709         writeln!(
710            out,
711            "# planning over {} area IDs spanning {} files",
712            index.targets.len(),
713            index
714               .targets
715               .iter()
716               .flat_map(|target| target.file_ids.iter())
717               .collect::<HashSet<_>>()
718               .len()
719         )
720         .unwrap();
721      },
722   }
723
724   for target in &index.targets {
725      writeln!(
726         out,
727         "{} {} | {} files | {} hunks | +{}/-{}",
728         target.target_id,
729         target.label,
730         target.file_ids.len(),
731         target.hunk_count,
732         target.additions,
733         target.deletions
734      )
735      .unwrap();
736   }
737
738   out
739}
740
741fn render_planning_snapshot_summary(
742   snapshot: &ComposeSnapshot,
743   observations: &[FileObservation],
744   index: &PlanningIndex,
745) -> String {
746   if index.mode == PlanningMode::File {
747      return render_snapshot_summary(snapshot, observations);
748   }
749
750   let observations_by_file: HashMap<&str, Vec<&str>> = observations
751      .iter()
752      .map(|observation| {
753         (
754            observation.file.as_str(),
755            observation
756               .observations
757               .iter()
758               .map(String::as_str)
759               .take(1)
760               .collect(),
761         )
762      })
763      .collect();
764
765   let mut out = String::new();
766   writeln!(
767      out,
768      "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769   )
770   .unwrap();
771
772   for target in &index.targets {
773      writeln!(
774         out,
775         "- {} {} ({} files, {} hunks, +{}/-{})",
776         target.target_id,
777         target.label,
778         target.file_ids.len(),
779         target.hunk_count,
780         target.additions,
781         target.deletions
782      )
783      .unwrap();
784
785      let sample_file_ids = sample_file_ids_for_target(target);
786      if !sample_file_ids.is_empty() {
787         let sample_files: Vec<String> = sample_file_ids
788            .iter()
789            .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790            .collect();
791         writeln!(out, "  files: {}", sample_files.join(", ")).unwrap();
792         let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793         if omitted > 0 {
794            writeln!(out, "  ... {omitted} more files omitted from {}", target.target_id).unwrap();
795         }
796      }
797
798      let mut rendered_observations = 0_usize;
799      for file_id in &target.file_ids {
800         let Some(file) = snapshot.file_by_id(file_id) else {
801            continue;
802         };
803         let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804            continue;
805         };
806
807         for observation in file_observations {
808            writeln!(out, "  observation: {observation}").unwrap();
809            rendered_observations += 1;
810            if rendered_observations >= 2 {
811               break;
812            }
813         }
814
815         if rendered_observations >= 2 {
816            break;
817         }
818      }
819
820      for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821         if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822            if hunk.synthetic {
823               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824            } else {
825               writeln!(
826                  out,
827                  "  - {} old:{} new:{} :: {}",
828                  hunk.hunk_id,
829                  format_line_range(hunk.old_start, hunk.old_count),
830                  format_line_range(hunk.new_start, hunk.new_count),
831                  hunk.snippet
832               )
833               .unwrap();
834            }
835         }
836      }
837   }
838
839   out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843   match index.mode {
844      PlanningMode::File => format!(
845         "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846         snapshot.files.len()
847      ),
848      PlanningMode::Area => format!(
849         "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850          Coverage: {} areas spanning {} files.",
851         index.targets.len(),
852         snapshot.files.len()
853      ),
854   }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858   match index.mode {
859      PlanningMode::File => {
860         "Use only the provided file IDs and keep the grouping conservative.".to_string()
861      },
862      PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863                             planning areas. Split along independent subsystems or workstreams \
864                             when the areas point at unrelated changes."
865         .to_string(),
866   }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870   match index.mode {
871      PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872      PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873                             one broad group if nearly every area clearly belongs to the same \
874                             atomic change."
875         .to_string(),
876   }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880   let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882   strict_json_schema(
883      serde_json::json!({
884         "groups": {
885            "type": "array",
886            "items": {
887               "type": "object",
888               "properties": {
889                  "group_id": {
890                     "type": "string",
891                     "description": "Stable identifier like G1, G2, G3"
892                  },
893                  "file_ids": {
894                     "type": "array",
895                     "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896                     "items": { "type": "string" }
897                  },
898                  "type": {
899                     "type": "string",
900                     "enum": type_enum,
901                     "description": "Conventional commit type for this group"
902                  },
903                  "scope": {
904                     "type": "string",
905                     "description": "Optional scope (module/component). Omit if broad."
906                  },
907                  "rationale": {
908                     "type": "string",
909                     "description": "Brief explanation of the logical change"
910                  },
911                  "dependencies": {
912                     "type": "array",
913                     "description": "Group IDs this group depends on",
914                     "items": { "type": "string" }
915                  }
916               },
917               "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918               "additionalProperties": false
919            }
920         }
921      }),
922      &["groups"],
923   )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927   strict_json_schema(
928      serde_json::json!({
929         "assignments": {
930            "type": "array",
931            "items": {
932               "type": "object",
933               "properties": {
934                  "group_id": { "type": "string" },
935                  "hunk_ids": {
936                     "type": "array",
937                     "items": { "type": "string" }
938                  }
939               },
940               "required": ["group_id", "hunk_ids"],
941               "additionalProperties": false
942            }
943         }
944      }),
945      &["assignments"],
946   )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950   groups: &[T],
951   group_id: FId,
952   dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955   FId: Fn(&T) -> &str,
956   FDeps: Fn(&T) -> &[String],
957{
958   let mut index_by_id = HashMap::new();
959   for (idx, group) in groups.iter().enumerate() {
960      let id = group_id(group);
961      if id.trim().is_empty() {
962         return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963      }
964      if index_by_id.insert(id.to_string(), idx).is_some() {
965         return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966      }
967   }
968
969   let mut in_degree = vec![0_usize; groups.len()];
970   let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972   for (idx, group) in groups.iter().enumerate() {
973      for dependency in dependencies(group) {
974         let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975            CommitGenError::Other(format!(
976               "Group {} depends on unknown group_id '{}'",
977               group_id(group),
978               dependency
979            ))
980         })?;
981         if dependency_idx == idx {
982            return Err(CommitGenError::Other(format!(
983               "Group {} depends on itself",
984               group_id(group)
985            )));
986         }
987
988         adjacency[dependency_idx].push(idx);
989         in_degree[idx] += 1;
990      }
991   }
992
993   let mut queue: Vec<usize> = (0..groups.len())
994      .filter(|idx| in_degree[*idx] == 0)
995      .collect();
996   let mut order = Vec::with_capacity(groups.len());
997
998   while let Some(node) = queue.pop() {
999      order.push(node);
1000      for neighbor in &adjacency[node] {
1001         in_degree[*neighbor] -= 1;
1002         if in_degree[*neighbor] == 0 {
1003            queue.push(*neighbor);
1004         }
1005      }
1006   }
1007
1008   if order.len() != groups.len() {
1009      return Err(CommitGenError::Other(
1010         "Circular dependency detected in compose groups".to_string(),
1011      ));
1012   }
1013
1014   Ok(order)
1015}
1016
/// Cleans up a reference string before it is matched against known ids.
///
/// Removes surrounding whitespace and quote characters (backtick, double and
/// single quote), trailing `,` / `;` punctuation, and any leading `./`
/// prefixes.
///
/// Trailing punctuation and closing quotes are stripped together, so inputs
/// such as `"G2",` or `'docs/a.md';` no longer leave a stray quote behind, and
/// whitespace just inside the quotes is removed as well.
fn normalize_file_reference(raw_file_ref: &str) -> String {
   /// Characters that may wrap a reference on either side.
   fn is_wrapper(ch: char) -> bool {
      ch.is_whitespace() || matches!(ch, '`' | '"' | '\'')
   }

   raw_file_ref
      .trim_start_matches(is_wrapper)
      // Quotes and list punctuation can appear in any order at the end.
      .trim_end_matches(|ch: char| is_wrapper(ch) || matches!(ch, ',' | ';'))
      .trim_start_matches("./")
      .to_string()
}
1025
/// Extracts distinctive lowercase keywords from free-form planning text.
///
/// The text is split on every character that is not ASCII alphanumeric. Runs
/// shorter than three characters, stop words, and repeats of an earlier token
/// are discarded; the remaining tokens are returned lowercased in order of
/// first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
   const STOP_WORDS: &[&str] = &[
      "and",
      "for",
      "the",
      "with",
      "from",
      "into",
      "after",
      "before",
      "over",
      "under",
      "plus",
      "across",
      "update",
      "updated",
      "refactor",
      "refactored",
      "changes",
      "change",
      "logical",
      "group",
      "groups",
      "commit",
      "commits",
   ];

   let mut seen = HashSet::new();

   text
      .split(|ch: char| !ch.is_ascii_alphanumeric())
      .filter(|word| word.len() >= 3)
      .map(str::to_ascii_lowercase)
      .filter(|word| !STOP_WORDS.contains(&word.as_str()))
      // `insert` returns false for a token that was already emitted.
      .filter(|word| seen.insert(word.clone()))
      .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079   let normalized = normalize_file_reference(raw);
1080   let uppercase = normalized.to_ascii_uppercase();
1081
1082   if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083      return Some(format!("G{uppercase}"));
1084   }
1085
1086   if let Some(rest) = uppercase.strip_prefix('G')
1087      && !rest.is_empty()
1088      && rest.chars().all(|ch| ch.is_ascii_digit())
1089   {
1090      return Some(format!("G{rest}"));
1091   }
1092
1093   let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094   let compact = uppercase
1095      .chars()
1096      .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097      .collect::<String>();
1098   if compact.starts_with("GROUP") && !digits.is_empty() {
1099      return Some(format!("G{digits}"));
1100   }
1101
1102   None
1103}
1104
/// Coarse classification of a changed file, as assigned by
/// `compose_file_category`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeFileCategory {
   /// The file is flagged as binary.
   Binary,
   /// The path is recognised by `is_dependency_manifest`.
   Dependency,
   /// Markdown files and READMEs.
   Docs,
   /// Paths containing `prompt` or `system`.
   Prompt,
   /// Files under a `tests/` directory, or whose name contains `test` or `spec`.
   Test,
   /// Files with a configuration extension such as `toml`, `yaml` or `json`.
   Config,
   /// Files with a recognised programming-language extension.
   Source,
   /// Anything that matched none of the other categories.
   Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118   if file.is_binary {
1119      return ComposeFileCategory::Binary;
1120   }
1121
1122   if is_dependency_manifest(&file.path) {
1123      return ComposeFileCategory::Dependency;
1124   }
1125
1126   let filename_lower = file.path.to_ascii_lowercase();
1127   let file_name = Path::new(&filename_lower)
1128      .file_name()
1129      .and_then(|name| name.to_str())
1130      .unwrap_or_default();
1131   let extension = Path::new(&filename_lower)
1132      .extension()
1133      .and_then(|ext| ext.to_str())
1134      .unwrap_or_default();
1135
1136   if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137      return ComposeFileCategory::Prompt;
1138   }
1139
1140   if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141      return ComposeFileCategory::Docs;
1142   }
1143
1144   if filename_lower.contains("/tests/")
1145      || filename_lower.starts_with("tests/")
1146      || file_name.contains("test")
1147      || file_name.contains("spec")
1148   {
1149      return ComposeFileCategory::Test;
1150   }
1151
1152   if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153      return ComposeFileCategory::Config;
1154   }
1155
1156   if matches!(
1157      extension,
1158      "rs"
1159         | "py"
1160         | "js"
1161         | "jsx"
1162         | "ts"
1163         | "tsx"
1164         | "go"
1165         | "java"
1166         | "kt"
1167         | "c"
1168         | "cc"
1169         | "cpp"
1170         | "h"
1171         | "hpp"
1172         | "cs"
1173         | "rb"
1174         | "php"
1175         | "swift"
1176         | "scala"
1177         | "m"
1178         | "mm"
1179   ) {
1180      return ComposeFileCategory::Source;
1181   }
1182
1183   ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments two paths have in common.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments. Identical paths count every segment, file name included.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
   let mut right_segments = right.split('/');
   let mut depth = 0;

   for left_segment in left.split('/') {
      match right_segments.next() {
         Some(right_segment) if right_segment == left_segment => depth += 1,
         _ => break,
      }
   }

   depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195   let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197   if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198      score += 40;
1199   }
1200
1201   if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202      score += 12;
1203   }
1204
1205   if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206      score += 18;
1207   }
1208
1209   score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213   match (compose_file_category(file), group.commit_type.as_str()) {
1214      (ComposeFileCategory::Docs, "docs") => 25,
1215      (ComposeFileCategory::Test, "test") => 25,
1216      (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217      (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218      (
1219         ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220         "feat" | "fix" | "refactor" | "perf",
1221      ) => 10,
1222      _ => 0,
1223   }
1224}
1225
1226fn best_group_for_missing_file(
1227   snapshot: &ComposeSnapshot,
1228   groups: &[ComposeIntentGroup],
1229   missing_file: &ComposeFile,
1230) -> usize {
1231   let mut best_group_idx = 0;
1232   let mut best_score = i32::MIN;
1233   let mut best_group_size = usize::MAX;
1234
1235   for (group_idx, group) in groups.iter().enumerate() {
1236      let similarity = group
1237         .file_ids
1238         .iter()
1239         .filter_map(|file_id| snapshot.file_by_id(file_id))
1240         .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241         .max()
1242         .unwrap_or_default();
1243      let score = similarity + group_type_bonus(missing_file, group);
1244      let group_size = group.file_ids.len();
1245
1246      if score > best_score || (score == best_score && group_size < best_group_size) {
1247         best_group_idx = group_idx;
1248         best_score = score;
1249         best_group_size = group_size;
1250      }
1251   }
1252
1253   best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257   raw_dependency: &str,
1258   known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260   let normalized = normalize_file_reference(raw_dependency);
1261   if normalized.is_empty() {
1262      return None;
1263   }
1264
1265   if known_group_ids.contains(&normalized) {
1266      return Some(normalized);
1267   }
1268
1269   let uppercase = normalized.to_ascii_uppercase();
1270   if known_group_ids.contains(&uppercase) {
1271      return Some(uppercase);
1272   }
1273
1274   let candidate = extract_group_id_candidate(&normalized)?;
1275   known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279   let label = target.label.to_ascii_lowercase();
1280   let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281   let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283   if let Some(scope) = &group.scope {
1284      let scope = scope.as_str().to_ascii_lowercase();
1285      if label.contains(&scope) || workstream.contains(&scope) {
1286         score += 140;
1287      }
1288
1289      for segment in scope.split('/') {
1290         if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291            score += 45;
1292         }
1293      }
1294   }
1295
1296   for token in planning_text_tokens(&group.rationale) {
1297      if label.contains(&token) || workstream.contains(&token) {
1298         score += 16;
1299      }
1300   }
1301
1302   match group.commit_type.as_str() {
1303      "ci" if target.label.starts_with(".github/") => score += 120,
1304      "docs"
1305         if target.label.starts_with("docs/")
1306            || Path::new(&target.label)
1307               .extension()
1308               .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309      {
1310         score += 80;
1311      },
1312      "build" | "chore"
1313         if target.label.contains("Cargo")
1314            || target.label.contains("package")
1315            || target.label.contains("lock")
1316            || target.label.contains("tsconfig")
1317            || target.label.contains("biome")
1318            || target.label.contains("bun") =>
1319      {
1320         score += 55;
1321      },
1322      _ => {},
1323   }
1324
1325   score
1326}
1327
1328fn seed_group_targets(
1329   groups: &[ComposeIntentGroup],
1330   planning_index: &PlanningIndex,
1331   group_targets: &mut [Vec<String>],
1332   repair_notes: &mut Vec<String>,
1333) {
1334   let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336   for (group_idx, group) in groups.iter().enumerate() {
1337      if !group_targets[group_idx].is_empty() {
1338         continue;
1339      }
1340
1341      let fallback_target = planning_index
1342         .targets
1343         .iter()
1344         .max_by_key(|target| {
1345            let mut score = planning_target_match_score(target, group);
1346            if !claimed_target_ids.contains(&target.target_id) {
1347               score += 60;
1348            }
1349            (score, target.hunk_count, target.file_ids.len())
1350         })
1351         .or_else(|| planning_index.targets.first());
1352
1353      let Some(fallback_target) = fallback_target else {
1354         continue;
1355      };
1356
1357      group_targets[group_idx].push(fallback_target.target_id.clone());
1358      claimed_target_ids.insert(fallback_target.target_id.clone());
1359      repair_notes.push(format!(
1360         "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361         group.group_id, fallback_target.target_id, fallback_target.label
1362      ));
1363   }
1364}
1365
1366fn normalize_intent_plan(
1367   snapshot: &ComposeSnapshot,
1368   planning_index: &PlanningIndex,
1369   mut groups: Vec<ComposeIntentGroup>,
1370) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
1371   if groups.is_empty() {
1372      return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
1373   }
1374
1375   let known_target_ids: HashSet<&str> = planning_index
1376      .targets
1377      .iter()
1378      .map(|target| target.target_id.as_str())
1379      .collect();
1380   let mut repair_notes = Vec::new();
1381   let mut covered_file_ids = HashSet::new();
1382   let mut normalized_group_targets = Vec::with_capacity(groups.len());
1383
1384   for group in &groups {
1385      if group.file_ids.is_empty() {
1386         repair_notes.push(format!(
1387            "Compose planner left {} without planning targets; assigning targets heuristically",
1388            group.group_id
1389         ));
1390      }
1391
1392      let mut normalized_target_ids = Vec::new();
1393      let mut seen_target_ids = HashSet::new();
1394      for raw_target_ref in &group.file_ids {
1395         let normalized_ref = normalize_file_reference(raw_target_ref);
1396         let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
1397            normalized_ref.clone()
1398         } else {
1399            let uppercase_ref = normalized_ref.to_ascii_uppercase();
1400            if known_target_ids.contains(uppercase_ref.as_str()) {
1401               uppercase_ref
1402            } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
1403               if raw_target_ref != target_id {
1404                  repair_notes.push(format!(
1405                     "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
1406                  ));
1407               }
1408               target_id.clone()
1409            } else {
1410               repair_notes.push(format!(
1411                  "Dropped unknown planning target '{}' from {}",
1412                  raw_target_ref, group.group_id
1413               ));
1414               continue;
1415            }
1416         };
1417
1418         if seen_target_ids.insert(canonical_target_id.clone()) {
1419            normalized_target_ids.push(canonical_target_id);
1420         }
1421      }
1422
1423      normalized_group_targets.push(normalized_target_ids);
1424   }
1425
1426   seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);
1427
1428   let known_group_ids: HashSet<String> =
1429      groups.iter().map(|group| group.group_id.clone()).collect();
1430   for group in &mut groups {
1431      let mut normalized_dependencies = Vec::new();
1432      let mut seen_dependencies = HashSet::new();
1433
1434      for raw_dependency in &group.dependencies {
1435         let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
1436         else {
1437            repair_notes.push(format!(
1438               "Dropped unknown dependency '{}' from {}",
1439               raw_dependency, group.group_id
1440            ));
1441            continue;
1442         };
1443
1444         if dependency == group.group_id {
1445            repair_notes.push(format!(
1446               "Dropped self-dependency '{}' from {}",
1447               raw_dependency, group.group_id
1448            ));
1449            continue;
1450         }
1451
1452         if seen_dependencies.insert(dependency.clone()) {
1453            if raw_dependency != &dependency {
1454               repair_notes.push(format!(
1455                  "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
1456               ));
1457            }
1458            normalized_dependencies.push(dependency);
1459         }
1460      }
1461
1462      group.dependencies = normalized_dependencies;
1463   }
1464
1465   for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
1466      let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
1467      for file_id in &expanded_file_ids {
1468         covered_file_ids.insert(file_id.clone());
1469      }
1470      group.file_ids = expanded_file_ids;
1471   }
1472
1473   for file in &snapshot.files {
1474      if covered_file_ids.contains(file.file_id.as_str()) {
1475         continue;
1476      }
1477
1478      let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
1479      let target_group = &mut groups[target_group_idx];
1480      target_group.file_ids.push(file.file_id.clone());
1481      covered_file_ids.insert(file.file_id.clone());
1482      repair_notes.push(format!(
1483         "Compose planner omitted {} ({}); assigned it to {}",
1484         file.file_id, file.path, target_group.group_id
1485      ));
1486   }
1487
1488   Ok((groups, repair_notes))
1489}
1490
/// Derives the workstream key a path-like label belongs to.
///
/// Empty `/` segments are ignored. For labels under `.github`, `apps`,
/// `packages`, `crates`, `services`, `libs` or `pass`, the key is the first
/// two segments joined by `/` (or just the first when there is no second).
/// For every other label the key is the first segment. A label without any
/// non-empty segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
   const TWO_LEVEL_ROOTS: &[&str] =
      &[".github", "apps", "packages", "crates", "services", "libs", "pass"];

   let mut segments = label.split('/').filter(|segment| !segment.is_empty());
   let first = match segments.next() {
      Some(first) => first,
      None => return label.to_string(),
   };

   match segments.next() {
      Some(second) if TWO_LEVEL_ROOTS.contains(&first) => format!("{first}/{second}"),
      _ => first.to_string(),
   }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514   let key = workstream_key_for_label(label);
1515   match key.as_str() {
1516      ".github/workflows" => "CI workflows".to_string(),
1517      ".github" => "GitHub automation".to_string(),
1518      _ => key
1519         .split('/')
1520         .next_back()
1521         .map(|segment| segment.replace(['_', '-'], " "))
1522         .unwrap_or(key),
1523   }
1524}
1525
/// Turns arbitrary text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces collapse into a single `-`, never at the start. All other
/// characters are dropped. Trailing dashes are removed, and `None` is returned
/// when nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
   let mut slug = String::new();

   for ch in raw.trim().chars() {
      if ch.is_ascii_alphanumeric() {
         slug.push(ch.to_ascii_lowercase());
         continue;
      }

      // A dash is only ever pushed as a separator, so "ends with a dash"
      // means the previous emitted character was a separator.
      let is_separator = matches!(ch, '-' | '_' | '/' | '.' | ' ');
      if is_separator && !slug.is_empty() && !slug.ends_with('-') {
         slug.push('-');
      }
   }

   let fragment = slug.trim_matches('-');
   if fragment.is_empty() {
      None
   } else {
      Some(fragment.to_string())
   }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546   let key = workstream_key_for_label(label);
1547   let candidate = key
1548      .split('/')
1549      .next_back()
1550      .and_then(sanitize_scope_fragment)?;
1551   Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555   if labels.len() == 1 {
1556      let label = labels[0].as_str();
1557      let display = workstream_display_name(label);
1558      if label.starts_with("apps/") {
1559         return format!("{display} application updates");
1560      }
1561      if label.starts_with("packages/") {
1562         return format!("{display} package updates");
1563      }
1564      if label.starts_with("crates/") {
1565         return format!("{display} crate updates");
1566      }
1567      if label.starts_with(".github/") || label == ".github" {
1568         return format!("{display} updates");
1569      }
1570      return format!("{display} updates");
1571   }
1572
1573   let display_labels: Vec<String> = labels
1574      .iter()
1575      .take(3)
1576      .map(|label| workstream_display_name(label))
1577      .collect();
1578   format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582   snapshot: &ComposeSnapshot,
1583   labels: &[String],
1584   file_ids: &[String],
1585) -> Result<CommitType> {
1586   if labels
1587      .iter()
1588      .any(|label| label == ".github" || label.starts_with(".github/"))
1589   {
1590      return CommitType::new("ci");
1591   }
1592
1593   let files: Vec<&ComposeFile> = file_ids
1594      .iter()
1595      .filter_map(|file_id| snapshot.file_by_id(file_id))
1596      .collect();
1597   let all_docs = !files.is_empty()
1598      && files
1599         .iter()
1600         .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601   if all_docs {
1602      return CommitType::new("docs");
1603   }
1604
1605   let all_tests = !files.is_empty()
1606      && files
1607         .iter()
1608         .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609   if all_tests {
1610      return CommitType::new("test");
1611   }
1612
1613   let all_dependencies =
1614      !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615   if all_dependencies {
1616      return CommitType::new("build");
1617   }
1618
1619   let all_config = !files.is_empty()
1620      && files.iter().all(|file| {
1621         matches!(
1622            compose_file_category(file),
1623            ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624         )
1625      });
1626   if all_config {
1627      return CommitType::new("chore");
1628   }
1629
1630   CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634   snapshot
1635      .files
1636      .iter()
1637      .filter(|file| file_ids.contains(&file.file_id))
1638      .map(|file| file.file_id.clone())
1639      .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643   if groups.is_empty() {
1644      return false;
1645   }
1646
1647   let largest_group = groups
1648      .iter()
1649      .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650      .max()
1651      .unwrap_or_default();
1652
1653   groups.len() == 1
1654      || (groups.len() <= 2
1655         && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659   snapshot: &ComposeSnapshot,
1660   planning_index: &PlanningIndex,
1661   groups: &[ComposeIntentGroup],
1662   max_commits: usize,
1663) -> bool {
1664   if max_commits <= 1
1665      || planning_index.mode != PlanningMode::Area
1666      || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667      || !is_monolithic_intent_plan(snapshot, groups)
1668   {
1669      return false;
1670   }
1671
1672   let workstream_count = planning_index
1673      .targets
1674      .iter()
1675      .map(|target| workstream_key_for_label(&target.label))
1676      .collect::<HashSet<_>>()
1677      .len();
1678
1679   workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
1682fn build_large_patch_fallback_groups(
1683   snapshot: &ComposeSnapshot,
1684   planning_index: &PlanningIndex,
1685   max_commits: usize,
1686) -> Result<Vec<ComposeIntentGroup>> {
1687   #[derive(Debug, Clone)]
1688   struct WorkstreamGroup {
1689      label:    String,
1690      file_ids: HashSet<String>,
1691      weight:   usize,
1692   }
1693
1694   #[derive(Debug, Clone)]
1695   struct FallbackBin {
1696      labels:       Vec<String>,
1697      file_ids:     HashSet<String>,
1698      total_weight: usize,
1699   }
1700
1701   let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1702   for target in &planning_index.targets {
1703      let key = workstream_key_for_label(&target.label);
1704      let entry = workstreams
1705         .entry(key.clone())
1706         .or_insert_with(|| WorkstreamGroup {
1707            label:    key,
1708            file_ids: HashSet::new(),
1709            weight:   0,
1710         });
1711
1712      for file_id in &target.file_ids {
1713         entry.file_ids.insert(file_id.clone());
1714      }
1715      entry.weight = entry
1716         .weight
1717         .saturating_add(target.hunk_count.max(target.file_ids.len()));
1718   }
1719
1720   let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1721   workstreams.sort_by(|left, right| {
1722      right
1723         .weight
1724         .cmp(&left.weight)
1725         .then_with(|| left.label.cmp(&right.label))
1726   });
1727
1728   let bin_count = max_commits.min(workstreams.len());
1729   let mut bins: Vec<FallbackBin> = Vec::new();
1730   for workstream in workstreams {
1731      if bins.len() < bin_count {
1732         bins.push(FallbackBin {
1733            labels:       vec![workstream.label],
1734            file_ids:     workstream.file_ids,
1735            total_weight: workstream.weight,
1736         });
1737         continue;
1738      }
1739
1740      let Some((target_idx, _)) = bins
1741         .iter()
1742         .enumerate()
1743         .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1744      else {
1745         continue;
1746      };
1747
1748      let target_bin = &mut bins[target_idx];
1749      target_bin.labels.push(workstream.label);
1750      target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1751      target_bin.file_ids.extend(workstream.file_ids);
1752   }
1753
1754   let mut groups = Vec::new();
1755   for (idx, bin) in bins.into_iter().enumerate() {
1756      let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1757      let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1758      let scope = (bin.labels.len() == 1)
1759         .then(|| fallback_scope_for_label(&bin.labels[0]))
1760         .flatten();
1761      let rationale = fallback_rationale_for_labels(&bin.labels);
1762
1763      groups.push(ComposeIntentGroup {
1764         group_id: format!("G{}", idx + 1),
1765         commit_type,
1766         scope,
1767         file_ids: ordered_ids,
1768         rationale,
1769         dependencies: Vec::new(),
1770      });
1771   }
1772
1773   Ok(groups)
1774}
1775
1776#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
1777async fn analyze_compose_intent(
1778   snapshot: &ComposeSnapshot,
1779   observations: &[FileObservation],
1780   config: &CommitConfig,
1781   max_commits: usize,
1782   debug_dir: Option<&Path>,
1783) -> Result<ComposeIntentPlan> {
1784   let planning_index = build_planning_index(snapshot);
1785   let stat_summary = render_planning_stat(&planning_index);
1786   let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1787   let planning_targets = render_planning_targets(&planning_index, snapshot);
1788   let planning_notes = render_planning_notes(&planning_index);
1789   let split_bias = render_split_bias(&planning_index);
1790   let schema = build_intent_schema(config);
1791   let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1792      variant: "default",
1793      max_commits,
1794      stat: &stat_summary,
1795      snapshot_summary: &snapshot_summary,
1796      planning_targets: &planning_targets,
1797      planning_notes: &planning_notes,
1798      split_bias: &split_bias,
1799   })?;
1800
1801   let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1802      operation:        "compose/intent",
1803      model:            &config.analysis_model,
1804      max_tokens:       3000,
1805      temperature:      COMPOSE_PLANNER_TEMPERATURE,
1806      prompt_family:    "compose-intent",
1807      prompt_variant:   "default",
1808      system_prompt:    &parts.system,
1809      user_prompt:      &parts.user,
1810      tool_name:        "create_compose_intent_plan",
1811      tool_description: "Plan logical commit groups over the provided planning target IDs",
1812      schema:           &schema,
1813      progress_label:   Some("compose intent planner"),
1814      debug:            debug_dir.map(|dir| OneShotDebug {
1815         dir:    Some(dir),
1816         prefix: None,
1817         name:   "compose_intent",
1818      }),
1819      cacheable:        true,
1820   })
1821   .await?;
1822
1823   let (mut groups, repair_notes) =
1824      normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1825   for note in &repair_notes {
1826      eprintln!("{}", style::warning(note));
1827   }
1828   if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1829      eprintln!(
1830         "{}",
1831         style::warning(
1832            "Compose intent collapsed into a monolithic large-patch group; falling back to \
1833             path-based workstream splits."
1834         )
1835      );
1836      groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1837   }
1838   let dependency_order =
1839      compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1840
1841   Ok(ComposeIntentPlan { groups, dependency_order })
1842}
1843
1844#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1845fn should_collect_compose_observations(
1846   snapshot: &ComposeSnapshot,
1847   config: &CommitConfig,
1848   counter: &TokenCounter,
1849) -> bool {
1850   planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1851      && should_use_map_reduce(&snapshot.diff, config, counter)
1852}
1853
1854#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1855fn auto_assign_hunks(
1856   snapshot: &ComposeSnapshot,
1857   intent_plan: &ComposeIntentPlan,
1858) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1859   let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1860   for group in &intent_plan.groups {
1861      for file_id in &group.file_ids {
1862         groups_by_file
1863            .entry(file_id.as_str())
1864            .or_default()
1865            .push(group.group_id.as_str());
1866      }
1867   }
1868
1869   let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1870      .groups
1871      .iter()
1872      .map(|group| (group.group_id.clone(), BTreeSet::new()))
1873      .collect();
1874   let mut ambiguous = Vec::new();
1875
1876   for file in &snapshot.files {
1877      let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1878         return Err(CommitGenError::Other(format!(
1879            "No compose group claimed file {} ({})",
1880            file.file_id, file.path
1881         )));
1882      };
1883
1884      if candidate_group_ids.len() == 1 {
1885         let group_id = candidate_group_ids[0];
1886         let entry = assigned
1887            .get_mut(group_id)
1888            .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1889         for hunk_id in &file.hunk_ids {
1890            entry.insert(hunk_id.clone());
1891         }
1892      } else {
1893         ambiguous.push(AmbiguousFileBinding {
1894            file_id:             file.file_id.clone(),
1895            path:                file.path.clone(),
1896            candidate_group_ids: candidate_group_ids
1897               .iter()
1898               .map(|group_id| (*group_id).to_string())
1899               .collect(),
1900            hunk_ids:            file.hunk_ids.clone(),
1901         });
1902      }
1903   }
1904
1905   Ok((assigned, ambiguous))
1906}
1907
1908fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1909   let mut out = String::new();
1910   for group in groups {
1911      let scope = group
1912         .scope
1913         .as_ref()
1914         .map(|scope| format!("({})", scope.as_str()))
1915         .unwrap_or_default();
1916      writeln!(
1917         out,
1918         "- {} [{}{}] {}",
1919         group.group_id,
1920         group.commit_type.as_str(),
1921         scope,
1922         group.rationale
1923      )
1924      .unwrap();
1925   }
1926
1927   out
1928}
1929
1930fn render_binding_ambiguous_files(
1931   snapshot: &ComposeSnapshot,
1932   ambiguous_files: &[AmbiguousFileBinding],
1933) -> String {
1934   let mut out = String::new();
1935   for ambiguous_file in ambiguous_files {
1936      writeln!(
1937         out,
1938         "- {} {} candidates: {}",
1939         ambiguous_file.file_id,
1940         ambiguous_file.path,
1941         ambiguous_file.candidate_group_ids.join(", ")
1942      )
1943      .unwrap();
1944
1945      for hunk_id in &ambiguous_file.hunk_ids {
1946         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1947            if hunk.synthetic {
1948               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1949            } else {
1950               writeln!(
1951                  out,
1952                  "  - {} old:{} new:{} :: {}",
1953                  hunk.hunk_id,
1954                  format_line_range(hunk.old_start, hunk.old_count),
1955                  format_line_range(hunk.new_start, hunk.new_count),
1956                  hunk.snippet
1957               )
1958               .unwrap();
1959            }
1960         }
1961      }
1962   }
1963
1964   out
1965}
1966
1967async fn request_binding(
1968   snapshot: &ComposeSnapshot,
1969   groups: &[ComposeIntentGroup],
1970   ambiguous_files: &[AmbiguousFileBinding],
1971   config: &CommitConfig,
1972   debug_dir: Option<&Path>,
1973   debug_name: &str,
1974) -> Result<Vec<ComposeBindingAssignment>> {
1975   let schema = build_binding_schema();
1976   let groups_text = render_binding_groups(groups);
1977   let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1978   let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1979      variant:         "default",
1980      groups:          &groups_text,
1981      ambiguous_files: &ambiguous_files_text,
1982   })?;
1983   let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1984      operation:        "compose/bind",
1985      model:            &config.analysis_model,
1986      max_tokens:       2500,
1987      temperature:      COMPOSE_PLANNER_TEMPERATURE,
1988      prompt_family:    "compose-bind",
1989      prompt_variant:   "default",
1990      system_prompt:    &parts.system,
1991      user_prompt:      &parts.user,
1992      tool_name:        "bind_compose_hunks",
1993      tool_description: "Assign hunk IDs to existing compose groups",
1994      schema:           &schema,
1995      progress_label:   Some("compose hunk binder"),
1996      debug:            debug_dir.map(|dir| OneShotDebug {
1997         dir:    Some(dir),
1998         prefix: None,
1999         name:   debug_name,
2000      }),
2001      cacheable:        true,
2002   })
2003   .await?;
2004
2005   Ok(response.output.assignments)
2006}
2007
2008fn ambiguous_hunk_context(
2009   ambiguous_files: &[AmbiguousFileBinding],
2010) -> HashMap<String, AmbiguousHunkContext> {
2011   let mut context = HashMap::new();
2012   for ambiguous_file in ambiguous_files {
2013      for hunk_id in &ambiguous_file.hunk_ids {
2014         context.insert(hunk_id.clone(), AmbiguousHunkContext {
2015            candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2016         });
2017      }
2018   }
2019   context
2020}
2021
2022fn evaluate_binding(
2023   assignments: &[ComposeBindingAssignment],
2024   hunk_context: &HashMap<String, AmbiguousHunkContext>,
2025   valid_group_ids: &HashSet<&str>,
2026   snapshot: &ComposeSnapshot,
2027) -> BindingEvaluation {
2028   let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2029
2030   for assignment in assignments {
2031      if !valid_group_ids.contains(assignment.group_id.as_str()) {
2032         continue;
2033      }
2034
2035      let mut seen_in_group = HashSet::new();
2036      for hunk_id in &assignment.hunk_ids {
2037         if !seen_in_group.insert(hunk_id.as_str()) {
2038            continue;
2039         }
2040
2041         let Some(context) = hunk_context.get(hunk_id) else {
2042            continue;
2043         };
2044
2045         if !context
2046            .candidate_group_ids
2047            .iter()
2048            .any(|candidate| candidate == &assignment.group_id)
2049         {
2050            continue;
2051         }
2052
2053         match assigned_hunk_to_group.get(hunk_id) {
2054            None => {
2055               assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2056            },
2057            Some(existing_group) if existing_group == &assignment.group_id => {},
2058            Some(_) => {
2059               assigned_hunk_to_group.remove(hunk_id);
2060            },
2061         }
2062      }
2063   }
2064
2065   let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2066   for (hunk_id, group_id) in assigned_hunk_to_group {
2067      assigned_by_group.entry(group_id).or_default().push(hunk_id);
2068   }
2069
2070   for hunk_ids in assigned_by_group.values_mut() {
2071      let ordered: Vec<String> = snapshot
2072         .hunks
2073         .iter()
2074         .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2075         .map(|hunk| hunk.hunk_id.clone())
2076         .collect();
2077      *hunk_ids = ordered;
2078   }
2079
2080   let unresolved = snapshot
2081      .hunks
2082      .iter()
2083      .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2084      .filter(|hunk| {
2085         !assigned_by_group.values().any(|assigned_hunks| {
2086            assigned_hunks
2087               .iter()
2088               .any(|assigned| assigned == &hunk.hunk_id)
2089         })
2090      })
2091      .map(|hunk| hunk.hunk_id.clone())
2092      .collect();
2093
2094   BindingEvaluation { assigned: assigned_by_group, unresolved }
2095}
2096
2097fn filter_ambiguous_files(
2098   ambiguous_files: &[AmbiguousFileBinding],
2099   hunk_ids: &[String],
2100) -> Vec<AmbiguousFileBinding> {
2101   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2102
2103   ambiguous_files
2104      .iter()
2105      .filter_map(|file| {
2106         let matching_hunks: Vec<String> = file
2107            .hunk_ids
2108            .iter()
2109            .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2110            .cloned()
2111            .collect();
2112
2113         (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2114            file_id:             file.file_id.clone(),
2115            path:                file.path.clone(),
2116            candidate_group_ids: file.candidate_group_ids.clone(),
2117            hunk_ids:            matching_hunks,
2118         })
2119      })
2120      .collect()
2121}
2122
2123fn chunk_ambiguous_files(
2124   ambiguous_files: &[AmbiguousFileBinding],
2125) -> Vec<Vec<AmbiguousFileBinding>> {
2126   if ambiguous_files.is_empty() {
2127      return Vec::new();
2128   }
2129
2130   let mut batches = Vec::new();
2131   let mut current_batch = Vec::new();
2132   let mut current_hunk_count = 0_usize;
2133
2134   for file in ambiguous_files {
2135      let file_hunk_count = file.hunk_ids.len();
2136      let should_split = !current_batch.is_empty()
2137         && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2138            || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2139
2140      if should_split {
2141         batches.push(current_batch);
2142         current_batch = Vec::new();
2143         current_hunk_count = 0;
2144      }
2145
2146      current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2147      current_batch.push(file.clone());
2148   }
2149
2150   if !current_batch.is_empty() {
2151      batches.push(current_batch);
2152   }
2153
2154   batches
2155}
2156
2157fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2158   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2159
2160   snapshot
2161      .hunks
2162      .iter()
2163      .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2164      .map(|hunk| hunk.hunk_id.clone())
2165      .collect()
2166}
2167
2168fn fallback_group_for_hunk(
2169   hunk_id: &str,
2170   ambiguous_files: &[AmbiguousFileBinding],
2171   group_rank: &HashMap<&str, usize>,
2172) -> Option<String> {
2173   ambiguous_files.iter().find_map(|file| {
2174      file
2175         .hunk_ids
2176         .iter()
2177         .any(|candidate| candidate == hunk_id)
2178         .then(|| {
2179            file
2180               .candidate_group_ids
2181               .iter()
2182               .min_by_key(|group_id| {
2183                  group_rank
2184                     .get(group_id.as_str())
2185                     .copied()
2186                     .unwrap_or(usize::MAX)
2187               })
2188               .cloned()
2189         })
2190   })?
2191}
2192
2193fn assign_unresolved_hunks(
2194   unresolved_hunks: &[String],
2195   assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2196   ambiguous_files: &[AmbiguousFileBinding],
2197   group_rank: &HashMap<&str, usize>,
2198) {
2199   for hunk_id in unresolved_hunks {
2200      if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2201         && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2202      {
2203         group_hunks.insert(hunk_id.clone());
2204      }
2205   }
2206}
2207
2208fn normalize_group_type(
2209   snapshot: &ComposeSnapshot,
2210   file_ids: &[String],
2211   original_type: &CommitType,
2212) -> Result<CommitType> {
2213   let dependency_only = !file_ids.is_empty()
2214      && file_ids.iter().all(|file_id| {
2215         snapshot
2216            .file_by_id(file_id)
2217            .is_some_and(|file| is_dependency_manifest(&file.path))
2218      });
2219
2220   if dependency_only && original_type.as_str() != "build" {
2221      CommitType::new("build")
2222   } else {
2223      Ok(original_type.clone())
2224   }
2225}
2226
2227fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2228   snapshot
2229      .files
2230      .iter()
2231      .filter(|file| {
2232         hunk_ids
2233            .iter()
2234            .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2235      })
2236      .map(|file| file.file_id.clone())
2237      .collect()
2238}
2239
2240fn build_redirects(
2241   intent_plan: &ComposeIntentPlan,
2242   executable_groups: &[ComposeExecutableGroup],
2243   group_rank: &HashMap<&str, usize>,
2244) -> HashMap<String, String> {
2245   let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2246      .iter()
2247      .filter(|group| !group.hunk_ids.is_empty())
2248      .map(|group| (group.group_id.as_str(), group))
2249      .collect();
2250
2251   let mut redirects = HashMap::new();
2252   for group in &intent_plan.groups {
2253      if surviving_groups.contains_key(group.group_id.as_str()) {
2254         continue;
2255      }
2256
2257      let redirect = executable_groups
2258         .iter()
2259         .filter(|candidate| candidate.group_id != group.group_id)
2260         .filter(|candidate| {
2261            candidate.file_ids.iter().any(|file_id| {
2262               group
2263                  .file_ids
2264                  .iter()
2265                  .any(|candidate_id| candidate_id == file_id)
2266            })
2267         })
2268         .min_by_key(|candidate| {
2269            group_rank
2270               .get(candidate.group_id.as_str())
2271               .copied()
2272               .unwrap_or(usize::MAX)
2273         })
2274         .map(|candidate| candidate.group_id.clone());
2275
2276      if let Some(redirect) = redirect {
2277         redirects.insert(group.group_id.clone(), redirect);
2278      }
2279   }
2280
2281   redirects
2282}
2283
/// Follows `redirects` from `group_id` until a group without a redirect is
/// reached and returns that group's id.
///
/// If the chain loops back onto an id that was already followed, the walk stops
/// there and that id is returned. An id without a redirect is returned as is.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
   let mut visited: HashSet<&str> = HashSet::new();
   let mut cursor = group_id;

   loop {
      match redirects.get(cursor) {
         // First time this id is followed: step to its target.
         Some(target) if visited.insert(cursor) => cursor = target.as_str(),
         // No redirect, or a cycle was detected.
         _ => return cursor.to_string(),
      }
   }
}
2297
2298fn prune_empty_groups(
2299   groups: Vec<ComposeExecutableGroup>,
2300   redirects: &HashMap<String, String>,
2301) -> Result<ComposeExecutablePlan> {
2302   let surviving_ids: HashSet<String> = groups
2303      .iter()
2304      .filter(|group| !group.hunk_ids.is_empty())
2305      .map(|group| group.group_id.clone())
2306      .collect();
2307
2308   let mut surviving_groups = Vec::new();
2309   for mut group in groups {
2310      if group.hunk_ids.is_empty() {
2311         continue;
2312      }
2313
2314      let mut rewritten_dependencies = Vec::new();
2315      for dependency in &group.dependencies {
2316         let rewritten = resolve_redirect(dependency, redirects);
2317         if rewritten != group.group_id
2318            && surviving_ids.contains(&rewritten)
2319            && !rewritten_dependencies
2320               .iter()
2321               .any(|existing| existing == &rewritten)
2322         {
2323            rewritten_dependencies.push(rewritten);
2324         }
2325      }
2326
2327      group.dependencies = rewritten_dependencies;
2328      surviving_groups.push(group);
2329   }
2330
2331   let dependency_order = compute_dependency_order(
2332      &surviving_groups,
2333      |group| &group.group_id,
2334      |group| &group.dependencies,
2335   )?;
2336   Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2337}
2338
2339fn finalize_executable_plan(
2340   snapshot: &ComposeSnapshot,
2341   intent_plan: &ComposeIntentPlan,
2342   assigned_by_group: HashMap<String, BTreeSet<String>>,
2343) -> Result<ComposeExecutablePlan> {
2344   let group_rank: HashMap<&str, usize> = intent_plan
2345      .dependency_order
2346      .iter()
2347      .enumerate()
2348      .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2349      .collect();
2350
2351   let mut executable_groups = Vec::new();
2352   for group in &intent_plan.groups {
2353      let hunk_ids: Vec<String> = snapshot
2354         .hunks
2355         .iter()
2356         .filter(|hunk| {
2357            assigned_by_group
2358               .get(&group.group_id)
2359               .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2360         })
2361         .map(|hunk| hunk.hunk_id.clone())
2362         .collect();
2363
2364      let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2365      let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2366      executable_groups.push(ComposeExecutableGroup {
2367         group_id: group.group_id.clone(),
2368         commit_type,
2369         scope: group.scope.clone(),
2370         file_ids,
2371         rationale: group.rationale.clone(),
2372         dependencies: group.dependencies.clone(),
2373         hunk_ids,
2374      });
2375   }
2376
2377   let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2378   prune_empty_groups(executable_groups, &redirects)
2379}
2380
2381fn validate_executable_plan(
2382   snapshot: &ComposeSnapshot,
2383   plan: &ComposeExecutablePlan,
2384) -> Result<()> {
2385   if plan.groups.is_empty() {
2386      return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2387   }
2388
2389   let known_hunks: HashSet<&str> = snapshot
2390      .hunks
2391      .iter()
2392      .map(|hunk| hunk.hunk_id.as_str())
2393      .collect();
2394   let known_files: HashSet<&str> = snapshot
2395      .files
2396      .iter()
2397      .map(|file| file.file_id.as_str())
2398      .collect();
2399   let mut coverage = HashMap::<String, String>::new();
2400
2401   for group in &plan.groups {
2402      if group.hunk_ids.is_empty() {
2403         return Err(CommitGenError::Other(format!(
2404            "Compose group {} ended up empty after binding",
2405            group.group_id
2406         )));
2407      }
2408
2409      for file_id in &group.file_ids {
2410         if !known_files.contains(file_id.as_str()) {
2411            return Err(CommitGenError::Other(format!(
2412               "Compose group {} references unknown file_id {}",
2413               group.group_id, file_id
2414            )));
2415         }
2416      }
2417
2418      for hunk_id in &group.hunk_ids {
2419         if !known_hunks.contains(hunk_id.as_str()) {
2420            return Err(CommitGenError::Other(format!(
2421               "Compose group {} references unknown hunk_id {}",
2422               group.group_id, hunk_id
2423            )));
2424         }
2425
2426         if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2427            return Err(CommitGenError::Other(format!(
2428               "Hunk {} was assigned to both {} and {}",
2429               hunk_id, existing_group, group.group_id
2430            )));
2431         }
2432      }
2433   }
2434
2435   let missing_hunks: Vec<String> = snapshot
2436      .hunks
2437      .iter()
2438      .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2439      .map(|hunk| hunk.hunk_id.clone())
2440      .collect();
2441   if !missing_hunks.is_empty() {
2442      return Err(CommitGenError::Other(format!(
2443         "Compose plan left hunks unassigned: {}",
2444         missing_hunks.join(", ")
2445      )));
2446   }
2447
2448   let dependency_order =
2449      compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2450   if dependency_order != plan.dependency_order {
2451      return Err(CommitGenError::Other(
2452         "Compose dependency order does not match recomputed order".to_string(),
2453      ));
2454   }
2455
2456   Ok(())
2457}
2458
2459#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2460async fn bind_compose_plan(
2461   snapshot: &ComposeSnapshot,
2462   intent_plan: &ComposeIntentPlan,
2463   config: &CommitConfig,
2464   debug_dir: Option<&Path>,
2465) -> Result<ComposeExecutablePlan> {
2466   let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2467
2468   if !ambiguous_files.is_empty() {
2469      let valid_group_ids: HashSet<&str> = intent_plan
2470         .groups
2471         .iter()
2472         .map(|group| group.group_id.as_str())
2473         .collect();
2474      let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2475      let mut unresolved = Vec::new();
2476
2477      for (batch_idx, batch) in binding_batches.iter().enumerate() {
2478         let hunk_context = ambiguous_hunk_context(batch);
2479         let debug_name = if binding_batches.len() == 1 {
2480            "compose_bind".to_string()
2481         } else {
2482            format!("compose_bind_{:02}", batch_idx + 1)
2483         };
2484         let assignments =
2485            request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2486               .await?;
2487         let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2488         for (group_id, hunk_ids) in evaluation.assigned {
2489            let entry = assigned_by_group.entry(group_id).or_default();
2490            for hunk_id in hunk_ids {
2491               entry.insert(hunk_id);
2492            }
2493         }
2494         unresolved.extend(evaluation.unresolved);
2495      }
2496
2497      let group_rank: HashMap<&str, usize> = intent_plan
2498         .dependency_order
2499         .iter()
2500         .enumerate()
2501         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2502         .collect();
2503
2504      let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2505      if !unresolved.is_empty() {
2506         let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2507         let repair_batches = chunk_ambiguous_files(&unresolved_files);
2508         let mut repair_unresolved = Vec::new();
2509
2510         for (batch_idx, batch) in repair_batches.iter().enumerate() {
2511            let debug_name = if repair_batches.len() == 1 {
2512               "compose_bind_repair".to_string()
2513            } else {
2514               format!("compose_bind_repair_{:02}", batch_idx + 1)
2515            };
2516            let repair_assignments = request_binding(
2517               snapshot,
2518               &intent_plan.groups,
2519               batch,
2520               config,
2521               debug_dir,
2522               &debug_name,
2523            )
2524            .await?;
2525            let repair_context = ambiguous_hunk_context(batch);
2526            let repair =
2527               evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2528            for (group_id, hunk_ids) in repair.assigned {
2529               let entry = assigned_by_group.entry(group_id).or_default();
2530               for hunk_id in hunk_ids {
2531                  entry.insert(hunk_id);
2532               }
2533            }
2534
2535            repair_unresolved.extend(repair.unresolved);
2536         }
2537         unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2538
2539         if !unresolved.is_empty() {
2540            assign_unresolved_hunks(
2541               &unresolved,
2542               &mut assigned_by_group,
2543               &ambiguous_files,
2544               &group_rank,
2545            );
2546         }
2547      }
2548   }
2549
2550   let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2551   validate_executable_plan(snapshot, &plan)?;
2552   Ok(plan)
2553}
2554
2555fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2556   println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2557   for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2558      let group = &plan.groups[group_idx];
2559      let scope = group
2560         .scope
2561         .as_ref()
2562         .map(|scope| format!("({})", style::scope(scope.as_str())))
2563         .unwrap_or_default();
2564
2565      println!(
2566         "\n{}. {} [{}{}] {}",
2567         display_idx + 1,
2568         style::bold(&group.group_id),
2569         style::commit_type(group.commit_type.as_str()),
2570         scope,
2571         group.rationale
2572      );
2573
2574      println!("   Files:");
2575      for file_id in &group.file_ids {
2576         if let Some(file) = snapshot.file_by_id(file_id) {
2577            let selected_hunk_ids: Vec<&str> = group
2578               .hunk_ids
2579               .iter()
2580               .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2581               .map(String::as_str)
2582               .collect();
2583            let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2584               "all hunks".to_string()
2585            } else {
2586               selected_hunk_ids.join(", ")
2587            };
2588            println!("     - {} {} ({selection})", file.file_id, file.path);
2589         }
2590      }
2591
2592      if !group.dependencies.is_empty() {
2593         println!("   Depends on: {}", group.dependencies.join(", "));
2594      }
2595   }
2596}
2597
2598#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2599async fn generate_compose_group_analysis(
2600   stat: &str,
2601   diff: &str,
2602   group: &ComposeExecutableGroup,
2603   config: &CommitConfig,
2604   args: &Args,
2605   debug_prefix: &str,
2606   counter: &TokenCounter,
2607) -> Result<ConventionalAnalysis> {
2608   match compose_analysis_strategy(diff, config, counter) {
2609      ComposeAnalysisStrategy::MapReduce => {
2610         println!(
2611            "  {}",
2612            style::info(&format!(
2613               "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2614               group.group_id
2615            ))
2616         );
2617         run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2618      },
2619      strategy => {
2620         let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2621            eprintln!(
2622               "  {}",
2623               style::warning(&format!(
2624                  "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2625                  group.group_id
2626               ))
2627            );
2628            Cow::Owned(smart_truncate_diff(
2629               diff,
2630               compose_truncation_length(config),
2631               config,
2632               counter,
2633            ))
2634         } else {
2635            Cow::Borrowed(diff)
2636         };
2637
2638         let ctx = AnalysisContext {
2639            user_context:    Some(&group.rationale),
2640            recent_commits:  None,
2641            common_scopes:   None,
2642            project_context: None,
2643            debug_output:    args.debug_output.as_deref(),
2644            debug_prefix:    Some(debug_prefix),
2645         };
2646
2647         generate_conventional_analysis(
2648            stat,
2649            analysis_diff.as_ref(),
2650            &config.analysis_model,
2651            "",
2652            &ctx,
2653            config,
2654         )
2655         .await
2656      },
2657   }
2658}
2659
2660fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2661   let files: Vec<&str> = group
2662      .file_ids
2663      .iter()
2664      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2665      .collect();
2666
2667   if files.is_empty() {
2668      "no files resolved".to_string()
2669   } else {
2670      files.join(", ")
2671   }
2672}
2673
2674/// Hunk ids for `file_id` planned by every group up to and including the group
2675/// at `position` in the dependency order. Used to reconstruct a file's intended
2676/// index content at a given commit from its base, independent of apply order.
2677fn cumulative_file_hunk_ids(
2678   plan: &ComposeExecutablePlan,
2679   position: usize,
2680   snapshot: &ComposeSnapshot,
2681   file_id: &str,
2682) -> Vec<String> {
2683   let mut hunk_ids = Vec::new();
2684   for &group_idx in plan.dependency_order.iter().take(position + 1) {
2685      let Some(group) = plan.groups.get(group_idx) else {
2686         continue;
2687      };
2688      for hunk_id in &group.hunk_ids {
2689         if snapshot
2690            .hunk_by_id(hunk_id)
2691            .is_some_and(|hunk| hunk.file_id == file_id)
2692         {
2693            hunk_ids.push(hunk_id.clone());
2694         }
2695      }
2696   }
2697   hunk_ids
2698}
2699
2700#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2701pub async fn execute_compose(
2702   snapshot: &ComposeSnapshot,
2703   plan: &ComposeExecutablePlan,
2704   config: &CommitConfig,
2705   args: &Args,
2706   base_state: &ComposeBaseState,
2707) -> Result<Vec<String>> {
2708   let total = plan.dependency_order.len();
2709
2710   // Phase 1: derive each group's diff/stat from the immutable compose snapshot.
2711   // This avoids mutating the index while commit messages are prepared and keeps
2712   // later worktree edits out of already-planned commits.
2713   let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2714   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2715      let group = &plan.groups[group_idx];
2716      println!(
2717         "  {}",
2718         style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2719      );
2720      let group_patch = create_executable_group_patch(snapshot, group)?;
2721      group_diff_stats.push((group_patch.diff, group_patch.stat));
2722   }
2723
2724   // Phase 2: generate commit messages concurrently. Both LLM calls per group
2725   // (analysis + summary) run inside a single async task so the slower of the
2726   // two does not block other groups from progressing.
2727   println!(
2728      "{}",
2729      style::info(&format!(
2730         "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2731         COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2732      ))
2733   );
2734
2735   let token_counter = create_token_counter(config);
2736   let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2737      stream::iter(plan.dependency_order.iter().enumerate())
2738         .map(|(idx, &group_idx)| {
2739            let group = &plan.groups[group_idx];
2740            let (diff, stat) = &group_diff_stats[idx];
2741            let debug_prefix = format!("compose-{}", idx + 1);
2742            let token_counter = &token_counter;
2743            async move {
2744               let result = async {
2745                  let analysis = generate_compose_group_analysis(
2746                     stat,
2747                     diff,
2748                     group,
2749                     config,
2750                     args,
2751                     &debug_prefix,
2752                     token_counter,
2753                  )
2754                  .await?;
2755                  let body = analysis.body_texts();
2756                  let summary = generate_summary_from_analysis(
2757                     stat,
2758                     group.commit_type.as_str(),
2759                     group.scope.as_ref().map(|scope| scope.as_str()),
2760                     &body,
2761                     Some(&group.rationale),
2762                     config,
2763                     args.debug_output.as_deref(),
2764                     Some(&debug_prefix),
2765                  )
2766                  .await?;
2767                  Ok::<_, CommitGenError>((body, summary))
2768               }
2769               .await;
2770
2771               result.map_err(|source| CommitGenError::ComposeMessageError {
2772                  group_id: group.group_id.clone(),
2773                  files:    compose_group_file_list(snapshot, group),
2774                  source:   Box::new(source),
2775               })
2776            }
2777         })
2778         .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2779         .collect::<Vec<_>>()
2780         .await
2781         .into_iter()
2782         .collect::<Result<Vec<_>>>()?;
2783
2784   execute_compose_with_prepared_messages(
2785      snapshot,
2786      plan,
2787      config,
2788      args,
2789      base_state,
2790      prepared_messages,
2791   )
2792}
2793
2794#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2795fn execute_compose_with_prepared_messages(
2796   snapshot: &ComposeSnapshot,
2797   plan: &ComposeExecutablePlan,
2798   config: &CommitConfig,
2799   args: &Args,
2800   base_state: &ComposeBaseState,
2801   prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2802) -> Result<Vec<String>> {
2803   let dir = &args.dir;
2804   let total = plan.dependency_order.len();
2805   if args.compose_preview {
2806      return Ok(Vec::new());
2807   }
2808
2809   let index = TempGitIndex::new(dir)?;
2810   read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2811
2812   let mut commit_hashes = Vec::new();
2813   let mut parent_hash = base_state.head_hash.clone();
2814
2815   // Phase 3: sequential commit-object loop. Re-stage each group into an
2816   // isolated temporary index, then create commit objects parented in memory.
2817   // The real branch and index are not updated until every group succeeds.
2818   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2819      let group = &plan.groups[group_idx];
2820
2821      println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2822      println!("  Type: {}", style::commit_type(group.commit_type.as_str()));
2823      if let Some(scope) = &group.scope {
2824         println!("  Scope: {}", style::scope(scope.as_str()));
2825      }
2826      let paths: Vec<String> = group
2827         .file_ids
2828         .iter()
2829         .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2830         .collect();
2831      println!("  Files: {}", paths.join(", "));
2832
2833      let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2834      let mut staged_anything = outcome.result == StageResult::Staged;
2835
2836      // Any file whose planned patch no longer applies against the temporary
2837      // index is reconstructed from the immutable snapshot base and cumulative
2838      // hunk selection. The real index and worktree are never touched here.
2839      for skipped in &outcome.skipped {
2840         let Some(file) = snapshot.file_by_path(&skipped.path) else {
2841            continue;
2842         };
2843         let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2844         force_stage_file_from_base_in_index(
2845            snapshot,
2846            &file.file_id,
2847            &cumulative,
2848            dir,
2849            index.path(),
2850         )?;
2851         staged_anything = true;
2852         eprintln!(
2853            "  {}",
2854            style::info(&format!(
2855               "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2856               skipped.path
2857            ))
2858         );
2859      }
2860
2861      if !staged_anything {
2862         eprintln!(
2863            "  {}",
2864            style::warning(&format!(
2865               "Skipping commit {}: its planned patch is already applied ({:?})",
2866               group.group_id, outcome.result
2867            ))
2868         );
2869         continue;
2870      }
2871
2872      let (analysis_body, summary) = prepared_messages[idx].clone();
2873      let mut commit = ConventionalCommit {
2874         commit_type: group.commit_type.clone(),
2875         scope: group.scope.clone(),
2876         summary,
2877         body: analysis_body,
2878         footers: vec![],
2879      };
2880      post_process_commit_message(&mut commit, config);
2881
2882      if let Err(err) = validate_commit_message(&commit, config) {
2883         eprintln!(
2884            "  {}",
2885            style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2886         );
2887      }
2888
2889      let mut formatted_message = format_commit_message(&commit);
2890      if args.signoff || config.signoff {
2891         formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2892      }
2893      println!(
2894         "  Message:\n{}",
2895         formatted_message
2896            .lines()
2897            .take(3)
2898            .collect::<Vec<_>>()
2899            .join("\n")
2900      );
2901
2902      let tree = write_index_tree(index.path(), dir)?;
2903      let sign = args.sign || config.gpg_sign;
2904      let hash = commit_tree(&tree, &parent_hash, &formatted_message, dir, sign)?;
2905      parent_hash.clone_from(&hash);
2906      commit_hashes.push(hash);
2907
2908      if args.compose_test_after_each {
2909         return Err(CommitGenError::Other(
2910            "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2911         ));
2912      }
2913   }
2914
2915   if commit_hashes.is_empty() {
2916      return Ok(commit_hashes);
2917   }
2918
2919   let current_index_tree = write_real_index_tree(dir)?;
2920   if current_index_tree != base_state.index_tree {
2921      return Err(CommitGenError::Other(
2922         "Real git index changed during compose; aborting before updating HEAD".to_string(),
2923      ));
2924   }
2925
2926   update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2927   reset_mixed_to(&parent_hash, dir)?;
2928
2929   Ok(commit_hashes)
2930}
2931
2932#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2933pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2934   let max_rounds = config.compose_max_rounds;
2935
2936   for round in 1..=max_rounds {
2937      if round > 1 {
2938         println!(
2939            "\n{}",
2940            style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2941         );
2942      } else {
2943         println!("{}", style::section_header("Compose Mode", 80));
2944      }
2945      println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2946
2947      run_compose_round(args, config, round).await?;
2948
2949      if args.compose_preview {
2950         break;
2951      }
2952
2953      match get_compose_diff(&args.dir) {
2954         Err(CommitGenError::NoChanges { .. }) => {
2955            println!(
2956               "\n{}",
2957               style::success(&format!(
2958                  "{} All changes committed successfully",
2959                  style::icons::SUCCESS
2960               ))
2961            );
2962            break;
2963         },
2964         Err(err) => return Err(err),
2965         Ok(remaining_diff) => {
2966            eprintln!(
2967               "\n{}",
2968               style::warning(&format!(
2969                  "{} Uncommitted changes remain after round {round}",
2970                  style::icons::WARNING
2971               ))
2972            );
2973            eprintln!("{remaining_diff}");
2974         },
2975      }
2976
2977      if round < max_rounds {
2978         eprintln!("{}", style::info("Starting another compose round..."));
2979      } else {
2980         eprintln!(
2981            "{}",
2982            style::warning(&format!(
2983               "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2984            ))
2985         );
2986      }
2987   }
2988
2989   Ok(())
2990}
2991
2992#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2993async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2994   let base_state = capture_compose_base_state(&args.dir)?;
2995   let diff = get_compose_diff(&args.dir)?;
2996   let stat = get_compose_stat(&args.dir)?;
2997   let snapshot = build_compose_snapshot(&diff, &stat)?;
2998
2999   if let Some(debug_dir) = args.debug_output.as_deref() {
3000      save_debug_artifact(
3001         Some(debug_dir),
3002         &format!("compose_round_{round}_snapshot.json"),
3003         &snapshot,
3004      )?;
3005   }
3006
3007   let token_counter = create_token_counter(config);
3008   let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3009      println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3010      observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3011   } else {
3012      if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3013         && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3014      {
3015         println!(
3016            "{}",
3017            style::info(
3018               "Skipping per-file observations for very large compose snapshot; using area-level \
3019                planning instead."
3020            )
3021         );
3022      }
3023      Vec::new()
3024   };
3025
3026   if let Some(debug_dir) = args.debug_output.as_deref()
3027      && !observations.is_empty()
3028   {
3029      save_debug_artifact(
3030         Some(debug_dir),
3031         &format!("compose_round_{round}_observations.json"),
3032         &observations,
3033      )?;
3034   }
3035
3036   let max_commits = args.compose_max_commits.unwrap_or(20);
3037   let executable_plan = if let Some(cached_plan) =
3038      load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3039   {
3040      println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3041      cached_plan
3042   } else {
3043      println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3044      let intent_plan = analyze_compose_intent(
3045         &snapshot,
3046         &observations,
3047         config,
3048         max_commits,
3049         args.debug_output.as_deref(),
3050      )
3051      .await?;
3052
3053      if let Some(debug_dir) = args.debug_output.as_deref() {
3054         save_debug_artifact(
3055            Some(debug_dir),
3056            &format!("compose_round_{round}_intent_plan.json"),
3057            &intent_plan,
3058         )?;
3059      }
3060
3061      println!("{}", style::info("Binding hunks to groups..."));
3062      let plan =
3063         bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3064      save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3065      plan
3066   };
3067
3068   if let Some(debug_dir) = args.debug_output.as_deref() {
3069      save_debug_artifact(
3070         Some(debug_dir),
3071         &format!("compose_round_{round}_executable_plan.json"),
3072         &executable_plan,
3073      )?;
3074   }
3075
3076   print_executable_plan(&snapshot, &executable_plan);
3077
3078   if args.compose_preview {
3079      println!(
3080         "\n{}",
3081         style::success(&format!(
3082            "{} Preview complete (use --compose without --compose-preview to execute)",
3083            style::icons::SUCCESS
3084         ))
3085      );
3086      return Ok(());
3087   }
3088
3089   println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3090   let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3091   println!(
3092      "{}",
3093      style::success(&format!(
3094         "{} Round {round}: Created {} commit(s)",
3095         style::icons::SUCCESS,
3096         hashes.len()
3097      ))
3098   );
3099   Ok(())
3100}
3101
3102#[cfg(test)]
3103mod tests {
3104   use std::{fmt::Write, fs};
3105
3106   use tempfile::TempDir;
3107
3108   use super::*;
3109   use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3110
3111   fn shared_file_diff() -> (&'static str, &'static str) {
3112      (
3113         r#"diff --git a/src/lib.rs b/src/lib.rs
3114index 1111111..2222222 100644
3115--- a/src/lib.rs
3116+++ b/src/lib.rs
3117@@ -1,3 +1,3 @@
3118-fn alpha() {
3119+fn alpha_changed() {
3120     println!("alpha");
3121 }
3122@@ -12,3 +12,3 @@
3123-fn beta() {
3124+fn beta_changed() {
3125     println!("beta");
3126 }
3127diff --git a/tests/lib.rs b/tests/lib.rs
3128index 3333333..4444444 100644
3129--- a/tests/lib.rs
3130+++ b/tests/lib.rs
3131@@ -1,3 +1,4 @@
3132 fn test_it() {
3133+    assert!(true);
3134 }
3135"#,
3136         " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3137      )
3138   }
3139
3140   fn build_test_snapshot() -> ComposeSnapshot {
3141      let (diff, stat) = shared_file_diff();
3142      build_compose_snapshot(diff, stat).unwrap()
3143   }
3144
3145   fn write_file(dir: &TempDir, path: &str, contents: &str) {
3146      let full_path = dir.path().join(path);
3147      if let Some(parent) = full_path.parent() {
3148         fs::create_dir_all(parent).unwrap();
3149      }
3150      fs::write(full_path, contents).unwrap();
3151   }
3152
3153   fn run_git(dir: &TempDir, args: &[&str]) -> String {
3154      let output = crate::git::git_command()
3155         .args(args)
3156         .current_dir(dir.path())
3157         .output()
3158         .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3159
3160      assert!(
3161         output.status.success(),
3162         "git {:?} failed: stdout={} stderr={}",
3163         args,
3164         String::from_utf8_lossy(&output.stdout),
3165         String::from_utf8_lossy(&output.stderr)
3166      );
3167
3168      String::from_utf8_lossy(&output.stdout).to_string()
3169   }
3170
3171   fn init_repo() -> TempDir {
3172      let dir = TempDir::new().unwrap();
3173      run_git(&dir, &["init"]);
3174      run_git(&dir, &["config", "user.name", "Compose Test"]);
3175      run_git(&dir, &["config", "user.email", "compose@test.local"]);
3176      run_git(&dir, &["config", "commit.gpgsign", "false"]);
3177      dir
3178   }
3179
3180   fn commit_all(dir: &TempDir, message: &str) {
3181      run_git(dir, &["add", "."]);
3182      run_git(dir, &["commit", "-m", message]);
3183   }
3184
3185   fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3186      (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3187   }
3188
3189   #[test]
3190   fn test_compose_file_category_treats_prompts_as_functional_source() {
3191      let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3192index 1111111..2222222 100644
3193--- a/prompts/analysis/default.md
3194+++ b/prompts/analysis/default.md
3195@@ -1,1 +1,1 @@
3196-old prompt
3197+new prompt
3198diff --git a/system/analysis/default.md b/system/analysis/default.md
3199index 5555555..6666666 100644
3200--- a/system/analysis/default.md
3201+++ b/system/analysis/default.md
3202@@ -1,1 +1,1 @@
3203-old system
3204+new system
3205diff --git a/README.md b/README.md
3206index 3333333..4444444 100644
3207--- a/README.md
3208+++ b/README.md
3209@@ -1,1 +1,1 @@
3210-old docs
3211+new docs
3212";
3213      let snapshot = build_compose_snapshot(diff, "").unwrap();
3214      let prompt_file = snapshot
3215         .file_by_path("prompts/analysis/default.md")
3216         .unwrap();
3217      let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3218      let readme_file = snapshot.file_by_path("README.md").unwrap();
3219
3220      assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3221      assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3222      assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3223
3224      let feat_group = ComposeIntentGroup {
3225         group_id:     "G1".to_string(),
3226         commit_type:  CommitType::new("feat").unwrap(),
3227         scope:        None,
3228         file_ids:     vec![prompt_file.file_id.clone()],
3229         rationale:    "prompt behavior change".to_string(),
3230         dependencies: vec![],
3231      };
3232      assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3233
3234      let fallback_type =
3235         fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3236            .unwrap();
3237      assert_eq!(fallback_type.as_str(), "refactor");
3238   }
3239
3240   fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3241      let mut diff = String::new();
3242
3243      for file_idx in 0..file_count {
3244         let path = format!("src/module_{file_idx:03}.rs");
3245         writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3246         diff.push_str("index 1111111..2222222 100644\n");
3247         writeln!(diff, "--- a/{path}").unwrap();
3248         writeln!(diff, "+++ b/{path}").unwrap();
3249
3250         for hunk_idx in 0..hunks_per_file {
3251            let line_no = (hunk_idx * 4) + 1;
3252            writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3253            writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3254            writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3255         }
3256      }
3257
3258      build_compose_snapshot(&diff, "").unwrap()
3259   }
3260
3261   fn build_multi_area_snapshot() -> ComposeSnapshot {
3262      let mut diff = String::new();
3263      let areas = [
3264         ("apps/frontend/src/server", 72),
3265         ("packages/model/src/models", 54),
3266         ("apps/daemon/src/worker", 43),
3267         (".github/workflows", 16),
3268      ];
3269
3270      for (prefix, count) in areas {
3271         for file_idx in 0..count {
3272            let path = format!("{prefix}/file_{file_idx:03}.rs");
3273            writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3274            diff.push_str("index 1111111..2222222 100644\n");
3275            writeln!(diff, "--- a/{path}").unwrap();
3276            writeln!(diff, "+++ b/{path}").unwrap();
3277            diff.push_str("@@ -1,1 +1,1 @@\n");
3278            writeln!(diff, "-old_{file_idx}").unwrap();
3279            writeln!(diff, "+new_{file_idx}").unwrap();
3280         }
3281      }
3282
3283      build_compose_snapshot(&diff, "").unwrap()
3284   }
3285
3286   fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3287      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3288      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3289      let groups = vec![
3290         ComposeIntentGroup {
3291            group_id:     "G1".to_string(),
3292            commit_type:  CommitType::new("refactor").unwrap(),
3293            scope:        None,
3294            file_ids:     vec![source_file.file_id.clone(), test_file.file_id.clone()],
3295            rationale:    "implementation group".to_string(),
3296            dependencies: vec![],
3297         },
3298         ComposeIntentGroup {
3299            group_id:     "G2".to_string(),
3300            commit_type:  CommitType::new("refactor").unwrap(),
3301            scope:        None,
3302            file_ids:     vec![source_file.file_id.clone()],
3303            rationale:    "shared file follow-up".to_string(),
3304            dependencies: vec!["G1".to_string()],
3305         },
3306      ];
3307      let dependency_order =
3308         compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3309            .unwrap();
3310      ComposeIntentPlan { groups, dependency_order }
3311   }
3312
3313   #[test]
3314   fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3315      let dir = init_repo();
3316      write_file(&dir, "src/a.rs", "fn a() {}\n");
3317      write_file(&dir, "src/b.rs", "fn b() {}\n");
3318      commit_all(&dir, "initial");
3319      write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3320      write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3321
3322      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3323      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3324      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3325      let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3326      let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3327      let plan = ComposeExecutablePlan {
3328         groups:           vec![
3329            ComposeExecutableGroup {
3330               group_id:     "G1".to_string(),
3331               commit_type:  CommitType::new("refactor").unwrap(),
3332               scope:        None,
3333               file_ids:     vec![a_file.file_id.clone()],
3334               rationale:    "change a".to_string(),
3335               dependencies: vec![],
3336               hunk_ids:     a_file.hunk_ids.clone(),
3337            },
3338            ComposeExecutableGroup {
3339               group_id:     "G2".to_string(),
3340               commit_type:  CommitType::new("refactor").unwrap(),
3341               scope:        None,
3342               file_ids:     vec![b_file.file_id.clone()],
3343               rationale:    "change b".to_string(),
3344               dependencies: vec!["G1".to_string()],
3345               hunk_ids:     b_file.hunk_ids.clone(),
3346            },
3347         ],
3348         dependency_order: vec![0, 1],
3349      };
3350      let config = CommitConfig::default();
3351      let args = Args {
3352         dir: dir.path().to_string_lossy().to_string(),
3353         compose: true,
3354         ..Default::default()
3355      };
3356      let base_state = capture_compose_base_state(&args.dir).unwrap();
3357
3358      let hashes = execute_compose_with_prepared_messages(
3359         &snapshot,
3360         &plan,
3361         &config,
3362         &args,
3363         &base_state,
3364         vec![canned_message("change a"), canned_message("change b")],
3365      )
3366      .unwrap();
3367
3368      assert_eq!(hashes.len(), 2);
3369      assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3370      assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3371   }
3372
3373   #[test]
3374   fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3375      let dir = init_repo();
3376      write_file(&dir, "src/lib.rs", "old\n");
3377      write_file(&dir, "sentinel.txt", "base\n");
3378      commit_all(&dir, "initial");
3379      let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3380
3381      // A real change so the snapshot is valid.
3382      write_file(&dir, "src/lib.rs", "changed\n");
3383
3384      // A pre-existing staged change that MUST survive a failed compose run.
3385      write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3386      run_git(&dir, &["add", "sentinel.txt"]);
3387      let staged_before = run_git(&dir, &["diff", "--cached"]);
3388      assert!(staged_before.contains("staged sentinel"));
3389
3390      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3391      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3392      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3393      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3394      // The plan references a hunk id that does not exist, so staging fails
3395      // before any commit object is created or any ref is updated.
3396      let plan = ComposeExecutablePlan {
3397         groups:           vec![ComposeExecutableGroup {
3398            group_id:     "G1".to_string(),
3399            commit_type:  CommitType::new("fix").unwrap(),
3400            scope:        None,
3401            file_ids:     vec![source_file.file_id.clone()],
3402            rationale:    "unstageable group".to_string(),
3403            dependencies: vec![],
3404            hunk_ids:     vec!["F999-H001".to_string()],
3405         }],
3406         dependency_order: vec![0],
3407      };
3408      let config = CommitConfig::default();
3409      let args = Args {
3410         dir: dir.path().to_string_lossy().to_string(),
3411         compose: true,
3412         ..Default::default()
3413      };
3414      let base_state = capture_compose_base_state(&args.dir).unwrap();
3415
3416      let err = execute_compose_with_prepared_messages(
3417         &snapshot,
3418         &plan,
3419         &config,
3420         &args,
3421         &base_state,
3422         vec![canned_message("unstageable group")],
3423      )
3424      .unwrap_err();
3425
3426      assert!(err.to_string().contains("unknown hunk id"));
3427      assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3428      assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3429   }
3430
3431   #[test]
3432   fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3433      let snapshot = build_test_snapshot();
3434      let intent_plan = build_shared_intent_plan(&snapshot);
3435      let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3436
3437      assert_eq!(ambiguous.len(), 1);
3438      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3439      let assigned_to_g1 = assigned.get("G1").unwrap();
3440      assert!(
3441         test_file
3442            .hunk_ids
3443            .iter()
3444            .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3445         "uniquely owned file should be auto-assigned"
3446      );
3447   }
3448
3449   #[test]
3450   fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3451      let snapshot = build_test_snapshot();
3452      let intent_plan = build_shared_intent_plan(&snapshot);
3453      let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3454      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3455      let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3456      let valid_group_ids: HashSet<&str> = intent_plan
3457         .groups
3458         .iter()
3459         .map(|group| group.group_id.as_str())
3460         .collect();
3461
3462      let evaluation = evaluate_binding(
3463         &[
3464            ComposeBindingAssignment {
3465               group_id: "G1".to_string(),
3466               hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3467            },
3468            ComposeBindingAssignment {
3469               group_id: "G2".to_string(),
3470               hunk_ids: vec![source_file.hunk_ids[1].clone()],
3471            },
3472         ],
3473         &hunk_context,
3474         &valid_group_ids,
3475         &snapshot,
3476      );
3477
3478      for (group_id, hunk_ids) in evaluation.assigned {
3479         let entry = assigned.entry(group_id).or_default();
3480         for hunk_id in hunk_ids {
3481            entry.insert(hunk_id);
3482         }
3483      }
3484
3485      let group_rank: HashMap<&str, usize> = intent_plan
3486         .dependency_order
3487         .iter()
3488         .enumerate()
3489         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3490         .collect();
3491      assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3492
3493      let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3494      assert_eq!(executable_plan.groups.len(), 1);
3495      assert_eq!(executable_plan.groups[0].group_id, "G1");
3496      assert!(
3497         source_file
3498            .hunk_ids
3499            .iter()
3500            .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3501         "fallback should keep every hunk from the shared file in the surviving group"
3502      );
3503   }
3504
3505   #[test]
3506   fn test_validate_executable_plan_rejects_overlap() {
3507      let snapshot = build_test_snapshot();
3508      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3509      let executable_plan = ComposeExecutablePlan {
3510         groups:           vec![
3511            ComposeExecutableGroup {
3512               group_id:     "G1".to_string(),
3513               commit_type:  CommitType::new("refactor").unwrap(),
3514               scope:        None,
3515               file_ids:     vec![source_file.file_id.clone()],
3516               rationale:    "group one".to_string(),
3517               dependencies: vec![],
3518               hunk_ids:     vec![source_file.hunk_ids[0].clone()],
3519            },
3520            ComposeExecutableGroup {
3521               group_id:     "G2".to_string(),
3522               commit_type:  CommitType::new("refactor").unwrap(),
3523               scope:        None,
3524               file_ids:     vec![source_file.file_id.clone()],
3525               rationale:    "group two".to_string(),
3526               dependencies: vec![],
3527               hunk_ids:     vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3528            },
3529         ],
3530         dependency_order: vec![0, 1],
3531      };
3532
3533      let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3534      assert!(err.to_string().contains("assigned to both"));
3535   }
3536
3537   #[test]
3538   fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3539      let snapshot = build_test_snapshot();
3540      let planning_index = build_planning_index(&snapshot);
3541      let groups = vec![ComposeIntentGroup {
3542         group_id:     "G1".to_string(),
3543         commit_type:  CommitType::new("refactor").unwrap(),
3544         scope:        None,
3545         file_ids:     vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3546         rationale:    "normalize file references".to_string(),
3547         dependencies: vec![],
3548      }];
3549
3550      let (normalized_groups, repair_notes) =
3551         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3552
3553      assert_eq!(normalized_groups.len(), 1);
3554      assert_eq!(
3555         normalized_groups[0].file_ids,
3556         snapshot
3557            .files
3558            .iter()
3559            .map(|file| file.file_id.clone())
3560            .collect::<Vec<_>>()
3561      );
3562      assert_eq!(repair_notes.len(), 2);
3563   }
3564
3565   #[test]
3566   fn test_normalize_intent_plan_repairs_missing_files() {
3567      let snapshot = build_test_snapshot();
3568      let planning_index = build_planning_index(&snapshot);
3569      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3570      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3571      let groups = vec![ComposeIntentGroup {
3572         group_id:     "G1".to_string(),
3573         commit_type:  CommitType::new("refactor").unwrap(),
3574         scope:        None,
3575         file_ids:     vec![source_file.file_id.clone()],
3576         rationale:    "partial coverage".to_string(),
3577         dependencies: vec![],
3578      }];
3579
3580      let (normalized_groups, repair_notes) =
3581         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3582
3583      assert_eq!(normalized_groups.len(), 1);
3584      assert!(
3585         normalized_groups[0].file_ids.contains(&source_file.file_id),
3586         "existing file assignment should be preserved"
3587      );
3588      assert!(
3589         normalized_groups[0].file_ids.contains(&test_file.file_id),
3590         "missing files should be assigned to an existing group"
3591      );
3592      assert_eq!(repair_notes.len(), 1);
3593      assert!(repair_notes[0].contains(&test_file.file_id));
3594   }
3595
3596   #[test]
3597   fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3598      let snapshot = build_multi_area_snapshot();
3599      let planning_index = build_planning_index(&snapshot);
3600      let frontend_target = planning_index
3601         .targets
3602         .iter()
3603         .find(|target| target.label.starts_with("apps/frontend"))
3604         .unwrap();
3605      let model_target = planning_index
3606         .targets
3607         .iter()
3608         .find(|target| target.label.starts_with("packages/model"))
3609         .unwrap();
3610      let groups = vec![
3611         ComposeIntentGroup {
3612            group_id:     "G1".to_string(),
3613            commit_type:  CommitType::new("refactor").unwrap(),
3614            scope:        Scope::new("apps/frontend").ok(),
3615            file_ids:     vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3616            rationale:    "frontend platform updates".to_string(),
3617            dependencies: vec!["group 2".to_string(), "G1".to_string()],
3618         },
3619         ComposeIntentGroup {
3620            group_id:     "G2".to_string(),
3621            commit_type:  CommitType::new("refactor").unwrap(),
3622            scope:        Scope::new("packages/model").ok(),
3623            file_ids:     vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3624            rationale:    "model storage updates".to_string(),
3625            dependencies: vec!["F5".to_string()],
3626         },
3627      ];
3628
3629      let (normalized_groups, repair_notes) =
3630         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3631
3632      assert_eq!(normalized_groups.len(), 2);
3633      assert!(
3634         normalized_groups[0]
3635            .file_ids
3636            .iter()
3637            .all(|file_id| file_id.starts_with('F'))
3638      );
3639      assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3640      assert!(normalized_groups[1].dependencies.is_empty());
3641      assert!(
3642         repair_notes
3643            .iter()
3644            .any(|note| note.contains("Dropped unknown planning target"))
3645      );
3646      assert!(
3647         repair_notes
3648            .iter()
3649            .any(|note| note.contains("Dropped self-dependency"))
3650      );
3651      assert!(
3652         repair_notes
3653            .iter()
3654            .any(|note| note.contains("Mapped compose planner dependency"))
3655      );
3656      assert!(
3657         repair_notes
3658            .iter()
3659            .any(|note| note.contains("Dropped unknown dependency"))
3660      );
3661   }
3662
3663   #[test]
3664   fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3665      let snapshot = build_test_snapshot();
3666      let summary = render_snapshot_summary(&snapshot, &[]);
3667      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3668
3669      assert!(!summary.contains("# snapshot compacted"));
3670      for hunk_id in &source_file.hunk_ids {
3671         assert!(summary.contains(hunk_id));
3672      }
3673   }
3674
3675   #[test]
3676   fn test_render_snapshot_summary_compacts_large_snapshot() {
3677      let snapshot = build_large_snapshot(160, 4);
3678      let summary = render_snapshot_summary(&snapshot, &[]);
3679
3680      assert!(summary.contains("# snapshot compacted"));
3681      assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3682      assert!(summary.contains("F001-H001"));
3683      assert!(summary.contains("F001-H004"));
3684      assert!(!summary.contains("F001-H002"));
3685      assert!(!summary.contains("F001-H003"));
3686      assert!(summary.contains("... 2 more hunks omitted from F001"));
3687   }
3688
3689   #[test]
3690   fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3691      let snapshot = build_multi_area_snapshot();
3692      let planning_index = build_planning_index(&snapshot);
3693
3694      assert_eq!(planning_index.mode, PlanningMode::Area);
3695      assert!(planning_index.targets.len() < snapshot.files.len());
3696      assert!(
3697         planning_index
3698            .targets
3699            .iter()
3700            .any(|target| target.label.starts_with("apps/frontend"))
3701      );
3702      assert!(
3703         render_planning_stat(&planning_index).contains("planning over"),
3704         "planning stat should explain the area mode"
3705      );
3706   }
3707
3708   #[test]
3709   fn test_normalize_intent_plan_expands_area_targets() {
3710      let snapshot = build_multi_area_snapshot();
3711      let planning_index = build_planning_index(&snapshot);
3712      let midpoint = planning_index.targets.len() / 2;
3713      let first_group_targets: Vec<String> = planning_index
3714         .targets
3715         .iter()
3716         .take(midpoint)
3717         .map(|target| target.label.clone())
3718         .collect();
3719      let second_group_targets: Vec<String> = planning_index
3720         .targets
3721         .iter()
3722         .skip(midpoint)
3723         .map(|target| target.label.clone())
3724         .collect();
3725      let groups = vec![
3726         ComposeIntentGroup {
3727            group_id:     "G1".to_string(),
3728            commit_type:  CommitType::new("refactor").unwrap(),
3729            scope:        None,
3730            file_ids:     first_group_targets,
3731            rationale:    "frontend and model".to_string(),
3732            dependencies: vec![],
3733         },
3734         ComposeIntentGroup {
3735            group_id:     "G2".to_string(),
3736            commit_type:  CommitType::new("refactor").unwrap(),
3737            scope:        None,
3738            file_ids:     second_group_targets,
3739            rationale:    "daemon and ci".to_string(),
3740            dependencies: vec![],
3741         },
3742      ];
3743
3744      let (normalized_groups, repair_notes) =
3745         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3746
3747      assert_eq!(normalized_groups.len(), 2);
3748      assert!(
3749         normalized_groups
3750            .iter()
3751            .flat_map(|group| group.file_ids.iter())
3752            .all(|file_id| file_id.starts_with('F')),
3753         "area targets should expand back to concrete file IDs"
3754      );
3755      assert!(!repair_notes.is_empty());
3756      assert_eq!(
3757         normalized_groups
3758            .iter()
3759            .flat_map(|group| group.file_ids.iter())
3760            .collect::<HashSet<_>>()
3761            .len(),
3762         snapshot.files.len()
3763      );
3764   }
3765
3766   #[test]
3767   fn test_large_patch_fallback_splits_monolithic_area_plan() {
3768      let snapshot = build_multi_area_snapshot();
3769      let planning_index = build_planning_index(&snapshot);
3770      let monolithic_group = ComposeIntentGroup {
3771         group_id:     "G1".to_string(),
3772         commit_type:  CommitType::new("refactor").unwrap(),
3773         scope:        None,
3774         file_ids:     snapshot
3775            .files
3776            .iter()
3777            .map(|file| file.file_id.clone())
3778            .collect(),
3779         rationale:    "repo-wide refactor".to_string(),
3780         dependencies: vec![],
3781      };
3782
3783      assert!(should_force_large_patch_fallback(
3784         &snapshot,
3785         &planning_index,
3786         &[monolithic_group],
3787         6
3788      ));
3789
3790      let fallback_groups =
3791         build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3792      assert!(fallback_groups.len() >= 3);
3793      assert_eq!(
3794         fallback_groups
3795            .iter()
3796            .flat_map(|group| group.file_ids.iter())
3797            .collect::<HashSet<_>>()
3798            .len(),
3799         snapshot.files.len()
3800      );
3801      assert!(
3802         fallback_groups
3803            .iter()
3804            .any(|group| group.rationale.contains("frontend")),
3805         "fallback should preserve workstream identity"
3806      );
3807   }
3808
3809   #[test]
3810   fn test_should_collect_compose_observations_skips_area_mode() {
3811      let snapshot = build_large_snapshot(160, 4);
3812      let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
3813      let counter = create_token_counter(&config);
3814
3815      assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3816      assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3817   }
3818
3819   #[test]
3820   fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
3821      let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
3822      let counter = create_token_counter(&config);
3823      let payload = "a".repeat(200);
3824      let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");
3825
3826      assert_eq!(
3827         compose_analysis_strategy(&diff, &config, &counter),
3828         ComposeAnalysisStrategy::MapReduce
3829      );
3830   }
3831
3832   #[test]
3833   fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
3834      let config = CommitConfig {
3835         map_reduce_enabled: false,
3836         max_diff_tokens: 1,
3837         max_diff_length: 10_000,
3838         ..Default::default()
3839      };
3840      let counter = create_token_counter(&config);
3841      assert_eq!(compose_truncation_length(&config), 4);
3842
3843      assert_eq!(
3844         compose_analysis_strategy(
3845            "diff --git a/models.json b/models.json\n+large",
3846            &config,
3847            &counter
3848         ),
3849         ComposeAnalysisStrategy::SmartTruncate
3850      );
3851   }
3852
3853   #[test]
3854   fn test_compose_analysis_strategy_keeps_small_group_direct() {
3855      let config = CommitConfig {
3856         map_reduce_threshold: 1_000,
3857         max_diff_tokens: 1_000,
3858         max_diff_length: 10_000,
3859         ..Default::default()
3860      };
3861      let counter = create_token_counter(&config);
3862
3863      assert_eq!(
3864         compose_analysis_strategy("diff --git a/a.rs b/a.rs\n+a", &config, &counter),
3865         ComposeAnalysisStrategy::Direct
3866      );
3867   }
3868
3869   #[test]
3870   fn test_chunk_ambiguous_files_splits_large_binding_request() {
3871      let ambiguous_files = vec![
3872         AmbiguousFileBinding {
3873            file_id:             "F001".to_string(),
3874            path:                "src/alpha.rs".to_string(),
3875            candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3876            hunk_ids:            (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3877         },
3878         AmbiguousFileBinding {
3879            file_id:             "F002".to_string(),
3880            path:                "src/beta.rs".to_string(),
3881            candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3882            hunk_ids:            (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3883         },
3884         AmbiguousFileBinding {
3885            file_id:             "F003".to_string(),
3886            path:                "src/gamma.rs".to_string(),
3887            candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3888            hunk_ids:            (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3889         },
3890      ];
3891
3892      let batches = chunk_ambiguous_files(&ambiguous_files);
3893      let total_hunks: usize = batches
3894         .iter()
3895         .flatten()
3896         .map(|file| file.hunk_ids.len())
3897         .sum();
3898
3899      assert_eq!(batches.len(), 2);
3900      assert_eq!(batches[0].len(), 1);
3901      assert_eq!(batches[1].len(), 2);
3902      assert_eq!(total_hunks, 140);
3903      assert!(batches.iter().all(|batch| {
3904         batch.len() <= MAX_BIND_FILES_PER_REQUEST
3905            && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3906               <= MAX_BIND_HUNKS_PER_REQUEST
3907      }));
3908   }
3909}