Skip to main content

llm_git/
compose.rs

1use std::{
2   borrow::Cow,
3   collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4   fmt::Write,
5   fs,
6   path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13   api::{
14      AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15      generate_summary_from_analysis, run_oneshot, strict_json_schema,
16   },
17   compose_types::{
18      ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19      ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20   },
21   config::CommitConfig,
22   diff::smart_truncate_diff,
23   error::{CommitGenError, Result},
24   git::{
25      TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26      get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27      reset_paths_to, update_ref_checked, write_index_tree, write_real_index_tree,
28   },
29   map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30   normalization::{format_commit_message, post_process_commit_message},
31   patch::{
32      StageResult, build_compose_snapshot, create_executable_group_patch,
33      force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
34      stage_executable_group_in_index,
35   },
36   style, templates,
37   tokens::{TokenCounter, create_token_counter},
38   types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
39   validation::validate_commit_message,
40};
41
/// Per-file observation cap that `snapshot_summary_budget` uses when the
/// snapshot is below every compaction threshold.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Schema tag folded into the plan cache key and stored with every cached
/// plan; `load_cached_plan` ignores entries carrying a different tag.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// More files than this puts the snapshot summary on the medium budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// More hunks than this puts the snapshot summary on the medium budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// More files than this selects the tightest summary budget and switches
/// planning to area mode.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// More hunks than this selects the tightest summary budget and switches
/// planning to area mode.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// File-count limit under which `collect_planning_buckets` stops splitting a
/// bucket (together with the hunk limit below).
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// Hunk-count limit under which `collect_planning_buckets` stops splitting a
/// bucket (together with the file limit above).
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path depth at which `collect_planning_buckets` stops splitting regardless
/// of bucket size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the two monolith-fallback thresholds are not referenced in the
// part of the file reviewed here; confirm their meaning at the use site.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
// NOTE(review): the two bind-request limits are not referenced in the part of
// the file reviewed here; presumably they cap one binding request — confirm.
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
/// Maximum number of commit messages to generate concurrently during
/// `execute_compose`. Matches the per-file fan-out used in `map_reduce`.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// Repository state recorded by `capture_compose_base_state`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ComposeBaseState {
   /// Value returned by `get_head_hash` at capture time.
   head_hash: String,
   /// Value returned by `current_head_ref` at capture time.
   head_ref: String,
   /// Value returned by `write_real_index_tree` at capture time.
   index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68   Ok(ComposeBaseState {
69      head_hash:  get_head_hash(dir)?,
70      head_ref:   current_head_ref(dir)?,
71      index_tree: write_real_index_tree(dir)?,
72   })
73}
74
/// How a compose diff is handed to analysis, as chosen by
/// `compose_analysis_strategy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeAnalysisStrategy {
   /// The diff is within both the length and the token limit.
   Direct,
   /// The diff exceeds the length or the token limit.
   SmartTruncate,
   /// `should_use_map_reduce` accepted the diff.
   MapReduce,
}
81
82fn compose_analysis_strategy(
83   diff: &str,
84   config: &CommitConfig,
85   counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87   if should_use_map_reduce(diff, config, counter) {
88      return ComposeAnalysisStrategy::MapReduce;
89   }
90
91   let diff_tokens = counter.count_sync(diff);
92   if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93      return ComposeAnalysisStrategy::SmartTruncate;
94   }
95
96   ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100   config
101      .max_diff_length
102      .min(config.max_diff_tokens.saturating_mul(4))
103      .max(1)
104}
105
106#[derive(Debug, Deserialize, Serialize)]
107struct ComposeIntentResponse {
108   groups: Vec<ComposeIntentGroup>,
109}
110
111#[derive(Debug, Deserialize, Serialize)]
112struct ComposeBindingResponse {
113   assignments: Vec<ComposeBindingAssignment>,
114}
115
116#[derive(Debug, Serialize, Deserialize)]
117struct ComposeCachedPlan {
118   schema_version: String,
119   cache_key:      String,
120   plan:           ComposeExecutablePlan,
121}
122
/// A file together with candidate group ids and hunk ids.
///
/// NOTE(review): nothing in the part of the file reviewed here constructs or
/// reads this type; the field notes below are inferred from the names only.
#[derive(Clone, Debug)]
struct AmbiguousFileBinding {
   /// Presumably the snapshot file id.
   file_id: String,
   /// Presumably the path of that file.
   path: String,
   /// Presumably the ids of the groups that could receive the file's hunks.
   candidate_group_ids: Vec<String>,
   /// Presumably the ids of the hunks still to be bound.
   hunk_ids: Vec<String>,
}
130
/// Candidate group ids kept for a single hunk.
///
/// NOTE(review): nothing in the part of the file reviewed here uses this
/// type; confirm the exact meaning at the use site.
#[derive(Clone, Debug)]
struct AmbiguousHunkContext {
   /// Presumably the ids of the groups the hunk may be assigned to.
   candidate_group_ids: Vec<String>,
}
135
// String keys mapped to ordered, de-duplicated sets of strings.
// NOTE(review): presumably hunk id -> assigned group ids (or the reverse);
// no use site is in the part of the file reviewed here — confirm.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
/// Outcome of evaluating a binding: what was assigned and what was left over.
///
/// NOTE(review): producers and consumers are outside the part of the file
/// reviewed here; the key/value meaning of `assigned` should be confirmed.
#[derive(Debug)]
struct BindingEvaluation {
   /// Assigned ids grouped under a string key.
   assigned: HashMap<String, Vec<String>>,
   /// Ids that could not be resolved.
   unresolved: Vec<String>,
}
143
/// Rendering limits for the snapshot summary, chosen by
/// `snapshot_summary_budget`.
#[derive(Clone, Copy, Debug)]
struct SnapshotSummaryBudget {
   /// Maximum number of observations rendered per file.
   max_observations_per_file: usize,
   /// Maximum number of hunks rendered per file; `None` renders every hunk.
   max_hunks_per_file: Option<usize>,
}
149
/// Granularity of the planning targets, chosen by
/// `planning_mode_for_snapshot`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PlanningMode {
   /// One planning target per file.
   File,
   /// Files are compacted into path-based areas.
   Area,
}
155
/// One unit the planner can assign to a group: a single file in file mode, or
/// a path-based area covering several files in area mode.
#[derive(Clone, Debug)]
struct PlanningTarget {
   /// File id in file mode; `A001`-style id in area mode.
   target_id: String,
   /// File path in file mode; bucket label in area mode.
   label: String,
   /// Ids of the files covered by this target.
   file_ids: Vec<String>,
   /// Total number of hunks across the covered files.
   hunk_count: usize,
   /// Total added lines across the covered files.
   additions: usize,
   /// Total deleted lines across the covered files.
   deletions: usize,
}
165
166#[derive(Debug, Clone)]
167struct PlanningIndex {
168   mode:    PlanningMode,
169   targets: Vec<PlanningTarget>,
170   aliases: HashMap<String, String>,
171}
172
/// A labelled set of file ids produced by `collect_planning_buckets`.
#[derive(Clone, Debug)]
struct PlanningBucket {
   /// Label computed by `planning_bucket_label` for the bucket's files.
   label: String,
   /// Ids of the files placed in this bucket.
   file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180   fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181      let mut expanded = Vec::new();
182      let mut seen_file_ids = HashSet::new();
183
184      for target_id in target_ids {
185         if let Some(target) = self
186            .targets
187            .iter()
188            .find(|candidate| candidate.target_id == *target_id)
189         {
190            for file_id in &target.file_ids {
191               if seen_file_ids.insert(file_id.clone()) {
192                  expanded.push(file_id.clone());
193               }
194            }
195         }
196      }
197
198      expanded
199   }
200}
201
202impl SnapshotSummaryBudget {
203   const fn is_compacted(self) -> bool {
204      self.max_hunks_per_file.is_some()
205   }
206}
207
/// Reports whether `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it equals one
/// of the known manifest names (case-sensitive) or when its extension is
/// `lock` or `lockb` (ASCII case-insensitive). Paths without a UTF-8 file name
/// never match.
fn is_dependency_manifest(path: &str) -> bool {
   const DEP_MANIFESTS: &[&str] = &[
      "Cargo.toml",
      "Cargo.lock",
      "package.json",
      "package-lock.json",
      "pnpm-lock.yaml",
      "yarn.lock",
      "bun.lock",
      "bun.lockb",
      "go.mod",
      "go.sum",
      "requirements.txt",
      "Pipfile",
      "Pipfile.lock",
      "pyproject.toml",
      "Gemfile",
      "Gemfile.lock",
      "composer.json",
      "composer.lock",
      "build.gradle",
      "build.gradle.kts",
      "gradle.properties",
      "pom.xml",
   ];

   match Path::new(path).file_name().and_then(|name| name.to_str()) {
      None => false,
      Some(name) if DEP_MANIFESTS.iter().any(|known| *known == name) => true,
      Some(name) => matches!(
         Path::new(name).extension(),
         Some(ext) if ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb")
      ),
   }
}
247
248fn save_debug_artifact<T: Serialize>(
249   debug_dir: Option<&Path>,
250   filename: &str,
251   value: &T,
252) -> Result<()> {
253   let Some(debug_dir) = debug_dir else {
254      return Ok(());
255   };
256
257   fs::create_dir_all(debug_dir)?;
258   let path = debug_dir.join(filename);
259   let json = serde_json::to_string_pretty(value)?;
260   fs::write(path, json)?;
261   Ok(())
262}
263
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a and returns the
/// digest as 16 zero-padded lowercase hex digits.
fn fnv1a_64(input: &str) -> String {
   const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
   const PRIME: u64 = 0x100000001b3;

   // FNV-1a: xor the byte in first, then multiply by the prime.
   let digest = input
      .bytes()
      .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

   format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274   snapshot: &ComposeSnapshot,
275   max_commits: usize,
276   analysis_model: &str,
277) -> String {
278   fnv1a_64(&format!(
279      "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280      snapshot.diff, snapshot.stat
281   ))
282}
283
284fn compose_plan_cache_path(
285   dir: &str,
286   snapshot: &ComposeSnapshot,
287   max_commits: usize,
288   analysis_model: &str,
289) -> Result<PathBuf> {
290   let git_dir = get_git_dir(dir)?;
291   Ok(git_dir.join("llm-git").join(format!(
292      "compose-plan-{}.json",
293      compose_plan_cache_key(snapshot, max_commits, analysis_model)
294   )))
295}
296
297fn load_cached_plan(
298   dir: &str,
299   snapshot: &ComposeSnapshot,
300   max_commits: usize,
301   analysis_model: &str,
302) -> Result<Option<ComposeExecutablePlan>> {
303   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
304   if !cache_path.exists() {
305      return Ok(None);
306   }
307
308   let content = match fs::read_to_string(&cache_path) {
309      Ok(content) => content,
310      Err(err) => {
311         eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
312         return Ok(None);
313      },
314   };
315   let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
316      Ok(cached) => cached,
317      Err(err) => {
318         eprintln!(
319            "{}",
320            style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
321         );
322         let _ = fs::remove_file(&cache_path);
323         return Ok(None);
324      },
325   };
326   let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
327
328   if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
329      return Ok(None);
330   }
331   if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
332      eprintln!(
333         "{}",
334         style::warning(&format!(
335            "Discarding cached compose plan (no longer valid for current snapshot): {err}"
336         ))
337      );
338      let _ = fs::remove_file(&cache_path);
339      return Ok(None);
340   }
341   Ok(Some(cached.plan))
342}
343
344fn save_cached_plan(
345   dir: &str,
346   snapshot: &ComposeSnapshot,
347   max_commits: usize,
348   analysis_model: &str,
349   plan: &ComposeExecutablePlan,
350) -> Result<()> {
351   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352   if let Some(parent) = cache_path.parent() {
353      fs::create_dir_all(parent)?;
354   }
355
356   let cached = ComposeCachedPlan {
357      schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358      cache_key:      compose_plan_cache_key(snapshot, max_commits, analysis_model),
359      plan:           plan.clone(),
360   };
361   fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362   Ok(())
363}
364
/// Formats a hunk line range for display.
///
/// A `count` of 0 renders as `"0"`, a `count` of 1 as the start line alone,
/// and anything larger as the inclusive range `start-end`.
fn format_line_range(start: usize, count: usize) -> String {
   if count == 0 {
      return "0".to_string();
   }
   if count == 1 {
      return start.to_string();
   }

   let end = start + count - 1;
   format!("{start}-{end}")
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376   {
377      SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378   } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379      || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380   {
381      SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382   } else {
383      SnapshotSummaryBudget {
384         max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385         max_hunks_per_file:        None,
386      }
387   }
388}
389
/// Picks at most `max_samples` indices out of `0..count`, in ascending order.
///
/// * When everything fits (`count <= max_samples`) every index is returned.
/// * A budget of 0 yields no indices (previously index 0 was still returned,
///   exceeding the requested budget).
/// * A budget of 1 yields only index 0.
/// * Otherwise the indices are spread evenly and always include the first
///   and the last index.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
   if count <= max_samples {
      return (0..count).collect();
   }

   match max_samples {
      0 => Vec::new(),
      1 => vec![0],
      _ => {
         let last = count - 1;
         let mut positions: Vec<usize> = (0..max_samples)
            .map(|slot| slot * last / (max_samples - 1))
            .collect();
         // `last >= max_samples - 1` here, so consecutive slots already map to
         // distinct indices; `dedup` only keeps the no-repeat guarantee explicit.
         positions.dedup();
         positions
      },
   }
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411   match budget.max_hunks_per_file {
412      None => file.hunk_ids.iter().map(String::as_str).collect(),
413      Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414         .into_iter()
415         .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416         .collect(),
417   }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421   let budget = snapshot_summary_budget(snapshot);
422   let observations_by_file: HashMap<&str, Vec<&str>> = observations
423      .iter()
424      .map(|observation| {
425         (
426            observation.file.as_str(),
427            observation
428               .observations
429               .iter()
430               .map(String::as_str)
431               .take(budget.max_observations_per_file)
432               .collect(),
433         )
434      })
435      .collect();
436
437   let mut out = String::new();
438   if budget.is_compacted() {
439      let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440      writeln!(
441         out,
442         "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443          representative hunks and {} observation(s) per file",
444         budget.max_observations_per_file
445      )
446      .unwrap();
447   }
448
449   for file in &snapshot.files {
450      writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451      if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452         for observation in file_observations {
453            writeln!(out, "  observation: {observation}").unwrap();
454         }
455      }
456
457      let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458      for hunk_id in &rendered_hunk_ids {
459         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460            if hunk.synthetic {
461               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462            } else {
463               writeln!(
464                  out,
465                  "  - {} old:{} new:{} :: {}",
466                  hunk.hunk_id,
467                  format_line_range(hunk.old_start, hunk.old_count),
468                  format_line_range(hunk.new_start, hunk.new_count),
469                  hunk.snippet
470               )
471               .unwrap();
472            }
473         }
474      }
475
476      let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477      if omitted_hunks > 0 {
478         writeln!(out, "  ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479      }
480   }
481
482   out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488   {
489      PlanningMode::Area
490   } else {
491      PlanningMode::File
492   }
493}
494
/// Number of `/`-separated segments in `path`; empty segments count, so the
/// empty string has depth 1.
fn path_depth(path: &str) -> usize {
   // Splitting on a separator always yields one more piece than separators.
   path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, re-joined with
/// `/`. A depth of 0 gives the empty string; a depth beyond the number of
/// segments gives the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
   let leading: Vec<&str> = path.split('/').take(depth).collect();
   leading.join("/")
}
508
/// Returns the longest run of leading `/`-separated segments shared by every
/// path, joined with `/`.
///
/// An empty slice gives the empty string; a single path is returned whole.
fn common_path_prefix(paths: &[String]) -> String {
   let mut remaining = paths.iter();
   let Some(seed) = remaining.next() else {
      return String::new();
   };

   let mut shared: Vec<&str> = seed.split('/').collect();
   for other in remaining {
      if shared.is_empty() {
         // Nothing left in common; later paths cannot change that.
         break;
      }

      let keep = other
         .split('/')
         .zip(&shared)
         .take_while(|(theirs, ours)| theirs == *ours)
         .count();
      shared.truncate(keep);
   }

   shared.join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532   file_ids
533      .iter()
534      .filter_map(|file_id| snapshot.file_by_id(file_id))
535      .map(|file| file.hunk_ids.len())
536      .sum()
537}
538
539fn group_file_ids_by_prefix(
540   snapshot: &ComposeSnapshot,
541   file_ids: &[String],
542   depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544   let mut groups = BTreeMap::new();
545
546   for file_id in file_ids {
547      if let Some(file) = snapshot.file_by_id(file_id) {
548         groups
549            .entry(prefix_at_depth(&file.path, depth))
550            .or_insert_with(Vec::new)
551            .push(file_id.clone());
552      }
553   }
554
555   groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559   let paths: Vec<String> = file_ids
560      .iter()
561      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562      .collect();
563
564   let common_prefix = common_path_prefix(&paths);
565   if common_prefix.is_empty() {
566      paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567   } else {
568      common_prefix
569   }
570}
571
572fn collect_planning_buckets(
573   snapshot: &ComposeSnapshot,
574   file_ids: &[String],
575   depth: usize,
576) -> Vec<PlanningBucket> {
577   let file_count = file_ids.len();
578   let hunk_count = bucket_hunk_count(snapshot, file_ids);
579   let max_path_depth = file_ids
580      .iter()
581      .filter_map(|file_id| snapshot.file_by_id(file_id))
582      .map(|file| path_depth(&file.path))
583      .max()
584      .unwrap_or(depth);
585
586   let should_stop =
587      file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588   if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589      return vec![PlanningBucket {
590         label:    planning_bucket_label(snapshot, file_ids),
591         file_ids: file_ids.to_vec(),
592      }];
593   }
594
595   let next_depth = depth + 1;
596   let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597   if groups.len() <= 1 {
598      return collect_planning_buckets(snapshot, file_ids, next_depth);
599   }
600
601   groups
602      .into_values()
603      .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604      .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608   let all_file_ids: Vec<String> = snapshot
609      .files
610      .iter()
611      .map(|file| file.file_id.clone())
612      .collect();
613   let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615   buckets
616      .into_iter()
617      .enumerate()
618      .map(|(idx, bucket)| {
619         let mut additions = 0_usize;
620         let mut deletions = 0_usize;
621         let mut hunk_count = 0_usize;
622
623         for file_id in &bucket.file_ids {
624            if let Some(file) = snapshot.file_by_id(file_id) {
625               additions = additions.saturating_add(file.additions);
626               deletions = deletions.saturating_add(file.deletions);
627               hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628            }
629         }
630
631         PlanningTarget {
632            target_id: format!("A{:03}", idx + 1),
633            label: bucket.label,
634            file_ids: bucket.file_ids,
635            hunk_count,
636            additions,
637            deletions,
638         }
639      })
640      .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644   snapshot
645      .files
646      .iter()
647      .map(|file| PlanningTarget {
648         target_id:  file.file_id.clone(),
649         label:      file.path.clone(),
650         file_ids:   vec![file.file_id.clone()],
651         hunk_count: file.hunk_ids.len(),
652         additions:  file.additions,
653         deletions:  file.deletions,
654      })
655      .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659   let mode = planning_mode_for_snapshot(snapshot);
660   let targets = match mode {
661      PlanningMode::File => build_file_planning_targets(snapshot),
662      PlanningMode::Area => build_area_planning_targets(snapshot),
663   };
664
665   let aliases = targets
666      .iter()
667      .flat_map(|target| {
668         let normalized_label = normalize_file_reference(&target.label);
669         [
670            (target.target_id.clone(), target.target_id.clone()),
671            (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672            (normalized_label, target.target_id.clone()),
673         ]
674      })
675      .collect();
676
677   PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681   sample_positions(target.file_ids.len(), 4)
682      .into_iter()
683      .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684      .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688   let hunk_ids: Vec<&String> = target
689      .file_ids
690      .iter()
691      .filter_map(|file_id| snapshot.file_by_id(file_id))
692      .flat_map(|file| file.hunk_ids.iter())
693      .collect();
694
695   sample_positions(hunk_ids.len(), 4)
696      .into_iter()
697      .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698      .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702   let mut out = String::new();
703
704   match index.mode {
705      PlanningMode::File => {
706         writeln!(out, "# planning over individual file IDs").unwrap();
707      },
708      PlanningMode::Area => {
709         writeln!(
710            out,
711            "# planning over {} area IDs spanning {} files",
712            index.targets.len(),
713            index
714               .targets
715               .iter()
716               .flat_map(|target| target.file_ids.iter())
717               .collect::<HashSet<_>>()
718               .len()
719         )
720         .unwrap();
721      },
722   }
723
724   for target in &index.targets {
725      writeln!(
726         out,
727         "{} {} | {} files | {} hunks | +{}/-{}",
728         target.target_id,
729         target.label,
730         target.file_ids.len(),
731         target.hunk_count,
732         target.additions,
733         target.deletions
734      )
735      .unwrap();
736   }
737
738   out
739}
740
741fn render_planning_snapshot_summary(
742   snapshot: &ComposeSnapshot,
743   observations: &[FileObservation],
744   index: &PlanningIndex,
745) -> String {
746   if index.mode == PlanningMode::File {
747      return render_snapshot_summary(snapshot, observations);
748   }
749
750   let observations_by_file: HashMap<&str, Vec<&str>> = observations
751      .iter()
752      .map(|observation| {
753         (
754            observation.file.as_str(),
755            observation
756               .observations
757               .iter()
758               .map(String::as_str)
759               .take(1)
760               .collect(),
761         )
762      })
763      .collect();
764
765   let mut out = String::new();
766   writeln!(
767      out,
768      "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769   )
770   .unwrap();
771
772   for target in &index.targets {
773      writeln!(
774         out,
775         "- {} {} ({} files, {} hunks, +{}/-{})",
776         target.target_id,
777         target.label,
778         target.file_ids.len(),
779         target.hunk_count,
780         target.additions,
781         target.deletions
782      )
783      .unwrap();
784
785      let sample_file_ids = sample_file_ids_for_target(target);
786      if !sample_file_ids.is_empty() {
787         let sample_files: Vec<String> = sample_file_ids
788            .iter()
789            .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790            .collect();
791         writeln!(out, "  files: {}", sample_files.join(", ")).unwrap();
792         let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793         if omitted > 0 {
794            writeln!(out, "  ... {omitted} more files omitted from {}", target.target_id).unwrap();
795         }
796      }
797
798      let mut rendered_observations = 0_usize;
799      for file_id in &target.file_ids {
800         let Some(file) = snapshot.file_by_id(file_id) else {
801            continue;
802         };
803         let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804            continue;
805         };
806
807         for observation in file_observations {
808            writeln!(out, "  observation: {observation}").unwrap();
809            rendered_observations += 1;
810            if rendered_observations >= 2 {
811               break;
812            }
813         }
814
815         if rendered_observations >= 2 {
816            break;
817         }
818      }
819
820      for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821         if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822            if hunk.synthetic {
823               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824            } else {
825               writeln!(
826                  out,
827                  "  - {} old:{} new:{} :: {}",
828                  hunk.hunk_id,
829                  format_line_range(hunk.old_start, hunk.old_count),
830                  format_line_range(hunk.new_start, hunk.new_count),
831                  hunk.snippet
832               )
833               .unwrap();
834            }
835         }
836      }
837   }
838
839   out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843   match index.mode {
844      PlanningMode::File => format!(
845         "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846         snapshot.files.len()
847      ),
848      PlanningMode::Area => format!(
849         "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850          Coverage: {} areas spanning {} files.",
851         index.targets.len(),
852         snapshot.files.len()
853      ),
854   }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858   match index.mode {
859      PlanningMode::File => {
860         "Use only the provided file IDs and keep the grouping conservative.".to_string()
861      },
862      PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863                             planning areas. Split along independent subsystems or workstreams \
864                             when the areas point at unrelated changes."
865         .to_string(),
866   }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870   match index.mode {
871      PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872      PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873                             one broad group if nearly every area clearly belongs to the same \
874                             atomic change."
875         .to_string(),
876   }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880   let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882   strict_json_schema(
883      serde_json::json!({
884         "groups": {
885            "type": "array",
886            "items": {
887               "type": "object",
888               "properties": {
889                  "group_id": {
890                     "type": "string",
891                     "description": "Stable identifier like G1, G2, G3"
892                  },
893                  "file_ids": {
894                     "type": "array",
895                     "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896                     "items": { "type": "string" }
897                  },
898                  "type": {
899                     "type": "string",
900                     "enum": type_enum,
901                     "description": "Conventional commit type for this group"
902                  },
903                  "scope": {
904                     "type": "string",
905                     "description": "Optional scope (module/component). Omit if broad."
906                  },
907                  "rationale": {
908                     "type": "string",
909                     "description": "Brief explanation of the logical change"
910                  },
911                  "dependencies": {
912                     "type": "array",
913                     "description": "Group IDs this group depends on",
914                     "items": { "type": "string" }
915                  }
916               },
917               "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918               "additionalProperties": false
919            }
920         }
921      }),
922      &["groups"],
923   )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927   strict_json_schema(
928      serde_json::json!({
929         "assignments": {
930            "type": "array",
931            "items": {
932               "type": "object",
933               "properties": {
934                  "group_id": { "type": "string" },
935                  "hunk_ids": {
936                     "type": "array",
937                     "items": { "type": "string" }
938                  }
939               },
940               "required": ["group_id", "hunk_ids"],
941               "additionalProperties": false
942            }
943         }
944      }),
945      &["assignments"],
946   )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950   groups: &[T],
951   group_id: FId,
952   dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955   FId: Fn(&T) -> &str,
956   FDeps: Fn(&T) -> &[String],
957{
958   let mut index_by_id = HashMap::new();
959   for (idx, group) in groups.iter().enumerate() {
960      let id = group_id(group);
961      if id.trim().is_empty() {
962         return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963      }
964      if index_by_id.insert(id.to_string(), idx).is_some() {
965         return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966      }
967   }
968
969   let mut in_degree = vec![0_usize; groups.len()];
970   let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972   for (idx, group) in groups.iter().enumerate() {
973      for dependency in dependencies(group) {
974         let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975            CommitGenError::Other(format!(
976               "Group {} depends on unknown group_id '{}'",
977               group_id(group),
978               dependency
979            ))
980         })?;
981         if dependency_idx == idx {
982            return Err(CommitGenError::Other(format!(
983               "Group {} depends on itself",
984               group_id(group)
985            )));
986         }
987
988         adjacency[dependency_idx].push(idx);
989         in_degree[idx] += 1;
990      }
991   }
992
993   let mut queue: Vec<usize> = (0..groups.len())
994      .filter(|idx| in_degree[*idx] == 0)
995      .collect();
996   let mut order = Vec::with_capacity(groups.len());
997
998   while let Some(node) = queue.pop() {
999      order.push(node);
1000      for neighbor in &adjacency[node] {
1001         in_degree[*neighbor] -= 1;
1002         if in_degree[*neighbor] == 0 {
1003            queue.push(*neighbor);
1004         }
1005      }
1006   }
1007
1008   if order.len() != groups.len() {
1009      return Err(CommitGenError::Other(
1010         "Circular dependency detected in compose groups".to_string(),
1011      ));
1012   }
1013
1014   Ok(order)
1015}
1016
/// Cleans up a reference emitted by the planner (a path, target ID, or group
/// ID) so it can be matched against known identifiers.
///
/// Removes, from both ends, any mix of whitespace and quote characters
/// (backtick, `"`, `'`); additionally removes trailing `,` and `;`; and finally
/// drops any leading `./` prefixes.
///
/// Wrappers and trailing punctuation are stripped together rather than in a
/// fixed sequence, so inputs such as `"src/a.rs",` or `" G2 "` are fully
/// unwrapped instead of keeping a stray quote or inner whitespace.
fn normalize_file_reference(raw_file_ref: &str) -> String {
   let is_wrapper = |ch: char| ch.is_whitespace() || matches!(ch, '`' | '"' | '\'');

   raw_file_ref
      .trim_start_matches(is_wrapper)
      // Trailing list punctuation may sit inside or outside the closing quote.
      .trim_end_matches(|ch: char| is_wrapper(ch) || matches!(ch, ',' | ';'))
      .trim_start_matches("./")
      .to_string()
}
1025
/// Extracts distinct, lowercase keyword tokens from free-form planning text.
///
/// The text is split on every character that is not ASCII alphanumeric. Words
/// shorter than three characters and words in `STOP_WORDS` are discarded, and
/// each remaining word is reported once, in order of first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
   const STOP_WORDS: &[&str] = &[
      "and",
      "for",
      "the",
      "with",
      "from",
      "into",
      "after",
      "before",
      "over",
      "under",
      "plus",
      "across",
      "update",
      "updated",
      "refactor",
      "refactored",
      "changes",
      "change",
      "logical",
      "group",
      "groups",
      "commit",
      "commits",
   ];

   let mut already_emitted = HashSet::new();

   text
      .split(|ch: char| !ch.is_ascii_alphanumeric())
      // Every word is pure ASCII here, so byte length equals character count.
      .filter(|word| word.len() >= 3)
      .map(str::to_ascii_lowercase)
      .filter(|word| !STOP_WORDS.contains(&word.as_str()))
      .filter(|word| already_emitted.insert(word.clone()))
      .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079   let normalized = normalize_file_reference(raw);
1080   let uppercase = normalized.to_ascii_uppercase();
1081
1082   if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083      return Some(format!("G{uppercase}"));
1084   }
1085
1086   if let Some(rest) = uppercase.strip_prefix('G')
1087      && !rest.is_empty()
1088      && rest.chars().all(|ch| ch.is_ascii_digit())
1089   {
1090      return Some(format!("G{rest}"));
1091   }
1092
1093   let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094   let compact = uppercase
1095      .chars()
1096      .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097      .collect::<String>();
1098   if compact.starts_with("GROUP") && !digits.is_empty() {
1099      return Some(format!("G{digits}"));
1100   }
1101
1102   None
1103}
1104
/// Coarse classification of a changed file, used when scoring how well a file
/// fits a commit group.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeFileCategory {
   /// The file is flagged as binary.
   Binary,
   /// The path is a dependency manifest.
   Dependency,
   /// Markdown or README documentation.
   Docs,
   /// The path mentions "prompt" or "system".
   Prompt,
   /// The path looks like test or spec code.
   Test,
   /// The extension is a configuration format (TOML, YAML, JSON, ...).
   Config,
   /// The extension is a recognised programming language.
   Source,
   /// Nothing above matched.
   Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118   if file.is_binary {
1119      return ComposeFileCategory::Binary;
1120   }
1121
1122   if is_dependency_manifest(&file.path) {
1123      return ComposeFileCategory::Dependency;
1124   }
1125
1126   let filename_lower = file.path.to_ascii_lowercase();
1127   let file_name = Path::new(&filename_lower)
1128      .file_name()
1129      .and_then(|name| name.to_str())
1130      .unwrap_or_default();
1131   let extension = Path::new(&filename_lower)
1132      .extension()
1133      .and_then(|ext| ext.to_str())
1134      .unwrap_or_default();
1135
1136   if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137      return ComposeFileCategory::Prompt;
1138   }
1139
1140   if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141      return ComposeFileCategory::Docs;
1142   }
1143
1144   if filename_lower.contains("/tests/")
1145      || filename_lower.starts_with("tests/")
1146      || file_name.contains("test")
1147      || file_name.contains("spec")
1148   {
1149      return ComposeFileCategory::Test;
1150   }
1151
1152   if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153      return ComposeFileCategory::Config;
1154   }
1155
1156   if matches!(
1157      extension,
1158      "rs"
1159         | "py"
1160         | "js"
1161         | "jsx"
1162         | "ts"
1163         | "tsx"
1164         | "go"
1165         | "java"
1166         | "kt"
1167         | "c"
1168         | "cc"
1169         | "cpp"
1170         | "h"
1171         | "hpp"
1172         | "cs"
1173         | "rb"
1174         | "php"
1175         | "swift"
1176         | "scala"
1177         | "m"
1178         | "mm"
1179   ) {
1180      return ComposeFileCategory::Source;
1181   }
1182
1183   ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments two paths have in common.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments. Identical paths therefore count the file name as well.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
   let mut right_segments = right.split('/');
   let mut depth = 0;

   for left_segment in left.split('/') {
      match right_segments.next() {
         Some(right_segment) if right_segment == left_segment => depth += 1,
         _ => break,
      }
   }

   depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195   let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197   if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198      score += 40;
1199   }
1200
1201   if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202      score += 12;
1203   }
1204
1205   if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206      score += 18;
1207   }
1208
1209   score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213   match (compose_file_category(file), group.commit_type.as_str()) {
1214      (ComposeFileCategory::Docs, "docs") => 25,
1215      (ComposeFileCategory::Test, "test") => 25,
1216      (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217      (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218      (
1219         ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220         "feat" | "fix" | "refactor" | "perf",
1221      ) => 10,
1222      _ => 0,
1223   }
1224}
1225
1226fn best_group_for_missing_file(
1227   snapshot: &ComposeSnapshot,
1228   groups: &[ComposeIntentGroup],
1229   missing_file: &ComposeFile,
1230) -> usize {
1231   let mut best_group_idx = 0;
1232   let mut best_score = i32::MIN;
1233   let mut best_group_size = usize::MAX;
1234
1235   for (group_idx, group) in groups.iter().enumerate() {
1236      let similarity = group
1237         .file_ids
1238         .iter()
1239         .filter_map(|file_id| snapshot.file_by_id(file_id))
1240         .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241         .max()
1242         .unwrap_or_default();
1243      let score = similarity + group_type_bonus(missing_file, group);
1244      let group_size = group.file_ids.len();
1245
1246      if score > best_score || (score == best_score && group_size < best_group_size) {
1247         best_group_idx = group_idx;
1248         best_score = score;
1249         best_group_size = group_size;
1250      }
1251   }
1252
1253   best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257   raw_dependency: &str,
1258   known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260   let normalized = normalize_file_reference(raw_dependency);
1261   if normalized.is_empty() {
1262      return None;
1263   }
1264
1265   if known_group_ids.contains(&normalized) {
1266      return Some(normalized);
1267   }
1268
1269   let uppercase = normalized.to_ascii_uppercase();
1270   if known_group_ids.contains(&uppercase) {
1271      return Some(uppercase);
1272   }
1273
1274   let candidate = extract_group_id_candidate(&normalized)?;
1275   known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279   let label = target.label.to_ascii_lowercase();
1280   let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281   let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283   if let Some(scope) = &group.scope {
1284      let scope = scope.as_str().to_ascii_lowercase();
1285      if label.contains(&scope) || workstream.contains(&scope) {
1286         score += 140;
1287      }
1288
1289      for segment in scope.split('/') {
1290         if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291            score += 45;
1292         }
1293      }
1294   }
1295
1296   for token in planning_text_tokens(&group.rationale) {
1297      if label.contains(&token) || workstream.contains(&token) {
1298         score += 16;
1299      }
1300   }
1301
1302   match group.commit_type.as_str() {
1303      "ci" if target.label.starts_with(".github/") => score += 120,
1304      "docs"
1305         if target.label.starts_with("docs/")
1306            || Path::new(&target.label)
1307               .extension()
1308               .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309      {
1310         score += 80;
1311      },
1312      "build" | "chore"
1313         if target.label.contains("Cargo")
1314            || target.label.contains("package")
1315            || target.label.contains("lock")
1316            || target.label.contains("tsconfig")
1317            || target.label.contains("biome")
1318            || target.label.contains("bun") =>
1319      {
1320         score += 55;
1321      },
1322      _ => {},
1323   }
1324
1325   score
1326}
1327
1328fn seed_group_targets(
1329   groups: &[ComposeIntentGroup],
1330   planning_index: &PlanningIndex,
1331   group_targets: &mut [Vec<String>],
1332   repair_notes: &mut Vec<String>,
1333) {
1334   let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336   for (group_idx, group) in groups.iter().enumerate() {
1337      if !group_targets[group_idx].is_empty() {
1338         continue;
1339      }
1340
1341      let fallback_target = planning_index
1342         .targets
1343         .iter()
1344         .max_by_key(|target| {
1345            let mut score = planning_target_match_score(target, group);
1346            if !claimed_target_ids.contains(&target.target_id) {
1347               score += 60;
1348            }
1349            (score, target.hunk_count, target.file_ids.len())
1350         })
1351         .or_else(|| planning_index.targets.first());
1352
1353      let Some(fallback_target) = fallback_target else {
1354         continue;
1355      };
1356
1357      group_targets[group_idx].push(fallback_target.target_id.clone());
1358      claimed_target_ids.insert(fallback_target.target_id.clone());
1359      repair_notes.push(format!(
1360         "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361         group.group_id, fallback_target.target_id, fallback_target.label
1362      ));
1363   }
1364}
1365
1366fn normalize_intent_plan(
1367   snapshot: &ComposeSnapshot,
1368   planning_index: &PlanningIndex,
1369   mut groups: Vec<ComposeIntentGroup>,
1370) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
1371   if groups.is_empty() {
1372      return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
1373   }
1374
1375   let known_target_ids: HashSet<&str> = planning_index
1376      .targets
1377      .iter()
1378      .map(|target| target.target_id.as_str())
1379      .collect();
1380   let mut repair_notes = Vec::new();
1381   let mut covered_file_ids = HashSet::new();
1382   let mut normalized_group_targets = Vec::with_capacity(groups.len());
1383
1384   for group in &groups {
1385      if group.file_ids.is_empty() {
1386         repair_notes.push(format!(
1387            "Compose planner left {} without planning targets; assigning targets heuristically",
1388            group.group_id
1389         ));
1390      }
1391
1392      let mut normalized_target_ids = Vec::new();
1393      let mut seen_target_ids = HashSet::new();
1394      for raw_target_ref in &group.file_ids {
1395         let normalized_ref = normalize_file_reference(raw_target_ref);
1396         let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
1397            normalized_ref.clone()
1398         } else {
1399            let uppercase_ref = normalized_ref.to_ascii_uppercase();
1400            if known_target_ids.contains(uppercase_ref.as_str()) {
1401               uppercase_ref
1402            } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
1403               if raw_target_ref != target_id {
1404                  repair_notes.push(format!(
1405                     "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
1406                  ));
1407               }
1408               target_id.clone()
1409            } else {
1410               repair_notes.push(format!(
1411                  "Dropped unknown planning target '{}' from {}",
1412                  raw_target_ref, group.group_id
1413               ));
1414               continue;
1415            }
1416         };
1417
1418         if seen_target_ids.insert(canonical_target_id.clone()) {
1419            normalized_target_ids.push(canonical_target_id);
1420         }
1421      }
1422
1423      normalized_group_targets.push(normalized_target_ids);
1424   }
1425
1426   seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);
1427
1428   let known_group_ids: HashSet<String> =
1429      groups.iter().map(|group| group.group_id.clone()).collect();
1430   for group in &mut groups {
1431      let mut normalized_dependencies = Vec::new();
1432      let mut seen_dependencies = HashSet::new();
1433
1434      for raw_dependency in &group.dependencies {
1435         let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
1436         else {
1437            repair_notes.push(format!(
1438               "Dropped unknown dependency '{}' from {}",
1439               raw_dependency, group.group_id
1440            ));
1441            continue;
1442         };
1443
1444         if dependency == group.group_id {
1445            repair_notes.push(format!(
1446               "Dropped self-dependency '{}' from {}",
1447               raw_dependency, group.group_id
1448            ));
1449            continue;
1450         }
1451
1452         if seen_dependencies.insert(dependency.clone()) {
1453            if raw_dependency != &dependency {
1454               repair_notes.push(format!(
1455                  "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
1456               ));
1457            }
1458            normalized_dependencies.push(dependency);
1459         }
1460      }
1461
1462      group.dependencies = normalized_dependencies;
1463   }
1464
1465   for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
1466      let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
1467      for file_id in &expanded_file_ids {
1468         covered_file_ids.insert(file_id.clone());
1469      }
1470      group.file_ids = expanded_file_ids;
1471   }
1472
1473   for file in &snapshot.files {
1474      if covered_file_ids.contains(file.file_id.as_str()) {
1475         continue;
1476      }
1477
1478      let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
1479      let target_group = &mut groups[target_group_idx];
1480      target_group.file_ids.push(file.file_id.clone());
1481      covered_file_ids.insert(file.file_id.clone());
1482      repair_notes.push(format!(
1483         "Compose planner omitted {} ({}); assigned it to {}",
1484         file.file_id, file.path, target_group.group_id
1485      ));
1486   }
1487
1488   Ok((groups, repair_notes))
1489}
1490
/// Derives the workstream key that a planning label belongs to.
///
/// The key is normally the first non-empty `/` segment of the label. For
/// `.github` and the container directories `apps`, `packages`, `crates`,
/// `services`, `libs` and `pass`, the second segment is included when present
/// (e.g. `crates/core`). A label with no non-empty segment is returned as-is.
fn workstream_key_for_label(label: &str) -> String {
   let mut segments = label.split('/').filter(|segment| !segment.is_empty());
   let Some(first) = segments.next() else {
      return label.to_string();
   };

   let keeps_second_segment = matches!(
      first,
      ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
   );

   match segments.next() {
      Some(second) if keeps_second_segment => format!("{first}/{second}"),
      _ => first.to_string(),
   }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514   let key = workstream_key_for_label(label);
1515   match key.as_str() {
1516      ".github/workflows" => "CI workflows".to_string(),
1517      ".github" => "GitHub automation".to_string(),
1518      _ => key
1519         .split('/')
1520         .next_back()
1521         .map(|segment| segment.replace(['_', '-'], " "))
1522         .unwrap_or(key),
1523   }
1524}
1525
/// Converts free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces between kept characters collapse into a single `-`. All other
/// characters are dropped. Returns `None` when nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
   let mut fragment = String::new();
   // Set when a separator has been seen since the last kept character; the
   // dash is only written once another kept character follows, so the result
   // never starts or ends with a dash.
   let mut separator_pending = false;

   for ch in raw.trim().chars() {
      if ch.is_ascii_alphanumeric() {
         if separator_pending {
            fragment.push('-');
            separator_pending = false;
         }
         fragment.push(ch.to_ascii_lowercase());
      } else if matches!(ch, '-' | '_' | '/' | '.' | ' ') && !fragment.is_empty() {
         separator_pending = true;
      }
   }

   if fragment.is_empty() {
      None
   } else {
      Some(fragment)
   }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546   let key = workstream_key_for_label(label);
1547   let candidate = key
1548      .split('/')
1549      .next_back()
1550      .and_then(sanitize_scope_fragment)?;
1551   Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555   if labels.len() == 1 {
1556      let label = labels[0].as_str();
1557      let display = workstream_display_name(label);
1558      if label.starts_with("apps/") {
1559         return format!("{display} application updates");
1560      }
1561      if label.starts_with("packages/") {
1562         return format!("{display} package updates");
1563      }
1564      if label.starts_with("crates/") {
1565         return format!("{display} crate updates");
1566      }
1567      if label.starts_with(".github/") || label == ".github" {
1568         return format!("{display} updates");
1569      }
1570      return format!("{display} updates");
1571   }
1572
1573   let display_labels: Vec<String> = labels
1574      .iter()
1575      .take(3)
1576      .map(|label| workstream_display_name(label))
1577      .collect();
1578   format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582   snapshot: &ComposeSnapshot,
1583   labels: &[String],
1584   file_ids: &[String],
1585) -> Result<CommitType> {
1586   if labels
1587      .iter()
1588      .any(|label| label == ".github" || label.starts_with(".github/"))
1589   {
1590      return CommitType::new("ci");
1591   }
1592
1593   let files: Vec<&ComposeFile> = file_ids
1594      .iter()
1595      .filter_map(|file_id| snapshot.file_by_id(file_id))
1596      .collect();
1597   let all_docs = !files.is_empty()
1598      && files
1599         .iter()
1600         .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601   if all_docs {
1602      return CommitType::new("docs");
1603   }
1604
1605   let all_tests = !files.is_empty()
1606      && files
1607         .iter()
1608         .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609   if all_tests {
1610      return CommitType::new("test");
1611   }
1612
1613   let all_dependencies =
1614      !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615   if all_dependencies {
1616      return CommitType::new("build");
1617   }
1618
1619   let all_config = !files.is_empty()
1620      && files.iter().all(|file| {
1621         matches!(
1622            compose_file_category(file),
1623            ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624         )
1625      });
1626   if all_config {
1627      return CommitType::new("chore");
1628   }
1629
1630   CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634   snapshot
1635      .files
1636      .iter()
1637      .filter(|file| file_ids.contains(&file.file_id))
1638      .map(|file| file.file_id.clone())
1639      .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643   if groups.is_empty() {
1644      return false;
1645   }
1646
1647   let largest_group = groups
1648      .iter()
1649      .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650      .max()
1651      .unwrap_or_default();
1652
1653   groups.len() == 1
1654      || (groups.len() <= 2
1655         && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659   snapshot: &ComposeSnapshot,
1660   planning_index: &PlanningIndex,
1661   groups: &[ComposeIntentGroup],
1662   max_commits: usize,
1663) -> bool {
1664   if max_commits <= 1
1665      || planning_index.mode != PlanningMode::Area
1666      || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667      || !is_monolithic_intent_plan(snapshot, groups)
1668   {
1669      return false;
1670   }
1671
1672   let workstream_count = planning_index
1673      .targets
1674      .iter()
1675      .map(|target| workstream_key_for_label(&target.label))
1676      .collect::<HashSet<_>>()
1677      .len();
1678
1679   workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
1682fn build_large_patch_fallback_groups(
1683   snapshot: &ComposeSnapshot,
1684   planning_index: &PlanningIndex,
1685   max_commits: usize,
1686) -> Result<Vec<ComposeIntentGroup>> {
1687   #[derive(Debug, Clone)]
1688   struct WorkstreamGroup {
1689      label:    String,
1690      file_ids: HashSet<String>,
1691      weight:   usize,
1692   }
1693
1694   #[derive(Debug, Clone)]
1695   struct FallbackBin {
1696      labels:       Vec<String>,
1697      file_ids:     HashSet<String>,
1698      total_weight: usize,
1699   }
1700
1701   let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1702   for target in &planning_index.targets {
1703      let key = workstream_key_for_label(&target.label);
1704      let entry = workstreams
1705         .entry(key.clone())
1706         .or_insert_with(|| WorkstreamGroup {
1707            label:    key,
1708            file_ids: HashSet::new(),
1709            weight:   0,
1710         });
1711
1712      for file_id in &target.file_ids {
1713         entry.file_ids.insert(file_id.clone());
1714      }
1715      entry.weight = entry
1716         .weight
1717         .saturating_add(target.hunk_count.max(target.file_ids.len()));
1718   }
1719
1720   let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1721   workstreams.sort_by(|left, right| {
1722      right
1723         .weight
1724         .cmp(&left.weight)
1725         .then_with(|| left.label.cmp(&right.label))
1726   });
1727
1728   let bin_count = max_commits.min(workstreams.len());
1729   let mut bins: Vec<FallbackBin> = Vec::new();
1730   for workstream in workstreams {
1731      if bins.len() < bin_count {
1732         bins.push(FallbackBin {
1733            labels:       vec![workstream.label],
1734            file_ids:     workstream.file_ids,
1735            total_weight: workstream.weight,
1736         });
1737         continue;
1738      }
1739
1740      let Some((target_idx, _)) = bins
1741         .iter()
1742         .enumerate()
1743         .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1744      else {
1745         continue;
1746      };
1747
1748      let target_bin = &mut bins[target_idx];
1749      target_bin.labels.push(workstream.label);
1750      target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1751      target_bin.file_ids.extend(workstream.file_ids);
1752   }
1753
1754   let mut groups = Vec::new();
1755   for (idx, bin) in bins.into_iter().enumerate() {
1756      let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1757      let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1758      let scope = (bin.labels.len() == 1)
1759         .then(|| fallback_scope_for_label(&bin.labels[0]))
1760         .flatten();
1761      let rationale = fallback_rationale_for_labels(&bin.labels);
1762
1763      groups.push(ComposeIntentGroup {
1764         group_id: format!("G{}", idx + 1),
1765         commit_type,
1766         scope,
1767         file_ids: ordered_ids,
1768         rationale,
1769         dependencies: Vec::new(),
1770      });
1771   }
1772
1773   Ok(groups)
1774}
1775
1776#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
1777async fn analyze_compose_intent(
1778   snapshot: &ComposeSnapshot,
1779   observations: &[FileObservation],
1780   config: &CommitConfig,
1781   max_commits: usize,
1782   debug_dir: Option<&Path>,
1783) -> Result<ComposeIntentPlan> {
1784   let planning_index = build_planning_index(snapshot);
1785   let stat_summary = render_planning_stat(&planning_index);
1786   let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1787   let planning_targets = render_planning_targets(&planning_index, snapshot);
1788   let planning_notes = render_planning_notes(&planning_index);
1789   let split_bias = render_split_bias(&planning_index);
1790   let schema = build_intent_schema(config);
1791   let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1792      variant: "default",
1793      max_commits,
1794      stat: &stat_summary,
1795      snapshot_summary: &snapshot_summary,
1796      planning_targets: &planning_targets,
1797      planning_notes: &planning_notes,
1798      split_bias: &split_bias,
1799   })?;
1800
1801   let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1802      operation:        "compose/intent",
1803      model:            &config.analysis_model,
1804      prompt_family:    "compose-intent",
1805      prompt_variant:   "default",
1806      system_prompt:    &parts.system,
1807      user_prompt:      &parts.user,
1808      tool_name:        "create_compose_intent_plan",
1809      tool_description: "Plan logical commit groups over the provided planning target IDs",
1810      schema:           &schema,
1811      progress_label:   Some("compose intent planner"),
1812      debug:            debug_dir.map(|dir| OneShotDebug {
1813         dir:    Some(dir),
1814         prefix: None,
1815         name:   "compose_intent",
1816      }),
1817      cacheable:        true,
1818   })
1819   .await?;
1820
1821   let (mut groups, repair_notes) =
1822      normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1823   for note in &repair_notes {
1824      eprintln!("{}", style::warning(note));
1825   }
1826   if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1827      eprintln!(
1828         "{}",
1829         style::warning(
1830            "Compose intent collapsed into a monolithic large-patch group; falling back to \
1831             path-based workstream splits."
1832         )
1833      );
1834      groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1835   }
1836   let dependency_order =
1837      compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1838
1839   Ok(ComposeIntentPlan { groups, dependency_order })
1840}
1841
1842#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1843fn should_collect_compose_observations(
1844   snapshot: &ComposeSnapshot,
1845   config: &CommitConfig,
1846   counter: &TokenCounter,
1847) -> bool {
1848   planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1849      && should_use_map_reduce(&snapshot.diff, config, counter)
1850}
1851
1852#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1853fn auto_assign_hunks(
1854   snapshot: &ComposeSnapshot,
1855   intent_plan: &ComposeIntentPlan,
1856) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1857   let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1858   for group in &intent_plan.groups {
1859      for file_id in &group.file_ids {
1860         groups_by_file
1861            .entry(file_id.as_str())
1862            .or_default()
1863            .push(group.group_id.as_str());
1864      }
1865   }
1866
1867   let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1868      .groups
1869      .iter()
1870      .map(|group| (group.group_id.clone(), BTreeSet::new()))
1871      .collect();
1872   let mut ambiguous = Vec::new();
1873
1874   for file in &snapshot.files {
1875      let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1876         return Err(CommitGenError::Other(format!(
1877            "No compose group claimed file {} ({})",
1878            file.file_id, file.path
1879         )));
1880      };
1881
1882      if candidate_group_ids.len() == 1 {
1883         let group_id = candidate_group_ids[0];
1884         let entry = assigned
1885            .get_mut(group_id)
1886            .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1887         for hunk_id in &file.hunk_ids {
1888            entry.insert(hunk_id.clone());
1889         }
1890      } else {
1891         ambiguous.push(AmbiguousFileBinding {
1892            file_id:             file.file_id.clone(),
1893            path:                file.path.clone(),
1894            candidate_group_ids: candidate_group_ids
1895               .iter()
1896               .map(|group_id| (*group_id).to_string())
1897               .collect(),
1898            hunk_ids:            file.hunk_ids.clone(),
1899         });
1900      }
1901   }
1902
1903   Ok((assigned, ambiguous))
1904}
1905
1906fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1907   let mut out = String::new();
1908   for group in groups {
1909      let scope = group
1910         .scope
1911         .as_ref()
1912         .map(|scope| format!("({})", scope.as_str()))
1913         .unwrap_or_default();
1914      writeln!(
1915         out,
1916         "- {} [{}{}] {}",
1917         group.group_id,
1918         group.commit_type.as_str(),
1919         scope,
1920         group.rationale
1921      )
1922      .unwrap();
1923   }
1924
1925   out
1926}
1927
1928fn render_binding_ambiguous_files(
1929   snapshot: &ComposeSnapshot,
1930   ambiguous_files: &[AmbiguousFileBinding],
1931) -> String {
1932   let mut out = String::new();
1933   for ambiguous_file in ambiguous_files {
1934      writeln!(
1935         out,
1936         "- {} {} candidates: {}",
1937         ambiguous_file.file_id,
1938         ambiguous_file.path,
1939         ambiguous_file.candidate_group_ids.join(", ")
1940      )
1941      .unwrap();
1942
1943      for hunk_id in &ambiguous_file.hunk_ids {
1944         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1945            if hunk.synthetic {
1946               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1947            } else {
1948               writeln!(
1949                  out,
1950                  "  - {} old:{} new:{} :: {}",
1951                  hunk.hunk_id,
1952                  format_line_range(hunk.old_start, hunk.old_count),
1953                  format_line_range(hunk.new_start, hunk.new_count),
1954                  hunk.snippet
1955               )
1956               .unwrap();
1957            }
1958         }
1959      }
1960   }
1961
1962   out
1963}
1964
1965async fn request_binding(
1966   snapshot: &ComposeSnapshot,
1967   groups: &[ComposeIntentGroup],
1968   ambiguous_files: &[AmbiguousFileBinding],
1969   config: &CommitConfig,
1970   debug_dir: Option<&Path>,
1971   debug_name: &str,
1972) -> Result<Vec<ComposeBindingAssignment>> {
1973   let schema = build_binding_schema();
1974   let groups_text = render_binding_groups(groups);
1975   let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1976   let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1977      variant:         "default",
1978      groups:          &groups_text,
1979      ambiguous_files: &ambiguous_files_text,
1980   })?;
1981   let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1982      operation:        "compose/bind",
1983      model:            &config.analysis_model,
1984      prompt_family:    "compose-bind",
1985      prompt_variant:   "default",
1986      system_prompt:    &parts.system,
1987      user_prompt:      &parts.user,
1988      tool_name:        "bind_compose_hunks",
1989      tool_description: "Assign hunk IDs to existing compose groups",
1990      schema:           &schema,
1991      progress_label:   Some("compose hunk binder"),
1992      debug:            debug_dir.map(|dir| OneShotDebug {
1993         dir:    Some(dir),
1994         prefix: None,
1995         name:   debug_name,
1996      }),
1997      cacheable:        true,
1998   })
1999   .await?;
2000
2001   Ok(response.output.assignments)
2002}
2003
2004fn ambiguous_hunk_context(
2005   ambiguous_files: &[AmbiguousFileBinding],
2006) -> HashMap<String, AmbiguousHunkContext> {
2007   let mut context = HashMap::new();
2008   for ambiguous_file in ambiguous_files {
2009      for hunk_id in &ambiguous_file.hunk_ids {
2010         context.insert(hunk_id.clone(), AmbiguousHunkContext {
2011            candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2012         });
2013      }
2014   }
2015   context
2016}
2017
2018fn evaluate_binding(
2019   assignments: &[ComposeBindingAssignment],
2020   hunk_context: &HashMap<String, AmbiguousHunkContext>,
2021   valid_group_ids: &HashSet<&str>,
2022   snapshot: &ComposeSnapshot,
2023) -> BindingEvaluation {
2024   let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2025
2026   for assignment in assignments {
2027      if !valid_group_ids.contains(assignment.group_id.as_str()) {
2028         continue;
2029      }
2030
2031      let mut seen_in_group = HashSet::new();
2032      for hunk_id in &assignment.hunk_ids {
2033         if !seen_in_group.insert(hunk_id.as_str()) {
2034            continue;
2035         }
2036
2037         let Some(context) = hunk_context.get(hunk_id) else {
2038            continue;
2039         };
2040
2041         if !context
2042            .candidate_group_ids
2043            .iter()
2044            .any(|candidate| candidate == &assignment.group_id)
2045         {
2046            continue;
2047         }
2048
2049         match assigned_hunk_to_group.get(hunk_id) {
2050            None => {
2051               assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2052            },
2053            Some(existing_group) if existing_group == &assignment.group_id => {},
2054            Some(_) => {
2055               assigned_hunk_to_group.remove(hunk_id);
2056            },
2057         }
2058      }
2059   }
2060
2061   let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2062   for (hunk_id, group_id) in assigned_hunk_to_group {
2063      assigned_by_group.entry(group_id).or_default().push(hunk_id);
2064   }
2065
2066   for hunk_ids in assigned_by_group.values_mut() {
2067      let ordered: Vec<String> = snapshot
2068         .hunks
2069         .iter()
2070         .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2071         .map(|hunk| hunk.hunk_id.clone())
2072         .collect();
2073      *hunk_ids = ordered;
2074   }
2075
2076   let unresolved = snapshot
2077      .hunks
2078      .iter()
2079      .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2080      .filter(|hunk| {
2081         !assigned_by_group.values().any(|assigned_hunks| {
2082            assigned_hunks
2083               .iter()
2084               .any(|assigned| assigned == &hunk.hunk_id)
2085         })
2086      })
2087      .map(|hunk| hunk.hunk_id.clone())
2088      .collect();
2089
2090   BindingEvaluation { assigned: assigned_by_group, unresolved }
2091}
2092
2093fn filter_ambiguous_files(
2094   ambiguous_files: &[AmbiguousFileBinding],
2095   hunk_ids: &[String],
2096) -> Vec<AmbiguousFileBinding> {
2097   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2098
2099   ambiguous_files
2100      .iter()
2101      .filter_map(|file| {
2102         let matching_hunks: Vec<String> = file
2103            .hunk_ids
2104            .iter()
2105            .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2106            .cloned()
2107            .collect();
2108
2109         (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2110            file_id:             file.file_id.clone(),
2111            path:                file.path.clone(),
2112            candidate_group_ids: file.candidate_group_ids.clone(),
2113            hunk_ids:            matching_hunks,
2114         })
2115      })
2116      .collect()
2117}
2118
2119fn chunk_ambiguous_files(
2120   ambiguous_files: &[AmbiguousFileBinding],
2121) -> Vec<Vec<AmbiguousFileBinding>> {
2122   if ambiguous_files.is_empty() {
2123      return Vec::new();
2124   }
2125
2126   let mut batches = Vec::new();
2127   let mut current_batch = Vec::new();
2128   let mut current_hunk_count = 0_usize;
2129
2130   for file in ambiguous_files {
2131      let file_hunk_count = file.hunk_ids.len();
2132      let should_split = !current_batch.is_empty()
2133         && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2134            || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2135
2136      if should_split {
2137         batches.push(current_batch);
2138         current_batch = Vec::new();
2139         current_hunk_count = 0;
2140      }
2141
2142      current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2143      current_batch.push(file.clone());
2144   }
2145
2146   if !current_batch.is_empty() {
2147      batches.push(current_batch);
2148   }
2149
2150   batches
2151}
2152
2153fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2154   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2155
2156   snapshot
2157      .hunks
2158      .iter()
2159      .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2160      .map(|hunk| hunk.hunk_id.clone())
2161      .collect()
2162}
2163
2164fn fallback_group_for_hunk(
2165   hunk_id: &str,
2166   ambiguous_files: &[AmbiguousFileBinding],
2167   group_rank: &HashMap<&str, usize>,
2168) -> Option<String> {
2169   ambiguous_files.iter().find_map(|file| {
2170      file
2171         .hunk_ids
2172         .iter()
2173         .any(|candidate| candidate == hunk_id)
2174         .then(|| {
2175            file
2176               .candidate_group_ids
2177               .iter()
2178               .min_by_key(|group_id| {
2179                  group_rank
2180                     .get(group_id.as_str())
2181                     .copied()
2182                     .unwrap_or(usize::MAX)
2183               })
2184               .cloned()
2185         })
2186   })?
2187}
2188
2189fn assign_unresolved_hunks(
2190   unresolved_hunks: &[String],
2191   assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2192   ambiguous_files: &[AmbiguousFileBinding],
2193   group_rank: &HashMap<&str, usize>,
2194) {
2195   for hunk_id in unresolved_hunks {
2196      if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2197         && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2198      {
2199         group_hunks.insert(hunk_id.clone());
2200      }
2201   }
2202}
2203
2204fn normalize_group_type(
2205   snapshot: &ComposeSnapshot,
2206   file_ids: &[String],
2207   original_type: &CommitType,
2208) -> Result<CommitType> {
2209   let dependency_only = !file_ids.is_empty()
2210      && file_ids.iter().all(|file_id| {
2211         snapshot
2212            .file_by_id(file_id)
2213            .is_some_and(|file| is_dependency_manifest(&file.path))
2214      });
2215
2216   if dependency_only && original_type.as_str() != "build" {
2217      CommitType::new("build")
2218   } else {
2219      Ok(original_type.clone())
2220   }
2221}
2222
2223fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2224   snapshot
2225      .files
2226      .iter()
2227      .filter(|file| {
2228         hunk_ids
2229            .iter()
2230            .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2231      })
2232      .map(|file| file.file_id.clone())
2233      .collect()
2234}
2235
2236fn build_redirects(
2237   intent_plan: &ComposeIntentPlan,
2238   executable_groups: &[ComposeExecutableGroup],
2239   group_rank: &HashMap<&str, usize>,
2240) -> HashMap<String, String> {
2241   let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2242      .iter()
2243      .filter(|group| !group.hunk_ids.is_empty())
2244      .map(|group| (group.group_id.as_str(), group))
2245      .collect();
2246
2247   let mut redirects = HashMap::new();
2248   for group in &intent_plan.groups {
2249      if surviving_groups.contains_key(group.group_id.as_str()) {
2250         continue;
2251      }
2252
2253      let redirect = executable_groups
2254         .iter()
2255         .filter(|candidate| candidate.group_id != group.group_id)
2256         .filter(|candidate| {
2257            candidate.file_ids.iter().any(|file_id| {
2258               group
2259                  .file_ids
2260                  .iter()
2261                  .any(|candidate_id| candidate_id == file_id)
2262            })
2263         })
2264         .min_by_key(|candidate| {
2265            group_rank
2266               .get(candidate.group_id.as_str())
2267               .copied()
2268               .unwrap_or(usize::MAX)
2269         })
2270         .map(|candidate| candidate.group_id.clone());
2271
2272      if let Some(redirect) = redirect {
2273         redirects.insert(group.group_id.clone(), redirect);
2274      }
2275   }
2276
2277   redirects
2278}
2279
/// Follows `redirects` from `group_id` until a group without a redirect is
/// reached.
///
/// Redirect cycles are tolerated: the walk stops at the first group that is
/// about to be followed a second time, and that group is returned.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
   let mut visited: HashSet<&str> = HashSet::new();
   let mut cursor = group_id;

   loop {
      match redirects.get(cursor) {
         // `insert` returns false once `cursor` has been followed before.
         Some(target) if visited.insert(cursor) => cursor = target.as_str(),
         _ => return cursor.to_string(),
      }
   }
}
2293
2294fn prune_empty_groups(
2295   groups: Vec<ComposeExecutableGroup>,
2296   redirects: &HashMap<String, String>,
2297) -> Result<ComposeExecutablePlan> {
2298   let surviving_ids: HashSet<String> = groups
2299      .iter()
2300      .filter(|group| !group.hunk_ids.is_empty())
2301      .map(|group| group.group_id.clone())
2302      .collect();
2303
2304   let mut surviving_groups = Vec::new();
2305   for mut group in groups {
2306      if group.hunk_ids.is_empty() {
2307         continue;
2308      }
2309
2310      let mut rewritten_dependencies = Vec::new();
2311      for dependency in &group.dependencies {
2312         let rewritten = resolve_redirect(dependency, redirects);
2313         if rewritten != group.group_id
2314            && surviving_ids.contains(&rewritten)
2315            && !rewritten_dependencies
2316               .iter()
2317               .any(|existing| existing == &rewritten)
2318         {
2319            rewritten_dependencies.push(rewritten);
2320         }
2321      }
2322
2323      group.dependencies = rewritten_dependencies;
2324      surviving_groups.push(group);
2325   }
2326
2327   let dependency_order = compute_dependency_order(
2328      &surviving_groups,
2329      |group| &group.group_id,
2330      |group| &group.dependencies,
2331   )?;
2332   Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2333}
2334
2335fn finalize_executable_plan(
2336   snapshot: &ComposeSnapshot,
2337   intent_plan: &ComposeIntentPlan,
2338   assigned_by_group: HashMap<String, BTreeSet<String>>,
2339) -> Result<ComposeExecutablePlan> {
2340   let group_rank: HashMap<&str, usize> = intent_plan
2341      .dependency_order
2342      .iter()
2343      .enumerate()
2344      .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2345      .collect();
2346
2347   let mut executable_groups = Vec::new();
2348   for group in &intent_plan.groups {
2349      let hunk_ids: Vec<String> = snapshot
2350         .hunks
2351         .iter()
2352         .filter(|hunk| {
2353            assigned_by_group
2354               .get(&group.group_id)
2355               .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2356         })
2357         .map(|hunk| hunk.hunk_id.clone())
2358         .collect();
2359
2360      let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2361      let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2362      executable_groups.push(ComposeExecutableGroup {
2363         group_id: group.group_id.clone(),
2364         commit_type,
2365         scope: group.scope.clone(),
2366         file_ids,
2367         rationale: group.rationale.clone(),
2368         dependencies: group.dependencies.clone(),
2369         hunk_ids,
2370      });
2371   }
2372
2373   let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2374   prune_empty_groups(executable_groups, &redirects)
2375}
2376
2377fn validate_executable_plan(
2378   snapshot: &ComposeSnapshot,
2379   plan: &ComposeExecutablePlan,
2380) -> Result<()> {
2381   if plan.groups.is_empty() {
2382      return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2383   }
2384
2385   let known_hunks: HashSet<&str> = snapshot
2386      .hunks
2387      .iter()
2388      .map(|hunk| hunk.hunk_id.as_str())
2389      .collect();
2390   let known_files: HashSet<&str> = snapshot
2391      .files
2392      .iter()
2393      .map(|file| file.file_id.as_str())
2394      .collect();
2395   let mut coverage = HashMap::<String, String>::new();
2396
2397   for group in &plan.groups {
2398      if group.hunk_ids.is_empty() {
2399         return Err(CommitGenError::Other(format!(
2400            "Compose group {} ended up empty after binding",
2401            group.group_id
2402         )));
2403      }
2404
2405      for file_id in &group.file_ids {
2406         if !known_files.contains(file_id.as_str()) {
2407            return Err(CommitGenError::Other(format!(
2408               "Compose group {} references unknown file_id {}",
2409               group.group_id, file_id
2410            )));
2411         }
2412      }
2413
2414      for hunk_id in &group.hunk_ids {
2415         if !known_hunks.contains(hunk_id.as_str()) {
2416            return Err(CommitGenError::Other(format!(
2417               "Compose group {} references unknown hunk_id {}",
2418               group.group_id, hunk_id
2419            )));
2420         }
2421
2422         if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2423            return Err(CommitGenError::Other(format!(
2424               "Hunk {} was assigned to both {} and {}",
2425               hunk_id, existing_group, group.group_id
2426            )));
2427         }
2428      }
2429   }
2430
2431   let missing_hunks: Vec<String> = snapshot
2432      .hunks
2433      .iter()
2434      .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2435      .map(|hunk| hunk.hunk_id.clone())
2436      .collect();
2437   if !missing_hunks.is_empty() {
2438      return Err(CommitGenError::Other(format!(
2439         "Compose plan left hunks unassigned: {}",
2440         missing_hunks.join(", ")
2441      )));
2442   }
2443
2444   let dependency_order =
2445      compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2446   if dependency_order != plan.dependency_order {
2447      return Err(CommitGenError::Other(
2448         "Compose dependency order does not match recomputed order".to_string(),
2449      ));
2450   }
2451
2452   Ok(())
2453}
2454
2455#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2456async fn bind_compose_plan(
2457   snapshot: &ComposeSnapshot,
2458   intent_plan: &ComposeIntentPlan,
2459   config: &CommitConfig,
2460   debug_dir: Option<&Path>,
2461) -> Result<ComposeExecutablePlan> {
2462   let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2463
2464   if !ambiguous_files.is_empty() {
2465      let valid_group_ids: HashSet<&str> = intent_plan
2466         .groups
2467         .iter()
2468         .map(|group| group.group_id.as_str())
2469         .collect();
2470      let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2471      let mut unresolved = Vec::new();
2472
2473      for (batch_idx, batch) in binding_batches.iter().enumerate() {
2474         let hunk_context = ambiguous_hunk_context(batch);
2475         let debug_name = if binding_batches.len() == 1 {
2476            "compose_bind".to_string()
2477         } else {
2478            format!("compose_bind_{:02}", batch_idx + 1)
2479         };
2480         let assignments =
2481            request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2482               .await?;
2483         let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2484         for (group_id, hunk_ids) in evaluation.assigned {
2485            let entry = assigned_by_group.entry(group_id).or_default();
2486            for hunk_id in hunk_ids {
2487               entry.insert(hunk_id);
2488            }
2489         }
2490         unresolved.extend(evaluation.unresolved);
2491      }
2492
2493      let group_rank: HashMap<&str, usize> = intent_plan
2494         .dependency_order
2495         .iter()
2496         .enumerate()
2497         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2498         .collect();
2499
2500      let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2501      if !unresolved.is_empty() {
2502         let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2503         let repair_batches = chunk_ambiguous_files(&unresolved_files);
2504         let mut repair_unresolved = Vec::new();
2505
2506         for (batch_idx, batch) in repair_batches.iter().enumerate() {
2507            let debug_name = if repair_batches.len() == 1 {
2508               "compose_bind_repair".to_string()
2509            } else {
2510               format!("compose_bind_repair_{:02}", batch_idx + 1)
2511            };
2512            let repair_assignments = request_binding(
2513               snapshot,
2514               &intent_plan.groups,
2515               batch,
2516               config,
2517               debug_dir,
2518               &debug_name,
2519            )
2520            .await?;
2521            let repair_context = ambiguous_hunk_context(batch);
2522            let repair =
2523               evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2524            for (group_id, hunk_ids) in repair.assigned {
2525               let entry = assigned_by_group.entry(group_id).or_default();
2526               for hunk_id in hunk_ids {
2527                  entry.insert(hunk_id);
2528               }
2529            }
2530
2531            repair_unresolved.extend(repair.unresolved);
2532         }
2533         unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2534
2535         if !unresolved.is_empty() {
2536            assign_unresolved_hunks(
2537               &unresolved,
2538               &mut assigned_by_group,
2539               &ambiguous_files,
2540               &group_rank,
2541            );
2542         }
2543      }
2544   }
2545
2546   let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2547   validate_executable_plan(snapshot, &plan)?;
2548   Ok(plan)
2549}
2550
2551fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2552   println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2553   for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2554      let group = &plan.groups[group_idx];
2555      let scope = group
2556         .scope
2557         .as_ref()
2558         .map(|scope| format!("({})", style::scope(scope.as_str())))
2559         .unwrap_or_default();
2560
2561      println!(
2562         "\n{}. {} [{}{}] {}",
2563         display_idx + 1,
2564         style::bold(&group.group_id),
2565         style::commit_type(group.commit_type.as_str()),
2566         scope,
2567         group.rationale
2568      );
2569
2570      println!("   Files:");
2571      for file_id in &group.file_ids {
2572         if let Some(file) = snapshot.file_by_id(file_id) {
2573            let selected_hunk_ids: Vec<&str> = group
2574               .hunk_ids
2575               .iter()
2576               .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2577               .map(String::as_str)
2578               .collect();
2579            let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2580               "all hunks".to_string()
2581            } else {
2582               selected_hunk_ids.join(", ")
2583            };
2584            println!("     - {} {} ({selection})", file.file_id, file.path);
2585         }
2586      }
2587
2588      if !group.dependencies.is_empty() {
2589         println!("   Depends on: {}", group.dependencies.join(", "));
2590      }
2591   }
2592}
2593
2594#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2595async fn generate_compose_group_analysis(
2596   stat: &str,
2597   diff: &str,
2598   group: &ComposeExecutableGroup,
2599   config: &CommitConfig,
2600   args: &Args,
2601   debug_prefix: &str,
2602   counter: &TokenCounter,
2603) -> Result<ConventionalAnalysis> {
2604   match compose_analysis_strategy(diff, config, counter) {
2605      ComposeAnalysisStrategy::MapReduce => {
2606         println!(
2607            "  {}",
2608            style::info(&format!(
2609               "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2610               group.group_id
2611            ))
2612         );
2613         run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2614      },
2615      strategy => {
2616         let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2617            eprintln!(
2618               "  {}",
2619               style::warning(&format!(
2620                  "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2621                  group.group_id
2622               ))
2623            );
2624            Cow::Owned(smart_truncate_diff(
2625               diff,
2626               compose_truncation_length(config),
2627               config,
2628               counter,
2629            ))
2630         } else {
2631            Cow::Borrowed(diff)
2632         };
2633
2634         let ctx = AnalysisContext {
2635            user_context:    Some(&group.rationale),
2636            recent_commits:  None,
2637            common_scopes:   None,
2638            project_context: None,
2639            debug_output:    args.debug_output.as_deref(),
2640            debug_prefix:    Some(debug_prefix),
2641         };
2642
2643         generate_conventional_analysis(
2644            stat,
2645            analysis_diff.as_ref(),
2646            &config.analysis_model,
2647            "",
2648            &ctx,
2649            config,
2650         )
2651         .await
2652      },
2653   }
2654}
2655
2656fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2657   let files: Vec<&str> = group
2658      .file_ids
2659      .iter()
2660      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2661      .collect();
2662
2663   if files.is_empty() {
2664      "no files resolved".to_string()
2665   } else {
2666      files.join(", ")
2667   }
2668}
2669
2670/// Hunk ids for `file_id` planned by every group up to and including the group
2671/// at `position` in the dependency order. Used to reconstruct a file's intended
2672/// index content at a given commit from its base, independent of apply order.
2673fn cumulative_file_hunk_ids(
2674   plan: &ComposeExecutablePlan,
2675   position: usize,
2676   snapshot: &ComposeSnapshot,
2677   file_id: &str,
2678) -> Vec<String> {
2679   let mut hunk_ids = Vec::new();
2680   for &group_idx in plan.dependency_order.iter().take(position + 1) {
2681      let Some(group) = plan.groups.get(group_idx) else {
2682         continue;
2683      };
2684      for hunk_id in &group.hunk_ids {
2685         if snapshot
2686            .hunk_by_id(hunk_id)
2687            .is_some_and(|hunk| hunk.file_id == file_id)
2688         {
2689            hunk_ids.push(hunk_id.clone());
2690         }
2691      }
2692   }
2693   hunk_ids
2694}
2695
2696#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2697pub async fn execute_compose(
2698   snapshot: &ComposeSnapshot,
2699   plan: &ComposeExecutablePlan,
2700   config: &CommitConfig,
2701   args: &Args,
2702   base_state: &ComposeBaseState,
2703) -> Result<Vec<String>> {
2704   let total = plan.dependency_order.len();
2705
2706   // Phase 1: derive each group's diff/stat from the immutable compose snapshot.
2707   // This avoids mutating the index while commit messages are prepared and keeps
2708   // later worktree edits out of already-planned commits.
2709   let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2710   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2711      let group = &plan.groups[group_idx];
2712      println!(
2713         "  {}",
2714         style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2715      );
2716      let group_patch = create_executable_group_patch(snapshot, group)?;
2717      group_diff_stats.push((group_patch.diff, group_patch.stat));
2718   }
2719
2720   // Phase 2: generate commit messages concurrently. Both LLM calls per group
2721   // (analysis + summary) run inside a single async task so the slower of the
2722   // two does not block other groups from progressing.
2723   println!(
2724      "{}",
2725      style::info(&format!(
2726         "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2727         COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2728      ))
2729   );
2730
2731   let token_counter = create_token_counter(config);
2732   let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2733      stream::iter(plan.dependency_order.iter().enumerate())
2734         .map(|(idx, &group_idx)| {
2735            let group = &plan.groups[group_idx];
2736            let (diff, stat) = &group_diff_stats[idx];
2737            let debug_prefix = format!("compose-{}", idx + 1);
2738            let token_counter = &token_counter;
2739            async move {
2740               let result = async {
2741                  let analysis = generate_compose_group_analysis(
2742                     stat,
2743                     diff,
2744                     group,
2745                     config,
2746                     args,
2747                     &debug_prefix,
2748                     token_counter,
2749                  )
2750                  .await?;
2751                  let body = analysis.body_texts();
2752                  let summary = generate_summary_from_analysis(
2753                     stat,
2754                     group.commit_type.as_str(),
2755                     group.scope.as_ref().map(|scope| scope.as_str()),
2756                     &body,
2757                     Some(&group.rationale),
2758                     config,
2759                     args.debug_output.as_deref(),
2760                     Some(&debug_prefix),
2761                  )
2762                  .await?;
2763                  Ok::<_, CommitGenError>((body, summary))
2764               }
2765               .await;
2766
2767               result.map_err(|source| CommitGenError::ComposeMessageError {
2768                  group_id: group.group_id.clone(),
2769                  files:    compose_group_file_list(snapshot, group),
2770                  source:   Box::new(source),
2771               })
2772            }
2773         })
2774         .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2775         .collect::<Vec<_>>()
2776         .await
2777         .into_iter()
2778         .collect::<Result<Vec<_>>>()?;
2779
2780   execute_compose_with_prepared_messages(
2781      snapshot,
2782      plan,
2783      config,
2784      args,
2785      base_state,
2786      prepared_messages,
2787   )
2788}
2789
2790#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2791fn execute_compose_with_prepared_messages(
2792   snapshot: &ComposeSnapshot,
2793   plan: &ComposeExecutablePlan,
2794   config: &CommitConfig,
2795   args: &Args,
2796   base_state: &ComposeBaseState,
2797   prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2798) -> Result<Vec<String>> {
2799   let dir = &args.dir;
2800   let total = plan.dependency_order.len();
2801   if args.compose_preview {
2802      return Ok(Vec::new());
2803   }
2804
2805   let index = TempGitIndex::new(dir)?;
2806   read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2807
2808   let mut commit_hashes = Vec::new();
2809   let mut parent_hash = base_state.head_hash.clone();
2810
2811   // Phase 3: sequential commit-object loop. Re-stage each group into an
2812   // isolated temporary index, then create commit objects parented in memory.
2813   // The real branch and index are not updated until every group succeeds.
2814   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2815      let group = &plan.groups[group_idx];
2816
2817      println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2818      println!("  Type: {}", style::commit_type(group.commit_type.as_str()));
2819      if let Some(scope) = &group.scope {
2820         println!("  Scope: {}", style::scope(scope.as_str()));
2821      }
2822      let paths: Vec<String> = group
2823         .file_ids
2824         .iter()
2825         .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2826         .collect();
2827      println!("  Files: {}", paths.join(", "));
2828
2829      let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2830      let mut staged_anything = outcome.result == StageResult::Staged;
2831
2832      // Any file whose planned patch no longer applies against the temporary
2833      // index is reconstructed from the immutable snapshot base and cumulative
2834      // hunk selection. The real index and worktree are never touched here.
2835      for skipped in &outcome.skipped {
2836         let Some(file) = snapshot.file_by_path(&skipped.path) else {
2837            continue;
2838         };
2839         let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2840         force_stage_file_from_base_in_index(
2841            snapshot,
2842            &file.file_id,
2843            &cumulative,
2844            dir,
2845            index.path(),
2846         )?;
2847         staged_anything = true;
2848         eprintln!(
2849            "  {}",
2850            style::info(&format!(
2851               "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2852               skipped.path
2853            ))
2854         );
2855      }
2856
2857      if !staged_anything {
2858         eprintln!(
2859            "  {}",
2860            style::warning(&format!(
2861               "Skipping commit {}: its planned patch is already applied ({:?})",
2862               group.group_id, outcome.result
2863            ))
2864         );
2865         continue;
2866      }
2867
2868      let (analysis_body, summary) = prepared_messages[idx].clone();
2869      let mut commit = ConventionalCommit {
2870         commit_type: group.commit_type.clone(),
2871         scope: group.scope.clone(),
2872         summary,
2873         body: analysis_body,
2874         footers: vec![],
2875      };
2876      post_process_commit_message(&mut commit, config);
2877
2878      if let Err(err) = validate_commit_message(&commit, config) {
2879         eprintln!(
2880            "  {}",
2881            style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2882         );
2883      }
2884
2885      let mut formatted_message = format_commit_message(&commit);
2886      if args.signoff || config.signoff {
2887         formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2888      }
2889      println!(
2890         "  Message:\n{}",
2891         formatted_message
2892            .lines()
2893            .take(3)
2894            .collect::<Vec<_>>()
2895            .join("\n")
2896      );
2897
2898      let tree = write_index_tree(index.path(), dir)?;
2899      let sign = args.sign || config.gpg_sign;
2900      let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2901      parent_hash.clone_from(&hash);
2902      commit_hashes.push(hash);
2903
2904      if args.compose_test_after_each {
2905         return Err(CommitGenError::Other(
2906            "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2907         ));
2908      }
2909   }
2910
2911   if commit_hashes.is_empty() {
2912      return Ok(commit_hashes);
2913   }
2914
2915   update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2916
2917   let current_index_tree = write_real_index_tree(dir)?;
2918   if current_index_tree == base_state.index_tree {
2919      reset_mixed_to(&parent_hash, dir)?;
2920   } else {
2921      // Someone staged while compose ran. The commits contain only pinned
2922      // snapshot content, so just refresh the index entries for the paths
2923      // compose committed and leave the drifted staging intact.
2924      println!(
2925         "{}",
2926         style::warning("Index changed during compose; preserving newly staged changes")
2927      );
2928      let paths: Vec<String> = snapshot.files.iter().map(|file| file.path.clone()).collect();
2929      reset_paths_to(&parent_hash, &paths, dir)?;
2930   }
2931
2932   Ok(commit_hashes)
2933}
2934
2935#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2936pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2937   let max_rounds = config.compose_max_rounds;
2938
2939   for round in 1..=max_rounds {
2940      if round > 1 {
2941         println!(
2942            "\n{}",
2943            style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2944         );
2945      } else {
2946         println!("{}", style::section_header("Compose Mode", 80));
2947      }
2948      println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2949
2950      run_compose_round(args, config, round).await?;
2951
2952      if args.compose_preview {
2953         break;
2954      }
2955
2956      match get_compose_diff(&args.dir) {
2957         Err(CommitGenError::NoChanges { .. }) => {
2958            println!(
2959               "\n{}",
2960               style::success(&format!(
2961                  "{} All changes committed successfully",
2962                  style::icons::SUCCESS
2963               ))
2964            );
2965            break;
2966         },
2967         Err(err) => return Err(err),
2968         Ok(remaining_diff) => {
2969            eprintln!(
2970               "\n{}",
2971               style::warning(&format!(
2972                  "{} Uncommitted changes remain after round {round}",
2973                  style::icons::WARNING
2974               ))
2975            );
2976            eprintln!("{remaining_diff}");
2977         },
2978      }
2979
2980      if round < max_rounds {
2981         eprintln!("{}", style::info("Starting another compose round..."));
2982      } else {
2983         eprintln!(
2984            "{}",
2985            style::warning(&format!(
2986               "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2987            ))
2988         );
2989      }
2990   }
2991
2992   Ok(())
2993}
2994
2995#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2996async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2997   let base_state = capture_compose_base_state(&args.dir)?;
2998   let diff = get_compose_diff(&args.dir)?;
2999   let stat = get_compose_stat(&args.dir)?;
3000   let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3001   // Freeze every file's on-disk content into the odb before any LLM call:
3002   // staging later reads these pins, never the live worktree, so edits made
3003   // while compose runs cannot leak into its commits.
3004   pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3005   let snapshot = snapshot;
3006
3007   if let Some(debug_dir) = args.debug_output.as_deref() {
3008      save_debug_artifact(
3009         Some(debug_dir),
3010         &format!("compose_round_{round}_snapshot.json"),
3011         &snapshot,
3012      )?;
3013   }
3014
3015   let token_counter = create_token_counter(config);
3016   let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3017      println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3018      observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3019   } else {
3020      if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3021         && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3022      {
3023         println!(
3024            "{}",
3025            style::info(
3026               "Skipping per-file observations for very large compose snapshot; using area-level \
3027                planning instead."
3028            )
3029         );
3030      }
3031      Vec::new()
3032   };
3033
3034   if let Some(debug_dir) = args.debug_output.as_deref()
3035      && !observations.is_empty()
3036   {
3037      save_debug_artifact(
3038         Some(debug_dir),
3039         &format!("compose_round_{round}_observations.json"),
3040         &observations,
3041      )?;
3042   }
3043
3044   let max_commits = args.compose_max_commits.unwrap_or(20);
3045   let executable_plan = if let Some(cached_plan) =
3046      load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3047   {
3048      println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3049      cached_plan
3050   } else {
3051      println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3052      let intent_plan = analyze_compose_intent(
3053         &snapshot,
3054         &observations,
3055         config,
3056         max_commits,
3057         args.debug_output.as_deref(),
3058      )
3059      .await?;
3060
3061      if let Some(debug_dir) = args.debug_output.as_deref() {
3062         save_debug_artifact(
3063            Some(debug_dir),
3064            &format!("compose_round_{round}_intent_plan.json"),
3065            &intent_plan,
3066         )?;
3067      }
3068
3069      println!("{}", style::info("Binding hunks to groups..."));
3070      let plan =
3071         bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3072      save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3073      plan
3074   };
3075
3076   if let Some(debug_dir) = args.debug_output.as_deref() {
3077      save_debug_artifact(
3078         Some(debug_dir),
3079         &format!("compose_round_{round}_executable_plan.json"),
3080         &executable_plan,
3081      )?;
3082   }
3083
3084   print_executable_plan(&snapshot, &executable_plan);
3085
3086   if args.compose_preview {
3087      println!(
3088         "\n{}",
3089         style::success(&format!(
3090            "{} Preview complete (use --compose without --compose-preview to execute)",
3091            style::icons::SUCCESS
3092         ))
3093      );
3094      return Ok(());
3095   }
3096
3097   println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3098   let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3099   println!(
3100      "{}",
3101      style::success(&format!(
3102         "{} Round {round}: Created {} commit(s)",
3103         style::icons::SUCCESS,
3104         hashes.len()
3105      ))
3106   );
3107   Ok(())
3108}
3109
3110#[cfg(test)]
3111mod tests {
3112   use std::{fmt::Write, fs};
3113
3114   use tempfile::TempDir;
3115
3116   use super::*;
3117   use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3118
3119   fn shared_file_diff() -> (&'static str, &'static str) {
3120      (
3121         r#"diff --git a/src/lib.rs b/src/lib.rs
3122index 1111111..2222222 100644
3123--- a/src/lib.rs
3124+++ b/src/lib.rs
3125@@ -1,3 +1,3 @@
3126-fn alpha() {
3127+fn alpha_changed() {
3128     println!("alpha");
3129 }
3130@@ -12,3 +12,3 @@
3131-fn beta() {
3132+fn beta_changed() {
3133     println!("beta");
3134 }
3135diff --git a/tests/lib.rs b/tests/lib.rs
3136index 3333333..4444444 100644
3137--- a/tests/lib.rs
3138+++ b/tests/lib.rs
3139@@ -1,3 +1,4 @@
3140 fn test_it() {
3141+    assert!(true);
3142 }
3143"#,
3144         " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3145      )
3146   }
3147
3148   fn build_test_snapshot() -> ComposeSnapshot {
3149      let (diff, stat) = shared_file_diff();
3150      build_compose_snapshot(diff, stat).unwrap()
3151   }
3152
3153   fn write_file(dir: &TempDir, path: &str, contents: &str) {
3154      let full_path = dir.path().join(path);
3155      if let Some(parent) = full_path.parent() {
3156         fs::create_dir_all(parent).unwrap();
3157      }
3158      fs::write(full_path, contents).unwrap();
3159   }
3160
3161   fn run_git(dir: &TempDir, args: &[&str]) -> String {
3162      let output = crate::git::git_command()
3163         .args(args)
3164         .current_dir(dir.path())
3165         .output()
3166         .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3167
3168      assert!(
3169         output.status.success(),
3170         "git {:?} failed: stdout={} stderr={}",
3171         args,
3172         String::from_utf8_lossy(&output.stdout),
3173         String::from_utf8_lossy(&output.stderr)
3174      );
3175
3176      String::from_utf8_lossy(&output.stdout).to_string()
3177   }
3178
3179   fn init_repo() -> TempDir {
3180      let dir = TempDir::new().unwrap();
3181      run_git(&dir, &["init"]);
3182      run_git(&dir, &["config", "user.name", "Compose Test"]);
3183      run_git(&dir, &["config", "user.email", "compose@test.local"]);
3184      run_git(&dir, &["config", "commit.gpgsign", "false"]);
3185      dir
3186   }
3187
3188   fn commit_all(dir: &TempDir, message: &str) {
3189      run_git(dir, &["add", "."]);
3190      run_git(dir, &["commit", "-m", message]);
3191   }
3192
3193   fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3194      (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3195   }
3196
3197   #[test]
3198   fn test_compose_file_category_treats_prompts_as_functional_source() {
3199      let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3200index 1111111..2222222 100644
3201--- a/prompts/analysis/default.md
3202+++ b/prompts/analysis/default.md
3203@@ -1,1 +1,1 @@
3204-old prompt
3205+new prompt
3206diff --git a/system/analysis/default.md b/system/analysis/default.md
3207index 5555555..6666666 100644
3208--- a/system/analysis/default.md
3209+++ b/system/analysis/default.md
3210@@ -1,1 +1,1 @@
3211-old system
3212+new system
3213diff --git a/README.md b/README.md
3214index 3333333..4444444 100644
3215--- a/README.md
3216+++ b/README.md
3217@@ -1,1 +1,1 @@
3218-old docs
3219+new docs
3220";
3221      let snapshot = build_compose_snapshot(diff, "").unwrap();
3222      let prompt_file = snapshot
3223         .file_by_path("prompts/analysis/default.md")
3224         .unwrap();
3225      let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3226      let readme_file = snapshot.file_by_path("README.md").unwrap();
3227
3228      assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3229      assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3230      assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3231
3232      let feat_group = ComposeIntentGroup {
3233         group_id:     "G1".to_string(),
3234         commit_type:  CommitType::new("feat").unwrap(),
3235         scope:        None,
3236         file_ids:     vec![prompt_file.file_id.clone()],
3237         rationale:    "prompt behavior change".to_string(),
3238         dependencies: vec![],
3239      };
3240      assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3241
3242      let fallback_type =
3243         fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3244            .unwrap();
3245      assert_eq!(fallback_type.as_str(), "refactor");
3246   }
3247
3248   fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3249      let mut diff = String::new();
3250
3251      for file_idx in 0..file_count {
3252         let path = format!("src/module_{file_idx:03}.rs");
3253         writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3254         diff.push_str("index 1111111..2222222 100644\n");
3255         writeln!(diff, "--- a/{path}").unwrap();
3256         writeln!(diff, "+++ b/{path}").unwrap();
3257
3258         for hunk_idx in 0..hunks_per_file {
3259            let line_no = (hunk_idx * 4) + 1;
3260            writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3261            writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3262            writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3263         }
3264      }
3265
3266      build_compose_snapshot(&diff, "").unwrap()
3267   }
3268
3269   fn build_multi_area_snapshot() -> ComposeSnapshot {
3270      let mut diff = String::new();
3271      let areas = [
3272         ("apps/frontend/src/server", 72),
3273         ("packages/model/src/models", 54),
3274         ("apps/daemon/src/worker", 43),
3275         (".github/workflows", 16),
3276      ];
3277
3278      for (prefix, count) in areas {
3279         for file_idx in 0..count {
3280            let path = format!("{prefix}/file_{file_idx:03}.rs");
3281            writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3282            diff.push_str("index 1111111..2222222 100644\n");
3283            writeln!(diff, "--- a/{path}").unwrap();
3284            writeln!(diff, "+++ b/{path}").unwrap();
3285            diff.push_str("@@ -1,1 +1,1 @@\n");
3286            writeln!(diff, "-old_{file_idx}").unwrap();
3287            writeln!(diff, "+new_{file_idx}").unwrap();
3288         }
3289      }
3290
3291      build_compose_snapshot(&diff, "").unwrap()
3292   }
3293
3294   fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3295      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3296      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3297      let groups = vec![
3298         ComposeIntentGroup {
3299            group_id:     "G1".to_string(),
3300            commit_type:  CommitType::new("refactor").unwrap(),
3301            scope:        None,
3302            file_ids:     vec![source_file.file_id.clone(), test_file.file_id.clone()],
3303            rationale:    "implementation group".to_string(),
3304            dependencies: vec![],
3305         },
3306         ComposeIntentGroup {
3307            group_id:     "G2".to_string(),
3308            commit_type:  CommitType::new("refactor").unwrap(),
3309            scope:        None,
3310            file_ids:     vec![source_file.file_id.clone()],
3311            rationale:    "shared file follow-up".to_string(),
3312            dependencies: vec!["G1".to_string()],
3313         },
3314      ];
3315      let dependency_order =
3316         compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3317            .unwrap();
3318      ComposeIntentPlan { groups, dependency_order }
3319   }
3320
3321   #[test]
3322   fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3323      let dir = init_repo();
3324      write_file(&dir, "src/a.rs", "fn a() {}\n");
3325      write_file(&dir, "src/b.rs", "fn b() {}\n");
3326      commit_all(&dir, "initial");
3327      write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3328      write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3329
3330      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3331      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3332      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3333      let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3334      let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3335      let plan = ComposeExecutablePlan {
3336         groups:           vec![
3337            ComposeExecutableGroup {
3338               group_id:     "G1".to_string(),
3339               commit_type:  CommitType::new("refactor").unwrap(),
3340               scope:        None,
3341               file_ids:     vec![a_file.file_id.clone()],
3342               rationale:    "change a".to_string(),
3343               dependencies: vec![],
3344               hunk_ids:     a_file.hunk_ids.clone(),
3345            },
3346            ComposeExecutableGroup {
3347               group_id:     "G2".to_string(),
3348               commit_type:  CommitType::new("refactor").unwrap(),
3349               scope:        None,
3350               file_ids:     vec![b_file.file_id.clone()],
3351               rationale:    "change b".to_string(),
3352               dependencies: vec!["G1".to_string()],
3353               hunk_ids:     b_file.hunk_ids.clone(),
3354            },
3355         ],
3356         dependency_order: vec![0, 1],
3357      };
3358      let config = CommitConfig::default();
3359      let args = Args {
3360         dir: dir.path().to_string_lossy().to_string(),
3361         compose: true,
3362         ..Default::default()
3363      };
3364      let base_state = capture_compose_base_state(&args.dir).unwrap();
3365
3366      let hashes = execute_compose_with_prepared_messages(
3367         &snapshot,
3368         &plan,
3369         &config,
3370         &args,
3371         &base_state,
3372         vec![canned_message("change a"), canned_message("change b")],
3373      )
3374      .unwrap();
3375
3376      assert_eq!(hashes.len(), 2);
3377      assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3378      assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3379   }
3380
3381   #[test]
3382   fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3383      let dir = init_repo();
3384      write_file(&dir, "src/lib.rs", "old\n");
3385      write_file(&dir, "sentinel.txt", "base\n");
3386      commit_all(&dir, "initial");
3387      let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3388
3389      // A real change so the snapshot is valid.
3390      write_file(&dir, "src/lib.rs", "changed\n");
3391
3392      // A pre-existing staged change that MUST survive a failed compose run.
3393      write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3394      run_git(&dir, &["add", "sentinel.txt"]);
3395      let staged_before = run_git(&dir, &["diff", "--cached"]);
3396      assert!(staged_before.contains("staged sentinel"));
3397
3398      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3399      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3400      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3401      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3402      // The plan references a hunk id that does not exist, so staging fails
3403      // before any commit object is created or any ref is updated.
3404      let plan = ComposeExecutablePlan {
3405         groups:           vec![ComposeExecutableGroup {
3406            group_id:     "G1".to_string(),
3407            commit_type:  CommitType::new("fix").unwrap(),
3408            scope:        None,
3409            file_ids:     vec![source_file.file_id.clone()],
3410            rationale:    "unstageable group".to_string(),
3411            dependencies: vec![],
3412            hunk_ids:     vec!["F999-H001".to_string()],
3413         }],
3414         dependency_order: vec![0],
3415      };
3416      let config = CommitConfig::default();
3417      let args = Args {
3418         dir: dir.path().to_string_lossy().to_string(),
3419         compose: true,
3420         ..Default::default()
3421      };
3422      let base_state = capture_compose_base_state(&args.dir).unwrap();
3423
3424      let err = execute_compose_with_prepared_messages(
3425         &snapshot,
3426         &plan,
3427         &config,
3428         &args,
3429         &base_state,
3430         vec![canned_message("unstageable group")],
3431      )
3432      .unwrap_err();
3433
3434      assert!(err.to_string().contains("unknown hunk id"));
3435      assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3436      assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3437   }
3438
3439   #[test]
3440   fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3441      let snapshot = build_test_snapshot();
3442      let intent_plan = build_shared_intent_plan(&snapshot);
3443      let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3444
3445      assert_eq!(ambiguous.len(), 1);
3446      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3447      let assigned_to_g1 = assigned.get("G1").unwrap();
3448      assert!(
3449         test_file
3450            .hunk_ids
3451            .iter()
3452            .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3453         "uniquely owned file should be auto-assigned"
3454      );
3455   }
3456
3457   #[test]
3458   fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3459      let snapshot = build_test_snapshot();
3460      let intent_plan = build_shared_intent_plan(&snapshot);
3461      let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3462      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3463      let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3464      let valid_group_ids: HashSet<&str> = intent_plan
3465         .groups
3466         .iter()
3467         .map(|group| group.group_id.as_str())
3468         .collect();
3469
3470      let evaluation = evaluate_binding(
3471         &[
3472            ComposeBindingAssignment {
3473               group_id: "G1".to_string(),
3474               hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3475            },
3476            ComposeBindingAssignment {
3477               group_id: "G2".to_string(),
3478               hunk_ids: vec![source_file.hunk_ids[1].clone()],
3479            },
3480         ],
3481         &hunk_context,
3482         &valid_group_ids,
3483         &snapshot,
3484      );
3485
3486      for (group_id, hunk_ids) in evaluation.assigned {
3487         let entry = assigned.entry(group_id).or_default();
3488         for hunk_id in hunk_ids {
3489            entry.insert(hunk_id);
3490         }
3491      }
3492
3493      let group_rank: HashMap<&str, usize> = intent_plan
3494         .dependency_order
3495         .iter()
3496         .enumerate()
3497         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3498         .collect();
3499      assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3500
3501      let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3502      assert_eq!(executable_plan.groups.len(), 1);
3503      assert_eq!(executable_plan.groups[0].group_id, "G1");
3504      assert!(
3505         source_file
3506            .hunk_ids
3507            .iter()
3508            .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3509         "fallback should keep every hunk from the shared file in the surviving group"
3510      );
3511   }
3512
3513   #[test]
3514   fn test_validate_executable_plan_rejects_overlap() {
3515      let snapshot = build_test_snapshot();
3516      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3517      let executable_plan = ComposeExecutablePlan {
3518         groups:           vec![
3519            ComposeExecutableGroup {
3520               group_id:     "G1".to_string(),
3521               commit_type:  CommitType::new("refactor").unwrap(),
3522               scope:        None,
3523               file_ids:     vec![source_file.file_id.clone()],
3524               rationale:    "group one".to_string(),
3525               dependencies: vec![],
3526               hunk_ids:     vec![source_file.hunk_ids[0].clone()],
3527            },
3528            ComposeExecutableGroup {
3529               group_id:     "G2".to_string(),
3530               commit_type:  CommitType::new("refactor").unwrap(),
3531               scope:        None,
3532               file_ids:     vec![source_file.file_id.clone()],
3533               rationale:    "group two".to_string(),
3534               dependencies: vec![],
3535               hunk_ids:     vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3536            },
3537         ],
3538         dependency_order: vec![0, 1],
3539      };
3540
3541      let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3542      assert!(err.to_string().contains("assigned to both"));
3543   }
3544
3545   #[test]
3546   fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3547      let snapshot = build_test_snapshot();
3548      let planning_index = build_planning_index(&snapshot);
3549      let groups = vec![ComposeIntentGroup {
3550         group_id:     "G1".to_string(),
3551         commit_type:  CommitType::new("refactor").unwrap(),
3552         scope:        None,
3553         file_ids:     vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3554         rationale:    "normalize file references".to_string(),
3555         dependencies: vec![],
3556      }];
3557
3558      let (normalized_groups, repair_notes) =
3559         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3560
3561      assert_eq!(normalized_groups.len(), 1);
3562      assert_eq!(
3563         normalized_groups[0].file_ids,
3564         snapshot
3565            .files
3566            .iter()
3567            .map(|file| file.file_id.clone())
3568            .collect::<Vec<_>>()
3569      );
3570      assert_eq!(repair_notes.len(), 2);
3571   }
3572
3573   #[test]
3574   fn test_normalize_intent_plan_repairs_missing_files() {
3575      let snapshot = build_test_snapshot();
3576      let planning_index = build_planning_index(&snapshot);
3577      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3578      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3579      let groups = vec![ComposeIntentGroup {
3580         group_id:     "G1".to_string(),
3581         commit_type:  CommitType::new("refactor").unwrap(),
3582         scope:        None,
3583         file_ids:     vec![source_file.file_id.clone()],
3584         rationale:    "partial coverage".to_string(),
3585         dependencies: vec![],
3586      }];
3587
3588      let (normalized_groups, repair_notes) =
3589         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3590
3591      assert_eq!(normalized_groups.len(), 1);
3592      assert!(
3593         normalized_groups[0].file_ids.contains(&source_file.file_id),
3594         "existing file assignment should be preserved"
3595      );
3596      assert!(
3597         normalized_groups[0].file_ids.contains(&test_file.file_id),
3598         "missing files should be assigned to an existing group"
3599      );
3600      assert_eq!(repair_notes.len(), 1);
3601      assert!(repair_notes[0].contains(&test_file.file_id));
3602   }
3603
3604   #[test]
3605   fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3606      let snapshot = build_multi_area_snapshot();
3607      let planning_index = build_planning_index(&snapshot);
3608      let frontend_target = planning_index
3609         .targets
3610         .iter()
3611         .find(|target| target.label.starts_with("apps/frontend"))
3612         .unwrap();
3613      let model_target = planning_index
3614         .targets
3615         .iter()
3616         .find(|target| target.label.starts_with("packages/model"))
3617         .unwrap();
3618      let groups = vec![
3619         ComposeIntentGroup {
3620            group_id:     "G1".to_string(),
3621            commit_type:  CommitType::new("refactor").unwrap(),
3622            scope:        Scope::new("apps/frontend").ok(),
3623            file_ids:     vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3624            rationale:    "frontend platform updates".to_string(),
3625            dependencies: vec!["group 2".to_string(), "G1".to_string()],
3626         },
3627         ComposeIntentGroup {
3628            group_id:     "G2".to_string(),
3629            commit_type:  CommitType::new("refactor").unwrap(),
3630            scope:        Scope::new("packages/model").ok(),
3631            file_ids:     vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3632            rationale:    "model storage updates".to_string(),
3633            dependencies: vec!["F5".to_string()],
3634         },
3635      ];
3636
3637      let (normalized_groups, repair_notes) =
3638         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3639
3640      assert_eq!(normalized_groups.len(), 2);
3641      assert!(
3642         normalized_groups[0]
3643            .file_ids
3644            .iter()
3645            .all(|file_id| file_id.starts_with('F'))
3646      );
3647      assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3648      assert!(normalized_groups[1].dependencies.is_empty());
3649      assert!(
3650         repair_notes
3651            .iter()
3652            .any(|note| note.contains("Dropped unknown planning target"))
3653      );
3654      assert!(
3655         repair_notes
3656            .iter()
3657            .any(|note| note.contains("Dropped self-dependency"))
3658      );
3659      assert!(
3660         repair_notes
3661            .iter()
3662            .any(|note| note.contains("Mapped compose planner dependency"))
3663      );
3664      assert!(
3665         repair_notes
3666            .iter()
3667            .any(|note| note.contains("Dropped unknown dependency"))
3668      );
3669   }
3670
3671   #[test]
3672   fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3673      let snapshot = build_test_snapshot();
3674      let summary = render_snapshot_summary(&snapshot, &[]);
3675      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3676
3677      assert!(!summary.contains("# snapshot compacted"));
3678      for hunk_id in &source_file.hunk_ids {
3679         assert!(summary.contains(hunk_id));
3680      }
3681   }
3682
3683   #[test]
3684   fn test_render_snapshot_summary_compacts_large_snapshot() {
3685      let snapshot = build_large_snapshot(160, 4);
3686      let summary = render_snapshot_summary(&snapshot, &[]);
3687
3688      assert!(summary.contains("# snapshot compacted"));
3689      assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3690      assert!(summary.contains("F001-H001"));
3691      assert!(summary.contains("F001-H004"));
3692      assert!(!summary.contains("F001-H002"));
3693      assert!(!summary.contains("F001-H003"));
3694      assert!(summary.contains("... 2 more hunks omitted from F001"));
3695   }
3696
3697   #[test]
3698   fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3699      let snapshot = build_multi_area_snapshot();
3700      let planning_index = build_planning_index(&snapshot);
3701
3702      assert_eq!(planning_index.mode, PlanningMode::Area);
3703      assert!(planning_index.targets.len() < snapshot.files.len());
3704      assert!(
3705         planning_index
3706            .targets
3707            .iter()
3708            .any(|target| target.label.starts_with("apps/frontend"))
3709      );
3710      assert!(
3711         render_planning_stat(&planning_index).contains("planning over"),
3712         "planning stat should explain the area mode"
3713      );
3714   }
3715
3716   #[test]
3717   fn test_normalize_intent_plan_expands_area_targets() {
3718      let snapshot = build_multi_area_snapshot();
3719      let planning_index = build_planning_index(&snapshot);
3720      let midpoint = planning_index.targets.len() / 2;
3721      let first_group_targets: Vec<String> = planning_index
3722         .targets
3723         .iter()
3724         .take(midpoint)
3725         .map(|target| target.label.clone())
3726         .collect();
3727      let second_group_targets: Vec<String> = planning_index
3728         .targets
3729         .iter()
3730         .skip(midpoint)
3731         .map(|target| target.label.clone())
3732         .collect();
3733      let groups = vec![
3734         ComposeIntentGroup {
3735            group_id:     "G1".to_string(),
3736            commit_type:  CommitType::new("refactor").unwrap(),
3737            scope:        None,
3738            file_ids:     first_group_targets,
3739            rationale:    "frontend and model".to_string(),
3740            dependencies: vec![],
3741         },
3742         ComposeIntentGroup {
3743            group_id:     "G2".to_string(),
3744            commit_type:  CommitType::new("refactor").unwrap(),
3745            scope:        None,
3746            file_ids:     second_group_targets,
3747            rationale:    "daemon and ci".to_string(),
3748            dependencies: vec![],
3749         },
3750      ];
3751
3752      let (normalized_groups, repair_notes) =
3753         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3754
3755      assert_eq!(normalized_groups.len(), 2);
3756      assert!(
3757         normalized_groups
3758            .iter()
3759            .flat_map(|group| group.file_ids.iter())
3760            .all(|file_id| file_id.starts_with('F')),
3761         "area targets should expand back to concrete file IDs"
3762      );
3763      assert!(!repair_notes.is_empty());
3764      assert_eq!(
3765         normalized_groups
3766            .iter()
3767            .flat_map(|group| group.file_ids.iter())
3768            .collect::<HashSet<_>>()
3769            .len(),
3770         snapshot.files.len()
3771      );
3772   }
3773
3774   #[test]
3775   fn test_large_patch_fallback_splits_monolithic_area_plan() {
3776      let snapshot = build_multi_area_snapshot();
3777      let planning_index = build_planning_index(&snapshot);
3778      let monolithic_group = ComposeIntentGroup {
3779         group_id:     "G1".to_string(),
3780         commit_type:  CommitType::new("refactor").unwrap(),
3781         scope:        None,
3782         file_ids:     snapshot
3783            .files
3784            .iter()
3785            .map(|file| file.file_id.clone())
3786            .collect(),
3787         rationale:    "repo-wide refactor".to_string(),
3788         dependencies: vec![],
3789      };
3790
3791      assert!(should_force_large_patch_fallback(
3792         &snapshot,
3793         &planning_index,
3794         &[monolithic_group],
3795         6
3796      ));
3797
3798      let fallback_groups =
3799         build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3800      assert!(fallback_groups.len() >= 3);
3801      assert_eq!(
3802         fallback_groups
3803            .iter()
3804            .flat_map(|group| group.file_ids.iter())
3805            .collect::<HashSet<_>>()
3806            .len(),
3807         snapshot.files.len()
3808      );
3809      assert!(
3810         fallback_groups
3811            .iter()
3812            .any(|group| group.rationale.contains("frontend")),
3813         "fallback should preserve workstream identity"
3814      );
3815   }
3816
3817   #[test]
3818   fn test_should_collect_compose_observations_skips_area_mode() {
3819      let snapshot = build_large_snapshot(160, 4);
3820      let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
3821      let counter = create_token_counter(&config);
3822
3823      assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3824      assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3825   }
3826
3827   #[test]
3828   fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
3829      let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
3830      let counter = create_token_counter(&config);
3831      let payload = "a".repeat(200);
3832      let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");
3833
3834      assert_eq!(
3835         compose_analysis_strategy(&diff, &config, &counter),
3836         ComposeAnalysisStrategy::MapReduce
3837      );
3838   }
3839
3840   #[test]
3841   fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
3842      let config = CommitConfig {
3843         map_reduce_enabled: false,
3844         max_diff_tokens: 1,
3845         max_diff_length: 10_000,
3846         ..Default::default()
3847      };
3848      let counter = create_token_counter(&config);
3849      assert_eq!(compose_truncation_length(&config), 4);
3850
3851      assert_eq!(
3852         compose_analysis_strategy(
3853            "diff --git a/models.json b/models.json\n+large",
3854            &config,
3855            &counter
3856         ),
3857         ComposeAnalysisStrategy::SmartTruncate
3858      );
3859   }
3860
3861   #[test]
3862   fn test_compose_analysis_strategy_keeps_small_group_direct() {
3863      let config = CommitConfig {
3864         map_reduce_threshold: 1_000,
3865         max_diff_tokens: 1_000,
3866         max_diff_length: 10_000,
3867         ..Default::default()
3868      };
3869      let counter = create_token_counter(&config);
3870
3871      assert_eq!(
3872         compose_analysis_strategy("diff --git a/a.rs b/a.rs\n+a", &config, &counter),
3873         ComposeAnalysisStrategy::Direct
3874      );
3875   }
3876
3877   #[test]
3878   fn test_chunk_ambiguous_files_splits_large_binding_request() {
3879      let ambiguous_files = vec![
3880         AmbiguousFileBinding {
3881            file_id:             "F001".to_string(),
3882            path:                "src/alpha.rs".to_string(),
3883            candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3884            hunk_ids:            (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3885         },
3886         AmbiguousFileBinding {
3887            file_id:             "F002".to_string(),
3888            path:                "src/beta.rs".to_string(),
3889            candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3890            hunk_ids:            (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3891         },
3892         AmbiguousFileBinding {
3893            file_id:             "F003".to_string(),
3894            path:                "src/gamma.rs".to_string(),
3895            candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3896            hunk_ids:            (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3897         },
3898      ];
3899
3900      let batches = chunk_ambiguous_files(&ambiguous_files);
3901      let total_hunks: usize = batches
3902         .iter()
3903         .flatten()
3904         .map(|file| file.hunk_ids.len())
3905         .sum();
3906
3907      assert_eq!(batches.len(), 2);
3908      assert_eq!(batches[0].len(), 1);
3909      assert_eq!(batches[1].len(), 2);
3910      assert_eq!(total_hunks, 140);
3911      assert!(batches.iter().all(|batch| {
3912         batch.len() <= MAX_BIND_FILES_PER_REQUEST
3913            && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3914               <= MAX_BIND_HUNKS_PER_REQUEST
3915      }));
3916   }
3917}