Skip to main content

llm_git/
compose.rs

1use std::{
2   borrow::Cow,
3   collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4   fmt::Write,
5   fs,
6   path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13   api::{
14      AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15      generate_summary_from_analysis, run_oneshot, strict_json_schema,
16   },
17   compose_types::{
18      ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19      ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20   },
21   config::CommitConfig,
22   diff::smart_truncate_diff,
23   error::{CommitGenError, Result},
24   git::{
25      TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26      get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27      reset_paths_to, update_ref_checked, write_index_tree, write_real_index_tree,
28   },
29   map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30   normalization::{format_commit_message, post_process_commit_message},
31   patch::{
32      StageResult, build_compose_snapshot, create_executable_group_patch,
33      force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
34      stage_executable_group_in_index,
35   },
36   style, templates,
37   tokens::{TokenCounter, create_token_counter},
38   types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
39   validation::validate_commit_message,
40};
41
/// Per-file observation cap used when a snapshot is small enough to be
/// summarized without compaction.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and stored alongside every cached
/// plan; a cached plan carrying a different tag is ignored on load.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";

// Snapshot-size thresholds. Exceeding either "medium" bound compacts the
// rendered summary; exceeding either "large" bound compacts it further and
// also switches planning from per-file targets to path-based areas.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;

// Limits for one planning area: a bucket that fits within both the file and
// hunk limits is not split further, and splitting never descends past the
// maximum path depth.
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;

// NOTE(review): the consumers of the four constants below are outside this
// part of the file; their exact semantics should be confirmed there.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;

/// Maximum number of commit messages to generate concurrently during
/// `execute_compose`. Matches the per-file fan-out used in `map_reduce`.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// HEAD and index identifiers of a repository, as gathered by
/// `capture_compose_base_state`.
///
/// Two states compare equal only when all three identifiers match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ComposeBaseState {
   /// Result of `get_head_hash` at capture time.
   head_hash:  String,
   /// Result of `current_head_ref` at capture time.
   head_ref:   String,
   /// Result of `write_real_index_tree` at capture time.
   index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68   Ok(ComposeBaseState {
69      head_hash:  get_head_hash(dir)?,
70      head_ref:   current_head_ref(dir)?,
71      index_tree: write_real_index_tree(dir)?,
72   })
73}
74
/// How a diff should be handed to analysis, as decided by
/// `compose_analysis_strategy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeAnalysisStrategy {
   /// The diff is within both the length and token limits.
   Direct,
   /// The diff exceeds the length or token limit but does not qualify for
   /// map-reduce.
   SmartTruncate,
   /// `should_use_map_reduce` accepted the diff.
   MapReduce,
}
81
82fn compose_analysis_strategy(
83   diff: &str,
84   config: &CommitConfig,
85   counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87   if should_use_map_reduce(diff, config, counter) {
88      return ComposeAnalysisStrategy::MapReduce;
89   }
90
91   let diff_tokens = counter.count_sync(diff);
92   if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93      return ComposeAnalysisStrategy::SmartTruncate;
94   }
95
96   ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100   config
101      .max_diff_length
102      .min(config.max_diff_tokens.saturating_mul(4))
103      .max(1)
104}
105
106#[derive(Debug, Deserialize, Serialize)]
107struct ComposeIntentResponse {
108   groups: Vec<ComposeIntentGroup>,
109}
110
111#[derive(Debug, Deserialize, Serialize)]
112struct ComposeBindingResponse {
113   assignments: Vec<ComposeBindingAssignment>,
114}
115
116#[derive(Debug, Serialize, Deserialize)]
117struct ComposeCachedPlan {
118   schema_version: String,
119   cache_key:      String,
120   plan:           ComposeExecutablePlan,
121}
122
/// A file together with the groups that could claim it and the hunks involved.
///
/// NOTE(review): the code that builds and consumes this type is outside this
/// part of the file; field meanings below are read off the names — confirm.
#[derive(Clone, Debug)]
struct AmbiguousFileBinding {
   /// Snapshot identifier of the file.
   file_id:             String,
   /// Path of the file.
   path:                String,
   /// Identifiers of the groups competing for the file.
   candidate_group_ids: Vec<String>,
   /// Identifiers of the file's hunks.
   hunk_ids:            Vec<String>,
}
130
/// Candidate groups attached to a single hunk.
///
/// NOTE(review): usage is outside this part of the file — confirm the exact
/// role there.
#[derive(Clone, Debug)]
struct AmbiguousHunkContext {
   /// Identifiers of the groups the hunk may be assigned to.
   candidate_group_ids: Vec<String>,
}
135
/// String-keyed map to ordered, de-duplicated sets of strings.
///
/// NOTE(review): presumably keyed by hunk ID with the set holding group IDs —
/// confirm against the binding code that populates it.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
/// Outcome of evaluating a binding response.
///
/// NOTE(review): produced outside this part of the file; key/value roles below
/// are inferred from the names — confirm.
#[derive(Debug)]
struct BindingEvaluation {
   /// Accepted assignments (presumably group ID → hunk IDs).
   assigned:   HashMap<String, Vec<String>>,
   /// Identifiers that could not be resolved.
   unresolved: Vec<String>,
}
143
/// Rendering limits applied per file when a snapshot is summarized.
#[derive(Clone, Copy, Debug)]
struct SnapshotSummaryBudget {
   /// Upper bound on observations rendered for one file.
   max_observations_per_file: usize,
   /// Upper bound on hunks rendered for one file; `None` renders every hunk.
   max_hunks_per_file:        Option<usize>,
}
149
/// Granularity of the targets offered to the planner.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PlanningMode {
   /// Every target is exactly one file.
   File,
   /// Targets are path-based areas that may each cover several files.
   Area,
}
155
/// One unit the planner may assign to a group: a single file in file mode, or
/// a bucket of files in area mode.
#[derive(Clone, Debug)]
struct PlanningTarget {
   /// The file's own ID in file mode; an `A001`-style ID in area mode.
   target_id:  String,
   /// The file path in file mode; the bucket label in area mode.
   label:      String,
   /// IDs of the files this target covers.
   file_ids:   Vec<String>,
   /// Total number of hunks across the covered files.
   hunk_count: usize,
   /// Total added lines across the covered files.
   additions:  usize,
   /// Total deleted lines across the covered files.
   deletions:  usize,
}
165
166#[derive(Debug, Clone)]
167struct PlanningIndex {
168   mode:    PlanningMode,
169   targets: Vec<PlanningTarget>,
170   aliases: HashMap<String, String>,
171}
172
/// A labelled set of file IDs produced while partitioning a snapshot into
/// planning areas.
#[derive(Clone, Debug)]
struct PlanningBucket {
   /// Display label chosen by `planning_bucket_label`.
   label:    String,
   /// IDs of the files placed in this bucket.
   file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180   fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181      let mut expanded = Vec::new();
182      let mut seen_file_ids = HashSet::new();
183
184      for target_id in target_ids {
185         if let Some(target) = self
186            .targets
187            .iter()
188            .find(|candidate| candidate.target_id == *target_id)
189         {
190            for file_id in &target.file_ids {
191               if seen_file_ids.insert(file_id.clone()) {
192                  expanded.push(file_id.clone());
193               }
194            }
195         }
196      }
197
198      expanded
199   }
200}
201
202impl SnapshotSummaryBudget {
203   const fn is_compacted(self) -> bool {
204      self.max_hunks_per_file.is_some()
205   }
206}
207
/// Reports whether `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it is one of a
/// fixed set of well-known file names (case-sensitive), or when its extension
/// is `lock` or `lockb` (ASCII case-insensitive). Paths without a UTF-8 file
/// name never match.
fn is_dependency_manifest(path: &str) -> bool {
   const KNOWN_FILE_NAMES: &[&str] = &[
      "Cargo.toml",
      "Cargo.lock",
      "package.json",
      "package-lock.json",
      "pnpm-lock.yaml",
      "yarn.lock",
      "bun.lock",
      "bun.lockb",
      "go.mod",
      "go.sum",
      "requirements.txt",
      "Pipfile",
      "Pipfile.lock",
      "pyproject.toml",
      "Gemfile",
      "Gemfile.lock",
      "composer.json",
      "composer.lock",
      "build.gradle",
      "build.gradle.kts",
      "gradle.properties",
      "pom.xml",
   ];

   let base_name = match Path::new(path).file_name().and_then(|name| name.to_str()) {
      Some(name) => name,
      None => return false,
   };

   if KNOWN_FILE_NAMES.iter().any(|known| *known == base_name) {
      return true;
   }

   // Any other `*.lock` / `*.lockb` file is treated as a lock file as well.
   match Path::new(base_name).extension() {
      Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
      None => false,
   }
}
247
248fn save_debug_artifact<T: Serialize>(
249   debug_dir: Option<&Path>,
250   filename: &str,
251   value: &T,
252) -> Result<()> {
253   let Some(debug_dir) = debug_dir else {
254      return Ok(());
255   };
256
257   fs::create_dir_all(debug_dir)?;
258   let path = debug_dir.join(filename);
259   let json = serde_json::to_string_pretty(value)?;
260   fs::write(path, json)?;
261   Ok(())
262}
263
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a and returns the digest
/// as 16 lowercase, zero-padded hexadecimal digits.
fn fnv1a_64(input: &str) -> String {
   const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
   const PRIME: u64 = 0x0000_0100_0000_01b3;

   // FNV-1a: XOR the byte in first, then multiply by the prime (wrapping).
   let digest = input
      .bytes()
      .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

   format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274   snapshot: &ComposeSnapshot,
275   max_commits: usize,
276   analysis_model: &str,
277) -> String {
278   fnv1a_64(&format!(
279      "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280      snapshot.diff, snapshot.stat
281   ))
282}
283
284fn compose_plan_cache_path(
285   dir: &str,
286   snapshot: &ComposeSnapshot,
287   max_commits: usize,
288   analysis_model: &str,
289) -> Result<PathBuf> {
290   let git_dir = get_git_dir(dir)?;
291   Ok(git_dir.join("llm-git").join(format!(
292      "compose-plan-{}.json",
293      compose_plan_cache_key(snapshot, max_commits, analysis_model)
294   )))
295}
296
297fn load_cached_plan(
298   dir: &str,
299   snapshot: &ComposeSnapshot,
300   max_commits: usize,
301   analysis_model: &str,
302) -> Result<Option<ComposeExecutablePlan>> {
303   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
304   if !cache_path.exists() {
305      return Ok(None);
306   }
307
308   let content = match fs::read_to_string(&cache_path) {
309      Ok(content) => content,
310      Err(err) => {
311         eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
312         return Ok(None);
313      },
314   };
315   let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
316      Ok(cached) => cached,
317      Err(err) => {
318         eprintln!(
319            "{}",
320            style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
321         );
322         let _ = fs::remove_file(&cache_path);
323         return Ok(None);
324      },
325   };
326   let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
327
328   if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
329      return Ok(None);
330   }
331   if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
332      eprintln!(
333         "{}",
334         style::warning(&format!(
335            "Discarding cached compose plan (no longer valid for current snapshot): {err}"
336         ))
337      );
338      let _ = fs::remove_file(&cache_path);
339      return Ok(None);
340   }
341   Ok(Some(cached.plan))
342}
343
344fn save_cached_plan(
345   dir: &str,
346   snapshot: &ComposeSnapshot,
347   max_commits: usize,
348   analysis_model: &str,
349   plan: &ComposeExecutablePlan,
350) -> Result<()> {
351   let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352   if let Some(parent) = cache_path.parent() {
353      fs::create_dir_all(parent)?;
354   }
355
356   let cached = ComposeCachedPlan {
357      schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358      cache_key:      compose_plan_cache_key(snapshot, max_commits, analysis_model),
359      plan:           plan.clone(),
360   };
361   fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362   Ok(())
363}
364
/// Formats a line span for display.
///
/// An empty span renders as `"0"`, a single line as its line number, and a
/// longer span as the inclusive range `start-end`.
fn format_line_range(start: usize, count: usize) -> String {
   if count == 0 {
      return "0".to_string();
   }
   if count == 1 {
      return start.to_string();
   }

   let end = start + count - 1;
   format!("{start}-{end}")
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376   {
377      SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378   } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379      || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380   {
381      SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382   } else {
383      SnapshotSummaryBudget {
384         max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385         max_hunks_per_file:        None,
386      }
387   }
388}
389
/// Picks at most `max_samples` indices from `0..count`, spread evenly and in
/// ascending order.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned — a zero budget selects no
///   index, even if `count` is positive.
/// * When `max_samples` is 1, only index 0 is returned.
/// * Otherwise the first and last index are always included and the remaining
///   slots are distributed proportionally between them.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
   if count <= max_samples {
      return (0..count).collect();
   }

   // From here on `count > max_samples`, so `count >= 1`.
   match max_samples {
      0 => return Vec::new(),
      1 => return vec![0],
      _ => {},
   }

   let last = count - 1;
   let gaps = max_samples - 1;
   let mut positions: Vec<usize> = (0..max_samples)
      .map(|slot| slot * last / gaps)
      .collect();
   // Positions are non-decreasing, so removing adjacent repeats removes all
   // duplicates.
   positions.dedup();
   positions
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411   match budget.max_hunks_per_file {
412      None => file.hunk_ids.iter().map(String::as_str).collect(),
413      Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414         .into_iter()
415         .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416         .collect(),
417   }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421   let budget = snapshot_summary_budget(snapshot);
422   let observations_by_file: HashMap<&str, Vec<&str>> = observations
423      .iter()
424      .map(|observation| {
425         (
426            observation.file.as_str(),
427            observation
428               .observations
429               .iter()
430               .map(String::as_str)
431               .take(budget.max_observations_per_file)
432               .collect(),
433         )
434      })
435      .collect();
436
437   let mut out = String::new();
438   if budget.is_compacted() {
439      let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440      writeln!(
441         out,
442         "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443          representative hunks and {} observation(s) per file",
444         budget.max_observations_per_file
445      )
446      .unwrap();
447   }
448
449   for file in &snapshot.files {
450      writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451      if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452         for observation in file_observations {
453            writeln!(out, "  observation: {observation}").unwrap();
454         }
455      }
456
457      let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458      for hunk_id in &rendered_hunk_ids {
459         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460            if hunk.synthetic {
461               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462            } else {
463               writeln!(
464                  out,
465                  "  - {} old:{} new:{} :: {}",
466                  hunk.hunk_id,
467                  format_line_range(hunk.old_start, hunk.old_count),
468                  format_line_range(hunk.new_start, hunk.new_count),
469                  hunk.snippet
470               )
471               .unwrap();
472            }
473         }
474      }
475
476      let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477      if omitted_hunks > 0 {
478         writeln!(out, "  ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479      }
480   }
481
482   out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486   if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487      || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488   {
489      PlanningMode::Area
490   } else {
491      PlanningMode::File
492   }
493}
494
/// Counts the `/`-separated segments of `path`. Empty segments count too, so
/// the result is always at least 1 (even for an empty string).
fn path_depth(path: &str) -> usize {
   // Splitting on a single character yields one more piece than separators.
   path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, re-joined with
/// `/`. A depth of 0 gives an empty string; a depth beyond the number of
/// segments gives the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
   let leading_segments: Vec<&str> = path.split('/').take(depth).collect();
   leading_segments.join("/")
}
508
/// Returns the longest run of leading `/`-separated segments shared by every
/// path, re-joined with `/`.
///
/// Comparison is per whole segment, never per character. An empty slice, or
/// paths with no shared first segment, give an empty string; a single path is
/// returned unchanged.
fn common_path_prefix(paths: &[String]) -> String {
   let mut remaining = paths.iter();
   let Some(seed) = remaining.next() else {
      return String::new();
   };

   let mut shared: Vec<&str> = seed.split('/').collect();
   for candidate in remaining {
      // Nothing left in common: later paths cannot bring it back.
      if shared.is_empty() {
         break;
      }

      let matching = shared
         .iter()
         .zip(candidate.split('/'))
         .take_while(|(ours, theirs)| **ours == *theirs)
         .count();
      shared.truncate(matching);
   }

   shared.join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532   file_ids
533      .iter()
534      .filter_map(|file_id| snapshot.file_by_id(file_id))
535      .map(|file| file.hunk_ids.len())
536      .sum()
537}
538
539fn group_file_ids_by_prefix(
540   snapshot: &ComposeSnapshot,
541   file_ids: &[String],
542   depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544   let mut groups = BTreeMap::new();
545
546   for file_id in file_ids {
547      if let Some(file) = snapshot.file_by_id(file_id) {
548         groups
549            .entry(prefix_at_depth(&file.path, depth))
550            .or_insert_with(Vec::new)
551            .push(file_id.clone());
552      }
553   }
554
555   groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559   let paths: Vec<String> = file_ids
560      .iter()
561      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562      .collect();
563
564   let common_prefix = common_path_prefix(&paths);
565   if common_prefix.is_empty() {
566      paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567   } else {
568      common_prefix
569   }
570}
571
572fn collect_planning_buckets(
573   snapshot: &ComposeSnapshot,
574   file_ids: &[String],
575   depth: usize,
576) -> Vec<PlanningBucket> {
577   let file_count = file_ids.len();
578   let hunk_count = bucket_hunk_count(snapshot, file_ids);
579   let max_path_depth = file_ids
580      .iter()
581      .filter_map(|file_id| snapshot.file_by_id(file_id))
582      .map(|file| path_depth(&file.path))
583      .max()
584      .unwrap_or(depth);
585
586   let should_stop =
587      file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588   if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589      return vec![PlanningBucket {
590         label:    planning_bucket_label(snapshot, file_ids),
591         file_ids: file_ids.to_vec(),
592      }];
593   }
594
595   let next_depth = depth + 1;
596   let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597   if groups.len() <= 1 {
598      return collect_planning_buckets(snapshot, file_ids, next_depth);
599   }
600
601   groups
602      .into_values()
603      .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604      .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608   let all_file_ids: Vec<String> = snapshot
609      .files
610      .iter()
611      .map(|file| file.file_id.clone())
612      .collect();
613   let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615   buckets
616      .into_iter()
617      .enumerate()
618      .map(|(idx, bucket)| {
619         let mut additions = 0_usize;
620         let mut deletions = 0_usize;
621         let mut hunk_count = 0_usize;
622
623         for file_id in &bucket.file_ids {
624            if let Some(file) = snapshot.file_by_id(file_id) {
625               additions = additions.saturating_add(file.additions);
626               deletions = deletions.saturating_add(file.deletions);
627               hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628            }
629         }
630
631         PlanningTarget {
632            target_id: format!("A{:03}", idx + 1),
633            label: bucket.label,
634            file_ids: bucket.file_ids,
635            hunk_count,
636            additions,
637            deletions,
638         }
639      })
640      .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644   snapshot
645      .files
646      .iter()
647      .map(|file| PlanningTarget {
648         target_id:  file.file_id.clone(),
649         label:      file.path.clone(),
650         file_ids:   vec![file.file_id.clone()],
651         hunk_count: file.hunk_ids.len(),
652         additions:  file.additions,
653         deletions:  file.deletions,
654      })
655      .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659   let mode = planning_mode_for_snapshot(snapshot);
660   let targets = match mode {
661      PlanningMode::File => build_file_planning_targets(snapshot),
662      PlanningMode::Area => build_area_planning_targets(snapshot),
663   };
664
665   let aliases = targets
666      .iter()
667      .flat_map(|target| {
668         let normalized_label = normalize_file_reference(&target.label);
669         [
670            (target.target_id.clone(), target.target_id.clone()),
671            (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672            (normalized_label, target.target_id.clone()),
673         ]
674      })
675      .collect();
676
677   PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681   sample_positions(target.file_ids.len(), 4)
682      .into_iter()
683      .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684      .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688   let hunk_ids: Vec<&String> = target
689      .file_ids
690      .iter()
691      .filter_map(|file_id| snapshot.file_by_id(file_id))
692      .flat_map(|file| file.hunk_ids.iter())
693      .collect();
694
695   sample_positions(hunk_ids.len(), 4)
696      .into_iter()
697      .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698      .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702   let mut out = String::new();
703
704   match index.mode {
705      PlanningMode::File => {
706         writeln!(out, "# planning over individual file IDs").unwrap();
707      },
708      PlanningMode::Area => {
709         writeln!(
710            out,
711            "# planning over {} area IDs spanning {} files",
712            index.targets.len(),
713            index
714               .targets
715               .iter()
716               .flat_map(|target| target.file_ids.iter())
717               .collect::<HashSet<_>>()
718               .len()
719         )
720         .unwrap();
721      },
722   }
723
724   for target in &index.targets {
725      writeln!(
726         out,
727         "{} {} | {} files | {} hunks | +{}/-{}",
728         target.target_id,
729         target.label,
730         target.file_ids.len(),
731         target.hunk_count,
732         target.additions,
733         target.deletions
734      )
735      .unwrap();
736   }
737
738   out
739}
740
741fn render_planning_snapshot_summary(
742   snapshot: &ComposeSnapshot,
743   observations: &[FileObservation],
744   index: &PlanningIndex,
745) -> String {
746   if index.mode == PlanningMode::File {
747      return render_snapshot_summary(snapshot, observations);
748   }
749
750   let observations_by_file: HashMap<&str, Vec<&str>> = observations
751      .iter()
752      .map(|observation| {
753         (
754            observation.file.as_str(),
755            observation
756               .observations
757               .iter()
758               .map(String::as_str)
759               .take(1)
760               .collect(),
761         )
762      })
763      .collect();
764
765   let mut out = String::new();
766   writeln!(
767      out,
768      "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769   )
770   .unwrap();
771
772   for target in &index.targets {
773      writeln!(
774         out,
775         "- {} {} ({} files, {} hunks, +{}/-{})",
776         target.target_id,
777         target.label,
778         target.file_ids.len(),
779         target.hunk_count,
780         target.additions,
781         target.deletions
782      )
783      .unwrap();
784
785      let sample_file_ids = sample_file_ids_for_target(target);
786      if !sample_file_ids.is_empty() {
787         let sample_files: Vec<String> = sample_file_ids
788            .iter()
789            .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790            .collect();
791         writeln!(out, "  files: {}", sample_files.join(", ")).unwrap();
792         let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793         if omitted > 0 {
794            writeln!(out, "  ... {omitted} more files omitted from {}", target.target_id).unwrap();
795         }
796      }
797
798      let mut rendered_observations = 0_usize;
799      for file_id in &target.file_ids {
800         let Some(file) = snapshot.file_by_id(file_id) else {
801            continue;
802         };
803         let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804            continue;
805         };
806
807         for observation in file_observations {
808            writeln!(out, "  observation: {observation}").unwrap();
809            rendered_observations += 1;
810            if rendered_observations >= 2 {
811               break;
812            }
813         }
814
815         if rendered_observations >= 2 {
816            break;
817         }
818      }
819
820      for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821         if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822            if hunk.synthetic {
823               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824            } else {
825               writeln!(
826                  out,
827                  "  - {} old:{} new:{} :: {}",
828                  hunk.hunk_id,
829                  format_line_range(hunk.old_start, hunk.old_count),
830                  format_line_range(hunk.new_start, hunk.new_count),
831                  hunk.snippet
832               )
833               .unwrap();
834            }
835         }
836      }
837   }
838
839   out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843   match index.mode {
844      PlanningMode::File => format!(
845         "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846         snapshot.files.len()
847      ),
848      PlanningMode::Area => format!(
849         "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850          Coverage: {} areas spanning {} files.",
851         index.targets.len(),
852         snapshot.files.len()
853      ),
854   }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858   match index.mode {
859      PlanningMode::File => {
860         "Use only the provided file IDs and keep the grouping conservative.".to_string()
861      },
862      PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863                             planning areas. Split along independent subsystems or workstreams \
864                             when the areas point at unrelated changes."
865         .to_string(),
866   }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870   match index.mode {
871      PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872      PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873                             one broad group if nearly every area clearly belongs to the same \
874                             atomic change."
875         .to_string(),
876   }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880   let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882   strict_json_schema(
883      serde_json::json!({
884         "groups": {
885            "type": "array",
886            "items": {
887               "type": "object",
888               "properties": {
889                  "group_id": {
890                     "type": "string",
891                     "description": "Stable identifier like G1, G2, G3"
892                  },
893                  "file_ids": {
894                     "type": "array",
895                     "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896                     "items": { "type": "string" }
897                  },
898                  "type": {
899                     "type": "string",
900                     "enum": type_enum,
901                     "description": "Conventional commit type for this group"
902                  },
903                  "scope": {
904                     "type": "string",
905                     "description": "Optional scope (module/component). Omit if broad."
906                  },
907                  "rationale": {
908                     "type": "string",
909                     "description": "Brief explanation of the logical change"
910                  },
911                  "dependencies": {
912                     "type": "array",
913                     "description": "Group IDs this group depends on",
914                     "items": { "type": "string" }
915                  }
916               },
917               "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918               "additionalProperties": false
919            }
920         }
921      }),
922      &["groups"],
923   )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927   strict_json_schema(
928      serde_json::json!({
929         "assignments": {
930            "type": "array",
931            "items": {
932               "type": "object",
933               "properties": {
934                  "group_id": { "type": "string" },
935                  "hunk_ids": {
936                     "type": "array",
937                     "items": { "type": "string" }
938                  }
939               },
940               "required": ["group_id", "hunk_ids"],
941               "additionalProperties": false
942            }
943         }
944      }),
945      &["assignments"],
946   )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950   groups: &[T],
951   group_id: FId,
952   dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955   FId: Fn(&T) -> &str,
956   FDeps: Fn(&T) -> &[String],
957{
958   let mut index_by_id = HashMap::new();
959   for (idx, group) in groups.iter().enumerate() {
960      let id = group_id(group);
961      if id.trim().is_empty() {
962         return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963      }
964      if index_by_id.insert(id.to_string(), idx).is_some() {
965         return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966      }
967   }
968
969   let mut in_degree = vec![0_usize; groups.len()];
970   let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972   for (idx, group) in groups.iter().enumerate() {
973      for dependency in dependencies(group) {
974         let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975            CommitGenError::Other(format!(
976               "Group {} depends on unknown group_id '{}'",
977               group_id(group),
978               dependency
979            ))
980         })?;
981         if dependency_idx == idx {
982            return Err(CommitGenError::Other(format!(
983               "Group {} depends on itself",
984               group_id(group)
985            )));
986         }
987
988         adjacency[dependency_idx].push(idx);
989         in_degree[idx] += 1;
990      }
991   }
992
993   let mut queue: Vec<usize> = (0..groups.len())
994      .filter(|idx| in_degree[*idx] == 0)
995      .collect();
996   let mut order = Vec::with_capacity(groups.len());
997
998   while let Some(node) = queue.pop() {
999      order.push(node);
1000      for neighbor in &adjacency[node] {
1001         in_degree[*neighbor] -= 1;
1002         if in_degree[*neighbor] == 0 {
1003            queue.push(*neighbor);
1004         }
1005      }
1006   }
1007
1008   if order.len() != groups.len() {
1009      return Err(CommitGenError::Other(
1010         "Circular dependency detected in compose groups".to_string(),
1011      ));
1012   }
1013
1014   Ok(order)
1015}
1016
/// Normalizes a raw file/target/group reference as emitted by the planner.
///
/// Removes, in this order:
/// - leading whitespace and wrapping quote characters (backtick, `"`, `'`),
/// - trailing whitespace, quote characters and `,` / `;` separators,
/// - any number of leading `./` prefixes.
///
/// Trailing quotes, separators and whitespace are stripped as one set, so a
/// reference such as `"F1",` normalizes to `F1` (rather than `F1"`) and
/// `F1 ,` normalizes to `F1` (rather than `F1 ` with a dangling space).
fn normalize_file_reference(raw_file_ref: &str) -> String {
   fn is_wrapper(ch: char) -> bool {
      ch.is_whitespace() || matches!(ch, '`' | '"' | '\'')
   }

   raw_file_ref
      .trim_start_matches(is_wrapper)
      .trim_end_matches(|ch: char| is_wrapper(ch) || matches!(ch, ',' | ';'))
      .trim_start_matches("./")
      .to_string()
}
1025
/// Extracts distinct, lowercase keyword tokens from free-form planning text.
///
/// The text is split on every character that is not ASCII alphanumeric. Runs
/// shorter than three characters and entries of the stop-word list are
/// discarded; each remaining token is returned once, in first-seen order.
fn planning_text_tokens(text: &str) -> Vec<String> {
   const STOP_WORDS: &[&str] = &[
      "and",
      "for",
      "the",
      "with",
      "from",
      "into",
      "after",
      "before",
      "over",
      "under",
      "plus",
      "across",
      "update",
      "updated",
      "refactor",
      "refactored",
      "changes",
      "change",
      "logical",
      "group",
      "groups",
      "commit",
      "commits",
   ];

   let mut emitted = HashSet::new();

   text
      .split(|ch: char| !ch.is_ascii_alphanumeric())
      .filter(|word| word.len() >= 3)
      .map(str::to_ascii_lowercase)
      .filter(|word| !STOP_WORDS.contains(&word.as_str()))
      // `insert` returns false for a token that was already produced.
      .filter(|word| emitted.insert(word.clone()))
      .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079   let normalized = normalize_file_reference(raw);
1080   let uppercase = normalized.to_ascii_uppercase();
1081
1082   if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083      return Some(format!("G{uppercase}"));
1084   }
1085
1086   if let Some(rest) = uppercase.strip_prefix('G')
1087      && !rest.is_empty()
1088      && rest.chars().all(|ch| ch.is_ascii_digit())
1089   {
1090      return Some(format!("G{rest}"));
1091   }
1092
1093   let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094   let compact = uppercase
1095      .chars()
1096      .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097      .collect::<String>();
1098   if compact.starts_with("GROUP") && !digits.is_empty() {
1099      return Some(format!("G{digits}"));
1100   }
1101
1102   None
1103}
1104
/// Coarse classification of a changed file, as assigned by
/// `compose_file_category`. Checks run in a fixed order there, so a file gets
/// the first category that matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeFileCategory {
   /// The file is flagged `is_binary`.
   Binary,
   /// `is_dependency_manifest` accepts the path.
   Dependency,
   /// `.md` extension, or a file named `readme` / `readme.md` (case-insensitive).
   Docs,
   /// The lowercased path contains `prompt` or `system`.
   Prompt,
   /// Lives under a `tests/` directory, or the file name contains `test` or `spec`.
   Test,
   /// Extension is one of toml, yaml, yml, json, ini, cfg, conf, env.
   Config,
   /// Extension is one of the recognized programming-language extensions.
   Source,
   /// Nothing above matched.
   Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118   if file.is_binary {
1119      return ComposeFileCategory::Binary;
1120   }
1121
1122   if is_dependency_manifest(&file.path) {
1123      return ComposeFileCategory::Dependency;
1124   }
1125
1126   let filename_lower = file.path.to_ascii_lowercase();
1127   let file_name = Path::new(&filename_lower)
1128      .file_name()
1129      .and_then(|name| name.to_str())
1130      .unwrap_or_default();
1131   let extension = Path::new(&filename_lower)
1132      .extension()
1133      .and_then(|ext| ext.to_str())
1134      .unwrap_or_default();
1135
1136   if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137      return ComposeFileCategory::Prompt;
1138   }
1139
1140   if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141      return ComposeFileCategory::Docs;
1142   }
1143
1144   if filename_lower.contains("/tests/")
1145      || filename_lower.starts_with("tests/")
1146      || file_name.contains("test")
1147      || file_name.contains("spec")
1148   {
1149      return ComposeFileCategory::Test;
1150   }
1151
1152   if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153      return ComposeFileCategory::Config;
1154   }
1155
1156   if matches!(
1157      extension,
1158      "rs"
1159         | "py"
1160         | "js"
1161         | "jsx"
1162         | "ts"
1163         | "tsx"
1164         | "go"
1165         | "java"
1166         | "kt"
1167         | "c"
1168         | "cc"
1169         | "cpp"
1170         | "h"
1171         | "hpp"
1172         | "cs"
1173         | "rb"
1174         | "php"
1175         | "swift"
1176         | "scala"
1177         | "m"
1178         | "mm"
1179   ) {
1180      return ComposeFileCategory::Source;
1181   }
1182
1183   ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments two paths have in common.
///
/// Segments are compared pairwise from the start and counting stops at the
/// first mismatch or when either path runs out of segments. The final segment
/// (the file name) takes part like any other.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
   let mut right_segments = right.split('/');
   let mut depth = 0;

   for left_segment in left.split('/') {
      match right_segments.next() {
         Some(right_segment) if right_segment == left_segment => depth += 1,
         _ => break,
      }
   }

   depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195   let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197   if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198      score += 40;
1199   }
1200
1201   if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202      score += 12;
1203   }
1204
1205   if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206      score += 18;
1207   }
1208
1209   score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213   match (compose_file_category(file), group.commit_type.as_str()) {
1214      (ComposeFileCategory::Docs, "docs") => 25,
1215      (ComposeFileCategory::Test, "test") => 25,
1216      (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217      (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218      (
1219         ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220         "feat" | "fix" | "refactor" | "perf",
1221      ) => 10,
1222      _ => 0,
1223   }
1224}
1225
1226fn best_group_for_missing_file(
1227   snapshot: &ComposeSnapshot,
1228   groups: &[ComposeIntentGroup],
1229   missing_file: &ComposeFile,
1230) -> usize {
1231   let mut best_group_idx = 0;
1232   let mut best_score = i32::MIN;
1233   let mut best_group_size = usize::MAX;
1234
1235   for (group_idx, group) in groups.iter().enumerate() {
1236      let similarity = group
1237         .file_ids
1238         .iter()
1239         .filter_map(|file_id| snapshot.file_by_id(file_id))
1240         .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241         .max()
1242         .unwrap_or_default();
1243      let score = similarity + group_type_bonus(missing_file, group);
1244      let group_size = group.file_ids.len();
1245
1246      if score > best_score || (score == best_score && group_size < best_group_size) {
1247         best_group_idx = group_idx;
1248         best_score = score;
1249         best_group_size = group_size;
1250      }
1251   }
1252
1253   best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257   raw_dependency: &str,
1258   known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260   let normalized = normalize_file_reference(raw_dependency);
1261   if normalized.is_empty() {
1262      return None;
1263   }
1264
1265   if known_group_ids.contains(&normalized) {
1266      return Some(normalized);
1267   }
1268
1269   let uppercase = normalized.to_ascii_uppercase();
1270   if known_group_ids.contains(&uppercase) {
1271      return Some(uppercase);
1272   }
1273
1274   let candidate = extract_group_id_candidate(&normalized)?;
1275   known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279   let label = target.label.to_ascii_lowercase();
1280   let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281   let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283   if let Some(scope) = &group.scope {
1284      let scope = scope.as_str().to_ascii_lowercase();
1285      if label.contains(&scope) || workstream.contains(&scope) {
1286         score += 140;
1287      }
1288
1289      for segment in scope.split('/') {
1290         if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291            score += 45;
1292         }
1293      }
1294   }
1295
1296   for token in planning_text_tokens(&group.rationale) {
1297      if label.contains(&token) || workstream.contains(&token) {
1298         score += 16;
1299      }
1300   }
1301
1302   match group.commit_type.as_str() {
1303      "ci" if target.label.starts_with(".github/") => score += 120,
1304      "docs"
1305         if target.label.starts_with("docs/")
1306            || Path::new(&target.label)
1307               .extension()
1308               .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309      {
1310         score += 80;
1311      },
1312      "build" | "chore"
1313         if target.label.contains("Cargo")
1314            || target.label.contains("package")
1315            || target.label.contains("lock")
1316            || target.label.contains("tsconfig")
1317            || target.label.contains("biome")
1318            || target.label.contains("bun") =>
1319      {
1320         score += 55;
1321      },
1322      _ => {},
1323   }
1324
1325   score
1326}
1327
1328fn seed_group_targets(
1329   groups: &[ComposeIntentGroup],
1330   planning_index: &PlanningIndex,
1331   group_targets: &mut [Vec<String>],
1332   repair_notes: &mut Vec<String>,
1333) {
1334   let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336   for (group_idx, group) in groups.iter().enumerate() {
1337      if !group_targets[group_idx].is_empty() {
1338         continue;
1339      }
1340
1341      let fallback_target = planning_index
1342         .targets
1343         .iter()
1344         .max_by_key(|target| {
1345            let mut score = planning_target_match_score(target, group);
1346            if !claimed_target_ids.contains(&target.target_id) {
1347               score += 60;
1348            }
1349            (score, target.hunk_count, target.file_ids.len())
1350         })
1351         .or_else(|| planning_index.targets.first());
1352
1353      let Some(fallback_target) = fallback_target else {
1354         continue;
1355      };
1356
1357      group_targets[group_idx].push(fallback_target.target_id.clone());
1358      claimed_target_ids.insert(fallback_target.target_id.clone());
1359      repair_notes.push(format!(
1360         "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361         group.group_id, fallback_target.target_id, fallback_target.label
1362      ));
1363   }
1364}
1365
/// Repairs and canonicalizes the groups returned by the intent planner.
///
/// On entry each group's `file_ids` holds the planner's raw planning-target
/// references; on return it holds the file ids those targets expand to. The
/// function:
/// 1. resolves every target reference to a known target id (exact match,
///    upper-cased match, then alias lookup) and drops the rest;
/// 2. seeds groups that ended up without any target via `seed_group_targets`;
/// 3. resolves, de-duplicates and filters each group's dependencies;
/// 4. expands target ids into file ids;
/// 5. assigns every snapshot file that no group covers to the best-fitting
///    group.
///
/// Returns the repaired groups together with human-readable notes describing
/// each repair, in the order the repairs were made.
///
/// # Errors
/// Returns `CommitGenError::Other` when `groups` is empty.
fn normalize_intent_plan(
   snapshot: &ComposeSnapshot,
   planning_index: &PlanningIndex,
   mut groups: Vec<ComposeIntentGroup>,
) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
   if groups.is_empty() {
      return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
   }

   let known_target_ids: HashSet<&str> = planning_index
      .targets
      .iter()
      .map(|target| target.target_id.as_str())
      .collect();
   let mut repair_notes = Vec::new();
   let mut covered_file_ids = HashSet::new();
   // One entry per group, in group order: the canonical target ids it references.
   let mut normalized_group_targets = Vec::with_capacity(groups.len());

   // Step 1: canonicalize target references.
   for group in &groups {
      if group.file_ids.is_empty() {
         repair_notes.push(format!(
            "Compose planner left {} without planning targets; assigning targets heuristically",
            group.group_id
         ));
      }

      let mut normalized_target_ids = Vec::new();
      let mut seen_target_ids = HashSet::new();
      for raw_target_ref in &group.file_ids {
         let normalized_ref = normalize_file_reference(raw_target_ref);
         let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
            normalized_ref.clone()
         } else {
            let uppercase_ref = normalized_ref.to_ascii_uppercase();
            if known_target_ids.contains(uppercase_ref.as_str()) {
               uppercase_ref
            } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
               // Only alias hits are reported; exact and upper-cased matches are silent.
               if raw_target_ref != target_id {
                  repair_notes.push(format!(
                     "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
                  ));
               }
               target_id.clone()
            } else {
               repair_notes.push(format!(
                  "Dropped unknown planning target '{}' from {}",
                  raw_target_ref, group.group_id
               ));
               continue;
            }
         };

         // Keep the first occurrence of each target within a group.
         if seen_target_ids.insert(canonical_target_id.clone()) {
            normalized_target_ids.push(canonical_target_id);
         }
      }

      normalized_group_targets.push(normalized_target_ids);
   }

   // Step 2: make sure no group is left without a target.
   seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);

   // Step 3: canonicalize dependencies against the ids the planner actually used.
   let known_group_ids: HashSet<String> =
      groups.iter().map(|group| group.group_id.clone()).collect();
   for group in &mut groups {
      let mut normalized_dependencies = Vec::new();
      let mut seen_dependencies = HashSet::new();

      for raw_dependency in &group.dependencies {
         let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
         else {
            repair_notes.push(format!(
               "Dropped unknown dependency '{}' from {}",
               raw_dependency, group.group_id
            ));
            continue;
         };

         if dependency == group.group_id {
            repair_notes.push(format!(
               "Dropped self-dependency '{}' from {}",
               raw_dependency, group.group_id
            ));
            continue;
         }

         // Duplicates are dropped silently; a rewrite is noted once per dependency.
         if seen_dependencies.insert(dependency.clone()) {
            if raw_dependency != &dependency {
               repair_notes.push(format!(
                  "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
               ));
            }
            normalized_dependencies.push(dependency);
         }
      }

      group.dependencies = normalized_dependencies;
   }

   // Step 4: replace target ids with the file ids they stand for.
   for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
      let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
      for file_id in &expanded_file_ids {
         covered_file_ids.insert(file_id.clone());
      }
      group.file_ids = expanded_file_ids;
   }

   // Step 5: every snapshot file must end up in some group.
   for file in &snapshot.files {
      if covered_file_ids.contains(file.file_id.as_str()) {
         continue;
      }

      let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
      let target_group = &mut groups[target_group_idx];
      target_group.file_ids.push(file.file_id.clone());
      covered_file_ids.insert(file.file_id.clone());
      repair_notes.push(format!(
         "Compose planner omitted {} ({}); assigned it to {}",
         file.file_id, file.path, target_group.group_id
      ));
   }

   Ok((groups, repair_notes))
}
1490
/// Derives the workstream key for a path-like label.
///
/// The key is the first non-empty `/` segment of the label. For `.github` and
/// the container directories `apps`, `packages`, `crates`, `services`, `libs`
/// and `pass`, the second non-empty segment is appended when there is one. A
/// label without any non-empty segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
   let mut segments = label.split('/').filter(|segment| !segment.is_empty());

   let Some(first) = segments.next() else {
      return label.to_string();
   };

   let keeps_second_segment = matches!(
      first,
      ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
   );

   match segments.next() {
      Some(second) if keeps_second_segment => format!("{first}/{second}"),
      _ => first.to_string(),
   }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514   let key = workstream_key_for_label(label);
1515   match key.as_str() {
1516      ".github/workflows" => "CI workflows".to_string(),
1517      ".github" => "GitHub automation".to_string(),
1518      _ => key
1519         .split('/')
1520         .next_back()
1521         .map(|segment| segment.replace(['_', '-'], " "))
1522         .unwrap_or(key),
1523   }
1524}
1525
/// Turns arbitrary text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces between kept characters collapse into a single `-`; every other
/// character is dropped. Returns `None` when nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
   let mut slug = String::new();
   // Set when a separator was seen after the last kept character; the dash is
   // only written once another kept character follows, so the result never
   // starts or ends with a dash.
   let mut separator_pending = false;

   for ch in raw.trim().chars() {
      if ch.is_ascii_alphanumeric() {
         if separator_pending {
            slug.push('-');
            separator_pending = false;
         }
         slug.push(ch.to_ascii_lowercase());
      } else if matches!(ch, '-' | '_' | '/' | '.' | ' ') {
         separator_pending = !slug.is_empty();
      }
   }

   if slug.is_empty() { None } else { Some(slug) }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546   let key = workstream_key_for_label(label);
1547   let candidate = key
1548      .split('/')
1549      .next_back()
1550      .and_then(sanitize_scope_fragment)?;
1551   Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555   if labels.len() == 1 {
1556      let label = labels[0].as_str();
1557      let display = workstream_display_name(label);
1558      if label.starts_with("apps/") {
1559         return format!("{display} application updates");
1560      }
1561      if label.starts_with("packages/") {
1562         return format!("{display} package updates");
1563      }
1564      if label.starts_with("crates/") {
1565         return format!("{display} crate updates");
1566      }
1567      if label.starts_with(".github/") || label == ".github" {
1568         return format!("{display} updates");
1569      }
1570      return format!("{display} updates");
1571   }
1572
1573   let display_labels: Vec<String> = labels
1574      .iter()
1575      .take(3)
1576      .map(|label| workstream_display_name(label))
1577      .collect();
1578   format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582   snapshot: &ComposeSnapshot,
1583   labels: &[String],
1584   file_ids: &[String],
1585) -> Result<CommitType> {
1586   if labels
1587      .iter()
1588      .any(|label| label == ".github" || label.starts_with(".github/"))
1589   {
1590      return CommitType::new("ci");
1591   }
1592
1593   let files: Vec<&ComposeFile> = file_ids
1594      .iter()
1595      .filter_map(|file_id| snapshot.file_by_id(file_id))
1596      .collect();
1597   let all_docs = !files.is_empty()
1598      && files
1599         .iter()
1600         .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601   if all_docs {
1602      return CommitType::new("docs");
1603   }
1604
1605   let all_tests = !files.is_empty()
1606      && files
1607         .iter()
1608         .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609   if all_tests {
1610      return CommitType::new("test");
1611   }
1612
1613   let all_dependencies =
1614      !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615   if all_dependencies {
1616      return CommitType::new("build");
1617   }
1618
1619   let all_config = !files.is_empty()
1620      && files.iter().all(|file| {
1621         matches!(
1622            compose_file_category(file),
1623            ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624         )
1625      });
1626   if all_config {
1627      return CommitType::new("chore");
1628   }
1629
1630   CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634   snapshot
1635      .files
1636      .iter()
1637      .filter(|file| file_ids.contains(&file.file_id))
1638      .map(|file| file.file_id.clone())
1639      .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643   if groups.is_empty() {
1644      return false;
1645   }
1646
1647   let largest_group = groups
1648      .iter()
1649      .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650      .max()
1651      .unwrap_or_default();
1652
1653   groups.len() == 1
1654      || (groups.len() <= 2
1655         && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659   snapshot: &ComposeSnapshot,
1660   planning_index: &PlanningIndex,
1661   groups: &[ComposeIntentGroup],
1662   max_commits: usize,
1663) -> bool {
1664   if max_commits <= 1
1665      || planning_index.mode != PlanningMode::Area
1666      || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667      || !is_monolithic_intent_plan(snapshot, groups)
1668   {
1669      return false;
1670   }
1671
1672   let workstream_count = planning_index
1673      .targets
1674      .iter()
1675      .map(|target| workstream_key_for_label(&target.label))
1676      .collect::<HashSet<_>>()
1677      .len();
1678
1679   workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
1682fn build_large_patch_fallback_groups(
1683   snapshot: &ComposeSnapshot,
1684   planning_index: &PlanningIndex,
1685   max_commits: usize,
1686) -> Result<Vec<ComposeIntentGroup>> {
1687   #[derive(Debug, Clone)]
1688   struct WorkstreamGroup {
1689      label:    String,
1690      file_ids: HashSet<String>,
1691      weight:   usize,
1692   }
1693
1694   #[derive(Debug, Clone)]
1695   struct FallbackBin {
1696      labels:       Vec<String>,
1697      file_ids:     HashSet<String>,
1698      total_weight: usize,
1699   }
1700
1701   let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1702   for target in &planning_index.targets {
1703      let key = workstream_key_for_label(&target.label);
1704      let entry = workstreams
1705         .entry(key.clone())
1706         .or_insert_with(|| WorkstreamGroup {
1707            label:    key,
1708            file_ids: HashSet::new(),
1709            weight:   0,
1710         });
1711
1712      for file_id in &target.file_ids {
1713         entry.file_ids.insert(file_id.clone());
1714      }
1715      entry.weight = entry
1716         .weight
1717         .saturating_add(target.hunk_count.max(target.file_ids.len()));
1718   }
1719
1720   let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1721   workstreams.sort_by(|left, right| {
1722      right
1723         .weight
1724         .cmp(&left.weight)
1725         .then_with(|| left.label.cmp(&right.label))
1726   });
1727
1728   let bin_count = max_commits.min(workstreams.len());
1729   let mut bins: Vec<FallbackBin> = Vec::new();
1730   for workstream in workstreams {
1731      if bins.len() < bin_count {
1732         bins.push(FallbackBin {
1733            labels:       vec![workstream.label],
1734            file_ids:     workstream.file_ids,
1735            total_weight: workstream.weight,
1736         });
1737         continue;
1738      }
1739
1740      let Some((target_idx, _)) = bins
1741         .iter()
1742         .enumerate()
1743         .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1744      else {
1745         continue;
1746      };
1747
1748      let target_bin = &mut bins[target_idx];
1749      target_bin.labels.push(workstream.label);
1750      target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1751      target_bin.file_ids.extend(workstream.file_ids);
1752   }
1753
1754   let mut groups = Vec::new();
1755   for (idx, bin) in bins.into_iter().enumerate() {
1756      let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1757      let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1758      let scope = (bin.labels.len() == 1)
1759         .then(|| fallback_scope_for_label(&bin.labels[0]))
1760         .flatten();
1761      let rationale = fallback_rationale_for_labels(&bin.labels);
1762
1763      groups.push(ComposeIntentGroup {
1764         group_id: format!("G{}", idx + 1),
1765         commit_type,
1766         scope,
1767         file_ids: ordered_ids,
1768         rationale,
1769         dependencies: Vec::new(),
1770      });
1771   }
1772
1773   Ok(groups)
1774}
1775
/// Produces the intent plan (logical commit groups plus an execution order)
/// for a compose snapshot.
///
/// Steps:
/// 1. build the planning index for `snapshot` and render the prompt inputs
///    from it (`observations` are used only for the snapshot summary);
/// 2. run one `run_oneshot` request with the analysis model, asking for the
///    `create_compose_intent_plan` tool output;
/// 3. repair the returned groups with `normalize_intent_plan`, printing each
///    repair note to stderr as a warning;
/// 4. if `should_force_large_patch_fallback` says so, replace the groups with
///    path-based workstream groups;
/// 5. compute a dependency-respecting order over the final groups.
///
/// `max_commits` is passed to the prompt and to the fallback grouping. When
/// `debug_dir` is set it is handed to the request as its debug directory.
///
/// # Errors
/// Propagates errors from prompt rendering, the request, plan normalization,
/// fallback grouping and dependency ordering.
#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
async fn analyze_compose_intent(
   snapshot: &ComposeSnapshot,
   observations: &[FileObservation],
   config: &CommitConfig,
   max_commits: usize,
   debug_dir: Option<&Path>,
) -> Result<ComposeIntentPlan> {
   // Everything the prompt needs is derived from the planning index.
   let planning_index = build_planning_index(snapshot);
   let stat_summary = render_planning_stat(&planning_index);
   let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
   let planning_targets = render_planning_targets(&planning_index, snapshot);
   let planning_notes = render_planning_notes(&planning_index);
   let split_bias = render_split_bias(&planning_index);
   let schema = build_intent_schema(config);
   let variant = if config.markdown_output { "markdown" } else { "default" };
   let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
      variant,
      max_commits,
      stat: &stat_summary,
      snapshot_summary: &snapshot_summary,
      planning_targets: &planning_targets,
      planning_notes: &planning_notes,
      split_bias: &split_bias,
   })?;

   let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
      operation:        "compose/intent",
      model:            &config.analysis_model,
      prompt_family:    "compose-intent",
      prompt_variant:   variant,
      system_prompt:    &parts.system,
      user_prompt:      &parts.user,
      tool_name:        "create_compose_intent_plan",
      tool_description: "Plan logical commit groups over the provided planning target IDs",
      schema:           &schema,
      progress_label:   Some("compose intent planner"),
      debug:            debug_dir.map(|dir| OneShotDebug {
         dir:    Some(dir),
         prefix: None,
         name:   "compose_intent",
      }),
      cacheable:        true,
   })
   .await?;

   // Repairs are surfaced to the user rather than applied silently.
   let (mut groups, repair_notes) =
      normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
   for note in &repair_notes {
      eprintln!("{}", style::warning(note));
   }
   // The fallback replaces the planner's groups wholesale; its groups carry no dependencies.
   if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
      eprintln!(
         "{}",
         style::warning(
            "Compose intent collapsed into a monolithic large-patch group; falling back to \
             path-based workstream splits."
         )
      );
      groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
   }
   let dependency_order =
      compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;

   Ok(ComposeIntentPlan { groups, dependency_order })
}
1842
1843#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1844fn should_collect_compose_observations(
1845   snapshot: &ComposeSnapshot,
1846   config: &CommitConfig,
1847   counter: &TokenCounter,
1848) -> bool {
1849   planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1850      && should_use_map_reduce(&snapshot.diff, config, counter)
1851}
1852
1853#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1854fn auto_assign_hunks(
1855   snapshot: &ComposeSnapshot,
1856   intent_plan: &ComposeIntentPlan,
1857) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1858   let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1859   for group in &intent_plan.groups {
1860      for file_id in &group.file_ids {
1861         groups_by_file
1862            .entry(file_id.as_str())
1863            .or_default()
1864            .push(group.group_id.as_str());
1865      }
1866   }
1867
1868   let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1869      .groups
1870      .iter()
1871      .map(|group| (group.group_id.clone(), BTreeSet::new()))
1872      .collect();
1873   let mut ambiguous = Vec::new();
1874
1875   for file in &snapshot.files {
1876      let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1877         return Err(CommitGenError::Other(format!(
1878            "No compose group claimed file {} ({})",
1879            file.file_id, file.path
1880         )));
1881      };
1882
1883      if candidate_group_ids.len() == 1 {
1884         let group_id = candidate_group_ids[0];
1885         let entry = assigned
1886            .get_mut(group_id)
1887            .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1888         for hunk_id in &file.hunk_ids {
1889            entry.insert(hunk_id.clone());
1890         }
1891      } else {
1892         ambiguous.push(AmbiguousFileBinding {
1893            file_id:             file.file_id.clone(),
1894            path:                file.path.clone(),
1895            candidate_group_ids: candidate_group_ids
1896               .iter()
1897               .map(|group_id| (*group_id).to_string())
1898               .collect(),
1899            hunk_ids:            file.hunk_ids.clone(),
1900         });
1901      }
1902   }
1903
1904   Ok((assigned, ambiguous))
1905}
1906
1907fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1908   let mut out = String::new();
1909   for group in groups {
1910      let scope = group
1911         .scope
1912         .as_ref()
1913         .map(|scope| format!("({})", scope.as_str()))
1914         .unwrap_or_default();
1915      writeln!(
1916         out,
1917         "- {} [{}{}] {}",
1918         group.group_id,
1919         group.commit_type.as_str(),
1920         scope,
1921         group.rationale
1922      )
1923      .unwrap();
1924   }
1925
1926   out
1927}
1928
1929fn render_binding_ambiguous_files(
1930   snapshot: &ComposeSnapshot,
1931   ambiguous_files: &[AmbiguousFileBinding],
1932) -> String {
1933   let mut out = String::new();
1934   for ambiguous_file in ambiguous_files {
1935      writeln!(
1936         out,
1937         "- {} {} candidates: {}",
1938         ambiguous_file.file_id,
1939         ambiguous_file.path,
1940         ambiguous_file.candidate_group_ids.join(", ")
1941      )
1942      .unwrap();
1943
1944      for hunk_id in &ambiguous_file.hunk_ids {
1945         if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1946            if hunk.synthetic {
1947               writeln!(out, "  - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1948            } else {
1949               writeln!(
1950                  out,
1951                  "  - {} old:{} new:{} :: {}",
1952                  hunk.hunk_id,
1953                  format_line_range(hunk.old_start, hunk.old_count),
1954                  format_line_range(hunk.new_start, hunk.new_count),
1955                  hunk.snippet
1956               )
1957               .unwrap();
1958            }
1959         }
1960      }
1961   }
1962
1963   out
1964}
1965
/// Asks the analysis model to assign the hunks of `ambiguous_files` to the
/// existing compose `groups` and returns the raw assignments it produced.
///
/// The groups and ambiguous files are rendered to text, embedded into the
/// compose-bind prompt, and sent as a single one-shot tool call whose output
/// must match the binding schema. The returned assignments are not validated
/// here.
///
/// * `debug_dir` / `debug_name` - when `debug_dir` is set, debug information for
///   the request is configured with that directory and name.
///
/// # Errors
///
/// Propagates failures from prompt rendering and from the one-shot request.
async fn request_binding(
   snapshot: &ComposeSnapshot,
   groups: &[ComposeIntentGroup],
   ambiguous_files: &[AmbiguousFileBinding],
   config: &CommitConfig,
   debug_dir: Option<&Path>,
   debug_name: &str,
) -> Result<Vec<ComposeBindingAssignment>> {
   let schema = build_binding_schema();
   let groups_text = render_binding_groups(groups);
   let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
   // The prompt variant follows the configured output format.
   let variant = if config.markdown_output { "markdown" } else { "default" };
   let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
      variant,
      groups:          &groups_text,
      ambiguous_files: &ambiguous_files_text,
   })?;
   let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
      operation:        "compose/bind",
      model:            &config.analysis_model,
      prompt_family:    "compose-bind",
      prompt_variant:   variant,
      system_prompt:    &parts.system,
      user_prompt:      &parts.user,
      tool_name:        "bind_compose_hunks",
      tool_description: "Assign hunk IDs to existing compose groups",
      schema:           &schema,
      progress_label:   Some("compose hunk binder"),
      // Debug output is only configured when a debug directory was supplied.
      debug:            debug_dir.map(|dir| OneShotDebug {
         dir:    Some(dir),
         prefix: None,
         name:   debug_name,
      }),
      cacheable:        true,
   })
   .await?;

   Ok(response.output.assignments)
}
2005
2006fn ambiguous_hunk_context(
2007   ambiguous_files: &[AmbiguousFileBinding],
2008) -> HashMap<String, AmbiguousHunkContext> {
2009   let mut context = HashMap::new();
2010   for ambiguous_file in ambiguous_files {
2011      for hunk_id in &ambiguous_file.hunk_ids {
2012         context.insert(hunk_id.clone(), AmbiguousHunkContext {
2013            candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2014         });
2015      }
2016   }
2017   context
2018}
2019
2020fn evaluate_binding(
2021   assignments: &[ComposeBindingAssignment],
2022   hunk_context: &HashMap<String, AmbiguousHunkContext>,
2023   valid_group_ids: &HashSet<&str>,
2024   snapshot: &ComposeSnapshot,
2025) -> BindingEvaluation {
2026   let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2027
2028   for assignment in assignments {
2029      if !valid_group_ids.contains(assignment.group_id.as_str()) {
2030         continue;
2031      }
2032
2033      let mut seen_in_group = HashSet::new();
2034      for hunk_id in &assignment.hunk_ids {
2035         if !seen_in_group.insert(hunk_id.as_str()) {
2036            continue;
2037         }
2038
2039         let Some(context) = hunk_context.get(hunk_id) else {
2040            continue;
2041         };
2042
2043         if !context
2044            .candidate_group_ids
2045            .iter()
2046            .any(|candidate| candidate == &assignment.group_id)
2047         {
2048            continue;
2049         }
2050
2051         match assigned_hunk_to_group.get(hunk_id) {
2052            None => {
2053               assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2054            },
2055            Some(existing_group) if existing_group == &assignment.group_id => {},
2056            Some(_) => {
2057               assigned_hunk_to_group.remove(hunk_id);
2058            },
2059         }
2060      }
2061   }
2062
2063   let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2064   for (hunk_id, group_id) in assigned_hunk_to_group {
2065      assigned_by_group.entry(group_id).or_default().push(hunk_id);
2066   }
2067
2068   for hunk_ids in assigned_by_group.values_mut() {
2069      let ordered: Vec<String> = snapshot
2070         .hunks
2071         .iter()
2072         .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2073         .map(|hunk| hunk.hunk_id.clone())
2074         .collect();
2075      *hunk_ids = ordered;
2076   }
2077
2078   let unresolved = snapshot
2079      .hunks
2080      .iter()
2081      .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2082      .filter(|hunk| {
2083         !assigned_by_group.values().any(|assigned_hunks| {
2084            assigned_hunks
2085               .iter()
2086               .any(|assigned| assigned == &hunk.hunk_id)
2087         })
2088      })
2089      .map(|hunk| hunk.hunk_id.clone())
2090      .collect();
2091
2092   BindingEvaluation { assigned: assigned_by_group, unresolved }
2093}
2094
2095fn filter_ambiguous_files(
2096   ambiguous_files: &[AmbiguousFileBinding],
2097   hunk_ids: &[String],
2098) -> Vec<AmbiguousFileBinding> {
2099   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2100
2101   ambiguous_files
2102      .iter()
2103      .filter_map(|file| {
2104         let matching_hunks: Vec<String> = file
2105            .hunk_ids
2106            .iter()
2107            .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2108            .cloned()
2109            .collect();
2110
2111         (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2112            file_id:             file.file_id.clone(),
2113            path:                file.path.clone(),
2114            candidate_group_ids: file.candidate_group_ids.clone(),
2115            hunk_ids:            matching_hunks,
2116         })
2117      })
2118      .collect()
2119}
2120
2121fn chunk_ambiguous_files(
2122   ambiguous_files: &[AmbiguousFileBinding],
2123) -> Vec<Vec<AmbiguousFileBinding>> {
2124   if ambiguous_files.is_empty() {
2125      return Vec::new();
2126   }
2127
2128   let mut batches = Vec::new();
2129   let mut current_batch = Vec::new();
2130   let mut current_hunk_count = 0_usize;
2131
2132   for file in ambiguous_files {
2133      let file_hunk_count = file.hunk_ids.len();
2134      let should_split = !current_batch.is_empty()
2135         && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2136            || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2137
2138      if should_split {
2139         batches.push(current_batch);
2140         current_batch = Vec::new();
2141         current_hunk_count = 0;
2142      }
2143
2144      current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2145      current_batch.push(file.clone());
2146   }
2147
2148   if !current_batch.is_empty() {
2149      batches.push(current_batch);
2150   }
2151
2152   batches
2153}
2154
2155fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2156   let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2157
2158   snapshot
2159      .hunks
2160      .iter()
2161      .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2162      .map(|hunk| hunk.hunk_id.clone())
2163      .collect()
2164}
2165
2166fn fallback_group_for_hunk(
2167   hunk_id: &str,
2168   ambiguous_files: &[AmbiguousFileBinding],
2169   group_rank: &HashMap<&str, usize>,
2170) -> Option<String> {
2171   ambiguous_files.iter().find_map(|file| {
2172      file
2173         .hunk_ids
2174         .iter()
2175         .any(|candidate| candidate == hunk_id)
2176         .then(|| {
2177            file
2178               .candidate_group_ids
2179               .iter()
2180               .min_by_key(|group_id| {
2181                  group_rank
2182                     .get(group_id.as_str())
2183                     .copied()
2184                     .unwrap_or(usize::MAX)
2185               })
2186               .cloned()
2187         })
2188   })?
2189}
2190
2191fn assign_unresolved_hunks(
2192   unresolved_hunks: &[String],
2193   assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2194   ambiguous_files: &[AmbiguousFileBinding],
2195   group_rank: &HashMap<&str, usize>,
2196) {
2197   for hunk_id in unresolved_hunks {
2198      if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2199         && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2200      {
2201         group_hunks.insert(hunk_id.clone());
2202      }
2203   }
2204}
2205
2206fn normalize_group_type(
2207   snapshot: &ComposeSnapshot,
2208   file_ids: &[String],
2209   original_type: &CommitType,
2210) -> Result<CommitType> {
2211   let dependency_only = !file_ids.is_empty()
2212      && file_ids.iter().all(|file_id| {
2213         snapshot
2214            .file_by_id(file_id)
2215            .is_some_and(|file| is_dependency_manifest(&file.path))
2216      });
2217
2218   if dependency_only && original_type.as_str() != "build" {
2219      CommitType::new("build")
2220   } else {
2221      Ok(original_type.clone())
2222   }
2223}
2224
2225fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2226   snapshot
2227      .files
2228      .iter()
2229      .filter(|file| {
2230         hunk_ids
2231            .iter()
2232            .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2233      })
2234      .map(|file| file.file_id.clone())
2235      .collect()
2236}
2237
2238fn build_redirects(
2239   intent_plan: &ComposeIntentPlan,
2240   executable_groups: &[ComposeExecutableGroup],
2241   group_rank: &HashMap<&str, usize>,
2242) -> HashMap<String, String> {
2243   let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2244      .iter()
2245      .filter(|group| !group.hunk_ids.is_empty())
2246      .map(|group| (group.group_id.as_str(), group))
2247      .collect();
2248
2249   let mut redirects = HashMap::new();
2250   for group in &intent_plan.groups {
2251      if surviving_groups.contains_key(group.group_id.as_str()) {
2252         continue;
2253      }
2254
2255      let redirect = executable_groups
2256         .iter()
2257         .filter(|candidate| candidate.group_id != group.group_id)
2258         .filter(|candidate| {
2259            candidate.file_ids.iter().any(|file_id| {
2260               group
2261                  .file_ids
2262                  .iter()
2263                  .any(|candidate_id| candidate_id == file_id)
2264            })
2265         })
2266         .min_by_key(|candidate| {
2267            group_rank
2268               .get(candidate.group_id.as_str())
2269               .copied()
2270               .unwrap_or(usize::MAX)
2271         })
2272         .map(|candidate| candidate.group_id.clone());
2273
2274      if let Some(redirect) = redirect {
2275         redirects.insert(group.group_id.clone(), redirect);
2276      }
2277   }
2278
2279   redirects
2280}
2281
/// Follows the redirect chain starting at `group_id` and returns the final
/// target. An id without a redirect resolves to itself. If the chain loops,
/// resolution stops at the first id that is reached a second time.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
   let mut visited: HashSet<&str> = HashSet::new();
   let mut cursor = group_id;

   loop {
      match redirects.get(cursor) {
         // Only advance while the current id has not been expanded before.
         Some(target) if visited.insert(cursor) => cursor = target.as_str(),
         _ => return cursor.to_string(),
      }
   }
}
2295
2296fn prune_empty_groups(
2297   groups: Vec<ComposeExecutableGroup>,
2298   redirects: &HashMap<String, String>,
2299) -> Result<ComposeExecutablePlan> {
2300   let surviving_ids: HashSet<String> = groups
2301      .iter()
2302      .filter(|group| !group.hunk_ids.is_empty())
2303      .map(|group| group.group_id.clone())
2304      .collect();
2305
2306   let mut surviving_groups = Vec::new();
2307   for mut group in groups {
2308      if group.hunk_ids.is_empty() {
2309         continue;
2310      }
2311
2312      let mut rewritten_dependencies = Vec::new();
2313      for dependency in &group.dependencies {
2314         let rewritten = resolve_redirect(dependency, redirects);
2315         if rewritten != group.group_id
2316            && surviving_ids.contains(&rewritten)
2317            && !rewritten_dependencies
2318               .iter()
2319               .any(|existing| existing == &rewritten)
2320         {
2321            rewritten_dependencies.push(rewritten);
2322         }
2323      }
2324
2325      group.dependencies = rewritten_dependencies;
2326      surviving_groups.push(group);
2327   }
2328
2329   let dependency_order = compute_dependency_order(
2330      &surviving_groups,
2331      |group| &group.group_id,
2332      |group| &group.dependencies,
2333   )?;
2334   Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2335}
2336
2337fn finalize_executable_plan(
2338   snapshot: &ComposeSnapshot,
2339   intent_plan: &ComposeIntentPlan,
2340   assigned_by_group: HashMap<String, BTreeSet<String>>,
2341) -> Result<ComposeExecutablePlan> {
2342   let group_rank: HashMap<&str, usize> = intent_plan
2343      .dependency_order
2344      .iter()
2345      .enumerate()
2346      .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2347      .collect();
2348
2349   let mut executable_groups = Vec::new();
2350   for group in &intent_plan.groups {
2351      let hunk_ids: Vec<String> = snapshot
2352         .hunks
2353         .iter()
2354         .filter(|hunk| {
2355            assigned_by_group
2356               .get(&group.group_id)
2357               .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2358         })
2359         .map(|hunk| hunk.hunk_id.clone())
2360         .collect();
2361
2362      let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2363      let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2364      executable_groups.push(ComposeExecutableGroup {
2365         group_id: group.group_id.clone(),
2366         commit_type,
2367         scope: group.scope.clone(),
2368         file_ids,
2369         rationale: group.rationale.clone(),
2370         dependencies: group.dependencies.clone(),
2371         hunk_ids,
2372      });
2373   }
2374
2375   let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2376   prune_empty_groups(executable_groups, &redirects)
2377}
2378
2379fn validate_executable_plan(
2380   snapshot: &ComposeSnapshot,
2381   plan: &ComposeExecutablePlan,
2382) -> Result<()> {
2383   if plan.groups.is_empty() {
2384      return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2385   }
2386
2387   let known_hunks: HashSet<&str> = snapshot
2388      .hunks
2389      .iter()
2390      .map(|hunk| hunk.hunk_id.as_str())
2391      .collect();
2392   let known_files: HashSet<&str> = snapshot
2393      .files
2394      .iter()
2395      .map(|file| file.file_id.as_str())
2396      .collect();
2397   let mut coverage = HashMap::<String, String>::new();
2398
2399   for group in &plan.groups {
2400      if group.hunk_ids.is_empty() {
2401         return Err(CommitGenError::Other(format!(
2402            "Compose group {} ended up empty after binding",
2403            group.group_id
2404         )));
2405      }
2406
2407      for file_id in &group.file_ids {
2408         if !known_files.contains(file_id.as_str()) {
2409            return Err(CommitGenError::Other(format!(
2410               "Compose group {} references unknown file_id {}",
2411               group.group_id, file_id
2412            )));
2413         }
2414      }
2415
2416      for hunk_id in &group.hunk_ids {
2417         if !known_hunks.contains(hunk_id.as_str()) {
2418            return Err(CommitGenError::Other(format!(
2419               "Compose group {} references unknown hunk_id {}",
2420               group.group_id, hunk_id
2421            )));
2422         }
2423
2424         if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2425            return Err(CommitGenError::Other(format!(
2426               "Hunk {} was assigned to both {} and {}",
2427               hunk_id, existing_group, group.group_id
2428            )));
2429         }
2430      }
2431   }
2432
2433   let missing_hunks: Vec<String> = snapshot
2434      .hunks
2435      .iter()
2436      .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2437      .map(|hunk| hunk.hunk_id.clone())
2438      .collect();
2439   if !missing_hunks.is_empty() {
2440      return Err(CommitGenError::Other(format!(
2441         "Compose plan left hunks unassigned: {}",
2442         missing_hunks.join(", ")
2443      )));
2444   }
2445
2446   let dependency_order =
2447      compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2448   if dependency_order != plan.dependency_order {
2449      return Err(CommitGenError::Other(
2450         "Compose dependency order does not match recomputed order".to_string(),
2451      ));
2452   }
2453
2454   Ok(())
2455}
2456
/// Binds every snapshot hunk to a group of the intent plan and returns the
/// validated executable plan.
///
/// Steps:
/// 1. Files claimed by exactly one group are assigned automatically.
/// 2. Files claimed by several groups are sent to the model in batches; each
///    response is filtered by `evaluate_binding` and merged into the
///    assignments.
/// 3. Hunks still unresolved go through one repair round (same request,
///    restricted to those hunks).
/// 4. Whatever remains unresolved is handed to `assign_unresolved_hunks`.
/// 5. The assignments are finalized into an executable plan and validated.
///
/// # Errors
///
/// Propagates failures from auto-assignment, any binding request,
/// finalization, and validation.
#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
async fn bind_compose_plan(
   snapshot: &ComposeSnapshot,
   intent_plan: &ComposeIntentPlan,
   config: &CommitConfig,
   debug_dir: Option<&Path>,
) -> Result<ComposeExecutablePlan> {
   let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;

   if !ambiguous_files.is_empty() {
      let valid_group_ids: HashSet<&str> = intent_plan
         .groups
         .iter()
         .map(|group| group.group_id.as_str())
         .collect();
      let binding_batches = chunk_ambiguous_files(&ambiguous_files);
      let mut unresolved = Vec::new();

      // First pass: one binding request per batch of ambiguous files.
      for (batch_idx, batch) in binding_batches.iter().enumerate() {
         let hunk_context = ambiguous_hunk_context(batch);
         // Debug names carry a 1-based batch suffix only when there are
         // several batches.
         let debug_name = if binding_batches.len() == 1 {
            "compose_bind".to_string()
         } else {
            format!("compose_bind_{:02}", batch_idx + 1)
         };
         let assignments =
            request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
               .await?;
         let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
         for (group_id, hunk_ids) in evaluation.assigned {
            let entry = assigned_by_group.entry(group_id).or_default();
            for hunk_id in hunk_ids {
               entry.insert(hunk_id);
            }
         }
         unresolved.extend(evaluation.unresolved);
      }

      // Rank of each group = its position in the intent plan's dependency
      // order; passed to `assign_unresolved_hunks` below.
      let group_rank: HashMap<&str, usize> = intent_plan
         .dependency_order
         .iter()
         .enumerate()
         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
         .collect();

      // Shadows the accumulator above with the same ids in snapshot order.
      let mut unresolved = order_hunk_ids(snapshot, &unresolved);
      if !unresolved.is_empty() {
         // Repair pass: ask again, but only about the unresolved hunks.
         let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
         let repair_batches = chunk_ambiguous_files(&unresolved_files);
         let mut repair_unresolved = Vec::new();

         for (batch_idx, batch) in repair_batches.iter().enumerate() {
            let debug_name = if repair_batches.len() == 1 {
               "compose_bind_repair".to_string()
            } else {
               format!("compose_bind_repair_{:02}", batch_idx + 1)
            };
            let repair_assignments = request_binding(
               snapshot,
               &intent_plan.groups,
               batch,
               config,
               debug_dir,
               &debug_name,
            )
            .await?;
            let repair_context = ambiguous_hunk_context(batch);
            let repair =
               evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
            for (group_id, hunk_ids) in repair.assigned {
               let entry = assigned_by_group.entry(group_id).or_default();
               for hunk_id in hunk_ids {
                  entry.insert(hunk_id);
               }
            }

            repair_unresolved.extend(repair.unresolved);
         }
         unresolved = order_hunk_ids(snapshot, &repair_unresolved);

         // Last resort: no further model call for hunks the repair pass left
         // unresolved.
         if !unresolved.is_empty() {
            assign_unresolved_hunks(
               &unresolved,
               &mut assigned_by_group,
               &ambiguous_files,
               &group_rank,
            );
         }
      }
   }

   let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
   validate_executable_plan(snapshot, &plan)?;
   Ok(plan)
}
2552
2553fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2554   println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2555   for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2556      let group = &plan.groups[group_idx];
2557      let scope = group
2558         .scope
2559         .as_ref()
2560         .map(|scope| format!("({})", style::scope(scope.as_str())))
2561         .unwrap_or_default();
2562
2563      println!(
2564         "\n{}. {} [{}{}] {}",
2565         display_idx + 1,
2566         style::bold(&group.group_id),
2567         style::commit_type(group.commit_type.as_str()),
2568         scope,
2569         group.rationale
2570      );
2571
2572      println!("   Files:");
2573      for file_id in &group.file_ids {
2574         if let Some(file) = snapshot.file_by_id(file_id) {
2575            let selected_hunk_ids: Vec<&str> = group
2576               .hunk_ids
2577               .iter()
2578               .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2579               .map(String::as_str)
2580               .collect();
2581            let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2582               "all hunks".to_string()
2583            } else {
2584               selected_hunk_ids.join(", ")
2585            };
2586            println!("     - {} {} ({selection})", file.file_id, file.path);
2587         }
2588      }
2589
2590      if !group.dependencies.is_empty() {
2591         println!("   Depends on: {}", group.dependencies.join(", "));
2592      }
2593   }
2594}
2595
2596#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2597async fn generate_compose_group_analysis(
2598   stat: &str,
2599   diff: &str,
2600   group: &ComposeExecutableGroup,
2601   config: &CommitConfig,
2602   args: &Args,
2603   debug_prefix: &str,
2604   counter: &TokenCounter,
2605) -> Result<ConventionalAnalysis> {
2606   match compose_analysis_strategy(diff, config, counter) {
2607      ComposeAnalysisStrategy::MapReduce => {
2608         println!(
2609            "  {}",
2610            style::info(&format!(
2611               "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2612               group.group_id
2613            ))
2614         );
2615         run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2616      },
2617      strategy => {
2618         let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2619            eprintln!(
2620               "  {}",
2621               style::warning(&format!(
2622                  "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2623                  group.group_id
2624               ))
2625            );
2626            Cow::Owned(smart_truncate_diff(
2627               diff,
2628               compose_truncation_length(config),
2629               config,
2630               counter,
2631            ))
2632         } else {
2633            Cow::Borrowed(diff)
2634         };
2635
2636         let ctx = AnalysisContext {
2637            user_context:    Some(&group.rationale),
2638            recent_commits:  None,
2639            common_scopes:   None,
2640            project_context: None,
2641            debug_output:    args.debug_output.as_deref(),
2642            debug_prefix:    Some(debug_prefix),
2643         };
2644
2645         generate_conventional_analysis(
2646            stat,
2647            analysis_diff.as_ref(),
2648            &config.analysis_model,
2649            "",
2650            &ctx,
2651            config,
2652         )
2653         .await
2654      },
2655   }
2656}
2657
2658fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2659   let files: Vec<&str> = group
2660      .file_ids
2661      .iter()
2662      .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2663      .collect();
2664
2665   if files.is_empty() {
2666      "no files resolved".to_string()
2667   } else {
2668      files.join(", ")
2669   }
2670}
2671
2672/// Hunk ids for `file_id` planned by every group up to and including the group
2673/// at `position` in the dependency order. Used to reconstruct a file's intended
2674/// index content at a given commit from its base, independent of apply order.
2675fn cumulative_file_hunk_ids(
2676   plan: &ComposeExecutablePlan,
2677   position: usize,
2678   snapshot: &ComposeSnapshot,
2679   file_id: &str,
2680) -> Vec<String> {
2681   let mut hunk_ids = Vec::new();
2682   for &group_idx in plan.dependency_order.iter().take(position + 1) {
2683      let Some(group) = plan.groups.get(group_idx) else {
2684         continue;
2685      };
2686      for hunk_id in &group.hunk_ids {
2687         if snapshot
2688            .hunk_by_id(hunk_id)
2689            .is_some_and(|hunk| hunk.file_id == file_id)
2690         {
2691            hunk_ids.push(hunk_id.clone());
2692         }
2693      }
2694   }
2695   hunk_ids
2696}
2697
/// Prepares the diff and commit message for every group of `plan` in
/// dependency order, then hands off to `execute_compose_with_prepared_messages`
/// and returns its result.
///
/// # Errors
///
/// Fails if a group patch cannot be built. A failure while generating a
/// group's analysis or summary is wrapped in
/// `CommitGenError::ComposeMessageError` carrying the group id and its file
/// list. Errors from the final hand-off are propagated unchanged.
///
/// # Panics
///
/// Panics if `plan.dependency_order` holds an index outside `plan.groups`.
#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
pub async fn execute_compose(
   snapshot: &ComposeSnapshot,
   plan: &ComposeExecutablePlan,
   config: &CommitConfig,
   args: &Args,
   base_state: &ComposeBaseState,
) -> Result<Vec<String>> {
   let total = plan.dependency_order.len();

   // Phase 1: derive each group's diff/stat from the immutable compose snapshot.
   // This avoids mutating the index while commit messages are prepared and keeps
   // later worktree edits out of already-planned commits.
   let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
      let group = &plan.groups[group_idx];
      println!(
         "  {}",
         style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
      );
      let group_patch = create_executable_group_patch(snapshot, group)?;
      group_diff_stats.push((group_patch.diff, group_patch.stat));
   }

   // Phase 2: generate commit messages concurrently. Both LLM calls per group
   // (analysis + summary) run inside a single async task so the slower of the
   // two does not block other groups from progressing.
   println!(
      "{}",
      style::info(&format!(
         "Generating {total} commit message(s) in parallel (up to {} at a time)...",
         // Clamped to at least 1 so an empty plan still reports a sane limit.
         COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
      ))
   );

   let token_counter = create_token_counter(config);
   let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
      stream::iter(plan.dependency_order.iter().enumerate())
         .map(|(idx, &group_idx)| {
            let group = &plan.groups[group_idx];
            // `group_diff_stats` was filled in dependency order, so it is
            // indexed by position rather than by group index.
            let (diff, stat) = &group_diff_stats[idx];
            let debug_prefix = format!("compose-{}", idx + 1);
            let token_counter = &token_counter;
            async move {
               // Inner block so both `?` failures funnel into one wrapping step.
               let result = async {
                  let analysis = generate_compose_group_analysis(
                     stat,
                     diff,
                     group,
                     config,
                     args,
                     &debug_prefix,
                     token_counter,
                  )
                  .await?;
                  let body = analysis.body_texts();
                  let summary = generate_summary_from_analysis(
                     stat,
                     group.commit_type.as_str(),
                     group.scope.as_ref().map(|scope| scope.as_str()),
                     &body,
                     Some(&group.rationale),
                     config,
                     args.debug_output.as_deref(),
                     Some(&debug_prefix),
                  )
                  .await?;
                  Ok::<_, CommitGenError>((body, summary))
               }
               .await;

               // Attach the group id and file list so the failure is traceable.
               result.map_err(|source| CommitGenError::ComposeMessageError {
                  group_id: group.group_id.clone(),
                  files:    compose_group_file_list(snapshot, group),
                  source:   Box::new(source),
               })
            }
         })
         .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
         .collect::<Vec<_>>()
         .await
         .into_iter()
         // Stops at the first failed group and returns its error.
         .collect::<Result<Vec<_>>>()?;

   execute_compose_with_prepared_messages(
      snapshot,
      plan,
      config,
      args,
      base_state,
      prepared_messages,
   )
}
2791
2792#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2793fn execute_compose_with_prepared_messages(
2794   snapshot: &ComposeSnapshot,
2795   plan: &ComposeExecutablePlan,
2796   config: &CommitConfig,
2797   args: &Args,
2798   base_state: &ComposeBaseState,
2799   prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2800) -> Result<Vec<String>> {
2801   let dir = &args.dir;
2802   let total = plan.dependency_order.len();
2803   if args.compose_preview {
2804      return Ok(Vec::new());
2805   }
2806
2807   let index = TempGitIndex::new(dir)?;
2808   read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2809
2810   let mut commit_hashes = Vec::new();
2811   let mut parent_hash = base_state.head_hash.clone();
2812
2813   // Phase 3: sequential commit-object loop. Re-stage each group into an
2814   // isolated temporary index, then create commit objects parented in memory.
2815   // The real branch and index are not updated until every group succeeds.
2816   for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2817      let group = &plan.groups[group_idx];
2818
2819      println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2820      println!("  Type: {}", style::commit_type(group.commit_type.as_str()));
2821      if let Some(scope) = &group.scope {
2822         println!("  Scope: {}", style::scope(scope.as_str()));
2823      }
2824      let paths: Vec<String> = group
2825         .file_ids
2826         .iter()
2827         .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2828         .collect();
2829      println!("  Files: {}", paths.join(", "));
2830
2831      let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2832      let mut staged_anything = outcome.result == StageResult::Staged;
2833
2834      // Any file whose planned patch no longer applies against the temporary
2835      // index is reconstructed from the immutable snapshot base and cumulative
2836      // hunk selection. The real index and worktree are never touched here.
2837      for skipped in &outcome.skipped {
2838         let Some(file) = snapshot.file_by_path(&skipped.path) else {
2839            continue;
2840         };
2841         let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2842         force_stage_file_from_base_in_index(
2843            snapshot,
2844            &file.file_id,
2845            &cumulative,
2846            dir,
2847            index.path(),
2848         )?;
2849         staged_anything = true;
2850         eprintln!(
2851            "  {}",
2852            style::info(&format!(
2853               "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2854               skipped.path
2855            ))
2856         );
2857      }
2858
2859      if !staged_anything {
2860         eprintln!(
2861            "  {}",
2862            style::warning(&format!(
2863               "Skipping commit {}: its planned patch is already applied ({:?})",
2864               group.group_id, outcome.result
2865            ))
2866         );
2867         continue;
2868      }
2869
2870      let (analysis_body, summary) = prepared_messages[idx].clone();
2871      let mut commit = ConventionalCommit {
2872         commit_type: group.commit_type.clone(),
2873         scope: group.scope.clone(),
2874         summary,
2875         body: analysis_body,
2876         footers: vec![],
2877      };
2878      post_process_commit_message(&mut commit, config);
2879
2880      if let Err(err) = validate_commit_message(&commit, config) {
2881         eprintln!(
2882            "  {}",
2883            style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2884         );
2885      }
2886
2887      let mut formatted_message = format_commit_message(&commit);
2888      if args.signoff || config.signoff {
2889         formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2890      }
2891      println!(
2892         "  Message:\n{}",
2893         formatted_message
2894            .lines()
2895            .take(3)
2896            .collect::<Vec<_>>()
2897            .join("\n")
2898      );
2899
2900      let tree = write_index_tree(index.path(), dir)?;
2901      let sign = args.sign || config.gpg_sign;
2902      let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2903      parent_hash.clone_from(&hash);
2904      commit_hashes.push(hash);
2905
2906      if args.compose_test_after_each {
2907         return Err(CommitGenError::Other(
2908            "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2909         ));
2910      }
2911   }
2912
2913   if commit_hashes.is_empty() {
2914      return Ok(commit_hashes);
2915   }
2916
2917   update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2918
2919   let current_index_tree = write_real_index_tree(dir)?;
2920   if current_index_tree == base_state.index_tree {
2921      reset_mixed_to(&parent_hash, dir)?;
2922   } else {
2923      // Someone staged while compose ran. The commits contain only pinned
2924      // snapshot content, so just refresh the index entries for the paths
2925      // compose committed and leave the drifted staging intact.
2926      println!(
2927         "{}",
2928         style::warning("Index changed during compose; preserving newly staged changes")
2929      );
2930      let paths: Vec<String> = snapshot.files.iter().map(|file| file.path.clone()).collect();
2931      reset_paths_to(&parent_hash, &paths, dir)?;
2932   }
2933
2934   Ok(commit_hashes)
2935}
2936
2937#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2938pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2939   let max_rounds = config.compose_max_rounds;
2940
2941   for round in 1..=max_rounds {
2942      if round > 1 {
2943         println!(
2944            "\n{}",
2945            style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2946         );
2947      } else {
2948         println!("{}", style::section_header("Compose Mode", 80));
2949      }
2950      println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2951
2952      run_compose_round(args, config, round).await?;
2953
2954      if args.compose_preview {
2955         break;
2956      }
2957
2958      match get_compose_diff(&args.dir) {
2959         Err(CommitGenError::NoChanges { .. }) => {
2960            println!(
2961               "\n{}",
2962               style::success(&format!(
2963                  "{} All changes committed successfully",
2964                  style::icons::SUCCESS
2965               ))
2966            );
2967            break;
2968         },
2969         Err(err) => return Err(err),
2970         Ok(remaining_diff) => {
2971            eprintln!(
2972               "\n{}",
2973               style::warning(&format!(
2974                  "{} Uncommitted changes remain after round {round}",
2975                  style::icons::WARNING
2976               ))
2977            );
2978            eprintln!("{remaining_diff}");
2979         },
2980      }
2981
2982      if round < max_rounds {
2983         eprintln!("{}", style::info("Starting another compose round..."));
2984      } else {
2985         eprintln!(
2986            "{}",
2987            style::warning(&format!(
2988               "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2989            ))
2990         );
2991      }
2992   }
2993
2994   Ok(())
2995}
2996
2997#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2998async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2999   let base_state = capture_compose_base_state(&args.dir)?;
3000   let diff = get_compose_diff(&args.dir)?;
3001   let stat = get_compose_stat(&args.dir)?;
3002   let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3003   // Freeze every file's on-disk content into the odb before any LLM call:
3004   // staging later reads these pins, never the live worktree, so edits made
3005   // while compose runs cannot leak into its commits.
3006   pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3007   let snapshot = snapshot;
3008
3009   if let Some(debug_dir) = args.debug_output.as_deref() {
3010      save_debug_artifact(
3011         Some(debug_dir),
3012         &format!("compose_round_{round}_snapshot.json"),
3013         &snapshot,
3014      )?;
3015   }
3016
3017   let token_counter = create_token_counter(config);
3018   let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3019      println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3020      observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3021   } else {
3022      if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3023         && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3024      {
3025         println!(
3026            "{}",
3027            style::info(
3028               "Skipping per-file observations for very large compose snapshot; using area-level \
3029                planning instead."
3030            )
3031         );
3032      }
3033      Vec::new()
3034   };
3035
3036   if let Some(debug_dir) = args.debug_output.as_deref()
3037      && !observations.is_empty()
3038   {
3039      save_debug_artifact(
3040         Some(debug_dir),
3041         &format!("compose_round_{round}_observations.json"),
3042         &observations,
3043      )?;
3044   }
3045
3046   let max_commits = args.compose_max_commits.unwrap_or(20);
3047   let executable_plan = if let Some(cached_plan) =
3048      load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3049   {
3050      println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3051      cached_plan
3052   } else {
3053      println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3054      let intent_plan = analyze_compose_intent(
3055         &snapshot,
3056         &observations,
3057         config,
3058         max_commits,
3059         args.debug_output.as_deref(),
3060      )
3061      .await?;
3062
3063      if let Some(debug_dir) = args.debug_output.as_deref() {
3064         save_debug_artifact(
3065            Some(debug_dir),
3066            &format!("compose_round_{round}_intent_plan.json"),
3067            &intent_plan,
3068         )?;
3069      }
3070
3071      println!("{}", style::info("Binding hunks to groups..."));
3072      let plan =
3073         bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3074      save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3075      plan
3076   };
3077
3078   if let Some(debug_dir) = args.debug_output.as_deref() {
3079      save_debug_artifact(
3080         Some(debug_dir),
3081         &format!("compose_round_{round}_executable_plan.json"),
3082         &executable_plan,
3083      )?;
3084   }
3085
3086   print_executable_plan(&snapshot, &executable_plan);
3087
3088   if args.compose_preview {
3089      println!(
3090         "\n{}",
3091         style::success(&format!(
3092            "{} Preview complete (use --compose without --compose-preview to execute)",
3093            style::icons::SUCCESS
3094         ))
3095      );
3096      return Ok(());
3097   }
3098
3099   println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3100   let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3101   println!(
3102      "{}",
3103      style::success(&format!(
3104         "{} Round {round}: Created {} commit(s)",
3105         style::icons::SUCCESS,
3106         hashes.len()
3107      ))
3108   );
3109   Ok(())
3110}
3111
3112#[cfg(test)]
3113mod tests {
3114   use std::{fmt::Write, fs};
3115
3116   use tempfile::TempDir;
3117
3118   use super::*;
3119   use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3120
3121   fn shared_file_diff() -> (&'static str, &'static str) {
3122      (
3123         r#"diff --git a/src/lib.rs b/src/lib.rs
3124index 1111111..2222222 100644
3125--- a/src/lib.rs
3126+++ b/src/lib.rs
3127@@ -1,3 +1,3 @@
3128-fn alpha() {
3129+fn alpha_changed() {
3130     println!("alpha");
3131 }
3132@@ -12,3 +12,3 @@
3133-fn beta() {
3134+fn beta_changed() {
3135     println!("beta");
3136 }
3137diff --git a/tests/lib.rs b/tests/lib.rs
3138index 3333333..4444444 100644
3139--- a/tests/lib.rs
3140+++ b/tests/lib.rs
3141@@ -1,3 +1,4 @@
3142 fn test_it() {
3143+    assert!(true);
3144 }
3145"#,
3146         " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3147      )
3148   }
3149
3150   fn build_test_snapshot() -> ComposeSnapshot {
3151      let (diff, stat) = shared_file_diff();
3152      build_compose_snapshot(diff, stat).unwrap()
3153   }
3154
3155   fn write_file(dir: &TempDir, path: &str, contents: &str) {
3156      let full_path = dir.path().join(path);
3157      if let Some(parent) = full_path.parent() {
3158         fs::create_dir_all(parent).unwrap();
3159      }
3160      fs::write(full_path, contents).unwrap();
3161   }
3162
3163   fn run_git(dir: &TempDir, args: &[&str]) -> String {
3164      let output = crate::git::git_command()
3165         .args(args)
3166         .current_dir(dir.path())
3167         .output()
3168         .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3169
3170      assert!(
3171         output.status.success(),
3172         "git {:?} failed: stdout={} stderr={}",
3173         args,
3174         String::from_utf8_lossy(&output.stdout),
3175         String::from_utf8_lossy(&output.stderr)
3176      );
3177
3178      String::from_utf8_lossy(&output.stdout).to_string()
3179   }
3180
3181   fn init_repo() -> TempDir {
3182      let dir = TempDir::new().unwrap();
3183      run_git(&dir, &["init"]);
3184      run_git(&dir, &["config", "user.name", "Compose Test"]);
3185      run_git(&dir, &["config", "user.email", "compose@test.local"]);
3186      run_git(&dir, &["config", "commit.gpgsign", "false"]);
3187      dir
3188   }
3189
3190   fn commit_all(dir: &TempDir, message: &str) {
3191      run_git(dir, &["add", "."]);
3192      run_git(dir, &["commit", "-m", message]);
3193   }
3194
3195   fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3196      (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3197   }
3198
3199   #[test]
3200   fn test_compose_file_category_treats_prompts_as_functional_source() {
3201      let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3202index 1111111..2222222 100644
3203--- a/prompts/analysis/default.md
3204+++ b/prompts/analysis/default.md
3205@@ -1,1 +1,1 @@
3206-old prompt
3207+new prompt
3208diff --git a/system/analysis/default.md b/system/analysis/default.md
3209index 5555555..6666666 100644
3210--- a/system/analysis/default.md
3211+++ b/system/analysis/default.md
3212@@ -1,1 +1,1 @@
3213-old system
3214+new system
3215diff --git a/README.md b/README.md
3216index 3333333..4444444 100644
3217--- a/README.md
3218+++ b/README.md
3219@@ -1,1 +1,1 @@
3220-old docs
3221+new docs
3222";
3223      let snapshot = build_compose_snapshot(diff, "").unwrap();
3224      let prompt_file = snapshot
3225         .file_by_path("prompts/analysis/default.md")
3226         .unwrap();
3227      let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3228      let readme_file = snapshot.file_by_path("README.md").unwrap();
3229
3230      assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3231      assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3232      assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3233
3234      let feat_group = ComposeIntentGroup {
3235         group_id:     "G1".to_string(),
3236         commit_type:  CommitType::new("feat").unwrap(),
3237         scope:        None,
3238         file_ids:     vec![prompt_file.file_id.clone()],
3239         rationale:    "prompt behavior change".to_string(),
3240         dependencies: vec![],
3241      };
3242      assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3243
3244      let fallback_type =
3245         fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3246            .unwrap();
3247      assert_eq!(fallback_type.as_str(), "refactor");
3248   }
3249
3250   fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3251      let mut diff = String::new();
3252
3253      for file_idx in 0..file_count {
3254         let path = format!("src/module_{file_idx:03}.rs");
3255         writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3256         diff.push_str("index 1111111..2222222 100644\n");
3257         writeln!(diff, "--- a/{path}").unwrap();
3258         writeln!(diff, "+++ b/{path}").unwrap();
3259
3260         for hunk_idx in 0..hunks_per_file {
3261            let line_no = (hunk_idx * 4) + 1;
3262            writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3263            writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3264            writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3265         }
3266      }
3267
3268      build_compose_snapshot(&diff, "").unwrap()
3269   }
3270
3271   fn build_multi_area_snapshot() -> ComposeSnapshot {
3272      let mut diff = String::new();
3273      let areas = [
3274         ("apps/frontend/src/server", 72),
3275         ("packages/model/src/models", 54),
3276         ("apps/daemon/src/worker", 43),
3277         (".github/workflows", 16),
3278      ];
3279
3280      for (prefix, count) in areas {
3281         for file_idx in 0..count {
3282            let path = format!("{prefix}/file_{file_idx:03}.rs");
3283            writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3284            diff.push_str("index 1111111..2222222 100644\n");
3285            writeln!(diff, "--- a/{path}").unwrap();
3286            writeln!(diff, "+++ b/{path}").unwrap();
3287            diff.push_str("@@ -1,1 +1,1 @@\n");
3288            writeln!(diff, "-old_{file_idx}").unwrap();
3289            writeln!(diff, "+new_{file_idx}").unwrap();
3290         }
3291      }
3292
3293      build_compose_snapshot(&diff, "").unwrap()
3294   }
3295
3296   fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3297      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3298      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3299      let groups = vec![
3300         ComposeIntentGroup {
3301            group_id:     "G1".to_string(),
3302            commit_type:  CommitType::new("refactor").unwrap(),
3303            scope:        None,
3304            file_ids:     vec![source_file.file_id.clone(), test_file.file_id.clone()],
3305            rationale:    "implementation group".to_string(),
3306            dependencies: vec![],
3307         },
3308         ComposeIntentGroup {
3309            group_id:     "G2".to_string(),
3310            commit_type:  CommitType::new("refactor").unwrap(),
3311            scope:        None,
3312            file_ids:     vec![source_file.file_id.clone()],
3313            rationale:    "shared file follow-up".to_string(),
3314            dependencies: vec!["G1".to_string()],
3315         },
3316      ];
3317      let dependency_order =
3318         compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3319            .unwrap();
3320      ComposeIntentPlan { groups, dependency_order }
3321   }
3322
3323   #[test]
3324   fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3325      let dir = init_repo();
3326      write_file(&dir, "src/a.rs", "fn a() {}\n");
3327      write_file(&dir, "src/b.rs", "fn b() {}\n");
3328      commit_all(&dir, "initial");
3329      write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3330      write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3331
3332      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3333      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3334      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3335      let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3336      let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3337      let plan = ComposeExecutablePlan {
3338         groups:           vec![
3339            ComposeExecutableGroup {
3340               group_id:     "G1".to_string(),
3341               commit_type:  CommitType::new("refactor").unwrap(),
3342               scope:        None,
3343               file_ids:     vec![a_file.file_id.clone()],
3344               rationale:    "change a".to_string(),
3345               dependencies: vec![],
3346               hunk_ids:     a_file.hunk_ids.clone(),
3347            },
3348            ComposeExecutableGroup {
3349               group_id:     "G2".to_string(),
3350               commit_type:  CommitType::new("refactor").unwrap(),
3351               scope:        None,
3352               file_ids:     vec![b_file.file_id.clone()],
3353               rationale:    "change b".to_string(),
3354               dependencies: vec!["G1".to_string()],
3355               hunk_ids:     b_file.hunk_ids.clone(),
3356            },
3357         ],
3358         dependency_order: vec![0, 1],
3359      };
3360      let config = CommitConfig::default();
3361      let args = Args {
3362         dir: dir.path().to_string_lossy().to_string(),
3363         compose: true,
3364         ..Default::default()
3365      };
3366      let base_state = capture_compose_base_state(&args.dir).unwrap();
3367
3368      let hashes = execute_compose_with_prepared_messages(
3369         &snapshot,
3370         &plan,
3371         &config,
3372         &args,
3373         &base_state,
3374         vec![canned_message("change a"), canned_message("change b")],
3375      )
3376      .unwrap();
3377
3378      assert_eq!(hashes.len(), 2);
3379      assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3380      assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3381   }
3382
3383   #[test]
3384   fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3385      let dir = init_repo();
3386      write_file(&dir, "src/lib.rs", "old\n");
3387      write_file(&dir, "sentinel.txt", "base\n");
3388      commit_all(&dir, "initial");
3389      let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3390
3391      // A real change so the snapshot is valid.
3392      write_file(&dir, "src/lib.rs", "changed\n");
3393
3394      // A pre-existing staged change that MUST survive a failed compose run.
3395      write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3396      run_git(&dir, &["add", "sentinel.txt"]);
3397      let staged_before = run_git(&dir, &["diff", "--cached"]);
3398      assert!(staged_before.contains("staged sentinel"));
3399
3400      let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3401      let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3402      let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3403      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3404      // The plan references a hunk id that does not exist, so staging fails
3405      // before any commit object is created or any ref is updated.
3406      let plan = ComposeExecutablePlan {
3407         groups:           vec![ComposeExecutableGroup {
3408            group_id:     "G1".to_string(),
3409            commit_type:  CommitType::new("fix").unwrap(),
3410            scope:        None,
3411            file_ids:     vec![source_file.file_id.clone()],
3412            rationale:    "unstageable group".to_string(),
3413            dependencies: vec![],
3414            hunk_ids:     vec!["F999-H001".to_string()],
3415         }],
3416         dependency_order: vec![0],
3417      };
3418      let config = CommitConfig::default();
3419      let args = Args {
3420         dir: dir.path().to_string_lossy().to_string(),
3421         compose: true,
3422         ..Default::default()
3423      };
3424      let base_state = capture_compose_base_state(&args.dir).unwrap();
3425
3426      let err = execute_compose_with_prepared_messages(
3427         &snapshot,
3428         &plan,
3429         &config,
3430         &args,
3431         &base_state,
3432         vec![canned_message("unstageable group")],
3433      )
3434      .unwrap_err();
3435
3436      assert!(err.to_string().contains("unknown hunk id"));
3437      assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3438      assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3439   }
3440
3441   #[test]
3442   fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3443      let snapshot = build_test_snapshot();
3444      let intent_plan = build_shared_intent_plan(&snapshot);
3445      let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3446
3447      assert_eq!(ambiguous.len(), 1);
3448      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3449      let assigned_to_g1 = assigned.get("G1").unwrap();
3450      assert!(
3451         test_file
3452            .hunk_ids
3453            .iter()
3454            .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3455         "uniquely owned file should be auto-assigned"
3456      );
3457   }
3458
3459   #[test]
3460   fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3461      let snapshot = build_test_snapshot();
3462      let intent_plan = build_shared_intent_plan(&snapshot);
3463      let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3464      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3465      let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3466      let valid_group_ids: HashSet<&str> = intent_plan
3467         .groups
3468         .iter()
3469         .map(|group| group.group_id.as_str())
3470         .collect();
3471
3472      let evaluation = evaluate_binding(
3473         &[
3474            ComposeBindingAssignment {
3475               group_id: "G1".to_string(),
3476               hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3477            },
3478            ComposeBindingAssignment {
3479               group_id: "G2".to_string(),
3480               hunk_ids: vec![source_file.hunk_ids[1].clone()],
3481            },
3482         ],
3483         &hunk_context,
3484         &valid_group_ids,
3485         &snapshot,
3486      );
3487
3488      for (group_id, hunk_ids) in evaluation.assigned {
3489         let entry = assigned.entry(group_id).or_default();
3490         for hunk_id in hunk_ids {
3491            entry.insert(hunk_id);
3492         }
3493      }
3494
3495      let group_rank: HashMap<&str, usize> = intent_plan
3496         .dependency_order
3497         .iter()
3498         .enumerate()
3499         .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3500         .collect();
3501      assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3502
3503      let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3504      assert_eq!(executable_plan.groups.len(), 1);
3505      assert_eq!(executable_plan.groups[0].group_id, "G1");
3506      assert!(
3507         source_file
3508            .hunk_ids
3509            .iter()
3510            .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3511         "fallback should keep every hunk from the shared file in the surviving group"
3512      );
3513   }
3514
3515   #[test]
3516   fn test_validate_executable_plan_rejects_overlap() {
3517      let snapshot = build_test_snapshot();
3518      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3519      let executable_plan = ComposeExecutablePlan {
3520         groups:           vec![
3521            ComposeExecutableGroup {
3522               group_id:     "G1".to_string(),
3523               commit_type:  CommitType::new("refactor").unwrap(),
3524               scope:        None,
3525               file_ids:     vec![source_file.file_id.clone()],
3526               rationale:    "group one".to_string(),
3527               dependencies: vec![],
3528               hunk_ids:     vec![source_file.hunk_ids[0].clone()],
3529            },
3530            ComposeExecutableGroup {
3531               group_id:     "G2".to_string(),
3532               commit_type:  CommitType::new("refactor").unwrap(),
3533               scope:        None,
3534               file_ids:     vec![source_file.file_id.clone()],
3535               rationale:    "group two".to_string(),
3536               dependencies: vec![],
3537               hunk_ids:     vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3538            },
3539         ],
3540         dependency_order: vec![0, 1],
3541      };
3542
3543      let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3544      assert!(err.to_string().contains("assigned to both"));
3545   }
3546
3547   #[test]
3548   fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3549      let snapshot = build_test_snapshot();
3550      let planning_index = build_planning_index(&snapshot);
3551      let groups = vec![ComposeIntentGroup {
3552         group_id:     "G1".to_string(),
3553         commit_type:  CommitType::new("refactor").unwrap(),
3554         scope:        None,
3555         file_ids:     vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3556         rationale:    "normalize file references".to_string(),
3557         dependencies: vec![],
3558      }];
3559
3560      let (normalized_groups, repair_notes) =
3561         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3562
3563      assert_eq!(normalized_groups.len(), 1);
3564      assert_eq!(
3565         normalized_groups[0].file_ids,
3566         snapshot
3567            .files
3568            .iter()
3569            .map(|file| file.file_id.clone())
3570            .collect::<Vec<_>>()
3571      );
3572      assert_eq!(repair_notes.len(), 2);
3573   }
3574
3575   #[test]
3576   fn test_normalize_intent_plan_repairs_missing_files() {
3577      let snapshot = build_test_snapshot();
3578      let planning_index = build_planning_index(&snapshot);
3579      let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3580      let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3581      let groups = vec![ComposeIntentGroup {
3582         group_id:     "G1".to_string(),
3583         commit_type:  CommitType::new("refactor").unwrap(),
3584         scope:        None,
3585         file_ids:     vec![source_file.file_id.clone()],
3586         rationale:    "partial coverage".to_string(),
3587         dependencies: vec![],
3588      }];
3589
3590      let (normalized_groups, repair_notes) =
3591         normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3592
3593      assert_eq!(normalized_groups.len(), 1);
3594      assert!(
3595         normalized_groups[0].file_ids.contains(&source_file.file_id),
3596         "existing file assignment should be preserved"
3597      );
3598      assert!(
3599         normalized_groups[0].file_ids.contains(&test_file.file_id),
3600         "missing files should be assigned to an existing group"
3601      );
3602      assert_eq!(repair_notes.len(), 1);
3603      assert!(repair_notes[0].contains(&test_file.file_id));
3604   }
3605
/// Feeds normalization a two-group plan whose `file_ids` mix unknown
/// placeholders with real planning target IDs and whose dependencies include
/// a loosely written reference (`"group 2"`), a self-reference and an unknown
/// ID.
///
/// Expects both groups to survive, the first group to end up with only
/// `F`-prefixed IDs and a single dependency on `G2`, the second group to end
/// up with no dependencies, and a repair note for each kind of fix.
#[test]
fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
   let snapshot = build_multi_area_snapshot();
   let planning_index = build_planning_index(&snapshot);

   // First planning target whose label starts with the given prefix.
   let target_with_prefix = |prefix: &str| {
      planning_index
         .targets
         .iter()
         .find(|target| target.label.starts_with(prefix))
         .unwrap()
   };
   let frontend_target = target_with_prefix("apps/frontend");
   let model_target = target_with_prefix("packages/model");

   let frontend_group = ComposeIntentGroup {
      group_id:     "G1".to_string(),
      commit_type:  CommitType::new("refactor").unwrap(),
      scope:        Scope::new("apps/frontend").ok(),
      file_ids:     vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
      rationale:    "frontend platform updates".to_string(),
      dependencies: vec!["group 2".to_string(), "G1".to_string()],
   };
   let model_group = ComposeIntentGroup {
      group_id:     "G2".to_string(),
      commit_type:  CommitType::new("refactor").unwrap(),
      scope:        Scope::new("packages/model").ok(),
      file_ids:     vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
      rationale:    "model storage updates".to_string(),
      dependencies: vec!["F5".to_string()],
   };

   let (normalized_groups, repair_notes) =
      normalize_intent_plan(&snapshot, &planning_index, vec![frontend_group, model_group])
         .unwrap();

   assert_eq!(normalized_groups.len(), 2);
   let (first, second) = (&normalized_groups[0], &normalized_groups[1]);

   assert!(first.file_ids.iter().all(|file_id| file_id.starts_with('F')));
   assert_eq!(first.dependencies, vec!["G2".to_string()]);
   assert!(second.dependencies.is_empty());

   // Every kind of repair has to be reported at least once.
   for expected in [
      "Dropped unknown planning target",
      "Dropped self-dependency",
      "Mapped compose planner dependency",
      "Dropped unknown dependency",
   ] {
      assert!(repair_notes.iter().any(|note| note.contains(expected)));
   }
}
3672
/// The summary rendered for the small test snapshot must not carry the
/// `# snapshot compacted` marker and must mention every hunk ID of
/// `src/lib.rs`.
#[test]
fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
   let snapshot = build_test_snapshot();
   let summary = render_snapshot_summary(&snapshot, &[]);
   let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

   let compacted = summary.contains("# snapshot compacted");
   assert!(!compacted);

   let every_hunk_listed = source_file
      .hunk_ids
      .iter()
      .all(|hunk_id| summary.contains(hunk_id));
   assert!(every_hunk_listed);
}
3684
/// The summary of a snapshot built with `build_large_snapshot(160, 4)` must
/// be compacted: it carries the `# snapshot compacted` marker, still shows the
/// `F001` file line with its first and last hunk, leaves out the two middle
/// hunks and states how many were omitted.
#[test]
fn test_render_snapshot_summary_compacts_large_snapshot() {
   let snapshot = build_large_snapshot(160, 4);
   let summary = render_snapshot_summary(&snapshot, &[]);
   let shows = |needle: &str| summary.contains(needle);

   assert!(shows("# snapshot compacted"));
   assert!(shows("- F001 src/module_000.rs (+4/-4, 4 hunks)"));

   // First and last hunks are kept ...
   assert!(shows("F001-H001"));
   assert!(shows("F001-H004"));

   // ... while the middle ones are replaced by an omission line.
   assert!(!shows("F001-H002"));
   assert!(!shows("F001-H003"));
   assert!(shows("... 2 more hunks omitted from F001"));
}
3698
/// The planning index built for the multi-area snapshot must be in
/// `PlanningMode::Area`, expose fewer targets than the snapshot has files,
/// contain a target labelled under `apps/frontend`, and render a planning
/// stat that contains the phrase "planning over".
#[test]
fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
   let snapshot = build_multi_area_snapshot();
   let planning_index = build_planning_index(&snapshot);

   assert_eq!(planning_index.mode, PlanningMode::Area);

   let target_count = planning_index.targets.len();
   let file_count = snapshot.files.len();
   assert!(target_count < file_count);

   let has_frontend_target = planning_index
      .targets
      .iter()
      .any(|target| target.label.starts_with("apps/frontend"));
   assert!(has_frontend_target);

   let stat = render_planning_stat(&planning_index);
   assert!(stat.contains("planning over"), "planning stat should explain the area mode");
}
3717
/// Splits the planning targets of the multi-area snapshot into two halves,
/// references them by target label in two groups, and checks that
/// normalization keeps both groups, yields only `F`-prefixed IDs, reports at
/// least one repair note, and covers as many distinct IDs as the snapshot has
/// files.
#[test]
fn test_normalize_intent_plan_expands_area_targets() {
   let snapshot = build_multi_area_snapshot();
   let planning_index = build_planning_index(&snapshot);

   // Groups reference targets by label; the first half goes to G1, the rest to G2.
   let labels: Vec<String> = planning_index
      .targets
      .iter()
      .map(|target| target.label.clone())
      .collect();
   let (first_half, second_half) = labels.split_at(labels.len() / 2);

   let groups = vec![
      ComposeIntentGroup {
         group_id:     "G1".to_string(),
         commit_type:  CommitType::new("refactor").unwrap(),
         scope:        None,
         file_ids:     first_half.to_vec(),
         rationale:    "frontend and model".to_string(),
         dependencies: vec![],
      },
      ComposeIntentGroup {
         group_id:     "G2".to_string(),
         commit_type:  CommitType::new("refactor").unwrap(),
         scope:        None,
         file_ids:     second_half.to_vec(),
         rationale:    "daemon and ci".to_string(),
         dependencies: vec![],
      },
   ];

   let (normalized_groups, repair_notes) =
      normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();

   assert_eq!(normalized_groups.len(), 2);

   let expanded_ids: Vec<_> = normalized_groups
      .iter()
      .flat_map(|group| group.file_ids.iter())
      .collect();
   assert!(
      expanded_ids.iter().all(|file_id| file_id.starts_with('F')),
      "area targets should expand back to concrete file IDs"
   );
   assert!(!repair_notes.is_empty());

   let distinct_ids: HashSet<_> = expanded_ids.iter().copied().collect();
   assert_eq!(distinct_ids.len(), snapshot.files.len());
}
3775
/// A single group that claims every file of the multi-area snapshot must
/// trigger the large-patch fallback. The fallback plan must then contain at
/// least three groups, cover as many distinct file IDs as the snapshot has
/// files, and include a group whose rationale mentions "frontend".
#[test]
fn test_large_patch_fallback_splits_monolithic_area_plan() {
   let snapshot = build_multi_area_snapshot();
   let planning_index = build_planning_index(&snapshot);

   let every_file_id: Vec<String> = snapshot
      .files
      .iter()
      .map(|file| file.file_id.clone())
      .collect();
   let monolithic_plan = [ComposeIntentGroup {
      group_id:     "G1".to_string(),
      commit_type:  CommitType::new("refactor").unwrap(),
      scope:        None,
      file_ids:     every_file_id,
      rationale:    "repo-wide refactor".to_string(),
      dependencies: vec![],
   }];

   let must_fall_back =
      should_force_large_patch_fallback(&snapshot, &planning_index, &monolithic_plan, 6);
   assert!(must_fall_back);

   let fallback_groups =
      build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
   assert!(fallback_groups.len() >= 3);

   let distinct_ids: HashSet<_> = fallback_groups
      .iter()
      .flat_map(|group| group.file_ids.iter())
      .collect();
   assert_eq!(distinct_ids.len(), snapshot.files.len());

   let mentions_frontend = fallback_groups
      .iter()
      .any(|group| group.rationale.contains("frontend"));
   assert!(mentions_frontend, "fallback should preserve workstream identity");
}
3818
/// For the snapshot built with `build_large_snapshot(160, 4)` and a
/// map-reduce threshold of 1000, the diff alone qualifies for map-reduce, yet
/// compose observations must not be collected.
#[test]
fn test_should_collect_compose_observations_skips_area_mode() {
   let snapshot = build_large_snapshot(160, 4);
   let config = CommitConfig { map_reduce_threshold: 1_000, ..CommitConfig::default() };
   let counter = create_token_counter(&config);

   let diff_qualifies = should_use_map_reduce(&snapshot.diff, &config, &counter);
   assert!(diff_qualifies);

   let collects_observations = should_collect_compose_observations(&snapshot, &config, &counter);
   assert!(!collects_observations);
}
3828
/// With a map-reduce threshold of 20, a one-file diff carrying a 200-character
/// added line must be analysed with `ComposeAnalysisStrategy::MapReduce`.
#[test]
fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
   let config = CommitConfig { map_reduce_threshold: 20, ..CommitConfig::default() };
   let counter = create_token_counter(&config);

   // Diff header followed by a single added line of 200 `a` characters.
   let mut diff = String::from("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+");
   diff.push_str(&"a".repeat(200));

   let strategy = compose_analysis_strategy(&diff, &config, &counter);
   assert_eq!(strategy, ComposeAnalysisStrategy::MapReduce);
}
3841
/// With map-reduce disabled, `max_diff_tokens` of 1 and `max_diff_length` of
/// 10000, the compose truncation length must be 4 and a short `models.json`
/// diff must be analysed with `ComposeAnalysisStrategy::SmartTruncate`.
#[test]
fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
   let config = CommitConfig {
      map_reduce_enabled: false,
      max_diff_tokens: 1,
      max_diff_length: 10_000,
      ..CommitConfig::default()
   };
   let counter = create_token_counter(&config);

   let truncation_length = compose_truncation_length(&config);
   assert_eq!(truncation_length, 4);

   let diff = "diff --git a/models.json b/models.json\n+large";
   let strategy = compose_analysis_strategy(diff, &config, &counter);
   assert_eq!(strategy, ComposeAnalysisStrategy::SmartTruncate);
}
3862
/// With thresholds of 1000 tokens for both map-reduce and the diff budget and
/// a `max_diff_length` of 10000, a tiny one-line diff must be analysed with
/// `ComposeAnalysisStrategy::Direct`.
#[test]
fn test_compose_analysis_strategy_keeps_small_group_direct() {
   let config = CommitConfig {
      map_reduce_threshold: 1_000,
      max_diff_tokens: 1_000,
      max_diff_length: 10_000,
      ..CommitConfig::default()
   };
   let counter = create_token_counter(&config);

   let diff = "diff --git a/a.rs b/a.rs\n+a";
   let strategy = compose_analysis_strategy(diff, &config, &counter);
   assert_eq!(strategy, ComposeAnalysisStrategy::Direct);
}
3878
/// Three ambiguous files with 70, 60 and 10 hunks must be chunked into two
/// batches holding one and two files respectively. No hunk may be lost (140
/// in total) and every batch must respect `MAX_BIND_FILES_PER_REQUEST` and
/// `MAX_BIND_HUNKS_PER_REQUEST`.
#[test]
fn test_chunk_ambiguous_files_splits_large_binding_request() {
   // Builds one fixture entry with hunk IDs `<file_id>-H001` ..= `<file_id>-H<hunk_count>`.
   let binding = |file_id: &str, path: &str, candidates: [&str; 2], hunk_count: usize| {
      AmbiguousFileBinding {
         file_id:             file_id.to_string(),
         path:                path.to_string(),
         candidate_group_ids: candidates.iter().map(|id| id.to_string()).collect(),
         hunk_ids:            (1..=hunk_count)
            .map(|idx| format!("{file_id}-H{idx:03}"))
            .collect(),
      }
   };
   let ambiguous_files = vec![
      binding("F001", "src/alpha.rs", ["G1", "G2"], 70),
      binding("F002", "src/beta.rs", ["G1", "G3"], 60),
      binding("F003", "src/gamma.rs", ["G2", "G3"], 10),
   ];

   let batches = chunk_ambiguous_files(&ambiguous_files);

   let files_per_batch: Vec<usize> = batches.iter().map(|batch| batch.len()).collect();
   let hunks_per_batch: Vec<usize> = batches
      .iter()
      .map(|batch| batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>())
      .collect();

   assert_eq!(files_per_batch, [1, 2]);
   assert_eq!(hunks_per_batch.iter().sum::<usize>(), 140);
   assert!(
      files_per_batch
         .iter()
         .zip(&hunks_per_batch)
         .all(|(&files, &hunks)| {
            files <= MAX_BIND_FILES_PER_REQUEST && hunks <= MAX_BIND_HUNKS_PER_REQUEST
         })
   );
}
3919}