1use std::{
2 borrow::Cow,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 fs,
6 path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13 api::{
14 AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15 generate_summary_from_analysis, run_oneshot, strict_json_schema,
16 },
17 compose_types::{
18 ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19 ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20 },
21 config::CommitConfig,
22 diff::smart_truncate_diff,
23 error::{CommitGenError, Result},
24 git::{
25 TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26 get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27 reset_paths_to, update_ref_checked, write_index_tree, write_real_index_tree,
28 },
29 map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30 normalization::{format_commit_message, post_process_commit_message},
31 patch::{
32 StageResult, build_compose_snapshot, create_executable_group_patch,
33 force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
34 stage_executable_group_in_index,
35 },
36 style, templates,
37 tokens::{TokenCounter, create_token_counter},
38 types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
39 validation::validate_commit_message,
40};
41
/// Per-file observation cap used by `snapshot_summary_budget` when the snapshot is small
/// enough that the summary is not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and stored inside every cached plan;
/// `load_cached_plan` ignores cache entries whose stored version differs.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// File count above which `snapshot_summary_budget` switches to the medium compaction level.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Hunk count above which `snapshot_summary_budget` switches to the medium compaction level.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// File count above which the summary uses the tightest budget and planning switches to
/// `PlanningMode::Area` (see `planning_mode_for_snapshot`).
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Hunk count above which the summary uses the tightest budget and planning switches to
/// `PlanningMode::Area` (see `planning_mode_for_snapshot`).
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// A planning bucket with at most this many files (and few enough hunks) is not split further
/// by `collect_planning_buckets`.
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A planning bucket with at most this many hunks (and few enough files) is not split further
/// by `collect_planning_buckets`.
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Deepest path-prefix depth `collect_planning_buckets` will split on.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the two monolith-fallback thresholds are not referenced in this part of the
// file; presumably they gate a fallback when the planner returns a single broad group — confirm.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
// NOTE(review): not referenced in this part of the file; presumably the per-request limits
// for hunk-binding requests — confirm against the binding code.
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
// NOTE(review): not referenced in this part of the file; presumably the number of commit
// messages generated concurrently — confirm.
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// Repository state recorded by `capture_compose_base_state`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComposeBaseState {
    /// Value returned by `get_head_hash` at capture time.
    head_hash: String,
    /// Value returned by `current_head_ref` at capture time.
    head_ref: String,
    /// Value returned by `write_real_index_tree` at capture time.
    index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68 Ok(ComposeBaseState {
69 head_hash: get_head_hash(dir)?,
70 head_ref: current_head_ref(dir)?,
71 index_tree: write_real_index_tree(dir)?,
72 })
73}
74
/// How a diff is analysed, as selected by `compose_analysis_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeAnalysisStrategy {
    /// The diff is within both the length and token limits.
    Direct,
    /// The diff exceeds the length or token limit but map-reduce was not selected.
    SmartTruncate,
    /// `should_use_map_reduce` accepted the diff.
    MapReduce,
}
81
82fn compose_analysis_strategy(
83 diff: &str,
84 config: &CommitConfig,
85 counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87 if should_use_map_reduce(diff, config, counter) {
88 return ComposeAnalysisStrategy::MapReduce;
89 }
90
91 let diff_tokens = counter.count_sync(diff);
92 if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93 return ComposeAnalysisStrategy::SmartTruncate;
94 }
95
96 ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100 config
101 .max_diff_length
102 .min(config.max_diff_tokens.saturating_mul(4))
103 .max(1)
104}
105
/// Serde wrapper around a list of intent groups under a top-level `groups` key, the same key
/// the schema from `build_intent_schema` requires.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
    /// The intent groups carried by the response.
    groups: Vec<ComposeIntentGroup>,
}
110
/// Serde wrapper around a list of binding assignments under a top-level `assignments` key,
/// the same key the schema from `build_binding_schema` requires.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
    /// The binding assignments carried by the response.
    assignments: Vec<ComposeBindingAssignment>,
}
115
/// On-disk envelope written by `save_cached_plan` and read back by `load_cached_plan`.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
    /// `COMPOSE_PLAN_SCHEMA_VERSION` at the time the plan was saved.
    schema_version: String,
    /// Result of `compose_plan_cache_key` for the snapshot the plan was built from.
    cache_key: String,
    /// The cached executable plan itself.
    plan: ComposeExecutablePlan,
}
122
// NOTE(review): not used in this part of the file; presumably describes a file claimed by
// more than one group whose hunks still need to be bound — confirm against the binding code.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
    file_id: String,
    path: String,
    candidate_group_ids: Vec<String>,
    hunk_ids: Vec<String>,
}
130
// NOTE(review): not used in this part of the file; presumably the set of groups an
// ambiguous hunk may be assigned to — confirm against the binding code.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
    candidate_group_ids: Vec<String>,
}
135
// NOTE(review): not used in this part of the file, so which of hunk IDs / group IDs is the
// key and which is the value set cannot be determined from here — confirm at the use sites.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
// NOTE(review): not used in this part of the file; presumably the outcome of checking a
// binding response, split into resolved assignments and leftover IDs — confirm.
#[derive(Debug)]
struct BindingEvaluation {
    assigned: HashMap<String, Vec<String>>,
    unresolved: Vec<String>,
}
143
/// Limits applied by `render_snapshot_summary`, chosen by `snapshot_summary_budget`.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
    /// Maximum number of observations rendered for each file.
    max_observations_per_file: usize,
    /// Maximum number of hunks rendered for each file; `None` renders every hunk.
    max_hunks_per_file: Option<usize>,
}
149
/// Granularity of planning targets, chosen by `planning_mode_for_snapshot`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
    /// One planning target per file (see `build_file_planning_targets`).
    File,
    /// One planning target per path-based bucket of files (see `build_area_planning_targets`).
    Area,
}
155
/// A unit offered to the planner: a single file in file mode, a bucket of files in area mode.
#[derive(Debug, Clone)]
struct PlanningTarget {
    /// The file's own ID in file mode; `A001`, `A002`, … in area mode.
    target_id: String,
    /// The file path in file mode; the bucket label in area mode.
    label: String,
    /// IDs of the files this target expands to.
    file_ids: Vec<String>,
    /// Total number of hunks across `file_ids`.
    hunk_count: usize,
    /// Sum of the `additions` counts of the member files.
    additions: usize,
    /// Sum of the `deletions` counts of the member files.
    deletions: usize,
}
165
/// Planning targets for a snapshot plus lookup aliases, built by `build_planning_index`.
#[derive(Debug, Clone)]
struct PlanningIndex {
    /// Whether `targets` are individual files or path-based areas.
    mode: PlanningMode,
    /// All planning targets, in build order.
    targets: Vec<PlanningTarget>,
    /// Maps each target's ID, its ASCII-uppercased ID and its normalized label to the target ID.
    aliases: HashMap<String, String>,
}
172
/// A group of files produced by `collect_planning_buckets`.
#[derive(Debug, Clone)]
struct PlanningBucket {
    /// Label computed by `planning_bucket_label` for the member files.
    label: String,
    /// IDs of the files in this bucket.
    file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180 fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181 let mut expanded = Vec::new();
182 let mut seen_file_ids = HashSet::new();
183
184 for target_id in target_ids {
185 if let Some(target) = self
186 .targets
187 .iter()
188 .find(|candidate| candidate.target_id == *target_id)
189 {
190 for file_id in &target.file_ids {
191 if seen_file_ids.insert(file_id.clone()) {
192 expanded.push(file_id.clone());
193 }
194 }
195 }
196 }
197
198 expanded
199 }
200}
201
202impl SnapshotSummaryBudget {
203 const fn is_compacted(self) -> bool {
204 self.max_hunks_per_file.is_some()
205 }
206}
207
/// Reports whether `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it is one of the known
/// manifest names (case-sensitive) or when its extension is `lock` or `lockb`
/// (ASCII case-insensitive). Paths with no final component never match.
fn is_dependency_manifest(path: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        "Cargo.toml",
        "Cargo.lock",
        "package.json",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "build.gradle",
        "build.gradle.kts",
        "gradle.properties",
        "pom.xml",
    ];

    match Path::new(path).file_name().and_then(|name| name.to_str()) {
        None => false,
        Some(name) if KNOWN_MANIFESTS.iter().any(|known| *known == name) => true,
        // Unknown names still count when they carry a lock-file extension.
        Some(name) => match Path::new(name).extension() {
            Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
            None => false,
        },
    }
}
247
248fn save_debug_artifact<T: Serialize>(
249 debug_dir: Option<&Path>,
250 filename: &str,
251 value: &T,
252) -> Result<()> {
253 let Some(debug_dir) = debug_dir else {
254 return Ok(());
255 };
256
257 fs::create_dir_all(debug_dir)?;
258 let path = debug_dir.join(filename);
259 let json = serde_json::to_string_pretty(value)?;
260 fs::write(path, json)?;
261 Ok(())
262}
263
/// Hashes the bytes of `input` with 64-bit FNV-1a and returns the digest as 16 lowercase
/// hex digits.
fn fnv1a_64(input: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    let digest = input
        .bytes()
        .fold(OFFSET_BASIS, |state, byte| (state ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274 snapshot: &ComposeSnapshot,
275 max_commits: usize,
276 analysis_model: &str,
277) -> String {
278 fnv1a_64(&format!(
279 "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280 snapshot.diff, snapshot.stat
281 ))
282}
283
284fn compose_plan_cache_path(
285 dir: &str,
286 snapshot: &ComposeSnapshot,
287 max_commits: usize,
288 analysis_model: &str,
289) -> Result<PathBuf> {
290 let git_dir = get_git_dir(dir)?;
291 Ok(git_dir.join("llm-git").join(format!(
292 "compose-plan-{}.json",
293 compose_plan_cache_key(snapshot, max_commits, analysis_model)
294 )))
295}
296
/// Loads a previously cached executable plan for the given planning inputs, if one is usable.
///
/// Returns `Ok(None)` when no cache file exists, when it cannot be read or parsed, when its
/// schema version or cache key does not match, or when the plan fails
/// `validate_executable_plan` against `snapshot`. Unparseable and no-longer-valid cache files
/// are deleted (best effort); unreadable or mismatching ones are left in place.
///
/// # Errors
///
/// Only a failure to compute the cache path is returned as an error; every other problem is
/// downgraded to a warning on stderr or a silent miss.
fn load_cached_plan(
    dir: &str,
    snapshot: &ComposeSnapshot,
    max_commits: usize,
    analysis_model: &str,
) -> Result<Option<ComposeExecutablePlan>> {
    let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
    if !cache_path.exists() {
        return Ok(None);
    }

    // A read failure is treated as a cache miss; the file is left untouched.
    let content = match fs::read_to_string(&cache_path) {
        Ok(content) => content,
        Err(err) => {
            eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
            return Ok(None);
        },
    };
    // A parse failure means the file is corrupt: warn and remove it (removal errors ignored).
    let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
        Ok(cached) => cached,
        Err(err) => {
            eprintln!(
                "{}",
                style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
            );
            let _ = fs::remove_file(&cache_path);
            return Ok(None);
        },
    };
    let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);

    // A version or key mismatch is a silent miss; the file is kept.
    if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
        return Ok(None);
    }
    // The stored plan must still validate against the current snapshot; otherwise warn and
    // remove the file.
    if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
        eprintln!(
            "{}",
            style::warning(&format!(
                "Discarding cached compose plan (no longer valid for current snapshot): {err}"
            ))
        );
        let _ = fs::remove_file(&cache_path);
        return Ok(None);
    }
    Ok(Some(cached.plan))
}
343
344fn save_cached_plan(
345 dir: &str,
346 snapshot: &ComposeSnapshot,
347 max_commits: usize,
348 analysis_model: &str,
349 plan: &ComposeExecutablePlan,
350) -> Result<()> {
351 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352 if let Some(parent) = cache_path.parent() {
353 fs::create_dir_all(parent)?;
354 }
355
356 let cached = ComposeCachedPlan {
357 schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358 cache_key: compose_plan_cache_key(snapshot, max_commits, analysis_model),
359 plan: plan.clone(),
360 };
361 fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362 Ok(())
363}
364
/// Formats a line range of `count` lines beginning at `start`.
///
/// An empty range renders as `"0"`, a single line as its line number, and longer ranges as
/// `first-last` (inclusive).
fn format_line_range(start: usize, count: usize) -> String {
    if count == 0 {
        return "0".to_string();
    }
    if count == 1 {
        return start.to_string();
    }

    let last_line = start + count - 1;
    format!("{start}-{}", last_line)
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376 {
377 SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378 } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379 || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380 {
381 SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382 } else {
383 SnapshotSummaryBudget {
384 max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385 max_hunks_per_file: None,
386 }
387 }
388}
389
/// Picks at most `max_samples` indices from `0..count`, spread evenly across the range.
///
/// * `max_samples == 0` yields no positions.
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples == 1`, only index `0` is returned.
/// * Otherwise the first and last indices are always included and the remaining slots are
///   spaced evenly between them (integer division), in ascending order.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
    // A zero budget must produce zero samples, even when there is something to sample.
    if max_samples == 0 {
        return Vec::new();
    }

    if count <= max_samples {
        return (0..count).collect();
    }

    if max_samples == 1 {
        return vec![0];
    }

    let last = count - 1;
    let steps = max_samples - 1;
    let mut positions = Vec::with_capacity(max_samples);
    for slot in 0..max_samples {
        let position = slot * last / steps;
        // Defensive de-duplication: never emit the same index twice in a row.
        if positions.last() != Some(&position) {
            positions.push(position);
        }
    }
    positions
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411 match budget.max_hunks_per_file {
412 None => file.hunk_ids.iter().map(String::as_str).collect(),
413 Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414 .into_iter()
415 .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416 .collect(),
417 }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421 let budget = snapshot_summary_budget(snapshot);
422 let observations_by_file: HashMap<&str, Vec<&str>> = observations
423 .iter()
424 .map(|observation| {
425 (
426 observation.file.as_str(),
427 observation
428 .observations
429 .iter()
430 .map(String::as_str)
431 .take(budget.max_observations_per_file)
432 .collect(),
433 )
434 })
435 .collect();
436
437 let mut out = String::new();
438 if budget.is_compacted() {
439 let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440 writeln!(
441 out,
442 "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443 representative hunks and {} observation(s) per file",
444 budget.max_observations_per_file
445 )
446 .unwrap();
447 }
448
449 for file in &snapshot.files {
450 writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451 if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452 for observation in file_observations {
453 writeln!(out, " observation: {observation}").unwrap();
454 }
455 }
456
457 let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458 for hunk_id in &rendered_hunk_ids {
459 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460 if hunk.synthetic {
461 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462 } else {
463 writeln!(
464 out,
465 " - {} old:{} new:{} :: {}",
466 hunk.hunk_id,
467 format_line_range(hunk.old_start, hunk.old_count),
468 format_line_range(hunk.new_start, hunk.new_count),
469 hunk.snippet
470 )
471 .unwrap();
472 }
473 }
474 }
475
476 let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477 if omitted_hunks > 0 {
478 writeln!(out, " ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479 }
480 }
481
482 out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488 {
489 PlanningMode::Area
490 } else {
491 PlanningMode::File
492 }
493}
494
/// Counts the `/`-separated segments of `path`. An empty path counts as one segment.
fn path_depth(path: &str) -> usize {
    // Every separator adds exactly one segment to the initial one.
    path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, joined by `/`.
///
/// A depth of 0 yields an empty string; a depth beyond the segment count yields the whole
/// path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
    if depth == 0 {
        return String::new();
    }

    // The prefix ends just before the `depth`-th separator, if there is one.
    match path.match_indices('/').nth(depth - 1) {
        Some((separator_at, _)) => path[..separator_at].to_string(),
        None => path.to_string(),
    }
}
508
/// Returns the longest run of leading `/`-separated segments shared by every path.
///
/// An empty slice yields an empty string and a single path yields that path unchanged.
/// Segments are compared whole, so `src/ab` and `src/a` share only `src`.
fn common_path_prefix(paths: &[String]) -> String {
    let mut remaining = paths.iter();
    let Some(first) = remaining.next() else {
        return String::new();
    };

    let mut shared: Vec<&str> = first.split('/').collect();
    for other in remaining {
        // Nothing left in common: further comparisons cannot change the result.
        if shared.is_empty() {
            break;
        }

        let keep = shared
            .iter()
            .zip(other.split('/'))
            .take_while(|(ours, theirs)| **ours == *theirs)
            .count();
        shared.truncate(keep);
    }

    shared.join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532 file_ids
533 .iter()
534 .filter_map(|file_id| snapshot.file_by_id(file_id))
535 .map(|file| file.hunk_ids.len())
536 .sum()
537}
538
539fn group_file_ids_by_prefix(
540 snapshot: &ComposeSnapshot,
541 file_ids: &[String],
542 depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544 let mut groups = BTreeMap::new();
545
546 for file_id in file_ids {
547 if let Some(file) = snapshot.file_by_id(file_id) {
548 groups
549 .entry(prefix_at_depth(&file.path, depth))
550 .or_insert_with(Vec::new)
551 .push(file_id.clone());
552 }
553 }
554
555 groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559 let paths: Vec<String> = file_ids
560 .iter()
561 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562 .collect();
563
564 let common_prefix = common_path_prefix(&paths);
565 if common_prefix.is_empty() {
566 paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567 } else {
568 common_prefix
569 }
570}
571
572fn collect_planning_buckets(
573 snapshot: &ComposeSnapshot,
574 file_ids: &[String],
575 depth: usize,
576) -> Vec<PlanningBucket> {
577 let file_count = file_ids.len();
578 let hunk_count = bucket_hunk_count(snapshot, file_ids);
579 let max_path_depth = file_ids
580 .iter()
581 .filter_map(|file_id| snapshot.file_by_id(file_id))
582 .map(|file| path_depth(&file.path))
583 .max()
584 .unwrap_or(depth);
585
586 let should_stop =
587 file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588 if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589 return vec![PlanningBucket {
590 label: planning_bucket_label(snapshot, file_ids),
591 file_ids: file_ids.to_vec(),
592 }];
593 }
594
595 let next_depth = depth + 1;
596 let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597 if groups.len() <= 1 {
598 return collect_planning_buckets(snapshot, file_ids, next_depth);
599 }
600
601 groups
602 .into_values()
603 .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604 .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608 let all_file_ids: Vec<String> = snapshot
609 .files
610 .iter()
611 .map(|file| file.file_id.clone())
612 .collect();
613 let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615 buckets
616 .into_iter()
617 .enumerate()
618 .map(|(idx, bucket)| {
619 let mut additions = 0_usize;
620 let mut deletions = 0_usize;
621 let mut hunk_count = 0_usize;
622
623 for file_id in &bucket.file_ids {
624 if let Some(file) = snapshot.file_by_id(file_id) {
625 additions = additions.saturating_add(file.additions);
626 deletions = deletions.saturating_add(file.deletions);
627 hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628 }
629 }
630
631 PlanningTarget {
632 target_id: format!("A{:03}", idx + 1),
633 label: bucket.label,
634 file_ids: bucket.file_ids,
635 hunk_count,
636 additions,
637 deletions,
638 }
639 })
640 .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644 snapshot
645 .files
646 .iter()
647 .map(|file| PlanningTarget {
648 target_id: file.file_id.clone(),
649 label: file.path.clone(),
650 file_ids: vec![file.file_id.clone()],
651 hunk_count: file.hunk_ids.len(),
652 additions: file.additions,
653 deletions: file.deletions,
654 })
655 .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659 let mode = planning_mode_for_snapshot(snapshot);
660 let targets = match mode {
661 PlanningMode::File => build_file_planning_targets(snapshot),
662 PlanningMode::Area => build_area_planning_targets(snapshot),
663 };
664
665 let aliases = targets
666 .iter()
667 .flat_map(|target| {
668 let normalized_label = normalize_file_reference(&target.label);
669 [
670 (target.target_id.clone(), target.target_id.clone()),
671 (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672 (normalized_label, target.target_id.clone()),
673 ]
674 })
675 .collect();
676
677 PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681 sample_positions(target.file_ids.len(), 4)
682 .into_iter()
683 .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684 .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688 let hunk_ids: Vec<&String> = target
689 .file_ids
690 .iter()
691 .filter_map(|file_id| snapshot.file_by_id(file_id))
692 .flat_map(|file| file.hunk_ids.iter())
693 .collect();
694
695 sample_positions(hunk_ids.len(), 4)
696 .into_iter()
697 .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698 .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702 let mut out = String::new();
703
704 match index.mode {
705 PlanningMode::File => {
706 writeln!(out, "# planning over individual file IDs").unwrap();
707 },
708 PlanningMode::Area => {
709 writeln!(
710 out,
711 "# planning over {} area IDs spanning {} files",
712 index.targets.len(),
713 index
714 .targets
715 .iter()
716 .flat_map(|target| target.file_ids.iter())
717 .collect::<HashSet<_>>()
718 .len()
719 )
720 .unwrap();
721 },
722 }
723
724 for target in &index.targets {
725 writeln!(
726 out,
727 "{} {} | {} files | {} hunks | +{}/-{}",
728 target.target_id,
729 target.label,
730 target.file_ids.len(),
731 target.hunk_count,
732 target.additions,
733 target.deletions
734 )
735 .unwrap();
736 }
737
738 out
739}
740
741fn render_planning_snapshot_summary(
742 snapshot: &ComposeSnapshot,
743 observations: &[FileObservation],
744 index: &PlanningIndex,
745) -> String {
746 if index.mode == PlanningMode::File {
747 return render_snapshot_summary(snapshot, observations);
748 }
749
750 let observations_by_file: HashMap<&str, Vec<&str>> = observations
751 .iter()
752 .map(|observation| {
753 (
754 observation.file.as_str(),
755 observation
756 .observations
757 .iter()
758 .map(String::as_str)
759 .take(1)
760 .collect(),
761 )
762 })
763 .collect();
764
765 let mut out = String::new();
766 writeln!(
767 out,
768 "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769 )
770 .unwrap();
771
772 for target in &index.targets {
773 writeln!(
774 out,
775 "- {} {} ({} files, {} hunks, +{}/-{})",
776 target.target_id,
777 target.label,
778 target.file_ids.len(),
779 target.hunk_count,
780 target.additions,
781 target.deletions
782 )
783 .unwrap();
784
785 let sample_file_ids = sample_file_ids_for_target(target);
786 if !sample_file_ids.is_empty() {
787 let sample_files: Vec<String> = sample_file_ids
788 .iter()
789 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790 .collect();
791 writeln!(out, " files: {}", sample_files.join(", ")).unwrap();
792 let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793 if omitted > 0 {
794 writeln!(out, " ... {omitted} more files omitted from {}", target.target_id).unwrap();
795 }
796 }
797
798 let mut rendered_observations = 0_usize;
799 for file_id in &target.file_ids {
800 let Some(file) = snapshot.file_by_id(file_id) else {
801 continue;
802 };
803 let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804 continue;
805 };
806
807 for observation in file_observations {
808 writeln!(out, " observation: {observation}").unwrap();
809 rendered_observations += 1;
810 if rendered_observations >= 2 {
811 break;
812 }
813 }
814
815 if rendered_observations >= 2 {
816 break;
817 }
818 }
819
820 for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821 if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822 if hunk.synthetic {
823 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824 } else {
825 writeln!(
826 out,
827 " - {} old:{} new:{} :: {}",
828 hunk.hunk_id,
829 format_line_range(hunk.old_start, hunk.old_count),
830 format_line_range(hunk.new_start, hunk.new_count),
831 hunk.snippet
832 )
833 .unwrap();
834 }
835 }
836 }
837 }
838
839 out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843 match index.mode {
844 PlanningMode::File => format!(
845 "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846 snapshot.files.len()
847 ),
848 PlanningMode::Area => format!(
849 "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850 Coverage: {} areas spanning {} files.",
851 index.targets.len(),
852 snapshot.files.len()
853 ),
854 }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858 match index.mode {
859 PlanningMode::File => {
860 "Use only the provided file IDs and keep the grouping conservative.".to_string()
861 },
862 PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863 planning areas. Split along independent subsystems or workstreams \
864 when the areas point at unrelated changes."
865 .to_string(),
866 }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870 match index.mode {
871 PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872 PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873 one broad group if nearly every area clearly belongs to the same \
874 atomic change."
875 .to_string(),
876 }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880 let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882 strict_json_schema(
883 serde_json::json!({
884 "groups": {
885 "type": "array",
886 "items": {
887 "type": "object",
888 "properties": {
889 "group_id": {
890 "type": "string",
891 "description": "Stable identifier like G1, G2, G3"
892 },
893 "file_ids": {
894 "type": "array",
895 "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896 "items": { "type": "string" }
897 },
898 "type": {
899 "type": "string",
900 "enum": type_enum,
901 "description": "Conventional commit type for this group"
902 },
903 "scope": {
904 "type": "string",
905 "description": "Optional scope (module/component). Omit if broad."
906 },
907 "rationale": {
908 "type": "string",
909 "description": "Brief explanation of the logical change"
910 },
911 "dependencies": {
912 "type": "array",
913 "description": "Group IDs this group depends on",
914 "items": { "type": "string" }
915 }
916 },
917 "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918 "additionalProperties": false
919 }
920 }
921 }),
922 &["groups"],
923 )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927 strict_json_schema(
928 serde_json::json!({
929 "assignments": {
930 "type": "array",
931 "items": {
932 "type": "object",
933 "properties": {
934 "group_id": { "type": "string" },
935 "hunk_ids": {
936 "type": "array",
937 "items": { "type": "string" }
938 }
939 },
940 "required": ["group_id", "hunk_ids"],
941 "additionalProperties": false
942 }
943 }
944 }),
945 &["assignments"],
946 )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950 groups: &[T],
951 group_id: FId,
952 dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955 FId: Fn(&T) -> &str,
956 FDeps: Fn(&T) -> &[String],
957{
958 let mut index_by_id = HashMap::new();
959 for (idx, group) in groups.iter().enumerate() {
960 let id = group_id(group);
961 if id.trim().is_empty() {
962 return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963 }
964 if index_by_id.insert(id.to_string(), idx).is_some() {
965 return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966 }
967 }
968
969 let mut in_degree = vec![0_usize; groups.len()];
970 let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972 for (idx, group) in groups.iter().enumerate() {
973 for dependency in dependencies(group) {
974 let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975 CommitGenError::Other(format!(
976 "Group {} depends on unknown group_id '{}'",
977 group_id(group),
978 dependency
979 ))
980 })?;
981 if dependency_idx == idx {
982 return Err(CommitGenError::Other(format!(
983 "Group {} depends on itself",
984 group_id(group)
985 )));
986 }
987
988 adjacency[dependency_idx].push(idx);
989 in_degree[idx] += 1;
990 }
991 }
992
993 let mut queue: Vec<usize> = (0..groups.len())
994 .filter(|idx| in_degree[*idx] == 0)
995 .collect();
996 let mut order = Vec::with_capacity(groups.len());
997
998 while let Some(node) = queue.pop() {
999 order.push(node);
1000 for neighbor in &adjacency[node] {
1001 in_degree[*neighbor] -= 1;
1002 if in_degree[*neighbor] == 0 {
1003 queue.push(*neighbor);
1004 }
1005 }
1006 }
1007
1008 if order.len() != groups.len() {
1009 return Err(CommitGenError::Other(
1010 "Circular dependency detected in compose groups".to_string(),
1011 ));
1012 }
1013
1014 Ok(order)
1015}
1016
/// Normalizes a file or target reference as it might appear in free-form model output.
///
/// Strips, from both ends, any mix of whitespace and quote characters (backtick, double
/// quote, single quote); additionally strips trailing `,` and `;`; then removes any leading
/// `./` prefixes. Trailing punctuation and quotes are stripped together, so a reference such
/// as `"src/a.rs",` normalizes to `src/a.rs` rather than keeping the closing quote.
fn normalize_file_reference(raw_file_ref: &str) -> String {
    /// Characters that may wrap a reference on either side.
    fn is_wrapper(ch: char) -> bool {
        ch.is_whitespace() || matches!(ch, '`' | '"' | '\'')
    }

    /// Characters that may trail a reference: wrappers plus list punctuation.
    fn is_trailing_noise(ch: char) -> bool {
        is_wrapper(ch) || matches!(ch, ',' | ';')
    }

    raw_file_ref
        .trim_start_matches(is_wrapper)
        .trim_end_matches(is_trailing_noise)
        .trim_start_matches("./")
        .to_string()
}
1025
/// Extracts distinct keyword tokens from free-form planning text.
///
/// The text is split on every character that is not ASCII alphanumeric. Each remaining run is
/// lowercased and kept when it is at least three characters long, is not a stop word, and has
/// not been seen before. Tokens are returned in order of first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "and",
        "for",
        "the",
        "with",
        "from",
        "into",
        "after",
        "before",
        "over",
        "under",
        "plus",
        "across",
        "update",
        "updated",
        "refactor",
        "refactored",
        "changes",
        "change",
        "logical",
        "group",
        "groups",
        "commit",
        "commits",
    ];

    let mut seen: HashSet<String> = HashSet::new();

    text
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        // Runs are pure ASCII, so byte length equals character count.
        .filter(|word| word.len() >= 3)
        .map(str::to_ascii_lowercase)
        .filter(|word| !STOP_WORDS.contains(&word.as_str()))
        // `insert` returns false for repeats, which drops duplicates while keeping order.
        .filter(|word| seen.insert(word.clone()))
        .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079 let normalized = normalize_file_reference(raw);
1080 let uppercase = normalized.to_ascii_uppercase();
1081
1082 if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083 return Some(format!("G{uppercase}"));
1084 }
1085
1086 if let Some(rest) = uppercase.strip_prefix('G')
1087 && !rest.is_empty()
1088 && rest.chars().all(|ch| ch.is_ascii_digit())
1089 {
1090 return Some(format!("G{rest}"));
1091 }
1092
1093 let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094 let compact = uppercase
1095 .chars()
1096 .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097 .collect::<String>();
1098 if compact.starts_with("GROUP") && !digits.is_empty() {
1099 return Some(format!("G{digits}"));
1100 }
1101
1102 None
1103}
1104
/// Coarse classification of a changed file, assigned by `compose_file_category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeFileCategory {
    /// The file is flagged as binary.
    Binary,
    /// The path is a dependency manifest or lock file (see `is_dependency_manifest`).
    Dependency,
    /// Markdown files and README files.
    Docs,
    /// The lowercased path contains `prompt` or `system`.
    Prompt,
    /// The path is under a `tests/` directory or the file name contains `test` or `spec`.
    Test,
    /// The extension is one of the recognised configuration formats.
    Config,
    /// The extension is one of the recognised programming-language extensions.
    Source,
    /// Nothing above matched.
    Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118 if file.is_binary {
1119 return ComposeFileCategory::Binary;
1120 }
1121
1122 if is_dependency_manifest(&file.path) {
1123 return ComposeFileCategory::Dependency;
1124 }
1125
1126 let filename_lower = file.path.to_ascii_lowercase();
1127 let file_name = Path::new(&filename_lower)
1128 .file_name()
1129 .and_then(|name| name.to_str())
1130 .unwrap_or_default();
1131 let extension = Path::new(&filename_lower)
1132 .extension()
1133 .and_then(|ext| ext.to_str())
1134 .unwrap_or_default();
1135
1136 if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137 return ComposeFileCategory::Prompt;
1138 }
1139
1140 if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141 return ComposeFileCategory::Docs;
1142 }
1143
1144 if filename_lower.contains("/tests/")
1145 || filename_lower.starts_with("tests/")
1146 || file_name.contains("test")
1147 || file_name.contains("spec")
1148 {
1149 return ComposeFileCategory::Test;
1150 }
1151
1152 if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153 return ComposeFileCategory::Config;
1154 }
1155
1156 if matches!(
1157 extension,
1158 "rs"
1159 | "py"
1160 | "js"
1161 | "jsx"
1162 | "ts"
1163 | "tsx"
1164 | "go"
1165 | "java"
1166 | "kt"
1167 | "c"
1168 | "cc"
1169 | "cpp"
1170 | "h"
1171 | "hpp"
1172 | "cs"
1173 | "rb"
1174 | "php"
1175 | "swift"
1176 | "scala"
1177 | "m"
1178 | "mm"
1179 ) {
1180 return ComposeFileCategory::Source;
1181 }
1182
1183 ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments `left` and `right` share.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
    let mut right_segments = right.split('/');
    let mut depth = 0;

    for left_segment in left.split('/') {
        match right_segments.next() {
            Some(right_segment) if right_segment == left_segment => depth += 1,
            _ => break,
        }
    }

    depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195 let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197 if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198 score += 40;
1199 }
1200
1201 if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202 score += 12;
1203 }
1204
1205 if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206 score += 18;
1207 }
1208
1209 score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213 match (compose_file_category(file), group.commit_type.as_str()) {
1214 (ComposeFileCategory::Docs, "docs") => 25,
1215 (ComposeFileCategory::Test, "test") => 25,
1216 (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217 (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218 (
1219 ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220 "feat" | "fix" | "refactor" | "perf",
1221 ) => 10,
1222 _ => 0,
1223 }
1224}
1225
1226fn best_group_for_missing_file(
1227 snapshot: &ComposeSnapshot,
1228 groups: &[ComposeIntentGroup],
1229 missing_file: &ComposeFile,
1230) -> usize {
1231 let mut best_group_idx = 0;
1232 let mut best_score = i32::MIN;
1233 let mut best_group_size = usize::MAX;
1234
1235 for (group_idx, group) in groups.iter().enumerate() {
1236 let similarity = group
1237 .file_ids
1238 .iter()
1239 .filter_map(|file_id| snapshot.file_by_id(file_id))
1240 .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241 .max()
1242 .unwrap_or_default();
1243 let score = similarity + group_type_bonus(missing_file, group);
1244 let group_size = group.file_ids.len();
1245
1246 if score > best_score || (score == best_score && group_size < best_group_size) {
1247 best_group_idx = group_idx;
1248 best_score = score;
1249 best_group_size = group_size;
1250 }
1251 }
1252
1253 best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257 raw_dependency: &str,
1258 known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260 let normalized = normalize_file_reference(raw_dependency);
1261 if normalized.is_empty() {
1262 return None;
1263 }
1264
1265 if known_group_ids.contains(&normalized) {
1266 return Some(normalized);
1267 }
1268
1269 let uppercase = normalized.to_ascii_uppercase();
1270 if known_group_ids.contains(&uppercase) {
1271 return Some(uppercase);
1272 }
1273
1274 let candidate = extract_group_id_candidate(&normalized)?;
1275 known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279 let label = target.label.to_ascii_lowercase();
1280 let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281 let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283 if let Some(scope) = &group.scope {
1284 let scope = scope.as_str().to_ascii_lowercase();
1285 if label.contains(&scope) || workstream.contains(&scope) {
1286 score += 140;
1287 }
1288
1289 for segment in scope.split('/') {
1290 if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291 score += 45;
1292 }
1293 }
1294 }
1295
1296 for token in planning_text_tokens(&group.rationale) {
1297 if label.contains(&token) || workstream.contains(&token) {
1298 score += 16;
1299 }
1300 }
1301
1302 match group.commit_type.as_str() {
1303 "ci" if target.label.starts_with(".github/") => score += 120,
1304 "docs"
1305 if target.label.starts_with("docs/")
1306 || Path::new(&target.label)
1307 .extension()
1308 .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309 {
1310 score += 80;
1311 },
1312 "build" | "chore"
1313 if target.label.contains("Cargo")
1314 || target.label.contains("package")
1315 || target.label.contains("lock")
1316 || target.label.contains("tsconfig")
1317 || target.label.contains("biome")
1318 || target.label.contains("bun") =>
1319 {
1320 score += 55;
1321 },
1322 _ => {},
1323 }
1324
1325 score
1326}
1327
1328fn seed_group_targets(
1329 groups: &[ComposeIntentGroup],
1330 planning_index: &PlanningIndex,
1331 group_targets: &mut [Vec<String>],
1332 repair_notes: &mut Vec<String>,
1333) {
1334 let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336 for (group_idx, group) in groups.iter().enumerate() {
1337 if !group_targets[group_idx].is_empty() {
1338 continue;
1339 }
1340
1341 let fallback_target = planning_index
1342 .targets
1343 .iter()
1344 .max_by_key(|target| {
1345 let mut score = planning_target_match_score(target, group);
1346 if !claimed_target_ids.contains(&target.target_id) {
1347 score += 60;
1348 }
1349 (score, target.hunk_count, target.file_ids.len())
1350 })
1351 .or_else(|| planning_index.targets.first());
1352
1353 let Some(fallback_target) = fallback_target else {
1354 continue;
1355 };
1356
1357 group_targets[group_idx].push(fallback_target.target_id.clone());
1358 claimed_target_ids.insert(fallback_target.target_id.clone());
1359 repair_notes.push(format!(
1360 "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361 group.group_id, fallback_target.target_id, fallback_target.label
1362 ));
1363 }
1364}
1365
/// Validates and repairs the intent groups returned by the compose planner.
///
/// On input, each group's `file_ids` holds planning-target references; on
/// output it holds the file ids those targets expand to. The steps are:
///
/// 1. Resolve every target reference (exact id, uppercase id, then alias) and
///    drop references that match nothing.
/// 2. Seed groups that ended up without targets via `seed_group_targets`.
/// 3. Normalize dependencies, dropping unknown, self-referential and duplicate
///    entries.
/// 4. Expand target ids to file ids.
/// 5. Assign every snapshot file no group covers to the best-fitting group.
///
/// Returns the repaired groups together with human-readable repair notes.
///
/// # Errors
///
/// Returns `CommitGenError::Other` when `groups` is empty.
fn normalize_intent_plan(
    snapshot: &ComposeSnapshot,
    planning_index: &PlanningIndex,
    mut groups: Vec<ComposeIntentGroup>,
) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
    if groups.is_empty() {
        return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
    }

    let known_target_ids: HashSet<&str> = planning_index
        .targets
        .iter()
        .map(|target| target.target_id.as_str())
        .collect();
    let mut repair_notes = Vec::new();
    let mut covered_file_ids = HashSet::new();
    let mut normalized_group_targets = Vec::with_capacity(groups.len());

    // Step 1: resolve each group's raw target references to canonical target ids.
    for group in &groups {
        if group.file_ids.is_empty() {
            repair_notes.push(format!(
                "Compose planner left {} without planning targets; assigning targets heuristically",
                group.group_id
            ));
        }

        let mut normalized_target_ids = Vec::new();
        let mut seen_target_ids = HashSet::new();
        for raw_target_ref in &group.file_ids {
            let normalized_ref = normalize_file_reference(raw_target_ref);
            // Resolution order: exact id, uppercase id, alias lookup.
            let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
                normalized_ref.clone()
            } else {
                let uppercase_ref = normalized_ref.to_ascii_uppercase();
                if known_target_ids.contains(uppercase_ref.as_str()) {
                    uppercase_ref
                } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
                    if raw_target_ref != target_id {
                        repair_notes.push(format!(
                            "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
                        ));
                    }
                    target_id.clone()
                } else {
                    repair_notes.push(format!(
                        "Dropped unknown planning target '{}' from {}",
                        raw_target_ref, group.group_id
                    ));
                    continue;
                }
            };

            // Keep only the first occurrence of each target within a group.
            if seen_target_ids.insert(canonical_target_id.clone()) {
                normalized_target_ids.push(canonical_target_id);
            }
        }

        normalized_group_targets.push(normalized_target_ids);
    }

    // Step 2: groups whose references all failed to resolve get one seeded target.
    seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);

    // Step 3: normalize inter-group dependencies against the known group ids.
    let known_group_ids: HashSet<String> =
        groups.iter().map(|group| group.group_id.clone()).collect();
    for group in &mut groups {
        let mut normalized_dependencies = Vec::new();
        let mut seen_dependencies = HashSet::new();

        for raw_dependency in &group.dependencies {
            let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
            else {
                repair_notes.push(format!(
                    "Dropped unknown dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            };

            if dependency == group.group_id {
                repair_notes.push(format!(
                    "Dropped self-dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            }

            if seen_dependencies.insert(dependency.clone()) {
                if raw_dependency != &dependency {
                    repair_notes.push(format!(
                        "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
                    ));
                }
                normalized_dependencies.push(dependency);
            }
        }

        group.dependencies = normalized_dependencies;
    }

    // Step 4: replace target ids with the file ids they expand to.
    for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
        let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
        for file_id in &expanded_file_ids {
            covered_file_ids.insert(file_id.clone());
        }
        group.file_ids = expanded_file_ids;
    }

    // Step 5: every snapshot file must end up in some group.
    for file in &snapshot.files {
        if covered_file_ids.contains(file.file_id.as_str()) {
            continue;
        }

        let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
        let target_group = &mut groups[target_group_idx];
        target_group.file_ids.push(file.file_id.clone());
        covered_file_ids.insert(file.file_id.clone());
        repair_notes.push(format!(
            "Compose planner omitted {} ({}); assigned it to {}",
            file.file_id, file.path, target_group.group_id
        ));
    }

    Ok((groups, repair_notes))
}
1490
/// Reduces a path-like label to its workstream key.
///
/// Empty segments are ignored. Labels rooted at `.github` or at one of the
/// container directories (`apps`, `packages`, `crates`, `services`, `libs`,
/// `pass`) keep their first two segments; all other labels keep only the
/// first. A label with no non-empty segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
    const TWO_LEVEL_ROOTS: &[&str] =
        &[".github", "apps", "packages", "crates", "services", "libs", "pass"];

    let mut parts = label.split('/').filter(|part| !part.is_empty());
    let Some(root) = parts.next() else {
        return label.to_string();
    };

    match parts.next() {
        Some(child) if TWO_LEVEL_ROOTS.contains(&root) => format!("{root}/{child}"),
        _ => root.to_string(),
    }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514 let key = workstream_key_for_label(label);
1515 match key.as_str() {
1516 ".github/workflows" => "CI workflows".to_string(),
1517 ".github" => "GitHub automation".to_string(),
1518 _ => key
1519 .split('/')
1520 .next_back()
1521 .map(|segment| segment.replace(['_', '-'], " "))
1522 .unwrap_or(key),
1523 }
1524}
1525
/// Turns free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII letters and digits are kept (lowercased). Runs of `-`, `_`, `/`, `.`
/// and spaces between kept characters collapse into a single `-`. All other
/// characters are dropped. Returns `None` when nothing is left.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
    let mut slug = String::new();

    for ch in raw.trim().chars() {
        match ch {
            c if c.is_ascii_alphanumeric() => slug.push(c.to_ascii_lowercase()),
            // A dash is only ever appended after a kept character, so the slug
            // never starts with one and never holds two in a row.
            '-' | '_' | '/' | '.' | ' ' if !slug.is_empty() && !slug.ends_with('-') => {
                slug.push('-');
            },
            _ => {},
        }
    }

    if slug.ends_with('-') {
        slug.pop();
    }

    if slug.is_empty() { None } else { Some(slug) }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546 let key = workstream_key_for_label(label);
1547 let candidate = key
1548 .split('/')
1549 .next_back()
1550 .and_then(sanitize_scope_fragment)?;
1551 Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555 if labels.len() == 1 {
1556 let label = labels[0].as_str();
1557 let display = workstream_display_name(label);
1558 if label.starts_with("apps/") {
1559 return format!("{display} application updates");
1560 }
1561 if label.starts_with("packages/") {
1562 return format!("{display} package updates");
1563 }
1564 if label.starts_with("crates/") {
1565 return format!("{display} crate updates");
1566 }
1567 if label.starts_with(".github/") || label == ".github" {
1568 return format!("{display} updates");
1569 }
1570 return format!("{display} updates");
1571 }
1572
1573 let display_labels: Vec<String> = labels
1574 .iter()
1575 .take(3)
1576 .map(|label| workstream_display_name(label))
1577 .collect();
1578 format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582 snapshot: &ComposeSnapshot,
1583 labels: &[String],
1584 file_ids: &[String],
1585) -> Result<CommitType> {
1586 if labels
1587 .iter()
1588 .any(|label| label == ".github" || label.starts_with(".github/"))
1589 {
1590 return CommitType::new("ci");
1591 }
1592
1593 let files: Vec<&ComposeFile> = file_ids
1594 .iter()
1595 .filter_map(|file_id| snapshot.file_by_id(file_id))
1596 .collect();
1597 let all_docs = !files.is_empty()
1598 && files
1599 .iter()
1600 .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601 if all_docs {
1602 return CommitType::new("docs");
1603 }
1604
1605 let all_tests = !files.is_empty()
1606 && files
1607 .iter()
1608 .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609 if all_tests {
1610 return CommitType::new("test");
1611 }
1612
1613 let all_dependencies =
1614 !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615 if all_dependencies {
1616 return CommitType::new("build");
1617 }
1618
1619 let all_config = !files.is_empty()
1620 && files.iter().all(|file| {
1621 matches!(
1622 compose_file_category(file),
1623 ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624 )
1625 });
1626 if all_config {
1627 return CommitType::new("chore");
1628 }
1629
1630 CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634 snapshot
1635 .files
1636 .iter()
1637 .filter(|file| file_ids.contains(&file.file_id))
1638 .map(|file| file.file_id.clone())
1639 .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643 if groups.is_empty() {
1644 return false;
1645 }
1646
1647 let largest_group = groups
1648 .iter()
1649 .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650 .max()
1651 .unwrap_or_default();
1652
1653 groups.len() == 1
1654 || (groups.len() <= 2
1655 && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659 snapshot: &ComposeSnapshot,
1660 planning_index: &PlanningIndex,
1661 groups: &[ComposeIntentGroup],
1662 max_commits: usize,
1663) -> bool {
1664 if max_commits <= 1
1665 || planning_index.mode != PlanningMode::Area
1666 || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667 || !is_monolithic_intent_plan(snapshot, groups)
1668 {
1669 return false;
1670 }
1671
1672 let workstream_count = planning_index
1673 .targets
1674 .iter()
1675 .map(|target| workstream_key_for_label(&target.label))
1676 .collect::<HashSet<_>>()
1677 .len();
1678
1679 workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
1682fn build_large_patch_fallback_groups(
1683 snapshot: &ComposeSnapshot,
1684 planning_index: &PlanningIndex,
1685 max_commits: usize,
1686) -> Result<Vec<ComposeIntentGroup>> {
1687 #[derive(Debug, Clone)]
1688 struct WorkstreamGroup {
1689 label: String,
1690 file_ids: HashSet<String>,
1691 weight: usize,
1692 }
1693
1694 #[derive(Debug, Clone)]
1695 struct FallbackBin {
1696 labels: Vec<String>,
1697 file_ids: HashSet<String>,
1698 total_weight: usize,
1699 }
1700
1701 let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1702 for target in &planning_index.targets {
1703 let key = workstream_key_for_label(&target.label);
1704 let entry = workstreams
1705 .entry(key.clone())
1706 .or_insert_with(|| WorkstreamGroup {
1707 label: key,
1708 file_ids: HashSet::new(),
1709 weight: 0,
1710 });
1711
1712 for file_id in &target.file_ids {
1713 entry.file_ids.insert(file_id.clone());
1714 }
1715 entry.weight = entry
1716 .weight
1717 .saturating_add(target.hunk_count.max(target.file_ids.len()));
1718 }
1719
1720 let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1721 workstreams.sort_by(|left, right| {
1722 right
1723 .weight
1724 .cmp(&left.weight)
1725 .then_with(|| left.label.cmp(&right.label))
1726 });
1727
1728 let bin_count = max_commits.min(workstreams.len());
1729 let mut bins: Vec<FallbackBin> = Vec::new();
1730 for workstream in workstreams {
1731 if bins.len() < bin_count {
1732 bins.push(FallbackBin {
1733 labels: vec![workstream.label],
1734 file_ids: workstream.file_ids,
1735 total_weight: workstream.weight,
1736 });
1737 continue;
1738 }
1739
1740 let Some((target_idx, _)) = bins
1741 .iter()
1742 .enumerate()
1743 .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1744 else {
1745 continue;
1746 };
1747
1748 let target_bin = &mut bins[target_idx];
1749 target_bin.labels.push(workstream.label);
1750 target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1751 target_bin.file_ids.extend(workstream.file_ids);
1752 }
1753
1754 let mut groups = Vec::new();
1755 for (idx, bin) in bins.into_iter().enumerate() {
1756 let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1757 let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1758 let scope = (bin.labels.len() == 1)
1759 .then(|| fallback_scope_for_label(&bin.labels[0]))
1760 .flatten();
1761 let rationale = fallback_rationale_for_labels(&bin.labels);
1762
1763 groups.push(ComposeIntentGroup {
1764 group_id: format!("G{}", idx + 1),
1765 commit_type,
1766 scope,
1767 file_ids: ordered_ids,
1768 rationale,
1769 dependencies: Vec::new(),
1770 });
1771 }
1772
1773 Ok(groups)
1774}
1775
/// Asks the analysis model for an intent plan and turns it into a usable plan.
///
/// Builds the planning index for `snapshot`, renders the prompt inputs, and
/// runs the `compose/intent` one-shot request. The returned groups are passed
/// through `normalize_intent_plan`; every repair note is printed to stderr as
/// a warning. When `should_force_large_patch_fallback` fires, the groups are
/// replaced with path-based workstream groups. Finally the dependency order
/// over the resulting groups is computed.
///
/// # Errors
///
/// Propagates failures from prompt rendering, the one-shot request, plan
/// normalization, fallback group construction and dependency ordering.
#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
async fn analyze_compose_intent(
    snapshot: &ComposeSnapshot,
    observations: &[FileObservation],
    config: &CommitConfig,
    max_commits: usize,
    debug_dir: Option<&Path>,
) -> Result<ComposeIntentPlan> {
    // Render every prompt section from the same planning index.
    let planning_index = build_planning_index(snapshot);
    let stat_summary = render_planning_stat(&planning_index);
    let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
    let planning_targets = render_planning_targets(&planning_index, snapshot);
    let planning_notes = render_planning_notes(&planning_index);
    let split_bias = render_split_bias(&planning_index);
    let schema = build_intent_schema(config);
    let variant = if config.markdown_output { "markdown" } else { "default" };
    let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
        variant,
        max_commits,
        stat: &stat_summary,
        snapshot_summary: &snapshot_summary,
        planning_targets: &planning_targets,
        planning_notes: &planning_notes,
        split_bias: &split_bias,
    })?;

    let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
        operation: "compose/intent",
        model: &config.analysis_model,
        prompt_family: "compose-intent",
        prompt_variant: variant,
        system_prompt: &parts.system,
        user_prompt: &parts.user,
        tool_name: "create_compose_intent_plan",
        tool_description: "Plan logical commit groups over the provided planning target IDs",
        schema: &schema,
        progress_label: Some("compose intent planner"),
        debug: debug_dir.map(|dir| OneShotDebug {
            dir: Some(dir),
            prefix: None,
            name: "compose_intent",
        }),
        cacheable: true,
    })
    .await?;

    // Repair the raw plan and surface every repair as a warning on stderr.
    let (mut groups, repair_notes) =
        normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
    for note in &repair_notes {
        eprintln!("{}", style::warning(note));
    }
    // A collapsed plan on a large patch is replaced by path-based groups.
    if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
        eprintln!(
            "{}",
            style::warning(
                "Compose intent collapsed into a monolithic large-patch group; falling back to \
                 path-based workstream splits."
            )
        );
        groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
    }
    let dependency_order =
        compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;

    Ok(ComposeIntentPlan { groups, dependency_order })
}
1842
1843#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1844fn should_collect_compose_observations(
1845 snapshot: &ComposeSnapshot,
1846 config: &CommitConfig,
1847 counter: &TokenCounter,
1848) -> bool {
1849 planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1850 && should_use_map_reduce(&snapshot.diff, config, counter)
1851}
1852
1853#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1854fn auto_assign_hunks(
1855 snapshot: &ComposeSnapshot,
1856 intent_plan: &ComposeIntentPlan,
1857) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1858 let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1859 for group in &intent_plan.groups {
1860 for file_id in &group.file_ids {
1861 groups_by_file
1862 .entry(file_id.as_str())
1863 .or_default()
1864 .push(group.group_id.as_str());
1865 }
1866 }
1867
1868 let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1869 .groups
1870 .iter()
1871 .map(|group| (group.group_id.clone(), BTreeSet::new()))
1872 .collect();
1873 let mut ambiguous = Vec::new();
1874
1875 for file in &snapshot.files {
1876 let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1877 return Err(CommitGenError::Other(format!(
1878 "No compose group claimed file {} ({})",
1879 file.file_id, file.path
1880 )));
1881 };
1882
1883 if candidate_group_ids.len() == 1 {
1884 let group_id = candidate_group_ids[0];
1885 let entry = assigned
1886 .get_mut(group_id)
1887 .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1888 for hunk_id in &file.hunk_ids {
1889 entry.insert(hunk_id.clone());
1890 }
1891 } else {
1892 ambiguous.push(AmbiguousFileBinding {
1893 file_id: file.file_id.clone(),
1894 path: file.path.clone(),
1895 candidate_group_ids: candidate_group_ids
1896 .iter()
1897 .map(|group_id| (*group_id).to_string())
1898 .collect(),
1899 hunk_ids: file.hunk_ids.clone(),
1900 });
1901 }
1902 }
1903
1904 Ok((assigned, ambiguous))
1905}
1906
1907fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1908 let mut out = String::new();
1909 for group in groups {
1910 let scope = group
1911 .scope
1912 .as_ref()
1913 .map(|scope| format!("({})", scope.as_str()))
1914 .unwrap_or_default();
1915 writeln!(
1916 out,
1917 "- {} [{}{}] {}",
1918 group.group_id,
1919 group.commit_type.as_str(),
1920 scope,
1921 group.rationale
1922 )
1923 .unwrap();
1924 }
1925
1926 out
1927}
1928
1929fn render_binding_ambiguous_files(
1930 snapshot: &ComposeSnapshot,
1931 ambiguous_files: &[AmbiguousFileBinding],
1932) -> String {
1933 let mut out = String::new();
1934 for ambiguous_file in ambiguous_files {
1935 writeln!(
1936 out,
1937 "- {} {} candidates: {}",
1938 ambiguous_file.file_id,
1939 ambiguous_file.path,
1940 ambiguous_file.candidate_group_ids.join(", ")
1941 )
1942 .unwrap();
1943
1944 for hunk_id in &ambiguous_file.hunk_ids {
1945 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1946 if hunk.synthetic {
1947 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1948 } else {
1949 writeln!(
1950 out,
1951 " - {} old:{} new:{} :: {}",
1952 hunk.hunk_id,
1953 format_line_range(hunk.old_start, hunk.old_count),
1954 format_line_range(hunk.new_start, hunk.new_count),
1955 hunk.snippet
1956 )
1957 .unwrap();
1958 }
1959 }
1960 }
1961 }
1962
1963 out
1964}
1965
/// Asks the analysis model to bind the hunks of ambiguous files to groups.
///
/// Renders the groups and ambiguous files into the `compose-bind` prompt,
/// runs the `compose/bind` one-shot request and returns the assignments from
/// the response unchanged. `debug_name` names the debug artifact when
/// `debug_dir` is set.
///
/// # Errors
///
/// Propagates failures from prompt rendering and from the one-shot request.
async fn request_binding(
    snapshot: &ComposeSnapshot,
    groups: &[ComposeIntentGroup],
    ambiguous_files: &[AmbiguousFileBinding],
    config: &CommitConfig,
    debug_dir: Option<&Path>,
    debug_name: &str,
) -> Result<Vec<ComposeBindingAssignment>> {
    let schema = build_binding_schema();
    let groups_text = render_binding_groups(groups);
    let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
    // The prompt variant follows the configured output format.
    let variant = if config.markdown_output { "markdown" } else { "default" };
    let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
        variant,
        groups: &groups_text,
        ambiguous_files: &ambiguous_files_text,
    })?;
    let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
        operation: "compose/bind",
        model: &config.analysis_model,
        prompt_family: "compose-bind",
        prompt_variant: variant,
        system_prompt: &parts.system,
        user_prompt: &parts.user,
        tool_name: "bind_compose_hunks",
        tool_description: "Assign hunk IDs to existing compose groups",
        schema: &schema,
        progress_label: Some("compose hunk binder"),
        debug: debug_dir.map(|dir| OneShotDebug {
            dir: Some(dir),
            prefix: None,
            name: debug_name,
        }),
        cacheable: true,
    })
    .await?;

    Ok(response.output.assignments)
}
2005
2006fn ambiguous_hunk_context(
2007 ambiguous_files: &[AmbiguousFileBinding],
2008) -> HashMap<String, AmbiguousHunkContext> {
2009 let mut context = HashMap::new();
2010 for ambiguous_file in ambiguous_files {
2011 for hunk_id in &ambiguous_file.hunk_ids {
2012 context.insert(hunk_id.clone(), AmbiguousHunkContext {
2013 candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2014 });
2015 }
2016 }
2017 context
2018}
2019
2020fn evaluate_binding(
2021 assignments: &[ComposeBindingAssignment],
2022 hunk_context: &HashMap<String, AmbiguousHunkContext>,
2023 valid_group_ids: &HashSet<&str>,
2024 snapshot: &ComposeSnapshot,
2025) -> BindingEvaluation {
2026 let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2027
2028 for assignment in assignments {
2029 if !valid_group_ids.contains(assignment.group_id.as_str()) {
2030 continue;
2031 }
2032
2033 let mut seen_in_group = HashSet::new();
2034 for hunk_id in &assignment.hunk_ids {
2035 if !seen_in_group.insert(hunk_id.as_str()) {
2036 continue;
2037 }
2038
2039 let Some(context) = hunk_context.get(hunk_id) else {
2040 continue;
2041 };
2042
2043 if !context
2044 .candidate_group_ids
2045 .iter()
2046 .any(|candidate| candidate == &assignment.group_id)
2047 {
2048 continue;
2049 }
2050
2051 match assigned_hunk_to_group.get(hunk_id) {
2052 None => {
2053 assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2054 },
2055 Some(existing_group) if existing_group == &assignment.group_id => {},
2056 Some(_) => {
2057 assigned_hunk_to_group.remove(hunk_id);
2058 },
2059 }
2060 }
2061 }
2062
2063 let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2064 for (hunk_id, group_id) in assigned_hunk_to_group {
2065 assigned_by_group.entry(group_id).or_default().push(hunk_id);
2066 }
2067
2068 for hunk_ids in assigned_by_group.values_mut() {
2069 let ordered: Vec<String> = snapshot
2070 .hunks
2071 .iter()
2072 .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2073 .map(|hunk| hunk.hunk_id.clone())
2074 .collect();
2075 *hunk_ids = ordered;
2076 }
2077
2078 let unresolved = snapshot
2079 .hunks
2080 .iter()
2081 .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2082 .filter(|hunk| {
2083 !assigned_by_group.values().any(|assigned_hunks| {
2084 assigned_hunks
2085 .iter()
2086 .any(|assigned| assigned == &hunk.hunk_id)
2087 })
2088 })
2089 .map(|hunk| hunk.hunk_id.clone())
2090 .collect();
2091
2092 BindingEvaluation { assigned: assigned_by_group, unresolved }
2093}
2094
2095fn filter_ambiguous_files(
2096 ambiguous_files: &[AmbiguousFileBinding],
2097 hunk_ids: &[String],
2098) -> Vec<AmbiguousFileBinding> {
2099 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2100
2101 ambiguous_files
2102 .iter()
2103 .filter_map(|file| {
2104 let matching_hunks: Vec<String> = file
2105 .hunk_ids
2106 .iter()
2107 .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2108 .cloned()
2109 .collect();
2110
2111 (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2112 file_id: file.file_id.clone(),
2113 path: file.path.clone(),
2114 candidate_group_ids: file.candidate_group_ids.clone(),
2115 hunk_ids: matching_hunks,
2116 })
2117 })
2118 .collect()
2119}
2120
2121fn chunk_ambiguous_files(
2122 ambiguous_files: &[AmbiguousFileBinding],
2123) -> Vec<Vec<AmbiguousFileBinding>> {
2124 if ambiguous_files.is_empty() {
2125 return Vec::new();
2126 }
2127
2128 let mut batches = Vec::new();
2129 let mut current_batch = Vec::new();
2130 let mut current_hunk_count = 0_usize;
2131
2132 for file in ambiguous_files {
2133 let file_hunk_count = file.hunk_ids.len();
2134 let should_split = !current_batch.is_empty()
2135 && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2136 || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2137
2138 if should_split {
2139 batches.push(current_batch);
2140 current_batch = Vec::new();
2141 current_hunk_count = 0;
2142 }
2143
2144 current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2145 current_batch.push(file.clone());
2146 }
2147
2148 if !current_batch.is_empty() {
2149 batches.push(current_batch);
2150 }
2151
2152 batches
2153}
2154
2155fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2156 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2157
2158 snapshot
2159 .hunks
2160 .iter()
2161 .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2162 .map(|hunk| hunk.hunk_id.clone())
2163 .collect()
2164}
2165
2166fn fallback_group_for_hunk(
2167 hunk_id: &str,
2168 ambiguous_files: &[AmbiguousFileBinding],
2169 group_rank: &HashMap<&str, usize>,
2170) -> Option<String> {
2171 ambiguous_files.iter().find_map(|file| {
2172 file
2173 .hunk_ids
2174 .iter()
2175 .any(|candidate| candidate == hunk_id)
2176 .then(|| {
2177 file
2178 .candidate_group_ids
2179 .iter()
2180 .min_by_key(|group_id| {
2181 group_rank
2182 .get(group_id.as_str())
2183 .copied()
2184 .unwrap_or(usize::MAX)
2185 })
2186 .cloned()
2187 })
2188 })?
2189}
2190
2191fn assign_unresolved_hunks(
2192 unresolved_hunks: &[String],
2193 assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2194 ambiguous_files: &[AmbiguousFileBinding],
2195 group_rank: &HashMap<&str, usize>,
2196) {
2197 for hunk_id in unresolved_hunks {
2198 if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2199 && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2200 {
2201 group_hunks.insert(hunk_id.clone());
2202 }
2203 }
2204}
2205
2206fn normalize_group_type(
2207 snapshot: &ComposeSnapshot,
2208 file_ids: &[String],
2209 original_type: &CommitType,
2210) -> Result<CommitType> {
2211 let dependency_only = !file_ids.is_empty()
2212 && file_ids.iter().all(|file_id| {
2213 snapshot
2214 .file_by_id(file_id)
2215 .is_some_and(|file| is_dependency_manifest(&file.path))
2216 });
2217
2218 if dependency_only && original_type.as_str() != "build" {
2219 CommitType::new("build")
2220 } else {
2221 Ok(original_type.clone())
2222 }
2223}
2224
2225fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2226 snapshot
2227 .files
2228 .iter()
2229 .filter(|file| {
2230 hunk_ids
2231 .iter()
2232 .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2233 })
2234 .map(|file| file.file_id.clone())
2235 .collect()
2236}
2237
2238fn build_redirects(
2239 intent_plan: &ComposeIntentPlan,
2240 executable_groups: &[ComposeExecutableGroup],
2241 group_rank: &HashMap<&str, usize>,
2242) -> HashMap<String, String> {
2243 let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2244 .iter()
2245 .filter(|group| !group.hunk_ids.is_empty())
2246 .map(|group| (group.group_id.as_str(), group))
2247 .collect();
2248
2249 let mut redirects = HashMap::new();
2250 for group in &intent_plan.groups {
2251 if surviving_groups.contains_key(group.group_id.as_str()) {
2252 continue;
2253 }
2254
2255 let redirect = executable_groups
2256 .iter()
2257 .filter(|candidate| candidate.group_id != group.group_id)
2258 .filter(|candidate| {
2259 candidate.file_ids.iter().any(|file_id| {
2260 group
2261 .file_ids
2262 .iter()
2263 .any(|candidate_id| candidate_id == file_id)
2264 })
2265 })
2266 .min_by_key(|candidate| {
2267 group_rank
2268 .get(candidate.group_id.as_str())
2269 .copied()
2270 .unwrap_or(usize::MAX)
2271 })
2272 .map(|candidate| candidate.group_id.clone());
2273
2274 if let Some(redirect) = redirect {
2275 redirects.insert(group.group_id.clone(), redirect);
2276 }
2277 }
2278
2279 redirects
2280}
2281
/// Follows the redirect chain starting at `group_id` and returns the final group id.
///
/// Each step looks the current id up in `redirects` and moves to its target. The walk ends when
/// the current id has no redirect, or when it reaches an id that was already visited; in the
/// cyclic case the id at which the repeat was detected is returned. An id without any redirect
/// is returned unchanged.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
    // Borrowed ids are enough for the walk; only the final answer is allocated.
    let mut current: &str = group_id;
    let mut seen: HashSet<&str> = HashSet::new();

    while let Some(next) = redirects.get(current) {
        // `insert` returns false for an id visited before, which means the chain loops.
        if !seen.insert(current) {
            break;
        }
        current = next.as_str();
    }

    current.to_string()
}
2295
2296fn prune_empty_groups(
2297 groups: Vec<ComposeExecutableGroup>,
2298 redirects: &HashMap<String, String>,
2299) -> Result<ComposeExecutablePlan> {
2300 let surviving_ids: HashSet<String> = groups
2301 .iter()
2302 .filter(|group| !group.hunk_ids.is_empty())
2303 .map(|group| group.group_id.clone())
2304 .collect();
2305
2306 let mut surviving_groups = Vec::new();
2307 for mut group in groups {
2308 if group.hunk_ids.is_empty() {
2309 continue;
2310 }
2311
2312 let mut rewritten_dependencies = Vec::new();
2313 for dependency in &group.dependencies {
2314 let rewritten = resolve_redirect(dependency, redirects);
2315 if rewritten != group.group_id
2316 && surviving_ids.contains(&rewritten)
2317 && !rewritten_dependencies
2318 .iter()
2319 .any(|existing| existing == &rewritten)
2320 {
2321 rewritten_dependencies.push(rewritten);
2322 }
2323 }
2324
2325 group.dependencies = rewritten_dependencies;
2326 surviving_groups.push(group);
2327 }
2328
2329 let dependency_order = compute_dependency_order(
2330 &surviving_groups,
2331 |group| &group.group_id,
2332 |group| &group.dependencies,
2333 )?;
2334 Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2335}
2336
2337fn finalize_executable_plan(
2338 snapshot: &ComposeSnapshot,
2339 intent_plan: &ComposeIntentPlan,
2340 assigned_by_group: HashMap<String, BTreeSet<String>>,
2341) -> Result<ComposeExecutablePlan> {
2342 let group_rank: HashMap<&str, usize> = intent_plan
2343 .dependency_order
2344 .iter()
2345 .enumerate()
2346 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2347 .collect();
2348
2349 let mut executable_groups = Vec::new();
2350 for group in &intent_plan.groups {
2351 let hunk_ids: Vec<String> = snapshot
2352 .hunks
2353 .iter()
2354 .filter(|hunk| {
2355 assigned_by_group
2356 .get(&group.group_id)
2357 .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2358 })
2359 .map(|hunk| hunk.hunk_id.clone())
2360 .collect();
2361
2362 let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2363 let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2364 executable_groups.push(ComposeExecutableGroup {
2365 group_id: group.group_id.clone(),
2366 commit_type,
2367 scope: group.scope.clone(),
2368 file_ids,
2369 rationale: group.rationale.clone(),
2370 dependencies: group.dependencies.clone(),
2371 hunk_ids,
2372 });
2373 }
2374
2375 let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2376 prune_empty_groups(executable_groups, &redirects)
2377}
2378
2379fn validate_executable_plan(
2380 snapshot: &ComposeSnapshot,
2381 plan: &ComposeExecutablePlan,
2382) -> Result<()> {
2383 if plan.groups.is_empty() {
2384 return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2385 }
2386
2387 let known_hunks: HashSet<&str> = snapshot
2388 .hunks
2389 .iter()
2390 .map(|hunk| hunk.hunk_id.as_str())
2391 .collect();
2392 let known_files: HashSet<&str> = snapshot
2393 .files
2394 .iter()
2395 .map(|file| file.file_id.as_str())
2396 .collect();
2397 let mut coverage = HashMap::<String, String>::new();
2398
2399 for group in &plan.groups {
2400 if group.hunk_ids.is_empty() {
2401 return Err(CommitGenError::Other(format!(
2402 "Compose group {} ended up empty after binding",
2403 group.group_id
2404 )));
2405 }
2406
2407 for file_id in &group.file_ids {
2408 if !known_files.contains(file_id.as_str()) {
2409 return Err(CommitGenError::Other(format!(
2410 "Compose group {} references unknown file_id {}",
2411 group.group_id, file_id
2412 )));
2413 }
2414 }
2415
2416 for hunk_id in &group.hunk_ids {
2417 if !known_hunks.contains(hunk_id.as_str()) {
2418 return Err(CommitGenError::Other(format!(
2419 "Compose group {} references unknown hunk_id {}",
2420 group.group_id, hunk_id
2421 )));
2422 }
2423
2424 if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2425 return Err(CommitGenError::Other(format!(
2426 "Hunk {} was assigned to both {} and {}",
2427 hunk_id, existing_group, group.group_id
2428 )));
2429 }
2430 }
2431 }
2432
2433 let missing_hunks: Vec<String> = snapshot
2434 .hunks
2435 .iter()
2436 .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2437 .map(|hunk| hunk.hunk_id.clone())
2438 .collect();
2439 if !missing_hunks.is_empty() {
2440 return Err(CommitGenError::Other(format!(
2441 "Compose plan left hunks unassigned: {}",
2442 missing_hunks.join(", ")
2443 )));
2444 }
2445
2446 let dependency_order =
2447 compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2448 if dependency_order != plan.dependency_order {
2449 return Err(CommitGenError::Other(
2450 "Compose dependency order does not match recomputed order".to_string(),
2451 ));
2452 }
2453
2454 Ok(())
2455}
2456
2457#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2458async fn bind_compose_plan(
2459 snapshot: &ComposeSnapshot,
2460 intent_plan: &ComposeIntentPlan,
2461 config: &CommitConfig,
2462 debug_dir: Option<&Path>,
2463) -> Result<ComposeExecutablePlan> {
2464 let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2465
2466 if !ambiguous_files.is_empty() {
2467 let valid_group_ids: HashSet<&str> = intent_plan
2468 .groups
2469 .iter()
2470 .map(|group| group.group_id.as_str())
2471 .collect();
2472 let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2473 let mut unresolved = Vec::new();
2474
2475 for (batch_idx, batch) in binding_batches.iter().enumerate() {
2476 let hunk_context = ambiguous_hunk_context(batch);
2477 let debug_name = if binding_batches.len() == 1 {
2478 "compose_bind".to_string()
2479 } else {
2480 format!("compose_bind_{:02}", batch_idx + 1)
2481 };
2482 let assignments =
2483 request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2484 .await?;
2485 let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2486 for (group_id, hunk_ids) in evaluation.assigned {
2487 let entry = assigned_by_group.entry(group_id).or_default();
2488 for hunk_id in hunk_ids {
2489 entry.insert(hunk_id);
2490 }
2491 }
2492 unresolved.extend(evaluation.unresolved);
2493 }
2494
2495 let group_rank: HashMap<&str, usize> = intent_plan
2496 .dependency_order
2497 .iter()
2498 .enumerate()
2499 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2500 .collect();
2501
2502 let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2503 if !unresolved.is_empty() {
2504 let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2505 let repair_batches = chunk_ambiguous_files(&unresolved_files);
2506 let mut repair_unresolved = Vec::new();
2507
2508 for (batch_idx, batch) in repair_batches.iter().enumerate() {
2509 let debug_name = if repair_batches.len() == 1 {
2510 "compose_bind_repair".to_string()
2511 } else {
2512 format!("compose_bind_repair_{:02}", batch_idx + 1)
2513 };
2514 let repair_assignments = request_binding(
2515 snapshot,
2516 &intent_plan.groups,
2517 batch,
2518 config,
2519 debug_dir,
2520 &debug_name,
2521 )
2522 .await?;
2523 let repair_context = ambiguous_hunk_context(batch);
2524 let repair =
2525 evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2526 for (group_id, hunk_ids) in repair.assigned {
2527 let entry = assigned_by_group.entry(group_id).or_default();
2528 for hunk_id in hunk_ids {
2529 entry.insert(hunk_id);
2530 }
2531 }
2532
2533 repair_unresolved.extend(repair.unresolved);
2534 }
2535 unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2536
2537 if !unresolved.is_empty() {
2538 assign_unresolved_hunks(
2539 &unresolved,
2540 &mut assigned_by_group,
2541 &ambiguous_files,
2542 &group_rank,
2543 );
2544 }
2545 }
2546 }
2547
2548 let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2549 validate_executable_plan(snapshot, &plan)?;
2550 Ok(plan)
2551}
2552
2553fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2554 println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2555 for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2556 let group = &plan.groups[group_idx];
2557 let scope = group
2558 .scope
2559 .as_ref()
2560 .map(|scope| format!("({})", style::scope(scope.as_str())))
2561 .unwrap_or_default();
2562
2563 println!(
2564 "\n{}. {} [{}{}] {}",
2565 display_idx + 1,
2566 style::bold(&group.group_id),
2567 style::commit_type(group.commit_type.as_str()),
2568 scope,
2569 group.rationale
2570 );
2571
2572 println!(" Files:");
2573 for file_id in &group.file_ids {
2574 if let Some(file) = snapshot.file_by_id(file_id) {
2575 let selected_hunk_ids: Vec<&str> = group
2576 .hunk_ids
2577 .iter()
2578 .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2579 .map(String::as_str)
2580 .collect();
2581 let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2582 "all hunks".to_string()
2583 } else {
2584 selected_hunk_ids.join(", ")
2585 };
2586 println!(" - {} {} ({selection})", file.file_id, file.path);
2587 }
2588 }
2589
2590 if !group.dependencies.is_empty() {
2591 println!(" Depends on: {}", group.dependencies.join(", "));
2592 }
2593 }
2594}
2595
2596#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2597async fn generate_compose_group_analysis(
2598 stat: &str,
2599 diff: &str,
2600 group: &ComposeExecutableGroup,
2601 config: &CommitConfig,
2602 args: &Args,
2603 debug_prefix: &str,
2604 counter: &TokenCounter,
2605) -> Result<ConventionalAnalysis> {
2606 match compose_analysis_strategy(diff, config, counter) {
2607 ComposeAnalysisStrategy::MapReduce => {
2608 println!(
2609 " {}",
2610 style::info(&format!(
2611 "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2612 group.group_id
2613 ))
2614 );
2615 run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2616 },
2617 strategy => {
2618 let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2619 eprintln!(
2620 " {}",
2621 style::warning(&format!(
2622 "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2623 group.group_id
2624 ))
2625 );
2626 Cow::Owned(smart_truncate_diff(
2627 diff,
2628 compose_truncation_length(config),
2629 config,
2630 counter,
2631 ))
2632 } else {
2633 Cow::Borrowed(diff)
2634 };
2635
2636 let ctx = AnalysisContext {
2637 user_context: Some(&group.rationale),
2638 recent_commits: None,
2639 common_scopes: None,
2640 project_context: None,
2641 debug_output: args.debug_output.as_deref(),
2642 debug_prefix: Some(debug_prefix),
2643 };
2644
2645 generate_conventional_analysis(
2646 stat,
2647 analysis_diff.as_ref(),
2648 &config.analysis_model,
2649 "",
2650 &ctx,
2651 config,
2652 )
2653 .await
2654 },
2655 }
2656}
2657
2658fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2659 let files: Vec<&str> = group
2660 .file_ids
2661 .iter()
2662 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2663 .collect();
2664
2665 if files.is_empty() {
2666 "no files resolved".to_string()
2667 } else {
2668 files.join(", ")
2669 }
2670}
2671
2672fn cumulative_file_hunk_ids(
2676 plan: &ComposeExecutablePlan,
2677 position: usize,
2678 snapshot: &ComposeSnapshot,
2679 file_id: &str,
2680) -> Vec<String> {
2681 let mut hunk_ids = Vec::new();
2682 for &group_idx in plan.dependency_order.iter().take(position + 1) {
2683 let Some(group) = plan.groups.get(group_idx) else {
2684 continue;
2685 };
2686 for hunk_id in &group.hunk_ids {
2687 if snapshot
2688 .hunk_by_id(hunk_id)
2689 .is_some_and(|hunk| hunk.file_id == file_id)
2690 {
2691 hunk_ids.push(hunk_id.clone());
2692 }
2693 }
2694 }
2695 hunk_ids
2696}
2697
2698#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2699pub async fn execute_compose(
2700 snapshot: &ComposeSnapshot,
2701 plan: &ComposeExecutablePlan,
2702 config: &CommitConfig,
2703 args: &Args,
2704 base_state: &ComposeBaseState,
2705) -> Result<Vec<String>> {
2706 let total = plan.dependency_order.len();
2707
2708 let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2712 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2713 let group = &plan.groups[group_idx];
2714 println!(
2715 " {}",
2716 style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2717 );
2718 let group_patch = create_executable_group_patch(snapshot, group)?;
2719 group_diff_stats.push((group_patch.diff, group_patch.stat));
2720 }
2721
2722 println!(
2726 "{}",
2727 style::info(&format!(
2728 "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2729 COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2730 ))
2731 );
2732
2733 let token_counter = create_token_counter(config);
2734 let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2735 stream::iter(plan.dependency_order.iter().enumerate())
2736 .map(|(idx, &group_idx)| {
2737 let group = &plan.groups[group_idx];
2738 let (diff, stat) = &group_diff_stats[idx];
2739 let debug_prefix = format!("compose-{}", idx + 1);
2740 let token_counter = &token_counter;
2741 async move {
2742 let result = async {
2743 let analysis = generate_compose_group_analysis(
2744 stat,
2745 diff,
2746 group,
2747 config,
2748 args,
2749 &debug_prefix,
2750 token_counter,
2751 )
2752 .await?;
2753 let body = analysis.body_texts();
2754 let summary = generate_summary_from_analysis(
2755 stat,
2756 group.commit_type.as_str(),
2757 group.scope.as_ref().map(|scope| scope.as_str()),
2758 &body,
2759 Some(&group.rationale),
2760 config,
2761 args.debug_output.as_deref(),
2762 Some(&debug_prefix),
2763 )
2764 .await?;
2765 Ok::<_, CommitGenError>((body, summary))
2766 }
2767 .await;
2768
2769 result.map_err(|source| CommitGenError::ComposeMessageError {
2770 group_id: group.group_id.clone(),
2771 files: compose_group_file_list(snapshot, group),
2772 source: Box::new(source),
2773 })
2774 }
2775 })
2776 .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2777 .collect::<Vec<_>>()
2778 .await
2779 .into_iter()
2780 .collect::<Result<Vec<_>>>()?;
2781
2782 execute_compose_with_prepared_messages(
2783 snapshot,
2784 plan,
2785 config,
2786 args,
2787 base_state,
2788 prepared_messages,
2789 )
2790}
2791
2792#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2793fn execute_compose_with_prepared_messages(
2794 snapshot: &ComposeSnapshot,
2795 plan: &ComposeExecutablePlan,
2796 config: &CommitConfig,
2797 args: &Args,
2798 base_state: &ComposeBaseState,
2799 prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2800) -> Result<Vec<String>> {
2801 let dir = &args.dir;
2802 let total = plan.dependency_order.len();
2803 if args.compose_preview {
2804 return Ok(Vec::new());
2805 }
2806
2807 let index = TempGitIndex::new(dir)?;
2808 read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2809
2810 let mut commit_hashes = Vec::new();
2811 let mut parent_hash = base_state.head_hash.clone();
2812
2813 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2817 let group = &plan.groups[group_idx];
2818
2819 println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2820 println!(" Type: {}", style::commit_type(group.commit_type.as_str()));
2821 if let Some(scope) = &group.scope {
2822 println!(" Scope: {}", style::scope(scope.as_str()));
2823 }
2824 let paths: Vec<String> = group
2825 .file_ids
2826 .iter()
2827 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2828 .collect();
2829 println!(" Files: {}", paths.join(", "));
2830
2831 let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2832 let mut staged_anything = outcome.result == StageResult::Staged;
2833
2834 for skipped in &outcome.skipped {
2838 let Some(file) = snapshot.file_by_path(&skipped.path) else {
2839 continue;
2840 };
2841 let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2842 force_stage_file_from_base_in_index(
2843 snapshot,
2844 &file.file_id,
2845 &cumulative,
2846 dir,
2847 index.path(),
2848 )?;
2849 staged_anything = true;
2850 eprintln!(
2851 " {}",
2852 style::info(&format!(
2853 "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2854 skipped.path
2855 ))
2856 );
2857 }
2858
2859 if !staged_anything {
2860 eprintln!(
2861 " {}",
2862 style::warning(&format!(
2863 "Skipping commit {}: its planned patch is already applied ({:?})",
2864 group.group_id, outcome.result
2865 ))
2866 );
2867 continue;
2868 }
2869
2870 let (analysis_body, summary) = prepared_messages[idx].clone();
2871 let mut commit = ConventionalCommit {
2872 commit_type: group.commit_type.clone(),
2873 scope: group.scope.clone(),
2874 summary,
2875 body: analysis_body,
2876 footers: vec![],
2877 };
2878 post_process_commit_message(&mut commit, config);
2879
2880 if let Err(err) = validate_commit_message(&commit, config) {
2881 eprintln!(
2882 " {}",
2883 style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2884 );
2885 }
2886
2887 let mut formatted_message = format_commit_message(&commit);
2888 if args.signoff || config.signoff {
2889 formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2890 }
2891 println!(
2892 " Message:\n{}",
2893 formatted_message
2894 .lines()
2895 .take(3)
2896 .collect::<Vec<_>>()
2897 .join("\n")
2898 );
2899
2900 let tree = write_index_tree(index.path(), dir)?;
2901 let sign = args.sign || config.gpg_sign;
2902 let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2903 parent_hash.clone_from(&hash);
2904 commit_hashes.push(hash);
2905
2906 if args.compose_test_after_each {
2907 return Err(CommitGenError::Other(
2908 "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2909 ));
2910 }
2911 }
2912
2913 if commit_hashes.is_empty() {
2914 return Ok(commit_hashes);
2915 }
2916
2917 update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2918
2919 let current_index_tree = write_real_index_tree(dir)?;
2920 if current_index_tree == base_state.index_tree {
2921 reset_mixed_to(&parent_hash, dir)?;
2922 } else {
2923 println!(
2927 "{}",
2928 style::warning("Index changed during compose; preserving newly staged changes")
2929 );
2930 let paths: Vec<String> = snapshot.files.iter().map(|file| file.path.clone()).collect();
2931 reset_paths_to(&parent_hash, &paths, dir)?;
2932 }
2933
2934 Ok(commit_hashes)
2935}
2936
2937#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2938pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2939 let max_rounds = config.compose_max_rounds;
2940
2941 for round in 1..=max_rounds {
2942 if round > 1 {
2943 println!(
2944 "\n{}",
2945 style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2946 );
2947 } else {
2948 println!("{}", style::section_header("Compose Mode", 80));
2949 }
2950 println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2951
2952 run_compose_round(args, config, round).await?;
2953
2954 if args.compose_preview {
2955 break;
2956 }
2957
2958 match get_compose_diff(&args.dir) {
2959 Err(CommitGenError::NoChanges { .. }) => {
2960 println!(
2961 "\n{}",
2962 style::success(&format!(
2963 "{} All changes committed successfully",
2964 style::icons::SUCCESS
2965 ))
2966 );
2967 break;
2968 },
2969 Err(err) => return Err(err),
2970 Ok(remaining_diff) => {
2971 eprintln!(
2972 "\n{}",
2973 style::warning(&format!(
2974 "{} Uncommitted changes remain after round {round}",
2975 style::icons::WARNING
2976 ))
2977 );
2978 eprintln!("{remaining_diff}");
2979 },
2980 }
2981
2982 if round < max_rounds {
2983 eprintln!("{}", style::info("Starting another compose round..."));
2984 } else {
2985 eprintln!(
2986 "{}",
2987 style::warning(&format!(
2988 "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2989 ))
2990 );
2991 }
2992 }
2993
2994 Ok(())
2995}
2996
2997#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2998async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2999 let base_state = capture_compose_base_state(&args.dir)?;
3000 let diff = get_compose_diff(&args.dir)?;
3001 let stat = get_compose_stat(&args.dir)?;
3002 let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3003 pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3007 let snapshot = snapshot;
3008
3009 if let Some(debug_dir) = args.debug_output.as_deref() {
3010 save_debug_artifact(
3011 Some(debug_dir),
3012 &format!("compose_round_{round}_snapshot.json"),
3013 &snapshot,
3014 )?;
3015 }
3016
3017 let token_counter = create_token_counter(config);
3018 let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3019 println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3020 observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3021 } else {
3022 if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3023 && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3024 {
3025 println!(
3026 "{}",
3027 style::info(
3028 "Skipping per-file observations for very large compose snapshot; using area-level \
3029 planning instead."
3030 )
3031 );
3032 }
3033 Vec::new()
3034 };
3035
3036 if let Some(debug_dir) = args.debug_output.as_deref()
3037 && !observations.is_empty()
3038 {
3039 save_debug_artifact(
3040 Some(debug_dir),
3041 &format!("compose_round_{round}_observations.json"),
3042 &observations,
3043 )?;
3044 }
3045
3046 let max_commits = args.compose_max_commits.unwrap_or(20);
3047 let executable_plan = if let Some(cached_plan) =
3048 load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3049 {
3050 println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3051 cached_plan
3052 } else {
3053 println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3054 let intent_plan = analyze_compose_intent(
3055 &snapshot,
3056 &observations,
3057 config,
3058 max_commits,
3059 args.debug_output.as_deref(),
3060 )
3061 .await?;
3062
3063 if let Some(debug_dir) = args.debug_output.as_deref() {
3064 save_debug_artifact(
3065 Some(debug_dir),
3066 &format!("compose_round_{round}_intent_plan.json"),
3067 &intent_plan,
3068 )?;
3069 }
3070
3071 println!("{}", style::info("Binding hunks to groups..."));
3072 let plan =
3073 bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3074 save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3075 plan
3076 };
3077
3078 if let Some(debug_dir) = args.debug_output.as_deref() {
3079 save_debug_artifact(
3080 Some(debug_dir),
3081 &format!("compose_round_{round}_executable_plan.json"),
3082 &executable_plan,
3083 )?;
3084 }
3085
3086 print_executable_plan(&snapshot, &executable_plan);
3087
3088 if args.compose_preview {
3089 println!(
3090 "\n{}",
3091 style::success(&format!(
3092 "{} Preview complete (use --compose without --compose-preview to execute)",
3093 style::icons::SUCCESS
3094 ))
3095 );
3096 return Ok(());
3097 }
3098
3099 println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3100 let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3101 println!(
3102 "{}",
3103 style::success(&format!(
3104 "{} Round {round}: Created {} commit(s)",
3105 style::icons::SUCCESS,
3106 hashes.len()
3107 ))
3108 );
3109 Ok(())
3110}
3111
3112#[cfg(test)]
3113mod tests {
3114 use std::{fmt::Write, fs};
3115
3116 use tempfile::TempDir;
3117
3118 use super::*;
3119 use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3120
3121 fn shared_file_diff() -> (&'static str, &'static str) {
3122 (
3123 r#"diff --git a/src/lib.rs b/src/lib.rs
3124index 1111111..2222222 100644
3125--- a/src/lib.rs
3126+++ b/src/lib.rs
3127@@ -1,3 +1,3 @@
3128-fn alpha() {
3129+fn alpha_changed() {
3130 println!("alpha");
3131 }
3132@@ -12,3 +12,3 @@
3133-fn beta() {
3134+fn beta_changed() {
3135 println!("beta");
3136 }
3137diff --git a/tests/lib.rs b/tests/lib.rs
3138index 3333333..4444444 100644
3139--- a/tests/lib.rs
3140+++ b/tests/lib.rs
3141@@ -1,3 +1,4 @@
3142 fn test_it() {
3143+ assert!(true);
3144 }
3145"#,
3146 " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3147 )
3148 }
3149
3150 fn build_test_snapshot() -> ComposeSnapshot {
3151 let (diff, stat) = shared_file_diff();
3152 build_compose_snapshot(diff, stat).unwrap()
3153 }
3154
3155 fn write_file(dir: &TempDir, path: &str, contents: &str) {
3156 let full_path = dir.path().join(path);
3157 if let Some(parent) = full_path.parent() {
3158 fs::create_dir_all(parent).unwrap();
3159 }
3160 fs::write(full_path, contents).unwrap();
3161 }
3162
3163 fn run_git(dir: &TempDir, args: &[&str]) -> String {
3164 let output = crate::git::git_command()
3165 .args(args)
3166 .current_dir(dir.path())
3167 .output()
3168 .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3169
3170 assert!(
3171 output.status.success(),
3172 "git {:?} failed: stdout={} stderr={}",
3173 args,
3174 String::from_utf8_lossy(&output.stdout),
3175 String::from_utf8_lossy(&output.stderr)
3176 );
3177
3178 String::from_utf8_lossy(&output.stdout).to_string()
3179 }
3180
3181 fn init_repo() -> TempDir {
3182 let dir = TempDir::new().unwrap();
3183 run_git(&dir, &["init"]);
3184 run_git(&dir, &["config", "user.name", "Compose Test"]);
3185 run_git(&dir, &["config", "user.email", "compose@test.local"]);
3186 run_git(&dir, &["config", "commit.gpgsign", "false"]);
3187 dir
3188 }
3189
3190 fn commit_all(dir: &TempDir, message: &str) {
3191 run_git(dir, &["add", "."]);
3192 run_git(dir, &["commit", "-m", message]);
3193 }
3194
3195 fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3196 (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3197 }
3198
3199 #[test]
3200 fn test_compose_file_category_treats_prompts_as_functional_source() {
3201 let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3202index 1111111..2222222 100644
3203--- a/prompts/analysis/default.md
3204+++ b/prompts/analysis/default.md
3205@@ -1,1 +1,1 @@
3206-old prompt
3207+new prompt
3208diff --git a/system/analysis/default.md b/system/analysis/default.md
3209index 5555555..6666666 100644
3210--- a/system/analysis/default.md
3211+++ b/system/analysis/default.md
3212@@ -1,1 +1,1 @@
3213-old system
3214+new system
3215diff --git a/README.md b/README.md
3216index 3333333..4444444 100644
3217--- a/README.md
3218+++ b/README.md
3219@@ -1,1 +1,1 @@
3220-old docs
3221+new docs
3222";
3223 let snapshot = build_compose_snapshot(diff, "").unwrap();
3224 let prompt_file = snapshot
3225 .file_by_path("prompts/analysis/default.md")
3226 .unwrap();
3227 let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3228 let readme_file = snapshot.file_by_path("README.md").unwrap();
3229
3230 assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3231 assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3232 assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3233
3234 let feat_group = ComposeIntentGroup {
3235 group_id: "G1".to_string(),
3236 commit_type: CommitType::new("feat").unwrap(),
3237 scope: None,
3238 file_ids: vec![prompt_file.file_id.clone()],
3239 rationale: "prompt behavior change".to_string(),
3240 dependencies: vec![],
3241 };
3242 assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3243
3244 let fallback_type =
3245 fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3246 .unwrap();
3247 assert_eq!(fallback_type.as_str(), "refactor");
3248 }
3249
3250 fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3251 let mut diff = String::new();
3252
3253 for file_idx in 0..file_count {
3254 let path = format!("src/module_{file_idx:03}.rs");
3255 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3256 diff.push_str("index 1111111..2222222 100644\n");
3257 writeln!(diff, "--- a/{path}").unwrap();
3258 writeln!(diff, "+++ b/{path}").unwrap();
3259
3260 for hunk_idx in 0..hunks_per_file {
3261 let line_no = (hunk_idx * 4) + 1;
3262 writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3263 writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3264 writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3265 }
3266 }
3267
3268 build_compose_snapshot(&diff, "").unwrap()
3269 }
3270
3271 fn build_multi_area_snapshot() -> ComposeSnapshot {
3272 let mut diff = String::new();
3273 let areas = [
3274 ("apps/frontend/src/server", 72),
3275 ("packages/model/src/models", 54),
3276 ("apps/daemon/src/worker", 43),
3277 (".github/workflows", 16),
3278 ];
3279
3280 for (prefix, count) in areas {
3281 for file_idx in 0..count {
3282 let path = format!("{prefix}/file_{file_idx:03}.rs");
3283 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3284 diff.push_str("index 1111111..2222222 100644\n");
3285 writeln!(diff, "--- a/{path}").unwrap();
3286 writeln!(diff, "+++ b/{path}").unwrap();
3287 diff.push_str("@@ -1,1 +1,1 @@\n");
3288 writeln!(diff, "-old_{file_idx}").unwrap();
3289 writeln!(diff, "+new_{file_idx}").unwrap();
3290 }
3291 }
3292
3293 build_compose_snapshot(&diff, "").unwrap()
3294 }
3295
3296 fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3297 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3298 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3299 let groups = vec![
3300 ComposeIntentGroup {
3301 group_id: "G1".to_string(),
3302 commit_type: CommitType::new("refactor").unwrap(),
3303 scope: None,
3304 file_ids: vec![source_file.file_id.clone(), test_file.file_id.clone()],
3305 rationale: "implementation group".to_string(),
3306 dependencies: vec![],
3307 },
3308 ComposeIntentGroup {
3309 group_id: "G2".to_string(),
3310 commit_type: CommitType::new("refactor").unwrap(),
3311 scope: None,
3312 file_ids: vec![source_file.file_id.clone()],
3313 rationale: "shared file follow-up".to_string(),
3314 dependencies: vec!["G1".to_string()],
3315 },
3316 ];
3317 let dependency_order =
3318 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3319 .unwrap();
3320 ComposeIntentPlan { groups, dependency_order }
3321 }
3322
/// Runs a two-group executable plan (`G1` = `src/a.rs`, `G2` = `src/b.rs`, with
/// `G2` depending on `G1`) through `execute_compose_with_prepared_messages`.
///
/// Expects two hashes back, HEAD equal to the second hash, and an empty
/// `git diff --cached` afterwards.
#[test]
fn test_execute_compose_with_temp_index_applies_two_group_plan() {
    let dir = init_repo();
    write_file(&dir, "src/a.rs", "fn a() {}\n");
    write_file(&dir, "src/b.rs", "fn b() {}\n");
    commit_all(&dir, "initial");
    // Leave both files modified but uncommitted.
    write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
    write_file(&dir, "src/b.rs", "fn b_changed() {}\n");

    let repo_path = dir.path().to_str().unwrap();
    let snapshot = build_compose_snapshot(
        &get_compose_diff(repo_path).unwrap(),
        &get_compose_stat(repo_path).unwrap(),
    )
    .unwrap();
    let file_a = snapshot.file_by_path("src/a.rs").unwrap();
    let file_b = snapshot.file_by_path("src/b.rs").unwrap();

    let first_group = ComposeExecutableGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![file_a.file_id.clone()],
        rationale: "change a".to_string(),
        dependencies: vec![],
        hunk_ids: file_a.hunk_ids.clone(),
    };
    let second_group = ComposeExecutableGroup {
        group_id: "G2".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![file_b.file_id.clone()],
        rationale: "change b".to_string(),
        dependencies: vec!["G1".to_string()],
        hunk_ids: file_b.hunk_ids.clone(),
    };
    let plan = ComposeExecutablePlan {
        groups: vec![first_group, second_group],
        dependency_order: vec![0, 1],
    };

    let config = CommitConfig::default();
    let args = Args {
        dir: dir.path().to_string_lossy().into_owned(),
        compose: true,
        ..Default::default()
    };
    let base_state = capture_compose_base_state(&args.dir).unwrap();
    let messages = vec![canned_message("change a"), canned_message("change b")];

    let hashes = execute_compose_with_prepared_messages(
        &snapshot,
        &plan,
        &config,
        &args,
        &base_state,
        messages,
    )
    .unwrap();

    assert_eq!(hashes.len(), 2);
    assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
    let staged_after = run_git(&dir, &["diff", "--cached"]);
    assert!(staged_after.trim().is_empty());
}
3382
/// Executes a plan whose only group references the hunk id `F999-H001` while
/// `sentinel.txt` is staged in the repository.
///
/// Expects an error mentioning "unknown hunk id", an unchanged HEAD, and a
/// `git diff --cached` output identical to the one captured before the run.
#[test]
fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
    let dir = init_repo();
    write_file(&dir, "src/lib.rs", "old\n");
    write_file(&dir, "sentinel.txt", "base\n");
    commit_all(&dir, "initial");
    let repo_path = dir.path().to_str().unwrap();
    let initial_head = get_head_hash(repo_path).unwrap();

    write_file(&dir, "src/lib.rs", "changed\n");

    // Stage a sentinel change so the staged diff can be compared afterwards.
    write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
    run_git(&dir, &["add", "sentinel.txt"]);
    let staged_before = run_git(&dir, &["diff", "--cached"]);
    assert!(staged_before.contains("staged sentinel"));

    let snapshot = build_compose_snapshot(
        &get_compose_diff(repo_path).unwrap(),
        &get_compose_stat(repo_path).unwrap(),
    )
    .unwrap();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    let bad_group = ComposeExecutableGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("fix").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: "unstageable group".to_string(),
        dependencies: vec![],
        hunk_ids: vec!["F999-H001".to_string()],
    };
    let plan = ComposeExecutablePlan { groups: vec![bad_group], dependency_order: vec![0] };

    let config = CommitConfig::default();
    let args = Args {
        dir: dir.path().to_string_lossy().into_owned(),
        compose: true,
        ..Default::default()
    };
    let base_state = capture_compose_base_state(&args.dir).unwrap();
    let messages = vec![canned_message("unstageable group")];

    let err = execute_compose_with_prepared_messages(
        &snapshot,
        &plan,
        &config,
        &args,
        &base_state,
        messages,
    )
    .unwrap_err();

    assert!(err.to_string().contains("unknown hunk id"));
    assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
    assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
}
3440
/// Checks `auto_assign_hunks` on the shared-file plan: exactly one file is
/// reported as ambiguous, and every hunk of `tests/lib.rs` (listed only in
/// `G1`) ends up in `G1`'s assigned set.
#[test]
fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
    let snapshot = build_test_snapshot();
    let plan = build_shared_intent_plan(&snapshot);
    let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &plan).unwrap();

    assert_eq!(ambiguous.len(), 1);

    let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
    let g1_hunks = assigned.get("G1").unwrap();
    let fully_assigned = test_file
        .hunk_ids
        .iter()
        .all(|hunk_id| g1_hunks.contains(hunk_id));
    assert!(fully_assigned, "uniquely owned file should be auto-assigned");
}
3458
/// Feeds `evaluate_binding` a response that assigns the second hunk of
/// `src/lib.rs` to both `G1` and `G2`, merges the evaluation into the
/// auto-assigned hunks, resolves leftovers with `assign_unresolved_hunks`,
/// and finalizes the plan.
///
/// Expects a single surviving group, `G1`, containing every hunk of `src/lib.rs`.
#[test]
fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
    let snapshot = build_test_snapshot();
    let intent_plan = build_shared_intent_plan(&snapshot);
    let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
    let hunk_context = ambiguous_hunk_context(&ambiguous_files);
    let valid_group_ids: HashSet<&str> = intent_plan
        .groups
        .iter()
        .map(|entry| entry.group_id.as_str())
        .collect();

    let first_hunk = source_file.hunk_ids[0].clone();
    let second_hunk = source_file.hunk_ids[1].clone();
    // The second hunk is deliberately claimed by both groups.
    let bindings = [
        ComposeBindingAssignment {
            group_id: "G1".to_string(),
            hunk_ids: vec![first_hunk, second_hunk.clone()],
        },
        ComposeBindingAssignment {
            group_id: "G2".to_string(),
            hunk_ids: vec![second_hunk],
        },
    ];
    let evaluation = evaluate_binding(&bindings, &hunk_context, &valid_group_ids, &snapshot);

    // Fold the evaluated assignments into the auto-assigned ones.
    for (group_id, hunk_ids) in evaluation.assigned {
        assigned.entry(group_id).or_default().extend(hunk_ids);
    }

    // Rank each group by its position in the dependency order.
    let group_rank: HashMap<&str, usize> = intent_plan
        .dependency_order
        .iter()
        .enumerate()
        .map(|(rank, group_idx)| (intent_plan.groups[*group_idx].group_id.as_str(), rank))
        .collect();
    assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);

    let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
    assert_eq!(executable_plan.groups.len(), 1);

    let survivor = &executable_plan.groups[0];
    assert_eq!(survivor.group_id, "G1");
    let keeps_every_hunk = source_file
        .hunk_ids
        .iter()
        .all(|hunk_id| survivor.hunk_ids.contains(hunk_id));
    assert!(
        keeps_every_hunk,
        "fallback should keep every hunk from the shared file in the surviving group"
    );
}
3514
/// Builds an executable plan in which the first hunk of `src/lib.rs` is listed
/// by both `G1` and `G2`, and expects `validate_executable_plan` to fail with
/// an error containing "assigned to both".
#[test]
fn test_validate_executable_plan_rejects_overlap() {
    let snapshot = build_test_snapshot();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    let group_one = ComposeExecutableGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: "group one".to_string(),
        dependencies: vec![],
        hunk_ids: vec![source_file.hunk_ids[0].clone()],
    };
    // Repeats hunk 0, which `G1` already lists.
    let group_two = ComposeExecutableGroup {
        group_id: "G2".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: "group two".to_string(),
        dependencies: vec![],
        hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
    };
    let overlapping_plan = ComposeExecutablePlan {
        groups: vec![group_one, group_two],
        dependency_order: vec![0, 1],
    };

    let err = validate_executable_plan(&snapshot, &overlapping_plan).unwrap_err();
    assert!(err.to_string().contains("assigned to both"));
}
3546
/// Passes a group whose `file_ids` hold a bare path and a backtick-quoted path
/// to `normalize_intent_plan`.
///
/// Expects one group whose `file_ids` equal the snapshot's file ids in
/// snapshot order, and exactly two repair notes.
#[test]
fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
    let snapshot = build_test_snapshot();
    let planning_index = build_planning_index(&snapshot);
    let path_based_group = ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
        rationale: "normalize file references".to_string(),
        dependencies: vec![],
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![path_based_group]).unwrap();

    assert_eq!(normalized_groups.len(), 1);

    let expected_ids = snapshot
        .files
        .iter()
        .map(|file| file.file_id.clone())
        .collect::<Vec<_>>();
    assert_eq!(normalized_groups[0].file_ids, expected_ids);
    assert_eq!(repair_notes.len(), 2);
}
3574
/// Passes a single group that lists only `src/lib.rs` to `normalize_intent_plan`.
///
/// Expects the group to keep `src/lib.rs`, to gain `tests/lib.rs`, and exactly
/// one repair note that mentions the file id of `tests/lib.rs`.
#[test]
fn test_normalize_intent_plan_repairs_missing_files() {
    let snapshot = build_test_snapshot();
    let planning_index = build_planning_index(&snapshot);
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
    let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
    let partial_group = ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: "partial coverage".to_string(),
        dependencies: vec![],
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![partial_group]).unwrap();

    assert_eq!(normalized_groups.len(), 1);

    let repaired_ids = &normalized_groups[0].file_ids;
    assert!(
        repaired_ids.contains(&source_file.file_id),
        "existing file assignment should be preserved"
    );
    assert!(
        repaired_ids.contains(&test_file.file_id),
        "missing files should be assigned to an existing group"
    );
    assert_eq!(repair_notes.len(), 1);
    assert!(repair_notes[0].contains(&test_file.file_id));
}
3605
/// Passes two groups carrying unknown file references (`G3_PLACEHOLDER`,
/// `UNKNOWN_TARGET`) and irregular dependencies (`group 2`, a self-reference
/// to `G1`, and `F5`) to `normalize_intent_plan`.
///
/// Expects both groups to survive, the first group's file ids to all start
/// with `F`, `G1` to depend only on `G2`, `G2` to have no dependencies, and
/// repair notes for each of the four kinds of repair.
#[test]
fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);
    let frontend_target = planning_index
        .targets
        .iter()
        .find(|candidate| candidate.label.starts_with("apps/frontend"))
        .unwrap();
    let model_target = planning_index
        .targets
        .iter()
        .find(|candidate| candidate.label.starts_with("packages/model"))
        .unwrap();

    let frontend_group = ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: Scope::new("apps/frontend").ok(),
        file_ids: vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
        rationale: "frontend platform updates".to_string(),
        dependencies: vec!["group 2".to_string(), "G1".to_string()],
    };
    let model_group = ComposeIntentGroup {
        group_id: "G2".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: Scope::new("packages/model").ok(),
        file_ids: vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
        rationale: "model storage updates".to_string(),
        dependencies: vec!["F5".to_string()],
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![frontend_group, model_group])
            .unwrap();

    assert_eq!(normalized_groups.len(), 2);
    assert!(
        normalized_groups[0]
            .file_ids
            .iter()
            .all(|file_id| file_id.starts_with('F'))
    );
    assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
    assert!(normalized_groups[1].dependencies.is_empty());

    // True when at least one repair note contains `needle`.
    let has_note = |needle: &str| repair_notes.iter().any(|note| note.contains(needle));
    assert!(has_note("Dropped unknown planning target"));
    assert!(has_note("Dropped self-dependency"));
    assert!(has_note("Mapped compose planner dependency"));
    assert!(has_note("Dropped unknown dependency"));
}
3672
/// Renders the summary of the small test snapshot and expects no
/// "# snapshot compacted" marker and every hunk id of `src/lib.rs` to appear.
#[test]
fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
    let snapshot = build_test_snapshot();
    let rendered = render_snapshot_summary(&snapshot, &[]);
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    assert!(!rendered.contains("# snapshot compacted"));
    for hunk_id in source_file.hunk_ids.iter() {
        assert!(rendered.contains(hunk_id.as_str()));
    }
}
3684
/// Renders the summary of a 160-file, 4-hunks-per-file snapshot.
///
/// Expects the "# snapshot compacted" marker, the `F001` file line, the first
/// and last hunk ids of `F001`, no middle hunk ids, and an omission note for
/// the two skipped hunks.
#[test]
fn test_render_snapshot_summary_compacts_large_snapshot() {
    let snapshot = build_large_snapshot(160, 4);
    let rendered = render_snapshot_summary(&snapshot, &[]);

    assert!(rendered.contains("# snapshot compacted"));
    assert!(rendered.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));

    // First and last hunks are kept.
    assert!(rendered.contains("F001-H001"));
    assert!(rendered.contains("F001-H004"));

    // Middle hunks are left out and replaced by an omission note.
    assert!(!rendered.contains("F001-H002"));
    assert!(!rendered.contains("F001-H003"));
    assert!(rendered.contains("... 2 more hunks omitted from F001"));
}
3698
/// Builds the planning index for the multi-area snapshot.
///
/// Expects `PlanningMode::Area`, fewer targets than files, at least one target
/// labelled under `apps/frontend`, and a planning stat containing "planning over".
#[test]
fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    assert_eq!(planning_index.mode, PlanningMode::Area);
    assert!(planning_index.targets.len() < snapshot.files.len());

    let has_frontend_target = planning_index
        .targets
        .iter()
        .any(|candidate| candidate.label.starts_with("apps/frontend"));
    assert!(has_frontend_target);

    let stat = render_planning_stat(&planning_index);
    assert!(stat.contains("planning over"), "planning stat should explain the area mode");
}
3717
/// Splits the planning targets' labels at the midpoint into two groups and
/// passes them to `normalize_intent_plan`.
///
/// Expects two groups whose file ids all start with `F`, at least one repair
/// note, and as many distinct file ids across both groups as the snapshot has files.
#[test]
fn test_normalize_intent_plan_expands_area_targets() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);
    let midpoint = planning_index.targets.len() / 2;

    // Collect every label once, then cut the list in two at the midpoint.
    let mut first_half_labels: Vec<String> = planning_index
        .targets
        .iter()
        .map(|candidate| candidate.label.clone())
        .collect();
    let second_half_labels = first_half_labels.split_off(midpoint);

    let first_group = ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: first_half_labels,
        rationale: "frontend and model".to_string(),
        dependencies: vec![],
    };
    let second_group = ComposeIntentGroup {
        group_id: "G2".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: second_half_labels,
        rationale: "daemon and ci".to_string(),
        dependencies: vec![],
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![first_group, second_group])
            .unwrap();

    assert_eq!(normalized_groups.len(), 2);

    let only_file_ids = normalized_groups
        .iter()
        .flat_map(|entry| entry.file_ids.iter())
        .all(|file_id| file_id.starts_with('F'));
    assert!(only_file_ids, "area targets should expand back to concrete file IDs");
    assert!(!repair_notes.is_empty());

    let distinct_ids = normalized_groups
        .iter()
        .flat_map(|entry| entry.file_ids.iter())
        .collect::<HashSet<_>>();
    assert_eq!(distinct_ids.len(), snapshot.files.len());
}
3775
/// Puts every file of the multi-area snapshot into one group and checks the
/// large-patch fallback.
///
/// Expects `should_force_large_patch_fallback` to return true, and
/// `build_large_patch_fallback_groups` to produce at least three groups that
/// together cover every snapshot file, one of them with "frontend" in its rationale.
#[test]
fn test_large_patch_fallback_splits_monolithic_area_plan() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    let every_file_id = snapshot
        .files
        .iter()
        .map(|file| file.file_id.clone())
        .collect();
    let monolithic_group = ComposeIntentGroup {
        group_id: "G1".to_string(),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: every_file_id,
        rationale: "repo-wide refactor".to_string(),
        dependencies: vec![],
    };

    let forced =
        should_force_large_patch_fallback(&snapshot, &planning_index, &[monolithic_group], 6);
    assert!(forced);

    let fallback_groups =
        build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
    assert!(fallback_groups.len() >= 3);

    let covered_ids = fallback_groups
        .iter()
        .flat_map(|entry| entry.file_ids.iter())
        .collect::<HashSet<_>>();
    assert_eq!(covered_ids.len(), snapshot.files.len());

    let mentions_frontend = fallback_groups
        .iter()
        .any(|entry| entry.rationale.contains("frontend"));
    assert!(mentions_frontend, "fallback should preserve workstream identity");
}
3818
/// With a 160-file, 4-hunks-per-file snapshot and `map_reduce_threshold` set to
/// 1000, expects `should_use_map_reduce` to be true for the snapshot diff while
/// `should_collect_compose_observations` is false.
#[test]
fn test_should_collect_compose_observations_skips_area_mode() {
    let snapshot = build_large_snapshot(160, 4);
    let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
    let counter = create_token_counter(&config);

    let wants_map_reduce = should_use_map_reduce(&snapshot.diff, &config, &counter);
    assert!(wants_map_reduce);

    let collects_observations = should_collect_compose_observations(&snapshot, &config, &counter);
    assert!(!collects_observations);
}
3828
/// With `map_reduce_threshold` set to 20 and a diff carrying a 200-character
/// added line, expects `compose_analysis_strategy` to pick `MapReduce`.
#[test]
fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
    let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
    let counter = create_token_counter(&config);
    let added_line = "a".repeat(200);
    let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{added_line}");

    let strategy = compose_analysis_strategy(&diff, &config, &counter);

    assert_eq!(strategy, ComposeAnalysisStrategy::MapReduce);
}
3841
/// With map-reduce disabled, `max_diff_tokens` set to 1 and `max_diff_length`
/// set to 10000, expects `compose_truncation_length` to be 4 and
/// `compose_analysis_strategy` to pick `SmartTruncate` for a short diff.
#[test]
fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
    let config = CommitConfig {
        map_reduce_enabled: false,
        max_diff_tokens: 1,
        max_diff_length: 10_000,
        ..Default::default()
    };
    let counter = create_token_counter(&config);
    assert_eq!(compose_truncation_length(&config), 4);

    let diff = "diff --git a/models.json b/models.json\n+large";
    let strategy = compose_analysis_strategy(diff, &config, &counter);

    assert_eq!(strategy, ComposeAnalysisStrategy::SmartTruncate);
}
3862
/// With `map_reduce_threshold` and `max_diff_tokens` set to 1000 and
/// `max_diff_length` set to 10000, expects `compose_analysis_strategy` to pick
/// `Direct` for a two-line diff.
#[test]
fn test_compose_analysis_strategy_keeps_small_group_direct() {
    let config = CommitConfig {
        map_reduce_threshold: 1_000,
        max_diff_tokens: 1_000,
        max_diff_length: 10_000,
        ..Default::default()
    };
    let counter = create_token_counter(&config);

    let diff = "diff --git a/a.rs b/a.rs\n+a";
    let strategy = compose_analysis_strategy(diff, &config, &counter);

    assert_eq!(strategy, ComposeAnalysisStrategy::Direct);
}
3878
/// Chunks three ambiguous files carrying 70, 60 and 10 hunks.
///
/// Expects two batches (one file, then two files), 140 hunks in total, and
/// every batch within `MAX_BIND_FILES_PER_REQUEST` files and
/// `MAX_BIND_HUNKS_PER_REQUEST` hunks.
#[test]
fn test_chunk_ambiguous_files_splits_large_binding_request() {
    // Builds one binding with hunk ids `{file_id}-H001` through `{file_id}-H{hunk_total}`.
    let binding = |file_id: &str, path: &str, candidates: [&str; 2], hunk_total: usize| {
        AmbiguousFileBinding {
            file_id: file_id.to_string(),
            path: path.to_string(),
            candidate_group_ids: candidates.into_iter().map(String::from).collect(),
            hunk_ids: (1..=hunk_total)
                .map(|idx| format!("{file_id}-H{idx:03}"))
                .collect(),
        }
    };
    let ambiguous_files = vec![
        binding("F001", "src/alpha.rs", ["G1", "G2"], 70),
        binding("F002", "src/beta.rs", ["G1", "G3"], 60),
        binding("F003", "src/gamma.rs", ["G2", "G3"], 10),
    ];

    let batches = chunk_ambiguous_files(&ambiguous_files);
    let total_hunks: usize = batches
        .iter()
        .flatten()
        .map(|file| file.hunk_ids.len())
        .sum();

    assert_eq!(batches.len(), 2);
    assert_eq!(batches[0].len(), 1);
    assert_eq!(batches[1].len(), 2);
    assert_eq!(total_hunks, 140);

    for batch in batches.iter() {
        let batch_hunks: usize = batch.iter().map(|file| file.hunk_ids.len()).sum();
        assert!(
            batch.len() <= MAX_BIND_FILES_PER_REQUEST && batch_hunks <= MAX_BIND_HUNKS_PER_REQUEST
        );
    }
}
3919}