1use std::{
2 borrow::Cow,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 fs,
6 path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13 api::{
14 AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15 generate_summary_from_analysis, run_oneshot, strict_json_schema,
16 },
17 compose_types::{
18 ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19 ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20 },
21 config::CommitConfig,
22 diff::smart_truncate_diff,
23 error::{CommitGenError, Result},
24 git::{
25 TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref,
26 get_compose_diff_with_config, get_compose_stat, get_git_dir, get_head_hash,
27 read_tree_into_index, reset_mixed_to, reset_paths_to, update_ref_checked, write_index_tree,
28 write_real_index_tree,
29 },
30 map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
31 normalization::{format_commit_message, post_process_commit_message},
32 patch::{
33 StageResult, build_compose_snapshot, create_executable_group_patch,
34 force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
35 stage_executable_group_in_index,
36 },
37 style, templates,
38 tokens::{TokenCounter, create_token_counter},
39 types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
40 validation::validate_commit_message,
41};
42
/// Observations rendered per file when the snapshot summary is not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and stored inside every cached plan.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// File count above which the snapshot summary uses the medium compaction budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Hunk count above which the snapshot summary uses the medium compaction budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// File count above which the summary uses the tightest budget and planning
/// switches to area mode.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Hunk count above which the summary uses the tightest budget and planning
/// switches to area mode.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// A planning bucket with at most this many files (and few enough hunks) is not split further.
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A planning bucket with at most this many hunks (and few enough files) is not split further.
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path-prefix depth at which bucket splitting stops regardless of bucket size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the next five constants are not referenced in the visible part of this
// file; their meaning is inferred from the names only — confirm against their users.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
59
/// Repository state recorded by `capture_compose_base_state`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComposeBaseState {
    /// Value returned by `get_head_hash` at capture time.
    head_hash: String,
    /// Value returned by `current_head_ref` at capture time.
    head_ref: String,
    /// Value returned by `write_real_index_tree` at capture time.
    index_tree: String,
}
66
67#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
68pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
69 Ok(ComposeBaseState {
70 head_hash: get_head_hash(dir)?,
71 head_ref: current_head_ref(dir)?,
72 index_tree: write_real_index_tree(dir)?,
73 })
74}
75
/// How a diff is analysed, as selected by `compose_analysis_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeAnalysisStrategy {
    /// The diff is within both the length and token limits.
    Direct,
    /// The diff exceeds the configured length or token limit.
    SmartTruncate,
    /// `should_use_map_reduce` accepted the diff.
    MapReduce,
}
82
83fn compose_analysis_strategy(
84 diff: &str,
85 config: &CommitConfig,
86 counter: &TokenCounter,
87) -> ComposeAnalysisStrategy {
88 if should_use_map_reduce(diff, config, counter) {
89 return ComposeAnalysisStrategy::MapReduce;
90 }
91
92 let diff_tokens = counter.count_sync(diff);
93 if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
94 return ComposeAnalysisStrategy::SmartTruncate;
95 }
96
97 ComposeAnalysisStrategy::Direct
98}
99
100fn compose_truncation_length(config: &CommitConfig) -> usize {
101 config
102 .max_diff_length
103 .min(config.max_diff_tokens.saturating_mul(4))
104 .max(1)
105}
106
/// Serde wrapper holding a list of intent groups under the `groups` key.
// NOTE(review): presumably the payload shape described by `build_intent_schema` — confirm.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
    groups: Vec<ComposeIntentGroup>,
}
111
/// Serde wrapper holding a list of binding assignments under the `assignments` key.
// NOTE(review): presumably the payload shape described by `build_binding_schema` — confirm.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
    assignments: Vec<ComposeBindingAssignment>,
}
116
/// On-disk representation of a cached executable plan.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
    /// Must equal `COMPOSE_PLAN_SCHEMA_VERSION` for the cache entry to be used.
    schema_version: String,
    /// Must equal the key from `compose_plan_cache_key` for the current inputs.
    cache_key: String,
    /// The cached plan itself.
    plan: ComposeExecutablePlan,
}
123
// NOTE(review): not used in the visible part of this file. Judging by the field names,
// this describes a file whose hunks could belong to several candidate groups — confirm.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
    file_id: String,
    path: String,
    candidate_group_ids: Vec<String>,
    hunk_ids: Vec<String>,
}
131
// NOTE(review): not used in the visible part of this file. Presumably the candidate
// groups for a single ambiguous hunk — confirm against its users.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
    candidate_group_ids: Vec<String>,
}
136
// NOTE(review): not used in the visible part of this file; which side is the group ID
// and which the hunk ID cannot be determined from here — confirm against its users.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
138
// NOTE(review): not used in the visible part of this file. Presumably the outcome of
// checking a binding response: what was assigned and what is still unresolved — confirm.
#[derive(Debug)]
struct BindingEvaluation {
    assigned: HashMap<String, Vec<String>>,
    unresolved: Vec<String>,
}
144
/// Limits applied when rendering the snapshot summary.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
    /// Maximum observations rendered per file.
    max_observations_per_file: usize,
    /// Maximum hunks rendered per file; `None` renders every hunk.
    max_hunks_per_file: Option<usize>,
}
150
/// Granularity of the planning targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
    /// One target per file; used when the snapshot is within the large thresholds.
    File,
    /// Targets are path-based areas; used when the snapshot exceeds a large threshold.
    Area,
}
156
/// A unit that planning refers to: either a single file or an area of files.
#[derive(Debug, Clone)]
struct PlanningTarget {
    /// The file's own ID in file mode, or a generated `A001`-style ID in area mode.
    target_id: String,
    /// The file path in file mode, or the bucket label in area mode.
    label: String,
    /// IDs of the files this target expands to.
    file_ids: Vec<String>,
    /// Total number of hunks across `file_ids`.
    hunk_count: usize,
    /// Sum of the files' `additions` counters.
    additions: usize,
    /// Sum of the files' `deletions` counters.
    deletions: usize,
}
166
/// Planning targets for a snapshot, plus a lookup table of accepted spellings.
#[derive(Debug, Clone)]
struct PlanningIndex {
    /// Whether targets are individual files or path-based areas.
    mode: PlanningMode,
    /// All planning targets, in construction order.
    targets: Vec<PlanningTarget>,
    /// Maps a target's ID, its upper-cased ID and its normalized label to the target ID.
    aliases: HashMap<String, String>,
}
173
/// A group of files produced by `collect_planning_buckets`.
#[derive(Debug, Clone)]
struct PlanningBucket {
    /// Label computed by `planning_bucket_label` for the bucket's files.
    label: String,
    /// IDs of the files in this bucket.
    file_ids: Vec<String>,
}
179
180impl PlanningIndex {
181 fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
182 let mut expanded = Vec::new();
183 let mut seen_file_ids = HashSet::new();
184
185 for target_id in target_ids {
186 if let Some(target) = self
187 .targets
188 .iter()
189 .find(|candidate| candidate.target_id == *target_id)
190 {
191 for file_id in &target.file_ids {
192 if seen_file_ids.insert(file_id.clone()) {
193 expanded.push(file_id.clone());
194 }
195 }
196 }
197 }
198
199 expanded
200 }
201}
202
203impl SnapshotSummaryBudget {
204 const fn is_compacted(self) -> bool {
205 self.max_hunks_per_file.is_some()
206 }
207}
208
/// Reports whether `path` names a dependency manifest or lock file.
///
/// The final path component must either match one of the known manifest names exactly
/// (case-sensitive) or have a `lock`/`lockb` extension (ASCII case-insensitive).
/// Paths with no final component, or a non-UTF-8 one, yield `false`.
fn is_dependency_manifest(path: &str) -> bool {
    const DEP_MANIFESTS: &[&str] = &[
        "Cargo.toml",
        "Cargo.lock",
        "package.json",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "build.gradle",
        "build.gradle.kts",
        "gradle.properties",
        "pom.xml",
    ];

    match Path::new(path).file_name().and_then(|name| name.to_str()) {
        None => false,
        Some(name) if DEP_MANIFESTS.iter().any(|known| *known == name) => true,
        Some(name) => match Path::new(name).extension() {
            Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
            None => false,
        },
    }
}
248
249fn save_debug_artifact<T: Serialize>(
250 debug_dir: Option<&Path>,
251 filename: &str,
252 value: &T,
253) -> Result<()> {
254 let Some(debug_dir) = debug_dir else {
255 return Ok(());
256 };
257
258 fs::create_dir_all(debug_dir)?;
259 let path = debug_dir.join(filename);
260 let json = serde_json::to_string_pretty(value)?;
261 fs::write(path, json)?;
262 Ok(())
263}
264
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a and returns the digest as
/// 16 lower-case, zero-padded hex digits.
fn fnv1a_64(input: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    let digest = input
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
273
274fn compose_plan_cache_key(
275 snapshot: &ComposeSnapshot,
276 max_commits: usize,
277 analysis_model: &str,
278) -> String {
279 fnv1a_64(&format!(
280 "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
281 snapshot.diff, snapshot.stat
282 ))
283}
284
285fn compose_plan_cache_path(
286 dir: &str,
287 snapshot: &ComposeSnapshot,
288 max_commits: usize,
289 analysis_model: &str,
290) -> Result<PathBuf> {
291 let git_dir = get_git_dir(dir)?;
292 Ok(git_dir.join("llm-git").join(format!(
293 "compose-plan-{}.json",
294 compose_plan_cache_key(snapshot, max_commits, analysis_model)
295 )))
296}
297
298fn load_cached_plan(
299 dir: &str,
300 snapshot: &ComposeSnapshot,
301 max_commits: usize,
302 analysis_model: &str,
303) -> Result<Option<ComposeExecutablePlan>> {
304 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
305 if !cache_path.exists() {
306 return Ok(None);
307 }
308
309 let content = match fs::read_to_string(&cache_path) {
310 Ok(content) => content,
311 Err(err) => {
312 eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
313 return Ok(None);
314 },
315 };
316 let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
317 Ok(cached) => cached,
318 Err(err) => {
319 eprintln!(
320 "{}",
321 style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
322 );
323 let _ = fs::remove_file(&cache_path);
324 return Ok(None);
325 },
326 };
327 let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
328
329 if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
330 return Ok(None);
331 }
332 if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
333 eprintln!(
334 "{}",
335 style::warning(&format!(
336 "Discarding cached compose plan (no longer valid for current snapshot): {err}"
337 ))
338 );
339 let _ = fs::remove_file(&cache_path);
340 return Ok(None);
341 }
342 Ok(Some(cached.plan))
343}
344
345fn save_cached_plan(
346 dir: &str,
347 snapshot: &ComposeSnapshot,
348 max_commits: usize,
349 analysis_model: &str,
350 plan: &ComposeExecutablePlan,
351) -> Result<()> {
352 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
353 if let Some(parent) = cache_path.parent() {
354 fs::create_dir_all(parent)?;
355 }
356
357 let cached = ComposeCachedPlan {
358 schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
359 cache_key: compose_plan_cache_key(snapshot, max_commits, analysis_model),
360 plan: plan.clone(),
361 };
362 fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
363 Ok(())
364}
365
/// Formats a line range starting at `start` and spanning `count` lines.
///
/// A zero count renders as `"0"`, a single line as its number, and anything longer
/// as `start-end` with an inclusive end.
fn format_line_range(start: usize, count: usize) -> String {
    if count == 0 {
        return "0".to_string();
    }
    if count == 1 {
        return start.to_string();
    }

    let end = start + count - 1;
    format!("{start}-{end}")
}
373
374const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
375 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
376 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
377 {
378 SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
379 } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
380 || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
381 {
382 SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
383 } else {
384 SnapshotSummaryBudget {
385 max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
386 max_hunks_per_file: None,
387 }
388 }
389}
390
/// Picks up to `max_samples` evenly spread indices from `0..count`.
///
/// When everything fits, all indices are returned. When `max_samples` is 0 or 1 but
/// `count` is larger, only index 0 is returned. Otherwise the first and last index are
/// always included and the rest are spaced proportionally in between.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
    if count <= max_samples {
        return (0..count).collect();
    }
    if max_samples <= 1 {
        return vec![0];
    }

    let last = count - 1;
    let intervals = max_samples - 1;
    let mut positions: Vec<usize> = (0..max_samples)
        .map(|slot| slot * last / intervals)
        .collect();
    // The sequence is non-decreasing, so dropping adjacent repeats removes all duplicates.
    positions.dedup();
    positions
}
410
411fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
412 match budget.max_hunks_per_file {
413 None => file.hunk_ids.iter().map(String::as_str).collect(),
414 Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
415 .into_iter()
416 .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
417 .collect(),
418 }
419}
420
421fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
422 let budget = snapshot_summary_budget(snapshot);
423 let observations_by_file: HashMap<&str, Vec<&str>> = observations
424 .iter()
425 .map(|observation| {
426 (
427 observation.file.as_str(),
428 observation
429 .observations
430 .iter()
431 .map(String::as_str)
432 .take(budget.max_observations_per_file)
433 .collect(),
434 )
435 })
436 .collect();
437
438 let mut out = String::new();
439 if budget.is_compacted() {
440 let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
441 writeln!(
442 out,
443 "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
444 representative hunks and {} observation(s) per file",
445 budget.max_observations_per_file
446 )
447 .unwrap();
448 }
449
450 for file in &snapshot.files {
451 writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
452 if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
453 for observation in file_observations {
454 writeln!(out, " observation: {observation}").unwrap();
455 }
456 }
457
458 let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
459 for hunk_id in &rendered_hunk_ids {
460 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
461 if hunk.synthetic {
462 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
463 } else {
464 writeln!(
465 out,
466 " - {} old:{} new:{} :: {}",
467 hunk.hunk_id,
468 format_line_range(hunk.old_start, hunk.old_count),
469 format_line_range(hunk.new_start, hunk.new_count),
470 hunk.snippet
471 )
472 .unwrap();
473 }
474 }
475 }
476
477 let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
478 if omitted_hunks > 0 {
479 writeln!(out, " ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
480 }
481 }
482
483 out
484}
485
486const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
487 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
488 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
489 {
490 PlanningMode::Area
491 } else {
492 PlanningMode::File
493 }
494}
495
/// Counts the `/`-separated segments of `path`; an empty path counts as one segment.
fn path_depth(path: &str) -> usize {
    // One more segment than there are separators.
    path.matches('/').count() + 1
}
499
/// Returns the first `depth` `/`-separated segments of `path`, re-joined with `/`.
///
/// A depth of zero yields an empty string; a depth beyond the path's length yields the
/// whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
    let leading_segments: Vec<&str> = path.split('/').take(depth).collect();
    leading_segments.join("/")
}
509
/// Returns the longest leading run of whole `/`-separated segments shared by all
/// `paths`, joined with `/`. An empty slice, or paths with no common first segment,
/// yield an empty string.
fn common_path_prefix(paths: &[String]) -> String {
    let Some((first, rest)) = paths.split_first() else {
        return String::new();
    };

    let mut shared: Vec<&str> = first.split('/').collect();
    for other in rest {
        // Nothing left to narrow once the shared prefix is empty.
        if shared.is_empty() {
            break;
        }

        let matching = shared
            .iter()
            .zip(other.split('/'))
            .take_while(|(left, right)| **left == *right)
            .count();
        shared.truncate(matching);
    }

    shared.join("/")
}
531
532fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
533 file_ids
534 .iter()
535 .filter_map(|file_id| snapshot.file_by_id(file_id))
536 .map(|file| file.hunk_ids.len())
537 .sum()
538}
539
540fn group_file_ids_by_prefix(
541 snapshot: &ComposeSnapshot,
542 file_ids: &[String],
543 depth: usize,
544) -> BTreeMap<String, Vec<String>> {
545 let mut groups = BTreeMap::new();
546
547 for file_id in file_ids {
548 if let Some(file) = snapshot.file_by_id(file_id) {
549 groups
550 .entry(prefix_at_depth(&file.path, depth))
551 .or_insert_with(Vec::new)
552 .push(file_id.clone());
553 }
554 }
555
556 groups
557}
558
559fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
560 let paths: Vec<String> = file_ids
561 .iter()
562 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
563 .collect();
564
565 let common_prefix = common_path_prefix(&paths);
566 if common_prefix.is_empty() {
567 paths.first().cloned().unwrap_or_else(|| "misc".to_string())
568 } else {
569 common_prefix
570 }
571}
572
573fn collect_planning_buckets(
574 snapshot: &ComposeSnapshot,
575 file_ids: &[String],
576 depth: usize,
577) -> Vec<PlanningBucket> {
578 let file_count = file_ids.len();
579 let hunk_count = bucket_hunk_count(snapshot, file_ids);
580 let max_path_depth = file_ids
581 .iter()
582 .filter_map(|file_id| snapshot.file_by_id(file_id))
583 .map(|file| path_depth(&file.path))
584 .max()
585 .unwrap_or(depth);
586
587 let should_stop =
588 file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
589 if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
590 return vec![PlanningBucket {
591 label: planning_bucket_label(snapshot, file_ids),
592 file_ids: file_ids.to_vec(),
593 }];
594 }
595
596 let next_depth = depth + 1;
597 let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
598 if groups.len() <= 1 {
599 return collect_planning_buckets(snapshot, file_ids, next_depth);
600 }
601
602 groups
603 .into_values()
604 .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
605 .collect()
606}
607
608fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
609 let all_file_ids: Vec<String> = snapshot
610 .files
611 .iter()
612 .map(|file| file.file_id.clone())
613 .collect();
614 let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
615
616 buckets
617 .into_iter()
618 .enumerate()
619 .map(|(idx, bucket)| {
620 let mut additions = 0_usize;
621 let mut deletions = 0_usize;
622 let mut hunk_count = 0_usize;
623
624 for file_id in &bucket.file_ids {
625 if let Some(file) = snapshot.file_by_id(file_id) {
626 additions = additions.saturating_add(file.additions);
627 deletions = deletions.saturating_add(file.deletions);
628 hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
629 }
630 }
631
632 PlanningTarget {
633 target_id: format!("A{:03}", idx + 1),
634 label: bucket.label,
635 file_ids: bucket.file_ids,
636 hunk_count,
637 additions,
638 deletions,
639 }
640 })
641 .collect()
642}
643
644fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
645 snapshot
646 .files
647 .iter()
648 .map(|file| PlanningTarget {
649 target_id: file.file_id.clone(),
650 label: file.path.clone(),
651 file_ids: vec![file.file_id.clone()],
652 hunk_count: file.hunk_ids.len(),
653 additions: file.additions,
654 deletions: file.deletions,
655 })
656 .collect()
657}
658
659fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
660 let mode = planning_mode_for_snapshot(snapshot);
661 let targets = match mode {
662 PlanningMode::File => build_file_planning_targets(snapshot),
663 PlanningMode::Area => build_area_planning_targets(snapshot),
664 };
665
666 let aliases = targets
667 .iter()
668 .flat_map(|target| {
669 let normalized_label = normalize_file_reference(&target.label);
670 [
671 (target.target_id.clone(), target.target_id.clone()),
672 (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
673 (normalized_label, target.target_id.clone()),
674 ]
675 })
676 .collect();
677
678 PlanningIndex { mode, targets, aliases }
679}
680
681fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
682 sample_positions(target.file_ids.len(), 4)
683 .into_iter()
684 .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
685 .collect()
686}
687
688fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
689 let hunk_ids: Vec<&String> = target
690 .file_ids
691 .iter()
692 .filter_map(|file_id| snapshot.file_by_id(file_id))
693 .flat_map(|file| file.hunk_ids.iter())
694 .collect();
695
696 sample_positions(hunk_ids.len(), 4)
697 .into_iter()
698 .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
699 .collect()
700}
701
702fn render_planning_stat(index: &PlanningIndex) -> String {
703 let mut out = String::new();
704
705 match index.mode {
706 PlanningMode::File => {
707 writeln!(out, "# planning over individual file IDs").unwrap();
708 },
709 PlanningMode::Area => {
710 writeln!(
711 out,
712 "# planning over {} area IDs spanning {} files",
713 index.targets.len(),
714 index
715 .targets
716 .iter()
717 .flat_map(|target| target.file_ids.iter())
718 .collect::<HashSet<_>>()
719 .len()
720 )
721 .unwrap();
722 },
723 }
724
725 for target in &index.targets {
726 writeln!(
727 out,
728 "{} {} | {} files | {} hunks | +{}/-{}",
729 target.target_id,
730 target.label,
731 target.file_ids.len(),
732 target.hunk_count,
733 target.additions,
734 target.deletions
735 )
736 .unwrap();
737 }
738
739 out
740}
741
742fn render_planning_snapshot_summary(
743 snapshot: &ComposeSnapshot,
744 observations: &[FileObservation],
745 index: &PlanningIndex,
746) -> String {
747 if index.mode == PlanningMode::File {
748 return render_snapshot_summary(snapshot, observations);
749 }
750
751 let observations_by_file: HashMap<&str, Vec<&str>> = observations
752 .iter()
753 .map(|observation| {
754 (
755 observation.file.as_str(),
756 observation
757 .observations
758 .iter()
759 .map(String::as_str)
760 .take(1)
761 .collect(),
762 )
763 })
764 .collect();
765
766 let mut out = String::new();
767 writeln!(
768 out,
769 "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
770 )
771 .unwrap();
772
773 for target in &index.targets {
774 writeln!(
775 out,
776 "- {} {} ({} files, {} hunks, +{}/-{})",
777 target.target_id,
778 target.label,
779 target.file_ids.len(),
780 target.hunk_count,
781 target.additions,
782 target.deletions
783 )
784 .unwrap();
785
786 let sample_file_ids = sample_file_ids_for_target(target);
787 if !sample_file_ids.is_empty() {
788 let sample_files: Vec<String> = sample_file_ids
789 .iter()
790 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
791 .collect();
792 writeln!(out, " files: {}", sample_files.join(", ")).unwrap();
793 let omitted = target.file_ids.len().saturating_sub(sample_files.len());
794 if omitted > 0 {
795 writeln!(out, " ... {omitted} more files omitted from {}", target.target_id).unwrap();
796 }
797 }
798
799 let mut rendered_observations = 0_usize;
800 for file_id in &target.file_ids {
801 let Some(file) = snapshot.file_by_id(file_id) else {
802 continue;
803 };
804 let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
805 continue;
806 };
807
808 for observation in file_observations {
809 writeln!(out, " observation: {observation}").unwrap();
810 rendered_observations += 1;
811 if rendered_observations >= 2 {
812 break;
813 }
814 }
815
816 if rendered_observations >= 2 {
817 break;
818 }
819 }
820
821 for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
822 if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
823 if hunk.synthetic {
824 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
825 } else {
826 writeln!(
827 out,
828 " - {} old:{} new:{} :: {}",
829 hunk.hunk_id,
830 format_line_range(hunk.old_start, hunk.old_count),
831 format_line_range(hunk.new_start, hunk.new_count),
832 hunk.snippet
833 )
834 .unwrap();
835 }
836 }
837 }
838 }
839
840 out
841}
842
843fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
844 match index.mode {
845 PlanningMode::File => format!(
846 "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
847 snapshot.files.len()
848 ),
849 PlanningMode::Area => format!(
850 "Area IDs only. Each target may expand to multiple files by shared path prefix. \
851 Coverage: {} areas spanning {} files.",
852 index.targets.len(),
853 snapshot.files.len()
854 ),
855 }
856}
857
858fn render_planning_notes(index: &PlanningIndex) -> String {
859 match index.mode {
860 PlanningMode::File => {
861 "Use only the provided file IDs and keep the grouping conservative.".to_string()
862 },
863 PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
864 planning areas. Split along independent subsystems or workstreams \
865 when the areas point at unrelated changes."
866 .to_string(),
867 }
868}
869
870fn render_split_bias(index: &PlanningIndex) -> String {
871 match index.mode {
872 PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
873 PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
874 one broad group if nearly every area clearly belongs to the same \
875 atomic change."
876 .to_string(),
877 }
878}
879
/// Builds the JSON schema for the intent-planning response.
///
/// The schema has a single top-level `groups` array. Each item carries `group_id`,
/// `file_ids`, `type` (restricted to the keys of `config.types`), an optional `scope`,
/// `rationale` and `dependencies`. The property map and the list of required top-level
/// keys are handed to `strict_json_schema`.
fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
    // Allowed commit types come straight from the configuration.
    let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();

    strict_json_schema(
        serde_json::json!({
            "groups": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "group_id": {
                            "type": "string",
                            "description": "Stable identifier like G1, G2, G3"
                        },
                        "file_ids": {
                            "type": "array",
                            "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
                            "items": { "type": "string" }
                        },
                        "type": {
                            "type": "string",
                            "enum": type_enum,
                            "description": "Conventional commit type for this group"
                        },
                        "scope": {
                            "type": "string",
                            "description": "Optional scope (module/component). Omit if broad."
                        },
                        "rationale": {
                            "type": "string",
                            "description": "Brief explanation of the logical change"
                        },
                        "dependencies": {
                            "type": "array",
                            "description": "Group IDs this group depends on",
                            "items": { "type": "string" }
                        }
                    },
                    // `scope` is deliberately absent from the required list.
                    "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
                    "additionalProperties": false
                }
            }
        }),
        &["groups"],
    )
}
926
/// Builds the JSON schema for the hunk-binding response.
///
/// The schema has a single top-level `assignments` array whose items each require a
/// `group_id` string and a `hunk_ids` string array, with no additional properties.
fn build_binding_schema() -> serde_json::Value {
    strict_json_schema(
        serde_json::json!({
            "assignments": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "group_id": { "type": "string" },
                        "hunk_ids": {
                            "type": "array",
                            "items": { "type": "string" }
                        }
                    },
                    "required": ["group_id", "hunk_ids"],
                    "additionalProperties": false
                }
            }
        }),
        &["assignments"],
    )
}
949
950fn compute_dependency_order<T, FId, FDeps>(
951 groups: &[T],
952 group_id: FId,
953 dependencies: FDeps,
954) -> Result<Vec<usize>>
955where
956 FId: Fn(&T) -> &str,
957 FDeps: Fn(&T) -> &[String],
958{
959 let mut index_by_id = HashMap::new();
960 for (idx, group) in groups.iter().enumerate() {
961 let id = group_id(group);
962 if id.trim().is_empty() {
963 return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
964 }
965 if index_by_id.insert(id.to_string(), idx).is_some() {
966 return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
967 }
968 }
969
970 let mut in_degree = vec![0_usize; groups.len()];
971 let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
972
973 for (idx, group) in groups.iter().enumerate() {
974 for dependency in dependencies(group) {
975 let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
976 CommitGenError::Other(format!(
977 "Group {} depends on unknown group_id '{}'",
978 group_id(group),
979 dependency
980 ))
981 })?;
982 if dependency_idx == idx {
983 return Err(CommitGenError::Other(format!(
984 "Group {} depends on itself",
985 group_id(group)
986 )));
987 }
988
989 adjacency[dependency_idx].push(idx);
990 in_degree[idx] += 1;
991 }
992 }
993
994 let mut queue: Vec<usize> = (0..groups.len())
995 .filter(|idx| in_degree[*idx] == 0)
996 .collect();
997 let mut order = Vec::with_capacity(groups.len());
998
999 while let Some(node) = queue.pop() {
1000 order.push(node);
1001 for neighbor in &adjacency[node] {
1002 in_degree[*neighbor] -= 1;
1003 if in_degree[*neighbor] == 0 {
1004 queue.push(*neighbor);
1005 }
1006 }
1007 }
1008
1009 if order.len() != groups.len() {
1010 return Err(CommitGenError::Other(
1011 "Circular dependency detected in compose groups".to_string(),
1012 ));
1013 }
1014
1015 Ok(order)
1016}
1017
/// Normalizes a loosely formatted file or target reference.
///
/// Steps, in order: trim surrounding whitespace; strip trailing `,`/`;` separators (and
/// any whitespace they exposed); strip surrounding backticks and quotes; strip leading
/// `./` repetitions; strip trailing separators once more.
///
/// Separators are stripped both before and after quote removal so that a reference
/// such as `"src/a.rs",` yields `src/a.rs` rather than keeping a dangling closing
/// quote, while `"src/a.rs,"` still loses the inner comma.
fn normalize_file_reference(raw_file_ref: &str) -> String {
    const SEPARATORS: [char; 2] = [',', ';'];

    raw_file_ref
        .trim()
        .trim_end_matches(SEPARATORS)
        .trim_end()
        .trim_matches(|ch| matches!(ch, '`' | '"' | '\''))
        .trim_start_matches("./")
        .trim_end_matches(SEPARATORS)
        .to_string()
}
1026
/// Extracts distinct, lower-cased keyword tokens from `text`.
///
/// The text is split on every character that is not ASCII alphanumeric. Words shorter
/// than three characters and words on the stop list are dropped. Each remaining word is
/// returned once, in order of first appearance.
fn planning_text_tokens(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "and",
        "for",
        "the",
        "with",
        "from",
        "into",
        "after",
        "before",
        "over",
        "under",
        "plus",
        "across",
        "update",
        "updated",
        "refactor",
        "refactored",
        "changes",
        "change",
        "logical",
        "group",
        "groups",
        "commit",
        "commits",
    ];

    let mut tokens = Vec::new();
    let mut seen = HashSet::new();

    for raw_word in text.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        // Words are pure ASCII here, so byte length equals character count.
        if raw_word.len() < 3 {
            continue;
        }

        let word = raw_word.to_ascii_lowercase();
        if STOP_WORDS.contains(&word.as_str()) {
            continue;
        }
        if seen.insert(word.clone()) {
            tokens.push(word);
        }
    }

    tokens
}
1078
1079fn extract_group_id_candidate(raw: &str) -> Option<String> {
1080 let normalized = normalize_file_reference(raw);
1081 let uppercase = normalized.to_ascii_uppercase();
1082
1083 if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1084 return Some(format!("G{uppercase}"));
1085 }
1086
1087 if let Some(rest) = uppercase.strip_prefix('G')
1088 && !rest.is_empty()
1089 && rest.chars().all(|ch| ch.is_ascii_digit())
1090 {
1091 return Some(format!("G{rest}"));
1092 }
1093
1094 let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1095 let compact = uppercase
1096 .chars()
1097 .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1098 .collect::<String>();
1099 if compact.starts_with("GROUP") && !digits.is_empty() {
1100 return Some(format!("G{digits}"));
1101 }
1102
1103 None
1104}
1105
/// Coarse classification of a changed file, assigned by `compose_file_category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeFileCategory {
    /// The file is flagged as binary.
    Binary,
    /// The file is a dependency manifest or lock file.
    Dependency,
    /// Markdown files and README files.
    Docs,
    /// The lower-cased path contains `prompt` or `system`.
    Prompt,
    /// The file lives under a `tests/` directory or its name contains `test` or `spec`.
    Test,
    /// The file has a configuration-style extension (toml, yaml, json, ini, …).
    Config,
    /// The file has a recognised source-code extension.
    Source,
    /// None of the other categories matched.
    Other,
}
1117
1118fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1119 if file.is_binary {
1120 return ComposeFileCategory::Binary;
1121 }
1122
1123 if is_dependency_manifest(&file.path) {
1124 return ComposeFileCategory::Dependency;
1125 }
1126
1127 let filename_lower = file.path.to_ascii_lowercase();
1128 let file_name = Path::new(&filename_lower)
1129 .file_name()
1130 .and_then(|name| name.to_str())
1131 .unwrap_or_default();
1132 let extension = Path::new(&filename_lower)
1133 .extension()
1134 .and_then(|ext| ext.to_str())
1135 .unwrap_or_default();
1136
1137 if filename_lower.contains("prompt") || filename_lower.contains("system") {
1138 return ComposeFileCategory::Prompt;
1139 }
1140
1141 if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1142 return ComposeFileCategory::Docs;
1143 }
1144
1145 if filename_lower.contains("/tests/")
1146 || filename_lower.starts_with("tests/")
1147 || file_name.contains("test")
1148 || file_name.contains("spec")
1149 {
1150 return ComposeFileCategory::Test;
1151 }
1152
1153 if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1154 return ComposeFileCategory::Config;
1155 }
1156
1157 if matches!(
1158 extension,
1159 "rs"
1160 | "py"
1161 | "js"
1162 | "jsx"
1163 | "ts"
1164 | "tsx"
1165 | "go"
1166 | "java"
1167 | "kt"
1168 | "c"
1169 | "cc"
1170 | "cpp"
1171 | "h"
1172 | "hpp"
1173 | "cs"
1174 | "rb"
1175 | "php"
1176 | "swift"
1177 | "scala"
1178 | "m"
1179 | "mm"
1180 ) {
1181 return ComposeFileCategory::Source;
1182 }
1183
1184 ComposeFileCategory::Other
1185}
1186
/// Counts how many leading `/`-separated segments `left` and `right` share.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
    let mut depth = 0;
    for (left_segment, right_segment) in left.split('/').zip(right.split('/')) {
        if left_segment != right_segment {
            break;
        }
        depth += 1;
    }
    depth
}
1194
1195fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1196 let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1197
1198 if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1199 score += 40;
1200 }
1201
1202 if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1203 score += 12;
1204 }
1205
1206 if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1207 score += 18;
1208 }
1209
1210 score
1211}
1212
1213fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1214 match (compose_file_category(file), group.commit_type.as_str()) {
1215 (ComposeFileCategory::Docs, "docs") => 25,
1216 (ComposeFileCategory::Test, "test") => 25,
1217 (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1218 (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1219 (
1220 ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1221 "feat" | "fix" | "refactor" | "perf",
1222 ) => 10,
1223 _ => 0,
1224 }
1225}
1226
1227fn best_group_for_missing_file(
1228 snapshot: &ComposeSnapshot,
1229 groups: &[ComposeIntentGroup],
1230 missing_file: &ComposeFile,
1231) -> usize {
1232 let mut best_group_idx = 0;
1233 let mut best_score = i32::MIN;
1234 let mut best_group_size = usize::MAX;
1235
1236 for (group_idx, group) in groups.iter().enumerate() {
1237 let similarity = group
1238 .file_ids
1239 .iter()
1240 .filter_map(|file_id| snapshot.file_by_id(file_id))
1241 .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1242 .max()
1243 .unwrap_or_default();
1244 let score = similarity + group_type_bonus(missing_file, group);
1245 let group_size = group.file_ids.len();
1246
1247 if score > best_score || (score == best_score && group_size < best_group_size) {
1248 best_group_idx = group_idx;
1249 best_score = score;
1250 best_group_size = group_size;
1251 }
1252 }
1253
1254 best_group_idx
1255}
1256
1257fn normalize_dependency_reference(
1258 raw_dependency: &str,
1259 known_group_ids: &HashSet<String>,
1260) -> Option<String> {
1261 let normalized = normalize_file_reference(raw_dependency);
1262 if normalized.is_empty() {
1263 return None;
1264 }
1265
1266 if known_group_ids.contains(&normalized) {
1267 return Some(normalized);
1268 }
1269
1270 let uppercase = normalized.to_ascii_uppercase();
1271 if known_group_ids.contains(&uppercase) {
1272 return Some(uppercase);
1273 }
1274
1275 let candidate = extract_group_id_candidate(&normalized)?;
1276 known_group_ids.contains(&candidate).then_some(candidate)
1277}
1278
1279fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1280 let label = target.label.to_ascii_lowercase();
1281 let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1282 let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1283
1284 if let Some(scope) = &group.scope {
1285 let scope = scope.as_str().to_ascii_lowercase();
1286 if label.contains(&scope) || workstream.contains(&scope) {
1287 score += 140;
1288 }
1289
1290 for segment in scope.split('/') {
1291 if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1292 score += 45;
1293 }
1294 }
1295 }
1296
1297 for token in planning_text_tokens(&group.rationale) {
1298 if label.contains(&token) || workstream.contains(&token) {
1299 score += 16;
1300 }
1301 }
1302
1303 match group.commit_type.as_str() {
1304 "ci" if target.label.starts_with(".github/") => score += 120,
1305 "docs"
1306 if target.label.starts_with("docs/")
1307 || Path::new(&target.label)
1308 .extension()
1309 .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1310 {
1311 score += 80;
1312 },
1313 "build" | "chore"
1314 if target.label.contains("Cargo")
1315 || target.label.contains("package")
1316 || target.label.contains("lock")
1317 || target.label.contains("tsconfig")
1318 || target.label.contains("biome")
1319 || target.label.contains("bun") =>
1320 {
1321 score += 55;
1322 },
1323 _ => {},
1324 }
1325
1326 score
1327}
1328
1329fn seed_group_targets(
1330 groups: &[ComposeIntentGroup],
1331 planning_index: &PlanningIndex,
1332 group_targets: &mut [Vec<String>],
1333 repair_notes: &mut Vec<String>,
1334) {
1335 let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1336
1337 for (group_idx, group) in groups.iter().enumerate() {
1338 if !group_targets[group_idx].is_empty() {
1339 continue;
1340 }
1341
1342 let fallback_target = planning_index
1343 .targets
1344 .iter()
1345 .max_by_key(|target| {
1346 let mut score = planning_target_match_score(target, group);
1347 if !claimed_target_ids.contains(&target.target_id) {
1348 score += 60;
1349 }
1350 (score, target.hunk_count, target.file_ids.len())
1351 })
1352 .or_else(|| planning_index.targets.first());
1353
1354 let Some(fallback_target) = fallback_target else {
1355 continue;
1356 };
1357
1358 group_targets[group_idx].push(fallback_target.target_id.clone());
1359 claimed_target_ids.insert(fallback_target.target_id.clone());
1360 repair_notes.push(format!(
1361 "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1362 group.group_id, fallback_target.target_id, fallback_target.label
1363 ));
1364 }
1365}
1366
1367fn normalize_intent_plan(
1368 snapshot: &ComposeSnapshot,
1369 planning_index: &PlanningIndex,
1370 mut groups: Vec<ComposeIntentGroup>,
1371) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
1372 if groups.is_empty() {
1373 return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
1374 }
1375
1376 let known_target_ids: HashSet<&str> = planning_index
1377 .targets
1378 .iter()
1379 .map(|target| target.target_id.as_str())
1380 .collect();
1381 let mut repair_notes = Vec::new();
1382 let mut covered_file_ids = HashSet::new();
1383 let mut normalized_group_targets = Vec::with_capacity(groups.len());
1384
1385 for group in &groups {
1386 if group.file_ids.is_empty() {
1387 repair_notes.push(format!(
1388 "Compose planner left {} without planning targets; assigning targets heuristically",
1389 group.group_id
1390 ));
1391 }
1392
1393 let mut normalized_target_ids = Vec::new();
1394 let mut seen_target_ids = HashSet::new();
1395 for raw_target_ref in &group.file_ids {
1396 let normalized_ref = normalize_file_reference(raw_target_ref);
1397 let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
1398 normalized_ref.clone()
1399 } else {
1400 let uppercase_ref = normalized_ref.to_ascii_uppercase();
1401 if known_target_ids.contains(uppercase_ref.as_str()) {
1402 uppercase_ref
1403 } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
1404 if raw_target_ref != target_id {
1405 repair_notes.push(format!(
1406 "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
1407 ));
1408 }
1409 target_id.clone()
1410 } else {
1411 repair_notes.push(format!(
1412 "Dropped unknown planning target '{}' from {}",
1413 raw_target_ref, group.group_id
1414 ));
1415 continue;
1416 }
1417 };
1418
1419 if seen_target_ids.insert(canonical_target_id.clone()) {
1420 normalized_target_ids.push(canonical_target_id);
1421 }
1422 }
1423
1424 normalized_group_targets.push(normalized_target_ids);
1425 }
1426
1427 seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);
1428
1429 let known_group_ids: HashSet<String> =
1430 groups.iter().map(|group| group.group_id.clone()).collect();
1431 for group in &mut groups {
1432 let mut normalized_dependencies = Vec::new();
1433 let mut seen_dependencies = HashSet::new();
1434
1435 for raw_dependency in &group.dependencies {
1436 let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
1437 else {
1438 repair_notes.push(format!(
1439 "Dropped unknown dependency '{}' from {}",
1440 raw_dependency, group.group_id
1441 ));
1442 continue;
1443 };
1444
1445 if dependency == group.group_id {
1446 repair_notes.push(format!(
1447 "Dropped self-dependency '{}' from {}",
1448 raw_dependency, group.group_id
1449 ));
1450 continue;
1451 }
1452
1453 if seen_dependencies.insert(dependency.clone()) {
1454 if raw_dependency != &dependency {
1455 repair_notes.push(format!(
1456 "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
1457 ));
1458 }
1459 normalized_dependencies.push(dependency);
1460 }
1461 }
1462
1463 group.dependencies = normalized_dependencies;
1464 }
1465
1466 for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
1467 let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
1468 for file_id in &expanded_file_ids {
1469 covered_file_ids.insert(file_id.clone());
1470 }
1471 group.file_ids = expanded_file_ids;
1472 }
1473
1474 for file in &snapshot.files {
1475 if covered_file_ids.contains(file.file_id.as_str()) {
1476 continue;
1477 }
1478
1479 let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
1480 let target_group = &mut groups[target_group_idx];
1481 target_group.file_ids.push(file.file_id.clone());
1482 covered_file_ids.insert(file.file_id.clone());
1483 repair_notes.push(format!(
1484 "Compose planner omitted {} ({}); assigned it to {}",
1485 file.file_id, file.path, target_group.group_id
1486 ));
1487 }
1488
1489 Ok((groups, repair_notes))
1490}
1491
/// Derives the workstream key a path-like `label` belongs to.
///
/// The key is normally the first non-empty `/` segment. For `.github` and
/// the container directories `apps`, `packages`, `crates`, `services`, `libs`
/// and `pass`, the second segment is appended when present (for example
/// `crates/core`). A label with no non-empty segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
    let mut segments = label.split('/').filter(|segment| !segment.is_empty());
    let Some(first) = segments.next() else {
        return label.to_string();
    };

    let keeps_second_segment = matches!(
        first,
        ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
    );

    match segments.next() {
        Some(second) if keeps_second_segment => format!("{first}/{second}"),
        _ => first.to_string(),
    }
}
1513
1514fn workstream_display_name(label: &str) -> String {
1515 let key = workstream_key_for_label(label);
1516 match key.as_str() {
1517 ".github/workflows" => "CI workflows".to_string(),
1518 ".github" => "GitHub automation".to_string(),
1519 _ => key
1520 .split('/')
1521 .next_back()
1522 .map(|segment| segment.replace(['_', '-'], " "))
1523 .unwrap_or(key),
1524 }
1525}
1526
/// Turns free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII alphanumerics are kept (lowercased). Runs of `-`, `_`, `/`, `.` and
/// spaces collapse into a single `-`; all other characters are dropped.
/// Leading and trailing dashes are removed. Returns `None` when nothing
/// usable remains.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
    let mut fragment = String::new();

    for ch in raw.trim().chars() {
        if ch.is_ascii_alphanumeric() {
            fragment.push(ch.to_ascii_lowercase());
            continue;
        }

        let is_separator = matches!(ch, '-' | '_' | '/' | '.' | ' ');
        // Only '-' is ever pushed for separators, so a trailing '-' means the
        // previous kept character was already a separator.
        if is_separator && !fragment.is_empty() && !fragment.ends_with('-') {
            fragment.push('-');
        }
    }

    let trimmed = fragment.trim_matches('-');
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
1545
1546fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1547 let key = workstream_key_for_label(label);
1548 let candidate = key
1549 .split('/')
1550 .next_back()
1551 .and_then(sanitize_scope_fragment)?;
1552 Scope::new(candidate).ok()
1553}
1554
1555fn fallback_rationale_for_labels(labels: &[String]) -> String {
1556 if labels.len() == 1 {
1557 let label = labels[0].as_str();
1558 let display = workstream_display_name(label);
1559 if label.starts_with("apps/") {
1560 return format!("{display} application updates");
1561 }
1562 if label.starts_with("packages/") {
1563 return format!("{display} package updates");
1564 }
1565 if label.starts_with("crates/") {
1566 return format!("{display} crate updates");
1567 }
1568 if label.starts_with(".github/") || label == ".github" {
1569 return format!("{display} updates");
1570 }
1571 return format!("{display} updates");
1572 }
1573
1574 let display_labels: Vec<String> = labels
1575 .iter()
1576 .take(3)
1577 .map(|label| workstream_display_name(label))
1578 .collect();
1579 format!("cross-cutting updates for {}", display_labels.join(", "))
1580}
1581
1582fn fallback_commit_type_for_group(
1583 snapshot: &ComposeSnapshot,
1584 labels: &[String],
1585 file_ids: &[String],
1586) -> Result<CommitType> {
1587 if labels
1588 .iter()
1589 .any(|label| label == ".github" || label.starts_with(".github/"))
1590 {
1591 return CommitType::new("ci");
1592 }
1593
1594 let files: Vec<&ComposeFile> = file_ids
1595 .iter()
1596 .filter_map(|file_id| snapshot.file_by_id(file_id))
1597 .collect();
1598 let all_docs = !files.is_empty()
1599 && files
1600 .iter()
1601 .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1602 if all_docs {
1603 return CommitType::new("docs");
1604 }
1605
1606 let all_tests = !files.is_empty()
1607 && files
1608 .iter()
1609 .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1610 if all_tests {
1611 return CommitType::new("test");
1612 }
1613
1614 let all_dependencies =
1615 !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1616 if all_dependencies {
1617 return CommitType::new("build");
1618 }
1619
1620 let all_config = !files.is_empty()
1621 && files.iter().all(|file| {
1622 matches!(
1623 compose_file_category(file),
1624 ComposeFileCategory::Config | ComposeFileCategory::Dependency
1625 )
1626 });
1627 if all_config {
1628 return CommitType::new("chore");
1629 }
1630
1631 CommitType::new("refactor")
1632}
1633
1634fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1635 snapshot
1636 .files
1637 .iter()
1638 .filter(|file| file_ids.contains(&file.file_id))
1639 .map(|file| file.file_id.clone())
1640 .collect()
1641}
1642
1643fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1644 if groups.is_empty() {
1645 return false;
1646 }
1647
1648 let largest_group = groups
1649 .iter()
1650 .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1651 .max()
1652 .unwrap_or_default();
1653
1654 groups.len() == 1
1655 || (groups.len() <= 2
1656 && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1657}
1658
1659fn should_force_large_patch_fallback(
1660 snapshot: &ComposeSnapshot,
1661 planning_index: &PlanningIndex,
1662 groups: &[ComposeIntentGroup],
1663 max_commits: usize,
1664) -> bool {
1665 if max_commits <= 1
1666 || planning_index.mode != PlanningMode::Area
1667 || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1668 || !is_monolithic_intent_plan(snapshot, groups)
1669 {
1670 return false;
1671 }
1672
1673 let workstream_count = planning_index
1674 .targets
1675 .iter()
1676 .map(|target| workstream_key_for_label(&target.label))
1677 .collect::<HashSet<_>>()
1678 .len();
1679
1680 workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1681}
1682
1683fn build_large_patch_fallback_groups(
1684 snapshot: &ComposeSnapshot,
1685 planning_index: &PlanningIndex,
1686 max_commits: usize,
1687) -> Result<Vec<ComposeIntentGroup>> {
1688 #[derive(Debug, Clone)]
1689 struct WorkstreamGroup {
1690 label: String,
1691 file_ids: HashSet<String>,
1692 weight: usize,
1693 }
1694
1695 #[derive(Debug, Clone)]
1696 struct FallbackBin {
1697 labels: Vec<String>,
1698 file_ids: HashSet<String>,
1699 total_weight: usize,
1700 }
1701
1702 let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1703 for target in &planning_index.targets {
1704 let key = workstream_key_for_label(&target.label);
1705 let entry = workstreams
1706 .entry(key.clone())
1707 .or_insert_with(|| WorkstreamGroup {
1708 label: key,
1709 file_ids: HashSet::new(),
1710 weight: 0,
1711 });
1712
1713 for file_id in &target.file_ids {
1714 entry.file_ids.insert(file_id.clone());
1715 }
1716 entry.weight = entry
1717 .weight
1718 .saturating_add(target.hunk_count.max(target.file_ids.len()));
1719 }
1720
1721 let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1722 workstreams.sort_by(|left, right| {
1723 right
1724 .weight
1725 .cmp(&left.weight)
1726 .then_with(|| left.label.cmp(&right.label))
1727 });
1728
1729 let bin_count = max_commits.min(workstreams.len());
1730 let mut bins: Vec<FallbackBin> = Vec::new();
1731 for workstream in workstreams {
1732 if bins.len() < bin_count {
1733 bins.push(FallbackBin {
1734 labels: vec![workstream.label],
1735 file_ids: workstream.file_ids,
1736 total_weight: workstream.weight,
1737 });
1738 continue;
1739 }
1740
1741 let Some((target_idx, _)) = bins
1742 .iter()
1743 .enumerate()
1744 .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1745 else {
1746 continue;
1747 };
1748
1749 let target_bin = &mut bins[target_idx];
1750 target_bin.labels.push(workstream.label);
1751 target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1752 target_bin.file_ids.extend(workstream.file_ids);
1753 }
1754
1755 let mut groups = Vec::new();
1756 for (idx, bin) in bins.into_iter().enumerate() {
1757 let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1758 let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1759 let scope = (bin.labels.len() == 1)
1760 .then(|| fallback_scope_for_label(&bin.labels[0]))
1761 .flatten();
1762 let rationale = fallback_rationale_for_labels(&bin.labels);
1763
1764 groups.push(ComposeIntentGroup {
1765 group_id: format!("G{}", idx + 1),
1766 commit_type,
1767 scope,
1768 file_ids: ordered_ids,
1769 rationale,
1770 dependencies: Vec::new(),
1771 });
1772 }
1773
1774 Ok(groups)
1775}
1776
1777#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
1778async fn analyze_compose_intent(
1779 snapshot: &ComposeSnapshot,
1780 observations: &[FileObservation],
1781 config: &CommitConfig,
1782 max_commits: usize,
1783 debug_dir: Option<&Path>,
1784) -> Result<ComposeIntentPlan> {
1785 let planning_index = build_planning_index(snapshot);
1786 let stat_summary = render_planning_stat(&planning_index);
1787 let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1788 let planning_targets = render_planning_targets(&planning_index, snapshot);
1789 let planning_notes = render_planning_notes(&planning_index);
1790 let split_bias = render_split_bias(&planning_index);
1791 let schema = build_intent_schema(config);
1792 let variant = if config.markdown_output {
1793 "markdown"
1794 } else {
1795 "default"
1796 };
1797 let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1798 variant,
1799 max_commits,
1800 stat: &stat_summary,
1801 snapshot_summary: &snapshot_summary,
1802 planning_targets: &planning_targets,
1803 planning_notes: &planning_notes,
1804 split_bias: &split_bias,
1805 })?;
1806
1807 let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1808 operation: "compose/intent",
1809 model: &config.analysis_model,
1810 prompt_family: "compose-intent",
1811 prompt_variant: variant,
1812 system_prompt: &parts.system,
1813 user_prompt: &parts.user,
1814 tool_name: "create_compose_intent_plan",
1815 tool_description: "Plan logical commit groups over the provided planning target IDs",
1816 schema: &schema,
1817 progress_label: Some("compose intent planner"),
1818 debug: debug_dir.map(|dir| OneShotDebug {
1819 dir: Some(dir),
1820 prefix: None,
1821 name: "compose_intent",
1822 }),
1823 cacheable: true,
1824 })
1825 .await?;
1826
1827 let (mut groups, repair_notes) =
1828 normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1829 for note in &repair_notes {
1830 eprintln!("{}", style::warning(note));
1831 }
1832 if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1833 eprintln!(
1834 "{}",
1835 style::warning(
1836 "Compose intent collapsed into a monolithic large-patch group; falling back to \
1837 path-based workstream splits."
1838 )
1839 );
1840 groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1841 }
1842 let dependency_order =
1843 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1844
1845 Ok(ComposeIntentPlan { groups, dependency_order })
1846}
1847
1848#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1849fn should_collect_compose_observations(
1850 snapshot: &ComposeSnapshot,
1851 config: &CommitConfig,
1852 counter: &TokenCounter,
1853) -> bool {
1854 planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1855 && should_use_map_reduce(&snapshot.diff, config, counter)
1856}
1857
1858#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1859fn auto_assign_hunks(
1860 snapshot: &ComposeSnapshot,
1861 intent_plan: &ComposeIntentPlan,
1862) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1863 let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1864 for group in &intent_plan.groups {
1865 for file_id in &group.file_ids {
1866 groups_by_file
1867 .entry(file_id.as_str())
1868 .or_default()
1869 .push(group.group_id.as_str());
1870 }
1871 }
1872
1873 let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1874 .groups
1875 .iter()
1876 .map(|group| (group.group_id.clone(), BTreeSet::new()))
1877 .collect();
1878 let mut ambiguous = Vec::new();
1879
1880 for file in &snapshot.files {
1881 let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1882 return Err(CommitGenError::Other(format!(
1883 "No compose group claimed file {} ({})",
1884 file.file_id, file.path
1885 )));
1886 };
1887
1888 if candidate_group_ids.len() == 1 {
1889 let group_id = candidate_group_ids[0];
1890 let entry = assigned
1891 .get_mut(group_id)
1892 .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1893 for hunk_id in &file.hunk_ids {
1894 entry.insert(hunk_id.clone());
1895 }
1896 } else {
1897 ambiguous.push(AmbiguousFileBinding {
1898 file_id: file.file_id.clone(),
1899 path: file.path.clone(),
1900 candidate_group_ids: candidate_group_ids
1901 .iter()
1902 .map(|group_id| (*group_id).to_string())
1903 .collect(),
1904 hunk_ids: file.hunk_ids.clone(),
1905 });
1906 }
1907 }
1908
1909 Ok((assigned, ambiguous))
1910}
1911
1912fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1913 let mut out = String::new();
1914 for group in groups {
1915 let scope = group
1916 .scope
1917 .as_ref()
1918 .map(|scope| format!("({})", scope.as_str()))
1919 .unwrap_or_default();
1920 writeln!(
1921 out,
1922 "- {} [{}{}] {}",
1923 group.group_id,
1924 group.commit_type.as_str(),
1925 scope,
1926 group.rationale
1927 )
1928 .unwrap();
1929 }
1930
1931 out
1932}
1933
1934fn render_binding_ambiguous_files(
1935 snapshot: &ComposeSnapshot,
1936 ambiguous_files: &[AmbiguousFileBinding],
1937) -> String {
1938 let mut out = String::new();
1939 for ambiguous_file in ambiguous_files {
1940 writeln!(
1941 out,
1942 "- {} {} candidates: {}",
1943 ambiguous_file.file_id,
1944 ambiguous_file.path,
1945 ambiguous_file.candidate_group_ids.join(", ")
1946 )
1947 .unwrap();
1948
1949 for hunk_id in &ambiguous_file.hunk_ids {
1950 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1951 if hunk.synthetic {
1952 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1953 } else {
1954 writeln!(
1955 out,
1956 " - {} old:{} new:{} :: {}",
1957 hunk.hunk_id,
1958 format_line_range(hunk.old_start, hunk.old_count),
1959 format_line_range(hunk.new_start, hunk.new_count),
1960 hunk.snippet
1961 )
1962 .unwrap();
1963 }
1964 }
1965 }
1966 }
1967
1968 out
1969}
1970
1971async fn request_binding(
1972 snapshot: &ComposeSnapshot,
1973 groups: &[ComposeIntentGroup],
1974 ambiguous_files: &[AmbiguousFileBinding],
1975 config: &CommitConfig,
1976 debug_dir: Option<&Path>,
1977 debug_name: &str,
1978) -> Result<Vec<ComposeBindingAssignment>> {
1979 let schema = build_binding_schema();
1980 let groups_text = render_binding_groups(groups);
1981 let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1982 let variant = if config.markdown_output {
1983 "markdown"
1984 } else {
1985 "default"
1986 };
1987 let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1988 variant,
1989 groups: &groups_text,
1990 ambiguous_files: &ambiguous_files_text,
1991 })?;
1992 let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1993 operation: "compose/bind",
1994 model: &config.analysis_model,
1995 prompt_family: "compose-bind",
1996 prompt_variant: variant,
1997 system_prompt: &parts.system,
1998 user_prompt: &parts.user,
1999 tool_name: "bind_compose_hunks",
2000 tool_description: "Assign hunk IDs to existing compose groups",
2001 schema: &schema,
2002 progress_label: Some("compose hunk binder"),
2003 debug: debug_dir.map(|dir| OneShotDebug {
2004 dir: Some(dir),
2005 prefix: None,
2006 name: debug_name,
2007 }),
2008 cacheable: true,
2009 })
2010 .await?;
2011
2012 Ok(response.output.assignments)
2013}
2014
2015fn ambiguous_hunk_context(
2016 ambiguous_files: &[AmbiguousFileBinding],
2017) -> HashMap<String, AmbiguousHunkContext> {
2018 let mut context = HashMap::new();
2019 for ambiguous_file in ambiguous_files {
2020 for hunk_id in &ambiguous_file.hunk_ids {
2021 context.insert(hunk_id.clone(), AmbiguousHunkContext {
2022 candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2023 });
2024 }
2025 }
2026 context
2027}
2028
2029fn evaluate_binding(
2030 assignments: &[ComposeBindingAssignment],
2031 hunk_context: &HashMap<String, AmbiguousHunkContext>,
2032 valid_group_ids: &HashSet<&str>,
2033 snapshot: &ComposeSnapshot,
2034) -> BindingEvaluation {
2035 let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2036
2037 for assignment in assignments {
2038 if !valid_group_ids.contains(assignment.group_id.as_str()) {
2039 continue;
2040 }
2041
2042 let mut seen_in_group = HashSet::new();
2043 for hunk_id in &assignment.hunk_ids {
2044 if !seen_in_group.insert(hunk_id.as_str()) {
2045 continue;
2046 }
2047
2048 let Some(context) = hunk_context.get(hunk_id) else {
2049 continue;
2050 };
2051
2052 if !context
2053 .candidate_group_ids
2054 .iter()
2055 .any(|candidate| candidate == &assignment.group_id)
2056 {
2057 continue;
2058 }
2059
2060 match assigned_hunk_to_group.get(hunk_id) {
2061 None => {
2062 assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2063 },
2064 Some(existing_group) if existing_group == &assignment.group_id => {},
2065 Some(_) => {
2066 assigned_hunk_to_group.remove(hunk_id);
2067 },
2068 }
2069 }
2070 }
2071
2072 let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2073 for (hunk_id, group_id) in assigned_hunk_to_group {
2074 assigned_by_group.entry(group_id).or_default().push(hunk_id);
2075 }
2076
2077 for hunk_ids in assigned_by_group.values_mut() {
2078 let ordered: Vec<String> = snapshot
2079 .hunks
2080 .iter()
2081 .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2082 .map(|hunk| hunk.hunk_id.clone())
2083 .collect();
2084 *hunk_ids = ordered;
2085 }
2086
2087 let unresolved = snapshot
2088 .hunks
2089 .iter()
2090 .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2091 .filter(|hunk| {
2092 !assigned_by_group.values().any(|assigned_hunks| {
2093 assigned_hunks
2094 .iter()
2095 .any(|assigned| assigned == &hunk.hunk_id)
2096 })
2097 })
2098 .map(|hunk| hunk.hunk_id.clone())
2099 .collect();
2100
2101 BindingEvaluation { assigned: assigned_by_group, unresolved }
2102}
2103
2104fn filter_ambiguous_files(
2105 ambiguous_files: &[AmbiguousFileBinding],
2106 hunk_ids: &[String],
2107) -> Vec<AmbiguousFileBinding> {
2108 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2109
2110 ambiguous_files
2111 .iter()
2112 .filter_map(|file| {
2113 let matching_hunks: Vec<String> = file
2114 .hunk_ids
2115 .iter()
2116 .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2117 .cloned()
2118 .collect();
2119
2120 (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2121 file_id: file.file_id.clone(),
2122 path: file.path.clone(),
2123 candidate_group_ids: file.candidate_group_ids.clone(),
2124 hunk_ids: matching_hunks,
2125 })
2126 })
2127 .collect()
2128}
2129
2130fn chunk_ambiguous_files(
2131 ambiguous_files: &[AmbiguousFileBinding],
2132) -> Vec<Vec<AmbiguousFileBinding>> {
2133 if ambiguous_files.is_empty() {
2134 return Vec::new();
2135 }
2136
2137 let mut batches = Vec::new();
2138 let mut current_batch = Vec::new();
2139 let mut current_hunk_count = 0_usize;
2140
2141 for file in ambiguous_files {
2142 let file_hunk_count = file.hunk_ids.len();
2143 let should_split = !current_batch.is_empty()
2144 && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2145 || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2146
2147 if should_split {
2148 batches.push(current_batch);
2149 current_batch = Vec::new();
2150 current_hunk_count = 0;
2151 }
2152
2153 current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2154 current_batch.push(file.clone());
2155 }
2156
2157 if !current_batch.is_empty() {
2158 batches.push(current_batch);
2159 }
2160
2161 batches
2162}
2163
2164fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2165 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2166
2167 snapshot
2168 .hunks
2169 .iter()
2170 .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2171 .map(|hunk| hunk.hunk_id.clone())
2172 .collect()
2173}
2174
2175fn fallback_group_for_hunk(
2176 hunk_id: &str,
2177 ambiguous_files: &[AmbiguousFileBinding],
2178 group_rank: &HashMap<&str, usize>,
2179) -> Option<String> {
2180 ambiguous_files.iter().find_map(|file| {
2181 file
2182 .hunk_ids
2183 .iter()
2184 .any(|candidate| candidate == hunk_id)
2185 .then(|| {
2186 file
2187 .candidate_group_ids
2188 .iter()
2189 .min_by_key(|group_id| {
2190 group_rank
2191 .get(group_id.as_str())
2192 .copied()
2193 .unwrap_or(usize::MAX)
2194 })
2195 .cloned()
2196 })
2197 })?
2198}
2199
2200fn assign_unresolved_hunks(
2201 unresolved_hunks: &[String],
2202 assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2203 ambiguous_files: &[AmbiguousFileBinding],
2204 group_rank: &HashMap<&str, usize>,
2205) {
2206 for hunk_id in unresolved_hunks {
2207 if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2208 && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2209 {
2210 group_hunks.insert(hunk_id.clone());
2211 }
2212 }
2213}
2214
2215fn normalize_group_type(
2216 snapshot: &ComposeSnapshot,
2217 file_ids: &[String],
2218 original_type: &CommitType,
2219) -> Result<CommitType> {
2220 let dependency_only = !file_ids.is_empty()
2221 && file_ids.iter().all(|file_id| {
2222 snapshot
2223 .file_by_id(file_id)
2224 .is_some_and(|file| is_dependency_manifest(&file.path))
2225 });
2226
2227 if dependency_only && original_type.as_str() != "build" {
2228 CommitType::new("build")
2229 } else {
2230 Ok(original_type.clone())
2231 }
2232}
2233
2234fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2235 snapshot
2236 .files
2237 .iter()
2238 .filter(|file| {
2239 hunk_ids
2240 .iter()
2241 .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2242 })
2243 .map(|file| file.file_id.clone())
2244 .collect()
2245}
2246
2247fn build_redirects(
2248 intent_plan: &ComposeIntentPlan,
2249 executable_groups: &[ComposeExecutableGroup],
2250 group_rank: &HashMap<&str, usize>,
2251) -> HashMap<String, String> {
2252 let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2253 .iter()
2254 .filter(|group| !group.hunk_ids.is_empty())
2255 .map(|group| (group.group_id.as_str(), group))
2256 .collect();
2257
2258 let mut redirects = HashMap::new();
2259 for group in &intent_plan.groups {
2260 if surviving_groups.contains_key(group.group_id.as_str()) {
2261 continue;
2262 }
2263
2264 let redirect = executable_groups
2265 .iter()
2266 .filter(|candidate| candidate.group_id != group.group_id)
2267 .filter(|candidate| {
2268 candidate.file_ids.iter().any(|file_id| {
2269 group
2270 .file_ids
2271 .iter()
2272 .any(|candidate_id| candidate_id == file_id)
2273 })
2274 })
2275 .min_by_key(|candidate| {
2276 group_rank
2277 .get(candidate.group_id.as_str())
2278 .copied()
2279 .unwrap_or(usize::MAX)
2280 })
2281 .map(|candidate| candidate.group_id.clone());
2282
2283 if let Some(redirect) = redirect {
2284 redirects.insert(group.group_id.clone(), redirect);
2285 }
2286 }
2287
2288 redirects
2289}
2290
/// Follows `redirects` starting at `group_id` and returns the final target.
///
/// An id without a redirect resolves to itself. Chains are followed hop by
/// hop; if a cycle is detected, the walk stops at the first id that is about
/// to be visited a second time and that id is returned.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
    // Walk over borrowed ids; only the final answer is allocated.
    let mut current = group_id;
    let mut seen: HashSet<&str> = HashSet::new();

    while let Some(next) = redirects.get(current) {
        // `insert` returning false means `current` was already expanded: a cycle.
        if !seen.insert(current) {
            break;
        }
        current = next.as_str();
    }

    current.to_string()
}
2304
2305fn prune_empty_groups(
2306 groups: Vec<ComposeExecutableGroup>,
2307 redirects: &HashMap<String, String>,
2308) -> Result<ComposeExecutablePlan> {
2309 let surviving_ids: HashSet<String> = groups
2310 .iter()
2311 .filter(|group| !group.hunk_ids.is_empty())
2312 .map(|group| group.group_id.clone())
2313 .collect();
2314
2315 let mut surviving_groups = Vec::new();
2316 for mut group in groups {
2317 if group.hunk_ids.is_empty() {
2318 continue;
2319 }
2320
2321 let mut rewritten_dependencies = Vec::new();
2322 for dependency in &group.dependencies {
2323 let rewritten = resolve_redirect(dependency, redirects);
2324 if rewritten != group.group_id
2325 && surviving_ids.contains(&rewritten)
2326 && !rewritten_dependencies
2327 .iter()
2328 .any(|existing| existing == &rewritten)
2329 {
2330 rewritten_dependencies.push(rewritten);
2331 }
2332 }
2333
2334 group.dependencies = rewritten_dependencies;
2335 surviving_groups.push(group);
2336 }
2337
2338 let dependency_order = compute_dependency_order(
2339 &surviving_groups,
2340 |group| &group.group_id,
2341 |group| &group.dependencies,
2342 )?;
2343 Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2344}
2345
2346fn finalize_executable_plan(
2347 snapshot: &ComposeSnapshot,
2348 intent_plan: &ComposeIntentPlan,
2349 assigned_by_group: HashMap<String, BTreeSet<String>>,
2350) -> Result<ComposeExecutablePlan> {
2351 let group_rank: HashMap<&str, usize> = intent_plan
2352 .dependency_order
2353 .iter()
2354 .enumerate()
2355 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2356 .collect();
2357
2358 let mut executable_groups = Vec::new();
2359 for group in &intent_plan.groups {
2360 let hunk_ids: Vec<String> = snapshot
2361 .hunks
2362 .iter()
2363 .filter(|hunk| {
2364 assigned_by_group
2365 .get(&group.group_id)
2366 .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2367 })
2368 .map(|hunk| hunk.hunk_id.clone())
2369 .collect();
2370
2371 let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2372 let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2373 executable_groups.push(ComposeExecutableGroup {
2374 group_id: group.group_id.clone(),
2375 commit_type,
2376 scope: group.scope.clone(),
2377 file_ids,
2378 rationale: group.rationale.clone(),
2379 dependencies: group.dependencies.clone(),
2380 hunk_ids,
2381 });
2382 }
2383
2384 let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2385 prune_empty_groups(executable_groups, &redirects)
2386}
2387
2388fn validate_executable_plan(
2389 snapshot: &ComposeSnapshot,
2390 plan: &ComposeExecutablePlan,
2391) -> Result<()> {
2392 if plan.groups.is_empty() {
2393 return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2394 }
2395
2396 let known_hunks: HashSet<&str> = snapshot
2397 .hunks
2398 .iter()
2399 .map(|hunk| hunk.hunk_id.as_str())
2400 .collect();
2401 let known_files: HashSet<&str> = snapshot
2402 .files
2403 .iter()
2404 .map(|file| file.file_id.as_str())
2405 .collect();
2406 let mut coverage = HashMap::<String, String>::new();
2407
2408 for group in &plan.groups {
2409 if group.hunk_ids.is_empty() {
2410 return Err(CommitGenError::Other(format!(
2411 "Compose group {} ended up empty after binding",
2412 group.group_id
2413 )));
2414 }
2415
2416 for file_id in &group.file_ids {
2417 if !known_files.contains(file_id.as_str()) {
2418 return Err(CommitGenError::Other(format!(
2419 "Compose group {} references unknown file_id {}",
2420 group.group_id, file_id
2421 )));
2422 }
2423 }
2424
2425 for hunk_id in &group.hunk_ids {
2426 if !known_hunks.contains(hunk_id.as_str()) {
2427 return Err(CommitGenError::Other(format!(
2428 "Compose group {} references unknown hunk_id {}",
2429 group.group_id, hunk_id
2430 )));
2431 }
2432
2433 if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2434 return Err(CommitGenError::Other(format!(
2435 "Hunk {} was assigned to both {} and {}",
2436 hunk_id, existing_group, group.group_id
2437 )));
2438 }
2439 }
2440 }
2441
2442 let missing_hunks: Vec<String> = snapshot
2443 .hunks
2444 .iter()
2445 .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2446 .map(|hunk| hunk.hunk_id.clone())
2447 .collect();
2448 if !missing_hunks.is_empty() {
2449 return Err(CommitGenError::Other(format!(
2450 "Compose plan left hunks unassigned: {}",
2451 missing_hunks.join(", ")
2452 )));
2453 }
2454
2455 let dependency_order =
2456 compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2457 if dependency_order != plan.dependency_order {
2458 return Err(CommitGenError::Other(
2459 "Compose dependency order does not match recomputed order".to_string(),
2460 ));
2461 }
2462
2463 Ok(())
2464}
2465
2466#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2467async fn bind_compose_plan(
2468 snapshot: &ComposeSnapshot,
2469 intent_plan: &ComposeIntentPlan,
2470 config: &CommitConfig,
2471 debug_dir: Option<&Path>,
2472) -> Result<ComposeExecutablePlan> {
2473 let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2474
2475 if !ambiguous_files.is_empty() {
2476 let valid_group_ids: HashSet<&str> = intent_plan
2477 .groups
2478 .iter()
2479 .map(|group| group.group_id.as_str())
2480 .collect();
2481 let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2482 let mut unresolved = Vec::new();
2483
2484 for (batch_idx, batch) in binding_batches.iter().enumerate() {
2485 let hunk_context = ambiguous_hunk_context(batch);
2486 let debug_name = if binding_batches.len() == 1 {
2487 "compose_bind".to_string()
2488 } else {
2489 format!("compose_bind_{:02}", batch_idx + 1)
2490 };
2491 let assignments =
2492 request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2493 .await?;
2494 let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2495 for (group_id, hunk_ids) in evaluation.assigned {
2496 let entry = assigned_by_group.entry(group_id).or_default();
2497 for hunk_id in hunk_ids {
2498 entry.insert(hunk_id);
2499 }
2500 }
2501 unresolved.extend(evaluation.unresolved);
2502 }
2503
2504 let group_rank: HashMap<&str, usize> = intent_plan
2505 .dependency_order
2506 .iter()
2507 .enumerate()
2508 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2509 .collect();
2510
2511 let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2512 if !unresolved.is_empty() {
2513 let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2514 let repair_batches = chunk_ambiguous_files(&unresolved_files);
2515 let mut repair_unresolved = Vec::new();
2516
2517 for (batch_idx, batch) in repair_batches.iter().enumerate() {
2518 let debug_name = if repair_batches.len() == 1 {
2519 "compose_bind_repair".to_string()
2520 } else {
2521 format!("compose_bind_repair_{:02}", batch_idx + 1)
2522 };
2523 let repair_assignments = request_binding(
2524 snapshot,
2525 &intent_plan.groups,
2526 batch,
2527 config,
2528 debug_dir,
2529 &debug_name,
2530 )
2531 .await?;
2532 let repair_context = ambiguous_hunk_context(batch);
2533 let repair =
2534 evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2535 for (group_id, hunk_ids) in repair.assigned {
2536 let entry = assigned_by_group.entry(group_id).or_default();
2537 for hunk_id in hunk_ids {
2538 entry.insert(hunk_id);
2539 }
2540 }
2541
2542 repair_unresolved.extend(repair.unresolved);
2543 }
2544 unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2545
2546 if !unresolved.is_empty() {
2547 assign_unresolved_hunks(
2548 &unresolved,
2549 &mut assigned_by_group,
2550 &ambiguous_files,
2551 &group_rank,
2552 );
2553 }
2554 }
2555 }
2556
2557 let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2558 validate_executable_plan(snapshot, &plan)?;
2559 Ok(plan)
2560}
2561
2562fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2563 println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2564 for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2565 let group = &plan.groups[group_idx];
2566 let scope = group
2567 .scope
2568 .as_ref()
2569 .map(|scope| format!("({})", style::scope(scope.as_str())))
2570 .unwrap_or_default();
2571
2572 println!(
2573 "\n{}. {} [{}{}] {}",
2574 display_idx + 1,
2575 style::bold(&group.group_id),
2576 style::commit_type(group.commit_type.as_str()),
2577 scope,
2578 group.rationale
2579 );
2580
2581 println!(" Files:");
2582 for file_id in &group.file_ids {
2583 if let Some(file) = snapshot.file_by_id(file_id) {
2584 let selected_hunk_ids: Vec<&str> = group
2585 .hunk_ids
2586 .iter()
2587 .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2588 .map(String::as_str)
2589 .collect();
2590 let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2591 "all hunks".to_string()
2592 } else {
2593 selected_hunk_ids.join(", ")
2594 };
2595 println!(" - {} {} ({selection})", file.file_id, file.path);
2596 }
2597 }
2598
2599 if !group.dependencies.is_empty() {
2600 println!(" Depends on: {}", group.dependencies.join(", "));
2601 }
2602 }
2603}
2604
2605#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2606async fn generate_compose_group_analysis(
2607 stat: &str,
2608 diff: &str,
2609 group: &ComposeExecutableGroup,
2610 config: &CommitConfig,
2611 args: &Args,
2612 debug_prefix: &str,
2613 counter: &TokenCounter,
2614) -> Result<ConventionalAnalysis> {
2615 match compose_analysis_strategy(diff, config, counter) {
2616 ComposeAnalysisStrategy::MapReduce => {
2617 println!(
2618 " {}",
2619 style::info(&format!(
2620 "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2621 group.group_id
2622 ))
2623 );
2624 run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2625 },
2626 strategy => {
2627 let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2628 eprintln!(
2629 " {}",
2630 style::warning(&format!(
2631 "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2632 group.group_id
2633 ))
2634 );
2635 Cow::Owned(smart_truncate_diff(
2636 diff,
2637 compose_truncation_length(config),
2638 config,
2639 counter,
2640 ))
2641 } else {
2642 Cow::Borrowed(diff)
2643 };
2644
2645 let ctx = AnalysisContext {
2646 user_context: Some(&group.rationale),
2647 recent_commits: None,
2648 common_scopes: None,
2649 project_context: None,
2650 debug_output: args.debug_output.as_deref(),
2651 debug_prefix: Some(debug_prefix),
2652 };
2653
2654 generate_conventional_analysis(
2655 stat,
2656 analysis_diff.as_ref(),
2657 &config.analysis_model,
2658 "",
2659 &ctx,
2660 config,
2661 )
2662 .await
2663 },
2664 }
2665}
2666
2667fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2668 let files: Vec<&str> = group
2669 .file_ids
2670 .iter()
2671 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2672 .collect();
2673
2674 if files.is_empty() {
2675 "no files resolved".to_string()
2676 } else {
2677 files.join(", ")
2678 }
2679}
2680
2681fn cumulative_file_hunk_ids(
2685 plan: &ComposeExecutablePlan,
2686 position: usize,
2687 snapshot: &ComposeSnapshot,
2688 file_id: &str,
2689) -> Vec<String> {
2690 let mut hunk_ids = Vec::new();
2691 for &group_idx in plan.dependency_order.iter().take(position + 1) {
2692 let Some(group) = plan.groups.get(group_idx) else {
2693 continue;
2694 };
2695 for hunk_id in &group.hunk_ids {
2696 if snapshot
2697 .hunk_by_id(hunk_id)
2698 .is_some_and(|hunk| hunk.file_id == file_id)
2699 {
2700 hunk_ids.push(hunk_id.clone());
2701 }
2702 }
2703 }
2704 hunk_ids
2705}
2706
2707#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2708pub async fn execute_compose(
2709 snapshot: &ComposeSnapshot,
2710 plan: &ComposeExecutablePlan,
2711 config: &CommitConfig,
2712 args: &Args,
2713 base_state: &ComposeBaseState,
2714) -> Result<Vec<String>> {
2715 let total = plan.dependency_order.len();
2716
2717 let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2721 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2722 let group = &plan.groups[group_idx];
2723 println!(
2724 " {}",
2725 style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2726 );
2727 let group_patch = create_executable_group_patch(snapshot, group)?;
2728 group_diff_stats.push((group_patch.diff, group_patch.stat));
2729 }
2730
2731 println!(
2735 "{}",
2736 style::info(&format!(
2737 "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2738 COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2739 ))
2740 );
2741
2742 let token_counter = create_token_counter(config);
2743 let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2744 stream::iter(plan.dependency_order.iter().enumerate())
2745 .map(|(idx, &group_idx)| {
2746 let group = &plan.groups[group_idx];
2747 let (diff, stat) = &group_diff_stats[idx];
2748 let debug_prefix = format!("compose-{}", idx + 1);
2749 let token_counter = &token_counter;
2750 async move {
2751 let result = async {
2752 let analysis = generate_compose_group_analysis(
2753 stat,
2754 diff,
2755 group,
2756 config,
2757 args,
2758 &debug_prefix,
2759 token_counter,
2760 )
2761 .await?;
2762 let body = analysis.body_texts();
2763 let summary = generate_summary_from_analysis(
2764 stat,
2765 group.commit_type.as_str(),
2766 group.scope.as_ref().map(|scope| scope.as_str()),
2767 &body,
2768 Some(&group.rationale),
2769 config,
2770 args.debug_output.as_deref(),
2771 Some(&debug_prefix),
2772 )
2773 .await?;
2774 Ok::<_, CommitGenError>((body, summary))
2775 }
2776 .await;
2777
2778 result.map_err(|source| CommitGenError::ComposeMessageError {
2779 group_id: group.group_id.clone(),
2780 files: compose_group_file_list(snapshot, group),
2781 source: Box::new(source),
2782 })
2783 }
2784 })
2785 .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2786 .collect::<Vec<_>>()
2787 .await
2788 .into_iter()
2789 .collect::<Result<Vec<_>>>()?;
2790
2791 execute_compose_with_prepared_messages(
2792 snapshot,
2793 plan,
2794 config,
2795 args,
2796 base_state,
2797 prepared_messages,
2798 )
2799}
2800
2801#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2802fn execute_compose_with_prepared_messages(
2803 snapshot: &ComposeSnapshot,
2804 plan: &ComposeExecutablePlan,
2805 config: &CommitConfig,
2806 args: &Args,
2807 base_state: &ComposeBaseState,
2808 prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2809) -> Result<Vec<String>> {
2810 let dir = &args.dir;
2811 let total = plan.dependency_order.len();
2812 if args.compose_preview {
2813 return Ok(Vec::new());
2814 }
2815
2816 let index = TempGitIndex::new(dir)?;
2817 read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2818
2819 let mut commit_hashes = Vec::new();
2820 let mut parent_hash = base_state.head_hash.clone();
2821
2822 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2826 let group = &plan.groups[group_idx];
2827
2828 println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2829 println!(" Type: {}", style::commit_type(group.commit_type.as_str()));
2830 if let Some(scope) = &group.scope {
2831 println!(" Scope: {}", style::scope(scope.as_str()));
2832 }
2833 let paths: Vec<String> = group
2834 .file_ids
2835 .iter()
2836 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2837 .collect();
2838 println!(" Files: {}", paths.join(", "));
2839
2840 let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2841 let mut staged_anything = outcome.result == StageResult::Staged;
2842
2843 for skipped in &outcome.skipped {
2847 let Some(file) = snapshot.file_by_path(&skipped.path) else {
2848 continue;
2849 };
2850 let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2851 force_stage_file_from_base_in_index(
2852 snapshot,
2853 &file.file_id,
2854 &cumulative,
2855 dir,
2856 index.path(),
2857 )?;
2858 staged_anything = true;
2859 eprintln!(
2860 " {}",
2861 style::info(&format!(
2862 "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2863 skipped.path
2864 ))
2865 );
2866 }
2867
2868 if !staged_anything {
2869 eprintln!(
2870 " {}",
2871 style::warning(&format!(
2872 "Skipping commit {}: its planned patch is already applied ({:?})",
2873 group.group_id, outcome.result
2874 ))
2875 );
2876 continue;
2877 }
2878
2879 let (analysis_body, summary) = prepared_messages[idx].clone();
2880 let mut commit = ConventionalCommit {
2881 commit_type: group.commit_type.clone(),
2882 scope: group.scope.clone(),
2883 summary,
2884 body: analysis_body,
2885 footers: vec![],
2886 };
2887 post_process_commit_message(&mut commit, config);
2888
2889 if let Err(err) = validate_commit_message(&commit, config) {
2890 eprintln!(
2891 " {}",
2892 style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2893 );
2894 }
2895
2896 let mut formatted_message = format_commit_message(&commit);
2897 if args.signoff || config.signoff {
2898 formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2899 }
2900 println!(
2901 " Message:\n{}",
2902 formatted_message
2903 .lines()
2904 .take(3)
2905 .collect::<Vec<_>>()
2906 .join("\n")
2907 );
2908
2909 let tree = write_index_tree(index.path(), dir)?;
2910 let sign = args.sign || config.gpg_sign;
2911 let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2912 parent_hash.clone_from(&hash);
2913 commit_hashes.push(hash);
2914
2915 if args.compose_test_after_each {
2916 return Err(CommitGenError::Other(
2917 "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2918 ));
2919 }
2920 }
2921
2922 if commit_hashes.is_empty() {
2923 return Ok(commit_hashes);
2924 }
2925
2926 update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2927
2928 let current_index_tree = write_real_index_tree(dir)?;
2929 if current_index_tree == base_state.index_tree {
2930 reset_mixed_to(&parent_hash, dir)?;
2931 } else {
2932 println!(
2936 "{}",
2937 style::warning("Index changed during compose; preserving newly staged changes")
2938 );
2939 let paths: Vec<String> = snapshot
2940 .files
2941 .iter()
2942 .map(|file| file.path.clone())
2943 .collect();
2944 reset_paths_to(&parent_hash, &paths, dir)?;
2945 }
2946
2947 Ok(commit_hashes)
2948}
2949
2950#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2951pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2952 let max_rounds = config.compose_max_rounds;
2953
2954 for round in 1..=max_rounds {
2955 if round > 1 {
2956 println!(
2957 "\n{}",
2958 style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2959 );
2960 } else {
2961 println!("{}", style::section_header("Compose Mode", 80));
2962 }
2963 println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2964
2965 run_compose_round(args, config, round).await?;
2966
2967 if args.compose_preview {
2968 break;
2969 }
2970 match get_compose_diff_with_config(&args.dir, config) {
2971 Err(CommitGenError::NoChanges { .. }) => {
2972 println!(
2973 "\n{}",
2974 style::success(&format!(
2975 "{} All changes committed successfully",
2976 style::icons::SUCCESS
2977 ))
2978 );
2979 break;
2980 },
2981 Err(err) => return Err(err),
2982 Ok(remaining_diff) => {
2983 eprintln!(
2984 "\n{}",
2985 style::warning(&format!(
2986 "{} Uncommitted changes remain after round {round}",
2987 style::icons::WARNING
2988 ))
2989 );
2990 eprintln!("{remaining_diff}");
2991 },
2992 }
2993
2994 if round < max_rounds {
2995 eprintln!("{}", style::info("Starting another compose round..."));
2996 } else {
2997 eprintln!(
2998 "{}",
2999 style::warning(&format!(
3000 "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
3001 ))
3002 );
3003 }
3004 }
3005
3006 Ok(())
3007}
3008
3009#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
3010async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
3011 let base_state = capture_compose_base_state(&args.dir)?;
3012 let diff = get_compose_diff_with_config(&args.dir, config)?;
3013 let stat = get_compose_stat(&args.dir)?;
3014 let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3015 pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3019 let snapshot = snapshot;
3020
3021 if let Some(debug_dir) = args.debug_output.as_deref() {
3022 save_debug_artifact(
3023 Some(debug_dir),
3024 &format!("compose_round_{round}_snapshot.json"),
3025 &snapshot,
3026 )?;
3027 }
3028
3029 let token_counter = create_token_counter(config);
3030 let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3031 println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3032 observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3033 } else {
3034 if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3035 && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3036 {
3037 println!(
3038 "{}",
3039 style::info(
3040 "Skipping per-file observations for very large compose snapshot; using area-level \
3041 planning instead."
3042 )
3043 );
3044 }
3045 Vec::new()
3046 };
3047
3048 if let Some(debug_dir) = args.debug_output.as_deref()
3049 && !observations.is_empty()
3050 {
3051 save_debug_artifact(
3052 Some(debug_dir),
3053 &format!("compose_round_{round}_observations.json"),
3054 &observations,
3055 )?;
3056 }
3057
3058 let max_commits = args.compose_max_commits.unwrap_or(20);
3059 let executable_plan = if let Some(cached_plan) =
3060 load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3061 {
3062 println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3063 cached_plan
3064 } else {
3065 println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3066 let intent_plan = analyze_compose_intent(
3067 &snapshot,
3068 &observations,
3069 config,
3070 max_commits,
3071 args.debug_output.as_deref(),
3072 )
3073 .await?;
3074
3075 if let Some(debug_dir) = args.debug_output.as_deref() {
3076 save_debug_artifact(
3077 Some(debug_dir),
3078 &format!("compose_round_{round}_intent_plan.json"),
3079 &intent_plan,
3080 )?;
3081 }
3082
3083 println!("{}", style::info("Binding hunks to groups..."));
3084 let plan =
3085 bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3086 save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3087 plan
3088 };
3089
3090 if let Some(debug_dir) = args.debug_output.as_deref() {
3091 save_debug_artifact(
3092 Some(debug_dir),
3093 &format!("compose_round_{round}_executable_plan.json"),
3094 &executable_plan,
3095 )?;
3096 }
3097
3098 print_executable_plan(&snapshot, &executable_plan);
3099
3100 if args.compose_preview {
3101 println!(
3102 "\n{}",
3103 style::success(&format!(
3104 "{} Preview complete (use --compose without --compose-preview to execute)",
3105 style::icons::SUCCESS
3106 ))
3107 );
3108 return Ok(());
3109 }
3110
3111 println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3112 let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3113 println!(
3114 "{}",
3115 style::success(&format!(
3116 "{} Round {round}: Created {} commit(s)",
3117 style::icons::SUCCESS,
3118 hashes.len()
3119 ))
3120 );
3121 Ok(())
3122}
3123
3124#[cfg(test)]
3125mod tests {
3126 use std::{fmt::Write, fs};
3127
3128 use tempfile::TempDir;
3129
3130 use super::*;
3131 use crate::{
3132 config::CommitConfig, git::get_compose_diff, patch::build_compose_snapshot, types::CommitType,
3133 };
3134
3135 fn shared_file_diff() -> (&'static str, &'static str) {
3136 (
3137 r#"diff --git a/src/lib.rs b/src/lib.rs
3138index 1111111..2222222 100644
3139--- a/src/lib.rs
3140+++ b/src/lib.rs
3141@@ -1,3 +1,3 @@
3142-fn alpha() {
3143+fn alpha_changed() {
3144 println!("alpha");
3145 }
3146@@ -12,3 +12,3 @@
3147-fn beta() {
3148+fn beta_changed() {
3149 println!("beta");
3150 }
3151diff --git a/tests/lib.rs b/tests/lib.rs
3152index 3333333..4444444 100644
3153--- a/tests/lib.rs
3154+++ b/tests/lib.rs
3155@@ -1,3 +1,4 @@
3156 fn test_it() {
3157+ assert!(true);
3158 }
3159"#,
3160 " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3161 )
3162 }
3163
3164 fn build_test_snapshot() -> ComposeSnapshot {
3165 let (diff, stat) = shared_file_diff();
3166 build_compose_snapshot(diff, stat).unwrap()
3167 }
3168
3169 fn write_file(dir: &TempDir, path: &str, contents: &str) {
3170 let full_path = dir.path().join(path);
3171 if let Some(parent) = full_path.parent() {
3172 fs::create_dir_all(parent).unwrap();
3173 }
3174 fs::write(full_path, contents).unwrap();
3175 }
3176
3177 fn run_git(dir: &TempDir, args: &[&str]) -> String {
3178 let output = crate::git::git_command()
3179 .args(args)
3180 .current_dir(dir.path())
3181 .output()
3182 .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3183
3184 assert!(
3185 output.status.success(),
3186 "git {:?} failed: stdout={} stderr={}",
3187 args,
3188 String::from_utf8_lossy(&output.stdout),
3189 String::from_utf8_lossy(&output.stderr)
3190 );
3191
3192 String::from_utf8_lossy(&output.stdout).to_string()
3193 }
3194
3195 fn init_repo() -> TempDir {
3196 let dir = TempDir::new().unwrap();
3197 run_git(&dir, &["init"]);
3198 run_git(&dir, &["config", "user.name", "Compose Test"]);
3199 run_git(&dir, &["config", "user.email", "compose@test.local"]);
3200 run_git(&dir, &["config", "commit.gpgsign", "false"]);
3201 dir
3202 }
3203
3204 fn commit_all(dir: &TempDir, message: &str) {
3205 run_git(dir, &["add", "."]);
3206 run_git(dir, &["commit", "-m", message]);
3207 }
3208
3209 fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3210 (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3211 }
3212
3213 #[test]
3214 fn test_compose_file_category_treats_prompts_as_functional_source() {
3215 let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3216index 1111111..2222222 100644
3217--- a/prompts/analysis/default.md
3218+++ b/prompts/analysis/default.md
3219@@ -1,1 +1,1 @@
3220-old prompt
3221+new prompt
3222diff --git a/system/analysis/default.md b/system/analysis/default.md
3223index 5555555..6666666 100644
3224--- a/system/analysis/default.md
3225+++ b/system/analysis/default.md
3226@@ -1,1 +1,1 @@
3227-old system
3228+new system
3229diff --git a/README.md b/README.md
3230index 3333333..4444444 100644
3231--- a/README.md
3232+++ b/README.md
3233@@ -1,1 +1,1 @@
3234-old docs
3235+new docs
3236";
3237 let snapshot = build_compose_snapshot(diff, "").unwrap();
3238 let prompt_file = snapshot
3239 .file_by_path("prompts/analysis/default.md")
3240 .unwrap();
3241 let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3242 let readme_file = snapshot.file_by_path("README.md").unwrap();
3243
3244 assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3245 assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3246 assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3247
3248 let feat_group = ComposeIntentGroup {
3249 group_id: "G1".to_string(),
3250 commit_type: CommitType::new("feat").unwrap(),
3251 scope: None,
3252 file_ids: vec![prompt_file.file_id.clone()],
3253 rationale: "prompt behavior change".to_string(),
3254 dependencies: vec![],
3255 };
3256 assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3257
3258 let fallback_type =
3259 fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3260 .unwrap();
3261 assert_eq!(fallback_type.as_str(), "refactor");
3262 }
3263
3264 fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3265 let mut diff = String::new();
3266
3267 for file_idx in 0..file_count {
3268 let path = format!("src/module_{file_idx:03}.rs");
3269 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3270 diff.push_str("index 1111111..2222222 100644\n");
3271 writeln!(diff, "--- a/{path}").unwrap();
3272 writeln!(diff, "+++ b/{path}").unwrap();
3273
3274 for hunk_idx in 0..hunks_per_file {
3275 let line_no = (hunk_idx * 4) + 1;
3276 writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3277 writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3278 writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3279 }
3280 }
3281
3282 build_compose_snapshot(&diff, "").unwrap()
3283 }
3284
3285 fn build_multi_area_snapshot() -> ComposeSnapshot {
3286 let mut diff = String::new();
3287 let areas = [
3288 ("apps/frontend/src/server", 72),
3289 ("packages/model/src/models", 54),
3290 ("apps/daemon/src/worker", 43),
3291 (".github/workflows", 16),
3292 ];
3293
3294 for (prefix, count) in areas {
3295 for file_idx in 0..count {
3296 let path = format!("{prefix}/file_{file_idx:03}.rs");
3297 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3298 diff.push_str("index 1111111..2222222 100644\n");
3299 writeln!(diff, "--- a/{path}").unwrap();
3300 writeln!(diff, "+++ b/{path}").unwrap();
3301 diff.push_str("@@ -1,1 +1,1 @@\n");
3302 writeln!(diff, "-old_{file_idx}").unwrap();
3303 writeln!(diff, "+new_{file_idx}").unwrap();
3304 }
3305 }
3306
3307 build_compose_snapshot(&diff, "").unwrap()
3308 }
3309
3310 fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3311 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3312 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3313 let groups = vec![
3314 ComposeIntentGroup {
3315 group_id: "G1".to_string(),
3316 commit_type: CommitType::new("refactor").unwrap(),
3317 scope: None,
3318 file_ids: vec![source_file.file_id.clone(), test_file.file_id.clone()],
3319 rationale: "implementation group".to_string(),
3320 dependencies: vec![],
3321 },
3322 ComposeIntentGroup {
3323 group_id: "G2".to_string(),
3324 commit_type: CommitType::new("refactor").unwrap(),
3325 scope: None,
3326 file_ids: vec![source_file.file_id.clone()],
3327 rationale: "shared file follow-up".to_string(),
3328 dependencies: vec!["G1".to_string()],
3329 },
3330 ];
3331 let dependency_order =
3332 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3333 .unwrap();
3334 ComposeIntentPlan { groups, dependency_order }
3335 }
3336
3337 #[test]
3338 fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3339 let dir = init_repo();
3340 write_file(&dir, "src/a.rs", "fn a() {}\n");
3341 write_file(&dir, "src/b.rs", "fn b() {}\n");
3342 commit_all(&dir, "initial");
3343 write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3344 write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3345
3346 let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3347 let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3348 let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3349 let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3350 let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3351 let plan = ComposeExecutablePlan {
3352 groups: vec![
3353 ComposeExecutableGroup {
3354 group_id: "G1".to_string(),
3355 commit_type: CommitType::new("refactor").unwrap(),
3356 scope: None,
3357 file_ids: vec![a_file.file_id.clone()],
3358 rationale: "change a".to_string(),
3359 dependencies: vec![],
3360 hunk_ids: a_file.hunk_ids.clone(),
3361 },
3362 ComposeExecutableGroup {
3363 group_id: "G2".to_string(),
3364 commit_type: CommitType::new("refactor").unwrap(),
3365 scope: None,
3366 file_ids: vec![b_file.file_id.clone()],
3367 rationale: "change b".to_string(),
3368 dependencies: vec!["G1".to_string()],
3369 hunk_ids: b_file.hunk_ids.clone(),
3370 },
3371 ],
3372 dependency_order: vec![0, 1],
3373 };
3374 let config = CommitConfig::default();
3375 let args = Args {
3376 dir: dir.path().to_string_lossy().to_string(),
3377 compose: true,
3378 ..Default::default()
3379 };
3380 let base_state = capture_compose_base_state(&args.dir).unwrap();
3381
3382 let hashes = execute_compose_with_prepared_messages(
3383 &snapshot,
3384 &plan,
3385 &config,
3386 &args,
3387 &base_state,
3388 vec![canned_message("change a"), canned_message("change b")],
3389 )
3390 .unwrap();
3391
3392 assert_eq!(hashes.len(), 2);
3393 assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3394 assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3395 }
3396
3397 #[test]
3398 fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3399 let dir = init_repo();
3400 write_file(&dir, "src/lib.rs", "old\n");
3401 write_file(&dir, "sentinel.txt", "base\n");
3402 commit_all(&dir, "initial");
3403 let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3404
3405 write_file(&dir, "src/lib.rs", "changed\n");
3407
3408 write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3410 run_git(&dir, &["add", "sentinel.txt"]);
3411 let staged_before = run_git(&dir, &["diff", "--cached"]);
3412 assert!(staged_before.contains("staged sentinel"));
3413
3414 let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3415 let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3416 let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3417 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3418 let plan = ComposeExecutablePlan {
3421 groups: vec![ComposeExecutableGroup {
3422 group_id: "G1".to_string(),
3423 commit_type: CommitType::new("fix").unwrap(),
3424 scope: None,
3425 file_ids: vec![source_file.file_id.clone()],
3426 rationale: "unstageable group".to_string(),
3427 dependencies: vec![],
3428 hunk_ids: vec!["F999-H001".to_string()],
3429 }],
3430 dependency_order: vec![0],
3431 };
3432 let config = CommitConfig::default();
3433 let args = Args {
3434 dir: dir.path().to_string_lossy().to_string(),
3435 compose: true,
3436 ..Default::default()
3437 };
3438 let base_state = capture_compose_base_state(&args.dir).unwrap();
3439
3440 let err = execute_compose_with_prepared_messages(
3441 &snapshot,
3442 &plan,
3443 &config,
3444 &args,
3445 &base_state,
3446 vec![canned_message("unstageable group")],
3447 )
3448 .unwrap_err();
3449
3450 assert!(err.to_string().contains("unknown hunk id"));
3451 assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3452 assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3453 }
3454
3455 #[test]
3456 fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3457 let snapshot = build_test_snapshot();
3458 let intent_plan = build_shared_intent_plan(&snapshot);
3459 let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3460
3461 assert_eq!(ambiguous.len(), 1);
3462 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3463 let assigned_to_g1 = assigned.get("G1").unwrap();
3464 assert!(
3465 test_file
3466 .hunk_ids
3467 .iter()
3468 .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3469 "uniquely owned file should be auto-assigned"
3470 );
3471 }
3472
3473 #[test]
3474 fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3475 let snapshot = build_test_snapshot();
3476 let intent_plan = build_shared_intent_plan(&snapshot);
3477 let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3478 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3479 let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3480 let valid_group_ids: HashSet<&str> = intent_plan
3481 .groups
3482 .iter()
3483 .map(|group| group.group_id.as_str())
3484 .collect();
3485
3486 let evaluation = evaluate_binding(
3487 &[
3488 ComposeBindingAssignment {
3489 group_id: "G1".to_string(),
3490 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3491 },
3492 ComposeBindingAssignment {
3493 group_id: "G2".to_string(),
3494 hunk_ids: vec![source_file.hunk_ids[1].clone()],
3495 },
3496 ],
3497 &hunk_context,
3498 &valid_group_ids,
3499 &snapshot,
3500 );
3501
3502 for (group_id, hunk_ids) in evaluation.assigned {
3503 let entry = assigned.entry(group_id).or_default();
3504 for hunk_id in hunk_ids {
3505 entry.insert(hunk_id);
3506 }
3507 }
3508
3509 let group_rank: HashMap<&str, usize> = intent_plan
3510 .dependency_order
3511 .iter()
3512 .enumerate()
3513 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3514 .collect();
3515 assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3516
3517 let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3518 assert_eq!(executable_plan.groups.len(), 1);
3519 assert_eq!(executable_plan.groups[0].group_id, "G1");
3520 assert!(
3521 source_file
3522 .hunk_ids
3523 .iter()
3524 .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3525 "fallback should keep every hunk from the shared file in the surviving group"
3526 );
3527 }
3528
3529 #[test]
3530 fn test_validate_executable_plan_rejects_overlap() {
3531 let snapshot = build_test_snapshot();
3532 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3533 let executable_plan = ComposeExecutablePlan {
3534 groups: vec![
3535 ComposeExecutableGroup {
3536 group_id: "G1".to_string(),
3537 commit_type: CommitType::new("refactor").unwrap(),
3538 scope: None,
3539 file_ids: vec![source_file.file_id.clone()],
3540 rationale: "group one".to_string(),
3541 dependencies: vec![],
3542 hunk_ids: vec![source_file.hunk_ids[0].clone()],
3543 },
3544 ComposeExecutableGroup {
3545 group_id: "G2".to_string(),
3546 commit_type: CommitType::new("refactor").unwrap(),
3547 scope: None,
3548 file_ids: vec![source_file.file_id.clone()],
3549 rationale: "group two".to_string(),
3550 dependencies: vec![],
3551 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3552 },
3553 ],
3554 dependency_order: vec![0, 1],
3555 };
3556
3557 let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3558 assert!(err.to_string().contains("assigned to both"));
3559 }
3560
3561 #[test]
3562 fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3563 let snapshot = build_test_snapshot();
3564 let planning_index = build_planning_index(&snapshot);
3565 let groups = vec![ComposeIntentGroup {
3566 group_id: "G1".to_string(),
3567 commit_type: CommitType::new("refactor").unwrap(),
3568 scope: None,
3569 file_ids: vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3570 rationale: "normalize file references".to_string(),
3571 dependencies: vec![],
3572 }];
3573
3574 let (normalized_groups, repair_notes) =
3575 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3576
3577 assert_eq!(normalized_groups.len(), 1);
3578 assert_eq!(
3579 normalized_groups[0].file_ids,
3580 snapshot
3581 .files
3582 .iter()
3583 .map(|file| file.file_id.clone())
3584 .collect::<Vec<_>>()
3585 );
3586 assert_eq!(repair_notes.len(), 2);
3587 }
3588
3589 #[test]
3590 fn test_normalize_intent_plan_repairs_missing_files() {
3591 let snapshot = build_test_snapshot();
3592 let planning_index = build_planning_index(&snapshot);
3593 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3594 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3595 let groups = vec![ComposeIntentGroup {
3596 group_id: "G1".to_string(),
3597 commit_type: CommitType::new("refactor").unwrap(),
3598 scope: None,
3599 file_ids: vec![source_file.file_id.clone()],
3600 rationale: "partial coverage".to_string(),
3601 dependencies: vec![],
3602 }];
3603
3604 let (normalized_groups, repair_notes) =
3605 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3606
3607 assert_eq!(normalized_groups.len(), 1);
3608 assert!(
3609 normalized_groups[0].file_ids.contains(&source_file.file_id),
3610 "existing file assignment should be preserved"
3611 );
3612 assert!(
3613 normalized_groups[0].file_ids.contains(&test_file.file_id),
3614 "missing files should be assigned to an existing group"
3615 );
3616 assert_eq!(repair_notes.len(), 1);
3617 assert!(repair_notes[0].contains(&test_file.file_id));
3618 }
3619
3620 #[test]
3621 fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3622 let snapshot = build_multi_area_snapshot();
3623 let planning_index = build_planning_index(&snapshot);
3624 let frontend_target = planning_index
3625 .targets
3626 .iter()
3627 .find(|target| target.label.starts_with("apps/frontend"))
3628 .unwrap();
3629 let model_target = planning_index
3630 .targets
3631 .iter()
3632 .find(|target| target.label.starts_with("packages/model"))
3633 .unwrap();
3634 let groups = vec![
3635 ComposeIntentGroup {
3636 group_id: "G1".to_string(),
3637 commit_type: CommitType::new("refactor").unwrap(),
3638 scope: Scope::new("apps/frontend").ok(),
3639 file_ids: vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3640 rationale: "frontend platform updates".to_string(),
3641 dependencies: vec!["group 2".to_string(), "G1".to_string()],
3642 },
3643 ComposeIntentGroup {
3644 group_id: "G2".to_string(),
3645 commit_type: CommitType::new("refactor").unwrap(),
3646 scope: Scope::new("packages/model").ok(),
3647 file_ids: vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3648 rationale: "model storage updates".to_string(),
3649 dependencies: vec!["F5".to_string()],
3650 },
3651 ];
3652
3653 let (normalized_groups, repair_notes) =
3654 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3655
3656 assert_eq!(normalized_groups.len(), 2);
3657 assert!(
3658 normalized_groups[0]
3659 .file_ids
3660 .iter()
3661 .all(|file_id| file_id.starts_with('F'))
3662 );
3663 assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3664 assert!(normalized_groups[1].dependencies.is_empty());
3665 assert!(
3666 repair_notes
3667 .iter()
3668 .any(|note| note.contains("Dropped unknown planning target"))
3669 );
3670 assert!(
3671 repair_notes
3672 .iter()
3673 .any(|note| note.contains("Dropped self-dependency"))
3674 );
3675 assert!(
3676 repair_notes
3677 .iter()
3678 .any(|note| note.contains("Mapped compose planner dependency"))
3679 );
3680 assert!(
3681 repair_notes
3682 .iter()
3683 .any(|note| note.contains("Dropped unknown dependency"))
3684 );
3685 }
3686
3687 #[test]
3688 fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3689 let snapshot = build_test_snapshot();
3690 let summary = render_snapshot_summary(&snapshot, &[]);
3691 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3692
3693 assert!(!summary.contains("# snapshot compacted"));
3694 for hunk_id in &source_file.hunk_ids {
3695 assert!(summary.contains(hunk_id));
3696 }
3697 }
3698
3699 #[test]
3700 fn test_render_snapshot_summary_compacts_large_snapshot() {
3701 let snapshot = build_large_snapshot(160, 4);
3702 let summary = render_snapshot_summary(&snapshot, &[]);
3703
3704 assert!(summary.contains("# snapshot compacted"));
3705 assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3706 assert!(summary.contains("F001-H001"));
3707 assert!(summary.contains("F001-H004"));
3708 assert!(!summary.contains("F001-H002"));
3709 assert!(!summary.contains("F001-H003"));
3710 assert!(summary.contains("... 2 more hunks omitted from F001"));
3711 }
3712
3713 #[test]
3714 fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3715 let snapshot = build_multi_area_snapshot();
3716 let planning_index = build_planning_index(&snapshot);
3717
3718 assert_eq!(planning_index.mode, PlanningMode::Area);
3719 assert!(planning_index.targets.len() < snapshot.files.len());
3720 assert!(
3721 planning_index
3722 .targets
3723 .iter()
3724 .any(|target| target.label.starts_with("apps/frontend"))
3725 );
3726 assert!(
3727 render_planning_stat(&planning_index).contains("planning over"),
3728 "planning stat should explain the area mode"
3729 );
3730 }
3731
3732 #[test]
3733 fn test_normalize_intent_plan_expands_area_targets() {
3734 let snapshot = build_multi_area_snapshot();
3735 let planning_index = build_planning_index(&snapshot);
3736 let midpoint = planning_index.targets.len() / 2;
3737 let first_group_targets: Vec<String> = planning_index
3738 .targets
3739 .iter()
3740 .take(midpoint)
3741 .map(|target| target.label.clone())
3742 .collect();
3743 let second_group_targets: Vec<String> = planning_index
3744 .targets
3745 .iter()
3746 .skip(midpoint)
3747 .map(|target| target.label.clone())
3748 .collect();
3749 let groups = vec![
3750 ComposeIntentGroup {
3751 group_id: "G1".to_string(),
3752 commit_type: CommitType::new("refactor").unwrap(),
3753 scope: None,
3754 file_ids: first_group_targets,
3755 rationale: "frontend and model".to_string(),
3756 dependencies: vec![],
3757 },
3758 ComposeIntentGroup {
3759 group_id: "G2".to_string(),
3760 commit_type: CommitType::new("refactor").unwrap(),
3761 scope: None,
3762 file_ids: second_group_targets,
3763 rationale: "daemon and ci".to_string(),
3764 dependencies: vec![],
3765 },
3766 ];
3767
3768 let (normalized_groups, repair_notes) =
3769 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3770
3771 assert_eq!(normalized_groups.len(), 2);
3772 assert!(
3773 normalized_groups
3774 .iter()
3775 .flat_map(|group| group.file_ids.iter())
3776 .all(|file_id| file_id.starts_with('F')),
3777 "area targets should expand back to concrete file IDs"
3778 );
3779 assert!(!repair_notes.is_empty());
3780 assert_eq!(
3781 normalized_groups
3782 .iter()
3783 .flat_map(|group| group.file_ids.iter())
3784 .collect::<HashSet<_>>()
3785 .len(),
3786 snapshot.files.len()
3787 );
3788 }
3789
3790 #[test]
3791 fn test_large_patch_fallback_splits_monolithic_area_plan() {
3792 let snapshot = build_multi_area_snapshot();
3793 let planning_index = build_planning_index(&snapshot);
3794 let monolithic_group = ComposeIntentGroup {
3795 group_id: "G1".to_string(),
3796 commit_type: CommitType::new("refactor").unwrap(),
3797 scope: None,
3798 file_ids: snapshot
3799 .files
3800 .iter()
3801 .map(|file| file.file_id.clone())
3802 .collect(),
3803 rationale: "repo-wide refactor".to_string(),
3804 dependencies: vec![],
3805 };
3806
3807 assert!(should_force_large_patch_fallback(
3808 &snapshot,
3809 &planning_index,
3810 &[monolithic_group],
3811 6
3812 ));
3813
3814 let fallback_groups =
3815 build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3816 assert!(fallback_groups.len() >= 3);
3817 assert_eq!(
3818 fallback_groups
3819 .iter()
3820 .flat_map(|group| group.file_ids.iter())
3821 .collect::<HashSet<_>>()
3822 .len(),
3823 snapshot.files.len()
3824 );
3825 assert!(
3826 fallback_groups
3827 .iter()
3828 .any(|group| group.rationale.contains("frontend")),
3829 "fallback should preserve workstream identity"
3830 );
3831 }
3832
3833 #[test]
3834 fn test_should_collect_compose_observations_skips_area_mode() {
3835 let snapshot = build_large_snapshot(160, 4);
3836 let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
3837 let counter = create_token_counter(&config);
3838
3839 assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3840 assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3841 }
3842
3843 #[test]
3844 fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
3845 let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
3846 let counter = create_token_counter(&config);
3847 let payload = "a".repeat(200);
3848 let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");
3849
3850 assert_eq!(
3851 compose_analysis_strategy(&diff, &config, &counter),
3852 ComposeAnalysisStrategy::MapReduce
3853 );
3854 }
3855
3856 #[test]
3857 fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
3858 let config = CommitConfig {
3859 map_reduce_enabled: false,
3860 max_diff_tokens: 1,
3861 max_diff_length: 10_000,
3862 ..Default::default()
3863 };
3864 let counter = create_token_counter(&config);
3865 assert_eq!(compose_truncation_length(&config), 4);
3866
3867 assert_eq!(
3868 compose_analysis_strategy(
3869 "diff --git a/models.json b/models.json\n+large",
3870 &config,
3871 &counter
3872 ),
3873 ComposeAnalysisStrategy::SmartTruncate
3874 );
3875 }
3876
3877 #[test]
3878 fn test_compose_analysis_strategy_keeps_small_group_direct() {
3879 let config = CommitConfig {
3880 map_reduce_threshold: 1_000,
3881 max_diff_tokens: 1_000,
3882 max_diff_length: 10_000,
3883 ..Default::default()
3884 };
3885 let counter = create_token_counter(&config);
3886
3887 assert_eq!(
3888 compose_analysis_strategy("diff --git a/a.rs b/a.rs\n+a", &config, &counter),
3889 ComposeAnalysisStrategy::Direct
3890 );
3891 }
3892
3893 #[test]
3894 fn test_chunk_ambiguous_files_splits_large_binding_request() {
3895 let ambiguous_files = vec![
3896 AmbiguousFileBinding {
3897 file_id: "F001".to_string(),
3898 path: "src/alpha.rs".to_string(),
3899 candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3900 hunk_ids: (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3901 },
3902 AmbiguousFileBinding {
3903 file_id: "F002".to_string(),
3904 path: "src/beta.rs".to_string(),
3905 candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3906 hunk_ids: (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3907 },
3908 AmbiguousFileBinding {
3909 file_id: "F003".to_string(),
3910 path: "src/gamma.rs".to_string(),
3911 candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3912 hunk_ids: (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3913 },
3914 ];
3915
3916 let batches = chunk_ambiguous_files(&ambiguous_files);
3917 let total_hunks: usize = batches
3918 .iter()
3919 .flatten()
3920 .map(|file| file.hunk_ids.len())
3921 .sum();
3922
3923 assert_eq!(batches.len(), 2);
3924 assert_eq!(batches[0].len(), 1);
3925 assert_eq!(batches[1].len(), 2);
3926 assert_eq!(total_hunks, 140);
3927 assert!(batches.iter().all(|batch| {
3928 batch.len() <= MAX_BIND_FILES_PER_REQUEST
3929 && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3930 <= MAX_BIND_HUNKS_PER_REQUEST
3931 }));
3932 }
3933}