1use std::{
2 borrow::Cow,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 fs,
6 path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13 api::{
14 AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15 generate_summary_from_analysis, run_oneshot, strict_json_schema,
16 },
17 compose_types::{
18 ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19 ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20 },
21 config::CommitConfig,
22 diff::smart_truncate_diff,
23 error::{CommitGenError, Result},
24 git::{
25 TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26 get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27 reset_paths_to, update_ref_checked, write_index_tree, write_real_index_tree,
28 },
29 map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30 normalization::{format_commit_message, post_process_commit_message},
31 patch::{
32 StageResult, build_compose_snapshot, create_executable_group_patch,
33 force_stage_file_from_base_in_index, pin_snapshot_worktree_state,
34 stage_executable_group_in_index,
35 },
36 style, templates,
37 tokens::{TokenCounter, create_token_counter},
38 types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
39 validation::validate_commit_message,
40};
41
/// Observations rendered per file when the snapshot summary is not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag stored in cached plans and mixed into the plan cache key.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// Above this many files the snapshot summary switches to the medium budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Above this many hunks the snapshot summary switches to the medium budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// Above this many files the summary uses the tightest budget and planning
/// switches from per-file targets to path-based areas.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Hunk-count counterpart of `COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD`.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;
/// A planning area is not split further once it holds at most this many files
/// (and also satisfies the hunk limit below).
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A planning area is not split further once it holds at most this many hunks
/// (and also satisfies the file limit above).
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path depth at which area splitting stops regardless of bucket size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
// NOTE(review): the remaining constants are not referenced in this part of the
// file; their meaning is inferred from their names only — confirm at use sites.
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// Repository state recorded by `capture_compose_base_state`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComposeBaseState {
    /// Value returned by `get_head_hash` at capture time.
    head_hash: String,
    /// Value returned by `current_head_ref` at capture time.
    head_ref: String,
    /// Value returned by `write_real_index_tree` at capture time.
    index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68 Ok(ComposeBaseState {
69 head_hash: get_head_hash(dir)?,
70 head_ref: current_head_ref(dir)?,
71 index_tree: write_real_index_tree(dir)?,
72 })
73}
74
/// How a diff should be analysed, as chosen by `compose_analysis_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeAnalysisStrategy {
    /// The diff is within both the length and token limits.
    Direct,
    /// The diff exceeds the length or token limit but map-reduce was not selected.
    SmartTruncate,
    /// `should_use_map_reduce` returned true for the diff.
    MapReduce,
}
81
82fn compose_analysis_strategy(
83 diff: &str,
84 config: &CommitConfig,
85 counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87 if should_use_map_reduce(diff, config, counter) {
88 return ComposeAnalysisStrategy::MapReduce;
89 }
90
91 let diff_tokens = counter.count_sync(diff);
92 if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93 return ComposeAnalysisStrategy::SmartTruncate;
94 }
95
96 ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100 config
101 .max_diff_length
102 .min(config.max_diff_tokens.saturating_mul(4))
103 .max(1)
104}
105
/// Serializable wrapper around a list of intent groups; the `groups` key
/// matches the top-level property produced by `build_intent_schema`.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeIntentResponse {
    /// The intent groups carried by the response.
    groups: Vec<ComposeIntentGroup>,
}
110
/// Serializable wrapper around a list of binding assignments; the
/// `assignments` key matches the top-level property produced by
/// `build_binding_schema`.
#[derive(Debug, Deserialize, Serialize)]
struct ComposeBindingResponse {
    /// The binding assignments carried by the response.
    assignments: Vec<ComposeBindingAssignment>,
}
115
/// On-disk representation of a cached executable plan, written by
/// `save_cached_plan` and read back by `load_cached_plan`.
#[derive(Debug, Serialize, Deserialize)]
struct ComposeCachedPlan {
    /// `COMPOSE_PLAN_SCHEMA_VERSION` at the time the entry was written.
    schema_version: String,
    /// Result of `compose_plan_cache_key` for the snapshot the plan was built from.
    cache_key: String,
    /// The cached plan itself.
    plan: ComposeExecutablePlan,
}
122
/// A file together with several candidate groups and its hunks.
// NOTE(review): not constructed in this part of the file; field meanings below
// are inferred from names — confirm against the binding code.
#[derive(Debug, Clone)]
struct AmbiguousFileBinding {
    /// Identifier of the file.
    file_id: String,
    /// Path of the file.
    path: String,
    /// Identifiers of the groups the file could belong to.
    candidate_group_ids: Vec<String>,
    /// Identifiers of the file's hunks.
    hunk_ids: Vec<String>,
}
130
/// Candidate groups recorded for a single hunk.
// NOTE(review): usage is outside this part of the file — confirm semantics there.
#[derive(Debug, Clone)]
struct AmbiguousHunkContext {
    /// Identifiers of the groups the hunk could belong to.
    candidate_group_ids: Vec<String>,
}
135
/// Map from a string key to an ordered set of strings.
// NOTE(review): presumably group ID -> hunk IDs (or the reverse); the direction
// is not visible in this part of the file — confirm at the use sites.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
/// Result of evaluating a binding: what was assigned and what is left over.
// NOTE(review): produced outside this part of the file; key/value meaning of
// `assigned` is inferred from names only — confirm against the producer.
#[derive(Debug)]
struct BindingEvaluation {
    /// Assigned identifiers grouped under a string key.
    assigned: HashMap<String, Vec<String>>,
    /// Identifiers that could not be resolved.
    unresolved: Vec<String>,
}
143
/// Rendering limits for the snapshot summary, chosen by
/// `snapshot_summary_budget` from the snapshot's size.
#[derive(Debug, Clone, Copy)]
struct SnapshotSummaryBudget {
    /// Maximum number of observations rendered for each file.
    max_observations_per_file: usize,
    /// Maximum number of hunks rendered for each file; `None` renders all hunks.
    max_hunks_per_file: Option<usize>,
}
149
/// Granularity of planning targets, chosen by `planning_mode_for_snapshot`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanningMode {
    /// One planning target per file.
    File,
    /// Files are bucketed into path-based areas; used for large snapshots.
    Area,
}
155
/// A unit the planner can assign to a group: a single file or a path-based area.
#[derive(Debug, Clone)]
struct PlanningTarget {
    /// The file ID in file mode, or a generated `A###` ID in area mode.
    target_id: String,
    /// The file path in file mode, or the bucket label in area mode.
    label: String,
    /// IDs of the files covered by this target.
    file_ids: Vec<String>,
    /// Total number of hunks across the covered files.
    hunk_count: usize,
    /// Sum of `additions` across the covered files.
    additions: usize,
    /// Sum of `deletions` across the covered files.
    deletions: usize,
}
165
/// Planning targets for a snapshot plus a lookup of accepted spellings,
/// built by `build_planning_index`.
#[derive(Debug, Clone)]
struct PlanningIndex {
    /// Whether targets are individual files or path-based areas.
    mode: PlanningMode,
    /// All planning targets, in construction order.
    targets: Vec<PlanningTarget>,
    /// Maps a target ID, its upper-cased form, and its normalized label to the
    /// canonical target ID.
    aliases: HashMap<String, String>,
}
172
/// A group of files produced by `collect_planning_buckets` while splitting a
/// snapshot into areas.
#[derive(Debug, Clone)]
struct PlanningBucket {
    /// Label computed by `planning_bucket_label` for the bucket's files.
    label: String,
    /// IDs of the files in the bucket.
    file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180 fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181 let mut expanded = Vec::new();
182 let mut seen_file_ids = HashSet::new();
183
184 for target_id in target_ids {
185 if let Some(target) = self
186 .targets
187 .iter()
188 .find(|candidate| candidate.target_id == *target_id)
189 {
190 for file_id in &target.file_ids {
191 if seen_file_ids.insert(file_id.clone()) {
192 expanded.push(file_id.clone());
193 }
194 }
195 }
196 }
197
198 expanded
199 }
200}
201
202impl SnapshotSummaryBudget {
203 const fn is_compacted(self) -> bool {
204 self.max_hunks_per_file.is_some()
205 }
206}
207
/// Returns true when `path` names a dependency manifest or lockfile.
///
/// Only the final path component is inspected. It matches when it equals one
/// of the known manifest names (case-sensitive) or when its extension is
/// `lock` or `lockb` (case-insensitive). Paths without a UTF-8 file name never
/// match.
fn is_dependency_manifest(path: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        "Cargo.toml",
        "Cargo.lock",
        "package.json",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "build.gradle",
        "build.gradle.kts",
        "gradle.properties",
        "pom.xml",
    ];

    match Path::new(path).file_name().and_then(|name| name.to_str()) {
        None => false,
        Some(name) => {
            let is_known = KNOWN_MANIFESTS.iter().any(|known| *known == name);
            let has_lock_extension = Path::new(name).extension().is_some_and(|ext| {
                ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb")
            });
            is_known || has_lock_extension
        },
    }
}
247
248fn save_debug_artifact<T: Serialize>(
249 debug_dir: Option<&Path>,
250 filename: &str,
251 value: &T,
252) -> Result<()> {
253 let Some(debug_dir) = debug_dir else {
254 return Ok(());
255 };
256
257 fs::create_dir_all(debug_dir)?;
258 let path = debug_dir.join(filename);
259 let json = serde_json::to_string_pretty(value)?;
260 fs::write(path, json)?;
261 Ok(())
262}
263
/// Hashes `input` with 64-bit FNV-1a and returns the digest as 16 lowercase
/// hexadecimal digits.
fn fnv1a_64(input: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = input
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274 snapshot: &ComposeSnapshot,
275 max_commits: usize,
276 analysis_model: &str,
277) -> String {
278 fnv1a_64(&format!(
279 "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280 snapshot.diff, snapshot.stat
281 ))
282}
283
284fn compose_plan_cache_path(
285 dir: &str,
286 snapshot: &ComposeSnapshot,
287 max_commits: usize,
288 analysis_model: &str,
289) -> Result<PathBuf> {
290 let git_dir = get_git_dir(dir)?;
291 Ok(git_dir.join("llm-git").join(format!(
292 "compose-plan-{}.json",
293 compose_plan_cache_key(snapshot, max_commits, analysis_model)
294 )))
295}
296
297fn load_cached_plan(
298 dir: &str,
299 snapshot: &ComposeSnapshot,
300 max_commits: usize,
301 analysis_model: &str,
302) -> Result<Option<ComposeExecutablePlan>> {
303 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
304 if !cache_path.exists() {
305 return Ok(None);
306 }
307
308 let content = match fs::read_to_string(&cache_path) {
309 Ok(content) => content,
310 Err(err) => {
311 eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
312 return Ok(None);
313 },
314 };
315 let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
316 Ok(cached) => cached,
317 Err(err) => {
318 eprintln!(
319 "{}",
320 style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
321 );
322 let _ = fs::remove_file(&cache_path);
323 return Ok(None);
324 },
325 };
326 let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
327
328 if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
329 return Ok(None);
330 }
331 if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
332 eprintln!(
333 "{}",
334 style::warning(&format!(
335 "Discarding cached compose plan (no longer valid for current snapshot): {err}"
336 ))
337 );
338 let _ = fs::remove_file(&cache_path);
339 return Ok(None);
340 }
341 Ok(Some(cached.plan))
342}
343
344fn save_cached_plan(
345 dir: &str,
346 snapshot: &ComposeSnapshot,
347 max_commits: usize,
348 analysis_model: &str,
349 plan: &ComposeExecutablePlan,
350) -> Result<()> {
351 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352 if let Some(parent) = cache_path.parent() {
353 fs::create_dir_all(parent)?;
354 }
355
356 let cached = ComposeCachedPlan {
357 schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358 cache_key: compose_plan_cache_key(snapshot, max_commits, analysis_model),
359 plan: plan.clone(),
360 };
361 fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362 Ok(())
363}
364
/// Formats a hunk line range for display.
///
/// A zero-length range renders as `"0"`, a single line as its line number,
/// and anything longer as `start-end` with an inclusive end.
fn format_line_range(start: usize, count: usize) -> String {
    if count == 0 {
        return "0".to_string();
    }
    if count == 1 {
        return start.to_string();
    }

    let end = start + count - 1;
    format!("{start}-{end}")
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376 {
377 SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378 } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379 || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380 {
381 SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382 } else {
383 SnapshotSummaryBudget {
384 max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385 max_hunks_per_file: None,
386 }
387 }
388}
389
/// Picks up to `max_samples` evenly spread indices from `0..count`.
///
/// When `count <= max_samples` every index is returned. Otherwise the result
/// always contains index 0 and, for two or more samples, the last index, with
/// the remaining samples spaced evenly in between. Indices are strictly
/// increasing.
///
/// A `max_samples` of zero yields an empty result; previously it returned
/// `[0]`, exceeding the requested maximum.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
    if count <= max_samples {
        return (0..count).collect();
    }

    match max_samples {
        0 => Vec::new(),
        1 => vec![0],
        _ => {
            // Here count > max_samples >= 2, so `last` and the divisor are non-zero.
            let last = count - 1;
            let spans = max_samples - 1;
            let mut positions: Vec<usize> = (0..max_samples)
                .map(|slot| slot * last / spans)
                .collect();
            // Defensive: neighbouring slots cannot collide while
            // count > max_samples, but keep the output duplicate-free regardless.
            positions.dedup();
            positions
        },
    }
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411 match budget.max_hunks_per_file {
412 None => file.hunk_ids.iter().map(String::as_str).collect(),
413 Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414 .into_iter()
415 .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416 .collect(),
417 }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421 let budget = snapshot_summary_budget(snapshot);
422 let observations_by_file: HashMap<&str, Vec<&str>> = observations
423 .iter()
424 .map(|observation| {
425 (
426 observation.file.as_str(),
427 observation
428 .observations
429 .iter()
430 .map(String::as_str)
431 .take(budget.max_observations_per_file)
432 .collect(),
433 )
434 })
435 .collect();
436
437 let mut out = String::new();
438 if budget.is_compacted() {
439 let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440 writeln!(
441 out,
442 "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443 representative hunks and {} observation(s) per file",
444 budget.max_observations_per_file
445 )
446 .unwrap();
447 }
448
449 for file in &snapshot.files {
450 writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451 if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452 for observation in file_observations {
453 writeln!(out, " observation: {observation}").unwrap();
454 }
455 }
456
457 let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458 for hunk_id in &rendered_hunk_ids {
459 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460 if hunk.synthetic {
461 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462 } else {
463 writeln!(
464 out,
465 " - {} old:{} new:{} :: {}",
466 hunk.hunk_id,
467 format_line_range(hunk.old_start, hunk.old_count),
468 format_line_range(hunk.new_start, hunk.new_count),
469 hunk.snippet
470 )
471 .unwrap();
472 }
473 }
474 }
475
476 let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477 if omitted_hunks > 0 {
478 writeln!(out, " ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479 }
480 }
481
482 out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488 {
489 PlanningMode::Area
490 } else {
491 PlanningMode::File
492 }
493}
494
/// Counts the `/`-separated segments of `path`; an empty path counts as one
/// (empty) segment.
fn path_depth(path: &str) -> usize {
    // n separators always delimit n + 1 segments.
    path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, rejoined with
/// `/`. A depth of zero yields an empty string; a depth beyond the path's
/// length yields the whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
    let leading: Vec<&str> = path.split('/').take(depth).collect();
    leading.join("/")
}
508
/// Returns the longest run of leading `/`-separated segments shared by every
/// path, rejoined with `/`.
///
/// Comparison is by whole segment, so `src/lib.rs` and `src/lib.rs.bak` share
/// only `src`. An empty slice, or paths with no shared first segment, yield an
/// empty string.
fn common_path_prefix(paths: &[String]) -> String {
    let Some((first, rest)) = paths.split_first() else {
        return String::new();
    };

    // The shared prefix is always a prefix of `first`, so tracking its segment
    // count is enough.
    let mut shared_segments = first.split('/').count();
    for other in rest {
        shared_segments = first
            .split('/')
            .take(shared_segments)
            .zip(other.split('/'))
            .take_while(|(ours, theirs)| ours == theirs)
            .count();
        if shared_segments == 0 {
            break;
        }
    }

    first
        .split('/')
        .take(shared_segments)
        .collect::<Vec<_>>()
        .join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532 file_ids
533 .iter()
534 .filter_map(|file_id| snapshot.file_by_id(file_id))
535 .map(|file| file.hunk_ids.len())
536 .sum()
537}
538
539fn group_file_ids_by_prefix(
540 snapshot: &ComposeSnapshot,
541 file_ids: &[String],
542 depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544 let mut groups = BTreeMap::new();
545
546 for file_id in file_ids {
547 if let Some(file) = snapshot.file_by_id(file_id) {
548 groups
549 .entry(prefix_at_depth(&file.path, depth))
550 .or_insert_with(Vec::new)
551 .push(file_id.clone());
552 }
553 }
554
555 groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559 let paths: Vec<String> = file_ids
560 .iter()
561 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562 .collect();
563
564 let common_prefix = common_path_prefix(&paths);
565 if common_prefix.is_empty() {
566 paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567 } else {
568 common_prefix
569 }
570}
571
572fn collect_planning_buckets(
573 snapshot: &ComposeSnapshot,
574 file_ids: &[String],
575 depth: usize,
576) -> Vec<PlanningBucket> {
577 let file_count = file_ids.len();
578 let hunk_count = bucket_hunk_count(snapshot, file_ids);
579 let max_path_depth = file_ids
580 .iter()
581 .filter_map(|file_id| snapshot.file_by_id(file_id))
582 .map(|file| path_depth(&file.path))
583 .max()
584 .unwrap_or(depth);
585
586 let should_stop =
587 file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588 if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589 return vec![PlanningBucket {
590 label: planning_bucket_label(snapshot, file_ids),
591 file_ids: file_ids.to_vec(),
592 }];
593 }
594
595 let next_depth = depth + 1;
596 let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597 if groups.len() <= 1 {
598 return collect_planning_buckets(snapshot, file_ids, next_depth);
599 }
600
601 groups
602 .into_values()
603 .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604 .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608 let all_file_ids: Vec<String> = snapshot
609 .files
610 .iter()
611 .map(|file| file.file_id.clone())
612 .collect();
613 let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615 buckets
616 .into_iter()
617 .enumerate()
618 .map(|(idx, bucket)| {
619 let mut additions = 0_usize;
620 let mut deletions = 0_usize;
621 let mut hunk_count = 0_usize;
622
623 for file_id in &bucket.file_ids {
624 if let Some(file) = snapshot.file_by_id(file_id) {
625 additions = additions.saturating_add(file.additions);
626 deletions = deletions.saturating_add(file.deletions);
627 hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628 }
629 }
630
631 PlanningTarget {
632 target_id: format!("A{:03}", idx + 1),
633 label: bucket.label,
634 file_ids: bucket.file_ids,
635 hunk_count,
636 additions,
637 deletions,
638 }
639 })
640 .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644 snapshot
645 .files
646 .iter()
647 .map(|file| PlanningTarget {
648 target_id: file.file_id.clone(),
649 label: file.path.clone(),
650 file_ids: vec![file.file_id.clone()],
651 hunk_count: file.hunk_ids.len(),
652 additions: file.additions,
653 deletions: file.deletions,
654 })
655 .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659 let mode = planning_mode_for_snapshot(snapshot);
660 let targets = match mode {
661 PlanningMode::File => build_file_planning_targets(snapshot),
662 PlanningMode::Area => build_area_planning_targets(snapshot),
663 };
664
665 let aliases = targets
666 .iter()
667 .flat_map(|target| {
668 let normalized_label = normalize_file_reference(&target.label);
669 [
670 (target.target_id.clone(), target.target_id.clone()),
671 (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672 (normalized_label, target.target_id.clone()),
673 ]
674 })
675 .collect();
676
677 PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681 sample_positions(target.file_ids.len(), 4)
682 .into_iter()
683 .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684 .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688 let hunk_ids: Vec<&String> = target
689 .file_ids
690 .iter()
691 .filter_map(|file_id| snapshot.file_by_id(file_id))
692 .flat_map(|file| file.hunk_ids.iter())
693 .collect();
694
695 sample_positions(hunk_ids.len(), 4)
696 .into_iter()
697 .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698 .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702 let mut out = String::new();
703
704 match index.mode {
705 PlanningMode::File => {
706 writeln!(out, "# planning over individual file IDs").unwrap();
707 },
708 PlanningMode::Area => {
709 writeln!(
710 out,
711 "# planning over {} area IDs spanning {} files",
712 index.targets.len(),
713 index
714 .targets
715 .iter()
716 .flat_map(|target| target.file_ids.iter())
717 .collect::<HashSet<_>>()
718 .len()
719 )
720 .unwrap();
721 },
722 }
723
724 for target in &index.targets {
725 writeln!(
726 out,
727 "{} {} | {} files | {} hunks | +{}/-{}",
728 target.target_id,
729 target.label,
730 target.file_ids.len(),
731 target.hunk_count,
732 target.additions,
733 target.deletions
734 )
735 .unwrap();
736 }
737
738 out
739}
740
741fn render_planning_snapshot_summary(
742 snapshot: &ComposeSnapshot,
743 observations: &[FileObservation],
744 index: &PlanningIndex,
745) -> String {
746 if index.mode == PlanningMode::File {
747 return render_snapshot_summary(snapshot, observations);
748 }
749
750 let observations_by_file: HashMap<&str, Vec<&str>> = observations
751 .iter()
752 .map(|observation| {
753 (
754 observation.file.as_str(),
755 observation
756 .observations
757 .iter()
758 .map(String::as_str)
759 .take(1)
760 .collect(),
761 )
762 })
763 .collect();
764
765 let mut out = String::new();
766 writeln!(
767 out,
768 "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769 )
770 .unwrap();
771
772 for target in &index.targets {
773 writeln!(
774 out,
775 "- {} {} ({} files, {} hunks, +{}/-{})",
776 target.target_id,
777 target.label,
778 target.file_ids.len(),
779 target.hunk_count,
780 target.additions,
781 target.deletions
782 )
783 .unwrap();
784
785 let sample_file_ids = sample_file_ids_for_target(target);
786 if !sample_file_ids.is_empty() {
787 let sample_files: Vec<String> = sample_file_ids
788 .iter()
789 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790 .collect();
791 writeln!(out, " files: {}", sample_files.join(", ")).unwrap();
792 let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793 if omitted > 0 {
794 writeln!(out, " ... {omitted} more files omitted from {}", target.target_id).unwrap();
795 }
796 }
797
798 let mut rendered_observations = 0_usize;
799 for file_id in &target.file_ids {
800 let Some(file) = snapshot.file_by_id(file_id) else {
801 continue;
802 };
803 let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804 continue;
805 };
806
807 for observation in file_observations {
808 writeln!(out, " observation: {observation}").unwrap();
809 rendered_observations += 1;
810 if rendered_observations >= 2 {
811 break;
812 }
813 }
814
815 if rendered_observations >= 2 {
816 break;
817 }
818 }
819
820 for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821 if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822 if hunk.synthetic {
823 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824 } else {
825 writeln!(
826 out,
827 " - {} old:{} new:{} :: {}",
828 hunk.hunk_id,
829 format_line_range(hunk.old_start, hunk.old_count),
830 format_line_range(hunk.new_start, hunk.new_count),
831 hunk.snippet
832 )
833 .unwrap();
834 }
835 }
836 }
837 }
838
839 out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843 match index.mode {
844 PlanningMode::File => format!(
845 "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846 snapshot.files.len()
847 ),
848 PlanningMode::Area => format!(
849 "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850 Coverage: {} areas spanning {} files.",
851 index.targets.len(),
852 snapshot.files.len()
853 ),
854 }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858 match index.mode {
859 PlanningMode::File => {
860 "Use only the provided file IDs and keep the grouping conservative.".to_string()
861 },
862 PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863 planning areas. Split along independent subsystems or workstreams \
864 when the areas point at unrelated changes."
865 .to_string(),
866 }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870 match index.mode {
871 PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872 PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873 one broad group if nearly every area clearly belongs to the same \
874 atomic change."
875 .to_string(),
876 }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880 let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882 strict_json_schema(
883 serde_json::json!({
884 "groups": {
885 "type": "array",
886 "items": {
887 "type": "object",
888 "properties": {
889 "group_id": {
890 "type": "string",
891 "description": "Stable identifier like G1, G2, G3"
892 },
893 "file_ids": {
894 "type": "array",
895 "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896 "items": { "type": "string" }
897 },
898 "type": {
899 "type": "string",
900 "enum": type_enum,
901 "description": "Conventional commit type for this group"
902 },
903 "scope": {
904 "type": "string",
905 "description": "Optional scope (module/component). Omit if broad."
906 },
907 "rationale": {
908 "type": "string",
909 "description": "Brief explanation of the logical change"
910 },
911 "dependencies": {
912 "type": "array",
913 "description": "Group IDs this group depends on",
914 "items": { "type": "string" }
915 }
916 },
917 "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918 "additionalProperties": false
919 }
920 }
921 }),
922 &["groups"],
923 )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927 strict_json_schema(
928 serde_json::json!({
929 "assignments": {
930 "type": "array",
931 "items": {
932 "type": "object",
933 "properties": {
934 "group_id": { "type": "string" },
935 "hunk_ids": {
936 "type": "array",
937 "items": { "type": "string" }
938 }
939 },
940 "required": ["group_id", "hunk_ids"],
941 "additionalProperties": false
942 }
943 }
944 }),
945 &["assignments"],
946 )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950 groups: &[T],
951 group_id: FId,
952 dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955 FId: Fn(&T) -> &str,
956 FDeps: Fn(&T) -> &[String],
957{
958 let mut index_by_id = HashMap::new();
959 for (idx, group) in groups.iter().enumerate() {
960 let id = group_id(group);
961 if id.trim().is_empty() {
962 return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963 }
964 if index_by_id.insert(id.to_string(), idx).is_some() {
965 return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966 }
967 }
968
969 let mut in_degree = vec![0_usize; groups.len()];
970 let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972 for (idx, group) in groups.iter().enumerate() {
973 for dependency in dependencies(group) {
974 let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975 CommitGenError::Other(format!(
976 "Group {} depends on unknown group_id '{}'",
977 group_id(group),
978 dependency
979 ))
980 })?;
981 if dependency_idx == idx {
982 return Err(CommitGenError::Other(format!(
983 "Group {} depends on itself",
984 group_id(group)
985 )));
986 }
987
988 adjacency[dependency_idx].push(idx);
989 in_degree[idx] += 1;
990 }
991 }
992
993 let mut queue: Vec<usize> = (0..groups.len())
994 .filter(|idx| in_degree[*idx] == 0)
995 .collect();
996 let mut order = Vec::with_capacity(groups.len());
997
998 while let Some(node) = queue.pop() {
999 order.push(node);
1000 for neighbor in &adjacency[node] {
1001 in_degree[*neighbor] -= 1;
1002 if in_degree[*neighbor] == 0 {
1003 queue.push(*neighbor);
1004 }
1005 }
1006 }
1007
1008 if order.len() != groups.len() {
1009 return Err(CommitGenError::Other(
1010 "Circular dependency detected in compose groups".to_string(),
1011 ));
1012 }
1013
1014 Ok(order)
1015}
1016
/// Normalizes a file or target reference for lookup.
///
/// Strips surrounding whitespace and quote characters (backtick, double quote,
/// single quote) from both ends, trailing `,` and `;` separators, and any
/// leading `./`.
///
/// Quotes and trailing separators are removed together, so a reference such as
/// `"src/a.rs",` normalizes to `src/a.rs`. Previously the separator was
/// stripped only after the quotes, which left a dangling closing quote, and
/// whitespace just inside the quotes was kept.
fn normalize_file_reference(raw_file_ref: &str) -> String {
    /// Characters that may wrap a reference on either side.
    fn is_wrapper(ch: char) -> bool {
        ch.is_whitespace() || matches!(ch, '`' | '"' | '\'')
    }

    raw_file_ref
        .trim_start_matches(is_wrapper)
        .trim_end_matches(|ch: char| is_wrapper(ch) || matches!(ch, ',' | ';'))
        .trim_start_matches("./")
        .to_string()
}
1025
/// Extracts distinctive lowercase tokens from free text.
///
/// The text is split on every character that is not an ASCII letter or digit.
/// Pieces shorter than three characters and common filler words (see
/// `STOP_WORDS`) are dropped; the rest are lowercased and returned in order of
/// first appearance, without duplicates.
fn planning_text_tokens(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "and",
        "for",
        "the",
        "with",
        "from",
        "into",
        "after",
        "before",
        "over",
        "under",
        "plus",
        "across",
        "update",
        "updated",
        "refactor",
        "refactored",
        "changes",
        "change",
        "logical",
        "group",
        "groups",
        "commit",
        "commits",
    ];

    let mut seen = HashSet::new();

    text
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| word.len() >= 3)
        .map(str::to_ascii_lowercase)
        .filter(|word| !STOP_WORDS.contains(&word.as_str()))
        // `insert` returns false for a token that was already emitted.
        .filter(|word| seen.insert(word.clone()))
        .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079 let normalized = normalize_file_reference(raw);
1080 let uppercase = normalized.to_ascii_uppercase();
1081
1082 if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083 return Some(format!("G{uppercase}"));
1084 }
1085
1086 if let Some(rest) = uppercase.strip_prefix('G')
1087 && !rest.is_empty()
1088 && rest.chars().all(|ch| ch.is_ascii_digit())
1089 {
1090 return Some(format!("G{rest}"));
1091 }
1092
1093 let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094 let compact = uppercase
1095 .chars()
1096 .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097 .collect::<String>();
1098 if compact.starts_with("GROUP") && !digits.is_empty() {
1099 return Some(format!("G{digits}"));
1100 }
1101
1102 None
1103}
1104
/// Coarse classification of a changed file, assigned by `compose_file_category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ComposeFileCategory {
    /// The file is flagged as binary.
    Binary,
    /// The path is a dependency manifest or lockfile.
    Dependency,
    /// Markdown files and READMEs.
    Docs,
    /// The lower-cased path contains "prompt" or "system".
    Prompt,
    /// The path is under a `tests/` directory, or the file name contains
    /// "test" or "spec".
    Test,
    /// Files with a configuration extension such as `toml`, `yaml`, or `json`.
    Config,
    /// Files with a recognized source-code extension.
    Source,
    /// Anything that matches none of the above.
    Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118 if file.is_binary {
1119 return ComposeFileCategory::Binary;
1120 }
1121
1122 if is_dependency_manifest(&file.path) {
1123 return ComposeFileCategory::Dependency;
1124 }
1125
1126 let filename_lower = file.path.to_ascii_lowercase();
1127 let file_name = Path::new(&filename_lower)
1128 .file_name()
1129 .and_then(|name| name.to_str())
1130 .unwrap_or_default();
1131 let extension = Path::new(&filename_lower)
1132 .extension()
1133 .and_then(|ext| ext.to_str())
1134 .unwrap_or_default();
1135
1136 if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137 return ComposeFileCategory::Prompt;
1138 }
1139
1140 if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141 return ComposeFileCategory::Docs;
1142 }
1143
1144 if filename_lower.contains("/tests/")
1145 || filename_lower.starts_with("tests/")
1146 || file_name.contains("test")
1147 || file_name.contains("spec")
1148 {
1149 return ComposeFileCategory::Test;
1150 }
1151
1152 if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153 return ComposeFileCategory::Config;
1154 }
1155
1156 if matches!(
1157 extension,
1158 "rs"
1159 | "py"
1160 | "js"
1161 | "jsx"
1162 | "ts"
1163 | "tsx"
1164 | "go"
1165 | "java"
1166 | "kt"
1167 | "c"
1168 | "cc"
1169 | "cpp"
1170 | "h"
1171 | "hpp"
1172 | "cs"
1173 | "rb"
1174 | "php"
1175 | "swift"
1176 | "scala"
1177 | "m"
1178 | "mm"
1179 ) {
1180 return ComposeFileCategory::Source;
1181 }
1182
1183 ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments `left` and `right` share.
///
/// Comparison stops at the first differing segment or when either path runs out
/// of segments.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
    let mut right_segments = right.split('/');
    let mut depth = 0;

    for left_segment in left.split('/') {
        match right_segments.next() {
            Some(right_segment) if right_segment == left_segment => depth += 1,
            _ => break,
        }
    }

    depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195 let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197 if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198 score += 40;
1199 }
1200
1201 if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202 score += 12;
1203 }
1204
1205 if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206 score += 18;
1207 }
1208
1209 score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213 match (compose_file_category(file), group.commit_type.as_str()) {
1214 (ComposeFileCategory::Docs, "docs") => 25,
1215 (ComposeFileCategory::Test, "test") => 25,
1216 (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217 (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218 (
1219 ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220 "feat" | "fix" | "refactor" | "perf",
1221 ) => 10,
1222 _ => 0,
1223 }
1224}
1225
1226fn best_group_for_missing_file(
1227 snapshot: &ComposeSnapshot,
1228 groups: &[ComposeIntentGroup],
1229 missing_file: &ComposeFile,
1230) -> usize {
1231 let mut best_group_idx = 0;
1232 let mut best_score = i32::MIN;
1233 let mut best_group_size = usize::MAX;
1234
1235 for (group_idx, group) in groups.iter().enumerate() {
1236 let similarity = group
1237 .file_ids
1238 .iter()
1239 .filter_map(|file_id| snapshot.file_by_id(file_id))
1240 .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241 .max()
1242 .unwrap_or_default();
1243 let score = similarity + group_type_bonus(missing_file, group);
1244 let group_size = group.file_ids.len();
1245
1246 if score > best_score || (score == best_score && group_size < best_group_size) {
1247 best_group_idx = group_idx;
1248 best_score = score;
1249 best_group_size = group_size;
1250 }
1251 }
1252
1253 best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257 raw_dependency: &str,
1258 known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260 let normalized = normalize_file_reference(raw_dependency);
1261 if normalized.is_empty() {
1262 return None;
1263 }
1264
1265 if known_group_ids.contains(&normalized) {
1266 return Some(normalized);
1267 }
1268
1269 let uppercase = normalized.to_ascii_uppercase();
1270 if known_group_ids.contains(&uppercase) {
1271 return Some(uppercase);
1272 }
1273
1274 let candidate = extract_group_id_candidate(&normalized)?;
1275 known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279 let label = target.label.to_ascii_lowercase();
1280 let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281 let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283 if let Some(scope) = &group.scope {
1284 let scope = scope.as_str().to_ascii_lowercase();
1285 if label.contains(&scope) || workstream.contains(&scope) {
1286 score += 140;
1287 }
1288
1289 for segment in scope.split('/') {
1290 if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291 score += 45;
1292 }
1293 }
1294 }
1295
1296 for token in planning_text_tokens(&group.rationale) {
1297 if label.contains(&token) || workstream.contains(&token) {
1298 score += 16;
1299 }
1300 }
1301
1302 match group.commit_type.as_str() {
1303 "ci" if target.label.starts_with(".github/") => score += 120,
1304 "docs"
1305 if target.label.starts_with("docs/")
1306 || Path::new(&target.label)
1307 .extension()
1308 .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309 {
1310 score += 80;
1311 },
1312 "build" | "chore"
1313 if target.label.contains("Cargo")
1314 || target.label.contains("package")
1315 || target.label.contains("lock")
1316 || target.label.contains("tsconfig")
1317 || target.label.contains("biome")
1318 || target.label.contains("bun") =>
1319 {
1320 score += 55;
1321 },
1322 _ => {},
1323 }
1324
1325 score
1326}
1327
1328fn seed_group_targets(
1329 groups: &[ComposeIntentGroup],
1330 planning_index: &PlanningIndex,
1331 group_targets: &mut [Vec<String>],
1332 repair_notes: &mut Vec<String>,
1333) {
1334 let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336 for (group_idx, group) in groups.iter().enumerate() {
1337 if !group_targets[group_idx].is_empty() {
1338 continue;
1339 }
1340
1341 let fallback_target = planning_index
1342 .targets
1343 .iter()
1344 .max_by_key(|target| {
1345 let mut score = planning_target_match_score(target, group);
1346 if !claimed_target_ids.contains(&target.target_id) {
1347 score += 60;
1348 }
1349 (score, target.hunk_count, target.file_ids.len())
1350 })
1351 .or_else(|| planning_index.targets.first());
1352
1353 let Some(fallback_target) = fallback_target else {
1354 continue;
1355 };
1356
1357 group_targets[group_idx].push(fallback_target.target_id.clone());
1358 claimed_target_ids.insert(fallback_target.target_id.clone());
1359 repair_notes.push(format!(
1360 "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361 group.group_id, fallback_target.target_id, fallback_target.label
1362 ));
1363 }
1364}
1365
/// Validates and repairs the planner's raw intent groups against the planning index.
///
/// On entry each group's `file_ids` holds the planner's target references. They
/// are canonicalized to known target ids, groups left without targets are
/// seeded, dependencies are canonicalized, and finally `file_ids` is replaced by
/// the file ids the targets expand to. Any snapshot file still uncovered is
/// attached to the best-fitting group.
///
/// Returns the repaired groups together with human-readable notes describing
/// every repair that was made.
///
/// # Errors
///
/// Returns `CommitGenError::Other` when `groups` is empty.
fn normalize_intent_plan(
    snapshot: &ComposeSnapshot,
    planning_index: &PlanningIndex,
    mut groups: Vec<ComposeIntentGroup>,
) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
    if groups.is_empty() {
        return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
    }

    let known_target_ids: HashSet<&str> = planning_index
        .targets
        .iter()
        .map(|target| target.target_id.as_str())
        .collect();
    let mut repair_notes = Vec::new();
    let mut covered_file_ids = HashSet::new();
    let mut normalized_group_targets = Vec::with_capacity(groups.len());

    // Pass 1: canonicalize each group's target references (one Vec per group,
    // in group order).
    for group in &groups {
        if group.file_ids.is_empty() {
            repair_notes.push(format!(
                "Compose planner left {} without planning targets; assigning targets heuristically",
                group.group_id
            ));
        }

        let mut normalized_target_ids = Vec::new();
        let mut seen_target_ids = HashSet::new();
        for raw_target_ref in &group.file_ids {
            let normalized_ref = normalize_file_reference(raw_target_ref);
            // Resolution order: exact id, uppercase id, alias table; otherwise drop.
            let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
                normalized_ref.clone()
            } else {
                let uppercase_ref = normalized_ref.to_ascii_uppercase();
                if known_target_ids.contains(uppercase_ref.as_str()) {
                    uppercase_ref
                } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
                    if raw_target_ref != target_id {
                        repair_notes.push(format!(
                            "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
                        ));
                    }
                    target_id.clone()
                } else {
                    repair_notes.push(format!(
                        "Dropped unknown planning target '{}' from {}",
                        raw_target_ref, group.group_id
                    ));
                    continue;
                }
            };

            // Keep first occurrence only, preserving the planner's order.
            if seen_target_ids.insert(canonical_target_id.clone()) {
                normalized_target_ids.push(canonical_target_id);
            }
        }

        normalized_group_targets.push(normalized_target_ids);
    }

    // Pass 2: groups that ended up with no valid targets get a heuristic seed.
    seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);

    // Pass 3: canonicalize dependencies, dropping unknown ids, self-references
    // and duplicates.
    let known_group_ids: HashSet<String> =
        groups.iter().map(|group| group.group_id.clone()).collect();
    for group in &mut groups {
        let mut normalized_dependencies = Vec::new();
        let mut seen_dependencies = HashSet::new();

        for raw_dependency in &group.dependencies {
            let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
            else {
                repair_notes.push(format!(
                    "Dropped unknown dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            };

            if dependency == group.group_id {
                repair_notes.push(format!(
                    "Dropped self-dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            }

            if seen_dependencies.insert(dependency.clone()) {
                if raw_dependency != &dependency {
                    repair_notes.push(format!(
                        "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
                    ));
                }
                normalized_dependencies.push(dependency);
            }
        }

        group.dependencies = normalized_dependencies;
    }

    // Pass 4: replace target references with the file ids they expand to and
    // record which files are now covered.
    for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
        let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
        for file_id in &expanded_file_ids {
            covered_file_ids.insert(file_id.clone());
        }
        group.file_ids = expanded_file_ids;
    }

    // Pass 5: every snapshot file must belong to some group; attach leftovers
    // to the best-fitting one.
    for file in &snapshot.files {
        if covered_file_ids.contains(file.file_id.as_str()) {
            continue;
        }

        let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
        let target_group = &mut groups[target_group_idx];
        target_group.file_ids.push(file.file_id.clone());
        covered_file_ids.insert(file.file_id.clone());
        repair_notes.push(format!(
            "Compose planner omitted {} ({}); assigned it to {}",
            file.file_id, file.path, target_group.group_id
        ));
    }

    Ok((groups, repair_notes))
}
1490
/// Derives the workstream key for a path-like label.
///
/// Empty segments are ignored. For labels rooted at `.github` or one of the
/// container directories (`apps`, `packages`, `crates`, `services`, `libs`,
/// `pass`) the key is the first two segments; otherwise it is the first segment
/// alone. A label with no non-empty segments is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
    let mut parts = label.split('/').filter(|part| !part.is_empty());
    let Some(root) = parts.next() else {
        return label.to_string();
    };

    let keeps_child = matches!(
        root,
        ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
    );

    match parts.next() {
        Some(child) if keeps_child => format!("{root}/{child}"),
        _ => root.to_string(),
    }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514 let key = workstream_key_for_label(label);
1515 match key.as_str() {
1516 ".github/workflows" => "CI workflows".to_string(),
1517 ".github" => "GitHub automation".to_string(),
1518 _ => key
1519 .split('/')
1520 .next_back()
1521 .map(|segment| segment.replace(['_', '-'], " "))
1522 .unwrap_or(key),
1523 }
1524}
1525
/// Turns free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII alphanumerics are kept (lowercased); runs of `-`, `_`, `/`, `.` and
/// spaces collapse into a single `-`; every other character is dropped without
/// acting as a separator. Leading and trailing dashes are removed. Returns
/// `None` when nothing usable remains.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
    let mut slug = String::new();

    for ch in raw.trim().chars() {
        match ch {
            c if c.is_ascii_alphanumeric() => slug.push(c.to_ascii_lowercase()),
            '-' | '_' | '/' | '.' | ' ' => {
                // Only separators ever push '-', so a trailing '-' means the
                // previous emitted character was already a separator.
                if !slug.is_empty() && !slug.ends_with('-') {
                    slug.push('-');
                }
            },
            _ => {},
        }
    }

    let slug = slug.trim_matches('-');
    if slug.is_empty() {
        None
    } else {
        Some(slug.to_string())
    }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546 let key = workstream_key_for_label(label);
1547 let candidate = key
1548 .split('/')
1549 .next_back()
1550 .and_then(sanitize_scope_fragment)?;
1551 Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555 if labels.len() == 1 {
1556 let label = labels[0].as_str();
1557 let display = workstream_display_name(label);
1558 if label.starts_with("apps/") {
1559 return format!("{display} application updates");
1560 }
1561 if label.starts_with("packages/") {
1562 return format!("{display} package updates");
1563 }
1564 if label.starts_with("crates/") {
1565 return format!("{display} crate updates");
1566 }
1567 if label.starts_with(".github/") || label == ".github" {
1568 return format!("{display} updates");
1569 }
1570 return format!("{display} updates");
1571 }
1572
1573 let display_labels: Vec<String> = labels
1574 .iter()
1575 .take(3)
1576 .map(|label| workstream_display_name(label))
1577 .collect();
1578 format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582 snapshot: &ComposeSnapshot,
1583 labels: &[String],
1584 file_ids: &[String],
1585) -> Result<CommitType> {
1586 if labels
1587 .iter()
1588 .any(|label| label == ".github" || label.starts_with(".github/"))
1589 {
1590 return CommitType::new("ci");
1591 }
1592
1593 let files: Vec<&ComposeFile> = file_ids
1594 .iter()
1595 .filter_map(|file_id| snapshot.file_by_id(file_id))
1596 .collect();
1597 let all_docs = !files.is_empty()
1598 && files
1599 .iter()
1600 .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601 if all_docs {
1602 return CommitType::new("docs");
1603 }
1604
1605 let all_tests = !files.is_empty()
1606 && files
1607 .iter()
1608 .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609 if all_tests {
1610 return CommitType::new("test");
1611 }
1612
1613 let all_dependencies =
1614 !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615 if all_dependencies {
1616 return CommitType::new("build");
1617 }
1618
1619 let all_config = !files.is_empty()
1620 && files.iter().all(|file| {
1621 matches!(
1622 compose_file_category(file),
1623 ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624 )
1625 });
1626 if all_config {
1627 return CommitType::new("chore");
1628 }
1629
1630 CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634 snapshot
1635 .files
1636 .iter()
1637 .filter(|file| file_ids.contains(&file.file_id))
1638 .map(|file| file.file_id.clone())
1639 .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643 if groups.is_empty() {
1644 return false;
1645 }
1646
1647 let largest_group = groups
1648 .iter()
1649 .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650 .max()
1651 .unwrap_or_default();
1652
1653 groups.len() == 1
1654 || (groups.len() <= 2
1655 && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659 snapshot: &ComposeSnapshot,
1660 planning_index: &PlanningIndex,
1661 groups: &[ComposeIntentGroup],
1662 max_commits: usize,
1663) -> bool {
1664 if max_commits <= 1
1665 || planning_index.mode != PlanningMode::Area
1666 || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667 || !is_monolithic_intent_plan(snapshot, groups)
1668 {
1669 return false;
1670 }
1671
1672 let workstream_count = planning_index
1673 .targets
1674 .iter()
1675 .map(|target| workstream_key_for_label(&target.label))
1676 .collect::<HashSet<_>>()
1677 .len();
1678
1679 workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
/// Builds path-based fallback groups by bucketing planning targets into
/// workstreams and packing those workstreams into at most `max_commits` bins.
///
/// Workstreams are processed heaviest first (ties broken by label). The first
/// `min(max_commits, workstream count)` workstreams each open a bin; every
/// later workstream is merged into the currently lightest bin. Each bin becomes
/// one group with id `G<n>`, a heuristic commit type, a scope (only for
/// single-workstream bins), a generated rationale and no dependencies.
///
/// # Errors
///
/// Propagates any error from `fallback_commit_type_for_group`.
fn build_large_patch_fallback_groups(
    snapshot: &ComposeSnapshot,
    planning_index: &PlanningIndex,
    max_commits: usize,
) -> Result<Vec<ComposeIntentGroup>> {
    /// All targets sharing one workstream key.
    #[derive(Debug, Clone)]
    struct WorkstreamGroup {
        label: String,
        file_ids: HashSet<String>,
        weight: usize,
    }

    /// One output bin: the workstreams merged into it and their combined weight.
    #[derive(Debug, Clone)]
    struct FallbackBin {
        labels: Vec<String>,
        file_ids: HashSet<String>,
        total_weight: usize,
    }

    let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
    for target in &planning_index.targets {
        let key = workstream_key_for_label(&target.label);
        let entry = workstreams
            .entry(key.clone())
            .or_insert_with(|| WorkstreamGroup {
                label: key,
                file_ids: HashSet::new(),
                weight: 0,
            });

        for file_id in &target.file_ids {
            entry.file_ids.insert(file_id.clone());
        }
        // A target weighs the larger of its hunk count and its file count.
        entry.weight = entry
            .weight
            .saturating_add(target.hunk_count.max(target.file_ids.len()));
    }

    // Heaviest first; the label tie-break makes the order independent of
    // HashMap iteration order.
    let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
    workstreams.sort_by(|left, right| {
        right
            .weight
            .cmp(&left.weight)
            .then_with(|| left.label.cmp(&right.label))
    });

    let bin_count = max_commits.min(workstreams.len());
    let mut bins: Vec<FallbackBin> = Vec::new();
    for workstream in workstreams {
        // The heaviest `bin_count` workstreams each start their own bin.
        if bins.len() < bin_count {
            bins.push(FallbackBin {
                labels: vec![workstream.label],
                file_ids: workstream.file_ids,
                total_weight: workstream.weight,
            });
            continue;
        }

        // Otherwise merge into the lightest bin (fewest labels on ties); with
        // no bins at all the workstream is skipped.
        let Some((target_idx, _)) = bins
            .iter()
            .enumerate()
            .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
        else {
            continue;
        };

        let target_bin = &mut bins[target_idx];
        target_bin.labels.push(workstream.label);
        target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
        target_bin.file_ids.extend(workstream.file_ids);
    }

    let mut groups = Vec::new();
    for (idx, bin) in bins.into_iter().enumerate() {
        let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
        let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
        // A scope is only derived when the bin holds exactly one workstream.
        let scope = (bin.labels.len() == 1)
            .then(|| fallback_scope_for_label(&bin.labels[0]))
            .flatten();
        let rationale = fallback_rationale_for_labels(&bin.labels);

        groups.push(ComposeIntentGroup {
            group_id: format!("G{}", idx + 1),
            commit_type,
            scope,
            file_ids: ordered_ids,
            rationale,
            dependencies: Vec::new(),
        });
    }

    Ok(groups)
}
1775
/// Asks the analysis model for a commit-group plan and returns it normalized.
///
/// Builds the planning index and prompt inputs from the snapshot, runs the
/// `compose/intent` one-shot request, repairs the response with
/// `normalize_intent_plan` (printing each repair note to stderr as a warning),
/// optionally swaps in path-based fallback groups when
/// `should_force_large_patch_fallback` says the plan is monolithic, and finally
/// computes the dependency order of the resulting groups.
///
/// # Errors
///
/// Propagates errors from prompt rendering, the one-shot request, plan
/// normalization, fallback group construction and dependency ordering.
#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
async fn analyze_compose_intent(
    snapshot: &ComposeSnapshot,
    observations: &[FileObservation],
    config: &CommitConfig,
    max_commits: usize,
    debug_dir: Option<&Path>,
) -> Result<ComposeIntentPlan> {
    // Render every prompt input from the planning index.
    let planning_index = build_planning_index(snapshot);
    let stat_summary = render_planning_stat(&planning_index);
    let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
    let planning_targets = render_planning_targets(&planning_index, snapshot);
    let planning_notes = render_planning_notes(&planning_index);
    let split_bias = render_split_bias(&planning_index);
    let schema = build_intent_schema(config);
    let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
        variant: "default",
        max_commits,
        stat: &stat_summary,
        snapshot_summary: &snapshot_summary,
        planning_targets: &planning_targets,
        planning_notes: &planning_notes,
        split_bias: &split_bias,
    })?;

    let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
        operation: "compose/intent",
        model: &config.analysis_model,
        prompt_family: "compose-intent",
        prompt_variant: "default",
        system_prompt: &parts.system,
        user_prompt: &parts.user,
        tool_name: "create_compose_intent_plan",
        tool_description: "Plan logical commit groups over the provided planning target IDs",
        schema: &schema,
        progress_label: Some("compose intent planner"),
        debug: debug_dir.map(|dir| OneShotDebug {
            dir: Some(dir),
            prefix: None,
            name: "compose_intent",
        }),
        cacheable: true,
    })
    .await?;

    // Repair the raw plan and surface every repair to the user.
    let (mut groups, repair_notes) =
        normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
    for note in &repair_notes {
        eprintln!("{}", style::warning(note));
    }
    // Replace a collapsed plan with deterministic path-based groups.
    if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
        eprintln!(
            "{}",
            style::warning(
                "Compose intent collapsed into a monolithic large-patch group; falling back to \
                 path-based workstream splits."
            )
        );
        groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
    }
    let dependency_order =
        compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;

    Ok(ComposeIntentPlan { groups, dependency_order })
}
1841
1842#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1843fn should_collect_compose_observations(
1844 snapshot: &ComposeSnapshot,
1845 config: &CommitConfig,
1846 counter: &TokenCounter,
1847) -> bool {
1848 planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1849 && should_use_map_reduce(&snapshot.diff, config, counter)
1850}
1851
1852#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1853fn auto_assign_hunks(
1854 snapshot: &ComposeSnapshot,
1855 intent_plan: &ComposeIntentPlan,
1856) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1857 let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1858 for group in &intent_plan.groups {
1859 for file_id in &group.file_ids {
1860 groups_by_file
1861 .entry(file_id.as_str())
1862 .or_default()
1863 .push(group.group_id.as_str());
1864 }
1865 }
1866
1867 let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1868 .groups
1869 .iter()
1870 .map(|group| (group.group_id.clone(), BTreeSet::new()))
1871 .collect();
1872 let mut ambiguous = Vec::new();
1873
1874 for file in &snapshot.files {
1875 let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1876 return Err(CommitGenError::Other(format!(
1877 "No compose group claimed file {} ({})",
1878 file.file_id, file.path
1879 )));
1880 };
1881
1882 if candidate_group_ids.len() == 1 {
1883 let group_id = candidate_group_ids[0];
1884 let entry = assigned
1885 .get_mut(group_id)
1886 .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1887 for hunk_id in &file.hunk_ids {
1888 entry.insert(hunk_id.clone());
1889 }
1890 } else {
1891 ambiguous.push(AmbiguousFileBinding {
1892 file_id: file.file_id.clone(),
1893 path: file.path.clone(),
1894 candidate_group_ids: candidate_group_ids
1895 .iter()
1896 .map(|group_id| (*group_id).to_string())
1897 .collect(),
1898 hunk_ids: file.hunk_ids.clone(),
1899 });
1900 }
1901 }
1902
1903 Ok((assigned, ambiguous))
1904}
1905
1906fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1907 let mut out = String::new();
1908 for group in groups {
1909 let scope = group
1910 .scope
1911 .as_ref()
1912 .map(|scope| format!("({})", scope.as_str()))
1913 .unwrap_or_default();
1914 writeln!(
1915 out,
1916 "- {} [{}{}] {}",
1917 group.group_id,
1918 group.commit_type.as_str(),
1919 scope,
1920 group.rationale
1921 )
1922 .unwrap();
1923 }
1924
1925 out
1926}
1927
1928fn render_binding_ambiguous_files(
1929 snapshot: &ComposeSnapshot,
1930 ambiguous_files: &[AmbiguousFileBinding],
1931) -> String {
1932 let mut out = String::new();
1933 for ambiguous_file in ambiguous_files {
1934 writeln!(
1935 out,
1936 "- {} {} candidates: {}",
1937 ambiguous_file.file_id,
1938 ambiguous_file.path,
1939 ambiguous_file.candidate_group_ids.join(", ")
1940 )
1941 .unwrap();
1942
1943 for hunk_id in &ambiguous_file.hunk_ids {
1944 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1945 if hunk.synthetic {
1946 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1947 } else {
1948 writeln!(
1949 out,
1950 " - {} old:{} new:{} :: {}",
1951 hunk.hunk_id,
1952 format_line_range(hunk.old_start, hunk.old_count),
1953 format_line_range(hunk.new_start, hunk.new_count),
1954 hunk.snippet
1955 )
1956 .unwrap();
1957 }
1958 }
1959 }
1960 }
1961
1962 out
1963}
1964
/// Asks the analysis model to bind the hunks of ambiguous files to groups.
///
/// Renders the groups and ambiguous files into the `compose-bind` prompt, runs
/// the `compose/bind` one-shot request and returns the raw assignments from the
/// response without validating them.
///
/// `debug_name` names the debug artifact written when `debug_dir` is set.
///
/// # Errors
///
/// Propagates errors from prompt rendering and from the one-shot request.
async fn request_binding(
    snapshot: &ComposeSnapshot,
    groups: &[ComposeIntentGroup],
    ambiguous_files: &[AmbiguousFileBinding],
    config: &CommitConfig,
    debug_dir: Option<&Path>,
    debug_name: &str,
) -> Result<Vec<ComposeBindingAssignment>> {
    let schema = build_binding_schema();
    let groups_text = render_binding_groups(groups);
    let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
    let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
        variant: "default",
        groups: &groups_text,
        ambiguous_files: &ambiguous_files_text,
    })?;
    let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
        operation: "compose/bind",
        model: &config.analysis_model,
        prompt_family: "compose-bind",
        prompt_variant: "default",
        system_prompt: &parts.system,
        user_prompt: &parts.user,
        tool_name: "bind_compose_hunks",
        tool_description: "Assign hunk IDs to existing compose groups",
        schema: &schema,
        progress_label: Some("compose hunk binder"),
        debug: debug_dir.map(|dir| OneShotDebug {
            dir: Some(dir),
            prefix: None,
            name: debug_name,
        }),
        cacheable: true,
    })
    .await?;

    Ok(response.output.assignments)
}
2003
2004fn ambiguous_hunk_context(
2005 ambiguous_files: &[AmbiguousFileBinding],
2006) -> HashMap<String, AmbiguousHunkContext> {
2007 let mut context = HashMap::new();
2008 for ambiguous_file in ambiguous_files {
2009 for hunk_id in &ambiguous_file.hunk_ids {
2010 context.insert(hunk_id.clone(), AmbiguousHunkContext {
2011 candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2012 });
2013 }
2014 }
2015 context
2016}
2017
2018fn evaluate_binding(
2019 assignments: &[ComposeBindingAssignment],
2020 hunk_context: &HashMap<String, AmbiguousHunkContext>,
2021 valid_group_ids: &HashSet<&str>,
2022 snapshot: &ComposeSnapshot,
2023) -> BindingEvaluation {
2024 let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2025
2026 for assignment in assignments {
2027 if !valid_group_ids.contains(assignment.group_id.as_str()) {
2028 continue;
2029 }
2030
2031 let mut seen_in_group = HashSet::new();
2032 for hunk_id in &assignment.hunk_ids {
2033 if !seen_in_group.insert(hunk_id.as_str()) {
2034 continue;
2035 }
2036
2037 let Some(context) = hunk_context.get(hunk_id) else {
2038 continue;
2039 };
2040
2041 if !context
2042 .candidate_group_ids
2043 .iter()
2044 .any(|candidate| candidate == &assignment.group_id)
2045 {
2046 continue;
2047 }
2048
2049 match assigned_hunk_to_group.get(hunk_id) {
2050 None => {
2051 assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2052 },
2053 Some(existing_group) if existing_group == &assignment.group_id => {},
2054 Some(_) => {
2055 assigned_hunk_to_group.remove(hunk_id);
2056 },
2057 }
2058 }
2059 }
2060
2061 let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2062 for (hunk_id, group_id) in assigned_hunk_to_group {
2063 assigned_by_group.entry(group_id).or_default().push(hunk_id);
2064 }
2065
2066 for hunk_ids in assigned_by_group.values_mut() {
2067 let ordered: Vec<String> = snapshot
2068 .hunks
2069 .iter()
2070 .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2071 .map(|hunk| hunk.hunk_id.clone())
2072 .collect();
2073 *hunk_ids = ordered;
2074 }
2075
2076 let unresolved = snapshot
2077 .hunks
2078 .iter()
2079 .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2080 .filter(|hunk| {
2081 !assigned_by_group.values().any(|assigned_hunks| {
2082 assigned_hunks
2083 .iter()
2084 .any(|assigned| assigned == &hunk.hunk_id)
2085 })
2086 })
2087 .map(|hunk| hunk.hunk_id.clone())
2088 .collect();
2089
2090 BindingEvaluation { assigned: assigned_by_group, unresolved }
2091}
2092
2093fn filter_ambiguous_files(
2094 ambiguous_files: &[AmbiguousFileBinding],
2095 hunk_ids: &[String],
2096) -> Vec<AmbiguousFileBinding> {
2097 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2098
2099 ambiguous_files
2100 .iter()
2101 .filter_map(|file| {
2102 let matching_hunks: Vec<String> = file
2103 .hunk_ids
2104 .iter()
2105 .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2106 .cloned()
2107 .collect();
2108
2109 (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2110 file_id: file.file_id.clone(),
2111 path: file.path.clone(),
2112 candidate_group_ids: file.candidate_group_ids.clone(),
2113 hunk_ids: matching_hunks,
2114 })
2115 })
2116 .collect()
2117}
2118
2119fn chunk_ambiguous_files(
2120 ambiguous_files: &[AmbiguousFileBinding],
2121) -> Vec<Vec<AmbiguousFileBinding>> {
2122 if ambiguous_files.is_empty() {
2123 return Vec::new();
2124 }
2125
2126 let mut batches = Vec::new();
2127 let mut current_batch = Vec::new();
2128 let mut current_hunk_count = 0_usize;
2129
2130 for file in ambiguous_files {
2131 let file_hunk_count = file.hunk_ids.len();
2132 let should_split = !current_batch.is_empty()
2133 && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2134 || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2135
2136 if should_split {
2137 batches.push(current_batch);
2138 current_batch = Vec::new();
2139 current_hunk_count = 0;
2140 }
2141
2142 current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2143 current_batch.push(file.clone());
2144 }
2145
2146 if !current_batch.is_empty() {
2147 batches.push(current_batch);
2148 }
2149
2150 batches
2151}
2152
2153fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2154 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2155
2156 snapshot
2157 .hunks
2158 .iter()
2159 .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2160 .map(|hunk| hunk.hunk_id.clone())
2161 .collect()
2162}
2163
2164fn fallback_group_for_hunk(
2165 hunk_id: &str,
2166 ambiguous_files: &[AmbiguousFileBinding],
2167 group_rank: &HashMap<&str, usize>,
2168) -> Option<String> {
2169 ambiguous_files.iter().find_map(|file| {
2170 file
2171 .hunk_ids
2172 .iter()
2173 .any(|candidate| candidate == hunk_id)
2174 .then(|| {
2175 file
2176 .candidate_group_ids
2177 .iter()
2178 .min_by_key(|group_id| {
2179 group_rank
2180 .get(group_id.as_str())
2181 .copied()
2182 .unwrap_or(usize::MAX)
2183 })
2184 .cloned()
2185 })
2186 })?
2187}
2188
2189fn assign_unresolved_hunks(
2190 unresolved_hunks: &[String],
2191 assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2192 ambiguous_files: &[AmbiguousFileBinding],
2193 group_rank: &HashMap<&str, usize>,
2194) {
2195 for hunk_id in unresolved_hunks {
2196 if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2197 && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2198 {
2199 group_hunks.insert(hunk_id.clone());
2200 }
2201 }
2202}
2203
2204fn normalize_group_type(
2205 snapshot: &ComposeSnapshot,
2206 file_ids: &[String],
2207 original_type: &CommitType,
2208) -> Result<CommitType> {
2209 let dependency_only = !file_ids.is_empty()
2210 && file_ids.iter().all(|file_id| {
2211 snapshot
2212 .file_by_id(file_id)
2213 .is_some_and(|file| is_dependency_manifest(&file.path))
2214 });
2215
2216 if dependency_only && original_type.as_str() != "build" {
2217 CommitType::new("build")
2218 } else {
2219 Ok(original_type.clone())
2220 }
2221}
2222
2223fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2224 snapshot
2225 .files
2226 .iter()
2227 .filter(|file| {
2228 hunk_ids
2229 .iter()
2230 .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2231 })
2232 .map(|file| file.file_id.clone())
2233 .collect()
2234}
2235
2236fn build_redirects(
2237 intent_plan: &ComposeIntentPlan,
2238 executable_groups: &[ComposeExecutableGroup],
2239 group_rank: &HashMap<&str, usize>,
2240) -> HashMap<String, String> {
2241 let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2242 .iter()
2243 .filter(|group| !group.hunk_ids.is_empty())
2244 .map(|group| (group.group_id.as_str(), group))
2245 .collect();
2246
2247 let mut redirects = HashMap::new();
2248 for group in &intent_plan.groups {
2249 if surviving_groups.contains_key(group.group_id.as_str()) {
2250 continue;
2251 }
2252
2253 let redirect = executable_groups
2254 .iter()
2255 .filter(|candidate| candidate.group_id != group.group_id)
2256 .filter(|candidate| {
2257 candidate.file_ids.iter().any(|file_id| {
2258 group
2259 .file_ids
2260 .iter()
2261 .any(|candidate_id| candidate_id == file_id)
2262 })
2263 })
2264 .min_by_key(|candidate| {
2265 group_rank
2266 .get(candidate.group_id.as_str())
2267 .copied()
2268 .unwrap_or(usize::MAX)
2269 })
2270 .map(|candidate| candidate.group_id.clone());
2271
2272 if let Some(redirect) = redirect {
2273 redirects.insert(group.group_id.clone(), redirect);
2274 }
2275 }
2276
2277 redirects
2278}
2279
/// Follows `redirects` starting at `group_id` and returns the id the walk ends
/// on.
///
/// An id with no entry in `redirects` is returned unchanged. Each id is
/// expanded at most once: if the walk returns to an id it has already
/// expanded (a cycle, including a self-redirect), it stops and returns that
/// id.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
    let mut current = group_id;
    let mut expanded: HashSet<&str> = HashSet::new();

    while let Some(next) = redirects.get(current) {
        // `insert` returns false when `current` was already expanded.
        if !expanded.insert(current) {
            break;
        }
        current = next.as_str();
    }

    current.to_string()
}
2293
2294fn prune_empty_groups(
2295 groups: Vec<ComposeExecutableGroup>,
2296 redirects: &HashMap<String, String>,
2297) -> Result<ComposeExecutablePlan> {
2298 let surviving_ids: HashSet<String> = groups
2299 .iter()
2300 .filter(|group| !group.hunk_ids.is_empty())
2301 .map(|group| group.group_id.clone())
2302 .collect();
2303
2304 let mut surviving_groups = Vec::new();
2305 for mut group in groups {
2306 if group.hunk_ids.is_empty() {
2307 continue;
2308 }
2309
2310 let mut rewritten_dependencies = Vec::new();
2311 for dependency in &group.dependencies {
2312 let rewritten = resolve_redirect(dependency, redirects);
2313 if rewritten != group.group_id
2314 && surviving_ids.contains(&rewritten)
2315 && !rewritten_dependencies
2316 .iter()
2317 .any(|existing| existing == &rewritten)
2318 {
2319 rewritten_dependencies.push(rewritten);
2320 }
2321 }
2322
2323 group.dependencies = rewritten_dependencies;
2324 surviving_groups.push(group);
2325 }
2326
2327 let dependency_order = compute_dependency_order(
2328 &surviving_groups,
2329 |group| &group.group_id,
2330 |group| &group.dependencies,
2331 )?;
2332 Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2333}
2334
2335fn finalize_executable_plan(
2336 snapshot: &ComposeSnapshot,
2337 intent_plan: &ComposeIntentPlan,
2338 assigned_by_group: HashMap<String, BTreeSet<String>>,
2339) -> Result<ComposeExecutablePlan> {
2340 let group_rank: HashMap<&str, usize> = intent_plan
2341 .dependency_order
2342 .iter()
2343 .enumerate()
2344 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2345 .collect();
2346
2347 let mut executable_groups = Vec::new();
2348 for group in &intent_plan.groups {
2349 let hunk_ids: Vec<String> = snapshot
2350 .hunks
2351 .iter()
2352 .filter(|hunk| {
2353 assigned_by_group
2354 .get(&group.group_id)
2355 .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2356 })
2357 .map(|hunk| hunk.hunk_id.clone())
2358 .collect();
2359
2360 let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2361 let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2362 executable_groups.push(ComposeExecutableGroup {
2363 group_id: group.group_id.clone(),
2364 commit_type,
2365 scope: group.scope.clone(),
2366 file_ids,
2367 rationale: group.rationale.clone(),
2368 dependencies: group.dependencies.clone(),
2369 hunk_ids,
2370 });
2371 }
2372
2373 let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2374 prune_empty_groups(executable_groups, &redirects)
2375}
2376
2377fn validate_executable_plan(
2378 snapshot: &ComposeSnapshot,
2379 plan: &ComposeExecutablePlan,
2380) -> Result<()> {
2381 if plan.groups.is_empty() {
2382 return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2383 }
2384
2385 let known_hunks: HashSet<&str> = snapshot
2386 .hunks
2387 .iter()
2388 .map(|hunk| hunk.hunk_id.as_str())
2389 .collect();
2390 let known_files: HashSet<&str> = snapshot
2391 .files
2392 .iter()
2393 .map(|file| file.file_id.as_str())
2394 .collect();
2395 let mut coverage = HashMap::<String, String>::new();
2396
2397 for group in &plan.groups {
2398 if group.hunk_ids.is_empty() {
2399 return Err(CommitGenError::Other(format!(
2400 "Compose group {} ended up empty after binding",
2401 group.group_id
2402 )));
2403 }
2404
2405 for file_id in &group.file_ids {
2406 if !known_files.contains(file_id.as_str()) {
2407 return Err(CommitGenError::Other(format!(
2408 "Compose group {} references unknown file_id {}",
2409 group.group_id, file_id
2410 )));
2411 }
2412 }
2413
2414 for hunk_id in &group.hunk_ids {
2415 if !known_hunks.contains(hunk_id.as_str()) {
2416 return Err(CommitGenError::Other(format!(
2417 "Compose group {} references unknown hunk_id {}",
2418 group.group_id, hunk_id
2419 )));
2420 }
2421
2422 if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2423 return Err(CommitGenError::Other(format!(
2424 "Hunk {} was assigned to both {} and {}",
2425 hunk_id, existing_group, group.group_id
2426 )));
2427 }
2428 }
2429 }
2430
2431 let missing_hunks: Vec<String> = snapshot
2432 .hunks
2433 .iter()
2434 .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2435 .map(|hunk| hunk.hunk_id.clone())
2436 .collect();
2437 if !missing_hunks.is_empty() {
2438 return Err(CommitGenError::Other(format!(
2439 "Compose plan left hunks unassigned: {}",
2440 missing_hunks.join(", ")
2441 )));
2442 }
2443
2444 let dependency_order =
2445 compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2446 if dependency_order != plan.dependency_order {
2447 return Err(CommitGenError::Other(
2448 "Compose dependency order does not match recomputed order".to_string(),
2449 ));
2450 }
2451
2452 Ok(())
2453}
2454
2455#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2456async fn bind_compose_plan(
2457 snapshot: &ComposeSnapshot,
2458 intent_plan: &ComposeIntentPlan,
2459 config: &CommitConfig,
2460 debug_dir: Option<&Path>,
2461) -> Result<ComposeExecutablePlan> {
2462 let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2463
2464 if !ambiguous_files.is_empty() {
2465 let valid_group_ids: HashSet<&str> = intent_plan
2466 .groups
2467 .iter()
2468 .map(|group| group.group_id.as_str())
2469 .collect();
2470 let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2471 let mut unresolved = Vec::new();
2472
2473 for (batch_idx, batch) in binding_batches.iter().enumerate() {
2474 let hunk_context = ambiguous_hunk_context(batch);
2475 let debug_name = if binding_batches.len() == 1 {
2476 "compose_bind".to_string()
2477 } else {
2478 format!("compose_bind_{:02}", batch_idx + 1)
2479 };
2480 let assignments =
2481 request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2482 .await?;
2483 let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2484 for (group_id, hunk_ids) in evaluation.assigned {
2485 let entry = assigned_by_group.entry(group_id).or_default();
2486 for hunk_id in hunk_ids {
2487 entry.insert(hunk_id);
2488 }
2489 }
2490 unresolved.extend(evaluation.unresolved);
2491 }
2492
2493 let group_rank: HashMap<&str, usize> = intent_plan
2494 .dependency_order
2495 .iter()
2496 .enumerate()
2497 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2498 .collect();
2499
2500 let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2501 if !unresolved.is_empty() {
2502 let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2503 let repair_batches = chunk_ambiguous_files(&unresolved_files);
2504 let mut repair_unresolved = Vec::new();
2505
2506 for (batch_idx, batch) in repair_batches.iter().enumerate() {
2507 let debug_name = if repair_batches.len() == 1 {
2508 "compose_bind_repair".to_string()
2509 } else {
2510 format!("compose_bind_repair_{:02}", batch_idx + 1)
2511 };
2512 let repair_assignments = request_binding(
2513 snapshot,
2514 &intent_plan.groups,
2515 batch,
2516 config,
2517 debug_dir,
2518 &debug_name,
2519 )
2520 .await?;
2521 let repair_context = ambiguous_hunk_context(batch);
2522 let repair =
2523 evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2524 for (group_id, hunk_ids) in repair.assigned {
2525 let entry = assigned_by_group.entry(group_id).or_default();
2526 for hunk_id in hunk_ids {
2527 entry.insert(hunk_id);
2528 }
2529 }
2530
2531 repair_unresolved.extend(repair.unresolved);
2532 }
2533 unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2534
2535 if !unresolved.is_empty() {
2536 assign_unresolved_hunks(
2537 &unresolved,
2538 &mut assigned_by_group,
2539 &ambiguous_files,
2540 &group_rank,
2541 );
2542 }
2543 }
2544 }
2545
2546 let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2547 validate_executable_plan(snapshot, &plan)?;
2548 Ok(plan)
2549}
2550
2551fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2552 println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2553 for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2554 let group = &plan.groups[group_idx];
2555 let scope = group
2556 .scope
2557 .as_ref()
2558 .map(|scope| format!("({})", style::scope(scope.as_str())))
2559 .unwrap_or_default();
2560
2561 println!(
2562 "\n{}. {} [{}{}] {}",
2563 display_idx + 1,
2564 style::bold(&group.group_id),
2565 style::commit_type(group.commit_type.as_str()),
2566 scope,
2567 group.rationale
2568 );
2569
2570 println!(" Files:");
2571 for file_id in &group.file_ids {
2572 if let Some(file) = snapshot.file_by_id(file_id) {
2573 let selected_hunk_ids: Vec<&str> = group
2574 .hunk_ids
2575 .iter()
2576 .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2577 .map(String::as_str)
2578 .collect();
2579 let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2580 "all hunks".to_string()
2581 } else {
2582 selected_hunk_ids.join(", ")
2583 };
2584 println!(" - {} {} ({selection})", file.file_id, file.path);
2585 }
2586 }
2587
2588 if !group.dependencies.is_empty() {
2589 println!(" Depends on: {}", group.dependencies.join(", "));
2590 }
2591 }
2592}
2593
2594#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2595async fn generate_compose_group_analysis(
2596 stat: &str,
2597 diff: &str,
2598 group: &ComposeExecutableGroup,
2599 config: &CommitConfig,
2600 args: &Args,
2601 debug_prefix: &str,
2602 counter: &TokenCounter,
2603) -> Result<ConventionalAnalysis> {
2604 match compose_analysis_strategy(diff, config, counter) {
2605 ComposeAnalysisStrategy::MapReduce => {
2606 println!(
2607 " {}",
2608 style::info(&format!(
2609 "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2610 group.group_id
2611 ))
2612 );
2613 run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2614 },
2615 strategy => {
2616 let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2617 eprintln!(
2618 " {}",
2619 style::warning(&format!(
2620 "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2621 group.group_id
2622 ))
2623 );
2624 Cow::Owned(smart_truncate_diff(
2625 diff,
2626 compose_truncation_length(config),
2627 config,
2628 counter,
2629 ))
2630 } else {
2631 Cow::Borrowed(diff)
2632 };
2633
2634 let ctx = AnalysisContext {
2635 user_context: Some(&group.rationale),
2636 recent_commits: None,
2637 common_scopes: None,
2638 project_context: None,
2639 debug_output: args.debug_output.as_deref(),
2640 debug_prefix: Some(debug_prefix),
2641 };
2642
2643 generate_conventional_analysis(
2644 stat,
2645 analysis_diff.as_ref(),
2646 &config.analysis_model,
2647 "",
2648 &ctx,
2649 config,
2650 )
2651 .await
2652 },
2653 }
2654}
2655
2656fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2657 let files: Vec<&str> = group
2658 .file_ids
2659 .iter()
2660 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2661 .collect();
2662
2663 if files.is_empty() {
2664 "no files resolved".to_string()
2665 } else {
2666 files.join(", ")
2667 }
2668}
2669
2670fn cumulative_file_hunk_ids(
2674 plan: &ComposeExecutablePlan,
2675 position: usize,
2676 snapshot: &ComposeSnapshot,
2677 file_id: &str,
2678) -> Vec<String> {
2679 let mut hunk_ids = Vec::new();
2680 for &group_idx in plan.dependency_order.iter().take(position + 1) {
2681 let Some(group) = plan.groups.get(group_idx) else {
2682 continue;
2683 };
2684 for hunk_id in &group.hunk_ids {
2685 if snapshot
2686 .hunk_by_id(hunk_id)
2687 .is_some_and(|hunk| hunk.file_id == file_id)
2688 {
2689 hunk_ids.push(hunk_id.clone());
2690 }
2691 }
2692 }
2693 hunk_ids
2694}
2695
2696#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2697pub async fn execute_compose(
2698 snapshot: &ComposeSnapshot,
2699 plan: &ComposeExecutablePlan,
2700 config: &CommitConfig,
2701 args: &Args,
2702 base_state: &ComposeBaseState,
2703) -> Result<Vec<String>> {
2704 let total = plan.dependency_order.len();
2705
2706 let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2710 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2711 let group = &plan.groups[group_idx];
2712 println!(
2713 " {}",
2714 style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2715 );
2716 let group_patch = create_executable_group_patch(snapshot, group)?;
2717 group_diff_stats.push((group_patch.diff, group_patch.stat));
2718 }
2719
2720 println!(
2724 "{}",
2725 style::info(&format!(
2726 "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2727 COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2728 ))
2729 );
2730
2731 let token_counter = create_token_counter(config);
2732 let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2733 stream::iter(plan.dependency_order.iter().enumerate())
2734 .map(|(idx, &group_idx)| {
2735 let group = &plan.groups[group_idx];
2736 let (diff, stat) = &group_diff_stats[idx];
2737 let debug_prefix = format!("compose-{}", idx + 1);
2738 let token_counter = &token_counter;
2739 async move {
2740 let result = async {
2741 let analysis = generate_compose_group_analysis(
2742 stat,
2743 diff,
2744 group,
2745 config,
2746 args,
2747 &debug_prefix,
2748 token_counter,
2749 )
2750 .await?;
2751 let body = analysis.body_texts();
2752 let summary = generate_summary_from_analysis(
2753 stat,
2754 group.commit_type.as_str(),
2755 group.scope.as_ref().map(|scope| scope.as_str()),
2756 &body,
2757 Some(&group.rationale),
2758 config,
2759 args.debug_output.as_deref(),
2760 Some(&debug_prefix),
2761 )
2762 .await?;
2763 Ok::<_, CommitGenError>((body, summary))
2764 }
2765 .await;
2766
2767 result.map_err(|source| CommitGenError::ComposeMessageError {
2768 group_id: group.group_id.clone(),
2769 files: compose_group_file_list(snapshot, group),
2770 source: Box::new(source),
2771 })
2772 }
2773 })
2774 .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2775 .collect::<Vec<_>>()
2776 .await
2777 .into_iter()
2778 .collect::<Result<Vec<_>>>()?;
2779
2780 execute_compose_with_prepared_messages(
2781 snapshot,
2782 plan,
2783 config,
2784 args,
2785 base_state,
2786 prepared_messages,
2787 )
2788}
2789
2790#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2791fn execute_compose_with_prepared_messages(
2792 snapshot: &ComposeSnapshot,
2793 plan: &ComposeExecutablePlan,
2794 config: &CommitConfig,
2795 args: &Args,
2796 base_state: &ComposeBaseState,
2797 prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2798) -> Result<Vec<String>> {
2799 let dir = &args.dir;
2800 let total = plan.dependency_order.len();
2801 if args.compose_preview {
2802 return Ok(Vec::new());
2803 }
2804
2805 let index = TempGitIndex::new(dir)?;
2806 read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2807
2808 let mut commit_hashes = Vec::new();
2809 let mut parent_hash = base_state.head_hash.clone();
2810
2811 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2815 let group = &plan.groups[group_idx];
2816
2817 println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2818 println!(" Type: {}", style::commit_type(group.commit_type.as_str()));
2819 if let Some(scope) = &group.scope {
2820 println!(" Scope: {}", style::scope(scope.as_str()));
2821 }
2822 let paths: Vec<String> = group
2823 .file_ids
2824 .iter()
2825 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2826 .collect();
2827 println!(" Files: {}", paths.join(", "));
2828
2829 let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2830 let mut staged_anything = outcome.result == StageResult::Staged;
2831
2832 for skipped in &outcome.skipped {
2836 let Some(file) = snapshot.file_by_path(&skipped.path) else {
2837 continue;
2838 };
2839 let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2840 force_stage_file_from_base_in_index(
2841 snapshot,
2842 &file.file_id,
2843 &cumulative,
2844 dir,
2845 index.path(),
2846 )?;
2847 staged_anything = true;
2848 eprintln!(
2849 " {}",
2850 style::info(&format!(
2851 "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2852 skipped.path
2853 ))
2854 );
2855 }
2856
2857 if !staged_anything {
2858 eprintln!(
2859 " {}",
2860 style::warning(&format!(
2861 "Skipping commit {}: its planned patch is already applied ({:?})",
2862 group.group_id, outcome.result
2863 ))
2864 );
2865 continue;
2866 }
2867
2868 let (analysis_body, summary) = prepared_messages[idx].clone();
2869 let mut commit = ConventionalCommit {
2870 commit_type: group.commit_type.clone(),
2871 scope: group.scope.clone(),
2872 summary,
2873 body: analysis_body,
2874 footers: vec![],
2875 };
2876 post_process_commit_message(&mut commit, config);
2877
2878 if let Err(err) = validate_commit_message(&commit, config) {
2879 eprintln!(
2880 " {}",
2881 style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2882 );
2883 }
2884
2885 let mut formatted_message = format_commit_message(&commit);
2886 if args.signoff || config.signoff {
2887 formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2888 }
2889 println!(
2890 " Message:\n{}",
2891 formatted_message
2892 .lines()
2893 .take(3)
2894 .collect::<Vec<_>>()
2895 .join("\n")
2896 );
2897
2898 let tree = write_index_tree(index.path(), dir)?;
2899 let sign = args.sign || config.gpg_sign;
2900 let hash = commit_tree(&tree, &[parent_hash.as_str()], &formatted_message, dir, sign)?;
2901 parent_hash.clone_from(&hash);
2902 commit_hashes.push(hash);
2903
2904 if args.compose_test_after_each {
2905 return Err(CommitGenError::Other(
2906 "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2907 ));
2908 }
2909 }
2910
2911 if commit_hashes.is_empty() {
2912 return Ok(commit_hashes);
2913 }
2914
2915 update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2916
2917 let current_index_tree = write_real_index_tree(dir)?;
2918 if current_index_tree == base_state.index_tree {
2919 reset_mixed_to(&parent_hash, dir)?;
2920 } else {
2921 println!(
2925 "{}",
2926 style::warning("Index changed during compose; preserving newly staged changes")
2927 );
2928 let paths: Vec<String> = snapshot.files.iter().map(|file| file.path.clone()).collect();
2929 reset_paths_to(&parent_hash, &paths, dir)?;
2930 }
2931
2932 Ok(commit_hashes)
2933}
2934
2935#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2936pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2937 let max_rounds = config.compose_max_rounds;
2938
2939 for round in 1..=max_rounds {
2940 if round > 1 {
2941 println!(
2942 "\n{}",
2943 style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2944 );
2945 } else {
2946 println!("{}", style::section_header("Compose Mode", 80));
2947 }
2948 println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2949
2950 run_compose_round(args, config, round).await?;
2951
2952 if args.compose_preview {
2953 break;
2954 }
2955
2956 match get_compose_diff(&args.dir) {
2957 Err(CommitGenError::NoChanges { .. }) => {
2958 println!(
2959 "\n{}",
2960 style::success(&format!(
2961 "{} All changes committed successfully",
2962 style::icons::SUCCESS
2963 ))
2964 );
2965 break;
2966 },
2967 Err(err) => return Err(err),
2968 Ok(remaining_diff) => {
2969 eprintln!(
2970 "\n{}",
2971 style::warning(&format!(
2972 "{} Uncommitted changes remain after round {round}",
2973 style::icons::WARNING
2974 ))
2975 );
2976 eprintln!("{remaining_diff}");
2977 },
2978 }
2979
2980 if round < max_rounds {
2981 eprintln!("{}", style::info("Starting another compose round..."));
2982 } else {
2983 eprintln!(
2984 "{}",
2985 style::warning(&format!(
2986 "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2987 ))
2988 );
2989 }
2990 }
2991
2992 Ok(())
2993}
2994
2995#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2996async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2997 let base_state = capture_compose_base_state(&args.dir)?;
2998 let diff = get_compose_diff(&args.dir)?;
2999 let stat = get_compose_stat(&args.dir)?;
3000 let mut snapshot = build_compose_snapshot(&diff, &stat)?;
3001 pin_snapshot_worktree_state(&mut snapshot, &args.dir)?;
3005 let snapshot = snapshot;
3006
3007 if let Some(debug_dir) = args.debug_output.as_deref() {
3008 save_debug_artifact(
3009 Some(debug_dir),
3010 &format!("compose_round_{round}_snapshot.json"),
3011 &snapshot,
3012 )?;
3013 }
3014
3015 let token_counter = create_token_counter(config);
3016 let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3017 println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3018 observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3019 } else {
3020 if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3021 && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3022 {
3023 println!(
3024 "{}",
3025 style::info(
3026 "Skipping per-file observations for very large compose snapshot; using area-level \
3027 planning instead."
3028 )
3029 );
3030 }
3031 Vec::new()
3032 };
3033
3034 if let Some(debug_dir) = args.debug_output.as_deref()
3035 && !observations.is_empty()
3036 {
3037 save_debug_artifact(
3038 Some(debug_dir),
3039 &format!("compose_round_{round}_observations.json"),
3040 &observations,
3041 )?;
3042 }
3043
3044 let max_commits = args.compose_max_commits.unwrap_or(20);
3045 let executable_plan = if let Some(cached_plan) =
3046 load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3047 {
3048 println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3049 cached_plan
3050 } else {
3051 println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3052 let intent_plan = analyze_compose_intent(
3053 &snapshot,
3054 &observations,
3055 config,
3056 max_commits,
3057 args.debug_output.as_deref(),
3058 )
3059 .await?;
3060
3061 if let Some(debug_dir) = args.debug_output.as_deref() {
3062 save_debug_artifact(
3063 Some(debug_dir),
3064 &format!("compose_round_{round}_intent_plan.json"),
3065 &intent_plan,
3066 )?;
3067 }
3068
3069 println!("{}", style::info("Binding hunks to groups..."));
3070 let plan =
3071 bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3072 save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3073 plan
3074 };
3075
3076 if let Some(debug_dir) = args.debug_output.as_deref() {
3077 save_debug_artifact(
3078 Some(debug_dir),
3079 &format!("compose_round_{round}_executable_plan.json"),
3080 &executable_plan,
3081 )?;
3082 }
3083
3084 print_executable_plan(&snapshot, &executable_plan);
3085
3086 if args.compose_preview {
3087 println!(
3088 "\n{}",
3089 style::success(&format!(
3090 "{} Preview complete (use --compose without --compose-preview to execute)",
3091 style::icons::SUCCESS
3092 ))
3093 );
3094 return Ok(());
3095 }
3096
3097 println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3098 let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3099 println!(
3100 "{}",
3101 style::success(&format!(
3102 "{} Round {round}: Created {} commit(s)",
3103 style::icons::SUCCESS,
3104 hashes.len()
3105 ))
3106 );
3107 Ok(())
3108}
3109
3110#[cfg(test)]
3111mod tests {
3112 use std::{fmt::Write, fs};
3113
3114 use tempfile::TempDir;
3115
3116 use super::*;
3117 use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3118
3119 fn shared_file_diff() -> (&'static str, &'static str) {
3120 (
3121 r#"diff --git a/src/lib.rs b/src/lib.rs
3122index 1111111..2222222 100644
3123--- a/src/lib.rs
3124+++ b/src/lib.rs
3125@@ -1,3 +1,3 @@
3126-fn alpha() {
3127+fn alpha_changed() {
3128 println!("alpha");
3129 }
3130@@ -12,3 +12,3 @@
3131-fn beta() {
3132+fn beta_changed() {
3133 println!("beta");
3134 }
3135diff --git a/tests/lib.rs b/tests/lib.rs
3136index 3333333..4444444 100644
3137--- a/tests/lib.rs
3138+++ b/tests/lib.rs
3139@@ -1,3 +1,4 @@
3140 fn test_it() {
3141+ assert!(true);
3142 }
3143"#,
3144 " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n",
3145 )
3146 }
3147
3148 fn build_test_snapshot() -> ComposeSnapshot {
3149 let (diff, stat) = shared_file_diff();
3150 build_compose_snapshot(diff, stat).unwrap()
3151 }
3152
3153 fn write_file(dir: &TempDir, path: &str, contents: &str) {
3154 let full_path = dir.path().join(path);
3155 if let Some(parent) = full_path.parent() {
3156 fs::create_dir_all(parent).unwrap();
3157 }
3158 fs::write(full_path, contents).unwrap();
3159 }
3160
3161 fn run_git(dir: &TempDir, args: &[&str]) -> String {
3162 let output = crate::git::git_command()
3163 .args(args)
3164 .current_dir(dir.path())
3165 .output()
3166 .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3167
3168 assert!(
3169 output.status.success(),
3170 "git {:?} failed: stdout={} stderr={}",
3171 args,
3172 String::from_utf8_lossy(&output.stdout),
3173 String::from_utf8_lossy(&output.stderr)
3174 );
3175
3176 String::from_utf8_lossy(&output.stdout).to_string()
3177 }
3178
3179 fn init_repo() -> TempDir {
3180 let dir = TempDir::new().unwrap();
3181 run_git(&dir, &["init"]);
3182 run_git(&dir, &["config", "user.name", "Compose Test"]);
3183 run_git(&dir, &["config", "user.email", "compose@test.local"]);
3184 run_git(&dir, &["config", "commit.gpgsign", "false"]);
3185 dir
3186 }
3187
3188 fn commit_all(dir: &TempDir, message: &str) {
3189 run_git(dir, &["add", "."]);
3190 run_git(dir, &["commit", "-m", message]);
3191 }
3192
3193 fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3194 (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3195 }
3196
3197 #[test]
3198 fn test_compose_file_category_treats_prompts_as_functional_source() {
3199 let diff = r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
3200index 1111111..2222222 100644
3201--- a/prompts/analysis/default.md
3202+++ b/prompts/analysis/default.md
3203@@ -1,1 +1,1 @@
3204-old prompt
3205+new prompt
3206diff --git a/system/analysis/default.md b/system/analysis/default.md
3207index 5555555..6666666 100644
3208--- a/system/analysis/default.md
3209+++ b/system/analysis/default.md
3210@@ -1,1 +1,1 @@
3211-old system
3212+new system
3213diff --git a/README.md b/README.md
3214index 3333333..4444444 100644
3215--- a/README.md
3216+++ b/README.md
3217@@ -1,1 +1,1 @@
3218-old docs
3219+new docs
3220";
3221 let snapshot = build_compose_snapshot(diff, "").unwrap();
3222 let prompt_file = snapshot
3223 .file_by_path("prompts/analysis/default.md")
3224 .unwrap();
3225 let system_file = snapshot.file_by_path("system/analysis/default.md").unwrap();
3226 let readme_file = snapshot.file_by_path("README.md").unwrap();
3227
3228 assert_eq!(compose_file_category(prompt_file), ComposeFileCategory::Prompt);
3229 assert_eq!(compose_file_category(system_file), ComposeFileCategory::Prompt);
3230 assert_eq!(compose_file_category(readme_file), ComposeFileCategory::Docs);
3231
3232 let feat_group = ComposeIntentGroup {
3233 group_id: "G1".to_string(),
3234 commit_type: CommitType::new("feat").unwrap(),
3235 scope: None,
3236 file_ids: vec![prompt_file.file_id.clone()],
3237 rationale: "prompt behavior change".to_string(),
3238 dependencies: vec![],
3239 };
3240 assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);
3241
3242 let fallback_type =
3243 fallback_commit_type_for_group(&snapshot, &[], std::slice::from_ref(&prompt_file.file_id))
3244 .unwrap();
3245 assert_eq!(fallback_type.as_str(), "refactor");
3246 }
3247
3248 fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3249 let mut diff = String::new();
3250
3251 for file_idx in 0..file_count {
3252 let path = format!("src/module_{file_idx:03}.rs");
3253 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3254 diff.push_str("index 1111111..2222222 100644\n");
3255 writeln!(diff, "--- a/{path}").unwrap();
3256 writeln!(diff, "+++ b/{path}").unwrap();
3257
3258 for hunk_idx in 0..hunks_per_file {
3259 let line_no = (hunk_idx * 4) + 1;
3260 writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3261 writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3262 writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3263 }
3264 }
3265
3266 build_compose_snapshot(&diff, "").unwrap()
3267 }
3268
3269 fn build_multi_area_snapshot() -> ComposeSnapshot {
3270 let mut diff = String::new();
3271 let areas = [
3272 ("apps/frontend/src/server", 72),
3273 ("packages/model/src/models", 54),
3274 ("apps/daemon/src/worker", 43),
3275 (".github/workflows", 16),
3276 ];
3277
3278 for (prefix, count) in areas {
3279 for file_idx in 0..count {
3280 let path = format!("{prefix}/file_{file_idx:03}.rs");
3281 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3282 diff.push_str("index 1111111..2222222 100644\n");
3283 writeln!(diff, "--- a/{path}").unwrap();
3284 writeln!(diff, "+++ b/{path}").unwrap();
3285 diff.push_str("@@ -1,1 +1,1 @@\n");
3286 writeln!(diff, "-old_{file_idx}").unwrap();
3287 writeln!(diff, "+new_{file_idx}").unwrap();
3288 }
3289 }
3290
3291 build_compose_snapshot(&diff, "").unwrap()
3292 }
3293
3294 fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3295 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3296 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3297 let groups = vec![
3298 ComposeIntentGroup {
3299 group_id: "G1".to_string(),
3300 commit_type: CommitType::new("refactor").unwrap(),
3301 scope: None,
3302 file_ids: vec![source_file.file_id.clone(), test_file.file_id.clone()],
3303 rationale: "implementation group".to_string(),
3304 dependencies: vec![],
3305 },
3306 ComposeIntentGroup {
3307 group_id: "G2".to_string(),
3308 commit_type: CommitType::new("refactor").unwrap(),
3309 scope: None,
3310 file_ids: vec![source_file.file_id.clone()],
3311 rationale: "shared file follow-up".to_string(),
3312 dependencies: vec!["G1".to_string()],
3313 },
3314 ];
3315 let dependency_order =
3316 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3317 .unwrap();
3318 ComposeIntentPlan { groups, dependency_order }
3319 }
3320
/// Executes a two-group plan (one group per modified file) with canned messages and checks
/// that two hashes come back, `HEAD` equals the second one, and `git diff --cached` is empty.
#[test]
fn test_execute_compose_with_temp_index_applies_two_group_plan() {
    // Baseline commit, then uncommitted edits to both files.
    let dir = init_repo();
    write_file(&dir, "src/a.rs", "fn a() {}\n");
    write_file(&dir, "src/b.rs", "fn b() {}\n");
    commit_all(&dir, "initial");
    write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
    write_file(&dir, "src/b.rs", "fn b_changed() {}\n");

    let repo_path = dir.path().to_str().unwrap();
    let diff = get_compose_diff(repo_path).unwrap();
    let stat = get_compose_stat(repo_path).unwrap();
    let snapshot = build_compose_snapshot(&diff, &stat).unwrap();

    let a_file = snapshot.file_by_path("src/a.rs").unwrap();
    let b_file = snapshot.file_by_path("src/b.rs").unwrap();
    let first_group = ComposeExecutableGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![a_file.file_id.clone()],
        rationale: String::from("change a"),
        dependencies: Vec::new(),
        hunk_ids: a_file.hunk_ids.clone(),
    };
    let second_group = ComposeExecutableGroup {
        group_id: String::from("G2"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![b_file.file_id.clone()],
        rationale: String::from("change b"),
        dependencies: vec![String::from("G1")],
        hunk_ids: b_file.hunk_ids.clone(),
    };
    let plan = ComposeExecutablePlan {
        groups: vec![first_group, second_group],
        dependency_order: vec![0, 1],
    };

    let config = CommitConfig::default();
    let args = Args {
        dir: dir.path().to_string_lossy().to_string(),
        compose: true,
        ..Default::default()
    };
    let base_state = capture_compose_base_state(&args.dir).unwrap();
    let messages = vec![canned_message("change a"), canned_message("change b")];

    let hashes = execute_compose_with_prepared_messages(
        &snapshot,
        &plan,
        &config,
        &args,
        &base_state,
        messages,
    )
    .unwrap();

    assert_eq!(hashes.len(), 2);
    let head = get_head_hash(&args.dir).unwrap();
    assert_eq!(head, hashes[1]);
    let staged = run_git(&dir, &["diff", "--cached"]);
    assert!(staged.trim().is_empty());
}
3380
/// Runs a plan that references the hunk id `F999-H001` while an unrelated change to
/// `sentinel.txt` is staged, then checks that the error mentions "unknown hunk id", `HEAD`
/// is unchanged, and the `git diff --cached` output is identical to what it was beforehand.
#[test]
fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
    let dir = init_repo();
    write_file(&dir, "src/lib.rs", "old\n");
    write_file(&dir, "sentinel.txt", "base\n");
    commit_all(&dir, "initial");

    let repo_path = dir.path().to_str().unwrap();
    let initial_head = get_head_hash(repo_path).unwrap();

    // Unstaged edit to the source file, staged edit to the sentinel.
    write_file(&dir, "src/lib.rs", "changed\n");
    write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
    run_git(&dir, &["add", "sentinel.txt"]);
    let staged_before = run_git(&dir, &["diff", "--cached"]);
    assert!(staged_before.contains("staged sentinel"));

    let diff = get_compose_diff(repo_path).unwrap();
    let stat = get_compose_stat(repo_path).unwrap();
    let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    let unstageable_group = ComposeExecutableGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("fix").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: String::from("unstageable group"),
        dependencies: Vec::new(),
        hunk_ids: vec![String::from("F999-H001")],
    };
    let plan = ComposeExecutablePlan {
        groups: vec![unstageable_group],
        dependency_order: vec![0],
    };

    let config = CommitConfig::default();
    let args = Args {
        dir: dir.path().to_string_lossy().to_string(),
        compose: true,
        ..Default::default()
    };
    let base_state = capture_compose_base_state(&args.dir).unwrap();
    let messages = vec![canned_message("unstageable group")];

    let err = execute_compose_with_prepared_messages(
        &snapshot,
        &plan,
        &config,
        &args,
        &base_state,
        messages,
    )
    .unwrap_err();

    assert!(err.to_string().contains("unknown hunk id"));
    let head_after = get_head_hash(&args.dir).unwrap();
    assert_eq!(head_after, initial_head);
    let staged_after = run_git(&dir, &["diff", "--cached"]);
    assert_eq!(staged_after, staged_before);
}
3438
/// With the shared intent plan, `auto_assign_hunks` must report exactly one ambiguous file
/// and place every hunk of `tests/lib.rs` under `G1`.
#[test]
fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
    let snapshot = build_test_snapshot();
    let intent_plan = build_shared_intent_plan(&snapshot);
    let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();

    assert_eq!(ambiguous.len(), 1);

    let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
    let g1_hunks = assigned.get("G1").unwrap();
    let has_unassigned_hunk = test_file
        .hunk_ids
        .iter()
        .any(|hunk_id| !g1_hunks.contains(hunk_id));
    assert!(!has_unassigned_hunk, "uniquely owned file should be auto-assigned");
}
3456
/// Feeds `evaluate_binding` a binding in which the second hunk of `src/lib.rs` is claimed by
/// both `G1` and `G2`, merges the evaluation into the auto-assigned hunks, lets
/// `assign_unresolved_hunks` place the rest, and checks that the finalized plan has a single
/// group `G1` containing every hunk of `src/lib.rs`.
#[test]
fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
    let snapshot = build_test_snapshot();
    let intent_plan = build_shared_intent_plan(&snapshot);
    let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
    let hunk_context = ambiguous_hunk_context(&ambiguous_files);
    let valid_group_ids = intent_plan
        .groups
        .iter()
        .map(|group| group.group_id.as_str())
        .collect::<HashSet<&str>>();

    let first_hunk = source_file.hunk_ids[0].clone();
    let second_hunk = source_file.hunk_ids[1].clone();
    let bindings = [
        ComposeBindingAssignment {
            group_id: String::from("G1"),
            hunk_ids: vec![first_hunk, second_hunk.clone()],
        },
        ComposeBindingAssignment {
            group_id: String::from("G2"),
            hunk_ids: vec![second_hunk],
        },
    ];
    let evaluation = evaluate_binding(&bindings, &hunk_context, &valid_group_ids, &snapshot);

    // Merge the binding's assignments on top of the automatic ones.
    for (group_id, hunk_ids) in evaluation.assigned {
        assigned.entry(group_id).or_default().extend(hunk_ids);
    }

    // Rank each group by its position in the dependency order.
    let group_rank = intent_plan
        .dependency_order
        .iter()
        .enumerate()
        .map(|(position, &idx)| (intent_plan.groups[idx].group_id.as_str(), position))
        .collect::<HashMap<&str, usize>>();
    assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);

    let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
    assert_eq!(executable_plan.groups.len(), 1);

    let surviving = &executable_plan.groups[0];
    assert_eq!(surviving.group_id, "G1");

    let has_missing_hunk = source_file
        .hunk_ids
        .iter()
        .any(|hunk_id| !surviving.hunk_ids.contains(hunk_id));
    assert!(
        !has_missing_hunk,
        "fallback should keep every hunk from the shared file in the surviving group"
    );
}
3512
/// A plan whose two groups both list the first hunk of `src/lib.rs` must be rejected by
/// `validate_executable_plan` with an error mentioning "assigned to both".
#[test]
fn test_validate_executable_plan_rejects_overlap() {
    let snapshot = build_test_snapshot();
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    let group_one = ComposeExecutableGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: String::from("group one"),
        dependencies: Vec::new(),
        hunk_ids: vec![source_file.hunk_ids[0].clone()],
    };
    // Repeats the first hunk on purpose to create the overlap.
    let group_two = ComposeExecutableGroup {
        group_id: String::from("G2"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: String::from("group two"),
        dependencies: Vec::new(),
        hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
    };
    let executable_plan = ComposeExecutablePlan {
        groups: vec![group_one, group_two],
        dependency_order: vec![0, 1],
    };

    let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("assigned to both"));
}
3544
/// A group that names files by path (one of them wrapped in backticks) must come back from
/// `normalize_intent_plan` listing the snapshot's file IDs in snapshot order, with two repair
/// notes.
#[test]
fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
    let snapshot = build_test_snapshot();
    let planning_index = build_planning_index(&snapshot);

    let path_based_group = ComposeIntentGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![String::from("src/lib.rs"), String::from("`tests/lib.rs`")],
        rationale: String::from("normalize file references"),
        dependencies: Vec::new(),
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![path_based_group]).unwrap();

    assert_eq!(normalized_groups.len(), 1);

    let expected_file_ids: Vec<_> = snapshot
        .files
        .iter()
        .map(|file| file.file_id.clone())
        .collect();
    assert_eq!(normalized_groups[0].file_ids, expected_file_ids);
    assert_eq!(repair_notes.len(), 2);
}
3572
/// When the only group covers just `src/lib.rs`, `normalize_intent_plan` must keep that
/// assignment, add `tests/lib.rs` to the same group, and emit one repair note that mentions
/// the added file's ID.
#[test]
fn test_normalize_intent_plan_repairs_missing_files() {
    let snapshot = build_test_snapshot();
    let planning_index = build_planning_index(&snapshot);
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
    let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();

    let partial_group = ComposeIntentGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: vec![source_file.file_id.clone()],
        rationale: String::from("partial coverage"),
        dependencies: Vec::new(),
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![partial_group]).unwrap();

    assert_eq!(normalized_groups.len(), 1);

    let repaired_ids = &normalized_groups[0].file_ids;
    assert!(
        repaired_ids.contains(&source_file.file_id),
        "existing file assignment should be preserved"
    );
    assert!(
        repaired_ids.contains(&test_file.file_id),
        "missing files should be assigned to an existing group"
    );

    assert_eq!(repair_notes.len(), 1);
    assert!(repair_notes[0].contains(&test_file.file_id));
}
3603
/// Gives `normalize_intent_plan` two groups whose file lists mix a real planning target ID
/// with an unknown string, and whose dependencies include `"group 2"`, a self-reference and
/// `"F5"`. Checks that both groups survive, the first group's file IDs all start with `F`,
/// its dependencies become `["G2"]`, the second group's dependencies are empty, and that
/// each of the four expected repair-note fragments appears.
#[test]
fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    let frontend_target = planning_index
        .targets
        .iter()
        .find(|candidate| candidate.label.starts_with("apps/frontend"))
        .unwrap();
    let model_target = planning_index
        .targets
        .iter()
        .find(|candidate| candidate.label.starts_with("packages/model"))
        .unwrap();

    let frontend_group = ComposeIntentGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: Scope::new("apps/frontend").ok(),
        file_ids: vec![String::from("G3_PLACEHOLDER"), frontend_target.target_id.clone()],
        rationale: String::from("frontend platform updates"),
        dependencies: vec![String::from("group 2"), String::from("G1")],
    };
    let model_group = ComposeIntentGroup {
        group_id: String::from("G2"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: Scope::new("packages/model").ok(),
        file_ids: vec![String::from("UNKNOWN_TARGET"), model_target.target_id.clone()],
        rationale: String::from("model storage updates"),
        dependencies: vec![String::from("F5")],
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![frontend_group, model_group])
            .unwrap();

    assert_eq!(normalized_groups.len(), 2);

    let first = &normalized_groups[0];
    let second = &normalized_groups[1];
    assert!(first.file_ids.iter().all(|file_id| file_id.starts_with('F')));
    assert_eq!(first.dependencies, vec![String::from("G2")]);
    assert!(second.dependencies.is_empty());

    // Every repair category exercised above must leave a note behind.
    for fragment in [
        "Dropped unknown planning target",
        "Dropped self-dependency",
        "Mapped compose planner dependency",
        "Dropped unknown dependency",
    ] {
        assert!(repair_notes.iter().any(|note| note.contains(fragment)));
    }
}
3670
/// For the small test snapshot the rendered summary must not contain the
/// "# snapshot compacted" marker and must mention every hunk ID of `src/lib.rs`.
#[test]
fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
    let snapshot = build_test_snapshot();
    let summary = render_snapshot_summary(&snapshot, &[]);
    let source_file = snapshot.file_by_path("src/lib.rs").unwrap();

    let compacted = summary.contains("# snapshot compacted");
    assert!(!compacted);

    for hunk_id in source_file.hunk_ids.iter() {
        assert!(summary.contains(hunk_id.as_str()));
    }
}
3682
/// For a 160-file, 4-hunks-per-file snapshot the summary must carry the compaction marker,
/// list `F001` with its totals, show only the first and last hunk of `F001`, and note the
/// two omitted hunks.
#[test]
fn test_render_snapshot_summary_compacts_large_snapshot() {
    let snapshot = build_large_snapshot(160, 4);
    let summary = render_snapshot_summary(&snapshot, &[]);

    // (needle, whether the summary is expected to contain it), checked in this order.
    let expectations = [
        ("# snapshot compacted", true),
        ("- F001 src/module_000.rs (+4/-4, 4 hunks)", true),
        ("F001-H001", true),
        ("F001-H004", true),
        ("F001-H002", false),
        ("F001-H003", false),
        ("... 2 more hunks omitted from F001", true),
    ];

    for (needle, should_be_present) in expectations {
        assert_eq!(
            summary.contains(needle),
            should_be_present,
            "unexpected presence state for {needle:?}"
        );
    }
}
3696
/// For the multi-area snapshot the planning index must be in `PlanningMode::Area`, expose
/// fewer targets than there are files, include a target labelled under `apps/frontend`, and
/// render a planning stat containing "planning over".
#[test]
fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    assert_eq!(planning_index.mode, PlanningMode::Area);

    let target_count = planning_index.targets.len();
    assert!(target_count < snapshot.files.len());

    let has_frontend_target = planning_index
        .targets
        .iter()
        .any(|target| target.label.starts_with("apps/frontend"));
    assert!(has_frontend_target);

    let stat = render_planning_stat(&planning_index);
    assert!(stat.contains("planning over"), "planning stat should explain the area mode");
}
3715
/// Splits the planning index's target labels into two halves, assigns each half to its own
/// group, and checks that normalization yields two groups whose file IDs all start with `F`,
/// produces at least one repair note, and that the distinct file IDs across both groups
/// number exactly `snapshot.files.len()`.
#[test]
fn test_normalize_intent_plan_expands_area_targets() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    // Gather every label once, then cut the list in two at the midpoint.
    let midpoint = planning_index.targets.len() / 2;
    let mut first_group_targets: Vec<String> = planning_index
        .targets
        .iter()
        .map(|target| target.label.clone())
        .collect();
    let second_group_targets = first_group_targets.split_off(midpoint);

    let first_group = ComposeIntentGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: first_group_targets,
        rationale: String::from("frontend and model"),
        dependencies: Vec::new(),
    };
    let second_group = ComposeIntentGroup {
        group_id: String::from("G2"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: second_group_targets,
        rationale: String::from("daemon and ci"),
        dependencies: Vec::new(),
    };

    let (normalized_groups, repair_notes) =
        normalize_intent_plan(&snapshot, &planning_index, vec![first_group, second_group])
            .unwrap();

    assert_eq!(normalized_groups.len(), 2);

    let only_file_ids = normalized_groups
        .iter()
        .flat_map(|group| group.file_ids.iter())
        .all(|file_id| file_id.starts_with('F'));
    assert!(only_file_ids, "area targets should expand back to concrete file IDs");

    assert!(!repair_notes.is_empty());

    let distinct_file_ids = normalized_groups
        .iter()
        .flat_map(|group| group.file_ids.iter())
        .collect::<HashSet<_>>();
    assert_eq!(distinct_file_ids.len(), snapshot.files.len());
}
3773
/// A single group owning every file of the multi-area snapshot must trigger
/// `should_force_large_patch_fallback`; the fallback groups built afterwards must number at
/// least three, cover `snapshot.files.len()` distinct file IDs, and include a rationale
/// containing "frontend".
#[test]
fn test_large_patch_fallback_splits_monolithic_area_plan() {
    let snapshot = build_multi_area_snapshot();
    let planning_index = build_planning_index(&snapshot);

    let every_file_id = snapshot
        .files
        .iter()
        .map(|file| file.file_id.clone())
        .collect();
    let monolithic_group = ComposeIntentGroup {
        group_id: String::from("G1"),
        commit_type: CommitType::new("refactor").unwrap(),
        scope: None,
        file_ids: every_file_id,
        rationale: String::from("repo-wide refactor"),
        dependencies: Vec::new(),
    };

    let fallback_forced =
        should_force_large_patch_fallback(&snapshot, &planning_index, &[monolithic_group], 6);
    assert!(fallback_forced);

    let fallback_groups =
        build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
    assert!(fallback_groups.len() >= 3);

    let distinct_file_ids = fallback_groups
        .iter()
        .flat_map(|group| group.file_ids.iter())
        .collect::<HashSet<_>>();
    assert_eq!(distinct_file_ids.len(), snapshot.files.len());

    let mentions_frontend = fallback_groups
        .iter()
        .any(|group| group.rationale.contains("frontend"));
    assert!(mentions_frontend, "fallback should preserve workstream identity");
}
3816
/// With a 160-file snapshot and `map_reduce_threshold` set to 1000, `should_use_map_reduce`
/// must return true for the snapshot diff while `should_collect_compose_observations` must
/// return false.
#[test]
fn test_should_collect_compose_observations_skips_area_mode() {
    let snapshot = build_large_snapshot(160, 4);
    let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
    let counter = create_token_counter(&config);

    let wants_map_reduce = should_use_map_reduce(&snapshot.diff, &config, &counter);
    assert!(wants_map_reduce);

    let collects_observations = should_collect_compose_observations(&snapshot, &config, &counter);
    assert!(!collects_observations);
}
3826
/// With `map_reduce_threshold` set to 20, a diff carrying a 200-character added line must be
/// classified as `ComposeAnalysisStrategy::MapReduce`.
#[test]
fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
    let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
    let counter = create_token_counter(&config);

    let payload = "a".repeat(200);
    let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");

    let strategy = compose_analysis_strategy(&diff, &config, &counter);
    assert_eq!(strategy, ComposeAnalysisStrategy::MapReduce);
}
3839
/// With map-reduce disabled, `max_diff_tokens` of 1 and `max_diff_length` of 10000, the
/// truncation length must be 4 and the sample diff must be classified as
/// `ComposeAnalysisStrategy::SmartTruncate`.
#[test]
fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
    let config = CommitConfig {
        map_reduce_enabled: false,
        max_diff_tokens: 1,
        max_diff_length: 10_000,
        ..Default::default()
    };
    let counter = create_token_counter(&config);

    let truncation_length = compose_truncation_length(&config);
    assert_eq!(truncation_length, 4);

    let diff = "diff --git a/models.json b/models.json\n+large";
    let strategy = compose_analysis_strategy(diff, &config, &counter);
    assert_eq!(strategy, ComposeAnalysisStrategy::SmartTruncate);
}
3860
/// With thresholds of 1000 tokens and a 10000-character length limit, a tiny diff must be
/// classified as `ComposeAnalysisStrategy::Direct`.
#[test]
fn test_compose_analysis_strategy_keeps_small_group_direct() {
    let config = CommitConfig {
        map_reduce_threshold: 1_000,
        max_diff_tokens: 1_000,
        max_diff_length: 10_000,
        ..Default::default()
    };
    let counter = create_token_counter(&config);

    let diff = "diff --git a/a.rs b/a.rs\n+a";
    let strategy = compose_analysis_strategy(diff, &config, &counter);
    assert_eq!(strategy, ComposeAnalysisStrategy::Direct);
}
3876
/// Three ambiguous files with 70, 60 and 10 hunks must be split by `chunk_ambiguous_files`
/// into two batches of one and two files, preserving all 140 hunks and keeping every batch
/// within `MAX_BIND_FILES_PER_REQUEST` and `MAX_BIND_HUNKS_PER_REQUEST`.
#[test]
fn test_chunk_ambiguous_files_splits_large_binding_request() {
    let alpha = AmbiguousFileBinding {
        file_id: String::from("F001"),
        path: String::from("src/alpha.rs"),
        candidate_group_ids: vec![String::from("G1"), String::from("G2")],
        hunk_ids: (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
    };
    let beta = AmbiguousFileBinding {
        file_id: String::from("F002"),
        path: String::from("src/beta.rs"),
        candidate_group_ids: vec![String::from("G1"), String::from("G3")],
        hunk_ids: (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
    };
    let gamma = AmbiguousFileBinding {
        file_id: String::from("F003"),
        path: String::from("src/gamma.rs"),
        candidate_group_ids: vec![String::from("G2"), String::from("G3")],
        hunk_ids: (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
    };
    let ambiguous_files = vec![alpha, beta, gamma];

    let batches = chunk_ambiguous_files(&ambiguous_files);

    // Hunk total per batch, summed over all batches.
    let total_hunks: usize = batches
        .iter()
        .map(|batch| batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>())
        .sum();

    assert_eq!(batches.len(), 2);
    assert_eq!(batches[0].len(), 1);
    assert_eq!(batches[1].len(), 2);
    assert_eq!(total_hunks, 140);

    for batch in &batches {
        let batch_hunks: usize = batch.iter().map(|file| file.hunk_ids.len()).sum();
        assert!(
            batch.len() <= MAX_BIND_FILES_PER_REQUEST && batch_hunks <= MAX_BIND_HUNKS_PER_REQUEST
        );
    }
}
3917}