1use std::{
2 borrow::Cow,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 fs,
6 path::{Path, PathBuf},
7};
8
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13 api::{
14 AnalysisContext, OneShotDebug, OneShotSpec, generate_conventional_analysis,
15 generate_summary_from_analysis, run_oneshot, strict_json_schema,
16 },
17 compose_types::{
18 ComposeBindingAssignment, ComposeExecutableGroup, ComposeExecutablePlan, ComposeFile,
19 ComposeIntentGroup, ComposeIntentPlan, ComposeSnapshot,
20 },
21 config::CommitConfig,
22 diff::smart_truncate_diff,
23 error::{CommitGenError, Result},
24 git::{
25 TempGitIndex, append_signoff_trailer, commit_tree, current_head_ref, get_compose_diff,
26 get_compose_stat, get_git_dir, get_head_hash, read_tree_into_index, reset_mixed_to,
27 update_ref_checked, write_index_tree, write_real_index_tree,
28 },
29 map_reduce::{FileObservation, observe_diff_files, run_map_reduce, should_use_map_reduce},
30 normalization::{format_commit_message, post_process_commit_message},
31 patch::{
32 StageResult, build_compose_snapshot, create_executable_group_patch,
33 force_stage_file_from_base_in_index, stage_executable_group_in_index,
34 },
35 style, templates,
36 tokens::{TokenCounter, create_token_counter},
37 types::{Args, CommitSummary, CommitType, ConventionalAnalysis, ConventionalCommit, Scope},
38 validation::validate_commit_message,
39};
40
// --- Snapshot summary rendering ---------------------------------------------------------------

/// Observation cap per file used when the snapshot summary is not compacted.
const MAX_OBSERVATIONS_PER_FILE: usize = 3;
/// Version tag mixed into the plan cache key and written into every cached plan.
const COMPOSE_PLAN_SCHEMA_VERSION: &str = "v3";
/// Planner temperature (NOTE(review): consumer is outside this excerpt).
const COMPOSE_PLANNER_TEMPERATURE: f32 = 0.0;
/// Above this many files the snapshot summary switches to the medium compaction budget.
const COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD: usize = 60;
/// Above this many hunks the snapshot summary switches to the medium compaction budget.
const COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD: usize = 200;
/// Above this many files the summary uses the tightest budget and planning switches to areas.
const COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD: usize = 150;
/// Above this many hunks the summary uses the tightest budget and planning switches to areas.
const COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD: usize = 500;

// --- Area planning ----------------------------------------------------------------------------

/// A path bucket with at most this many files (and few enough hunks) is not split further.
const COMPOSE_AREA_TARGET_MAX_FILES: usize = 60;
/// A path bucket with at most this many hunks (and few enough files) is not split further.
const COMPOSE_AREA_TARGET_MAX_HUNKS: usize = 140;
/// Path depth at which bucket splitting stops regardless of bucket size.
const COMPOSE_AREA_TARGET_MAX_DEPTH: usize = 6;
/// Monolith-fallback target threshold (NOTE(review): consumer is outside this excerpt).
const COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD: usize = 8;
/// Monolith-fallback workstream threshold (NOTE(review): consumer is outside this excerpt).
const COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD: usize = 3;

// --- Binding and message generation -----------------------------------------------------------

/// Per-request file limit for binding (NOTE(review): consumer is outside this excerpt).
const MAX_BIND_FILES_PER_REQUEST: usize = 18;
/// Per-request hunk limit for binding (NOTE(review): consumer is outside this excerpt).
const MAX_BIND_HUNKS_PER_REQUEST: usize = 120;
/// Message-generation parallelism (NOTE(review): consumer is outside this excerpt).
const COMPOSE_MESSAGE_PARALLELISM: usize = 8;
58
/// Repository state recorded by [`capture_compose_base_state`].
///
/// Two states compare equal only when all three recorded values match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ComposeBaseState {
    /// Value returned by `get_head_hash` at capture time.
    head_hash: String,
    /// Value returned by `current_head_ref` at capture time.
    head_ref: String,
    /// Value returned by `write_real_index_tree` at capture time.
    index_tree: String,
}
65
66#[tracing::instrument(target = "lgit", name = "compose.capture_base_state", skip_all, fields(dir))]
67pub fn capture_compose_base_state(dir: &str) -> Result<ComposeBaseState> {
68 Ok(ComposeBaseState {
69 head_hash: get_head_hash(dir)?,
70 head_ref: current_head_ref(dir)?,
71 index_tree: write_real_index_tree(dir)?,
72 })
73}
74
/// How a compose diff is handed to analysis, as chosen by `compose_analysis_strategy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeAnalysisStrategy {
    /// The diff is within both configured limits.
    Direct,
    /// The diff exceeds the configured length or token limit.
    SmartTruncate,
    /// `should_use_map_reduce` accepted the diff.
    MapReduce,
}
81
82fn compose_analysis_strategy(
83 diff: &str,
84 config: &CommitConfig,
85 counter: &TokenCounter,
86) -> ComposeAnalysisStrategy {
87 if should_use_map_reduce(diff, config, counter) {
88 return ComposeAnalysisStrategy::MapReduce;
89 }
90
91 let diff_tokens = counter.count_sync(diff);
92 if diff.len() > config.max_diff_length || diff_tokens > config.max_diff_tokens {
93 return ComposeAnalysisStrategy::SmartTruncate;
94 }
95
96 ComposeAnalysisStrategy::Direct
97}
98
99fn compose_truncation_length(config: &CommitConfig) -> usize {
100 config
101 .max_diff_length
102 .min(config.max_diff_tokens.saturating_mul(4))
103 .max(1)
104}
105
106#[derive(Debug, Deserialize, Serialize)]
107struct ComposeIntentResponse {
108 groups: Vec<ComposeIntentGroup>,
109}
110
111#[derive(Debug, Deserialize, Serialize)]
112struct ComposeBindingResponse {
113 assignments: Vec<ComposeBindingAssignment>,
114}
115
116#[derive(Debug, Serialize, Deserialize)]
117struct ComposeCachedPlan {
118 schema_version: String,
119 cache_key: String,
120 plan: ComposeExecutablePlan,
121}
122
/// A file with several candidate groups, together with its hunks.
///
/// NOTE(review): producers and consumers of this type are outside this excerpt.
#[derive(Clone, Debug)]
struct AmbiguousFileBinding {
    /// Identifier of the file.
    file_id: String,
    /// Path of the file.
    path: String,
    /// Group IDs the file could belong to.
    candidate_group_ids: Vec<String>,
    /// Hunk IDs belonging to the file.
    hunk_ids: Vec<String>,
}
130
/// Candidate groups recorded for an ambiguous hunk.
///
/// NOTE(review): producers and consumers of this type are outside this excerpt.
#[derive(Clone, Debug)]
struct AmbiguousHunkContext {
    /// Group IDs the hunk could belong to.
    candidate_group_ids: Vec<String>,
}
135
/// Map from a string key to an ordered, duplicate-free set of strings.
///
/// NOTE(review): presumably group ID -> hunk IDs or the reverse; confirm against the users of
/// this alias, which are outside this excerpt.
type HunkAssignments = HashMap<String, BTreeSet<String>>;
137
/// Outcome of evaluating a binding: what was assigned and what remains unresolved.
///
/// NOTE(review): the meaning of the map keys is not visible in this excerpt.
#[derive(Debug)]
struct BindingEvaluation {
    /// Assigned IDs, keyed by string.
    assigned: HashMap<String, Vec<String>>,
    /// IDs left without an assignment.
    unresolved: Vec<String>,
}
143
/// Limits applied while rendering the snapshot summary.
#[derive(Clone, Copy, Debug)]
struct SnapshotSummaryBudget {
    /// Maximum number of observations rendered per file.
    max_observations_per_file: usize,
    /// Maximum number of hunks rendered per file; `None` renders every hunk.
    max_hunks_per_file: Option<usize>,
}
149
/// Granularity of the planning targets.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PlanningMode {
    /// One target per file.
    File,
    /// Targets are path-based areas that may span several files.
    Area,
}
155
/// One unit the planner can assign to a group: a single file or a path-based area.
#[derive(Clone, Debug)]
struct PlanningTarget {
    /// Identifier used in prompts (a file ID in file mode, `A001`-style in area mode).
    target_id: String,
    /// Display label (the file path in file mode, the bucket label in area mode).
    label: String,
    /// File IDs covered by this target.
    file_ids: Vec<String>,
    /// Total hunks across the covered files.
    hunk_count: usize,
    /// Total added lines across the covered files.
    additions: usize,
    /// Total deleted lines across the covered files.
    deletions: usize,
}
165
166#[derive(Debug, Clone)]
167struct PlanningIndex {
168 mode: PlanningMode,
169 targets: Vec<PlanningTarget>,
170 aliases: HashMap<String, String>,
171}
172
/// A labelled set of files produced while splitting a snapshot into planning areas.
#[derive(Clone, Debug)]
struct PlanningBucket {
    /// Label derived from the paths of the bucket's files.
    label: String,
    /// File IDs that fell into this bucket.
    file_ids: Vec<String>,
}
178
179impl PlanningIndex {
180 fn expand_target_ids(&self, target_ids: &[String]) -> Vec<String> {
181 let mut expanded = Vec::new();
182 let mut seen_file_ids = HashSet::new();
183
184 for target_id in target_ids {
185 if let Some(target) = self
186 .targets
187 .iter()
188 .find(|candidate| candidate.target_id == *target_id)
189 {
190 for file_id in &target.file_ids {
191 if seen_file_ids.insert(file_id.clone()) {
192 expanded.push(file_id.clone());
193 }
194 }
195 }
196 }
197
198 expanded
199 }
200}
201
202impl SnapshotSummaryBudget {
203 const fn is_compacted(self) -> bool {
204 self.max_hunks_per_file.is_some()
205 }
206}
207
/// Returns `true` when `path` names a dependency manifest or lock file.
///
/// Only the final path component is inspected. It matches when it is one of the well-known
/// manifest names below (case-sensitive), or when its extension is `lock` or `lockb`
/// (ASCII case-insensitive).
fn is_dependency_manifest(path: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        "Cargo.toml",
        "Cargo.lock",
        "package.json",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "build.gradle",
        "build.gradle.kts",
        "gradle.properties",
        "pom.xml",
    ];

    match Path::new(path).file_name().and_then(|name| name.to_str()) {
        None => false,
        Some(name) if KNOWN_MANIFESTS.iter().any(|known| *known == name) => true,
        Some(name) => match Path::new(name).extension() {
            Some(ext) => ext.eq_ignore_ascii_case("lock") || ext.eq_ignore_ascii_case("lockb"),
            None => false,
        },
    }
}
247
248fn save_debug_artifact<T: Serialize>(
249 debug_dir: Option<&Path>,
250 filename: &str,
251 value: &T,
252) -> Result<()> {
253 let Some(debug_dir) = debug_dir else {
254 return Ok(());
255 };
256
257 fs::create_dir_all(debug_dir)?;
258 let path = debug_dir.join(filename);
259 let json = serde_json::to_string_pretty(value)?;
260 fs::write(path, json)?;
261 Ok(())
262}
263
/// Hashes the UTF-8 bytes of `input` with 64-bit FNV-1a.
///
/// Returns the digest as 16 lowercase, zero-padded hexadecimal digits.
fn fnv1a_64(input: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime.
    let digest = input
        .bytes()
        .fold(OFFSET_BASIS, |state, byte| (state ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
272
273fn compose_plan_cache_key(
274 snapshot: &ComposeSnapshot,
275 max_commits: usize,
276 analysis_model: &str,
277) -> String {
278 fnv1a_64(&format!(
279 "{COMPOSE_PLAN_SCHEMA_VERSION}\n{analysis_model}\n{max_commits}\n{}\n{}",
280 snapshot.diff, snapshot.stat
281 ))
282}
283
284fn compose_plan_cache_path(
285 dir: &str,
286 snapshot: &ComposeSnapshot,
287 max_commits: usize,
288 analysis_model: &str,
289) -> Result<PathBuf> {
290 let git_dir = get_git_dir(dir)?;
291 Ok(git_dir.join("llm-git").join(format!(
292 "compose-plan-{}.json",
293 compose_plan_cache_key(snapshot, max_commits, analysis_model)
294 )))
295}
296
297fn load_cached_plan(
298 dir: &str,
299 snapshot: &ComposeSnapshot,
300 max_commits: usize,
301 analysis_model: &str,
302) -> Result<Option<ComposeExecutablePlan>> {
303 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
304 if !cache_path.exists() {
305 return Ok(None);
306 }
307
308 let content = match fs::read_to_string(&cache_path) {
309 Ok(content) => content,
310 Err(err) => {
311 eprintln!("{}", style::warning(&format!("Ignoring unreadable compose plan cache: {err}")));
312 return Ok(None);
313 },
314 };
315 let cached: ComposeCachedPlan = match serde_json::from_str(&content) {
316 Ok(cached) => cached,
317 Err(err) => {
318 eprintln!(
319 "{}",
320 style::warning(&format!("Discarding corrupted compose plan cache: {err}"))
321 );
322 let _ = fs::remove_file(&cache_path);
323 return Ok(None);
324 },
325 };
326 let expected_key = compose_plan_cache_key(snapshot, max_commits, analysis_model);
327
328 if cached.schema_version != COMPOSE_PLAN_SCHEMA_VERSION || cached.cache_key != expected_key {
329 return Ok(None);
330 }
331 if let Err(err) = validate_executable_plan(snapshot, &cached.plan) {
332 eprintln!(
333 "{}",
334 style::warning(&format!(
335 "Discarding cached compose plan (no longer valid for current snapshot): {err}"
336 ))
337 );
338 let _ = fs::remove_file(&cache_path);
339 return Ok(None);
340 }
341 Ok(Some(cached.plan))
342}
343
344fn save_cached_plan(
345 dir: &str,
346 snapshot: &ComposeSnapshot,
347 max_commits: usize,
348 analysis_model: &str,
349 plan: &ComposeExecutablePlan,
350) -> Result<()> {
351 let cache_path = compose_plan_cache_path(dir, snapshot, max_commits, analysis_model)?;
352 if let Some(parent) = cache_path.parent() {
353 fs::create_dir_all(parent)?;
354 }
355
356 let cached = ComposeCachedPlan {
357 schema_version: COMPOSE_PLAN_SCHEMA_VERSION.to_string(),
358 cache_key: compose_plan_cache_key(snapshot, max_commits, analysis_model),
359 plan: plan.clone(),
360 };
361 fs::write(cache_path, serde_json::to_string_pretty(&cached)?)?;
362 Ok(())
363}
364
/// Formats a hunk line span for display.
///
/// An empty span renders as `0`, a single line as its line number, and anything longer as
/// an inclusive `start-end` range.
fn format_line_range(start: usize, count: usize) -> String {
    if count == 0 {
        return "0".to_string();
    }
    if count == 1 {
        return start.to_string();
    }

    let end = start + count - 1;
    format!("{start}-{end}")
}
372
373const fn snapshot_summary_budget(snapshot: &ComposeSnapshot) -> SnapshotSummaryBudget {
374 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
375 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
376 {
377 SnapshotSummaryBudget { max_observations_per_file: 1, max_hunks_per_file: Some(2) }
378 } else if snapshot.files.len() > COMPOSE_SUMMARY_MEDIUM_FILE_THRESHOLD
379 || snapshot.hunks.len() > COMPOSE_SUMMARY_MEDIUM_HUNK_THRESHOLD
380 {
381 SnapshotSummaryBudget { max_observations_per_file: 2, max_hunks_per_file: Some(3) }
382 } else {
383 SnapshotSummaryBudget {
384 max_observations_per_file: MAX_OBSERVATIONS_PER_FILE,
385 max_hunks_per_file: None,
386 }
387 }
388}
389
/// Picks up to `max_samples` evenly spread indices from `0..count`.
///
/// * When `count <= max_samples`, every index is returned.
/// * When `max_samples` is 0, nothing is returned (previously this case returned `[0]`,
///   one more sample than was asked for).
/// * When `max_samples` is 1, only index 0 is returned.
/// * Otherwise the first and last index are always included and the rest are spaced
///   proportionally between them.
///
/// The result is sorted in ascending order and contains no duplicates.
fn sample_positions(count: usize, max_samples: usize) -> Vec<usize> {
    if count <= max_samples {
        return (0..count).collect();
    }

    match max_samples {
        0 => Vec::new(),
        1 => vec![0],
        _ => {
            let last = count - 1;
            let steps = max_samples - 1;
            let mut positions: Vec<usize> =
                (0..max_samples).map(|slot| slot * last / steps).collect();
            // The positions never decrease, so dropping adjacent repeats removes any duplicate.
            positions.dedup();
            positions
        },
    }
}
409
410fn sampled_hunk_ids_for_summary(file: &ComposeFile, budget: SnapshotSummaryBudget) -> Vec<&str> {
411 match budget.max_hunks_per_file {
412 None => file.hunk_ids.iter().map(String::as_str).collect(),
413 Some(max_hunks_per_file) => sample_positions(file.hunk_ids.len(), max_hunks_per_file)
414 .into_iter()
415 .filter_map(|idx| file.hunk_ids.get(idx).map(String::as_str))
416 .collect(),
417 }
418}
419
420fn render_snapshot_summary(snapshot: &ComposeSnapshot, observations: &[FileObservation]) -> String {
421 let budget = snapshot_summary_budget(snapshot);
422 let observations_by_file: HashMap<&str, Vec<&str>> = observations
423 .iter()
424 .map(|observation| {
425 (
426 observation.file.as_str(),
427 observation
428 .observations
429 .iter()
430 .map(String::as_str)
431 .take(budget.max_observations_per_file)
432 .collect(),
433 )
434 })
435 .collect();
436
437 let mut out = String::new();
438 if budget.is_compacted() {
439 let max_hunks_per_file = budget.max_hunks_per_file.unwrap_or_default();
440 writeln!(
441 out,
442 "# snapshot compacted: all file IDs are preserved; showing up to {max_hunks_per_file} \
443 representative hunks and {} observation(s) per file",
444 budget.max_observations_per_file
445 )
446 .unwrap();
447 }
448
449 for file in &snapshot.files {
450 writeln!(out, "- {} {}", file.file_id, file.summary).unwrap();
451 if let Some(file_observations) = observations_by_file.get(file.path.as_str()) {
452 for observation in file_observations {
453 writeln!(out, " observation: {observation}").unwrap();
454 }
455 }
456
457 let rendered_hunk_ids = sampled_hunk_ids_for_summary(file, budget);
458 for hunk_id in &rendered_hunk_ids {
459 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
460 if hunk.synthetic {
461 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
462 } else {
463 writeln!(
464 out,
465 " - {} old:{} new:{} :: {}",
466 hunk.hunk_id,
467 format_line_range(hunk.old_start, hunk.old_count),
468 format_line_range(hunk.new_start, hunk.new_count),
469 hunk.snippet
470 )
471 .unwrap();
472 }
473 }
474 }
475
476 let omitted_hunks = file.hunk_ids.len().saturating_sub(rendered_hunk_ids.len());
477 if omitted_hunks > 0 {
478 writeln!(out, " ... {omitted_hunks} more hunks omitted from {}", file.file_id).unwrap();
479 }
480 }
481
482 out
483}
484
485const fn planning_mode_for_snapshot(snapshot: &ComposeSnapshot) -> PlanningMode {
486 if snapshot.files.len() > COMPOSE_SUMMARY_LARGE_FILE_THRESHOLD
487 || snapshot.hunks.len() > COMPOSE_SUMMARY_LARGE_HUNK_THRESHOLD
488 {
489 PlanningMode::Area
490 } else {
491 PlanningMode::File
492 }
493}
494
/// Counts the `/`-separated segments of `path`.
///
/// Empty segments count too, so the result is always at least 1.
fn path_depth(path: &str) -> usize {
    // A string with N separators has N + 1 segments.
    path.matches('/').count() + 1
}
498
/// Returns the first `depth` `/`-separated segments of `path`, joined with `/`.
///
/// A depth of 0 yields an empty string; a depth beyond the number of segments yields the
/// whole path.
fn prefix_at_depth(path: &str, depth: usize) -> String {
    let leading_segments: Vec<&str> = path.split('/').take(depth).collect();
    leading_segments.join("/")
}
508
/// Returns the longest run of leading `/`-separated segments shared by every path.
///
/// Comparison is by whole segment, so `ab/c` and `a/c` share nothing. An empty slice, or
/// paths with no shared leading segment, yield an empty string.
fn common_path_prefix(paths: &[String]) -> String {
    let mut remaining = paths.iter();
    let Some(first) = remaining.next() else {
        return String::new();
    };

    let mut shared: Vec<&str> = first.split('/').collect();
    for path in remaining {
        if shared.is_empty() {
            // Nothing left to narrow down.
            break;
        }

        let keep = shared
            .iter()
            .zip(path.split('/'))
            .take_while(|(ours, theirs)| **ours == *theirs)
            .count();
        shared.truncate(keep);
    }

    shared.join("/")
}
530
531fn bucket_hunk_count(snapshot: &ComposeSnapshot, file_ids: &[String]) -> usize {
532 file_ids
533 .iter()
534 .filter_map(|file_id| snapshot.file_by_id(file_id))
535 .map(|file| file.hunk_ids.len())
536 .sum()
537}
538
539fn group_file_ids_by_prefix(
540 snapshot: &ComposeSnapshot,
541 file_ids: &[String],
542 depth: usize,
543) -> BTreeMap<String, Vec<String>> {
544 let mut groups = BTreeMap::new();
545
546 for file_id in file_ids {
547 if let Some(file) = snapshot.file_by_id(file_id) {
548 groups
549 .entry(prefix_at_depth(&file.path, depth))
550 .or_insert_with(Vec::new)
551 .push(file_id.clone());
552 }
553 }
554
555 groups
556}
557
558fn planning_bucket_label(snapshot: &ComposeSnapshot, file_ids: &[String]) -> String {
559 let paths: Vec<String> = file_ids
560 .iter()
561 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
562 .collect();
563
564 let common_prefix = common_path_prefix(&paths);
565 if common_prefix.is_empty() {
566 paths.first().cloned().unwrap_or_else(|| "misc".to_string())
567 } else {
568 common_prefix
569 }
570}
571
572fn collect_planning_buckets(
573 snapshot: &ComposeSnapshot,
574 file_ids: &[String],
575 depth: usize,
576) -> Vec<PlanningBucket> {
577 let file_count = file_ids.len();
578 let hunk_count = bucket_hunk_count(snapshot, file_ids);
579 let max_path_depth = file_ids
580 .iter()
581 .filter_map(|file_id| snapshot.file_by_id(file_id))
582 .map(|file| path_depth(&file.path))
583 .max()
584 .unwrap_or(depth);
585
586 let should_stop =
587 file_count <= COMPOSE_AREA_TARGET_MAX_FILES && hunk_count <= COMPOSE_AREA_TARGET_MAX_HUNKS;
588 if should_stop || depth >= COMPOSE_AREA_TARGET_MAX_DEPTH || depth >= max_path_depth {
589 return vec![PlanningBucket {
590 label: planning_bucket_label(snapshot, file_ids),
591 file_ids: file_ids.to_vec(),
592 }];
593 }
594
595 let next_depth = depth + 1;
596 let groups = group_file_ids_by_prefix(snapshot, file_ids, next_depth);
597 if groups.len() <= 1 {
598 return collect_planning_buckets(snapshot, file_ids, next_depth);
599 }
600
601 groups
602 .into_values()
603 .flat_map(|group_file_ids| collect_planning_buckets(snapshot, &group_file_ids, next_depth))
604 .collect()
605}
606
607fn build_area_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
608 let all_file_ids: Vec<String> = snapshot
609 .files
610 .iter()
611 .map(|file| file.file_id.clone())
612 .collect();
613 let buckets = collect_planning_buckets(snapshot, &all_file_ids, 0);
614
615 buckets
616 .into_iter()
617 .enumerate()
618 .map(|(idx, bucket)| {
619 let mut additions = 0_usize;
620 let mut deletions = 0_usize;
621 let mut hunk_count = 0_usize;
622
623 for file_id in &bucket.file_ids {
624 if let Some(file) = snapshot.file_by_id(file_id) {
625 additions = additions.saturating_add(file.additions);
626 deletions = deletions.saturating_add(file.deletions);
627 hunk_count = hunk_count.saturating_add(file.hunk_ids.len());
628 }
629 }
630
631 PlanningTarget {
632 target_id: format!("A{:03}", idx + 1),
633 label: bucket.label,
634 file_ids: bucket.file_ids,
635 hunk_count,
636 additions,
637 deletions,
638 }
639 })
640 .collect()
641}
642
643fn build_file_planning_targets(snapshot: &ComposeSnapshot) -> Vec<PlanningTarget> {
644 snapshot
645 .files
646 .iter()
647 .map(|file| PlanningTarget {
648 target_id: file.file_id.clone(),
649 label: file.path.clone(),
650 file_ids: vec![file.file_id.clone()],
651 hunk_count: file.hunk_ids.len(),
652 additions: file.additions,
653 deletions: file.deletions,
654 })
655 .collect()
656}
657
658fn build_planning_index(snapshot: &ComposeSnapshot) -> PlanningIndex {
659 let mode = planning_mode_for_snapshot(snapshot);
660 let targets = match mode {
661 PlanningMode::File => build_file_planning_targets(snapshot),
662 PlanningMode::Area => build_area_planning_targets(snapshot),
663 };
664
665 let aliases = targets
666 .iter()
667 .flat_map(|target| {
668 let normalized_label = normalize_file_reference(&target.label);
669 [
670 (target.target_id.clone(), target.target_id.clone()),
671 (target.target_id.to_ascii_uppercase(), target.target_id.clone()),
672 (normalized_label, target.target_id.clone()),
673 ]
674 })
675 .collect();
676
677 PlanningIndex { mode, targets, aliases }
678}
679
680fn sample_file_ids_for_target(target: &PlanningTarget) -> Vec<&str> {
681 sample_positions(target.file_ids.len(), 4)
682 .into_iter()
683 .filter_map(|idx| target.file_ids.get(idx).map(String::as_str))
684 .collect()
685}
686
687fn sample_hunk_ids_for_target(target: &PlanningTarget, snapshot: &ComposeSnapshot) -> Vec<String> {
688 let hunk_ids: Vec<&String> = target
689 .file_ids
690 .iter()
691 .filter_map(|file_id| snapshot.file_by_id(file_id))
692 .flat_map(|file| file.hunk_ids.iter())
693 .collect();
694
695 sample_positions(hunk_ids.len(), 4)
696 .into_iter()
697 .filter_map(|idx| hunk_ids.get(idx).map(|hunk_id| (*hunk_id).clone()))
698 .collect()
699}
700
701fn render_planning_stat(index: &PlanningIndex) -> String {
702 let mut out = String::new();
703
704 match index.mode {
705 PlanningMode::File => {
706 writeln!(out, "# planning over individual file IDs").unwrap();
707 },
708 PlanningMode::Area => {
709 writeln!(
710 out,
711 "# planning over {} area IDs spanning {} files",
712 index.targets.len(),
713 index
714 .targets
715 .iter()
716 .flat_map(|target| target.file_ids.iter())
717 .collect::<HashSet<_>>()
718 .len()
719 )
720 .unwrap();
721 },
722 }
723
724 for target in &index.targets {
725 writeln!(
726 out,
727 "{} {} | {} files | {} hunks | +{}/-{}",
728 target.target_id,
729 target.label,
730 target.file_ids.len(),
731 target.hunk_count,
732 target.additions,
733 target.deletions
734 )
735 .unwrap();
736 }
737
738 out
739}
740
741fn render_planning_snapshot_summary(
742 snapshot: &ComposeSnapshot,
743 observations: &[FileObservation],
744 index: &PlanningIndex,
745) -> String {
746 if index.mode == PlanningMode::File {
747 return render_snapshot_summary(snapshot, observations);
748 }
749
750 let observations_by_file: HashMap<&str, Vec<&str>> = observations
751 .iter()
752 .map(|observation| {
753 (
754 observation.file.as_str(),
755 observation
756 .observations
757 .iter()
758 .map(String::as_str)
759 .take(1)
760 .collect(),
761 )
762 })
763 .collect();
764
765 let mut out = String::new();
766 writeln!(
767 out,
768 "# snapshot compacted into path-based planning areas; use the area IDs below in `file_ids`"
769 )
770 .unwrap();
771
772 for target in &index.targets {
773 writeln!(
774 out,
775 "- {} {} ({} files, {} hunks, +{}/-{})",
776 target.target_id,
777 target.label,
778 target.file_ids.len(),
779 target.hunk_count,
780 target.additions,
781 target.deletions
782 )
783 .unwrap();
784
785 let sample_file_ids = sample_file_ids_for_target(target);
786 if !sample_file_ids.is_empty() {
787 let sample_files: Vec<String> = sample_file_ids
788 .iter()
789 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
790 .collect();
791 writeln!(out, " files: {}", sample_files.join(", ")).unwrap();
792 let omitted = target.file_ids.len().saturating_sub(sample_files.len());
793 if omitted > 0 {
794 writeln!(out, " ... {omitted} more files omitted from {}", target.target_id).unwrap();
795 }
796 }
797
798 let mut rendered_observations = 0_usize;
799 for file_id in &target.file_ids {
800 let Some(file) = snapshot.file_by_id(file_id) else {
801 continue;
802 };
803 let Some(file_observations) = observations_by_file.get(file.path.as_str()) else {
804 continue;
805 };
806
807 for observation in file_observations {
808 writeln!(out, " observation: {observation}").unwrap();
809 rendered_observations += 1;
810 if rendered_observations >= 2 {
811 break;
812 }
813 }
814
815 if rendered_observations >= 2 {
816 break;
817 }
818 }
819
820 for hunk_id in sample_hunk_ids_for_target(target, snapshot) {
821 if let Some(hunk) = snapshot.hunk_by_id(&hunk_id) {
822 if hunk.synthetic {
823 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
824 } else {
825 writeln!(
826 out,
827 " - {} old:{} new:{} :: {}",
828 hunk.hunk_id,
829 format_line_range(hunk.old_start, hunk.old_count),
830 format_line_range(hunk.new_start, hunk.new_count),
831 hunk.snippet
832 )
833 .unwrap();
834 }
835 }
836 }
837 }
838
839 out
840}
841
842fn render_planning_targets(index: &PlanningIndex, snapshot: &ComposeSnapshot) -> String {
843 match index.mode {
844 PlanningMode::File => format!(
845 "File IDs only. Each target maps to exactly one file. Coverage: {} files.",
846 snapshot.files.len()
847 ),
848 PlanningMode::Area => format!(
849 "Area IDs only. Each target may expand to multiple files by shared path prefix. \
850 Coverage: {} areas spanning {} files.",
851 index.targets.len(),
852 snapshot.files.len()
853 ),
854 }
855}
856
857fn render_planning_notes(index: &PlanningIndex) -> String {
858 match index.mode {
859 PlanningMode::File => {
860 "Use only the provided file IDs and keep the grouping conservative.".to_string()
861 },
862 PlanningMode::Area => "This snapshot is large, so files were compacted into path-based \
863 planning areas. Split along independent subsystems or workstreams \
864 when the areas point at unrelated changes."
865 .to_string(),
866 }
867}
868
869fn render_split_bias(index: &PlanningIndex) -> String {
870 match index.mode {
871 PlanningMode::File => "Prefer fewer groups when the split is uncertain.".to_string(),
872 PlanningMode::Area => "Prefer splitting unrelated areas into separate groups. Only return \
873 one broad group if nearly every area clearly belongs to the same \
874 atomic change."
875 .to_string(),
876 }
877}
878
879fn build_intent_schema(config: &CommitConfig) -> serde_json::Value {
880 let type_enum: Vec<&str> = config.types.keys().map(String::as_str).collect();
881
882 strict_json_schema(
883 serde_json::json!({
884 "groups": {
885 "type": "array",
886 "items": {
887 "type": "object",
888 "properties": {
889 "group_id": {
890 "type": "string",
891 "description": "Stable identifier like G1, G2, G3"
892 },
893 "file_ids": {
894 "type": "array",
895 "description": "Planning target IDs that belong to this logical commit. Use the exact IDs supplied in the prompt, even when they represent path-based areas instead of individual files. Never place group IDs or placeholder strings here. Repeat IDs across groups when a target is shared.",
896 "items": { "type": "string" }
897 },
898 "type": {
899 "type": "string",
900 "enum": type_enum,
901 "description": "Conventional commit type for this group"
902 },
903 "scope": {
904 "type": "string",
905 "description": "Optional scope (module/component). Omit if broad."
906 },
907 "rationale": {
908 "type": "string",
909 "description": "Brief explanation of the logical change"
910 },
911 "dependencies": {
912 "type": "array",
913 "description": "Group IDs this group depends on",
914 "items": { "type": "string" }
915 }
916 },
917 "required": ["group_id", "file_ids", "type", "rationale", "dependencies"],
918 "additionalProperties": false
919 }
920 }
921 }),
922 &["groups"],
923 )
924}
925
926fn build_binding_schema() -> serde_json::Value {
927 strict_json_schema(
928 serde_json::json!({
929 "assignments": {
930 "type": "array",
931 "items": {
932 "type": "object",
933 "properties": {
934 "group_id": { "type": "string" },
935 "hunk_ids": {
936 "type": "array",
937 "items": { "type": "string" }
938 }
939 },
940 "required": ["group_id", "hunk_ids"],
941 "additionalProperties": false
942 }
943 }
944 }),
945 &["assignments"],
946 )
947}
948
949fn compute_dependency_order<T, FId, FDeps>(
950 groups: &[T],
951 group_id: FId,
952 dependencies: FDeps,
953) -> Result<Vec<usize>>
954where
955 FId: Fn(&T) -> &str,
956 FDeps: Fn(&T) -> &[String],
957{
958 let mut index_by_id = HashMap::new();
959 for (idx, group) in groups.iter().enumerate() {
960 let id = group_id(group);
961 if id.trim().is_empty() {
962 return Err(CommitGenError::Other("Compose group_id cannot be empty".to_string()));
963 }
964 if index_by_id.insert(id.to_string(), idx).is_some() {
965 return Err(CommitGenError::Other(format!("Duplicate compose group_id '{id}'")));
966 }
967 }
968
969 let mut in_degree = vec![0_usize; groups.len()];
970 let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); groups.len()];
971
972 for (idx, group) in groups.iter().enumerate() {
973 for dependency in dependencies(group) {
974 let dependency_idx = index_by_id.get(dependency).copied().ok_or_else(|| {
975 CommitGenError::Other(format!(
976 "Group {} depends on unknown group_id '{}'",
977 group_id(group),
978 dependency
979 ))
980 })?;
981 if dependency_idx == idx {
982 return Err(CommitGenError::Other(format!(
983 "Group {} depends on itself",
984 group_id(group)
985 )));
986 }
987
988 adjacency[dependency_idx].push(idx);
989 in_degree[idx] += 1;
990 }
991 }
992
993 let mut queue: Vec<usize> = (0..groups.len())
994 .filter(|idx| in_degree[*idx] == 0)
995 .collect();
996 let mut order = Vec::with_capacity(groups.len());
997
998 while let Some(node) = queue.pop() {
999 order.push(node);
1000 for neighbor in &adjacency[node] {
1001 in_degree[*neighbor] -= 1;
1002 if in_degree[*neighbor] == 0 {
1003 queue.push(*neighbor);
1004 }
1005 }
1006 }
1007
1008 if order.len() != groups.len() {
1009 return Err(CommitGenError::Other(
1010 "Circular dependency detected in compose groups".to_string(),
1011 ));
1012 }
1013
1014 Ok(order)
1015}
1016
/// Cleans up a file or target reference as it may appear in model output.
///
/// Surrounding whitespace, wrapping quote characters (`` ` ``, `"`, `'`), and trailing `,` or
/// `;` are removed, followed by any leading `./`.
///
/// The wrapper characters are peeled repeatedly until nothing changes, so they are removed
/// regardless of nesting order. A single pass left a dangling quote behind when punctuation
/// followed the closing quote (`"src/lib.rs",` became `src/lib.rs"`).
fn normalize_file_reference(raw_file_ref: &str) -> String {
    let mut reference = raw_file_ref.trim();

    loop {
        let peeled = reference
            .trim_end_matches([',', ';'])
            .trim_matches(|ch| matches!(ch, '`' | '"' | '\''))
            .trim();
        // Every step only removes characters, so equal length means nothing was removed.
        if peeled.len() == reference.len() {
            break;
        }
        reference = peeled;
    }

    reference.trim_start_matches("./").to_string()
}
1025
/// Extracts the distinctive lowercase keywords of `text`.
///
/// The text is split on every character that is not an ASCII letter or digit. Words shorter
/// than three characters, stop words, and repeats are dropped; the remaining words are
/// returned lowercased in order of first appearance.
///
/// The stop-word checks borrow the current word (`&current`); that borrow had been
/// garbled into `¤t` by an HTML-entity decoding accident and is restored here.
fn planning_text_tokens(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "and",
        "for",
        "the",
        "with",
        "from",
        "into",
        "after",
        "before",
        "over",
        "under",
        "plus",
        "across",
        "update",
        "updated",
        "refactor",
        "refactored",
        "changes",
        "change",
        "logical",
        "group",
        "groups",
        "commit",
        "commits",
    ];

    let mut seen = HashSet::new();

    text.split(|ch: char| !ch.is_ascii_alphanumeric())
        // Words are pure ASCII here, so byte length equals character count.
        .filter(|word| word.len() >= 3)
        .map(str::to_ascii_lowercase)
        .filter(|word| !STOP_WORDS.contains(&word.as_str()))
        .filter(|word| seen.insert(word.clone()))
        .collect()
}
1077
1078fn extract_group_id_candidate(raw: &str) -> Option<String> {
1079 let normalized = normalize_file_reference(raw);
1080 let uppercase = normalized.to_ascii_uppercase();
1081
1082 if uppercase.chars().all(|ch| ch.is_ascii_digit()) {
1083 return Some(format!("G{uppercase}"));
1084 }
1085
1086 if let Some(rest) = uppercase.strip_prefix('G')
1087 && !rest.is_empty()
1088 && rest.chars().all(|ch| ch.is_ascii_digit())
1089 {
1090 return Some(format!("G{rest}"));
1091 }
1092
1093 let digits: String = uppercase.chars().filter(|ch| ch.is_ascii_digit()).collect();
1094 let compact = uppercase
1095 .chars()
1096 .filter(|ch| !matches!(ch, ' ' | '_' | '-'))
1097 .collect::<String>();
1098 if compact.starts_with("GROUP") && !digits.is_empty() {
1099 return Some(format!("G{digits}"));
1100 }
1101
1102 None
1103}
1104
/// Coarse classification of a changed file, as assigned by `compose_file_category`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ComposeFileCategory {
    /// The file is flagged as binary.
    Binary,
    /// Dependency manifest or lock file.
    Dependency,
    /// Markdown or README file.
    Docs,
    /// Path mentions `prompt` or `system`.
    Prompt,
    /// Lives under a `tests/` directory or has `test`/`spec` in its file name.
    Test,
    /// Configuration file, recognised by extension.
    Config,
    /// Source code file, recognised by extension.
    Source,
    /// Anything not matched by the other categories.
    Other,
}
1116
1117fn compose_file_category(file: &ComposeFile) -> ComposeFileCategory {
1118 if file.is_binary {
1119 return ComposeFileCategory::Binary;
1120 }
1121
1122 if is_dependency_manifest(&file.path) {
1123 return ComposeFileCategory::Dependency;
1124 }
1125
1126 let filename_lower = file.path.to_ascii_lowercase();
1127 let file_name = Path::new(&filename_lower)
1128 .file_name()
1129 .and_then(|name| name.to_str())
1130 .unwrap_or_default();
1131 let extension = Path::new(&filename_lower)
1132 .extension()
1133 .and_then(|ext| ext.to_str())
1134 .unwrap_or_default();
1135
1136 if filename_lower.contains("prompt") || filename_lower.contains("system") {
1137 return ComposeFileCategory::Prompt;
1138 }
1139
1140 if extension == "md" || file_name == "readme" || file_name == "readme.md" {
1141 return ComposeFileCategory::Docs;
1142 }
1143
1144 if filename_lower.contains("/tests/")
1145 || filename_lower.starts_with("tests/")
1146 || file_name.contains("test")
1147 || file_name.contains("spec")
1148 {
1149 return ComposeFileCategory::Test;
1150 }
1151
1152 if matches!(extension, "toml" | "yaml" | "yml" | "json" | "ini" | "cfg" | "conf" | "env") {
1153 return ComposeFileCategory::Config;
1154 }
1155
1156 if matches!(
1157 extension,
1158 "rs"
1159 | "py"
1160 | "js"
1161 | "jsx"
1162 | "ts"
1163 | "tsx"
1164 | "go"
1165 | "java"
1166 | "kt"
1167 | "c"
1168 | "cc"
1169 | "cpp"
1170 | "h"
1171 | "hpp"
1172 | "cs"
1173 | "rb"
1174 | "php"
1175 | "swift"
1176 | "scala"
1177 | "m"
1178 | "mm"
1179 ) {
1180 return ComposeFileCategory::Source;
1181 }
1182
1183 ComposeFileCategory::Other
1184}
1185
/// Counts how many leading `/`-separated segments `left` and `right` share.
///
/// Comparison stops at the first differing segment or when either path runs
/// out of segments. Two empty strings share one (empty) segment.
fn common_path_prefix_depth(left: &str, right: &str) -> usize {
    let mut right_segments = right.split('/');
    let mut depth = 0;

    for left_segment in left.split('/') {
        match right_segments.next() {
            Some(right_segment) if right_segment == left_segment => depth += 1,
            _ => break,
        }
    }

    depth
}
1193
1194fn file_similarity_score(missing_file: &ComposeFile, candidate_file: &ComposeFile) -> i32 {
1195 let mut score = (common_path_prefix_depth(&missing_file.path, &candidate_file.path) as i32) * 25;
1196
1197 if Path::new(&missing_file.path).parent() == Path::new(&candidate_file.path).parent() {
1198 score += 40;
1199 }
1200
1201 if Path::new(&missing_file.path).extension() == Path::new(&candidate_file.path).extension() {
1202 score += 12;
1203 }
1204
1205 if compose_file_category(missing_file) == compose_file_category(candidate_file) {
1206 score += 18;
1207 }
1208
1209 score
1210}
1211
1212fn group_type_bonus(file: &ComposeFile, group: &ComposeIntentGroup) -> i32 {
1213 match (compose_file_category(file), group.commit_type.as_str()) {
1214 (ComposeFileCategory::Docs, "docs") => 25,
1215 (ComposeFileCategory::Test, "test") => 25,
1216 (ComposeFileCategory::Dependency, "build" | "chore" | "ci") => 18,
1217 (ComposeFileCategory::Config, "build" | "chore" | "ci") => 12,
1218 (
1219 ComposeFileCategory::Prompt | ComposeFileCategory::Source,
1220 "feat" | "fix" | "refactor" | "perf",
1221 ) => 10,
1222 _ => 0,
1223 }
1224}
1225
1226fn best_group_for_missing_file(
1227 snapshot: &ComposeSnapshot,
1228 groups: &[ComposeIntentGroup],
1229 missing_file: &ComposeFile,
1230) -> usize {
1231 let mut best_group_idx = 0;
1232 let mut best_score = i32::MIN;
1233 let mut best_group_size = usize::MAX;
1234
1235 for (group_idx, group) in groups.iter().enumerate() {
1236 let similarity = group
1237 .file_ids
1238 .iter()
1239 .filter_map(|file_id| snapshot.file_by_id(file_id))
1240 .map(|candidate_file| file_similarity_score(missing_file, candidate_file))
1241 .max()
1242 .unwrap_or_default();
1243 let score = similarity + group_type_bonus(missing_file, group);
1244 let group_size = group.file_ids.len();
1245
1246 if score > best_score || (score == best_score && group_size < best_group_size) {
1247 best_group_idx = group_idx;
1248 best_score = score;
1249 best_group_size = group_size;
1250 }
1251 }
1252
1253 best_group_idx
1254}
1255
1256fn normalize_dependency_reference(
1257 raw_dependency: &str,
1258 known_group_ids: &HashSet<String>,
1259) -> Option<String> {
1260 let normalized = normalize_file_reference(raw_dependency);
1261 if normalized.is_empty() {
1262 return None;
1263 }
1264
1265 if known_group_ids.contains(&normalized) {
1266 return Some(normalized);
1267 }
1268
1269 let uppercase = normalized.to_ascii_uppercase();
1270 if known_group_ids.contains(&uppercase) {
1271 return Some(uppercase);
1272 }
1273
1274 let candidate = extract_group_id_candidate(&normalized)?;
1275 known_group_ids.contains(&candidate).then_some(candidate)
1276}
1277
1278fn planning_target_match_score(target: &PlanningTarget, group: &ComposeIntentGroup) -> i32 {
1279 let label = target.label.to_ascii_lowercase();
1280 let workstream = workstream_key_for_label(&target.label).to_ascii_lowercase();
1281 let mut score = (target.hunk_count.min(40) as i32) + (target.file_ids.len().min(20) as i32);
1282
1283 if let Some(scope) = &group.scope {
1284 let scope = scope.as_str().to_ascii_lowercase();
1285 if label.contains(&scope) || workstream.contains(&scope) {
1286 score += 140;
1287 }
1288
1289 for segment in scope.split('/') {
1290 if !segment.is_empty() && (label.contains(segment) || workstream.contains(segment)) {
1291 score += 45;
1292 }
1293 }
1294 }
1295
1296 for token in planning_text_tokens(&group.rationale) {
1297 if label.contains(&token) || workstream.contains(&token) {
1298 score += 16;
1299 }
1300 }
1301
1302 match group.commit_type.as_str() {
1303 "ci" if target.label.starts_with(".github/") => score += 120,
1304 "docs"
1305 if target.label.starts_with("docs/")
1306 || Path::new(&target.label)
1307 .extension()
1308 .is_some_and(|ext| ext.eq_ignore_ascii_case("md")) =>
1309 {
1310 score += 80;
1311 },
1312 "build" | "chore"
1313 if target.label.contains("Cargo")
1314 || target.label.contains("package")
1315 || target.label.contains("lock")
1316 || target.label.contains("tsconfig")
1317 || target.label.contains("biome")
1318 || target.label.contains("bun") =>
1319 {
1320 score += 55;
1321 },
1322 _ => {},
1323 }
1324
1325 score
1326}
1327
1328fn seed_group_targets(
1329 groups: &[ComposeIntentGroup],
1330 planning_index: &PlanningIndex,
1331 group_targets: &mut [Vec<String>],
1332 repair_notes: &mut Vec<String>,
1333) {
1334 let mut claimed_target_ids: HashSet<String> = group_targets.iter().flatten().cloned().collect();
1335
1336 for (group_idx, group) in groups.iter().enumerate() {
1337 if !group_targets[group_idx].is_empty() {
1338 continue;
1339 }
1340
1341 let fallback_target = planning_index
1342 .targets
1343 .iter()
1344 .max_by_key(|target| {
1345 let mut score = planning_target_match_score(target, group);
1346 if !claimed_target_ids.contains(&target.target_id) {
1347 score += 60;
1348 }
1349 (score, target.hunk_count, target.file_ids.len())
1350 })
1351 .or_else(|| planning_index.targets.first());
1352
1353 let Some(fallback_target) = fallback_target else {
1354 continue;
1355 };
1356
1357 group_targets[group_idx].push(fallback_target.target_id.clone());
1358 claimed_target_ids.insert(fallback_target.target_id.clone());
1359 repair_notes.push(format!(
1360 "Compose planner left {} without valid planning targets; seeded it with {} ({})",
1361 group.group_id, fallback_target.target_id, fallback_target.label
1362 ));
1363 }
1364}
1365
/// Repairs the planner's raw intent groups into a plan that covers every file.
///
/// On entry each group's `file_ids` holds the planner's raw references, which
/// are resolved here against the planning index's target ids and aliases. On
/// return each group's `file_ids` holds the file ids produced by
/// `PlanningIndex::expand_target_ids`, plus any snapshot files that no group
/// covered. Dependencies are reduced to known, non-self, de-duplicated group
/// ids.
///
/// Returns the repaired groups together with human-readable notes describing
/// every repair, in the order the repairs were made.
///
/// # Errors
///
/// Returns `CommitGenError::Other` when `groups` is empty.
fn normalize_intent_plan(
    snapshot: &ComposeSnapshot,
    planning_index: &PlanningIndex,
    mut groups: Vec<ComposeIntentGroup>,
) -> Result<(Vec<ComposeIntentGroup>, Vec<String>)> {
    if groups.is_empty() {
        return Err(CommitGenError::Other("Compose intent plan returned no groups".to_string()));
    }

    let known_target_ids: HashSet<&str> = planning_index
        .targets
        .iter()
        .map(|target| target.target_id.as_str())
        .collect();
    let mut repair_notes = Vec::new();
    let mut covered_file_ids = HashSet::new();
    let mut normalized_group_targets = Vec::with_capacity(groups.len());

    // Pass 1: resolve each group's raw references to canonical target ids.
    for group in &groups {
        if group.file_ids.is_empty() {
            repair_notes.push(format!(
                "Compose planner left {} without planning targets; assigning targets heuristically",
                group.group_id
            ));
        }

        let mut normalized_target_ids = Vec::new();
        let mut seen_target_ids = HashSet::new();
        for raw_target_ref in &group.file_ids {
            let normalized_ref = normalize_file_reference(raw_target_ref);
            // Resolution order: exact id, uppercased id, alias; otherwise drop.
            let canonical_target_id = if known_target_ids.contains(normalized_ref.as_str()) {
                normalized_ref.clone()
            } else {
                let uppercase_ref = normalized_ref.to_ascii_uppercase();
                if known_target_ids.contains(uppercase_ref.as_str()) {
                    uppercase_ref
                } else if let Some(target_id) = planning_index.aliases.get(&normalized_ref) {
                    if raw_target_ref != target_id {
                        repair_notes.push(format!(
                            "Mapped compose planner target reference '{raw_target_ref}' to {target_id}"
                        ));
                    }
                    target_id.clone()
                } else {
                    repair_notes.push(format!(
                        "Dropped unknown planning target '{}' from {}",
                        raw_target_ref, group.group_id
                    ));
                    continue;
                }
            };

            // Keep only the first occurrence of each target within a group.
            if seen_target_ids.insert(canonical_target_id.clone()) {
                normalized_target_ids.push(canonical_target_id);
            }
        }

        normalized_group_targets.push(normalized_target_ids);
    }

    // Groups that ended up with no valid targets get one heuristic target.
    seed_group_targets(&groups, planning_index, &mut normalized_group_targets, &mut repair_notes);

    // Pass 2: normalize dependencies against the set of group ids in this plan.
    let known_group_ids: HashSet<String> =
        groups.iter().map(|group| group.group_id.clone()).collect();
    for group in &mut groups {
        let mut normalized_dependencies = Vec::new();
        let mut seen_dependencies = HashSet::new();

        for raw_dependency in &group.dependencies {
            let Some(dependency) = normalize_dependency_reference(raw_dependency, &known_group_ids)
            else {
                repair_notes.push(format!(
                    "Dropped unknown dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            };

            if dependency == group.group_id {
                repair_notes.push(format!(
                    "Dropped self-dependency '{}' from {}",
                    raw_dependency, group.group_id
                ));
                continue;
            }

            // A mapping note is emitted only for the first occurrence of a dependency.
            if seen_dependencies.insert(dependency.clone()) {
                if raw_dependency != &dependency {
                    repair_notes.push(format!(
                        "Mapped compose planner dependency '{raw_dependency}' to {dependency}"
                    ));
                }
                normalized_dependencies.push(dependency);
            }
        }

        group.dependencies = normalized_dependencies;
    }

    // Pass 3: replace each group's target ids with the file ids they expand to.
    for (group, target_ids) in groups.iter_mut().zip(normalized_group_targets) {
        let expanded_file_ids = planning_index.expand_target_ids(&target_ids);
        for file_id in &expanded_file_ids {
            covered_file_ids.insert(file_id.clone());
        }
        group.file_ids = expanded_file_ids;
    }

    // Pass 4: attach every snapshot file that no group covers to its best-fitting group.
    for file in &snapshot.files {
        if covered_file_ids.contains(file.file_id.as_str()) {
            continue;
        }

        let target_group_idx = best_group_for_missing_file(snapshot, &groups, file);
        let target_group = &mut groups[target_group_idx];
        target_group.file_ids.push(file.file_id.clone());
        covered_file_ids.insert(file.file_id.clone());
        repair_notes.push(format!(
            "Compose planner omitted {} ({}); assigned it to {}",
            file.file_id, file.path, target_group.group_id
        ));
    }

    Ok((groups, repair_notes))
}
1490
/// Derives the workstream key that a path-like label belongs to.
///
/// Empty `/`-segments are ignored. Labels under `.github` or one of the
/// container directories (`apps`, `packages`, `crates`, `services`, `libs`,
/// `pass`) are keyed by their first two segments when a second one exists;
/// every other label is keyed by its first segment. A label with no non-empty
/// segment is returned unchanged.
fn workstream_key_for_label(label: &str) -> String {
    let mut segments = label.split('/').filter(|segment| !segment.is_empty());

    let Some(first) = segments.next() else {
        return label.to_string();
    };

    let keeps_second_segment = matches!(
        first,
        ".github" | "apps" | "packages" | "crates" | "services" | "libs" | "pass"
    );

    match segments.next() {
        Some(second) if keeps_second_segment => format!("{first}/{second}"),
        _ => first.to_string(),
    }
}
1512
1513fn workstream_display_name(label: &str) -> String {
1514 let key = workstream_key_for_label(label);
1515 match key.as_str() {
1516 ".github/workflows" => "CI workflows".to_string(),
1517 ".github" => "GitHub automation".to_string(),
1518 _ => key
1519 .split('/')
1520 .next_back()
1521 .map(|segment| segment.replace(['_', '-'], " "))
1522 .unwrap_or(key),
1523 }
1524}
1525
/// Turns free-form text into a lowercase, dash-separated scope fragment.
///
/// ASCII alphanumerics are lowercased and kept. Runs of `-`, `_`, `/`, `.`
/// and spaces collapse into a single `-`, never at the start. All other
/// characters are dropped without breaking a separator run. Returns `None`
/// when nothing is left after trimming dashes from both ends.
fn sanitize_scope_fragment(raw: &str) -> Option<String> {
    let mut slug = String::new();

    for ch in raw.trim().chars() {
        match ch {
            c if c.is_ascii_alphanumeric() => slug.push(c.to_ascii_lowercase()),
            // The slug only ever ends in '-' right after a separator was emitted.
            '-' | '_' | '/' | '.' | ' ' if !slug.is_empty() && !slug.ends_with('-') => {
                slug.push('-');
            },
            _ => {},
        }
    }

    let slug = slug.trim_matches('-');
    if slug.is_empty() {
        None
    } else {
        Some(slug.to_string())
    }
}
1544
1545fn fallback_scope_for_label(label: &str) -> Option<Scope> {
1546 let key = workstream_key_for_label(label);
1547 let candidate = key
1548 .split('/')
1549 .next_back()
1550 .and_then(sanitize_scope_fragment)?;
1551 Scope::new(candidate).ok()
1552}
1553
1554fn fallback_rationale_for_labels(labels: &[String]) -> String {
1555 if labels.len() == 1 {
1556 let label = labels[0].as_str();
1557 let display = workstream_display_name(label);
1558 if label.starts_with("apps/") {
1559 return format!("{display} application updates");
1560 }
1561 if label.starts_with("packages/") {
1562 return format!("{display} package updates");
1563 }
1564 if label.starts_with("crates/") {
1565 return format!("{display} crate updates");
1566 }
1567 if label.starts_with(".github/") || label == ".github" {
1568 return format!("{display} updates");
1569 }
1570 return format!("{display} updates");
1571 }
1572
1573 let display_labels: Vec<String> = labels
1574 .iter()
1575 .take(3)
1576 .map(|label| workstream_display_name(label))
1577 .collect();
1578 format!("cross-cutting updates for {}", display_labels.join(", "))
1579}
1580
1581fn fallback_commit_type_for_group(
1582 snapshot: &ComposeSnapshot,
1583 labels: &[String],
1584 file_ids: &[String],
1585) -> Result<CommitType> {
1586 if labels
1587 .iter()
1588 .any(|label| label == ".github" || label.starts_with(".github/"))
1589 {
1590 return CommitType::new("ci");
1591 }
1592
1593 let files: Vec<&ComposeFile> = file_ids
1594 .iter()
1595 .filter_map(|file_id| snapshot.file_by_id(file_id))
1596 .collect();
1597 let all_docs = !files.is_empty()
1598 && files
1599 .iter()
1600 .all(|file| compose_file_category(file) == ComposeFileCategory::Docs);
1601 if all_docs {
1602 return CommitType::new("docs");
1603 }
1604
1605 let all_tests = !files.is_empty()
1606 && files
1607 .iter()
1608 .all(|file| compose_file_category(file) == ComposeFileCategory::Test);
1609 if all_tests {
1610 return CommitType::new("test");
1611 }
1612
1613 let all_dependencies =
1614 !files.is_empty() && files.iter().all(|file| is_dependency_manifest(&file.path));
1615 if all_dependencies {
1616 return CommitType::new("build");
1617 }
1618
1619 let all_config = !files.is_empty()
1620 && files.iter().all(|file| {
1621 matches!(
1622 compose_file_category(file),
1623 ComposeFileCategory::Config | ComposeFileCategory::Dependency
1624 )
1625 });
1626 if all_config {
1627 return CommitType::new("chore");
1628 }
1629
1630 CommitType::new("refactor")
1631}
1632
1633fn ordered_file_ids(snapshot: &ComposeSnapshot, file_ids: &HashSet<String>) -> Vec<String> {
1634 snapshot
1635 .files
1636 .iter()
1637 .filter(|file| file_ids.contains(&file.file_id))
1638 .map(|file| file.file_id.clone())
1639 .collect()
1640}
1641
1642fn is_monolithic_intent_plan(snapshot: &ComposeSnapshot, groups: &[ComposeIntentGroup]) -> bool {
1643 if groups.is_empty() {
1644 return false;
1645 }
1646
1647 let largest_group = groups
1648 .iter()
1649 .map(|group| group.file_ids.iter().collect::<HashSet<_>>().len())
1650 .max()
1651 .unwrap_or_default();
1652
1653 groups.len() == 1
1654 || (groups.len() <= 2
1655 && largest_group.saturating_mul(10) >= snapshot.files.len().saturating_mul(9))
1656}
1657
1658fn should_force_large_patch_fallback(
1659 snapshot: &ComposeSnapshot,
1660 planning_index: &PlanningIndex,
1661 groups: &[ComposeIntentGroup],
1662 max_commits: usize,
1663) -> bool {
1664 if max_commits <= 1
1665 || planning_index.mode != PlanningMode::Area
1666 || planning_index.targets.len() < COMPOSE_MONOLITH_FALLBACK_TARGET_THRESHOLD
1667 || !is_monolithic_intent_plan(snapshot, groups)
1668 {
1669 return false;
1670 }
1671
1672 let workstream_count = planning_index
1673 .targets
1674 .iter()
1675 .map(|target| workstream_key_for_label(&target.label))
1676 .collect::<HashSet<_>>()
1677 .len();
1678
1679 workstream_count >= COMPOSE_MONOLITH_FALLBACK_WORKSTREAM_THRESHOLD
1680}
1681
1682fn build_large_patch_fallback_groups(
1683 snapshot: &ComposeSnapshot,
1684 planning_index: &PlanningIndex,
1685 max_commits: usize,
1686) -> Result<Vec<ComposeIntentGroup>> {
1687 #[derive(Debug, Clone)]
1688 struct WorkstreamGroup {
1689 label: String,
1690 file_ids: HashSet<String>,
1691 weight: usize,
1692 }
1693
1694 #[derive(Debug, Clone)]
1695 struct FallbackBin {
1696 labels: Vec<String>,
1697 file_ids: HashSet<String>,
1698 total_weight: usize,
1699 }
1700
1701 let mut workstreams: HashMap<String, WorkstreamGroup> = HashMap::new();
1702 for target in &planning_index.targets {
1703 let key = workstream_key_for_label(&target.label);
1704 let entry = workstreams
1705 .entry(key.clone())
1706 .or_insert_with(|| WorkstreamGroup {
1707 label: key,
1708 file_ids: HashSet::new(),
1709 weight: 0,
1710 });
1711
1712 for file_id in &target.file_ids {
1713 entry.file_ids.insert(file_id.clone());
1714 }
1715 entry.weight = entry
1716 .weight
1717 .saturating_add(target.hunk_count.max(target.file_ids.len()));
1718 }
1719
1720 let mut workstreams: Vec<WorkstreamGroup> = workstreams.into_values().collect();
1721 workstreams.sort_by(|left, right| {
1722 right
1723 .weight
1724 .cmp(&left.weight)
1725 .then_with(|| left.label.cmp(&right.label))
1726 });
1727
1728 let bin_count = max_commits.min(workstreams.len());
1729 let mut bins: Vec<FallbackBin> = Vec::new();
1730 for workstream in workstreams {
1731 if bins.len() < bin_count {
1732 bins.push(FallbackBin {
1733 labels: vec![workstream.label],
1734 file_ids: workstream.file_ids,
1735 total_weight: workstream.weight,
1736 });
1737 continue;
1738 }
1739
1740 let Some((target_idx, _)) = bins
1741 .iter()
1742 .enumerate()
1743 .min_by_key(|(_, bin)| (bin.total_weight, bin.labels.len()))
1744 else {
1745 continue;
1746 };
1747
1748 let target_bin = &mut bins[target_idx];
1749 target_bin.labels.push(workstream.label);
1750 target_bin.total_weight = target_bin.total_weight.saturating_add(workstream.weight);
1751 target_bin.file_ids.extend(workstream.file_ids);
1752 }
1753
1754 let mut groups = Vec::new();
1755 for (idx, bin) in bins.into_iter().enumerate() {
1756 let ordered_ids = ordered_file_ids(snapshot, &bin.file_ids);
1757 let commit_type = fallback_commit_type_for_group(snapshot, &bin.labels, &ordered_ids)?;
1758 let scope = (bin.labels.len() == 1)
1759 .then(|| fallback_scope_for_label(&bin.labels[0]))
1760 .flatten();
1761 let rationale = fallback_rationale_for_labels(&bin.labels);
1762
1763 groups.push(ComposeIntentGroup {
1764 group_id: format!("G{}", idx + 1),
1765 commit_type,
1766 scope,
1767 file_ids: ordered_ids,
1768 rationale,
1769 dependencies: Vec::new(),
1770 });
1771 }
1772
1773 Ok(groups)
1774}
1775
1776#[tracing::instrument(target = "lgit", name = "compose.analyze_intent", skip_all, fields(file_count = snapshot.files.len(), observation_count = observations.len(), max_commits))]
1777async fn analyze_compose_intent(
1778 snapshot: &ComposeSnapshot,
1779 observations: &[FileObservation],
1780 config: &CommitConfig,
1781 max_commits: usize,
1782 debug_dir: Option<&Path>,
1783) -> Result<ComposeIntentPlan> {
1784 let planning_index = build_planning_index(snapshot);
1785 let stat_summary = render_planning_stat(&planning_index);
1786 let snapshot_summary = render_planning_snapshot_summary(snapshot, observations, &planning_index);
1787 let planning_targets = render_planning_targets(&planning_index, snapshot);
1788 let planning_notes = render_planning_notes(&planning_index);
1789 let split_bias = render_split_bias(&planning_index);
1790 let schema = build_intent_schema(config);
1791 let parts = templates::render_compose_intent_prompt(&templates::ComposeIntentPromptParams {
1792 variant: "default",
1793 max_commits,
1794 stat: &stat_summary,
1795 snapshot_summary: &snapshot_summary,
1796 planning_targets: &planning_targets,
1797 planning_notes: &planning_notes,
1798 split_bias: &split_bias,
1799 })?;
1800
1801 let response = run_oneshot::<ComposeIntentResponse>(config, &OneShotSpec {
1802 operation: "compose/intent",
1803 model: &config.analysis_model,
1804 max_tokens: 3000,
1805 temperature: COMPOSE_PLANNER_TEMPERATURE,
1806 prompt_family: "compose-intent",
1807 prompt_variant: "default",
1808 system_prompt: &parts.system,
1809 user_prompt: &parts.user,
1810 tool_name: "create_compose_intent_plan",
1811 tool_description: "Plan logical commit groups over the provided planning target IDs",
1812 schema: &schema,
1813 progress_label: Some("compose intent planner"),
1814 debug: debug_dir.map(|dir| OneShotDebug {
1815 dir: Some(dir),
1816 prefix: None,
1817 name: "compose_intent",
1818 }),
1819 cacheable: true,
1820 })
1821 .await?;
1822
1823 let (mut groups, repair_notes) =
1824 normalize_intent_plan(snapshot, &planning_index, response.output.groups)?;
1825 for note in &repair_notes {
1826 eprintln!("{}", style::warning(note));
1827 }
1828 if should_force_large_patch_fallback(snapshot, &planning_index, &groups, max_commits) {
1829 eprintln!(
1830 "{}",
1831 style::warning(
1832 "Compose intent collapsed into a monolithic large-patch group; falling back to \
1833 path-based workstream splits."
1834 )
1835 );
1836 groups = build_large_patch_fallback_groups(snapshot, &planning_index, max_commits)?;
1837 }
1838 let dependency_order =
1839 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)?;
1840
1841 Ok(ComposeIntentPlan { groups, dependency_order })
1842}
1843
1844#[tracing::instrument(target = "lgit", name = "compose.should_collect_observations", skip_all, fields(file_count = snapshot.files.len()))]
1845fn should_collect_compose_observations(
1846 snapshot: &ComposeSnapshot,
1847 config: &CommitConfig,
1848 counter: &TokenCounter,
1849) -> bool {
1850 planning_mode_for_snapshot(snapshot) != PlanningMode::Area
1851 && should_use_map_reduce(&snapshot.diff, config, counter)
1852}
1853
1854#[tracing::instrument(target = "lgit", name = "compose.auto_assign_hunks", skip_all, fields(group_count = intent_plan.groups.len()))]
1855fn auto_assign_hunks(
1856 snapshot: &ComposeSnapshot,
1857 intent_plan: &ComposeIntentPlan,
1858) -> Result<(HunkAssignments, Vec<AmbiguousFileBinding>)> {
1859 let mut groups_by_file: HashMap<&str, Vec<&str>> = HashMap::new();
1860 for group in &intent_plan.groups {
1861 for file_id in &group.file_ids {
1862 groups_by_file
1863 .entry(file_id.as_str())
1864 .or_default()
1865 .push(group.group_id.as_str());
1866 }
1867 }
1868
1869 let mut assigned: HashMap<String, BTreeSet<String>> = intent_plan
1870 .groups
1871 .iter()
1872 .map(|group| (group.group_id.clone(), BTreeSet::new()))
1873 .collect();
1874 let mut ambiguous = Vec::new();
1875
1876 for file in &snapshot.files {
1877 let Some(candidate_group_ids) = groups_by_file.get(file.file_id.as_str()) else {
1878 return Err(CommitGenError::Other(format!(
1879 "No compose group claimed file {} ({})",
1880 file.file_id, file.path
1881 )));
1882 };
1883
1884 if candidate_group_ids.len() == 1 {
1885 let group_id = candidate_group_ids[0];
1886 let entry = assigned
1887 .get_mut(group_id)
1888 .ok_or_else(|| CommitGenError::Other(format!("Unknown compose group {group_id}")))?;
1889 for hunk_id in &file.hunk_ids {
1890 entry.insert(hunk_id.clone());
1891 }
1892 } else {
1893 ambiguous.push(AmbiguousFileBinding {
1894 file_id: file.file_id.clone(),
1895 path: file.path.clone(),
1896 candidate_group_ids: candidate_group_ids
1897 .iter()
1898 .map(|group_id| (*group_id).to_string())
1899 .collect(),
1900 hunk_ids: file.hunk_ids.clone(),
1901 });
1902 }
1903 }
1904
1905 Ok((assigned, ambiguous))
1906}
1907
1908fn render_binding_groups(groups: &[ComposeIntentGroup]) -> String {
1909 let mut out = String::new();
1910 for group in groups {
1911 let scope = group
1912 .scope
1913 .as_ref()
1914 .map(|scope| format!("({})", scope.as_str()))
1915 .unwrap_or_default();
1916 writeln!(
1917 out,
1918 "- {} [{}{}] {}",
1919 group.group_id,
1920 group.commit_type.as_str(),
1921 scope,
1922 group.rationale
1923 )
1924 .unwrap();
1925 }
1926
1927 out
1928}
1929
1930fn render_binding_ambiguous_files(
1931 snapshot: &ComposeSnapshot,
1932 ambiguous_files: &[AmbiguousFileBinding],
1933) -> String {
1934 let mut out = String::new();
1935 for ambiguous_file in ambiguous_files {
1936 writeln!(
1937 out,
1938 "- {} {} candidates: {}",
1939 ambiguous_file.file_id,
1940 ambiguous_file.path,
1941 ambiguous_file.candidate_group_ids.join(", ")
1942 )
1943 .unwrap();
1944
1945 for hunk_id in &ambiguous_file.hunk_ids {
1946 if let Some(hunk) = snapshot.hunk_by_id(hunk_id) {
1947 if hunk.synthetic {
1948 writeln!(out, " - {} :: {}", hunk.hunk_id, hunk.snippet).unwrap();
1949 } else {
1950 writeln!(
1951 out,
1952 " - {} old:{} new:{} :: {}",
1953 hunk.hunk_id,
1954 format_line_range(hunk.old_start, hunk.old_count),
1955 format_line_range(hunk.new_start, hunk.new_count),
1956 hunk.snippet
1957 )
1958 .unwrap();
1959 }
1960 }
1961 }
1962 }
1963
1964 out
1965}
1966
1967async fn request_binding(
1968 snapshot: &ComposeSnapshot,
1969 groups: &[ComposeIntentGroup],
1970 ambiguous_files: &[AmbiguousFileBinding],
1971 config: &CommitConfig,
1972 debug_dir: Option<&Path>,
1973 debug_name: &str,
1974) -> Result<Vec<ComposeBindingAssignment>> {
1975 let schema = build_binding_schema();
1976 let groups_text = render_binding_groups(groups);
1977 let ambiguous_files_text = render_binding_ambiguous_files(snapshot, ambiguous_files);
1978 let parts = templates::render_compose_bind_prompt(&templates::ComposeBindPromptParams {
1979 variant: "default",
1980 groups: &groups_text,
1981 ambiguous_files: &ambiguous_files_text,
1982 })?;
1983 let response = run_oneshot::<ComposeBindingResponse>(config, &OneShotSpec {
1984 operation: "compose/bind",
1985 model: &config.analysis_model,
1986 max_tokens: 2500,
1987 temperature: COMPOSE_PLANNER_TEMPERATURE,
1988 prompt_family: "compose-bind",
1989 prompt_variant: "default",
1990 system_prompt: &parts.system,
1991 user_prompt: &parts.user,
1992 tool_name: "bind_compose_hunks",
1993 tool_description: "Assign hunk IDs to existing compose groups",
1994 schema: &schema,
1995 progress_label: Some("compose hunk binder"),
1996 debug: debug_dir.map(|dir| OneShotDebug {
1997 dir: Some(dir),
1998 prefix: None,
1999 name: debug_name,
2000 }),
2001 cacheable: true,
2002 })
2003 .await?;
2004
2005 Ok(response.output.assignments)
2006}
2007
2008fn ambiguous_hunk_context(
2009 ambiguous_files: &[AmbiguousFileBinding],
2010) -> HashMap<String, AmbiguousHunkContext> {
2011 let mut context = HashMap::new();
2012 for ambiguous_file in ambiguous_files {
2013 for hunk_id in &ambiguous_file.hunk_ids {
2014 context.insert(hunk_id.clone(), AmbiguousHunkContext {
2015 candidate_group_ids: ambiguous_file.candidate_group_ids.clone(),
2016 });
2017 }
2018 }
2019 context
2020}
2021
2022fn evaluate_binding(
2023 assignments: &[ComposeBindingAssignment],
2024 hunk_context: &HashMap<String, AmbiguousHunkContext>,
2025 valid_group_ids: &HashSet<&str>,
2026 snapshot: &ComposeSnapshot,
2027) -> BindingEvaluation {
2028 let mut assigned_hunk_to_group: HashMap<String, String> = HashMap::new();
2029
2030 for assignment in assignments {
2031 if !valid_group_ids.contains(assignment.group_id.as_str()) {
2032 continue;
2033 }
2034
2035 let mut seen_in_group = HashSet::new();
2036 for hunk_id in &assignment.hunk_ids {
2037 if !seen_in_group.insert(hunk_id.as_str()) {
2038 continue;
2039 }
2040
2041 let Some(context) = hunk_context.get(hunk_id) else {
2042 continue;
2043 };
2044
2045 if !context
2046 .candidate_group_ids
2047 .iter()
2048 .any(|candidate| candidate == &assignment.group_id)
2049 {
2050 continue;
2051 }
2052
2053 match assigned_hunk_to_group.get(hunk_id) {
2054 None => {
2055 assigned_hunk_to_group.insert(hunk_id.clone(), assignment.group_id.clone());
2056 },
2057 Some(existing_group) if existing_group == &assignment.group_id => {},
2058 Some(_) => {
2059 assigned_hunk_to_group.remove(hunk_id);
2060 },
2061 }
2062 }
2063 }
2064
2065 let mut assigned_by_group: HashMap<String, Vec<String>> = HashMap::new();
2066 for (hunk_id, group_id) in assigned_hunk_to_group {
2067 assigned_by_group.entry(group_id).or_default().push(hunk_id);
2068 }
2069
2070 for hunk_ids in assigned_by_group.values_mut() {
2071 let ordered: Vec<String> = snapshot
2072 .hunks
2073 .iter()
2074 .filter(|hunk| hunk_ids.iter().any(|selected| selected == &hunk.hunk_id))
2075 .map(|hunk| hunk.hunk_id.clone())
2076 .collect();
2077 *hunk_ids = ordered;
2078 }
2079
2080 let unresolved = snapshot
2081 .hunks
2082 .iter()
2083 .filter(|hunk| hunk_context.contains_key(&hunk.hunk_id))
2084 .filter(|hunk| {
2085 !assigned_by_group.values().any(|assigned_hunks| {
2086 assigned_hunks
2087 .iter()
2088 .any(|assigned| assigned == &hunk.hunk_id)
2089 })
2090 })
2091 .map(|hunk| hunk.hunk_id.clone())
2092 .collect();
2093
2094 BindingEvaluation { assigned: assigned_by_group, unresolved }
2095}
2096
2097fn filter_ambiguous_files(
2098 ambiguous_files: &[AmbiguousFileBinding],
2099 hunk_ids: &[String],
2100) -> Vec<AmbiguousFileBinding> {
2101 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2102
2103 ambiguous_files
2104 .iter()
2105 .filter_map(|file| {
2106 let matching_hunks: Vec<String> = file
2107 .hunk_ids
2108 .iter()
2109 .filter(|hunk_id| hunk_ids.contains(hunk_id.as_str()))
2110 .cloned()
2111 .collect();
2112
2113 (!matching_hunks.is_empty()).then(|| AmbiguousFileBinding {
2114 file_id: file.file_id.clone(),
2115 path: file.path.clone(),
2116 candidate_group_ids: file.candidate_group_ids.clone(),
2117 hunk_ids: matching_hunks,
2118 })
2119 })
2120 .collect()
2121}
2122
2123fn chunk_ambiguous_files(
2124 ambiguous_files: &[AmbiguousFileBinding],
2125) -> Vec<Vec<AmbiguousFileBinding>> {
2126 if ambiguous_files.is_empty() {
2127 return Vec::new();
2128 }
2129
2130 let mut batches = Vec::new();
2131 let mut current_batch = Vec::new();
2132 let mut current_hunk_count = 0_usize;
2133
2134 for file in ambiguous_files {
2135 let file_hunk_count = file.hunk_ids.len();
2136 let should_split = !current_batch.is_empty()
2137 && (current_batch.len() >= MAX_BIND_FILES_PER_REQUEST
2138 || current_hunk_count.saturating_add(file_hunk_count) > MAX_BIND_HUNKS_PER_REQUEST);
2139
2140 if should_split {
2141 batches.push(current_batch);
2142 current_batch = Vec::new();
2143 current_hunk_count = 0;
2144 }
2145
2146 current_hunk_count = current_hunk_count.saturating_add(file_hunk_count);
2147 current_batch.push(file.clone());
2148 }
2149
2150 if !current_batch.is_empty() {
2151 batches.push(current_batch);
2152 }
2153
2154 batches
2155}
2156
2157fn order_hunk_ids(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2158 let hunk_ids: HashSet<&str> = hunk_ids.iter().map(String::as_str).collect();
2159
2160 snapshot
2161 .hunks
2162 .iter()
2163 .filter(|hunk| hunk_ids.contains(hunk.hunk_id.as_str()))
2164 .map(|hunk| hunk.hunk_id.clone())
2165 .collect()
2166}
2167
2168fn fallback_group_for_hunk(
2169 hunk_id: &str,
2170 ambiguous_files: &[AmbiguousFileBinding],
2171 group_rank: &HashMap<&str, usize>,
2172) -> Option<String> {
2173 ambiguous_files.iter().find_map(|file| {
2174 file
2175 .hunk_ids
2176 .iter()
2177 .any(|candidate| candidate == hunk_id)
2178 .then(|| {
2179 file
2180 .candidate_group_ids
2181 .iter()
2182 .min_by_key(|group_id| {
2183 group_rank
2184 .get(group_id.as_str())
2185 .copied()
2186 .unwrap_or(usize::MAX)
2187 })
2188 .cloned()
2189 })
2190 })?
2191}
2192
2193fn assign_unresolved_hunks(
2194 unresolved_hunks: &[String],
2195 assigned_by_group: &mut HashMap<String, BTreeSet<String>>,
2196 ambiguous_files: &[AmbiguousFileBinding],
2197 group_rank: &HashMap<&str, usize>,
2198) {
2199 for hunk_id in unresolved_hunks {
2200 if let Some(group_id) = fallback_group_for_hunk(hunk_id, ambiguous_files, group_rank)
2201 && let Some(group_hunks) = assigned_by_group.get_mut(&group_id)
2202 {
2203 group_hunks.insert(hunk_id.clone());
2204 }
2205 }
2206}
2207
2208fn normalize_group_type(
2209 snapshot: &ComposeSnapshot,
2210 file_ids: &[String],
2211 original_type: &CommitType,
2212) -> Result<CommitType> {
2213 let dependency_only = !file_ids.is_empty()
2214 && file_ids.iter().all(|file_id| {
2215 snapshot
2216 .file_by_id(file_id)
2217 .is_some_and(|file| is_dependency_manifest(&file.path))
2218 });
2219
2220 if dependency_only && original_type.as_str() != "build" {
2221 CommitType::new("build")
2222 } else {
2223 Ok(original_type.clone())
2224 }
2225}
2226
2227fn derive_file_ids_for_hunks(snapshot: &ComposeSnapshot, hunk_ids: &[String]) -> Vec<String> {
2228 snapshot
2229 .files
2230 .iter()
2231 .filter(|file| {
2232 hunk_ids
2233 .iter()
2234 .any(|hunk_id| file.hunk_ids.contains(hunk_id))
2235 })
2236 .map(|file| file.file_id.clone())
2237 .collect()
2238}
2239
2240fn build_redirects(
2241 intent_plan: &ComposeIntentPlan,
2242 executable_groups: &[ComposeExecutableGroup],
2243 group_rank: &HashMap<&str, usize>,
2244) -> HashMap<String, String> {
2245 let surviving_groups: HashMap<&str, &ComposeExecutableGroup> = executable_groups
2246 .iter()
2247 .filter(|group| !group.hunk_ids.is_empty())
2248 .map(|group| (group.group_id.as_str(), group))
2249 .collect();
2250
2251 let mut redirects = HashMap::new();
2252 for group in &intent_plan.groups {
2253 if surviving_groups.contains_key(group.group_id.as_str()) {
2254 continue;
2255 }
2256
2257 let redirect = executable_groups
2258 .iter()
2259 .filter(|candidate| candidate.group_id != group.group_id)
2260 .filter(|candidate| {
2261 candidate.file_ids.iter().any(|file_id| {
2262 group
2263 .file_ids
2264 .iter()
2265 .any(|candidate_id| candidate_id == file_id)
2266 })
2267 })
2268 .min_by_key(|candidate| {
2269 group_rank
2270 .get(candidate.group_id.as_str())
2271 .copied()
2272 .unwrap_or(usize::MAX)
2273 })
2274 .map(|candidate| candidate.group_id.clone());
2275
2276 if let Some(redirect) = redirect {
2277 redirects.insert(group.group_id.clone(), redirect);
2278 }
2279 }
2280
2281 redirects
2282}
2283
/// Follows `redirects` starting at `group_id` and returns the final id.
///
/// The walk ends at the first id that has no entry in `redirects`. Cycles are
/// tolerated: as soon as the walk is about to leave an id it has already left
/// once, it stops and returns that id. An id without any redirect is returned
/// unchanged.
fn resolve_redirect(group_id: &str, redirects: &HashMap<String, String>) -> String {
	// Track borrowed ids so following a chain allocates nothing until the end.
	let mut current = group_id;
	let mut seen: HashSet<&str> = HashSet::new();

	while let Some(next) = redirects.get(current) {
		if !seen.insert(current) {
			break;
		}
		current = next.as_str();
	}

	current.to_string()
}
2297
2298fn prune_empty_groups(
2299 groups: Vec<ComposeExecutableGroup>,
2300 redirects: &HashMap<String, String>,
2301) -> Result<ComposeExecutablePlan> {
2302 let surviving_ids: HashSet<String> = groups
2303 .iter()
2304 .filter(|group| !group.hunk_ids.is_empty())
2305 .map(|group| group.group_id.clone())
2306 .collect();
2307
2308 let mut surviving_groups = Vec::new();
2309 for mut group in groups {
2310 if group.hunk_ids.is_empty() {
2311 continue;
2312 }
2313
2314 let mut rewritten_dependencies = Vec::new();
2315 for dependency in &group.dependencies {
2316 let rewritten = resolve_redirect(dependency, redirects);
2317 if rewritten != group.group_id
2318 && surviving_ids.contains(&rewritten)
2319 && !rewritten_dependencies
2320 .iter()
2321 .any(|existing| existing == &rewritten)
2322 {
2323 rewritten_dependencies.push(rewritten);
2324 }
2325 }
2326
2327 group.dependencies = rewritten_dependencies;
2328 surviving_groups.push(group);
2329 }
2330
2331 let dependency_order = compute_dependency_order(
2332 &surviving_groups,
2333 |group| &group.group_id,
2334 |group| &group.dependencies,
2335 )?;
2336 Ok(ComposeExecutablePlan { groups: surviving_groups, dependency_order })
2337}
2338
2339fn finalize_executable_plan(
2340 snapshot: &ComposeSnapshot,
2341 intent_plan: &ComposeIntentPlan,
2342 assigned_by_group: HashMap<String, BTreeSet<String>>,
2343) -> Result<ComposeExecutablePlan> {
2344 let group_rank: HashMap<&str, usize> = intent_plan
2345 .dependency_order
2346 .iter()
2347 .enumerate()
2348 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2349 .collect();
2350
2351 let mut executable_groups = Vec::new();
2352 for group in &intent_plan.groups {
2353 let hunk_ids: Vec<String> = snapshot
2354 .hunks
2355 .iter()
2356 .filter(|hunk| {
2357 assigned_by_group
2358 .get(&group.group_id)
2359 .is_some_and(|assigned| assigned.contains(&hunk.hunk_id))
2360 })
2361 .map(|hunk| hunk.hunk_id.clone())
2362 .collect();
2363
2364 let file_ids = derive_file_ids_for_hunks(snapshot, &hunk_ids);
2365 let commit_type = normalize_group_type(snapshot, &file_ids, &group.commit_type)?;
2366 executable_groups.push(ComposeExecutableGroup {
2367 group_id: group.group_id.clone(),
2368 commit_type,
2369 scope: group.scope.clone(),
2370 file_ids,
2371 rationale: group.rationale.clone(),
2372 dependencies: group.dependencies.clone(),
2373 hunk_ids,
2374 });
2375 }
2376
2377 let redirects = build_redirects(intent_plan, &executable_groups, &group_rank);
2378 prune_empty_groups(executable_groups, &redirects)
2379}
2380
2381fn validate_executable_plan(
2382 snapshot: &ComposeSnapshot,
2383 plan: &ComposeExecutablePlan,
2384) -> Result<()> {
2385 if plan.groups.is_empty() {
2386 return Err(CommitGenError::Other("Compose executable plan returned no groups".to_string()));
2387 }
2388
2389 let known_hunks: HashSet<&str> = snapshot
2390 .hunks
2391 .iter()
2392 .map(|hunk| hunk.hunk_id.as_str())
2393 .collect();
2394 let known_files: HashSet<&str> = snapshot
2395 .files
2396 .iter()
2397 .map(|file| file.file_id.as_str())
2398 .collect();
2399 let mut coverage = HashMap::<String, String>::new();
2400
2401 for group in &plan.groups {
2402 if group.hunk_ids.is_empty() {
2403 return Err(CommitGenError::Other(format!(
2404 "Compose group {} ended up empty after binding",
2405 group.group_id
2406 )));
2407 }
2408
2409 for file_id in &group.file_ids {
2410 if !known_files.contains(file_id.as_str()) {
2411 return Err(CommitGenError::Other(format!(
2412 "Compose group {} references unknown file_id {}",
2413 group.group_id, file_id
2414 )));
2415 }
2416 }
2417
2418 for hunk_id in &group.hunk_ids {
2419 if !known_hunks.contains(hunk_id.as_str()) {
2420 return Err(CommitGenError::Other(format!(
2421 "Compose group {} references unknown hunk_id {}",
2422 group.group_id, hunk_id
2423 )));
2424 }
2425
2426 if let Some(existing_group) = coverage.insert(hunk_id.clone(), group.group_id.clone()) {
2427 return Err(CommitGenError::Other(format!(
2428 "Hunk {} was assigned to both {} and {}",
2429 hunk_id, existing_group, group.group_id
2430 )));
2431 }
2432 }
2433 }
2434
2435 let missing_hunks: Vec<String> = snapshot
2436 .hunks
2437 .iter()
2438 .filter(|hunk| !coverage.contains_key(&hunk.hunk_id))
2439 .map(|hunk| hunk.hunk_id.clone())
2440 .collect();
2441 if !missing_hunks.is_empty() {
2442 return Err(CommitGenError::Other(format!(
2443 "Compose plan left hunks unassigned: {}",
2444 missing_hunks.join(", ")
2445 )));
2446 }
2447
2448 let dependency_order =
2449 compute_dependency_order(&plan.groups, |group| &group.group_id, |group| &group.dependencies)?;
2450 if dependency_order != plan.dependency_order {
2451 return Err(CommitGenError::Other(
2452 "Compose dependency order does not match recomputed order".to_string(),
2453 ));
2454 }
2455
2456 Ok(())
2457}
2458
2459#[tracing::instrument(target = "lgit", name = "compose.bind_plan", skip_all, fields(file_count = snapshot.files.len(), group_count = intent_plan.groups.len()))]
2460async fn bind_compose_plan(
2461 snapshot: &ComposeSnapshot,
2462 intent_plan: &ComposeIntentPlan,
2463 config: &CommitConfig,
2464 debug_dir: Option<&Path>,
2465) -> Result<ComposeExecutablePlan> {
2466 let (mut assigned_by_group, ambiguous_files) = auto_assign_hunks(snapshot, intent_plan)?;
2467
2468 if !ambiguous_files.is_empty() {
2469 let valid_group_ids: HashSet<&str> = intent_plan
2470 .groups
2471 .iter()
2472 .map(|group| group.group_id.as_str())
2473 .collect();
2474 let binding_batches = chunk_ambiguous_files(&ambiguous_files);
2475 let mut unresolved = Vec::new();
2476
2477 for (batch_idx, batch) in binding_batches.iter().enumerate() {
2478 let hunk_context = ambiguous_hunk_context(batch);
2479 let debug_name = if binding_batches.len() == 1 {
2480 "compose_bind".to_string()
2481 } else {
2482 format!("compose_bind_{:02}", batch_idx + 1)
2483 };
2484 let assignments =
2485 request_binding(snapshot, &intent_plan.groups, batch, config, debug_dir, &debug_name)
2486 .await?;
2487 let evaluation = evaluate_binding(&assignments, &hunk_context, &valid_group_ids, snapshot);
2488 for (group_id, hunk_ids) in evaluation.assigned {
2489 let entry = assigned_by_group.entry(group_id).or_default();
2490 for hunk_id in hunk_ids {
2491 entry.insert(hunk_id);
2492 }
2493 }
2494 unresolved.extend(evaluation.unresolved);
2495 }
2496
2497 let group_rank: HashMap<&str, usize> = intent_plan
2498 .dependency_order
2499 .iter()
2500 .enumerate()
2501 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
2502 .collect();
2503
2504 let mut unresolved = order_hunk_ids(snapshot, &unresolved);
2505 if !unresolved.is_empty() {
2506 let unresolved_files = filter_ambiguous_files(&ambiguous_files, &unresolved);
2507 let repair_batches = chunk_ambiguous_files(&unresolved_files);
2508 let mut repair_unresolved = Vec::new();
2509
2510 for (batch_idx, batch) in repair_batches.iter().enumerate() {
2511 let debug_name = if repair_batches.len() == 1 {
2512 "compose_bind_repair".to_string()
2513 } else {
2514 format!("compose_bind_repair_{:02}", batch_idx + 1)
2515 };
2516 let repair_assignments = request_binding(
2517 snapshot,
2518 &intent_plan.groups,
2519 batch,
2520 config,
2521 debug_dir,
2522 &debug_name,
2523 )
2524 .await?;
2525 let repair_context = ambiguous_hunk_context(batch);
2526 let repair =
2527 evaluate_binding(&repair_assignments, &repair_context, &valid_group_ids, snapshot);
2528 for (group_id, hunk_ids) in repair.assigned {
2529 let entry = assigned_by_group.entry(group_id).or_default();
2530 for hunk_id in hunk_ids {
2531 entry.insert(hunk_id);
2532 }
2533 }
2534
2535 repair_unresolved.extend(repair.unresolved);
2536 }
2537 unresolved = order_hunk_ids(snapshot, &repair_unresolved);
2538
2539 if !unresolved.is_empty() {
2540 assign_unresolved_hunks(
2541 &unresolved,
2542 &mut assigned_by_group,
2543 &ambiguous_files,
2544 &group_rank,
2545 );
2546 }
2547 }
2548 }
2549
2550 let plan = finalize_executable_plan(snapshot, intent_plan, assigned_by_group)?;
2551 validate_executable_plan(snapshot, &plan)?;
2552 Ok(plan)
2553}
2554
2555fn print_executable_plan(snapshot: &ComposeSnapshot, plan: &ComposeExecutablePlan) {
2556 println!("\n{}", style::section_header("Proposed Commit Groups", 80));
2557 for (display_idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2558 let group = &plan.groups[group_idx];
2559 let scope = group
2560 .scope
2561 .as_ref()
2562 .map(|scope| format!("({})", style::scope(scope.as_str())))
2563 .unwrap_or_default();
2564
2565 println!(
2566 "\n{}. {} [{}{}] {}",
2567 display_idx + 1,
2568 style::bold(&group.group_id),
2569 style::commit_type(group.commit_type.as_str()),
2570 scope,
2571 group.rationale
2572 );
2573
2574 println!(" Files:");
2575 for file_id in &group.file_ids {
2576 if let Some(file) = snapshot.file_by_id(file_id) {
2577 let selected_hunk_ids: Vec<&str> = group
2578 .hunk_ids
2579 .iter()
2580 .filter(|hunk_id| file.hunk_ids.contains(*hunk_id))
2581 .map(String::as_str)
2582 .collect();
2583 let selection = if selected_hunk_ids.len() == file.hunk_ids.len() {
2584 "all hunks".to_string()
2585 } else {
2586 selected_hunk_ids.join(", ")
2587 };
2588 println!(" - {} {} ({selection})", file.file_id, file.path);
2589 }
2590 }
2591
2592 if !group.dependencies.is_empty() {
2593 println!(" Depends on: {}", group.dependencies.join(", "));
2594 }
2595 }
2596}
2597
2598#[tracing::instrument(target = "lgit", name = "compose.generate_group_analysis", skip_all, fields(group_id = %group.group_id, diff_bytes = diff.len(), stat_bytes = stat.len()))]
2599async fn generate_compose_group_analysis(
2600 stat: &str,
2601 diff: &str,
2602 group: &ComposeExecutableGroup,
2603 config: &CommitConfig,
2604 args: &Args,
2605 debug_prefix: &str,
2606 counter: &TokenCounter,
2607) -> Result<ConventionalAnalysis> {
2608 match compose_analysis_strategy(diff, config, counter) {
2609 ComposeAnalysisStrategy::MapReduce => {
2610 println!(
2611 " {}",
2612 style::info(&format!(
2613 "Using map-reduce for {} commit analysis (diff exceeds token budget)",
2614 group.group_id
2615 ))
2616 );
2617 run_map_reduce(diff, stat, "", &config.analysis_model, config, counter).await
2618 },
2619 strategy => {
2620 let analysis_diff = if strategy == ComposeAnalysisStrategy::SmartTruncate {
2621 eprintln!(
2622 " {}",
2623 style::warning(&format!(
2624 "Truncating diff for {} commit analysis (diff exceeds configured budget)",
2625 group.group_id
2626 ))
2627 );
2628 Cow::Owned(smart_truncate_diff(
2629 diff,
2630 compose_truncation_length(config),
2631 config,
2632 counter,
2633 ))
2634 } else {
2635 Cow::Borrowed(diff)
2636 };
2637
2638 let ctx = AnalysisContext {
2639 user_context: Some(&group.rationale),
2640 recent_commits: None,
2641 common_scopes: None,
2642 project_context: None,
2643 debug_output: args.debug_output.as_deref(),
2644 debug_prefix: Some(debug_prefix),
2645 };
2646
2647 generate_conventional_analysis(
2648 stat,
2649 analysis_diff.as_ref(),
2650 &config.analysis_model,
2651 "",
2652 &ctx,
2653 config,
2654 )
2655 .await
2656 },
2657 }
2658}
2659
2660fn compose_group_file_list(snapshot: &ComposeSnapshot, group: &ComposeExecutableGroup) -> String {
2661 let files: Vec<&str> = group
2662 .file_ids
2663 .iter()
2664 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.as_str()))
2665 .collect();
2666
2667 if files.is_empty() {
2668 "no files resolved".to_string()
2669 } else {
2670 files.join(", ")
2671 }
2672}
2673
2674fn cumulative_file_hunk_ids(
2678 plan: &ComposeExecutablePlan,
2679 position: usize,
2680 snapshot: &ComposeSnapshot,
2681 file_id: &str,
2682) -> Vec<String> {
2683 let mut hunk_ids = Vec::new();
2684 for &group_idx in plan.dependency_order.iter().take(position + 1) {
2685 let Some(group) = plan.groups.get(group_idx) else {
2686 continue;
2687 };
2688 for hunk_id in &group.hunk_ids {
2689 if snapshot
2690 .hunk_by_id(hunk_id)
2691 .is_some_and(|hunk| hunk.file_id == file_id)
2692 {
2693 hunk_ids.push(hunk_id.clone());
2694 }
2695 }
2696 }
2697 hunk_ids
2698}
2699
2700#[tracing::instrument(target = "lgit", name = "compose.execute", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2701pub async fn execute_compose(
2702 snapshot: &ComposeSnapshot,
2703 plan: &ComposeExecutablePlan,
2704 config: &CommitConfig,
2705 args: &Args,
2706 base_state: &ComposeBaseState,
2707) -> Result<Vec<String>> {
2708 let total = plan.dependency_order.len();
2709
2710 let mut group_diff_stats: Vec<(String, String)> = Vec::with_capacity(total);
2714 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2715 let group = &plan.groups[group_idx];
2716 println!(
2717 " {}",
2718 style::info(&format!("Preparing diff for {} ({}/{})", group.group_id, idx + 1, total))
2719 );
2720 let group_patch = create_executable_group_patch(snapshot, group)?;
2721 group_diff_stats.push((group_patch.diff, group_patch.stat));
2722 }
2723
2724 println!(
2728 "{}",
2729 style::info(&format!(
2730 "Generating {total} commit message(s) in parallel (up to {} at a time)...",
2731 COMPOSE_MESSAGE_PARALLELISM.min(total).max(1)
2732 ))
2733 );
2734
2735 let token_counter = create_token_counter(config);
2736 let prepared_messages: Vec<(Vec<String>, CommitSummary)> =
2737 stream::iter(plan.dependency_order.iter().enumerate())
2738 .map(|(idx, &group_idx)| {
2739 let group = &plan.groups[group_idx];
2740 let (diff, stat) = &group_diff_stats[idx];
2741 let debug_prefix = format!("compose-{}", idx + 1);
2742 let token_counter = &token_counter;
2743 async move {
2744 let result = async {
2745 let analysis = generate_compose_group_analysis(
2746 stat,
2747 diff,
2748 group,
2749 config,
2750 args,
2751 &debug_prefix,
2752 token_counter,
2753 )
2754 .await?;
2755 let body = analysis.body_texts();
2756 let summary = generate_summary_from_analysis(
2757 stat,
2758 group.commit_type.as_str(),
2759 group.scope.as_ref().map(|scope| scope.as_str()),
2760 &body,
2761 Some(&group.rationale),
2762 config,
2763 args.debug_output.as_deref(),
2764 Some(&debug_prefix),
2765 )
2766 .await?;
2767 Ok::<_, CommitGenError>((body, summary))
2768 }
2769 .await;
2770
2771 result.map_err(|source| CommitGenError::ComposeMessageError {
2772 group_id: group.group_id.clone(),
2773 files: compose_group_file_list(snapshot, group),
2774 source: Box::new(source),
2775 })
2776 }
2777 })
2778 .buffered(COMPOSE_MESSAGE_PARALLELISM.min(total).max(1))
2779 .collect::<Vec<_>>()
2780 .await
2781 .into_iter()
2782 .collect::<Result<Vec<_>>>()?;
2783
2784 execute_compose_with_prepared_messages(
2785 snapshot,
2786 plan,
2787 config,
2788 args,
2789 base_state,
2790 prepared_messages,
2791 )
2792}
2793
2794#[tracing::instrument(target = "lgit", name = "compose.execute_prepared_messages", skip_all, fields(group_count = plan.dependency_order.len(), dir = %args.dir))]
2795fn execute_compose_with_prepared_messages(
2796 snapshot: &ComposeSnapshot,
2797 plan: &ComposeExecutablePlan,
2798 config: &CommitConfig,
2799 args: &Args,
2800 base_state: &ComposeBaseState,
2801 prepared_messages: Vec<(Vec<String>, CommitSummary)>,
2802) -> Result<Vec<String>> {
2803 let dir = &args.dir;
2804 let total = plan.dependency_order.len();
2805 if args.compose_preview {
2806 return Ok(Vec::new());
2807 }
2808
2809 let index = TempGitIndex::new(dir)?;
2810 read_tree_into_index(index.path(), &base_state.head_hash, dir)?;
2811
2812 let mut commit_hashes = Vec::new();
2813 let mut parent_hash = base_state.head_hash.clone();
2814
2815 for (idx, &group_idx) in plan.dependency_order.iter().enumerate() {
2819 let group = &plan.groups[group_idx];
2820
2821 println!("\n[{}/{}] Creating commit {}: {}", idx + 1, total, group.group_id, group.rationale);
2822 println!(" Type: {}", style::commit_type(group.commit_type.as_str()));
2823 if let Some(scope) = &group.scope {
2824 println!(" Scope: {}", style::scope(scope.as_str()));
2825 }
2826 let paths: Vec<String> = group
2827 .file_ids
2828 .iter()
2829 .filter_map(|file_id| snapshot.file_by_id(file_id).map(|file| file.path.clone()))
2830 .collect();
2831 println!(" Files: {}", paths.join(", "));
2832
2833 let outcome = stage_executable_group_in_index(snapshot, group, dir, index.path())?;
2834 let mut staged_anything = outcome.result == StageResult::Staged;
2835
2836 for skipped in &outcome.skipped {
2840 let Some(file) = snapshot.file_by_path(&skipped.path) else {
2841 continue;
2842 };
2843 let cumulative = cumulative_file_hunk_ids(plan, idx, snapshot, &file.file_id);
2844 force_stage_file_from_base_in_index(
2845 snapshot,
2846 &file.file_id,
2847 &cumulative,
2848 dir,
2849 index.path(),
2850 )?;
2851 staged_anything = true;
2852 eprintln!(
2853 " {}",
2854 style::info(&format!(
2855 "Re-staged {} from base via splice (whole-file apply not used for partial hunks)",
2856 skipped.path
2857 ))
2858 );
2859 }
2860
2861 if !staged_anything {
2862 eprintln!(
2863 " {}",
2864 style::warning(&format!(
2865 "Skipping commit {}: its planned patch is already applied ({:?})",
2866 group.group_id, outcome.result
2867 ))
2868 );
2869 continue;
2870 }
2871
2872 let (analysis_body, summary) = prepared_messages[idx].clone();
2873 let mut commit = ConventionalCommit {
2874 commit_type: group.commit_type.clone(),
2875 scope: group.scope.clone(),
2876 summary,
2877 body: analysis_body,
2878 footers: vec![],
2879 };
2880 post_process_commit_message(&mut commit, config);
2881
2882 if let Err(err) = validate_commit_message(&commit, config) {
2883 eprintln!(
2884 " {}",
2885 style::warning(&format!("{} Warning: Validation failed: {err}", style::icons::WARNING))
2886 );
2887 }
2888
2889 let mut formatted_message = format_commit_message(&commit);
2890 if args.signoff || config.signoff {
2891 formatted_message = append_signoff_trailer(&formatted_message, dir)?;
2892 }
2893 println!(
2894 " Message:\n{}",
2895 formatted_message
2896 .lines()
2897 .take(3)
2898 .collect::<Vec<_>>()
2899 .join("\n")
2900 );
2901
2902 let tree = write_index_tree(index.path(), dir)?;
2903 let sign = args.sign || config.gpg_sign;
2904 let hash = commit_tree(&tree, &parent_hash, &formatted_message, dir, sign)?;
2905 parent_hash.clone_from(&hash);
2906 commit_hashes.push(hash);
2907
2908 if args.compose_test_after_each {
2909 return Err(CommitGenError::Other(
2910 "--compose-test-after-each is incompatible with isolated compose execution".to_string(),
2911 ));
2912 }
2913 }
2914
2915 if commit_hashes.is_empty() {
2916 return Ok(commit_hashes);
2917 }
2918
2919 let current_index_tree = write_real_index_tree(dir)?;
2920 if current_index_tree != base_state.index_tree {
2921 return Err(CommitGenError::Other(
2922 "Real git index changed during compose; aborting before updating HEAD".to_string(),
2923 ));
2924 }
2925
2926 update_ref_checked(&base_state.head_ref, &parent_hash, &base_state.head_hash, dir)?;
2927 reset_mixed_to(&parent_hash, dir)?;
2928
2929 Ok(commit_hashes)
2930}
2931
2932#[tracing::instrument(target = "lgit", name = "compose.run", skip_all, fields(dir = %args.dir, max_rounds = config.compose_max_rounds))]
2933pub async fn run_compose_mode(args: &Args, config: &CommitConfig) -> Result<()> {
2934 let max_rounds = config.compose_max_rounds;
2935
2936 for round in 1..=max_rounds {
2937 if round > 1 {
2938 println!(
2939 "\n{}",
2940 style::section_header(&format!("Compose Round {round}/{max_rounds}"), 80)
2941 );
2942 } else {
2943 println!("{}", style::section_header("Compose Mode", 80));
2944 }
2945 println!("{}\n", style::info("Analyzing all changes for intelligent splitting..."));
2946
2947 run_compose_round(args, config, round).await?;
2948
2949 if args.compose_preview {
2950 break;
2951 }
2952
2953 match get_compose_diff(&args.dir) {
2954 Err(CommitGenError::NoChanges { .. }) => {
2955 println!(
2956 "\n{}",
2957 style::success(&format!(
2958 "{} All changes committed successfully",
2959 style::icons::SUCCESS
2960 ))
2961 );
2962 break;
2963 },
2964 Err(err) => return Err(err),
2965 Ok(remaining_diff) => {
2966 eprintln!(
2967 "\n{}",
2968 style::warning(&format!(
2969 "{} Uncommitted changes remain after round {round}",
2970 style::icons::WARNING
2971 ))
2972 );
2973 eprintln!("{remaining_diff}");
2974 },
2975 }
2976
2977 if round < max_rounds {
2978 eprintln!("{}", style::info("Starting another compose round..."));
2979 } else {
2980 eprintln!(
2981 "{}",
2982 style::warning(&format!(
2983 "Reached max rounds ({max_rounds}). Remaining changes need manual commit."
2984 ))
2985 );
2986 }
2987 }
2988
2989 Ok(())
2990}
2991
2992#[tracing::instrument(target = "lgit", name = "compose.round", skip_all, fields(dir = %args.dir, round))]
2993async fn run_compose_round(args: &Args, config: &CommitConfig, round: usize) -> Result<()> {
2994 let base_state = capture_compose_base_state(&args.dir)?;
2995 let diff = get_compose_diff(&args.dir)?;
2996 let stat = get_compose_stat(&args.dir)?;
2997 let snapshot = build_compose_snapshot(&diff, &stat)?;
2998
2999 if let Some(debug_dir) = args.debug_output.as_deref() {
3000 save_debug_artifact(
3001 Some(debug_dir),
3002 &format!("compose_round_{round}_snapshot.json"),
3003 &snapshot,
3004 )?;
3005 }
3006
3007 let token_counter = create_token_counter(config);
3008 let observations = if should_collect_compose_observations(&snapshot, config, &token_counter) {
3009 println!("{}", style::info("Summarizing compose snapshot with map-reduce..."));
3010 observe_diff_files(&snapshot.diff, &config.summary_model, config, &token_counter).await?
3011 } else {
3012 if planning_mode_for_snapshot(&snapshot) == PlanningMode::Area
3013 && should_use_map_reduce(&snapshot.diff, config, &token_counter)
3014 {
3015 println!(
3016 "{}",
3017 style::info(
3018 "Skipping per-file observations for very large compose snapshot; using area-level \
3019 planning instead."
3020 )
3021 );
3022 }
3023 Vec::new()
3024 };
3025
3026 if let Some(debug_dir) = args.debug_output.as_deref()
3027 && !observations.is_empty()
3028 {
3029 save_debug_artifact(
3030 Some(debug_dir),
3031 &format!("compose_round_{round}_observations.json"),
3032 &observations,
3033 )?;
3034 }
3035
3036 let max_commits = args.compose_max_commits.unwrap_or(20);
3037 let executable_plan = if let Some(cached_plan) =
3038 load_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model)?
3039 {
3040 println!("{}", style::info("Reusing cached compose plan for identical snapshot..."));
3041 cached_plan
3042 } else {
3043 println!("{}", style::info(&format!("Planning changes (max {max_commits} commits)...")));
3044 let intent_plan = analyze_compose_intent(
3045 &snapshot,
3046 &observations,
3047 config,
3048 max_commits,
3049 args.debug_output.as_deref(),
3050 )
3051 .await?;
3052
3053 if let Some(debug_dir) = args.debug_output.as_deref() {
3054 save_debug_artifact(
3055 Some(debug_dir),
3056 &format!("compose_round_{round}_intent_plan.json"),
3057 &intent_plan,
3058 )?;
3059 }
3060
3061 println!("{}", style::info("Binding hunks to groups..."));
3062 let plan =
3063 bind_compose_plan(&snapshot, &intent_plan, config, args.debug_output.as_deref()).await?;
3064 save_cached_plan(&args.dir, &snapshot, max_commits, &config.analysis_model, &plan)?;
3065 plan
3066 };
3067
3068 if let Some(debug_dir) = args.debug_output.as_deref() {
3069 save_debug_artifact(
3070 Some(debug_dir),
3071 &format!("compose_round_{round}_executable_plan.json"),
3072 &executable_plan,
3073 )?;
3074 }
3075
3076 print_executable_plan(&snapshot, &executable_plan);
3077
3078 if args.compose_preview {
3079 println!(
3080 "\n{}",
3081 style::success(&format!(
3082 "{} Preview complete (use --compose without --compose-preview to execute)",
3083 style::icons::SUCCESS
3084 ))
3085 );
3086 return Ok(());
3087 }
3088
3089 println!("\n{}", style::info(&format!("Executing compose (round {round})...")));
3090 let hashes = execute_compose(&snapshot, &executable_plan, config, args, &base_state).await?;
3091 println!(
3092 "{}",
3093 style::success(&format!(
3094 "{} Round {round}: Created {} commit(s)",
3095 style::icons::SUCCESS,
3096 hashes.len()
3097 ))
3098 );
3099 Ok(())
3100}
3101
3102#[cfg(test)]
3103mod tests {
3104 use std::{fmt::Write, fs};
3105
3106 use tempfile::TempDir;
3107
3108 use super::*;
3109 use crate::{config::CommitConfig, patch::build_compose_snapshot, types::CommitType};
3110
/// Fixture: a diff with two hunks in `src/lib.rs` and one in `tests/lib.rs`,
/// paired with its stat text. Returns `(diff, stat)`.
fn shared_file_diff() -> (&'static str, &'static str) {
	let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,3 @@
-fn alpha() {
+fn alpha_changed() {
 println!("alpha");
 }
@@ -12,3 +12,3 @@
-fn beta() {
+fn beta_changed() {
 println!("beta");
 }
diff --git a/tests/lib.rs b/tests/lib.rs
index 3333333..4444444 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -1,3 +1,4 @@
 fn test_it() {
+ assert!(true);
 }
"#;
	let stat = " src/lib.rs | 4 ++--\n tests/lib.rs | 1 +\n";

	(diff, stat)
}
3139
3140 fn build_test_snapshot() -> ComposeSnapshot {
3141 let (diff, stat) = shared_file_diff();
3142 build_compose_snapshot(diff, stat).unwrap()
3143 }
3144
3145 fn write_file(dir: &TempDir, path: &str, contents: &str) {
3146 let full_path = dir.path().join(path);
3147 if let Some(parent) = full_path.parent() {
3148 fs::create_dir_all(parent).unwrap();
3149 }
3150 fs::write(full_path, contents).unwrap();
3151 }
3152
3153 fn run_git(dir: &TempDir, args: &[&str]) -> String {
3154 let output = crate::git::git_command()
3155 .args(args)
3156 .current_dir(dir.path())
3157 .output()
3158 .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}"));
3159
3160 assert!(
3161 output.status.success(),
3162 "git {:?} failed: stdout={} stderr={}",
3163 args,
3164 String::from_utf8_lossy(&output.stdout),
3165 String::from_utf8_lossy(&output.stderr)
3166 );
3167
3168 String::from_utf8_lossy(&output.stdout).to_string()
3169 }
3170
3171 fn init_repo() -> TempDir {
3172 let dir = TempDir::new().unwrap();
3173 run_git(&dir, &["init"]);
3174 run_git(&dir, &["config", "user.name", "Compose Test"]);
3175 run_git(&dir, &["config", "user.email", "compose@test.local"]);
3176 run_git(&dir, &["config", "commit.gpgsign", "false"]);
3177 dir
3178 }
3179
3180 fn commit_all(dir: &TempDir, message: &str) {
3181 run_git(dir, &["add", "."]);
3182 run_git(dir, &["commit", "-m", message]);
3183 }
3184
3185 fn canned_message(summary: &str) -> (Vec<String>, CommitSummary) {
3186 (vec![], CommitSummary::new_unchecked(summary, 128).unwrap())
3187 }
3188
/// Files under `prompts/` and `system/` are categorized as prompts, while
/// `README.md` is docs; a prompt file in a `feat` group gets a type bonus of
/// 10, and a prompt-only group falls back to `refactor`.
#[test]
fn test_compose_file_category_treats_prompts_as_functional_source() {
	let snapshot = build_compose_snapshot(
		r"diff --git a/prompts/analysis/default.md b/prompts/analysis/default.md
index 1111111..2222222 100644
--- a/prompts/analysis/default.md
+++ b/prompts/analysis/default.md
@@ -1,1 +1,1 @@
-old prompt
+new prompt
diff --git a/system/analysis/default.md b/system/analysis/default.md
index 5555555..6666666 100644
--- a/system/analysis/default.md
+++ b/system/analysis/default.md
@@ -1,1 +1,1 @@
-old system
+new system
diff --git a/README.md b/README.md
index 3333333..4444444 100644
--- a/README.md
+++ b/README.md
@@ -1,1 +1,1 @@
-old docs
+new docs
",
		"",
	)
	.unwrap();

	let expectations = [
		("prompts/analysis/default.md", ComposeFileCategory::Prompt),
		("system/analysis/default.md", ComposeFileCategory::Prompt),
		("README.md", ComposeFileCategory::Docs),
	];
	for (path, expected) in expectations {
		let file = snapshot.file_by_path(path).unwrap();
		assert_eq!(compose_file_category(file), expected);
	}

	let prompt_file = snapshot
		.file_by_path("prompts/analysis/default.md")
		.unwrap();
	let prompt_ids = std::slice::from_ref(&prompt_file.file_id);

	let feat_group = ComposeIntentGroup {
		group_id: "G1".to_string(),
		commit_type: CommitType::new("feat").unwrap(),
		scope: None,
		file_ids: prompt_ids.to_vec(),
		rationale: "prompt behavior change".to_string(),
		dependencies: vec![],
	};
	assert_eq!(group_type_bonus(prompt_file, &feat_group), 10);

	let fallback_type = fallback_commit_type_for_group(&snapshot, &[], prompt_ids).unwrap();
	assert_eq!(fallback_type.as_str(), "refactor");
}
3239
3240 fn build_large_snapshot(file_count: usize, hunks_per_file: usize) -> ComposeSnapshot {
3241 let mut diff = String::new();
3242
3243 for file_idx in 0..file_count {
3244 let path = format!("src/module_{file_idx:03}.rs");
3245 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3246 diff.push_str("index 1111111..2222222 100644\n");
3247 writeln!(diff, "--- a/{path}").unwrap();
3248 writeln!(diff, "+++ b/{path}").unwrap();
3249
3250 for hunk_idx in 0..hunks_per_file {
3251 let line_no = (hunk_idx * 4) + 1;
3252 writeln!(diff, "@@ -{line_no},1 +{line_no},1 @@").unwrap();
3253 writeln!(diff, "-old_{file_idx}_{hunk_idx}").unwrap();
3254 writeln!(diff, "+new_{file_idx}_{hunk_idx}").unwrap();
3255 }
3256 }
3257
3258 build_compose_snapshot(&diff, "").unwrap()
3259 }
3260
3261 fn build_multi_area_snapshot() -> ComposeSnapshot {
3262 let mut diff = String::new();
3263 let areas = [
3264 ("apps/frontend/src/server", 72),
3265 ("packages/model/src/models", 54),
3266 ("apps/daemon/src/worker", 43),
3267 (".github/workflows", 16),
3268 ];
3269
3270 for (prefix, count) in areas {
3271 for file_idx in 0..count {
3272 let path = format!("{prefix}/file_{file_idx:03}.rs");
3273 writeln!(diff, "diff --git a/{path} b/{path}").unwrap();
3274 diff.push_str("index 1111111..2222222 100644\n");
3275 writeln!(diff, "--- a/{path}").unwrap();
3276 writeln!(diff, "+++ b/{path}").unwrap();
3277 diff.push_str("@@ -1,1 +1,1 @@\n");
3278 writeln!(diff, "-old_{file_idx}").unwrap();
3279 writeln!(diff, "+new_{file_idx}").unwrap();
3280 }
3281 }
3282
3283 build_compose_snapshot(&diff, "").unwrap()
3284 }
3285
3286 fn build_shared_intent_plan(snapshot: &ComposeSnapshot) -> ComposeIntentPlan {
3287 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3288 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3289 let groups = vec![
3290 ComposeIntentGroup {
3291 group_id: "G1".to_string(),
3292 commit_type: CommitType::new("refactor").unwrap(),
3293 scope: None,
3294 file_ids: vec![source_file.file_id.clone(), test_file.file_id.clone()],
3295 rationale: "implementation group".to_string(),
3296 dependencies: vec![],
3297 },
3298 ComposeIntentGroup {
3299 group_id: "G2".to_string(),
3300 commit_type: CommitType::new("refactor").unwrap(),
3301 scope: None,
3302 file_ids: vec![source_file.file_id.clone()],
3303 rationale: "shared file follow-up".to_string(),
3304 dependencies: vec!["G1".to_string()],
3305 },
3306 ];
3307 let dependency_order =
3308 compute_dependency_order(&groups, |group| &group.group_id, |group| &group.dependencies)
3309 .unwrap();
3310 ComposeIntentPlan { groups, dependency_order }
3311 }
3312
3313 #[test]
3314 fn test_execute_compose_with_temp_index_applies_two_group_plan() {
3315 let dir = init_repo();
3316 write_file(&dir, "src/a.rs", "fn a() {}\n");
3317 write_file(&dir, "src/b.rs", "fn b() {}\n");
3318 commit_all(&dir, "initial");
3319 write_file(&dir, "src/a.rs", "fn a_changed() {}\n");
3320 write_file(&dir, "src/b.rs", "fn b_changed() {}\n");
3321
3322 let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3323 let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3324 let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3325 let a_file = snapshot.file_by_path("src/a.rs").unwrap();
3326 let b_file = snapshot.file_by_path("src/b.rs").unwrap();
3327 let plan = ComposeExecutablePlan {
3328 groups: vec![
3329 ComposeExecutableGroup {
3330 group_id: "G1".to_string(),
3331 commit_type: CommitType::new("refactor").unwrap(),
3332 scope: None,
3333 file_ids: vec![a_file.file_id.clone()],
3334 rationale: "change a".to_string(),
3335 dependencies: vec![],
3336 hunk_ids: a_file.hunk_ids.clone(),
3337 },
3338 ComposeExecutableGroup {
3339 group_id: "G2".to_string(),
3340 commit_type: CommitType::new("refactor").unwrap(),
3341 scope: None,
3342 file_ids: vec![b_file.file_id.clone()],
3343 rationale: "change b".to_string(),
3344 dependencies: vec!["G1".to_string()],
3345 hunk_ids: b_file.hunk_ids.clone(),
3346 },
3347 ],
3348 dependency_order: vec![0, 1],
3349 };
3350 let config = CommitConfig::default();
3351 let args = Args {
3352 dir: dir.path().to_string_lossy().to_string(),
3353 compose: true,
3354 ..Default::default()
3355 };
3356 let base_state = capture_compose_base_state(&args.dir).unwrap();
3357
3358 let hashes = execute_compose_with_prepared_messages(
3359 &snapshot,
3360 &plan,
3361 &config,
3362 &args,
3363 &base_state,
3364 vec![canned_message("change a"), canned_message("change b")],
3365 )
3366 .unwrap();
3367
3368 assert_eq!(hashes.len(), 2);
3369 assert_eq!(get_head_hash(&args.dir).unwrap(), hashes[1]);
3370 assert!(run_git(&dir, &["diff", "--cached"]).trim().is_empty());
3371 }
3372
3373 #[test]
3374 fn test_execute_compose_failure_before_update_ref_preserves_real_index() {
3375 let dir = init_repo();
3376 write_file(&dir, "src/lib.rs", "old\n");
3377 write_file(&dir, "sentinel.txt", "base\n");
3378 commit_all(&dir, "initial");
3379 let initial_head = get_head_hash(dir.path().to_str().unwrap()).unwrap();
3380
3381 write_file(&dir, "src/lib.rs", "changed\n");
3383
3384 write_file(&dir, "sentinel.txt", "base\nstaged sentinel\n");
3386 run_git(&dir, &["add", "sentinel.txt"]);
3387 let staged_before = run_git(&dir, &["diff", "--cached"]);
3388 assert!(staged_before.contains("staged sentinel"));
3389
3390 let diff = get_compose_diff(dir.path().to_str().unwrap()).unwrap();
3391 let stat = get_compose_stat(dir.path().to_str().unwrap()).unwrap();
3392 let snapshot = build_compose_snapshot(&diff, &stat).unwrap();
3393 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3394 let plan = ComposeExecutablePlan {
3397 groups: vec![ComposeExecutableGroup {
3398 group_id: "G1".to_string(),
3399 commit_type: CommitType::new("fix").unwrap(),
3400 scope: None,
3401 file_ids: vec![source_file.file_id.clone()],
3402 rationale: "unstageable group".to_string(),
3403 dependencies: vec![],
3404 hunk_ids: vec!["F999-H001".to_string()],
3405 }],
3406 dependency_order: vec![0],
3407 };
3408 let config = CommitConfig::default();
3409 let args = Args {
3410 dir: dir.path().to_string_lossy().to_string(),
3411 compose: true,
3412 ..Default::default()
3413 };
3414 let base_state = capture_compose_base_state(&args.dir).unwrap();
3415
3416 let err = execute_compose_with_prepared_messages(
3417 &snapshot,
3418 &plan,
3419 &config,
3420 &args,
3421 &base_state,
3422 vec![canned_message("unstageable group")],
3423 )
3424 .unwrap_err();
3425
3426 assert!(err.to_string().contains("unknown hunk id"));
3427 assert_eq!(get_head_hash(&args.dir).unwrap(), initial_head);
3428 assert_eq!(run_git(&dir, &["diff", "--cached"]), staged_before);
3429 }
3430
3431 #[test]
3432 fn test_auto_assign_hunks_marks_shared_file_ambiguous() {
3433 let snapshot = build_test_snapshot();
3434 let intent_plan = build_shared_intent_plan(&snapshot);
3435 let (assigned, ambiguous) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3436
3437 assert_eq!(ambiguous.len(), 1);
3438 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3439 let assigned_to_g1 = assigned.get("G1").unwrap();
3440 assert!(
3441 test_file
3442 .hunk_ids
3443 .iter()
3444 .all(|hunk_id| assigned_to_g1.contains(hunk_id)),
3445 "uniquely owned file should be auto-assigned"
3446 );
3447 }
3448
3449 #[test]
3450 fn test_ambiguous_fallback_merges_and_prunes_empty_group() {
3451 let snapshot = build_test_snapshot();
3452 let intent_plan = build_shared_intent_plan(&snapshot);
3453 let (mut assigned, ambiguous_files) = auto_assign_hunks(&snapshot, &intent_plan).unwrap();
3454 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3455 let hunk_context = ambiguous_hunk_context(&ambiguous_files);
3456 let valid_group_ids: HashSet<&str> = intent_plan
3457 .groups
3458 .iter()
3459 .map(|group| group.group_id.as_str())
3460 .collect();
3461
3462 let evaluation = evaluate_binding(
3463 &[
3464 ComposeBindingAssignment {
3465 group_id: "G1".to_string(),
3466 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3467 },
3468 ComposeBindingAssignment {
3469 group_id: "G2".to_string(),
3470 hunk_ids: vec![source_file.hunk_ids[1].clone()],
3471 },
3472 ],
3473 &hunk_context,
3474 &valid_group_ids,
3475 &snapshot,
3476 );
3477
3478 for (group_id, hunk_ids) in evaluation.assigned {
3479 let entry = assigned.entry(group_id).or_default();
3480 for hunk_id in hunk_ids {
3481 entry.insert(hunk_id);
3482 }
3483 }
3484
3485 let group_rank: HashMap<&str, usize> = intent_plan
3486 .dependency_order
3487 .iter()
3488 .enumerate()
3489 .map(|(position, idx)| (intent_plan.groups[*idx].group_id.as_str(), position))
3490 .collect();
3491 assign_unresolved_hunks(&evaluation.unresolved, &mut assigned, &ambiguous_files, &group_rank);
3492
3493 let executable_plan = finalize_executable_plan(&snapshot, &intent_plan, assigned).unwrap();
3494 assert_eq!(executable_plan.groups.len(), 1);
3495 assert_eq!(executable_plan.groups[0].group_id, "G1");
3496 assert!(
3497 source_file
3498 .hunk_ids
3499 .iter()
3500 .all(|hunk_id| executable_plan.groups[0].hunk_ids.contains(hunk_id)),
3501 "fallback should keep every hunk from the shared file in the surviving group"
3502 );
3503 }
3504
3505 #[test]
3506 fn test_validate_executable_plan_rejects_overlap() {
3507 let snapshot = build_test_snapshot();
3508 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3509 let executable_plan = ComposeExecutablePlan {
3510 groups: vec![
3511 ComposeExecutableGroup {
3512 group_id: "G1".to_string(),
3513 commit_type: CommitType::new("refactor").unwrap(),
3514 scope: None,
3515 file_ids: vec![source_file.file_id.clone()],
3516 rationale: "group one".to_string(),
3517 dependencies: vec![],
3518 hunk_ids: vec![source_file.hunk_ids[0].clone()],
3519 },
3520 ComposeExecutableGroup {
3521 group_id: "G2".to_string(),
3522 commit_type: CommitType::new("refactor").unwrap(),
3523 scope: None,
3524 file_ids: vec![source_file.file_id.clone()],
3525 rationale: "group two".to_string(),
3526 dependencies: vec![],
3527 hunk_ids: vec![source_file.hunk_ids[0].clone(), source_file.hunk_ids[1].clone()],
3528 },
3529 ],
3530 dependency_order: vec![0, 1],
3531 };
3532
3533 let err = validate_executable_plan(&snapshot, &executable_plan).unwrap_err();
3534 assert!(err.to_string().contains("assigned to both"));
3535 }
3536
3537 #[test]
3538 fn test_normalize_intent_plan_maps_path_references_to_file_ids() {
3539 let snapshot = build_test_snapshot();
3540 let planning_index = build_planning_index(&snapshot);
3541 let groups = vec![ComposeIntentGroup {
3542 group_id: "G1".to_string(),
3543 commit_type: CommitType::new("refactor").unwrap(),
3544 scope: None,
3545 file_ids: vec!["src/lib.rs".to_string(), "`tests/lib.rs`".to_string()],
3546 rationale: "normalize file references".to_string(),
3547 dependencies: vec![],
3548 }];
3549
3550 let (normalized_groups, repair_notes) =
3551 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3552
3553 assert_eq!(normalized_groups.len(), 1);
3554 assert_eq!(
3555 normalized_groups[0].file_ids,
3556 snapshot
3557 .files
3558 .iter()
3559 .map(|file| file.file_id.clone())
3560 .collect::<Vec<_>>()
3561 );
3562 assert_eq!(repair_notes.len(), 2);
3563 }
3564
3565 #[test]
3566 fn test_normalize_intent_plan_repairs_missing_files() {
3567 let snapshot = build_test_snapshot();
3568 let planning_index = build_planning_index(&snapshot);
3569 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3570 let test_file = snapshot.file_by_path("tests/lib.rs").unwrap();
3571 let groups = vec![ComposeIntentGroup {
3572 group_id: "G1".to_string(),
3573 commit_type: CommitType::new("refactor").unwrap(),
3574 scope: None,
3575 file_ids: vec![source_file.file_id.clone()],
3576 rationale: "partial coverage".to_string(),
3577 dependencies: vec![],
3578 }];
3579
3580 let (normalized_groups, repair_notes) =
3581 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3582
3583 assert_eq!(normalized_groups.len(), 1);
3584 assert!(
3585 normalized_groups[0].file_ids.contains(&source_file.file_id),
3586 "existing file assignment should be preserved"
3587 );
3588 assert!(
3589 normalized_groups[0].file_ids.contains(&test_file.file_id),
3590 "missing files should be assigned to an existing group"
3591 );
3592 assert_eq!(repair_notes.len(), 1);
3593 assert!(repair_notes[0].contains(&test_file.file_id));
3594 }
3595
3596 #[test]
3597 fn test_normalize_intent_plan_drops_placeholder_targets_and_repairs_dependencies() {
3598 let snapshot = build_multi_area_snapshot();
3599 let planning_index = build_planning_index(&snapshot);
3600 let frontend_target = planning_index
3601 .targets
3602 .iter()
3603 .find(|target| target.label.starts_with("apps/frontend"))
3604 .unwrap();
3605 let model_target = planning_index
3606 .targets
3607 .iter()
3608 .find(|target| target.label.starts_with("packages/model"))
3609 .unwrap();
3610 let groups = vec![
3611 ComposeIntentGroup {
3612 group_id: "G1".to_string(),
3613 commit_type: CommitType::new("refactor").unwrap(),
3614 scope: Scope::new("apps/frontend").ok(),
3615 file_ids: vec!["G3_PLACEHOLDER".to_string(), frontend_target.target_id.clone()],
3616 rationale: "frontend platform updates".to_string(),
3617 dependencies: vec!["group 2".to_string(), "G1".to_string()],
3618 },
3619 ComposeIntentGroup {
3620 group_id: "G2".to_string(),
3621 commit_type: CommitType::new("refactor").unwrap(),
3622 scope: Scope::new("packages/model").ok(),
3623 file_ids: vec!["UNKNOWN_TARGET".to_string(), model_target.target_id.clone()],
3624 rationale: "model storage updates".to_string(),
3625 dependencies: vec!["F5".to_string()],
3626 },
3627 ];
3628
3629 let (normalized_groups, repair_notes) =
3630 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3631
3632 assert_eq!(normalized_groups.len(), 2);
3633 assert!(
3634 normalized_groups[0]
3635 .file_ids
3636 .iter()
3637 .all(|file_id| file_id.starts_with('F'))
3638 );
3639 assert_eq!(normalized_groups[0].dependencies, vec!["G2".to_string()]);
3640 assert!(normalized_groups[1].dependencies.is_empty());
3641 assert!(
3642 repair_notes
3643 .iter()
3644 .any(|note| note.contains("Dropped unknown planning target"))
3645 );
3646 assert!(
3647 repair_notes
3648 .iter()
3649 .any(|note| note.contains("Dropped self-dependency"))
3650 );
3651 assert!(
3652 repair_notes
3653 .iter()
3654 .any(|note| note.contains("Mapped compose planner dependency"))
3655 );
3656 assert!(
3657 repair_notes
3658 .iter()
3659 .any(|note| note.contains("Dropped unknown dependency"))
3660 );
3661 }
3662
3663 #[test]
3664 fn test_render_snapshot_summary_keeps_all_hunks_for_small_snapshot() {
3665 let snapshot = build_test_snapshot();
3666 let summary = render_snapshot_summary(&snapshot, &[]);
3667 let source_file = snapshot.file_by_path("src/lib.rs").unwrap();
3668
3669 assert!(!summary.contains("# snapshot compacted"));
3670 for hunk_id in &source_file.hunk_ids {
3671 assert!(summary.contains(hunk_id));
3672 }
3673 }
3674
3675 #[test]
3676 fn test_render_snapshot_summary_compacts_large_snapshot() {
3677 let snapshot = build_large_snapshot(160, 4);
3678 let summary = render_snapshot_summary(&snapshot, &[]);
3679
3680 assert!(summary.contains("# snapshot compacted"));
3681 assert!(summary.contains("- F001 src/module_000.rs (+4/-4, 4 hunks)"));
3682 assert!(summary.contains("F001-H001"));
3683 assert!(summary.contains("F001-H004"));
3684 assert!(!summary.contains("F001-H002"));
3685 assert!(!summary.contains("F001-H003"));
3686 assert!(summary.contains("... 2 more hunks omitted from F001"));
3687 }
3688
3689 #[test]
3690 fn test_build_planning_index_uses_area_targets_for_large_snapshot() {
3691 let snapshot = build_multi_area_snapshot();
3692 let planning_index = build_planning_index(&snapshot);
3693
3694 assert_eq!(planning_index.mode, PlanningMode::Area);
3695 assert!(planning_index.targets.len() < snapshot.files.len());
3696 assert!(
3697 planning_index
3698 .targets
3699 .iter()
3700 .any(|target| target.label.starts_with("apps/frontend"))
3701 );
3702 assert!(
3703 render_planning_stat(&planning_index).contains("planning over"),
3704 "planning stat should explain the area mode"
3705 );
3706 }
3707
3708 #[test]
3709 fn test_normalize_intent_plan_expands_area_targets() {
3710 let snapshot = build_multi_area_snapshot();
3711 let planning_index = build_planning_index(&snapshot);
3712 let midpoint = planning_index.targets.len() / 2;
3713 let first_group_targets: Vec<String> = planning_index
3714 .targets
3715 .iter()
3716 .take(midpoint)
3717 .map(|target| target.label.clone())
3718 .collect();
3719 let second_group_targets: Vec<String> = planning_index
3720 .targets
3721 .iter()
3722 .skip(midpoint)
3723 .map(|target| target.label.clone())
3724 .collect();
3725 let groups = vec![
3726 ComposeIntentGroup {
3727 group_id: "G1".to_string(),
3728 commit_type: CommitType::new("refactor").unwrap(),
3729 scope: None,
3730 file_ids: first_group_targets,
3731 rationale: "frontend and model".to_string(),
3732 dependencies: vec![],
3733 },
3734 ComposeIntentGroup {
3735 group_id: "G2".to_string(),
3736 commit_type: CommitType::new("refactor").unwrap(),
3737 scope: None,
3738 file_ids: second_group_targets,
3739 rationale: "daemon and ci".to_string(),
3740 dependencies: vec![],
3741 },
3742 ];
3743
3744 let (normalized_groups, repair_notes) =
3745 normalize_intent_plan(&snapshot, &planning_index, groups).unwrap();
3746
3747 assert_eq!(normalized_groups.len(), 2);
3748 assert!(
3749 normalized_groups
3750 .iter()
3751 .flat_map(|group| group.file_ids.iter())
3752 .all(|file_id| file_id.starts_with('F')),
3753 "area targets should expand back to concrete file IDs"
3754 );
3755 assert!(!repair_notes.is_empty());
3756 assert_eq!(
3757 normalized_groups
3758 .iter()
3759 .flat_map(|group| group.file_ids.iter())
3760 .collect::<HashSet<_>>()
3761 .len(),
3762 snapshot.files.len()
3763 );
3764 }
3765
3766 #[test]
3767 fn test_large_patch_fallback_splits_monolithic_area_plan() {
3768 let snapshot = build_multi_area_snapshot();
3769 let planning_index = build_planning_index(&snapshot);
3770 let monolithic_group = ComposeIntentGroup {
3771 group_id: "G1".to_string(),
3772 commit_type: CommitType::new("refactor").unwrap(),
3773 scope: None,
3774 file_ids: snapshot
3775 .files
3776 .iter()
3777 .map(|file| file.file_id.clone())
3778 .collect(),
3779 rationale: "repo-wide refactor".to_string(),
3780 dependencies: vec![],
3781 };
3782
3783 assert!(should_force_large_patch_fallback(
3784 &snapshot,
3785 &planning_index,
3786 &[monolithic_group],
3787 6
3788 ));
3789
3790 let fallback_groups =
3791 build_large_patch_fallback_groups(&snapshot, &planning_index, 6).unwrap();
3792 assert!(fallback_groups.len() >= 3);
3793 assert_eq!(
3794 fallback_groups
3795 .iter()
3796 .flat_map(|group| group.file_ids.iter())
3797 .collect::<HashSet<_>>()
3798 .len(),
3799 snapshot.files.len()
3800 );
3801 assert!(
3802 fallback_groups
3803 .iter()
3804 .any(|group| group.rationale.contains("frontend")),
3805 "fallback should preserve workstream identity"
3806 );
3807 }
3808
3809 #[test]
3810 fn test_should_collect_compose_observations_skips_area_mode() {
3811 let snapshot = build_large_snapshot(160, 4);
3812 let config = CommitConfig { map_reduce_threshold: 1_000, ..Default::default() };
3813 let counter = create_token_counter(&config);
3814
3815 assert!(should_use_map_reduce(&snapshot.diff, &config, &counter));
3816 assert!(!should_collect_compose_observations(&snapshot, &config, &counter));
3817 }
3818
3819 #[test]
3820 fn test_compose_analysis_strategy_uses_map_reduce_for_large_diff() {
3821 let config = CommitConfig { map_reduce_threshold: 20, ..Default::default() };
3822 let counter = create_token_counter(&config);
3823 let payload = "a".repeat(200);
3824 let diff = format!("diff --git a/a.rs b/a.rs\n@@ -0,0 +1 @@\n+{payload}");
3825
3826 assert_eq!(
3827 compose_analysis_strategy(&diff, &config, &counter),
3828 ComposeAnalysisStrategy::MapReduce
3829 );
3830 }
3831
3832 #[test]
3833 fn test_compose_analysis_strategy_truncates_when_map_reduce_disabled() {
3834 let config = CommitConfig {
3835 map_reduce_enabled: false,
3836 max_diff_tokens: 1,
3837 max_diff_length: 10_000,
3838 ..Default::default()
3839 };
3840 let counter = create_token_counter(&config);
3841 assert_eq!(compose_truncation_length(&config), 4);
3842
3843 assert_eq!(
3844 compose_analysis_strategy(
3845 "diff --git a/models.json b/models.json\n+large",
3846 &config,
3847 &counter
3848 ),
3849 ComposeAnalysisStrategy::SmartTruncate
3850 );
3851 }
3852
3853 #[test]
3854 fn test_compose_analysis_strategy_keeps_small_group_direct() {
3855 let config = CommitConfig {
3856 map_reduce_threshold: 1_000,
3857 max_diff_tokens: 1_000,
3858 max_diff_length: 10_000,
3859 ..Default::default()
3860 };
3861 let counter = create_token_counter(&config);
3862
3863 assert_eq!(
3864 compose_analysis_strategy("diff --git a/a.rs b/a.rs\n+a", &config, &counter),
3865 ComposeAnalysisStrategy::Direct
3866 );
3867 }
3868
3869 #[test]
3870 fn test_chunk_ambiguous_files_splits_large_binding_request() {
3871 let ambiguous_files = vec![
3872 AmbiguousFileBinding {
3873 file_id: "F001".to_string(),
3874 path: "src/alpha.rs".to_string(),
3875 candidate_group_ids: vec!["G1".to_string(), "G2".to_string()],
3876 hunk_ids: (1..=70).map(|idx| format!("F001-H{idx:03}")).collect(),
3877 },
3878 AmbiguousFileBinding {
3879 file_id: "F002".to_string(),
3880 path: "src/beta.rs".to_string(),
3881 candidate_group_ids: vec!["G1".to_string(), "G3".to_string()],
3882 hunk_ids: (1..=60).map(|idx| format!("F002-H{idx:03}")).collect(),
3883 },
3884 AmbiguousFileBinding {
3885 file_id: "F003".to_string(),
3886 path: "src/gamma.rs".to_string(),
3887 candidate_group_ids: vec!["G2".to_string(), "G3".to_string()],
3888 hunk_ids: (1..=10).map(|idx| format!("F003-H{idx:03}")).collect(),
3889 },
3890 ];
3891
3892 let batches = chunk_ambiguous_files(&ambiguous_files);
3893 let total_hunks: usize = batches
3894 .iter()
3895 .flatten()
3896 .map(|file| file.hunk_ids.len())
3897 .sum();
3898
3899 assert_eq!(batches.len(), 2);
3900 assert_eq!(batches[0].len(), 1);
3901 assert_eq!(batches[1].len(), 2);
3902 assert_eq!(total_hunks, 140);
3903 assert!(batches.iter().all(|batch| {
3904 batch.len() <= MAX_BIND_FILES_PER_REQUEST
3905 && batch.iter().map(|file| file.hunk_ids.len()).sum::<usize>()
3906 <= MAX_BIND_HUNKS_PER_REQUEST
3907 }));
3908 }
3909}