1use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
2use std::io::Write;
3use std::path::{Path, PathBuf};
4
5use serde::Serialize;
6
7use crate::parse::{Plan, Sprint, parse_plan_with_display};
8
/// Help text for `-h`/`--help`; also printed by `print_usage()` on usage
/// errors (exit code 2).
const USAGE: &str = r#"Usage:
  plan-tooling split-prs --file <plan.md> --pr-grouping <per-sprint|group> [options]

Purpose:
  Build task-to-PR split records from a Plan Format v1 file.

Required:
  --file <path>            Plan file to parse
  --pr-grouping <mode>     per-sprint | group

Options:
  --scope <plan|sprint>    Scope to split (default: sprint)
  --sprint <n>             Sprint number when --scope sprint
  --pr-group <task=group>  Group pin; repeatable (group mode only)
                           deterministic/group: required for every task
                           auto/group: optional pins + auto assignment for remaining tasks
  --strategy <deterministic|auto>  Split strategy (default: deterministic)
  --explain                Include grouping rationale in JSON output
  --owner-prefix <text>    Owner prefix (default: subagent)
  --branch-prefix <text>   Branch prefix (default: issue)
  --worktree-prefix <text> Worktree prefix (default: issue__)
  --format <json|tsv>      Output format (default: json)
  -h, --help               Show help

Argument style:
  --key value and --key=value are both accepted for value options.

Exit:
  0: success
  1: runtime or validation error
  2: usage error
"#;
41
/// Which portion of the plan to split into PR records.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitScope {
    /// Every sprint in the plan that contains tasks.
    Plan,
    /// A single sprint, selected by its (positive) sprint number.
    Sprint(i32),
}
47
/// PR grouping mode selected with `--pr-grouping`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrGrouping {
    /// Every task in sprint N lands in one group per sprint.
    PerSprint,
    /// Tasks are placed into named groups (pinned and/or auto-assigned).
    Group,
}

impl SplitPrGrouping {
    /// Canonical CLI spelling of this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Group => "group",
            Self::PerSprint => "per-sprint",
        }
    }

    /// Parses the canonical CLI spelling; `None` for anything else.
    fn from_cli(value: &str) -> Option<Self> {
        [Self::PerSprint, Self::Group]
            .into_iter()
            .find(|mode| mode.as_str() == value)
    }
}
70
/// Split strategy selected with `--strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrStrategy {
    /// Only explicit `--pr-group` pins; every task must be covered.
    Deterministic,
    /// Pins are honored and remaining tasks are clustered automatically.
    Auto,
}

impl SplitPrStrategy {
    /// Canonical CLI spelling of this strategy.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Deterministic => "deterministic",
        }
    }

    /// Parses the canonical CLI spelling; `None` for anything else.
    fn from_cli(value: &str) -> Option<Self> {
        [Self::Deterministic, Self::Auto]
            .into_iter()
            .find(|strategy| strategy.as_str() == value)
    }
}
93
/// Options controlling how split records are built.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SplitPlanOptions {
    /// Grouping mode: one group per sprint, or named groups.
    pub pr_grouping: SplitPrGrouping,
    /// Deterministic (pins required for all tasks) or auto (cluster the rest).
    pub strategy: SplitPrStrategy,
    /// Raw `--pr-group <task-or-plan-id>=<group>` entries, unparsed.
    pub pr_group_entries: Vec<String>,
    // NOTE(review): the three prefixes below are carried but never read by
    // the grouping logic in this file — presumably consumed by callers of
    // this module; confirm before removing.
    pub owner_prefix: String,
    pub branch_prefix: String,
    pub worktree_prefix: String,
}
103
/// One task's final PR-split assignment, as exposed to callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SplitPlanRecord {
    /// Synthetic id "S<sprint>T<ordinal>" (1-based ordinal within sprint).
    pub task_id: String,
    /// Sprint number the task belongs to.
    pub sprint: i32,
    /// Whitespace-normalized task summary (name, plan id, or synthetic fallback).
    pub summary: String,
    /// Assigned PR group token (normalized, max 48 chars).
    pub pr_group: String,
}
111
/// Internal working row for one task while grouping decisions are made.
#[derive(Debug, Clone)]
struct Record {
    /// Synthetic id "S<sprint>T<ordinal>".
    task_id: String,
    /// Trimmed task id taken from the plan file; may be empty.
    plan_task_id: String,
    sprint: i32,
    summary: String,
    /// Task complexity; defaulted to 5 when missing or non-positive.
    complexity: i32,
    /// Trimmed location paths with empties and placeholders dropped.
    location_paths: Vec<String>,
    /// Trimmed dependency references with empties and placeholders dropped.
    dependency_keys: Vec<String>,
    /// Assigned PR group token; empty until assignment is done.
    pr_group: String,
}
123
/// Per-sprint metadata hints that steer auto grouping.
#[derive(Debug, Clone, Default)]
struct AutoSprintHint {
    /// Sprint-declared grouping intent, when it parses as a known mode.
    pr_grouping_intent: Option<SplitPrGrouping>,
    /// Execution profile string; "serial" implies a target of one group.
    execution_profile: Option<String>,
    /// Desired number of parallel PR lanes for the sprint.
    target_parallel_width: Option<usize>,
}
130
/// Top-level JSON payload printed in `--format json` mode.
#[derive(Debug, Serialize)]
struct Output {
    /// Plan file path (repo-relative, POSIX separators where possible).
    file: String,
    /// "plan" or "sprint".
    scope: String,
    /// Sprint number when scope is "sprint"; `None` for plan scope.
    sprint: Option<i32>,
    pr_grouping: String,
    strategy: String,
    records: Vec<OutputRecord>,
    /// Grouping rationale; present only when --explain was passed.
    #[serde(skip_serializing_if = "Option::is_none")]
    explain: Option<Vec<ExplainSprint>>,
}
142
/// JSON/TSV projection of a `SplitPlanRecord` (drops the sprint number).
#[derive(Debug, Serialize, PartialEq, Eq)]
struct OutputRecord {
    task_id: String,
    summary: String,
    pr_group: String,
}
149
/// Per-sprint grouping rationale emitted under `explain` in JSON output.
#[derive(Debug, Serialize, PartialEq, Eq)]
struct ExplainSprint {
    sprint: i32,
    /// Hinted parallel width, when the sprint metadata declared one.
    #[serde(skip_serializing_if = "Option::is_none")]
    target_parallel_width: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    execution_profile: Option<String>,
    /// Sprint-declared intent, falling back to the effective CLI mode.
    #[serde(skip_serializing_if = "Option::is_none")]
    pr_grouping_intent: Option<String>,
    groups: Vec<ExplainGroup>,
}
161
/// One PR group inside an `ExplainSprint`.
#[derive(Debug, Serialize, PartialEq, Eq)]
struct ExplainGroup {
    pr_group: String,
    task_ids: Vec<String>,
    /// First task id in the group (its "anchor" task).
    anchor: String,
}
168
/// Entry point for the `split-prs` subcommand.
///
/// Parses CLI arguments (see `USAGE`), validates them, loads and parses the
/// plan file, builds task-to-PR split records for the requested scope, and
/// prints them as JSON (default) or TSV.
///
/// Returns a process exit code: 0 = success, 1 = runtime/validation error,
/// 2 = usage error.
pub fn run(args: &[String]) -> i32 {
    // CLI state, pre-filled with the defaults documented in USAGE.
    let mut file: Option<String> = None;
    let mut scope = String::from("sprint");
    let mut sprint: Option<String> = None;
    let mut pr_grouping: Option<String> = None;
    let mut pr_group_entries: Vec<String> = Vec::new();
    let mut strategy = String::from("deterministic");
    let mut explain = false;
    let mut owner_prefix = String::from("subagent");
    let mut branch_prefix = String::from("issue");
    let mut worktree_prefix = String::from("issue__");
    let mut format = String::from("json");

    // Hand-rolled argument scan. split_value_arg() peels `--key=value`;
    // consume_option_value() handles the `--key value` form and returns the
    // index just past whatever it consumed.
    let mut i = 0usize;
    while i < args.len() {
        let raw_arg = args[i].as_str();
        let (flag, inline_value) = split_value_arg(raw_arg);
        match flag {
            "--file" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--file") else {
                    return die("missing value for --file");
                };
                file = Some(v);
                i = next_i;
            }
            "--scope" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--scope") else {
                    return die("missing value for --scope");
                };
                scope = v;
                i = next_i;
            }
            "--sprint" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--sprint")
                else {
                    return die("missing value for --sprint");
                };
                sprint = Some(v);
                i = next_i;
            }
            "--pr-grouping" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--pr-grouping")
                else {
                    return die("missing value for --pr-grouping");
                };
                pr_grouping = Some(v);
                i = next_i;
            }
            "--pr-group" => {
                // Repeatable flag: entries are collected and parsed later in
                // build_split_plan_records().
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--pr-group")
                else {
                    return die("missing value for --pr-group");
                };
                pr_group_entries.push(v);
                i = next_i;
            }
            "--strategy" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--strategy")
                else {
                    return die("missing value for --strategy");
                };
                strategy = v;
                i = next_i;
            }
            "--explain" => {
                // Boolean flag: `--explain=x` is rejected.
                if inline_value.is_some() {
                    return die("unexpected value for --explain");
                }
                explain = true;
                i += 1;
            }
            "--owner-prefix" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--owner-prefix")
                else {
                    return die("missing value for --owner-prefix");
                };
                owner_prefix = v;
                i = next_i;
            }
            "--branch-prefix" => {
                let Ok((v, next_i)) =
                    consume_option_value(args, i, inline_value, "--branch-prefix")
                else {
                    return die("missing value for --branch-prefix");
                };
                branch_prefix = v;
                i = next_i;
            }
            "--worktree-prefix" => {
                let Ok((v, next_i)) =
                    consume_option_value(args, i, inline_value, "--worktree-prefix")
                else {
                    return die("missing value for --worktree-prefix");
                };
                worktree_prefix = v;
                i = next_i;
            }
            "--format" => {
                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--format")
                else {
                    return die("missing value for --format");
                };
                format = v;
                i = next_i;
            }
            "-h" | "--help" => {
                // `-h=x` / `--help=x` are treated as unknown arguments.
                if inline_value.is_some() {
                    return die(&format!("unknown argument: {raw_arg}"));
                }
                print_usage();
                return 0;
            }
            _ => {
                return die(&format!("unknown argument: {raw_arg}"));
            }
        }
    }

    // --file and --pr-grouping are mandatory; absence is a usage error (2).
    let Some(file_arg) = file else {
        print_usage();
        return 2;
    };
    let Some(mut pr_grouping) = pr_grouping else {
        print_usage();
        return 2;
    };

    // NOTE(review): silently accepts the undocumented "per-spring" typo as an
    // alias for "per-sprint" — USAGE does not mention it; confirm this
    // tolerance is intentional rather than a leftover.
    if pr_grouping == "per-spring" {
        pr_grouping = String::from("per-sprint");
    }
    // Validate the enumerated options up front; py_repr quotes the value.
    if scope != "plan" && scope != "sprint" {
        return die(&format!(
            "invalid --scope (expected plan|sprint): {}",
            crate::repr::py_repr(&scope)
        ));
    }
    if pr_grouping != "per-sprint" && pr_grouping != "group" {
        return die(&format!(
            "invalid --pr-grouping (expected per-sprint|group): {}",
            crate::repr::py_repr(&pr_grouping)
        ));
    }
    if strategy != "deterministic" && strategy != "auto" {
        return die(&format!(
            "invalid --strategy (expected deterministic|auto): {}",
            crate::repr::py_repr(&strategy)
        ));
    }
    if format != "json" && format != "tsv" {
        return die(&format!(
            "invalid --format (expected json|tsv): {}",
            crate::repr::py_repr(&format)
        ));
    }

    // Sprint scope requires a positive sprint number.
    // NOTE(review): --sprint is silently ignored when --scope plan.
    let sprint_num = if scope == "sprint" {
        let Some(raw) = sprint.as_deref() else {
            return die("--sprint is required when --scope sprint");
        };
        match raw.parse::<i32>() {
            Ok(v) if v > 0 => Some(v),
            _ => {
                eprintln!(
                    "error: invalid --sprint (expected positive int): {}",
                    crate::repr::py_repr(raw)
                );
                return 2;
            }
        }
    } else {
        None
    };

    // These two combination checks are re-validated inside
    // build_split_plan_records() for library callers.
    if pr_grouping == "group" && strategy == "deterministic" && pr_group_entries.is_empty() {
        return die(
            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry",
        );
    }
    if pr_grouping != "group" && !pr_group_entries.is_empty() {
        return die("--pr-group can only be used when --pr-grouping group");
    }

    // Resolve the plan path against the repo root but keep the user-supplied
    // spelling for error messages.
    let repo_root = crate::repo_root::detect();
    let display_path = file_arg.clone();
    let read_path = resolve_repo_relative(&repo_root, Path::new(&file_arg));
    if !read_path.is_file() {
        eprintln!("error: plan file not found: {display_path}");
        return 1;
    }

    // Parse the plan; both hard failures and accumulated parse errors are
    // fatal (exit 1).
    let plan: Plan;
    let parse_errors: Vec<String>;
    match parse_plan_with_display(&read_path, &display_path) {
        Ok((p, errs)) => {
            plan = p;
            parse_errors = errs;
        }
        Err(err) => {
            eprintln!("error: {display_path}: {err}");
            return 1;
        }
    }
    if !parse_errors.is_empty() {
        for err in parse_errors {
            eprintln!("error: {display_path}: error: {err}");
        }
        return 1;
    }

    // Convert the validated strings to typed enums. The "internal error"
    // arms are unreachable after the validation above.
    let split_scope = match scope.as_str() {
        "plan" => SplitScope::Plan,
        "sprint" => {
            let Some(want) = sprint_num else {
                return die("internal error: missing sprint number");
            };
            SplitScope::Sprint(want)
        }
        _ => return die("internal error: invalid scope"),
    };
    let Some(grouping_mode) = SplitPrGrouping::from_cli(&pr_grouping) else {
        return die("internal error: invalid pr-grouping");
    };
    let Some(strategy_mode) = SplitPrStrategy::from_cli(&strategy) else {
        return die("internal error: invalid strategy");
    };

    let selected_sprints = match select_sprints_for_scope(&plan, split_scope) {
        Ok(sprints) => sprints,
        Err(err) => {
            eprintln!("error: {display_path}: {err}");
            return 1;
        }
    };
    // Shadows the sprint_hints() function; this local copy feeds --explain.
    let sprint_hints = sprint_hints(&selected_sprints);

    let options = SplitPlanOptions {
        pr_grouping: grouping_mode,
        strategy: strategy_mode,
        pr_group_entries,
        owner_prefix,
        branch_prefix,
        worktree_prefix,
    };
    let split_records = match build_split_plan_records(&selected_sprints, &options) {
        Ok(records) => records,
        Err(err) => {
            eprintln!("error: {err}");
            return 1;
        }
    };
    let explain_payload = if explain {
        Some(build_explain_payload(
            &split_records,
            &sprint_hints,
            options.pr_grouping,
        ))
    } else {
        None
    };

    let out_records: Vec<OutputRecord> = split_records
        .iter()
        .map(OutputRecord::from_split_record)
        .collect();

    // TSV mode short-circuits; --explain is JSON-only by construction here.
    if format == "tsv" {
        print_tsv(&out_records);
        return 0;
    }

    let output = Output {
        file: path_to_posix(&maybe_relativize(&read_path, &repo_root)),
        scope: scope.clone(),
        sprint: sprint_num,
        pr_grouping,
        strategy,
        records: out_records,
        explain: explain_payload,
    };
    match serde_json::to_string(&output) {
        Ok(json) => {
            println!("{json}");
            0
        }
        Err(err) => {
            eprintln!("error: failed to encode JSON: {err}");
            1
        }
    }
}
461
462impl OutputRecord {
463 fn from_split_record(record: &SplitPlanRecord) -> Self {
464 Self {
465 task_id: record.task_id.clone(),
466 summary: record.summary.clone(),
467 pr_group: record.pr_group.clone(),
468 }
469 }
470}
471
472pub fn select_sprints_for_scope(plan: &Plan, scope: SplitScope) -> Result<Vec<Sprint>, String> {
473 let selected = match scope {
474 SplitScope::Plan => plan
475 .sprints
476 .iter()
477 .filter(|s| !s.tasks.is_empty())
478 .cloned()
479 .collect::<Vec<_>>(),
480 SplitScope::Sprint(want) => match plan.sprints.iter().find(|s| s.number == want) {
481 Some(sprint) if !sprint.tasks.is_empty() => vec![sprint.clone()],
482 Some(_) => return Err(format!("sprint {want} has no tasks")),
483 None => return Err(format!("sprint not found: {want}")),
484 },
485 };
486 if selected.is_empty() {
487 return Err("selected scope has no tasks".to_string());
488 }
489 Ok(selected)
490}
491
/// Builds the task-to-PR split records for the already-selected sprints.
///
/// Each task gets a synthetic id `S<sprint>T<ordinal>` and a `pr_group`
/// according to `options`:
/// - per-sprint grouping: every task in sprint N goes to group `s<N>`;
/// - group + deterministic: every task must be pinned via a `--pr-group`
///   entry (by synthetic or plan id, case-insensitively);
/// - group + auto: pins are honored and any remaining tasks are clustered
///   by `assign_auto_groups`.
///
/// Errors on invalid option combinations, malformed `--pr-group` entries,
/// unknown task keys, missing deterministic pins, or an empty selection.
pub fn build_split_plan_records(
    selected_sprints: &[Sprint],
    options: &SplitPlanOptions,
) -> Result<Vec<SplitPlanRecord>, String> {
    // Re-validate the option combinations so library callers get the same
    // errors as the CLI front-end in run().
    if selected_sprints.is_empty() {
        return Err("selected scope has no tasks".to_string());
    }
    if options.pr_grouping == SplitPrGrouping::Group
        && options.strategy == SplitPrStrategy::Deterministic
        && options.pr_group_entries.is_empty()
    {
        return Err(
            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry"
                .to_string(),
        );
    }
    if options.pr_grouping != SplitPrGrouping::Group && !options.pr_group_entries.is_empty() {
        return Err("--pr-group can only be used when --pr-grouping group".to_string());
    }

    let sprint_hints = sprint_hints(selected_sprints);

    // Phase 1: flatten tasks into working Records.
    let mut records: Vec<Record> = Vec::new();
    for sprint in selected_sprints {
        for (idx, task) in sprint.tasks.iter().enumerate() {
            // Ordinal is the 1-based position of the task within its sprint.
            let ordinal = idx + 1;
            let task_id = format!("S{}T{ordinal}", sprint.number);
            let plan_task_id = task.id.trim().to_string();
            // Summary fallback chain: task name -> plan id -> synthetic label.
            let summary = normalize_spaces(if task.name.trim().is_empty() {
                if plan_task_id.is_empty() {
                    format!("sprint-{}-task-{ordinal}", sprint.number)
                } else {
                    plan_task_id.clone()
                }
            } else {
                task.name.trim().to_string()
            });
            // Dependencies and location paths are trimmed; empties and
            // placeholder values are discarded.
            let deps: Vec<String> = task
                .dependencies
                .clone()
                .unwrap_or_default()
                .into_iter()
                .map(|d| d.trim().to_string())
                .filter(|d| !d.is_empty())
                .filter(|d| !is_placeholder(d))
                .collect();
            let location_paths: Vec<String> = task
                .location
                .iter()
                .map(|p| p.trim().to_string())
                .filter(|p| !p.is_empty())
                .filter(|p| !is_placeholder(p))
                .collect();
            // Missing or non-positive complexity falls back to 5.
            let complexity = match task.complexity {
                Some(value) if value > 0 => value,
                _ => 5,
            };

            records.push(Record {
                task_id,
                plan_task_id,
                sprint: sprint.number,
                summary,
                complexity,
                location_paths,
                dependency_keys: deps,
                pr_group: String::new(),
            });
        }
    }

    if records.is_empty() {
        return Err("selected scope has no tasks".to_string());
    }

    // Phase 2: parse --pr-group pins into a lowercase-keyed map.
    // Blank entries are skipped; group names are normalized and capped at 48.
    let mut group_assignments: HashMap<String, String> = HashMap::new();
    let mut assignment_sources: Vec<String> = Vec::new();
    for entry in &options.pr_group_entries {
        let trimmed = entry.trim();
        if trimmed.is_empty() {
            continue;
        }
        let Some((raw_key, raw_group)) = trimmed.split_once('=') else {
            return Err("--pr-group must use <task-or-plan-id>=<group> format".to_string());
        };
        let key = raw_key.trim();
        let group = normalize_token(raw_group.trim(), "", 48);
        if key.is_empty() || group.is_empty() {
            return Err("--pr-group must include both task key and group".to_string());
        }
        assignment_sources.push(key.to_string());
        group_assignments.insert(key.to_ascii_lowercase(), group);
    }

    // Phase 3: reject pins that reference neither a synthetic task id nor a
    // plan task id (case-insensitive); report at most 5 offenders.
    if options.pr_grouping == SplitPrGrouping::Group && !assignment_sources.is_empty() {
        let mut known: HashMap<String, bool> = HashMap::new();
        for rec in &records {
            known.insert(rec.task_id.to_ascii_lowercase(), true);
            if !rec.plan_task_id.is_empty() {
                known.insert(rec.plan_task_id.to_ascii_lowercase(), true);
            }
        }

        let unknown: Vec<String> = assignment_sources
            .iter()
            .filter(|key| !known.contains_key(&key.to_ascii_lowercase()))
            .cloned()
            .collect();
        if !unknown.is_empty() {
            return Err(format!(
                "--pr-group references unknown task keys: {}",
                unknown
                    .iter()
                    .take(5)
                    .cloned()
                    .collect::<Vec<_>>()
                    .join(", ")
            ));
        }
    }

    // Phase 4: assign groups.
    if options.pr_grouping == SplitPrGrouping::Group {
        let mut missing: Vec<String> = Vec::new();
        for rec in &mut records {
            rec.pr_group.clear();
            // Synthetic id wins over plan id when both are pinned.
            for key in [&rec.task_id, &rec.plan_task_id] {
                if key.is_empty() {
                    continue;
                }
                if let Some(v) = group_assignments.get(&key.to_ascii_lowercase()) {
                    rec.pr_group = v.to_string();
                    break;
                }
            }
            if rec.pr_group.is_empty() {
                missing.push(rec.task_id.clone());
            }
        }
        if options.strategy == SplitPrStrategy::Deterministic {
            // Deterministic mode demands full coverage; list up to 8 gaps.
            if !missing.is_empty() {
                return Err(format!(
                    "--pr-grouping group requires explicit mapping for every task; missing: {}",
                    missing
                        .iter()
                        .take(8)
                        .cloned()
                        .collect::<Vec<_>>()
                        .join(", ")
                ));
            }
        } else if !missing.is_empty() {
            // Auto mode clusters whatever the pins left unassigned.
            assign_auto_groups(&mut records, &sprint_hints);
        }
    } else {
        // Per-sprint mode: one group token per sprint, e.g. "s3".
        for rec in &mut records {
            rec.pr_group =
                normalize_token(&format!("s{}", rec.sprint), &format!("s{}", rec.sprint), 48);
        }
    }

    // Phase 5: project working Records into the public result type.
    let mut out: Vec<SplitPlanRecord> = Vec::new();
    for rec in records {
        out.push(SplitPlanRecord {
            task_id: rec.task_id,
            sprint: rec.sprint,
            summary: rec.summary,
            pr_group: rec.pr_group,
        });
    }

    Ok(out)
}
664
/// A scored pair of groups considered for merging during auto grouping.
#[derive(Debug)]
struct AutoMergeCandidate {
    /// Positions of the two groups in the working `groups` vector (i < j).
    i: usize,
    j: usize,
    /// Affinity score scaled to an integer (score * 1_000_000, rounded)
    /// so candidates sort without float comparisons.
    score_key: i64,
    /// The two groups' minimum task keys, stored in lexicographic order;
    /// used as deterministic tie-breakers.
    key_a: String,
    key_b: String,
}
673
/// A pair of groups considered for a forced merge when no scored candidate
/// qualifies but the hinted target group count has not been reached.
#[derive(Debug)]
struct ForcedMergeCandidate {
    /// Positions of the two groups in the working `groups` vector (i < j).
    i: usize,
    j: usize,
    /// Batch span of the merged pair (smaller = less serialization).
    span: usize,
    /// Combined complexity of both groups.
    complexity: i32,
    /// The two groups' minimum task keys in lexicographic order (tie-breakers).
    key_a: String,
    key_b: String,
}
683
684fn assign_auto_groups(records: &mut [Record], hints: &HashMap<i32, AutoSprintHint>) {
685 let mut sprint_to_indices: BTreeMap<i32, Vec<usize>> = BTreeMap::new();
686 for (idx, rec) in records.iter().enumerate() {
687 if rec.pr_group.is_empty() {
688 sprint_to_indices.entry(rec.sprint).or_default().push(idx);
689 }
690 }
691
692 for (sprint, indices) in sprint_to_indices {
693 let hint = hints.get(&sprint).cloned().unwrap_or_default();
694 let assignments = auto_groups_for_sprint(records, sprint, &indices, &hint);
695 for (idx, group) in assignments {
696 if let Some(rec) = records.get_mut(idx)
697 && rec.pr_group.is_empty()
698 {
699 rec.pr_group = group;
700 }
701 }
702 }
703}
704
/// Clusters one sprint's unpinned tasks into PR groups.
///
/// Pipeline:
/// 1. resolve intra-sprint dependency references (by lowercased synthetic
///    or plan id) and normalize location paths;
/// 2. layer tasks into dependency "batches" (compute_batch_index);
/// 3. union tasks in the same batch that touch the same path;
/// 4. greedily merge groups whose affinity score clears 0.30, forcing
///    merges if a hinted target group count is still not reached;
/// 5. emit `record index -> "s<sprint>-auto-g<k>"` in deterministic order.
fn auto_groups_for_sprint(
    records: &[Record],
    sprint: i32,
    indices: &[usize],
    hint: &AutoSprintHint,
) -> BTreeMap<usize, String> {
    // Lowercased id -> record index, for resolving dependency references.
    let mut lookup: HashMap<String, usize> = HashMap::new();
    for idx in indices {
        let rec = &records[*idx];
        lookup.insert(rec.task_id.to_ascii_lowercase(), *idx);
        if !rec.plan_task_id.is_empty() {
            lookup.insert(rec.plan_task_id.to_ascii_lowercase(), *idx);
        }
    }

    // Per-record resolved dependency sets (in-sprint only; self-edges and
    // unknown references dropped) and normalized path sets.
    let mut deps: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
    let mut paths: BTreeMap<usize, BTreeSet<String>> = BTreeMap::new();
    for idx in indices {
        let rec = &records[*idx];
        let mut resolved_deps: BTreeSet<usize> = BTreeSet::new();
        for dep in &rec.dependency_keys {
            let dep_key = dep.trim().to_ascii_lowercase();
            if dep_key.is_empty() {
                continue;
            }
            if let Some(dep_idx) = lookup.get(&dep_key)
                && dep_idx != idx
            {
                resolved_deps.insert(*dep_idx);
            }
        }
        deps.insert(*idx, resolved_deps);

        let normalized_paths: BTreeSet<String> = rec
            .location_paths
            .iter()
            .map(|path| normalize_location_path(path))
            .filter(|path| !path.is_empty())
            .collect();
        paths.insert(*idx, normalized_paths);
    }

    let batch_by_idx = compute_batch_index(records, indices, &deps);
    // Union-find parents: every index starts as its own group.
    let mut parent: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, idx)).collect();

    let mut by_batch: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
    for idx in indices {
        let batch = batch_by_idx.get(idx).copied().unwrap_or(0);
        by_batch.entry(batch).or_default().push(*idx);
    }

    // Seed groups: within a batch, tasks sharing a location path are unioned.
    for members in by_batch.values_mut() {
        members.sort_by_key(|idx| task_sort_key(records, *idx));

        let mut path_to_members: BTreeMap<String, Vec<usize>> = BTreeMap::new();
        for idx in members {
            for path in paths.get(idx).into_iter().flatten() {
                path_to_members.entry(path.clone()).or_default().push(*idx);
            }
        }
        for overlap_members in path_to_members.values() {
            if overlap_members.len() < 2 {
                continue;
            }
            let first = overlap_members[0];
            for other in overlap_members.iter().skip(1) {
                uf_union(&mut parent, first, *other);
            }
        }
    }

    // Materialize union-find components into explicit groups.
    let mut grouped: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
    for idx in indices {
        let root = uf_find(&mut parent, *idx);
        grouped.entry(root).or_default().insert(*idx);
    }
    let mut groups: Vec<BTreeSet<usize>> = grouped.into_values().collect();
    let target_group_count = desired_auto_group_count(indices.len(), hint);

    // Greedy merge loop: repeatedly pick the best-scoring pair and merge it.
    loop {
        // Stop once the hinted target is met (no target -> merge until no
        // candidate qualifies).
        if let Some(target) = target_group_count
            && groups.len() <= target
        {
            break;
        }

        let mut candidates: Vec<AutoMergeCandidate> = Vec::new();
        for i in 0..groups.len() {
            for j in (i + 1)..groups.len() {
                // Hard cap: never merge past a combined complexity of 20.
                let merged_complexity =
                    group_complexity(records, &groups[i]) + group_complexity(records, &groups[j]);
                if merged_complexity > 20 {
                    continue;
                }

                // Affinity: dependency edges and shared paths pull groups
                // together; batch span and size deviation push them apart.
                let dep_cross = dependency_cross_edges(&deps, &groups[i], &groups[j]);
                let overlap_paths = overlap_path_count(&paths, &groups[i], &groups[j]);
                let min_group_size = groups[i].len().min(groups[j].len()).max(1) as f64;
                let dep_affinity = ((dep_cross as f64) / min_group_size).min(1.0);
                let ovl_affinity = ((overlap_paths as f64) / 2.0).min(1.0);
                // size_fit peaks at a merged complexity of 12.
                let size_fit = (1.0 - ((merged_complexity as f64 - 12.0).abs() / 12.0)).max(0.0);
                let span = group_span(&batch_by_idx, &groups[i], &groups[j]);
                let serial_penalty = ((span as f64 - 1.0).max(0.0)) / 3.0;
                // Always 0.0 here since merged_complexity <= 20 after the cap
                // above; kept for the score formula's shape.
                let oversize_penalty = ((merged_complexity as f64 - 20.0).max(0.0)) / 20.0;

                let score = (0.45 * dep_affinity) + (0.35 * ovl_affinity) + (0.20 * size_fit)
                    - (0.25 * serial_penalty)
                    - (0.45 * oversize_penalty);
                if score < 0.30 {
                    continue;
                }

                // Tie-break keys in lexicographic order for determinism.
                let mut key_a = group_min_task_key(records, &groups[i]);
                let mut key_b = group_min_task_key(records, &groups[j]);
                if key_b < key_a {
                    std::mem::swap(&mut key_a, &mut key_b);
                }
                candidates.push(AutoMergeCandidate {
                    i,
                    j,
                    score_key: (score * 1_000_000.0).round() as i64,
                    key_a,
                    key_b,
                });
            }
        }

        if candidates.is_empty() {
            // No pair qualifies; force a merge only when a target count is
            // hinted and still unmet, otherwise stop.
            if let Some(target) = target_group_count
                && groups.len() > target
                && let Some(chosen) = pick_forced_merge(records, &batch_by_idx, &groups)
            {
                let mut merged = groups[chosen.i].clone();
                merged.extend(groups[chosen.j].iter().copied());
                groups[chosen.i] = merged;
                groups.remove(chosen.j);
                continue;
            }
            break;
        }

        // Best score first; ties broken by task keys, then positions.
        candidates.sort_by(|a, b| {
            b.score_key
                .cmp(&a.score_key)
                .then_with(|| a.key_a.cmp(&b.key_a))
                .then_with(|| a.key_b.cmp(&b.key_b))
                .then_with(|| a.i.cmp(&b.i))
                .then_with(|| a.j.cmp(&b.j))
        });
        let chosen = &candidates[0];

        let mut merged = groups[chosen.i].clone();
        merged.extend(groups[chosen.j].iter().copied());
        groups[chosen.i] = merged;
        groups.remove(chosen.j);
    }

    // Deterministic output order: earliest batch first, then min task key.
    groups.sort_by(|a, b| {
        group_min_batch(&batch_by_idx, a)
            .cmp(&group_min_batch(&batch_by_idx, b))
            .then_with(|| group_min_task_key(records, a).cmp(&group_min_task_key(records, b)))
    });

    // Name groups s<sprint>-auto-g1, -g2, ... in that order.
    let mut out: BTreeMap<usize, String> = BTreeMap::new();
    for (idx, group) in groups.iter().enumerate() {
        let fallback = format!("s{sprint}-auto-g{}", idx + 1);
        let group_key = normalize_token(&fallback, &fallback, 48);
        for member in group {
            out.insert(*member, group_key.clone());
        }
    }
    out
}
878
/// Kahn-style topological layering restricted to `indices`.
///
/// Batch 0 holds tasks with no in-sprint dependencies; batch k+1 holds
/// tasks whose dependencies all sit in batches <= k. Dependencies pointing
/// outside `indices` are ignored. If a cycle blocks progress, every
/// remaining task is dumped into the current layer and layering stops.
fn compute_batch_index(
    records: &[Record],
    indices: &[usize],
    deps: &BTreeMap<usize, BTreeSet<usize>>,
) -> BTreeMap<usize, usize> {
    // In-degree per node plus a reverse (dependency -> dependents) adjacency.
    let mut in_deg: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, 0)).collect();
    let mut reverse: HashMap<usize, BTreeSet<usize>> = indices
        .iter()
        .copied()
        .map(|idx| (idx, BTreeSet::new()))
        .collect();

    for idx in indices {
        for dep in deps.get(idx).cloned().unwrap_or_default() {
            // Edges to nodes outside this sprint's index set are skipped.
            if !in_deg.contains_key(&dep) {
                continue;
            }
            if let Some(value) = in_deg.get_mut(idx) {
                *value += 1;
            }
            if let Some(children) = reverse.get_mut(&dep) {
                children.insert(*idx);
            }
        }
    }

    let mut remaining: BTreeSet<usize> = indices.iter().copied().collect();
    let mut batch_by_idx: BTreeMap<usize, usize> = BTreeMap::new();
    let mut layer = 0usize;
    // Initial frontier: all zero in-degree nodes, in deterministic task order.
    let mut ready: VecDeque<usize> = {
        let mut start: Vec<usize> = indices
            .iter()
            .copied()
            .filter(|idx| in_deg.get(idx).copied().unwrap_or(0) == 0)
            .collect();
        start.sort_by_key(|idx| task_sort_key(records, *idx));
        start.into_iter().collect()
    };

    while !remaining.is_empty() {
        let mut batch_members: Vec<usize> = ready.drain(..).collect();
        batch_members.sort_by_key(|idx| task_sort_key(records, *idx));

        // Cycle fallback: nothing is ready but nodes remain, so assign all
        // of them to the current layer and bail out.
        if batch_members.is_empty() {
            let mut cycle_members: Vec<usize> = remaining.iter().copied().collect();
            cycle_members.sort_by_key(|idx| task_sort_key(records, *idx));
            for idx in cycle_members {
                remaining.remove(&idx);
                batch_by_idx.insert(idx, layer);
            }
            break;
        }

        for idx in &batch_members {
            remaining.remove(idx);
            batch_by_idx.insert(*idx, layer);
        }

        // Release dependents whose last dependency just got placed.
        let mut next: Vec<usize> = Vec::new();
        for idx in batch_members {
            for child in reverse.get(&idx).cloned().unwrap_or_default() {
                if let Some(value) = in_deg.get_mut(&child) {
                    *value = value.saturating_sub(1);
                    if *value == 0 && remaining.contains(&child) {
                        next.push(child);
                    }
                }
            }
        }
        next.sort_by_key(|idx| task_sort_key(records, *idx));
        next.dedup();
        ready.extend(next);
        layer += 1;
    }

    // Safety net: any index the loop never reached defaults to batch 0.
    for idx in indices {
        batch_by_idx.entry(*idx).or_insert(0);
    }
    batch_by_idx
}
959
960fn task_sort_key(records: &[Record], idx: usize) -> (String, String) {
961 let rec = &records[idx];
962 let primary = if rec.plan_task_id.trim().is_empty() {
963 rec.task_id.to_ascii_lowercase()
964 } else {
965 rec.plan_task_id.to_ascii_lowercase()
966 };
967 (primary, rec.task_id.to_ascii_lowercase())
968}
969
/// Collapses runs of whitespace to single spaces and lowercases ASCII,
/// so path comparisons are whitespace- and case-insensitive.
fn normalize_location_path(path: &str) -> String {
    let mut out = String::with_capacity(path.len());
    for (i, token) in path.split_whitespace().enumerate() {
        if i > 0 {
            out.push(' ');
        }
        out.push_str(token);
    }
    out.make_ascii_lowercase();
    out
}
976
977fn group_complexity(records: &[Record], group: &BTreeSet<usize>) -> i32 {
978 group
979 .iter()
980 .map(|idx| records[*idx].complexity.max(1))
981 .sum::<i32>()
982}
983
984fn group_min_task_key(records: &[Record], group: &BTreeSet<usize>) -> String {
985 group
986 .iter()
987 .map(|idx| task_sort_key(records, *idx).0)
988 .min()
989 .unwrap_or_default()
990}
991
/// Earliest batch index any group member belongs to; 0 when no member has
/// a recorded batch.
fn group_min_batch(batch_by_idx: &BTreeMap<usize, usize>, group: &BTreeSet<usize>) -> usize {
    let mut best: Option<usize> = None;
    for idx in group {
        if let Some(batch) = batch_by_idx.get(idx).copied() {
            best = Some(match best {
                Some(current) => current.min(batch),
                None => batch,
            });
        }
    }
    best.unwrap_or(0)
}
999
/// Distance between the earliest and latest batch touched by the union of
/// two groups; members without a batch entry count as batch 0, and two
/// empty groups yield 0.
fn group_span(
    batch_by_idx: &BTreeMap<usize, usize>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let batches: Vec<usize> = left
        .union(right)
        .map(|idx| batch_by_idx.get(idx).copied().unwrap_or(0))
        .collect();
    match (batches.iter().min(), batches.iter().max()) {
        (Some(lo), Some(hi)) => hi - lo,
        _ => 0,
    }
}
1018
/// Number of dependency edges crossing between two groups, counted in both
/// directions (left -> right plus right -> left).
fn dependency_cross_edges(
    deps: &BTreeMap<usize, BTreeSet<usize>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let directed = |from: &BTreeSet<usize>, to: &BTreeSet<usize>| -> usize {
        from.iter()
            .filter_map(|src| deps.get(src))
            .map(|edges| edges.intersection(to).count())
            .sum()
    };
    directed(left, right) + directed(right, left)
}
1037
/// Number of distinct normalized location paths shared by the two groups.
fn overlap_path_count(
    paths: &BTreeMap<usize, BTreeSet<String>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let gather = |group: &BTreeSet<usize>| -> BTreeSet<String> {
        group
            .iter()
            .filter_map(|idx| paths.get(idx))
            .flat_map(|set| set.iter().cloned())
            .collect()
    };
    gather(left).intersection(&gather(right)).count()
}
1057
1058fn desired_auto_group_count(max_groups: usize, hint: &AutoSprintHint) -> Option<usize> {
1059 if max_groups == 0 {
1060 return None;
1061 }
1062 let preferred = hint
1063 .target_parallel_width
1064 .or_else(|| {
1065 if hint.execution_profile.as_deref() == Some("serial") {
1066 Some(1usize)
1067 } else {
1068 None
1069 }
1070 })
1071 .or_else(|| {
1072 if hint.pr_grouping_intent == Some(SplitPrGrouping::PerSprint) {
1073 Some(1usize)
1074 } else {
1075 None
1076 }
1077 })?;
1078 Some(preferred.clamp(1, max_groups))
1079}
1080
1081fn pick_forced_merge(
1082 records: &[Record],
1083 batch_by_idx: &BTreeMap<usize, usize>,
1084 groups: &[BTreeSet<usize>],
1085) -> Option<ForcedMergeCandidate> {
1086 let mut chosen: Option<ForcedMergeCandidate> = None;
1087 for i in 0..groups.len() {
1088 for j in (i + 1)..groups.len() {
1089 let mut key_a = group_min_task_key(records, &groups[i]);
1090 let mut key_b = group_min_task_key(records, &groups[j]);
1091 if key_b < key_a {
1092 std::mem::swap(&mut key_a, &mut key_b);
1093 }
1094 let candidate = ForcedMergeCandidate {
1095 i,
1096 j,
1097 span: group_span(batch_by_idx, &groups[i], &groups[j]),
1098 complexity: group_complexity(records, &groups[i])
1099 + group_complexity(records, &groups[j]),
1100 key_a,
1101 key_b,
1102 };
1103 let replace = match &chosen {
1104 None => true,
1105 Some(best) => {
1106 (
1107 candidate.span,
1108 candidate.complexity,
1109 &candidate.key_a,
1110 &candidate.key_b,
1111 candidate.i,
1112 candidate.j,
1113 ) < (
1114 best.span,
1115 best.complexity,
1116 &best.key_a,
1117 &best.key_b,
1118 best.i,
1119 best.j,
1120 )
1121 }
1122 };
1123 if replace {
1124 chosen = Some(candidate);
1125 }
1126 }
1127 }
1128 chosen
1129}
1130
1131fn sprint_hints(selected_sprints: &[Sprint]) -> HashMap<i32, AutoSprintHint> {
1132 let mut hints: HashMap<i32, AutoSprintHint> = HashMap::new();
1133 for sprint in selected_sprints {
1134 let pr_grouping_intent = sprint
1135 .metadata
1136 .pr_grouping_intent
1137 .as_deref()
1138 .and_then(SplitPrGrouping::from_cli);
1139 let execution_profile = sprint.metadata.execution_profile.clone();
1140 let target_parallel_width = sprint.metadata.parallel_width;
1141 hints.insert(
1142 sprint.number,
1143 AutoSprintHint {
1144 pr_grouping_intent,
1145 execution_profile,
1146 target_parallel_width,
1147 },
1148 );
1149 }
1150 hints
1151}
1152
1153fn build_explain_payload(
1154 records: &[SplitPlanRecord],
1155 hints: &HashMap<i32, AutoSprintHint>,
1156 pr_grouping: SplitPrGrouping,
1157) -> Vec<ExplainSprint> {
1158 let mut grouped: BTreeMap<i32, BTreeMap<String, Vec<String>>> = BTreeMap::new();
1159 for record in records {
1160 grouped
1161 .entry(record.sprint)
1162 .or_default()
1163 .entry(record.pr_group.clone())
1164 .or_default()
1165 .push(record.task_id.clone());
1166 }
1167
1168 let mut out: Vec<ExplainSprint> = Vec::new();
1169 for (sprint, per_group) in grouped {
1170 let hint = hints.get(&sprint).cloned().unwrap_or_default();
1171 let groups = per_group
1172 .into_iter()
1173 .map(|(pr_group, task_ids)| {
1174 let anchor = task_ids.first().cloned().unwrap_or_default();
1175 ExplainGroup {
1176 pr_group,
1177 task_ids,
1178 anchor,
1179 }
1180 })
1181 .collect::<Vec<_>>();
1182 out.push(ExplainSprint {
1183 sprint,
1184 target_parallel_width: hint.target_parallel_width,
1185 execution_profile: hint.execution_profile,
1186 pr_grouping_intent: hint
1187 .pr_grouping_intent
1188 .map(|value| value.as_str().to_string())
1189 .or_else(|| Some(pr_grouping.as_str().to_string())),
1190 groups,
1191 });
1192 }
1193 out
1194}
1195
/// Splits a `--key=value` argument into `(flag, Some(value))`.
///
/// Anything that is not a `--`-prefixed token containing `=` is returned
/// unchanged as `(raw, None)`; only the first `=` splits, so values may
/// themselves contain `=`.
fn split_value_arg(raw: &str) -> (&str, Option<&str>) {
    if raw.starts_with("--") {
        if let Some((flag, value)) = raw.split_once('=') {
            // Defensive: never report an empty flag name.
            if !flag.is_empty() {
                return (flag, Some(value));
            }
        }
    }
    (raw, None)
}
1205
/// Resolves the value for an option flag at position `idx`.
///
/// Inline `--flag=value` syntax (via `inline_value`) advances past one arg;
/// the separate `--flag value` form consumes the next arg and advances past
/// two. Returns the value plus the next index to scan, or `Err(())` when the
/// value is missing or empty.
fn consume_option_value(
    args: &[String],
    idx: usize,
    inline_value: Option<&str>,
    _flag: &str,
) -> Result<(String, usize), ()> {
    // Inline form: the value rode along with the flag itself.
    if let Some(value) = inline_value {
        return if value.is_empty() {
            Err(())
        } else {
            Ok((value.to_string(), idx + 1))
        };
    }
    // Separate form: the value is the following argument, if any.
    match args.get(idx + 1) {
        Some(value) if !value.is_empty() => Ok((value.clone(), idx + 2)),
        _ => Err(()),
    }
}
1232
/// Union-find `find` with full path compression.
///
/// Nodes absent from `parent` are implicitly their own root. After the call,
/// every node on the walked path points directly at the root (same
/// compression the recursive version performed).
fn uf_find(parent: &mut HashMap<usize, usize>, node: usize) -> usize {
    // Pass 1: walk up to the root.
    let mut root = node;
    loop {
        let up = parent.get(&root).copied().unwrap_or(root);
        if up == root {
            break;
        }
        root = up;
    }
    // Pass 2: repoint every intermediate node at the root.
    let mut cursor = node;
    while cursor != root {
        let up = parent.get(&cursor).copied().unwrap_or(cursor);
        parent.insert(cursor, root);
        cursor = up;
    }
    root
}
1242
1243fn uf_union(parent: &mut HashMap<usize, usize>, left: usize, right: usize) {
1244 let left_root = uf_find(parent, left);
1245 let right_root = uf_find(parent, right);
1246 if left_root == right_root {
1247 return;
1248 }
1249 if left_root < right_root {
1250 parent.insert(right_root, left_root);
1251 } else {
1252 parent.insert(left_root, right_root);
1253 }
1254}
1255
1256fn print_tsv(records: &[OutputRecord]) {
1257 println!("# task_id\tsummary\tpr_group");
1258 for rec in records {
1259 println!(
1260 "{}\t{}\t{}",
1261 rec.task_id.replace('\t', " "),
1262 rec.summary.replace('\t', " "),
1263 rec.pr_group.replace('\t', " "),
1264 );
1265 }
1266}
1267
1268fn print_usage() {
1269 let _ = std::io::stderr().write_all(USAGE.as_bytes());
1270}
1271
/// Reports a usage error on stderr and yields exit code 2 (usage error,
/// matching the contract documented in the help text).
fn die(msg: &str) -> i32 {
    let line = format!("split-prs: {msg}");
    eprintln!("{line}");
    2
}
1276
/// Resolves `path` against `repo_root`: absolute paths pass through
/// untouched, relative paths are joined onto the repo root.
fn resolve_repo_relative(repo_root: &Path, path: &Path) -> PathBuf {
    if path.is_absolute() {
        path.to_path_buf()
    } else {
        repo_root.join(path)
    }
}
1283
/// Rewrites `path` relative to `repo_root` when both canonicalize and `path`
/// lives under the root; otherwise returns the best absolute form available.
///
/// Canonicalization failures (e.g. nonexistent paths) degrade gracefully:
/// a failed `path` returns the input unchanged, a failed root returns the
/// canonicalized path.
fn maybe_relativize(path: &Path, repo_root: &Path) -> PathBuf {
    let path_abs = match path.canonicalize() {
        Ok(abs) => abs,
        Err(_) => return path.to_path_buf(),
    };
    let root_abs = match repo_root.canonicalize() {
        Ok(abs) => abs,
        Err(_) => return path_abs,
    };
    if let Ok(rel) = path_abs.strip_prefix(&root_abs) {
        rel.to_path_buf()
    } else {
        path_abs
    }
}
1296
/// Renders a path with POSIX-style `/` separators (no-op on Unix; converts
/// `\` on Windows). Non-UTF-8 components are replaced lossily.
fn path_to_posix(path: &Path) -> String {
    let rendered = path.to_string_lossy().into_owned();
    rendered.replace(std::path::MAIN_SEPARATOR, "/")
}
1301
/// Collapses all runs of whitespace to single spaces and trims the ends.
///
/// Falls back to the literal `"task"` when nothing but whitespace remains.
fn normalize_spaces(value: String) -> String {
    let mut collapsed = String::new();
    for word in value.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    if collapsed.is_empty() {
        String::from("task")
    } else {
        collapsed
    }
}
1310
/// Slugifies `value` into a lowercase, dash-separated ASCII token capped at
/// `max_len` bytes.
///
/// Runs of non-alphanumeric characters collapse to a single `-`; leading and
/// trailing dashes are stripped. If nothing survives, `fallback` is used
/// (and is itself subject to the length cap). After truncation, dashes at
/// the ends are stripped again. All produced characters are ASCII, so byte
/// truncation never splits a character of the slug itself.
fn normalize_token(value: &str, fallback: &str, max_len: usize) -> String {
    let mut slug = String::new();
    for ch in value.chars().flat_map(char::to_lowercase) {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch);
        } else if !slug.ends_with('-') && slug.is_empty() == slug.is_empty() {
            // Collapse each non-alnum run into one dash (including a leading run).
            slug.push('-');
        }
    }
    let trimmed = slug.trim_matches('-');
    let mut token = if trimmed.is_empty() {
        fallback.to_string()
    } else {
        trimmed.to_string()
    };
    if token.len() > max_len {
        token.truncate(max_len);
        token = token.trim_matches('-').to_string();
    }
    token
}
1335
/// Reports whether a plan field value is a placeholder rather than real
/// content.
///
/// Placeholders are: known filler literals (`-`, `none`, `n/a`, …), anything
/// wrapped in angle brackets (template stubs like `<task ids>`), or any text
/// still containing the phrase "task ids". Comparison is case-insensitive
/// after trimming.
fn is_placeholder(value: &str) -> bool {
    let token = value.trim().to_ascii_lowercase();
    let is_filler_literal = matches!(token.as_str(), "" | "-" | "none" | "n/a" | "na" | "...");
    let is_template_stub = token.starts_with('<') && token.ends_with('>');
    is_filler_literal || is_template_stub || token.contains("task ids")
}
1346
#[cfg(test)]
mod tests {
    use super::{is_placeholder, normalize_token};
    use pretty_assertions::assert_eq;

    #[test]
    fn normalize_token_collapses_non_alnum_and_limits_length() {
        let slug = normalize_token("Sprint 2 :: Shared Pair", "fallback", 20);
        assert_eq!(slug, "sprint-2-shared-pair");
        // Fallback kicks in when nothing survives, then gets truncated.
        let truncated = normalize_token("!!!!", "fallback-value", 8);
        assert_eq!(truncated, "fallback");
    }

    #[test]
    fn placeholder_rules_cover_common_plan_values() {
        for value in ["none", "<task ids>", "Task IDs here"] {
            assert!(is_placeholder(value), "{value:?} should be a placeholder");
        }
        assert!(!is_placeholder("Task 1.1"));
    }
}