plan_tooling/
split_prs.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
2use std::io::Write;
3use std::path::{Path, PathBuf};
4
5use serde::Serialize;
6
7use crate::parse::{Plan, Sprint, parse_plan_with_display};
8
/// Help text for the `split-prs` subcommand: required flags, optional flags
/// with their defaults, accepted argument styles, and exit codes.
const USAGE: &str = r#"Usage:
  plan-tooling split-prs --file <plan.md> --pr-grouping <per-sprint|group> [options]

Purpose:
  Build task-to-PR split records from a Plan Format v1 file.

Required:
  --file <path>                    Plan file to parse
  --pr-grouping <mode>             per-sprint | group

Options:
  --scope <plan|sprint>            Scope to split (default: sprint)
  --sprint <n>                     Sprint number when --scope sprint
  --pr-group <task=group>          Group pin; repeatable (group mode only)
                                   deterministic/group: required for every task
                                   auto/group: optional pins + auto assignment for remaining tasks
  --strategy <deterministic|auto>  Split strategy (default: deterministic)
  --explain                        Include grouping rationale in JSON output
  --owner-prefix <text>            Owner prefix (default: subagent)
  --branch-prefix <text>           Branch prefix (default: issue)
  --worktree-prefix <text>         Worktree prefix (default: issue__)
  --format <json|tsv>              Output format (default: json)
  -h, --help                       Show help

Argument style:
  --key value and --key=value are both accepted for value options.

Exit:
  0: success
  1: runtime or validation error
  2: usage error
"#;
41
/// Portion of the plan that a split operates on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitScope {
    /// Every sprint in the plan that has at least one task.
    Plan,
    /// Only the sprint whose number equals the wrapped value.
    Sprint(i32),
}
47
/// How tasks are grouped into PRs (`--pr-grouping`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrGrouping {
    /// One PR group per sprint.
    PerSprint,
    /// Explicit (or auto-derived) named groups.
    Group,
}

impl SplitPrGrouping {
    /// Returns the command-line spelling of this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            SplitPrGrouping::Group => "group",
            SplitPrGrouping::PerSprint => "per-sprint",
        }
    }

    /// Parses the exact command-line spelling; any other text yields `None`.
    fn from_cli(value: &str) -> Option<Self> {
        // The parser is the inverse of `as_str`, so look the spelling up there.
        [Self::PerSprint, Self::Group]
            .iter()
            .copied()
            .find(|mode| mode.as_str() == value)
    }
}
70
/// How group assignments are produced (`--strategy`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrStrategy {
    /// Every task must be pinned explicitly.
    Deterministic,
    /// Unpinned tasks are assigned automatically.
    Auto,
}

impl SplitPrStrategy {
    /// Returns the command-line spelling of this strategy.
    pub fn as_str(self) -> &'static str {
        match self {
            SplitPrStrategy::Auto => "auto",
            SplitPrStrategy::Deterministic => "deterministic",
        }
    }

    /// Parses the exact command-line spelling; any other text yields `None`.
    fn from_cli(value: &str) -> Option<Self> {
        // The parser is the inverse of `as_str`, so look the spelling up there.
        [Self::Deterministic, Self::Auto]
            .iter()
            .copied()
            .find(|strategy| strategy.as_str() == value)
    }
}
93
/// Options consumed by [`build_split_plan_records`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SplitPlanOptions {
    /// Grouping mode (`--pr-grouping`).
    pub pr_grouping: SplitPrGrouping,
    /// Split strategy (`--strategy`).
    pub strategy: SplitPrStrategy,
    /// Raw `<task-or-plan-id>=<group>` pins (`--pr-group`); rejected unless
    /// the grouping mode is `Group`.
    pub pr_group_entries: Vec<String>,
    /// Owner prefix; `run` fills it from `--owner-prefix`.
    pub owner_prefix: String,
    /// Branch prefix; `run` fills it from `--branch-prefix`.
    pub branch_prefix: String,
    /// Worktree prefix; `run` fills it from `--worktree-prefix`.
    pub worktree_prefix: String,
}
103
/// One task with its resolved PR group, as returned by
/// [`build_split_plan_records`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SplitPlanRecord {
    /// Synthetic id of the form `S<sprint>T<ordinal>` (ordinal is 1-based
    /// within the sprint).
    pub task_id: String,
    /// Number of the sprint the task belongs to.
    pub sprint: i32,
    /// Task name passed through `normalize_spaces`; falls back to the plan's
    /// own task id, then to `sprint-<n>-task-<ordinal>`.
    pub summary: String,
    /// PR group assigned to the task.
    pub pr_group: String,
}
111
/// Internal working record for one task while groups are being assigned.
#[derive(Debug, Clone)]
struct Record {
    /// Synthetic id of the form `S<sprint>T<ordinal>`.
    task_id: String,
    /// Trimmed task id as written in the plan; may be empty.
    plan_task_id: String,
    /// Number of the sprint the task belongs to.
    sprint: i32,
    /// Display summary (see `SplitPlanRecord::summary`).
    summary: String,
    /// Task complexity; `5` is substituted when the plan value is absent or
    /// not positive.
    complexity: i32,
    /// Trimmed, non-empty, non-placeholder location entries.
    location_paths: Vec<String>,
    /// Trimmed, non-empty, non-placeholder dependency keys.
    dependency_keys: Vec<String>,
    /// Assigned PR group; empty until an assignment has been made.
    pr_group: String,
}
123
/// Per-sprint hints handed to the auto-grouping step.
///
/// NOTE(review): the values are produced by `sprint_hints` and interpreted by
/// `desired_auto_group_count`, neither of which is visible in this part of the
/// file — confirm field semantics there.
#[derive(Debug, Clone, Default)]
struct AutoSprintHint {
    /// Grouping intent recorded for the sprint, if any.
    pr_grouping_intent: Option<SplitPrGrouping>,
    /// Execution profile recorded for the sprint, if any.
    execution_profile: Option<String>,
    /// Target parallel width recorded for the sprint, if any.
    target_parallel_width: Option<usize>,
}
130
/// JSON document printed by `run` when the output format is `json`.
#[derive(Debug, Serialize)]
struct Output {
    /// Plan file path, after `maybe_relativize` and `path_to_posix`.
    file: String,
    /// Validated `--scope` value (`plan` or `sprint`).
    scope: String,
    /// Sprint number; `None` unless the scope is `sprint`.
    sprint: Option<i32>,
    /// Validated `--pr-grouping` value.
    pr_grouping: String,
    /// Validated `--strategy` value.
    strategy: String,
    /// One entry per task in the selected scope.
    records: Vec<OutputRecord>,
    /// Grouping rationale; the key is omitted unless `--explain` was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    explain: Option<Vec<ExplainSprint>>,
}
142
/// Serialized form of one task row; a `SplitPlanRecord` without its sprint
/// number.
#[derive(Debug, Serialize, PartialEq, Eq)]
struct OutputRecord {
    /// Synthetic task id (`S<sprint>T<ordinal>`).
    task_id: String,
    /// Task summary.
    summary: String,
    /// PR group assigned to the task.
    pr_group: String,
}
149
/// Per-sprint entry of the `explain` payload.
///
/// NOTE(review): instances are built by `build_explain_payload`, which is not
/// visible in this part of the file; the optional fields mirror
/// `AutoSprintHint` by name — confirm there.
#[derive(Debug, Serialize, PartialEq, Eq)]
struct ExplainSprint {
    /// Sprint number.
    sprint: i32,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    target_parallel_width: Option<usize>,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    execution_profile: Option<String>,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pr_grouping_intent: Option<String>,
    /// Groups reported for this sprint.
    groups: Vec<ExplainGroup>,
}
161
/// One PR group inside an [`ExplainSprint`].
#[derive(Debug, Serialize, PartialEq, Eq)]
struct ExplainGroup {
    /// Name of the PR group.
    pr_group: String,
    /// Ids of the tasks placed in this group.
    task_ids: Vec<String>,
    /// NOTE(review): meaning is set by `build_explain_payload` (not visible
    /// here) — presumably a representative task of the group; confirm.
    anchor: String,
}
168
169pub fn run(args: &[String]) -> i32 {
170    let mut file: Option<String> = None;
171    let mut scope = String::from("sprint");
172    let mut sprint: Option<String> = None;
173    let mut pr_grouping: Option<String> = None;
174    let mut pr_group_entries: Vec<String> = Vec::new();
175    let mut strategy = String::from("deterministic");
176    let mut explain = false;
177    let mut owner_prefix = String::from("subagent");
178    let mut branch_prefix = String::from("issue");
179    let mut worktree_prefix = String::from("issue__");
180    let mut format = String::from("json");
181
182    let mut i = 0usize;
183    while i < args.len() {
184        let raw_arg = args[i].as_str();
185        let (flag, inline_value) = split_value_arg(raw_arg);
186        match flag {
187            "--file" => {
188                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--file") else {
189                    return die("missing value for --file");
190                };
191                file = Some(v);
192                i = next_i;
193            }
194            "--scope" => {
195                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--scope") else {
196                    return die("missing value for --scope");
197                };
198                scope = v;
199                i = next_i;
200            }
201            "--sprint" => {
202                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--sprint")
203                else {
204                    return die("missing value for --sprint");
205                };
206                sprint = Some(v);
207                i = next_i;
208            }
209            "--pr-grouping" => {
210                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--pr-grouping")
211                else {
212                    return die("missing value for --pr-grouping");
213                };
214                pr_grouping = Some(v);
215                i = next_i;
216            }
217            "--pr-group" => {
218                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--pr-group")
219                else {
220                    return die("missing value for --pr-group");
221                };
222                pr_group_entries.push(v);
223                i = next_i;
224            }
225            "--strategy" => {
226                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--strategy")
227                else {
228                    return die("missing value for --strategy");
229                };
230                strategy = v;
231                i = next_i;
232            }
233            "--explain" => {
234                if inline_value.is_some() {
235                    return die("unexpected value for --explain");
236                }
237                explain = true;
238                i += 1;
239            }
240            "--owner-prefix" => {
241                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--owner-prefix")
242                else {
243                    return die("missing value for --owner-prefix");
244                };
245                owner_prefix = v;
246                i = next_i;
247            }
248            "--branch-prefix" => {
249                let Ok((v, next_i)) =
250                    consume_option_value(args, i, inline_value, "--branch-prefix")
251                else {
252                    return die("missing value for --branch-prefix");
253                };
254                branch_prefix = v;
255                i = next_i;
256            }
257            "--worktree-prefix" => {
258                let Ok((v, next_i)) =
259                    consume_option_value(args, i, inline_value, "--worktree-prefix")
260                else {
261                    return die("missing value for --worktree-prefix");
262                };
263                worktree_prefix = v;
264                i = next_i;
265            }
266            "--format" => {
267                let Ok((v, next_i)) = consume_option_value(args, i, inline_value, "--format")
268                else {
269                    return die("missing value for --format");
270                };
271                format = v;
272                i = next_i;
273            }
274            "-h" | "--help" => {
275                if inline_value.is_some() {
276                    return die(&format!("unknown argument: {raw_arg}"));
277                }
278                print_usage();
279                return 0;
280            }
281            _ => {
282                return die(&format!("unknown argument: {raw_arg}"));
283            }
284        }
285    }
286
287    let Some(file_arg) = file else {
288        print_usage();
289        return 2;
290    };
291    let Some(mut pr_grouping) = pr_grouping else {
292        print_usage();
293        return 2;
294    };
295
296    if pr_grouping == "per-spring" {
297        pr_grouping = String::from("per-sprint");
298    }
299    if scope != "plan" && scope != "sprint" {
300        return die(&format!(
301            "invalid --scope (expected plan|sprint): {}",
302            crate::repr::py_repr(&scope)
303        ));
304    }
305    if pr_grouping != "per-sprint" && pr_grouping != "group" {
306        return die(&format!(
307            "invalid --pr-grouping (expected per-sprint|group): {}",
308            crate::repr::py_repr(&pr_grouping)
309        ));
310    }
311    if strategy != "deterministic" && strategy != "auto" {
312        return die(&format!(
313            "invalid --strategy (expected deterministic|auto): {}",
314            crate::repr::py_repr(&strategy)
315        ));
316    }
317    if format != "json" && format != "tsv" {
318        return die(&format!(
319            "invalid --format (expected json|tsv): {}",
320            crate::repr::py_repr(&format)
321        ));
322    }
323
324    let sprint_num = if scope == "sprint" {
325        let Some(raw) = sprint.as_deref() else {
326            return die("--sprint is required when --scope sprint");
327        };
328        match raw.parse::<i32>() {
329            Ok(v) if v > 0 => Some(v),
330            _ => {
331                eprintln!(
332                    "error: invalid --sprint (expected positive int): {}",
333                    crate::repr::py_repr(raw)
334                );
335                return 2;
336            }
337        }
338    } else {
339        None
340    };
341
342    // Deterministic group mode requires full explicit mappings.
343    // Auto group mode can derive missing assignments from topology/conflict signals.
344    if pr_grouping == "group" && strategy == "deterministic" && pr_group_entries.is_empty() {
345        return die(
346            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry",
347        );
348    }
349    if pr_grouping != "group" && !pr_group_entries.is_empty() {
350        return die("--pr-group can only be used when --pr-grouping group");
351    }
352
353    let repo_root = crate::repo_root::detect();
354    let display_path = file_arg.clone();
355    let read_path = resolve_repo_relative(&repo_root, Path::new(&file_arg));
356    if !read_path.is_file() {
357        eprintln!("error: plan file not found: {display_path}");
358        return 1;
359    }
360
361    let plan: Plan;
362    let parse_errors: Vec<String>;
363    match parse_plan_with_display(&read_path, &display_path) {
364        Ok((p, errs)) => {
365            plan = p;
366            parse_errors = errs;
367        }
368        Err(err) => {
369            eprintln!("error: {display_path}: {err}");
370            return 1;
371        }
372    }
373    if !parse_errors.is_empty() {
374        for err in parse_errors {
375            eprintln!("error: {display_path}: error: {err}");
376        }
377        return 1;
378    }
379
380    let split_scope = match scope.as_str() {
381        "plan" => SplitScope::Plan,
382        "sprint" => {
383            let Some(want) = sprint_num else {
384                return die("internal error: missing sprint number");
385            };
386            SplitScope::Sprint(want)
387        }
388        _ => return die("internal error: invalid scope"),
389    };
390    let Some(grouping_mode) = SplitPrGrouping::from_cli(&pr_grouping) else {
391        return die("internal error: invalid pr-grouping");
392    };
393    let Some(strategy_mode) = SplitPrStrategy::from_cli(&strategy) else {
394        return die("internal error: invalid strategy");
395    };
396
397    let selected_sprints = match select_sprints_for_scope(&plan, split_scope) {
398        Ok(sprints) => sprints,
399        Err(err) => {
400            eprintln!("error: {display_path}: {err}");
401            return 1;
402        }
403    };
404    let sprint_hints = sprint_hints(&selected_sprints);
405
406    let options = SplitPlanOptions {
407        pr_grouping: grouping_mode,
408        strategy: strategy_mode,
409        pr_group_entries,
410        owner_prefix,
411        branch_prefix,
412        worktree_prefix,
413    };
414    let split_records = match build_split_plan_records(&selected_sprints, &options) {
415        Ok(records) => records,
416        Err(err) => {
417            eprintln!("error: {err}");
418            return 1;
419        }
420    };
421    let explain_payload = if explain {
422        Some(build_explain_payload(
423            &split_records,
424            &sprint_hints,
425            options.pr_grouping,
426        ))
427    } else {
428        None
429    };
430
431    let out_records: Vec<OutputRecord> = split_records
432        .iter()
433        .map(OutputRecord::from_split_record)
434        .collect();
435
436    if format == "tsv" {
437        print_tsv(&out_records);
438        return 0;
439    }
440
441    let output = Output {
442        file: path_to_posix(&maybe_relativize(&read_path, &repo_root)),
443        scope: scope.clone(),
444        sprint: sprint_num,
445        pr_grouping,
446        strategy,
447        records: out_records,
448        explain: explain_payload,
449    };
450    match serde_json::to_string(&output) {
451        Ok(json) => {
452            println!("{json}");
453            0
454        }
455        Err(err) => {
456            eprintln!("error: failed to encode JSON: {err}");
457            1
458        }
459    }
460}
461
462impl OutputRecord {
463    fn from_split_record(record: &SplitPlanRecord) -> Self {
464        Self {
465            task_id: record.task_id.clone(),
466            summary: record.summary.clone(),
467            pr_group: record.pr_group.clone(),
468        }
469    }
470}
471
472pub fn select_sprints_for_scope(plan: &Plan, scope: SplitScope) -> Result<Vec<Sprint>, String> {
473    let selected = match scope {
474        SplitScope::Plan => plan
475            .sprints
476            .iter()
477            .filter(|s| !s.tasks.is_empty())
478            .cloned()
479            .collect::<Vec<_>>(),
480        SplitScope::Sprint(want) => match plan.sprints.iter().find(|s| s.number == want) {
481            Some(sprint) if !sprint.tasks.is_empty() => vec![sprint.clone()],
482            Some(_) => return Err(format!("sprint {want} has no tasks")),
483            None => return Err(format!("sprint not found: {want}")),
484        },
485    };
486    if selected.is_empty() {
487        return Err("selected scope has no tasks".to_string());
488    }
489    Ok(selected)
490}
491
492pub fn build_split_plan_records(
493    selected_sprints: &[Sprint],
494    options: &SplitPlanOptions,
495) -> Result<Vec<SplitPlanRecord>, String> {
496    if selected_sprints.is_empty() {
497        return Err("selected scope has no tasks".to_string());
498    }
499    if options.pr_grouping == SplitPrGrouping::Group
500        && options.strategy == SplitPrStrategy::Deterministic
501        && options.pr_group_entries.is_empty()
502    {
503        return Err(
504            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry"
505                .to_string(),
506        );
507    }
508    if options.pr_grouping != SplitPrGrouping::Group && !options.pr_group_entries.is_empty() {
509        return Err("--pr-group can only be used when --pr-grouping group".to_string());
510    }
511
512    let sprint_hints = sprint_hints(selected_sprints);
513
514    let mut records: Vec<Record> = Vec::new();
515    for sprint in selected_sprints {
516        for (idx, task) in sprint.tasks.iter().enumerate() {
517            let ordinal = idx + 1;
518            let task_id = format!("S{}T{ordinal}", sprint.number);
519            let plan_task_id = task.id.trim().to_string();
520            let summary = normalize_spaces(if task.name.trim().is_empty() {
521                if plan_task_id.is_empty() {
522                    format!("sprint-{}-task-{ordinal}", sprint.number)
523                } else {
524                    plan_task_id.clone()
525                }
526            } else {
527                task.name.trim().to_string()
528            });
529            let deps: Vec<String> = task
530                .dependencies
531                .clone()
532                .unwrap_or_default()
533                .into_iter()
534                .map(|d| d.trim().to_string())
535                .filter(|d| !d.is_empty())
536                .filter(|d| !is_placeholder(d))
537                .collect();
538            let location_paths: Vec<String> = task
539                .location
540                .iter()
541                .map(|p| p.trim().to_string())
542                .filter(|p| !p.is_empty())
543                .filter(|p| !is_placeholder(p))
544                .collect();
545            let complexity = match task.complexity {
546                Some(value) if value > 0 => value,
547                _ => 5,
548            };
549
550            records.push(Record {
551                task_id,
552                plan_task_id,
553                sprint: sprint.number,
554                summary,
555                complexity,
556                location_paths,
557                dependency_keys: deps,
558                pr_group: String::new(),
559            });
560        }
561    }
562
563    if records.is_empty() {
564        return Err("selected scope has no tasks".to_string());
565    }
566
567    let mut group_assignments: HashMap<String, String> = HashMap::new();
568    let mut assignment_sources: Vec<String> = Vec::new();
569    for entry in &options.pr_group_entries {
570        let trimmed = entry.trim();
571        if trimmed.is_empty() {
572            continue;
573        }
574        let Some((raw_key, raw_group)) = trimmed.split_once('=') else {
575            return Err("--pr-group must use <task-or-plan-id>=<group> format".to_string());
576        };
577        let key = raw_key.trim();
578        let group = normalize_token(raw_group.trim(), "", 48);
579        if key.is_empty() || group.is_empty() {
580            return Err("--pr-group must include both task key and group".to_string());
581        }
582        assignment_sources.push(key.to_string());
583        group_assignments.insert(key.to_ascii_lowercase(), group);
584    }
585
586    if options.pr_grouping == SplitPrGrouping::Group && !assignment_sources.is_empty() {
587        let mut known: HashMap<String, bool> = HashMap::new();
588        for rec in &records {
589            known.insert(rec.task_id.to_ascii_lowercase(), true);
590            if !rec.plan_task_id.is_empty() {
591                known.insert(rec.plan_task_id.to_ascii_lowercase(), true);
592            }
593        }
594
595        let unknown: Vec<String> = assignment_sources
596            .iter()
597            .filter(|key| !known.contains_key(&key.to_ascii_lowercase()))
598            .cloned()
599            .collect();
600        if !unknown.is_empty() {
601            return Err(format!(
602                "--pr-group references unknown task keys: {}",
603                unknown
604                    .iter()
605                    .take(5)
606                    .cloned()
607                    .collect::<Vec<_>>()
608                    .join(", ")
609            ));
610        }
611    }
612
613    if options.pr_grouping == SplitPrGrouping::Group {
614        let mut missing: Vec<String> = Vec::new();
615        for rec in &mut records {
616            rec.pr_group.clear();
617            for key in [&rec.task_id, &rec.plan_task_id] {
618                if key.is_empty() {
619                    continue;
620                }
621                if let Some(v) = group_assignments.get(&key.to_ascii_lowercase()) {
622                    rec.pr_group = v.to_string();
623                    break;
624                }
625            }
626            if rec.pr_group.is_empty() {
627                missing.push(rec.task_id.clone());
628            }
629        }
630        if options.strategy == SplitPrStrategy::Deterministic {
631            if !missing.is_empty() {
632                return Err(format!(
633                    "--pr-grouping group requires explicit mapping for every task; missing: {}",
634                    missing
635                        .iter()
636                        .take(8)
637                        .cloned()
638                        .collect::<Vec<_>>()
639                        .join(", ")
640                ));
641            }
642        } else if !missing.is_empty() {
643            assign_auto_groups(&mut records, &sprint_hints);
644        }
645    } else {
646        for rec in &mut records {
647            rec.pr_group =
648                normalize_token(&format!("s{}", rec.sprint), &format!("s{}", rec.sprint), 48);
649        }
650    }
651
652    let mut out: Vec<SplitPlanRecord> = Vec::new();
653    for rec in records {
654        out.push(SplitPlanRecord {
655            task_id: rec.task_id,
656            sprint: rec.sprint,
657            summary: rec.summary,
658            pr_group: rec.pr_group,
659        });
660    }
661
662    Ok(out)
663}
664
/// A scored pair of groups that `auto_groups_for_sprint` may merge.
#[derive(Debug)]
struct AutoMergeCandidate {
    /// Position of the first group in the current group list (`i < j`).
    i: usize,
    /// Position of the second group in the current group list.
    j: usize,
    /// Merge score scaled by 1_000_000 and rounded, so candidates can be
    /// ordered with integer comparison.
    score_key: i64,
    /// The smaller of the two groups' minimum task keys (tie-breaker).
    key_a: String,
    /// The larger of the two groups' minimum task keys (tie-breaker).
    key_b: String,
}
673
/// A pair of groups chosen for merging when no scored candidate exists but
/// the group count is still above the target.
///
/// NOTE(review): instances come from `pick_forced_merge`, which is not
/// visible in this part of the file; only `i` and `j` are read by
/// `auto_groups_for_sprint`. The remaining fields are presumably its ranking
/// inputs — confirm there.
#[derive(Debug)]
struct ForcedMergeCandidate {
    /// Position of the group that absorbs the other.
    i: usize,
    /// Position of the group that is merged in and then removed.
    j: usize,
    span: usize,
    complexity: i32,
    key_a: String,
    key_b: String,
}
683
684fn assign_auto_groups(records: &mut [Record], hints: &HashMap<i32, AutoSprintHint>) {
685    let mut sprint_to_indices: BTreeMap<i32, Vec<usize>> = BTreeMap::new();
686    for (idx, rec) in records.iter().enumerate() {
687        if rec.pr_group.is_empty() {
688            sprint_to_indices.entry(rec.sprint).or_default().push(idx);
689        }
690    }
691
692    for (sprint, indices) in sprint_to_indices {
693        let hint = hints.get(&sprint).cloned().unwrap_or_default();
694        let assignments = auto_groups_for_sprint(records, sprint, &indices, &hint);
695        for (idx, group) in assignments {
696            if let Some(rec) = records.get_mut(idx)
697                && rec.pr_group.is_empty()
698            {
699                rec.pr_group = group;
700            }
701        }
702    }
703}
704
/// Computes automatic PR-group names for the unassigned tasks of one sprint.
///
/// `indices` are positions into `records` of the tasks to group. Tasks that
/// share a normalized location path inside the same batch (as computed by
/// `compute_batch_index`) are placed together first; the resulting groups are
/// then merged greedily by score until no candidate remains or the target
/// group count derived from `hint` is reached.
///
/// Returns a map from record index to group name (`s<sprint>-auto-g<n>`,
/// passed through `normalize_token`).
fn auto_groups_for_sprint(
    records: &[Record],
    sprint: i32,
    indices: &[usize],
    hint: &AutoSprintHint,
) -> BTreeMap<usize, String> {
    // Lower-cased synthetic id and plan id -> record index, restricted to the
    // tasks being grouped.
    let mut lookup: HashMap<String, usize> = HashMap::new();
    for idx in indices {
        let rec = &records[*idx];
        lookup.insert(rec.task_id.to_ascii_lowercase(), *idx);
        if !rec.plan_task_id.is_empty() {
            lookup.insert(rec.plan_task_id.to_ascii_lowercase(), *idx);
        }
    }

    // Per task: dependencies that resolve to another task in `indices`, and
    // the set of normalized location paths.
    let mut deps: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
    let mut paths: BTreeMap<usize, BTreeSet<String>> = BTreeMap::new();
    for idx in indices {
        let rec = &records[*idx];
        let mut resolved_deps: BTreeSet<usize> = BTreeSet::new();
        for dep in &rec.dependency_keys {
            let dep_key = dep.trim().to_ascii_lowercase();
            if dep_key.is_empty() {
                continue;
            }
            // Unresolvable keys and self-references are dropped.
            if let Some(dep_idx) = lookup.get(&dep_key)
                && dep_idx != idx
            {
                resolved_deps.insert(*dep_idx);
            }
        }
        deps.insert(*idx, resolved_deps);

        let normalized_paths: BTreeSet<String> = rec
            .location_paths
            .iter()
            .map(|path| normalize_location_path(path))
            .filter(|path| !path.is_empty())
            .collect();
        paths.insert(*idx, normalized_paths);
    }

    let batch_by_idx = compute_batch_index(records, indices, &deps);
    // Union-find parent table; every task starts as its own root.
    let mut parent: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, idx)).collect();

    let mut by_batch: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
    for idx in indices {
        let batch = batch_by_idx.get(idx).copied().unwrap_or(0);
        by_batch.entry(batch).or_default().push(*idx);
    }

    // Within each batch, union all tasks that share a normalized path. The
    // members are sorted first so the union order is deterministic.
    for members in by_batch.values_mut() {
        members.sort_by_key(|idx| task_sort_key(records, *idx));

        let mut path_to_members: BTreeMap<String, Vec<usize>> = BTreeMap::new();
        for idx in members {
            for path in paths.get(idx).into_iter().flatten() {
                path_to_members.entry(path.clone()).or_default().push(*idx);
            }
        }
        for overlap_members in path_to_members.values() {
            if overlap_members.len() < 2 {
                continue;
            }
            let first = overlap_members[0];
            for other in overlap_members.iter().skip(1) {
                uf_union(&mut parent, first, *other);
            }
        }
    }

    // Materialize the union-find components as the initial groups.
    let mut grouped: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
    for idx in indices {
        let root = uf_find(&mut parent, *idx);
        grouped.entry(root).or_default().insert(*idx);
    }
    let mut groups: Vec<BTreeSet<usize>> = grouped.into_values().collect();
    let target_group_count = desired_auto_group_count(indices.len(), hint);

    // Greedy merge: one pair per iteration until the target is met or
    // nothing can be merged.
    loop {
        if let Some(target) = target_group_count
            && groups.len() <= target
        {
            break;
        }

        let mut candidates: Vec<AutoMergeCandidate> = Vec::new();
        for i in 0..groups.len() {
            for j in (i + 1)..groups.len() {
                // Pairs whose combined complexity exceeds 20 are never
                // scored candidates.
                let merged_complexity =
                    group_complexity(records, &groups[i]) + group_complexity(records, &groups[j]);
                if merged_complexity > 20 {
                    continue;
                }

                let dep_cross = dependency_cross_edges(&deps, &groups[i], &groups[j]);
                let overlap_paths = overlap_path_count(&paths, &groups[i], &groups[j]);
                let min_group_size = groups[i].len().min(groups[j].len()).max(1) as f64;
                // Each affinity is clamped to at most 1.0.
                let dep_affinity = ((dep_cross as f64) / min_group_size).min(1.0);
                let ovl_affinity = ((overlap_paths as f64) / 2.0).min(1.0);
                // Peaks at a combined complexity of 12 and falls off
                // linearly to 0.
                let size_fit = (1.0 - ((merged_complexity as f64 - 12.0).abs() / 12.0)).max(0.0);
                let span = group_span(&batch_by_idx, &groups[i], &groups[j]);
                let serial_penalty = ((span as f64 - 1.0).max(0.0)) / 3.0;
                // Always 0.0 here: pairs above 20 were skipped above.
                let oversize_penalty = ((merged_complexity as f64 - 20.0).max(0.0)) / 20.0;

                let score = (0.45 * dep_affinity) + (0.35 * ovl_affinity) + (0.20 * size_fit)
                    - (0.25 * serial_penalty)
                    - (0.45 * oversize_penalty);
                // Pairs scoring below 0.30 are not worth merging.
                if score < 0.30 {
                    continue;
                }

                // Order the two keys so tie-breaking does not depend on
                // which group is `i` and which is `j`.
                let mut key_a = group_min_task_key(records, &groups[i]);
                let mut key_b = group_min_task_key(records, &groups[j]);
                if key_b < key_a {
                    std::mem::swap(&mut key_a, &mut key_b);
                }
                candidates.push(AutoMergeCandidate {
                    i,
                    j,
                    score_key: (score * 1_000_000.0).round() as i64,
                    key_a,
                    key_b,
                });
            }
        }

        if candidates.is_empty() {
            // No scored pair left: if a target exists and is still exceeded,
            // fall back to a forced merge; otherwise stop.
            if let Some(target) = target_group_count
                && groups.len() > target
                && let Some(chosen) = pick_forced_merge(records, &batch_by_idx, &groups)
            {
                let mut merged = groups[chosen.i].clone();
                merged.extend(groups[chosen.j].iter().copied());
                groups[chosen.i] = merged;
                groups.remove(chosen.j);
                continue;
            }
            break;
        }

        // Highest score first; ties broken by task keys, then by position.
        candidates.sort_by(|a, b| {
            b.score_key
                .cmp(&a.score_key)
                .then_with(|| a.key_a.cmp(&b.key_a))
                .then_with(|| a.key_b.cmp(&b.key_b))
                .then_with(|| a.i.cmp(&b.i))
                .then_with(|| a.j.cmp(&b.j))
        });
        let chosen = &candidates[0];

        // `j > i`, so removing `j` leaves position `i` valid.
        let mut merged = groups[chosen.i].clone();
        merged.extend(groups[chosen.j].iter().copied());
        groups[chosen.i] = merged;
        groups.remove(chosen.j);
    }

    // Number the groups by earliest batch, then by smallest task key.
    groups.sort_by(|a, b| {
        group_min_batch(&batch_by_idx, a)
            .cmp(&group_min_batch(&batch_by_idx, b))
            .then_with(|| group_min_task_key(records, a).cmp(&group_min_task_key(records, b)))
    });

    let mut out: BTreeMap<usize, String> = BTreeMap::new();
    for (idx, group) in groups.iter().enumerate() {
        let fallback = format!("s{sprint}-auto-g{}", idx + 1);
        let group_key = normalize_token(&fallback, &fallback, 48);
        for member in group {
            out.insert(*member, group_key.clone());
        }
    }
    out
}
878
879fn compute_batch_index(
880    records: &[Record],
881    indices: &[usize],
882    deps: &BTreeMap<usize, BTreeSet<usize>>,
883) -> BTreeMap<usize, usize> {
884    let mut in_deg: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, 0)).collect();
885    let mut reverse: HashMap<usize, BTreeSet<usize>> = indices
886        .iter()
887        .copied()
888        .map(|idx| (idx, BTreeSet::new()))
889        .collect();
890
891    for idx in indices {
892        for dep in deps.get(idx).cloned().unwrap_or_default() {
893            if !in_deg.contains_key(&dep) {
894                continue;
895            }
896            if let Some(value) = in_deg.get_mut(idx) {
897                *value += 1;
898            }
899            if let Some(children) = reverse.get_mut(&dep) {
900                children.insert(*idx);
901            }
902        }
903    }
904
905    let mut remaining: BTreeSet<usize> = indices.iter().copied().collect();
906    let mut batch_by_idx: BTreeMap<usize, usize> = BTreeMap::new();
907    let mut layer = 0usize;
908    let mut ready: VecDeque<usize> = {
909        let mut start: Vec<usize> = indices
910            .iter()
911            .copied()
912            .filter(|idx| in_deg.get(idx).copied().unwrap_or(0) == 0)
913            .collect();
914        start.sort_by_key(|idx| task_sort_key(records, *idx));
915        start.into_iter().collect()
916    };
917
918    while !remaining.is_empty() {
919        let mut batch_members: Vec<usize> = ready.drain(..).collect();
920        batch_members.sort_by_key(|idx| task_sort_key(records, *idx));
921
922        if batch_members.is_empty() {
923            let mut cycle_members: Vec<usize> = remaining.iter().copied().collect();
924            cycle_members.sort_by_key(|idx| task_sort_key(records, *idx));
925            for idx in cycle_members {
926                remaining.remove(&idx);
927                batch_by_idx.insert(idx, layer);
928            }
929            break;
930        }
931
932        for idx in &batch_members {
933            remaining.remove(idx);
934            batch_by_idx.insert(*idx, layer);
935        }
936
937        let mut next: Vec<usize> = Vec::new();
938        for idx in batch_members {
939            for child in reverse.get(&idx).cloned().unwrap_or_default() {
940                if let Some(value) = in_deg.get_mut(&child) {
941                    *value = value.saturating_sub(1);
942                    if *value == 0 && remaining.contains(&child) {
943                        next.push(child);
944                    }
945                }
946            }
947        }
948        next.sort_by_key(|idx| task_sort_key(records, *idx));
949        next.dedup();
950        ready.extend(next);
951        layer += 1;
952    }
953
954    for idx in indices {
955        batch_by_idx.entry(*idx).or_insert(0);
956    }
957    batch_by_idx
958}
959
960fn task_sort_key(records: &[Record], idx: usize) -> (String, String) {
961    let rec = &records[idx];
962    let primary = if rec.plan_task_id.trim().is_empty() {
963        rec.task_id.to_ascii_lowercase()
964    } else {
965        rec.plan_task_id.to_ascii_lowercase()
966    };
967    (primary, rec.task_id.to_ascii_lowercase())
968}
969
/// Canonicalizes a location path for comparison: surrounding whitespace is
/// dropped, internal whitespace runs collapse to a single space, and ASCII
/// letters are lowercased.
fn normalize_location_path(path: &str) -> String {
    let mut normalized = String::with_capacity(path.len());
    for word in path.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
976
977fn group_complexity(records: &[Record], group: &BTreeSet<usize>) -> i32 {
978    group
979        .iter()
980        .map(|idx| records[*idx].complexity.max(1))
981        .sum::<i32>()
982}
983
984fn group_min_task_key(records: &[Record], group: &BTreeSet<usize>) -> String {
985    group
986        .iter()
987        .map(|idx| task_sort_key(records, *idx).0)
988        .min()
989        .unwrap_or_default()
990}
991
/// Returns the lowest batch number recorded for any member of `group`.
///
/// Members without an entry in `batch_by_idx` are skipped; the result is 0
/// when no member has a recorded batch.
fn group_min_batch(batch_by_idx: &BTreeMap<usize, usize>, group: &BTreeSet<usize>) -> usize {
    let mut lowest: Option<usize> = None;
    for idx in group {
        if let Some(&batch) = batch_by_idx.get(idx) {
            lowest = Some(lowest.map_or(batch, |seen| seen.min(batch)));
        }
    }
    lowest.unwrap_or(0)
}
999
/// Measures how many batches the combination of `left` and `right` spans:
/// the difference between the highest and lowest batch of any member.
///
/// Members missing from `batch_by_idx` count as batch 0. Two empty groups
/// span 0.
fn group_span(
    batch_by_idx: &BTreeMap<usize, usize>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let mut bounds: Option<(usize, usize)> = None;
    for idx in left.iter().chain(right.iter()) {
        let batch = batch_by_idx.get(idx).copied().unwrap_or(0);
        bounds = Some(match bounds {
            None => (batch, batch),
            Some((low, high)) => (low.min(batch), high.max(batch)),
        });
    }
    match bounds {
        Some((low, high)) => high - low,
        None => 0,
    }
}
1018
/// Counts dependency edges that connect the two groups, in either direction:
/// edges from a member of `left` to a member of `right`, plus edges from a
/// member of `right` to a member of `left`.
fn dependency_cross_edges(
    deps: &BTreeMap<usize, BTreeSet<usize>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let edges_into = |from: &BTreeSet<usize>, into: &BTreeSet<usize>| -> usize {
        from.iter()
            .filter_map(|src| deps.get(src))
            .map(|edges| edges.intersection(into).count())
            .sum()
    };
    edges_into(left, right) + edges_into(right, left)
}
1037
/// Counts the distinct paths that are touched by at least one member of
/// `left` and at least one member of `right`.
///
/// Members without an entry in `paths` contribute nothing.
fn overlap_path_count(
    paths: &BTreeMap<usize, BTreeSet<String>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    /// Collects every path owned by any member of `group`.
    fn paths_of<'a>(
        paths: &'a BTreeMap<usize, BTreeSet<String>>,
        group: &BTreeSet<usize>,
    ) -> BTreeSet<&'a str> {
        group
            .iter()
            .filter_map(|idx| paths.get(idx))
            .flatten()
            .map(String::as_str)
            .collect()
    }

    let left_paths = paths_of(paths, left);
    let right_paths = paths_of(paths, right);
    left_paths.intersection(&right_paths).count()
}
1057
1058fn desired_auto_group_count(max_groups: usize, hint: &AutoSprintHint) -> Option<usize> {
1059    if max_groups == 0 {
1060        return None;
1061    }
1062    let preferred = hint
1063        .target_parallel_width
1064        .or_else(|| {
1065            if hint.execution_profile.as_deref() == Some("serial") {
1066                Some(1usize)
1067            } else {
1068                None
1069            }
1070        })
1071        .or_else(|| {
1072            if hint.pr_grouping_intent == Some(SplitPrGrouping::PerSprint) {
1073                Some(1usize)
1074            } else {
1075                None
1076            }
1077        })?;
1078    Some(preferred.clamp(1, max_groups))
1079}
1080
1081fn pick_forced_merge(
1082    records: &[Record],
1083    batch_by_idx: &BTreeMap<usize, usize>,
1084    groups: &[BTreeSet<usize>],
1085) -> Option<ForcedMergeCandidate> {
1086    let mut chosen: Option<ForcedMergeCandidate> = None;
1087    for i in 0..groups.len() {
1088        for j in (i + 1)..groups.len() {
1089            let mut key_a = group_min_task_key(records, &groups[i]);
1090            let mut key_b = group_min_task_key(records, &groups[j]);
1091            if key_b < key_a {
1092                std::mem::swap(&mut key_a, &mut key_b);
1093            }
1094            let candidate = ForcedMergeCandidate {
1095                i,
1096                j,
1097                span: group_span(batch_by_idx, &groups[i], &groups[j]),
1098                complexity: group_complexity(records, &groups[i])
1099                    + group_complexity(records, &groups[j]),
1100                key_a,
1101                key_b,
1102            };
1103            let replace = match &chosen {
1104                None => true,
1105                Some(best) => {
1106                    (
1107                        candidate.span,
1108                        candidate.complexity,
1109                        &candidate.key_a,
1110                        &candidate.key_b,
1111                        candidate.i,
1112                        candidate.j,
1113                    ) < (
1114                        best.span,
1115                        best.complexity,
1116                        &best.key_a,
1117                        &best.key_b,
1118                        best.i,
1119                        best.j,
1120                    )
1121                }
1122            };
1123            if replace {
1124                chosen = Some(candidate);
1125            }
1126        }
1127    }
1128    chosen
1129}
1130
1131fn sprint_hints(selected_sprints: &[Sprint]) -> HashMap<i32, AutoSprintHint> {
1132    let mut hints: HashMap<i32, AutoSprintHint> = HashMap::new();
1133    for sprint in selected_sprints {
1134        let pr_grouping_intent = sprint
1135            .metadata
1136            .pr_grouping_intent
1137            .as_deref()
1138            .and_then(SplitPrGrouping::from_cli);
1139        let execution_profile = sprint.metadata.execution_profile.clone();
1140        let target_parallel_width = sprint.metadata.parallel_width;
1141        hints.insert(
1142            sprint.number,
1143            AutoSprintHint {
1144                pr_grouping_intent,
1145                execution_profile,
1146                target_parallel_width,
1147            },
1148        );
1149    }
1150    hints
1151}
1152
1153fn build_explain_payload(
1154    records: &[SplitPlanRecord],
1155    hints: &HashMap<i32, AutoSprintHint>,
1156    pr_grouping: SplitPrGrouping,
1157) -> Vec<ExplainSprint> {
1158    let mut grouped: BTreeMap<i32, BTreeMap<String, Vec<String>>> = BTreeMap::new();
1159    for record in records {
1160        grouped
1161            .entry(record.sprint)
1162            .or_default()
1163            .entry(record.pr_group.clone())
1164            .or_default()
1165            .push(record.task_id.clone());
1166    }
1167
1168    let mut out: Vec<ExplainSprint> = Vec::new();
1169    for (sprint, per_group) in grouped {
1170        let hint = hints.get(&sprint).cloned().unwrap_or_default();
1171        let groups = per_group
1172            .into_iter()
1173            .map(|(pr_group, task_ids)| {
1174                let anchor = task_ids.first().cloned().unwrap_or_default();
1175                ExplainGroup {
1176                    pr_group,
1177                    task_ids,
1178                    anchor,
1179                }
1180            })
1181            .collect::<Vec<_>>();
1182        out.push(ExplainSprint {
1183            sprint,
1184            target_parallel_width: hint.target_parallel_width,
1185            execution_profile: hint.execution_profile,
1186            pr_grouping_intent: hint
1187                .pr_grouping_intent
1188                .map(|value| value.as_str().to_string())
1189                .or_else(|| Some(pr_grouping.as_str().to_string())),
1190            groups,
1191        });
1192    }
1193    out
1194}
1195
/// Splits a `--key=value` argument into its flag and inline value.
///
/// Returns `(flag, Some(value))` when `raw` starts with `--` and contains an
/// `=`; the split happens at the first `=`, and `value` may be empty. Any
/// other argument is returned unchanged with `None`.
fn split_value_arg(raw: &str) -> (&str, Option<&str>) {
    if !raw.starts_with("--") {
        return (raw, None);
    }
    // `raw` begins with `--`, so the text before the first `=` is never empty
    // and needs no separate emptiness check.
    match raw.split_once('=') {
        Some((flag, value)) => (flag, Some(value)),
        None => (raw, None),
    }
}
1205
/// Resolves the value of a value-taking option located at `args[idx]`.
///
/// With an inline value (`--key=value`) that value is used and the next index
/// to parse is `idx + 1`. Otherwise the value is read from `args[idx + 1]` and
/// the next index is `idx + 2`.
///
/// # Errors
///
/// Returns `Err(())` when the value is missing or empty. `_flag` is currently
/// unused.
fn consume_option_value(
    args: &[String],
    idx: usize,
    inline_value: Option<&str>,
    _flag: &str,
) -> Result<(String, usize), ()> {
    let (candidate, consumed) = match inline_value {
        Some(inline) => (inline, 1usize),
        None => (args.get(idx + 1).ok_or(())?.as_str(), 2usize),
    };
    if candidate.is_empty() {
        return Err(());
    }
    Ok((candidate.to_string(), idx + consumed))
}
1232
/// Finds the representative (root) of `node` in the union-find forest stored
/// in `parent`, re-pointing every non-root node visited on the way directly at
/// that root.
///
/// A node with no entry in `parent` is treated as its own root and is not
/// inserted.
fn uf_find(parent: &mut HashMap<usize, usize>, node: usize) -> usize {
    // First pass: climb until a node that is its own parent (or has no entry).
    let mut root = node;
    loop {
        let above = parent.get(&root).copied().unwrap_or(root);
        if above == root {
            break;
        }
        root = above;
    }

    // Second pass: re-point each node on the walked path at the root.
    let mut cursor = node;
    while cursor != root {
        let above = parent.insert(cursor, root).unwrap_or(root);
        cursor = above;
    }
    root
}
1242
1243fn uf_union(parent: &mut HashMap<usize, usize>, left: usize, right: usize) {
1244    let left_root = uf_find(parent, left);
1245    let right_root = uf_find(parent, right);
1246    if left_root == right_root {
1247        return;
1248    }
1249    if left_root < right_root {
1250        parent.insert(right_root, left_root);
1251    } else {
1252        parent.insert(left_root, right_root);
1253    }
1254}
1255
1256fn print_tsv(records: &[OutputRecord]) {
1257    println!("# task_id\tsummary\tpr_group");
1258    for rec in records {
1259        println!(
1260            "{}\t{}\t{}",
1261            rec.task_id.replace('\t', " "),
1262            rec.summary.replace('\t', " "),
1263            rec.pr_group.replace('\t', " "),
1264        );
1265    }
1266}
1267
1268fn print_usage() {
1269    let _ = std::io::stderr().write_all(USAGE.as_bytes());
1270}
1271
/// Reports `msg` on stderr with a `split-prs:` prefix and returns the exit
/// code 2 for the caller to propagate.
fn die(msg: &str) -> i32 {
    const EXIT_CODE: i32 = 2;
    eprintln!("split-prs: {msg}");
    EXIT_CODE
}
1276
/// Resolves `path` against `repo_root`: absolute paths are returned as-is,
/// relative paths are joined onto the repository root.
fn resolve_repo_relative(repo_root: &Path, path: &Path) -> PathBuf {
    if path.is_absolute() {
        path.to_path_buf()
    } else {
        repo_root.join(path)
    }
}
1283
/// Expresses `path` relative to `repo_root` when possible.
///
/// Both paths are canonicalized first. Fallbacks:
/// - `path` cannot be canonicalized: `path` is returned unchanged;
/// - `repo_root` cannot be canonicalized, or the canonical path does not lie
///   under the canonical root: the canonical form of `path` is returned.
fn maybe_relativize(path: &Path, repo_root: &Path) -> PathBuf {
    let resolved = match path.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => return path.to_path_buf(),
    };
    let relative = repo_root
        .canonicalize()
        .ok()
        .and_then(|root| resolved.strip_prefix(&root).ok().map(Path::to_path_buf));
    relative.unwrap_or(resolved)
}
1296
/// Renders `path` as text with the platform's main separator replaced by `/`.
/// Non-UTF-8 portions are converted lossily.
fn path_to_posix(path: &Path) -> String {
    let native = path.to_string_lossy();
    native
        .chars()
        .map(|ch| {
            if ch == std::path::MAIN_SEPARATOR {
                '/'
            } else {
                ch
            }
        })
        .collect()
}
1301
/// Trims `value` and collapses every internal whitespace run to one space.
/// Returns the placeholder `"task"` when nothing but whitespace was given.
fn normalize_spaces(value: String) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    if collapsed.is_empty() {
        collapsed.push_str("task");
    }
    collapsed
}
1310
/// Turns `value` into a lowercase, dash-separated token.
///
/// Characters are lowercased; ASCII letters and digits are kept, and every
/// run of other characters becomes a single `-`. Leading and trailing dashes
/// are removed. When nothing remains, `fallback` is used verbatim instead.
///
/// The result is limited to `max_len` bytes. If shortening is needed, the cut
/// is moved back to the nearest UTF-8 character boundary (a non-ASCII
/// `fallback` must not cause a panic), and any dash left dangling at either
/// end is trimmed again.
fn normalize_token(value: &str, fallback: &str, max_len: usize) -> String {
    let mut out = String::new();
    let mut last_dash = false;
    for ch in value.chars().flat_map(char::to_lowercase) {
        if ch.is_ascii_alphanumeric() {
            out.push(ch);
            last_dash = false;
        } else if !last_dash {
            out.push('-');
            last_dash = true;
        }
    }

    let normalized = out.trim_matches('-');
    let mut final_token = if normalized.is_empty() {
        fallback.to_string()
    } else {
        normalized.to_string()
    };

    if final_token.len() > max_len {
        // `String::truncate` panics off a char boundary; index 0 is always a
        // boundary, so this walk terminates.
        let mut cut = max_len;
        while !final_token.is_char_boundary(cut) {
            cut -= 1;
        }
        final_token.truncate(cut);
        final_token = final_token.trim_matches('-').to_string();
    }
    final_token
}
1335
/// Reports whether `value` is a placeholder rather than real content.
///
/// After trimming and ASCII-lowercasing, a value is a placeholder when it is
/// empty, one of `-`, `none`, `n/a`, `na`, `...`, wrapped in angle brackets
/// (`<...>`), or contains the phrase `task ids`.
fn is_placeholder(value: &str) -> bool {
    const SENTINELS: [&str; 6] = ["", "-", "none", "n/a", "na", "..."];

    let token = value.trim().to_ascii_lowercase();
    let is_sentinel = SENTINELS.contains(&token.as_str());
    let is_bracketed = token.starts_with('<') && token.ends_with('>');
    is_sentinel || is_bracketed || token.contains("task ids")
}
1346
#[cfg(test)]
mod tests {
    use super::{is_placeholder, normalize_token};
    use pretty_assertions::assert_eq;

    /// Non-alphanumeric runs collapse to one dash, and an unusable value falls
    /// back to the (length-limited) fallback token.
    #[test]
    fn normalize_token_collapses_non_alnum_and_limits_length() {
        let cases = [
            ("Sprint 2 :: Shared Pair", "fallback", 20, "sprint-2-shared-pair"),
            ("!!!!", "fallback-value", 8, "fallback"),
        ];
        for (value, fallback, max_len, expected) in cases {
            assert_eq!(normalize_token(value, fallback, max_len), expected);
        }
    }

    /// Common filler values are placeholders; a real task reference is not.
    #[test]
    fn placeholder_rules_cover_common_plan_values() {
        for value in ["none", "<task ids>", "Task IDs here"] {
            assert!(is_placeholder(value));
        }
        assert!(!is_placeholder("Task 1.1"));
    }
}
plan_tooling/split_prs.rs

plan_tooling/
split_prs.rs