plan_tooling/
split_prs.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
2use std::io::Write;
3use std::path::{Path, PathBuf};
4
5use serde::Serialize;
6
7use crate::parse::{Plan, Sprint, parse_plan_with_display};
8
/// Help text for the `split-prs` subcommand: synopsis, purpose, required flags, optional flags
/// with their defaults, and the meaning of each exit code.
9const USAGE: &str = r#"Usage:
10  plan-tooling split-prs --file <plan.md> --pr-grouping <per-sprint|group> [options]
11
12Purpose:
13  Build task-to-PR split records from a Plan Format v1 file.
14
15Required:
16  --file <path>                    Plan file to parse
17  --pr-grouping <mode>             per-sprint | group
18
19Options:
20  --scope <plan|sprint>            Scope to split (default: sprint)
21  --sprint <n>                     Sprint number when --scope sprint
22  --pr-group <task=group>          Group pin; repeatable (group mode only)
23                                   deterministic/group: required for every task
24                                   auto/group: optional pins + auto assignment for remaining tasks
25  --strategy <deterministic|auto>  Split strategy (default: deterministic)
26  --owner-prefix <text>            Owner prefix (default: subagent)
27  --branch-prefix <text>           Branch prefix (default: issue)
28  --worktree-prefix <text>         Worktree prefix (default: issue__)
29  --format <json|tsv>              Output format (default: json)
30  -h, --help                       Show help
31
32Exit:
33  0: success
34  1: runtime or validation error
35  2: usage error
36"#;
37
/// Selects which sprints of a plan a split operates on (consumed by `select_sprints_for_scope`).
38#[derive(Debug, Clone, Copy, PartialEq, Eq)]
39pub enum SplitScope {
    /// Every sprint in the plan that has at least one task.
40    Plan,
    /// Only the sprint whose `number` equals the wrapped value.
41    Sprint(i32),
42}
43
/// How tasks are bundled into PR groups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrGrouping {
    /// Every task of a sprint shares one group derived from the sprint number.
    PerSprint,
    /// Groups come from `--pr-group` pins and, under the auto strategy, automatic assignment.
    Group,
}

impl SplitPrGrouping {
    /// Returns the command-line spelling of this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Group => "group",
            Self::PerSprint => "per-sprint",
        }
    }

    /// Parses the command-line spelling; any other text yields `None`.
    fn from_cli(value: &str) -> Option<Self> {
        // Look the text up against `as_str` so the two directions cannot drift apart.
        [Self::PerSprint, Self::Group]
            .iter()
            .copied()
            .find(|mode| mode.as_str() == value)
    }
}
66
/// How PR groups are decided when grouping by explicit groups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SplitPrStrategy {
    /// Every task must be pinned to a group explicitly.
    Deterministic,
    /// Pins are optional; tasks without one are grouped automatically.
    Auto,
}

impl SplitPrStrategy {
    /// Returns the command-line spelling of this strategy.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Deterministic => "deterministic",
        }
    }

    /// Parses the command-line spelling; any other text yields `None`.
    fn from_cli(value: &str) -> Option<Self> {
        // Look the text up against `as_str` so the two directions cannot drift apart.
        [Self::Deterministic, Self::Auto]
            .iter()
            .copied()
            .find(|strategy| strategy.as_str() == value)
    }
}
89
/// Caller-supplied settings for `build_split_plan_records`.
90#[derive(Debug, Clone, PartialEq, Eq)]
91pub struct SplitPlanOptions {
    /// Whether tasks share one group per sprint or use explicit/automatic groups.
92    pub pr_grouping: SplitPrGrouping,
    /// Whether every task must be pinned (deterministic) or unpinned tasks are auto-grouped.
93    pub strategy: SplitPrStrategy,
    /// Raw pins in `<task-or-plan-id>=<group>` form; only allowed with group grouping.
94    pub pr_group_entries: Vec<String>,
    /// Prefix for generated owner names (normalized before use).
95    pub owner_prefix: String,
    /// Prefix for generated branch names (normalized before use).
96    pub branch_prefix: String,
    /// Prefix for generated worktree names (normalized before use).
97    pub worktree_prefix: String,
98}
99
/// One task-to-PR split record as returned by `build_split_plan_records`.
100#[derive(Debug, Clone, PartialEq, Eq)]
101pub struct SplitPlanRecord {
    /// Positional id `S<sprint>T<ordinal>`, where ordinal is the task's 1-based position in its
    /// sprint.
102    pub task_id: String,
    /// Number of the sprint the task belongs to.
103    pub sprint: i32,
    /// Task name passed through `normalize_spaces`; falls back to the plan task id, then to
    /// `sprint-<n>-task-<ordinal>`.
104    pub summary: String,
    /// `<branch-prefix>/s<sprint>-t<ordinal>-<slug>`.
105    pub branch: String,
    /// `<worktree-prefix>-s<sprint>-t<ordinal>`.
106    pub worktree: String,
    /// `<owner-prefix>-s<sprint>-t<ordinal>`.
107    pub owner: String,
    /// Note parts joined with `"; "`: sprint, plan task, optional deps/validate, grouping mode,
    /// group, and the shared anchor when the group holds more than one task.
108    pub notes: String,
    /// PR group the task was assigned to.
109    pub pr_group: String,
110}
111
/// Internal working record for one task while PR groups are being assigned; turned into a
/// `SplitPlanRecord` at the end of `build_split_plan_records`.
112#[derive(Debug, Clone)]
113struct Record {
    /// Positional id `S<sprint>T<ordinal>`.
114    task_id: String,
    /// Trimmed task id taken from the plan; may be empty.
115    plan_task_id: String,
    /// Number of the sprint the task belongs to.
116    sprint: i32,
    /// Display summary derived from the task name (or an id-based fallback).
117    summary: String,
    /// Generated branch name.
118    branch: String,
    /// Generated worktree name.
119    worktree: String,
    /// Generated owner name.
120    owner: String,
    /// Note fragments collected so far; grouping notes are appended when the record is finalized.
121    notes_parts: Vec<String>,
    /// Task complexity when positive, otherwise the default of 5.
122    complexity: i32,
    /// Trimmed, non-empty, non-placeholder location entries of the task.
123    location_paths: Vec<String>,
    /// Trimmed, non-empty, non-placeholder dependency entries of the task.
124    dependency_keys: Vec<String>,
    /// Assigned PR group; empty until grouping has run.
125    pr_group: String,
126}
127
/// Top-level document that `run` serializes to JSON when the output format is `json`.
128#[derive(Debug, Serialize)]
129struct Output {
    /// Plan file path as reported to the user (built via `maybe_relativize` and `path_to_posix`).
130    file: String,
    /// Scope string from the command line: `plan` or `sprint`.
131    scope: String,
    /// Sprint number when the scope is `sprint`, otherwise `None`.
132    sprint: Option<i32>,
    /// Grouping mode string (`per-sprint` or `group`).
133    pr_grouping: String,
    /// Strategy string (`deterministic` or `auto`).
134    strategy: String,
    /// One entry per task, in the order the records were built.
135    records: Vec<OutputRecord>,
136}
137
/// Per-task record in the emitted output; carries the same fields as `SplitPlanRecord` except
/// `sprint`.
138#[derive(Debug, Serialize, PartialEq, Eq)]
139struct OutputRecord {
    /// Positional task id.
140    task_id: String,
    /// Task summary.
141    summary: String,
    /// Generated branch name.
142    branch: String,
    /// Generated worktree name.
143    worktree: String,
    /// Generated owner name.
144    owner: String,
    /// Joined notes string.
145    notes: String,
    /// Assigned PR group.
146    pr_group: String,
147}
148
149pub fn run(args: &[String]) -> i32 {
150    let mut file: Option<String> = None;
151    let mut scope = String::from("sprint");
152    let mut sprint: Option<String> = None;
153    let mut pr_grouping: Option<String> = None;
154    let mut pr_group_entries: Vec<String> = Vec::new();
155    let mut strategy = String::from("deterministic");
156    let mut owner_prefix = String::from("subagent");
157    let mut branch_prefix = String::from("issue");
158    let mut worktree_prefix = String::from("issue__");
159    let mut format = String::from("json");
160
161    let mut i = 0usize;
162    while i < args.len() {
163        match args[i].as_str() {
164            "--file" => {
165                let Some(v) = args.get(i + 1) else {
166                    return die("missing value for --file");
167                };
168                if v.is_empty() {
169                    return die("missing value for --file");
170                }
171                file = Some(v.to_string());
172                i += 2;
173            }
174            "--scope" => {
175                let Some(v) = args.get(i + 1) else {
176                    return die("missing value for --scope");
177                };
178                if v.is_empty() {
179                    return die("missing value for --scope");
180                }
181                scope = v.to_string();
182                i += 2;
183            }
184            "--sprint" => {
185                let Some(v) = args.get(i + 1) else {
186                    return die("missing value for --sprint");
187                };
188                if v.is_empty() {
189                    return die("missing value for --sprint");
190                }
191                sprint = Some(v.to_string());
192                i += 2;
193            }
194            "--pr-grouping" => {
195                let Some(v) = args.get(i + 1) else {
196                    return die("missing value for --pr-grouping");
197                };
198                if v.is_empty() {
199                    return die("missing value for --pr-grouping");
200                }
201                pr_grouping = Some(v.to_string());
202                i += 2;
203            }
204            "--pr-group" => {
205                let Some(v) = args.get(i + 1) else {
206                    return die("missing value for --pr-group");
207                };
208                if v.is_empty() {
209                    return die("missing value for --pr-group");
210                }
211                pr_group_entries.push(v.to_string());
212                i += 2;
213            }
214            "--strategy" => {
215                let Some(v) = args.get(i + 1) else {
216                    return die("missing value for --strategy");
217                };
218                if v.is_empty() {
219                    return die("missing value for --strategy");
220                }
221                strategy = v.to_string();
222                i += 2;
223            }
224            "--owner-prefix" => {
225                let Some(v) = args.get(i + 1) else {
226                    return die("missing value for --owner-prefix");
227                };
228                if v.is_empty() {
229                    return die("missing value for --owner-prefix");
230                }
231                owner_prefix = v.to_string();
232                i += 2;
233            }
234            "--branch-prefix" => {
235                let Some(v) = args.get(i + 1) else {
236                    return die("missing value for --branch-prefix");
237                };
238                if v.is_empty() {
239                    return die("missing value for --branch-prefix");
240                }
241                branch_prefix = v.to_string();
242                i += 2;
243            }
244            "--worktree-prefix" => {
245                let Some(v) = args.get(i + 1) else {
246                    return die("missing value for --worktree-prefix");
247                };
248                if v.is_empty() {
249                    return die("missing value for --worktree-prefix");
250                }
251                worktree_prefix = v.to_string();
252                i += 2;
253            }
254            "--format" => {
255                let Some(v) = args.get(i + 1) else {
256                    return die("missing value for --format");
257                };
258                if v.is_empty() {
259                    return die("missing value for --format");
260                }
261                format = v.to_string();
262                i += 2;
263            }
264            "-h" | "--help" => {
265                print_usage();
266                return 0;
267            }
268            other => {
269                return die(&format!("unknown argument: {other}"));
270            }
271        }
272    }
273
274    let Some(file_arg) = file else {
275        print_usage();
276        return 2;
277    };
278    let Some(mut pr_grouping) = pr_grouping else {
279        print_usage();
280        return 2;
281    };
282
283    if pr_grouping == "per-spring" {
284        pr_grouping = String::from("per-sprint");
285    }
286    if scope != "plan" && scope != "sprint" {
287        return die(&format!(
288            "invalid --scope (expected plan|sprint): {}",
289            crate::repr::py_repr(&scope)
290        ));
291    }
292    if pr_grouping != "per-sprint" && pr_grouping != "group" {
293        return die(&format!(
294            "invalid --pr-grouping (expected per-sprint|group): {}",
295            crate::repr::py_repr(&pr_grouping)
296        ));
297    }
298    if strategy != "deterministic" && strategy != "auto" {
299        return die(&format!(
300            "invalid --strategy (expected deterministic|auto): {}",
301            crate::repr::py_repr(&strategy)
302        ));
303    }
304    if format != "json" && format != "tsv" {
305        return die(&format!(
306            "invalid --format (expected json|tsv): {}",
307            crate::repr::py_repr(&format)
308        ));
309    }
310
311    let sprint_num = if scope == "sprint" {
312        let Some(raw) = sprint.as_deref() else {
313            return die("--sprint is required when --scope sprint");
314        };
315        match raw.parse::<i32>() {
316            Ok(v) if v > 0 => Some(v),
317            _ => {
318                eprintln!(
319                    "error: invalid --sprint (expected positive int): {}",
320                    crate::repr::py_repr(raw)
321                );
322                return 2;
323            }
324        }
325    } else {
326        None
327    };
328
329    // Deterministic group mode requires full explicit mappings.
330    // Auto group mode can derive missing assignments from topology/conflict signals.
331    if pr_grouping == "group" && strategy == "deterministic" && pr_group_entries.is_empty() {
332        return die(
333            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry",
334        );
335    }
336    if pr_grouping != "group" && !pr_group_entries.is_empty() {
337        return die("--pr-group can only be used when --pr-grouping group");
338    }
339
340    let repo_root = crate::repo_root::detect();
341    let display_path = file_arg.clone();
342    let read_path = resolve_repo_relative(&repo_root, Path::new(&file_arg));
343    if !read_path.is_file() {
344        eprintln!("error: plan file not found: {display_path}");
345        return 1;
346    }
347
348    let plan: Plan;
349    let parse_errors: Vec<String>;
350    match parse_plan_with_display(&read_path, &display_path) {
351        Ok((p, errs)) => {
352            plan = p;
353            parse_errors = errs;
354        }
355        Err(err) => {
356            eprintln!("error: {display_path}: {err}");
357            return 1;
358        }
359    }
360    if !parse_errors.is_empty() {
361        for err in parse_errors {
362            eprintln!("error: {display_path}: error: {err}");
363        }
364        return 1;
365    }
366
367    let split_scope = match scope.as_str() {
368        "plan" => SplitScope::Plan,
369        "sprint" => {
370            let Some(want) = sprint_num else {
371                return die("internal error: missing sprint number");
372            };
373            SplitScope::Sprint(want)
374        }
375        _ => return die("internal error: invalid scope"),
376    };
377    let Some(grouping_mode) = SplitPrGrouping::from_cli(&pr_grouping) else {
378        return die("internal error: invalid pr-grouping");
379    };
380    let Some(strategy_mode) = SplitPrStrategy::from_cli(&strategy) else {
381        return die("internal error: invalid strategy");
382    };
383
384    let selected_sprints = match select_sprints_for_scope(&plan, split_scope) {
385        Ok(sprints) => sprints,
386        Err(err) => {
387            eprintln!("error: {display_path}: {err}");
388            return 1;
389        }
390    };
391
392    let options = SplitPlanOptions {
393        pr_grouping: grouping_mode,
394        strategy: strategy_mode,
395        pr_group_entries,
396        owner_prefix,
397        branch_prefix,
398        worktree_prefix,
399    };
400    let split_records = match build_split_plan_records(&selected_sprints, &options) {
401        Ok(records) => records,
402        Err(err) => {
403            eprintln!("error: {err}");
404            return 1;
405        }
406    };
407
408    let out_records: Vec<OutputRecord> = split_records
409        .iter()
410        .map(OutputRecord::from_split_record)
411        .collect();
412
413    if format == "tsv" {
414        print_tsv(&out_records);
415        return 0;
416    }
417
418    let output = Output {
419        file: path_to_posix(&maybe_relativize(&read_path, &repo_root)),
420        scope: scope.clone(),
421        sprint: sprint_num,
422        pr_grouping,
423        strategy,
424        records: out_records,
425    };
426    match serde_json::to_string(&output) {
427        Ok(json) => {
428            println!("{json}");
429            0
430        }
431        Err(err) => {
432            eprintln!("error: failed to encode JSON: {err}");
433            1
434        }
435    }
436}
437
438impl OutputRecord {
439    fn from_split_record(record: &SplitPlanRecord) -> Self {
440        Self {
441            task_id: record.task_id.clone(),
442            summary: record.summary.clone(),
443            branch: record.branch.clone(),
444            worktree: record.worktree.clone(),
445            owner: record.owner.clone(),
446            notes: record.notes.clone(),
447            pr_group: record.pr_group.clone(),
448        }
449    }
450}
451
452pub fn select_sprints_for_scope(plan: &Plan, scope: SplitScope) -> Result<Vec<Sprint>, String> {
453    let selected = match scope {
454        SplitScope::Plan => plan
455            .sprints
456            .iter()
457            .filter(|s| !s.tasks.is_empty())
458            .cloned()
459            .collect::<Vec<_>>(),
460        SplitScope::Sprint(want) => match plan.sprints.iter().find(|s| s.number == want) {
461            Some(sprint) if !sprint.tasks.is_empty() => vec![sprint.clone()],
462            Some(_) => return Err(format!("sprint {want} has no tasks")),
463            None => return Err(format!("sprint not found: {want}")),
464        },
465    };
466    if selected.is_empty() {
467        return Err("selected scope has no tasks".to_string());
468    }
469    Ok(selected)
470}
471
472pub fn build_split_plan_records(
473    selected_sprints: &[Sprint],
474    options: &SplitPlanOptions,
475) -> Result<Vec<SplitPlanRecord>, String> {
476    if selected_sprints.is_empty() {
477        return Err("selected scope has no tasks".to_string());
478    }
479    if options.pr_grouping == SplitPrGrouping::Group
480        && options.strategy == SplitPrStrategy::Deterministic
481        && options.pr_group_entries.is_empty()
482    {
483        return Err(
484            "--pr-grouping group requires at least one --pr-group <task-or-plan-id>=<group> entry"
485                .to_string(),
486        );
487    }
488    if options.pr_grouping != SplitPrGrouping::Group && !options.pr_group_entries.is_empty() {
489        return Err("--pr-group can only be used when --pr-grouping group".to_string());
490    }
491
492    let branch_prefix_norm = normalize_branch_prefix(&options.branch_prefix);
493    let worktree_prefix_norm = normalize_worktree_prefix(&options.worktree_prefix);
494    let owner_prefix_norm = normalize_owner_prefix(&options.owner_prefix);
495
496    let mut records: Vec<Record> = Vec::new();
497    for sprint in selected_sprints {
498        for (idx, task) in sprint.tasks.iter().enumerate() {
499            let ordinal = idx + 1;
500            let task_id = format!("S{}T{ordinal}", sprint.number);
501            let plan_task_id = task.id.trim().to_string();
502            let summary = normalize_spaces(if task.name.trim().is_empty() {
503                if plan_task_id.is_empty() {
504                    format!("sprint-{}-task-{ordinal}", sprint.number)
505                } else {
506                    plan_task_id.clone()
507                }
508            } else {
509                task.name.trim().to_string()
510            });
511            let slug = normalize_token(&summary, &format!("task-{ordinal}"), 48);
512
513            let deps: Vec<String> = task
514                .dependencies
515                .clone()
516                .unwrap_or_default()
517                .into_iter()
518                .map(|d| d.trim().to_string())
519                .filter(|d| !d.is_empty())
520                .filter(|d| !is_placeholder(d))
521                .collect();
522            let location_paths: Vec<String> = task
523                .location
524                .iter()
525                .map(|p| p.trim().to_string())
526                .filter(|p| !p.is_empty())
527                .filter(|p| !is_placeholder(p))
528                .collect();
529            let complexity = match task.complexity {
530                Some(value) if value > 0 => value,
531                _ => 5,
532            };
533
534            let validations: Vec<String> = task
535                .validation
536                .iter()
537                .map(|v| v.trim().to_string())
538                .filter(|v| !v.is_empty())
539                .filter(|v| !is_placeholder(v))
540                .collect();
541
542            let mut notes_parts = vec![
543                format!("sprint=S{}", sprint.number),
544                format!(
545                    "plan-task:{}",
546                    if plan_task_id.is_empty() {
547                        task_id.clone()
548                    } else {
549                        plan_task_id.clone()
550                    }
551                ),
552            ];
553            if !deps.is_empty() {
554                notes_parts.push(format!("deps={}", deps.join(",")));
555            }
556            if let Some(first) = validations.first() {
557                notes_parts.push(format!("validate={first}"));
558            }
559
560            records.push(Record {
561                task_id,
562                plan_task_id,
563                sprint: sprint.number,
564                summary,
565                branch: format!("{branch_prefix_norm}/s{}-t{ordinal}-{slug}", sprint.number),
566                worktree: format!("{worktree_prefix_norm}-s{}-t{ordinal}", sprint.number),
567                owner: format!("{owner_prefix_norm}-s{}-t{ordinal}", sprint.number),
568                notes_parts,
569                complexity,
570                location_paths,
571                dependency_keys: deps,
572                pr_group: String::new(),
573            });
574        }
575    }
576
577    if records.is_empty() {
578        return Err("selected scope has no tasks".to_string());
579    }
580
581    let mut group_assignments: HashMap<String, String> = HashMap::new();
582    let mut assignment_sources: Vec<String> = Vec::new();
583    for entry in &options.pr_group_entries {
584        let trimmed = entry.trim();
585        if trimmed.is_empty() {
586            continue;
587        }
588        let Some((raw_key, raw_group)) = trimmed.split_once('=') else {
589            return Err("--pr-group must use <task-or-plan-id>=<group> format".to_string());
590        };
591        let key = raw_key.trim();
592        let group = normalize_token(raw_group.trim(), "", 48);
593        if key.is_empty() || group.is_empty() {
594            return Err("--pr-group must include both task key and group".to_string());
595        }
596        assignment_sources.push(key.to_string());
597        group_assignments.insert(key.to_ascii_lowercase(), group);
598    }
599
600    if options.pr_grouping == SplitPrGrouping::Group && !assignment_sources.is_empty() {
601        let mut known: HashMap<String, bool> = HashMap::new();
602        for rec in &records {
603            known.insert(rec.task_id.to_ascii_lowercase(), true);
604            if !rec.plan_task_id.is_empty() {
605                known.insert(rec.plan_task_id.to_ascii_lowercase(), true);
606            }
607        }
608
609        let unknown: Vec<String> = assignment_sources
610            .iter()
611            .filter(|key| !known.contains_key(&key.to_ascii_lowercase()))
612            .cloned()
613            .collect();
614        if !unknown.is_empty() {
615            return Err(format!(
616                "--pr-group references unknown task keys: {}",
617                unknown
618                    .iter()
619                    .take(5)
620                    .cloned()
621                    .collect::<Vec<_>>()
622                    .join(", ")
623            ));
624        }
625    }
626
627    if options.pr_grouping == SplitPrGrouping::Group {
628        let mut missing: Vec<String> = Vec::new();
629        for rec in &mut records {
630            rec.pr_group.clear();
631            for key in [&rec.task_id, &rec.plan_task_id] {
632                if key.is_empty() {
633                    continue;
634                }
635                if let Some(v) = group_assignments.get(&key.to_ascii_lowercase()) {
636                    rec.pr_group = v.to_string();
637                    break;
638                }
639            }
640            if rec.pr_group.is_empty() {
641                missing.push(rec.task_id.clone());
642            }
643        }
644        if options.strategy == SplitPrStrategy::Deterministic {
645            if !missing.is_empty() {
646                return Err(format!(
647                    "--pr-grouping group requires explicit mapping for every task; missing: {}",
648                    missing
649                        .iter()
650                        .take(8)
651                        .cloned()
652                        .collect::<Vec<_>>()
653                        .join(", ")
654                ));
655            }
656        } else if !missing.is_empty() {
657            assign_auto_groups(&mut records);
658        }
659    } else {
660        for rec in &mut records {
661            rec.pr_group =
662                normalize_token(&format!("s{}", rec.sprint), &format!("s{}", rec.sprint), 48);
663        }
664    }
665
666    // Anchor selection is deterministic because records are emitted in stable sprint/task order.
667    let mut group_sizes: HashMap<String, usize> = HashMap::new();
668    let mut group_anchor: HashMap<String, String> = HashMap::new();
669    for rec in &records {
670        let size = group_sizes.entry(rec.pr_group.clone()).or_insert(0);
671        *size += 1;
672        group_anchor
673            .entry(rec.pr_group.clone())
674            .or_insert_with(|| rec.task_id.clone());
675    }
676
677    let mut out: Vec<SplitPlanRecord> = Vec::new();
678    for rec in records {
679        let mut notes = rec.notes_parts.clone();
680        notes.push(format!("pr-grouping={}", options.pr_grouping.as_str()));
681        notes.push(format!("pr-group={}", rec.pr_group));
682        if group_sizes.get(&rec.pr_group).copied().unwrap_or(0) > 1
683            && let Some(anchor) = group_anchor.get(&rec.pr_group)
684        {
685            notes.push(format!("shared-pr-anchor={anchor}"));
686        }
687        out.push(SplitPlanRecord {
688            task_id: rec.task_id,
689            sprint: rec.sprint,
690            summary: rec.summary,
691            branch: rec.branch,
692            worktree: rec.worktree,
693            owner: rec.owner,
694            notes: notes.join("; "),
695            pr_group: rec.pr_group,
696        });
697    }
698
699    Ok(out)
700}
701
/// A pair of groups that `auto_groups_for_sprint` may merge, with the data used to rank it.
702#[derive(Debug)]
703struct AutoMergeCandidate {
    /// Position of the first group in the current group list (always less than `j`).
704    i: usize,
    /// Position of the second group in the current group list.
705    j: usize,
    /// Merge score multiplied by 1,000,000 and rounded, so candidates can be ordered as integers.
706    score_key: i64,
    /// Smaller of the two groups' minimum task keys (first tie-breaker).
707    key_a: String,
    /// Larger of the two groups' minimum task keys (second tie-breaker).
708    key_b: String,
709}
710
711fn assign_auto_groups(records: &mut [Record]) {
712    let mut sprint_to_indices: BTreeMap<i32, Vec<usize>> = BTreeMap::new();
713    for (idx, rec) in records.iter().enumerate() {
714        if rec.pr_group.is_empty() {
715            sprint_to_indices.entry(rec.sprint).or_default().push(idx);
716        }
717    }
718
719    for (sprint, indices) in sprint_to_indices {
720        let assignments = auto_groups_for_sprint(records, sprint, &indices);
721        for (idx, group) in assignments {
722            if let Some(rec) = records.get_mut(idx)
723                && rec.pr_group.is_empty()
724            {
725                rec.pr_group = group;
726            }
727        }
728    }
729}
730
/// Computes an automatic PR group name for every record index in `indices`.
///
/// `records` is the full record list, `sprint` is the sprint number used in the generated group
/// names, and `indices` are the positions in `records` to group. The result maps each of those
/// positions to a name of the form `s<sprint>-auto-g<n>` (passed through `normalize_token`).
///
/// Steps:
/// 1. resolve each record's dependency keys to other indices in `indices` (self-references and
///    keys that match nothing are ignored) and normalize its location paths;
/// 2. compute a batch index per record with `compute_batch_index`;
/// 3. union records of the same batch that share a normalized path;
/// 4. repeatedly merge the best-scoring pair of groups until no pair qualifies;
/// 5. order groups by minimum batch, then minimum task key, and number them from 1.
731fn auto_groups_for_sprint(
732    records: &[Record],
733    sprint: i32,
734    indices: &[usize],
735) -> BTreeMap<usize, String> {
    // Case-insensitive lookup from positional id / plan task id to record index.
736    let mut lookup: HashMap<String, usize> = HashMap::new();
737    for idx in indices {
738        let rec = &records[*idx];
739        lookup.insert(rec.task_id.to_ascii_lowercase(), *idx);
740        if !rec.plan_task_id.is_empty() {
741            lookup.insert(rec.plan_task_id.to_ascii_lowercase(), *idx);
742        }
743    }
744
    // Per record: resolved in-scope dependencies and normalized location paths.
745    let mut deps: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
746    let mut paths: BTreeMap<usize, BTreeSet<String>> = BTreeMap::new();
747    for idx in indices {
748        let rec = &records[*idx];
749        let mut resolved_deps: BTreeSet<usize> = BTreeSet::new();
750        for dep in &rec.dependency_keys {
751            let dep_key = dep.trim().to_ascii_lowercase();
752            if dep_key.is_empty() {
753                continue;
754            }
            // Keep only dependencies on other records in `indices`; a self-dependency is dropped.
755            if let Some(dep_idx) = lookup.get(&dep_key)
756                && dep_idx != idx
757            {
758                resolved_deps.insert(*dep_idx);
759            }
760        }
761        deps.insert(*idx, resolved_deps);
762
763        let normalized_paths: BTreeSet<String> = rec
764            .location_paths
765            .iter()
766            .map(|path| normalize_location_path(path))
767            .filter(|path| !path.is_empty())
768            .collect();
769        paths.insert(*idx, normalized_paths);
770    }
771
772    let batch_by_idx = compute_batch_index(records, indices, &deps);
    // Union-find parent table; every record starts as its own root.
773    let mut parent: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, idx)).collect();
774
    // Records without a computed batch are treated as batch 0.
775    let mut by_batch: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
776    for idx in indices {
777        let batch = batch_by_idx.get(idx).copied().unwrap_or(0);
778        by_batch.entry(batch).or_default().push(*idx);
779    }
780
    // Within each batch, union all records that share at least one normalized path.
781    for members in by_batch.values_mut() {
782        members.sort_by_key(|idx| task_sort_key(records, *idx));
783
784        let mut path_to_members: BTreeMap<String, Vec<usize>> = BTreeMap::new();
785        for idx in members {
786            for path in paths.get(idx).into_iter().flatten() {
787                path_to_members.entry(path.clone()).or_default().push(*idx);
788            }
789        }
790        for overlap_members in path_to_members.values() {
791            if overlap_members.len() < 2 {
792                continue;
793            }
794            let first = overlap_members[0];
795            for other in overlap_members.iter().skip(1) {
796                uf_union(&mut parent, first, *other);
797            }
798        }
799    }
800
    // Materialize the union-find components as the initial groups.
801    let mut grouped: BTreeMap<usize, BTreeSet<usize>> = BTreeMap::new();
802    for idx in indices {
803        let root = uf_find(&mut parent, *idx);
804        grouped.entry(root).or_default().insert(*idx);
805    }
806    let mut groups: Vec<BTreeSet<usize>> = grouped.into_values().collect();
807
    // Greedy merging: each pass scores every pair of groups, merges the single best candidate,
    // and starts over; the loop ends when no pair qualifies.
808    loop {
809        let mut candidates: Vec<AutoMergeCandidate> = Vec::new();
810        for i in 0..groups.len() {
811            for j in (i + 1)..groups.len() {
812                let merged_complexity =
813                    group_complexity(records, &groups[i]) + group_complexity(records, &groups[j]);
                // Never build a group whose combined complexity exceeds 20.
814                if merged_complexity > 20 {
815                    continue;
816                }
817
818                let dep_cross = dependency_cross_edges(&deps, &groups[i], &groups[j]);
819                let overlap_paths = overlap_path_count(&paths, &groups[i], &groups[j]);
820                let min_group_size = groups[i].len().min(groups[j].len()).max(1) as f64;
                // Affinities are capped at 1.0; size_fit peaks at a combined complexity of 12.
821                let dep_affinity = ((dep_cross as f64) / min_group_size).min(1.0);
822                let ovl_affinity = ((overlap_paths as f64) / 2.0).min(1.0);
823                let size_fit = (1.0 - ((merged_complexity as f64 - 12.0).abs() / 12.0)).max(0.0);
824                let span = group_span(&batch_by_idx, &groups[i], &groups[j]);
825                let serial_penalty = ((span as f64 - 1.0).max(0.0)) / 3.0;
                // Always 0.0 at this point: pairs above 20 were skipped by the check above.
826                let oversize_penalty = ((merged_complexity as f64 - 20.0).max(0.0)) / 20.0;
827
828                let score = (0.45 * dep_affinity) + (0.35 * ovl_affinity) + (0.20 * size_fit)
829                    - (0.25 * serial_penalty)
830                    - (0.45 * oversize_penalty);
                // Pairs scoring below 0.30 are not merge candidates.
831                if score < 0.30 {
832                    continue;
833                }
834
                // Order the two keys so tie-breaking does not depend on which group came first.
835                let mut key_a = group_min_task_key(records, &groups[i]);
836                let mut key_b = group_min_task_key(records, &groups[j]);
837                if key_b < key_a {
838                    std::mem::swap(&mut key_a, &mut key_b);
839                }
840                candidates.push(AutoMergeCandidate {
841                    i,
842                    j,
843                    score_key: (score * 1_000_000.0).round() as i64,
844                    key_a,
845                    key_b,
846                });
847            }
848        }
849
850        if candidates.is_empty() {
851            break;
852        }
853
        // Highest score first; ties fall back to the task keys, then to the group positions.
854        candidates.sort_by(|a, b| {
855            b.score_key
856                .cmp(&a.score_key)
857                .then_with(|| a.key_a.cmp(&b.key_a))
858                .then_with(|| a.key_b.cmp(&b.key_b))
859                .then_with(|| a.i.cmp(&b.i))
860                .then_with(|| a.j.cmp(&b.j))
861        });
862        let chosen = &candidates[0];
863
        // Fold group j into group i; since i < j, removing j leaves position i valid.
864        let mut merged = groups[chosen.i].clone();
865        merged.extend(groups[chosen.j].iter().copied());
866        groups[chosen.i] = merged;
867        groups.remove(chosen.j);
868    }
869
    // Number the final groups in a stable order: earliest batch first, then smallest task key.
870    groups.sort_by(|a, b| {
871        group_min_batch(&batch_by_idx, a)
872            .cmp(&group_min_batch(&batch_by_idx, b))
873            .then_with(|| group_min_task_key(records, a).cmp(&group_min_task_key(records, b)))
874    });
875
876    let mut out: BTreeMap<usize, String> = BTreeMap::new();
877    for (idx, group) in groups.iter().enumerate() {
878        let fallback = format!("s{sprint}-auto-g{}", idx + 1);
879        let group_key = normalize_token(&fallback, &fallback, 48);
880        for member in group {
881            out.insert(*member, group_key.clone());
882        }
883    }
884    out
885}
886
887fn compute_batch_index(
888    records: &[Record],
889    indices: &[usize],
890    deps: &BTreeMap<usize, BTreeSet<usize>>,
891) -> BTreeMap<usize, usize> {
892    let mut in_deg: HashMap<usize, usize> = indices.iter().copied().map(|idx| (idx, 0)).collect();
893    let mut reverse: HashMap<usize, BTreeSet<usize>> = indices
894        .iter()
895        .copied()
896        .map(|idx| (idx, BTreeSet::new()))
897        .collect();
898
899    for idx in indices {
900        for dep in deps.get(idx).cloned().unwrap_or_default() {
901            if !in_deg.contains_key(&dep) {
902                continue;
903            }
904            if let Some(value) = in_deg.get_mut(idx) {
905                *value += 1;
906            }
907            if let Some(children) = reverse.get_mut(&dep) {
908                children.insert(*idx);
909            }
910        }
911    }
912
913    let mut remaining: BTreeSet<usize> = indices.iter().copied().collect();
914    let mut batch_by_idx: BTreeMap<usize, usize> = BTreeMap::new();
915    let mut layer = 0usize;
916    let mut ready: VecDeque<usize> = {
917        let mut start: Vec<usize> = indices
918            .iter()
919            .copied()
920            .filter(|idx| in_deg.get(idx).copied().unwrap_or(0) == 0)
921            .collect();
922        start.sort_by_key(|idx| task_sort_key(records, *idx));
923        start.into_iter().collect()
924    };
925
926    while !remaining.is_empty() {
927        let mut batch_members: Vec<usize> = ready.drain(..).collect();
928        batch_members.sort_by_key(|idx| task_sort_key(records, *idx));
929
930        if batch_members.is_empty() {
931            let mut cycle_members: Vec<usize> = remaining.iter().copied().collect();
932            cycle_members.sort_by_key(|idx| task_sort_key(records, *idx));
933            for idx in cycle_members {
934                remaining.remove(&idx);
935                batch_by_idx.insert(idx, layer);
936            }
937            break;
938        }
939
940        for idx in &batch_members {
941            remaining.remove(idx);
942            batch_by_idx.insert(*idx, layer);
943        }
944
945        let mut next: Vec<usize> = Vec::new();
946        for idx in batch_members {
947            for child in reverse.get(&idx).cloned().unwrap_or_default() {
948                if let Some(value) = in_deg.get_mut(&child) {
949                    *value = value.saturating_sub(1);
950                    if *value == 0 && remaining.contains(&child) {
951                        next.push(child);
952                    }
953                }
954            }
955        }
956        next.sort_by_key(|idx| task_sort_key(records, *idx));
957        next.dedup();
958        ready.extend(next);
959        layer += 1;
960    }
961
962    for idx in indices {
963        batch_by_idx.entry(*idx).or_insert(0);
964    }
965    batch_by_idx
966}
967
968fn task_sort_key(records: &[Record], idx: usize) -> (String, String) {
969    let rec = &records[idx];
970    let primary = if rec.plan_task_id.trim().is_empty() {
971        rec.task_id.to_ascii_lowercase()
972    } else {
973        rec.plan_task_id.to_ascii_lowercase()
974    };
975    (primary, rec.task_id.to_ascii_lowercase())
976}
977
/// Canonicalizes a location path for comparison: surrounding whitespace is dropped, inner
/// whitespace runs collapse to a single space, and ASCII letters are lowercased.
fn normalize_location_path(path: &str) -> String {
    let mut normalized = String::with_capacity(path.len());
    for word in path.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
984
985fn group_complexity(records: &[Record], group: &BTreeSet<usize>) -> i32 {
986    group
987        .iter()
988        .map(|idx| records[*idx].complexity.max(1))
989        .sum::<i32>()
990}
991
992fn group_min_task_key(records: &[Record], group: &BTreeSet<usize>) -> String {
993    group
994        .iter()
995        .map(|idx| task_sort_key(records, *idx).0)
996        .min()
997        .unwrap_or_default()
998}
999
/// Returns the lowest batch number recorded for any member of `group`.
///
/// Members without an entry in `batch_by_idx` are skipped; the result is 0 when no member
/// has an entry.
fn group_min_batch(batch_by_idx: &BTreeMap<usize, usize>, group: &BTreeSet<usize>) -> usize {
    let mut lowest: Option<usize> = None;
    for idx in group {
        if let Some(&batch) = batch_by_idx.get(idx) {
            lowest = Some(lowest.map_or(batch, |current| current.min(batch)));
        }
    }
    lowest.unwrap_or(0)
}
1007
/// Measures how many batches the combination of `left` and `right` would stretch across:
/// the highest batch number minus the lowest over all members of both groups.
///
/// Members without an entry in `batch_by_idx` count as batch 0. Two empty groups give 0.
fn group_span(
    batch_by_idx: &BTreeMap<usize, usize>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    let mut bounds: Option<(usize, usize)> = None;
    for idx in left.iter().chain(right.iter()) {
        let batch = batch_by_idx.get(idx).copied().unwrap_or(0);
        bounds = Some(match bounds {
            None => (batch, batch),
            Some((low, high)) => (low.min(batch), high.max(batch)),
        });
    }
    match bounds {
        Some((low, high)) => high.saturating_sub(low),
        None => 0,
    }
}
1026
/// Counts the dependency edges that run between the two groups, in either direction.
///
/// An edge is counted when a member of one group lists a member of the other group in its
/// `deps` entry.
fn dependency_cross_edges(
    deps: &BTreeMap<usize, BTreeSet<usize>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    // Edges that start in `from` and end in `to`.
    let directed = |from: &BTreeSet<usize>, to: &BTreeSet<usize>| -> usize {
        from.iter()
            .filter_map(|src| deps.get(src))
            .map(|edges| edges.intersection(to).count())
            .sum::<usize>()
    };
    directed(left, right) + directed(right, left)
}
1045
/// Counts the distinct paths that are touched by both groups.
///
/// Each group's path set is the union of the `paths` entries of its members; members
/// without an entry contribute nothing.
fn overlap_path_count(
    paths: &BTreeMap<usize, BTreeSet<String>>,
    left: &BTreeSet<usize>,
    right: &BTreeSet<usize>,
) -> usize {
    // Union of all paths owned by the members of one group, borrowed from `paths`.
    fn touched<'a>(
        paths: &'a BTreeMap<usize, BTreeSet<String>>,
        group: &BTreeSet<usize>,
    ) -> BTreeSet<&'a str> {
        group
            .iter()
            .filter_map(|idx| paths.get(idx))
            .flatten()
            .map(String::as_str)
            .collect()
    }

    let left_paths = touched(paths, left);
    let right_paths = touched(paths, right);
    left_paths.intersection(&right_paths).count()
}
1065
/// Finds the representative of the set containing `node` and compresses the visited path.
///
/// A node without an entry in `parent` is treated as its own root. Every node walked on the
/// way to the root is re-pointed directly at that root.
fn uf_find(parent: &mut HashMap<usize, usize>, node: usize) -> usize {
    let mut visited: Vec<usize> = Vec::new();
    let mut current = node;
    loop {
        let next = parent.get(&current).copied().unwrap_or(current);
        if next == current {
            break;
        }
        visited.push(current);
        current = next;
    }
    // `current` is now the root; shortcut every node seen on the way.
    for member in visited {
        parent.insert(member, current);
    }
    current
}
1075
1076fn uf_union(parent: &mut HashMap<usize, usize>, left: usize, right: usize) {
1077    let left_root = uf_find(parent, left);
1078    let right_root = uf_find(parent, right);
1079    if left_root == right_root {
1080        return;
1081    }
1082    if left_root < right_root {
1083        parent.insert(right_root, left_root);
1084    } else {
1085        parent.insert(left_root, right_root);
1086    }
1087}
1088
1089fn print_tsv(records: &[OutputRecord]) {
1090    println!("# task_id\tsummary\tbranch\tworktree\towner\tnotes\tpr_group");
1091    for rec in records {
1092        println!(
1093            "{}\t{}\t{}\t{}\t{}\t{}\t{}",
1094            rec.task_id.replace('\t', " "),
1095            rec.summary.replace('\t', " "),
1096            rec.branch.replace('\t', " "),
1097            rec.worktree.replace('\t', " "),
1098            rec.owner.replace('\t', " "),
1099            rec.notes.replace('\t', " "),
1100            rec.pr_group.replace('\t', " "),
1101        );
1102    }
1103}
1104
1105fn print_usage() {
1106    let _ = std::io::stderr().write_all(USAGE.as_bytes());
1107}
1108
/// Reports `msg` on stderr with the `split-prs:` prefix and returns exit code 2, which the
/// usage text lists as "usage error".
fn die(msg: &str) -> i32 {
    const EXIT_USAGE: i32 = 2;
    eprintln!("split-prs: {}", msg);
    EXIT_USAGE
}
1113
/// Resolves `path` against `repo_root`: absolute paths are returned unchanged, relative
/// paths are joined onto the repository root.
fn resolve_repo_relative(repo_root: &Path, path: &Path) -> PathBuf {
    if path.is_relative() {
        repo_root.join(path)
    } else {
        path.to_path_buf()
    }
}
1120
/// Tries to express `path` relative to `repo_root`.
///
/// Both paths are canonicalized first. If `path` cannot be canonicalized it is returned as
/// given; if the root cannot be canonicalized, or the path does not live under it, the
/// canonical form of `path` is returned instead.
fn maybe_relativize(path: &Path, repo_root: &Path) -> PathBuf {
    let resolved = match path.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => return path.to_path_buf(),
    };
    let root = match repo_root.canonicalize() {
        Ok(root) => root,
        Err(_) => return resolved,
    };
    if let Ok(relative) = resolved.strip_prefix(&root) {
        return relative.to_path_buf();
    }
    resolved
}
1133
/// Renders `path` as a string (lossily for non-UTF-8 data) with the platform's main
/// separator replaced by `/`.
fn path_to_posix(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| {
            if ch == std::path::MAIN_SEPARATOR {
                '/'
            } else {
                ch
            }
        })
        .collect()
}
1138
/// Cleans up a branch prefix: surrounding whitespace and trailing `/` characters are
/// removed, and `issue` is used when nothing is left.
fn normalize_branch_prefix(value: &str) -> String {
    match value.trim().trim_end_matches('/') {
        "" => String::from("issue"),
        prefix => prefix.to_owned(),
    }
}
1147
/// Cleans up a worktree prefix: surrounding whitespace and trailing `-` / `_` characters
/// are removed, and `issue` is used when nothing is left.
fn normalize_worktree_prefix(value: &str) -> String {
    let stripped = value
        .trim()
        .trim_end_matches(|ch: char| matches!(ch, '-' | '_'));
    match stripped {
        "" => String::from("issue"),
        prefix => prefix.to_owned(),
    }
}
1156
/// Cleans up an owner prefix so that it always mentions `subagent`.
///
/// A blank value becomes `subagent`; a value that already contains `subagent` (in any ASCII
/// case) is kept after trimming; anything else is prefixed with `subagent-`.
fn normalize_owner_prefix(value: &str) -> String {
    let prefix = value.trim();
    if prefix.is_empty() {
        return String::from("subagent");
    }
    let mentions_subagent = prefix.to_ascii_lowercase().contains("subagent");
    if mentions_subagent {
        prefix.to_owned()
    } else {
        format!("subagent-{prefix}")
    }
}
1167
/// Collapses every whitespace run in `value` to a single space and drops leading and
/// trailing whitespace; a value without any visible text becomes `task`.
fn normalize_spaces(value: String) -> String {
    let mut words = value.split_whitespace();
    let Some(first) = words.next() else {
        return "task".to_owned();
    };
    let mut collapsed = String::with_capacity(value.len());
    collapsed.push_str(first);
    for word in words {
        collapsed.push(' ');
        collapsed.push_str(word);
    }
    collapsed
}
1176
/// Turns `value` into a lowercase, dash-separated token of at most `max_len` bytes.
///
/// ASCII letters and digits are kept (after lowercasing); every run of other characters
/// becomes a single `-`, and leading/trailing dashes are removed. When nothing remains,
/// `fallback` is used verbatim. A token longer than `max_len` is cut down and stripped of
/// dashes again, so the result may be shorter than `max_len`.
///
/// The cut never splits a multi-byte character: `fallback` is caller-supplied and may not be
/// ASCII, and `String::truncate` panics when the new length is not on a char boundary.
fn normalize_token(value: &str, fallback: &str, max_len: usize) -> String {
    let mut out = String::new();
    let mut last_dash = false;
    for ch in value.chars().flat_map(char::to_lowercase) {
        if ch.is_ascii_alphanumeric() {
            out.push(ch);
            last_dash = false;
        } else if !last_dash {
            // Collapse a whole run of separators into one dash.
            out.push('-');
            last_dash = true;
        }
    }

    let normalized = out.trim_matches('-');
    let mut final_token = if normalized.is_empty() {
        fallback.to_string()
    } else {
        normalized.to_string()
    };

    if final_token.len() > max_len {
        // Back off to the nearest char boundary at or below `max_len`; index 0 is always
        // a boundary, so the loop terminates.
        let mut cut = max_len;
        while !final_token.is_char_boundary(cut) {
            cut -= 1;
        }
        final_token.truncate(cut);
        final_token = final_token.trim_matches('-').to_string();
    }
    final_token
}
1201
/// Reports whether `value` is a filler entry rather than real content.
///
/// After trimming and ASCII-lowercasing, a value is a placeholder when it is empty or one of
/// `-`, `none`, `n/a`, `na`, `...`, when it is wrapped in `<` and `>`, or when it contains
/// the phrase `task ids`.
fn is_placeholder(value: &str) -> bool {
    const FILLERS: [&str; 6] = ["", "-", "none", "n/a", "na", "..."];

    let token = value.trim().to_ascii_lowercase();
    let is_filler = FILLERS.contains(&token.as_str());
    let is_bracketed = token.starts_with('<') && token.ends_with('>');
    is_filler || is_bracketed || token.contains("task ids")
}
1212
#[cfg(test)]
mod tests {
    use super::{is_placeholder, normalize_token};
    use pretty_assertions::assert_eq;

    /// Separators collapse into single dashes, and an over-long fallback is cut to size.
    #[test]
    fn normalize_token_collapses_non_alnum_and_limits_length() {
        // (value, fallback, max_len, expected)
        let cases = [
            (
                "Sprint 2 :: Shared Pair",
                "fallback",
                20,
                "sprint-2-shared-pair",
            ),
            ("!!!!", "fallback-value", 8, "fallback"),
        ];
        for (value, fallback, max_len, expected) in cases {
            assert_eq!(normalize_token(value, fallback, max_len), expected);
        }
    }

    /// Typical filler values are recognised, while a real task reference is not.
    #[test]
    fn placeholder_rules_cover_common_plan_values() {
        for filler in ["none", "<task ids>", "Task IDs here"] {
            assert!(is_placeholder(filler));
        }
        assert!(!is_placeholder("Task 1.1"));
    }
}
plan_tooling/split_prs.rs

plan_tooling/
split_prs.rs