Skip to main content

bvr/analysis/
git_history.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3use std::process::Command;
4
5use chrono::{DateTime, Utc};
6use serde::Serialize;
7
8use super::diff::{FieldChange, detect_changes};
9use crate::{BvrError, Result, model::Issue};
10
/// One commit as parsed by `load_git_commits` from `git log --name-status` output.
11#[derive(Debug, Clone)]
12pub struct GitCommitRecord {
    /// Full commit hash (`%H`).
13    pub sha: String,
    /// Abbreviated commit hash (`%h`).
14    pub short_sha: String,
    /// Committer date in strict ISO 8601 form (`%cI`).
15    pub timestamp: String,
    /// Author name (`%an`).
16    pub author: String,
    /// Author e-mail address (`%ae`).
17    pub author_email: String,
    /// Commit subject line (`%s`).
18    pub message: String,
    /// Files reported for the commit, sorted by path.
19    pub files: Vec<HistoryFileChangeCompat>,
    /// `true` when at least one file is a `.jsonl` file under a `.beads` directory.
20    pub changed_beads: bool,
    /// `true` when at least one file is anything other than such a beads file.
21    pub changed_non_beads: bool,
22}
23
/// Milestone events of a bead, filled in by `finalize_history_entries`.
///
/// Each field is omitted from the serialized output when it is `None`.
24#[derive(Debug, Clone, Serialize, Default)]
25pub struct HistoryMilestonesCompat {
    /// First event of type `"created"` in the bead's event list.
26    #[serde(skip_serializing_if = "Option::is_none")]
27    pub created: Option<HistoryEventCompat>,
    /// First event of type `"claimed"` in the bead's event list.
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub claimed: Option<HistoryEventCompat>,
    /// First event of type `"closed"` in the bead's event list.
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub closed: Option<HistoryEventCompat>,
    /// Last event of type `"reopened"` in the bead's event list.
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub reopened: Option<HistoryEventCompat>,
34}
35
/// A lifecycle event of a bead, derived from one correlated commit.
36#[derive(Debug, Clone, Serialize)]
37pub struct HistoryEventCompat {
    /// ID of the bead the event belongs to.
38    pub bead_id: String,
    /// Event kind; `finalize_history_entries` produces `"created"`, `"claimed"`,
    /// `"closed"`, `"reopened"` and `"modified"`.
39    pub event_type: String,
    /// Timestamp copied from the originating commit.
40    pub timestamp: String,
    /// Hash of the originating commit.
41    pub commit_sha: String,
    /// Message of the originating commit.
42    pub commit_message: String,
    /// Author name of the originating commit.
43    pub author: String,
    /// Author e-mail of the originating commit.
44    pub author_email: String,
45}
46
/// History of a single bead: its events, milestones and correlated commits.
///
/// `commits` and `cycle_time` carry no `skip_serializing_if`, so a `None` value is
/// serialized as `null` rather than being omitted.
47#[derive(Debug, Clone, Serialize)]
48pub struct HistoryBeadCompat {
    /// ID of the bead.
49    pub bead_id: String,
    /// Title of the bead.
50    pub title: String,
    /// Status of the bead; `finalize_history_entries` lowercases it and treats
    /// `"closed"` and `"tombstone"` as closed.
51    pub status: String,
    /// Events of the bead; rebuilt by `finalize_history_entries` when commits exist.
52    pub events: Vec<HistoryEventCompat>,
    /// Milestones looked up from `events`.
53    pub milestones: HistoryMilestonesCompat,
    /// Correlated commits; `finalize_history_entries` turns an empty list into `None`.
54    pub commits: Option<Vec<HistoryCommitCompat>>,
    /// Durations between milestones, set when at least one of them can be computed.
55    pub cycle_time: Option<HistoryCycleCompat>,
    /// Author of the last commit after sorting, or an empty string without commits.
56    pub last_author: String,
57}
58
/// A commit correlated with a bead, together with how the correlation was made.
59#[derive(Debug, Clone, Serialize)]
60pub struct HistoryCommitCompat {
    /// Full commit hash.
61    pub sha: String,
    /// Abbreviated commit hash.
62    pub short_sha: String,
    /// Commit message.
63    pub message: String,
    /// Author name.
64    pub author: String,
    /// Author e-mail address.
65    pub author_email: String,
    /// Commit timestamp.
66    pub timestamp: String,
    /// Files changed by the commit.
67    pub files: Vec<HistoryFileChangeCompat>,
    /// Correlation method; the correlation code in this file emits `"co_committed"`
    /// and `"explicit_id"`.
68    pub method: String,
    /// Confidence of the correlation; the correlation code in this file emits
    /// `0.95`, `0.85` and `0.75`.
69    pub confidence: f64,
    /// Human-readable explanation of the chosen method.
70    pub reason: String,
    /// Field-level changes detected for the bead in this commit; omitted when empty.
71    #[serde(skip_serializing_if = "Vec::is_empty")]
72    pub field_changes: Vec<FieldChange>,
    /// Rendered `- ...` / `+ ...` lines describing the bead change; omitted when empty.
73    #[serde(skip_serializing_if = "Vec::is_empty")]
74    pub bead_diff_lines: Vec<String>,
75}
76
/// One file touched by a commit.
77#[derive(Debug, Clone, Serialize)]
78pub struct HistoryFileChangeCompat {
    /// Path of the file as reported by git.
79    pub path: String,
    /// Change kind; `load_git_commits` stores the first character of git's status
    /// column (`"R"` for renames).
80    pub action: String,
    /// Number of inserted lines; `load_git_commits` always stores `0`.
81    pub insertions: i64,
    /// Number of deleted lines; `load_git_commits` always stores `0`.
82    pub deletions: i64,
83}
84
/// Durations between bead milestones, formatted as `"{days}d {hours}h {minutes}m"`.
///
/// Each field is omitted from the serialized output when it is `None`.
85#[derive(Debug, Clone, Serialize)]
86pub struct HistoryCycleCompat {
    /// Time from the `claimed` milestone to the `closed` milestone.
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub claim_to_close: Option<String>,
    /// Time from the `created` milestone to the `closed` milestone.
89    #[serde(skip_serializing_if = "Option::is_none")]
90    pub create_to_close: Option<String>,
    /// Time from the `created` milestone to the `claimed` milestone.
91    #[serde(skip_serializing_if = "Option::is_none")]
92    pub create_to_claim: Option<String>,
93}
94
/// Aggregate statistics over bead histories.
///
/// NOTE(review): this struct is not constructed in the visible part of the file; the
/// field notes below are inferred from the field names and should be confirmed
/// against the code that fills them.
95#[derive(Debug, Serialize)]
96pub struct HistoryStatsCompat {
    /// Presumably the number of beads considered.
97    pub total_beads: usize,
    /// Presumably the number of beads with at least one correlated commit.
98    pub beads_with_commits: usize,
    /// Presumably the number of correlated commits.
99    pub total_commits: usize,
    /// Presumably the number of distinct commit authors.
100    pub unique_authors: usize,
    /// Presumably `total_commits` averaged over beads.
101    pub avg_commits_per_bead: f64,
    /// Presumably the mean cycle time in days; omitted from the output when `None`.
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub avg_cycle_time_days: Option<f64>,
    /// Count per correlation method name (presumably the map updated during
    /// correlation — verify against the caller).
104    pub method_distribution: BTreeMap<String, usize>,
105}
106
107pub fn load_git_commits(
108    repo_root: &Path,
109    limit: usize,
110    history_since: Option<&str>,
111) -> Result<Vec<GitCommitRecord>> {
112    if !is_git_work_tree(repo_root) {
113        return Ok(Vec::new());
114    }
115
116    let mut command = Command::new("git");
117    command.arg("-C").arg(repo_root).arg("log");
118    if limit > 0 {
119        command.arg(format!("-n{limit}"));
120    }
121    if let Some(since) = history_since {
122        command.arg("--since").arg(since);
123    }
124    command
125        .arg("--name-status")
126        .arg("--date=iso-strict")
127        .arg("--pretty=format:\u{1e}%H\u{1f}%h\u{1f}%cI\u{1f}%an\u{1f}%ae\u{1f}%s");
128
129    let output = command.output()?;
130    if !output.status.success() {
131        if let Some(since) = history_since {
132            let stderr = String::from_utf8_lossy(&output.stderr);
133            return Err(BvrError::InvalidArgument(format!(
134                "Error parsing --history-since '{since}': {}",
135                stderr.trim()
136            )));
137        }
138        return Ok(Vec::new());
139    }
140
141    let text = String::from_utf8_lossy(&output.stdout);
142    let mut commits = Vec::<GitCommitRecord>::new();
143
144    for block in text.split('\u{1e}') {
145        let block = block.trim();
146        if block.is_empty() {
147            continue;
148        }
149
150        let mut lines = block.lines();
151        let Some(header) = lines.next() else {
152            continue;
153        };
154
155        let fields = header.split('\u{1f}').collect::<Vec<_>>();
156        if fields.len() < 6 {
157            continue;
158        }
159
160        let mut files = Vec::<HistoryFileChangeCompat>::new();
161        let mut changed_beads = false;
162        let mut changed_non_beads = false;
163
164        for raw_line in lines {
165            let line = raw_line.trim();
166            if line.is_empty() {
167                continue;
168            }
169
170            let parts = line.split('\t').collect::<Vec<_>>();
171            if parts.len() < 2 {
172                continue;
173            }
174
175            let status = parts[0];
176            let (action, path) = if status.starts_with('R') && parts.len() >= 3 {
177                ("R", parts[2])
178            } else {
179                (&status[..status.len().min(1)], parts[1])
180            };
181
182            let path = path.to_string();
183            let is_beads = is_beads_jsonl_path(&path);
184            changed_beads |= is_beads;
185            changed_non_beads |= !is_beads;
186
187            files.push(HistoryFileChangeCompat {
188                path,
189                action: action.to_string(),
190                insertions: 0,
191                deletions: 0,
192            });
193        }
194
195        files.sort_by(|left, right| left.path.cmp(&right.path));
196
197        commits.push(GitCommitRecord {
198            sha: fields[0].to_string(),
199            short_sha: fields[1].to_string(),
200            timestamp: fields[2].to_string(),
201            author: fields[3].to_string(),
202            author_email: fields[4].to_string(),
203            message: fields[5].to_string(),
204            files,
205            changed_beads,
206            changed_non_beads,
207        });
208    }
209
210    Ok(commits)
211}
212
/// Reports whether `path` lies inside a git work tree.
///
/// Asks `git rev-parse --is-inside-work-tree`. A failure to launch `git`, a non-zero
/// exit status, or any answer other than `true` (ASCII case-insensitive) yields `false`.
fn is_git_work_tree(path: &Path) -> bool {
    let mut probe = Command::new("git");
    probe
        .arg("-C")
        .arg(path)
        .args(["rev-parse", "--is-inside-work-tree"]);

    match probe.output() {
        Ok(result) if result.status.success() => String::from_utf8_lossy(&result.stdout)
            .trim()
            .eq_ignore_ascii_case("true"),
        _ => false,
    }
}
232
233pub fn correlate_histories_with_git(
234    repo_root: &Path,
235    commits: &[GitCommitRecord],
236    histories_map: &mut BTreeMap<String, HistoryBeadCompat>,
237    commit_index: &mut BTreeMap<String, Vec<String>>,
238    method_distribution: &mut BTreeMap<String, usize>,
239) {
240    correlate_histories_with_git_aliases(
241        repo_root,
242        commits,
243        histories_map,
244        commit_index,
245        method_distribution,
246        &BTreeMap::new(),
247    );
248}
249
250/// Like [`correlate_histories_with_git`] but accepts `workspace_id_aliases`.
251///
252/// The alias map goes from **raw (unprefixed, lowercase)** bead IDs to their
253/// canonical workspace-prefixed form.  This allows JSONL diffs from nested
254/// workspace repos (which store unprefixed IDs) to be matched against the
255/// prefixed IDs in the histories map.
256pub fn correlate_histories_with_git_aliases(
257    repo_root: &Path,
258    commits: &[GitCommitRecord],
259    histories_map: &mut BTreeMap<String, HistoryBeadCompat>,
260    commit_index: &mut BTreeMap<String, Vec<String>>,
261    method_distribution: &mut BTreeMap<String, usize>,
262    workspace_id_aliases: &BTreeMap<String, String>,
263) {
264    let mut known_ids: BTreeMap<String, String> = histories_map
265        .keys()
266        .map(|id| (id.to_ascii_lowercase(), id.clone()))
267        .collect();
268
269    // Merge workspace aliases so raw (unprefixed) IDs resolve to their
270    // canonical prefixed form.  Only add aliases that don't shadow an
271    // already-known direct entry.
272    for (raw_lower, canonical) in workspace_id_aliases {
273        known_ids
274            .entry(raw_lower.clone())
275            .or_insert_with(|| canonical.clone());
276    }
277
278    for commit in commits {
279        let mut bead_ids = extract_ids_from_message(&commit.message, &known_ids);
280        let mut bead_change_details = BTreeMap::<String, (Vec<FieldChange>, Vec<String>)>::new();
281        if bead_ids.is_empty() && commit.changed_beads {
282            let from_diff = extract_ids_from_beads_diffs(repo_root, commit, &known_ids);
283            bead_ids.extend(from_diff);
284        }
285        if commit.changed_beads {
286            bead_change_details = extract_bead_change_details(repo_root, commit, &known_ids);
287            bead_ids.extend(bead_change_details.keys().cloned());
288        }
289        if bead_ids.is_empty() {
290            continue;
291        }
292
293        let (method, confidence, reason) = if commit.changed_beads && commit.changed_non_beads {
294            (
295                "co_committed",
296                0.95,
297                "Commit modified beads metadata and code paths together".to_string(),
298            )
299        } else if commit.changed_beads {
300            (
301                "explicit_id",
302                0.85,
303                "Commit references bead changes explicitly".to_string(),
304            )
305        } else {
306            (
307                "explicit_id",
308                0.75,
309                "Commit message references bead ID".to_string(),
310            )
311        };
312
313        for bead_id in bead_ids {
314            let Some(history) = histories_map.get_mut(&bead_id) else {
315                continue;
316            };
317
318            let commits = history.commits.get_or_insert_with(Vec::new);
319            if commits.iter().any(|entry| entry.sha == commit.sha) {
320                continue;
321            }
322
323            commits.push(HistoryCommitCompat {
324                sha: commit.sha.clone(),
325                short_sha: commit.short_sha.clone(),
326                message: commit.message.clone(),
327                author: commit.author.clone(),
328                author_email: commit.author_email.clone(),
329                timestamp: commit.timestamp.clone(),
330                files: commit.files.clone(),
331                method: method.to_string(),
332                confidence,
333                reason: reason.clone(),
334                field_changes: bead_change_details
335                    .get(&bead_id)
336                    .map(|(changes, _)| changes.clone())
337                    .unwrap_or_default(),
338                bead_diff_lines: bead_change_details
339                    .get(&bead_id)
340                    .map(|(_, diff_lines)| diff_lines.clone())
341                    .unwrap_or_default(),
342            });
343
344            let ids = commit_index.entry(commit.sha.clone()).or_default();
345            if !ids.contains(&bead_id) {
346                ids.push(bead_id.clone());
347            }
348
349            *method_distribution.entry(method.to_string()).or_insert(0) += 1;
350        }
351    }
352
353    for ids in commit_index.values_mut() {
354        ids.sort();
355        ids.dedup();
356    }
357}
358
359/// Build a mapping from raw (unprefixed, lowercase) bead IDs to their canonical
360/// workspace-prefixed form.
361///
362/// For each issue with a known workspace prefix, strip that prefix from the
363/// issue ID to recover the raw ID. Older callers that only populated
364/// `source_repo` still get a best-effort fallback of `lowercase(source_repo)-`.
365pub fn build_workspace_id_aliases(issues: &[Issue]) -> BTreeMap<String, String> {
366    let mut aliases = BTreeMap::<String, String>::new();
367
368    for issue in issues {
369        let prefix = issue
370            .workspace_prefix
371            .as_deref()
372            .map(str::trim)
373            .filter(|prefix| !prefix.is_empty())
374            .map(std::borrow::ToOwned::to_owned)
375            .or_else(|| {
376                let repo = issue.source_repo.trim();
377                (!repo.is_empty()).then(|| format!("{repo}-"))
378            });
379
380        let Some(prefix) = prefix else {
381            continue;
382        };
383
384        let id_lower = issue.id.to_ascii_lowercase();
385        let prefix_lower = prefix.to_ascii_lowercase();
386        if let Some(raw) = id_lower.strip_prefix(&prefix_lower) {
387            if !raw.is_empty() {
388                aliases
389                    .entry(raw.to_string())
390                    .or_insert_with(|| issue.id.clone());
391            }
392        }
393    }
394
395    aliases
396}
397
398pub fn extract_ids_from_message(
399    message: &str,
400    known_ids: &BTreeMap<String, String>,
401) -> BTreeSet<String> {
402    let message = message.to_ascii_lowercase();
403    known_ids
404        .iter()
405        .filter_map(|(lower, canonical)| {
406            if contains_issue_id_token(&message, lower) {
407                Some(canonical.clone())
408            } else {
409                None
410            }
411        })
412        .collect()
413}
414
415fn contains_issue_id_token(message: &str, issue_id: &str) -> bool {
416    if issue_id.is_empty() {
417        return false;
418    }
419
420    message.match_indices(issue_id).any(|(start, _)| {
421        let left = message[..start].chars().next_back();
422        let right = message[start + issue_id.len()..].chars().next();
423
424        let left_boundary = left.is_none_or(|ch| !is_issue_id_char(ch));
425        let right_boundary = right.is_none_or(|ch| !is_issue_id_char(ch));
426
427        left_boundary && right_boundary
428    })
429}
430
/// Reports whether `ch` may be part of an issue ID: an ASCII letter or digit, `-` or `_`.
const fn is_issue_id_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')
}
434
435fn extract_ids_from_beads_diffs(
436    repo_root: &Path,
437    commit: &GitCommitRecord,
438    known_ids: &BTreeMap<String, String>,
439) -> BTreeSet<String> {
440    let mut ids = BTreeSet::<String>::new();
441
442    for file in &commit.files {
443        if !is_beads_jsonl_path(&file.path) {
444            continue;
445        }
446
447        let output = Command::new("git")
448            .arg("-C")
449            .arg(repo_root)
450            .arg("show")
451            .arg("--format=")
452            .arg("--unified=0")
453            .arg(&commit.sha)
454            .arg("--")
455            .arg(&file.path)
456            .output();
457
458        let Ok(output) = output else {
459            continue;
460        };
461        if !output.status.success() {
462            continue;
463        }
464
465        let text = String::from_utf8_lossy(&output.stdout);
466        for raw_line in text.lines() {
467            let line = raw_line.trim();
468            if !(line.starts_with('+') || line.starts_with('-'))
469                || line.starts_with("+++")
470                || line.starts_with("---")
471            {
472                continue;
473            }
474
475            let content = line.trim_start_matches(['+', '-']).trim();
476            if !(content.starts_with('{') && content.ends_with('}')) {
477                continue;
478            }
479
480            let Ok(value) = serde_json::from_str::<serde_json::Value>(content) else {
481                continue;
482            };
483            let Some(raw_id) = value.get("id").and_then(serde_json::Value::as_str) else {
484                continue;
485            };
486            if let Some(canonical) = known_ids.get(&raw_id.to_ascii_lowercase()) {
487                ids.insert(canonical.clone());
488            }
489        }
490    }
491
492    ids
493}
494
495fn extract_bead_change_details(
496    repo_root: &Path,
497    commit: &GitCommitRecord,
498    known_ids: &BTreeMap<String, String>,
499) -> BTreeMap<String, (Vec<FieldChange>, Vec<String>)> {
500    let mut before = BTreeMap::<String, serde_json::Value>::new();
501    let mut after = BTreeMap::<String, serde_json::Value>::new();
502
503    for file in &commit.files {
504        if !is_beads_jsonl_path(&file.path) {
505            continue;
506        }
507
508        let output = Command::new("git")
509            .arg("-C")
510            .arg(repo_root)
511            .arg("show")
512            .arg("--format=")
513            .arg("--unified=0")
514            .arg(&commit.sha)
515            .arg("--")
516            .arg(&file.path)
517            .output();
518
519        let Ok(output) = output else {
520            continue;
521        };
522        if !output.status.success() {
523            continue;
524        }
525
526        let text = String::from_utf8_lossy(&output.stdout);
527        for raw_line in text.lines() {
528            let line = raw_line.trim();
529            if !(line.starts_with('+') || line.starts_with('-'))
530                || line.starts_with("+++")
531                || line.starts_with("---")
532            {
533                continue;
534            }
535
536            let content = line.trim_start_matches(['+', '-']).trim();
537            if !(content.starts_with('{') && content.ends_with('}')) {
538                continue;
539            }
540
541            let Ok(value) = serde_json::from_str::<serde_json::Value>(content) else {
542                continue;
543            };
544            let Some(raw_id) = value.get("id").and_then(serde_json::Value::as_str) else {
545                continue;
546            };
547            let Some(canonical) = known_ids.get(&raw_id.to_ascii_lowercase()) else {
548                continue;
549            };
550
551            if line.starts_with('-') {
552                before.insert(canonical.clone(), value);
553            } else {
554                after.insert(canonical.clone(), value);
555            }
556        }
557    }
558
559    let mut details = BTreeMap::<String, (Vec<FieldChange>, Vec<String>)>::new();
560    let bead_ids = before
561        .keys()
562        .chain(after.keys())
563        .cloned()
564        .collect::<BTreeSet<_>>();
565    for bead_id in bead_ids {
566        let field_changes = match (before.get(&bead_id), after.get(&bead_id)) {
567            (Some(old_value), Some(new_value)) => {
568                match (
569                    serde_json::from_value::<Issue>(old_value.clone()),
570                    serde_json::from_value::<Issue>(new_value.clone()),
571                ) {
572                    (Ok(old_issue), Ok(new_issue)) => detect_changes(&old_issue, &new_issue),
573                    _ => Vec::new(),
574                }
575            }
576            _ => Vec::new(),
577        };
578
579        let mut diff_lines = field_changes
580            .iter()
581            .flat_map(|change| {
582                [
583                    format!("- {}: {}", change.field, change.old_value),
584                    format!("+ {}: {}", change.field, change.new_value),
585                ]
586            })
587            .collect::<Vec<_>>();
588
589        if diff_lines.is_empty() {
590            if let Some(old_value) = before.get(&bead_id) {
591                diff_lines.push(format!("- issue: {}", summarize_bead_snapshot(old_value)));
592            }
593            if let Some(new_value) = after.get(&bead_id) {
594                diff_lines.push(format!("+ issue: {}", summarize_bead_snapshot(new_value)));
595            }
596        }
597
598        details.insert(bead_id, (field_changes, diff_lines));
599    }
600
601    details
602}
603
604fn summarize_bead_snapshot(value: &serde_json::Value) -> String {
605    let id = value
606        .get("id")
607        .and_then(serde_json::Value::as_str)
608        .unwrap_or("?");
609    let status = value
610        .get("status")
611        .and_then(serde_json::Value::as_str)
612        .unwrap_or("?");
613    let title = value
614        .get("title")
615        .and_then(serde_json::Value::as_str)
616        .unwrap_or("");
617    if title.is_empty() {
618        format!("{id} [{status}]")
619    } else {
620        format!("{id} [{status}] {title}")
621    }
622}
623
/// Reports whether `path` names a `.jsonl` file located under a `.beads` directory.
///
/// Backslashes are treated as path separators, the extension is compared ASCII
/// case-insensitively, and `.beads` may be any component of the path.
fn is_beads_jsonl_path(path: &str) -> bool {
    let unified = path.replace('\\', "/");
    let candidate = Path::new(&unified);

    let has_jsonl_extension = candidate
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext.eq_ignore_ascii_case("jsonl"));
    if !has_jsonl_extension {
        return false;
    }

    candidate
        .components()
        .any(|part| part.as_os_str() == ".beads")
}
633
/// Reports whether `status` is exactly `"closed"` or `"tombstone"` (case-sensitive).
fn is_closed_like_status(status: &str) -> bool {
    ["closed", "tombstone"].contains(&status)
}
637
638pub fn finalize_history_entries(histories_map: &mut BTreeMap<String, HistoryBeadCompat>) {
639    for history in histories_map.values_mut() {
640        if let Some(commits) = history.commits.as_mut() {
641            commits.sort_by(|left, right| {
642                compare_timestamps(&left.timestamp, &right.timestamp)
643                    .then_with(|| left.sha.cmp(&right.sha))
644            });
645        }
646
647        if let Some(commits) = history.commits.as_ref().filter(|c| !c.is_empty()) {
648            let mut events = commits
649                .iter()
650                .enumerate()
651                .map(|(index, commit)| HistoryEventCompat {
652                    bead_id: history.bead_id.clone(),
653                    event_type: infer_event_type_from_commit(index, &commit.message),
654                    timestamp: commit.timestamp.clone(),
655                    commit_sha: commit.sha.clone(),
656                    commit_message: commit.message.clone(),
657                    author: commit.author.clone(),
658                    author_email: commit.author_email.clone(),
659                })
660                .collect::<Vec<_>>();
661
662            if !events.iter().any(|entry| entry.event_type == "created")
663                && let Some(first) = commits.first()
664            {
665                events.insert(
666                    0,
667                    HistoryEventCompat {
668                        bead_id: history.bead_id.clone(),
669                        event_type: "created".to_string(),
670                        timestamp: first.timestamp.clone(),
671                        commit_sha: first.sha.clone(),
672                        commit_message: first.message.clone(),
673                        author: first.author.clone(),
674                        author_email: first.author_email.clone(),
675                    },
676                );
677            }
678
679            if is_closed_like_status(&history.status.to_ascii_lowercase())
680                && !events.iter().any(|entry| entry.event_type == "closed")
681                && let Some(last) = commits.last()
682            {
683                events.push(HistoryEventCompat {
684                    bead_id: history.bead_id.clone(),
685                    event_type: "closed".to_string(),
686                    timestamp: last.timestamp.clone(),
687                    commit_sha: last.sha.clone(),
688                    commit_message: last.message.clone(),
689                    author: last.author.clone(),
690                    author_email: last.author_email.clone(),
691                });
692            }
693
694            events.sort_by(|left, right| {
695                compare_timestamps(&left.timestamp, &right.timestamp)
696                    .then_with(|| left.event_type.cmp(&right.event_type))
697            });
698            history.events = events;
699        }
700
701        history.milestones = HistoryMilestonesCompat {
702            created: history
703                .events
704                .iter()
705                .find(|event| event.event_type == "created")
706                .cloned(),
707            claimed: history
708                .events
709                .iter()
710                .find(|event| event.event_type == "claimed")
711                .cloned(),
712            closed: history
713                .events
714                .iter()
715                .find(|event| event.event_type == "closed")
716                .cloned(),
717            reopened: history
718                .events
719                .iter()
720                .rev()
721                .find(|event| event.event_type == "reopened")
722                .cloned(),
723        };
724
725        let create_to_close = duration_between(
726            history
727                .milestones
728                .created
729                .as_ref()
730                .map(|event| event.timestamp.as_str()),
731            history
732                .milestones
733                .closed
734                .as_ref()
735                .map(|event| event.timestamp.as_str()),
736        );
737        let claim_to_close = duration_between(
738            history
739                .milestones
740                .claimed
741                .as_ref()
742                .map(|event| event.timestamp.as_str()),
743            history
744                .milestones
745                .closed
746                .as_ref()
747                .map(|event| event.timestamp.as_str()),
748        );
749        let create_to_claim = duration_between(
750            history
751                .milestones
752                .created
753                .as_ref()
754                .map(|event| event.timestamp.as_str()),
755            history
756                .milestones
757                .claimed
758                .as_ref()
759                .map(|event| event.timestamp.as_str()),
760        );
761
762        if create_to_close.is_some() || claim_to_close.is_some() || create_to_claim.is_some() {
763            history.cycle_time = Some(HistoryCycleCompat {
764                claim_to_close: claim_to_close.map(format_duration_compact),
765                create_to_close: create_to_close.map(format_duration_compact),
766                create_to_claim: create_to_claim.map(format_duration_compact),
767            });
768        }
769
770        history.last_author = history
771            .commits
772            .as_ref()
773            .and_then(|c| c.last())
774            .map_or_else(String::new, |commit| commit.author.clone());
775
776        // Normalize: empty Vec -> None (serializes as null, matching legacy)
777        if history.commits.as_ref().is_some_and(Vec::is_empty) {
778            history.commits = None;
779        }
780    }
781}
782
783fn infer_event_type_from_commit(index: usize, message: &str) -> String {
784    let lower = message.to_ascii_lowercase();
785    if has_word_token(&lower, "reopen") || has_word_token(&lower, "reopened") {
786        "reopened".to_string()
787    } else if has_word_token(&lower, "close") || has_word_token(&lower, "closed") {
788        "closed".to_string()
789    } else if has_word_token(&lower, "claim")
790        || has_word_token(&lower, "claimed")
791        || lower.contains("in_progress")
792        || has_word_sequence(&lower, "in progress")
793    {
794        "claimed".to_string()
795    } else if index == 0 {
796        "created".to_string()
797    } else {
798        "modified".to_string()
799    }
800}
801
802/// Check if `text` contains `token` delimited by non-alphanumeric boundaries.
803/// This avoids false positives like "disclose" matching "close" or
804/// "closedown" matching "closed".
805fn has_word_token(text: &str, token: &str) -> bool {
806    text.match_indices(token).any(|(start, _)| {
807        let end = start + token.len();
808        let left = start == 0 || !text.as_bytes()[start - 1].is_ascii_alphanumeric();
809        let right = end == text.len() || !text.as_bytes()[end].is_ascii_alphanumeric();
810        left && right
811    })
812}
813
/// Check if `text` contains the exact word sequence `sequence`.
///
/// The sequence must be delimited by non-alphanumeric boundaries (or the start/end of
/// `text`), so "main progress" and "begin progress" do not match "in progress".
/// An empty `sequence` is contained in every text.
fn has_word_sequence(text: &str, sequence: &str) -> bool {
    if sequence.is_empty() {
        return true;
    }

    let bytes = text.as_bytes();
    text.match_indices(sequence).any(|(start, matched)| {
        let end = start + matched.len();
        // A boundary is either the edge of the text or a non-alphanumeric ASCII byte.
        let open_left = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
        let open_right = end == bytes.len() || !bytes[end].is_ascii_alphanumeric();
        open_left && open_right
    })
}
818
819fn compare_timestamps(left: &str, right: &str) -> std::cmp::Ordering {
820    match (parse_rfc3339_utc(left), parse_rfc3339_utc(right)) {
821        (Some(left), Some(right)) => left.cmp(&right),
822        _ => left.cmp(right),
823    }
824}
825
826fn parse_rfc3339_utc(value: &str) -> Option<DateTime<Utc>> {
827    DateTime::parse_from_rfc3339(value)
828        .ok()
829        .map(|value| value.with_timezone(&Utc))
830}
831
832fn duration_between(start: Option<&str>, end: Option<&str>) -> Option<chrono::Duration> {
833    let start = start.and_then(parse_rfc3339_utc)?;
834    let end = end.and_then(parse_rfc3339_utc)?;
835    let duration = end - start;
836    if duration.num_seconds() >= 0 {
837        Some(duration)
838    } else {
839        None
840    }
841}
842
843fn format_duration_compact(duration: chrono::Duration) -> String {
844    let days = duration.num_days();
845    let hours = duration.num_hours() - days * 24;
846    let minutes = duration.num_minutes() - duration.num_hours() * 60;
847    format!("{days}d {hours}h {minutes}m")
848}
849
850#[cfg(test)]
851#[allow(clippy::items_after_test_module)]
852mod tests {
853    use super::*;
854
855    #[test]
856    fn has_word_token_rejects_substring_match() {
857        // "disclose" should NOT match "close"
858        assert!(!has_word_token("disclose the issue", "close"));
859        // "exclaim" should NOT match "claim"
860        assert!(!has_word_token("exclaim loudly", "claim"));
861        // "reopen" embedded in "unreopened" should not match at word boundary
862        assert!(!has_word_token("unreopened", "reopen"));
863        // Suffix continuations should not match either
864        assert!(!has_word_token("closedown the task", "closed"));
865        assert!(!has_word_token("claimer picked it up", "claim"));
866    }
867
868    #[test]
869    fn has_word_token_accepts_word_boundary() {
870        assert!(has_word_token("close the issue", "close"));
871        assert!(has_word_token("closed bd-123", "closed"));
872        assert!(has_word_token("claim this task", "claim"));
873        assert!(has_word_token("reopen bd-456", "reopen"));
874        assert!(has_word_token("reopened bd-456", "reopened"));
875        // At start of text
876        assert!(has_word_token("close", "close"));
877        // After punctuation
878        assert!(has_word_token("[close] bd-789", "close"));
879    }
880
881    #[test]
882    fn infer_event_type_close_vs_disclose() {
883        assert_eq!(infer_event_type_from_commit(1, "close bd-123"), "closed");
884        assert_eq!(infer_event_type_from_commit(1, "Closed bd-123"), "closed");
885        // "disclose" should NOT trigger "closed"
886        assert_eq!(
887            infer_event_type_from_commit(1, "disclose internal details"),
888            "modified"
889        );
890        assert_eq!(
891            infer_event_type_from_commit(1, "closedown remaining tasks"),
892            "modified"
893        );
894    }
895
896    #[test]
897    fn infer_event_type_claim_vs_exclaim() {
898        assert_eq!(infer_event_type_from_commit(1, "claim bd-abc"), "claimed");
899        assert_eq!(
900            infer_event_type_from_commit(1, "set status to in_progress"),
901            "claimed"
902        );
903        assert_eq!(
904            infer_event_type_from_commit(1, "mark in progress"),
905            "claimed"
906        );
907        // "exclaim" should NOT trigger "claimed"
908        assert_eq!(
909            infer_event_type_from_commit(1, "exclaim about progress"),
910            "modified"
911        );
912        assert_eq!(
913            infer_event_type_from_commit(1, "claimer rotation updated"),
914            "modified"
915        );
916    }
917
918    #[test]
919    fn infer_event_type_reopen_vs_embedded() {
920        assert_eq!(infer_event_type_from_commit(1, "reopen bd-xyz"), "reopened");
921        assert_eq!(
922            infer_event_type_from_commit(1, "Reopened the issue"),
923            "reopened"
924        );
925    }
926
927    #[test]
928    fn infer_event_type_index_zero_fallback() {
929        // Index 0 with no keyword match => "created"
930        assert_eq!(infer_event_type_from_commit(0, "initial setup"), "created");
931        // Index >0 with no keyword match => "modified"
932        assert_eq!(infer_event_type_from_commit(1, "update readme"), "modified");
933    }
934
935    #[test]
936    fn none_commits_serialize_as_null() {
937        let history = HistoryBeadCompat {
938            bead_id: "bd-test".to_string(),
939            title: "Test".to_string(),
940            status: "open".to_string(),
941            events: vec![],
942            milestones: HistoryMilestonesCompat::default(),
943            commits: None,
944            cycle_time: None,
945            last_author: String::new(),
946        };
947        let json = serde_json::to_value(&history).unwrap();
948        // commits and cycle_time are always present (as null) to match
949        // legacy Go output shape.
950        assert!(
951            json.get("commits").is_some_and(serde_json::Value::is_null),
952            "None commits should serialize as null"
953        );
954        assert!(
955            json.get("cycle_time")
956                .is_some_and(serde_json::Value::is_null),
957            "None cycle_time should serialize as null"
958        );
959    }
960
961    #[test]
962    fn some_commits_serialize_as_array() {
963        let history = HistoryBeadCompat {
964            bead_id: "bd-test".to_string(),
965            title: "Test".to_string(),
966            status: "open".to_string(),
967            events: vec![],
968            milestones: HistoryMilestonesCompat::default(),
969            commits: Some(vec![HistoryCommitCompat {
970                sha: "abc123".to_string(),
971                short_sha: "abc".to_string(),
972                message: "test commit".to_string(),
973                author: "tester".to_string(),
974                author_email: "test@example.com".to_string(),
975                timestamp: "2024-01-01T00:00:00Z".to_string(),
976                files: vec![],
977                method: "log".to_string(),
978                confidence: 1.0,
979                reason: "test".to_string(),
980                field_changes: vec![],
981                bead_diff_lines: vec![],
982            }]),
983            cycle_time: None,
984            last_author: String::new(),
985        };
986        let json = serde_json::to_value(&history).unwrap();
987        assert!(
988            json["commits"].is_array(),
989            "Some commits should serialize as array"
990        );
991        assert_eq!(json["commits"].as_array().unwrap().len(), 1);
992    }
993
994    #[test]
995    fn milestones_omit_null_fields() {
996        let milestones = HistoryMilestonesCompat {
997            created: Some(HistoryEventCompat {
998                bead_id: "bd-test".to_string(),
999                event_type: "created".to_string(),
1000                timestamp: "2024-01-01T00:00:00Z".to_string(),
1001                commit_sha: "abc".to_string(),
1002                commit_message: "init".to_string(),
1003                author: "tester".to_string(),
1004                author_email: "test@example.com".to_string(),
1005            }),
1006            claimed: None,
1007            closed: None,
1008            reopened: None,
1009        };
1010        let json = serde_json::to_value(&milestones).unwrap();
1011        let obj = json.as_object().unwrap();
1012        assert!(obj.contains_key("created"), "created should be present");
1013        assert!(
1014            !obj.contains_key("claimed"),
1015            "None claimed should be omitted"
1016        );
1017        assert!(!obj.contains_key("closed"), "None closed should be omitted");
1018        assert!(
1019            !obj.contains_key("reopened"),
1020            "None reopened should be omitted"
1021        );
1022    }
1023
1024    #[test]
1025    fn cycle_time_omits_null_fields() {
1026        let cycle = HistoryCycleCompat {
1027            create_to_close: Some("2d 3h 0m".to_string()),
1028            claim_to_close: None,
1029            create_to_claim: None,
1030        };
1031        let json = serde_json::to_value(&cycle).unwrap();
1032        let obj = json.as_object().unwrap();
1033        assert!(obj.contains_key("create_to_close"));
1034        assert!(
1035            !obj.contains_key("claim_to_close"),
1036            "None should be omitted"
1037        );
1038        assert!(
1039            !obj.contains_key("create_to_claim"),
1040            "None should be omitted"
1041        );
1042    }
1043
1044    #[test]
1045    fn history_stats_omit_absent_avg_cycle_time_days() {
1046        let stats = HistoryStatsCompat {
1047            total_beads: 2,
1048            beads_with_commits: 0,
1049            total_commits: 0,
1050            unique_authors: 0,
1051            avg_commits_per_bead: 0.0,
1052            avg_cycle_time_days: None,
1053            method_distribution: BTreeMap::new(),
1054        };
1055
1056        let json = serde_json::to_value(&stats).unwrap();
1057        let obj = json.as_object().unwrap();
1058        assert!(
1059            !obj.contains_key("avg_cycle_time_days"),
1060            "None avg_cycle_time_days should be omitted"
1061        );
1062    }
1063
1064    #[test]
1065    fn is_beads_jsonl_path_accepts_nested_workspace_beads_files() {
1066        assert!(is_beads_jsonl_path("services/api/.beads/issues.jsonl"));
1067        assert!(is_beads_jsonl_path("apps\\web\\.beads\\beads.jsonl"));
1068        assert!(!is_beads_jsonl_path("services/api/beads/issues.jsonl"));
1069        assert!(!is_beads_jsonl_path("services/api/.beads/issues.json"));
1070    }
1071
1072    #[test]
1073    fn build_workspace_id_aliases_maps_raw_to_prefixed() {
1074        let issues = vec![
1075            Issue {
1076                id: "api-bd-1234".to_string(),
1077                source_repo: "api".to_string(),
1078                ..Issue::default()
1079            },
1080            Issue {
1081                id: "web-bd-5678".to_string(),
1082                source_repo: "web".to_string(),
1083                ..Issue::default()
1084            },
1085        ];
1086
1087        let aliases = build_workspace_id_aliases(&issues);
1088        assert_eq!(aliases.get("bd-1234").unwrap(), "api-bd-1234");
1089        assert_eq!(aliases.get("bd-5678").unwrap(), "web-bd-5678");
1090    }
1091
1092    #[test]
1093    fn build_workspace_id_aliases_skips_non_workspace_issues() {
1094        let issues = vec![Issue {
1095            id: "bd-abcd".to_string(),
1096            source_repo: String::new(),
1097            ..Issue::default()
1098        }];
1099
1100        let aliases = build_workspace_id_aliases(&issues);
1101        assert!(aliases.is_empty());
1102    }
1103
1104    #[test]
1105    fn build_workspace_id_aliases_handles_case_insensitive_prefix() {
1106        let issues = vec![Issue {
1107            id: "API-bd-1234".to_string(),
1108            source_repo: "API".to_string(),
1109            ..Issue::default()
1110        }];
1111
1112        let aliases = build_workspace_id_aliases(&issues);
1113        assert_eq!(aliases.get("bd-1234").unwrap(), "API-bd-1234");
1114    }
1115
1116    #[test]
1117    fn build_workspace_id_aliases_first_wins_on_collision() {
1118        let issues = vec![
1119            Issue {
1120                id: "api-bd-same".to_string(),
1121                source_repo: "api".to_string(),
1122                ..Issue::default()
1123            },
1124            Issue {
1125                id: "web-bd-same".to_string(),
1126                source_repo: "web".to_string(),
1127                ..Issue::default()
1128            },
1129        ];
1130
1131        let aliases = build_workspace_id_aliases(&issues);
1132        // First insertion wins (BTreeMap::entry + or_insert)
1133        assert_eq!(aliases.get("bd-same").unwrap(), "api-bd-same");
1134    }
1135
1136    #[test]
1137    fn build_workspace_id_aliases_uses_workspace_prefix_when_repo_name_differs() {
1138        let issues = vec![Issue {
1139            id: "api-bd-1234".to_string(),
1140            source_repo: "payments-service".to_string(),
1141            workspace_prefix: Some("api-".to_string()),
1142            ..Issue::default()
1143        }];
1144
1145        let aliases = build_workspace_id_aliases(&issues);
1146        assert_eq!(aliases.get("bd-1234").unwrap(), "api-bd-1234");
1147    }
1148
1149    #[test]
1150    fn correlate_with_aliases_matches_raw_ids_from_commit_messages() {
1151        let mut histories = BTreeMap::new();
1152        histories.insert(
1153            "api-bd-1234".to_string(),
1154            HistoryBeadCompat {
1155                bead_id: "api-bd-1234".to_string(),
1156                title: "Test issue".to_string(),
1157                status: "open".to_string(),
1158                events: Vec::new(),
1159                milestones: HistoryMilestonesCompat::default(),
1160                commits: None,
1161                cycle_time: None,
1162                last_author: String::new(),
1163            },
1164        );
1165
1166        let commits = vec![GitCommitRecord {
1167            sha: "abc123".to_string(),
1168            short_sha: "abc".to_string(),
1169            timestamp: "2024-01-01T00:00:00Z".to_string(),
1170            author: "dev".to_string(),
1171            author_email: "dev@example.com".to_string(),
1172            message: "fix: resolve bd-1234 bug".to_string(),
1173            files: Vec::new(),
1174            changed_beads: false,
1175            changed_non_beads: true,
1176        }];
1177
1178        let aliases: BTreeMap<String, String> =
1179            std::iter::once(("bd-1234".to_string(), "api-bd-1234".to_string())).collect();
1180
1181        let mut commit_index = BTreeMap::new();
1182        let mut method_dist = BTreeMap::new();
1183
1184        // Without aliases, the raw ID "bd-1234" won't match "api-bd-1234"
1185        correlate_histories_with_git_aliases(
1186            Path::new("."),
1187            &commits,
1188            &mut histories,
1189            &mut commit_index,
1190            &mut method_dist,
1191            &aliases,
1192        );
1193
1194        let history = histories.get("api-bd-1234").unwrap();
1195        assert!(
1196            history.commits.as_ref().is_some_and(|c| !c.is_empty()),
1197            "raw ID bd-1234 in commit message should match prefixed api-bd-1234 via alias"
1198        );
1199    }
1200}
1201
1202pub fn compute_history_stats(
1203    histories_map: &BTreeMap<String, HistoryBeadCompat>,
1204    commit_index: &BTreeMap<String, Vec<String>>,
1205    method_distribution: BTreeMap<String, usize>,
1206) -> HistoryStatsCompat {
1207    let total_beads = histories_map.len();
1208    let beads_with_commits = histories_map
1209        .values()
1210        .filter(|history| history.commits.as_ref().is_some_and(|c| !c.is_empty()))
1211        .count();
1212    let total_commits = commit_index.len();
1213
1214    let mut authors = BTreeSet::<String>::new();
1215    let mut claim_to_close_days = Vec::<f64>::new();
1216
1217    for history in histories_map.values() {
1218        for commit in history.commits.as_deref().unwrap_or_default() {
1219            if !commit.author.is_empty() {
1220                authors.insert(commit.author.clone());
1221            }
1222        }
1223        for event in &history.events {
1224            if !event.author.is_empty() {
1225                authors.insert(event.author.clone());
1226            }
1227        }
1228
1229        if let Some(duration) = duration_between(
1230            history
1231                .milestones
1232                .claimed
1233                .as_ref()
1234                .map(|event| event.timestamp.as_str()),
1235            history
1236                .milestones
1237                .closed
1238                .as_ref()
1239                .map(|event| event.timestamp.as_str()),
1240        ) {
1241            let seconds_i32 = i32::try_from(duration.num_seconds()).unwrap_or(i32::MAX);
1242            const SECS_PER_DAY: f64 = 86_400.0;
1243            claim_to_close_days.push(f64::from(seconds_i32) / SECS_PER_DAY);
1244        }
1245    }
1246
1247    let avg_commits_per_bead = if beads_with_commits == 0 {
1248        0.0
1249    } else {
1250        let total_commits_u32 = u32::try_from(total_commits).unwrap_or(u32::MAX);
1251        let beads_with_commits_u32 = u32::try_from(beads_with_commits).unwrap_or(u32::MAX);
1252        f64::from(total_commits_u32) / f64::from(beads_with_commits_u32)
1253    };
1254
1255    let avg_cycle_time_days = if claim_to_close_days.is_empty() {
1256        None
1257    } else {
1258        let count_u32 = u32::try_from(claim_to_close_days.len()).unwrap_or(u32::MAX);
1259        Some(claim_to_close_days.iter().sum::<f64>() / f64::from(count_u32))
1260    };
1261
1262    HistoryStatsCompat {
1263        total_beads,
1264        beads_with_commits,
1265        total_commits,
1266        unique_authors: authors.len(),
1267        avg_commits_per_bead,
1268        avg_cycle_time_days,
1269        method_distribution,
1270    }
1271}