Skip to main content

git_forensic/
attribution.rs

//! Attribution timeline — who did what, when, from which timezone.
//!
//! Every commit carries two identities: the **author** (who wrote the change)
//! and the **committer** (who applied it). This module flattens a set of commits
//! into a single time-ordered stream of identity events — the who-did-what-when
//! backbone an examiner builds a narrative on. The timezone offset is retained
//! because it can corroborate or contradict a claimed location.
8
9use git_core::{CommitObject, GitHash, GitRepo, Result};
10
/// The side of a commit an attribution event was taken from.
///
/// The explicit `u8` discriminants give the roles a fixed numeric order
/// (`Author` < `Committer`), which the timeline sort uses as its tie-breaker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Role {
    /// Identity recorded as having written the change.
    Author = 0,
    /// Identity recorded as having applied the change to the repository.
    Committer = 1,
}
20
21/// One identity event on the attribution timeline.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct AttributionEvent {
24    /// The commit this identity is attached to.
25    pub commit: GitHash,
26    /// Author or committer.
27    pub role: Role,
28    /// Identity name.
29    pub name: String,
30    /// Identity email.
31    pub email: String,
32    /// Event time (epoch seconds).
33    pub timestamp: i64,
34    /// Timezone offset of the recorded time, in seconds east of UTC.
35    pub tz_offset_secs: i32,
36}
37
38/// Build a time-ordered attribution timeline from a set of commits.
39///
40/// Each commit contributes two events (author, then committer). Events are
41/// sorted by timestamp ascending; ties keep author before committer.
42#[must_use]
43pub fn attribution_timeline(commits: &[CommitObject]) -> Vec<AttributionEvent> {
44    let mut events = Vec::with_capacity(commits.len() * 2);
45    for c in commits {
46        for (role, sig) in [(Role::Author, &c.author), (Role::Committer, &c.committer)] {
47            events.push(AttributionEvent {
48                commit: c.hash,
49                role,
50                name: sig.name.clone(),
51                email: sig.email.clone(),
52                timestamp: sig.timestamp,
53                tz_offset_secs: sig.tz_offset_secs,
54            });
55        }
56    }
57    events.sort_by(|a, b| a.timestamp.cmp(&b.timestamp).then((a.role as u8).cmp(&(b.role as u8))));
58    events
59}
60
61/// Distinct `(name, email)` identities appearing across `commits`, in first-seen
62/// order. A surprising count or unexpected identity is a lead, not a verdict.
63#[must_use]
64pub fn distinct_identities(commits: &[CommitObject]) -> Vec<(String, String)> {
65    let mut seen = Vec::new();
66    for c in commits {
67        for sig in [&c.author, &c.committer] {
68            let id = (sig.name.clone(), sig.email.clone());
69            if !seen.contains(&id) {
70                seen.push(id);
71            }
72        }
73    }
74    seen
75}
76
77/// Walk every commit reachable from `from` and build its attribution timeline.
78///
79/// # Errors
80/// Propagates any [`git_core`] read error encountered while walking.
81pub fn attribution_repo(repo: &GitRepo, from: GitHash) -> Result<Vec<AttributionEvent>> {
82    let mut commits = Vec::new();
83    for commit in repo.walk_commits(from) {
84        commits.push(commit?);
85    }
86    Ok(attribution_timeline(&commits))
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use git_core::Signature;

    /// Tree hash shared by every fixture commit.
    const TREE_HEX: &str = "89abcdef0123456789abcdef0123456789abcdef";

    /// Fixture signature for `name`, with the email derived as `<name>@x`.
    fn sig(name: &str, ts: i64, tz: i32) -> Signature {
        let email = format!("{name}@x");
        Signature {
            name: name.to_owned(),
            email,
            timestamp: ts,
            tz_offset_secs: tz,
        }
    }

    /// Fixture commit: parentless, unsigned, with message `"m"`.
    fn commit(hex: &str, author: Signature, committer: Signature) -> CommitObject {
        let hash = GitHash::from_hex(hex).unwrap();
        let tree = GitHash::from_hex(TREE_HEX).unwrap();
        CommitObject {
            hash,
            tree,
            parents: Vec::new(),
            author,
            committer,
            message: "m".into(),
            is_signed: false,
        }
    }

    #[test]
    fn timeline_is_time_ordered_author_before_committer() {
        let first = commit(
            "0123456789abcdef0123456789abcdef01234567",
            sig("alice", 1_000, 0),
            sig("bob", 2_000, 3600),
        );
        let second = commit(
            "1123456789abcdef0123456789abcdef01234567",
            sig("carol", 1_500, -7200),
            sig("carol", 1_500, -7200),
        );
        let timeline = attribution_timeline(&[first, second]);

        // Two commits give four events. Expected order: alice (author) @1000,
        // carol (author) @1500, carol (committer) @1500, bob (committer) @2000.
        let stamps: Vec<i64> = timeline.iter().map(|event| event.timestamp).collect();
        assert_eq!(stamps, vec![1_000, 1_500, 1_500, 2_000]);

        assert_eq!(timeline[0].name, "alice");
        assert_eq!(timeline[0].role, Role::Author);
        // At the 1500 tie the author event sorts ahead of the committer event.
        assert_eq!(timeline[1].role, Role::Author);
        assert_eq!(timeline[2].role, Role::Committer);
        assert_eq!(timeline[3].name, "bob");
        assert_eq!(timeline[3].tz_offset_secs, 3600);
    }

    #[test]
    fn distinct_identities_dedup_in_first_seen_order() {
        let pair = commit(
            "0123456789abcdef0123456789abcdef01234567",
            sig("alice", 1, 0),
            sig("bob", 2, 0),
        );
        let expected = vec![
            ("alice".to_owned(), "alice@x".to_owned()),
            ("bob".to_owned(), "bob@x".to_owned()),
        ];
        assert_eq!(distinct_identities(&[pair]), expected);

        // Same name and email on both sides of one commit: only the
        // timestamps differ, so it collapses to a single identity.
        let solo = commit(
            "1123456789abcdef0123456789abcdef01234567",
            sig("alice", 1, 0),
            sig("alice", 2, 0),
        );
        assert_eq!(distinct_identities(&[solo]).len(), 1);
    }
}
160}