Skip to main content

gitgraph_core/
log_parser.rs

1use crate::error::{GitLgError, Result};
2use crate::models::{GitRef, GitRefKind, GraphEdge, GraphRow};
3
/// Delimiter between the fields of one record: U+001F (ASCII unit separator).
pub const FIELD_SEP: char = '\x1f';
/// Delimiter between consecutive records: U+001E (ASCII record separator).
pub const RECORD_SEP: char = '\x1e';
6
7#[derive(Debug, Clone, PartialEq, Eq)]
8pub struct RawCommit {
9    pub hash: String,
10    pub short_hash: String,
11    pub parents: Vec<String>,
12    pub author_name: String,
13    pub author_email: String,
14    pub authored_unix: i64,
15    pub committed_unix: i64,
16    pub refs: Vec<GitRef>,
17    pub subject: String,
18    pub body: String,
19}
20
21pub fn parse_git_log_records(stdout: &str) -> Result<Vec<RawCommit>> {
22    let mut commits = Vec::new();
23    for raw_record in stdout.split(RECORD_SEP) {
24        let record = raw_record.trim_matches(['\r', '\n', ' ']);
25        if record.is_empty() {
26            continue;
27        }
28        let fields: Vec<&str> = record.splitn(10, FIELD_SEP).collect();
29        if fields.len() != 10 {
30            return Err(GitLgError::Parse(format!(
31                "expected 10 fields, got {} in record {:?}",
32                fields.len(),
33                record
34            )));
35        }
36        let authored_unix = fields[5].parse::<i64>().map_err(|e| {
37            GitLgError::Parse(format!(
38                "invalid authored unix timestamp {:?}: {}",
39                fields[5], e
40            ))
41        })?;
42        let committed_unix = fields[6].parse::<i64>().map_err(|e| {
43            GitLgError::Parse(format!(
44                "invalid committed unix timestamp {:?}: {}",
45                fields[6], e
46            ))
47        })?;
48        let parents = if fields[2].trim().is_empty() {
49            Vec::new()
50        } else {
51            fields[2]
52                .split_whitespace()
53                .map(ToString::to_string)
54                .collect::<Vec<_>>()
55        };
56
57        commits.push(RawCommit {
58            hash: fields[0].to_string(),
59            short_hash: fields[1].to_string(),
60            parents,
61            author_name: fields[3].to_string(),
62            author_email: fields[4].to_string(),
63            authored_unix,
64            committed_unix,
65            refs: parse_refs(fields[7]),
66            subject: fields[8].to_string(),
67            body: fields[9].to_string(),
68        });
69    }
70    Ok(commits)
71}
72
73pub fn build_graph_rows(commits: Vec<RawCommit>) -> Vec<GraphRow> {
74    let mut active_lanes: Vec<Option<String>> = Vec::new();
75    let mut rows = Vec::with_capacity(commits.len());
76
77    for commit in commits {
78        let lane = find_or_allocate_lane(&commit.hash, &mut active_lanes);
79
80        // A hash can appear in multiple lanes due to merge ancestry.
81        // Collapse duplicates after choosing primary lane.
82        for (idx, lane_hash) in active_lanes.iter_mut().enumerate() {
83            if idx != lane && lane_hash.as_deref() == Some(commit.hash.as_str()) {
84                *lane_hash = None;
85            }
86        }
87
88        let mut edges = Vec::new();
89        if let Some(first_parent) = commit.parents.first() {
90            active_lanes[lane] = Some(first_parent.clone());
91            edges.push(GraphEdge {
92                to_lane: lane,
93                parent_hash: first_parent.clone(),
94            });
95        } else {
96            active_lanes[lane] = None;
97        }
98
99        for parent in commit.parents.iter().skip(1) {
100            let target_lane = find_or_allocate_lane(parent, &mut active_lanes);
101            edges.push(GraphEdge {
102                to_lane: target_lane,
103                parent_hash: parent.clone(),
104            });
105        }
106
107        while active_lanes.last().is_some_and(Option::is_none) {
108            active_lanes.pop();
109        }
110
111        rows.push(GraphRow {
112            hash: commit.hash,
113            short_hash: commit.short_hash,
114            parents: commit.parents,
115            author_name: commit.author_name,
116            author_email: commit.author_email,
117            authored_unix: commit.authored_unix,
118            committed_unix: commit.committed_unix,
119            subject: commit.subject,
120            body: commit.body,
121            refs: commit.refs,
122            lane,
123            active_lane_count: active_lanes.len(),
124            edges,
125        });
126    }
127    rows
128}
129
/// Returns the index of the lane associated with `hash`, claiming one if
/// necessary.
///
/// Lookup order:
/// 1. the first lane that already holds `hash` (left untouched);
/// 2. the first free (`None`) lane, which is set to `hash`;
/// 3. a new lane appended to the end of `active_lanes`.
fn find_or_allocate_lane(hash: &str, active_lanes: &mut Vec<Option<String>>) -> usize {
    let already_tracked = active_lanes
        .iter()
        .position(|slot| slot.as_deref() == Some(hash));
    if let Some(existing) = already_tracked {
        return existing;
    }

    match active_lanes.iter().position(Option::is_none) {
        Some(free) => {
            active_lanes[free] = Some(hash.to_owned());
            free
        }
        None => {
            active_lanes.push(Some(hash.to_owned()));
            active_lanes.len() - 1
        }
    }
}
149
150fn parse_refs(decorations: &str) -> Vec<GitRef> {
151    let cleaned = decorations.trim();
152    if cleaned.is_empty() {
153        return Vec::new();
154    }
155
156    cleaned
157        .split(',')
158        .map(str::trim)
159        .filter(|token| !token.is_empty())
160        .map(parse_ref_token)
161        .collect()
162}
163
164fn parse_ref_token(token: &str) -> GitRef {
165    if let Some((left, right)) = token.split_once(" -> ") {
166        let left_name = simplify_ref_name(left.trim());
167        let target = simplify_ref_name(right.trim());
168        let kind = if left.trim() == "HEAD" {
169            GitRefKind::Head
170        } else {
171            classify_ref(left.trim())
172        };
173        return GitRef {
174            kind,
175            name: left_name,
176            target: Some(target),
177        };
178    }
179
180    if let Some(rest) = token.strip_prefix("tag: ") {
181        return GitRef {
182            kind: GitRefKind::Tag,
183            name: simplify_ref_name(rest.trim()),
184            target: None,
185        };
186    }
187
188    if token == "HEAD" {
189        return GitRef {
190            kind: GitRefKind::Head,
191            name: "HEAD".to_string(),
192            target: None,
193        };
194    }
195
196    GitRef {
197        kind: classify_ref(token),
198        name: simplify_ref_name(token),
199        target: None,
200    }
201}
202
203fn classify_ref(raw: &str) -> GitRefKind {
204    if raw.starts_with("refs/heads/") {
205        return GitRefKind::LocalBranch;
206    }
207    if raw.starts_with("refs/remotes/") {
208        return GitRefKind::RemoteBranch;
209    }
210    if raw.starts_with("refs/tags/") {
211        return GitRefKind::Tag;
212    }
213    if raw == "refs/stash" {
214        return GitRefKind::Stash;
215    }
216    GitRefKind::Other
217}
218
/// Removes one leading `refs/heads/`, `refs/remotes/` or `refs/tags/` prefix
/// from `raw`, trying them in that order; input without any of these
/// prefixes is returned unchanged.
fn simplify_ref_name(raw: &str) -> String {
    const STRIPPABLE_PREFIXES: [&str; 3] = ["refs/heads/", "refs/remotes/", "refs/tags/"];

    STRIPPABLE_PREFIXES
        .iter()
        .find_map(|&prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
226
#[cfg(test)]
mod tests {
    use super::{
        FIELD_SEP, GitRefKind, RECORD_SEP, build_graph_rows, parse_git_log_records, parse_ref_token,
    };

    /// A single well-formed record yields one commit with its hash, its one
    /// parent and all three decorations.
    #[test]
    fn parses_one_record() {
        let (f, r) = (FIELD_SEP, RECORD_SEP);
        let input = format!(
            "aaaaaaaa{}aaaaaaa{}bbbbbbbb{}Alice{}alice@example.com{}1700000000{}1700000001{}HEAD -> refs/heads/main, refs/remotes/origin/main, tag: refs/tags/v1.0{}Subject{}Body{}",
            f, f, f, f, f, f, f, f, f, r
        );

        let commits = parse_git_log_records(&input).expect("parse records");
        assert_eq!(commits.len(), 1);

        let commit = &commits[0];
        assert_eq!(commit.hash, "aaaaaaaa");
        assert_eq!(commit.parents, vec!["bbbbbbbb"]);
        assert_eq!(commit.refs.len(), 3);
    }

    /// Pointer, remote-branch and tag entries are classified and have their
    /// `refs/…` prefixes removed.
    #[test]
    fn parses_ref_types() {
        let head_ref = parse_ref_token("HEAD -> refs/heads/main");
        assert_eq!(head_ref.kind, GitRefKind::Head);
        assert_eq!(head_ref.target.as_deref(), Some("main"));

        let remote_ref = parse_ref_token("refs/remotes/origin/main");
        assert_eq!(remote_ref.kind, GitRefKind::RemoteBranch);
        assert_eq!(remote_ref.name, "origin/main");

        let tag_ref = parse_ref_token("tag: refs/tags/v1.0.0");
        assert_eq!(tag_ref.kind, GitRefKind::Tag);
        assert_eq!(tag_ref.name, "v1.0.0");
    }

    /// History: `c3` merges `p1` and `p2`, which both descend from `p0`.
    /// The merge commit sits in lane 0 and its second parent is sent to lane 1.
    #[test]
    fn assigns_lanes_for_merge() {
        let input = format!(
            "c3{f}c3{f}p1 p2{f}A{f}a@e{f}10{f}10{f}{f}merge{f}{r}p1{f}p1{f}p0{f}A{f}a@e{f}9{f}9{f}{f}parent1{f}{r}p2{f}p2{f}p0{f}A{f}a@e{f}8{f}8{f}{f}parent2{f}{r}p0{f}p0{f}{f}A{f}a@e{f}7{f}7{f}{f}root{f}{r}",
            f = FIELD_SEP,
            r = RECORD_SEP
        );

        let commits = parse_git_log_records(&input).expect("valid records");
        let rows = build_graph_rows(commits);
        assert_eq!(rows.len(), 4);

        let merge_row = &rows[0];
        assert_eq!(merge_row.hash, "c3");
        assert_eq!(merge_row.lane, 0);
        assert_eq!(merge_row.edges.len(), 2);
        assert!(merge_row.edges.iter().any(|edge| edge.to_lane == 1));
    }
}