git_blame_parser/
lib.rs

1//!
2//! git-blame-parser
3//!
4
/// Error returned when `git blame` porcelain output cannot be parsed.
///
/// Wraps a human-readable description of the problem. The `Display`
/// output is that description prefixed with `Error parsing git-blame: `.
#[derive(Debug, Clone)]
pub struct ParseError(String);

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let ParseError(reason) = self;
        f.write_str("Error parsing git-blame: ")?;
        f.write_str(reason)
    }
}

// The error carries only a message, so the default `source()` (no
// underlying cause) is kept as-is.
impl std::error::Error for ParseError {}
16
/// The blame information for a single line of a file.
///
/// This struct stores one entry of the `git blame` output in PORCELAIN format.
///
/// ## The Porcelain Format
/// The porcelain format is the output format produced by `git blame` when
/// the `--porcelain` option is used.
///
/// Note that the `--porcelain` option generally suppresses commit information
/// that has already been seen.
///
/// The `--line-porcelain` option outputs the full commit information for
/// each line, so use it when generating input for this parser.
///
/// For more information, see [git doc](https://git-scm.com/docs/git-blame#_the_porcelain_format).
///
/// ## time
/// `author_time` and `committer_time` are UNIX times (seconds).
///
/// ## boundary
/// `boundary` is metadata indicating the commit where history tracking
/// stopped in git blame. It means that the history will not be followed
/// beyond this point.
///
/// It is output only when necessary, and in that case, it is set to `true`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Blame {
    /// The commit hash (first field of the header line).
    pub commit: String,
    /// The line number in the original file (second field of the header line).
    pub original_line_no: usize,
    /// The line number in the final file (third field of the header line).
    pub final_line_no: usize,

    /// The value of the `filename` entry.
    pub filename: String,
    /// The value of the `summary` entry.
    pub summary: String,

    /// The contents of the actual line (the leading TAB is removed).
    pub content: String,

    /// The commit named by the `previous` entry, if one was present.
    pub previous_commit: Option<String>,
    /// The file path named by the `previous` entry, if one was present.
    pub previous_filepath: Option<String>,

    /// Set to true when blame output contains `boundary`.
    pub boundary: bool,

    /// The value of the `author` entry.
    pub author: String,
    /// The value of the `author-mail` entry, stored exactly as emitted
    /// (e.g. `<user@example.com>`).
    pub author_mail: String,
    /// The value of the `author-time` entry (UNIX time, seconds).
    pub author_time: u64,
    /// The value of the `author-tz` entry (e.g. `+0900`).
    pub author_tz: String,

    /// The value of the `committer` entry.
    pub committer: String,
    /// The value of the `committer-mail` entry, stored exactly as emitted.
    pub committer_mail: String,
    /// The value of the `committer-time` entry (UNIX time, seconds).
    pub committer_time: u64,
    /// The value of the `committer-tz` entry (e.g. `+0900`).
    pub committer_tz: String,
}

impl Blame {
    /// Returns the abbreviated (short-hand) version of the commit hash.
    ///
    /// This is the first seven characters of `commit`, or the whole string
    /// when it is shorter than that.
    pub fn short_commit(&self) -> String {
        // Count characters rather than slicing by byte offset: `commit` comes
        // from parsed input, and a byte slice would panic if offset 7 fell
        // inside a multi-byte character.
        self.commit.chars().take(7).collect()
    }
}
80
81/// Parses the porcelain format output corresponding to a single line to
82/// construct a Blame object.
83pub fn parse_one_blame(porcelain: &[&str]) -> Result<Blame, ParseError> {
84    let mut blame = Blame::default();
85
86    // Parse header
87    if let Some(header) = porcelain.first() {
88        let parts: Vec<&str> = header.split_whitespace().collect();
89        blame.commit = parts[0].to_string();
90
91        if let Some(lineno) = parts.get(1) {
92            blame.original_line_no = lineno.parse::<usize>().unwrap_or(0);
93        }
94        if let Some(lineno) = parts.get(2) {
95            blame.final_line_no = lineno.parse::<usize>().unwrap_or(0);
96        }
97    } else {
98        return Err(ParseError("no header".to_string()));
99    }
100
101    // Parse details
102    for line in porcelain.iter().skip(1) {
103        if line.starts_with('\t') {
104            let src = line.strip_prefix('\t').unwrap_or(line);
105            blame.content = src.to_string();
106        } else {
107            match line.split_once(' ') {
108                Some(("filename", value)) => blame.filename = value.to_string(),
109                Some(("summary", value)) => blame.summary = value.to_string(),
110
111                Some(("author", value)) => blame.author = value.to_string(),
112                Some(("author-mail", value)) => blame.author_mail = value.to_string(),
113                Some(("author-time", value)) => {
114                    blame.author_time = value.parse::<u64>().unwrap_or(0)
115                }
116                Some(("author-tz", value)) => blame.author_tz = value.to_string(),
117
118                Some(("committer", value)) => blame.committer = value.to_string(),
119                Some(("committer-mail", value)) => blame.committer_mail = value.to_string(),
120                Some(("committer-time", value)) => {
121                    blame.committer_time = value.parse::<u64>().unwrap_or(0)
122                }
123                Some(("committer-tz", value)) => blame.committer_tz = value.to_string(),
124
125                Some(("previous", value)) => {
126                    if let Some((commit, filepath)) = value.split_once(' ') {
127                        blame.previous_commit = Some(commit.to_string());
128                        blame.previous_filepath = Some(filepath.to_string());
129                    }
130                }
131
132                None => match *line {
133                    "boundary" => blame.boundary = true,
134                    _ => continue,
135                },
136
137                _ => continue,
138            }
139        }
140    }
141
142    Ok(blame)
143}
144
145/// Parses the output of `git blame` command in the porcelain format.
146/// the output must be generated using the `--line-porcelain` option.
147pub fn parse(porcelain: &str) -> Result<Vec<Blame>, ParseError> {
148    let lines = porcelain.lines();
149    let mut blames = Vec::new();
150
151    let mut blob: Vec<&str> = Vec::new();
152    for line in lines {
153        blob.push(line);
154
155        // end of one blame output.
156        if line.starts_with('\t') {
157            match parse_one_blame(&blob) {
158                Ok(blame) => blames.push(blame),
159                Err(e) => return Err(e),
160            }
161
162            blob.clear();
163        }
164    }
165
166    Ok(blames)
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the fixture `tests/<name>` and parses it, failing the calling
    /// test if the file cannot be read or the parse is rejected.
    fn load_fixture(name: &str) -> Vec<Blame> {
        let path = std::path::PathBuf::from("tests").join(name);
        let raw_blame = std::fs::read_to_string(path).unwrap();

        let blames = parse(&raw_blame);
        assert!(blames.is_ok());
        blames.unwrap()
    }

    /// Full multi-line sample: checks the entry count and every field of the
    /// first entry.
    #[test]
    fn it_works() {
        let blames = load_fixture("sample-blame.txt");
        assert_eq!(blames.len(), 43);

        let first = &blames[0];

        // Header
        assert_eq!(first.commit, "c9a79e91e05355fc42ec519593806466c2f66de0");
        assert_eq!(first.original_line_no, 1);
        assert_eq!(first.final_line_no, 1);
        assert!(!first.boundary);

        // File-level details
        assert_eq!(first.filename, "README.md");
        assert_eq!(first.summary, "Update README.md");
        assert_eq!(first.content, "<div align=\"center\">");

        // Author
        assert_eq!(first.author, "mitsu-ksgr");
        assert_eq!(first.author_mail, "<mitsu-ksgr@users.noreply.github.com>");
        assert_eq!(first.author_time, 1744981061);
        assert_eq!(first.author_tz, "+0900");

        // Committer
        assert_eq!(first.committer, "GitHub");
        assert_eq!(first.committer_mail, "<noreply@github.com>");
        assert_eq!(first.committer_time, 1744981061);
        assert_eq!(first.committer_tz, "+0900");

        // Previous commit
        assert!(first.previous_commit.is_some());
        assert!(first.previous_filepath.is_some());
        assert_eq!(
            first.previous_commit.as_deref(),
            Some("5d31b11bd146562bb1b472e1334233a6a8ef66e5")
        );
        assert_eq!(first.previous_filepath.as_deref(), Some("README.md"));
    }

    /// Single-entry output that carries the `boundary` marker.
    #[test]
    fn one_line_blame() {
        let blames = load_fixture("one-line-blame.txt");
        assert_eq!(blames.len(), 1);

        let first = &blames[0];
        assert_eq!(first.commit, "6cebf082a694d9dec6c1928531fcb649791885ec");
        assert_eq!(first.original_line_no, 1);
        assert_eq!(first.final_line_no, 1);
        assert!(first.boundary);
        assert_eq!(first.summary, "Initial commit");
        assert_eq!(first.content, "# git-blame-parser");
    }

    /// Output for a line that has not been committed yet: an all-zero hash
    /// and placeholder author/committer values.
    #[test]
    fn no_commited_yet() {
        let blames = load_fixture("no-committed.txt");

        let first = &blames[0];
        assert_eq!(first.commit, "0000000000000000000000000000000000000000");
        assert_eq!(first.author, "Not Committed Yet");
        assert_eq!(first.author_mail, "<not.committed.yet>");
        assert_eq!(first.committer, "Not Committed Yet");
        assert_eq!(first.committer_mail, "<not.committed.yet>");
    }

    /// `short_commit` keeps only the leading seven characters of the hash.
    #[test]
    fn test_shor_commit() {
        let blame = Blame {
            commit: "abcdefghijklmnopqrstuvwxyz1234567890abcd".to_string(),
            ..Blame::default()
        };
        assert_eq!(blame.short_commit(), "abcdefg");
    }
}