Skip to main content

gix_object/commit/message/
body.rs

1use std::{borrow::Cow, ops::Deref};
2
3use crate::{
4    bstr::{BStr, BString, ByteSlice, ByteVec},
5    commit::message::BodyRef,
6};
7
/// An iterator over the trailers found in a commit message body.
///
/// Lines that cannot be parsed as a trailer are skipped.
pub struct Trailers<'a> {
    /// The bytes of the trailer block that still have to be parsed.
    pub(crate) cursor: &'a [u8],
}
14
15/// A trailer as parsed from the commit message body.
16#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
17#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
18pub struct TrailerRef<'a> {
19    /// The name of the trailer, like "Signed-off-by", up to the separator `: `.
20    #[cfg_attr(feature = "serde", serde(borrow))]
21    pub token: &'a BStr,
22    /// The value right after the separator `: `, with leading and trailing whitespace trimmed.
23    /// Multi-line values are unfolded to match `git interpret-trailers --parse`, which is when
24    /// this field is [`Cow::Owned`].
25    #[cfg_attr(feature = "serde", serde(borrow))]
26    pub value: Cow<'a, BStr>,
27}
28
/// Line prefixes that Git itself generates and therefore always recognizes when it decides
/// whether the last paragraph of a message is a trailer block.
///
/// The cherry-pick marker is not a `token: value` trailer, yet it counts towards Git's
/// recognized-prefix / 25% heuristic in `interpret-trailers` just like `Signed-off-by: `.
const GIT_GENERATED_PREFIXES: [&[u8]; 2] = [b"Signed-off-by: ", b"(cherry picked from commit "];
34
/// One physical line of the original message body.
///
/// `text` is the line with its line ending removed so it can be parsed, and
/// `start` is where that line begins in the original `body` slice.
#[derive(Clone, Copy)]
struct Line<'a> {
    /// The content of the line, excluding a trailing `\n` or `\r\n`.
    text: &'a [u8],
    /// The byte offset into the original body buffer at which this line starts.
    start: usize,
}
46
/// Remove a single trailing line ending from `line`.
///
/// Unix (`\n`) and Windows (`\r\n`) endings are stripped, as is a lone trailing `\r`.
/// Anything in front of that one line ending is left untouched.
fn trim_line_ending(line: &[u8]) -> &[u8] {
    let without_lf = line.strip_suffix(b"\n").unwrap_or(line);
    without_lf.strip_suffix(b"\r").unwrap_or(without_lf)
}
59
60/// Split `input` into physical lines while keeping enough information to map
61/// parser decisions back to the original byte slice.
62///
63/// This is different from using plain `.lines()` because trailer block detection
64/// needs normalized line contents for parsing *and* exact byte offsets to slice
65/// the original body at the eventual trailer boundary.
66fn lines(input: &[u8]) -> Vec<Line<'_>> {
67    let mut start = 0;
68    input
69        .lines_with_terminator()
70        .map(|raw| {
71            let line = Line {
72                text: trim_line_ending(raw),
73                start,
74            };
75            start += raw.len();
76            line
77        })
78        .collect()
79}
80
/// Locate the Git trailer separator `:` in `line` and return its byte position.
///
/// The token in front of the separator may consist of ASCII alphanumerics and `-`,
/// as in `Acked-by: Alice`. Git's looser form with spaces or tabs between token
/// and separator, like `Acked-by : Alice`, is accepted as well.
///
/// Returns `None` if any other byte shows up before a `:` is found, if token
/// characters follow the whitespace, or if the line starts with a space or tab.
fn find_separator(line: &[u8]) -> Option<usize> {
    let mut seen_whitespace = false;
    for (pos, &byte) in line.iter().enumerate() {
        match byte {
            b':' => return Some(pos),
            // Token characters are only valid as long as no whitespace was seen.
            token if !seen_whitespace && (token.is_ascii_alphanumeric() || token == b'-') => {}
            // Whitespace may separate token and `:`, but must not start the line.
            b' ' | b'\t' if pos != 0 => seen_whitespace = true,
            _ => return None,
        }
    }
    None
}
103
104/// Parse a single physical trailer line.
105///
106/// Returns `None` if `line` is not a valid trailer line at all.
107///
108/// Returns `Some((token, separator_offset))` if parsing succeeds, where `token`
109/// is the normalized trailer token as a `BStr` and `separator_offset` is the
110/// byte offset of the `:` separator in the original `line`. Callers use that
111/// offset to slice out the raw value bytes, potentially including following
112/// continuation lines.
113fn parse_trailer_line(line: &[u8]) -> Option<(&BStr, usize)> {
114    if line.first().is_some_and(u8::is_ascii_whitespace) {
115        return None;
116    }
117    let separator = find_separator(line)?;
118    (separator > 0).then_some((line[..separator].trim().as_bstr(), separator))
119}
120
/// Return `true` if `line` is empty or consists of ASCII whitespace only.
fn is_blank_line(line: &[u8]) -> bool {
    !line.iter().any(|byte| !byte.is_ascii_whitespace())
}
124
125fn is_recognized_prefix(line: &[u8]) -> bool {
126    GIT_GENERATED_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
127}
128
129/// Turn a raw trailer value, possibly spanning multiple physical lines, into
130/// the unfolded value Git would expose for parsing.
131///
132/// A single-line value is returned borrowed. If continuation lines are present,
133/// embedded newlines and leading continuation whitespace are collapsed into
134/// single spaces and the unfolded value is returned owned.
135fn unfold_value(value: &[u8]) -> Cow<'_, BStr> {
136    let mut physical_lines = value.lines().peekable();
137    let Some(first_line) = physical_lines.next() else {
138        return Cow::Borrowed(b"".as_bstr());
139    };
140
141    if physical_lines.peek().is_none() {
142        return Cow::Borrowed(first_line.trim().as_bstr());
143    }
144
145    let mut out = BString::from(first_line.trim());
146    for line in physical_lines {
147        let line = line.trim();
148        if line.is_empty() {
149            continue;
150        }
151        if !out.is_empty() {
152            out.push_byte(b' ');
153        }
154        out.extend_from_slice(line);
155    }
156    Cow::Owned(out)
157}
158
159/// Find the byte offset at which the trailer block begins in `body`.
160///
161/// Returns `None` if the trailing paragraph does not satisfy Git's trailer-block
162/// heuristic. Returns `Some(offset)`  if it does, where `offset` points into the
163/// original `body` slice at the first byte that belongs to the trailer block,
164/// including the separating blank line when one is present.
165///
166/// Internally this mirrors Git's backward scan: count trailer lines,
167/// non-trailer lines, continuation lines, and whether a recognized built-in
168/// prefix was seen, then apply the "all trailers" or recognized-prefix / 25%
169/// rule to the last paragraph of the body.
170fn trailer_block_start(body: &[u8]) -> Option<usize> {
171    /// Git accepts the trailing paragraph either if it is made entirely of
172    /// trailers, or if it contains at least one recognized built-in trailer
173    /// prefix and at least 25% of the paragraph consists of trailer lines.
174    fn accepts_as_trailer_block(recognized_prefix: bool, trailer_lines: usize, non_trailer_lines: usize) -> bool {
175        (trailer_lines > 0 && non_trailer_lines == 0) || (recognized_prefix && trailer_lines * 3 >= non_trailer_lines)
176    }
177
178    let lines = lines(body);
179    let mut recognized_prefix = false;
180    let mut trailer_lines = 0usize;
181    let mut non_trailer_lines = 0usize;
182    let mut possible_continuation_lines = 0usize;
183    let mut saw_non_blank_line = false;
184
185    for idx in (0..lines.len()).rev() {
186        let line = &lines[idx];
187        if is_blank_line(line.text) {
188            if !saw_non_blank_line {
189                continue;
190            }
191            non_trailer_lines += possible_continuation_lines;
192            return accepts_as_trailer_block(recognized_prefix, trailer_lines, non_trailer_lines).then_some(
193                idx.checked_sub(1)
194                    .map_or(0, |prev| lines[prev].start + lines[prev].text.len()),
195            );
196        }
197
198        saw_non_blank_line = true;
199        if is_recognized_prefix(line.text) {
200            trailer_lines += 1;
201            possible_continuation_lines = 0;
202            recognized_prefix = true;
203            continue;
204        }
205
206        if parse_trailer_line(line.text).is_some() {
207            trailer_lines += 1;
208            possible_continuation_lines = 0;
209            continue;
210        }
211
212        if line.text.first().is_some_and(u8::is_ascii_whitespace) {
213            possible_continuation_lines += 1;
214            continue;
215        }
216
217        non_trailer_lines += 1 + possible_continuation_lines;
218        possible_continuation_lines = 0;
219    }
220
221    non_trailer_lines += possible_continuation_lines;
222    accepts_as_trailer_block(recognized_prefix, trailer_lines, non_trailer_lines).then_some(0)
223}
224
225impl<'a> Iterator for Trailers<'a> {
226    type Item = TrailerRef<'a>;
227
228    fn next(&mut self) -> Option<Self::Item> {
229        if self.cursor.is_empty() {
230            return None;
231        }
232
233        while let Some(line) = self.cursor.lines_with_terminator().next() {
234            let line = trim_line_ending(line);
235            let consumed = self.cursor.lines_with_terminator().next().map_or(0, <[u8]>::len);
236            if let Some((token, separator)) = parse_trailer_line(line) {
237                let mut trailer_len = consumed;
238                let mut cursor = &self.cursor[consumed..];
239                while let Some(next_line) = cursor.lines_with_terminator().next() {
240                    let next_text = trim_line_ending(next_line);
241                    if is_blank_line(next_text) || !next_text.first().is_some_and(u8::is_ascii_whitespace) {
242                        break;
243                    }
244                    trailer_len += next_line.len();
245                    cursor = &cursor[next_line.len()..];
246                }
247
248                let value = unfold_value(&self.cursor[separator + 1..trailer_len]);
249                self.cursor = &self.cursor[trailer_len..];
250                return Some(TrailerRef { token, value });
251            }
252            self.cursor = &self.cursor[consumed..];
253        }
254
255        None
256    }
257}
258
259impl<'a> BodyRef<'a> {
260    /// Parse `body` bytes into the trailer and the actual body.
261    pub fn from_bytes(body: &'a [u8]) -> Self {
262        trailer_block_start(body).map_or(
263            BodyRef {
264                body_without_trailer: body.as_bstr(),
265                start_of_trailer: &[],
266            },
267            |start| BodyRef {
268                body_without_trailer: body[..start].as_bstr(),
269                start_of_trailer: &body[start..],
270            },
271        )
272    }
273
274    /// Returns the body with the trailers stripped.
275    ///
276    /// You can iterate trailers with the [`trailers()`][BodyRef::trailers()] method.
277    pub fn without_trailer(&self) -> &'a BStr {
278        self.body_without_trailer
279    }
280
281    /// Return an iterator over the trailers parsed from the last paragraph of the body. Maybe empty.
282    pub fn trailers(&self) -> Trailers<'a> {
283        Trailers {
284            cursor: self.start_of_trailer,
285        }
286    }
287}
288
289impl AsRef<BStr> for BodyRef<'_> {
290    fn as_ref(&self) -> &BStr {
291        self.body_without_trailer
292    }
293}
294
295impl Deref for BodyRef<'_> {
296    type Target = BStr;
297
298    fn deref(&self) -> &Self::Target {
299        self.body_without_trailer
300    }
301}
302
303/// Convenience methods
304impl TrailerRef<'_> {
305    /// Check if this trailer is a `Signed-off-by` trailer (case-insensitive).
306    pub fn is_signed_off_by(&self) -> bool {
307        self.token.eq_ignore_ascii_case(b"Signed-off-by")
308    }
309
310    /// Check if this trailer is a `Co-authored-by` trailer (case-insensitive).
311    pub fn is_co_authored_by(&self) -> bool {
312        self.token.eq_ignore_ascii_case(b"Co-authored-by")
313    }
314
315    /// Check if this trailer is an `Acked-by` trailer (case-insensitive).
316    pub fn is_acked_by(&self) -> bool {
317        self.token.eq_ignore_ascii_case(b"Acked-by")
318    }
319
320    /// Check if this trailer is a `Reviewed-by` trailer (case-insensitive).
321    pub fn is_reviewed_by(&self) -> bool {
322        self.token.eq_ignore_ascii_case(b"Reviewed-by")
323    }
324
325    /// Check if this trailer is a `Tested-by` trailer (case-insensitive).
326    pub fn is_tested_by(&self) -> bool {
327        self.token.eq_ignore_ascii_case(b"Tested-by")
328    }
329
330    /// Check if this trailer represents any kind of authorship or attribution
331    /// (`Signed-off-by`, `Co-authored-by`, etc.).
332    pub fn is_attribution(&self) -> bool {
333        self.is_signed_off_by()
334            || self.is_co_authored_by()
335            || self.is_acked_by()
336            || self.is_reviewed_by()
337            || self.is_tested_by()
338    }
339}
340
341/// Convenience methods
342impl<'a> Trailers<'a> {
343    /// Filter trailers to only include `Signed-off-by` entries.
344    pub fn signed_off_by(self) -> impl Iterator<Item = TrailerRef<'a>> {
345        self.filter(TrailerRef::is_signed_off_by)
346    }
347
348    /// Filter trailers to only include `Co-authored-by` entries.
349    pub fn co_authored_by(self) -> impl Iterator<Item = TrailerRef<'a>> {
350        self.filter(TrailerRef::is_co_authored_by)
351    }
352
353    /// Filter trailers to only include attribution-related entries.
354    /// (`Signed-off-by`, `Co-authored-by`, `Acked-by`, `Reviewed-by`, `Tested-by`).
355    pub fn attributions(self) -> impl Iterator<Item = TrailerRef<'a>> {
356        self.filter(TrailerRef::is_attribution)
357    }
358
359    /// Filter trailers to only include authors from `Signed-off-by` and `Co-authored-by` entries.
360    pub fn authors(self) -> impl Iterator<Item = TrailerRef<'a>> {
361        self.filter(|trailer| trailer.is_signed_off_by() || trailer.is_co_authored_by())
362    }
363}
364
#[cfg(test)]
mod test_parse_trailer {
    use super::*;

    /// Parse the first trailer out of `input`, which must contain one.
    fn parse(input: &str) -> TrailerRef<'_> {
        let mut trailers = Trailers {
            cursor: input.as_bytes(),
        };
        trailers.next().expect("a trailer to be parsed")
    }

    /// Build the trailer a test expects to get back from the parser.
    fn expected<'a>(token: &'a str, value: &'a str) -> TrailerRef<'a> {
        TrailerRef {
            token: token.into(),
            value: value.as_bytes().as_bstr().into(),
        }
    }

    #[test]
    fn simple_newline() {
        assert_eq!(parse("foo: bar\n"), expected("foo", "bar"));
    }

    #[test]
    fn whitespace_around_separator_is_normalized() {
        assert_eq!(parse("foo :  bar"), expected("foo", "bar"));
    }

    #[test]
    fn trailing_whitespace_after_value_is_trimmed() {
        assert_eq!(parse("hello-foo: bar there   \n"), expected("hello-foo", "bar there"));
    }

    #[test]
    fn invalid_token_is_not_a_trailer() {
        let mut trailers = Trailers {
            cursor: "🤗: 🎉".as_bytes(),
        };
        assert_eq!(trailers.next(), None);
    }

    #[test]
    fn simple_newline_windows() {
        assert_eq!(parse("foo: bar\r\n"), expected("foo", "bar"));
    }

    #[test]
    fn folded_value_is_unfolded() {
        assert_eq!(
            parse("foo: bar\n continued\r\n  here"),
            expected("foo", "bar continued here")
        );
    }
}