Skip to main content

radicle_cli/git/
unified_diff.rs

1//! Formatting support for Git's [diff format](https://git-scm.com/docs/diff-format).
2use std::fmt;
3use std::io;
4use std::path::PathBuf;
5
6use radicle_surf::diff::FileStats;
7use thiserror::Error;
8
9use radicle::git;
10use radicle_surf::diff;
11use radicle_surf::diff::{Diff, DiffContent, DiffFile, FileDiff, Hunk, Hunks, Line, Modification};
12
13use crate::terminal as term;
14
/// Errors produced while decoding from, or encoding into, the unified diff format.
#[derive(Debug, Error)]
pub enum Error {
    /// Attempt to decode from a source with no data left.
    #[error("unexpected end of file")]
    UnexpectedEof,
    /// An I/O error from the underlying reader or writer.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// Catchall for syntax error messages.
    #[error("{0}")]
    Syntax(String),
    /// A numeric field could not be parsed as an integer.
    #[error(transparent)]
    ParseInt(#[from] std::num::ParseIntError),
    /// A byte buffer could not be converted into a UTF-8 `String`.
    #[error(transparent)]
    Utf8(#[from] std::string::FromUtf8Error),
}
30
31impl Error {
32    pub fn syntax(msg: impl ToString) -> Self {
33        Self::Syntax(msg.to_string())
34    }
35
36    #[must_use]
37    pub fn is_eof(&self) -> bool {
38        match self {
39            Self::UnexpectedEof => true,
40            Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof,
41            Self::Syntax(_) | Self::ParseInt(_) | Self::Utf8(_) => false,
42        }
43    }
44}
45
/// The kind of FileDiff Header which can be used to print the FileDiff information which precedes
/// `Hunks`.
///
/// Each variant carries only the data its header lines need.
#[derive(Debug, Clone, PartialEq)]
pub enum FileHeader {
    /// Header for a file that was added.
    Added {
        /// Path of the added file.
        path: PathBuf,
        /// New side of the file.
        new: DiffFile,
        /// Whether the file's diff content is binary.
        binary: bool,
    },
    /// Header for a file that was copied.
    Copied {
        /// Path the file was copied from.
        old_path: PathBuf,
        /// Path the file was copied to.
        new_path: PathBuf,
    },
    /// Header for a file that was deleted.
    Deleted {
        /// Path of the deleted file.
        path: PathBuf,
        /// Old side of the file.
        old: DiffFile,
        /// Whether the file's diff content is binary.
        binary: bool,
    },
    /// Header for a file that was modified in place.
    Modified {
        /// Path of the modified file.
        path: PathBuf,
        /// Old side of the file.
        old: DiffFile,
        /// New side of the file.
        new: DiffFile,
        /// Whether the file's diff content is binary.
        binary: bool,
    },
    /// Header for a file that was moved (renamed).
    Moved {
        /// Path the file was moved from.
        old_path: PathBuf,
        /// Path the file was moved to.
        new_path: PathBuf,
    },
}
75
76impl std::convert::From<&FileDiff> for FileHeader {
77    // TODO: Pathnames with 'unusual names' need to be quoted.
78    fn from(value: &FileDiff) -> Self {
79        match value {
80            FileDiff::Modified(v) => FileHeader::Modified {
81                path: v.path.clone(),
82                old: v.old.clone(),
83                new: v.new.clone(),
84                binary: matches!(v.diff, DiffContent::Binary),
85            },
86            FileDiff::Added(v) => FileHeader::Added {
87                path: v.path.clone(),
88                new: v.new.clone(),
89                binary: matches!(v.diff, DiffContent::Binary),
90            },
91            FileDiff::Copied(c) => FileHeader::Copied {
92                old_path: c.old_path.clone(),
93                new_path: c.new_path.clone(),
94            },
95            FileDiff::Deleted(v) => FileHeader::Deleted {
96                path: v.path.clone(),
97                old: v.old.clone(),
98                binary: matches!(v.diff, DiffContent::Binary),
99            },
100            FileDiff::Moved(v) => FileHeader::Moved {
101                old_path: v.old_path.clone(),
102                new_path: v.new_path.clone(),
103            },
104        }
105    }
106}
107
/// Meta data which precedes a `Hunk`s content.
///
/// For example:
///
/// ```text
/// @@ -24,8 +24,6 @@ use radicle_surf::diff::*;
/// ```
#[derive(Clone, Debug, Default, PartialEq)]
pub struct HunkHeader {
    /// Line the hunk started in the old file.
    pub old_line_no: u32,
    /// Number of removed and context lines.
    pub old_size: u32,
    /// Line the hunk started in the new file.
    pub new_line_no: u32,
    /// Number of added and context lines.
    pub new_size: u32,
    /// Trailing text for the Hunk Header.
    ///
    /// From Git's documentation: "Hunk headers mention the name of the function to which the hunk
    /// applies. See "Defining a custom hunk-header" in gitattributes for details of how to tailor
    /// this to specific languages." It is likely best to leave this empty when generating
    /// diffs.
    pub text: Vec<u8>,
}
130
131impl TryFrom<&Hunk<Modification>> for HunkHeader {
132    type Error = Error;
133
134    fn try_from(hunk: &Hunk<Modification>) -> Result<Self, Self::Error> {
135        let mut r = io::BufReader::new(hunk.header.as_bytes());
136        Self::decode(&mut r)
137    }
138}
139
140impl HunkHeader {
141    #[must_use]
142    pub fn old_line_range(&self) -> std::ops::Range<u32> {
143        let start: u32 = self.old_line_no;
144        let end: u32 = self.old_line_no + self.old_size;
145        start..end + 1
146    }
147
148    #[must_use]
149    pub fn new_line_range(&self) -> std::ops::Range<u32> {
150        let start: u32 = self.new_line_no;
151        let end: u32 = self.new_line_no + self.new_size;
152        start..end + 1
153    }
154}
155
156/// Diff-related types that can be decoded from the unified diff format.
157pub trait Decode: Sized {
158    /// Decode, and fail if we reach the end of the stream.
159    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error>;
160
161    /// Decode, and return a `None` if we reached the end of the stream.
162    fn try_decode(r: &mut impl io::BufRead) -> Result<Option<Self>, Error> {
163        match Self::decode(r) {
164            Ok(v) => Ok(Some(v)),
165            Err(Error::UnexpectedEof) => Ok(None),
166            Err(e) => Err(e),
167        }
168    }
169
170    /// Decode from a string input.
171    fn parse(s: &str) -> Result<Self, Error> {
172        Self::from_bytes(s.as_bytes())
173    }
174
175    /// Decode from a string input.
176    fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
177        let mut r = io::BufReader::new(bytes);
178        Self::decode(&mut r)
179    }
180}
181
182/// Diff-related types that can be encoded intro the unified diff format.
183pub trait Encode: Sized {
184    /// Encode type into diff writer.
185    fn encode(&self, w: &mut Writer) -> Result<(), Error>;
186
187    /// Encode into unified diff string.
188    fn to_unified_string(&self) -> Result<String, Error> {
189        let mut buf = Vec::new();
190        let mut w = Writer::new(&mut buf);
191
192        w.encode(self)?;
193        drop(w);
194
195        String::from_utf8(buf).map_err(Error::from)
196    }
197}
198
199impl Decode for Diff {
200    /// Decode from git's unified diff format, consuming the entire input.
201    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
202        let mut s = String::new();
203
204        r.read_to_string(&mut s)?;
205
206        let d = git::raw::Diff::from_buffer(s.as_ref())
207            .map_err(|e| Error::syntax(format!("decoding unified diff: {e}")))?;
208        let d =
209            Diff::try_from(d).map_err(|e| Error::syntax(format!("decoding unified diff: {e}")))?;
210
211        Ok(d)
212    }
213}
214
215impl Encode for Diff {
216    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
217        for fdiff in self.files() {
218            fdiff.encode(w)?;
219        }
220        Ok(())
221    }
222}
223
224impl Decode for DiffContent {
225    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
226        let mut hunks = Vec::default();
227        let mut additions = 0;
228        let mut deletions = 0;
229
230        while let Some(h) = Hunk::try_decode(r)? {
231            for l in &h.lines {
232                match l {
233                    Modification::Addition(_) => additions += 1,
234                    Modification::Deletion(_) => deletions += 1,
235                    _ => {}
236                }
237            }
238            hunks.push(h);
239        }
240
241        if hunks.is_empty() {
242            Ok(DiffContent::Empty)
243        } else {
244            // TODO: Handle case for binary.
245            Ok(DiffContent::Plain {
246                hunks: Hunks::from(hunks),
247                stats: FileStats {
248                    additions,
249                    deletions,
250                },
251                // TODO: Properly handle EndOfLine field
252                eof: diff::EofNewLine::NoneMissing,
253            })
254        }
255    }
256}
257
258impl Encode for DiffContent {
259    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
260        match self {
261            DiffContent::Plain { hunks, .. } => {
262                for h in hunks.iter() {
263                    h.encode(w)?;
264                }
265            }
266            DiffContent::Empty => {}
267            DiffContent::Binary => todo!("DiffContent::Binary encoding not implemented"),
268        }
269        Ok(())
270    }
271}
272
273impl Encode for FileDiff {
274    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
275        w.encode(&FileHeader::from(self))?;
276        match self {
277            FileDiff::Modified(f) => {
278                w.encode(&f.diff)?;
279            }
280            FileDiff::Added(f) => {
281                w.encode(&f.diff)?;
282            }
283            FileDiff::Copied(f) => {
284                w.encode(&f.diff)?;
285            }
286            FileDiff::Deleted(f) => {
287                w.encode(&f.diff)?;
288            }
289            FileDiff::Moved(f) => {
290                // Nb. We only display diffs as moves when the file was not changed.
291                w.encode(&f.diff)?;
292            }
293        }
294
295        Ok(())
296    }
297}
298
299impl Encode for FileHeader {
300    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
301        match self {
302            FileHeader::Modified { path, old, new, .. } => {
303                w.meta(format!(
304                    "diff --git a/{} b/{}",
305                    path.display(),
306                    path.display()
307                ))?;
308
309                if old.mode == new.mode {
310                    w.meta(format!(
311                        "index {}..{} {:o}",
312                        term::format::oid(*old.oid),
313                        term::format::oid(*new.oid),
314                        u32::from(old.mode.clone()),
315                    ))?;
316                } else {
317                    w.meta(format!("old mode {:o}", u32::from(old.mode.clone())))?;
318                    w.meta(format!("new mode {:o}", u32::from(new.mode.clone())))?;
319                    w.meta(format!(
320                        "index {}..{}",
321                        term::format::oid(*old.oid),
322                        term::format::oid(*new.oid)
323                    ))?;
324                }
325
326                w.meta(format!("--- a/{}", path.display()))?;
327                w.meta(format!("+++ b/{}", path.display()))?;
328            }
329            FileHeader::Added { path, new, .. } => {
330                w.meta(format!(
331                    "diff --git a/{} b/{}",
332                    path.display(),
333                    path.display()
334                ))?;
335
336                w.meta(format!("new file mode {:o}", u32::from(new.mode.clone())))?;
337                w.meta(format!(
338                    "index {}..{}",
339                    term::format::oid(git::Oid::sha1_zero()),
340                    term::format::oid(*new.oid),
341                ))?;
342
343                w.meta("--- /dev/null")?;
344                w.meta(format!("+++ b/{}", path.display()))?;
345            }
346            FileHeader::Copied { .. } => todo!(),
347            FileHeader::Deleted { path, old, .. } => {
348                w.meta(format!(
349                    "diff --git a/{} b/{}",
350                    path.display(),
351                    path.display()
352                ))?;
353
354                w.meta(format!(
355                    "deleted file mode {:o}",
356                    u32::from(old.mode.clone())
357                ))?;
358                w.meta(format!(
359                    "index {}..{}",
360                    term::format::oid(*old.oid),
361                    term::format::oid(git::Oid::sha1_zero())
362                ))?;
363
364                w.meta(format!("--- a/{}", path.display()))?;
365                w.meta("+++ /dev/null".to_string())?;
366            }
367            FileHeader::Moved { old_path, new_path } => {
368                w.meta(format!(
369                    "diff --git a/{} b/{}",
370                    old_path.display(),
371                    new_path.display()
372                ))?;
373                w.meta("similarity index 100%")?;
374                w.meta(format!("rename from {}", old_path.display()))?;
375                w.meta(format!("rename to {}", new_path.display()))?;
376            }
377        };
378        Ok(())
379    }
380}
381
382impl Decode for HunkHeader {
383    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
384        let mut line = String::default();
385        if r.read_line(&mut line)? == 0 {
386            return Err(Error::UnexpectedEof);
387        };
388
389        let mut header = HunkHeader::default();
390        let s = line
391            .strip_prefix("@@ -")
392            .ok_or(Error::syntax("missing '@@ -'"))?;
393
394        let (old, s) = s
395            .split_once(" +")
396            .ok_or(Error::syntax("missing new line information"))?;
397        let (line_no, size) = old.split_once(',').unwrap_or((old, "1"));
398
399        header.old_line_no = line_no.parse()?;
400        header.old_size = size.parse()?;
401
402        let (new, s) = s
403            .split_once(" @@")
404            .ok_or(Error::syntax("closing '@@' is missing"))?;
405        let (line_no, size) = new.split_once(',').unwrap_or((new, "1"));
406
407        header.new_line_no = line_no.parse()?;
408        header.new_size = size.parse()?;
409
410        let s = s.strip_prefix(' ').unwrap_or(s);
411        header.text = s.as_bytes().to_vec();
412
413        Ok(header)
414    }
415}
416
417impl Encode for HunkHeader {
418    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
419        let old = if self.old_size == 1 {
420            format!("{}", self.old_line_no)
421        } else {
422            format!("{},{}", self.old_line_no, self.old_size)
423        };
424        let new = if self.new_size == 1 {
425            format!("{}", self.new_line_no)
426        } else {
427            format!("{},{}", self.new_line_no, self.new_size)
428        };
429        let text = if self.text.is_empty() {
430            "".to_string()
431        } else {
432            format!(" {}", String::from_utf8_lossy(&self.text))
433        };
434        w.meta(format!("@@ -{old} +{new} @@{text}"))?;
435
436        Ok(())
437    }
438}
439
440impl Decode for Hunk<Modification> {
441    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
442        let header = HunkHeader::decode(r)?;
443
444        let mut lines = Vec::new();
445        let mut new_line: u32 = 0;
446        let mut old_line: u32 = 0;
447
448        while old_line < header.old_size || new_line < header.new_size {
449            if old_line > header.old_size {
450                return Err(Error::syntax(format!(
451                    "expected '{}' old lines",
452                    header.old_size
453                )));
454            } else if new_line > header.new_size {
455                return Err(Error::syntax(format!(
456                    "expected '{0}' new lines",
457                    header.new_size
458                )));
459            }
460
461            let Some(line) = Modification::try_decode(r)? else {
462                return Err(Error::syntax(format!(
463                    "expected '{}' old lines and '{}' new lines, but found '{}' and '{}'",
464                    header.old_size, header.new_size, old_line, new_line,
465                )));
466            };
467
468            let line = match line {
469                Modification::Addition(v) => {
470                    let l = Modification::addition(v.line, header.new_line_no + new_line);
471                    new_line += 1;
472                    l
473                }
474                Modification::Deletion(v) => {
475                    let l = Modification::deletion(v.line, header.old_line_no + old_line);
476                    old_line += 1;
477                    l
478                }
479                Modification::Context { line, .. } => {
480                    let l = Modification::Context {
481                        line,
482                        line_no_old: header.old_line_no + old_line,
483                        line_no_new: header.new_line_no + new_line,
484                    };
485                    new_line += 1;
486                    old_line += 1;
487                    l
488                }
489            };
490
491            lines.push(line);
492        }
493
494        Ok(Hunk {
495            header: Line::from(header.to_unified_string()?),
496            lines,
497            old: header.old_line_range(),
498            new: header.new_line_range(),
499        })
500    }
501}
502
503impl Encode for Hunk<Modification> {
504    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
505        // TODO: Remove trailing newlines accurately.
506        // `trim_end()` will destroy diff information if the diff has a trailing whitespace on
507        // purpose.
508        w.magenta(self.header.from_utf8_lossy().trim_end())?;
509        for l in &self.lines {
510            l.encode(w)?;
511        }
512
513        Ok(())
514    }
515}
516
517impl Decode for Modification {
518    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
519        let mut line = String::new();
520        if r.read_line(&mut line)? == 0 {
521            return Err(Error::UnexpectedEof);
522        };
523
524        let mut chars = line.chars();
525        let l = match chars.next() {
526            Some('+') => Modification::addition(chars.as_str().to_string(), 0),
527            Some('-') => Modification::deletion(chars.as_str().to_string(), 0),
528            Some(' ') => Modification::Context {
529                line: chars.as_str().to_string().into(),
530                line_no_old: 0,
531                line_no_new: 0,
532            },
533            Some(c) => {
534                return Err(Error::syntax(format!(
535                    "indicator character expected, but got '{c}'",
536                )))
537            }
538            None => return Err(Error::UnexpectedEof),
539        };
540
541        Ok(l)
542    }
543}
544
545impl Encode for Modification {
546    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
547        match self {
548            Modification::Deletion(radicle_surf::diff::Deletion { line, .. }) => {
549                let s = format!("-{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
550                w.write(s, term::Style::new(term::Color::Red))?;
551            }
552            Modification::Addition(radicle_surf::diff::Addition { line, .. }) => {
553                let s = format!("+{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
554                w.write(s, term::Style::new(term::Color::Green))?;
555            }
556            Modification::Context { line, .. } => {
557                let s = format!(" {}", String::from_utf8_lossy(line.as_bytes()).trim_end());
558                w.write(s, term::Style::default().dim())?;
559            }
560        }
561
562        Ok(())
563    }
564}
565
566/// An IO Writer with color printing to the terminal.
567pub struct Writer<'a> {
568    styled: bool,
569    stream: Box<dyn io::Write + 'a>,
570}
571
572impl<'a> Writer<'a> {
573    pub fn new(w: impl io::Write + 'a) -> Self {
574        Self {
575            styled: false,
576            stream: Box::new(w),
577        }
578    }
579
580    pub fn encode<T: Encode>(&mut self, arg: &T) -> Result<(), Error> {
581        arg.encode(self)?;
582        Ok(())
583    }
584
585    #[must_use]
586    pub fn styled(mut self, value: bool) -> Self {
587        self.styled = value;
588        self
589    }
590
591    pub fn write(&mut self, s: impl fmt::Display, style: term::Style) -> io::Result<()> {
592        #[cfg(windows)]
593        const EOL: &str = "\r\n";
594
595        #[cfg(not(windows))]
596        const EOL: &str = "\n";
597
598        if self.styled {
599            write!(
600                self.stream,
601                "{}{EOL}",
602                term::Paint::new(s).with_style(style)
603            )
604        } else {
605            write!(self.stream, "{s}{EOL}")
606        }
607    }
608
609    pub fn meta(&mut self, s: impl fmt::Display) -> io::Result<()> {
610        self.write(s, term::Style::new(term::Color::Yellow))
611    }
612
613    pub fn magenta(&mut self, s: impl fmt::Display) -> io::Result<()> {
614        self.write(s, term::Style::new(term::Color::Magenta))
615    }
616}
617
#[cfg(test)]
mod test {
    use super::*;

    /// Fixture holding a complete diff, file headers included.
    const DIFF: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/diff.diff"
    ));

    /// Fixture holding only hunks, without any file header.
    const DIFF_BODY: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/diff_body.diff"
    ));

    /// Decoding then re-encoding a complete diff reproduces the fixture text.
    #[test]
    fn test_diff_encode_decode_diff() {
        let decoded = diff::Diff::parse(DIFF).unwrap();
        let encoded = decoded.to_unified_string().unwrap();

        assert_eq!(DIFF, encoded);
    }

    /// Decoding then re-encoding bare hunks reproduces the fixture text.
    #[test]
    fn test_diff_content_encode_decode_content() {
        let decoded = diff::DiffContent::parse(DIFF_BODY).unwrap();
        let encoded = decoded.to_unified_string().unwrap();

        assert_eq!(DIFF_BODY, encoded);
    }

    // TODO: Test parsing a real diff from this repository.
}