radicle_cli/git/
unified_diff.rs

1//! Formatting support for Git's [diff format](https://git-scm.com/docs/diff-format).
2use std::fmt;
3use std::io;
4use std::path::PathBuf;
5
6use radicle_surf::diff::FileStats;
7use thiserror::Error;
8
9use radicle::git;
10use radicle::git::raw::Oid;
11use radicle_surf::diff;
12use radicle_surf::diff::{Diff, DiffContent, DiffFile, FileDiff, Hunk, Hunks, Line, Modification};
13
14use crate::terminal as term;
15
/// Errors produced while encoding to, or decoding from, the unified diff format.
#[derive(Debug, Error)]
pub enum Error {
    /// Attempt to decode from a source with no data left.
    #[error("unexpected end of file")]
    UnexpectedEof,
    /// An I/O error, converted automatically from [`io::Error`].
    #[error(transparent)]
    Io(#[from] io::Error),
    /// Catchall for syntax error messages.
    #[error("{0}")]
    Syntax(String),
    /// A number in the input could not be parsed as an integer.
    #[error(transparent)]
    ParseInt(#[from] std::num::ParseIntError),
    /// Bytes could not be converted into a UTF-8 `String`.
    #[error(transparent)]
    Utf8(#[from] std::string::FromUtf8Error),
}
31
32impl Error {
33    pub fn syntax(msg: impl ToString) -> Self {
34        Self::Syntax(msg.to_string())
35    }
36
37    pub fn is_eof(&self) -> bool {
38        match self {
39            Self::UnexpectedEof => true,
40            Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof,
41            _ => false,
42        }
43    }
44}
45
/// The kind of FileDiff Header which can be used to print the FileDiff information which precedes
/// `Hunks`.
#[derive(Debug, Clone, PartialEq)]
pub enum FileHeader {
    /// Header for a file that was added.
    Added {
        /// Path of the added file.
        path: PathBuf,
        /// New side of the file; its mode and object id are printed in the header.
        new: DiffFile,
        /// Set when the file's content is `DiffContent::Binary`.
        binary: bool,
    },
    /// Header for a file that was copied from another path.
    Copied {
        /// Path the file was copied from.
        old_path: PathBuf,
        /// Path the file was copied to.
        new_path: PathBuf,
    },
    /// Header for a file that was deleted.
    Deleted {
        /// Path of the deleted file.
        path: PathBuf,
        /// Old side of the file; its mode and object id are printed in the header.
        old: DiffFile,
        /// Set when the file's content is `DiffContent::Binary`.
        binary: bool,
    },
    /// Header for a file that was modified in place.
    Modified {
        /// Path of the modified file.
        path: PathBuf,
        /// Old side of the file.
        old: DiffFile,
        /// New side of the file.
        new: DiffFile,
        /// Set when the file's content is `DiffContent::Binary`.
        binary: bool,
    },
    /// Header for a file that was moved (renamed).
    Moved {
        /// Path the file was moved from.
        old_path: PathBuf,
        /// Path the file was moved to.
        new_path: PathBuf,
    },
}
75
76impl std::convert::From<&FileDiff> for FileHeader {
77    // TODO: Pathnames with 'unusual names' need to be quoted.
78    fn from(value: &FileDiff) -> Self {
79        match value {
80            FileDiff::Modified(v) => FileHeader::Modified {
81                path: v.path.clone(),
82                old: v.old.clone(),
83                new: v.new.clone(),
84                binary: matches!(v.diff, DiffContent::Binary),
85            },
86            FileDiff::Added(v) => FileHeader::Added {
87                path: v.path.clone(),
88                new: v.new.clone(),
89                binary: matches!(v.diff, DiffContent::Binary),
90            },
91            FileDiff::Copied(c) => FileHeader::Copied {
92                old_path: c.old_path.clone(),
93                new_path: c.new_path.clone(),
94            },
95            FileDiff::Deleted(v) => FileHeader::Deleted {
96                path: v.path.clone(),
97                old: v.old.clone(),
98                binary: matches!(v.diff, DiffContent::Binary),
99            },
100            FileDiff::Moved(v) => FileHeader::Moved {
101                old_path: v.old_path.clone(),
102                new_path: v.new_path.clone(),
103            },
104        }
105    }
106}
107
/// Meta data which precedes a `Hunk`s content.
///
/// For example:
///
/// ```text
/// @@ -24,8 +24,6 @@ use radicle_surf::diff::*;
/// ```
#[derive(Clone, Debug, Default, PartialEq)]
pub struct HunkHeader {
    /// Line the hunk started in the old file.
    pub old_line_no: u32,
    /// Number of removed and context lines.
    pub old_size: u32,
    /// Line the hunk started in the new file.
    pub new_line_no: u32,
    /// Number of added and context lines.
    pub new_size: u32,
    /// Trailing text for the Hunk Header.
    ///
    /// From Git's documentation: "Hunk headers mention the name of the function to which the hunk
    /// applies. See "Defining a custom hunk-header" in gitattributes for details of how to tailor
    /// this to specific languages." It is likely best to leave this empty when generating
    /// diffs.
    pub text: Vec<u8>,
}
130
131impl TryFrom<&Hunk<Modification>> for HunkHeader {
132    type Error = Error;
133
134    fn try_from(hunk: &Hunk<Modification>) -> Result<Self, Self::Error> {
135        let mut r = io::BufReader::new(hunk.header.as_bytes());
136        Self::decode(&mut r)
137    }
138}
139
140impl HunkHeader {
141    pub fn old_line_range(&self) -> std::ops::Range<u32> {
142        let start: u32 = self.old_line_no;
143        let end: u32 = self.old_line_no + self.old_size;
144        start..end + 1
145    }
146
147    pub fn new_line_range(&self) -> std::ops::Range<u32> {
148        let start: u32 = self.new_line_no;
149        let end: u32 = self.new_line_no + self.new_size;
150        start..end + 1
151    }
152}
153
154/// Diff-related types that can be decoded from the unified diff format.
155pub trait Decode: Sized {
156    /// Decode, and fail if we reach the end of the stream.
157    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error>;
158
159    /// Decode, and return a `None` if we reached the end of the stream.
160    fn try_decode(r: &mut impl io::BufRead) -> Result<Option<Self>, Error> {
161        match Self::decode(r) {
162            Ok(v) => Ok(Some(v)),
163            Err(Error::UnexpectedEof) => Ok(None),
164            Err(e) => Err(e),
165        }
166    }
167
168    /// Decode from a string input.
169    fn parse(s: &str) -> Result<Self, Error> {
170        Self::from_bytes(s.as_bytes())
171    }
172
173    /// Decode from a string input.
174    fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
175        let mut r = io::BufReader::new(bytes);
176        Self::decode(&mut r)
177    }
178}
179
180/// Diff-related types that can be encoded intro the unified diff format.
181pub trait Encode: Sized {
182    /// Encode type into diff writer.
183    fn encode(&self, w: &mut Writer) -> Result<(), Error>;
184
185    /// Encode into unified diff string.
186    fn to_unified_string(&self) -> Result<String, Error> {
187        let mut buf = Vec::new();
188        let mut w = Writer::new(&mut buf);
189
190        w.encode(self)?;
191        drop(w);
192
193        String::from_utf8(buf).map_err(Error::from)
194    }
195}
196
197impl Decode for Diff {
198    /// Decode from git's unified diff format, consuming the entire input.
199    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
200        let mut s = String::new();
201
202        r.read_to_string(&mut s)?;
203
204        let d = git::raw::Diff::from_buffer(s.as_ref())
205            .map_err(|e| Error::syntax(format!("decoding unified diff: {e}")))?;
206        let d =
207            Diff::try_from(d).map_err(|e| Error::syntax(format!("decoding unified diff: {e}")))?;
208
209        Ok(d)
210    }
211}
212
213impl Encode for Diff {
214    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
215        for fdiff in self.files() {
216            fdiff.encode(w)?;
217        }
218        Ok(())
219    }
220}
221
222impl Decode for DiffContent {
223    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
224        let mut hunks = Vec::default();
225        let mut additions = 0;
226        let mut deletions = 0;
227
228        while let Some(h) = Hunk::try_decode(r)? {
229            for l in &h.lines {
230                match l {
231                    Modification::Addition(_) => additions += 1,
232                    Modification::Deletion(_) => deletions += 1,
233                    _ => {}
234                }
235            }
236            hunks.push(h);
237        }
238
239        if hunks.is_empty() {
240            Ok(DiffContent::Empty)
241        } else {
242            // TODO: Handle case for binary.
243            Ok(DiffContent::Plain {
244                hunks: Hunks::from(hunks),
245                stats: FileStats {
246                    additions,
247                    deletions,
248                },
249                // TODO: Properly handle EndOfLine field
250                eof: diff::EofNewLine::NoneMissing,
251            })
252        }
253    }
254}
255
256impl Encode for DiffContent {
257    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
258        match self {
259            DiffContent::Plain { hunks, .. } => {
260                for h in hunks.iter() {
261                    h.encode(w)?;
262                }
263            }
264            DiffContent::Empty => {}
265            DiffContent::Binary => todo!("DiffContent::Binary encoding not implemented"),
266        }
267        Ok(())
268    }
269}
270
271impl Encode for FileDiff {
272    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
273        w.encode(&FileHeader::from(self))?;
274        match self {
275            FileDiff::Modified(f) => {
276                w.encode(&f.diff)?;
277            }
278            FileDiff::Added(f) => {
279                w.encode(&f.diff)?;
280            }
281            FileDiff::Copied(f) => {
282                w.encode(&f.diff)?;
283            }
284            FileDiff::Deleted(f) => {
285                w.encode(&f.diff)?;
286            }
287            FileDiff::Moved(f) => {
288                // Nb. We only display diffs as moves when the file was not changed.
289                w.encode(&f.diff)?;
290            }
291        }
292
293        Ok(())
294    }
295}
296
297impl Encode for FileHeader {
298    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
299        match self {
300            FileHeader::Modified { path, old, new, .. } => {
301                w.meta(format!(
302                    "diff --git a/{} b/{}",
303                    path.display(),
304                    path.display()
305                ))?;
306
307                if old.mode == new.mode {
308                    w.meta(format!(
309                        "index {}..{} {:o}",
310                        term::format::oid(old.oid),
311                        term::format::oid(new.oid),
312                        u32::from(old.mode.clone()),
313                    ))?;
314                } else {
315                    w.meta(format!("old mode {:o}", u32::from(old.mode.clone())))?;
316                    w.meta(format!("new mode {:o}", u32::from(new.mode.clone())))?;
317                    w.meta(format!(
318                        "index {}..{}",
319                        term::format::oid(old.oid),
320                        term::format::oid(new.oid)
321                    ))?;
322                }
323
324                w.meta(format!("--- a/{}", path.display()))?;
325                w.meta(format!("+++ b/{}", path.display()))?;
326            }
327            FileHeader::Added { path, new, .. } => {
328                w.meta(format!(
329                    "diff --git a/{} b/{}",
330                    path.display(),
331                    path.display()
332                ))?;
333
334                w.meta(format!("new file mode {:o}", u32::from(new.mode.clone())))?;
335                w.meta(format!(
336                    "index {}..{}",
337                    term::format::oid(Oid::zero()),
338                    term::format::oid(new.oid),
339                ))?;
340
341                w.meta("--- /dev/null")?;
342                w.meta(format!("+++ b/{}", path.display()))?;
343            }
344            FileHeader::Copied { .. } => todo!(),
345            FileHeader::Deleted { path, old, .. } => {
346                w.meta(format!(
347                    "diff --git a/{} b/{}",
348                    path.display(),
349                    path.display()
350                ))?;
351
352                w.meta(format!(
353                    "deleted file mode {:o}",
354                    u32::from(old.mode.clone())
355                ))?;
356                w.meta(format!(
357                    "index {}..{}",
358                    term::format::oid(old.oid),
359                    term::format::oid(Oid::zero())
360                ))?;
361
362                w.meta(format!("--- a/{}", path.display()))?;
363                w.meta("+++ /dev/null".to_string())?;
364            }
365            FileHeader::Moved { old_path, new_path } => {
366                w.meta(format!(
367                    "diff --git a/{} b/{}",
368                    old_path.display(),
369                    new_path.display()
370                ))?;
371                w.meta("similarity index 100%")?;
372                w.meta(format!("rename from {}", old_path.display()))?;
373                w.meta(format!("rename to {}", new_path.display()))?;
374            }
375        };
376        Ok(())
377    }
378}
379
380impl Decode for HunkHeader {
381    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
382        let mut line = String::default();
383        if r.read_line(&mut line)? == 0 {
384            return Err(Error::UnexpectedEof);
385        };
386
387        let mut header = HunkHeader::default();
388        let s = line
389            .strip_prefix("@@ -")
390            .ok_or(Error::syntax("missing '@@ -'"))?;
391
392        let (old, s) = s
393            .split_once(" +")
394            .ok_or(Error::syntax("missing new line information"))?;
395        let (line_no, size) = old.split_once(',').unwrap_or((old, "1"));
396
397        header.old_line_no = line_no.parse()?;
398        header.old_size = size.parse()?;
399
400        let (new, s) = s
401            .split_once(" @@")
402            .ok_or(Error::syntax("closing '@@' is missing"))?;
403        let (line_no, size) = new.split_once(',').unwrap_or((new, "1"));
404
405        header.new_line_no = line_no.parse()?;
406        header.new_size = size.parse()?;
407
408        let s = s.strip_prefix(' ').unwrap_or(s);
409        header.text = s.as_bytes().to_vec();
410
411        Ok(header)
412    }
413}
414
415impl Encode for HunkHeader {
416    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
417        let old = if self.old_size == 1 {
418            format!("{}", self.old_line_no)
419        } else {
420            format!("{},{}", self.old_line_no, self.old_size)
421        };
422        let new = if self.new_size == 1 {
423            format!("{}", self.new_line_no)
424        } else {
425            format!("{},{}", self.new_line_no, self.new_size)
426        };
427        let text = if self.text.is_empty() {
428            "".to_string()
429        } else {
430            format!(" {}", String::from_utf8_lossy(&self.text))
431        };
432        w.meta(format!("@@ -{old} +{new} @@{text}"))?;
433
434        Ok(())
435    }
436}
437
438impl Decode for Hunk<Modification> {
439    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
440        let header = HunkHeader::decode(r)?;
441
442        let mut lines = Vec::new();
443        let mut new_line: u32 = 0;
444        let mut old_line: u32 = 0;
445
446        while old_line < header.old_size || new_line < header.new_size {
447            if old_line > header.old_size {
448                return Err(Error::syntax(format!(
449                    "expected '{}' old lines",
450                    header.old_size
451                )));
452            } else if new_line > header.new_size {
453                return Err(Error::syntax(format!(
454                    "expected '{0}' new lines",
455                    header.new_size
456                )));
457            }
458
459            let Some(line) = Modification::try_decode(r)? else {
460                return Err(Error::syntax(format!(
461                    "expected '{}' old lines and '{}' new lines, but found '{}' and '{}'",
462                    header.old_size, header.new_size, old_line, new_line,
463                )));
464            };
465
466            let line = match line {
467                Modification::Addition(v) => {
468                    let l = Modification::addition(v.line, header.new_line_no + new_line);
469                    new_line += 1;
470                    l
471                }
472                Modification::Deletion(v) => {
473                    let l = Modification::deletion(v.line, header.old_line_no + old_line);
474                    old_line += 1;
475                    l
476                }
477                Modification::Context { line, .. } => {
478                    let l = Modification::Context {
479                        line,
480                        line_no_old: header.old_line_no + old_line,
481                        line_no_new: header.new_line_no + new_line,
482                    };
483                    new_line += 1;
484                    old_line += 1;
485                    l
486                }
487            };
488
489            lines.push(line);
490        }
491
492        Ok(Hunk {
493            header: Line::from(header.to_unified_string()?),
494            lines,
495            old: header.old_line_range(),
496            new: header.new_line_range(),
497        })
498    }
499}
500
501impl Encode for Hunk<Modification> {
502    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
503        // TODO: Remove trailing newlines accurately.
504        // `trim_end()` will destroy diff information if the diff has a trailing whitespace on
505        // purpose.
506        w.magenta(self.header.from_utf8_lossy().trim_end())?;
507        for l in &self.lines {
508            l.encode(w)?;
509        }
510
511        Ok(())
512    }
513}
514
515impl Decode for Modification {
516    fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
517        let mut line = String::new();
518        if r.read_line(&mut line)? == 0 {
519            return Err(Error::UnexpectedEof);
520        };
521
522        let mut chars = line.chars();
523        let l = match chars.next() {
524            Some('+') => Modification::addition(chars.as_str().to_string(), 0),
525            Some('-') => Modification::deletion(chars.as_str().to_string(), 0),
526            Some(' ') => Modification::Context {
527                line: chars.as_str().to_string().into(),
528                line_no_old: 0,
529                line_no_new: 0,
530            },
531            Some(c) => {
532                return Err(Error::syntax(format!(
533                    "indicator character expected, but got '{c}'",
534                )))
535            }
536            None => return Err(Error::UnexpectedEof),
537        };
538
539        Ok(l)
540    }
541}
542
543impl Encode for Modification {
544    fn encode(&self, w: &mut Writer) -> Result<(), Error> {
545        match self {
546            Modification::Deletion(radicle_surf::diff::Deletion { line, .. }) => {
547                let s = format!("-{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
548                w.write(s, term::Style::new(term::Color::Red))?;
549            }
550            Modification::Addition(radicle_surf::diff::Addition { line, .. }) => {
551                let s = format!("+{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
552                w.write(s, term::Style::new(term::Color::Green))?;
553            }
554            Modification::Context { line, .. } => {
555                let s = format!(" {}", String::from_utf8_lossy(line.as_bytes()).trim_end());
556                w.write(s, term::Style::default().dim())?;
557            }
558        }
559
560        Ok(())
561    }
562}
563
/// An IO Writer with color printing to the terminal.
pub struct Writer<'a> {
    /// Whether written lines are painted with their style; `false` on construction.
    styled: bool,
    /// Destination that every line is written to.
    stream: Box<dyn io::Write + 'a>,
}
569
570impl<'a> Writer<'a> {
571    pub fn new(w: impl io::Write + 'a) -> Self {
572        Self {
573            styled: false,
574            stream: Box::new(w),
575        }
576    }
577
578    pub fn encode<T: Encode>(&mut self, arg: &T) -> Result<(), Error> {
579        arg.encode(self)?;
580        Ok(())
581    }
582
583    pub fn styled(mut self, value: bool) -> Self {
584        self.styled = value;
585        self
586    }
587
588    pub fn write(&mut self, s: impl fmt::Display, style: term::Style) -> io::Result<()> {
589        if self.styled {
590            writeln!(self.stream, "{}", term::Paint::new(s).with_style(style))
591        } else {
592            writeln!(self.stream, "{s}")
593        }
594    }
595
596    pub fn meta(&mut self, s: impl fmt::Display) -> io::Result<()> {
597        self.write(s, term::Style::new(term::Color::Yellow))
598    }
599
600    pub fn magenta(&mut self, s: impl fmt::Display) -> io::Result<()> {
601        self.write(s, term::Style::new(term::Color::Magenta))
602    }
603}
604
#[cfg(test)]
mod test {
    use super::*;

    /// Parsing a full diff and re-encoding it must reproduce the fixture verbatim.
    #[test]
    fn test_diff_encode_decode_diff() {
        let fixture = include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/tests/data/diff.diff"
        ));
        let diff_a = diff::Diff::parse(fixture).unwrap();

        assert_eq!(fixture, diff_a.to_unified_string().unwrap());
    }

    /// Parsing only the hunks of a diff and re-encoding them must reproduce the fixture
    /// verbatim.
    #[test]
    fn test_diff_content_encode_decode_content() {
        let fixture = include_str!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/tests/data/diff_body.diff"
        ));
        let diff_content = diff::DiffContent::parse(fixture).unwrap();

        assert_eq!(fixture, diff_content.to_unified_string().unwrap());
    }

    // TODO: Test parsing a real diff from this repository.
}