1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
use crate::index::diff::Error;
use crate::{Change, CrateVersion};
use ahash::{AHashSet, RandomState};
use bstr::BStr;
use git_repository as git;
use hashbrown::raw::RawTable;

/// Collects the [`Change`]s produced while tree-diff events are fed to [`Delegate::handle`].
#[derive(Default)]
pub(crate) struct Delegate {
    /// Changes recorded so far, in the order they were pushed by `handle`.
    changes: Vec<Change>,
    /// If set, `into_result` returns this error instead of `changes`.
    /// NOTE(review): nothing visible in this file assigns it — confirm whether it is still needed.
    err: Option<Error>,
}

impl Delegate {
    pub fn handle(
        &mut self,
        change: git::object::tree::diff::Change<'_, '_, '_>,
    ) -> Result<git::object::tree::diff::Action, Error> {
        use git::bstr::ByteSlice;
        use git::object::tree::diff::change::Event::*;
        use git::objs::tree::EntryMode::*;
        fn entry_data(
            entry: git::objs::tree::EntryMode,
            id: git::Id<'_>,
        ) -> Result<Option<git::Object<'_>>, Error> {
            matches!(entry, Blob | BlobExecutable)
                .then(|| id.object())
                .transpose()
                .map_err(Into::into)
        }
        if change.location.contains(&b'.') {
            return Ok(Default::default());
        }

        match change.event {
            Addition { entry_mode, id } => {
                if let Some(obj) = entry_data(entry_mode, id)? {
                    for line in obj.data.lines() {
                        let version = version_from_json_line(line, change.location)?;
                        let change = if version.yanked {
                            Change::AddedAndYanked(version)
                        } else {
                            Change::Added(version)
                        };
                        self.changes.push(change)
                    }
                }
            }
            Deletion { entry_mode, id, .. } => {
                if entry_mode.is_no_tree() {
                    let obj = id.object()?;
                    let mut deleted = Vec::with_capacity(obj.data.lines().count());
                    for line in obj.data.lines() {
                        deleted.push(version_from_json_line(line, change.location)?);
                    }
                    self.changes.push(Change::Deleted {
                        name: change.location.to_string(),
                        versions: deleted,
                    });
                }
            }
            Modification { .. } => {
                if let Some(diff) = change.event.diff().transpose()? {
                    let mut old_lines = AHashSet::with_capacity(1024);
                    let location = change.location;
                    for line in diff.old.data.lines() {
                        old_lines.insert(line);
                    }

                    // A RawTable is used to represent a Checksum -> CrateVersion map
                    // because the checksum is already stored in the CrateVersion
                    // and we want to avoid storing the checksum twice for performance reasons
                    let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
                    let hasher = RandomState::new();

                    for line in diff.new.data.lines() {
                        // first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
                        if old_lines.remove(line) {
                            continue;
                        }
                        // no need to check if the checksum already exists in the hashmap
                        // as each checksum appear only once
                        let new_version = version_from_json_line(line, location)?;
                        new_versions.insert(
                            hasher.hash_one(new_version.checksum),
                            new_version,
                            |rehashed| hasher.hash_one(rehashed.checksum),
                        );
                    }

                    let mut deleted = Vec::new();
                    for line in old_lines.drain() {
                        let old_version = version_from_json_line(line, location)?;
                        let new_version = new_versions
                            .remove_entry(hasher.hash_one(old_version.checksum), |version| {
                                version.checksum == old_version.checksum
                            });
                        match new_version {
                            Some(new_version) => {
                                let change = match (old_version.yanked, new_version.yanked) {
                                    (true, false) => Change::Unyanked(new_version),
                                    (false, true) => Change::Yanked(new_version),
                                    _ => continue,
                                };
                                self.changes.push(change)
                            }
                            None => deleted.push(old_version),
                        }
                    }
                    if !deleted.is_empty() {
                        self.changes.push(Change::Deleted {
                            name: deleted[0].name.to_string(),
                            versions: deleted,
                        })
                    }
                    for version in new_versions.drain() {
                        let change = if version.yanked {
                            Change::AddedAndYanked(version)
                        } else {
                            Change::Added(version)
                        };
                        self.changes.push(change);
                    }
                }
            }
        }
        Ok(Default::default())
    }

    pub fn into_result(self) -> Result<Vec<Change>, Error> {
        match self.err {
            Some(err) => Err(err),
            None => Ok(self.changes),
        }
    }
}

fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result<CrateVersion, Error> {
    serde_json::from_slice(line).map_err(|err| Error::VersionDecode {
        source: err,
        file_name: file_name.into(),
        line: line.into(),
    })
}