crates_index_diff/index/diff/
delegate.rs

1use crate::index::diff::Error;
2use crate::{Change, CrateVersion};
3use ahash::{AHashSet, RandomState};
4use bstr::BStr;
5use hashbrown::raw::RawTable;
6use std::hash::Hasher;
7use std::ops::Deref;
8
9#[derive(Default)]
10pub(crate) struct Delegate {
11    changes: Vec<Change>,
12    /// All changes that happen within a file, along the line-number it happens in .
13    per_file_changes: Vec<(usize, Change)>,
14    err: Option<Error>,
15}
16
17impl Delegate {
18    pub fn handle(
19        &mut self,
20        change: gix::object::tree::diff::Change<'_, '_, '_>,
21    ) -> Result<gix::object::tree::diff::Action, Error> {
22        use gix::bstr::ByteSlice;
23        use gix::object::tree::diff::Change::*;
24        use gix::objs::tree::EntryKind::*;
25        fn entry_data(
26            entry: gix::objs::tree::EntryKind,
27            id: gix::Id<'_>,
28        ) -> Result<Option<gix::Object<'_>>, Error> {
29            matches!(entry, Blob | BlobExecutable)
30                .then(|| id.object())
31                .transpose()
32                .map_err(Into::into)
33        }
34        if change.location().contains(&b'.') {
35            return Ok(Default::default());
36        }
37
38        match change {
39            Rewrite { .. } => {
40                unreachable!("BUG: this is disabled so shouldn't happen")
41            }
42            Addition {
43                entry_mode,
44                id,
45                location,
46                ..
47            } => {
48                if let Some(obj) = entry_data(entry_mode.kind(), id)? {
49                    for line in obj.data.lines() {
50                        let version = version_from_json_line(line, location)?;
51                        let change = if version.yanked {
52                            Change::AddedAndYanked(version)
53                        } else {
54                            Change::Added(version)
55                        };
56                        self.changes.push(change)
57                    }
58                }
59            }
60            Deletion {
61                entry_mode,
62                id,
63                location,
64                ..
65            } => {
66                if entry_mode.is_no_tree() {
67                    let obj = id.object()?;
68                    let mut deleted = Vec::with_capacity(obj.data.lines().count());
69                    for line in obj.data.lines() {
70                        deleted.push(version_from_json_line(line, location)?);
71                    }
72                    self.changes.push(Change::CrateDeleted {
73                        name: location.to_string(),
74                        versions: deleted,
75                    });
76                }
77            }
78            Modification {
79                entry_mode,
80                previous_id,
81                id,
82                location,
83                ..
84            } => {
85                if entry_mode.is_blob() {
86                    let old = previous_id.object()?.into_blob();
87                    let new = id.object()?.into_blob();
88                    let mut old_lines = AHashSet::with_capacity(1024);
89                    let location = location;
90                    for (number, line) in old.data.lines().enumerate() {
91                        old_lines.insert(Line(number, line));
92                    }
93
94                    // A RawTable is used to represent a Checksum -> CrateVersion map
95                    // because the checksum is already stored in the CrateVersion
96                    // and we want to avoid storing the checksum twice for performance reasons
97                    let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
98                    let hasher = RandomState::new();
99
100                    for (number, line) in new.data.lines().enumerate() {
101                        // first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
102                        if old_lines.remove(&Line(number, line)) {
103                            continue;
104                        }
105                        // no need to check if the checksum already exists in the hashmap
106                        // as each checksum appears only once
107                        let new_version = version_from_json_line(line, location)?;
108                        new_versions.insert(
109                            hasher.hash_one(new_version.checksum),
110                            (number, new_version),
111                            |rehashed| hasher.hash_one(rehashed.1.checksum),
112                        );
113                    }
114
115                    for line in old_lines.drain() {
116                        let old_version = version_from_json_line(&line, location)?;
117                        let new_version = new_versions
118                            .remove_entry(hasher.hash_one(old_version.checksum), |version| {
119                                version.1.checksum == old_version.checksum
120                            });
121                        match new_version {
122                            Some((_, new_version)) => {
123                                let change = match (old_version.yanked, new_version.yanked) {
124                                    (true, false) => Change::Unyanked(new_version),
125                                    (false, true) => Change::Yanked(new_version),
126                                    _ => continue,
127                                };
128                                self.per_file_changes.push((line.0, change))
129                            }
130                            None => self
131                                .per_file_changes
132                                .push((line.0, Change::VersionDeleted(old_version))),
133                        }
134                    }
135                    for (number, version) in new_versions.drain() {
136                        let change = if version.yanked {
137                            Change::AddedAndYanked(version)
138                        } else {
139                            Change::Added(version)
140                        };
141                        self.per_file_changes.push((number, change));
142                    }
143                    self.per_file_changes.sort_by_key(|t| t.0);
144                    self.changes
145                        .extend(self.per_file_changes.drain(..).map(|t| t.1));
146                }
147            }
148        }
149        Ok(Default::default())
150    }
151
152    pub fn into_result(self) -> Result<Vec<Change>, Error> {
153        match self.err {
154            Some(err) => Err(err),
155            None => Ok(self.changes),
156        }
157    }
158}
159
/// The content of one line (second field) along with a line number (first field).
///
/// Hashing and equality look at the content only. This relies on a file never
/// containing two equal lines, which holds because each line carries a checksum.
struct Line<'a>(usize, &'a [u8]);
163
164impl std::hash::Hash for Line<'_> {
165    fn hash<H: Hasher>(&self, state: &mut H) {
166        self.1.hash(state)
167    }
168}
169
170impl PartialEq<Self> for Line<'_> {
171    fn eq(&self, other: &Self) -> bool {
172        self.1.eq(other.1)
173    }
174}
175
176impl Eq for Line<'_> {}
177
178impl<'a> Deref for Line<'a> {
179    type Target = [u8];
180
181    fn deref(&self) -> &Self::Target {
182        self.1
183    }
184}
185
186fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result<CrateVersion, Error> {
187    serde_json::from_slice(line).map_err(|err| Error::VersionDecode {
188        source: err,
189        file_name: file_name.into(),
190        line: line.into(),
191    })
192}