1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
use crate::{Change, Index};
use git_repository as git;
use git_repository::prelude::ObjectIdExt;
use git_repository::refs::transaction::PreviousValue;
use std::convert::TryFrom;

mod delegate;
use delegate::Delegate;

/// The error returned by methods dealing with obtaining index changes.
///
/// Every variant except `VersionDecode` wraps an error of an underlying library via `#[from]`,
/// which is what allows the methods of `Index` to propagate those failures with `?`.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// Any `git2` operation failed. Besides the fetch itself this also covers opening the
    /// repository with `git2` and resolving the remote tracking reference afterwards, as all of
    /// these are propagated with `?` in `peek_changes_with_options()`.
    #[error("Failed to fetch crates.io index repository")]
    Fetch(#[from] git2::Error),
    /// The reference marking the last seen commit could not be created or changed,
    /// see `set_last_seen_reference()`.
    #[error("Couldn't update marker reference")]
    ReferenceEdit(#[from] git::reference::edit::Error),
    /// One of the rev-specs passed to `changes()` could not be parsed.
    #[error("Failed to parse rev-spec to determine which revisions to diff")]
    RevParse(#[from] git::revision::spec::parse::Error),
    /// An object that was expected to exist could not be found in the object database.
    // NOTE(review): presumably raised both when looking up the `from`/`to` objects in
    // `changes_between_commits()` and by the `delegate` module when reading blobs - confirm.
    #[error("Couldn't find blob that showed up when diffing trees")]
    FindObject(#[from] git::object::find::existing::Error),
    /// An object handed to `changes_between_commits()` could not be peeled to a tree.
    #[error("Couldn't get the tree of a commit for diffing purposes")]
    PeelToTree(#[from] git::object::peel::to_kind::Error),
    /// The tree-to-tree diff in `changes_between_commits()` failed.
    #[error("Failed to diff two trees to find changed crates")]
    Diff(#[from] git::object::tree::diff::Error),
    /// A line of an index file could not be decoded as crate version.
    // NOTE(review): not constructed in this file; presumably produced by the `delegate`
    // module while handling changed blobs - confirm.
    #[error("Failed to decode {line:?} in file {file_name:?} as crate version")]
    VersionDecode {
        // The underlying JSON decoding failure; `thiserror` exposes a field named `source`
        // through `std::error::Error::source()`.
        source: serde_json::Error,
        // Name of the file the line was read from, interpolated into the error message.
        file_name: bstr::BString,
        // The offending line, interpolated into the error message.
        line: bstr::BString,
    },
}

/// Find changes without modifying the underling repository
impl Index {
    /// As `peek_changes_with_options`, but without the options.
    pub fn peek_changes(&self) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
        self.peek_changes_with_options(None)
    }

    /// Return all `Change`s that are observed between the last time `fetch_changes(…)` was called
    /// and the latest state of the `crates.io` index repository, which is obtained by fetching
    /// the remote called `origin`.
    /// The `last_seen_reference()` will not be created or updated.
    /// The second field in the returned tuple is the commit object to which the changes were provided.
    /// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
    /// as if `fetch_changes(…)` had been called.
    ///
    /// # Resource Usage
    ///
    /// As this method fetches the git repository, loose objects or small packs may be created. Over time,
    /// these will accumulate and either slow down subsequent operations, or cause them to fail due to exhaustion
    /// of the maximum number of open file handles as configured with `ulimit`.
    ///
    /// Thus it is advised for the caller to run `git gc` occasionally based on their own requirements and usage patterns.
    pub fn peek_changes_with_options(
        &self,
        options: Option<&mut git2::FetchOptions<'_>>,
    ) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
        let repo = &self.repo;
        let from = repo
            .find_reference(self.seen_ref_name)
            .ok()
            .and_then(|r| r.try_id().map(|id| id.detach()))
            .unwrap_or_else(|| git::hash::ObjectId::empty_tree(repo.object_hash()));
        let to = {
            let repo = git2::Repository::open(repo.git_dir())?;
            repo.find_remote(self.remote_name).and_then(|mut r| {
                r.fetch(
                    &[format!(
                        "+refs/heads/{branch}:refs/remotes/{remote}/{branch}",
                        remote = self.remote_name,
                        branch = self.branch_name,
                    )],
                    options,
                    None,
                )
            })?;
            git::hash::ObjectId::try_from(
                repo.refname_to_id(&format!(
                    "refs/remotes/{}/{}",
                    self.remote_name, self.branch_name
                ))?
                .as_bytes(),
            )
            .expect("valid oid")
        };

        Ok((self.changes_between_commits(from, to)?, to))
    }

    /// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
    /// to either `Commit`s or `Tree`s.
    pub fn changes_between_commits(
        &self,
        from: impl Into<git::hash::ObjectId>,
        to: impl Into<git::hash::ObjectId>,
    ) -> Result<Vec<Change>, Error> {
        let into_tree = |id: git::hash::ObjectId| -> Result<git::Tree<'_>, Error> {
            Ok(id
                .attach(&self.repo)
                .object()?
                .peel_to_kind(git::object::Kind::Tree)?
                .into_tree())
        };
        let from = into_tree(from.into())?;
        let to = into_tree(to.into())?;
        let mut delegate = Delegate::default();
        from.changes()
            .track_filename()
            .for_each_to_obtain_tree(&to, |change| delegate.handle(change))?;
        delegate.into_result()
    }
}

/// Find changes while changing the underlying repository in one way or another.
impl Index {
    /// As `fetch_changes_with_options`, but without the options.
    pub fn fetch_changes(&self) -> Result<Vec<Change>, Error> {
        self.fetch_changes_with_options(None)
    }

    /// Return all `Change`s that are observed between the last time this method was called
    /// and the latest state of the `crates.io` index repository, which is obtained by fetching
    /// the remote called `origin`.
    /// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
    /// state, which causes this method to have a different result each time it is called.
    ///
    /// # Resource Usage
    ///
    /// As this method fetches the git repository, loose objects or small packs may be created. Over time,
    /// these will accumulate and either slow down subsequent operations, or cause them to fail due to exhaustion
    /// of the maximum number of open file handles as configured with `ulimit`.
    ///
    /// Thus it is advised for the caller to run `git gc` occasionally based on their own requirements and usage patterns.
    pub fn fetch_changes_with_options(
        &self,
        options: Option<&mut git2::FetchOptions<'_>>,
    ) -> Result<Vec<Change>, Error> {
        let (changes, to) = self.peek_changes_with_options(options)?;
        self.set_last_seen_reference(to)?;
        Ok(changes)
    }

    /// Set the last seen reference to the given Oid. It will be created if it does not yet exists.
    pub fn set_last_seen_reference(&self, to: git::hash::ObjectId) -> Result<(), Error> {
        let repo = self.repository();
        repo.reference(
            self.seen_ref_name,
            to,
            PreviousValue::Any,
            "updating seen-ref head to latest fetched commit",
        )?;
        Ok(())
    }

    /// Return all `CreateVersion`s observed between `from` and `to`. Both parameter are ref-specs
    /// pointing to either a commit or a tree.
    /// Learn more about specifying revisions
    /// in the
    /// [official documentation](https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html)
    pub fn changes(
        &self,
        from: impl AsRef<str>,
        to: impl AsRef<str>,
    ) -> Result<Vec<Change>, Error> {
        let repo = self.repository();
        let from = repo
            .rev_parse(from.as_ref())?
            .single()
            .expect("revspec was not a range")
            .detach();
        let to = repo
            .rev_parse(to.as_ref())?
            .single()
            .expect("revspec was not a range")
            .detach();
        self.changes_between_commits(from, to)
    }
}