1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
use super::CrateVersion;
use serde_json;
use std::path::Path;

use git2::{
    build::RepoBuilder, Delta, DiffFormat, Error as GitError, ErrorClass, Object, ObjectType, Oid,
    Reference, Repository, Tree,
};
use std::str;

static INDEX_GIT_URL: &str = "https://github.com/rust-lang/crates.io-index";
static LAST_SEEN_REFNAME: &str = "refs/heads/crates-index-diff_last-seen";
static EMPTY_TREE_HASH: &str = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
static LINE_ADDED_INDICATOR: char = '+';

/// A wrapper for a repository of the crates.io index.
pub struct Index {
    /// The name and path of the reference used to keep track of the last seen state of the
    /// crates.io repository. The default value is `refs/heads/crates-index-diff_last-seen`.
    pub seen_ref_name: &'static str,
    /// The crates.io repository.
    repo: Repository,
}

/// Options for use in `Index::from_path_or_cloned_with_options`
pub struct CloneOptions {
    repository_url: String,
}

impl Index {
    /// Return the crates.io repository.
    pub fn repository(&self) -> &Repository {
        &self.repo
    }

    /// Return the reference pointing to the state we have seen after calling `fetch_changes()`.
    pub fn last_seen_reference(&self) -> Result<Reference, GitError> {
        self.repo.find_reference(self.seen_ref_name)
    }

    /// Return a new `Index` instance from the given `path`, which should contain a bare or non-bare
    /// clone of the `crates.io` index.
    /// If the directory does not contain the repository or does not exist, it will be cloned from
    /// the official location automatically (with complete history).
    ///
    /// An error will occour if the repository exists and the remote URL does not match the given repository URL.
    pub fn from_path_or_cloned_with_options(
        path: impl AsRef<Path>,
        options: CloneOptions,
    ) -> Result<Index, GitError> {
        let mut repo_did_exist = true;
        let repo = Repository::open(path.as_ref()).or_else(|err| {
            if err.class() == ErrorClass::Repository {
                repo_did_exist = false;
                RepoBuilder::new()
                    .bare(true)
                    .clone(&options.repository_url, path.as_ref())
            } else {
                Err(err)
            }
        })?;

        if repo_did_exist {
            let remote = repo.find_remote("origin")?;
            let actual_remote_url = remote
                .url()
                .ok_or_else(|| GitError::from_str("did not obtain URL of remote named 'origin'"))?;
            if actual_remote_url != options.repository_url {
                return Err(GitError::from_str(&format!(
                    "Actual 'origin' remote url {:#?} did not match desired one at {:#?}",
                    actual_remote_url, options.repository_url
                )));
            }
        }

        Ok(Index {
            repo,
            seen_ref_name: LAST_SEEN_REFNAME,
        })
    }

    /// Return a new `Index` instance from the given `path`, which should contain a bare or non-bare
    /// clone of the `crates.io` index.
    /// If the directory does not contain the repository or does not exist, it will be cloned from
    /// the official location automatically (with complete history).
    pub fn from_path_or_cloned(path: impl AsRef<Path>) -> Result<Index, GitError> {
        Index::from_path_or_cloned_with_options(
            path,
            CloneOptions {
                repository_url: INDEX_GIT_URL.into(),
            },
        )
    }

    /// As `peek_changes_with_options`, but without the options.
    pub fn peek_changes(&self) -> Result<(Vec<CrateVersion>, git2::Oid), GitError> {
        self.peek_changes_with_options(None)
    }

    /// Return all `CrateVersion`s that are observed between the last time `fetch_changes(…)` was called
    /// and the latest state of the `crates.io` index repository, which is obtained by fetching
    /// the remote called `origin`.
    /// The `last_seen_reference()` will not be created or updated.
    /// The second field in the returned tuple is the commit object to which the changes were provided.
    /// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
    /// as if `fetch_changes(…)` had been called.
    pub fn peek_changes_with_options(
        &self,
        options: Option<&mut git2::FetchOptions<'_>>,
    ) -> Result<(Vec<CrateVersion>, git2::Oid), GitError> {
        let from = self
            .last_seen_reference()
            .and_then(|r| {
                r.target().ok_or_else(|| {
                    GitError::from_str("last-seen reference did not have a valid target")
                })
            })
            .or_else(|_| Oid::from_str(EMPTY_TREE_HASH))?;
        let to = {
            self.repo.find_remote("origin").and_then(|mut r| {
                r.fetch(&["refs/heads/*:refs/remotes/origin/*"], options, None)
            })?;
            let latest_fetched_commit_oid =
                self.repo.refname_to_id("refs/remotes/origin/master")?;
            latest_fetched_commit_oid
        };

        Ok((
            self.changes_from_objects(
                &self.repo.find_object(from, None)?,
                &self.repo.find_object(to, None)?,
            )?,
            to,
        ))
    }

    /// As `fetch_changes_with_options`, but without the options.
    pub fn fetch_changes(&self) -> Result<Vec<CrateVersion>, GitError> {
        self.fetch_changes_with_options(None)
    }

    /// Return all `CrateVersion`s that are observed between the last time this method was called
    /// and the latest state of the `crates.io` index repository, which is obtained by fetching
    /// the remote called `origin`.
    /// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
    /// state, which causes this method to have a different result each time it is called.
    pub fn fetch_changes_with_options(
        &self,
        options: Option<&mut git2::FetchOptions<'_>>,
    ) -> Result<Vec<CrateVersion>, GitError> {
        let (changes, to) = self.peek_changes_with_options(options)?;
        self.set_last_seen_reference(to)?;
        Ok(changes)
    }

    /// Set the last seen reference to the given Oid. It will be created if it does not yet exists.
    pub fn set_last_seen_reference(&self, to: Oid) -> Result<(), GitError> {
        self.last_seen_reference()
            .and_then(|mut seen_ref| {
                seen_ref.set_target(to, "updating seen-ref head to latest fetched commit")
            })
            .or_else(|_err| {
                self.repo.reference(
                    self.seen_ref_name,
                    to,
                    true,
                    "creating seen-ref at latest fetched commit",
                )
            })?;
        Ok(())
    }

    /// Return all `CreateVersion`s observed between `from` and `to`. Both parameter are ref-specs
    /// pointing to either a commit or a tree.
    /// Learn more about specifying revisions
    /// in the
    /// [official documentation](https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html)
    pub fn changes(
        &self,
        from: impl AsRef<str>,
        to: impl AsRef<str>,
    ) -> Result<Vec<CrateVersion>, GitError> {
        self.changes_from_objects(
            &self.repo.revparse_single(from.as_ref())?,
            &self.repo.revparse_single(to.as_ref())?,
        )
    }

    /// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
    /// to either `Commit`s or `Tree`s.
    pub fn changes_from_objects(
        &self,
        from: &Object,
        to: &Object,
    ) -> Result<Vec<CrateVersion>, GitError> {
        fn into_tree<'a>(repo: &'a Repository, obj: &Object) -> Result<Tree<'a>, GitError> {
            repo.find_tree(match obj.kind() {
                Some(ObjectType::Commit) => obj
                    .as_commit()
                    .expect("object of kind commit yields commit")
                    .tree_id(),
                _ =>
                /* let it possibly fail later */
                {
                    obj.id()
                }
            })
        }
        let diff = self.repo.diff_tree_to_tree(
            Some(&into_tree(&self.repo, from)?),
            Some(&into_tree(&self.repo, to)?),
            None,
        )?;
        let mut res: Vec<CrateVersion> = Vec::new();
        diff.print(DiffFormat::Patch, |delta, _, diffline| {
            if diffline.origin() != LINE_ADDED_INDICATOR {
                return true;
            }

            if !match delta.status() {
                Delta::Added | Delta::Modified => true,
                _ => false,
            } {
                return true;
            }

            if let Ok(c) = serde_json::from_slice(diffline.content()) {
                res.push(c)
            }
            true
        })
        .map(|_| res)
    }
}