Skip to main content

cargo/sources/git/
utils.rs

1use crate::core::GitReference;
2use crate::util::errors::{CargoResult, CargoResultExt};
3use crate::util::paths;
4use crate::util::process_builder::process;
5use crate::util::{network, Config, IntoUrl, Progress};
6use curl::easy::{Easy, List};
7use git2::{self, ObjectType};
8use log::{debug, info};
9use serde::ser;
10use serde::Serialize;
11use std::env;
12use std::fmt;
13use std::fs::File;
14use std::mem;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use url::Url;
18
/// A resolved git revision, stored as the `git2::Oid` of the object it names.
#[derive(PartialEq, Clone, Debug)]
pub struct GitRevision(git2::Oid);
21
22impl ser::Serialize for GitRevision {
23    fn serialize<S: ser::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
24        serialize_str(self, s)
25    }
26}
27
28fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
29where
30    T: fmt::Display,
31    S: ser::Serializer,
32{
33    s.collect_str(t)
34}
35
36impl fmt::Display for GitRevision {
37    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38        fmt::Display::fmt(&self.0, f)
39    }
40}
41
/// An abbreviated object id, held in the `git2::Buf` that
/// `git2::Object::short_id` produced (see `GitDatabase::to_short_id`).
pub struct GitShortID(git2::Buf);
43
44impl GitShortID {
45    pub fn as_str(&self) -> &str {
46        self.0.as_str().unwrap()
47    }
48}
49
/// `GitRemote` represents a remote repository. It gets cloned into a local
/// `GitDatabase`.
#[derive(PartialEq, Clone, Debug, Serialize)]
pub struct GitRemote {
    /// Location of the remote repository; serialized as a string via
    /// `serialize_str`.
    #[serde(serialize_with = "serialize_str")]
    url: Url,
}
57
/// `GitDatabase` is a local clone of a remote repository's database. Multiple
/// `GitCheckout`s can be cloned from this `GitDatabase`.
#[derive(Serialize)]
pub struct GitDatabase {
    /// The remote this database was created from.
    remote: GitRemote,
    /// Filesystem path of the database repository.
    path: PathBuf,
    /// Open handle to the repository at `path`; not serialized.
    #[serde(skip_serializing)]
    repo: git2::Repository,
}
67
/// `GitCheckout` is a local checkout of a particular revision of a
/// `GitDatabase`. The revision is already resolved by the time a checkout is
/// created; `clone_into` clones the database to the checkout location and
/// resets the working tree to that revision, returning an `anyhow::Error` if
/// any step fails.
#[derive(Serialize)]
pub struct GitCheckout<'a> {
    /// The database this checkout was cloned from.
    database: &'a GitDatabase,
    /// Filesystem path of the checkout.
    location: PathBuf,
    /// The revision the checkout is (or is being) reset to.
    revision: GitRevision,
    /// Open handle to the repository at `location`; not serialized.
    #[serde(skip_serializing)]
    repo: git2::Repository,
}
79
80// Implementations
81
82impl GitRemote {
83    pub fn new(url: &Url) -> GitRemote {
84        GitRemote { url: url.clone() }
85    }
86
87    pub fn url(&self) -> &Url {
88        &self.url
89    }
90
91    pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<GitRevision> {
92        reference.resolve(&self.db_at(path)?.repo)
93    }
94
95    pub fn checkout(
96        &self,
97        into: &Path,
98        reference: &GitReference,
99        cargo_config: &Config,
100    ) -> CargoResult<(GitDatabase, GitRevision)> {
101        let mut repo_and_rev = None;
102        if let Ok(mut repo) = git2::Repository::open(into) {
103            self.fetch_into(&mut repo, cargo_config)
104                .chain_err(|| format!("failed to fetch into {}", into.display()))?;
105            if let Ok(rev) = reference.resolve(&repo) {
106                repo_and_rev = Some((repo, rev));
107            }
108        }
109        let (repo, rev) = match repo_and_rev {
110            Some(pair) => pair,
111            None => {
112                let repo = self
113                    .clone_into(into, cargo_config)
114                    .chain_err(|| format!("failed to clone into: {}", into.display()))?;
115                let rev = reference.resolve(&repo)?;
116                (repo, rev)
117            }
118        };
119
120        Ok((
121            GitDatabase {
122                remote: self.clone(),
123                path: into.to_path_buf(),
124                repo,
125            },
126            rev,
127        ))
128    }
129
130    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
131        let repo = git2::Repository::open(db_path)?;
132        Ok(GitDatabase {
133            remote: self.clone(),
134            path: db_path.to_path_buf(),
135            repo,
136        })
137    }
138
139    fn fetch_into(&self, dst: &mut git2::Repository, cargo_config: &Config) -> CargoResult<()> {
140        // Create a local anonymous remote in the repository to fetch the url
141        let refspec = "refs/heads/*:refs/heads/*";
142        fetch(dst, self.url.as_str(), refspec, cargo_config)
143    }
144
145    fn clone_into(&self, dst: &Path, cargo_config: &Config) -> CargoResult<git2::Repository> {
146        if dst.exists() {
147            paths::remove_dir_all(dst)?;
148        }
149        paths::create_dir_all(dst)?;
150        let mut repo = init(dst, true)?;
151        fetch(
152            &mut repo,
153            self.url.as_str(),
154            "refs/heads/*:refs/heads/*",
155            cargo_config,
156        )?;
157        Ok(repo)
158    }
159}
160
161impl GitDatabase {
162    pub fn copy_to(
163        &self,
164        rev: GitRevision,
165        dest: &Path,
166        cargo_config: &Config,
167    ) -> CargoResult<GitCheckout<'_>> {
168        let mut checkout = None;
169        if let Ok(repo) = git2::Repository::open(dest) {
170            let mut co = GitCheckout::new(dest, self, rev.clone(), repo);
171            if !co.is_fresh() {
172                // After a successful fetch operation do a sanity check to
173                // ensure we've got the object in our database to reset to. This
174                // can fail sometimes for corrupt repositories where the fetch
175                // operation succeeds but the object isn't actually there.
176                co.fetch(cargo_config)?;
177                if co.has_object() {
178                    co.reset(cargo_config)?;
179                    assert!(co.is_fresh());
180                    checkout = Some(co);
181                }
182            } else {
183                checkout = Some(co);
184            }
185        };
186        let checkout = match checkout {
187            Some(c) => c,
188            None => GitCheckout::clone_into(dest, self, rev, cargo_config)?,
189        };
190        checkout.update_submodules(cargo_config)?;
191        Ok(checkout)
192    }
193
194    pub fn to_short_id(&self, revision: &GitRevision) -> CargoResult<GitShortID> {
195        let obj = self.repo.find_object(revision.0, None)?;
196        Ok(GitShortID(obj.short_id()?))
197    }
198
199    pub fn has_ref(&self, reference: &str) -> CargoResult<()> {
200        self.repo.revparse_single(reference)?;
201        Ok(())
202    }
203}
204
205impl GitReference {
206    fn resolve(&self, repo: &git2::Repository) -> CargoResult<GitRevision> {
207        let id = match *self {
208            GitReference::Tag(ref s) => (|| -> CargoResult<git2::Oid> {
209                let refname = format!("refs/tags/{}", s);
210                let id = repo.refname_to_id(&refname)?;
211                let obj = repo.find_object(id, None)?;
212                let obj = obj.peel(ObjectType::Commit)?;
213                Ok(obj.id())
214            })()
215            .chain_err(|| format!("failed to find tag `{}`", s))?,
216            GitReference::Branch(ref s) => {
217                let b = repo
218                    .find_branch(s, git2::BranchType::Local)
219                    .chain_err(|| format!("failed to find branch `{}`", s))?;
220                b.get()
221                    .target()
222                    .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
223            }
224            GitReference::Rev(ref s) => {
225                let obj = repo.revparse_single(s)?;
226                match obj.as_tag() {
227                    Some(tag) => tag.target_id(),
228                    None => obj.id(),
229                }
230            }
231        };
232        Ok(GitRevision(id))
233    }
234}
235
236impl<'a> GitCheckout<'a> {
237    fn new(
238        path: &Path,
239        database: &'a GitDatabase,
240        revision: GitRevision,
241        repo: git2::Repository,
242    ) -> GitCheckout<'a> {
243        GitCheckout {
244            location: path.to_path_buf(),
245            database,
246            revision,
247            repo,
248        }
249    }
250
251    fn clone_into(
252        into: &Path,
253        database: &'a GitDatabase,
254        revision: GitRevision,
255        config: &Config,
256    ) -> CargoResult<GitCheckout<'a>> {
257        let dirname = into.parent().unwrap();
258        paths::create_dir_all(&dirname)?;
259        if into.exists() {
260            paths::remove_dir_all(into)?;
261        }
262
263        // we're doing a local filesystem-to-filesystem clone so there should
264        // be no need to respect global configuration options, so pass in
265        // an empty instance of `git2::Config` below.
266        let git_config = git2::Config::new()?;
267
268        // Clone the repository, but make sure we use the "local" option in
269        // libgit2 which will attempt to use hardlinks to set up the database.
270        // This should speed up the clone operation quite a bit if it works.
271        //
272        // Note that we still use the same fetch options because while we don't
273        // need authentication information we may want progress bars and such.
274        let url = database.path.into_url()?;
275        let mut repo = None;
276        with_fetch_options(&git_config, url.as_str(), config, &mut |fopts| {
277            let mut checkout = git2::build::CheckoutBuilder::new();
278            checkout.dry_run(); // we'll do this below during a `reset`
279
280            let r = git2::build::RepoBuilder::new()
281                // use hard links and/or copy the database, we're doing a
282                // filesystem clone so this'll speed things up quite a bit.
283                .clone_local(git2::build::CloneLocal::Local)
284                .with_checkout(checkout)
285                .fetch_options(fopts)
286                // .remote_create(|repo, _name, url| repo.remote_anonymous(url))
287                .clone(url.as_str(), into)?;
288            repo = Some(r);
289            Ok(())
290        })?;
291        let repo = repo.unwrap();
292
293        let checkout = GitCheckout::new(into, database, revision, repo);
294        checkout.reset(config)?;
295        Ok(checkout)
296    }
297
298    fn is_fresh(&self) -> bool {
299        match self.repo.revparse_single("HEAD") {
300            Ok(ref head) if head.id() == self.revision.0 => {
301                // See comments in reset() for why we check this
302                self.location.join(".cargo-ok").exists()
303            }
304            _ => false,
305        }
306    }
307
308    fn fetch(&mut self, cargo_config: &Config) -> CargoResult<()> {
309        info!("fetch {}", self.repo.path().display());
310        let url = self.database.path.into_url()?;
311        let refspec = "refs/heads/*:refs/heads/*";
312        fetch(&mut self.repo, url.as_str(), refspec, cargo_config)?;
313        Ok(())
314    }
315
316    fn has_object(&self) -> bool {
317        self.repo.find_object(self.revision.0, None).is_ok()
318    }
319
320    fn reset(&self, config: &Config) -> CargoResult<()> {
321        // If we're interrupted while performing this reset (e.g., we die because
322        // of a signal) Cargo needs to be sure to try to check out this repo
323        // again on the next go-round.
324        //
325        // To enable this we have a dummy file in our checkout, .cargo-ok, which
326        // if present means that the repo has been successfully reset and is
327        // ready to go. Hence if we start to do a reset, we make sure this file
328        // *doesn't* exist, and then once we're done we create the file.
329        let ok_file = self.location.join(".cargo-ok");
330        let _ = paths::remove_file(&ok_file);
331        info!("reset {} to {}", self.repo.path().display(), self.revision);
332        let object = self.repo.find_object(self.revision.0, None)?;
333        reset(&self.repo, &object, config)?;
334        File::create(ok_file)?;
335        Ok(())
336    }
337
338    fn update_submodules(&self, cargo_config: &Config) -> CargoResult<()> {
339        return update_submodules(&self.repo, cargo_config);
340
341        fn update_submodules(repo: &git2::Repository, cargo_config: &Config) -> CargoResult<()> {
342            info!("update submodules for: {:?}", repo.workdir().unwrap());
343
344            for mut child in repo.submodules()? {
345                update_submodule(repo, &mut child, cargo_config).chain_err(|| {
346                    format!(
347                        "failed to update submodule `{}`",
348                        child.name().unwrap_or("")
349                    )
350                })?;
351            }
352            Ok(())
353        }
354
355        fn update_submodule(
356            parent: &git2::Repository,
357            child: &mut git2::Submodule<'_>,
358            cargo_config: &Config,
359        ) -> CargoResult<()> {
360            child.init(false)?;
361            let url = child.url().ok_or_else(|| {
362                anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
363            })?;
364
365            // A submodule which is listed in .gitmodules but not actually
366            // checked out will not have a head id, so we should ignore it.
367            let head = match child.head_id() {
368                Some(head) => head,
369                None => return Ok(()),
370            };
371
372            // If the submodule hasn't been checked out yet, we need to
373            // clone it. If it has been checked out and the head is the same
374            // as the submodule's head, then we can skip an update and keep
375            // recursing.
376            let head_and_repo = child.open().and_then(|repo| {
377                let target = repo.head()?.target();
378                Ok((target, repo))
379            });
380            let mut repo = match head_and_repo {
381                Ok((head, repo)) => {
382                    if child.head_id() == head {
383                        return update_submodules(&repo, cargo_config);
384                    }
385                    repo
386                }
387                Err(..) => {
388                    let path = parent.workdir().unwrap().join(child.path());
389                    let _ = paths::remove_dir_all(&path);
390                    init(&path, false)?
391                }
392            };
393            // Fetch data from origin and reset to the head commit
394            let refspec = "refs/heads/*:refs/heads/*";
395            cargo_config
396                .shell()
397                .status("Updating", format!("git submodule `{}`", url))?;
398            fetch(&mut repo, url, refspec, cargo_config).chain_err(|| {
399                format!(
400                    "failed to fetch submodule `{}` from {}",
401                    child.name().unwrap_or(""),
402                    url
403                )
404            })?;
405
406            let obj = repo.find_object(head, None)?;
407            reset(&repo, &obj, cargo_config)?;
408            update_submodules(&repo, cargo_config)
409        }
410    }
411}
412
413/// Prepare the authentication callbacks for cloning a git repository.
414///
415/// The main purpose of this function is to construct the "authentication
416/// callback" which is used to clone a repository. This callback will attempt to
417/// find the right authentication on the system (without user input) and will
418/// guide libgit2 in doing so.
419///
420/// The callback is provided `allowed` types of credentials, and we try to do as
421/// much as possible based on that:
422///
423/// * Prioritize SSH keys from the local ssh agent as they're likely the most
424///   reliable. The username here is prioritized from the credential
425///   callback, then from whatever is configured in git itself, and finally
426///   we fall back to the generic user of `git`.
427///
428/// * If a username/password is allowed, then we fallback to git2-rs's
429///   implementation of the credential helper. This is what is configured
430///   with `credential.helper` in git, and is the interface for the macOS
431///   keychain, for example.
432///
433/// * After the above two have failed, we just kinda grapple attempting to
434///   return *something*.
435///
436/// If any form of authentication fails, libgit2 will repeatedly ask us for
437/// credentials until we give it a reason to not do so. To ensure we don't
438/// just sit here looping forever we keep track of authentications we've
439/// attempted and we don't try the same ones again.
440fn with_authentication<T, F>(url: &str, cfg: &git2::Config, mut f: F) -> CargoResult<T>
441where
442    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
443{
444    let mut cred_helper = git2::CredentialHelper::new(url);
445    cred_helper.config(cfg);
446
447    let mut ssh_username_requested = false;
448    let mut cred_helper_bad = None;
449    let mut ssh_agent_attempts = Vec::new();
450    let mut any_attempts = false;
451    let mut tried_sshkey = false;
452
453    let mut res = f(&mut |url, username, allowed| {
454        any_attempts = true;
455        // libgit2's "USERNAME" authentication actually means that it's just
456        // asking us for a username to keep going. This is currently only really
457        // used for SSH authentication and isn't really an authentication type.
458        // The logic currently looks like:
459        //
460        //      let user = ...;
461        //      if (user.is_null())
462        //          user = callback(USERNAME, null, ...);
463        //
464        //      callback(SSH_KEY, user, ...)
465        //
466        // So if we're being called here then we know that (a) we're using ssh
467        // authentication and (b) no username was specified in the URL that
468        // we're trying to clone. We need to guess an appropriate username here,
469        // but that may involve a few attempts. Unfortunately we can't switch
470        // usernames during one authentication session with libgit2, so to
471        // handle this we bail out of this authentication session after setting
472        // the flag `ssh_username_requested`, and then we handle this below.
473        if allowed.contains(git2::CredentialType::USERNAME) {
474            debug_assert!(username.is_none());
475            ssh_username_requested = true;
476            return Err(git2::Error::from_str("gonna try usernames later"));
477        }
478
479        // An "SSH_KEY" authentication indicates that we need some sort of SSH
480        // authentication. This can currently either come from the ssh-agent
481        // process or from a raw in-memory SSH key. Cargo only supports using
482        // ssh-agent currently.
483        //
484        // If we get called with this then the only way that should be possible
485        // is if a username is specified in the URL itself (e.g., `username` is
486        // Some), hence the unwrap() here. We try custom usernames down below.
487        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
488            // If ssh-agent authentication fails, libgit2 will keep
489            // calling this callback asking for other authentication
490            // methods to try. Make sure we only try ssh-agent once,
491            // to avoid looping forever.
492            tried_sshkey = true;
493            let username = username.unwrap();
494            debug_assert!(!ssh_username_requested);
495            ssh_agent_attempts.push(username.to_string());
496            return git2::Cred::ssh_key_from_agent(username);
497        }
498
499        // Sometimes libgit2 will ask for a username/password in plaintext. This
500        // is where Cargo would have an interactive prompt if we supported it,
501        // but we currently don't! Right now the only way we support fetching a
502        // plaintext password is through the `credential.helper` support, so
503        // fetch that here.
504        //
505        // If ssh-agent authentication fails, libgit2 will keep calling this
506        // callback asking for other authentication methods to try. Check
507        // cred_helper_bad to make sure we only try the git credentail helper
508        // once, to avoid looping forever.
509        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
510        {
511            let r = git2::Cred::credential_helper(cfg, url, username);
512            cred_helper_bad = Some(r.is_err());
513            return r;
514        }
515
516        // I'm... not sure what the DEFAULT kind of authentication is, but seems
517        // easy to support?
518        if allowed.contains(git2::CredentialType::DEFAULT) {
519            return git2::Cred::default();
520        }
521
522        // Whelp, we tried our best
523        Err(git2::Error::from_str("no authentication available"))
524    });
525
526    // Ok, so if it looks like we're going to be doing ssh authentication, we
527    // want to try a few different usernames as one wasn't specified in the URL
528    // for us to use. In order, we'll try:
529    //
530    // * A credential helper's username for this URL, if available.
531    // * This account's username.
532    // * "git"
533    //
534    // We have to restart the authentication session each time (due to
535    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
536    // call our callback, `f`, in a loop here.
537    if ssh_username_requested {
538        debug_assert!(res.is_err());
539        let mut attempts = Vec::new();
540        attempts.push("git".to_string());
541        if let Ok(s) = env::var("USER").or_else(|_| env::var("USERNAME")) {
542            attempts.push(s);
543        }
544        if let Some(ref s) = cred_helper.username {
545            attempts.push(s.clone());
546        }
547
548        while let Some(s) = attempts.pop() {
549            // We should get `USERNAME` first, where we just return our attempt,
550            // and then after that we should get `SSH_KEY`. If the first attempt
551            // fails we'll get called again, but we don't have another option so
552            // we bail out.
553            let mut attempts = 0;
554            res = f(&mut |_url, username, allowed| {
555                if allowed.contains(git2::CredentialType::USERNAME) {
556                    return git2::Cred::username(&s);
557                }
558                if allowed.contains(git2::CredentialType::SSH_KEY) {
559                    debug_assert_eq!(Some(&s[..]), username);
560                    attempts += 1;
561                    if attempts == 1 {
562                        ssh_agent_attempts.push(s.to_string());
563                        return git2::Cred::ssh_key_from_agent(&s);
564                    }
565                }
566                Err(git2::Error::from_str("no authentication available"))
567            });
568
569            // If we made two attempts then that means:
570            //
571            // 1. A username was requested, we returned `s`.
572            // 2. An ssh key was requested, we returned to look up `s` in the
573            //    ssh agent.
574            // 3. For whatever reason that lookup failed, so we were asked again
575            //    for another mode of authentication.
576            //
577            // Essentially, if `attempts == 2` then in theory the only error was
578            // that this username failed to authenticate (e.g., no other network
579            // errors happened). Otherwise something else is funny so we bail
580            // out.
581            if attempts != 2 {
582                break;
583            }
584        }
585    }
586
587    if res.is_ok() || !any_attempts {
588        return res.map_err(From::from);
589    }
590
591    // In the case of an authentication failure (where we tried something) then
592    // we try to give a more helpful error message about precisely what we
593    // tried.
594    let res = res.map_err(anyhow::Error::from).chain_err(|| {
595        let mut msg = "failed to authenticate when downloading \
596                       repository"
597            .to_string();
598        if !ssh_agent_attempts.is_empty() {
599            let names = ssh_agent_attempts
600                .iter()
601                .map(|s| format!("`{}`", s))
602                .collect::<Vec<_>>()
603                .join(", ");
604            msg.push_str(&format!(
605                "\nattempted ssh-agent authentication, but \
606                 none of the usernames {} succeeded",
607                names
608            ));
609        }
610        if let Some(failed_cred_helper) = cred_helper_bad {
611            if failed_cred_helper {
612                msg.push_str(
613                    "\nattempted to find username/password via \
614                     git's `credential.helper` support, but failed",
615                );
616            } else {
617                msg.push_str(
618                    "\nattempted to find username/password via \
619                     `credential.helper`, but maybe the found \
620                     credentials were incorrect",
621                );
622            }
623        }
624        msg
625    })?;
626    Ok(res)
627}
628
629fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> CargoResult<()> {
630    let mut pb = Progress::new("Checkout", config);
631    let mut opts = git2::build::CheckoutBuilder::new();
632    opts.progress(|_, cur, max| {
633        drop(pb.tick(cur, max));
634    });
635    repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
636    Ok(())
637}
638
639pub fn with_fetch_options(
640    git_config: &git2::Config,
641    url: &str,
642    config: &Config,
643    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
644) -> CargoResult<()> {
645    let mut progress = Progress::new("Fetch", config);
646    network::with_retry(config, || {
647        with_authentication(url, git_config, |f| {
648            let mut rcb = git2::RemoteCallbacks::new();
649            rcb.credentials(f);
650
651            rcb.transfer_progress(|stats| {
652                progress
653                    .tick(stats.indexed_objects(), stats.total_objects())
654                    .is_ok()
655            });
656
657            // Create a local anonymous remote in the repository to fetch the
658            // url
659            let mut opts = git2::FetchOptions::new();
660            opts.remote_callbacks(rcb)
661                .download_tags(git2::AutotagOption::All);
662            cb(opts)
663        })?;
664        Ok(())
665    })
666}
667
668pub fn fetch(
669    repo: &mut git2::Repository,
670    url: &str,
671    refspec: &str,
672    config: &Config,
673) -> CargoResult<()> {
674    if config.frozen() {
675        anyhow::bail!(
676            "attempting to update a git repository, but --frozen \
677             was specified"
678        )
679    }
680    if !config.network_allowed() {
681        anyhow::bail!("can't update a git repository in the offline mode")
682    }
683
684    // If we're fetching from GitHub, attempt GitHub's special fast path for
685    // testing if we've already got an up-to-date copy of the repository
686
687    if let Ok(url) = Url::parse(url) {
688        if url.host_str() == Some("github.com") {
689            if let Ok(oid) = repo.refname_to_id("refs/remotes/origin/master") {
690                let mut handle = config.http()?.borrow_mut();
691                debug!("attempting GitHub fast path for {}", url);
692                if github_up_to_date(&mut handle, &url, &oid) {
693                    return Ok(());
694                } else {
695                    debug!("fast path failed, falling back to a git fetch");
696                }
697            }
698        }
699    }
700
701    // We reuse repositories quite a lot, so before we go through and update the
702    // repo check to see if it's a little too old and could benefit from a gc.
703    // In theory this shouldn't be too too expensive compared to the network
704    // request we're about to issue.
705    maybe_gc_repo(repo)?;
706
707    // Unfortunately `libgit2` is notably lacking in the realm of authentication
708    // when compared to the `git` command line. As a result, allow an escape
709    // hatch for users that would prefer to use `git`-the-CLI for fetching
710    // repositories instead of `libgit2`-the-library. This should make more
711    // flavors of authentication possible while also still giving us all the
712    // speed and portability of using `libgit2`.
713    if let Some(true) = config.net_config()?.git_fetch_with_cli {
714        return fetch_with_cli(repo, url, refspec, config);
715    }
716
717    debug!("doing a fetch for {}", url);
718    let git_config = git2::Config::open_default()?;
719    with_fetch_options(&git_config, url, config, &mut |mut opts| {
720        // The `fetch` operation here may fail spuriously due to a corrupt
721        // repository. It could also fail, however, for a whole slew of other
722        // reasons (aka network related reasons). We want Cargo to automatically
723        // recover from corrupt repositories, but we don't want Cargo to stomp
724        // over other legitimate errors.
725        //
726        // Consequently we save off the error of the `fetch` operation and if it
727        // looks like a "corrupt repo" error then we blow away the repo and try
728        // again. If it looks like any other kind of error, or if we've already
729        // blown away the repository, then we want to return the error as-is.
730        let mut repo_reinitialized = false;
731        loop {
732            debug!("initiating fetch of {} from {}", refspec, url);
733            let res = repo
734                .remote_anonymous(url)?
735                .fetch(&[refspec], Some(&mut opts), None);
736            let err = match res {
737                Ok(()) => break,
738                Err(e) => e,
739            };
740            debug!("fetch failed: {}", err);
741
742            if !repo_reinitialized && err.class() == git2::ErrorClass::Reference {
743                repo_reinitialized = true;
744                debug!(
745                    "looks like this is a corrupt repository, reinitializing \
746                     and trying again"
747                );
748                if reinitialize(repo).is_ok() {
749                    continue;
750                }
751            }
752
753            return Err(err.into());
754        }
755        Ok(())
756    })
757}
758
759fn fetch_with_cli(
760    repo: &mut git2::Repository,
761    url: &str,
762    refspec: &str,
763    config: &Config,
764) -> CargoResult<()> {
765    let mut cmd = process("git");
766    cmd.arg("fetch")
767        .arg("--tags") // fetch all tags
768        .arg("--force") // handle force pushes
769        .arg("--update-head-ok") // see discussion in #2078
770        .arg(url)
771        .arg(refspec)
772        // If cargo is run by git (for example, the `exec` command in `git
773        // rebase`), the GIT_DIR is set by git and will point to the wrong
774        // location (this takes precedence over the cwd). Make sure this is
775        // unset so git will look at cwd for the repo.
776        .env_remove("GIT_DIR")
777        // The reset of these may not be necessary, but I'm including them
778        // just to be extra paranoid and avoid any issues.
779        .env_remove("GIT_WORK_TREE")
780        .env_remove("GIT_INDEX_FILE")
781        .env_remove("GIT_OBJECT_DIRECTORY")
782        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
783        .cwd(repo.path());
784    config
785        .shell()
786        .verbose(|s| s.status("Running", &cmd.to_string()))?;
787    cmd.exec_with_output()?;
788    Ok(())
789}
790
791/// Cargo has a bunch of long-lived git repositories in its global cache and
792/// some, like the index, are updated very frequently. Right now each update
793/// creates a new "pack file" inside the git database, and over time this can
794/// cause bad performance and bad current behavior in libgit2.
795///
796/// One pathological use case today is where libgit2 opens hundreds of file
797/// descriptors, getting us dangerously close to blowing out the OS limits of
798/// how many fds we can have open. This is detailed in #4403.
799///
800/// To try to combat this problem we attempt a `git gc` here. Note, though, that
801/// we may not even have `git` installed on the system! As a result we
802/// opportunistically try a `git gc` when the pack directory looks too big, and
803/// failing that we just blow away the repository and start over.
804fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
805    // Here we arbitrarily declare that if you have more than 100 files in your
806    // `pack` folder that we need to do a gc.
807    let entries = match repo.path().join("objects/pack").read_dir() {
808        Ok(e) => e.count(),
809        Err(_) => {
810            debug!("skipping gc as pack dir appears gone");
811            return Ok(());
812        }
813    };
814    let max = env::var("__CARGO_PACKFILE_LIMIT")
815        .ok()
816        .and_then(|s| s.parse::<usize>().ok())
817        .unwrap_or(100);
818    if entries < max {
819        debug!("skipping gc as there's only {} pack files", entries);
820        return Ok(());
821    }
822
823    // First up, try a literal `git gc` by shelling out to git. This is pretty
824    // likely to fail though as we may not have `git` installed. Note that
825    // libgit2 doesn't currently implement the gc operation, so there's no
826    // equivalent there.
827    match Command::new("git")
828        .arg("gc")
829        .current_dir(repo.path())
830        .output()
831    {
832        Ok(out) => {
833            debug!(
834                "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
835                out.status,
836                String::from_utf8_lossy(&out.stdout),
837                String::from_utf8_lossy(&out.stderr)
838            );
839            if out.status.success() {
840                let new = git2::Repository::open(repo.path())?;
841                mem::replace(repo, new);
842                return Ok(());
843            }
844        }
845        Err(e) => debug!("git-gc failed to spawn: {}", e),
846    }
847
848    // Alright all else failed, let's start over.
849    reinitialize(repo)
850}
851
852fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
853    // Here we want to drop the current repository object pointed to by `repo`,
854    // so we initialize temporary repository in a sub-folder, blow away the
855    // existing git folder, and then recreate the git repo. Finally we blow away
856    // the `tmp` folder we allocated.
857    let path = repo.path().to_path_buf();
858    debug!("reinitializing git repo at {:?}", path);
859    let tmp = path.join("tmp");
860    let bare = !repo.path().ends_with(".git");
861    *repo = init(&tmp, false)?;
862    for entry in path.read_dir()? {
863        let entry = entry?;
864        if entry.file_name().to_str() == Some("tmp") {
865            continue;
866        }
867        let path = entry.path();
868        drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
869    }
870    *repo = init(&path, bare)?;
871    paths::remove_dir_all(&tmp)?;
872    Ok(())
873}
874
875fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
876    let mut opts = git2::RepositoryInitOptions::new();
877    // Skip anything related to templates, they just call all sorts of issues as
878    // we really don't want to use them yet they insist on being used. See #6240
879    // for an example issue that comes up.
880    opts.external_template(false);
881    opts.bare(bare);
882    Ok(git2::Repository::init_opts(&path, &opts)?)
883}
884
885/// Updating the index is done pretty regularly so we want it to be as fast as
886/// possible. For registries hosted on GitHub (like the crates.io index) there's
887/// a fast path available to use [1] to tell us that there's no updates to be
888/// made.
889///
890/// This function will attempt to hit that fast path and verify that the `oid`
891/// is actually the current `master` branch of the repository. If `true` is
892/// returned then no update needs to be performed, but if `false` is returned
893/// then the standard update logic still needs to happen.
894///
895/// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference
896///
897/// Note that this function should never cause an actual failure because it's
898/// just a fast path. As a result all errors are ignored in this function and we
899/// just return a `bool`. Any real errors will be reported through the normal
900/// update path above.
901fn github_up_to_date(handle: &mut Easy, url: &Url, oid: &git2::Oid) -> bool {
902    macro_rules! r#try {
903        ($e:expr) => {
904            match $e {
905                Some(e) => e,
906                None => return false,
907            }
908        };
909    }
910
911    // This expects GitHub urls in the form `github.com/user/repo` and nothing
912    // else
913    let mut pieces = r#try!(url.path_segments());
914    let username = r#try!(pieces.next());
915    let repo = r#try!(pieces.next());
916    if pieces.next().is_some() {
917        return false;
918    }
919
920    let url = format!(
921        "https://api.github.com/repos/{}/{}/commits/master",
922        username, repo
923    );
924    r#try!(handle.get(true).ok());
925    r#try!(handle.url(&url).ok());
926    r#try!(handle.useragent("cargo").ok());
927    let mut headers = List::new();
928    r#try!(headers.append("Accept: application/vnd.github.3.sha").ok());
929    r#try!(headers.append(&format!("If-None-Match: \"{}\"", oid)).ok());
930    r#try!(handle.http_headers(headers).ok());
931    r#try!(handle.perform().ok());
932
933    r#try!(handle.response_code().ok()) == 304
934}