cargo/core/source/
source_id.rs

1use std::cmp::{self, Ordering};
2use std::collections::HashSet;
3use std::fmt::{self, Formatter};
4use std::hash::{self, Hash};
5use std::path::Path;
6use std::ptr;
7use std::sync::atomic::AtomicBool;
8use std::sync::atomic::Ordering::SeqCst;
9use std::sync::Mutex;
10
11use log::trace;
12use serde::de;
13use serde::ser;
14use url::Url;
15
16use crate::core::PackageId;
17use crate::ops;
18use crate::sources::DirectorySource;
19use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX};
20use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl};
21
lazy_static::lazy_static! {
    // Process-wide interning table for `SourceIdInner` values. `SourceId::wrap`
    // leaks each distinct inner value once and records the resulting
    // `&'static` reference here, so equal inner values share one allocation.
    static ref SOURCE_ID_CACHE: Mutex<HashSet<&'static SourceIdInner>> = Mutex::new(HashSet::new());
}
25
/// Unique identifier for a source of packages.
///
/// This is a small `Copy` handle: it only stores a `&'static` reference to a
/// `SourceIdInner` that has been interned through `SOURCE_ID_CACHE`, so all
/// copies of an id point at the same underlying data.
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
    inner: &'static SourceIdInner,
}
31
/// The interned data behind a `SourceId`.
///
/// The derived `PartialEq` and `Hash` take every field into account
/// (including `precise` and `name`); these are what the interning cache keys
/// on.
#[derive(PartialEq, Eq, Clone, Debug, Hash)]
struct SourceIdInner {
    /// The source URL.
    url: Url,
    /// The canonical version of the above URL.
    canonical_url: CanonicalUrl,
    /// The source kind.
    kind: SourceKind,
    /// For example, the exact Git revision of the specified branch for a Git Source.
    precise: Option<String>,
    /// Name of the registry source for alternative registries.
    ///
    /// WARNING: this is not always set for alt-registries when the name is
    /// not known.
    name: Option<String>,
}
47
/// The possible kinds of code source. Along with `SourceIdInner`, this fully defines the
/// source.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum SourceKind {
    /// A git repository, together with the reference to check out.
    Git(GitReference),
    /// A local path.
    Path,
    /// A remote registry.
    Registry,
    /// A local filesystem-based registry.
    LocalRegistry,
    /// A directory-based registry.
    Directory,
}
63
/// Information to find a specific commit in a Git repository.
///
/// `Branch("master")` acts as the default: `SourceId::from_url` starts from
/// it, and `pretty_ref` returns `None` for it so it is omitted when rendered.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum GitReference {
    /// From a tag.
    Tag(String),
    /// From the HEAD of a branch.
    Branch(String),
    /// From a specific revision.
    Rev(String),
}
74
75impl SourceId {
76    /// Creates a `SourceId` object from the kind and URL.
77    ///
78    /// The canonical url will be calculated, but the precise field will not
79    fn new(kind: SourceKind, url: Url) -> CargoResult<SourceId> {
80        let source_id = SourceId::wrap(SourceIdInner {
81            kind,
82            canonical_url: CanonicalUrl::new(&url)?,
83            url,
84            precise: None,
85            name: None,
86        });
87        Ok(source_id)
88    }
89
90    fn wrap(inner: SourceIdInner) -> SourceId {
91        let mut cache = SOURCE_ID_CACHE.lock().unwrap();
92        let inner = cache.get(&inner).cloned().unwrap_or_else(|| {
93            let inner = Box::leak(Box::new(inner));
94            cache.insert(inner);
95            inner
96        });
97        SourceId { inner }
98    }
99
100    /// Parses a source URL and returns the corresponding ID.
101    ///
102    /// ## Example
103    ///
104    /// ```
105    /// use cargo::core::SourceId;
106    /// SourceId::from_url("git+https://github.com/alexcrichton/\
107    ///                     libssh2-static-sys#80e71a3021618eb05\
108    ///                     656c58fb7c5ef5f12bc747f");
109    /// ```
110    pub fn from_url(string: &str) -> CargoResult<SourceId> {
111        let mut parts = string.splitn(2, '+');
112        let kind = parts.next().unwrap();
113        let url = parts
114            .next()
115            .ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
116
117        match kind {
118            "git" => {
119                let mut url = url.into_url()?;
120                let mut reference = GitReference::Branch("master".to_string());
121                for (k, v) in url.query_pairs() {
122                    match &k[..] {
123                        // Map older 'ref' to branch.
124                        "branch" | "ref" => reference = GitReference::Branch(v.into_owned()),
125
126                        "rev" => reference = GitReference::Rev(v.into_owned()),
127                        "tag" => reference = GitReference::Tag(v.into_owned()),
128                        _ => {}
129                    }
130                }
131                let precise = url.fragment().map(|s| s.to_owned());
132                url.set_fragment(None);
133                url.set_query(None);
134                Ok(SourceId::for_git(&url, reference)?.with_precise(precise))
135            }
136            "registry" => {
137                let url = url.into_url()?;
138                Ok(SourceId::new(SourceKind::Registry, url)?
139                    .with_precise(Some("locked".to_string())))
140            }
141            "path" => {
142                let url = url.into_url()?;
143                SourceId::new(SourceKind::Path, url)
144            }
145            kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
146        }
147    }
148
149    /// A view of the `SourceId` that can be `Display`ed as a URL.
150    pub fn into_url(&self) -> SourceIdIntoUrl<'_> {
151        SourceIdIntoUrl {
152            inner: &*self.inner,
153        }
154    }
155
156    /// Creates a `SourceId` from a filesystem path.
157    ///
158    /// `path`: an absolute path.
159    pub fn for_path(path: &Path) -> CargoResult<SourceId> {
160        let url = path.into_url()?;
161        SourceId::new(SourceKind::Path, url)
162    }
163
164    /// Creates a `SourceId` from a Git reference.
165    pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
166        SourceId::new(SourceKind::Git(reference), url.clone())
167    }
168
169    /// Creates a SourceId from a registry URL.
170    pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
171        SourceId::new(SourceKind::Registry, url.clone())
172    }
173
174    /// Creates a SourceId from a local registry path.
175    pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
176        let url = path.into_url()?;
177        SourceId::new(SourceKind::LocalRegistry, url)
178    }
179
180    /// Creates a `SourceId` from a directory path.
181    pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
182        let url = path.into_url()?;
183        SourceId::new(SourceKind::Directory, url)
184    }
185
186    /// Returns the `SourceId` corresponding to the main repository.
187    ///
188    /// This is the main cargo registry by default, but it can be overridden in
189    /// a `.cargo/config`.
190    pub fn crates_io(config: &Config) -> CargoResult<SourceId> {
191        config.crates_io_source_id(|| {
192            let cfg = ops::registry_configuration(config, None)?;
193            let url = if let Some(ref index) = cfg.index {
194                static WARNED: AtomicBool = AtomicBool::new(false);
195                if !WARNED.swap(true, SeqCst) {
196                    config.shell().warn(
197                        "custom registry support via \
198                         the `registry.index` configuration is \
199                         being removed, this functionality \
200                         will not work in the future",
201                    )?;
202                }
203                &index[..]
204            } else {
205                CRATES_IO_INDEX
206            };
207            let url = url.into_url()?;
208            SourceId::for_registry(&url)
209        })
210    }
211
212    pub fn alt_registry(config: &Config, key: &str) -> CargoResult<SourceId> {
213        let url = config.get_registry_index(key)?;
214        Ok(SourceId::wrap(SourceIdInner {
215            kind: SourceKind::Registry,
216            canonical_url: CanonicalUrl::new(&url)?,
217            url,
218            precise: None,
219            name: Some(key.to_string()),
220        }))
221    }
222
223    /// Gets this source URL.
224    pub fn url(&self) -> &Url {
225        &self.inner.url
226    }
227
228    /// Gets the canonical URL of this source, used for internal comparison
229    /// purposes.
230    pub fn canonical_url(&self) -> &CanonicalUrl {
231        &self.inner.canonical_url
232    }
233
234    pub fn display_index(self) -> String {
235        if self.is_default_registry() {
236            "crates.io index".to_string()
237        } else {
238            format!("`{}` index", url_display(self.url()))
239        }
240    }
241
242    pub fn display_registry_name(self) -> String {
243        if self.is_default_registry() {
244            "crates.io".to_string()
245        } else if let Some(name) = &self.inner.name {
246            name.clone()
247        } else {
248            url_display(self.url())
249        }
250    }
251
252    /// Returns `true` if this source is from a filesystem path.
253    pub fn is_path(self) -> bool {
254        self.inner.kind == SourceKind::Path
255    }
256
257    /// Returns `true` if this source is from a registry (either local or not).
258    pub fn is_registry(self) -> bool {
259        match self.inner.kind {
260            SourceKind::Registry | SourceKind::LocalRegistry => true,
261            _ => false,
262        }
263    }
264
265    /// Returns `true` if this source is a "remote" registry.
266    ///
267    /// "remote" may also mean a file URL to a git index, so it is not
268    /// necessarily "remote". This just means it is not `local-registry`.
269    pub fn is_remote_registry(self) -> bool {
270        match self.inner.kind {
271            SourceKind::Registry => true,
272            _ => false,
273        }
274    }
275
276    /// Returns `true` if this source from a Git repository.
277    pub fn is_git(self) -> bool {
278        match self.inner.kind {
279            SourceKind::Git(_) => true,
280            _ => false,
281        }
282    }
283
284    /// Creates an implementation of `Source` corresponding to this ID.
285    pub fn load<'a>(
286        self,
287        config: &'a Config,
288        yanked_whitelist: &HashSet<PackageId>,
289    ) -> CargoResult<Box<dyn super::Source + 'a>> {
290        trace!("loading SourceId; {}", self);
291        match self.inner.kind {
292            SourceKind::Git(..) => Ok(Box::new(GitSource::new(self, config)?)),
293            SourceKind::Path => {
294                let path = match self.inner.url.to_file_path() {
295                    Ok(p) => p,
296                    Err(()) => panic!("path sources cannot be remote"),
297                };
298                Ok(Box::new(PathSource::new(&path, self, config)))
299            }
300            SourceKind::Registry => Ok(Box::new(RegistrySource::remote(
301                self,
302                yanked_whitelist,
303                config,
304            ))),
305            SourceKind::LocalRegistry => {
306                let path = match self.inner.url.to_file_path() {
307                    Ok(p) => p,
308                    Err(()) => panic!("path sources cannot be remote"),
309                };
310                Ok(Box::new(RegistrySource::local(
311                    self,
312                    &path,
313                    yanked_whitelist,
314                    config,
315                )))
316            }
317            SourceKind::Directory => {
318                let path = match self.inner.url.to_file_path() {
319                    Ok(p) => p,
320                    Err(()) => panic!("path sources cannot be remote"),
321                };
322                Ok(Box::new(DirectorySource::new(&path, self, config)))
323            }
324        }
325    }
326
327    /// Gets the value of the precise field.
328    pub fn precise(self) -> Option<&'static str> {
329        self.inner.precise.as_deref()
330    }
331
332    /// Gets the Git reference if this is a git source, otherwise `None`.
333    pub fn git_reference(self) -> Option<&'static GitReference> {
334        match self.inner.kind {
335            SourceKind::Git(ref s) => Some(s),
336            _ => None,
337        }
338    }
339
340    /// Creates a new `SourceId` from this source with the given `precise`.
341    pub fn with_precise(self, v: Option<String>) -> SourceId {
342        SourceId::wrap(SourceIdInner {
343            precise: v,
344            ..(*self.inner).clone()
345        })
346    }
347
348    /// Returns `true` if the remote registry is the standard <https://crates.io>.
349    pub fn is_default_registry(self) -> bool {
350        match self.inner.kind {
351            SourceKind::Registry => {}
352            _ => return false,
353        }
354        self.inner.url.as_str() == CRATES_IO_INDEX
355    }
356
357    /// Hashes `self`.
358    ///
359    /// For paths, remove the workspace prefix so the same source will give the
360    /// same hash in different locations.
361    pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
362        if self.is_path() {
363            if let Ok(p) = self
364                .inner
365                .url
366                .to_file_path()
367                .unwrap()
368                .strip_prefix(workspace)
369            {
370                self.inner.kind.hash(into);
371                p.to_str().unwrap().hash(into);
372                return;
373            }
374        }
375        self.hash(into)
376    }
377
378    pub fn full_eq(self, other: SourceId) -> bool {
379        ptr::eq(self.inner, other.inner)
380    }
381
382    pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
383        ptr::NonNull::from(self.inner).hash(into)
384    }
385}
386
387impl PartialOrd for SourceId {
388    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
389        Some(self.cmp(other))
390    }
391}
392
393impl Ord for SourceId {
394    fn cmp(&self, other: &SourceId) -> Ordering {
395        self.inner.cmp(other.inner)
396    }
397}
398
399impl ser::Serialize for SourceId {
400    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
401    where
402        S: ser::Serializer,
403    {
404        if self.is_path() {
405            None::<String>.serialize(s)
406        } else {
407            s.collect_str(&self.into_url())
408        }
409    }
410}
411
412impl<'de> de::Deserialize<'de> for SourceId {
413    fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
414    where
415        D: de::Deserializer<'de>,
416    {
417        let string = String::deserialize(d)?;
418        SourceId::from_url(&string).map_err(de::Error::custom)
419    }
420}
421
422fn url_display(url: &Url) -> String {
423    if url.scheme() == "file" {
424        if let Ok(path) = url.to_file_path() {
425            if let Some(path_str) = path.to_str() {
426                return path_str.to_string();
427            }
428        }
429    }
430
431    url.as_str().to_string()
432}
433
434impl fmt::Display for SourceId {
435    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
436        match self.inner.kind {
437            SourceKind::Git(ref reference) => {
438                // Don't replace the URL display for git references,
439                // because those are kind of expected to be URLs.
440                write!(f, "{}", self.inner.url)?;
441                if let Some(pretty) = reference.pretty_ref() {
442                    write!(f, "?{}", pretty)?;
443                }
444
445                if let Some(ref s) = self.inner.precise {
446                    let len = cmp::min(s.len(), 8);
447                    write!(f, "#{}", &s[..len])?;
448                }
449                Ok(())
450            }
451            SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)),
452            SourceKind::Registry => write!(f, "registry `{}`", url_display(&self.inner.url)),
453            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)),
454            SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)),
455        }
456    }
457}
458
459// Custom equality defined as canonical URL equality for git sources and
460// URL equality for other sources, ignoring the `precise` and `name` fields.
461impl PartialEq for SourceId {
462    fn eq(&self, other: &SourceId) -> bool {
463        if ptr::eq(self.inner, other.inner) {
464            return true;
465        }
466        if self.inner.kind != other.inner.kind {
467            return false;
468        }
469        if self.inner.url == other.inner.url {
470            return true;
471        }
472
473        match (&self.inner.kind, &other.inner.kind) {
474            (SourceKind::Git(ref1), SourceKind::Git(ref2)) => {
475                ref1 == ref2 && self.inner.canonical_url == other.inner.canonical_url
476            }
477            _ => false,
478        }
479    }
480}
481
482impl PartialOrd for SourceIdInner {
483    fn partial_cmp(&self, other: &SourceIdInner) -> Option<Ordering> {
484        Some(self.cmp(other))
485    }
486}
487
488impl Ord for SourceIdInner {
489    fn cmp(&self, other: &SourceIdInner) -> Ordering {
490        match self.kind.cmp(&other.kind) {
491            Ordering::Equal => {}
492            ord => return ord,
493        }
494        match self.url.cmp(&other.url) {
495            Ordering::Equal => {}
496            ord => return ord,
497        }
498        match (&self.kind, &other.kind) {
499            (SourceKind::Git(ref1), SourceKind::Git(ref2)) => {
500                (ref1, &self.canonical_url).cmp(&(ref2, &other.canonical_url))
501            }
502            _ => self.kind.cmp(&other.kind),
503        }
504    }
505}
506
507// The hash of SourceId is used in the name of some Cargo folders, so shouldn't
508// vary. `as_str` gives the serialisation of a url (which has a spec) and so
509// insulates against possible changes in how the url crate does hashing.
510impl Hash for SourceId {
511    fn hash<S: hash::Hasher>(&self, into: &mut S) {
512        self.inner.kind.hash(into);
513        match self.inner.kind {
514            SourceKind::Git(_) => self.inner.canonical_url.hash(into),
515            _ => self.inner.url.as_str().hash(into),
516        }
517    }
518}
519
520/// A `Display`able view into a `SourceId` that will write it as a url
521pub struct SourceIdIntoUrl<'a> {
522    inner: &'a SourceIdInner,
523}
524
525impl<'a> fmt::Display for SourceIdIntoUrl<'a> {
526    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527        match *self.inner {
528            SourceIdInner {
529                kind: SourceKind::Path,
530                ref url,
531                ..
532            } => write!(f, "path+{}", url),
533            SourceIdInner {
534                kind: SourceKind::Git(ref reference),
535                ref url,
536                ref precise,
537                ..
538            } => {
539                write!(f, "git+{}", url)?;
540                if let Some(pretty) = reference.pretty_ref() {
541                    write!(f, "?{}", pretty)?;
542                }
543                if let Some(precise) = precise.as_ref() {
544                    write!(f, "#{}", precise)?;
545                }
546                Ok(())
547            }
548            SourceIdInner {
549                kind: SourceKind::Registry,
550                ref url,
551                ..
552            } => write!(f, "registry+{}", url),
553            SourceIdInner {
554                kind: SourceKind::LocalRegistry,
555                ref url,
556                ..
557            } => write!(f, "local-registry+{}", url),
558            SourceIdInner {
559                kind: SourceKind::Directory,
560                ref url,
561                ..
562            } => write!(f, "directory+{}", url),
563        }
564    }
565}
566
567impl GitReference {
568    /// Returns a `Display`able view of this git reference, or None if using
569    /// the head of the "master" branch
570    pub fn pretty_ref(&self) -> Option<PrettyRef<'_>> {
571        match *self {
572            GitReference::Branch(ref s) if *s == "master" => None,
573            _ => Some(PrettyRef { inner: self }),
574        }
575    }
576}
577
578/// A git reference that can be `Display`ed
579pub struct PrettyRef<'a> {
580    inner: &'a GitReference,
581}
582
583impl<'a> fmt::Display for PrettyRef<'a> {
584    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
585        match *self.inner {
586            GitReference::Branch(ref b) => write!(f, "branch={}", b),
587            GitReference::Tag(ref s) => write!(f, "tag={}", s),
588            GitReference::Rev(ref s) => write!(f, "rev={}", s),
589        }
590    }
591}
592
#[cfg(test)]
mod tests {
    use super::{GitReference, SourceId, SourceKind};
    use crate::util::IntoUrl;

    /// Git sources for the same GitHub repository compare equal across the
    /// `https` and `git` schemes, but not across different branches.
    #[test]
    fn github_sources_equal() {
        let master = SourceKind::Git(GitReference::Branch("master".to_string()));
        let https_url = "https://github.com/foo/bar".into_url().unwrap();
        let git_url = "git://github.com/foo/bar".into_url().unwrap();

        let over_https = SourceId::new(master.clone(), https_url).unwrap();
        let over_git = SourceId::new(master, git_url.clone()).unwrap();
        assert_eq!(over_https, over_git);

        let foo = SourceKind::Git(GitReference::Branch("foo".to_string()));
        let other_branch = SourceId::new(foo, git_url).unwrap();
        assert_ne!(over_https, other_branch);
    }
}