cargo/sources/
path.rs

1use std::fmt::{self, Debug, Formatter};
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use filetime::FileTime;
6use ignore::gitignore::GitignoreBuilder;
7use ignore::Match;
8use log::{trace, warn};
9
10use crate::core::source::MaybePackage;
11use crate::core::{Dependency, Package, PackageId, Source, SourceId, Summary};
12use crate::ops;
13use crate::util::{internal, paths, CargoResult, CargoResultExt, Config};
14
15pub struct PathSource<'cfg> {
16    source_id: SourceId,
17    path: PathBuf,
18    updated: bool,
19    packages: Vec<Package>,
20    config: &'cfg Config,
21    recursive: bool,
22}
23
24impl<'cfg> PathSource<'cfg> {
25    /// Invoked with an absolute path to a directory that contains a `Cargo.toml`.
26    ///
27    /// This source will only return the package at precisely the `path`
28    /// specified, and it will be an error if there's not a package at `path`.
29    pub fn new(path: &Path, source_id: SourceId, config: &'cfg Config) -> PathSource<'cfg> {
30        PathSource {
31            source_id,
32            path: path.to_path_buf(),
33            updated: false,
34            packages: Vec::new(),
35            config,
36            recursive: false,
37        }
38    }
39
40    /// Creates a new source which is walked recursively to discover packages.
41    ///
42    /// This is similar to the `new` method except that instead of requiring a
43    /// valid package to be present at `root` the folder is walked entirely to
44    /// crawl for packages.
45    ///
46    /// Note that this should be used with care and likely shouldn't be chosen
47    /// by default!
48    pub fn new_recursive(root: &Path, id: SourceId, config: &'cfg Config) -> PathSource<'cfg> {
49        PathSource {
50            recursive: true,
51            ..PathSource::new(root, id, config)
52        }
53    }
54
55    pub fn preload_with(&mut self, pkg: Package) {
56        assert!(!self.updated);
57        assert!(!self.recursive);
58        assert!(self.packages.is_empty());
59        self.updated = true;
60        self.packages.push(pkg);
61    }
62
63    pub fn root_package(&mut self) -> CargoResult<Package> {
64        trace!("root_package; source={:?}", self);
65
66        self.update()?;
67
68        match self.packages.iter().find(|p| p.root() == &*self.path) {
69            Some(pkg) => Ok(pkg.clone()),
70            None => Err(internal(format!(
71                "no package found in source {:?}",
72                self.path
73            ))),
74        }
75    }
76
77    pub fn read_packages(&self) -> CargoResult<Vec<Package>> {
78        if self.updated {
79            Ok(self.packages.clone())
80        } else if self.recursive {
81            ops::read_packages(&self.path, self.source_id, self.config)
82        } else {
83            let path = self.path.join("Cargo.toml");
84            let (pkg, _) = ops::read_package(&path, self.source_id, self.config)?;
85            Ok(vec![pkg])
86        }
87    }
88
89    /// List all files relevant to building this package inside this source.
90    ///
91    /// This function will use the appropriate methods to determine the
92    /// set of files underneath this source's directory which are relevant for
93    /// building `pkg`.
94    ///
95    /// The basic assumption of this method is that all files in the directory
96    /// are relevant for building this package, but it also contains logic to
97    /// use other methods like .gitignore to filter the list of files.
98    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
99        let root = pkg.root();
100        let no_include_option = pkg.manifest().include().is_empty();
101
102        let mut exclude_builder = GitignoreBuilder::new(root);
103        for rule in pkg.manifest().exclude() {
104            exclude_builder.add_line(None, rule)?;
105        }
106        let ignore_exclude = exclude_builder.build()?;
107
108        let mut include_builder = GitignoreBuilder::new(root);
109        for rule in pkg.manifest().include() {
110            include_builder.add_line(None, rule)?;
111        }
112        let ignore_include = include_builder.build()?;
113
114        let ignore_should_package = |relative_path: &Path| -> CargoResult<bool> {
115            // "Include" and "exclude" options are mutually exclusive.
116            if no_include_option {
117                match ignore_exclude
118                    .matched_path_or_any_parents(relative_path, /* is_dir */ false)
119                {
120                    Match::None => Ok(true),
121                    Match::Ignore(_) => Ok(false),
122                    Match::Whitelist(_) => Ok(true),
123                }
124            } else {
125                match ignore_include
126                    .matched_path_or_any_parents(relative_path, /* is_dir */ false)
127                {
128                    Match::None => Ok(false),
129                    Match::Ignore(_) => Ok(true),
130                    Match::Whitelist(_) => Ok(false),
131                }
132            }
133        };
134
135        let mut filter = |path: &Path| -> CargoResult<bool> {
136            let relative_path = path.strip_prefix(root)?;
137
138            let rel = relative_path.as_os_str();
139            if rel == "Cargo.lock" {
140                return Ok(pkg.include_lockfile());
141            } else if rel == "Cargo.toml" {
142                return Ok(true);
143            }
144
145            ignore_should_package(relative_path)
146        };
147
148        // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
149        if no_include_option {
150            if let Some(result) = self.discover_git_and_list_files(pkg, root, &mut filter) {
151                return result;
152            }
153            // no include option and not git repo discovered (see rust-lang/cargo#7183).
154            return self.list_files_walk_except_dot_files_and_dirs(pkg, &mut filter);
155        }
156        self.list_files_walk(pkg, &mut filter)
157    }
158
159    // Returns `Some(_)` if found sibling `Cargo.toml` and `.git` directory;
160    // otherwise, caller should fall back on full file list.
161    fn discover_git_and_list_files(
162        &self,
163        pkg: &Package,
164        root: &Path,
165        filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
166    ) -> Option<CargoResult<Vec<PathBuf>>> {
167        // If this package is in a Git repository, then we really do want to
168        // query the Git repository as it takes into account items such as
169        // `.gitignore`. We're not quite sure where the Git repository is,
170        // however, so we do a bit of a probe.
171        //
172        // We walk this package's path upwards and look for a sibling
173        // `Cargo.toml` and `.git` directory. If we find one then we assume that
174        // we're part of that repository.
175        let mut cur = root;
176        loop {
177            if cur.join("Cargo.toml").is_file() {
178                // If we find a Git repository next to this `Cargo.toml`, we still
179                // check to see if we are indeed part of the index. If not, then
180                // this is likely an unrelated Git repo, so keep going.
181                if let Ok(repo) = git2::Repository::open(cur) {
182                    let index = match repo.index() {
183                        Ok(index) => index,
184                        Err(err) => return Some(Err(err.into())),
185                    };
186                    let path = root.strip_prefix(cur).unwrap().join("Cargo.toml");
187                    if index.get_path(&path, 0).is_some() {
188                        return Some(self.list_files_git(pkg, &repo, filter));
189                    }
190                }
191            }
192            // Don't cross submodule boundaries.
193            if cur.join(".git").is_dir() {
194                break;
195            }
196            match cur.parent() {
197                Some(parent) => cur = parent,
198                None => break,
199            }
200        }
201        None
202    }
203
204    fn list_files_git(
205        &self,
206        pkg: &Package,
207        repo: &git2::Repository,
208        filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
209    ) -> CargoResult<Vec<PathBuf>> {
210        warn!("list_files_git {}", pkg.package_id());
211        let index = repo.index()?;
212        let root = repo
213            .workdir()
214            .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
215        let pkg_path = pkg.root();
216
217        let mut ret = Vec::<PathBuf>::new();
218
219        // We use information from the Git repository to guide us in traversing
220        // its tree. The primary purpose of this is to take advantage of the
221        // `.gitignore` and auto-ignore files that don't matter.
222        //
223        // Here we're also careful to look at both tracked and untracked files as
224        // the untracked files are often part of a build and may become relevant
225        // as part of a future commit.
226        let index_files = index.iter().map(|entry| {
227            use libgit2_sys::{GIT_FILEMODE_COMMIT, GIT_FILEMODE_LINK};
228            // ``is_dir`` is an optimization to avoid calling
229            // ``fs::metadata`` on every file.
230            let is_dir = if entry.mode == GIT_FILEMODE_LINK as u32 {
231                // Let the code below figure out if this symbolic link points
232                // to a directory or not.
233                None
234            } else {
235                Some(entry.mode == GIT_FILEMODE_COMMIT as u32)
236            };
237            (join(root, &entry.path), is_dir)
238        });
239        let mut opts = git2::StatusOptions::new();
240        opts.include_untracked(true);
241        if let Ok(suffix) = pkg_path.strip_prefix(root) {
242            opts.pathspec(suffix);
243        }
244        let statuses = repo.statuses(Some(&mut opts))?;
245        let untracked = statuses.iter().filter_map(|entry| match entry.status() {
246            // Don't include Cargo.lock if it is untracked. Packaging will
247            // generate a new one as needed.
248            git2::Status::WT_NEW if entry.path() != Some("Cargo.lock") => {
249                Some((join(root, entry.path_bytes()), None))
250            }
251            _ => None,
252        });
253
254        let mut subpackages_found = Vec::new();
255
256        for (file_path, is_dir) in index_files.chain(untracked) {
257            let file_path = file_path?;
258
259            // Filter out files blatantly outside this package. This is helped a
260            // bit above via the `pathspec` function call, but we need to filter
261            // the entries in the index as well.
262            if !file_path.starts_with(pkg_path) {
263                continue;
264            }
265
266            match file_path.file_name().and_then(|s| s.to_str()) {
267                // The `target` directory is never included.
268                Some("target") => continue,
269
270                // Keep track of all sub-packages found and also strip out all
271                // matches we've found so far. Note, though, that if we find
272                // our own `Cargo.toml`, we keep going.
273                Some("Cargo.toml") => {
274                    let path = file_path.parent().unwrap();
275                    if path != pkg_path {
276                        warn!("subpackage found: {}", path.display());
277                        ret.retain(|p| !p.starts_with(path));
278                        subpackages_found.push(path.to_path_buf());
279                        continue;
280                    }
281                }
282
283                _ => {}
284            }
285
286            // If this file is part of any other sub-package we've found so far,
287            // skip it.
288            if subpackages_found.iter().any(|p| file_path.starts_with(p)) {
289                continue;
290            }
291
292            if is_dir.unwrap_or_else(|| file_path.is_dir()) {
293                warn!("  found submodule {}", file_path.display());
294                let rel = file_path.strip_prefix(root)?;
295                let rel = rel.to_str().ok_or_else(|| {
296                    anyhow::format_err!("invalid utf-8 filename: {}", rel.display())
297                })?;
298                // Git submodules are currently only named through `/` path
299                // separators, explicitly not `\` which windows uses. Who knew?
300                let rel = rel.replace(r"\", "/");
301                match repo.find_submodule(&rel).and_then(|s| s.open()) {
302                    Ok(repo) => {
303                        let files = self.list_files_git(pkg, &repo, filter)?;
304                        ret.extend(files.into_iter());
305                    }
306                    Err(..) => {
307                        PathSource::walk(&file_path, &mut ret, false, filter)?;
308                    }
309                }
310            } else if (*filter)(&file_path)? {
311                // We found a file!
312                warn!("  found {}", file_path.display());
313                ret.push(file_path);
314            }
315        }
316        return Ok(ret);
317
318        #[cfg(unix)]
319        fn join(path: &Path, data: &[u8]) -> CargoResult<PathBuf> {
320            use std::ffi::OsStr;
321            use std::os::unix::prelude::*;
322            Ok(path.join(<OsStr as OsStrExt>::from_bytes(data)))
323        }
324        #[cfg(windows)]
325        fn join(path: &Path, data: &[u8]) -> CargoResult<PathBuf> {
326            use std::str;
327            match str::from_utf8(data) {
328                Ok(s) => Ok(path.join(s)),
329                Err(e) => Err(anyhow::format_err!(
330                    "cannot process path in git with a non utf8 filename: {}\n{:?}",
331                    e,
332                    data
333                )),
334            }
335        }
336    }
337
338    fn list_files_walk_except_dot_files_and_dirs(
339        &self,
340        pkg: &Package,
341        filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
342    ) -> CargoResult<Vec<PathBuf>> {
343        let root = pkg.root();
344        let mut exclude_dot_files_dir_builder = GitignoreBuilder::new(root);
345        exclude_dot_files_dir_builder.add_line(None, ".*")?;
346        let ignore_dot_files_and_dirs = exclude_dot_files_dir_builder.build()?;
347
348        let mut filter_ignore_dot_files_and_dirs = |path: &Path| -> CargoResult<bool> {
349            let relative_path = path.strip_prefix(root)?;
350            match ignore_dot_files_and_dirs
351                .matched_path_or_any_parents(relative_path, /* is_dir */ false)
352            {
353                Match::Ignore(_) => Ok(false),
354                _ => filter(path),
355            }
356        };
357        self.list_files_walk(pkg, &mut filter_ignore_dot_files_and_dirs)
358    }
359
360    fn list_files_walk(
361        &self,
362        pkg: &Package,
363        filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
364    ) -> CargoResult<Vec<PathBuf>> {
365        let mut ret = Vec::new();
366        PathSource::walk(pkg.root(), &mut ret, true, filter)?;
367        Ok(ret)
368    }
369
370    fn walk(
371        path: &Path,
372        ret: &mut Vec<PathBuf>,
373        is_root: bool,
374        filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
375    ) -> CargoResult<()> {
376        if !fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false) {
377            if (*filter)(path)? {
378                ret.push(path.to_path_buf());
379            }
380            return Ok(());
381        }
382        // Don't recurse into any sub-packages that we have.
383        if !is_root && fs::metadata(&path.join("Cargo.toml")).is_ok() {
384            return Ok(());
385        }
386
387        // For package integration tests, we need to sort the paths in a deterministic order to
388        // be able to match stdout warnings in the same order.
389        //
390        // TODO: drop `collect` and sort after transition period and dropping warning tests.
391        // See rust-lang/cargo#4268 and rust-lang/cargo#4270.
392        let mut entries: Vec<PathBuf> = fs::read_dir(path)
393            .chain_err(|| format!("cannot read {:?}", path))?
394            .map(|e| e.unwrap().path())
395            .collect();
396        entries.sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
397        for path in entries {
398            let name = path.file_name().and_then(|s| s.to_str());
399            if is_root && name == Some("target") {
400                // Skip Cargo artifacts.
401                continue;
402            }
403            PathSource::walk(&path, ret, false, filter)?;
404        }
405        Ok(())
406    }
407
408    pub fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
409        if !self.updated {
410            return Err(internal(format!(
411                "BUG: source `{:?}` was not updated",
412                self.path
413            )));
414        }
415
416        let mut max = FileTime::zero();
417        let mut max_path = PathBuf::new();
418        for file in self.list_files(pkg)? {
419            // An `fs::stat` error here is either because path is a
420            // broken symlink, a permissions error, or a race
421            // condition where this path was `rm`-ed -- either way,
422            // we can ignore the error and treat the path's `mtime`
423            // as `0`.
424            let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero());
425            if mtime > max {
426                max = mtime;
427                max_path = file;
428            }
429        }
430        trace!("last modified file {}: {}", self.path.display(), max);
431        Ok((max, max_path))
432    }
433
434    pub fn path(&self) -> &Path {
435        &self.path
436    }
437}
438
439impl<'cfg> Debug for PathSource<'cfg> {
440    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
441        write!(f, "the paths source")
442    }
443}
444
445impl<'cfg> Source for PathSource<'cfg> {
446    fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
447        for s in self.packages.iter().map(|p| p.summary()) {
448            if dep.matches(s) {
449                f(s.clone())
450            }
451        }
452        Ok(())
453    }
454
455    fn fuzzy_query(&mut self, _dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
456        for s in self.packages.iter().map(|p| p.summary()) {
457            f(s.clone())
458        }
459        Ok(())
460    }
461
462    fn supports_checksums(&self) -> bool {
463        false
464    }
465
466    fn requires_precise(&self) -> bool {
467        false
468    }
469
470    fn source_id(&self) -> SourceId {
471        self.source_id
472    }
473
474    fn update(&mut self) -> CargoResult<()> {
475        if !self.updated {
476            let packages = self.read_packages()?;
477            self.packages.extend(packages.into_iter());
478            self.updated = true;
479        }
480
481        Ok(())
482    }
483
484    fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
485        trace!("getting packages; id={}", id);
486
487        let pkg = self.packages.iter().find(|pkg| pkg.package_id() == id);
488        pkg.cloned()
489            .map(MaybePackage::Ready)
490            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
491    }
492
493    fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
494        panic!("no download should have started")
495    }
496
497    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
498        let (max, max_path) = self.last_modified_file(pkg)?;
499        Ok(format!("{} ({})", max, max_path.display()))
500    }
501
502    fn describe(&self) -> String {
503        match self.source_id.url().to_file_path() {
504            Ok(path) => path.display().to_string(),
505            Err(_) => self.source_id.to_string(),
506        }
507    }
508
509    fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}
510
511    fn is_yanked(&mut self, _pkg: PackageId) -> CargoResult<bool> {
512        Ok(false)
513    }
514}