Skip to main content

radicle_surf/
fs.rs

1//! Definition for a file system consisting of `Directory` and `File`.
2//!
3//! A `Directory` is expected to be a non-empty tree of directories and files.
4//! See [`Directory`] for more information.
5
6use std::{
7    cmp::Ordering,
8    collections::BTreeMap,
9    convert::{Infallible, Into as _},
10    path::{Path, PathBuf},
11};
12
13use git2::Blob;
14use radicle_git_ext::{is_not_found_err, Oid};
15use radicle_std_ext::result::ResultExt as _;
16use url::Url;
17
18use crate::{Repository, Revision};
19
20pub mod error {
21    use std::path::PathBuf;
22
23    use thiserror::Error;
24
25    #[derive(Debug, Error, PartialEq)]
26    pub enum Directory {
27        #[error(transparent)]
28        Git(#[from] git2::Error),
29        #[error(transparent)]
30        File(#[from] File),
31        #[error("the path {0} is not valid")]
32        InvalidPath(PathBuf),
33        #[error("the entry at '{0}' must be of type {1}")]
34        InvalidType(PathBuf, &'static str),
35        #[error("the entry name was not valid UTF-8")]
36        Utf8Error,
37        #[error("the path {0} not found")]
38        PathNotFound(PathBuf),
39        #[error(transparent)]
40        Submodule(#[from] Submodule),
41    }
42
43    #[derive(Debug, Error, PartialEq)]
44    pub enum File {
45        #[error(transparent)]
46        Git(#[from] git2::Error),
47    }
48
49    #[derive(Debug, Error, PartialEq)]
50    pub enum Submodule {
51        #[error("URL is invalid utf-8 for submodule '{name}': {err}")]
52        Utf8 {
53            name: String,
54            #[source]
55            err: std::str::Utf8Error,
56        },
57        #[error("failed to parse URL '{url}' for submodule '{name}': {err}")]
58        ParseUrl {
59            name: String,
60            url: String,
61            #[source]
62            err: url::ParseError,
63        },
64    }
65}
66
67/// A `File` in a git repository.
68///
69/// The representation is lightweight and contains the [`Oid`] that
70/// points to the git blob which is this file.
71///
72/// The name of a file can be retrieved via [`File::name`].
73///
74/// The [`FileContent`] of a file can be retrieved via
75/// [`File::content`].
76#[derive(Clone, PartialEq, Eq, Debug)]
77pub struct File {
78    /// The name of the file.
79    name: String,
80    /// The relative path of the file, not including the `name`,
81    /// in respect to the root of the git repository.
82    prefix: PathBuf,
83    /// The object identifier of the git blob of this file.
84    id: Oid,
85}
86
87impl File {
88    /// Construct a new `File`.
89    ///
90    /// The `path` must be the prefix location of the directory, and
91    /// so should not end in `name`.
92    ///
93    /// The `id` must point to a git blob.
94    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
95        debug_assert!(
96            !prefix.ends_with(&name),
97            "prefix = {prefix:?}, name = {name}",
98        );
99        Self { name, prefix, id }
100    }
101
102    /// The name of this `File`.
103    pub fn name(&self) -> &str {
104        self.name.as_str()
105    }
106
107    /// The object identifier of this `File`.
108    pub fn id(&self) -> Oid {
109        self.id
110    }
111
112    /// Return the exact path for this `File`, including the `name` of
113    /// the directory itself.
114    ///
115    /// The path is relative to the git repository root.
116    pub fn path(&self) -> PathBuf {
117        self.prefix.join(&self.name)
118    }
119
120    /// Return the [`Path`] where this `File` is located, relative to the
121    /// git repository root.
122    pub fn location(&self) -> &Path {
123        &self.prefix
124    }
125
126    /// Get the [`FileContent`] for this `File`.
127    ///
128    /// # Errors
129    ///
130    /// This function will fail if it could not find the `git` blob
131    /// for the `Oid` of this `File`.
132    pub fn content<'a>(&self, repo: &'a Repository) -> Result<FileContent<'a>, error::File> {
133        let blob = repo.find_blob(self.id)?;
134        Ok(FileContent { blob })
135    }
136}
137
138/// The contents of a [`File`].
139///
140/// To construct a `FileContent` use [`File::content`].
141pub struct FileContent<'a> {
142    blob: Blob<'a>,
143}
144
145impl<'a> FileContent<'a> {
146    /// Return the file contents as a byte slice.
147    pub fn as_bytes(&self) -> &[u8] {
148        self.blob.content()
149    }
150
151    /// Return the size of the file contents.
152    pub fn size(&self) -> usize {
153        self.blob.size()
154    }
155
156    /// Creates a `FileContent` using a blob.
157    pub(crate) fn new(blob: Blob<'a>) -> Self {
158        Self { blob }
159    }
160}
161
162/// A representations of a [`Directory`]'s entries.
163pub struct Entries {
164    listing: BTreeMap<String, Entry>,
165}
166
167impl Entries {
168    /// Return the name of each [`Entry`].
169    pub fn names(&self) -> impl Iterator<Item = &String> {
170        self.listing.keys()
171    }
172
173    /// Return each [`Entry`].
174    pub fn entries(&self) -> impl Iterator<Item = &Entry> {
175        self.listing.values()
176    }
177
178    /// Return each [`Entry`] and its name.
179    pub fn iter(&self) -> impl Iterator<Item = (&String, &Entry)> {
180        self.listing.iter()
181    }
182}
183
184impl Iterator for Entries {
185    type Item = Entry;
186
187    fn next(&mut self) -> Option<Self::Item> {
188        // Can be improved when `pop_first()` is stable for BTreeMap.
189        let next_key = match self.listing.keys().next() {
190            Some(k) => k.clone(),
191            None => return None,
192        };
193        self.listing.remove(&next_key)
194    }
195}
196
197/// An `Entry` is either a [`File`] entry or a [`Directory`] entry.
198#[derive(Debug, Clone, PartialEq, Eq)]
199pub enum Entry {
200    /// A file entry within a [`Directory`].
201    File(File),
202    /// A sub-directory of a [`Directory`].
203    Directory(Directory),
204    /// An entry points to a submodule.
205    Submodule(Submodule),
206}
207
208impl PartialOrd for Entry {
209    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
210        Some(self.cmp(other))
211    }
212}
213
214impl Ord for Entry {
215    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
216        match (self, other) {
217            (Entry::File(x), Entry::File(y)) => x.name().cmp(y.name()),
218            (Entry::File(_), Entry::Directory(_)) => Ordering::Less,
219            (Entry::File(_), Entry::Submodule(_)) => Ordering::Less,
220            (Entry::Directory(_), Entry::File(_)) => Ordering::Greater,
221            (Entry::Submodule(_), Entry::File(_)) => Ordering::Less,
222            (Entry::Directory(x), Entry::Directory(y)) => x.name().cmp(y.name()),
223            (Entry::Directory(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
224            (Entry::Submodule(x), Entry::Directory(y)) => x.name().cmp(y.name()),
225            (Entry::Submodule(x), Entry::Submodule(y)) => x.name().cmp(y.name()),
226        }
227    }
228}
229
230impl Entry {
231    /// Get a label for the `Entriess`, either the name of the [`File`],
232    /// the name of the [`Directory`], or the name of the [`Submodule`].
233    pub fn name(&self) -> &String {
234        match self {
235            Entry::File(file) => &file.name,
236            Entry::Directory(directory) => directory.name(),
237            Entry::Submodule(submodule) => submodule.name(),
238        }
239    }
240
241    pub fn path(&self) -> PathBuf {
242        match self {
243            Entry::File(file) => file.path(),
244            Entry::Directory(directory) => directory.path(),
245            Entry::Submodule(submodule) => submodule.path(),
246        }
247    }
248
249    pub fn location(&self) -> &Path {
250        match self {
251            Entry::File(file) => file.location(),
252            Entry::Directory(directory) => directory.location(),
253            Entry::Submodule(submodule) => submodule.location(),
254        }
255    }
256
257    /// Returns `true` if the `Entry` is a file.
258    pub fn is_file(&self) -> bool {
259        matches!(self, Entry::File(_))
260    }
261
262    /// Returns `true` if the `Entry` is a directory.
263    pub fn is_directory(&self) -> bool {
264        matches!(self, Entry::Directory(_))
265    }
266
267    pub(crate) fn from_entry(
268        entry: &git2::TreeEntry,
269        path: PathBuf,
270        repo: &Repository,
271    ) -> Result<Self, error::Directory> {
272        let name = entry.name().ok_or(error::Directory::Utf8Error)?.to_string();
273        let id = entry.id().into();
274
275        match entry.kind() {
276            Some(git2::ObjectType::Tree) => Ok(Self::Directory(Directory::new(name, path, id))),
277            Some(git2::ObjectType::Blob) => Ok(Self::File(File::new(name, path, id))),
278            Some(git2::ObjectType::Commit) => {
279                let submodule = (!repo.is_bare())
280                    .then(|| repo.find_submodule(&name))
281                    .transpose()?;
282                Ok(Self::Submodule(Submodule::new(name, path, submodule, id)?))
283            }
284            _ => Err(error::Directory::InvalidType(path, "tree or blob")),
285        }
286    }
287}
288
289/// A `Directory` is the representation of a file system directory, for a given
290/// [`git` tree][git-tree].
291///
292/// The name of a directory can be retrieved via [`File::name`].
293///
294/// The [`Entries`] of a directory can be retrieved via
295/// [`Directory::entries`].
296///
297/// [git-tree]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
298#[derive(Debug, Clone, PartialEq, Eq)]
299pub struct Directory {
300    /// The name of the directoy.
301    name: String,
302    /// The relative path of the directory, not including the `name`,
303    /// in respect to the root of the git repository.
304    prefix: PathBuf,
305    /// The object identifier of the git tree of this directory.
306    id: Oid,
307}
308
309const ROOT_DIR: &str = "";
310
311impl Directory {
312    /// Creates a directory given its `tree_id`.
313    ///
314    /// The `name` and `prefix` are both set to be empty.
315    pub(crate) fn root(id: Oid) -> Self {
316        Self::new(ROOT_DIR.to_string(), PathBuf::new(), id)
317    }
318
319    /// Creates a directory given its `name` and `id`.
320    ///
321    /// The `path` must be the prefix location of the directory, and
322    /// so should not end in `name`.
323    ///
324    /// The `id` must point to a `git` tree.
325    pub(crate) fn new(name: String, prefix: PathBuf, id: Oid) -> Self {
326        debug_assert!(
327            name.is_empty() || !prefix.ends_with(&name),
328            "prefix = {prefix:?}, name = {name}",
329        );
330        Self { name, prefix, id }
331    }
332
333    /// Get the name of the current `Directory`.
334    pub fn name(&self) -> &String {
335        &self.name
336    }
337
338    /// The object identifier of this `[Directory]`.
339    pub fn id(&self) -> Oid {
340        self.id
341    }
342
343    /// Return the exact path for this `Directory`, including the `name` of the
344    /// directory itself.
345    ///
346    /// The path is relative to the git repository root.
347    pub fn path(&self) -> PathBuf {
348        self.prefix.join(&self.name)
349    }
350
351    /// Return the [`Path`] where this `Directory` is located, relative to the
352    /// git repository root.
353    pub fn location(&self) -> &Path {
354        &self.prefix
355    }
356
357    /// Return the [`Entries`] for this `Directory`'s `Oid`.
358    ///
359    /// The resulting `Entries` will only resolve to this
360    /// `Directory`'s entries. Any sub-directories will need to be
361    /// resolved independently.
362    ///
363    /// # Errors
364    ///
365    /// This function will fail if it could not find the `git` tree
366    /// for the `Oid`.
367    pub fn entries(&self, repo: &Repository) -> Result<Entries, error::Directory> {
368        let tree = repo.find_tree(self.id)?;
369
370        let mut entries = BTreeMap::new();
371        let mut error = None;
372        let path = self.path();
373
374        // Walks only the first level of entries. And `_entry_path` is always
375        // empty for the first level.
376        tree.walk(git2::TreeWalkMode::PreOrder, |_entry_path, entry| {
377            match Entry::from_entry(entry, path.clone(), repo) {
378                Ok(entry) => match entry {
379                    Entry::File(_) => {
380                        entries.insert(entry.name().clone(), entry);
381                        git2::TreeWalkResult::Ok
382                    }
383                    Entry::Directory(_) => {
384                        entries.insert(entry.name().clone(), entry);
385                        // Skip nested directories
386                        git2::TreeWalkResult::Skip
387                    }
388                    Entry::Submodule(_) => {
389                        entries.insert(entry.name().clone(), entry);
390                        git2::TreeWalkResult::Ok
391                    }
392                },
393                Err(err) => {
394                    error = Some(err);
395                    git2::TreeWalkResult::Abort
396                }
397            }
398        })?;
399
400        match error {
401            Some(err) => Err(err),
402            None => Ok(Entries { listing: entries }),
403        }
404    }
405
406    /// Find the [`Entry`] found at a non-empty `path`, if it exists.
407    pub fn find_entry<P>(&self, path: &P, repo: &Repository) -> Result<Entry, error::Directory>
408    where
409        P: AsRef<Path>,
410    {
411        // Search the path in git2 tree.
412        let path = path.as_ref();
413        let git2_tree = repo.find_tree(self.id)?;
414        let entry = git2_tree
415            .get_path(path)
416            .or_matches::<error::Directory, _, _>(is_not_found_err, || {
417                Err(error::Directory::PathNotFound(path.to_path_buf()))
418            })?;
419        let parent = path
420            .parent()
421            .ok_or_else(|| error::Directory::InvalidPath(path.to_path_buf()))?;
422        let root_path = self.path().join(parent);
423
424        Entry::from_entry(&entry, root_path, repo)
425    }
426
427    /// Find the `Oid`, for a [`File`], found at `path`, if it exists.
428    pub fn find_file<P>(&self, path: &P, repo: &Repository) -> Result<File, error::Directory>
429    where
430        P: AsRef<Path>,
431    {
432        match self.find_entry(path, repo)? {
433            Entry::File(file) => Ok(file),
434            _ => Err(error::Directory::InvalidType(
435                path.as_ref().to_path_buf(),
436                "file",
437            )),
438        }
439    }
440
441    /// Find the `Directory` found at `path`, if it exists.
442    ///
443    /// If `path` is `ROOT_DIR` (i.e. an empty path), returns self.
444    pub fn find_directory<P>(&self, path: &P, repo: &Repository) -> Result<Self, error::Directory>
445    where
446        P: AsRef<Path>,
447    {
448        if path.as_ref() == Path::new(ROOT_DIR) {
449            return Ok(self.clone());
450        }
451
452        match self.find_entry(path, repo)? {
453            Entry::Directory(d) => Ok(d),
454            _ => Err(error::Directory::InvalidType(
455                path.as_ref().to_path_buf(),
456                "directory",
457            )),
458        }
459    }
460
461    // TODO(fintan): This is going to be a bit trickier so going to leave it out for
462    // now
463    #[allow(dead_code)]
464    fn fuzzy_find(_label: &Path) -> Vec<Self> {
465        unimplemented!()
466    }
467
468    /// Get the total size, in bytes, of a `Directory`. The size is
469    /// the sum of all files that can be reached from this `Directory`.
470    pub fn size(&self, repo: &Repository) -> Result<usize, error::Directory> {
471        self.traverse(repo, 0, &mut |size, entry| match entry {
472            Entry::File(file) => Ok(size + file.content(repo)?.size()),
473            Entry::Directory(dir) => Ok(size + dir.size(repo)?),
474            Entry::Submodule(_) => Ok(size),
475        })
476    }
477
478    /// Traverse the entire `Directory` using the `initial`
479    /// accumulator and the function `f`.
480    ///
481    /// For each [`Entry::Directory`] this will recursively call
482    /// [`Directory::traverse`] and obtain its [`Entries`].
483    ///
484    /// `Error` is the error type of the fallible function.
485    /// `B` is the type of the accumulator.
486    /// `F` is the fallible function that takes the accumulator and
487    /// the next [`Entry`], possibly providing the next accumulator
488    /// value.
489    pub fn traverse<Error, B, F>(
490        &self,
491        repo: &Repository,
492        initial: B,
493        f: &mut F,
494    ) -> Result<B, Error>
495    where
496        Error: From<error::Directory>,
497        F: FnMut(B, &Entry) -> Result<B, Error>,
498    {
499        self.entries(repo)?
500            .entries()
501            .try_fold(initial, |acc, entry| match entry {
502                Entry::File(_) => f(acc, entry),
503                Entry::Directory(directory) => {
504                    let acc = directory.traverse(repo, acc, f)?;
505                    f(acc, entry)
506                }
507                Entry::Submodule(_) => f(acc, entry),
508            })
509    }
510}
511
512impl Revision for Directory {
513    type Error = Infallible;
514
515    fn object_id(&self, _repo: &Repository) -> Result<Oid, Self::Error> {
516        Ok(self.id)
517    }
518}
519
520/// A representation of a Git [submodule] when encountered in a Git
521/// repository.
522///
523/// [submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules
524#[derive(Debug, Clone, PartialEq, Eq)]
525pub struct Submodule {
526    name: String,
527    prefix: PathBuf,
528    id: Oid,
529    url: Option<Url>,
530}
531
532impl Submodule {
533    /// Construct a new `Submodule`.
534    ///
535    /// The `path` must be the prefix location of the directory, and
536    /// so should not end in `name`.
537    ///
538    /// The `id` is the commit pointer that Git provides when listing
539    /// a submodule.
540    pub fn new(
541        name: String,
542        prefix: PathBuf,
543        submodule: Option<git2::Submodule>,
544        id: Oid,
545    ) -> Result<Self, error::Submodule> {
546        let url = submodule
547            .and_then(|module| {
548                module
549                    .opt_url_bytes()
550                    .map(|bs| std::str::from_utf8(bs).map(|url| url.to_string()))
551            })
552            .transpose()
553            .map_err(|err| error::Submodule::Utf8 {
554                name: name.clone(),
555                err,
556            })?;
557        let url = url
558            .map(|url| {
559                Url::parse(&url).map_err(|err| error::Submodule::ParseUrl {
560                    name: name.clone(),
561                    url,
562                    err,
563                })
564            })
565            .transpose()?;
566        Ok(Self {
567            name,
568            prefix,
569            id,
570            url,
571        })
572    }
573
574    /// The name of this `Submodule`.
575    pub fn name(&self) -> &String {
576        &self.name
577    }
578
579    /// Return the [`Path`] where this `Submodule` is located, relative to the
580    /// git repository root.
581    pub fn location(&self) -> &Path {
582        &self.prefix
583    }
584
585    /// Return the exact path for this `Submodule`, including the
586    /// `name` of the submodule itself.
587    ///
588    /// The path is relative to the git repository root.
589    pub fn path(&self) -> PathBuf {
590        self.prefix.join(&self.name)
591    }
592
593    /// The object identifier of this `Submodule`.
594    ///
595    /// Note that this does not exist in the parent `Repository`. A
596    /// new `Repository` should be opened for the submodule.
597    pub fn id(&self) -> Oid {
598        self.id
599    }
600
601    /// The URL for the submodule, if it is defined.
602    pub fn url(&self) -> &Option<Url> {
603        &self.url
604    }
605}