Skip to main content

swh_graph_stdlib/
fs.rs

1// Copyright (C) 2024-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Filesystem manipulation functions, ie. directories and contents.
7
8use std::collections::HashMap;
9
10use anyhow::{Context, Result, ensure};
11use log::warn;
12use sux::bits::BitFieldVec;
13use sux::traits::bit_field_slice::BitFieldSlice;
14
15use swh_graph::NodeType;
16use swh_graph::graph::*;
17use swh_graph::labels::{EdgeLabel, LabelNameId, Permission};
18use swh_graph::properties;
19
20fn msg_no_label_name_id(name: impl AsRef<[u8]>) -> String {
21    format!(
22        "no label_name id found for entry \"{}\"",
23        String::from_utf8_lossy(name.as_ref())
24    )
25}
26
27/// An ordered collection of [`LabelNameId`].
28///
29/// Usually this is `Box<[LabelNameId]>` for simplicity, but [`BitFieldIdPath`] is an
30/// implementation backed by [`BitFieldVec`] for cases where memory usage matters.
31pub trait IdPath {
32    fn iter(&self) -> impl Iterator<Item = LabelNameId>;
33}
34
35impl IdPath for Vec<LabelNameId> {
36    fn iter(&self) -> impl Iterator<Item = LabelNameId> {
37        #[allow(clippy::into_iter_on_ref)] // false positive
38        self.into_iter().copied()
39    }
40}
41
42impl IdPath for Box<[LabelNameId]> {
43    fn iter(&self) -> impl Iterator<Item = LabelNameId> {
44        self.into_iter().copied()
45    }
46}
47
48impl IdPath for &[LabelNameId] {
49    fn iter(&self) -> impl Iterator<Item = LabelNameId> {
50        #[allow(clippy::into_iter_on_ref)] // false positive
51        self.into_iter().copied()
52    }
53}
54
55/// Newtype wrapping a [`BitFieldVec`] (or other implementations of [`BitFieldSlice`])
56/// to interpret them as an [`IdPath`].
57#[derive(Debug, Clone, PartialEq, Eq, Hash)]
58pub struct BitFieldIdPath<B: BitFieldSlice<u64>>(B);
59
60impl<B: BitFieldSlice<u64>> From<B> for BitFieldIdPath<B> {
61    fn from(v: B) -> Self {
62        Self(v)
63    }
64}
65
66#[inline]
67fn bitfieldvec_from_slice(b: &[LabelNameId]) -> BitFieldVec<u64, Vec<u64>> {
68    let bit_width = u64::BITS
69        - b.iter()
70            .map(|LabelNameId(item)| item)
71            .max()
72            .unwrap_or(&1)
73            .leading_zeros();
74    let mut v = BitFieldVec::with_capacity(
75        bit_width
76            .try_into()
77            .expect("overflowed BitFieldVec item length"),
78        b.len(),
79    );
80    for &LabelNameId(item) in b {
81        v.push(item);
82    }
83
84    v
85}
86
87impl From<Box<[LabelNameId]>> for BitFieldIdPath<BitFieldVec<u64, Box<[u64]>>> {
88    #[inline(always)]
89    fn from(b: Box<[LabelNameId]>) -> Self {
90        BitFieldIdPath(bitfieldvec_from_slice(&b).into())
91    }
92}
93
94impl From<Vec<LabelNameId>> for BitFieldIdPath<BitFieldVec<u64, Box<[u64]>>> {
95    #[inline(always)]
96    fn from(v: Vec<LabelNameId>) -> Self {
97        BitFieldIdPath(bitfieldvec_from_slice(&v).into())
98    }
99}
100
101impl From<&[LabelNameId]> for BitFieldIdPath<BitFieldVec<u64, Box<[u64]>>> {
102    #[inline(always)]
103    fn from(s: &[LabelNameId]) -> Self {
104        BitFieldIdPath(bitfieldvec_from_slice(s).into())
105    }
106}
107
108impl<B: BitFieldSlice<u64>> From<BitFieldIdPath<B>> for Box<[LabelNameId]> {
109    #[inline(always)]
110    fn from(v: BitFieldIdPath<B>) -> Self {
111        v.iter().collect()
112    }
113}
114
115impl<B: BitFieldSlice<u64>> IdPath for BitFieldIdPath<B> {
116    #[inline(always)]
117    fn iter(&self) -> impl Iterator<Item = LabelNameId> {
118        (0..self.0.len()).map(|i| LabelNameId(self.0.get_value(i).unwrap()))
119    }
120}
121
122/// Given a graph and a directory node, return the node id of a named directory
123/// entry located (not recursively) in that directory, if it exists.
124///
125/// See [resolve_path] for a version of this function that traverses
126/// sub-directories recursively.
127///
128/// ```ignore
129/// if let Ok(Some(node)) == resolve_name(&graph, 42, "README.md") {
130///     // do something with node
131/// }
132/// ```
133pub fn resolve_name<G>(graph: &G, dir: NodeId, name: impl AsRef<[u8]>) -> Result<Option<NodeId>>
134where
135    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
136    <G as SwhGraphWithProperties>::LabelNames: properties::LabelNames,
137    <G as SwhGraphWithProperties>::Maps: properties::Maps,
138{
139    let props = graph.properties();
140    let name_id = props
141        .label_name_id(name.as_ref())
142        .with_context(|| msg_no_label_name_id(name))?;
143    resolve_name_by_id(&graph, dir, name_id)
144}
145
146/// Same as [resolve_name], but using a pre-resolved [LabelNameId] as entry
147/// name. Using this function is more efficient in case the same name (e.g.,
148/// "README.md") is to be looked up in many directories.
149pub fn resolve_name_by_id<G>(graph: &G, dir: NodeId, name: LabelNameId) -> Result<Option<NodeId>>
150where
151    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
152    <G as SwhGraphWithProperties>::Maps: properties::Maps,
153{
154    let node_type = graph.properties().node_type(dir);
155    ensure!(
156        node_type == NodeType::Directory,
157        "Type of {dir} should be dir, but is {node_type} instead"
158    );
159
160    for (succ, label) in graph.labeled_successors(dir).flatten_labels() {
161        #[allow(clippy::collapsible_if)]
162        if let EdgeLabel::DirEntry(dentry) = label {
163            if dentry.label_name_id() == name {
164                return Ok(Some(succ));
165            }
166        }
167    }
168    Ok(None)
169}
170
171/// Given a graph and a directory node, return the node id of a directory entry
172/// located at a given path within that directory, if it exists.
173///
174/// Slashes (`/`) contained in `path` are interpreted as path separators.
175///
176/// See [resolve_name] for a non-recursive version of this function.
177///
178/// ```ignore
179/// if let Ok(Some(node)) == resolve_path(&graph, 42, "src/main.c") {
180///     // do something with node
181/// }
182/// ```
183pub fn resolve_path<G>(graph: &G, dir: NodeId, path: impl AsRef<[u8]>) -> Result<Option<NodeId>>
184where
185    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
186    <G as SwhGraphWithProperties>::LabelNames: properties::LabelNames,
187    <G as SwhGraphWithProperties>::Maps: properties::Maps,
188{
189    let props = graph.properties();
190    let path = path
191        .as_ref()
192        .split(|byte| *byte == b'/')
193        .map(|name| {
194            props
195                .label_name_id(name)
196                .with_context(|| msg_no_label_name_id(name))
197        })
198        .collect::<Result<Vec<LabelNameId>, _>>()?;
199    resolve_path_by_id(&graph, dir, path)
200}
201
202/// Same as [resolve_path], but using as path a sequence of pre-resolved
203/// [LabelNameId]-s. Using this function is more efficient in case the same path
204/// (e.g., "src/main.c") is to be looked up in many directories.
205pub fn resolve_path_by_id<G, P: IdPath>(graph: &G, dir: NodeId, path: P) -> Result<Option<NodeId>>
206where
207    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
208    <G as SwhGraphWithProperties>::Maps: properties::Maps,
209{
210    let mut cur_entry = dir;
211    for name in path.iter() {
212        match resolve_name_by_id(graph, cur_entry, name)? {
213            None => return Ok(None),
214            Some(entry) => cur_entry = entry,
215        }
216    }
217    Ok(Some(cur_entry))
218}
219
220/// Recursive representation of a directory tree, ignoring sharing.
221///
222/// Note that a `Revision` variant can in fact point to either revision or
223/// release nodes.
224#[derive(Debug, Default, PartialEq)]
225pub enum FsTree {
226    #[default]
227    Content,
228    Directory(HashMap<Vec<u8>, (FsTree, Option<Permission>)>),
229    Revision(NodeId),
230}
231
232/// Given a graph and a directory node in it (usually, but not necessarily, the
233/// *root* directory of a repository), return a recursive list of the contained
234/// files and directories.
235///
236/// Note that symlinks are not followed during listing and are reported as
237/// files in the returned tree. To recognize them as links, check the
238/// permissions of the associated directory entries.
239pub fn ls_tree<G>(graph: &G, dir: NodeId) -> Result<FsTree>
240where
241    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
242    <G as SwhGraphWithProperties>::LabelNames: properties::LabelNames,
243    <G as SwhGraphWithProperties>::Maps: properties::Maps,
244{
245    let props = graph.properties();
246    let node_type = props.node_type(dir);
247    ensure!(
248        node_type == NodeType::Directory,
249        "Type of {dir} should be dir, but is {node_type} instead"
250    );
251
252    let mut dir_entries = HashMap::new();
253    for (succ, labels) in graph.labeled_successors(dir) {
254        let node_type = props.node_type(succ);
255        for label in labels {
256            if let EdgeLabel::DirEntry(dentry) = label {
257                let file_name = props.label_name(dentry.label_name_id());
258                let perm = dentry.permission();
259                match node_type {
260                    NodeType::Content => {
261                        dir_entries.insert(file_name, (FsTree::Content, perm));
262                    }
263                    NodeType::Directory => {
264                        // recurse into subdir
265                        if let Ok(subdir) = ls_tree(graph, succ) {
266                            dir_entries.insert(file_name, (subdir, perm));
267                        } else {
268                            warn!("Cannot list (sub-)directory {succ}, skipping it");
269                        }
270                    }
271                    NodeType::Revision | NodeType::Release => {
272                        dir_entries.insert(file_name, (FsTree::Revision(succ), perm));
273                    }
274                    NodeType::Origin | NodeType::Snapshot => {
275                        warn!("Ignoring dir entry with unexpected type {node_type}");
276                    }
277                }
278            }
279        }
280    }
281
282    Ok(FsTree::Directory(dir_entries))
283}