iroh_blobs/util/
fs.rs

1//! Utilities for filesystem operations.
2use std::{
3    borrow::Cow,
4    fs::read_dir,
5    path::{Component, Path, PathBuf},
6};
7
8use anyhow::{bail, Context};
9use bytes::Bytes;
/// A named data source backed by a file path.
///
/// Values are built through [`DataSource::new`] (name derived from the path)
/// or [`DataSource::with_name`] (name chosen by the caller).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DataSource {
    /// Blob name: either supplied explicitly or taken from the final
    /// component of `path`.
    name: String,
    /// Location of the file this source refers to.
    path: PathBuf,
}
18
19impl DataSource {
20    /// Creates a new [`DataSource`] from a [`PathBuf`].
21    pub fn new(path: PathBuf) -> Self {
22        let name = path
23            .file_name()
24            .map(|s| s.to_string_lossy().to_string())
25            .unwrap_or_default();
26        DataSource { path, name }
27    }
28    /// Creates a new [`DataSource`] from a [`PathBuf`] and a custom name.
29    pub fn with_name(path: PathBuf, name: String) -> Self {
30        DataSource { path, name }
31    }
32
33    /// Returns blob name for this data source.
34    ///
35    /// If no name was provided when created it is derived from the path name.
36    pub fn name(&self) -> Cow<'_, str> {
37        Cow::Borrowed(&self.name)
38    }
39
40    /// Returns the path of this data source.
41    pub fn path(&self) -> &Path {
42        &self.path
43    }
44}
45
46impl From<PathBuf> for DataSource {
47    fn from(value: PathBuf) -> Self {
48        DataSource::new(value)
49    }
50}
51
52impl From<&std::path::Path> for DataSource {
53    fn from(value: &std::path::Path) -> Self {
54        DataSource::new(value.to_path_buf())
55    }
56}
57
/// Creates data sources from a path that is either a directory or a single
/// file.
///
/// A directory is handed to [`scan_dir`] unchanged. Anything else is treated
/// as a single file and yields exactly one [`DataSource`], named by the name
/// carried in `wrap` or, if `wrap` carries none, by the file's own name.
///
/// # Errors
///
/// Fails when `path` is not a directory and `wrap` is `WrapOption::NoWrap`,
/// or when the file name cannot be determined or converted to a string.
#[cfg(feature = "rpc")]
pub fn scan_path(
    path: PathBuf,
    wrap: crate::rpc::client::blobs::WrapOption,
) -> anyhow::Result<Vec<DataSource>> {
    use crate::rpc::client::blobs::WrapOption;

    if path.is_dir() {
        return scan_dir(path, wrap);
    }

    let name = match wrap {
        WrapOption::Wrap { name: Some(custom) } => custom,
        WrapOption::Wrap { name: None } => file_name(&path)?,
        WrapOption::NoWrap => bail!("Cannot scan a file without wrapping"),
    };
    Ok(vec![DataSource { name, path }])
}
76
/// Returns the final component of `path` as a string.
///
/// Fails with "path is invalid" when the path has no final component;
/// otherwise the conversion (and any error from it) is that of
/// [`relative_canonicalized_path_to_string`].
#[cfg(feature = "rpc")]
fn file_name(path: &Path) -> anyhow::Result<String> {
    let last = path.file_name().context("path is invalid")?;
    relative_canonicalized_path_to_string(last)
}
81
/// Creates data sources from every regular file below a directory.
///
/// Each file's name is its path relative to `root`, with components joined
/// by `/`. When `wrap` requests wrapping, the name is additionally prefixed
/// with `<prefix>/`, where the prefix is the name carried in `wrap` or, if it
/// carries none, the final component of `root`.
///
/// Entries that are not regular files (such as symlinks) are skipped.
///
/// # Errors
///
/// Fails when `root` is not a directory, when walking the tree fails, or when
/// a name cannot be converted by [`relative_canonicalized_path_to_string`].
/// The first error encountered is returned.
#[cfg(feature = "rpc")]
pub fn scan_dir(
    root: PathBuf,
    wrap: crate::rpc::client::blobs::WrapOption,
) -> anyhow::Result<Vec<DataSource>> {
    use crate::rpc::client::blobs::WrapOption;
    if !root.is_dir() {
        bail!("Expected {} to be a directory", root.to_string_lossy());
    }
    let prefix = match wrap {
        WrapOption::NoWrap => None,
        WrapOption::Wrap { name: None } => Some(file_name(&root)?),
        WrapOption::Wrap { name: Some(name) } => Some(name),
    };
    walkdir::WalkDir::new(&root)
        .into_iter()
        .map(|entry| {
            let entry = entry?;
            if !entry.file_type().is_file() {
                // Skip symlinks. Directories are handled by WalkDir.
                return Ok(None);
            }
            let path = entry.into_path();
            let relative = relative_canonicalized_path_to_string(path.strip_prefix(&root)?)?;
            let name = match &prefix {
                Some(prefix) => format!("{prefix}/{relative}"),
                None => relative,
            };
            anyhow::Ok(Some(DataSource { name, path }))
        })
        // `Ok(None)` entries are dropped, errors are kept so that the
        // fallible collect below stops at the first one.
        .filter_map(Result::transpose)
        .collect()
}
116
117/// This function converts a canonicalized relative path to a string, returning
118/// an error if the path is not valid unicode.
119///
120/// This function will also fail if the path is non canonical, i.e. contains
121/// `..` or `.`, or if the path components contain any windows or unix path
122/// separators.
123pub fn relative_canonicalized_path_to_string(path: impl AsRef<Path>) -> anyhow::Result<String> {
124    canonicalized_path_to_string(path, true)
125}
126
/// Loads a [`iroh::SecretKey`] from the provided file, or stores a newly
/// generated one at the given location.
///
/// An existing file is parsed as an OpenSSH private key; only Ed25519 keys
/// are accepted, anything else fails with "invalid key format".
///
/// When the file does not exist, a fresh key is generated, serialized in
/// OpenSSH format, written to a temporary file in the key's parent directory
/// (which is created if needed) and then renamed to `key_path`.
#[cfg(feature = "rpc")]
pub async fn load_secret_key(key_path: PathBuf) -> anyhow::Result<iroh::SecretKey> {
    use iroh::SecretKey;
    use ssh_key::private::{Ed25519Keypair, KeypairData, PrivateKey};
    use tokio::io::AsyncWriteExt;

    // A key file is already present: parse it and return.
    if key_path.exists() {
        let encoded = tokio::fs::read(key_path).await?;
        let parsed = PrivateKey::from_openssh(encoded)?;
        let KeypairData::Ed25519(keypair) = parsed.key_data() else {
            bail!("invalid key format");
        };
        return Ok(SecretKey::from_bytes(&keypair.private.to_bytes()));
    }

    // No key on disk yet: generate one and serialize it.
    let secret_key = SecretKey::generate(rand::rngs::OsRng);
    let keypair = Ed25519Keypair {
        public: secret_key.public().public().into(),
        private: secret_key.secret().into(),
    };
    let encoded = PrivateKey::from(keypair).to_openssh(ssh_key::LineEnding::default())?;

    // Try to canonicalize if possible
    let key_path = key_path.canonicalize().unwrap_or(key_path);
    let Some(parent_dir) = key_path.parent() else {
        bail!("no parent directory found for '{}'", key_path.display());
    };
    tokio::fs::create_dir_all(parent_dir).await?;

    // Write the key to a temporary file next to its final location.
    let (std_file, temp_path) = tempfile::NamedTempFile::new_in(parent_dir)
        .context("unable to create tempfile")?
        .into_parts();
    let mut file = tokio::fs::File::from_std(std_file);
    file.write_all(encoded.as_bytes())
        .await
        .context("unable to write keyfile")?;
    file.flush().await?;
    drop(file);

    // Move the finished file into place.
    tokio::fs::rename(temp_path, key_path)
        .await
        .context("failed to rename keyfile")?;

    Ok(secret_key)
}
178
/// Summary of the content found at a path, as produced by
/// [`path_content_info`].
#[derive(Clone, Debug)]
pub struct PathContent {
    /// Combined size in bytes of all counted files.
    pub size: u64,
    /// Number of files counted.
    pub files: u64,
}
187
188/// Walks the directory to get the total size and number of files in directory or file
189// TODO: possible combine with `scan_dir`
190pub fn path_content_info(path: impl AsRef<Path>) -> anyhow::Result<PathContent> {
191    path_content_info0(path)
192}
193
194fn path_content_info0(path: impl AsRef<Path>) -> anyhow::Result<PathContent> {
195    let mut files = 0;
196    let mut size = 0;
197    let path = path.as_ref();
198
199    if path.is_dir() {
200        for entry in read_dir(path)? {
201            let path0 = entry?.path();
202
203            match path_content_info0(path0) {
204                Ok(path_content) => {
205                    size += path_content.size;
206                    files += path_content.files;
207                }
208                Err(e) => bail!(e),
209            }
210        }
211    } else {
212        match path.try_exists() {
213            Ok(true) => {
214                size = path
215                    .metadata()
216                    .context(format!("Error reading metadata for {path:?}"))?
217                    .len();
218                files = 1;
219            }
220            Ok(false) => {
221                tracing::warn!("Not including broking symlink at {path:?}");
222            }
223            Err(e) => {
224                bail!(e);
225            }
226        }
227    }
228    Ok(PathContent { size, files })
229}
230
231/// Helper function that translates a key that was derived from the [`path_to_key`] function back
232/// into a path.
233///
234/// If `prefix` exists, it will be stripped before converting back to a path
235/// If `root` exists, will add the root as a parent to the created path
236/// Removes any null byte that has been appended to the key
237pub fn key_to_path(
238    key: impl AsRef<[u8]>,
239    prefix: Option<String>,
240    root: Option<PathBuf>,
241) -> anyhow::Result<PathBuf> {
242    let mut key = key.as_ref();
243    if key.is_empty() {
244        return Ok(PathBuf::new());
245    }
246    // if the last element is the null byte, remove it
247    if b'\0' == key[key.len() - 1] {
248        key = &key[..key.len() - 1]
249    }
250
251    let key = if let Some(prefix) = prefix {
252        let prefix = prefix.into_bytes();
253        if prefix[..] == key[..prefix.len()] {
254            &key[prefix.len()..]
255        } else {
256            anyhow::bail!("key {:?} does not begin with prefix {:?}", key, prefix);
257        }
258    } else {
259        key
260    };
261
262    let mut path = if key[0] == b'/' {
263        PathBuf::from("/")
264    } else {
265        PathBuf::new()
266    };
267    for component in key
268        .split(|c| c == &b'/')
269        .map(|c| String::from_utf8(c.into()).context("key contains invalid data"))
270    {
271        let component = component?;
272        path = path.join(component);
273    }
274
275    // add root if it exists
276    let path = if let Some(root) = root {
277        root.join(path)
278    } else {
279        path
280    };
281
282    Ok(path)
283}
284
285/// Helper function that creates a document key from a canonicalized path, removing the `root` and adding the `prefix`, if they exist
286///
287/// Appends the null byte to the end of the key.
288pub fn path_to_key(
289    path: impl AsRef<Path>,
290    prefix: Option<String>,
291    root: Option<PathBuf>,
292) -> anyhow::Result<Bytes> {
293    let path = path.as_ref();
294    let path = if let Some(root) = root {
295        path.strip_prefix(root)?
296    } else {
297        path
298    };
299    let suffix = canonicalized_path_to_string(path, false)?.into_bytes();
300    let mut key = if let Some(prefix) = prefix {
301        prefix.into_bytes().to_vec()
302    } else {
303        Vec::new()
304    };
305    key.extend(suffix);
306    key.push(b'\0');
307    Ok(key.into())
308}
309
310/// This function converts an already canonicalized path to a string.
311///
312/// If `must_be_relative` is true, the function will fail if any component of the path is
313/// `Component::RootDir`
314///
315/// This function will also fail if the path is non canonical, i.e. contains
316/// `..` or `.`, or if the path components contain any windows or unix path
317/// separators.
318pub fn canonicalized_path_to_string(
319    path: impl AsRef<Path>,
320    must_be_relative: bool,
321) -> anyhow::Result<String> {
322    let mut path_str = String::new();
323    let parts = path
324        .as_ref()
325        .components()
326        .filter_map(|c| match c {
327            Component::Normal(x) => {
328                let c = match x.to_str() {
329                    Some(c) => c,
330                    None => return Some(Err(anyhow::anyhow!("invalid character in path"))),
331                };
332
333                if !c.contains('/') && !c.contains('\\') {
334                    Some(Ok(c))
335                } else {
336                    Some(Err(anyhow::anyhow!("invalid path component {:?}", c)))
337                }
338            }
339            Component::RootDir => {
340                if must_be_relative {
341                    Some(Err(anyhow::anyhow!("invalid path component {:?}", c)))
342                } else {
343                    path_str.push('/');
344                    None
345                }
346            }
347            _ => Some(Err(anyhow::anyhow!("invalid path component {:?}", c))),
348        })
349        .collect::<anyhow::Result<Vec<_>>>()?;
350    let parts = parts.join("/");
351    path_str.push_str(&parts);
352    Ok(path_str)
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// A key produced by `path_to_key` must match the expected bytes and
    /// convert back to the original path through `key_to_path`.
    #[test]
    fn test_path_to_key_roundtrip() {
        let path = PathBuf::from("/foo/bar");
        let prefix = String::from("prefix:");
        let root = PathBuf::from("/foo");

        // (prefix, root, expected key)
        let cases: [(Option<String>, Option<PathBuf>, &'static [u8]); 3] = [
            // plain path
            (None, None, &b"/foo/bar\0"[..]),
            // including prefix
            (Some(prefix.clone()), None, &b"prefix:/foo/bar\0"[..]),
            // including prefix and root
            (Some(prefix), Some(root), &b"prefix:bar\0"[..]),
        ];

        for (prefix, root, expect_key) in cases {
            let got_key = path_to_key(&path, prefix.clone(), root.clone()).unwrap();
            assert_eq!(Bytes::from(expect_key), got_key);

            let got_path = key_to_path(got_key, prefix, root).unwrap();
            assert_eq!(path, got_path);
        }
    }

    /// Exercises relative and absolute inputs, including non-ASCII names.
    #[test]
    fn test_canonicalized_path_to_string() {
        // Relative paths come back unchanged.
        for relative in ["foo/bar", "", "foo bar/baz/bat"] {
            assert_eq!(
                canonicalized_path_to_string(relative, true).unwrap(),
                relative
            );
        }

        // A root component is rejected when a relative path is required.
        let err = canonicalized_path_to_string("/foo/bar", true).unwrap_err();
        assert_eq!(err.to_string(), "invalid path component RootDir");

        // ... and kept as a leading slash otherwise.
        assert_eq!(
            canonicalized_path_to_string("/foo/bar", false).unwrap(),
            "/foo/bar"
        );

        let path = PathBuf::from("/").join("Ü").join("⁰€™■・�").join("東京");
        assert_eq!(
            canonicalized_path_to_string(path, false).unwrap(),
            "/Ü/⁰€™■・�/東京"
        );
    }

    /// Checks size and file count for an empty directory, a flat directory
    /// and a directory with nested content.
    #[test]
    fn test_get_path_content() {
        let dir = testdir::testdir!();

        let fixtures: [(&str, &[u8]); 3] = [
            ("foo.txt", &b"hello_world"[..]),
            ("bar.txt", &b"ipsum lorem"[..]),
            ("bat.txt", &b"happy birthday"[..]),
        ];
        let fixtures_size: u64 = fixtures
            .iter()
            .map(|(_, content)| content.len() as u64)
            .sum();
        let write_fixtures = |target: &Path| {
            for (name, content) in fixtures {
                std::fs::write(target.join(name), content).unwrap();
            }
        };

        // empty directory
        let PathContent { size, files } = path_content_info(&dir).unwrap();
        assert_eq!(0, size);
        assert_eq!(0, files);

        // three files at the top level
        write_fixtures(&dir);
        let PathContent { size, files } = path_content_info(&dir).unwrap();
        assert_eq!(fixtures_size, size);
        assert_eq!(3, files);

        // create nested empty dirs
        std::fs::create_dir(dir.join("1")).unwrap();
        std::fs::create_dir(dir.join("2")).unwrap();
        let dir3 = dir.join("3");
        std::fs::create_dir(&dir3).unwrap();

        // create a nested dir w/ content
        let dir4 = dir3.join("4");
        std::fs::create_dir(&dir4).unwrap();
        write_fixtures(&dir4);

        let PathContent { size, files } = path_content_info(&dir).unwrap();
        assert_eq!(fixtures_size * 2, size);
        assert_eq!(6, files);
    }
}