Skip to main content

jj_lib/
file_util.rs

1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::borrow::Cow;
18use std::ffi::OsString;
19use std::fs;
20use std::fs::File;
21use std::io;
22use std::io::ErrorKind;
23use std::io::Write;
24use std::path::Component;
25use std::path::Path;
26use std::path::PathBuf;
27
28use futures::AsyncRead;
29use futures::AsyncReadExt as _;
30use tempfile::NamedTempFile;
31use tempfile::PersistError;
32use thiserror::Error;
33
34#[cfg(unix)]
35pub use self::platform::check_executable_bit_support;
36pub use self::platform::check_symlink_support;
37pub use self::platform::symlink_dir;
38pub use self::platform::symlink_file;
39
/// I/O error paired with the filesystem path that was being accessed.
///
/// The `path` field holds the path the failed operation was given and is
/// included in the display message. The `source` field holds the underlying
/// [`io::Error`].
#[derive(Debug, Error)]
#[error("Cannot access {path}")]
pub struct PathError {
    pub path: PathBuf,
    pub source: io::Error,
}
46
/// Extension trait that attaches a path to the error of an I/O result.
pub trait IoResultExt<T> {
    /// Converts the error, if any, into a [`PathError`] recording `path`.
    ///
    /// A successful value is passed through unchanged.
    fn context(self, path: impl AsRef<Path>) -> Result<T, PathError>;
}
50
51impl<T> IoResultExt<T> for io::Result<T> {
52    fn context(self, path: impl AsRef<Path>) -> Result<T, PathError> {
53        self.map_err(|error| PathError {
54            path: path.as_ref().to_path_buf(),
55            source: error,
56        })
57    }
58}
59
/// Ensures that `dirname` exists as a directory, creating it if needed.
///
/// Only the last path component is created, so the call fails if any parent
/// directory is missing. A creation failure is ignored when `dirname` turns
/// out to be a directory already; any other failure is returned as is.
pub fn create_or_reuse_dir(dirname: &Path) -> io::Result<()> {
    let Err(error) = fs::create_dir(dirname) else {
        return Ok(());
    };
    // The directory may have existed beforehand, which is fine for the caller.
    if dirname.is_dir() {
        Ok(())
    } else {
        Err(error)
    }
}
72
73/// Removes all files in the directory, but not the directory itself.
74///
75/// The directory must exist, and there should be no sub directories.
76pub fn remove_dir_contents(dirname: &Path) -> Result<(), PathError> {
77    for entry in dirname.read_dir().context(dirname)? {
78        let entry = entry.context(dirname)?;
79        let path = entry.path();
80        fs::remove_file(&path).context(&path)?;
81    }
82    Ok(())
83}
84
85/// Checks if path points at an empty directory.
86pub fn is_empty_dir(path: &Path) -> Result<bool, PathError> {
87    match path.read_dir() {
88        Ok(mut entries) => Ok(entries.next().is_none()),
89        Err(error) => match error.kind() {
90            ErrorKind::NotADirectory => Ok(false),
91            ErrorKind::NotFound => Ok(false),
92            _ => Err(error).context(path)?,
93        },
94    }
95}
96
/// Error returned when a path and its byte representation cannot be converted
/// into each other.
///
/// This is a transparent wrapper around the platform-specific encoding error.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BadPathEncoding(platform::BadOsStrEncoding);
100
101/// Constructs [`Path`] from `bytes` in platform-specific manner.
102///
103/// On Unix, this function never fails because paths are just bytes. On Windows,
104/// this may return error if the input wasn't well-formed UTF-8.
105pub fn path_from_bytes(bytes: &[u8]) -> Result<&Path, BadPathEncoding> {
106    let s = platform::os_str_from_bytes(bytes).map_err(BadPathEncoding)?;
107    Ok(Path::new(s))
108}
109
110/// Converts `path` to bytes in platform-specific manner.
111///
112/// On Unix, this function never fails because paths are just bytes. On Windows,
113/// this may return error if the input wasn't well-formed UTF-8.
114///
115/// The returned byte sequence can be considered a superset of ASCII (such as
116/// UTF-8 bytes.)
117pub fn path_to_bytes(path: &Path) -> Result<&[u8], BadPathEncoding> {
118    platform::os_str_to_bytes(path.as_ref()).map_err(BadPathEncoding)
119}
120
121/// Expands "~/" to the user's home directory.
122pub fn expand_home_path(path_str: &str) -> PathBuf {
123    if let Some(remainder) = path_str.strip_prefix("~/")
124        && let Ok(home_dir) = etcetera::home_dir()
125    {
126        return home_dir.join(remainder);
127    }
128    PathBuf::from(path_str)
129}
130
/// Computes a relative path leading from the `from` path to the `to` path.
///
/// Both paths are supposed to be absolute and normalized in the same manner.
/// The result is "." if the paths are equal. If `to` isn't located under any
/// ancestor of `from`, `to` itself is returned.
pub fn relative_path(from: &Path, to: &Path) -> PathBuf {
    // Walk up from `from` until reaching a directory that contains `to`. The
    // index of that ancestor is the number of ".." steps needed to get there.
    let common = from
        .ancestors()
        .enumerate()
        .find_map(|(depth, base)| Some((depth, to.strip_prefix(base).ok()?)));
    match common {
        Some((0, rest)) if rest.as_os_str().is_empty() => PathBuf::from("."),
        Some((depth, rest)) => {
            let mut relative = PathBuf::new();
            for _ in 0..depth {
                relative.push("..");
            }
            relative.push(rest);
            relative
        }
        // No common prefix found. Return the original (absolute) path.
        None => to.to_owned(),
    }
}
152
/// Resolves `.` and `..` components lexically, without looking at symlinks.
///
/// A `..` cancels the preceding normal component. Any `..` that has nothing
/// to cancel (for example at the start of a relative path) is kept. An empty
/// result is returned as ".".
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Only a normal name can be cancelled. A trailing ".." (or a
                // root/prefix) must stay, so ".." is appended in that case.
                let ends_with_name = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if ends_with_name {
                    let popped = normalized.pop();
                    assert!(popped);
                } else {
                    normalized.push(component);
                }
            }
            other => normalized.push(other),
        }
    }

    if normalized.as_os_str().is_empty() {
        PathBuf::from(".")
    } else {
        normalized
    }
}
178
179/// Converts the given `path` to Unix-like path separated by "/".
180///
181/// The returned path might not work on Windows if it was canonicalized. On
182/// Unix, this function is noop.
183pub fn slash_path(path: &Path) -> Cow<'_, Path> {
184    if cfg!(windows) {
185        Cow::Owned(to_slash_separated(path).into())
186    } else {
187        Cow::Borrowed(path)
188    }
189}
190
/// Joins the components of `path` with "/".
///
/// The root directory is written as a single "/", and no separator is
/// inserted after a prefix (such as a Windows drive) or after the root.
/// Consequently, `/foo` stays `/foo` instead of becoming `//foo`, and a
/// drive-relative `C:foo` isn't turned into the absolute `C:/foo`. The prefix
/// itself is copied verbatim.
fn to_slash_separated(path: &Path) -> OsString {
    let mut buf = OsString::with_capacity(path.as_os_str().len());
    // Whether the next name needs a "/" in front of it.
    let mut needs_separator = false;
    for component in path.components() {
        match component {
            Component::Prefix(prefix) => {
                buf.push(prefix.as_os_str());
                needs_separator = false;
            }
            Component::RootDir => {
                // The root acts as the separator for the following name.
                buf.push("/");
                needs_separator = false;
            }
            Component::CurDir | Component::ParentDir | Component::Normal(_) => {
                if needs_separator {
                    buf.push("/");
                }
                buf.push(component);
                needs_separator = true;
            }
        }
    }
    buf
}
204
205/// Persists the temporary file after synchronizing the content.
206///
207/// After system crash, the persisted file should have a valid content if
208/// existed. However, the persisted file name (or directory entry) could be
209/// lost. It's up to caller to synchronize the directory entries.
210///
211/// See also <https://lwn.net/Articles/457667/> for the behavior on Linux.
212pub fn persist_temp_file<P: AsRef<Path>>(
213    temp_file: NamedTempFile,
214    new_path: P,
215) -> io::Result<File> {
216    // Ensure persisted file content is flushed to disk.
217    temp_file.as_file().sync_data()?;
218    temp_file
219        .persist(new_path)
220        .map_err(|PersistError { error, file: _ }| error)
221}
222
223/// Like [`persist_temp_file()`], but doesn't try to overwrite the existing
224/// target on Windows.
225pub fn persist_content_addressed_temp_file<P: AsRef<Path>>(
226    temp_file: NamedTempFile,
227    new_path: P,
228) -> io::Result<File> {
229    // Ensure new file content is flushed to disk, so the old file content
230    // wouldn't be lost if existed at the same location.
231    temp_file.as_file().sync_data()?;
232    if cfg!(windows) {
233        // On Windows, overwriting file can fail if the file is opened without
234        // FILE_SHARE_DELETE for example. We don't need to take a risk if the
235        // file already exists.
236        match temp_file.persist_noclobber(&new_path) {
237            Ok(file) => Ok(file),
238            Err(PersistError { error, file: _ }) => {
239                if let Ok(existing_file) = File::open(new_path) {
240                    // TODO: Update mtime to help GC keep this file
241                    Ok(existing_file)
242                } else {
243                    Err(error)
244                }
245            }
246        }
247    } else {
248        // On Unix, rename() is atomic and should succeed even if the
249        // destination file exists. Checking if the target exists might involve
250        // non-atomic operation, so don't use persist_noclobber().
251        temp_file
252            .persist(new_path)
253            .map_err(|PersistError { error, file: _ }| error)
254    }
255}
256
257/// Opaque value that can be tested to know whether file or directory paths
258/// point to the same filesystem entity.
259///
260/// The primary use case is to detect file name aliases on case-insensitive
261/// filesystem. On Unix, device and inode numbers are compared.
262#[derive(Debug, Eq, Hash, PartialEq)]
263pub struct FileIdentity(platform::FileIdentity);
264
265impl FileIdentity {
266    /// Queries file identity without following symlinks.
267    ///
268    /// BUG: On Windows, symbolic links would be followed.
269    pub fn from_symlink_path(path: impl AsRef<Path>) -> io::Result<Self> {
270        platform::file_identity_from_symlink_path(path.as_ref()).map(Self)
271    }
272
273    /// Queries file identity of the given `file`.
274    // TODO: do not consume file object
275    pub fn from_file(file: File) -> io::Result<Self> {
276        platform::file_identity_from_file(file).map(Self)
277    }
278}
279
280/// Reads from an async source and writes to a sync destination. Does not spawn
281/// a task, so writes will block.
282pub async fn copy_async_to_sync<R: AsyncRead, W: Write + ?Sized>(
283    reader: R,
284    writer: &mut W,
285) -> io::Result<usize> {
286    let mut buf = vec![0; 16 << 10];
287    let mut total_written_bytes = 0;
288
289    let mut reader = std::pin::pin!(reader);
290    loop {
291        let written_bytes = reader.read(&mut buf).await?;
292        if written_bytes == 0 {
293            return Ok(total_written_bytes);
294        }
295        writer.write_all(&buf[0..written_bytes])?;
296        total_written_bytes += written_bytes;
297    }
298}
299
300#[cfg(unix)]
301mod platform {
302    use std::convert::Infallible;
303    use std::ffi::OsStr;
304    use std::fs;
305    use std::fs::File;
306    use std::io;
307    use std::os::unix::ffi::OsStrExt as _;
308    use std::os::unix::fs::MetadataExt as _;
309    use std::os::unix::fs::PermissionsExt;
310    use std::os::unix::fs::symlink;
311    use std::path::Path;
312
313    pub type BadOsStrEncoding = Infallible;
314
315    pub fn os_str_from_bytes(data: &[u8]) -> Result<&OsStr, BadOsStrEncoding> {
316        Ok(OsStr::from_bytes(data))
317    }
318
319    pub fn os_str_to_bytes(data: &OsStr) -> Result<&[u8], BadOsStrEncoding> {
320        Ok(data.as_bytes())
321    }
322
323    /// Whether changing executable bits is permitted on the filesystem of this
324    /// directory, and whether attempting to flip one has an observable effect.
325    pub fn check_executable_bit_support(path: impl AsRef<Path>) -> io::Result<bool> {
326        // Get current permissions and try to flip just the user's executable bit.
327        let temp_file = tempfile::tempfile_in(path)?;
328        let old_mode = temp_file.metadata()?.permissions().mode();
329        let new_mode = old_mode ^ 0o100;
330        let result = temp_file.set_permissions(PermissionsExt::from_mode(new_mode));
331        match result {
332            // If permission was denied, we do not have executable bit support.
333            Err(err) if err.kind() == io::ErrorKind::PermissionDenied => Ok(false),
334            Err(err) => Err(err),
335            Ok(()) => {
336                // Verify that the permission change was not silently ignored.
337                let mode = temp_file.metadata()?.permissions().mode();
338                Ok(mode == new_mode)
339            }
340        }
341    }
342
343    /// Symlinks are always available on Unix.
344    pub fn check_symlink_support() -> io::Result<bool> {
345        Ok(true)
346    }
347
348    /// Creates a new symlink `link` pointing to the `original` path.
349    ///
350    /// On Unix, the `original` path doesn't have to be a directory.
351    pub fn symlink_dir<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Result<()> {
352        symlink(original, link)
353    }
354
355    /// Creates a new symlink `link` pointing to the `original` path.
356    ///
357    /// On Unix, the `original` path doesn't have to be a file.
358    pub fn symlink_file<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Result<()> {
359        symlink(original, link)
360    }
361
362    #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
363    pub struct FileIdentity {
364        // https://github.com/BurntSushi/same-file/blob/1.0.6/src/unix.rs#L30
365        dev: u64,
366        ino: u64,
367    }
368
369    impl FileIdentity {
370        fn from_metadata(metadata: fs::Metadata) -> Self {
371            Self {
372                dev: metadata.dev(),
373                ino: metadata.ino(),
374            }
375        }
376    }
377
378    pub fn file_identity_from_symlink_path(path: &Path) -> io::Result<FileIdentity> {
379        path.symlink_metadata().map(FileIdentity::from_metadata)
380    }
381
382    pub fn file_identity_from_file(file: File) -> io::Result<FileIdentity> {
383        file.metadata().map(FileIdentity::from_metadata)
384    }
385}
386
#[cfg(windows)]
mod platform {
    use std::fs::File;
    use std::io;
    pub use std::os::windows::fs::symlink_dir;
    pub use std::os::windows::fs::symlink_file;
    use std::path::Path;

    use winreg::RegKey;
    use winreg::enums::HKEY_LOCAL_MACHINE;

    pub use super::fallback::BadOsStrEncoding;
    pub use super::fallback::os_str_from_bytes;
    pub use super::fallback::os_str_to_bytes;

    /// Reports whether symlinks can be created on this Windows system.
    ///
    /// Symlinks may or may not be enabled on Windows. They require the
    /// Developer Mode setting, which is stored in the registry key read here.
    /// An error is returned if the key or the value cannot be read.
    ///
    /// Note: If developer mode is not enabled, the error code of symlink
    /// creation will be 1314, `ERROR_PRIVILEGE_NOT_HELD`.
    pub fn check_symlink_support() -> io::Result<bool> {
        const APP_MODEL_UNLOCK_KEY: &str =
            "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock";
        const DEVELOPER_MODE_VALUE: &str = "AllowDevelopmentWithoutDevLicense";

        let hklm = RegKey::predef(HKEY_LOCAL_MACHINE);
        let app_model_unlock = hklm.open_subkey(APP_MODEL_UNLOCK_KEY)?;
        let developer_mode: u32 = app_model_unlock.get_value(DEVELOPER_MODE_VALUE)?;
        Ok(developer_mode == 1)
    }

    pub type FileIdentity = same_file::Handle;

    // FIXME: This shouldn't follow symlinks when querying file identity.
    // Perhaps, we need to open file with FILE_FLAG_BACKUP_SEMANTICS and
    // FILE_FLAG_OPEN_REPARSE_POINT, then pass it to from_file(). Alternatively,
    // maybe we can use symlink_metadata(), volume_serial_number(), and
    // file_index() when they get stabilized. See the same-file crate and std
    // lstat() implementation. https://github.com/rust-lang/rust/issues/63010
    pub fn file_identity_from_symlink_path(path: &Path) -> io::Result<FileIdentity> {
        FileIdentity::from_path(path)
    }

    /// Queries the identity of the opened `file`.
    pub fn file_identity_from_file(file: File) -> io::Result<FileIdentity> {
        FileIdentity::from_file(file)
    }
}
431
432#[cfg_attr(unix, expect(dead_code))]
433mod fallback {
434    use std::ffi::OsStr;
435
436    use thiserror::Error;
437
438    // Define error per platform so we can explicitly say UTF-8 is expected.
439    #[derive(Debug, Error)]
440    #[error("Invalid UTF-8 sequence")]
441    pub struct BadOsStrEncoding;
442
443    pub fn os_str_from_bytes(data: &[u8]) -> Result<&OsStr, BadOsStrEncoding> {
444        Ok(str::from_utf8(data).map_err(|_| BadOsStrEncoding)?.as_ref())
445    }
446
447    pub fn os_str_to_bytes(data: &OsStr) -> Result<&[u8], BadOsStrEncoding> {
448        Ok(data.to_str().ok_or(BadOsStrEncoding)?.as_ref())
449    }
450}
451
// Unit tests for the path and file helpers of this module.
#[cfg(test)]
mod tests {
    use std::io::Write as _;

    use futures::io::Cursor;
    use itertools::Itertools as _;
    use pollster::FutureExt as _;
    use test_case::test_case;

    use super::*;
    use crate::tests::TestResult;
    use crate::tests::new_temp_dir;

    #[test]
    #[cfg(unix)]
    fn exec_bit_support_in_temp_dir() -> TestResult {
        // Temporary directories on Unix should always have executable support.
        // Note that it would be problematic to test in a non-temp directory, as
        // a developer's filesystem may or may not have executable bit support.
        let dir = new_temp_dir();
        let supported = check_executable_bit_support(dir.path())?;
        assert!(supported);
        Ok(())
    }

    // Bytes converted to a path and back should be unchanged. Non-UTF-8 input
    // is accepted on Unix only.
    #[test]
    fn test_path_bytes_roundtrip() -> TestResult {
        let bytes = b"ascii";
        let path = path_from_bytes(bytes)?;
        assert_eq!(path_to_bytes(path)?, bytes);

        let bytes = b"utf-8.\xc3\xa0";
        let path = path_from_bytes(bytes)?;
        assert_eq!(path_to_bytes(path)?, bytes);

        let bytes = b"latin1.\xe0";
        if cfg!(unix) {
            let path = path_from_bytes(bytes)?;
            assert_eq!(path_to_bytes(path)?, bytes);
        } else {
            assert!(path_from_bytes(bytes).is_err());
        }
        Ok(())
    }

    // ".." components that cannot be cancelled by a preceding name are kept.
    #[test]
    fn normalize_too_many_dot_dot() {
        assert_eq!(normalize_path(Path::new("foo/..")), Path::new("."));
        assert_eq!(normalize_path(Path::new("foo/../..")), Path::new(".."));
        assert_eq!(
            normalize_path(Path::new("foo/../../..")),
            Path::new("../..")
        );
        assert_eq!(
            normalize_path(Path::new("foo/../../../bar/baz/..")),
            Path::new("../../bar")
        );
    }

    // Backslashes are separators only on Windows, so they are converted to
    // "/" there and left alone elsewhere.
    #[test]
    fn test_slash_path() {
        assert_eq!(slash_path(Path::new("")), Path::new(""));
        assert_eq!(slash_path(Path::new("foo")), Path::new("foo"));
        assert_eq!(slash_path(Path::new("foo/bar")), Path::new("foo/bar"));
        assert_eq!(slash_path(Path::new("foo/bar/..")), Path::new("foo/bar/.."));
        assert_eq!(
            slash_path(Path::new(r"foo\bar")),
            if cfg!(windows) {
                Path::new("foo/bar")
            } else {
                Path::new(r"foo\bar")
            }
        );
        assert_eq!(
            slash_path(Path::new(r"..\foo\bar")),
            if cfg!(windows) {
                Path::new("../foo/bar")
            } else {
                Path::new(r"..\foo\bar")
            }
        );
    }

    // Persisting to a path where no file exists yet should succeed.
    #[test]
    fn test_persist_no_existing_file() -> TestResult {
        let temp_dir = new_temp_dir();
        let target = temp_dir.path().join("file");
        let mut temp_file = NamedTempFile::new_in(&temp_dir)?;
        temp_file.write_all(b"contents")?;
        assert!(persist_content_addressed_temp_file(temp_file, target).is_ok());
        Ok(())
    }

    // Persisting should succeed when the target already exists, whether or
    // not the existing file is still held open.
    #[test_case(false ; "existing file open")]
    #[test_case(true ; "existing file closed")]
    fn test_persist_target_exists(existing_file_closed: bool) -> TestResult {
        let temp_dir = new_temp_dir();
        let target = temp_dir.path().join("file");
        let mut temp_file = NamedTempFile::new_in(&temp_dir)?;
        temp_file.write_all(b"contents")?;

        let mut file = File::create(&target)?;
        file.write_all(b"contents")?;
        if existing_file_closed {
            drop(file);
        }

        assert!(persist_content_addressed_temp_file(temp_file, &target).is_ok());
        Ok(())
    }

    // Hard links share the identity of the linked file, whereas an unrelated
    // file has a different identity.
    #[test]
    fn test_file_identity_hard_link() -> TestResult {
        let temp_dir = new_temp_dir();
        let file_path = temp_dir.path().join("file");
        let other_file_path = temp_dir.path().join("other_file");
        let link_path = temp_dir.path().join("link");
        fs::write(&file_path, "")?;
        fs::write(&other_file_path, "")?;
        fs::hard_link(&file_path, &link_path)?;
        assert_eq!(
            FileIdentity::from_symlink_path(&file_path)?,
            FileIdentity::from_symlink_path(&link_path)?
        );
        assert_ne!(
            FileIdentity::from_symlink_path(&other_file_path)?,
            FileIdentity::from_symlink_path(&link_path)?
        );
        assert_eq!(
            FileIdentity::from_symlink_path(&file_path)?,
            FileIdentity::from_file(File::open(&link_path)?)?
        );
        Ok(())
    }

    #[cfg(unix)]
    #[test]
    fn test_file_identity_unix_symlink_dir() -> TestResult {
        let temp_dir = new_temp_dir();
        let dir_path = temp_dir.path().join("dir");
        let symlink_path = temp_dir.path().join("symlink");
        fs::create_dir(&dir_path)?;
        std::os::unix::fs::symlink("dir", &symlink_path)?;
        // symlink should be identical to itself
        assert_eq!(
            FileIdentity::from_symlink_path(&symlink_path)?,
            FileIdentity::from_symlink_path(&symlink_path)?
        );
        // symlink should be different from the target directory
        assert_ne!(
            FileIdentity::from_symlink_path(&dir_path)?,
            FileIdentity::from_symlink_path(&symlink_path)?
        );
        // File::open() follows symlinks
        assert_eq!(
            FileIdentity::from_symlink_path(&dir_path)?,
            FileIdentity::from_file(File::open(&symlink_path)?)?
        );
        assert_ne!(
            FileIdentity::from_symlink_path(&symlink_path)?,
            FileIdentity::from_file(File::open(&symlink_path)?)?
        );
        Ok(())
    }

    #[cfg(unix)]
    #[test]
    fn test_file_identity_unix_symlink_loop() -> TestResult {
        let temp_dir = new_temp_dir();
        let lower_file_path = temp_dir.path().join("file");
        let upper_file_path = temp_dir.path().join("FILE");
        let lower_symlink_path = temp_dir.path().join("symlink");
        let upper_symlink_path = temp_dir.path().join("SYMLINK");
        fs::write(&lower_file_path, "")?;
        // The symlink points to itself, so it can never be resolved.
        std::os::unix::fs::symlink("symlink", &lower_symlink_path)?;
        // If the upper-case name resolves to the lower-case file, the
        // filesystem is treated as case-insensitive.
        let is_icase_fs = upper_file_path.try_exists()?;
        // symlink should be identical to itself
        assert_eq!(
            FileIdentity::from_symlink_path(&lower_symlink_path)?,
            FileIdentity::from_symlink_path(&lower_symlink_path)?
        );
        assert_ne!(
            FileIdentity::from_symlink_path(&lower_symlink_path)?,
            FileIdentity::from_symlink_path(&lower_file_path)?
        );
        if is_icase_fs {
            assert_eq!(
                FileIdentity::from_symlink_path(&lower_symlink_path)?,
                FileIdentity::from_symlink_path(&upper_symlink_path)?
            );
        } else {
            assert!(FileIdentity::from_symlink_path(&upper_symlink_path).is_err());
        }
        Ok(())
    }

    // Input that fits in a single copy buffer.
    #[test]
    fn test_copy_async_to_sync_small() -> TestResult {
        let input = b"hello";
        let mut output = vec![];

        let result = copy_async_to_sync(Cursor::new(&input), &mut output).block_on();
        assert!(result.is_ok());
        assert_eq!(result?, 5);
        assert_eq!(output, input);
        Ok(())
    }

    #[test]
    fn test_copy_async_to_sync_large() -> TestResult {
        // More than 1 buffer worth of data
        let input = (0..100u8).cycle().take(40000).collect_vec();
        let mut output = vec![];

        let result = copy_async_to_sync(Cursor::new(&input), &mut output).block_on();
        assert!(result.is_ok());
        assert_eq!(result?, 40000);
        assert_eq!(output, input);
        Ok(())
    }
}