jj_lib/local_working_copy.rs

// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![expect(missing_docs)]

use std::cmp::Ordering;
use std::collections::HashSet;
use std::error::Error;
use std::fs;
use std::fs::DirEntry;
use std::fs::File;
use std::fs::Metadata;
use std::fs::OpenOptions;
use std::io;
use std::io::Read as _;
use std::io::Write as _;
use std::iter;
use std::mem;
use std::ops::Range;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt as _;
use std::path::Path;
use std::path::PathBuf;
use std::slice;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::mpsc::Sender;
use std::sync::mpsc::channel;
use std::time::UNIX_EPOCH;

use either::Either;
use futures::StreamExt as _;
use itertools::EitherOrBoth;
use itertools::Itertools as _;
use once_cell::unsync::OnceCell;
use pollster::FutureExt as _;
use prost::Message as _;
use rayon::iter::IntoParallelIterator as _;
use rayon::prelude::IndexedParallelIterator as _;
use rayon::prelude::ParallelIterator as _;
use tempfile::NamedTempFile;
use thiserror::Error;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt as _;
use tracing::instrument;
use tracing::trace_span;

use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CopyId;
use crate::backend::FileId;
use crate::backend::MergedTreeId;
use crate::backend::MillisSinceEpoch;
use crate::backend::SymlinkId;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::commit::Commit;
use crate::config::ConfigGetError;
use crate::conflicts;
use crate::conflicts::ConflictMarkerStyle;
use crate::conflicts::ConflictMaterializeOptions;
use crate::conflicts::MIN_CONFLICT_MARKER_LEN;
use crate::conflicts::MaterializedTreeValue;
use crate::conflicts::choose_materialized_conflict_marker_len;
use crate::conflicts::materialize_merge_result_to_bytes;
use crate::conflicts::materialize_tree_value;
pub use crate::eol::EolConversionMode;
use crate::eol::TargetEolStrategy;
use crate::file_util::BlockingAsyncReader;
use crate::file_util::check_symlink_support;
use crate::file_util::copy_async_to_sync;
use crate::file_util::persist_temp_file;
use crate::file_util::try_symlink;
use crate::fsmonitor::FsmonitorSettings;
#[cfg(feature = "watchman")]
use crate::fsmonitor::WatchmanConfig;
#[cfg(feature = "watchman")]
use crate::fsmonitor::watchman;
use crate::gitignore::GitIgnoreFile;
use crate::lock::FileLock;
use crate::matchers::DifferenceMatcher;
use crate::matchers::EverythingMatcher;
use crate::matchers::FilesMatcher;
use crate::matchers::IntersectionMatcher;
use crate::matchers::Matcher;
use crate::matchers::PrefixMatcher;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::merge::MergedTreeValue;
use crate::merge::SameChange;
use crate::merged_tree::MergedTree;
use crate::merged_tree::MergedTreeBuilder;
use crate::merged_tree::TreeDiffEntry;
use crate::object_id::ObjectId as _;
use crate::op_store::OperationId;
use crate::ref_name::WorkspaceName;
use crate::ref_name::WorkspaceNameBuf;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponent;
use crate::settings::UserSettings;
use crate::store::Store;
use crate::tree::Tree;
use crate::working_copy::CheckoutError;
use crate::working_copy::CheckoutStats;
use crate::working_copy::LockedWorkingCopy;
use crate::working_copy::ResetError;
use crate::working_copy::SnapshotError;
use crate::working_copy::SnapshotOptions;
use crate::working_copy::SnapshotProgress;
use crate::working_copy::SnapshotStats;
use crate::working_copy::UntrackedReason;
use crate::working_copy::WorkingCopy;
use crate::working_copy::WorkingCopyFactory;
use crate::working_copy::WorkingCopyStateError;

/// On-disk state of file executable bit.
// TODO: maybe better to preserve the executable bit on all platforms, and
// ignore conditionally? #3949
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FileExecutableFlag(#[cfg(unix)] bool);

#[cfg(unix)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(executable: bool) -> Self {
        Self(executable)
    }

    pub fn unwrap_or_else(self, _: impl FnOnce() -> bool) -> bool {
        self.0
    }
}

// Windows doesn't support executable bit.
#[cfg(windows)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(_executable: bool) -> Self {
        Self()
    }

    pub fn unwrap_or_else(self, f: impl FnOnce() -> bool) -> bool {
        f()
    }
}
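
// Illustrative sketch (not from the original source): on Unix the stored bit
// round-trips, while on Windows `unwrap_or_else` discards the flag and lets
// the caller's fallback closure decide:
//
//     let flag = FileExecutableFlag::from_bool_lossy(true);
//     let executable = flag.unwrap_or_else(|| false); // true on Unix, false on Windows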

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FileType {
    Normal { executable: FileExecutableFlag },
    Symlink,
    GitSubmodule,
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct MaterializedConflictData {
    pub conflict_marker_len: u32,
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FileState {
    pub file_type: FileType,
    pub mtime: MillisSinceEpoch,
    pub size: u64,
    pub materialized_conflict_data: Option<MaterializedConflictData>,
    /* TODO: What else do we need here? Git stores a lot of fields.
     * TODO: Could possibly handle case-insensitive file systems keeping an
     *       Option<PathBuf> with the actual path here. */
}

impl FileState {
    /// Check whether a file state appears clean compared to a previous file
    /// state, ignoring materialized conflict data.
    pub fn is_clean(&self, old_file_state: &Self) -> bool {
        self.file_type == old_file_state.file_type
            && self.mtime == old_file_state.mtime
            && self.size == old_file_state.size
    }

    /// Indicates that a file exists in the tree but that it needs to be
    /// re-stat'ed on the next snapshot.
    fn placeholder() -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(false);
        Self {
            file_type: FileType::Normal { executable },
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }

    fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Self {
            file_type: FileType::Normal { executable },
            mtime: mtime_from_metadata(metadata),
            size,
            materialized_conflict_data: None,
        }
    }

    fn for_symlink(metadata: &Metadata) -> Self {
        // When using fscrypt, the reported size is not the content size. So if
        // we were to record the content size here (like we do for regular files), we
        // would end up thinking the file has changed every time we snapshot.
        Self {
            file_type: FileType::Symlink,
            mtime: mtime_from_metadata(metadata),
            size: metadata.len(),
            materialized_conflict_data: None,
        }
    }

    fn for_gitsubmodule() -> Self {
        Self {
            file_type: FileType::GitSubmodule,
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }
}
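
// Illustrative sketch (assumption, not in the original source): during a
// snapshot, a freshly stat'ed state is compared against the recorded one;
// `placeholder()` rarely matches a real file, so it forces a content re-read:
//
//     let old = FileState::placeholder();
//     let fresh = FileState::for_file(false, 42, &metadata); // `metadata` from fs::symlink_metadata
//     let changed = !fresh.is_clean(&old); // differing type/mtime/size => dirty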

/// Owned map of path to file states, backed by proto data.
#[derive(Clone, Debug)]
struct FileStatesMap {
    data: Vec<crate::protos::local_working_copy::FileStateEntry>,
}

impl FileStatesMap {
    fn new() -> Self {
        Self { data: Vec::new() }
    }

    fn from_proto(
        mut data: Vec<crate::protos::local_working_copy::FileStateEntry>,
        is_sorted: bool,
    ) -> Self {
        if !is_sorted {
            data.sort_unstable_by(|entry1, entry2| {
                let path1 = RepoPath::from_internal_string(&entry1.path).unwrap();
                let path2 = RepoPath::from_internal_string(&entry2.path).unwrap();
                path1.cmp(path2)
            });
        }
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(&data));
        Self { data }
    }

    /// Merges changed and deleted entries into this map. The changed entries
    /// must be sorted by path.
    fn merge_in(
        &mut self,
        changed_file_states: Vec<(RepoPathBuf, FileState)>,
        deleted_files: &HashSet<RepoPathBuf>,
    ) {
        if changed_file_states.is_empty() && deleted_files.is_empty() {
            return;
        }
        debug_assert!(
            changed_file_states.is_sorted_by(|(path1, _), (path2, _)| path1 < path2),
            "changed_file_states must be sorted and have no duplicates"
        );
        self.data = itertools::merge_join_by(
            mem::take(&mut self.data),
            changed_file_states,
            |old_entry, (changed_path, _)| {
                RepoPath::from_internal_string(&old_entry.path)
                    .unwrap()
                    .cmp(changed_path)
            },
        )
        .filter_map(|diff| match diff {
            EitherOrBoth::Both(_, (path, state)) | EitherOrBoth::Right((path, state)) => {
                debug_assert!(!deleted_files.contains(&path));
                Some(file_state_entry_to_proto(path, &state))
            }
            EitherOrBoth::Left(entry) => {
                let present =
                    !deleted_files.contains(RepoPath::from_internal_string(&entry.path).unwrap());
                present.then_some(entry)
            }
        })
        .collect();
    }

    fn clear(&mut self) {
        self.data.clear();
    }

    /// Returns read-only map containing all file states.
    fn all(&self) -> FileStates<'_> {
        FileStates::from_sorted(&self.data)
    }
}

/// Read-only map of path to file states, possibly filtered by path prefix.
#[derive(Clone, Copy, Debug)]
pub struct FileStates<'a> {
    data: &'a [crate::protos::local_working_copy::FileStateEntry],
}

impl<'a> FileStates<'a> {
    fn from_sorted(data: &'a [crate::protos::local_working_copy::FileStateEntry]) -> Self {
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(data));
        Self { data }
    }

    /// Returns file states under the given directory path.
    pub fn prefixed(&self, base: &RepoPath) -> Self {
        let range = self.prefixed_range(base);
        Self::from_sorted(&self.data[range])
    }

    /// Faster version of `prefixed("<dir>/<base>")`. Requires that all entries
    /// share the same prefix `dir`.
    fn prefixed_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Self {
        let range = self.prefixed_range_at(dir, base);
        Self::from_sorted(&self.data[range])
    }

    /// Returns true if this contains no entries.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns true if the given `path` exists.
    pub fn contains_path(&self, path: &RepoPath) -> bool {
        self.exact_position(path).is_some()
    }

    /// Returns file state for the given `path`.
    pub fn get(&self, path: &RepoPath) -> Option<FileState> {
        let pos = self.exact_position(path)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    /// Faster version of `get("<dir>/<name>")`. Requires that all entries share
    /// the same prefix `dir`.
    fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
        let pos = self.exact_position_at(dir, name)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    fn exact_position(&self, path: &RepoPath) -> Option<usize> {
        self.data
            .binary_search_by(|entry| {
                RepoPath::from_internal_string(&entry.path)
                    .unwrap()
                    .cmp(path)
            })
            .ok()
    }

    fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        self.data
            .binary_search_by(|entry| {
                let tail = entry.path.get(prefix_len..).unwrap_or("");
                match tail.split_once('/') {
                    // "<name>/*" > "<name>"
                    Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
                    None => tail.cmp(name.as_internal_str()),
                }
            })
            .ok()
    }

    fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
        let start = self
            .data
            .partition_point(|entry| RepoPath::from_internal_string(&entry.path).unwrap() < base);
        let len = self.data[start..].partition_point(|entry| {
            RepoPath::from_internal_string(&entry.path)
                .unwrap()
                .starts_with(base)
        });
        start..(start + len)
    }

    fn prefixed_range_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Range<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        let start = self.data.partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name < base.as_internal_str()
        });
        let len = self.data[start..].partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name == base.as_internal_str()
        });
        start..(start + len)
    }

    /// Iterates file state entries sorted by path.
    pub fn iter(&self) -> FileStatesIter<'a> {
        self.data.iter().map(file_state_entry_from_proto)
    }

    /// Iterates sorted file paths.
    pub fn paths(&self) -> impl ExactSizeIterator<Item = &'a RepoPath> + use<'a> {
        self.data
            .iter()
            .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
    }
}
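
// Illustrative sketch (assumption, not in the original source): typical
// lookups against the read-only map, given some `file_states: FileStates<'_>`
// obtained from `FileStatesMap::all()`:
//
//     let path = RepoPath::from_internal_string("src/lib.rs").unwrap();
//     let state = file_states.get(path); // Option<FileState>, via binary search
//     let src = file_states.prefixed(RepoPath::from_internal_string("src").unwrap());
//     for (path, state) in src {
//         // entries stay sorted by path; `state` was decoded from the proto entry
//     }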

type FileStatesIter<'a> = iter::Map<
    slice::Iter<'a, crate::protos::local_working_copy::FileStateEntry>,
    fn(&crate::protos::local_working_copy::FileStateEntry) -> (&RepoPath, FileState),
>;

impl<'a> IntoIterator for FileStates<'a> {
    type Item = (&'a RepoPath, FileState);
    type IntoIter = FileStatesIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

fn file_state_from_proto(proto: &crate::protos::local_working_copy::FileState) -> FileState {
    let file_type = match proto.file_type() {
        crate::protos::local_working_copy::FileType::Normal => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        // On Windows, FileType::Executable can exist in files written by older
        // versions of jj
        crate::protos::local_working_copy::FileType::Executable => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(true),
        },
        crate::protos::local_working_copy::FileType::Symlink => FileType::Symlink,
        crate::protos::local_working_copy::FileType::Conflict => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        crate::protos::local_working_copy::FileType::GitSubmodule => FileType::GitSubmodule,
    };
    FileState {
        file_type,
        mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch),
        size: proto.size,
        materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| {
            MaterializedConflictData {
                conflict_marker_len: data.conflict_marker_len,
            }
        }),
    }
}

fn file_state_to_proto(file_state: &FileState) -> crate::protos::local_working_copy::FileState {
    let mut proto = crate::protos::local_working_copy::FileState::default();
    let file_type = match &file_state.file_type {
        FileType::Normal { executable } => {
            if executable.unwrap_or_else(Default::default) {
                crate::protos::local_working_copy::FileType::Executable
            } else {
                crate::protos::local_working_copy::FileType::Normal
            }
        }
        FileType::Symlink => crate::protos::local_working_copy::FileType::Symlink,
        FileType::GitSubmodule => crate::protos::local_working_copy::FileType::GitSubmodule,
    };
    proto.file_type = file_type as i32;
    proto.mtime_millis_since_epoch = file_state.mtime.0;
    proto.size = file_state.size;
    proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| {
        crate::protos::local_working_copy::MaterializedConflictData {
            conflict_marker_len: data.conflict_marker_len,
        }
    });
    proto
}

fn file_state_entry_from_proto(
    proto: &crate::protos::local_working_copy::FileStateEntry,
) -> (&RepoPath, FileState) {
    let path = RepoPath::from_internal_string(&proto.path).unwrap();
    (path, file_state_from_proto(proto.state.as_ref().unwrap()))
}

fn file_state_entry_to_proto(
    path: RepoPathBuf,
    state: &FileState,
) -> crate::protos::local_working_copy::FileStateEntry {
    crate::protos::local_working_copy::FileStateEntry {
        path: path.into_internal_string(),
        state: Some(file_state_to_proto(state)),
    }
}

fn is_file_state_entries_proto_unique_and_sorted(
    data: &[crate::protos::local_working_copy::FileStateEntry],
) -> bool {
    data.iter()
        .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
        .is_sorted_by(|path1, path2| path1 < path2)
}

fn sparse_patterns_from_proto(
    proto: Option<&crate::protos::local_working_copy::SparsePatterns>,
) -> Vec<RepoPathBuf> {
    let mut sparse_patterns = vec![];
    if let Some(proto_sparse_patterns) = proto {
        for prefix in &proto_sparse_patterns.prefixes {
            sparse_patterns.push(RepoPathBuf::from_internal_string(prefix).unwrap());
        }
    } else {
        // For compatibility with old working copies.
        // TODO: Delete this in late 2022 or so.
        sparse_patterns.push(RepoPathBuf::root());
    }
    sparse_patterns
}

/// Creates intermediate directories from the `working_copy_path` to the
/// `repo_path` parent. Returns the disk path for the `repo_path` file.
///
/// If an intermediate directory exists and is a file or symlink, this
/// function returns `Ok(None)` to signal that the path should be skipped.
/// The `working_copy_path` directory may be a symlink.
///
/// If an existing or newly-created sub directory points to ".git" or ".jj",
/// this function returns an error.
///
/// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts.
/// Another process may remove the directory created by this function and put a
/// symlink there.
fn create_parent_dirs(
    working_copy_path: &Path,
    repo_path: &RepoPath,
) -> Result<Option<PathBuf>, CheckoutError> {
    let (parent_path, basename) = repo_path.split().expect("repo path shouldn't be root");
    let mut dir_path = working_copy_path.to_owned();
    for c in parent_path.components() {
        // Ensure that the name is a normal entry of the current dir_path.
        dir_path.push(c.to_fs_name().map_err(|err| err.with_path(repo_path))?);
        // A directory named ".git" or ".jj" can be temporarily created. It
        // might trick workspace path discovery, but is harmless so long as the
        // directory is empty.
        let new_dir_created = match fs::create_dir(&dir_path) {
            Ok(()) => true, // New directory
            Err(err) => match dir_path.symlink_metadata() {
                Ok(m) if m.is_dir() => false, // Existing directory
                Ok(_) => {
                    return Ok(None); // Skip existing file or symlink
                }
                Err(_) => {
                    return Err(CheckoutError::Other {
                        message: format!(
                            "Failed to create parent directories for {}",
                            repo_path.to_fs_path_unchecked(working_copy_path).display(),
                        ),
                        err: err.into(),
                    });
                }
            },
        };
        // Invalid component (e.g. "..") should have been rejected.
        // The current dir_path should be an entry of dir_path.parent().
        reject_reserved_existing_path(&dir_path).inspect_err(|_| {
            if new_dir_created {
                fs::remove_dir(&dir_path).ok();
            }
        })?;
    }

    let mut file_path = dir_path;
    file_path.push(
        basename
            .to_fs_name()
            .map_err(|err| err.with_path(repo_path))?,
    );
    Ok(Some(file_path))
}
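
// Illustrative sketch (assumption, not in the original source): a checkout of
// a single file would chain these helpers, treating `None`/`false` results as
// "skip this path" rather than as hard errors:
//
//     let Some(disk_path) = create_parent_dirs(working_copy_path, repo_path)? else {
//         return Ok(()); // an intermediate component is a file or symlink
//     };
//     if remove_old_file(&disk_path)? || can_create_new_file(&disk_path)? {
//         // safe to write the new file or symlink at `disk_path`
//     }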

/// Removes existing file named `disk_path` if any. Returns `Ok(true)` if the
/// file was there and got removed, meaning that a new file can safely be
/// created.
///
/// If the existing file points to ".git" or ".jj", this function returns an
/// error.
fn remove_old_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    reject_reserved_existing_path(disk_path)?;
    match fs::remove_file(disk_path) {
        Ok(()) => Ok(true),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
        // TODO: Use io::ErrorKind::IsADirectory if it gets stabilized
        Err(_) if disk_path.symlink_metadata().is_ok_and(|m| m.is_dir()) => Ok(false),
        Err(err) => Err(CheckoutError::Other {
            message: format!("Failed to remove file {}", disk_path.display()),
            err: err.into(),
        }),
    }
}

/// Checks if a new file or symlink named `disk_path` can be created.
///
/// If the file already exists, this function returns `Ok(false)` to signal
/// that the path should be skipped.
///
/// If the path may point to a ".git" or ".jj" entry, this function returns an
/// error.
///
/// This function can fail if `disk_path.parent()` isn't a directory.
fn can_create_new_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    // The new file or symlink will be created by the caller. If it were
    // pointed to by a name like ".git" or ".jj", the git/jj CLI could be
    // tricked into loading configuration from an attacker-controlled location.
    // So we first test the path by creating an empty file.
    let new_file_created = match OpenOptions::new()
        .write(true)
        .create_new(true) // Don't overwrite, don't follow symlink
        .open(disk_path)
    {
        Ok(_) => true,
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => false,
        // Workaround for "Access is denied. (os error 5)" error on Windows.
        Err(_) => match disk_path.symlink_metadata() {
            Ok(_) => false,
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to stat {}", disk_path.display()),
                    err: err.into(),
                });
            }
        },
    };
    reject_reserved_existing_path(disk_path).inspect_err(|_| {
        if new_file_created {
            fs::remove_file(disk_path).ok();
        }
    })?;
    if new_file_created {
        fs::remove_file(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to remove temporary file {}", disk_path.display()),
            err: err.into(),
        })?;
    }
    Ok(new_file_created)
}

const RESERVED_DIR_NAMES: &[&str] = &[".git", ".jj"];

/// Assuming `disk_path` exists, checks whether its last component points to
/// ".git" or ".jj" in the same parent directory.
fn reject_reserved_existing_path(disk_path: &Path) -> Result<(), CheckoutError> {
    let parent_dir_path = disk_path.parent().expect("content path shouldn't be root");
    for name in RESERVED_DIR_NAMES {
        let reserved_path = parent_dir_path.join(name);
        match same_file::is_same_file(disk_path, &reserved_path) {
            Ok(true) => {
                return Err(CheckoutError::ReservedPathComponent {
                    path: disk_path.to_owned(),
                    name,
                });
            }
            Ok(false) => {}
            // If the existing disk_path pointed to the reserved path, the
            // reserved path would exist.
            Err(err) if err.kind() == io::ErrorKind::NotFound => {}
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to validate path {}", disk_path.display()),
                    err: err.into(),
                });
            }
        }
    }
    Ok(())
}

fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch {
    let time = metadata
        .modified()
        .expect("File mtime not supported on this platform?");
    let since_epoch = time
        .duration_since(UNIX_EPOCH)
        .expect("mtime before unix epoch");

    MillisSinceEpoch(
        i64::try_from(since_epoch.as_millis())
            .expect("mtime billions of years into the future or past"),
    )
}
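
// Illustrative note (not from the original source): the conversion truncates
// to whole milliseconds, so two writes landing in the same millisecond yield
// equal `MillisSinceEpoch` values, which is one reason snapshotting can't rely
// on mtime alone to prove a file is unchanged:
//
//     let metadata = fs::symlink_metadata("README.md")?; // hypothetical path
//     let mtime = mtime_from_metadata(&metadata); // e.g. MillisSinceEpoch(1_700_000_000_000)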

fn file_state(metadata: &Metadata) -> Option<FileState> {
    let metadata_file_type = metadata.file_type();
    let file_type = if metadata_file_type.is_dir() {
        None
    } else if metadata_file_type.is_symlink() {
        Some(FileType::Symlink)
    } else if metadata_file_type.is_file() {
        #[cfg(unix)]
        let executable = metadata.permissions().mode() & 0o111 != 0;
        #[cfg(windows)]
        let executable = false;
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Some(FileType::Normal { executable })
    } else {
        None
    };
    file_type.map(|file_type| {
        let mtime = mtime_from_metadata(metadata);
        let size = metadata.len();
        FileState {
            file_type,
            mtime,
            size,
            materialized_conflict_data: None,
        }
    })
}

struct FsmonitorMatcher {
    matcher: Option<Box<dyn Matcher>>,
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,
}

/// Settings specific to the tree state of the [`LocalWorkingCopy`] backend.
#[derive(Clone, Debug)]
pub struct TreeStateSettings {
    /// Conflict marker style to use when materializing files or when checking
    /// changed files.
    pub conflict_marker_style: ConflictMarkerStyle,
    /// Whether to automatically convert CRLF line endings to LF when adding a
    /// file to the backend, and vice versa when checking files out to the
    /// filesystem.
    pub eol_conversion_mode: EolConversionMode,
    /// The fsmonitor (e.g. Watchman) to use, if any.
    pub fsmonitor_settings: FsmonitorSettings,
}

impl TreeStateSettings {
    /// Create [`TreeStateSettings`] from [`UserSettings`].
    pub fn try_from_user_settings(user_settings: &UserSettings) -> Result<Self, ConfigGetError> {
        Ok(Self {
            conflict_marker_style: user_settings.get("ui.conflict-marker-style")?,
            eol_conversion_mode: EolConversionMode::try_from_settings(user_settings)?,
            fsmonitor_settings: FsmonitorSettings::from_settings(user_settings)?,
        })
    }
}
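
// Illustrative sketch (assumption, not in the original source): callers
// typically derive these settings from the user's config once, then pass them
// to `TreeState::init`/`TreeState::load` below:
//
//     let tree_state_settings = TreeStateSettings::try_from_user_settings(&user_settings)?;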

pub struct TreeState {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    tree_id: MergedTreeId,
    file_states: FileStatesMap,
    // Currently only path prefixes
    sparse_patterns: Vec<RepoPathBuf>,
    own_mtime: MillisSinceEpoch,
    symlink_support: bool,

    /// The most recent clock value returned by Watchman. Will only be set if
    /// the repo is configured to use the Watchman filesystem monitor and
    /// Watchman has been queried at least once.
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,

    conflict_marker_style: ConflictMarkerStyle,
    fsmonitor_settings: FsmonitorSettings,
    target_eol_strategy: TargetEolStrategy,
}

#[derive(Debug, Error)]
pub enum TreeStateError {
    #[error("Reading tree state from {path}")]
    ReadTreeState { path: PathBuf, source: io::Error },
    #[error("Decoding tree state from {path}")]
    DecodeTreeState {
        path: PathBuf,
        source: prost::DecodeError,
    },
    #[error("Writing tree state to temporary file {path}")]
    WriteTreeState { path: PathBuf, source: io::Error },
    #[error("Persisting tree state to file {path}")]
    PersistTreeState { path: PathBuf, source: io::Error },
    #[error("Filesystem monitor error")]
    Fsmonitor(#[source] Box<dyn Error + Send + Sync>),
}

impl TreeState {
    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }

    pub fn current_tree_id(&self) -> &MergedTreeId {
        &self.tree_id
    }

    pub fn file_states(&self) -> FileStates<'_> {
        self.file_states.all()
    }

    pub fn sparse_patterns(&self) -> &Vec<RepoPathBuf> {
        &self.sparse_patterns
    }

    fn sparse_matcher(&self) -> Box<dyn Matcher> {
        Box::new(PrefixMatcher::new(&self.sparse_patterns))
    }

    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.save()?;
        Ok(wc)
    }

    fn empty(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        &TreeStateSettings {
            conflict_marker_style,
            eol_conversion_mode,
            ref fsmonitor_settings,
        }: &TreeStateSettings,
    ) -> Self {
        let tree_id = store.empty_merged_tree_id();
        Self {
            store,
            working_copy_path,
            state_path,
            tree_id,
            file_states: FileStatesMap::new(),
            sparse_patterns: vec![RepoPathBuf::root()],
            own_mtime: MillisSinceEpoch(0),
            symlink_support: check_symlink_support().unwrap_or(false),
            watchman_clock: None,
            conflict_marker_style,
            fsmonitor_settings: fsmonitor_settings.clone(),
            target_eol_strategy: TargetEolStrategy::new(eol_conversion_mode),
        }
    }

    pub fn load(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let tree_state_path = state_path.join("tree_state");
        let file = match File::open(&tree_state_path) {
            Err(ref err) if err.kind() == io::ErrorKind::NotFound => {
                return Self::init(store, working_copy_path, state_path, tree_state_settings);
            }
            Err(err) => {
                return Err(TreeStateError::ReadTreeState {
                    path: tree_state_path,
                    source: err,
                });
            }
            Ok(file) => file,
        };

        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.read(&tree_state_path, file)?;
        Ok(wc)
    }

    fn update_own_mtime(&mut self) {
        if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() {
            self.own_mtime = mtime_from_metadata(&metadata);
        } else {
            self.own_mtime = MillisSinceEpoch(0);
        }
    }

    fn read(&mut self, tree_state_path: &Path, mut file: File) -> Result<(), TreeStateError> {
        self.update_own_mtime();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf)
            .map_err(|err| TreeStateError::ReadTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            })?;
        let proto = crate::protos::local_working_copy::TreeState::decode(&*buf).map_err(|err| {
            TreeStateError::DecodeTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            }
        })?;
        #[expect(deprecated)]
        if proto.tree_ids.is_empty() {
            self.tree_id = MergedTreeId::resolved(TreeId::new(proto.legacy_tree_id.clone()));
        } else {
            let tree_ids_builder: MergeBuilder<TreeId> = proto
                .tree_ids
                .iter()
                .map(|id| TreeId::new(id.clone()))
                .collect();
            self.tree_id = MergedTreeId::Merge(tree_ids_builder.build());
        }
        self.file_states =
            FileStatesMap::from_proto(proto.file_states, proto.is_file_states_sorted);
        self.sparse_patterns = sparse_patterns_from_proto(proto.sparse_patterns.as_ref());
        self.watchman_clock = proto.watchman_clock;
        Ok(())
    }

    #[expect(clippy::assigning_clones)]
    pub fn save(&mut self) -> Result<(), TreeStateError> {
        let mut proto: crate::protos::local_working_copy::TreeState = Default::default();
        match &self.tree_id {
            MergedTreeId::Legacy(_) => {
                unreachable!();
            }
            MergedTreeId::Merge(tree_ids) => {
                proto.tree_ids = tree_ids.iter().map(|id| id.to_bytes()).collect();
            }
        }

        proto.file_states = self.file_states.data.clone();
        // `FileStatesMap` is guaranteed to be sorted.
        proto.is_file_states_sorted = true;
        let mut sparse_patterns = crate::protos::local_working_copy::SparsePatterns::default();
        for path in &self.sparse_patterns {
            sparse_patterns
                .prefixes
                .push(path.as_internal_file_string().to_owned());
        }
        proto.sparse_patterns = Some(sparse_patterns);
        proto.watchman_clock = self.watchman_clock.clone();

        let wrap_write_err = |source| TreeStateError::WriteTreeState {
            path: self.state_path.clone(),
            source,
        };
        let mut temp_file = NamedTempFile::new_in(&self.state_path).map_err(wrap_write_err)?;
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .map_err(wrap_write_err)?;
        // Update our own mtime before we rename the file, so we know there is
        // no unknown data in it.
        self.update_own_mtime();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        let target_path = self.state_path.join("tree_state");
        persist_temp_file(temp_file, &target_path).map_err(|source| {
            TreeStateError::PersistTreeState {
                path: target_path.clone(),
                source,
            }
        })?;
        Ok(())
    }

    fn current_tree(&self) -> BackendResult<MergedTree> {
        self.store.get_root_tree(&self.tree_id)
    }

    fn reset_watchman(&mut self) {
        self.watchman_clock.take();
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from);
        let changed_files = fsmonitor
            .query_changed_files(previous_clock)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        Ok(changed_files)
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        fsmonitor
            .is_trigger_registered()
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))
    }
}
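
// Illustrative sketch (assumption, not in the original source): opening the
// tree state for an existing workspace; `load` transparently falls back to
// `init` when no `tree_state` file exists yet:
//
//     let tree_state = TreeState::load(
//         store.clone(),
//         working_copy_path.to_owned(),
//         state_path.to_owned(),
//         &tree_state_settings,
//     )?;
//     let current = tree_state.current_tree_id(); // &MergedTreeId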

/// Functions to snapshot local-disk files to the store.
impl TreeState {
    /// Look for changes to the working copy. If there are any changes, create
    /// a new tree from it.
    #[instrument(skip_all)]
    pub fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(bool, SnapshotStats), SnapshotError> {
        let &SnapshotOptions {
            ref base_ignores,
            progress,
            start_tracking_matcher,
            max_new_file_size,
        } = options;

        let sparse_matcher = self.sparse_matcher();

        let fsmonitor_clock_needs_save = self.fsmonitor_settings != FsmonitorSettings::None;
        let mut is_dirty = fsmonitor_clock_needs_save;
        let FsmonitorMatcher {
            matcher: fsmonitor_matcher,
            watchman_clock,
        } = self.make_fsmonitor_matcher(&self.fsmonitor_settings)?;
        let fsmonitor_matcher = match fsmonitor_matcher.as_ref() {
            None => &EverythingMatcher,
            Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(),
        };

        let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher);
        if matcher.visit(RepoPath::root()).is_nothing() {
            // No need to load the current tree, set up channels, etc.
            self.watchman_clock = watchman_clock;
            return Ok((is_dirty, SnapshotStats::default()));
        }

        let (tree_entries_tx, tree_entries_rx) = channel();
        let (file_states_tx, file_states_rx) = channel();
        let (untracked_paths_tx, untracked_paths_rx) = channel();
        let (deleted_files_tx, deleted_files_rx) = channel();

        trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> {
            let snapshotter = FileSnapshotter {
                tree_state: self,
                current_tree: &self.current_tree()?,
                matcher: &matcher,
                start_tracking_matcher,
                // Move tx sides so they'll be dropped at the end of the scope.
                tree_entries_tx,
                file_states_tx,
                untracked_paths_tx,
                deleted_files_tx,
                error: OnceLock::new(),
                progress,
                max_new_file_size,
            };
            let directory_to_visit = DirectoryToVisit {
                dir: RepoPathBuf::root(),
                disk_dir: self.working_copy_path.clone(),
                git_ignore: base_ignores.clone(),
                file_states: self.file_states.all(),
            };
            // Here we use scope as a queue of per-directory jobs.
            rayon::scope(|scope| {
                snapshotter.spawn_ok(scope, |scope| {
                    snapshotter.visit_directory(directory_to_visit, scope)
                });
            });
            snapshotter.into_result()
        })?;

        let stats = SnapshotStats {
            untracked_paths: untracked_paths_rx.into_iter().collect(),
        };
        let mut tree_builder = MergedTreeBuilder::new(self.tree_id.clone());
        trace_span!("process tree entries").in_scope(|| {
            for (path, tree_values) in &tree_entries_rx {
                tree_builder.set_or_remove(path, tree_values);
            }
        });
        let deleted_files = trace_span!("process deleted tree entries").in_scope(|| {
            let deleted_files = HashSet::from_iter(deleted_files_rx);
            is_dirty |= !deleted_files.is_empty();
            for file in &deleted_files {
                tree_builder.set_or_remove(file.clone(), Merge::absent());
            }
            deleted_files
        });
        trace_span!("process file states").in_scope(|| {
            let changed_file_states = file_states_rx
                .iter()
                .sorted_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2))
                .collect_vec();
            is_dirty |= !changed_file_states.is_empty();
            self.file_states
                .merge_in(changed_file_states, &deleted_files);
        });
        trace_span!("write tree").in_scope(|| -> Result<(), BackendError> {
            let new_tree_id = tree_builder.write_tree(&self.store)?;
            is_dirty |= new_tree_id != self.tree_id;
            self.tree_id = new_tree_id;
            Ok(())
        })?;
        if cfg!(debug_assertions) {
            let tree = self.current_tree().unwrap();
            let tree_paths: HashSet<_> = tree
                .entries_matching(sparse_matcher.as_ref())
                .filter_map(|(path, result)| result.is_ok().then_some(path))
                .collect();
            let file_states = self.file_states.all();
            let state_paths: HashSet<_> = file_states.paths().map(|path| path.to_owned()).collect();
            assert_eq!(state_paths, tree_paths);
        }
        // Since untracked paths aren't cached in the tree state, we'll need to
        // rescan the working directory to report or track those changes later.
        // TODO: store untracked paths and update watchman_clock?
        if stats.untracked_paths.is_empty() || watchman_clock.is_none() {
            self.watchman_clock = watchman_clock;
        } else {
            tracing::info!("not updating watchman clock because there are untracked files");
        }
        Ok((is_dirty, stats))
    }

    #[instrument(skip_all)]
    fn make_fsmonitor_matcher(
        &self,
        fsmonitor_settings: &FsmonitorSettings,
    ) -> Result<FsmonitorMatcher, SnapshotError> {
        let (watchman_clock, changed_files) = match fsmonitor_settings {
            FsmonitorSettings::None => (None, None),
            FsmonitorSettings::Test { changed_files } => (None, Some(changed_files.clone())),
            #[cfg(feature = "watchman")]
            FsmonitorSettings::Watchman(config) => match self.query_watchman(config) {
                Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files),
                Err(err) => {
                    tracing::warn!(?err, "Failed to query filesystem monitor");
                    (None, None)
                }
            },
            #[cfg(not(feature = "watchman"))]
            FsmonitorSettings::Watchman(_) => {
                return Err(SnapshotError::Other {
                    message: "Failed to query the filesystem monitor".to_string(),
                    err: "Cannot query Watchman because jj was not compiled with the `watchman` \
                          feature (consider disabling `fsmonitor.backend`)"
                        .into(),
                });
            }
        };
        let matcher: Option<Box<dyn Matcher>> = match changed_files {
            None => None,
            Some(changed_files) => {
                let repo_paths = trace_span!("processing fsmonitor paths").in_scope(|| {
                    changed_files
                        .into_iter()
                        .filter_map(|path| RepoPathBuf::from_relative_path(path).ok())
                        .collect_vec()
                });

                Some(Box::new(FilesMatcher::new(repo_paths)))
            }
        };
        Ok(FsmonitorMatcher {
            matcher,
            watchman_clock,
        })
    }
}
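
// Illustrative sketch (assumption, not in the original source): a typical
// snapshot round-trip; `is_dirty` tells the caller whether anything (including
// the fsmonitor clock) changed and therefore whether `save()` is needed:
//
//     let (is_dirty, stats) = tree_state.snapshot(&options)?; // options: SnapshotOptions
//     for (path, reason) in &stats.untracked_paths {
//         // report files that were deliberately left untracked
//     }
//     if is_dirty {
//         tree_state.save()?;
//     }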

struct DirectoryToVisit<'a> {
    dir: RepoPathBuf,
    disk_dir: PathBuf,
    git_ignore: Arc<GitIgnoreFile>,
    file_states: FileStates<'a>,
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PresentDirEntryKind {
    Dir,
    File,
}

#[derive(Clone, Debug)]
struct PresentDirEntries {
    dirs: HashSet<String>,
    files: HashSet<String>,
}

/// Helper to scan local-disk directories and files in parallel.
struct FileSnapshotter<'a> {
    tree_state: &'a TreeState,
    current_tree: &'a MergedTree,
    matcher: &'a dyn Matcher,
    start_tracking_matcher: &'a dyn Matcher,
    tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
    file_states_tx: Sender<(RepoPathBuf, FileState)>,
    untracked_paths_tx: Sender<(RepoPathBuf, UntrackedReason)>,
    deleted_files_tx: Sender<RepoPathBuf>,
    error: OnceLock<SnapshotError>,
    progress: Option<&'a SnapshotProgress<'a>>,
    max_new_file_size: u64,
}

impl FileSnapshotter<'_> {
    fn spawn_ok<'scope, F>(&'scope self, scope: &rayon::Scope<'scope>, body: F)
    where
        F: FnOnce(&rayon::Scope<'scope>) -> Result<(), SnapshotError> + Send + 'scope,
    {
        scope.spawn(|scope| {
            if self.error.get().is_some() {
                return;
            }
            match body(scope) {
                Ok(()) => {}
                Err(err) => self.error.set(err).unwrap_or(()),
            };
        });
    }

    /// Extracts the result of the snapshot.
    fn into_result(self) -> Result<(), SnapshotError> {
        match self.error.into_inner() {
            Some(err) => Err(err),
            None => Ok(()),
        }
    }

    /// Visits the directory entries, spawns jobs to recurse into sub
    /// directories.
    fn visit_directory<'scope>(
        &'scope self,
        directory_to_visit: DirectoryToVisit<'scope>,
        scope: &rayon::Scope<'scope>,
    ) -> Result<(), SnapshotError> {
        let DirectoryToVisit {
            dir,
            disk_dir,
            git_ignore,
            file_states,
        } = directory_to_visit;

        let git_ignore = git_ignore
            .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
        let dir_entries: Vec<_> = disk_dir
            .read_dir()
            .and_then(|entries| entries.try_collect())
            .map_err(|err| SnapshotError::Other {
                message: format!("Failed to read directory {}", disk_dir.display()),
                err: err.into(),
            })?;
        let (dirs, files) = dir_entries
            .into_par_iter()
            // Don't split into too many small jobs. For a small directory,
            // sequential scan should be fast enough.
            .with_min_len(100)
            .filter_map(|entry| {
                self.process_dir_entry(&dir, &git_ignore, file_states, &entry, scope)
                    .transpose()
            })
            .map(|item| match item {
                Ok((PresentDirEntryKind::Dir, name)) => Ok(Either::Left(name)),
                Ok((PresentDirEntryKind::File, name)) => Ok(Either::Right(name)),
                Err(err) => Err(err),
            })
            .collect::<Result<_, _>>()?;
        let present_entries = PresentDirEntries { dirs, files };
        self.emit_deleted_files(&dir, file_states, &present_entries);
        Ok(())
    }

    fn process_dir_entry<'scope>(
        &'scope self,
        dir: &RepoPath,
        git_ignore: &Arc<GitIgnoreFile>,
        file_states: FileStates<'scope>,
        entry: &DirEntry,
        scope: &rayon::Scope<'scope>,
    ) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
        let file_type = entry.file_type().unwrap();
        let file_name = entry.file_name();
        let name_string = file_name
            .into_string()
            .map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

        if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
            return Ok(None);
        }
        let name = RepoPathComponent::new(&name_string).unwrap();
        let path = dir.join(name);
        let maybe_current_file_state = file_states.get_at(dir, name);
        if let Some(file_state) = &maybe_current_file_state
            && file_state.file_type == FileType::GitSubmodule
        {
            return Ok(None);
        }

        if file_type.is_dir() {
            let file_states = file_states.prefixed_at(dir, name);
            if git_ignore.matches(&path.to_internal_dir_string()) {
                // If the whole directory is ignored by .gitignore, visit only
                // paths we're already tracking, because a .gitignore file
                // inside an ignored directory must itself be ignored. It's
                // also more efficient. start_tracking_matcher is NOT tested
                // here because we still need to scan directory entries to
                // report untracked paths.
                self.spawn_ok(scope, move |_| self.visit_tracked_files(file_states));
            } else if !self.matcher.visit(&path).is_nothing() {
                let directory_to_visit = DirectoryToVisit {
                    dir: path,
                    disk_dir: entry.path(),
                    git_ignore: git_ignore.clone(),
                    file_states,
                };
                self.spawn_ok(scope, |scope| {
                    self.visit_directory(directory_to_visit, scope)
                });
            }
            // Whether or not the directory path matches, any child file entries
            // shouldn't be touched within the current recursion step.
            Ok(Some((PresentDirEntryKind::Dir, name_string)))
        } else if self.matcher.matches(&path) {
            if let Some(progress) = self.progress {
                progress(&path);
            }
            if maybe_current_file_state.is_none()
                && git_ignore.matches(path.as_internal_file_string())
            {
                // If it wasn't already tracked and it matches
                // the ignored paths, then ignore it.
                Ok(None)
            } else if maybe_current_file_state.is_none()
                && !self.start_tracking_matcher.matches(&path)
            {
                // Leave the file untracked
                self.untracked_paths_tx
                    .send((path, UntrackedReason::FileNotAutoTracked))
                    .ok();
                Ok(None)
            } else {
                let metadata = entry.metadata().map_err(|err| SnapshotError::Other {
                    message: format!("Failed to stat file {}", entry.path().display()),
                    err: err.into(),
                })?;
                if maybe_current_file_state.is_none() && metadata.len() > self.max_new_file_size {
                    // Leave the large file untracked
                    let reason = UntrackedReason::FileTooLarge {
                        size: metadata.len(),
                        max_size: self.max_new_file_size,
                    };
                    self.untracked_paths_tx.send((path, reason)).ok();
                    Ok(None)
                } else if let Some(new_file_state) = file_state(&metadata) {
                    self.process_present_file(
                        path,
                        &entry.path(),
                        maybe_current_file_state.as_ref(),
                        new_file_state,
                    )?;
                    Ok(Some((PresentDirEntryKind::File, name_string)))
                } else {
                    // Special file is not considered present
                    Ok(None)
                }
            }
        } else {
            Ok(None)
        }
    }
1380
1381    /// Visits only paths we're already tracking.
1382    fn visit_tracked_files(&self, file_states: FileStates<'_>) -> Result<(), SnapshotError> {
1383        for (tracked_path, current_file_state) in file_states {
1384            if current_file_state.file_type == FileType::GitSubmodule {
1385                continue;
1386            }
1387            if !self.matcher.matches(tracked_path) {
1388                continue;
1389            }
1390            let disk_path = tracked_path.to_fs_path(&self.tree_state.working_copy_path)?;
1391            let metadata = match disk_path.symlink_metadata() {
1392                Ok(metadata) => Some(metadata),
1393                Err(err) if err.kind() == io::ErrorKind::NotFound => None,
1394                Err(err) => {
1395                    return Err(SnapshotError::Other {
1396                        message: format!("Failed to stat file {}", disk_path.display()),
1397                        err: err.into(),
1398                    });
1399                }
1400            };
1401            if let Some(new_file_state) = metadata.as_ref().and_then(file_state) {
1402                self.process_present_file(
1403                    tracked_path.to_owned(),
1404                    &disk_path,
1405                    Some(&current_file_state),
1406                    new_file_state,
1407                )?;
1408            } else {
1409                self.deleted_files_tx.send(tracked_path.to_owned()).ok();
1410            }
1411        }
1412        Ok(())
1413    }
1414
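    /// Processes a file that exists on disk: writes updated contents to the
    /// store if needed, and queues the resulting tree-entry and file-state
    /// updates on the snapshot channels.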
1415    fn process_present_file(
1416        &self,
1417        path: RepoPathBuf,
1418        disk_path: &Path,
1419        maybe_current_file_state: Option<&FileState>,
1420        mut new_file_state: FileState,
1421    ) -> Result<(), SnapshotError> {
1422        let update = self.get_updated_tree_value(
1423            &path,
1424            disk_path,
1425            maybe_current_file_state,
1426            &new_file_state,
1427        )?;
1428        // Preserve materialized conflict data for normal, non-resolved files
1429        if matches!(new_file_state.file_type, FileType::Normal { .. })
1430            && !update.as_ref().is_some_and(|update| update.is_resolved())
1431        {
1432            new_file_state.materialized_conflict_data =
1433                maybe_current_file_state.and_then(|state| state.materialized_conflict_data);
1434        }
1435        if let Some(tree_value) = update {
1436            self.tree_entries_tx.send((path.clone(), tree_value)).ok();
1437        }
1438        if Some(&new_file_state) != maybe_current_file_state {
1439            self.file_states_tx.send((path, new_file_state)).ok();
1440        }
1441        Ok(())
1442    }
1443
1444    /// Emits file paths that don't exist in `present_entries`.
1445    fn emit_deleted_files(
1446        &self,
1447        dir: &RepoPath,
1448        file_states: FileStates<'_>,
1449        present_entries: &PresentDirEntries,
1450    ) {
1451        let file_state_chunks = file_states.iter().chunk_by(|(path, _state)| {
1452            // Extract <name> from <dir>, <dir>/<name>, or <dir>/<name>/**.
1453            // (file_states may contain <dir> file on file->dir transition.)
1454            debug_assert!(path.starts_with(dir));
1455            let slash = !dir.is_root() as usize;
1456            let len = dir.as_internal_file_string().len() + slash;
1457            let tail = path.as_internal_file_string().get(len..).unwrap_or("");
1458            match tail.split_once('/') {
1459                Some((name, _)) => (PresentDirEntryKind::Dir, name),
1460                None => (PresentDirEntryKind::File, tail),
1461            }
1462        });
1463        file_state_chunks
1464            .into_iter()
1465            .filter(|&((kind, name), _)| match kind {
1466                PresentDirEntryKind::Dir => !present_entries.dirs.contains(name),
1467                PresentDirEntryKind::File => !present_entries.files.contains(name),
1468            })
1469            .flat_map(|(_, chunk)| chunk)
1470            // Whether or not the entry exists, submodules should be ignored
1471            .filter(|(_, state)| state.file_type != FileType::GitSubmodule)
1472            .filter(|(path, _)| self.matcher.matches(path))
1473            .try_for_each(|(path, _)| self.deleted_files_tx.send(path.to_owned()))
1474            .ok();
1475    }
1476
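    /// Returns the tree value to record for `repo_path`, or `None` if the
    /// file is clean or if the computed value matches the current tree.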
1477    fn get_updated_tree_value(
1478        &self,
1479        repo_path: &RepoPath,
1480        disk_path: &Path,
1481        maybe_current_file_state: Option<&FileState>,
1482        new_file_state: &FileState,
1483    ) -> Result<Option<MergedTreeValue>, SnapshotError> {
1484        let clean = match maybe_current_file_state {
1485            None => {
1486                // untracked
1487                false
1488            }
1489            Some(current_file_state) => {
1490                // If the file's mtime was set at the same time as this state file's own mtime,
1491                // we can't tell if the file changed before or after the state file was written.
1492                new_file_state.is_clean(current_file_state)
1493                    && current_file_state.mtime < self.tree_state.own_mtime
1494            }
1495        };
1496        if clean {
1497            Ok(None)
1498        } else {
1499            let current_tree_values = self.current_tree.path_value(repo_path)?;
1500            let new_file_type = if !self.tree_state.symlink_support {
1501                let mut new_file_type = new_file_state.file_type.clone();
1502                if matches!(new_file_type, FileType::Normal { .. })
1503                    && matches!(current_tree_values.as_normal(), Some(TreeValue::Symlink(_)))
1504                {
1505                    new_file_type = FileType::Symlink;
1506                }
1507                new_file_type
1508            } else {
1509                new_file_state.file_type.clone()
1510            };
1511            let new_tree_values = match new_file_type {
1512                FileType::Normal { executable } => self
1513                    .write_path_to_store(
1514                        repo_path,
1515                        disk_path,
1516                        &current_tree_values,
1517                        executable,
1518                        maybe_current_file_state.and_then(|state| state.materialized_conflict_data),
1519                    )
1520                    .block_on()?,
1521                FileType::Symlink => {
1522                    let id = self
1523                        .write_symlink_to_store(repo_path, disk_path)
1524                        .block_on()?;
1525                    Merge::normal(TreeValue::Symlink(id))
1526                }
1527                FileType::GitSubmodule => panic!("git submodule cannot be written to store"),
1528            };
1529            if new_tree_values != current_tree_values {
1530                Ok(Some(new_tree_values))
1531            } else {
1532                Ok(None)
1533            }
1534        }
1535    }
1536
1537    fn store(&self) -> &Store {
1538        &self.tree_state.store
1539    }
1540
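    /// Writes the file at `disk_path` to the store, preserving the
    /// executable bit and copy id from the current tree where applicable.
    /// If the current tree value is an unresolved file conflict, conflict
    /// markers in the on-disk contents are parsed back into a conflict.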
1541    async fn write_path_to_store(
1542        &self,
1543        repo_path: &RepoPath,
1544        disk_path: &Path,
1545        current_tree_values: &MergedTreeValue,
1546        executable: FileExecutableFlag,
1547        materialized_conflict_data: Option<MaterializedConflictData>,
1548    ) -> Result<MergedTreeValue, SnapshotError> {
1549        if let Some(current_tree_value) = current_tree_values.as_resolved() {
1550            let id = self.write_file_to_store(repo_path, disk_path).await?;
1551            // On Windows, we preserve the executable bit from the current tree.
1552            let executable = executable.unwrap_or_else(|| {
1553                if let Some(TreeValue::File {
1554                    id: _,
1555                    executable,
1556                    copy_id: _,
1557                }) = current_tree_value
1558                {
1559                    *executable
1560                } else {
1561                    false
1562                }
1563            });
1564            // Preserve the copy id from the current tree
1565            let copy_id = {
1566                if let Some(TreeValue::File {
1567                    id: _,
1568                    executable: _,
1569                    copy_id,
1570                }) = current_tree_value
1571                {
1572                    copy_id.clone()
1573                } else {
1574                    CopyId::placeholder()
1575                }
1576            };
1577            Ok(Merge::normal(TreeValue::File {
1578                id,
1579                executable,
1580                copy_id,
1581            }))
1582        } else if let Some(old_file_ids) = current_tree_values.to_file_merge() {
1583            // Safe to unwrap because copy ids exist exactly on file variants
1584            let copy_id_merge = current_tree_values.to_copy_id_merge().unwrap();
1585            let copy_id = copy_id_merge
1586                .resolve_trivial(SameChange::Accept)
1587                .cloned()
1588                .flatten()
1589                .unwrap_or_else(CopyId::placeholder);
1590            let mut contents = vec![];
1591            let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
1592                message: format!("Failed to open file {}", disk_path.display()),
1593                err: err.into(),
1594            })?;
1595            self.tree_state
1596                .target_eol_strategy
1597                .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
1598                .await
1599                .map_err(|err| SnapshotError::Other {
1600                    message: "Failed to convert the EOL".to_string(),
1601                    err: err.into(),
1602                })?
1603                .read_to_end(&mut contents)
1604                .await
1605                .map_err(|err| SnapshotError::Other {
1606                    message: "Failed to read the EOL converted contents".to_string(),
1607                    err: err.into(),
1608                })?;
1609            // If the file contained a conflict before and is a normal file on
1610            // disk, we try to parse any conflict markers in the file into a
1611            // conflict.
1612            let new_file_ids = conflicts::update_from_content(
1613                &old_file_ids,
1614                self.store(),
1615                repo_path,
1616                &contents,
1617                materialized_conflict_data.map_or(MIN_CONFLICT_MARKER_LEN, |data| {
1618                    data.conflict_marker_len as usize
1619                }),
1620            )
1621            .await?;
1622            match new_file_ids.into_resolved() {
1623                Ok(file_id) => {
1624                    // On Windows, we preserve the executable bit from the merged trees.
1625                    let executable = executable.unwrap_or_else(|| {
1626                        if let Some(merge) = current_tree_values.to_executable_merge() {
1627                            conflicts::resolve_file_executable(&merge).unwrap_or(false)
1628                        } else {
1629                            false
1630                        }
1631                    });
1632                    Ok(Merge::normal(TreeValue::File {
1633                        id: file_id.unwrap(),
1634                        executable,
1635                        copy_id,
1636                    }))
1637                }
1638                Err(new_file_ids) => {
1639                    if new_file_ids != old_file_ids {
1640                        Ok(current_tree_values.with_new_file_ids(&new_file_ids))
1641                    } else {
1642                        Ok(current_tree_values.clone())
1643                    }
1644                }
1645            }
1646        } else {
1647            Ok(current_tree_values.clone())
1648        }
1649    }
1650
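    /// Writes the contents of the file at `disk_path` to the store, applying
    /// any configured EOL conversion first.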
1651    async fn write_file_to_store(
1652        &self,
1653        path: &RepoPath,
1654        disk_path: &Path,
1655    ) -> Result<FileId, SnapshotError> {
1656        let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
1657            message: format!("Failed to open file {}", disk_path.display()),
1658            err: err.into(),
1659        })?;
1660        let mut contents = self
1661            .tree_state
1662            .target_eol_strategy
1663            .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
1664            .await
1665            .map_err(|err| SnapshotError::Other {
1666                message: "Failed to convert the EOL".to_string(),
1667                err: err.into(),
1668            })?;
1669        Ok(self.store().write_file(path, &mut contents).await?)
1670    }
1671
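    /// Writes the symlink at `disk_path` to the store. Without symlink
    /// support, the path is instead read as a regular file whose contents
    /// are the link target.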
1672    async fn write_symlink_to_store(
1673        &self,
1674        path: &RepoPath,
1675        disk_path: &Path,
1676    ) -> Result<SymlinkId, SnapshotError> {
1677        if self.tree_state.symlink_support {
1678            let target = disk_path.read_link().map_err(|err| SnapshotError::Other {
1679                message: format!("Failed to read symlink {}", disk_path.display()),
1680                err: err.into(),
1681            })?;
1682            let str_target =
1683                target
1684                    .to_str()
1685                    .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget {
1686                        path: disk_path.to_path_buf(),
1687                    })?;
1688            Ok(self.store().write_symlink(path, str_target).await?)
1689        } else {
1690            let target = fs::read(disk_path).map_err(|err| SnapshotError::Other {
1691                message: format!("Failed to read file {}", disk_path.display()),
1692                err: err.into(),
1693            })?;
1694            let string_target =
1695                String::from_utf8(target).map_err(|_| SnapshotError::InvalidUtf8SymlinkTarget {
1696                    path: disk_path.to_path_buf(),
1697                })?;
1698            Ok(self.store().write_symlink(path, &string_target).await?)
1699        }
1700    }
1701}
1702
1703/// Functions to update local-disk files from the store.
1704impl TreeState {
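    /// Writes `contents` to a new file at `disk_path` (the file must not
    /// already exist) and returns the file state taken from the open file
    /// descriptor.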
1705    async fn write_file(
1706        &self,
1707        disk_path: &Path,
1708        contents: impl AsyncRead + Send + Unpin,
1709        executable: bool,
1710        apply_eol_conversion: bool,
1711    ) -> Result<FileState, CheckoutError> {
1712        let mut file = File::options()
1713            .write(true)
1714            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
1715            .open(disk_path)
1716            .map_err(|err| CheckoutError::Other {
1717                message: format!("Failed to open file {} for writing", disk_path.display()),
1718                err: err.into(),
1719            })?;
1720        let contents = if apply_eol_conversion {
1721            self.target_eol_strategy
1722                .convert_eol_for_update(contents)
1723                .await
1724                .map_err(|err| CheckoutError::Other {
1725                    message: "Failed to convert the EOL for the content".to_string(),
1726                    err: err.into(),
1727                })?
1728        } else {
1729            Box::new(contents)
1730        };
1731        let size = copy_async_to_sync(contents, &mut file)
1732            .await
1733            .map_err(|err| CheckoutError::Other {
1734                message: format!(
1735                    "Failed to write the content to the file {}",
1736                    disk_path.display()
1737                ),
1738                err: err.into(),
1739            })?;
1740        self.set_executable(disk_path, executable)?;
1741        // Read the file state from the file descriptor. That way, we know that the
1742        // file exists and is of the expected type, and the stat information is most
1743        // likely accurate, except when other processes modify the file concurrently.
1744        // (The mtime is set at write time and won't change when we close the file.)
1745        let metadata = file
1746            .metadata()
1747            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
1748        Ok(FileState::for_file(executable, size as u64, &metadata))
1749    }
1750
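    /// Creates a symlink at `disk_path` pointing to `target` and returns its
    /// file state.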
1751    fn write_symlink(&self, disk_path: &Path, target: String) -> Result<FileState, CheckoutError> {
1752        let target = PathBuf::from(&target);
1753        try_symlink(&target, disk_path).map_err(|err| CheckoutError::Other {
1754            message: format!(
1755                "Failed to create symlink from {} to {}",
1756                disk_path.display(),
1757                target.display()
1758            ),
1759            err: err.into(),
1760        })?;
1761        let metadata = disk_path
1762            .symlink_metadata()
1763            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
1764        Ok(FileState::for_symlink(&metadata))
1765    }
1766
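    /// Writes materialized conflict `contents` to a new file at `disk_path`
    /// and returns its file state.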
1767    async fn write_conflict(
1768        &self,
1769        disk_path: &Path,
1770        contents: &[u8],
1771        executable: bool,
1772    ) -> Result<FileState, CheckoutError> {
1773        let contents = self
1774            .target_eol_strategy
1775            .convert_eol_for_update(contents)
1776            .await
1777            .map_err(|err| CheckoutError::Other {
1778                message: "Failed to convert the EOL when writing a merge conflict".to_string(),
1779                err: err.into(),
1780            })?;
1781        let mut file = OpenOptions::new()
1782            .write(true)
1783            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
1784            .open(disk_path)
1785            .map_err(|err| CheckoutError::Other {
1786                message: format!("Failed to open file {} for writing", disk_path.display()),
1787                err: err.into(),
1788            })?;
1789        let size = copy_async_to_sync(contents, &mut file)
1790            .await
1791            .map_err(|err| CheckoutError::Other {
1792                message: format!("Failed to write conflict to file {}", disk_path.display()),
1793                err: err.into(),
1794            })? as u64;
1795        self.set_executable(disk_path, executable)?;
1796        let metadata = file
1797            .metadata()
1798            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
1799        Ok(FileState::for_file(executable, size, &metadata))
1800    }
1801
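    /// Sets or clears the executable permission bits on `disk_path`. This is
    /// a no-op on Windows, which has no executable bit.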
1802    #[cfg_attr(windows, expect(unused_variables))]
1803    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
1804        #[cfg(unix)]
1805        {
1806            let mode = if executable { 0o755 } else { 0o644 };
1807            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
1808                .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
1809        }
1810        Ok(())
1811    }
1812
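    /// Checks out `new_tree`: updates the files on disk to match it (within
    /// the sparse patterns) and records it as the current tree.
    ///
    /// A minimal usage sketch (not compiled; assumes a mutable `TreeState`
    /// named `tree_state` and a target `MergedTree` named `new_tree` are in
    /// scope):
    ///
    /// ```ignore
    /// let stats = tree_state.check_out(&new_tree)?;
    /// println!(
    ///     "updated {}, added {}, removed {}, skipped {}",
    ///     stats.updated_files, stats.added_files, stats.removed_files, stats.skipped_files,
    /// );
    /// ```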
1813    pub fn check_out(&mut self, new_tree: &MergedTree) -> Result<CheckoutStats, CheckoutError> {
1814        let old_tree = self.current_tree().map_err(|err| match err {
1815            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
1816                source: Box::new(err),
1817            },
1818            other => CheckoutError::InternalBackendError(other),
1819        })?;
1820        let stats = self
1821            .update(&old_tree, new_tree, self.sparse_matcher().as_ref())
1822            .block_on()?;
1823        self.tree_id = new_tree.id();
1824        Ok(stats)
1825    }
1826
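    /// Changes the sparse patterns and updates the files on disk to match:
    /// newly covered paths are added and no-longer-covered paths are
    /// removed, while the checked-out tree itself is unchanged.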
1827    pub fn set_sparse_patterns(
1828        &mut self,
1829        sparse_patterns: Vec<RepoPathBuf>,
1830    ) -> Result<CheckoutStats, CheckoutError> {
1831        let tree = self.current_tree().map_err(|err| match err {
1832            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
1833                source: Box::new(err),
1834            },
1835            other => CheckoutError::InternalBackendError(other),
1836        })?;
1837        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
1838        let new_matcher = PrefixMatcher::new(&sparse_patterns);
1839        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
1840        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
1841        let empty_tree = MergedTree::resolved(Tree::empty(self.store.clone(), RepoPathBuf::root()));
1842        let added_stats = self.update(&empty_tree, &tree, &added_matcher).block_on()?;
1843        let removed_stats = self
1844            .update(&tree, &empty_tree, &removed_matcher)
1845            .block_on()?;
1846        self.sparse_patterns = sparse_patterns;
1847        assert_eq!(added_stats.updated_files, 0);
1848        assert_eq!(added_stats.removed_files, 0);
1849        assert_eq!(removed_stats.updated_files, 0);
1850        assert_eq!(removed_stats.added_files, 0);
1851        assert_eq!(removed_stats.skipped_files, 0);
1852        Ok(CheckoutStats {
1853            updated_files: 0,
1854            added_files: added_stats.added_files,
1855            removed_files: removed_stats.removed_files,
1856            skipped_files: added_stats.skipped_files,
1857        })
1858    }
1859
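    /// Applies the diff from `old_tree` to `new_tree`, restricted to paths
    /// matched by `matcher`, to the files on disk.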
1860    async fn update(
1861        &mut self,
1862        old_tree: &MergedTree,
1863        new_tree: &MergedTree,
1864        matcher: &dyn Matcher,
1865    ) -> Result<CheckoutStats, CheckoutError> {
1866        // TODO: maybe it's better not to include the skipped counts in the
1867        // "intended" counts
1868        let mut stats = CheckoutStats {
1869            updated_files: 0,
1870            added_files: 0,
1871            removed_files: 0,
1872            skipped_files: 0,
1873        };
1874        let mut changed_file_states = Vec::new();
1875        let mut deleted_files = HashSet::new();
1876        let mut diff_stream = old_tree
1877            .diff_stream_for_file_system(new_tree, matcher)
1878            .map(async |TreeDiffEntry { path, values }| match values {
1879                Ok(diff) => {
1880                    let result = materialize_tree_value(&self.store, &path, diff.after).await;
1881                    (path, result.map(|value| (diff.before, value)))
1882                }
1883                Err(err) => (path, Err(err)),
1884            })
1885            .buffered(self.store.concurrency().max(1));
1886        while let Some((path, data)) = diff_stream.next().await {
1887            let (before, after) = data?;
1888            if after.is_absent() {
1889                stats.removed_files += 1;
1890            } else if before.is_absent() {
1891                stats.added_files += 1;
1892            } else {
1893                stats.updated_files += 1;
1894            }
1895
1896            // Existing Git submodule can be a non-empty directory on disk. We
1897            // shouldn't attempt to manage it as a tracked path.
1898            //
1899            // TODO: It might be better to add general support for paths not
1900            // tracked by jj than processing submodules specially. For example,
1901            // paths excluded by .gitignore can be marked as such so that
1902            // newly-"unignored" paths won't be snapshotted automatically.
1903            if matches!(before.as_normal(), Some(TreeValue::GitSubmodule(_)))
1904                && matches!(after, MaterializedTreeValue::GitSubmodule(_))
1905            {
1906                eprintln!("ignoring git submodule at {path:?}");
1907                // Don't update the file state, as if there were no diff. Leave
1908                // the state type as FileType::GitSubmodule if that's what it was.
1909                continue;
1910            }
1911
1912            // Create parent directories whether or not after.is_present(). This
1913            // ensures that the path never traverses symlinks.
1914            let Some(disk_path) = create_parent_dirs(&self.working_copy_path, &path)? else {
1915                changed_file_states.push((path, FileState::placeholder()));
1916                stats.skipped_files += 1;
1917                continue;
1918            };
1919            // If the path was present, check for a reserved path first and delete the old file.
1920            let present_file_deleted = before.is_present() && remove_old_file(&disk_path)?;
1921            // If not, create a temporary file to test the path's validity.
1922            if !present_file_deleted && !can_create_new_file(&disk_path)? {
1923                changed_file_states.push((path, FileState::placeholder()));
1924                stats.skipped_files += 1;
1925                continue;
1926            }
1927
1928            // TODO: Check that the file has not changed before overwriting/removing it.
1929            let file_state = match after {
1930                MaterializedTreeValue::Absent | MaterializedTreeValue::AccessDenied(_) => {
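                    // Remove parent directories that have become empty;
                    // fs::remove_dir fails on (and thus stops at) the first
                    // non-empty one.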
1931                    let mut parent_dir = disk_path.parent().unwrap();
1932                    loop {
1933                        if fs::remove_dir(parent_dir).is_err() {
1934                            break;
1935                        }
1936                        parent_dir = parent_dir.parent().unwrap();
1937                    }
1938                    deleted_files.insert(path);
1939                    continue;
1940                }
1941                MaterializedTreeValue::File(file) => {
1942                    self.write_file(&disk_path, file.reader, file.executable, true)
1943                        .await?
1944                }
1945                MaterializedTreeValue::Symlink { id: _, target } => {
1946                    if self.symlink_support {
1947                        self.write_symlink(&disk_path, target)?
1948                    } else {
1949                        self.write_file(&disk_path, target.as_bytes(), false, false)
1950                            .await?
1951                    }
1952                }
1953                MaterializedTreeValue::GitSubmodule(_) => {
1954                    eprintln!("ignoring git submodule at {path:?}");
1955                    FileState::for_gitsubmodule()
1956                }
1957                MaterializedTreeValue::Tree(_) => {
1958                    panic!("unexpected tree entry in diff at {path:?}");
1959                }
1960                MaterializedTreeValue::FileConflict(file) => {
1961                    let conflict_marker_len =
1962                        choose_materialized_conflict_marker_len(&file.contents);
1963                    let options = ConflictMaterializeOptions {
1964                        marker_style: self.conflict_marker_style,
1965                        marker_len: Some(conflict_marker_len),
1966                        merge: self.store.merge_options().clone(),
1967                    };
1968                    let contents = materialize_merge_result_to_bytes(&file.contents, &options);
1969                    let mut file_state = self
1970                        .write_conflict(&disk_path, &contents, file.executable.unwrap_or(false))
1971                        .await?;
1972                    file_state.materialized_conflict_data = Some(MaterializedConflictData {
1973                        conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX),
1974                    });
1975                    file_state
1976                }
1977                MaterializedTreeValue::OtherConflict { id } => {
1978                    // Unless all terms are regular files, we can't do much
1979                    // better than trying to describe the merge.
1980                    let contents = id.describe();
1981                    let executable = false;
1982                    self.write_conflict(&disk_path, contents.as_bytes(), executable)
1983                        .await?
1984                }
1985            };
1986            changed_file_states.push((path, file_state));
1987        }
1988        self.file_states
1989            .merge_in(changed_file_states, &deleted_files);
1990        Ok(stats)
1991    }
1992
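    /// Records `new_tree` as the current tree without updating the files on
    /// disk. Changed paths are recorded with zeroed mtime and size so the
    /// next snapshot re-reads them from disk.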
1993    pub async fn reset(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
1994        let old_tree = self.current_tree().map_err(|err| match err {
1995            err @ BackendError::ObjectNotFound { .. } => ResetError::SourceNotFound {
1996                source: Box::new(err),
1997            },
1998            other => ResetError::InternalBackendError(other),
1999        })?;
2000
2001        let matcher = self.sparse_matcher();
2002        let mut changed_file_states = Vec::new();
2003        let mut deleted_files = HashSet::new();
2004        let mut diff_stream = old_tree.diff_stream_for_file_system(new_tree, matcher.as_ref());
2005        while let Some(TreeDiffEntry { path, values }) = diff_stream.next().await {
2006            let after = values?.after;
2007            if after.is_absent() {
2008                deleted_files.insert(path);
2009            } else {
2010                let file_type = match after.into_resolved() {
2011                    Ok(value) => match value.unwrap() {
2012                        TreeValue::File {
2013                            id: _,
2014                            executable,
2015                            copy_id: _,
2016                        } => FileType::Normal {
2017                            executable: FileExecutableFlag::from_bool_lossy(executable),
2018                        },
2019                        TreeValue::Symlink(_id) => FileType::Symlink,
2020                        TreeValue::GitSubmodule(_id) => {
2021                            eprintln!("ignoring git submodule at {path:?}");
2022                            FileType::GitSubmodule
2023                        }
2024                        TreeValue::Tree(_id) => {
2025                            panic!("unexpected tree entry in diff at {path:?}");
2026                        }
2027                    },
2028                    Err(_values) => {
2029                        // TODO: Try to set the executable bit based on the conflict
2030                        FileType::Normal {
2031                            executable: FileExecutableFlag::from_bool_lossy(false),
2032                        }
2033                    }
2034                };
2035                let file_state = FileState {
2036                    file_type,
2037                    mtime: MillisSinceEpoch(0),
2038                    size: 0,
2039                    materialized_conflict_data: None,
2040                };
2041                changed_file_states.push((path, file_state));
2042            }
2043        }
2044        self.file_states
2045            .merge_in(changed_file_states, &deleted_files);
2046        self.tree_id = new_tree.id();
2047        Ok(())
2048    }
2049
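    /// Rebuilds the working-copy state against `new_tree` from an empty
    /// state, discarding all previously recorded file states.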
2050    pub async fn recover(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
2051        self.file_states.clear();
2052        self.tree_id = self.store.empty_merged_tree_id();
2053        self.reset(new_tree).await
2054    }
2055}
2056
2057fn checkout_error_for_stat_error(err: io::Error, path: &Path) -> CheckoutError {
2058    CheckoutError::Other {
2059        message: format!("Failed to stat file {}", path.display()),
2060        err: err.into(),
2061    }
2062}
2063
2064/// Working copy state stored in "checkout" file.
2065#[derive(Clone, Debug)]
2066struct CheckoutState {
2067    operation_id: OperationId,
2068    workspace_name: WorkspaceNameBuf,
2069}
2070
2071impl CheckoutState {
2072    fn load(state_path: &Path) -> Result<Self, WorkingCopyStateError> {
2073        let wrap_err = |err| WorkingCopyStateError {
2074            message: "Failed to read checkout state".to_owned(),
2075            err,
2076        };
2077        let buf = fs::read(state_path.join("checkout")).map_err(|err| wrap_err(err.into()))?;
2078        let proto = crate::protos::local_working_copy::Checkout::decode(&*buf)
2079            .map_err(|err| wrap_err(err.into()))?;
2080        Ok(Self {
2081            operation_id: OperationId::new(proto.operation_id),
2082            workspace_name: if proto.workspace_name.is_empty() {
2083                // For compatibility with old working copies.
2084                // TODO: Delete in mid 2022 or so
2085                WorkspaceName::DEFAULT.to_owned()
2086            } else {
2087                proto.workspace_name.into()
2088            },
2089        })
2090    }
2091
2092    #[instrument(skip_all)]
2093    fn save(&self, state_path: &Path) -> Result<(), WorkingCopyStateError> {
2094        let wrap_err = |err| WorkingCopyStateError {
2095            message: "Failed to write checkout state".to_owned(),
2096            err,
2097        };
2098        let proto = crate::protos::local_working_copy::Checkout {
2099            operation_id: self.operation_id.to_bytes(),
2100            workspace_name: (*self.workspace_name).into(),
2101        };
2102        let mut temp_file =
2103            NamedTempFile::new_in(state_path).map_err(|err| wrap_err(err.into()))?;
2104        temp_file
2105            .as_file_mut()
2106            .write_all(&proto.encode_to_vec())
2107            .map_err(|err| wrap_err(err.into()))?;
2108        // TODO: Retry if persisting fails (it will on Windows if the file happened to
2109        // be open for read).
2110        persist_temp_file(temp_file, state_path.join("checkout"))
2111            .map_err(|err| wrap_err(err.into()))?;
2112        Ok(())
2113    }
2114}
2115
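/// A working copy backed by the local filesystem, with its state (checkout,
/// tree state, sparse patterns) stored in the `state_path` directory.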
2116pub struct LocalWorkingCopy {
2117    store: Arc<Store>,
2118    working_copy_path: PathBuf,
2119    state_path: PathBuf,
2120    checkout_state: CheckoutState,
2121    tree_state: OnceCell<TreeState>,
2122    tree_state_settings: TreeStateSettings,
2123}
2124
2125impl WorkingCopy for LocalWorkingCopy {
2126    fn name(&self) -> &str {
2127        Self::name()
2128    }
2129
2130    fn workspace_name(&self) -> &WorkspaceName {
2131        &self.checkout_state.workspace_name
2132    }
2133
2134    fn operation_id(&self) -> &OperationId {
2135        &self.checkout_state.operation_id
2136    }
2137
2138    fn tree_id(&self) -> Result<&MergedTreeId, WorkingCopyStateError> {
2139        Ok(self.tree_state()?.current_tree_id())
2140    }
2141
2142    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
2143        Ok(self.tree_state()?.sparse_patterns())
2144    }
2145
2146    fn start_mutation(&self) -> Result<Box<dyn LockedWorkingCopy>, WorkingCopyStateError> {
2147        let lock_path = self.state_path.join("working_copy.lock");
2148        let lock = FileLock::lock(lock_path).map_err(|err| WorkingCopyStateError {
2149            message: "Failed to lock working copy".to_owned(),
2150            err: err.into(),
2151        })?;
2152
2153        let wc = Self {
2154            store: self.store.clone(),
2155            working_copy_path: self.working_copy_path.clone(),
2156            state_path: self.state_path.clone(),
2157            // Re-read the state after taking the lock
2158            checkout_state: CheckoutState::load(&self.state_path)?,
2159            // Empty so we re-read the state after taking the lock
2160            // TODO: It's expensive to reload the whole tree. We should copy it from `self` if it
2161            // hasn't changed.
2162            tree_state: OnceCell::new(),
2163            tree_state_settings: self.tree_state_settings.clone(),
2164        };
2165        let old_operation_id = wc.operation_id().clone();
2166        let old_tree_id = wc.tree_id()?.clone();
2167        Ok(Box::new(LockedLocalWorkingCopy {
2168            wc,
2169            old_operation_id,
2170            old_tree_id,
2171            tree_state_dirty: false,
2172            new_workspace_name: None,
2173            _lock: lock,
2174        }))
2175    }
2176}
2177
2178impl LocalWorkingCopy {
2179    pub fn name() -> &'static str {
2180        "local"
2181    }
2182
2183    /// Initializes a new working copy at `working_copy_path`. The working
2184    /// copy's state will be stored in the `state_path` directory. The working
2185    /// copy will have the empty tree checked out.
2186    pub fn init(
2187        store: Arc<Store>,
2188        working_copy_path: PathBuf,
2189        state_path: PathBuf,
2190        operation_id: OperationId,
2191        workspace_name: WorkspaceNameBuf,
2192        user_settings: &UserSettings,
2193    ) -> Result<Self, WorkingCopyStateError> {
2194        let checkout_state = CheckoutState {
2195            operation_id,
2196            workspace_name,
2197        };
2198        checkout_state.save(&state_path)?;
2199        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
2200            .map_err(|err| WorkingCopyStateError {
2201                message: "Failed to read the tree state settings".to_string(),
2202                err: err.into(),
2203            })?;
2204        let tree_state = TreeState::init(
2205            store.clone(),
2206            working_copy_path.clone(),
2207            state_path.clone(),
2208            &tree_state_settings,
2209        )
2210        .map_err(|err| WorkingCopyStateError {
2211            message: "Failed to initialize working copy state".to_string(),
2212            err: err.into(),
2213        })?;
2214        Ok(Self {
2215            store,
2216            working_copy_path,
2217            state_path,
2218            checkout_state,
2219            tree_state: OnceCell::with_value(tree_state),
2220            tree_state_settings,
2221        })
2222    }
2223
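    /// Loads an existing working copy from `state_path`. The tree state is
    /// loaded lazily on first access.
    ///
    /// A minimal lifecycle sketch (not compiled; `store`, the two paths,
    /// `settings`, `options`, and `operation_id` are assumed to be provided
    /// by the caller):
    ///
    /// ```ignore
    /// let wc = LocalWorkingCopy::load(store, working_copy_path, state_path, &settings)?;
    /// let mut locked = wc.start_mutation()?;
    /// let (new_tree_id, _stats) = locked.snapshot(&options)?;
    /// locked.finish(operation_id)?;
    /// ```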
2224    pub fn load(
2225        store: Arc<Store>,
2226        working_copy_path: PathBuf,
2227        state_path: PathBuf,
2228        user_settings: &UserSettings,
2229    ) -> Result<Self, WorkingCopyStateError> {
2230        let checkout_state = CheckoutState::load(&state_path)?;
2231        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
2232            .map_err(|err| WorkingCopyStateError {
2233                message: "Failed to read the tree state settings".to_string(),
2234                err: err.into(),
2235            })?;
2236        Ok(Self {
2237            store,
2238            working_copy_path,
2239            state_path,
2240            checkout_state,
2241            tree_state: OnceCell::new(),
2242            tree_state_settings,
2243        })
2244    }
2245
2246    pub fn state_path(&self) -> &Path {
2247        &self.state_path
2248    }
2249
2250    #[instrument(skip_all)]
2251    fn tree_state(&self) -> Result<&TreeState, WorkingCopyStateError> {
2252        self.tree_state.get_or_try_init(|| {
2253            TreeState::load(
2254                self.store.clone(),
2255                self.working_copy_path.clone(),
2256                self.state_path.clone(),
2257                &self.tree_state_settings,
2258            )
2259            .map_err(|err| WorkingCopyStateError {
2260                message: "Failed to read working copy state".to_string(),
2261                err: err.into(),
2262            })
2263        })
2264    }
2265
2266    fn tree_state_mut(&mut self) -> Result<&mut TreeState, WorkingCopyStateError> {
2267        self.tree_state()?; // ensure loaded
2268        Ok(self.tree_state.get_mut().unwrap())
2269    }
2270
2271    pub fn file_states(&self) -> Result<FileStates<'_>, WorkingCopyStateError> {
2272        Ok(self.tree_state()?.file_states())
2273    }
2274
2275    #[cfg(feature = "watchman")]
2276    pub fn query_watchman(
2277        &self,
2278        config: &WatchmanConfig,
2279    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), WorkingCopyStateError> {
2280        self.tree_state()?
2281            .query_watchman(config)
2282            .map_err(|err| WorkingCopyStateError {
2283                message: "Failed to query watchman".to_string(),
2284                err: err.into(),
2285            })
2286    }
2287
2288    #[cfg(feature = "watchman")]
2289    pub fn is_watchman_trigger_registered(
2290        &self,
2291        config: &WatchmanConfig,
2292    ) -> Result<bool, WorkingCopyStateError> {
2293        self.tree_state()?
2294            .is_watchman_trigger_registered(config)
2295            .map_err(|err| WorkingCopyStateError {
2296                message: "Failed to query watchman".to_string(),
2297                err: err.into(),
2298            })
2299    }
2300}
2301
2302pub struct LocalWorkingCopyFactory {}
2303
2304impl WorkingCopyFactory for LocalWorkingCopyFactory {
2305    fn init_working_copy(
2306        &self,
2307        store: Arc<Store>,
2308        working_copy_path: PathBuf,
2309        state_path: PathBuf,
2310        operation_id: OperationId,
2311        workspace_name: WorkspaceNameBuf,
2312        settings: &UserSettings,
2313    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2314        Ok(Box::new(LocalWorkingCopy::init(
2315            store,
2316            working_copy_path,
2317            state_path,
2318            operation_id,
2319            workspace_name,
2320            settings,
2321        )?))
2322    }
2323
2324    fn load_working_copy(
2325        &self,
2326        store: Arc<Store>,
2327        working_copy_path: PathBuf,
2328        state_path: PathBuf,
2329        settings: &UserSettings,
2330    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2331        Ok(Box::new(LocalWorkingCopy::load(
2332            store,
2333            working_copy_path,
2334            state_path,
2335            settings,
2336        )?))
2337    }
2338}
2339
2340/// A working copy that's locked on disk. The lock is held until you call
2341/// `finish()` or `discard()`.
2342pub struct LockedLocalWorkingCopy {
2343    wc: LocalWorkingCopy,
2344    old_operation_id: OperationId,
2345    old_tree_id: MergedTreeId,
2346    tree_state_dirty: bool,
2347    new_workspace_name: Option<WorkspaceNameBuf>,
2348    _lock: FileLock,
2349}
2350
2351impl LockedWorkingCopy for LockedLocalWorkingCopy {
2352    fn old_operation_id(&self) -> &OperationId {
2353        &self.old_operation_id
2354    }
2355
2356    fn old_tree_id(&self) -> &MergedTreeId {
2357        &self.old_tree_id
2358    }
2359
2360    fn snapshot(
2361        &mut self,
2362        options: &SnapshotOptions,
2363    ) -> Result<(MergedTreeId, SnapshotStats), SnapshotError> {
2364        let tree_state = self.wc.tree_state_mut()?;
2365        let (is_dirty, stats) = tree_state.snapshot(options)?;
2366        self.tree_state_dirty |= is_dirty;
2367        Ok((tree_state.current_tree_id().clone(), stats))
2368    }
2369
2370    fn check_out(&mut self, commit: &Commit) -> Result<CheckoutStats, CheckoutError> {
2371        // TODO: Write a "pending_checkout" file with the new TreeId so we can
2372        // continue an interrupted update if we find such a file.
2373        let new_tree = commit.tree()?;
2374        let tree_state = self.wc.tree_state_mut()?;
2375        if tree_state.tree_id != *commit.tree_id() {
2376            let stats = tree_state.check_out(&new_tree)?;
2377            self.tree_state_dirty = true;
2378            Ok(stats)
2379        } else {
2380            Ok(CheckoutStats::default())
2381        }
2382    }
2383
2384    fn rename_workspace(&mut self, new_name: WorkspaceNameBuf) {
2385        self.new_workspace_name = Some(new_name);
2386    }
2387
2388    fn reset(&mut self, commit: &Commit) -> Result<(), ResetError> {
2389        let new_tree = commit.tree()?;
2390        self.wc.tree_state_mut()?.reset(&new_tree).block_on()?;
2391        self.tree_state_dirty = true;
2392        Ok(())
2393    }
2394
2395    fn recover(&mut self, commit: &Commit) -> Result<(), ResetError> {
2396        let new_tree = commit.tree()?;
2397        self.wc.tree_state_mut()?.recover(&new_tree).block_on()?;
2398        self.tree_state_dirty = true;
2399        Ok(())
2400    }
2401
2402    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
2403        self.wc.sparse_patterns()
2404    }
2405
2406    fn set_sparse_patterns(
2407        &mut self,
2408        new_sparse_patterns: Vec<RepoPathBuf>,
2409    ) -> Result<CheckoutStats, CheckoutError> {
2410        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
2411        // continue an interrupted update if we find such a file.
2412        let stats = self
2413            .wc
2414            .tree_state_mut()?
2415            .set_sparse_patterns(new_sparse_patterns)?;
2416        self.tree_state_dirty = true;
2417        Ok(stats)
2418    }
2419
2420    #[instrument(skip_all)]
2421    fn finish(
2422        mut self: Box<Self>,
2423        operation_id: OperationId,
2424    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2425        assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.tree_id()?);
2426        if self.tree_state_dirty {
2427            self.wc
2428                .tree_state_mut()?
2429                .save()
2430                .map_err(|err| WorkingCopyStateError {
2431                    message: "Failed to write working copy state".to_string(),
2432                    err: Box::new(err),
2433                })?;
2434        }
2435        if self.old_operation_id != operation_id || self.new_workspace_name.is_some() {
2436            self.wc.checkout_state.operation_id = operation_id;
2437            if let Some(workspace_name) = self.new_workspace_name {
2438                self.wc.checkout_state.workspace_name = workspace_name;
2439            }
2440            self.wc.checkout_state.save(&self.wc.state_path)?;
2441        }
2442        // TODO: Clear the "pending_checkout" file here.
2443        Ok(Box::new(self.wc))
2444    }
2445}
2446
2447impl LockedLocalWorkingCopy {
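    /// Clears the saved fsmonitor state so that the next snapshot performs a
    /// full scan.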
2448    pub fn reset_watchman(&mut self) -> Result<(), SnapshotError> {
2449        self.wc.tree_state_mut()?.reset_watchman();
2450        self.tree_state_dirty = true;
2451        Ok(())
2452    }
2453}
2454
2455#[cfg(test)]
2456mod tests {
2457    use maplit::hashset;
2458
2459    use super::*;
2460
2461    fn repo_path(value: &str) -> &RepoPath {
2462        RepoPath::from_internal_string(value).unwrap()
2463    }
2464
2465    fn repo_path_component(value: &str) -> &RepoPathComponent {
2466        RepoPathComponent::new(value).unwrap()
2467    }
2468
2469    fn new_state(size: u64) -> FileState {
2470        FileState {
2471            file_type: FileType::Normal {
2472                executable: FileExecutableFlag::from_bool_lossy(false),
2473            },
2474            mtime: MillisSinceEpoch(0),
2475            size,
2476            materialized_conflict_data: None,
2477        }
2478    }
2479
2480    #[test]
2481    fn test_file_states_merge() {
2482        let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size));
2483        let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size));
2484        let new_proto_entry = |path: &str, size| {
2485            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
2486        };
2487        let data = vec![
2488            new_proto_entry("aa", 0),
2489            new_proto_entry("b#", 4), // '#' < '/'
2490            new_proto_entry("b/c", 1),
2491            new_proto_entry("b/d/e", 2),
2492            new_proto_entry("b/e", 3),
2493            new_proto_entry("bc", 5),
2494        ];
2495        let mut file_states = FileStatesMap::from_proto(data, false);
2496
2497        let changed_file_states = vec![
2498            new_owned_entry("aa", 10),    // change
2499            new_owned_entry("b/d/f", 11), // add
2500            new_owned_entry("b/e", 12),   // change
2501            new_owned_entry("c", 13),     // add
2502        ];
2503        let deleted_files = hashset! {
2504            repo_path("b/c").to_owned(),
2505            repo_path("b#").to_owned(),
2506        };
2507        file_states.merge_in(changed_file_states, &deleted_files);
2508        assert_eq!(
2509            file_states.all().iter().collect_vec(),
2510            vec![
2511                new_static_entry("aa", 10),
2512                new_static_entry("b/d/e", 2),
2513                new_static_entry("b/d/f", 11),
2514                new_static_entry("b/e", 12),
2515                new_static_entry("bc", 5),
2516                new_static_entry("c", 13),
2517            ],
2518        );
2519    }
2520
2521    #[test]
2522    fn test_file_states_lookup() {
2523        let new_proto_entry = |path: &str, size| {
2524            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
2525        };
2526        let data = vec![
2527            new_proto_entry("aa", 0),
2528            new_proto_entry("b/c", 1),
2529            new_proto_entry("b/d/e", 2),
2530            new_proto_entry("b/e", 3),
2531            new_proto_entry("b#", 4), // '#' < '/'
2532            new_proto_entry("bc", 5),
2533        ];
2534        let file_states = FileStates::from_sorted(&data);
2535
2536        assert_eq!(
2537            file_states.prefixed(repo_path("")).paths().collect_vec(),
2538            ["aa", "b/c", "b/d/e", "b/e", "b#", "bc"].map(repo_path)
2539        );
2540        assert!(file_states.prefixed(repo_path("a")).is_empty());
2541        assert_eq!(
2542            file_states.prefixed(repo_path("aa")).paths().collect_vec(),
2543            ["aa"].map(repo_path)
2544        );
2545        assert_eq!(
2546            file_states.prefixed(repo_path("b")).paths().collect_vec(),
2547            ["b/c", "b/d/e", "b/e"].map(repo_path)
2548        );
2549        assert_eq!(
2550            file_states.prefixed(repo_path("b/d")).paths().collect_vec(),
2551            ["b/d/e"].map(repo_path)
2552        );
2553        assert_eq!(
2554            file_states.prefixed(repo_path("b#")).paths().collect_vec(),
2555            ["b#"].map(repo_path)
2556        );
2557        assert_eq!(
2558            file_states.prefixed(repo_path("bc")).paths().collect_vec(),
2559            ["bc"].map(repo_path)
2560        );
2561        assert!(file_states.prefixed(repo_path("z")).is_empty());
2562
2563        assert!(!file_states.contains_path(repo_path("a")));
2564        assert!(file_states.contains_path(repo_path("aa")));
2565        assert!(file_states.contains_path(repo_path("b/d/e")));
2566        assert!(!file_states.contains_path(repo_path("b/d")));
2567        assert!(file_states.contains_path(repo_path("b#")));
2568        assert!(file_states.contains_path(repo_path("bc")));
2569        assert!(!file_states.contains_path(repo_path("z")));
2570
2571        assert_eq!(file_states.get(repo_path("a")), None);
2572        assert_eq!(file_states.get(repo_path("aa")), Some(new_state(0)));
2573        assert_eq!(file_states.get(repo_path("b/d/e")), Some(new_state(2)));
2574        assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
2575        assert_eq!(file_states.get(repo_path("z")), None);
2576    }
2577
2578    #[test]
2579    fn test_file_states_lookup_at() {
2580        let new_proto_entry = |path: &str, size| {
2581            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
2582        };
2583        let data = vec![
2584            new_proto_entry("b/c", 0),
2585            new_proto_entry("b/d/e", 1),
2586            new_proto_entry("b/d#", 2), // '#' < '/'
2587            new_proto_entry("b/e", 3),
2588            new_proto_entry("b#", 4), // '#' < '/'
2589        ];
2590        let file_states = FileStates::from_sorted(&data);
2591
2592        // At root
2593        assert_eq!(
2594            file_states.get_at(RepoPath::root(), repo_path_component("b")),
2595            None
2596        );
2597        assert_eq!(
2598            file_states.get_at(RepoPath::root(), repo_path_component("b#")),
2599            Some(new_state(4))
2600        );
2601
2602        // At prefixed dir
2603        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b"));
2604        assert_eq!(
2605            prefixed_states.paths().collect_vec(),
2606            ["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
2607        );
2608        assert_eq!(
2609            prefixed_states.get_at(repo_path("b"), repo_path_component("c")),
2610            Some(new_state(0))
2611        );
2612        assert_eq!(
2613            prefixed_states.get_at(repo_path("b"), repo_path_component("d")),
2614            None
2615        );
2616        assert_eq!(
2617            prefixed_states.get_at(repo_path("b"), repo_path_component("d#")),
2618            Some(new_state(2))
2619        );
2620
2621        // At nested prefixed dir
2622        let prefixed_states = prefixed_states.prefixed_at(repo_path("b"), repo_path_component("d"));
2623        assert_eq!(
2624            prefixed_states.paths().collect_vec(),
2625            ["b/d/e"].map(repo_path)
2626        );
2627        assert_eq!(
2628            prefixed_states.get_at(repo_path("b/d"), repo_path_component("e")),
2629            Some(new_state(1))
2630        );
2631        assert_eq!(
2632            prefixed_states.get_at(repo_path("b/d"), repo_path_component("#")),
2633            None
2634        );
2635
2636        // At prefixed file
2637        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b#"));
2638        assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
2639        assert_eq!(
2640            prefixed_states.get_at(repo_path("b#"), repo_path_component("#")),
2641            None
2642        );
2643    }
2644}