jj_lib/local_working_copy.rs

// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![expect(missing_docs)]

use std::cmp::Ordering;
use std::collections::HashSet;
use std::error::Error;
use std::fs;
use std::fs::DirEntry;
use std::fs::File;
use std::fs::Metadata;
use std::fs::OpenOptions;
use std::io;
use std::io::Read as _;
use std::io::Write as _;
use std::iter;
use std::mem;
use std::ops::Range;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt as _;
use std::path::Path;
use std::path::PathBuf;
use std::slice;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::mpsc::Sender;
use std::sync::mpsc::channel;
use std::time::UNIX_EPOCH;

use async_trait::async_trait;
use either::Either;
use futures::StreamExt as _;
use itertools::EitherOrBoth;
use itertools::Itertools as _;
use once_cell::unsync::OnceCell;
use pollster::FutureExt as _;
use prost::Message as _;
use rayon::iter::IntoParallelIterator as _;
use rayon::prelude::IndexedParallelIterator as _;
use rayon::prelude::ParallelIterator as _;
use tempfile::NamedTempFile;
use thiserror::Error;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt as _;
use tracing::instrument;
use tracing::trace_span;

use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CopyId;
use crate::backend::FileId;
use crate::backend::MergedTreeId;
use crate::backend::MillisSinceEpoch;
use crate::backend::SymlinkId;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::commit::Commit;
use crate::config::ConfigGetError;
use crate::conflicts;
use crate::conflicts::ConflictMarkerStyle;
use crate::conflicts::ConflictMaterializeOptions;
use crate::conflicts::MIN_CONFLICT_MARKER_LEN;
use crate::conflicts::MaterializedTreeValue;
use crate::conflicts::choose_materialized_conflict_marker_len;
use crate::conflicts::materialize_merge_result_to_bytes;
use crate::conflicts::materialize_tree_value;
pub use crate::eol::EolConversionMode;
use crate::eol::TargetEolStrategy;
use crate::file_util::BlockingAsyncReader;
use crate::file_util::check_symlink_support;
use crate::file_util::copy_async_to_sync;
use crate::file_util::persist_temp_file;
use crate::file_util::try_symlink;
use crate::fsmonitor::FsmonitorSettings;
#[cfg(feature = "watchman")]
use crate::fsmonitor::WatchmanConfig;
#[cfg(feature = "watchman")]
use crate::fsmonitor::watchman;
use crate::gitignore::GitIgnoreFile;
use crate::lock::FileLock;
use crate::matchers::DifferenceMatcher;
use crate::matchers::EverythingMatcher;
use crate::matchers::FilesMatcher;
use crate::matchers::IntersectionMatcher;
use crate::matchers::Matcher;
use crate::matchers::PrefixMatcher;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::merge::MergedTreeValue;
use crate::merge::SameChange;
use crate::merged_tree::MergedTree;
use crate::merged_tree::MergedTreeBuilder;
use crate::merged_tree::TreeDiffEntry;
use crate::object_id::ObjectId as _;
use crate::op_store::OperationId;
use crate::ref_name::WorkspaceName;
use crate::ref_name::WorkspaceNameBuf;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponent;
use crate::settings::UserSettings;
use crate::store::Store;
use crate::tree::Tree;
use crate::working_copy::CheckoutError;
use crate::working_copy::CheckoutStats;
use crate::working_copy::LockedWorkingCopy;
use crate::working_copy::ResetError;
use crate::working_copy::SnapshotError;
use crate::working_copy::SnapshotOptions;
use crate::working_copy::SnapshotProgress;
use crate::working_copy::SnapshotStats;
use crate::working_copy::UntrackedReason;
use crate::working_copy::WorkingCopy;
use crate::working_copy::WorkingCopyFactory;
use crate::working_copy::WorkingCopyStateError;

/// On-disk state of file executable bit.
// TODO: maybe better to preserve the executable bit on all platforms, and
// ignore conditionally? #3949
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FileExecutableFlag(#[cfg(unix)] bool);

#[cfg(unix)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(executable: bool) -> Self {
        Self(executable)
    }

    pub fn unwrap_or_else(self, _: impl FnOnce() -> bool) -> bool {
        self.0
    }
}

// Windows doesn't support the executable bit.
#[cfg(windows)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(_executable: bool) -> Self {
        Self()
    }

    pub fn unwrap_or_else(self, f: impl FnOnce() -> bool) -> bool {
        f()
    }
}
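
// Illustrative sketch (not part of this module's API; `bit_from_tree` is a
// hypothetical variable): on Unix the stored bit wins and the fallback closure
// is ignored, while on Windows the flag carries no data and the closure
// supplies the value instead:
//
//     let flag = FileExecutableFlag::from_bool_lossy(true);
//     let executable = flag.unwrap_or_else(|| bit_from_tree);
//     // Unix: always `true` here; Windows: whatever `bit_from_tree` holds.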

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FileType {
    Normal { executable: FileExecutableFlag },
    Symlink,
    GitSubmodule,
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct MaterializedConflictData {
    pub conflict_marker_len: u32,
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FileState {
    pub file_type: FileType,
    pub mtime: MillisSinceEpoch,
    pub size: u64,
    pub materialized_conflict_data: Option<MaterializedConflictData>,
    /* TODO: What else do we need here? Git stores a lot of fields.
     * TODO: Could possibly handle case-insensitive file systems keeping an
     *       Option<PathBuf> with the actual path here. */
}

impl FileState {
    /// Check whether a file state appears clean compared to a previous file
    /// state, ignoring materialized conflict data.
    pub fn is_clean(&self, old_file_state: &Self) -> bool {
        self.file_type == old_file_state.file_type
            && self.mtime == old_file_state.mtime
            && self.size == old_file_state.size
    }

    /// Indicates that a file exists in the tree but that it needs to be
    /// re-stat'ed on the next snapshot.
    fn placeholder() -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(false);
        Self {
            file_type: FileType::Normal { executable },
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }

    fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Self {
            file_type: FileType::Normal { executable },
            mtime: mtime_from_metadata(metadata),
            size,
            materialized_conflict_data: None,
        }
    }

    fn for_symlink(metadata: &Metadata) -> Self {
        // When using fscrypt, the reported size is not the content size. So if
        // we were to record the content size here (like we do for regular files), we
        // would end up thinking the file has changed every time we snapshot.
        Self {
            file_type: FileType::Symlink,
            mtime: mtime_from_metadata(metadata),
            size: metadata.len(),
            materialized_conflict_data: None,
        }
    }

    fn for_gitsubmodule() -> Self {
        Self {
            file_type: FileType::GitSubmodule,
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }
}
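
// Illustrative sketch (hypothetical `metadata` and `recorded` values): during
// a snapshot, a tracked file whose type, mtime, and size all match the
// recorded state is treated as clean, so its contents need not be re-read:
//
//     let on_disk = FileState::for_file(false, metadata.len(), &metadata);
//     if on_disk.is_clean(&recorded) {
//         // unchanged; skip hashing the file contents
//     }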

/// Owned map of path to file states, backed by proto data.
#[derive(Clone, Debug)]
struct FileStatesMap {
    data: Vec<crate::protos::local_working_copy::FileStateEntry>,
}

impl FileStatesMap {
    fn new() -> Self {
        Self { data: Vec::new() }
    }

    fn from_proto(
        mut data: Vec<crate::protos::local_working_copy::FileStateEntry>,
        is_sorted: bool,
    ) -> Self {
        if !is_sorted {
            data.sort_unstable_by(|entry1, entry2| {
                let path1 = RepoPath::from_internal_string(&entry1.path).unwrap();
                let path2 = RepoPath::from_internal_string(&entry2.path).unwrap();
                path1.cmp(path2)
            });
        }
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(&data));
        Self { data }
    }

    /// Merges changed and deleted entries into this map. The changed entries
    /// must be sorted by path.
    fn merge_in(
        &mut self,
        changed_file_states: Vec<(RepoPathBuf, FileState)>,
        deleted_files: &HashSet<RepoPathBuf>,
    ) {
        if changed_file_states.is_empty() && deleted_files.is_empty() {
            return;
        }
        debug_assert!(
            changed_file_states.is_sorted_by(|(path1, _), (path2, _)| path1 < path2),
            "changed_file_states must be sorted and have no duplicates"
        );
        self.data = itertools::merge_join_by(
            mem::take(&mut self.data),
            changed_file_states,
            |old_entry, (changed_path, _)| {
                RepoPath::from_internal_string(&old_entry.path)
                    .unwrap()
                    .cmp(changed_path)
            },
        )
        .filter_map(|diff| match diff {
            EitherOrBoth::Both(_, (path, state)) | EitherOrBoth::Right((path, state)) => {
                debug_assert!(!deleted_files.contains(&path));
                Some(file_state_entry_to_proto(path, &state))
            }
            EitherOrBoth::Left(entry) => {
                let present =
                    !deleted_files.contains(RepoPath::from_internal_string(&entry.path).unwrap());
                present.then_some(entry)
            }
        })
        .collect();
    }

    fn clear(&mut self) {
        self.data.clear();
    }

    /// Returns read-only map containing all file states.
    fn all(&self) -> FileStates<'_> {
        FileStates::from_sorted(&self.data)
    }
}
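
// Illustrative sketch (hypothetical paths and `state` value): `merge_in` is a
// single pass over two sorted sequences, so changed entries replace or insert
// at their sorted position while deleted paths drop out:
//
//     let changed = vec![(RepoPathBuf::from_internal_string("a/new").unwrap(), state)];
//     let deleted = HashSet::from([RepoPathBuf::from_internal_string("a/gone").unwrap()]);
//     map.merge_in(changed, &deleted);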

/// Read-only map of path to file states, possibly filtered by path prefix.
#[derive(Clone, Copy, Debug)]
pub struct FileStates<'a> {
    data: &'a [crate::protos::local_working_copy::FileStateEntry],
}

impl<'a> FileStates<'a> {
    fn from_sorted(data: &'a [crate::protos::local_working_copy::FileStateEntry]) -> Self {
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(data));
        Self { data }
    }

    /// Returns file states under the given directory path.
    pub fn prefixed(&self, base: &RepoPath) -> Self {
        let range = self.prefixed_range(base);
        Self::from_sorted(&self.data[range])
    }

    /// Faster version of `prefixed("<dir>/<base>")`. Requires that all entries
    /// share the same prefix `dir`.
    fn prefixed_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Self {
        let range = self.prefixed_range_at(dir, base);
        Self::from_sorted(&self.data[range])
    }

    /// Returns true if this contains no entries.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns true if the given `path` exists.
    pub fn contains_path(&self, path: &RepoPath) -> bool {
        self.exact_position(path).is_some()
    }

    /// Returns file state for the given `path`.
    pub fn get(&self, path: &RepoPath) -> Option<FileState> {
        let pos = self.exact_position(path)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    /// Faster version of `get("<dir>/<name>")`. Requires that all entries share
    /// the same prefix `dir`.
    fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
        let pos = self.exact_position_at(dir, name)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    fn exact_position(&self, path: &RepoPath) -> Option<usize> {
        self.data
            .binary_search_by(|entry| {
                RepoPath::from_internal_string(&entry.path)
                    .unwrap()
                    .cmp(path)
            })
            .ok()
    }

    fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        self.data
            .binary_search_by(|entry| {
                let tail = entry.path.get(prefix_len..).unwrap_or("");
                match tail.split_once('/') {
                    // "<name>/*" > "<name>"
                    Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
                    None => tail.cmp(name.as_internal_str()),
                }
            })
            .ok()
    }

    fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
        let start = self
            .data
            .partition_point(|entry| RepoPath::from_internal_string(&entry.path).unwrap() < base);
        let len = self.data[start..].partition_point(|entry| {
            RepoPath::from_internal_string(&entry.path)
                .unwrap()
                .starts_with(base)
        });
        start..(start + len)
    }

    fn prefixed_range_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Range<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        let start = self.data.partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name < base.as_internal_str()
        });
        let len = self.data[start..].partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name == base.as_internal_str()
        });
        start..(start + len)
    }

    /// Iterates file state entries sorted by path.
    pub fn iter(&self) -> FileStatesIter<'a> {
        self.data.iter().map(file_state_entry_from_proto)
    }

    /// Iterates sorted file paths.
    pub fn paths(&self) -> impl ExactSizeIterator<Item = &'a RepoPath> + use<'a> {
        self.data
            .iter()
            .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
    }
}
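
// Illustrative sketch (hypothetical paths): since entries are sorted,
// `prefixed` narrows the slice by binary search and `get` is a logarithmic
// lookup within it:
//
//     let src = states.prefixed(RepoPath::from_internal_string("src").unwrap());
//     let state = src.get(RepoPath::from_internal_string("src/lib.rs").unwrap());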

type FileStatesIter<'a> = iter::Map<
    slice::Iter<'a, crate::protos::local_working_copy::FileStateEntry>,
    fn(&crate::protos::local_working_copy::FileStateEntry) -> (&RepoPath, FileState),
>;

impl<'a> IntoIterator for FileStates<'a> {
    type Item = (&'a RepoPath, FileState);
    type IntoIter = FileStatesIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

fn file_state_from_proto(proto: &crate::protos::local_working_copy::FileState) -> FileState {
    let file_type = match proto.file_type() {
        crate::protos::local_working_copy::FileType::Normal => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        // On Windows, FileType::Executable can exist in files written by older
        // versions of jj
        crate::protos::local_working_copy::FileType::Executable => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(true),
        },
        crate::protos::local_working_copy::FileType::Symlink => FileType::Symlink,
        crate::protos::local_working_copy::FileType::Conflict => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        crate::protos::local_working_copy::FileType::GitSubmodule => FileType::GitSubmodule,
    };
    FileState {
        file_type,
        mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch),
        size: proto.size,
        materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| {
            MaterializedConflictData {
                conflict_marker_len: data.conflict_marker_len,
            }
        }),
    }
}

fn file_state_to_proto(file_state: &FileState) -> crate::protos::local_working_copy::FileState {
    let mut proto = crate::protos::local_working_copy::FileState::default();
    let file_type = match &file_state.file_type {
        FileType::Normal { executable } => {
            if executable.unwrap_or_else(Default::default) {
                crate::protos::local_working_copy::FileType::Executable
            } else {
                crate::protos::local_working_copy::FileType::Normal
            }
        }
        FileType::Symlink => crate::protos::local_working_copy::FileType::Symlink,
        FileType::GitSubmodule => crate::protos::local_working_copy::FileType::GitSubmodule,
    };
    proto.file_type = file_type as i32;
    proto.mtime_millis_since_epoch = file_state.mtime.0;
    proto.size = file_state.size;
    proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| {
        crate::protos::local_working_copy::MaterializedConflictData {
            conflict_marker_len: data.conflict_marker_len,
        }
    });
    proto
}

fn file_state_entry_from_proto(
    proto: &crate::protos::local_working_copy::FileStateEntry,
) -> (&RepoPath, FileState) {
    let path = RepoPath::from_internal_string(&proto.path).unwrap();
    (path, file_state_from_proto(proto.state.as_ref().unwrap()))
}

fn file_state_entry_to_proto(
    path: RepoPathBuf,
    state: &FileState,
) -> crate::protos::local_working_copy::FileStateEntry {
    crate::protos::local_working_copy::FileStateEntry {
        path: path.into_internal_string(),
        state: Some(file_state_to_proto(state)),
    }
}

fn is_file_state_entries_proto_unique_and_sorted(
    data: &[crate::protos::local_working_copy::FileStateEntry],
) -> bool {
    data.iter()
        .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
        .is_sorted_by(|path1, path2| path1 < path2)
}

fn sparse_patterns_from_proto(
    proto: Option<&crate::protos::local_working_copy::SparsePatterns>,
) -> Vec<RepoPathBuf> {
    let mut sparse_patterns = vec![];
    if let Some(proto_sparse_patterns) = proto {
        for prefix in &proto_sparse_patterns.prefixes {
            sparse_patterns.push(RepoPathBuf::from_internal_string(prefix).unwrap());
        }
    } else {
        // For compatibility with old working copies.
        // TODO: Delete this in late 2022 or so.
        sparse_patterns.push(RepoPathBuf::root());
    }
    sparse_patterns
}

/// Creates intermediate directories from the `working_copy_path` to the
/// `repo_path` parent. Returns the disk path for the `repo_path` file.
///
/// If an intermediate path component exists but is a file or symlink, this
/// function returns `Ok(None)` to signal that the path should be skipped.
/// The `working_copy_path` directory may be a symlink.
///
/// If an existing or newly-created sub directory points to ".git" or ".jj",
/// this function returns an error.
///
/// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts.
/// Another process may remove the directory created by this function and put a
/// symlink there.
fn create_parent_dirs(
    working_copy_path: &Path,
    repo_path: &RepoPath,
) -> Result<Option<PathBuf>, CheckoutError> {
    let (parent_path, basename) = repo_path.split().expect("repo path shouldn't be root");
    let mut dir_path = working_copy_path.to_owned();
    for c in parent_path.components() {
        // Ensure that the name is a normal entry of the current dir_path.
        dir_path.push(c.to_fs_name().map_err(|err| err.with_path(repo_path))?);
        // A directory named ".git" or ".jj" can be temporarily created. It
        // might trick workspace path discovery, but is harmless so long as the
        // directory is empty.
        let new_dir_created = match fs::create_dir(&dir_path) {
            Ok(()) => true, // New directory
            Err(err) => match dir_path.symlink_metadata() {
                Ok(m) if m.is_dir() => false, // Existing directory
                Ok(_) => {
                    return Ok(None); // Skip existing file or symlink
                }
                Err(_) => {
                    return Err(CheckoutError::Other {
                        message: format!(
                            "Failed to create parent directories for {}",
                            repo_path.to_fs_path_unchecked(working_copy_path).display(),
                        ),
                        err: err.into(),
                    });
                }
            },
        };
        // Invalid component (e.g. "..") should have been rejected.
        // The current dir_path should be an entry of dir_path.parent().
        reject_reserved_existing_path(&dir_path).inspect_err(|_| {
            if new_dir_created {
                fs::remove_dir(&dir_path).ok();
            }
        })?;
    }

    let mut file_path = dir_path;
    file_path.push(
        basename
            .to_fs_name()
            .map_err(|err| err.with_path(repo_path))?,
    );
    Ok(Some(file_path))
}
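
// Illustrative sketch of the intended call pattern at checkout time: a `None`
// return means some ancestor exists as a file or symlink, so the caller skips
// the path instead of writing through it:
//
//     match create_parent_dirs(working_copy_path, repo_path)? {
//         Some(disk_path) => { /* write the file at disk_path */ }
//         None => { /* count the path as skipped */ }
//     }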

/// Removes the existing file named `disk_path`, if any. Returns `Ok(true)` if
/// the file was there and got removed, meaning that a new file can be safely
/// created.
///
/// If the existing file points to ".git" or ".jj", this function returns an
/// error.
fn remove_old_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    reject_reserved_existing_path(disk_path)?;
    match fs::remove_file(disk_path) {
        Ok(()) => Ok(true),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
        // TODO: Use io::ErrorKind::IsADirectory if it gets stabilized
        Err(_) if disk_path.symlink_metadata().is_ok_and(|m| m.is_dir()) => Ok(false),
        Err(err) => Err(CheckoutError::Other {
            message: format!("Failed to remove file {}", disk_path.display()),
            err: err.into(),
        }),
    }
}

/// Checks if a new file or symlink named `disk_path` can be created.
///
/// If the file already exists, this function returns `Ok(false)` to signal
/// that the path should be skipped.
///
/// If the path may point to a ".git" or ".jj" entry, this function returns an
/// error.
///
/// This function can fail if `disk_path.parent()` isn't a directory.
fn can_create_new_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    // The new file or symlink will be created by the caller. If it were
    // pointed to by the name ".git" or ".jj", the git/jj CLI could be tricked
    // into loading configuration from an attacker-controlled location. So we
    // first test the path by creating an empty file.
    let new_file = match OpenOptions::new()
        .write(true)
        .create_new(true) // Don't overwrite, don't follow symlink
        .open(disk_path)
    {
        Ok(file) => Some(file),
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => None,
        // Workaround for "Access is denied. (os error 5)" error on Windows.
        Err(_) => match disk_path.symlink_metadata() {
            Ok(_) => None,
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to stat {}", disk_path.display()),
                    err: err.into(),
                });
            }
        },
    };

    let new_file_created = new_file.is_some();

    if let Some(new_file) = new_file {
        reject_reserved_existing_file(new_file, disk_path).inspect_err(|_| {
            // We keep the error from `reject_reserved_existing_file`
            let _ = fs::remove_file(disk_path);
        })?;

        fs::remove_file(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to remove temporary file {}", disk_path.display()),
            err: err.into(),
        })?;
    } else {
        reject_reserved_existing_path(disk_path)?;
    }
    Ok(new_file_created)
}
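
// Illustrative sketch: callers probe with `can_create_new_file` before
// writing, so a concurrently created file (or a reserved ".git"/".jj" name)
// is skipped rather than clobbered:
//
//     if can_create_new_file(&disk_path)? {
//         // safe to create the file or symlink at disk_path
//     }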

const RESERVED_DIR_NAMES: &[&str] = &[".git", ".jj"];

fn same_file_handle_from_path(disk_path: &Path) -> io::Result<Option<same_file::Handle>> {
    match same_file::Handle::from_path(disk_path) {
        Ok(handle) => Ok(Some(handle)),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
        Err(err) => Err(err),
    }
}

/// Wrapper for [`reject_reserved_existing_handle`] which avoids a syscall
/// by converting the provided `file` to a `same_file::Handle` via its
/// file descriptor.
///
/// See [`reject_reserved_existing_handle`] for more info.
fn reject_reserved_existing_file(file: File, disk_path: &Path) -> Result<(), CheckoutError> {
    // Note: since the file is open, we don't expect `io::ErrorKind::NotFound`
    // to be returned here.
    let file_handle = same_file::Handle::from_file(file).map_err(|err| CheckoutError::Other {
        message: format!("Failed to validate path {}", disk_path.display()),
        err: err.into(),
    })?;

    reject_reserved_existing_handle(file_handle, disk_path)
}

/// Wrapper for [`reject_reserved_existing_handle`] which converts
/// the provided `disk_path` to a `same_file::Handle`.
///
/// See [`reject_reserved_existing_handle`] for more info.
///
/// # Remarks
///
/// Incurs an additional syscall cost to open and close the file
/// descriptor/`HANDLE` for `disk_path`.
fn reject_reserved_existing_path(disk_path: &Path) -> Result<(), CheckoutError> {
    let Some(disk_handle) =
        same_file_handle_from_path(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to validate path {}", disk_path.display()),
            err: err.into(),
        })?
    else {
        // If disk_path existed, we would have gotten a handle back. Since we
        // got nothing, the file does not exist and cannot point to a reserved
        // path.
        return Ok(());
    };

    reject_reserved_existing_handle(disk_handle, disk_path)
}

/// Assuming `disk_path` exists, checks whether its last component points to
/// ".git" or ".jj" in the same parent directory.
///
/// `disk_handle` is expected to be a handle to the file described by
/// `disk_path`.
///
/// # Remarks
///
/// Incurs a syscall cost to open and close a file descriptor/`HANDLE` for
/// each filename in `RESERVED_DIR_NAMES`.
fn reject_reserved_existing_handle(
    disk_handle: same_file::Handle,
    disk_path: &Path,
) -> Result<(), CheckoutError> {
    let parent_dir_path = disk_path.parent().expect("content path shouldn't be root");
    for name in RESERVED_DIR_NAMES {
        let reserved_path = parent_dir_path.join(name);

        let Some(reserved_handle) =
            same_file_handle_from_path(&reserved_path).map_err(|err| CheckoutError::Other {
                message: format!("Failed to validate path {}", disk_path.display()),
                err: err.into(),
            })?
        else {
            // If the reserved path existed, we would have gotten a handle
            // back. Since we got nothing, the reserved name does not exist
            // here, so disk_path cannot point to it.
            continue;
        };

        if disk_handle == reserved_handle {
            return Err(CheckoutError::ReservedPathComponent {
                path: disk_path.to_owned(),
                name,
            });
        }
    }

    Ok(())
}
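
// Illustrative sketch: the check above compares file handles rather than path
// strings, so a symlink whose harmless-looking name resolves to ".git" or
// ".jj" is still caught:
//
//     let handle = same_file::Handle::from_path(&disk_path)?; // follows symlinks
//     // if `handle` equals the handle of "<parent>/.git" or "<parent>/.jj",
//     // the path is rejected with CheckoutError::ReservedPathComponent.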

fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch {
    let time = metadata
        .modified()
        .expect("File mtime not supported on this platform?");
    let since_epoch = time
        .duration_since(UNIX_EPOCH)
        .expect("mtime before unix epoch");

    MillisSinceEpoch(
        i64::try_from(since_epoch.as_millis())
            .expect("mtime billions of years into the future or past"),
    )
}

fn file_state(metadata: &Metadata) -> Option<FileState> {
    let metadata_file_type = metadata.file_type();
    let file_type = if metadata_file_type.is_dir() {
        None
    } else if metadata_file_type.is_symlink() {
        Some(FileType::Symlink)
    } else if metadata_file_type.is_file() {
        #[cfg(unix)]
        let executable = metadata.permissions().mode() & 0o111 != 0;
        #[cfg(windows)]
        let executable = false;
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Some(FileType::Normal { executable })
    } else {
        None
    };
    file_type.map(|file_type| {
        let mtime = mtime_from_metadata(metadata);
        let size = metadata.len();
        FileState {
            file_type,
            mtime,
            size,
            materialized_conflict_data: None,
        }
    })
}

struct FsmonitorMatcher {
    matcher: Option<Box<dyn Matcher>>,
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,
}

/// Settings specific to the tree state of the [`LocalWorkingCopy`] backend.
#[derive(Clone, Debug)]
pub struct TreeStateSettings {
    /// Conflict marker style to use when materializing files or when checking
    /// changed files.
    pub conflict_marker_style: ConflictMarkerStyle,
    /// Configures automatic conversion of CRLF line endings into LF when a
    /// file is added to the backend, and the reverse when code is checked out
    /// onto your filesystem.
    pub eol_conversion_mode: EolConversionMode,
    /// The fsmonitor (e.g. Watchman) to use, if any.
    pub fsmonitor_settings: FsmonitorSettings,
}

impl TreeStateSettings {
    /// Create [`TreeStateSettings`] from [`UserSettings`].
    pub fn try_from_user_settings(user_settings: &UserSettings) -> Result<Self, ConfigGetError> {
        Ok(Self {
            conflict_marker_style: user_settings.get("ui.conflict-marker-style")?,
            eol_conversion_mode: EolConversionMode::try_from_settings(user_settings)?,
            fsmonitor_settings: FsmonitorSettings::from_settings(user_settings)?,
        })
    }
}
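
// Illustrative sketch: the settings are resolved once from the user
// configuration and then passed down when loading or initializing a tree
// state:
//
//     let tree_state_settings = TreeStateSettings::try_from_user_settings(settings)?;
//     let tree_state = TreeState::load(store, wc_path, state_path, &tree_state_settings)?;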

pub struct TreeState {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    tree_id: MergedTreeId,
    file_states: FileStatesMap,
    // Currently only path prefixes
    sparse_patterns: Vec<RepoPathBuf>,
    own_mtime: MillisSinceEpoch,
    symlink_support: bool,

    /// The most recent clock value returned by Watchman. Will only be set if
    /// the repo is configured to use the Watchman filesystem monitor and
    /// Watchman has been queried at least once.
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,

    conflict_marker_style: ConflictMarkerStyle,
    fsmonitor_settings: FsmonitorSettings,
    target_eol_strategy: TargetEolStrategy,
}

#[derive(Debug, Error)]
pub enum TreeStateError {
    #[error("Reading tree state from {path}")]
    ReadTreeState { path: PathBuf, source: io::Error },
    #[error("Decoding tree state from {path}")]
    DecodeTreeState {
        path: PathBuf,
        source: prost::DecodeError,
    },
    #[error("Writing tree state to temporary file {path}")]
    WriteTreeState { path: PathBuf, source: io::Error },
    #[error("Persisting tree state to file {path}")]
    PersistTreeState { path: PathBuf, source: io::Error },
    #[error("Filesystem monitor error")]
    Fsmonitor(#[source] Box<dyn Error + Send + Sync>),
}

impl TreeState {
    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }

    pub fn current_tree_id(&self) -> &MergedTreeId {
        &self.tree_id
    }

    pub fn file_states(&self) -> FileStates<'_> {
        self.file_states.all()
    }

    pub fn sparse_patterns(&self) -> &Vec<RepoPathBuf> {
        &self.sparse_patterns
    }

    fn sparse_matcher(&self) -> Box<dyn Matcher> {
        Box::new(PrefixMatcher::new(&self.sparse_patterns))
    }

    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.save()?;
        Ok(wc)
    }

    fn empty(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        &TreeStateSettings {
            conflict_marker_style,
            eol_conversion_mode,
            ref fsmonitor_settings,
        }: &TreeStateSettings,
    ) -> Self {
        let tree_id = store.empty_merged_tree_id();
        Self {
            store,
            working_copy_path,
            state_path,
            tree_id,
            file_states: FileStatesMap::new(),
            sparse_patterns: vec![RepoPathBuf::root()],
            own_mtime: MillisSinceEpoch(0),
            symlink_support: check_symlink_support().unwrap_or(false),
            watchman_clock: None,
            conflict_marker_style,
            fsmonitor_settings: fsmonitor_settings.clone(),
            target_eol_strategy: TargetEolStrategy::new(eol_conversion_mode),
        }
    }

    pub fn load(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let tree_state_path = state_path.join("tree_state");
        let file = match File::open(&tree_state_path) {
            Err(ref err) if err.kind() == io::ErrorKind::NotFound => {
                return Self::init(store, working_copy_path, state_path, tree_state_settings);
            }
            Err(err) => {
                return Err(TreeStateError::ReadTreeState {
                    path: tree_state_path,
                    source: err,
                });
            }
            Ok(file) => file,
        };

        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.read(&tree_state_path, file)?;
        Ok(wc)
    }

    fn update_own_mtime(&mut self) {
        if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() {
            self.own_mtime = mtime_from_metadata(&metadata);
        } else {
            self.own_mtime = MillisSinceEpoch(0);
        }
    }

    fn read(&mut self, tree_state_path: &Path, mut file: File) -> Result<(), TreeStateError> {
        self.update_own_mtime();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf)
            .map_err(|err| TreeStateError::ReadTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            })?;
        let proto = crate::protos::local_working_copy::TreeState::decode(&*buf).map_err(|err| {
            TreeStateError::DecodeTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            }
        })?;
        #[expect(deprecated)]
        if proto.tree_ids.is_empty() {
            self.tree_id = MergedTreeId::resolved(TreeId::new(proto.legacy_tree_id.clone()));
        } else {
            let tree_ids_builder: MergeBuilder<TreeId> = proto
                .tree_ids
                .iter()
                .map(|id| TreeId::new(id.clone()))
                .collect();
            self.tree_id = MergedTreeId::new(tree_ids_builder.build());
        }
        self.file_states =
            FileStatesMap::from_proto(proto.file_states, proto.is_file_states_sorted);
        self.sparse_patterns = sparse_patterns_from_proto(proto.sparse_patterns.as_ref());
        self.watchman_clock = proto.watchman_clock;
        Ok(())
    }

    #[expect(clippy::assigning_clones, clippy::field_reassign_with_default)]
    pub fn save(&mut self) -> Result<(), TreeStateError> {
        let mut proto: crate::protos::local_working_copy::TreeState = Default::default();
        proto.tree_ids = self
            .tree_id
            .as_merge()
            .iter()
            .map(|id| id.to_bytes())
            .collect();
        proto.file_states = self.file_states.data.clone();
        // `FileStatesMap` is guaranteed to be sorted.
        proto.is_file_states_sorted = true;
        let mut sparse_patterns = crate::protos::local_working_copy::SparsePatterns::default();
        for path in &self.sparse_patterns {
            sparse_patterns
                .prefixes
                .push(path.as_internal_file_string().to_owned());
        }
        proto.sparse_patterns = Some(sparse_patterns);
        proto.watchman_clock = self.watchman_clock.clone();

        let wrap_write_err = |source| TreeStateError::WriteTreeState {
            path: self.state_path.clone(),
            source,
        };
        let mut temp_file = NamedTempFile::new_in(&self.state_path).map_err(wrap_write_err)?;
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .map_err(wrap_write_err)?;
        // Update our own write time before we rename the file into place, so
        // we know there is no unknown data in it.
        self.update_own_mtime();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        let target_path = self.state_path.join("tree_state");
        persist_temp_file(temp_file, &target_path).map_err(|source| {
            TreeStateError::PersistTreeState {
                path: target_path.clone(),
                source,
            }
        })?;
        Ok(())
    }

    fn current_tree(&self) -> BackendResult<MergedTree> {
        self.store.get_root_tree(&self.tree_id)
    }

    fn reset_watchman(&mut self) {
        self.watchman_clock.take();
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from);
        let changed_files = fsmonitor
            .query_changed_files(previous_clock)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        Ok(changed_files)
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        fsmonitor
            .is_trigger_registered()
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))
    }
}

/// Functions to snapshot local-disk files to the store.
impl TreeState {
    /// Look for changes to the working copy. If there are any changes, create
    /// a new tree from them.
    #[instrument(skip_all)]
    pub fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(bool, SnapshotStats), SnapshotError> {
        let &SnapshotOptions {
            ref base_ignores,
            progress,
            start_tracking_matcher,
            max_new_file_size,
        } = options;

        let sparse_matcher = self.sparse_matcher();

        let fsmonitor_clock_needs_save = self.fsmonitor_settings != FsmonitorSettings::None;
        let mut is_dirty = fsmonitor_clock_needs_save;
        let FsmonitorMatcher {
            matcher: fsmonitor_matcher,
            watchman_clock,
        } = self.make_fsmonitor_matcher(&self.fsmonitor_settings)?;
        let fsmonitor_matcher = match fsmonitor_matcher.as_ref() {
            None => &EverythingMatcher,
            Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(),
        };

        let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher);
        if matcher.visit(RepoPath::root()).is_nothing() {
            // No need to load the current tree, set up channels, etc.
            self.watchman_clock = watchman_clock;
            return Ok((is_dirty, SnapshotStats::default()));
        }

        let (tree_entries_tx, tree_entries_rx) = channel();
        let (file_states_tx, file_states_rx) = channel();
        let (untracked_paths_tx, untracked_paths_rx) = channel();
        let (deleted_files_tx, deleted_files_rx) = channel();

        trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> {
            let snapshotter = FileSnapshotter {
                tree_state: self,
                current_tree: &self.current_tree()?,
                matcher: &matcher,
                start_tracking_matcher,
                // Move tx sides so they'll be dropped at the end of the scope.
                tree_entries_tx,
                file_states_tx,
                untracked_paths_tx,
                deleted_files_tx,
                error: OnceLock::new(),
                progress,
                max_new_file_size,
            };
            let directory_to_visit = DirectoryToVisit {
                dir: RepoPathBuf::root(),
                disk_dir: self.working_copy_path.clone(),
                git_ignore: base_ignores.clone(),
                file_states: self.file_states.all(),
            };
            // Here we use scope as a queue of per-directory jobs.
            rayon::scope(|scope| {
                snapshotter.spawn_ok(scope, |scope| {
                    snapshotter.visit_directory(directory_to_visit, scope)
                });
            });
            snapshotter.into_result()
        })?;

        let stats = SnapshotStats {
            untracked_paths: untracked_paths_rx.into_iter().collect(),
        };
        let mut tree_builder = MergedTreeBuilder::new(self.tree_id.clone());
        trace_span!("process tree entries").in_scope(|| {
            for (path, tree_values) in &tree_entries_rx {
                tree_builder.set_or_remove(path, tree_values);
            }
        });
        let deleted_files = trace_span!("process deleted tree entries").in_scope(|| {
            let deleted_files = HashSet::from_iter(deleted_files_rx);
            is_dirty |= !deleted_files.is_empty();
            for file in &deleted_files {
                tree_builder.set_or_remove(file.clone(), Merge::absent());
            }
            deleted_files
        });
        trace_span!("process file states").in_scope(|| {
            let changed_file_states = file_states_rx
                .iter()
                .sorted_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2))
                .collect_vec();
            is_dirty |= !changed_file_states.is_empty();
            self.file_states
                .merge_in(changed_file_states, &deleted_files);
        });
        trace_span!("write tree").in_scope(|| -> Result<(), BackendError> {
            let new_tree_id = tree_builder.write_tree(&self.store)?;
            is_dirty |= new_tree_id != self.tree_id;
            self.tree_id = new_tree_id;
            Ok(())
        })?;
        if cfg!(debug_assertions) {
            let tree = self.current_tree().unwrap();
            let tree_paths: HashSet<_> = tree
                .entries_matching(sparse_matcher.as_ref())
                .filter_map(|(path, result)| result.is_ok().then_some(path))
                .collect();
            let file_states = self.file_states.all();
            let state_paths: HashSet<_> = file_states.paths().map(|path| path.to_owned()).collect();
            assert_eq!(state_paths, tree_paths);
        }
        // Since untracked paths aren't cached in the tree state, we'll need to
        // rescan the working directory changes to report or track them later.
        // TODO: store untracked paths and update watchman_clock?
        if stats.untracked_paths.is_empty() || watchman_clock.is_none() {
            self.watchman_clock = watchman_clock;
        } else {
            tracing::info!("not updating watchman clock because there are untracked files");
        }
        Ok((is_dirty, stats))
    }

    #[instrument(skip_all)]
    fn make_fsmonitor_matcher(
        &self,
        fsmonitor_settings: &FsmonitorSettings,
    ) -> Result<FsmonitorMatcher, SnapshotError> {
        let (watchman_clock, changed_files) = match fsmonitor_settings {
            FsmonitorSettings::None => (None, None),
            FsmonitorSettings::Test { changed_files } => (None, Some(changed_files.clone())),
            #[cfg(feature = "watchman")]
            FsmonitorSettings::Watchman(config) => match self.query_watchman(config) {
                Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files),
                Err(err) => {
                    tracing::warn!(?err, "Failed to query filesystem monitor");
                    (None, None)
                }
            },
            #[cfg(not(feature = "watchman"))]
            FsmonitorSettings::Watchman(_) => {
                return Err(SnapshotError::Other {
                    message: "Failed to query the filesystem monitor".to_string(),
                    err: "Cannot query Watchman because jj was not compiled with the `watchman` \
                          feature (consider disabling `fsmonitor.backend`)"
                        .into(),
                });
            }
        };
        let matcher: Option<Box<dyn Matcher>> = match changed_files {
            None => None,
            Some(changed_files) => {
                let repo_paths = trace_span!("processing fsmonitor paths").in_scope(|| {
                    changed_files
                        .into_iter()
                        .filter_map(|path| RepoPathBuf::from_relative_path(path).ok())
                        .collect_vec()
                });

                Some(Box::new(FilesMatcher::new(repo_paths)))
            }
        };
        Ok(FsmonitorMatcher {
            matcher,
            watchman_clock,
        })
    }
}
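
// Illustrative sketch of a snapshot pass: the returned dirty flag tells the
// caller whether the tree state needs to be persisted, and the stats carry
// paths that were left untracked (assuming `options` was built elsewhere):
//
//     let (is_dirty, stats) = tree_state.snapshot(&options)?;
//     // report stats.untracked_paths to the user as appropriate
//     if is_dirty {
//         tree_state.save()?;
//     }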

struct DirectoryToVisit<'a> {
    dir: RepoPathBuf,
    disk_dir: PathBuf,
    git_ignore: Arc<GitIgnoreFile>,
    file_states: FileStates<'a>,
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PresentDirEntryKind {
    Dir,
    File,
}

#[derive(Clone, Debug)]
struct PresentDirEntries {
    dirs: HashSet<String>,
    files: HashSet<String>,
}

/// Helper to scan local-disk directories and files in parallel.
struct FileSnapshotter<'a> {
    tree_state: &'a TreeState,
    current_tree: &'a MergedTree,
    matcher: &'a dyn Matcher,
    start_tracking_matcher: &'a dyn Matcher,
    tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
    file_states_tx: Sender<(RepoPathBuf, FileState)>,
    untracked_paths_tx: Sender<(RepoPathBuf, UntrackedReason)>,
    deleted_files_tx: Sender<RepoPathBuf>,
    error: OnceLock<SnapshotError>,
    progress: Option<&'a SnapshotProgress<'a>>,
    max_new_file_size: u64,
}

impl FileSnapshotter<'_> {
    fn spawn_ok<'scope, F>(&'scope self, scope: &rayon::Scope<'scope>, body: F)
    where
        F: FnOnce(&rayon::Scope<'scope>) -> Result<(), SnapshotError> + Send + 'scope,
    {
        scope.spawn(|scope| {
            if self.error.get().is_some() {
                return;
            }
            match body(scope) {
                Ok(()) => {}
                Err(err) => self.error.set(err).unwrap_or(()),
            };
        });
    }

    /// Extracts the result of the snapshot.
    fn into_result(self) -> Result<(), SnapshotError> {
        match self.error.into_inner() {
            Some(err) => Err(err),
            None => Ok(()),
        }
    }

    /// Visits the directory entries, spawns jobs to recurse into sub
    /// directories.
    fn visit_directory<'scope>(
        &'scope self,
        directory_to_visit: DirectoryToVisit<'scope>,
        scope: &rayon::Scope<'scope>,
    ) -> Result<(), SnapshotError> {
        let DirectoryToVisit {
            dir,
            disk_dir,
            git_ignore,
            file_states,
        } = directory_to_visit;

        let git_ignore = git_ignore
            .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
        let dir_entries: Vec<_> = disk_dir
            .read_dir()
            .and_then(|entries| entries.try_collect())
            .map_err(|err| SnapshotError::Other {
                message: format!("Failed to read directory {}", disk_dir.display()),
                err: err.into(),
            })?;
        let (dirs, files) = dir_entries
            .into_par_iter()
            // Don't split into too many small jobs. For a small directory,
            // sequential scan should be fast enough.
            .with_min_len(100)
            .filter_map(|entry| {
                self.process_dir_entry(&dir, &git_ignore, file_states, &entry, scope)
                    .transpose()
            })
            .map(|item| match item {
                Ok((PresentDirEntryKind::Dir, name)) => Ok(Either::Left(name)),
                Ok((PresentDirEntryKind::File, name)) => Ok(Either::Right(name)),
                Err(err) => Err(err),
            })
            .collect::<Result<_, _>>()?;
        let present_entries = PresentDirEntries { dirs, files };
        self.emit_deleted_files(&dir, file_states, &present_entries);
        Ok(())
    }

    fn process_dir_entry<'scope>(
        &'scope self,
        dir: &RepoPath,
        git_ignore: &Arc<GitIgnoreFile>,
        file_states: FileStates<'scope>,
        entry: &DirEntry,
        scope: &rayon::Scope<'scope>,
    ) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
        let file_type = entry.file_type().unwrap();
        let file_name = entry.file_name();
        let name_string = file_name
            .into_string()
            .map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

        if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
            return Ok(None);
        }
        let name = RepoPathComponent::new(&name_string).unwrap();
        let path = dir.join(name);
        let maybe_current_file_state = file_states.get_at(dir, name);
        if let Some(file_state) = &maybe_current_file_state
            && file_state.file_type == FileType::GitSubmodule
        {
            return Ok(None);
        }

        if file_type.is_dir() {
            let file_states = file_states.prefixed_at(dir, name);
            if git_ignore.matches(&path.to_internal_dir_string()) {
                // If the whole directory is ignored by .gitignore, visit only
                // paths we're already tracking, because a .gitignore file
                // inside an ignored directory must itself be ignored. Doing so
                // is also more efficient. start_tracking_matcher is NOT tested
                // here because we need to scan directory entries to report
                // untracked paths.
1382                self.spawn_ok(scope, move |_| self.visit_tracked_files(file_states));
1383            } else if !self.matcher.visit(&path).is_nothing() {
1384                let directory_to_visit = DirectoryToVisit {
1385                    dir: path,
1386                    disk_dir: entry.path(),
1387                    git_ignore: git_ignore.clone(),
1388                    file_states,
1389                };
1390                self.spawn_ok(scope, |scope| {
1391                    self.visit_directory(directory_to_visit, scope)
1392                });
1393            }
1394            // Whether or not the directory path matches, any child file entries
1395            // shouldn't be touched within the current recursion step.
            Ok(Some((PresentDirEntryKind::Dir, name_string)))
        } else if self.matcher.matches(&path) {
            if let Some(progress) = self.progress {
                progress(&path);
            }
            if maybe_current_file_state.is_none()
                && git_ignore.matches(path.as_internal_file_string())
            {
                // If it wasn't already tracked and it matches
                // the ignored paths, then ignore it.
                Ok(None)
            } else if maybe_current_file_state.is_none()
                && !self.start_tracking_matcher.matches(&path)
            {
                // Leave the file untracked
                self.untracked_paths_tx
                    .send((path, UntrackedReason::FileNotAutoTracked))
                    .ok();
                Ok(None)
            } else {
                let metadata = entry.metadata().map_err(|err| SnapshotError::Other {
                    message: format!("Failed to stat file {}", entry.path().display()),
                    err: err.into(),
                })?;
                if maybe_current_file_state.is_none() && metadata.len() > self.max_new_file_size {
                    // Leave the large file untracked
                    let reason = UntrackedReason::FileTooLarge {
                        size: metadata.len(),
                        max_size: self.max_new_file_size,
                    };
                    self.untracked_paths_tx.send((path, reason)).ok();
                    Ok(None)
                } else if let Some(new_file_state) = file_state(&metadata) {
                    self.process_present_file(
                        path,
                        &entry.path(),
                        maybe_current_file_state.as_ref(),
                        new_file_state,
                    )?;
                    Ok(Some((PresentDirEntryKind::File, name_string)))
                } else {
                    // A special file (e.g. a socket or FIFO) is not
                    // considered present
                    Ok(None)
                }
            }
        } else {
            Ok(None)
        }
    }

    /// Visits only paths we're already tracking.
    fn visit_tracked_files(&self, file_states: FileStates<'_>) -> Result<(), SnapshotError> {
        for (tracked_path, current_file_state) in file_states {
            if current_file_state.file_type == FileType::GitSubmodule {
                continue;
            }
            if !self.matcher.matches(tracked_path) {
                continue;
            }
            let disk_path = tracked_path.to_fs_path(&self.tree_state.working_copy_path)?;
            let metadata = match disk_path.symlink_metadata() {
                Ok(metadata) => Some(metadata),
                Err(err) if err.kind() == io::ErrorKind::NotFound => None,
                Err(err) => {
                    return Err(SnapshotError::Other {
                        message: format!("Failed to stat file {}", disk_path.display()),
                        err: err.into(),
                    });
                }
            };
            if let Some(new_file_state) = metadata.as_ref().and_then(file_state) {
                self.process_present_file(
                    tracked_path.to_owned(),
                    &disk_path,
                    Some(&current_file_state),
                    new_file_state,
                )?;
            } else {
                self.deleted_files_tx.send(tracked_path.to_owned()).ok();
            }
        }
        Ok(())
    }

    fn process_present_file(
        &self,
        path: RepoPathBuf,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        mut new_file_state: FileState,
    ) -> Result<(), SnapshotError> {
        let update = self.get_updated_tree_value(
            &path,
            disk_path,
            maybe_current_file_state,
            &new_file_state,
        )?;
        // Preserve materialized conflict data for normal, non-resolved files
        if matches!(new_file_state.file_type, FileType::Normal { .. })
            && !update.as_ref().is_some_and(|update| update.is_resolved())
        {
            new_file_state.materialized_conflict_data =
                maybe_current_file_state.and_then(|state| state.materialized_conflict_data);
        }
        if let Some(tree_value) = update {
            self.tree_entries_tx.send((path.clone(), tree_value)).ok();
        }
        if Some(&new_file_state) != maybe_current_file_state {
            self.file_states_tx.send((path, new_file_state)).ok();
        }
        Ok(())
    }

    /// Emits file paths that don't exist in the `present_entries`.
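    ///
    /// A hypothetical illustration (values invented for this doc comment):
    ///
    /// ```text
    /// dir = "b", tracked paths: ["b/c", "b/d/e", "b/e"]
    /// present_entries: dirs = {"d"}, files = {"e"}
    /// chunk keys: "b/c" -> (File, "c"), "b/d/e" -> (Dir, "d"), "b/e" -> (File, "e")
    /// emitted as deleted: ["b/c"] ("d" and "e" are still present on disk)
    /// ```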
    fn emit_deleted_files(
        &self,
        dir: &RepoPath,
        file_states: FileStates<'_>,
        present_entries: &PresentDirEntries,
    ) {
        let file_state_chunks = file_states.iter().chunk_by(|(path, _state)| {
            // Extract <name> from <dir>, <dir>/<name>, or <dir>/<name>/**.
            // (file_states may contain a <dir> file entry after a file->dir
            // transition.)
            debug_assert!(path.starts_with(dir));
            let slash = !dir.is_root() as usize;
            let len = dir.as_internal_file_string().len() + slash;
            let tail = path.as_internal_file_string().get(len..).unwrap_or("");
            match tail.split_once('/') {
                Some((name, _)) => (PresentDirEntryKind::Dir, name),
                None => (PresentDirEntryKind::File, tail),
            }
        });
        file_state_chunks
            .into_iter()
            .filter(|&((kind, name), _)| match kind {
                PresentDirEntryKind::Dir => !present_entries.dirs.contains(name),
                PresentDirEntryKind::File => !present_entries.files.contains(name),
            })
            .flat_map(|(_, chunk)| chunk)
            // Whether or not the entry exists, submodules should be ignored
            .filter(|(_, state)| state.file_type != FileType::GitSubmodule)
            .filter(|(path, _)| self.matcher.matches(path))
            .try_for_each(|(path, _)| self.deleted_files_tx.send(path.to_owned()))
            .ok();
    }

    fn get_updated_tree_value(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        new_file_state: &FileState,
    ) -> Result<Option<MergedTreeValue>, SnapshotError> {
        let clean = match maybe_current_file_state {
            None => {
                // untracked
                false
            }
            Some(current_file_state) => {
                // If the file's mtime was set at the same time as this state
                // file's own mtime, we can't tell whether the file was
                // modified before or after this state file was written.
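                // Illustrative timing (invented values): if this state file
                // was written at t=100 and the file's mtime is also 100, the
                // file may have been modified at t=100 *after* its state was
                // recorded, so only an mtime strictly before own_mtime is
                // trusted as clean.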
                new_file_state.is_clean(current_file_state)
                    && current_file_state.mtime < self.tree_state.own_mtime
            }
        };
        if clean {
            Ok(None)
        } else {
            let current_tree_values = self.current_tree.path_value(repo_path)?;
            let new_file_type = if !self.tree_state.symlink_support {
                let mut new_file_type = new_file_state.file_type.clone();
                if matches!(new_file_type, FileType::Normal { .. })
                    && matches!(current_tree_values.as_normal(), Some(TreeValue::Symlink(_)))
                {
                    new_file_type = FileType::Symlink;
                }
                new_file_type
            } else {
                new_file_state.file_type.clone()
            };
            let new_tree_values = match new_file_type {
                FileType::Normal { executable } => self
                    .write_path_to_store(
                        repo_path,
                        disk_path,
                        &current_tree_values,
                        executable,
                        maybe_current_file_state.and_then(|state| state.materialized_conflict_data),
                    )
                    .block_on()?,
                FileType::Symlink => {
                    let id = self
                        .write_symlink_to_store(repo_path, disk_path)
                        .block_on()?;
                    Merge::normal(TreeValue::Symlink(id))
                }
                FileType::GitSubmodule => panic!("git submodule cannot be written to store"),
            };
            if new_tree_values != current_tree_values {
                Ok(Some(new_tree_values))
            } else {
                Ok(None)
            }
        }
    }

    fn store(&self) -> &Store {
        &self.tree_state.store
    }

    async fn write_path_to_store(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        current_tree_values: &MergedTreeValue,
        executable: FileExecutableFlag,
        materialized_conflict_data: Option<MaterializedConflictData>,
    ) -> Result<MergedTreeValue, SnapshotError> {
        if let Some(current_tree_value) = current_tree_values.as_resolved() {
            let id = self.write_file_to_store(repo_path, disk_path).await?;
            // On Windows, we preserve the executable bit from the current tree.
            let executable = executable.unwrap_or_else(|| {
                if let Some(TreeValue::File {
                    id: _,
                    executable,
                    copy_id: _,
                }) = current_tree_value
                {
                    *executable
                } else {
                    false
                }
            });
            // Preserve the copy id from the current tree
            let copy_id = {
                if let Some(TreeValue::File {
                    id: _,
                    executable: _,
                    copy_id,
                }) = current_tree_value
                {
                    copy_id.clone()
                } else {
                    CopyId::placeholder()
                }
            };
            Ok(Merge::normal(TreeValue::File {
                id,
                executable,
                copy_id,
            }))
        } else if let Some(old_file_ids) = current_tree_values.to_file_merge() {
            // Safe to unwrap because a copy id exists exactly when the value
            // is a file
            let copy_id_merge = current_tree_values.to_copy_id_merge().unwrap();
            let copy_id = copy_id_merge
                .resolve_trivial(SameChange::Accept)
                .cloned()
                .flatten()
                .unwrap_or_else(CopyId::placeholder);
            let mut contents = vec![];
            let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to open file {}", disk_path.display()),
                err: err.into(),
            })?;
            self.tree_state
                .target_eol_strategy
                .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
                .await
                .map_err(|err| SnapshotError::Other {
                    message: "Failed to convert the EOL".to_string(),
                    err: err.into(),
                })?
                .read_to_end(&mut contents)
                .await
                .map_err(|err| SnapshotError::Other {
                    message: "Failed to read the EOL converted contents".to_string(),
                    err: err.into(),
                })?;
            // If the file contained a conflict before and is a normal file on
            // disk, we try to parse any conflict markers in the file into a
            // conflict.
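            // For example, if the on-disk file still contains materialized
            // "<<<<<<<" / ">>>>>>>" sections, update_from_content() can
            // reconstruct the per-side file contents; if the user resolved
            // every section, it returns a resolved file id instead.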
            let new_file_ids = conflicts::update_from_content(
                &old_file_ids,
                self.store(),
                repo_path,
                &contents,
                materialized_conflict_data.map_or(MIN_CONFLICT_MARKER_LEN, |data| {
                    data.conflict_marker_len as usize
                }),
            )
            .await?;
            match new_file_ids.into_resolved() {
                Ok(file_id) => {
                    // On Windows, we preserve the executable bit from the merged trees.
                    let executable = executable.unwrap_or_else(|| {
                        if let Some(merge) = current_tree_values.to_executable_merge() {
                            conflicts::resolve_file_executable(&merge).unwrap_or(false)
                        } else {
                            false
                        }
                    });
                    Ok(Merge::normal(TreeValue::File {
                        id: file_id.unwrap(),
                        executable,
                        copy_id,
                    }))
                }
                Err(new_file_ids) => {
                    if new_file_ids != old_file_ids {
                        Ok(current_tree_values.with_new_file_ids(&new_file_ids))
                    } else {
                        Ok(current_tree_values.clone())
                    }
                }
            }
        } else {
            Ok(current_tree_values.clone())
        }
    }

    async fn write_file_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<FileId, SnapshotError> {
        let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
            message: format!("Failed to open file {}", disk_path.display()),
            err: err.into(),
        })?;
        let mut contents = self
            .tree_state
            .target_eol_strategy
            .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
            .await
            .map_err(|err| SnapshotError::Other {
                message: "Failed to convert the EOL".to_string(),
                err: err.into(),
            })?;
        Ok(self.store().write_file(path, &mut contents).await?)
    }

    async fn write_symlink_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<SymlinkId, SnapshotError> {
        if self.tree_state.symlink_support {
            let target = disk_path.read_link().map_err(|err| SnapshotError::Other {
                message: format!("Failed to read symlink {}", disk_path.display()),
                err: err.into(),
            })?;
            let str_target =
                target
                    .to_str()
                    .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget {
                        path: disk_path.to_path_buf(),
                    })?;
            Ok(self.store().write_symlink(path, str_target).await?)
        } else {
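            // Without symlink support (e.g. on Windows without symlink
            // privileges), the "symlink" exists on disk as a regular file
            // whose contents are the target path.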
            let target = fs::read(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to read file {}", disk_path.display()),
                err: err.into(),
            })?;
            let string_target =
                String::from_utf8(target).map_err(|_| SnapshotError::InvalidUtf8SymlinkTarget {
                    path: disk_path.to_path_buf(),
                })?;
            Ok(self.store().write_symlink(path, &string_target).await?)
        }
    }
}

/// Functions to update local-disk files from the store.
impl TreeState {
    async fn write_file(
        &self,
        disk_path: &Path,
        contents: impl AsyncRead + Send + Unpin,
        executable: bool,
        apply_eol_conversion: bool,
    ) -> Result<FileState, CheckoutError> {
        let mut file = File::options()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        let contents = if apply_eol_conversion {
            self.target_eol_strategy
                .convert_eol_for_update(contents)
                .await
                .map_err(|err| CheckoutError::Other {
                    message: "Failed to convert the EOL for the content".to_string(),
                    err: err.into(),
                })?
        } else {
            Box::new(contents)
        };
        let size = copy_async_to_sync(contents, &mut file)
            .await
            .map_err(|err| CheckoutError::Other {
                message: format!(
                    "Failed to write the content to the file {}",
                    disk_path.display()
                ),
                err: err.into(),
            })?;
        self.set_executable(disk_path, executable)?;
        // Read the file state from the file descriptor. That way, we know that the
        // file exists and is of the expected type, and the stat information is most
        // likely accurate, barring other processes modifying the file concurrently.
        // (The mtime is set at write time and won't change when we close the file.)
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size as u64, &metadata))
    }

    fn write_symlink(&self, disk_path: &Path, target: String) -> Result<FileState, CheckoutError> {
        let target = PathBuf::from(&target);
        try_symlink(&target, disk_path).map_err(|err| CheckoutError::Other {
            message: format!(
                "Failed to create symlink from {} to {}",
                disk_path.display(),
                target.display()
            ),
            err: err.into(),
        })?;
        let metadata = disk_path
            .symlink_metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_symlink(&metadata))
    }

    async fn write_conflict(
        &self,
        disk_path: &Path,
        contents: &[u8],
        executable: bool,
    ) -> Result<FileState, CheckoutError> {
        let contents = self
            .target_eol_strategy
            .convert_eol_for_update(contents)
            .await
            .map_err(|err| CheckoutError::Other {
                message: "Failed to convert the EOL when writing a merge conflict".to_string(),
                err: err.into(),
            })?;
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        let size = copy_async_to_sync(contents, &mut file)
            .await
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to write conflict to file {}", disk_path.display()),
                err: err.into(),
            })? as u64;
        self.set_executable(disk_path, executable)?;
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size, &metadata))
    }

    #[cfg_attr(windows, expect(unused_variables))]
    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
        #[cfg(unix)]
        {
            let mode = if executable { 0o755 } else { 0o644 };
            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
                .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        }
        Ok(())
    }

    pub fn check_out(&mut self, new_tree: &MergedTree) -> Result<CheckoutStats, CheckoutError> {
        let old_tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        let stats = self
            .update(&old_tree, new_tree, self.sparse_matcher().as_ref())
            .block_on()?;
        self.tree_id = new_tree.id();
        Ok(stats)
    }

    pub fn set_sparse_patterns(
        &mut self,
        sparse_patterns: Vec<RepoPathBuf>,
    ) -> Result<CheckoutStats, CheckoutError> {
        let tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
        let new_matcher = PrefixMatcher::new(&sparse_patterns);
        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
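        // Illustrative (invented patterns): with old patterns ["a"] and new
        // patterns ["a/b", "c"], the added matcher matches paths under "c"
        // only ("a/b" was already covered by the "a" prefix), and the
        // removed matcher matches paths under "a" except those under "a/b".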
        let empty_tree = MergedTree::resolved(Tree::empty(self.store.clone(), RepoPathBuf::root()));
        let added_stats = self.update(&empty_tree, &tree, &added_matcher).block_on()?;
        let removed_stats = self
            .update(&tree, &empty_tree, &removed_matcher)
            .block_on()?;
        self.sparse_patterns = sparse_patterns;
        assert_eq!(added_stats.updated_files, 0);
        assert_eq!(added_stats.removed_files, 0);
        assert_eq!(removed_stats.updated_files, 0);
        assert_eq!(removed_stats.added_files, 0);
        assert_eq!(removed_stats.skipped_files, 0);
        Ok(CheckoutStats {
            updated_files: 0,
            added_files: added_stats.added_files,
            removed_files: removed_stats.removed_files,
            skipped_files: added_stats.skipped_files,
        })
    }

    async fn update(
        &mut self,
        old_tree: &MergedTree,
        new_tree: &MergedTree,
        matcher: &dyn Matcher,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: maybe it's better not to include the skipped counts in the
        // "intended" counts
        let mut stats = CheckoutStats {
            updated_files: 0,
            added_files: 0,
            removed_files: 0,
            skipped_files: 0,
        };
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        let mut diff_stream = old_tree
            .diff_stream_for_file_system(new_tree, matcher)
            .map(async |TreeDiffEntry { path, values }| match values {
                Ok(diff) => {
                    let result = materialize_tree_value(&self.store, &path, diff.after).await;
                    (path, result.map(|value| (diff.before, value)))
                }
                Err(err) => (path, Err(err)),
            })
            .buffered(self.store.concurrency().max(1));
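        // `buffered` runs up to `concurrency` materializations ahead while
        // still yielding results in the original diff (path) order.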
        while let Some((path, data)) = diff_stream.next().await {
            let (before, after) = data?;
            if after.is_absent() {
                stats.removed_files += 1;
            } else if before.is_absent() {
                stats.added_files += 1;
            } else {
                stats.updated_files += 1;
            }

            // An existing Git submodule can be a non-empty directory on disk.
            // We shouldn't attempt to manage it as a tracked path.
            //
            // TODO: It might be better to add general support for paths not
            // tracked by jj rather than processing submodules specially. For
            // example, paths excluded by .gitignore can be marked as such so
            // that newly-"unignored" paths won't be snapshotted automatically.
            if matches!(before.as_normal(), Some(TreeValue::GitSubmodule(_)))
                && matches!(after, MaterializedTreeValue::GitSubmodule(_))
            {
                eprintln!("ignoring git submodule at {path:?}");
                // Don't update the file state, as if there were no diff. The
                // state type stays FileType::GitSubmodule if that's what it
                // was before.
                continue;
            }

            // Create parent directories whether or not after.is_present().
            // This ensures that the path never traverses symlinks.
            let Some(disk_path) = create_parent_dirs(&self.working_copy_path, &path)? else {
                changed_file_states.push((path, FileState::placeholder()));
                stats.skipped_files += 1;
                continue;
            };
            // If the path was present, check for a reserved path first and
            // delete the old file.
            let present_file_deleted = before.is_present() && remove_old_file(&disk_path)?;
            // If not, create a temporary file to test the path's validity.
            if !present_file_deleted && !can_create_new_file(&disk_path)? {
                changed_file_states.push((path, FileState::placeholder()));
                stats.skipped_files += 1;
                continue;
            }

            // TODO: Check that the file has not changed before overwriting/removing it.
            let file_state = match after {
                MaterializedTreeValue::Absent | MaterializedTreeValue::AccessDenied(_) => {
                    let mut parent_dir = disk_path.parent().unwrap();
                    loop {
                        if fs::remove_dir(parent_dir).is_err() {
                            break;
                        }
                        parent_dir = parent_dir.parent().unwrap();
                    }
                    deleted_files.insert(path);
                    continue;
                }
                MaterializedTreeValue::File(file) => {
                    self.write_file(&disk_path, file.reader, file.executable, true)
                        .await?
                }
                MaterializedTreeValue::Symlink { id: _, target } => {
                    if self.symlink_support {
                        self.write_symlink(&disk_path, target)?
                    } else {
                        self.write_file(&disk_path, target.as_bytes(), false, false)
                            .await?
                    }
                }
                MaterializedTreeValue::GitSubmodule(_) => {
                    eprintln!("ignoring git submodule at {path:?}");
                    FileState::for_gitsubmodule()
                }
                MaterializedTreeValue::Tree(_) => {
                    panic!("unexpected tree entry in diff at {path:?}");
                }
                MaterializedTreeValue::FileConflict(file) => {
                    let conflict_marker_len =
                        choose_materialized_conflict_marker_len(&file.contents);
                    let options = ConflictMaterializeOptions {
                        marker_style: self.conflict_marker_style,
                        marker_len: Some(conflict_marker_len),
                        merge: self.store.merge_options().clone(),
                    };
                    let contents = materialize_merge_result_to_bytes(&file.contents, &options);
                    let mut file_state = self
                        .write_conflict(&disk_path, &contents, file.executable.unwrap_or(false))
                        .await?;
                    file_state.materialized_conflict_data = Some(MaterializedConflictData {
                        conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX),
                    });
                    file_state
                }
                MaterializedTreeValue::OtherConflict { id } => {
                    // Unless all terms are regular files, we can't do much
                    // better than trying to describe the merge.
                    let contents = id.describe();
                    let executable = false;
                    self.write_conflict(&disk_path, contents.as_bytes(), executable)
                        .await?
                }
            };
            changed_file_states.push((path, file_state));
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        Ok(stats)
    }

    pub async fn reset(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        let old_tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => ResetError::SourceNotFound {
                source: Box::new(err),
            },
            other => ResetError::InternalBackendError(other),
        })?;

        let matcher = self.sparse_matcher();
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        let mut diff_stream = old_tree.diff_stream_for_file_system(new_tree, matcher.as_ref());
        while let Some(TreeDiffEntry { path, values }) = diff_stream.next().await {
            let after = values?.after;
            if after.is_absent() {
                deleted_files.insert(path);
            } else {
                let file_type = match after.into_resolved() {
                    Ok(value) => match value.unwrap() {
                        TreeValue::File {
                            id: _,
                            executable,
                            copy_id: _,
                        } => FileType::Normal {
                            executable: FileExecutableFlag::from_bool_lossy(executable),
                        },
                        TreeValue::Symlink(_id) => FileType::Symlink,
                        TreeValue::GitSubmodule(_id) => {
                            eprintln!("ignoring git submodule at {path:?}");
                            FileType::GitSubmodule
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    },
                    Err(_values) => {
                        // TODO: Try to set the executable bit based on the conflict
                        FileType::Normal {
                            executable: FileExecutableFlag::from_bool_lossy(false),
                        }
                    }
                };
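                // A zeroed mtime/size won't match the on-disk metadata, so
                // the next snapshot will re-read this path rather than
                // assuming it's clean.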
                let file_state = FileState {
                    file_type,
                    mtime: MillisSinceEpoch(0),
                    size: 0,
                    materialized_conflict_data: None,
                };
                changed_file_states.push((path, file_state));
            }
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        self.tree_id = new_tree.id();
        Ok(())
    }

    pub async fn recover(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        self.file_states.clear();
        self.tree_id = self.store.empty_merged_tree_id();
        self.reset(new_tree).await
    }
}

fn checkout_error_for_stat_error(err: io::Error, path: &Path) -> CheckoutError {
    CheckoutError::Other {
        message: format!("Failed to stat file {}", path.display()),
        err: err.into(),
    }
}

/// Working copy state stored in "checkout" file.
#[derive(Clone, Debug)]
struct CheckoutState {
    operation_id: OperationId,
    workspace_name: WorkspaceNameBuf,
}

impl CheckoutState {
    fn load(state_path: &Path) -> Result<Self, WorkingCopyStateError> {
        let wrap_err = |err| WorkingCopyStateError {
            message: "Failed to read checkout state".to_owned(),
            err,
        };
        let buf = fs::read(state_path.join("checkout")).map_err(|err| wrap_err(err.into()))?;
        let proto = crate::protos::local_working_copy::Checkout::decode(&*buf)
            .map_err(|err| wrap_err(err.into()))?;
        Ok(Self {
            operation_id: OperationId::new(proto.operation_id),
            workspace_name: if proto.workspace_name.is_empty() {
                // For compatibility with old working copies.
                // TODO: Delete in mid 2022 or so
                WorkspaceName::DEFAULT.to_owned()
            } else {
                proto.workspace_name.into()
            },
        })
    }

    #[instrument(skip_all)]
    fn save(&self, state_path: &Path) -> Result<(), WorkingCopyStateError> {
        let wrap_err = |err| WorkingCopyStateError {
            message: "Failed to write checkout state".to_owned(),
            err,
        };
        let proto = crate::protos::local_working_copy::Checkout {
            operation_id: self.operation_id.to_bytes(),
            workspace_name: (*self.workspace_name).into(),
        };
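        // Write to a temp file in the state directory first, so the rename
        // below replaces the "checkout" file atomically (at least on POSIX).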
        let mut temp_file =
            NamedTempFile::new_in(state_path).map_err(|err| wrap_err(err.into()))?;
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .map_err(|err| wrap_err(err.into()))?;
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        persist_temp_file(temp_file, state_path.join("checkout"))
            .map_err(|err| wrap_err(err.into()))?;
        Ok(())
    }
}

pub struct LocalWorkingCopy {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    checkout_state: CheckoutState,
    tree_state: OnceCell<TreeState>,
    tree_state_settings: TreeStateSettings,
}

impl WorkingCopy for LocalWorkingCopy {
    fn name(&self) -> &str {
        Self::name()
    }

    fn workspace_name(&self) -> &WorkspaceName {
        &self.checkout_state.workspace_name
    }

    fn operation_id(&self) -> &OperationId {
        &self.checkout_state.operation_id
    }

    fn tree_id(&self) -> Result<&MergedTreeId, WorkingCopyStateError> {
        Ok(self.tree_state()?.current_tree_id())
    }

    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
        Ok(self.tree_state()?.sparse_patterns())
    }

    fn start_mutation(&self) -> Result<Box<dyn LockedWorkingCopy>, WorkingCopyStateError> {
        let lock_path = self.state_path.join("working_copy.lock");
        let lock = FileLock::lock(lock_path).map_err(|err| WorkingCopyStateError {
            message: "Failed to lock working copy".to_owned(),
            err: err.into(),
        })?;

        let wc = Self {
            store: self.store.clone(),
            working_copy_path: self.working_copy_path.clone(),
            state_path: self.state_path.clone(),
            // Re-read the state after taking the lock
            checkout_state: CheckoutState::load(&self.state_path)?,
            // Empty so we re-read the state after taking the lock
            // TODO: It's expensive to reload the whole tree. We should copy it from `self` if it
            // hasn't changed.
            tree_state: OnceCell::new(),
            tree_state_settings: self.tree_state_settings.clone(),
        };
        let old_operation_id = wc.operation_id().clone();
        let old_tree_id = wc.tree_id()?.clone();
        Ok(Box::new(LockedLocalWorkingCopy {
            wc,
            old_operation_id,
            old_tree_id,
            tree_state_dirty: false,
            new_workspace_name: None,
            _lock: lock,
        }))
    }
}

impl LocalWorkingCopy {
    pub fn name() -> &'static str {
        "local"
    }

    /// Initializes a new working copy at `working_copy_path`. The working
    /// copy's state will be stored in the `state_path` directory. The working
    /// copy will have the empty tree checked out.
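    ///
    /// A minimal usage sketch (paths, store, and settings are invented for
    /// illustration):
    ///
    /// ```ignore
    /// let wc = LocalWorkingCopy::init(
    ///     store.clone(),
    ///     workspace_root.clone(),
    ///     workspace_root.join(".jj").join("working_copy"),
    ///     op_id.clone(),
    ///     WorkspaceName::DEFAULT.to_owned(),
    ///     &settings,
    /// )?;
    /// ```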
    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_name: WorkspaceNameBuf,
        user_settings: &UserSettings,
    ) -> Result<Self, WorkingCopyStateError> {
        let checkout_state = CheckoutState {
            operation_id,
            workspace_name,
        };
        checkout_state.save(&state_path)?;
        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read the tree state settings".to_string(),
                err: err.into(),
            })?;
        let tree_state = TreeState::init(
            store.clone(),
            working_copy_path.clone(),
            state_path.clone(),
            &tree_state_settings,
        )
        .map_err(|err| WorkingCopyStateError {
            message: "Failed to initialize working copy state".to_string(),
            err: err.into(),
        })?;
        Ok(Self {
            store,
            working_copy_path,
            state_path,
            checkout_state,
            tree_state: OnceCell::with_value(tree_state),
            tree_state_settings,
        })
    }

    pub fn load(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        user_settings: &UserSettings,
    ) -> Result<Self, WorkingCopyStateError> {
        let checkout_state = CheckoutState::load(&state_path)?;
        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read the tree state settings".to_string(),
                err: err.into(),
            })?;
        Ok(Self {
            store,
            working_copy_path,
            state_path,
            checkout_state,
            tree_state: OnceCell::new(),
            tree_state_settings,
        })
    }

    pub fn state_path(&self) -> &Path {
        &self.state_path
    }

    #[instrument(skip_all)]
    fn tree_state(&self) -> Result<&TreeState, WorkingCopyStateError> {
        self.tree_state.get_or_try_init(|| {
            TreeState::load(
                self.store.clone(),
                self.working_copy_path.clone(),
                self.state_path.clone(),
                &self.tree_state_settings,
            )
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read working copy state".to_string(),
                err: err.into(),
            })
        })
    }

    fn tree_state_mut(&mut self) -> Result<&mut TreeState, WorkingCopyStateError> {
        self.tree_state()?; // ensure loaded
        Ok(self.tree_state.get_mut().unwrap())
    }

    pub fn file_states(&self) -> Result<FileStates<'_>, WorkingCopyStateError> {
        Ok(self.tree_state()?.file_states())
    }

    #[cfg(feature = "watchman")]
    pub fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), WorkingCopyStateError> {
        self.tree_state()?
            .query_watchman(config)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to query watchman".to_string(),
                err: err.into(),
            })
    }

    #[cfg(feature = "watchman")]
    pub fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, WorkingCopyStateError> {
        self.tree_state()?
            .is_watchman_trigger_registered(config)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to query watchman".to_string(),
                err: err.into(),
            })
    }
}

pub struct LocalWorkingCopyFactory {}

impl WorkingCopyFactory for LocalWorkingCopyFactory {
    fn init_working_copy(
        &self,
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_name: WorkspaceNameBuf,
        settings: &UserSettings,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        Ok(Box::new(LocalWorkingCopy::init(
            store,
            working_copy_path,
            state_path,
            operation_id,
            workspace_name,
            settings,
        )?))
    }

    fn load_working_copy(
        &self,
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        settings: &UserSettings,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        Ok(Box::new(LocalWorkingCopy::load(
            store,
            working_copy_path,
            state_path,
            settings,
        )?))
    }
}

/// A working copy that's locked on disk. The lock is held until you call
/// `finish()` or `discard()`.
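///
/// A hypothetical usage sketch (setup and error handling elided; `wc`,
/// `options`, and `op_id` are assumed to exist in the caller's scope):
///
/// ```ignore
/// let mut locked = wc.start_mutation()?;
/// let (new_tree_id, _stats) = locked.snapshot(&options).await?;
/// let wc = locked.finish(op_id).await?;
/// ```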
pub struct LockedLocalWorkingCopy {
    wc: LocalWorkingCopy,
    old_operation_id: OperationId,
    old_tree_id: MergedTreeId,
    tree_state_dirty: bool,
    new_workspace_name: Option<WorkspaceNameBuf>,
    _lock: FileLock,
}

#[async_trait]
impl LockedWorkingCopy for LockedLocalWorkingCopy {
    fn old_operation_id(&self) -> &OperationId {
        &self.old_operation_id
    }

    fn old_tree_id(&self) -> &MergedTreeId {
        &self.old_tree_id
    }

    async fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(MergedTreeId, SnapshotStats), SnapshotError> {
        let tree_state = self.wc.tree_state_mut()?;
        let (is_dirty, stats) = tree_state.snapshot(options)?;
        self.tree_state_dirty |= is_dirty;
        Ok((tree_state.current_tree_id().clone(), stats))
    }

    async fn check_out(&mut self, commit: &Commit) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with the new TreeId so we can
        // continue an interrupted update if we find such a file.
        let new_tree = commit.tree()?;
        let tree_state = self.wc.tree_state_mut()?;
        if tree_state.tree_id != *commit.tree_id() {
            let stats = tree_state.check_out(&new_tree)?;
            self.tree_state_dirty = true;
            Ok(stats)
        } else {
            Ok(CheckoutStats::default())
        }
    }

    fn rename_workspace(&mut self, new_name: WorkspaceNameBuf) {
        self.new_workspace_name = Some(new_name);
    }

    async fn reset(&mut self, commit: &Commit) -> Result<(), ResetError> {
        let new_tree = commit.tree()?;
        self.wc.tree_state_mut()?.reset(&new_tree).await?;
        self.tree_state_dirty = true;
        Ok(())
    }

    async fn recover(&mut self, commit: &Commit) -> Result<(), ResetError> {
        let new_tree = commit.tree()?;
        self.wc.tree_state_mut()?.recover(&new_tree).await?;
        self.tree_state_dirty = true;
        Ok(())
    }

    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
        self.wc.sparse_patterns()
    }

    async fn set_sparse_patterns(
        &mut self,
        new_sparse_patterns: Vec<RepoPathBuf>,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
        // continue an interrupted update if we find such a file.
        let stats = self
            .wc
            .tree_state_mut()?
            .set_sparse_patterns(new_sparse_patterns)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    #[instrument(skip_all)]
    async fn finish(
        mut self: Box<Self>,
        operation_id: OperationId,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.tree_id()?);
        if self.tree_state_dirty {
            self.wc
                .tree_state_mut()?
                .save()
                .map_err(|err| WorkingCopyStateError {
                    message: "Failed to write working copy state".to_string(),
                    err: Box::new(err),
                })?;
        }
        if self.old_operation_id != operation_id || self.new_workspace_name.is_some() {
            self.wc.checkout_state.operation_id = operation_id;
            if let Some(workspace_name) = self.new_workspace_name {
                self.wc.checkout_state.workspace_name = workspace_name;
            }
            self.wc.checkout_state.save(&self.wc.state_path)?;
        }
        // TODO: Clear the "pending_checkout" file here.
        Ok(Box::new(self.wc))
    }
}

impl LockedLocalWorkingCopy {
    pub fn reset_watchman(&mut self) -> Result<(), SnapshotError> {
        self.wc.tree_state_mut()?.reset_watchman();
        self.tree_state_dirty = true;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use maplit::hashset;

    use super::*;

    fn repo_path(value: &str) -> &RepoPath {
        RepoPath::from_internal_string(value).unwrap()
    }

    fn repo_path_component(value: &str) -> &RepoPathComponent {
        RepoPathComponent::new(value).unwrap()
    }

    fn new_state(size: u64) -> FileState {
        FileState {
            file_type: FileType::Normal {
                executable: FileExecutableFlag::from_bool_lossy(false),
            },
            mtime: MillisSinceEpoch(0),
            size,
            materialized_conflict_data: None,
        }
    }

    #[test]
    fn test_file_states_merge() {
        let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size));
        let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size));
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("bc", 5),
        ];
        let mut file_states = FileStatesMap::from_proto(data, false);

        let changed_file_states = vec![
            new_owned_entry("aa", 10),    // change
            new_owned_entry("b/d/f", 11), // add
            new_owned_entry("b/e", 12),   // change
            new_owned_entry("c", 13),     // add
        ];
        let deleted_files = hashset! {
            repo_path("b/c").to_owned(),
            repo_path("b#").to_owned(),
        };
        file_states.merge_in(changed_file_states, &deleted_files);
        assert_eq!(
            file_states.all().iter().collect_vec(),
            vec![
                new_static_entry("aa", 10),
                new_static_entry("b/d/e", 2),
                new_static_entry("b/d/f", 11),
                new_static_entry("b/e", 12),
                new_static_entry("bc", 5),
                new_static_entry("c", 13),
            ],
        );
    }

    #[test]
    fn test_file_states_lookup() {
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("bc", 5),
        ];
        let file_states = FileStates::from_sorted(&data);

        assert_eq!(
            file_states.prefixed(repo_path("")).paths().collect_vec(),
            ["aa", "b/c", "b/d/e", "b/e", "b#", "bc"].map(repo_path)
        );
        assert!(file_states.prefixed(repo_path("a")).is_empty());
        assert_eq!(
            file_states.prefixed(repo_path("aa")).paths().collect_vec(),
            ["aa"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b")).paths().collect_vec(),
            ["b/c", "b/d/e", "b/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b/d")).paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b#")).paths().collect_vec(),
            ["b#"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("bc")).paths().collect_vec(),
            ["bc"].map(repo_path)
        );
        assert!(file_states.prefixed(repo_path("z")).is_empty());

        assert!(!file_states.contains_path(repo_path("a")));
        assert!(file_states.contains_path(repo_path("aa")));
        assert!(file_states.contains_path(repo_path("b/d/e")));
        assert!(!file_states.contains_path(repo_path("b/d")));
        assert!(file_states.contains_path(repo_path("b#")));
        assert!(file_states.contains_path(repo_path("bc")));
        assert!(!file_states.contains_path(repo_path("z")));

        assert_eq!(file_states.get(repo_path("a")), None);
        assert_eq!(file_states.get(repo_path("aa")), Some(new_state(0)));
        assert_eq!(file_states.get(repo_path("b/d/e")), Some(new_state(2)));
        assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
        assert_eq!(file_states.get(repo_path("z")), None);
    }

    #[test]
    fn test_file_states_lookup_at() {
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("b/c", 0),
            new_proto_entry("b/d/e", 1),
            new_proto_entry("b/d#", 2), // '#' < '/'
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
        ];
        let file_states = FileStates::from_sorted(&data);

        // At root
        assert_eq!(
            file_states.get_at(RepoPath::root(), repo_path_component("b")),
            None
        );
        assert_eq!(
            file_states.get_at(RepoPath::root(), repo_path_component("b#")),
            Some(new_state(4))
        );

        // At prefixed dir
        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("c")),
            Some(new_state(0))
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("d")),
            None
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("d#")),
            Some(new_state(2))
        );

        // At nested prefixed dir
        let prefixed_states = prefixed_states.prefixed_at(repo_path("b"), repo_path_component("d"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), repo_path_component("e")),
            Some(new_state(1))
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), repo_path_component("#")),
            None
        );

        // At prefixed file
        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b#"));
        assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
        assert_eq!(
            prefixed_states.get_at(repo_path("b#"), repo_path_component("#")),
            None
        );
    }
}