jj_lib/local_working_copy.rs

// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![expect(missing_docs)]

use std::cmp::Ordering;
use std::collections::HashSet;
use std::error::Error;
use std::fs;
use std::fs::DirEntry;
use std::fs::File;
use std::fs::Metadata;
use std::fs::OpenOptions;
use std::io;
use std::io::Read as _;
use std::io::Write as _;
use std::iter;
use std::mem;
use std::ops::Range;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt as _;
use std::path::Path;
use std::path::PathBuf;
use std::slice;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::mpsc::Sender;
use std::sync::mpsc::channel;
use std::time::UNIX_EPOCH;

use async_trait::async_trait;
use either::Either;
use futures::StreamExt as _;
use itertools::EitherOrBoth;
use itertools::Itertools as _;
use once_cell::unsync::OnceCell;
use pollster::FutureExt as _;
use prost::Message as _;
use rayon::iter::IntoParallelIterator as _;
use rayon::prelude::IndexedParallelIterator as _;
use rayon::prelude::ParallelIterator as _;
use tempfile::NamedTempFile;
use thiserror::Error;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt as _;
use tracing::instrument;
use tracing::trace_span;

use crate::backend::BackendError;
use crate::backend::CopyId;
use crate::backend::FileId;
use crate::backend::MillisSinceEpoch;
use crate::backend::SymlinkId;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::commit::Commit;
use crate::config::ConfigGetError;
use crate::conflicts;
use crate::conflicts::ConflictMarkerStyle;
use crate::conflicts::ConflictMaterializeOptions;
use crate::conflicts::MIN_CONFLICT_MARKER_LEN;
use crate::conflicts::MaterializedTreeValue;
use crate::conflicts::choose_materialized_conflict_marker_len;
use crate::conflicts::materialize_merge_result_to_bytes;
use crate::conflicts::materialize_tree_value;
pub use crate::eol::EolConversionMode;
use crate::eol::TargetEolStrategy;
use crate::file_util::BlockingAsyncReader;
use crate::file_util::check_symlink_support;
use crate::file_util::copy_async_to_sync;
use crate::file_util::persist_temp_file;
use crate::file_util::try_symlink;
use crate::fsmonitor::FsmonitorSettings;
#[cfg(feature = "watchman")]
use crate::fsmonitor::WatchmanConfig;
#[cfg(feature = "watchman")]
use crate::fsmonitor::watchman;
use crate::gitignore::GitIgnoreFile;
use crate::lock::FileLock;
use crate::matchers::DifferenceMatcher;
use crate::matchers::EverythingMatcher;
use crate::matchers::FilesMatcher;
use crate::matchers::IntersectionMatcher;
use crate::matchers::Matcher;
use crate::matchers::PrefixMatcher;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::merge::MergedTreeValue;
use crate::merge::SameChange;
use crate::merged_tree::MergedTree;
use crate::merged_tree::MergedTreeBuilder;
use crate::merged_tree::TreeDiffEntry;
use crate::object_id::ObjectId as _;
use crate::op_store::OperationId;
use crate::ref_name::WorkspaceName;
use crate::ref_name::WorkspaceNameBuf;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponent;
use crate::settings::UserSettings;
use crate::store::Store;
use crate::working_copy::CheckoutError;
use crate::working_copy::CheckoutStats;
use crate::working_copy::LockedWorkingCopy;
use crate::working_copy::ResetError;
use crate::working_copy::SnapshotError;
use crate::working_copy::SnapshotOptions;
use crate::working_copy::SnapshotProgress;
use crate::working_copy::SnapshotStats;
use crate::working_copy::UntrackedReason;
use crate::working_copy::WorkingCopy;
use crate::working_copy::WorkingCopyFactory;
use crate::working_copy::WorkingCopyStateError;

/// On-disk state of file executable bit.
// TODO: maybe better to preserve the executable bit on all platforms, and
// ignore conditionally? #3949
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FileExecutableFlag(#[cfg(unix)] bool);

#[cfg(unix)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(executable: bool) -> Self {
        Self(executable)
    }

    pub fn unwrap_or_else(self, _: impl FnOnce() -> bool) -> bool {
        self.0
    }
}

// Windows doesn't support executable bit.
#[cfg(windows)]
impl FileExecutableFlag {
    pub const fn from_bool_lossy(_executable: bool) -> Self {
        Self()
    }

    pub fn unwrap_or_else(self, f: impl FnOnce() -> bool) -> bool {
        f()
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FileType {
    Normal { executable: FileExecutableFlag },
    Symlink,
    GitSubmodule,
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct MaterializedConflictData {
    pub conflict_marker_len: u32,
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FileState {
    pub file_type: FileType,
    pub mtime: MillisSinceEpoch,
    pub size: u64,
    pub materialized_conflict_data: Option<MaterializedConflictData>,
    /* TODO: What else do we need here? Git stores a lot of fields.
     * TODO: Could possibly handle case-insensitive file systems keeping an
     *       Option<PathBuf> with the actual path here. */
}

impl FileState {
    /// Check whether a file state appears clean compared to a previous file
    /// state, ignoring materialized conflict data.
    pub fn is_clean(&self, old_file_state: &Self) -> bool {
        self.file_type == old_file_state.file_type
            && self.mtime == old_file_state.mtime
            && self.size == old_file_state.size
    }

    /// Indicates that a file exists in the tree but that it needs to be
    /// re-stat'ed on the next snapshot.
    fn placeholder() -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(false);
        Self {
            file_type: FileType::Normal { executable },
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }

    fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self {
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Self {
            file_type: FileType::Normal { executable },
            mtime: mtime_from_metadata(metadata),
            size,
            materialized_conflict_data: None,
        }
    }

    fn for_symlink(metadata: &Metadata) -> Self {
        // When using fscrypt, the reported size is not the content size. So if
        // we were to record the content size here (like we do for regular files), we
        // would end up thinking the file has changed every time we snapshot.
        Self {
            file_type: FileType::Symlink,
            mtime: mtime_from_metadata(metadata),
            size: metadata.len(),
            materialized_conflict_data: None,
        }
    }

    fn for_gitsubmodule() -> Self {
        Self {
            file_type: FileType::GitSubmodule,
            mtime: MillisSinceEpoch(0),
            size: 0,
            materialized_conflict_data: None,
        }
    }
}
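
// Example (a minimal sketch, not part of the API): comparing a freshly
// stat'ed state against a recorded one. `is_clean` deliberately ignores
// `materialized_conflict_data`, so re-materializing a conflict with a
// different marker length doesn't make the file look dirty. `metadata` is a
// hypothetical `std::fs::Metadata` obtained via `symlink_metadata()`.
//
//     let recorded = FileState::for_file(false, 42, &metadata);
//     let mut current = recorded.clone();
//     current.materialized_conflict_data =
//         Some(MaterializedConflictData { conflict_marker_len: 7 });
//     assert!(current.is_clean(&recorded));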

/// Owned map of path to file states, backed by proto data.
#[derive(Clone, Debug)]
struct FileStatesMap {
    data: Vec<crate::protos::local_working_copy::FileStateEntry>,
}

impl FileStatesMap {
    fn new() -> Self {
        Self { data: Vec::new() }
    }

    fn from_proto(
        mut data: Vec<crate::protos::local_working_copy::FileStateEntry>,
        is_sorted: bool,
    ) -> Self {
        if !is_sorted {
            data.sort_unstable_by(|entry1, entry2| {
                let path1 = RepoPath::from_internal_string(&entry1.path).unwrap();
                let path2 = RepoPath::from_internal_string(&entry2.path).unwrap();
                path1.cmp(path2)
            });
        }
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(&data));
        Self { data }
    }

    /// Merges changed and deleted entries into this map. The changed entries
    /// must be sorted by path.
    fn merge_in(
        &mut self,
        changed_file_states: Vec<(RepoPathBuf, FileState)>,
        deleted_files: &HashSet<RepoPathBuf>,
    ) {
        if changed_file_states.is_empty() && deleted_files.is_empty() {
            return;
        }
        debug_assert!(
            changed_file_states.is_sorted_by(|(path1, _), (path2, _)| path1 < path2),
            "changed_file_states must be sorted and have no duplicates"
        );
        self.data = itertools::merge_join_by(
            mem::take(&mut self.data),
            changed_file_states,
            |old_entry, (changed_path, _)| {
                RepoPath::from_internal_string(&old_entry.path)
                    .unwrap()
                    .cmp(changed_path)
            },
        )
        .filter_map(|diff| match diff {
            EitherOrBoth::Both(_, (path, state)) | EitherOrBoth::Right((path, state)) => {
                debug_assert!(!deleted_files.contains(&path));
                Some(file_state_entry_to_proto(path, &state))
            }
            EitherOrBoth::Left(entry) => {
                let present =
                    !deleted_files.contains(RepoPath::from_internal_string(&entry.path).unwrap());
                present.then_some(entry)
            }
        })
        .collect();
    }

    fn clear(&mut self) {
        self.data.clear();
    }

    /// Returns read-only map containing all file states.
    fn all(&self) -> FileStates<'_> {
        FileStates::from_sorted(&self.data)
    }
}
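
// Example (illustrative sketch; the paths, states, and `file_states_map` are
// hypothetical): merging a snapshot delta into the map. Changed entries must
// arrive sorted by path, and any path in `deleted_files` is dropped from the
// existing entries in a single sorted merge pass.
//
//     let changed = vec![
//         (RepoPathBuf::from_internal_string("a/new").unwrap(), new_state),
//         (RepoPathBuf::from_internal_string("b/modified").unwrap(), modified_state),
//     ];
//     let deleted: HashSet<RepoPathBuf> =
//         [RepoPathBuf::from_internal_string("c/removed").unwrap()].into();
//     file_states_map.merge_in(changed, &deleted);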

/// Read-only map of path to file states, possibly filtered by path prefix.
#[derive(Clone, Copy, Debug)]
pub struct FileStates<'a> {
    data: &'a [crate::protos::local_working_copy::FileStateEntry],
}

impl<'a> FileStates<'a> {
    fn from_sorted(data: &'a [crate::protos::local_working_copy::FileStateEntry]) -> Self {
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(data));
        Self { data }
    }

    /// Returns file states under the given directory path.
    pub fn prefixed(&self, base: &RepoPath) -> Self {
        let range = self.prefixed_range(base);
        Self::from_sorted(&self.data[range])
    }

    /// Faster version of `prefixed("<dir>/<base>")`. Requires that all entries
    /// share the same prefix `dir`.
    fn prefixed_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Self {
        let range = self.prefixed_range_at(dir, base);
        Self::from_sorted(&self.data[range])
    }

    /// Returns true if this contains no entries.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns true if the given `path` exists.
    pub fn contains_path(&self, path: &RepoPath) -> bool {
        self.exact_position(path).is_some()
    }

    /// Returns file state for the given `path`.
    pub fn get(&self, path: &RepoPath) -> Option<FileState> {
        let pos = self.exact_position(path)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    /// Faster version of `get("<dir>/<name>")`. Requires that all entries share
    /// the same prefix `dir`.
    fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
        let pos = self.exact_position_at(dir, name)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    fn exact_position(&self, path: &RepoPath) -> Option<usize> {
        self.data
            .binary_search_by(|entry| {
                RepoPath::from_internal_string(&entry.path)
                    .unwrap()
                    .cmp(path)
            })
            .ok()
    }

    fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        self.data
            .binary_search_by(|entry| {
                let tail = entry.path.get(prefix_len..).unwrap_or("");
                match tail.split_once('/') {
                    // "<name>/*" > "<name>"
                    Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
                    None => tail.cmp(name.as_internal_str()),
                }
            })
            .ok()
    }

    fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
        let start = self
            .data
            .partition_point(|entry| RepoPath::from_internal_string(&entry.path).unwrap() < base);
        let len = self.data[start..].partition_point(|entry| {
            RepoPath::from_internal_string(&entry.path)
                .unwrap()
                .starts_with(base)
        });
        start..(start + len)
    }

    fn prefixed_range_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Range<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        let start = self.data.partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name < base.as_internal_str()
        });
        let len = self.data[start..].partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name == base.as_internal_str()
        });
        start..(start + len)
    }

    /// Iterates file state entries sorted by path.
    pub fn iter(&self) -> FileStatesIter<'a> {
        self.data.iter().map(file_state_entry_from_proto)
    }

    /// Iterates sorted file paths.
    pub fn paths(&self) -> impl ExactSizeIterator<Item = &'a RepoPath> + use<'a> {
        self.data
            .iter()
            .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
    }
}
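
// Example (a sketch over hypothetical entries): every lookup is a binary
// search over the sorted entry slice, so a directory scan can narrow once
// with `prefixed` and then probe per entry with the `*_at` variants without
// re-comparing the directory prefix.
//
//     let all = tree_state.file_states();
//     let dir = RepoPath::from_internal_string("src").unwrap();
//     let in_src = all.prefixed(dir); // entries under "src/"
//     let name = RepoPathComponent::new("lib.rs").unwrap();
//     let state = in_src.get_at(dir, name); // like get("src/lib.rs"), but cheaper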

type FileStatesIter<'a> = iter::Map<
    slice::Iter<'a, crate::protos::local_working_copy::FileStateEntry>,
    fn(&crate::protos::local_working_copy::FileStateEntry) -> (&RepoPath, FileState),
>;

impl<'a> IntoIterator for FileStates<'a> {
    type Item = (&'a RepoPath, FileState);
    type IntoIter = FileStatesIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

fn file_state_from_proto(proto: &crate::protos::local_working_copy::FileState) -> FileState {
    let file_type = match proto.file_type() {
        crate::protos::local_working_copy::FileType::Normal => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        // On Windows, FileType::Executable can exist in files written by older
        // versions of jj
        crate::protos::local_working_copy::FileType::Executable => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(true),
        },
        crate::protos::local_working_copy::FileType::Symlink => FileType::Symlink,
        crate::protos::local_working_copy::FileType::Conflict => FileType::Normal {
            executable: FileExecutableFlag::from_bool_lossy(false),
        },
        crate::protos::local_working_copy::FileType::GitSubmodule => FileType::GitSubmodule,
    };
    FileState {
        file_type,
        mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch),
        size: proto.size,
        materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| {
            MaterializedConflictData {
                conflict_marker_len: data.conflict_marker_len,
            }
        }),
    }
}

fn file_state_to_proto(file_state: &FileState) -> crate::protos::local_working_copy::FileState {
    let mut proto = crate::protos::local_working_copy::FileState::default();
    let file_type = match &file_state.file_type {
        FileType::Normal { executable } => {
            if executable.unwrap_or_else(Default::default) {
                crate::protos::local_working_copy::FileType::Executable
            } else {
                crate::protos::local_working_copy::FileType::Normal
            }
        }
        FileType::Symlink => crate::protos::local_working_copy::FileType::Symlink,
        FileType::GitSubmodule => crate::protos::local_working_copy::FileType::GitSubmodule,
    };
    proto.file_type = file_type as i32;
    proto.mtime_millis_since_epoch = file_state.mtime.0;
    proto.size = file_state.size;
    proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| {
        crate::protos::local_working_copy::MaterializedConflictData {
            conflict_marker_len: data.conflict_marker_len,
        }
    });
    proto
}

fn file_state_entry_from_proto(
    proto: &crate::protos::local_working_copy::FileStateEntry,
) -> (&RepoPath, FileState) {
    let path = RepoPath::from_internal_string(&proto.path).unwrap();
    (path, file_state_from_proto(proto.state.as_ref().unwrap()))
}

fn file_state_entry_to_proto(
    path: RepoPathBuf,
    state: &FileState,
) -> crate::protos::local_working_copy::FileStateEntry {
    crate::protos::local_working_copy::FileStateEntry {
        path: path.into_internal_string(),
        state: Some(file_state_to_proto(state)),
    }
}

fn is_file_state_entries_proto_unique_and_sorted(
    data: &[crate::protos::local_working_copy::FileStateEntry],
) -> bool {
    data.iter()
        .map(|entry| RepoPath::from_internal_string(&entry.path).unwrap())
        .is_sorted_by(|path1, path2| path1 < path2)
}

fn sparse_patterns_from_proto(
    proto: Option<&crate::protos::local_working_copy::SparsePatterns>,
) -> Vec<RepoPathBuf> {
    let mut sparse_patterns = vec![];
    if let Some(proto_sparse_patterns) = proto {
        for prefix in &proto_sparse_patterns.prefixes {
            sparse_patterns.push(RepoPathBuf::from_internal_string(prefix).unwrap());
        }
    } else {
        // For compatibility with old working copies.
        // TODO: Delete this in late 2022 or so.
        sparse_patterns.push(RepoPathBuf::root());
    }
    sparse_patterns
}

/// Creates intermediate directories from the `working_copy_path` to the
/// `repo_path` parent. Returns the disk path for the `repo_path` file.
///
/// If an intermediate path component exists but is a file or symlink, this
/// function returns `Ok(None)` to signal that the path should be skipped.
/// The `working_copy_path` directory may be a symlink.
///
/// If an existing or newly-created sub directory points to ".git" or ".jj",
/// this function returns an error.
///
/// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts.
/// Another process may remove the directory created by this function and put a
/// symlink there.
fn create_parent_dirs(
    working_copy_path: &Path,
    repo_path: &RepoPath,
) -> Result<Option<PathBuf>, CheckoutError> {
    let (parent_path, basename) = repo_path.split().expect("repo path shouldn't be root");
    let mut dir_path = working_copy_path.to_owned();
    for c in parent_path.components() {
        // Ensure that the name is a normal entry of the current dir_path.
        dir_path.push(c.to_fs_name().map_err(|err| err.with_path(repo_path))?);
        // A directory named ".git" or ".jj" can be temporarily created. It
        // might trick workspace path discovery, but is harmless so long as the
        // directory is empty.
        let new_dir_created = match fs::create_dir(&dir_path) {
            Ok(()) => true, // New directory
            Err(err) => match dir_path.symlink_metadata() {
                Ok(m) if m.is_dir() => false, // Existing directory
                Ok(_) => {
                    return Ok(None); // Skip existing file or symlink
                }
                Err(_) => {
                    return Err(CheckoutError::Other {
                        message: format!(
                            "Failed to create parent directories for {}",
                            repo_path.to_fs_path_unchecked(working_copy_path).display(),
                        ),
                        err: err.into(),
                    });
                }
            },
        };
        // Invalid component (e.g. "..") should have been rejected.
        // The current dir_path should be an entry of dir_path.parent().
        reject_reserved_existing_path(&dir_path).inspect_err(|_| {
            if new_dir_created {
                fs::remove_dir(&dir_path).ok();
            }
        })?;
    }

    let mut file_path = dir_path;
    file_path.push(
        basename
            .to_fs_name()
            .map_err(|err| err.with_path(repo_path))?,
    );
    Ok(Some(file_path))
}
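
// Example (sketch of a typical checkout call site): `Ok(None)` means some
// ancestor on disk is a file or symlink, so the caller records the path as
// skipped instead of writing through it.
//
//     match create_parent_dirs(working_copy_path, &repo_path)? {
//         Some(disk_path) => { /* write the file or symlink at disk_path */ }
//         None => { /* count repo_path as skipped in the checkout stats */ }
//     }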

/// Removes the existing file named `disk_path` if any. Returns `Ok(true)` if
/// the file was there and got removed, meaning that a new file can safely be
/// created.
///
/// If the existing file points to ".git" or ".jj", this function returns an
/// error.
fn remove_old_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    reject_reserved_existing_path(disk_path)?;
    match fs::remove_file(disk_path) {
        Ok(()) => Ok(true),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
        // TODO: Use io::ErrorKind::IsADirectory if it gets stabilized
        Err(_) if disk_path.symlink_metadata().is_ok_and(|m| m.is_dir()) => Ok(false),
        Err(err) => Err(CheckoutError::Other {
            message: format!("Failed to remove file {}", disk_path.display()),
            err: err.into(),
        }),
    }
}

/// Checks if a new file or symlink named `disk_path` can be created.
///
/// If the file already exists, this function returns `Ok(false)` to signal
/// that the path should be skipped.
///
/// If the path may point to a ".git" or ".jj" entry, this function returns an
/// error.
///
/// This function can fail if `disk_path.parent()` isn't a directory.
fn can_create_new_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    // A new file or symlink will be created by the caller. If it were pointed
    // to by a name like ".git" or ".jj", the git/jj CLI could be tricked into
    // loading configuration from an attacker-controlled location. So we first
    // test the path by creating an empty file.
    let new_file = match OpenOptions::new()
        .write(true)
        .create_new(true) // Don't overwrite, don't follow symlink
        .open(disk_path)
    {
        Ok(file) => Some(file),
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => None,
        // Workaround for "Access is denied. (os error 5)" error on Windows.
        Err(_) => match disk_path.symlink_metadata() {
            Ok(_) => None,
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to stat {}", disk_path.display()),
                    err: err.into(),
                });
            }
        },
    };

    let new_file_created = new_file.is_some();

    if let Some(new_file) = new_file {
        reject_reserved_existing_file(new_file, disk_path).inspect_err(|_| {
            // We keep the error from `reject_reserved_existing_file`
            let _ = fs::remove_file(disk_path);
        })?;

        fs::remove_file(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to remove temporary file {}", disk_path.display()),
            err: err.into(),
        })?;
    } else {
        reject_reserved_existing_path(disk_path)?;
    }
    Ok(new_file_created)
}
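
// Example (sketch of the intended call sequence): callers probe with
// `can_create_new_file` first and only then create the real file;
// `write_file` here is a hypothetical stand-in for the actual writer.
//
//     if can_create_new_file(&disk_path)? {
//         // The name is free and doesn't alias ".git" or ".jj".
//         write_file(&disk_path, contents)?;
//     } else {
//         // An existing file is in the way; skip this path and report it.
//     }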

const RESERVED_DIR_NAMES: &[&str] = &[".git", ".jj"];

fn same_file_handle_from_path(disk_path: &Path) -> io::Result<Option<same_file::Handle>> {
    match same_file::Handle::from_path(disk_path) {
        Ok(handle) => Ok(Some(handle)),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
        Err(err) => Err(err),
    }
}

/// Wrapper for [`reject_reserved_existing_handle`] which avoids a syscall
/// by converting the provided `file` to a `same_file::Handle` via its
/// file descriptor.
///
/// See [`reject_reserved_existing_handle`] for more info.
fn reject_reserved_existing_file(file: File, disk_path: &Path) -> Result<(), CheckoutError> {
    // Note: since the file is open, we don't expect `io::ErrorKind::NotFound`
    // to be returned here.
    let file_handle = same_file::Handle::from_file(file).map_err(|err| CheckoutError::Other {
        message: format!("Failed to validate path {}", disk_path.display()),
        err: err.into(),
    })?;

    reject_reserved_existing_handle(file_handle, disk_path)
}

/// Wrapper for [`reject_reserved_existing_handle`] which converts
/// the provided `disk_path` to a `same_file::Handle`.
///
/// See [`reject_reserved_existing_handle`] for more info.
///
/// # Remarks
///
/// Incurs an additional syscall cost to open and close the file
/// descriptor/`HANDLE` for `disk_path`.
fn reject_reserved_existing_path(disk_path: &Path) -> Result<(), CheckoutError> {
    let Some(disk_handle) =
        same_file_handle_from_path(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to validate path {}", disk_path.display()),
            err: err.into(),
        })?
    else {
        // If disk_path doesn't exist, there is nothing to reject: a missing
        // file cannot point to a reserved path name.
        return Ok(());
    };

    reject_reserved_existing_handle(disk_handle, disk_path)
}

/// Assuming `disk_path` exists, checks whether the last component points to
/// ".git" or ".jj" in the same parent directory.
///
/// `disk_handle` is expected to be a handle to the file described by
/// `disk_path`.
///
/// # Remarks
///
/// Incurs a syscall cost to open and close a file descriptor/`HANDLE` for
/// each filename in `RESERVED_DIR_NAMES`.
fn reject_reserved_existing_handle(
    disk_handle: same_file::Handle,
    disk_path: &Path,
) -> Result<(), CheckoutError> {
    let parent_dir_path = disk_path.parent().expect("content path shouldn't be root");
    for name in RESERVED_DIR_NAMES {
        let reserved_path = parent_dir_path.join(name);

        let Some(reserved_handle) =
            same_file_handle_from_path(&reserved_path).map_err(|err| CheckoutError::Other {
                message: format!("Failed to validate path {}", disk_path.display()),
                err: err.into(),
            })?
        else {
            // If disk_path pointed to the reserved path, we would have gotten
            // a handle back. Since we got nothing, the reserved path does not
            // exist, so disk_path cannot be pointing to it.
            continue;
        };

        if disk_handle == reserved_handle {
            return Err(CheckoutError::ReservedPathComponent {
                path: disk_path.to_owned(),
                name,
            });
        }
    }

    Ok(())
}
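
// Example (sketch; the names below are hypothetical): the comparison above is
// by file identity (dev/inode on Unix, file index on Windows), not by name,
// so it also catches aliases of ".git"/".jj" such as symlinks, hard links, or
// case-folded and 8.3 short names.
//
//     let evil = same_file::Handle::from_path(&parent_dir.join("evil"))?;
//     let dot_git = same_file::Handle::from_path(&parent_dir.join(".git"))?;
//     let aliased = evil == dot_git; // true if "evil" resolves to ".git"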

fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch {
    let time = metadata
        .modified()
        .expect("File mtime not supported on this platform?");
    let since_epoch = time
        .duration_since(UNIX_EPOCH)
        .expect("mtime before unix epoch");

    MillisSinceEpoch(
        i64::try_from(since_epoch.as_millis())
            .expect("mtime billions of years into the future or past"),
    )
}

fn file_state(metadata: &Metadata) -> Option<FileState> {
    let metadata_file_type = metadata.file_type();
    let file_type = if metadata_file_type.is_dir() {
        None
    } else if metadata_file_type.is_symlink() {
        Some(FileType::Symlink)
    } else if metadata_file_type.is_file() {
        #[cfg(unix)]
        let executable = metadata.permissions().mode() & 0o111 != 0;
        #[cfg(windows)]
        let executable = false;
        let executable = FileExecutableFlag::from_bool_lossy(executable);
        Some(FileType::Normal { executable })
    } else {
        None
    };
    file_type.map(|file_type| {
        let mtime = mtime_from_metadata(metadata);
        let size = metadata.len();
        FileState {
            file_type,
            mtime,
            size,
            materialized_conflict_data: None,
        }
    })
}
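
// Example (sketch): callers feed `file_state` the result of
// `symlink_metadata()` so symlinks are classified rather than followed;
// directories and special files (sockets, FIFOs, ...) map to `None`,
// i.e. "not a trackable file".
//
//     let metadata = disk_path.symlink_metadata()?;
//     match file_state(&metadata) {
//         Some(state) => { /* record it or compare against the old state */ }
//         None => { /* directory or special file; handled elsewhere */ }
//     }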

struct FsmonitorMatcher {
    matcher: Option<Box<dyn Matcher>>,
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,
}

/// Settings specific to the tree state of the [`LocalWorkingCopy`] backend.
#[derive(Clone, Debug)]
pub struct TreeStateSettings {
    /// Conflict marker style to use when materializing files or when checking
    /// changed files.
    pub conflict_marker_style: ConflictMarkerStyle,
    /// Whether to automatically convert CRLF line endings to LF when adding a
    /// file to the backend, and back again when checking files out to your
    /// filesystem.
    pub eol_conversion_mode: EolConversionMode,
    /// The fsmonitor (e.g. Watchman) to use, if any.
    pub fsmonitor_settings: FsmonitorSettings,
}

impl TreeStateSettings {
    /// Create [`TreeStateSettings`] from [`UserSettings`].
    pub fn try_from_user_settings(user_settings: &UserSettings) -> Result<Self, ConfigGetError> {
        Ok(Self {
            conflict_marker_style: user_settings.get("ui.conflict-marker-style")?,
            eol_conversion_mode: EolConversionMode::try_from_settings(user_settings)?,
            fsmonitor_settings: FsmonitorSettings::from_settings(user_settings)?,
        })
    }
}
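
// Example (sketch; `settings`, `store`, and the paths are hypothetical):
// settings are resolved once from the user configuration and then passed by
// reference when loading or initializing a tree state.
//
//     let tree_state_settings = TreeStateSettings::try_from_user_settings(settings)?;
//     let tree_state = TreeState::load(
//         store.clone(),
//         workspace_root.clone(),
//         state_path.clone(),
//         &tree_state_settings,
//     )?;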

pub struct TreeState {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    tree: MergedTree,
    file_states: FileStatesMap,
    // Currently only path prefixes
    sparse_patterns: Vec<RepoPathBuf>,
    own_mtime: MillisSinceEpoch,
    symlink_support: bool,

    /// The most recent clock value returned by Watchman. Will only be set if
    /// the repo is configured to use the Watchman filesystem monitor and
    /// Watchman has been queried at least once.
    watchman_clock: Option<crate::protos::local_working_copy::WatchmanClock>,

    conflict_marker_style: ConflictMarkerStyle,
    fsmonitor_settings: FsmonitorSettings,
    target_eol_strategy: TargetEolStrategy,
}

#[derive(Debug, Error)]
pub enum TreeStateError {
    #[error("Reading tree state from {path}")]
    ReadTreeState { path: PathBuf, source: io::Error },
    #[error("Decoding tree state from {path}")]
    DecodeTreeState {
        path: PathBuf,
        source: prost::DecodeError,
    },
    #[error("Writing tree state to temporary file {path}")]
    WriteTreeState { path: PathBuf, source: io::Error },
    #[error("Persisting tree state to file {path}")]
    PersistTreeState { path: PathBuf, source: io::Error },
    #[error("Filesystem monitor error")]
    Fsmonitor(#[source] Box<dyn Error + Send + Sync>),
}

impl TreeState {
    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }

    pub fn current_tree(&self) -> &MergedTree {
        &self.tree
    }

    pub fn file_states(&self) -> FileStates<'_> {
        self.file_states.all()
    }

    pub fn sparse_patterns(&self) -> &Vec<RepoPathBuf> {
        &self.sparse_patterns
    }

    fn sparse_matcher(&self) -> Box<dyn Matcher> {
        Box::new(PrefixMatcher::new(&self.sparse_patterns))
    }

    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.save()?;
        Ok(wc)
    }

    fn empty(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        &TreeStateSettings {
            conflict_marker_style,
            eol_conversion_mode,
            ref fsmonitor_settings,
        }: &TreeStateSettings,
    ) -> Self {
        Self {
            store: store.clone(),
            working_copy_path,
            state_path,
            tree: store.empty_merged_tree(),
            file_states: FileStatesMap::new(),
            sparse_patterns: vec![RepoPathBuf::root()],
            own_mtime: MillisSinceEpoch(0),
            symlink_support: check_symlink_support().unwrap_or(false),
            watchman_clock: None,
            conflict_marker_style,
            fsmonitor_settings: fsmonitor_settings.clone(),
            target_eol_strategy: TargetEolStrategy::new(eol_conversion_mode),
        }
    }

    pub fn load(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        tree_state_settings: &TreeStateSettings,
    ) -> Result<Self, TreeStateError> {
        let tree_state_path = state_path.join("tree_state");
        let file = match File::open(&tree_state_path) {
            Err(ref err) if err.kind() == io::ErrorKind::NotFound => {
                return Self::init(store, working_copy_path, state_path, tree_state_settings);
            }
            Err(err) => {
                return Err(TreeStateError::ReadTreeState {
                    path: tree_state_path,
                    source: err,
                });
            }
            Ok(file) => file,
        };

        let mut wc = Self::empty(store, working_copy_path, state_path, tree_state_settings);
        wc.read(&tree_state_path, file)?;
        Ok(wc)
    }

    fn update_own_mtime(&mut self) {
        if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() {
            self.own_mtime = mtime_from_metadata(&metadata);
        } else {
            self.own_mtime = MillisSinceEpoch(0);
        }
    }

    fn read(&mut self, tree_state_path: &Path, mut file: File) -> Result<(), TreeStateError> {
        self.update_own_mtime();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf)
            .map_err(|err| TreeStateError::ReadTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            })?;
        let proto = crate::protos::local_working_copy::TreeState::decode(&*buf).map_err(|err| {
            TreeStateError::DecodeTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            }
        })?;
        #[expect(deprecated)]
        if proto.tree_ids.is_empty() {
            self.tree = MergedTree::resolved(
                self.store.clone(),
                TreeId::new(proto.legacy_tree_id.clone()),
            );
        } else {
            let tree_ids_builder: MergeBuilder<TreeId> = proto
                .tree_ids
                .iter()
                .map(|id| TreeId::new(id.clone()))
                .collect();
            self.tree = MergedTree::new(self.store.clone(), tree_ids_builder.build());
        }
        self.file_states =
            FileStatesMap::from_proto(proto.file_states, proto.is_file_states_sorted);
        self.sparse_patterns = sparse_patterns_from_proto(proto.sparse_patterns.as_ref());
        self.watchman_clock = proto.watchman_clock;
        Ok(())
    }

    #[expect(clippy::assigning_clones, clippy::field_reassign_with_default)]
    pub fn save(&mut self) -> Result<(), TreeStateError> {
        let mut proto: crate::protos::local_working_copy::TreeState = Default::default();
        proto.tree_ids = self
            .tree
            .tree_ids()
            .iter()
            .map(|id| id.to_bytes())
            .collect();
        proto.file_states = self.file_states.data.clone();
        // `FileStatesMap` is guaranteed to be sorted.
        proto.is_file_states_sorted = true;
        let mut sparse_patterns = crate::protos::local_working_copy::SparsePatterns::default();
        for path in &self.sparse_patterns {
            sparse_patterns
                .prefixes
                .push(path.as_internal_file_string().to_owned());
        }
        proto.sparse_patterns = Some(sparse_patterns);
        proto.watchman_clock = self.watchman_clock.clone();

        let wrap_write_err = |source| TreeStateError::WriteTreeState {
            path: self.state_path.clone(),
            source,
        };
        let mut temp_file = NamedTempFile::new_in(&self.state_path).map_err(wrap_write_err)?;
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .map_err(wrap_write_err)?;
        // Update our own write time before we rename the file, so we know
        // there is no unknown data in it.
        self.update_own_mtime();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        let target_path = self.state_path.join("tree_state");
        persist_temp_file(temp_file, &target_path).map_err(|source| {
            TreeStateError::PersistTreeState {
                path: target_path.clone(),
                source,
            }
        })?;
        Ok(())
    }

    fn reset_watchman(&mut self) {
        self.watchman_clock.take();
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from);
        let changed_files = fsmonitor
            .query_changed_files(previous_clock)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        Ok(changed_files)
    }

    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        fsmonitor
            .is_trigger_registered()
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))
    }
}
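
// Note: although `query_watchman` and `is_watchman_trigger_registered` are
// written as `async fn`s, `#[tokio::main(flavor = "current_thread")]` turns
// each into a blocking wrapper that spins up a single-threaded runtime per
// call, so callers invoke them synchronously:
//
//     // Sketch (requires the "watchman" feature):
//     let (clock, changed_files) = tree_state.query_watchman(&config)?;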

/// Functions to snapshot local-disk files to the store.
impl TreeState {
    /// Look for changes to the working copy. If there are any changes, create
    /// a new tree from them.
    #[instrument(skip_all)]
    pub fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(bool, SnapshotStats), SnapshotError> {
        let &SnapshotOptions {
            ref base_ignores,
            progress,
            start_tracking_matcher,
            force_tracking_matcher,
            max_new_file_size,
        } = options;

        let sparse_matcher = self.sparse_matcher();

        let fsmonitor_clock_needs_save = self.fsmonitor_settings != FsmonitorSettings::None;
        let mut is_dirty = fsmonitor_clock_needs_save;
        let FsmonitorMatcher {
            matcher: fsmonitor_matcher,
            watchman_clock,
        } = self.make_fsmonitor_matcher(&self.fsmonitor_settings)?;
        let fsmonitor_matcher = match fsmonitor_matcher.as_ref() {
            None => &EverythingMatcher,
            Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(),
        };

        let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher);
        if matcher.visit(RepoPath::root()).is_nothing() {
            // No need to load the current tree, set up channels, etc.
            self.watchman_clock = watchman_clock;
            return Ok((is_dirty, SnapshotStats::default()));
        }

        let (tree_entries_tx, tree_entries_rx) = channel();
        let (file_states_tx, file_states_rx) = channel();
        let (untracked_paths_tx, untracked_paths_rx) = channel();
        let (deleted_files_tx, deleted_files_rx) = channel();

        trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> {
            let snapshotter = FileSnapshotter {
                tree_state: self,
                current_tree: &self.tree,
                matcher: &matcher,
                start_tracking_matcher,
                force_tracking_matcher,
                // Move tx sides so they'll be dropped at the end of the scope.
                tree_entries_tx,
                file_states_tx,
                untracked_paths_tx,
                deleted_files_tx,
                error: OnceLock::new(),
                progress,
                max_new_file_size,
            };
            let directory_to_visit = DirectoryToVisit {
                dir: RepoPathBuf::root(),
                disk_dir: self.working_copy_path.clone(),
                git_ignore: base_ignores.clone(),
                file_states: self.file_states.all(),
            };
            // Here we use scope as a queue of per-directory jobs.
            rayon::scope(|scope| {
                snapshotter.spawn_ok(scope, |scope| {
                    snapshotter.visit_directory(directory_to_visit, scope)
                });
            });
            snapshotter.into_result()
        })?;

        let stats = SnapshotStats {
            untracked_paths: untracked_paths_rx.into_iter().collect(),
        };
        let mut tree_builder = MergedTreeBuilder::new(self.tree.clone());
        trace_span!("process tree entries").in_scope(|| {
            for (path, tree_values) in &tree_entries_rx {
                tree_builder.set_or_remove(path, tree_values);
            }
        });
        let deleted_files = trace_span!("process deleted tree entries").in_scope(|| {
            let deleted_files = HashSet::from_iter(deleted_files_rx);
            is_dirty |= !deleted_files.is_empty();
            for file in &deleted_files {
                tree_builder.set_or_remove(file.clone(), Merge::absent());
            }
            deleted_files
        });
        trace_span!("process file states").in_scope(|| {
            let changed_file_states = file_states_rx
                .iter()
                .sorted_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2))
                .collect_vec();
            is_dirty |= !changed_file_states.is_empty();
            self.file_states
                .merge_in(changed_file_states, &deleted_files);
        });
        trace_span!("write tree").in_scope(|| -> Result<(), BackendError> {
            let new_tree = tree_builder.write_tree()?;
            is_dirty |= new_tree.tree_ids() != self.tree.tree_ids();
            self.tree = new_tree.clone();
            Ok(())
        })?;
        if cfg!(debug_assertions) {
            let tree_paths: HashSet<_> = self
                .tree
                .entries_matching(sparse_matcher.as_ref())
                .filter_map(|(path, result)| result.is_ok().then_some(path))
                .collect();
            let file_states = self.file_states.all();
            let state_paths: HashSet<_> = file_states.paths().map(|path| path.to_owned()).collect();
            assert_eq!(state_paths, tree_paths);
        }
        // Since untracked paths aren't cached in the tree state, we'll need to
        // rescan the working directory changes to report or track them later.
        // TODO: store untracked paths and update watchman_clock?
        if stats.untracked_paths.is_empty() || watchman_clock.is_none() {
            self.watchman_clock = watchman_clock;
        } else {
            tracing::info!("not updating watchman clock because there are untracked files");
        }
        Ok((is_dirty, stats))
    }

    #[instrument(skip_all)]
    fn make_fsmonitor_matcher(
        &self,
        fsmonitor_settings: &FsmonitorSettings,
    ) -> Result<FsmonitorMatcher, SnapshotError> {
        let (watchman_clock, changed_files) = match fsmonitor_settings {
            FsmonitorSettings::None => (None, None),
            FsmonitorSettings::Test { changed_files } => (None, Some(changed_files.clone())),
            #[cfg(feature = "watchman")]
            FsmonitorSettings::Watchman(config) => match self.query_watchman(config) {
                Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files),
                Err(err) => {
                    tracing::warn!(?err, "Failed to query filesystem monitor");
                    (None, None)
                }
            },
            #[cfg(not(feature = "watchman"))]
            FsmonitorSettings::Watchman(_) => {
                return Err(SnapshotError::Other {
                    message: "Failed to query the filesystem monitor".to_string(),
                    err: "Cannot query Watchman because jj was not compiled with the `watchman` \
                          feature (consider disabling `fsmonitor.backend`)"
                        .into(),
                });
            }
        };
        let matcher: Option<Box<dyn Matcher>> = match changed_files {
            None => None,
            Some(changed_files) => {
                let repo_paths = trace_span!("processing fsmonitor paths").in_scope(|| {
                    changed_files
                        .into_iter()
                        .filter_map(|path| RepoPathBuf::from_relative_path(path).ok())
                        .collect_vec()
                });

                Some(Box::new(FilesMatcher::new(repo_paths)))
            }
        };
        Ok(FsmonitorMatcher {
            matcher,
            watchman_clock,
        })
    }
}
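
// Example (sketch): driving a snapshot. The returned boolean says whether
// anything changed (including an fsmonitor clock that needs saving); the
// caller is responsible for persisting the updated state with `save()`.
//
//     let (is_dirty, stats) = tree_state.snapshot(&options)?;
//     if is_dirty {
//         tree_state.save()?;
//     }
//     for (path, reason) in &stats.untracked_paths {
//         // report untracked files to the user
//     }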

struct DirectoryToVisit<'a> {
    dir: RepoPathBuf,
    disk_dir: PathBuf,
    git_ignore: Arc<GitIgnoreFile>,
    file_states: FileStates<'a>,
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PresentDirEntryKind {
    Dir,
    File,
}

#[derive(Clone, Debug)]
struct PresentDirEntries {
    dirs: HashSet<String>,
    files: HashSet<String>,
}

/// Helper to scan local-disk directories and files in parallel.
struct FileSnapshotter<'a> {
    tree_state: &'a TreeState,
    current_tree: &'a MergedTree,
    matcher: &'a dyn Matcher,
    start_tracking_matcher: &'a dyn Matcher,
    force_tracking_matcher: &'a dyn Matcher,
    tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
    file_states_tx: Sender<(RepoPathBuf, FileState)>,
    untracked_paths_tx: Sender<(RepoPathBuf, UntrackedReason)>,
    deleted_files_tx: Sender<RepoPathBuf>,
    error: OnceLock<SnapshotError>,
    progress: Option<&'a SnapshotProgress<'a>>,
    max_new_file_size: u64,
}

impl FileSnapshotter<'_> {
    fn spawn_ok<'scope, F>(&'scope self, scope: &rayon::Scope<'scope>, body: F)
    where
        F: FnOnce(&rayon::Scope<'scope>) -> Result<(), SnapshotError> + Send + 'scope,
    {
        scope.spawn(|scope| {
            if self.error.get().is_some() {
                return;
            }
            match body(scope) {
                Ok(()) => {}
                Err(err) => self.error.set(err).unwrap_or(()),
            };
        });
    }

    /// Extracts the result of the snapshot.
    fn into_result(self) -> Result<(), SnapshotError> {
        match self.error.into_inner() {
            Some(err) => Err(err),
            None => Ok(()),
        }
    }
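
    // Note on the pattern above (sketch): rayon's `scope` has no built-in way
    // to cancel sibling jobs, so the first error wins the `OnceLock::set`
    // race, later spawns bail out early in `spawn_ok`, and `into_result`
    // surfaces the stored error once the scope has joined. `fallible_job` is
    // a hypothetical stand-in.
    //
    //     rayon::scope(|scope| {
    //         snapshotter.spawn_ok(scope, |scope| fallible_job(scope));
    //     });
    //     snapshotter.into_result()?; // first error, if any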

    /// Visits the directory entries, spawns jobs to recurse into sub
    /// directories.
    fn visit_directory<'scope>(
        &'scope self,
        directory_to_visit: DirectoryToVisit<'scope>,
        scope: &rayon::Scope<'scope>,
    ) -> Result<(), SnapshotError> {
        let DirectoryToVisit {
            dir,
            disk_dir,
            git_ignore,
            file_states,
        } = directory_to_visit;

        let git_ignore = git_ignore
            .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
        let dir_entries: Vec<_> = disk_dir
            .read_dir()
            .and_then(|entries| entries.try_collect())
            .map_err(|err| SnapshotError::Other {
                message: format!("Failed to read directory {}", disk_dir.display()),
                err: err.into(),
            })?;
        let (dirs, files) = dir_entries
            .into_par_iter()
            // Don't split into too many small jobs. For a small directory,
            // sequential scan should be fast enough.
            .with_min_len(100)
            .filter_map(|entry| {
                self.process_dir_entry(&dir, &git_ignore, file_states, &entry, scope)
                    .transpose()
            })
            .map(|item| match item {
                Ok((PresentDirEntryKind::Dir, name)) => Ok(Either::Left(name)),
                Ok((PresentDirEntryKind::File, name)) => Ok(Either::Right(name)),
                Err(err) => Err(err),
            })
            .collect::<Result<_, _>>()?;
        let present_entries = PresentDirEntries { dirs, files };
        self.emit_deleted_files(&dir, file_states, &present_entries);
        Ok(())
    }

    fn process_dir_entry<'scope>(
        &'scope self,
        dir: &RepoPath,
        git_ignore: &Arc<GitIgnoreFile>,
        file_states: FileStates<'scope>,
        entry: &DirEntry,
        scope: &rayon::Scope<'scope>,
    ) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
        let file_type = entry.file_type().unwrap();
        let file_name = entry.file_name();
        let name_string = file_name
            .into_string()
            .map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

        if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
            return Ok(None);
        }
        let name = RepoPathComponent::new(&name_string).unwrap();
        let path = dir.join(name);
        let maybe_current_file_state = file_states.get_at(dir, name);
        if let Some(file_state) = &maybe_current_file_state
            && file_state.file_type == FileType::GitSubmodule
        {
            return Ok(None);
        }

        if file_type.is_dir() {
            let file_states = file_states.prefixed_at(dir, name);
            // If a submodule was added in commit C, and a user decides to run
            // `jj new <something before C>` from after C, then the submodule
            // files stick around but it is no longer seen as a submodule.
            // We need to ensure that it is not tracked as if it was added to
            // the main repo.
            // See https://github.com/jj-vcs/jj/issues/4349.
            // To solve this, we ignore all nested repos entirely.
            let disk_dir = entry.path();
            for &name in RESERVED_DIR_NAMES {
                if disk_dir.join(name).symlink_metadata().is_ok() {
                    return Ok(None);
                }
            }

            if git_ignore.matches(&path.to_internal_dir_string())
                && self.force_tracking_matcher.visit(&path).is_nothing()
            {
                // If the whole directory is ignored by .gitignore, visit only
                // paths we're already tracking. This is because a .gitignore
                // file inside an ignored directory must itself be ignored.
                // It's also more efficient. start_tracking_matcher is NOT
                // tested here because we need to scan directory entries to
                // report untracked paths.
                self.spawn_ok(scope, move |_| self.visit_tracked_files(file_states));
            } else if !self.matcher.visit(&path).is_nothing() {
                let directory_to_visit = DirectoryToVisit {
                    dir: path,
                    disk_dir,
                    git_ignore: git_ignore.clone(),
                    file_states,
                };
                self.spawn_ok(scope, |scope| {
                    self.visit_directory(directory_to_visit, scope)
                });
            }
            // Whether or not the directory path matches, any child file entries
            // shouldn't be touched within the current recursion step.
            Ok(Some((PresentDirEntryKind::Dir, name_string)))
        } else if self.matcher.matches(&path) {
            if let Some(progress) = self.progress {
                progress(&path);
            }
            if maybe_current_file_state.is_none()
                && (git_ignore.matches(path.as_internal_file_string())
                    && !self.force_tracking_matcher.matches(&path))
            {
                // If it wasn't already tracked and it matches
                // the ignored paths, then ignore it.
                Ok(None)
            } else if maybe_current_file_state.is_none()
                && !self.start_tracking_matcher.matches(&path)
            {
                // Leave the file untracked
                self.untracked_paths_tx
                    .send((path, UntrackedReason::FileNotAutoTracked))
                    .ok();
                Ok(None)
            } else {
                let metadata = entry.metadata().map_err(|err| SnapshotError::Other {
                    message: format!("Failed to stat file {}", entry.path().display()),
                    err: err.into(),
                })?;
                if maybe_current_file_state.is_none()
                    && (metadata.len() > self.max_new_file_size
                        && !self.force_tracking_matcher.matches(&path))
                {
                    // Leave the large file untracked
                    let reason = UntrackedReason::FileTooLarge {
                        size: metadata.len(),
                        max_size: self.max_new_file_size,
                    };
                    self.untracked_paths_tx.send((path, reason)).ok();
                    Ok(None)
                } else if let Some(new_file_state) = file_state(&metadata) {
                    self.process_present_file(
                        path,
                        &entry.path(),
                        maybe_current_file_state.as_ref(),
                        new_file_state,
                    )?;
                    Ok(Some((PresentDirEntryKind::File, name_string)))
                } else {
                    // Special files are not considered present
                    Ok(None)
                }
            }
        } else {
            Ok(None)
        }
    }

    /// Visits only paths we're already tracking.
    fn visit_tracked_files(&self, file_states: FileStates<'_>) -> Result<(), SnapshotError> {
        for (tracked_path, current_file_state) in file_states {
            if current_file_state.file_type == FileType::GitSubmodule {
                continue;
            }
            if !self.matcher.matches(tracked_path) {
                continue;
            }
            let disk_path = tracked_path.to_fs_path(&self.tree_state.working_copy_path)?;
            let metadata = match disk_path.symlink_metadata() {
                Ok(metadata) => Some(metadata),
                Err(err) if err.kind() == io::ErrorKind::NotFound => None,
                Err(err) => {
                    return Err(SnapshotError::Other {
                        message: format!("Failed to stat file {}", disk_path.display()),
                        err: err.into(),
                    });
                }
            };
            if let Some(new_file_state) = metadata.as_ref().and_then(file_state) {
                self.process_present_file(
                    tracked_path.to_owned(),
                    &disk_path,
                    Some(&current_file_state),
                    new_file_state,
                )?;
            } else {
                self.deleted_files_tx.send(tracked_path.to_owned()).ok();
            }
        }
        Ok(())
    }

    fn process_present_file(
        &self,
        path: RepoPathBuf,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        mut new_file_state: FileState,
    ) -> Result<(), SnapshotError> {
        let update = self.get_updated_tree_value(
            &path,
            disk_path,
            maybe_current_file_state,
            &new_file_state,
        )?;
        // Preserve materialized conflict data for normal, non-resolved files
        if matches!(new_file_state.file_type, FileType::Normal { .. })
            && !update.as_ref().is_some_and(|update| update.is_resolved())
        {
            new_file_state.materialized_conflict_data =
                maybe_current_file_state.and_then(|state| state.materialized_conflict_data);
        }
        if let Some(tree_value) = update {
            self.tree_entries_tx.send((path.clone(), tree_value)).ok();
        }
        if Some(&new_file_state) != maybe_current_file_state {
            self.file_states_tx.send((path, new_file_state)).ok();
        }
        Ok(())
    }

    /// Emits file paths that don't exist in the `present_entries`.
    fn emit_deleted_files(
        &self,
        dir: &RepoPath,
        file_states: FileStates<'_>,
        present_entries: &PresentDirEntries,
    ) {
        let file_state_chunks = file_states.iter().chunk_by(|(path, _state)| {
            // Extract <name> from <dir>, <dir>/<name>, or <dir>/<name>/**.
            // (file_states may contain a <dir> file entry after a file->dir
            // transition.)
            debug_assert!(path.starts_with(dir));
            let slash = !dir.is_root() as usize;
            let len = dir.as_internal_file_string().len() + slash;
            let tail = path.as_internal_file_string().get(len..).unwrap_or("");
            match tail.split_once('/') {
                Some((name, _)) => (PresentDirEntryKind::Dir, name),
                None => (PresentDirEntryKind::File, tail),
            }
        });
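        // For example, with dir = "b", the key for "b/c" is (File, "c") and
        // the key for "b/d/e" is (Dir, "d").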
        file_state_chunks
            .into_iter()
            .filter(|&((kind, name), _)| match kind {
                PresentDirEntryKind::Dir => !present_entries.dirs.contains(name),
                PresentDirEntryKind::File => !present_entries.files.contains(name),
            })
            .flat_map(|(_, chunk)| chunk)
            // Whether or not the entry exists, submodules should be ignored
            .filter(|(_, state)| state.file_type != FileType::GitSubmodule)
            .filter(|(path, _)| self.matcher.matches(path))
            .try_for_each(|(path, _)| self.deleted_files_tx.send(path.to_owned()))
            .ok();
    }

    fn get_updated_tree_value(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        new_file_state: &FileState,
    ) -> Result<Option<MergedTreeValue>, SnapshotError> {
        let clean = match maybe_current_file_state {
            None => {
                // untracked
                false
            }
            Some(current_file_state) => {
                // If the file's mtime was set at the same time as this state file's own mtime,
                // then we don't know if the file was modified before or after this state file.
                new_file_state.is_clean(current_file_state)
                    && current_file_state.mtime < self.tree_state.own_mtime
            }
        };
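        // E.g. a file written in the same millisecond as the state file has
        // mtime == own_mtime, so it is conservatively treated as dirty and
        // its contents are re-examined below.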
        if clean {
            Ok(None)
        } else {
            let current_tree_values = self.current_tree.path_value(repo_path)?;
            let new_file_type = if !self.tree_state.symlink_support {
                let mut new_file_type = new_file_state.file_type.clone();
                if matches!(new_file_type, FileType::Normal { .. })
                    && matches!(current_tree_values.as_normal(), Some(TreeValue::Symlink(_)))
                {
                    new_file_type = FileType::Symlink;
                }
                new_file_type
            } else {
                new_file_state.file_type.clone()
            };
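            // E.g. on a filesystem without symlink support, a symlink in the
            // tree was checked out as a regular file containing the target
            // path, so snapshot it back as a symlink rather than as a file.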
            let new_tree_values = match new_file_type {
                FileType::Normal { executable } => self
                    .write_path_to_store(
                        repo_path,
                        disk_path,
                        &current_tree_values,
                        executable,
                        maybe_current_file_state.and_then(|state| state.materialized_conflict_data),
                    )
                    .block_on()?,
                FileType::Symlink => {
                    let id = self
                        .write_symlink_to_store(repo_path, disk_path)
                        .block_on()?;
                    Merge::normal(TreeValue::Symlink(id))
                }
                FileType::GitSubmodule => panic!("git submodule cannot be written to store"),
            };
            if new_tree_values != current_tree_values {
                Ok(Some(new_tree_values))
            } else {
                Ok(None)
            }
        }
    }

    fn store(&self) -> &Store {
        &self.tree_state.store
    }

    async fn write_path_to_store(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        current_tree_values: &MergedTreeValue,
        executable: FileExecutableFlag,
        materialized_conflict_data: Option<MaterializedConflictData>,
    ) -> Result<MergedTreeValue, SnapshotError> {
        if let Some(current_tree_value) = current_tree_values.as_resolved() {
            let id = self.write_file_to_store(repo_path, disk_path).await?;
            // On Windows, we preserve the executable bit from the current tree.
            let executable = executable.unwrap_or_else(|| {
                if let Some(TreeValue::File {
                    id: _,
                    executable,
                    copy_id: _,
                }) = current_tree_value
                {
                    *executable
                } else {
                    false
                }
            });
            // Preserve the copy id from the current tree
            let copy_id = {
                if let Some(TreeValue::File {
                    id: _,
                    executable: _,
                    copy_id,
                }) = current_tree_value
                {
                    copy_id.clone()
                } else {
                    CopyId::placeholder()
                }
            };
            Ok(Merge::normal(TreeValue::File {
                id,
                executable,
                copy_id,
            }))
        } else if let Some(old_file_ids) = current_tree_values.to_file_merge() {
            // Safe to unwrap because the copy id exists exactly on the file variant
            let copy_id_merge = current_tree_values.to_copy_id_merge().unwrap();
            let copy_id = copy_id_merge
                .resolve_trivial(SameChange::Accept)
                .cloned()
                .flatten()
                .unwrap_or_else(CopyId::placeholder);
            let mut contents = vec![];
            let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to open file {}", disk_path.display()),
                err: err.into(),
            })?;
            self.tree_state
                .target_eol_strategy
                .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
                .await
                .map_err(|err| SnapshotError::Other {
                    message: "Failed to convert the EOL".to_string(),
                    err: err.into(),
                })?
                .read_to_end(&mut contents)
                .await
                .map_err(|err| SnapshotError::Other {
                    message: "Failed to read the EOL converted contents".to_string(),
                    err: err.into(),
                })?;
            // If the file contained a conflict before and is a normal file on
            // disk, we try to parse any conflict markers in the file into a
            // conflict.
            let new_file_ids = conflicts::update_from_content(
                &old_file_ids,
                self.store(),
                repo_path,
                &contents,
                materialized_conflict_data.map_or(MIN_CONFLICT_MARKER_LEN, |data| {
                    data.conflict_marker_len as usize
                }),
            )
            .await?;
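            // E.g. if the user edited away all conflict markers, the parsed
            // result is resolved and written below as a normal file; if some
            // markers remain, the conflict is kept with updated contents.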
            match new_file_ids.into_resolved() {
                Ok(file_id) => {
                    // On Windows, we preserve the executable bit from the merged trees.
                    let executable = executable.unwrap_or_else(|| {
                        if let Some(merge) = current_tree_values.to_executable_merge() {
                            conflicts::resolve_file_executable(&merge).unwrap_or(false)
                        } else {
                            false
                        }
                    });
                    Ok(Merge::normal(TreeValue::File {
                        id: file_id.unwrap(),
                        executable,
                        copy_id,
                    }))
                }
                Err(new_file_ids) => {
                    if new_file_ids != old_file_ids {
                        Ok(current_tree_values.with_new_file_ids(&new_file_ids))
                    } else {
                        Ok(current_tree_values.clone())
                    }
                }
            }
        } else {
            Ok(current_tree_values.clone())
        }
    }

    async fn write_file_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<FileId, SnapshotError> {
        let file = File::open(disk_path).map_err(|err| SnapshotError::Other {
            message: format!("Failed to open file {}", disk_path.display()),
            err: err.into(),
        })?;
        let mut contents = self
            .tree_state
            .target_eol_strategy
            .convert_eol_for_snapshot(BlockingAsyncReader::new(file))
            .await
            .map_err(|err| SnapshotError::Other {
                message: "Failed to convert the EOL".to_string(),
                err: err.into(),
            })?;
        Ok(self.store().write_file(path, &mut contents).await?)
    }

    async fn write_symlink_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<SymlinkId, SnapshotError> {
        if self.tree_state.symlink_support {
            let target = disk_path.read_link().map_err(|err| SnapshotError::Other {
                message: format!("Failed to read symlink {}", disk_path.display()),
                err: err.into(),
            })?;
            let str_target =
                target
                    .to_str()
                    .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget {
                        path: disk_path.to_path_buf(),
                    })?;
            Ok(self.store().write_symlink(path, str_target).await?)
        } else {
            let target = fs::read(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to read file {}", disk_path.display()),
                err: err.into(),
            })?;
            let string_target =
                String::from_utf8(target).map_err(|_| SnapshotError::InvalidUtf8SymlinkTarget {
                    path: disk_path.to_path_buf(),
                })?;
            Ok(self.store().write_symlink(path, &string_target).await?)
        }
    }
}

/// Functions to update local-disk files from the store.
impl TreeState {
    async fn write_file(
        &self,
        disk_path: &Path,
        contents: impl AsyncRead + Send + Unpin,
        executable: bool,
        apply_eol_conversion: bool,
    ) -> Result<FileState, CheckoutError> {
        let mut file = File::options()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        let contents = if apply_eol_conversion {
            self.target_eol_strategy
                .convert_eol_for_update(contents)
                .await
                .map_err(|err| CheckoutError::Other {
                    message: "Failed to convert the EOL for the content".to_string(),
                    err: err.into(),
                })?
        } else {
            Box::new(contents)
        };
        let size = copy_async_to_sync(contents, &mut file)
            .await
            .map_err(|err| CheckoutError::Other {
                message: format!(
                    "Failed to write the content to the file {}",
                    disk_path.display()
                ),
                err: err.into(),
            })?;
        self.set_executable(disk_path, executable)?;
        // Read the file state from the file descriptor. That way, we know that
        // the file exists and is of the expected type, and the stat information
        // is most likely accurate, except for other processes modifying the
        // file concurrently. (The mtime is set at write time and won't change
        // when we close the file.)
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size as u64, &metadata))
    }

    fn write_symlink(&self, disk_path: &Path, target: String) -> Result<FileState, CheckoutError> {
        let target = PathBuf::from(&target);
        try_symlink(&target, disk_path).map_err(|err| CheckoutError::Other {
            message: format!(
                "Failed to create symlink from {} to {}",
                disk_path.display(),
                target.display()
            ),
            err: err.into(),
        })?;
        let metadata = disk_path
            .symlink_metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_symlink(&metadata))
    }

    async fn write_conflict(
        &self,
        disk_path: &Path,
        contents: &[u8],
        executable: bool,
    ) -> Result<FileState, CheckoutError> {
        let contents = self
            .target_eol_strategy
            .convert_eol_for_update(contents)
            .await
            .map_err(|err| CheckoutError::Other {
                message: "Failed to convert the EOL when writing a merge conflict".to_string(),
                err: err.into(),
            })?;
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        let size = copy_async_to_sync(contents, &mut file)
            .await
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to write conflict to file {}", disk_path.display()),
                err: err.into(),
            })? as u64;
        self.set_executable(disk_path, executable)?;
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size, &metadata))
    }

    #[cfg_attr(windows, expect(unused_variables))]
    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
        #[cfg(unix)]
        {
            let mode = if executable { 0o755 } else { 0o644 };
            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
                .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        }
        Ok(())
    }

    pub fn check_out(&mut self, new_tree: &MergedTree) -> Result<CheckoutStats, CheckoutError> {
        let old_tree = self.tree.clone();
        let stats = self
            .update(&old_tree, new_tree, self.sparse_matcher().as_ref())
            .block_on()?;
        self.tree = new_tree.clone();
        Ok(stats)
    }

    pub fn set_sparse_patterns(
        &mut self,
        sparse_patterns: Vec<RepoPathBuf>,
    ) -> Result<CheckoutStats, CheckoutError> {
        let tree = self.tree.clone();
        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
        let new_matcher = PrefixMatcher::new(&sparse_patterns);
        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
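        // E.g. going from ["a"] to ["a/b", "c"]: added covers "c" (and
        // nothing under "a", which was already present), while removed covers
        // everything under "a" except "a/b".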
        let empty_tree = self.store.empty_merged_tree();
        let added_stats = self.update(&empty_tree, &tree, &added_matcher).block_on()?;
        let removed_stats = self
            .update(&tree, &empty_tree, &removed_matcher)
            .block_on()?;
        self.sparse_patterns = sparse_patterns;
        assert_eq!(added_stats.updated_files, 0);
        assert_eq!(added_stats.removed_files, 0);
        assert_eq!(removed_stats.updated_files, 0);
        assert_eq!(removed_stats.added_files, 0);
        assert_eq!(removed_stats.skipped_files, 0);
        Ok(CheckoutStats {
            updated_files: 0,
            added_files: added_stats.added_files,
            removed_files: removed_stats.removed_files,
            skipped_files: added_stats.skipped_files,
        })
    }

    async fn update(
        &mut self,
        old_tree: &MergedTree,
        new_tree: &MergedTree,
        matcher: &dyn Matcher,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: maybe it's better not to include the skipped counts in the
        // "intended" counts
        let mut stats = CheckoutStats {
            updated_files: 0,
            added_files: 0,
            removed_files: 0,
            skipped_files: 0,
        };
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        let mut diff_stream = old_tree
            .diff_stream_for_file_system(new_tree, matcher)
            .map(async |TreeDiffEntry { path, values }| match values {
                Ok(diff) => {
                    let result = materialize_tree_value(&self.store, &path, diff.after).await;
                    (path, result.map(|value| (diff.before, value)))
                }
                Err(err) => (path, Err(err)),
            })
            .buffered(self.store.concurrency().max(1));

        let mut prev_created_path: RepoPathBuf = RepoPathBuf::root();

        while let Some((path, data)) = diff_stream.next().await {
            let (before, after) = data?;
            if after.is_absent() {
                stats.removed_files += 1;
            } else if before.is_absent() {
                stats.added_files += 1;
            } else {
                stats.updated_files += 1;
            }

            // An existing Git submodule can be a non-empty directory on disk.
            // We shouldn't attempt to manage it as a tracked path.
            //
            // TODO: It might be better to add general support for paths not
            // tracked by jj than processing submodules specially. For example,
            // paths excluded by .gitignore can be marked as such so that
            // newly-"unignored" paths won't be snapshotted automatically.
            if matches!(before.as_normal(), Some(TreeValue::GitSubmodule(_)))
                && matches!(after, MaterializedTreeValue::GitSubmodule(_))
            {
                eprintln!("ignoring git submodule at {path:?}");
                // Don't update the file state, as if there were no diff. Leave
                // the state type as FileType::GitSubmodule if it was before.
                continue;
            }

            // This path and the previous one we did work for may have a common prefix. We
            // can adjust the "working copy" path to the parent directory which we know
            // is already created. If there is no common prefix, this will by default use
            // RepoPath::root() as the common prefix.
            let (common_prefix, adjusted_diff_file_path) =
                path.split_common_prefix(&prev_created_path);

            let disk_path = if adjusted_diff_file_path.is_root() {
                // The path being "root" here implies that the entire path has
                // already been created.
                //
                // E.g. we may have already processed a path like "foo/bar/baz"
                // (making it our `prev_created_path`) and the current path is
                // "foo/bar".
                //
                // This results in a common prefix of "foo/bar" with an empty
                // string for the remainder, since its entire prefix has
                // already been created. This means that we _don't_ need to
                // create its parent dirs either.

                path.to_fs_path(self.working_copy_path())?
            } else {
                let adjusted_working_copy_path =
                    common_prefix.to_fs_path(self.working_copy_path())?;

                // Create parent directories no matter if after.is_present(). This
                // ensures that the path never traverses symlinks.
                let Some(disk_path) =
                    create_parent_dirs(&adjusted_working_copy_path, adjusted_diff_file_path)?
                else {
                    changed_file_states.push((path, FileState::placeholder()));
                    stats.skipped_files += 1;
                    continue;
                };

                // Cache this path for the next iteration. This must occur after
                // `create_parent_dirs` to ensure that the path is only set when
                // no symlinks are encountered. Otherwise there could be an
                // opportunity for a filesystem write-what-where attack.
                prev_created_path = path
                    .parent()
                    .map(RepoPath::to_owned)
                    .expect("diff path has no parent");

                disk_path
            };

            // If the path was present before, check for reserved paths first
            // and delete the old file.
            let present_file_deleted = before.is_present() && remove_old_file(&disk_path)?;
            // If not, create a temporary file to test the path validity.
            if !present_file_deleted && !can_create_new_file(&disk_path)? {
                changed_file_states.push((path, FileState::placeholder()));
                stats.skipped_files += 1;
                continue;
            }

            // TODO: Check that the file has not changed before overwriting/removing it.
            let file_state = match after {
                MaterializedTreeValue::Absent | MaterializedTreeValue::AccessDenied(_) => {
                    // Reset the previous path to avoid scenarios where this path is deleted,
                    // then on the next iteration recreation is skipped because of this
                    // optimization.
                    prev_created_path = RepoPathBuf::root();

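                    // Prune now-empty ancestor directories: e.g. after
                    // deleting "a/b/c", remove "a/b" and then "a" for as long
                    // as they remain empty.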
                    let mut parent_dir = disk_path.parent().unwrap();
                    loop {
                        if fs::remove_dir(parent_dir).is_err() {
                            break;
                        }

                        parent_dir = parent_dir.parent().unwrap();
                    }
                    deleted_files.insert(path);
                    continue;
                }
                MaterializedTreeValue::File(file) => {
                    self.write_file(&disk_path, file.reader, file.executable, true)
                        .await?
                }
                MaterializedTreeValue::Symlink { id: _, target } => {
                    if self.symlink_support {
                        self.write_symlink(&disk_path, target)?
                    } else {
                        self.write_file(&disk_path, target.as_bytes(), false, false)
                            .await?
                    }
                }
                MaterializedTreeValue::GitSubmodule(_) => {
                    eprintln!("ignoring git submodule at {path:?}");
                    FileState::for_gitsubmodule()
                }
                MaterializedTreeValue::Tree(_) => {
                    panic!("unexpected tree entry in diff at {path:?}");
                }
                MaterializedTreeValue::FileConflict(file) => {
                    let conflict_marker_len =
                        choose_materialized_conflict_marker_len(&file.contents);
                    let options = ConflictMaterializeOptions {
                        marker_style: self.conflict_marker_style,
                        marker_len: Some(conflict_marker_len),
                        merge: self.store.merge_options().clone(),
                    };
                    let contents = materialize_merge_result_to_bytes(&file.contents, &options);
                    let mut file_state = self
                        .write_conflict(&disk_path, &contents, file.executable.unwrap_or(false))
                        .await?;
                    file_state.materialized_conflict_data = Some(MaterializedConflictData {
                        conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX),
                    });
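                    // Recording the marker length lets a later snapshot parse
                    // markers of the same (possibly longer-than-default) size;
                    // see write_path_to_store above.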
                    file_state
                }
                MaterializedTreeValue::OtherConflict { id } => {
                    // Unless all terms are regular files, we can't do much
                    // better than trying to describe the merge.
                    let contents = id.describe();
                    let executable = false;
                    self.write_conflict(&disk_path, contents.as_bytes(), executable)
                        .await?
                }
            };
            changed_file_states.push((path, file_state));
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        Ok(stats)
    }

    pub async fn reset(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        let matcher = self.sparse_matcher();
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        let mut diff_stream = self
            .tree
            .diff_stream_for_file_system(new_tree, matcher.as_ref());
        while let Some(TreeDiffEntry { path, values }) = diff_stream.next().await {
            let after = values?.after;
            if after.is_absent() {
                deleted_files.insert(path);
            } else {
                let file_type = match after.into_resolved() {
                    Ok(value) => match value.unwrap() {
                        TreeValue::File {
                            id: _,
                            executable,
                            copy_id: _,
                        } => FileType::Normal {
                            executable: FileExecutableFlag::from_bool_lossy(executable),
                        },
                        TreeValue::Symlink(_id) => FileType::Symlink,
                        TreeValue::GitSubmodule(_id) => {
                            eprintln!("ignoring git submodule at {path:?}");
                            FileType::GitSubmodule
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    },
                    Err(_values) => {
                        // TODO: Try to set the executable bit based on the conflict
                        FileType::Normal {
                            executable: FileExecutableFlag::from_bool_lossy(false),
                        }
                    }
                };
                let file_state = FileState {
                    file_type,
                    mtime: MillisSinceEpoch(0),
                    size: 0,
                    materialized_conflict_data: None,
                };
                changed_file_states.push((path, file_state));
            }
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        self.tree = new_tree.clone();
        Ok(())
    }

    pub async fn recover(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        self.file_states.clear();
        self.tree = self.store.empty_merged_tree();
        self.reset(new_tree).await
    }
}

fn checkout_error_for_stat_error(err: io::Error, path: &Path) -> CheckoutError {
    CheckoutError::Other {
        message: format!("Failed to stat file {}", path.display()),
        err: err.into(),
    }
}

/// Working copy state stored in "checkout" file.
#[derive(Clone, Debug)]
struct CheckoutState {
    operation_id: OperationId,
    workspace_name: WorkspaceNameBuf,
}

impl CheckoutState {
    fn load(state_path: &Path) -> Result<Self, WorkingCopyStateError> {
        let wrap_err = |err| WorkingCopyStateError {
            message: "Failed to read checkout state".to_owned(),
            err,
        };
        let buf = fs::read(state_path.join("checkout")).map_err(|err| wrap_err(err.into()))?;
        let proto = crate::protos::local_working_copy::Checkout::decode(&*buf)
            .map_err(|err| wrap_err(err.into()))?;
        Ok(Self {
            operation_id: OperationId::new(proto.operation_id),
            workspace_name: if proto.workspace_name.is_empty() {
                // For compatibility with old working copies.
                // TODO: Delete in mid 2022 or so
                WorkspaceName::DEFAULT.to_owned()
            } else {
                proto.workspace_name.into()
            },
        })
    }

    #[instrument(skip_all)]
    fn save(&self, state_path: &Path) -> Result<(), WorkingCopyStateError> {
        let wrap_err = |err| WorkingCopyStateError {
            message: "Failed to write checkout state".to_owned(),
            err,
        };
        let proto = crate::protos::local_working_copy::Checkout {
            operation_id: self.operation_id.to_bytes(),
            workspace_name: (*self.workspace_name).into(),
        };
        let mut temp_file =
            NamedTempFile::new_in(state_path).map_err(|err| wrap_err(err.into()))?;
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .map_err(|err| wrap_err(err.into()))?;
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        persist_temp_file(temp_file, state_path.join("checkout"))
            .map_err(|err| wrap_err(err.into()))?;
        Ok(())
    }
}

pub struct LocalWorkingCopy {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    checkout_state: CheckoutState,
    tree_state: OnceCell<TreeState>,
    tree_state_settings: TreeStateSettings,
}

impl WorkingCopy for LocalWorkingCopy {
    fn name(&self) -> &str {
        Self::name()
    }

    fn workspace_name(&self) -> &WorkspaceName {
        &self.checkout_state.workspace_name
    }

    fn operation_id(&self) -> &OperationId {
        &self.checkout_state.operation_id
    }

    fn tree(&self) -> Result<&MergedTree, WorkingCopyStateError> {
        Ok(self.tree_state()?.current_tree())
    }

    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
        Ok(self.tree_state()?.sparse_patterns())
    }

    fn start_mutation(&self) -> Result<Box<dyn LockedWorkingCopy>, WorkingCopyStateError> {
        let lock_path = self.state_path.join("working_copy.lock");
        let lock = FileLock::lock(lock_path).map_err(|err| WorkingCopyStateError {
            message: "Failed to lock working copy".to_owned(),
            err: err.into(),
        })?;

        let wc = Self {
            store: self.store.clone(),
            working_copy_path: self.working_copy_path.clone(),
            state_path: self.state_path.clone(),
            // Re-read the state after taking the lock
            checkout_state: CheckoutState::load(&self.state_path)?,
            // Empty so we re-read the state after taking the lock
            // TODO: It's expensive to reload the whole tree. We should copy it from `self` if it
            // hasn't changed.
            tree_state: OnceCell::new(),
            tree_state_settings: self.tree_state_settings.clone(),
        };
        let old_operation_id = wc.operation_id().clone();
        let old_tree = wc.tree()?.clone();
        Ok(Box::new(LockedLocalWorkingCopy {
            wc,
            old_operation_id,
            old_tree,
            tree_state_dirty: false,
            new_workspace_name: None,
            _lock: lock,
        }))
    }
}

impl LocalWorkingCopy {
    pub fn name() -> &'static str {
        "local"
    }

    /// Initializes a new working copy at `working_copy_path`. The working
    /// copy's state will be stored in the `state_path` directory. The working
    /// copy will have the empty tree checked out.
    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_name: WorkspaceNameBuf,
        user_settings: &UserSettings,
    ) -> Result<Self, WorkingCopyStateError> {
        let checkout_state = CheckoutState {
            operation_id,
            workspace_name,
        };
        checkout_state.save(&state_path)?;
        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read the tree state settings".to_string(),
                err: err.into(),
            })?;
        let tree_state = TreeState::init(
            store.clone(),
            working_copy_path.clone(),
            state_path.clone(),
            &tree_state_settings,
        )
        .map_err(|err| WorkingCopyStateError {
            message: "Failed to initialize working copy state".to_string(),
            err: err.into(),
        })?;
        Ok(Self {
            store,
            working_copy_path,
            state_path,
            checkout_state,
            tree_state: OnceCell::with_value(tree_state),
            tree_state_settings,
        })
    }

    pub fn load(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        user_settings: &UserSettings,
    ) -> Result<Self, WorkingCopyStateError> {
        let checkout_state = CheckoutState::load(&state_path)?;
        let tree_state_settings = TreeStateSettings::try_from_user_settings(user_settings)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read the tree state settings".to_string(),
                err: err.into(),
            })?;
        Ok(Self {
            store,
            working_copy_path,
            state_path,
            checkout_state,
            tree_state: OnceCell::new(),
            tree_state_settings,
        })
    }

    pub fn state_path(&self) -> &Path {
        &self.state_path
    }

    #[instrument(skip_all)]
    fn tree_state(&self) -> Result<&TreeState, WorkingCopyStateError> {
        self.tree_state.get_or_try_init(|| {
            TreeState::load(
                self.store.clone(),
                self.working_copy_path.clone(),
                self.state_path.clone(),
                &self.tree_state_settings,
            )
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to read working copy state".to_string(),
                err: err.into(),
            })
        })
    }

    fn tree_state_mut(&mut self) -> Result<&mut TreeState, WorkingCopyStateError> {
        self.tree_state()?; // ensure loaded
        Ok(self.tree_state.get_mut().unwrap())
    }

    pub fn file_states(&self) -> Result<FileStates<'_>, WorkingCopyStateError> {
        Ok(self.tree_state()?.file_states())
    }

    #[cfg(feature = "watchman")]
    pub fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), WorkingCopyStateError> {
        self.tree_state()?
            .query_watchman(config)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to query watchman".to_string(),
                err: err.into(),
            })
    }

    #[cfg(feature = "watchman")]
    pub fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, WorkingCopyStateError> {
        self.tree_state()?
            .is_watchman_trigger_registered(config)
            .map_err(|err| WorkingCopyStateError {
                message: "Failed to query watchman".to_string(),
                err: err.into(),
            })
    }
}

pub struct LocalWorkingCopyFactory {}

impl WorkingCopyFactory for LocalWorkingCopyFactory {
    fn init_working_copy(
        &self,
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_name: WorkspaceNameBuf,
        settings: &UserSettings,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        Ok(Box::new(LocalWorkingCopy::init(
            store,
            working_copy_path,
            state_path,
            operation_id,
            workspace_name,
            settings,
        )?))
    }

    fn load_working_copy(
        &self,
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        settings: &UserSettings,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        Ok(Box::new(LocalWorkingCopy::load(
            store,
            working_copy_path,
            state_path,
            settings,
        )?))
    }
}

/// A working copy that's locked on disk. The lock is held until you call
/// `finish()` or `discard()`.
pub struct LockedLocalWorkingCopy {
    wc: LocalWorkingCopy,
    old_operation_id: OperationId,
    old_tree: MergedTree,
    tree_state_dirty: bool,
    new_workspace_name: Option<WorkspaceNameBuf>,
    _lock: FileLock,
}

#[async_trait]
impl LockedWorkingCopy for LockedLocalWorkingCopy {
    fn old_operation_id(&self) -> &OperationId {
        &self.old_operation_id
    }

    fn old_tree(&self) -> &MergedTree {
        &self.old_tree
    }

    async fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(MergedTree, SnapshotStats), SnapshotError> {
        let tree_state = self.wc.tree_state_mut()?;
        let (is_dirty, stats) = tree_state.snapshot(options)?;
        self.tree_state_dirty |= is_dirty;
        Ok((tree_state.current_tree().clone(), stats))
    }

    async fn check_out(&mut self, commit: &Commit) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with the new TreeId so we can
        // continue an interrupted update if we find such a file.
        let new_tree = commit.tree();
        let tree_state = self.wc.tree_state_mut()?;
        if tree_state.tree.tree_ids() != new_tree.tree_ids() {
            let stats = tree_state.check_out(&new_tree)?;
            self.tree_state_dirty = true;
            Ok(stats)
        } else {
            Ok(CheckoutStats::default())
        }
    }

    fn rename_workspace(&mut self, new_name: WorkspaceNameBuf) {
        self.new_workspace_name = Some(new_name);
    }

    async fn reset(&mut self, commit: &Commit) -> Result<(), ResetError> {
        let new_tree = commit.tree();
        self.wc.tree_state_mut()?.reset(&new_tree).await?;
        self.tree_state_dirty = true;
        Ok(())
    }

    async fn recover(&mut self, commit: &Commit) -> Result<(), ResetError> {
        let new_tree = commit.tree();
        self.wc.tree_state_mut()?.recover(&new_tree).await?;
        self.tree_state_dirty = true;
        Ok(())
    }

    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
        self.wc.sparse_patterns()
    }

    async fn set_sparse_patterns(
        &mut self,
        new_sparse_patterns: Vec<RepoPathBuf>,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
        // continue an interrupted update if we find such a file.
        let stats = self
            .wc
            .tree_state_mut()?
            .set_sparse_patterns(new_sparse_patterns)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    #[instrument(skip_all)]
    async fn finish(
        mut self: Box<Self>,
        operation_id: OperationId,
    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
        assert!(self.tree_state_dirty || self.old_tree.tree_ids() == self.wc.tree()?.tree_ids());
        if self.tree_state_dirty {
            self.wc
                .tree_state_mut()?
                .save()
                .map_err(|err| WorkingCopyStateError {
                    message: "Failed to write working copy state".to_string(),
                    err: Box::new(err),
                })?;
        }
        if self.old_operation_id != operation_id || self.new_workspace_name.is_some() {
            self.wc.checkout_state.operation_id = operation_id;
            if let Some(workspace_name) = self.new_workspace_name {
                self.wc.checkout_state.workspace_name = workspace_name;
            }
            self.wc.checkout_state.save(&self.wc.state_path)?;
        }
        // TODO: Clear the "pending_checkout" file here.
        Ok(Box::new(self.wc))
    }
}

impl LockedLocalWorkingCopy {
    pub fn reset_watchman(&mut self) -> Result<(), SnapshotError> {
        self.wc.tree_state_mut()?.reset_watchman();
        self.tree_state_dirty = true;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use maplit::hashset;

    use super::*;

    fn repo_path(value: &str) -> &RepoPath {
        RepoPath::from_internal_string(value).unwrap()
    }

    fn repo_path_component(value: &str) -> &RepoPathComponent {
        RepoPathComponent::new(value).unwrap()
    }

    fn new_state(size: u64) -> FileState {
        FileState {
            file_type: FileType::Normal {
                executable: FileExecutableFlag::from_bool_lossy(false),
            },
            mtime: MillisSinceEpoch(0),
            size,
            materialized_conflict_data: None,
        }
    }

    #[test]
    fn test_file_states_merge() {
        let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size));
        let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size));
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("bc", 5),
        ];
        let mut file_states = FileStatesMap::from_proto(data, false);

        let changed_file_states = vec![
            new_owned_entry("aa", 10),    // change
            new_owned_entry("b/d/f", 11), // add
            new_owned_entry("b/e", 12),   // change
            new_owned_entry("c", 13),     // add
        ];
        let deleted_files = hashset! {
            repo_path("b/c").to_owned(),
            repo_path("b#").to_owned(),
        };
        file_states.merge_in(changed_file_states, &deleted_files);
        assert_eq!(
            file_states.all().iter().collect_vec(),
            vec![
                new_static_entry("aa", 10),
                new_static_entry("b/d/e", 2),
                new_static_entry("b/d/f", 11),
                new_static_entry("b/e", 12),
                new_static_entry("bc", 5),
                new_static_entry("c", 13),
            ],
        );
    }

    #[test]
    fn test_file_states_lookup() {
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("bc", 5),
        ];
        let file_states = FileStates::from_sorted(&data);

        assert_eq!(
            file_states.prefixed(repo_path("")).paths().collect_vec(),
            ["aa", "b/c", "b/d/e", "b/e", "b#", "bc"].map(repo_path)
        );
        assert!(file_states.prefixed(repo_path("a")).is_empty());
        assert_eq!(
            file_states.prefixed(repo_path("aa")).paths().collect_vec(),
            ["aa"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b")).paths().collect_vec(),
            ["b/c", "b/d/e", "b/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b/d")).paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b#")).paths().collect_vec(),
            ["b#"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("bc")).paths().collect_vec(),
            ["bc"].map(repo_path)
        );
        assert!(file_states.prefixed(repo_path("z")).is_empty());

        assert!(!file_states.contains_path(repo_path("a")));
        assert!(file_states.contains_path(repo_path("aa")));
        assert!(file_states.contains_path(repo_path("b/d/e")));
        assert!(!file_states.contains_path(repo_path("b/d")));
        assert!(file_states.contains_path(repo_path("b#")));
        assert!(file_states.contains_path(repo_path("bc")));
        assert!(!file_states.contains_path(repo_path("z")));

        assert_eq!(file_states.get(repo_path("a")), None);
        assert_eq!(file_states.get(repo_path("aa")), Some(new_state(0)));
        assert_eq!(file_states.get(repo_path("b/d/e")), Some(new_state(2)));
        assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
        assert_eq!(file_states.get(repo_path("z")), None);
    }

    #[test]
    fn test_file_states_lookup_at() {
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("b/c", 0),
            new_proto_entry("b/d/e", 1),
            new_proto_entry("b/d#", 2), // '#' < '/'
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
        ];
        let file_states = FileStates::from_sorted(&data);

        // At root
        assert_eq!(
            file_states.get_at(RepoPath::root(), repo_path_component("b")),
            None
        );
        assert_eq!(
            file_states.get_at(RepoPath::root(), repo_path_component("b#")),
            Some(new_state(4))
        );

        // At prefixed dir
        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("c")),
            Some(new_state(0))
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("d")),
            None
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), repo_path_component("d#")),
            Some(new_state(2))
        );

        // At nested prefixed dir
        let prefixed_states = prefixed_states.prefixed_at(repo_path("b"), repo_path_component("d"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), repo_path_component("e")),
            Some(new_state(1))
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), repo_path_component("#")),
            None
        );

        // At prefixed file
        let prefixed_states = file_states.prefixed_at(RepoPath::root(), repo_path_component("b#"));
        assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
        assert_eq!(
            prefixed_states.get_at(repo_path("b#"), repo_path_component("#")),
            None
        );
    }
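
    // A small additional check, added as an illustrative sketch: it uses only
    // APIs exercised by the tests above, verifying that a `prefixed` view
    // agrees with direct `get` lookups on the full map.
    #[test]
    fn test_file_states_prefixed_get_consistency() {
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![new_proto_entry("a/b", 0), new_proto_entry("a/c", 1)];
        let file_states = FileStates::from_sorted(&data);
        // The prefixed view keeps full paths, so exact lookups should match.
        let prefixed = file_states.prefixed(repo_path("a"));
        assert_eq!(
            prefixed.paths().collect_vec(),
            ["a/b", "a/c"].map(repo_path)
        );
        assert_eq!(
            prefixed.get(repo_path("a/b")),
            file_states.get(repo_path("a/b"))
        );
        assert_eq!(prefixed.get(repo_path("a/c")), Some(new_state(1)));
    }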
}