jj_lib/
local_working_copy.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16#![allow(clippy::let_unit_value)]
17
18use std::any::Any;
19use std::cmp::Ordering;
20use std::collections::HashSet;
21use std::error::Error;
22use std::fs;
23use std::fs::DirEntry;
24use std::fs::File;
25use std::fs::Metadata;
26use std::fs::OpenOptions;
27use std::io;
28use std::io::Read;
29use std::io::Write as _;
30use std::iter;
31use std::mem;
32use std::ops::Range;
33#[cfg(unix)]
34use std::os::unix::fs::PermissionsExt as _;
35use std::path::Path;
36use std::path::PathBuf;
37use std::slice;
38use std::sync::mpsc::channel;
39use std::sync::mpsc::Sender;
40use std::sync::Arc;
41use std::sync::OnceLock;
42use std::time::UNIX_EPOCH;
43
44use either::Either;
45use futures::StreamExt as _;
46use itertools::EitherOrBoth;
47use itertools::Itertools as _;
48use once_cell::unsync::OnceCell;
49use pollster::FutureExt as _;
50use prost::Message as _;
51use rayon::iter::IntoParallelIterator as _;
52use rayon::prelude::IndexedParallelIterator as _;
53use rayon::prelude::ParallelIterator as _;
54use tempfile::NamedTempFile;
55use thiserror::Error;
56use tracing::instrument;
57use tracing::trace_span;
58
59use crate::backend::BackendError;
60use crate::backend::BackendResult;
61use crate::backend::FileId;
62use crate::backend::MergedTreeId;
63use crate::backend::MillisSinceEpoch;
64use crate::backend::SymlinkId;
65use crate::backend::TreeId;
66use crate::backend::TreeValue;
67use crate::commit::Commit;
68use crate::conflicts;
69use crate::conflicts::choose_materialized_conflict_marker_len;
70use crate::conflicts::materialize_merge_result_to_bytes_with_marker_len;
71use crate::conflicts::materialize_tree_value;
72use crate::conflicts::ConflictMarkerStyle;
73use crate::conflicts::MaterializedTreeValue;
74use crate::conflicts::MIN_CONFLICT_MARKER_LEN;
75use crate::file_util::check_symlink_support;
76use crate::file_util::try_symlink;
77#[cfg(feature = "watchman")]
78use crate::fsmonitor::watchman;
79use crate::fsmonitor::FsmonitorSettings;
80#[cfg(feature = "watchman")]
81use crate::fsmonitor::WatchmanConfig;
82use crate::gitignore::GitIgnoreFile;
83use crate::lock::FileLock;
84use crate::matchers::DifferenceMatcher;
85use crate::matchers::EverythingMatcher;
86use crate::matchers::FilesMatcher;
87use crate::matchers::IntersectionMatcher;
88use crate::matchers::Matcher;
89use crate::matchers::PrefixMatcher;
90use crate::merge::Merge;
91use crate::merge::MergeBuilder;
92use crate::merge::MergedTreeValue;
93use crate::merged_tree::MergedTree;
94use crate::merged_tree::MergedTreeBuilder;
95use crate::merged_tree::TreeDiffEntry;
96use crate::object_id::ObjectId as _;
97use crate::op_store::OperationId;
98use crate::ref_name::WorkspaceName;
99use crate::ref_name::WorkspaceNameBuf;
100use crate::repo_path::RepoPath;
101use crate::repo_path::RepoPathBuf;
102use crate::repo_path::RepoPathComponent;
103use crate::store::Store;
104use crate::tree::Tree;
105use crate::working_copy::CheckoutError;
106use crate::working_copy::CheckoutOptions;
107use crate::working_copy::CheckoutStats;
108use crate::working_copy::LockedWorkingCopy;
109use crate::working_copy::ResetError;
110use crate::working_copy::SnapshotError;
111use crate::working_copy::SnapshotOptions;
112use crate::working_copy::SnapshotProgress;
113use crate::working_copy::SnapshotStats;
114use crate::working_copy::UntrackedReason;
115use crate::working_copy::WorkingCopy;
116use crate::working_copy::WorkingCopyFactory;
117use crate::working_copy::WorkingCopyStateError;
118
/// Representation of a tracked file's executable bit: a real boolean on Unix.
#[cfg(unix)]
type FileExecutableFlag = bool;
/// Windows has no executable bit, so the flag carries no information there.
#[cfg(windows)]
type FileExecutableFlag = ();
123
/// Kind of file recorded in the working-copy state.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FileType {
    /// Regular file. The executable bit is only tracked on Unix.
    Normal { executable: FileExecutableFlag },
    /// Symbolic link.
    Symlink,
    /// Git submodule entry.
    GitSubmodule,
}
130
/// Extra state recorded when a conflicted file is materialized to disk.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct MaterializedConflictData {
    /// Length of the conflict markers written to the materialized file.
    pub conflict_marker_len: u32,
}
135
/// Per-file snapshot state used to decide whether a file is clean or dirty.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FileState {
    /// Kind of the file at the last snapshot.
    pub file_type: FileType,
    /// Modification time observed at the last snapshot.
    pub mtime: MillisSinceEpoch,
    /// Size in bytes observed at the last snapshot.
    pub size: u64,
    /// Set if the file was materialized with conflict markers.
    pub materialized_conflict_data: Option<MaterializedConflictData>,
    /* TODO: What else do we need here? Git stores a lot of fields.
     * TODO: Could possibly handle case-insensitive file systems keeping an
     *       Option<PathBuf> with the actual path here. */
}
146
147impl FileState {
148    /// Check whether a file state appears clean compared to a previous file
149    /// state, ignoring materialized conflict data.
150    pub fn is_clean(&self, old_file_state: &Self) -> bool {
151        self.file_type == old_file_state.file_type
152            && self.mtime == old_file_state.mtime
153            && self.size == old_file_state.size
154    }
155
156    /// Indicates that a file exists in the tree but that it needs to be
157    /// re-stat'ed on the next snapshot.
158    fn placeholder() -> Self {
159        #[cfg(unix)]
160        let executable = false;
161        #[cfg(windows)]
162        let executable = ();
163        FileState {
164            file_type: FileType::Normal { executable },
165            mtime: MillisSinceEpoch(0),
166            size: 0,
167            materialized_conflict_data: None,
168        }
169    }
170
171    fn for_file(
172        executable: bool,
173        size: u64,
174        metadata: &Metadata,
175        materialized_conflict_data: Option<MaterializedConflictData>,
176    ) -> Self {
177        #[cfg(windows)]
178        let executable = {
179            // Windows doesn't support executable bit.
180            let _ = executable;
181        };
182        FileState {
183            file_type: FileType::Normal { executable },
184            mtime: mtime_from_metadata(metadata),
185            size,
186            materialized_conflict_data,
187        }
188    }
189
190    fn for_symlink(metadata: &Metadata) -> Self {
191        // When using fscrypt, the reported size is not the content size. So if
192        // we were to record the content size here (like we do for regular files), we
193        // would end up thinking the file has changed every time we snapshot.
194        FileState {
195            file_type: FileType::Symlink,
196            mtime: mtime_from_metadata(metadata),
197            size: metadata.len(),
198            materialized_conflict_data: None,
199        }
200    }
201
202    fn for_gitsubmodule() -> Self {
203        FileState {
204            file_type: FileType::GitSubmodule,
205            mtime: MillisSinceEpoch(0),
206            size: 0,
207            materialized_conflict_data: None,
208        }
209    }
210}
211
/// Owned map of path to file states, backed by proto data.
#[derive(Clone, Debug)]
struct FileStatesMap {
    // Entries sorted by repo path with no duplicates; see
    // `is_file_state_entries_proto_unique_and_sorted`.
    data: Vec<crate::protos::working_copy::FileStateEntry>,
}
217
impl FileStatesMap {
    /// Creates an empty map.
    fn new() -> Self {
        FileStatesMap { data: Vec::new() }
    }

    /// Builds a map from proto entries, sorting them by repo path first
    /// unless the caller guarantees they are already sorted.
    fn from_proto(
        mut data: Vec<crate::protos::working_copy::FileStateEntry>,
        is_sorted: bool,
    ) -> Self {
        if !is_sorted {
            // Compare as RepoPath, not as raw strings: the ordering of '/'
            // relative to other bytes may differ.
            data.sort_unstable_by(|entry1, entry2| {
                let path1 = RepoPath::from_internal_string(&entry1.path);
                let path2 = RepoPath::from_internal_string(&entry2.path);
                path1.cmp(path2)
            });
        }
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(&data));
        FileStatesMap { data }
    }

    /// Merges changed and deleted entries into this map. The changed entries
    /// must be sorted by path.
    fn merge_in(
        &mut self,
        changed_file_states: Vec<(RepoPathBuf, FileState)>,
        deleted_files: &HashSet<RepoPathBuf>,
    ) {
        if changed_file_states.is_empty() && deleted_files.is_empty() {
            return; // Nothing to merge.
        }
        debug_assert!(
            changed_file_states
                .iter()
                .tuple_windows()
                .all(|((path1, _), (path2, _))| path1 < path2),
            "changed_file_states must be sorted and have no duplicates"
        );
        // Both sequences are sorted by path, so a single merge pass suffices.
        self.data = itertools::merge_join_by(
            mem::take(&mut self.data),
            changed_file_states,
            |old_entry, (changed_path, _)| {
                RepoPath::from_internal_string(&old_entry.path).cmp(changed_path)
            },
        )
        .filter_map(|diff| match diff {
            // A changed entry replaces any existing one; new paths are added.
            EitherOrBoth::Both(_, (path, state)) | EitherOrBoth::Right((path, state)) => {
                debug_assert!(!deleted_files.contains(&path));
                Some(file_state_entry_to_proto(path, &state))
            }
            // Untouched entries are kept unless the path was deleted.
            EitherOrBoth::Left(entry) => {
                let present = !deleted_files.contains(RepoPath::from_internal_string(&entry.path));
                present.then_some(entry)
            }
        })
        .collect();
    }

    /// Removes all entries.
    fn clear(&mut self) {
        self.data.clear();
    }

    /// Returns read-only map containing all file states.
    fn all(&self) -> FileStates<'_> {
        FileStates::from_sorted(&self.data)
    }
}
284
/// Read-only map of path to file states, possibly filtered by path prefix.
#[derive(Clone, Copy, Debug)]
pub struct FileStates<'a> {
    // Borrowed slice of entries sorted by repo path, with no duplicates.
    data: &'a [crate::protos::working_copy::FileStateEntry],
}
290
impl<'a> FileStates<'a> {
    /// Wraps entries that are already sorted by path and unique.
    fn from_sorted(data: &'a [crate::protos::working_copy::FileStateEntry]) -> Self {
        debug_assert!(is_file_state_entries_proto_unique_and_sorted(data));
        FileStates { data }
    }

    /// Returns file states under the given directory path.
    pub fn prefixed(&self, base: &RepoPath) -> Self {
        let range = self.prefixed_range(base);
        Self::from_sorted(&self.data[range])
    }

    /// Faster version of `prefixed("<dir>/<base>")`. Requires that all entries
    /// share the same prefix `dir`.
    fn prefixed_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Self {
        let range = self.prefixed_range_at(dir, base);
        Self::from_sorted(&self.data[range])
    }

    /// Returns true if this contains no entries.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns true if the given `path` exists.
    pub fn contains_path(&self, path: &RepoPath) -> bool {
        self.exact_position(path).is_some()
    }

    /// Returns file state for the given `path`.
    pub fn get(&self, path: &RepoPath) -> Option<FileState> {
        let pos = self.exact_position(path)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    /// Faster version of `get("<dir>/<name>")`. Requires that all entries share
    /// the same prefix `dir`.
    fn get_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<FileState> {
        let pos = self.exact_position_at(dir, name)?;
        let (_, state) = file_state_entry_from_proto(&self.data[pos]);
        Some(state)
    }

    /// Binary-searches for the entry whose path equals `path` exactly.
    fn exact_position(&self, path: &RepoPath) -> Option<usize> {
        self.data
            .binary_search_by(|entry| RepoPath::from_internal_string(&entry.path).cmp(path))
            .ok()
    }

    /// Like `exact_position`, but compares only the component after the
    /// shared `dir` prefix, avoiding full-path comparisons.
    fn exact_position_at(&self, dir: &RepoPath, name: &RepoPathComponent) -> Option<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        // Skip the "<dir>/" prefix; the root dir contributes no slash.
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        self.data
            .binary_search_by(|entry| {
                let tail = entry.path.get(prefix_len..).unwrap_or("");
                match tail.split_once('/') {
                    // "<name>/*" > "<name>"
                    Some((pre, _)) => pre.cmp(name.as_internal_str()).then(Ordering::Greater),
                    None => tail.cmp(name.as_internal_str()),
                }
            })
            .ok()
    }

    /// Computes the contiguous index range of entries under `base`.
    fn prefixed_range(&self, base: &RepoPath) -> Range<usize> {
        // First index whose path sorts at or after `base`, ...
        let start = self
            .data
            .partition_point(|entry| RepoPath::from_internal_string(&entry.path) < base);
        // ... then the count of consecutive entries prefixed by `base`.
        let len = self.data[start..]
            .partition_point(|entry| RepoPath::from_internal_string(&entry.path).starts_with(base));
        start..(start + len)
    }

    /// Like `prefixed_range`, but compares only the component after the
    /// shared `dir` prefix, avoiding full-path comparisons.
    fn prefixed_range_at(&self, dir: &RepoPath, base: &RepoPathComponent) -> Range<usize> {
        debug_assert!(self.paths().all(|path| path.starts_with(dir)));
        // Skip the "<dir>/" prefix; the root dir contributes no slash.
        let slash_len = !dir.is_root() as usize;
        let prefix_len = dir.as_internal_file_string().len() + slash_len;
        let start = self.data.partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name < base.as_internal_str()
        });
        let len = self.data[start..].partition_point(|entry| {
            let tail = entry.path.get(prefix_len..).unwrap_or("");
            let entry_name = tail.split_once('/').map_or(tail, |(name, _)| name);
            entry_name == base.as_internal_str()
        });
        start..(start + len)
    }

    /// Iterates file state entries sorted by path.
    pub fn iter(&self) -> FileStatesIter<'a> {
        self.data.iter().map(file_state_entry_from_proto)
    }

    /// Iterates sorted file paths.
    pub fn paths(&self) -> impl ExactSizeIterator<Item = &'a RepoPath> + use<'a> {
        self.data
            .iter()
            .map(|entry| RepoPath::from_internal_string(&entry.path))
    }
}
395
/// Iterator type returned by [`FileStates::iter`]. Named (via a fn pointer)
/// so it can be used as an associated `IntoIter` type.
type FileStatesIter<'a> = iter::Map<
    slice::Iter<'a, crate::protos::working_copy::FileStateEntry>,
    fn(&crate::protos::working_copy::FileStateEntry) -> (&RepoPath, FileState),
>;
400
// Allows iterating `FileStates` directly in a `for` loop.
impl<'a> IntoIterator for FileStates<'a> {
    type Item = (&'a RepoPath, FileState);
    type IntoIter = FileStatesIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
409
/// On-disk working-copy state: the currently checked-out tree plus per-file
/// snapshot data.
pub struct TreeState {
    store: Arc<Store>,
    // Root directory of the working copy on disk.
    working_copy_path: PathBuf,
    // Directory containing the serialized "tree_state" file.
    state_path: PathBuf,
    // Id of the tree this working copy was last updated to.
    tree_id: MergedTreeId,
    // Recorded state of each tracked file, keyed by repo path.
    file_states: FileStatesMap,
    // Currently only path prefixes
    sparse_patterns: Vec<RepoPathBuf>,
    // mtime of the on-disk "tree_state" file when last read (0 if it could
    // not be stat'ed); see update_own_mtime().
    own_mtime: MillisSinceEpoch,
    // Whether the filesystem supports creating symlinks (probed at creation;
    // assumed false if the probe fails).
    symlink_support: bool,

    /// The most recent clock value returned by Watchman. Will only be set if
    /// the repo is configured to use the Watchman filesystem monitor and
    /// Watchman has been queried at least once.
    watchman_clock: Option<crate::protos::working_copy::WatchmanClock>,
}
426
427fn file_state_from_proto(proto: &crate::protos::working_copy::FileState) -> FileState {
428    let file_type = match proto.file_type() {
429        crate::protos::working_copy::FileType::Normal => FileType::Normal {
430            executable: FileExecutableFlag::default(),
431        },
432        #[cfg(unix)]
433        crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: true },
434        // can exist in files written by older versions of jj
435        #[cfg(windows)]
436        crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: () },
437        crate::protos::working_copy::FileType::Symlink => FileType::Symlink,
438        crate::protos::working_copy::FileType::Conflict => FileType::Normal {
439            executable: FileExecutableFlag::default(),
440        },
441        crate::protos::working_copy::FileType::GitSubmodule => FileType::GitSubmodule,
442    };
443    FileState {
444        file_type,
445        mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch),
446        size: proto.size,
447        materialized_conflict_data: proto.materialized_conflict_data.as_ref().map(|data| {
448            MaterializedConflictData {
449                conflict_marker_len: data.conflict_marker_len,
450            }
451        }),
452    }
453}
454
455fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::FileState {
456    let mut proto = crate::protos::working_copy::FileState::default();
457    let file_type = match &file_state.file_type {
458        #[cfg(unix)]
459        FileType::Normal { executable: false } => crate::protos::working_copy::FileType::Normal,
460        #[cfg(unix)]
461        FileType::Normal { executable: true } => crate::protos::working_copy::FileType::Executable,
462        #[cfg(windows)]
463        FileType::Normal { executable: () } => crate::protos::working_copy::FileType::Normal,
464        FileType::Symlink => crate::protos::working_copy::FileType::Symlink,
465        FileType::GitSubmodule => crate::protos::working_copy::FileType::GitSubmodule,
466    };
467    proto.file_type = file_type as i32;
468    proto.mtime_millis_since_epoch = file_state.mtime.0;
469    proto.size = file_state.size;
470    proto.materialized_conflict_data = file_state.materialized_conflict_data.map(|data| {
471        crate::protos::working_copy::MaterializedConflictData {
472            conflict_marker_len: data.conflict_marker_len,
473        }
474    });
475    proto
476}
477
478fn file_state_entry_from_proto(
479    proto: &crate::protos::working_copy::FileStateEntry,
480) -> (&RepoPath, FileState) {
481    let path = RepoPath::from_internal_string(&proto.path);
482    (path, file_state_from_proto(proto.state.as_ref().unwrap()))
483}
484
485fn file_state_entry_to_proto(
486    path: RepoPathBuf,
487    state: &FileState,
488) -> crate::protos::working_copy::FileStateEntry {
489    crate::protos::working_copy::FileStateEntry {
490        path: path.into_internal_string(),
491        state: Some(file_state_to_proto(state)),
492    }
493}
494
495fn is_file_state_entries_proto_unique_and_sorted(
496    data: &[crate::protos::working_copy::FileStateEntry],
497) -> bool {
498    data.iter()
499        .map(|entry| RepoPath::from_internal_string(&entry.path))
500        .tuple_windows()
501        .all(|(path1, path2)| path1 < path2)
502}
503
504fn sparse_patterns_from_proto(
505    proto: Option<&crate::protos::working_copy::SparsePatterns>,
506) -> Vec<RepoPathBuf> {
507    let mut sparse_patterns = vec![];
508    if let Some(proto_sparse_patterns) = proto {
509        for prefix in &proto_sparse_patterns.prefixes {
510            sparse_patterns.push(RepoPathBuf::from_internal_string(prefix));
511        }
512    } else {
513        // For compatibility with old working copies.
514        // TODO: Delete this is late 2022 or so.
515        sparse_patterns.push(RepoPathBuf::root());
516    }
517    sparse_patterns
518}
519
/// Creates intermediate directories from the `working_copy_path` to the
/// `repo_path` parent. Returns disk path for the `repo_path` file.
///
/// If an intermediate directory exists and if it is a file or symlink, this
/// function returns `Ok(None)` to signal that the path should be skipped.
/// The `working_copy_path` directory may be a symlink.
///
/// If an existing or newly-created sub directory points to ".git" or ".jj",
/// this function returns an error.
///
/// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts.
/// Another process may remove the directory created by this function and put a
/// symlink there.
fn create_parent_dirs(
    working_copy_path: &Path,
    repo_path: &RepoPath,
) -> Result<Option<PathBuf>, CheckoutError> {
    let (parent_path, basename) = repo_path.split().expect("repo path shouldn't be root");
    let mut dir_path = working_copy_path.to_owned();
    // Walk down component by component, creating each directory as we go.
    for c in parent_path.components() {
        // Ensure that the name is a normal entry of the current dir_path.
        dir_path.push(c.to_fs_name().map_err(|err| err.with_path(repo_path))?);
        // A directory named ".git" or ".jj" can be temporarily created. It
        // might trick workspace path discovery, but is harmless so long as the
        // directory is empty.
        let new_dir_created = match fs::create_dir(&dir_path) {
            Ok(()) => true, // New directory
            Err(err) => match dir_path.symlink_metadata() {
                Ok(m) if m.is_dir() => false, // Existing directory
                Ok(_) => {
                    return Ok(None); // Skip existing file or symlink
                }
                // Couldn't stat either: report the original create_dir error.
                Err(_) => {
                    return Err(CheckoutError::Other {
                        message: format!(
                            "Failed to create parent directories for {}",
                            repo_path.to_fs_path_unchecked(working_copy_path).display(),
                        ),
                        err: err.into(),
                    })
                }
            },
        };
        // Invalid component (e.g. "..") should have been rejected.
        // The current dir_path should be an entry of dir_path.parent().
        // Roll back a directory we just created if the check fails.
        reject_reserved_existing_path(&dir_path).inspect_err(|_| {
            if new_dir_created {
                fs::remove_dir(&dir_path).ok();
            }
        })?;
    }

    let mut file_path = dir_path;
    file_path.push(
        basename
            .to_fs_name()
            .map_err(|err| err.with_path(repo_path))?,
    );
    Ok(Some(file_path))
}
580
581/// Removes existing file named `disk_path` if any. Returns `Ok(true)` if the
582/// file was there and got removed, meaning that new file can be safely created.
583///
584/// If the existing file points to ".git" or ".jj", this function returns an
585/// error.
586fn remove_old_file(disk_path: &Path) -> Result<bool, CheckoutError> {
587    reject_reserved_existing_path(disk_path)?;
588    match fs::remove_file(disk_path) {
589        Ok(()) => Ok(true),
590        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
591        // TODO: Use io::ErrorKind::IsADirectory if it gets stabilized
592        Err(_) if disk_path.symlink_metadata().is_ok_and(|m| m.is_dir()) => Ok(false),
593        Err(err) => Err(CheckoutError::Other {
594            message: format!("Failed to remove file {}", disk_path.display()),
595            err: err.into(),
596        }),
597    }
598}
599
/// Checks if new file or symlink named `disk_path` can be created.
///
/// If the file already exists, this function return `Ok(false)` to signal
/// that the path should be skipped.
///
/// If the path may point to ".git" or ".jj" entry, this function returns an
/// error.
///
/// This function can fail if `disk_path.parent()` isn't a directory.
fn can_create_new_file(disk_path: &Path) -> Result<bool, CheckoutError> {
    // New file or symlink will be created by caller. If it were pointed to by
    // name ".git" or ".jj", git/jj CLI could be tricked to load configuration
    // from an attacker-controlled location. So we first test the path by
    // creating an empty file.
    let new_file_created = match OpenOptions::new()
        .write(true)
        .create_new(true) // Don't overwrite, don't follow symlink
        .open(disk_path)
    {
        Ok(_) => true,
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => false,
        // Workaround for "Access is denied. (os error 5)" error on Windows.
        // If the path can be stat'ed, treat it as an existing entry.
        Err(_) => match disk_path.symlink_metadata() {
            Ok(_) => false,
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to stat {}", disk_path.display()),
                    err: err.into(),
                })
            }
        },
    };
    // Validate the path; roll back the probe file if validation fails.
    reject_reserved_existing_path(disk_path).inspect_err(|_| {
        if new_file_created {
            fs::remove_file(disk_path).ok();
        }
    })?;
    // Clean up the probe file so the caller can create the real content.
    if new_file_created {
        fs::remove_file(disk_path).map_err(|err| CheckoutError::Other {
            message: format!("Failed to remove temporary file {}", disk_path.display()),
            err: err.into(),
        })?;
    }
    Ok(new_file_created)
}
645
/// Entry names that must never be (re)pointed at by working-copy content.
const RESERVED_DIR_NAMES: &[&str] = &[".git", ".jj"];

/// Suppose the `disk_path` exists, checks if the last component points to
/// ".git" or ".jj" in the same parent directory.
fn reject_reserved_existing_path(disk_path: &Path) -> Result<(), CheckoutError> {
    let parent_dir_path = disk_path.parent().expect("content path shouldn't be root");
    for name in RESERVED_DIR_NAMES {
        let reserved_path = parent_dir_path.join(name);
        // is_same_file follows symlinks and compares the underlying entries,
        // so this also catches symlinks pointing at the reserved dirs.
        match same_file::is_same_file(disk_path, &reserved_path) {
            Ok(true) => {
                return Err(CheckoutError::ReservedPathComponent {
                    path: disk_path.to_owned(),
                    name,
                });
            }
            Ok(false) => {}
            // If the existing disk_path pointed to the reserved path, the
            // reserved path would exist.
            Err(err) if err.kind() == io::ErrorKind::NotFound => {}
            Err(err) => {
                return Err(CheckoutError::Other {
                    message: format!("Failed to validate path {}", disk_path.display()),
                    err: err.into(),
                });
            }
        }
    }
    Ok(())
}
675
676fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch {
677    let time = metadata
678        .modified()
679        .expect("File mtime not supported on this platform?");
680    let since_epoch = time
681        .duration_since(UNIX_EPOCH)
682        .expect("mtime before unix epoch");
683
684    MillisSinceEpoch(
685        i64::try_from(since_epoch.as_millis())
686            .expect("mtime billions of years into the future or past"),
687    )
688}
689
690fn file_state(metadata: &Metadata) -> Option<FileState> {
691    let metadata_file_type = metadata.file_type();
692    let file_type = if metadata_file_type.is_dir() {
693        None
694    } else if metadata_file_type.is_symlink() {
695        Some(FileType::Symlink)
696    } else if metadata_file_type.is_file() {
697        #[cfg(unix)]
698        if metadata.permissions().mode() & 0o111 != 0 {
699            Some(FileType::Normal { executable: true })
700        } else {
701            Some(FileType::Normal { executable: false })
702        }
703        #[cfg(windows)]
704        Some(FileType::Normal { executable: () })
705    } else {
706        None
707    };
708    file_type.map(|file_type| {
709        let mtime = mtime_from_metadata(metadata);
710        let size = metadata.len();
711        FileState {
712            file_type,
713            mtime,
714            size,
715            materialized_conflict_data: None,
716        }
717    })
718}
719
/// Result of consulting the filesystem monitor before a snapshot.
struct FsmonitorMatcher {
    // NOTE(review): `None` presumably means "no restriction" (full scan) —
    // confirm at the use site, which is outside this chunk.
    matcher: Option<Box<dyn Matcher>>,
    // New Watchman clock to persist after the snapshot, if Watchman was
    // queried.
    watchman_clock: Option<crate::protos::working_copy::WatchmanClock>,
}
724
/// Errors that can occur while loading or saving a [`TreeState`].
#[derive(Debug, Error)]
pub enum TreeStateError {
    /// The "tree_state" file could not be read.
    #[error("Reading tree state from {path}")]
    ReadTreeState { path: PathBuf, source: io::Error },
    /// The "tree_state" file contents could not be decoded as proto.
    #[error("Decoding tree state from {path}")]
    DecodeTreeState {
        path: PathBuf,
        source: prost::DecodeError,
    },
    /// The serialized state could not be written to the temporary file.
    #[error("Writing tree state to temporary file {path}")]
    WriteTreeState { path: PathBuf, source: io::Error },
    /// The temporary file could not be renamed into place.
    #[error("Persisting tree state to file {path}")]
    PersistTreeState { path: PathBuf, source: io::Error },
    /// The filesystem monitor (e.g. Watchman) reported an error.
    #[error("Filesystem monitor error")]
    Fsmonitor(#[source] Box<dyn Error + Send + Sync>),
}
741
742impl TreeState {
    /// Returns the root directory of the working copy on disk.
    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }
746
    /// Returns the id of the tree this working copy was last updated to.
    pub fn current_tree_id(&self) -> &MergedTreeId {
        &self.tree_id
    }
750
    /// Returns a read-only view of all recorded file states.
    pub fn file_states(&self) -> FileStates<'_> {
        self.file_states.all()
    }
754
    /// Returns the sparse-checkout patterns (currently path prefixes only).
    pub fn sparse_patterns(&self) -> &Vec<RepoPathBuf> {
        &self.sparse_patterns
    }
758
    /// Builds a matcher selecting the paths covered by the sparse patterns.
    fn sparse_matcher(&self) -> Box<dyn Matcher> {
        Box::new(PrefixMatcher::new(&self.sparse_patterns))
    }
762
763    pub fn init(
764        store: Arc<Store>,
765        working_copy_path: PathBuf,
766        state_path: PathBuf,
767    ) -> Result<TreeState, TreeStateError> {
768        let mut wc = TreeState::empty(store, working_copy_path, state_path);
769        wc.save()?;
770        Ok(wc)
771    }
772
    /// Creates an in-memory tree state pointing at the empty tree. Nothing is
    /// written to disk.
    fn empty(store: Arc<Store>, working_copy_path: PathBuf, state_path: PathBuf) -> TreeState {
        let tree_id = store.empty_merged_tree_id();
        TreeState {
            store,
            working_copy_path,
            state_path,
            tree_id,
            file_states: FileStatesMap::new(),
            sparse_patterns: vec![RepoPathBuf::root()],
            own_mtime: MillisSinceEpoch(0),
            // Probe symlink support once; assume unsupported if the probe
            // itself fails.
            symlink_support: check_symlink_support().unwrap_or(false),
            watchman_clock: None,
        }
    }
787
788    pub fn load(
789        store: Arc<Store>,
790        working_copy_path: PathBuf,
791        state_path: PathBuf,
792    ) -> Result<TreeState, TreeStateError> {
793        let tree_state_path = state_path.join("tree_state");
794        let file = match File::open(&tree_state_path) {
795            Err(ref err) if err.kind() == io::ErrorKind::NotFound => {
796                return TreeState::init(store, working_copy_path, state_path);
797            }
798            Err(err) => {
799                return Err(TreeStateError::ReadTreeState {
800                    path: tree_state_path,
801                    source: err,
802                });
803            }
804            Ok(file) => file,
805        };
806
807        let mut wc = TreeState::empty(store, working_copy_path, state_path);
808        wc.read(&tree_state_path, file)?;
809        Ok(wc)
810    }
811
812    fn update_own_mtime(&mut self) {
813        if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() {
814            self.own_mtime = mtime_from_metadata(&metadata);
815        } else {
816            self.own_mtime = MillisSinceEpoch(0);
817        }
818    }
819
    /// Replaces this instance's in-memory state with the contents of `file`.
    /// `tree_state_path` is used for error reporting only.
    fn read(&mut self, tree_state_path: &Path, mut file: File) -> Result<(), TreeStateError> {
        // Remember the state file's own mtime before reading its contents.
        self.update_own_mtime();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf)
            .map_err(|err| TreeStateError::ReadTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            })?;
        let proto = crate::protos::working_copy::TreeState::decode(&*buf).map_err(|err| {
            TreeStateError::DecodeTreeState {
                path: tree_state_path.to_owned(),
                source: err,
            }
        })?;
        if proto.tree_ids.is_empty() {
            // Older state files recorded a single legacy tree id.
            self.tree_id = MergedTreeId::Legacy(TreeId::new(proto.legacy_tree_id.clone()));
        } else {
            // Newer state files record the tree ids of a merge.
            let tree_ids_builder: MergeBuilder<TreeId> = proto
                .tree_ids
                .iter()
                .map(|id| TreeId::new(id.clone()))
                .collect();
            self.tree_id = MergedTreeId::Merge(tree_ids_builder.build());
        }
        self.file_states =
            FileStatesMap::from_proto(proto.file_states, proto.is_file_states_sorted);
        self.sparse_patterns = sparse_patterns_from_proto(proto.sparse_patterns.as_ref());
        self.watchman_clock = proto.watchman_clock;
        Ok(())
    }
850
851    #[expect(clippy::assigning_clones)]
852    fn save(&mut self) -> Result<(), TreeStateError> {
853        let mut proto: crate::protos::working_copy::TreeState = Default::default();
854        match &self.tree_id {
855            MergedTreeId::Legacy(tree_id) => {
856                proto.legacy_tree_id = tree_id.to_bytes();
857            }
858            MergedTreeId::Merge(tree_ids) => {
859                proto.tree_ids = tree_ids.iter().map(|id| id.to_bytes()).collect();
860            }
861        }
862
863        proto.file_states = self.file_states.data.clone();
864        // `FileStatesMap` is guaranteed to be sorted.
865        proto.is_file_states_sorted = true;
866        let mut sparse_patterns = crate::protos::working_copy::SparsePatterns::default();
867        for path in &self.sparse_patterns {
868            sparse_patterns
869                .prefixes
870                .push(path.as_internal_file_string().to_owned());
871        }
872        proto.sparse_patterns = Some(sparse_patterns);
873        proto.watchman_clock = self.watchman_clock.clone();
874
875        let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
876        temp_file
877            .as_file_mut()
878            .write_all(&proto.encode_to_vec())
879            .map_err(|err| TreeStateError::WriteTreeState {
880                path: self.state_path.clone(),
881                source: err,
882            })?;
883        // update own write time while we before we rename it, so we know
884        // there is no unknown data in it
885        self.update_own_mtime();
886        // TODO: Retry if persisting fails (it will on Windows if the file happened to
887        // be open for read).
888        let target_path = self.state_path.join("tree_state");
889        temp_file
890            .persist(&target_path)
891            .map_err(|tempfile::PersistError { error, file: _ }| {
892                TreeStateError::PersistTreeState {
893                    path: target_path.clone(),
894                    source: error,
895                }
896            })?;
897        Ok(())
898    }
899
900    fn current_tree(&self) -> BackendResult<MergedTree> {
901        self.store.get_root_tree(&self.tree_id)
902    }
903
904    fn reset_watchman(&mut self) {
905        self.watchman_clock.take();
906    }
907
    /// Queries Watchman for files changed since the saved watchman clock.
    ///
    /// Returns the new clock and, if available, the list of changed paths.
    /// (A `None` path list means the changed set is unknown; callers fall
    /// back to scanning everything.)
    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn query_watchman(
        &self,
        config: &WatchmanConfig,
    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from);
        let changed_files = fsmonitor
            .query_changed_files(previous_clock)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        Ok(changed_files)
    }
925
    /// Returns whether a Watchman trigger is registered for this working
    /// copy.
    #[cfg(feature = "watchman")]
    #[tokio::main(flavor = "current_thread")]
    #[instrument(skip(self))]
    pub async fn is_watchman_trigger_registered(
        &self,
        config: &WatchmanConfig,
    ) -> Result<bool, TreeStateError> {
        let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path, config)
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))?;
        fsmonitor
            .is_trigger_registered()
            .await
            .map_err(|err| TreeStateError::Fsmonitor(Box::new(err)))
    }
941}
942
/// Functions to snapshot local-disk files to the store.
impl TreeState {
    /// Look for changes to the working copy. If there are any changes, create
    /// a new tree from it.
    ///
    /// Returns `(is_dirty, stats)`, where `is_dirty` indicates whether the
    /// tree id, the file states, or the fsmonitor clock changed.
    #[instrument(skip_all)]
    pub fn snapshot(
        &mut self,
        options: &SnapshotOptions,
    ) -> Result<(bool, SnapshotStats), SnapshotError> {
        let &SnapshotOptions {
            ref base_ignores,
            ref fsmonitor_settings,
            progress,
            start_tracking_matcher,
            max_new_file_size,
            conflict_marker_style,
        } = options;

        let sparse_matcher = self.sparse_matcher();

        // If an fsmonitor is in use, its clock must be saved even when no
        // files changed.
        let fsmonitor_clock_needs_save = *fsmonitor_settings != FsmonitorSettings::None;
        let mut is_dirty = fsmonitor_clock_needs_save;
        let FsmonitorMatcher {
            matcher: fsmonitor_matcher,
            watchman_clock,
        } = self.make_fsmonitor_matcher(fsmonitor_settings)?;
        // No fsmonitor matcher means "scan everything".
        let fsmonitor_matcher = match fsmonitor_matcher.as_ref() {
            None => &EverythingMatcher,
            Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(),
        };

        let matcher = IntersectionMatcher::new(sparse_matcher.as_ref(), fsmonitor_matcher);
        if matcher.visit(RepoPath::root()).is_nothing() {
            // No need to load the current tree, set up channels, etc.
            self.watchman_clock = watchman_clock;
            return Ok((is_dirty, SnapshotStats::default()));
        }

        // Channels collecting the results of the parallel filesystem scan.
        let (tree_entries_tx, tree_entries_rx) = channel();
        let (file_states_tx, file_states_rx) = channel();
        let (untracked_paths_tx, untracked_paths_rx) = channel();
        let (deleted_files_tx, deleted_files_rx) = channel();

        trace_span!("traverse filesystem").in_scope(|| -> Result<(), SnapshotError> {
            let snapshotter = FileSnapshotter {
                tree_state: self,
                current_tree: &self.current_tree()?,
                matcher: &matcher,
                start_tracking_matcher,
                // Move tx sides so they'll be dropped at the end of the scope.
                tree_entries_tx,
                file_states_tx,
                untracked_paths_tx,
                deleted_files_tx,
                error: OnceLock::new(),
                progress,
                max_new_file_size,
                conflict_marker_style,
            };
            let directory_to_visit = DirectoryToVisit {
                dir: RepoPathBuf::root(),
                disk_dir: self.working_copy_path.clone(),
                git_ignore: base_ignores.clone(),
                file_states: self.file_states.all(),
            };
            // Here we use scope as a queue of per-directory jobs.
            rayon::scope(|scope| {
                snapshotter.spawn_ok(scope, |scope| {
                    snapshotter.visit_directory(directory_to_visit, scope)
                });
            });
            snapshotter.into_result()
        })?;

        let stats = SnapshotStats {
            untracked_paths: untracked_paths_rx.into_iter().collect(),
        };
        let mut tree_builder = MergedTreeBuilder::new(self.tree_id.clone());
        trace_span!("process tree entries").in_scope(|| {
            for (path, tree_values) in &tree_entries_rx {
                tree_builder.set_or_remove(path, tree_values);
            }
        });
        let deleted_files = trace_span!("process deleted tree entries").in_scope(|| {
            let deleted_files = HashSet::from_iter(deleted_files_rx);
            is_dirty |= !deleted_files.is_empty();
            for file in &deleted_files {
                tree_builder.set_or_remove(file.clone(), Merge::absent());
            }
            deleted_files
        });
        trace_span!("process file states").in_scope(|| {
            let changed_file_states = file_states_rx
                .iter()
                .sorted_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2))
                .collect_vec();
            is_dirty |= !changed_file_states.is_empty();
            self.file_states
                .merge_in(changed_file_states, &deleted_files);
        });
        trace_span!("write tree").in_scope(|| {
            let new_tree_id = tree_builder.write_tree(&self.store).unwrap();
            is_dirty |= new_tree_id != self.tree_id;
            self.tree_id = new_tree_id;
        });
        if cfg!(debug_assertions) {
            // Sanity check (debug builds only): the tracked file states must
            // exactly match the paths in the newly-written tree.
            let tree = self.current_tree().unwrap();
            let tree_paths: HashSet<_> = tree
                .entries_matching(sparse_matcher.as_ref())
                .filter_map(|(path, result)| result.is_ok().then_some(path))
                .collect();
            let file_states = self.file_states.all();
            let state_paths: HashSet<_> = file_states.paths().map(|path| path.to_owned()).collect();
            assert_eq!(state_paths, tree_paths);
        }
        // Since untracked paths aren't cached in the tree state, we'll need to
        // rescan the working directory changes to report or track them later.
        // TODO: store untracked paths and update watchman_clock?
        if stats.untracked_paths.is_empty() || watchman_clock.is_none() {
            self.watchman_clock = watchman_clock;
        } else {
            tracing::info!("not updating watchman clock because there are untracked files");
        }
        Ok((is_dirty, stats))
    }

    /// Builds a matcher restricting the snapshot to files the filesystem
    /// monitor reported as changed, plus the new monitor clock to record.
    ///
    /// A `None` matcher means "scan everything". A Watchman query failure is
    /// logged and degrades to a full scan instead of failing the snapshot.
    #[instrument(skip_all)]
    fn make_fsmonitor_matcher(
        &self,
        fsmonitor_settings: &FsmonitorSettings,
    ) -> Result<FsmonitorMatcher, SnapshotError> {
        let (watchman_clock, changed_files) = match fsmonitor_settings {
            FsmonitorSettings::None => (None, None),
            FsmonitorSettings::Test { changed_files } => (None, Some(changed_files.clone())),
            #[cfg(feature = "watchman")]
            FsmonitorSettings::Watchman(config) => match self.query_watchman(config) {
                Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files),
                Err(err) => {
                    tracing::warn!(?err, "Failed to query filesystem monitor");
                    (None, None)
                }
            },
            #[cfg(not(feature = "watchman"))]
            FsmonitorSettings::Watchman(_) => {
                return Err(SnapshotError::Other {
                    message: "Failed to query the filesystem monitor".to_string(),
                    err: "Cannot query Watchman because jj was not compiled with the `watchman` \
                          feature (consider disabling `core.fsmonitor`)"
                        .into(),
                });
            }
        };
        let matcher: Option<Box<dyn Matcher>> = match changed_files {
            None => None,
            Some(changed_files) => {
                // Paths that don't parse as repo-relative paths are dropped.
                let repo_paths = trace_span!("processing fsmonitor paths").in_scope(|| {
                    changed_files
                        .into_iter()
                        .filter_map(|path| RepoPathBuf::from_relative_path(path).ok())
                        .collect_vec()
                });

                Some(Box::new(FilesMatcher::new(repo_paths)))
            }
        };
        Ok(FsmonitorMatcher {
            matcher,
            watchman_clock,
        })
    }
}
1114
/// One unit of work for the parallel snapshot: a single on-disk directory
/// to scan.
struct DirectoryToVisit<'a> {
    /// Repo-relative path of the directory.
    dir: RepoPathBuf,
    /// Filesystem path of the directory on disk.
    disk_dir: PathBuf,
    /// Chain of `.gitignore` rules in effect for this directory.
    git_ignore: Arc<GitIgnoreFile>,
    /// Tracked file states restricted to paths under `dir`.
    file_states: FileStates<'a>,
}
1121
/// Kind of a directory entry found on disk.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PresentDirEntryKind {
    /// A sub-directory entry.
    Dir,
    /// A non-directory entry.
    File,
}
1127
/// Names of entries found on disk in one directory, used to detect files
/// that were deleted (tracked but no longer present).
#[derive(Clone, Debug)]
struct PresentDirEntries {
    /// Names of sub-directories present in the scanned directory.
    dirs: HashSet<String>,
    /// Names of files present in the scanned directory.
    files: HashSet<String>,
}
1133
/// Helper to scan local-disk directories and files in parallel.
struct FileSnapshotter<'a> {
    /// The tree state being snapshotted (working-copy path, own mtime, etc.).
    tree_state: &'a TreeState,
    /// The currently recorded tree, compared against on-disk content.
    current_tree: &'a MergedTree,
    /// Restricts which paths are visited and snapshotted.
    matcher: &'a dyn Matcher,
    /// New files NOT matched by this matcher are left untracked.
    start_tracking_matcher: &'a dyn Matcher,
    /// Emits updated tree values for changed paths.
    tree_entries_tx: Sender<(RepoPathBuf, MergedTreeValue)>,
    /// Emits updated file states for changed paths.
    file_states_tx: Sender<(RepoPathBuf, FileState)>,
    /// Emits paths deliberately left untracked, with the reason.
    untracked_paths_tx: Sender<(RepoPathBuf, UntrackedReason)>,
    /// Emits tracked paths that no longer exist on disk.
    deleted_files_tx: Sender<RepoPathBuf>,
    /// First error hit by any parallel job; later errors are discarded.
    error: OnceLock<SnapshotError>,
    /// Optional per-file progress callback.
    progress: Option<&'a SnapshotProgress<'a>>,
    /// New files larger than this are left untracked.
    max_new_file_size: u64,
    /// Style of conflict markers parsed from on-disk file content.
    conflict_marker_style: ConflictMarkerStyle,
}
1149
impl FileSnapshotter<'_> {
    /// Spawns `body` on the rayon scope, skipping it entirely if another job
    /// has already failed, and recording the first error encountered.
    fn spawn_ok<'scope, F>(&'scope self, scope: &rayon::Scope<'scope>, body: F)
    where
        F: FnOnce(&rayon::Scope<'scope>) -> Result<(), SnapshotError> + Send + 'scope,
    {
        scope.spawn(|scope| {
            if self.error.get().is_some() {
                return;
            }
            match body(scope) {
                Ok(()) => {}
                // Only the first error wins; later ones are discarded.
                Err(err) => self.error.set(err).unwrap_or(()),
            };
        });
    }

    /// Extracts the result of the snapshot.
    fn into_result(self) -> Result<(), SnapshotError> {
        match self.error.into_inner() {
            Some(err) => Err(err),
            None => Ok(()),
        }
    }

    /// Visits the directory entries, spawns jobs to recurse into sub
    /// directories.
    fn visit_directory<'scope>(
        &'scope self,
        directory_to_visit: DirectoryToVisit<'scope>,
        scope: &rayon::Scope<'scope>,
    ) -> Result<(), SnapshotError> {
        let DirectoryToVisit {
            dir,
            disk_dir,
            git_ignore,
            file_states,
        } = directory_to_visit;

        // Extend the ignore chain with this directory's .gitignore (if any).
        let git_ignore = git_ignore
            .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
        let dir_entries: Vec<_> = disk_dir
            .read_dir()
            .and_then(|entries| entries.try_collect())
            .map_err(|err| SnapshotError::Other {
                message: format!("Failed to read directory {}", disk_dir.display()),
                err: err.into(),
            })?;
        // Partition present entries into dir/file name sets, failing on the
        // first error.
        let (dirs, files) = dir_entries
            .into_par_iter()
            // Don't split into too many small jobs. For a small directory,
            // sequential scan should be fast enough.
            .with_min_len(100)
            .filter_map(|entry| {
                self.process_dir_entry(&dir, &git_ignore, file_states, &entry, scope)
                    .transpose()
            })
            .map(|item| match item {
                Ok((PresentDirEntryKind::Dir, name)) => Ok(Either::Left(name)),
                Ok((PresentDirEntryKind::File, name)) => Ok(Either::Right(name)),
                Err(err) => Err(err),
            })
            .collect::<Result<_, _>>()?;
        let present_entries = PresentDirEntries { dirs, files };
        self.emit_deleted_files(&dir, file_states, &present_entries);
        Ok(())
    }

    /// Processes one directory entry: spawns recursion jobs for sub
    /// directories and snapshots present files.
    ///
    /// Returns the entry's kind and name if it counts as "present", or
    /// `None` if it was skipped (reserved name, git submodule, ignored,
    /// left untracked, or a special file).
    fn process_dir_entry<'scope>(
        &'scope self,
        dir: &RepoPath,
        git_ignore: &Arc<GitIgnoreFile>,
        file_states: FileStates<'scope>,
        entry: &DirEntry,
        scope: &rayon::Scope<'scope>,
    ) -> Result<Option<(PresentDirEntryKind, String)>, SnapshotError> {
        // NOTE(review): a stat failure here panics; presumably rare since the
        // entry was just listed — confirm whether it should be an error.
        let file_type = entry.file_type().unwrap();
        let file_name = entry.file_name();
        let name_string = file_name
            .into_string()
            .map_err(|path| SnapshotError::InvalidUtf8Path { path })?;

        if RESERVED_DIR_NAMES.contains(&name_string.as_str()) {
            return Ok(None);
        }
        let name = RepoPathComponent::new(&name_string);
        let path = dir.join(name);
        let maybe_current_file_state = file_states.get_at(dir, name);
        if let Some(file_state) = &maybe_current_file_state {
            if file_state.file_type == FileType::GitSubmodule {
                return Ok(None);
            }
        }

        if file_type.is_dir() {
            let file_states = file_states.prefixed_at(dir, name);
            if git_ignore.matches(&path.to_internal_dir_string()) {
                // If the whole directory is ignored by .gitignore, visit only
                // paths we're already tracking. This is because .gitignore in
                // ignored directory must be ignored. It's also more efficient.
                // start_tracking_matcher is NOT tested here because we need to
                // scan directory entries to report untracked paths.
                self.spawn_ok(scope, move |_| self.visit_tracked_files(file_states));
            } else if !self.matcher.visit(&path).is_nothing() {
                let directory_to_visit = DirectoryToVisit {
                    dir: path,
                    disk_dir: entry.path(),
                    git_ignore: git_ignore.clone(),
                    file_states,
                };
                self.spawn_ok(scope, |scope| {
                    self.visit_directory(directory_to_visit, scope)
                });
            }
            // Whether or not the directory path matches, any child file entries
            // shouldn't be touched within the current recursion step.
            Ok(Some((PresentDirEntryKind::Dir, name_string)))
        } else if self.matcher.matches(&path) {
            if let Some(progress) = self.progress {
                progress(&path);
            }
            if maybe_current_file_state.is_none()
                && git_ignore.matches(path.as_internal_file_string())
            {
                // If it wasn't already tracked and it matches
                // the ignored paths, then ignore it.
                Ok(None)
            } else if maybe_current_file_state.is_none()
                && !self.start_tracking_matcher.matches(&path)
            {
                // Leave the file untracked
                self.untracked_paths_tx
                    .send((path, UntrackedReason::FileNotAutoTracked))
                    .ok();
                Ok(None)
            } else {
                let metadata = entry.metadata().map_err(|err| SnapshotError::Other {
                    message: format!("Failed to stat file {}", entry.path().display()),
                    err: err.into(),
                })?;
                if maybe_current_file_state.is_none() && metadata.len() > self.max_new_file_size {
                    // Leave the large file untracked
                    let reason = UntrackedReason::FileTooLarge {
                        size: metadata.len(),
                        max_size: self.max_new_file_size,
                    };
                    self.untracked_paths_tx.send((path, reason)).ok();
                    Ok(None)
                } else if let Some(new_file_state) = file_state(&metadata) {
                    self.process_present_file(
                        path,
                        &entry.path(),
                        maybe_current_file_state.as_ref(),
                        new_file_state,
                    )?;
                    Ok(Some((PresentDirEntryKind::File, name_string)))
                } else {
                    // Special file is not considered present
                    Ok(None)
                }
            }
        } else {
            Ok(None)
        }
    }

    /// Visits only paths we're already tracking.
    ///
    /// Used for directories that are entirely ignored: new files are never
    /// picked up, but already-tracked files are re-snapshotted or reported
    /// deleted.
    fn visit_tracked_files(&self, file_states: FileStates<'_>) -> Result<(), SnapshotError> {
        for (tracked_path, current_file_state) in file_states {
            if current_file_state.file_type == FileType::GitSubmodule {
                continue;
            }
            if !self.matcher.matches(tracked_path) {
                continue;
            }
            let disk_path = tracked_path.to_fs_path(&self.tree_state.working_copy_path)?;
            let metadata = match disk_path.symlink_metadata() {
                Ok(metadata) => Some(metadata),
                // Not-found is not an error; the file was deleted.
                Err(err) if err.kind() == io::ErrorKind::NotFound => None,
                Err(err) => {
                    return Err(SnapshotError::Other {
                        message: format!("Failed to stat file {}", disk_path.display()),
                        err: err.into(),
                    });
                }
            };
            if let Some(new_file_state) = metadata.as_ref().and_then(file_state) {
                self.process_present_file(
                    tracked_path.to_owned(),
                    &disk_path,
                    Some(&current_file_state),
                    new_file_state,
                )?;
            } else {
                self.deleted_files_tx.send(tracked_path.to_owned()).ok();
            }
        }
        Ok(())
    }

    /// Snapshots one file that exists on disk, emitting tree-value and
    /// file-state updates over the channels as needed.
    fn process_present_file(
        &self,
        path: RepoPathBuf,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        mut new_file_state: FileState,
    ) -> Result<(), SnapshotError> {
        let update = self.get_updated_tree_value(
            &path,
            disk_path,
            maybe_current_file_state,
            &new_file_state,
        )?;
        // Preserve materialized conflict data for normal, non-resolved files
        if matches!(new_file_state.file_type, FileType::Normal { .. })
            && !update.as_ref().is_some_and(|update| update.is_resolved())
        {
            new_file_state.materialized_conflict_data =
                maybe_current_file_state.and_then(|state| state.materialized_conflict_data);
        }
        if let Some(tree_value) = update {
            self.tree_entries_tx.send((path.clone(), tree_value)).ok();
        }
        if Some(&new_file_state) != maybe_current_file_state {
            self.file_states_tx.send((path, new_file_state)).ok();
        }
        Ok(())
    }

    /// Emits file paths that don't exist in the `present_entries`.
    fn emit_deleted_files(
        &self,
        dir: &RepoPath,
        file_states: FileStates<'_>,
        present_entries: &PresentDirEntries,
    ) {
        let file_state_chunks = file_states.iter().chunk_by(|(path, _state)| {
            // Extract <name> from <dir>, <dir>/<name>, or <dir>/<name>/**.
            // (file_states may contain <dir> file on file->dir transition.)
            debug_assert!(path.starts_with(dir));
            let slash = !dir.is_root() as usize;
            let len = dir.as_internal_file_string().len() + slash;
            let tail = path.as_internal_file_string().get(len..).unwrap_or("");
            match tail.split_once('/') {
                Some((name, _)) => (PresentDirEntryKind::Dir, name),
                None => (PresentDirEntryKind::File, tail),
            }
        });
        file_state_chunks
            .into_iter()
            .filter(|&((kind, name), _)| match kind {
                PresentDirEntryKind::Dir => !present_entries.dirs.contains(name),
                PresentDirEntryKind::File => !present_entries.files.contains(name),
            })
            .flat_map(|(_, chunk)| chunk)
            // Whether or not the entry exists, submodule should be ignored
            .filter(|(_, state)| state.file_type != FileType::GitSubmodule)
            .filter(|(path, _)| self.matcher.matches(path))
            .try_for_each(|(path, _)| self.deleted_files_tx.send(path.to_owned()))
            .ok();
    }

    /// Returns the new tree value for `repo_path`, or `None` if the file is
    /// clean (unchanged since the last snapshot and safely older than the
    /// state file) or its content matches the current tree.
    fn get_updated_tree_value(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        maybe_current_file_state: Option<&FileState>,
        new_file_state: &FileState,
    ) -> Result<Option<MergedTreeValue>, SnapshotError> {
        let clean = match maybe_current_file_state {
            None => {
                // untracked
                false
            }
            Some(current_file_state) => {
                // If the file's mtime was set at the same time as this state file's own mtime,
                // then we don't know if the file was modified before or after this state file.
                new_file_state.is_clean(current_file_state)
                    && current_file_state.mtime < self.tree_state.own_mtime
            }
        };
        if clean {
            Ok(None)
        } else {
            let current_tree_values = self.current_tree.path_value(repo_path)?;
            // Without symlink support, a path recorded as a symlink in the
            // tree appears as a normal file on disk; keep treating it as a
            // symlink.
            let new_file_type = if !self.tree_state.symlink_support {
                let mut new_file_type = new_file_state.file_type.clone();
                if matches!(new_file_type, FileType::Normal { .. })
                    && matches!(current_tree_values.as_normal(), Some(TreeValue::Symlink(_)))
                {
                    new_file_type = FileType::Symlink;
                }
                new_file_type
            } else {
                new_file_state.file_type.clone()
            };
            let new_tree_values = match new_file_type {
                FileType::Normal { executable } => self
                    .write_path_to_store(
                        repo_path,
                        disk_path,
                        &current_tree_values,
                        executable,
                        maybe_current_file_state.and_then(|state| state.materialized_conflict_data),
                    )
                    .block_on()?,
                FileType::Symlink => {
                    let id = self
                        .write_symlink_to_store(repo_path, disk_path)
                        .block_on()?;
                    Merge::normal(TreeValue::Symlink(id))
                }
                FileType::GitSubmodule => panic!("git submodule cannot be written to store"),
            };
            if new_tree_values != current_tree_values {
                Ok(Some(new_tree_values))
            } else {
                Ok(None)
            }
        }
    }

    /// Convenience accessor for the backing store.
    fn store(&self) -> &Store {
        &self.tree_state.store
    }

    /// Computes the tree value for a normal file on disk, writing its content
    /// to the store as needed:
    /// - resolved current value: write the content as a normal file;
    /// - file merge: parse conflict markers in the content back into a
    ///   (possibly updated) conflict;
    /// - anything else: keep the current tree values unchanged.
    async fn write_path_to_store(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        current_tree_values: &MergedTreeValue,
        executable: FileExecutableFlag,
        materialized_conflict_data: Option<MaterializedConflictData>,
    ) -> Result<MergedTreeValue, SnapshotError> {
        if let Some(current_tree_value) = current_tree_values.as_resolved() {
            #[cfg(unix)]
            let _ = current_tree_value; // use the variable
            let id = self.write_file_to_store(repo_path, disk_path).await?;
            // On Windows, we preserve the executable bit from the current tree.
            #[cfg(windows)]
            let executable = {
                let () = executable; // use the variable
                if let Some(TreeValue::File { id: _, executable }) = current_tree_value {
                    *executable
                } else {
                    false
                }
            };
            Ok(Merge::normal(TreeValue::File { id, executable }))
        } else if let Some(old_file_ids) = current_tree_values.to_file_merge() {
            // If the file contained a conflict before and is a normal file on
            // disk, we try to parse any conflict markers in the file into a
            // conflict.
            let content = fs::read(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to open file {}", disk_path.display()),
                err: err.into(),
            })?;
            let new_file_ids = conflicts::update_from_content(
                &old_file_ids,
                self.store(),
                repo_path,
                &content,
                self.conflict_marker_style,
                materialized_conflict_data.map_or(MIN_CONFLICT_MARKER_LEN, |data| {
                    data.conflict_marker_len as usize
                }),
            )
            .block_on()?;
            match new_file_ids.into_resolved() {
                Ok(file_id) => {
                    // On Windows, we preserve the executable bit from the merged trees.
                    #[cfg(windows)]
                    let executable = {
                        let () = executable; // use the variable
                        if let Some(merge) = current_tree_values.to_executable_merge() {
                            merge.resolve_trivial().copied().unwrap_or_default()
                        } else {
                            false
                        }
                    };
                    Ok(Merge::normal(TreeValue::File {
                        id: file_id.unwrap(),
                        executable,
                    }))
                }
                Err(new_file_ids) => {
                    if new_file_ids != old_file_ids {
                        Ok(current_tree_values.with_new_file_ids(&new_file_ids))
                    } else {
                        Ok(current_tree_values.clone())
                    }
                }
            }
        } else {
            Ok(current_tree_values.clone())
        }
    }

    /// Writes the on-disk file's content to the store and returns its id.
    async fn write_file_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<FileId, SnapshotError> {
        let mut file = File::open(disk_path).map_err(|err| SnapshotError::Other {
            message: format!("Failed to open file {}", disk_path.display()),
            err: err.into(),
        })?;
        Ok(self.store().write_file(path, &mut file).await?)
    }

    /// Writes a symlink's target to the store. When the filesystem lacks
    /// symlink support, the path is a regular file whose content is the
    /// (UTF-8) target string.
    async fn write_symlink_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<SymlinkId, SnapshotError> {
        if self.tree_state.symlink_support {
            let target = disk_path.read_link().map_err(|err| SnapshotError::Other {
                message: format!("Failed to read symlink {}", disk_path.display()),
                err: err.into(),
            })?;
            let str_target =
                target
                    .to_str()
                    .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget {
                        path: disk_path.to_path_buf(),
                    })?;
            Ok(self.store().write_symlink(path, str_target).await?)
        } else {
            let target = fs::read(disk_path).map_err(|err| SnapshotError::Other {
                message: format!("Failed to read file {}", disk_path.display()),
                err: err.into(),
            })?;
            let string_target =
                String::from_utf8(target).map_err(|_| SnapshotError::InvalidUtf8SymlinkTarget {
                    path: disk_path.to_path_buf(),
                })?;
            Ok(self.store().write_symlink(path, &string_target).await?)
        }
    }
}
1589
1590/// Functions to update local-disk files from the store.
impl TreeState {
    /// Writes `contents` to a brand-new file at `disk_path` and returns the
    /// `FileState` recorded for it.
    ///
    /// Fails if something already exists at the path (`create_new`), which
    /// protects un-ignored files and avoids following symlinks.
    fn write_file(
        &self,
        disk_path: &Path,
        contents: &mut dyn Read,
        executable: bool,
    ) -> Result<FileState, CheckoutError> {
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        let size = io::copy(contents, &mut file).map_err(|err| CheckoutError::Other {
            message: format!("Failed to write file {}", disk_path.display()),
            err: err.into(),
        })?;
        self.set_executable(disk_path, executable)?;
        // Read the file state from the file descriptor. That way, know that the file
        // exists and is of the expected type, and the stat information is most likely
        // accurate, except for other processes modifying the file concurrently (The
        // mtime is set at write time and won't change when we close the file.)
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size, &metadata, None))
    }

    /// Creates a symlink at `disk_path` pointing at `target` and returns the
    /// `FileState` recorded for it (based on `symlink_metadata`, so the link
    /// itself is stat'ed, not its target).
    fn write_symlink(&self, disk_path: &Path, target: String) -> Result<FileState, CheckoutError> {
        let target = PathBuf::from(&target);
        try_symlink(&target, disk_path).map_err(|err| CheckoutError::Other {
            message: format!(
                "Failed to create symlink from {} to {}",
                disk_path.display(),
                target.display()
            ),
            err: err.into(),
        })?;
        let metadata = disk_path
            .symlink_metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_symlink(&metadata))
    }

    /// Writes pre-materialized conflict content to a new file at `disk_path`
    /// and returns the `FileState` recorded for it, including the
    /// `MaterializedConflictData` (e.g. the conflict marker length used).
    fn write_conflict(
        &self,
        disk_path: &Path,
        conflict_data: Vec<u8>,
        executable: bool,
        materialized_conflict_data: Option<MaterializedConflictData>,
    ) -> Result<FileState, CheckoutError> {
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err: err.into(),
            })?;
        file.write_all(&conflict_data)
            .map_err(|err| CheckoutError::Other {
                message: format!("Failed to write conflict to file {}", disk_path.display()),
                err: err.into(),
            })?;
        let size = conflict_data.len() as u64;
        self.set_executable(disk_path, executable)?;
        let metadata = file
            .metadata()
            .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(
            executable,
            size,
            &metadata,
            materialized_conflict_data,
        ))
    }

    /// Sets or clears the executable permission bits of `disk_path`.
    ///
    /// No-op on non-unix platforms (there is no executable bit to set).
    #[cfg_attr(windows, allow(unused_variables))]
    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
        #[cfg(unix)]
        {
            let mode = if executable { 0o755 } else { 0o644 };
            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
                .map_err(|err| checkout_error_for_stat_error(err, disk_path))?;
        }
        Ok(())
    }

    /// Updates the on-disk files from the currently recorded tree to
    /// `new_tree`, restricted to the sparse-pattern matcher, then records
    /// `new_tree` as the current tree.
    pub fn check_out(
        &mut self,
        new_tree: &MergedTree,
        options: &CheckoutOptions,
    ) -> Result<CheckoutStats, CheckoutError> {
        let old_tree = self.current_tree().map_err(|err| match err {
            // A missing source tree is reported specially so callers can
            // distinguish it from other backend failures.
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        let stats = self
            .update(
                &old_tree,
                new_tree,
                self.sparse_matcher().as_ref(),
                options.conflict_marker_style,
            )
            .block_on()?;
        self.tree_id = new_tree.id();
        Ok(stats)
    }

    /// Replaces the sparse patterns: materializes paths that are newly
    /// included and removes paths that are no longer included from disk.
    pub fn set_sparse_patterns(
        &mut self,
        sparse_patterns: Vec<RepoPathBuf>,
        options: &CheckoutOptions,
    ) -> Result<CheckoutStats, CheckoutError> {
        let tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                source: Box::new(err),
            },
            other => CheckoutError::InternalBackendError(other),
        })?;
        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
        let new_matcher = PrefixMatcher::new(&sparse_patterns);
        // Paths matched only by the new patterns get checked out from the
        // empty tree; paths matched only by the old patterns get checked out
        // *to* the empty tree (i.e. deleted from disk).
        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
        let empty_tree = MergedTree::resolved(Tree::empty(self.store.clone(), RepoPathBuf::root()));
        let added_stats = self
            .update(
                &empty_tree,
                &tree,
                &added_matcher,
                options.conflict_marker_style,
            )
            .block_on()?;
        let removed_stats = self
            .update(
                &tree,
                &empty_tree,
                &removed_matcher,
                options.conflict_marker_style,
            )
            .block_on()?;
        self.sparse_patterns = sparse_patterns;
        // Sanity checks: adding patterns can only add (or skip) files, and
        // removing patterns can only remove files.
        assert_eq!(added_stats.updated_files, 0);
        assert_eq!(added_stats.removed_files, 0);
        assert_eq!(removed_stats.updated_files, 0);
        assert_eq!(removed_stats.added_files, 0);
        assert_eq!(removed_stats.skipped_files, 0);
        Ok(CheckoutStats {
            updated_files: 0,
            added_files: added_stats.added_files,
            removed_files: removed_stats.removed_files,
            skipped_files: added_stats.skipped_files,
        })
    }

    /// Applies the diff between `old_tree` and `new_tree` (restricted to
    /// `matcher`) to the files on disk, and merges the resulting file states
    /// into `self.file_states`. Returns counts of added/updated/removed/
    /// skipped files.
    async fn update(
        &mut self,
        old_tree: &MergedTree,
        new_tree: &MergedTree,
        matcher: &dyn Matcher,
        conflict_marker_style: ConflictMarkerStyle,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: maybe it's better not include the skipped counts in the "intended"
        // counts
        let mut stats = CheckoutStats {
            updated_files: 0,
            added_files: 0,
            removed_files: 0,
            skipped_files: 0,
        };
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        // Materialize the "after" value of each diff entry; entries are
        // processed concurrently up to the store's configured concurrency.
        let mut diff_stream = old_tree
            .diff_stream(new_tree, matcher)
            .map(|TreeDiffEntry { path, values }| async {
                match values {
                    Ok((before, after)) => {
                        let result = materialize_tree_value(&self.store, &path, after).await;
                        (path, result.map(|value| (before, value)))
                    }
                    Err(err) => (path, Err(err)),
                }
            })
            .buffered(self.store.concurrency().max(1));
        while let Some((path, data)) = diff_stream.next().await {
            let (before, after) = data?;
            if after.is_absent() {
                stats.removed_files += 1;
            } else if before.is_absent() {
                stats.added_files += 1;
            } else {
                stats.updated_files += 1;
            }

            // Existing Git submodule can be a non-empty directory on disk. We
            // shouldn't attempt to manage it as a tracked path.
            //
            // TODO: It might be better to add general support for paths not
            // tracked by jj than processing submodules specially. For example,
            // paths excluded by .gitignore can be marked as such so that
            // newly-"unignored" paths won't be snapshotted automatically.
            if matches!(before.as_normal(), Some(TreeValue::GitSubmodule(_)))
                && matches!(after, MaterializedTreeValue::GitSubmodule(_))
            {
                eprintln!("ignoring git submodule at {path:?}");
                // Not updating the file state as if there were no diffs. Leave
                // the state type as FileType::GitSubmodule if it was before.
                continue;
            }

            // Create parent directories no matter if after.is_present(). This
            // ensures that the path never traverses symlinks.
            let Some(disk_path) = create_parent_dirs(&self.working_copy_path, &path)? else {
                changed_file_states.push((path, FileState::placeholder()));
                stats.skipped_files += 1;
                continue;
            };
            // If the path was present, check reserved path first and delete it.
            let present_file_deleted = before.is_present() && remove_old_file(&disk_path)?;
            // If not, create temporary file to test the path validity.
            if !present_file_deleted && !can_create_new_file(&disk_path)? {
                changed_file_states.push((path, FileState::placeholder()));
                stats.skipped_files += 1;
                continue;
            }

            // TODO: Check that the file has not changed before overwriting/removing it.
            let file_state = match after {
                MaterializedTreeValue::Absent | MaterializedTreeValue::AccessDenied(_) => {
                    // The file itself was already removed above; also clean up
                    // ancestor directories that became empty. remove_dir()
                    // fails on a non-empty directory, which ends the loop.
                    let mut parent_dir = disk_path.parent().unwrap();
                    loop {
                        if fs::remove_dir(parent_dir).is_err() {
                            break;
                        }
                        parent_dir = parent_dir.parent().unwrap();
                    }
                    deleted_files.insert(path);
                    continue;
                }
                MaterializedTreeValue::File(mut file) => {
                    self.write_file(&disk_path, &mut file.reader, file.executable)?
                }
                MaterializedTreeValue::Symlink { id: _, target } => {
                    if self.symlink_support {
                        self.write_symlink(&disk_path, target)?
                    } else {
                        // No symlink support: store the target as the contents
                        // of a regular file instead.
                        self.write_file(&disk_path, &mut target.as_bytes(), false)?
                    }
                }
                MaterializedTreeValue::GitSubmodule(_) => {
                    eprintln!("ignoring git submodule at {path:?}");
                    FileState::for_gitsubmodule()
                }
                MaterializedTreeValue::Tree(_) => {
                    panic!("unexpected tree entry in diff at {path:?}");
                }
                MaterializedTreeValue::FileConflict {
                    id: _,
                    contents,
                    executable,
                } => {
                    // Pick a marker length long enough to be unambiguous for
                    // these contents, and remember it so the conflict can be
                    // parsed back later.
                    let conflict_marker_len = choose_materialized_conflict_marker_len(&contents);
                    let data = materialize_merge_result_to_bytes_with_marker_len(
                        &contents,
                        conflict_marker_style,
                        conflict_marker_len,
                    )
                    .into();
                    let materialized_conflict_data = MaterializedConflictData {
                        conflict_marker_len: conflict_marker_len.try_into().unwrap_or(u32::MAX),
                    };
                    self.write_conflict(
                        &disk_path,
                        data,
                        executable,
                        Some(materialized_conflict_data),
                    )?
                }
                MaterializedTreeValue::OtherConflict { id } => {
                    // Unless all terms are regular files, we can't do much
                    // better than trying to describe the merge.
                    let data = id.describe().into_bytes();
                    let executable = false;
                    self.write_conflict(&disk_path, data, executable, None)?
                }
            };
            changed_file_states.push((path, file_state));
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        Ok(stats)
    }

    /// Makes the recorded working-copy state match `new_tree` without
    /// touching any files on disk. Changed paths are recorded with zeroed
    /// mtime/size, which can never match the real on-disk file — presumably
    /// so the next snapshot re-examines them.
    pub async fn reset(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        let old_tree = self.current_tree().map_err(|err| match err {
            err @ BackendError::ObjectNotFound { .. } => ResetError::SourceNotFound {
                source: Box::new(err),
            },
            other => ResetError::InternalBackendError(other),
        })?;

        let matcher = self.sparse_matcher();
        let mut changed_file_states = Vec::new();
        let mut deleted_files = HashSet::new();
        let mut diff_stream = old_tree.diff_stream(new_tree, matcher.as_ref());
        while let Some(TreeDiffEntry { path, values }) = diff_stream.next().await {
            let (_before, after) = values?;
            if after.is_absent() {
                deleted_files.insert(path);
            } else {
                let file_type = match after.into_resolved() {
                    Ok(value) => match value.unwrap() {
                        #[cfg(unix)]
                        TreeValue::File { id: _, executable } => FileType::Normal { executable },
                        #[cfg(windows)]
                        TreeValue::File { .. } => FileType::Normal { executable: () },
                        TreeValue::Symlink(_id) => FileType::Symlink,
                        TreeValue::Conflict(_id) => {
                            panic!("unexpected conflict entry in diff at {path:?}");
                        }
                        TreeValue::GitSubmodule(_id) => {
                            eprintln!("ignoring git submodule at {path:?}");
                            FileType::GitSubmodule
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    },
                    Err(_values) => {
                        // TODO: Try to set the executable bit based on the conflict
                        FileType::Normal {
                            executable: FileExecutableFlag::default(),
                        }
                    }
                };
                let file_state = FileState {
                    file_type,
                    mtime: MillisSinceEpoch(0),
                    size: 0,
                    materialized_conflict_data: None,
                };
                changed_file_states.push((path, file_state));
            }
        }
        self.file_states
            .merge_in(changed_file_states, &deleted_files);
        self.tree_id = new_tree.id();
        Ok(())
    }

    /// Discards all recorded file states and resets from the empty tree to
    /// `new_tree`, e.g. to recover from a corrupt tree state.
    pub async fn recover(&mut self, new_tree: &MergedTree) -> Result<(), ResetError> {
        self.file_states.clear();
        self.tree_id = self.store.empty_merged_tree_id();
        self.reset(new_tree).await
    }
}
1951
1952fn checkout_error_for_stat_error(err: io::Error, path: &Path) -> CheckoutError {
1953    CheckoutError::Other {
1954        message: format!("Failed to stat file {}", path.display()),
1955        err: err.into(),
1956    }
1957}
1958
/// Working copy state stored in "checkout" file.
#[derive(Clone, Debug)]
struct CheckoutState {
    /// Id of the operation this working copy was most recently updated to.
    operation_id: OperationId,
    /// Name of the workspace this working copy belongs to.
    workspace_name: WorkspaceNameBuf,
}
1965
/// A working copy backed by the local filesystem.
pub struct LocalWorkingCopy {
    store: Arc<Store>,
    /// Directory containing the checked-out files.
    working_copy_path: PathBuf,
    /// Directory where the "checkout" file and tree state are stored.
    state_path: PathBuf,
    /// Lazily loaded from the "checkout" file in `state_path`.
    checkout_state: OnceCell<CheckoutState>,
    /// Lazily loaded tree state (file states, sparse patterns, tree id).
    tree_state: OnceCell<TreeState>,
}
1973
1974impl WorkingCopy for LocalWorkingCopy {
1975    fn as_any(&self) -> &dyn Any {
1976        self
1977    }
1978
1979    fn name(&self) -> &str {
1980        Self::name()
1981    }
1982
1983    fn workspace_name(&self) -> &WorkspaceName {
1984        &self.checkout_state().workspace_name
1985    }
1986
1987    fn operation_id(&self) -> &OperationId {
1988        &self.checkout_state().operation_id
1989    }
1990
1991    fn tree_id(&self) -> Result<&MergedTreeId, WorkingCopyStateError> {
1992        Ok(self.tree_state()?.current_tree_id())
1993    }
1994
1995    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
1996        Ok(self.tree_state()?.sparse_patterns())
1997    }
1998
1999    fn start_mutation(&self) -> Result<Box<dyn LockedWorkingCopy>, WorkingCopyStateError> {
2000        let lock_path = self.state_path.join("working_copy.lock");
2001        let lock = FileLock::lock(lock_path).map_err(|err| WorkingCopyStateError {
2002            message: "Failed to lock working copy".to_owned(),
2003            err: err.into(),
2004        })?;
2005
2006        let wc = LocalWorkingCopy {
2007            store: self.store.clone(),
2008            working_copy_path: self.working_copy_path.clone(),
2009            state_path: self.state_path.clone(),
2010            // Empty so we re-read the state after taking the lock
2011            checkout_state: OnceCell::new(),
2012            // TODO: It's expensive to reload the whole tree. We should copy it from `self` if it
2013            // hasn't changed.
2014            tree_state: OnceCell::new(),
2015        };
2016        let old_operation_id = wc.operation_id().clone();
2017        let old_tree_id = wc.tree_id()?.clone();
2018        Ok(Box::new(LockedLocalWorkingCopy {
2019            wc,
2020            lock,
2021            old_operation_id,
2022            old_tree_id,
2023            tree_state_dirty: false,
2024            new_workspace_name: None,
2025        }))
2026    }
2027}
2028
2029impl LocalWorkingCopy {
2030    pub fn name() -> &'static str {
2031        "local"
2032    }
2033
2034    /// Initializes a new working copy at `working_copy_path`. The working
2035    /// copy's state will be stored in the `state_path` directory. The working
2036    /// copy will have the empty tree checked out.
2037    pub fn init(
2038        store: Arc<Store>,
2039        working_copy_path: PathBuf,
2040        state_path: PathBuf,
2041        operation_id: OperationId,
2042        workspace_name: WorkspaceNameBuf,
2043    ) -> Result<LocalWorkingCopy, WorkingCopyStateError> {
2044        let proto = crate::protos::working_copy::Checkout {
2045            operation_id: operation_id.to_bytes(),
2046            workspace_name: workspace_name.into(),
2047        };
2048        let mut file = OpenOptions::new()
2049            .create_new(true)
2050            .write(true)
2051            .open(state_path.join("checkout"))
2052            .unwrap();
2053        file.write_all(&proto.encode_to_vec()).unwrap();
2054        let tree_state =
2055            TreeState::init(store.clone(), working_copy_path.clone(), state_path.clone()).map_err(
2056                |err| WorkingCopyStateError {
2057                    message: "Failed to initialize working copy state".to_string(),
2058                    err: err.into(),
2059                },
2060            )?;
2061        Ok(LocalWorkingCopy {
2062            store,
2063            working_copy_path,
2064            state_path,
2065            checkout_state: OnceCell::new(),
2066            tree_state: OnceCell::with_value(tree_state),
2067        })
2068    }
2069
2070    pub fn load(
2071        store: Arc<Store>,
2072        working_copy_path: PathBuf,
2073        state_path: PathBuf,
2074    ) -> LocalWorkingCopy {
2075        LocalWorkingCopy {
2076            store,
2077            working_copy_path,
2078            state_path,
2079            checkout_state: OnceCell::new(),
2080            tree_state: OnceCell::new(),
2081        }
2082    }
2083
2084    pub fn state_path(&self) -> &Path {
2085        &self.state_path
2086    }
2087
2088    fn write_proto(&self, proto: crate::protos::working_copy::Checkout) {
2089        let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
2090        temp_file
2091            .as_file_mut()
2092            .write_all(&proto.encode_to_vec())
2093            .unwrap();
2094        // TODO: Retry if persisting fails (it will on Windows if the file happened to
2095        // be open for read).
2096        temp_file.persist(self.state_path.join("checkout")).unwrap();
2097    }
2098
2099    fn checkout_state(&self) -> &CheckoutState {
2100        self.checkout_state.get_or_init(|| {
2101            let buf = fs::read(self.state_path.join("checkout")).unwrap();
2102            let proto = crate::protos::working_copy::Checkout::decode(&*buf).unwrap();
2103            CheckoutState {
2104                operation_id: OperationId::new(proto.operation_id),
2105                workspace_name: if proto.workspace_name.is_empty() {
2106                    // For compatibility with old working copies.
2107                    // TODO: Delete in mid 2022 or so
2108                    WorkspaceName::DEFAULT.to_owned()
2109                } else {
2110                    proto.workspace_name.into()
2111                },
2112            }
2113        })
2114    }
2115
2116    fn checkout_state_mut(&mut self) -> &mut CheckoutState {
2117        self.checkout_state(); // ensure loaded
2118        self.checkout_state.get_mut().unwrap()
2119    }
2120
2121    #[instrument(skip_all)]
2122    fn tree_state(&self) -> Result<&TreeState, WorkingCopyStateError> {
2123        self.tree_state
2124            .get_or_try_init(|| {
2125                TreeState::load(
2126                    self.store.clone(),
2127                    self.working_copy_path.clone(),
2128                    self.state_path.clone(),
2129                )
2130            })
2131            .map_err(|err| WorkingCopyStateError {
2132                message: "Failed to read working copy state".to_string(),
2133                err: err.into(),
2134            })
2135    }
2136
2137    fn tree_state_mut(&mut self) -> Result<&mut TreeState, WorkingCopyStateError> {
2138        self.tree_state()?; // ensure loaded
2139        Ok(self.tree_state.get_mut().unwrap())
2140    }
2141
2142    pub fn file_states(&self) -> Result<FileStates<'_>, WorkingCopyStateError> {
2143        Ok(self.tree_state()?.file_states())
2144    }
2145
2146    #[instrument(skip_all)]
2147    fn save(&mut self) {
2148        self.write_proto(crate::protos::working_copy::Checkout {
2149            operation_id: self.operation_id().to_bytes(),
2150            workspace_name: self.workspace_name().into(),
2151        });
2152    }
2153
2154    #[cfg(feature = "watchman")]
2155    pub fn query_watchman(
2156        &self,
2157        config: &WatchmanConfig,
2158    ) -> Result<(watchman::Clock, Option<Vec<PathBuf>>), WorkingCopyStateError> {
2159        self.tree_state()?
2160            .query_watchman(config)
2161            .map_err(|err| WorkingCopyStateError {
2162                message: "Failed to query watchman".to_string(),
2163                err: err.into(),
2164            })
2165    }
2166
2167    #[cfg(feature = "watchman")]
2168    pub fn is_watchman_trigger_registered(
2169        &self,
2170        config: &WatchmanConfig,
2171    ) -> Result<bool, WorkingCopyStateError> {
2172        self.tree_state()?
2173            .is_watchman_trigger_registered(config)
2174            .map_err(|err| WorkingCopyStateError {
2175                message: "Failed to query watchman".to_string(),
2176                err: err.into(),
2177            })
2178    }
2179}
2180
/// Factory for creating and loading [`LocalWorkingCopy`] instances.
pub struct LocalWorkingCopyFactory {}
2182
2183impl WorkingCopyFactory for LocalWorkingCopyFactory {
2184    fn init_working_copy(
2185        &self,
2186        store: Arc<Store>,
2187        working_copy_path: PathBuf,
2188        state_path: PathBuf,
2189        operation_id: OperationId,
2190        workspace_name: WorkspaceNameBuf,
2191    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2192        Ok(Box::new(LocalWorkingCopy::init(
2193            store,
2194            working_copy_path,
2195            state_path,
2196            operation_id,
2197            workspace_name,
2198        )?))
2199    }
2200
2201    fn load_working_copy(
2202        &self,
2203        store: Arc<Store>,
2204        working_copy_path: PathBuf,
2205        state_path: PathBuf,
2206    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2207        Ok(Box::new(LocalWorkingCopy::load(
2208            store,
2209            working_copy_path,
2210            state_path,
2211        )))
2212    }
2213}
2214
/// A working copy that's locked on disk. The lock is held until you call
/// `finish()` or `discard()`.
pub struct LockedLocalWorkingCopy {
    wc: LocalWorkingCopy,
    // Held only for its RAII effect of keeping the on-disk lock alive;
    // released when this struct is dropped.
    #[expect(dead_code)]
    lock: FileLock,
    /// Operation id recorded when the lock was taken.
    old_operation_id: OperationId,
    /// Tree id recorded when the lock was taken.
    old_tree_id: MergedTreeId,
    /// Whether the tree state has been modified and needs saving in
    /// `finish()`.
    tree_state_dirty: bool,
    /// Pending rename requested via `rename_workspace()`, applied on
    /// `finish()`.
    new_workspace_name: Option<WorkspaceNameBuf>,
}
2226
2227impl LockedWorkingCopy for LockedLocalWorkingCopy {
2228    fn as_any(&self) -> &dyn Any {
2229        self
2230    }
2231
2232    fn as_any_mut(&mut self) -> &mut dyn Any {
2233        self
2234    }
2235
2236    fn old_operation_id(&self) -> &OperationId {
2237        &self.old_operation_id
2238    }
2239
2240    fn old_tree_id(&self) -> &MergedTreeId {
2241        &self.old_tree_id
2242    }
2243
2244    fn snapshot(
2245        &mut self,
2246        options: &SnapshotOptions,
2247    ) -> Result<(MergedTreeId, SnapshotStats), SnapshotError> {
2248        let tree_state = self
2249            .wc
2250            .tree_state_mut()
2251            .map_err(|err| SnapshotError::Other {
2252                message: "Failed to read the working copy state".to_string(),
2253                err: err.into(),
2254            })?;
2255        let (is_dirty, stats) = tree_state.snapshot(options)?;
2256        self.tree_state_dirty |= is_dirty;
2257        Ok((tree_state.current_tree_id().clone(), stats))
2258    }
2259
2260    fn check_out(
2261        &mut self,
2262        commit: &Commit,
2263        options: &CheckoutOptions,
2264    ) -> Result<CheckoutStats, CheckoutError> {
2265        // TODO: Write a "pending_checkout" file with the new TreeId so we can
2266        // continue an interrupted update if we find such a file.
2267        let new_tree = commit.tree()?;
2268        let tree_state = self
2269            .wc
2270            .tree_state_mut()
2271            .map_err(|err| CheckoutError::Other {
2272                message: "Failed to load the working copy state".to_string(),
2273                err: err.into(),
2274            })?;
2275        if tree_state.tree_id != *commit.tree_id() {
2276            let stats = tree_state.check_out(&new_tree, options)?;
2277            self.tree_state_dirty = true;
2278            Ok(stats)
2279        } else {
2280            Ok(CheckoutStats::default())
2281        }
2282    }
2283
2284    fn rename_workspace(&mut self, new_name: WorkspaceNameBuf) {
2285        self.new_workspace_name = Some(new_name);
2286    }
2287
2288    fn reset(&mut self, commit: &Commit) -> Result<(), ResetError> {
2289        let new_tree = commit.tree()?;
2290        self.wc
2291            .tree_state_mut()
2292            .map_err(|err| ResetError::Other {
2293                message: "Failed to read the working copy state".to_string(),
2294                err: err.into(),
2295            })?
2296            .reset(&new_tree)
2297            .block_on()?;
2298        self.tree_state_dirty = true;
2299        Ok(())
2300    }
2301
2302    fn recover(&mut self, commit: &Commit) -> Result<(), ResetError> {
2303        let new_tree = commit.tree()?;
2304        self.wc
2305            .tree_state_mut()
2306            .map_err(|err| ResetError::Other {
2307                message: "Failed to read the working copy state".to_string(),
2308                err: err.into(),
2309            })?
2310            .recover(&new_tree)
2311            .block_on()?;
2312        self.tree_state_dirty = true;
2313        Ok(())
2314    }
2315
2316    fn sparse_patterns(&self) -> Result<&[RepoPathBuf], WorkingCopyStateError> {
2317        self.wc.sparse_patterns()
2318    }
2319
2320    fn set_sparse_patterns(
2321        &mut self,
2322        new_sparse_patterns: Vec<RepoPathBuf>,
2323        options: &CheckoutOptions,
2324    ) -> Result<CheckoutStats, CheckoutError> {
2325        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
2326        // continue an interrupted update if we find such a file.
2327        let stats = self
2328            .wc
2329            .tree_state_mut()
2330            .map_err(|err| CheckoutError::Other {
2331                message: "Failed to load the working copy state".to_string(),
2332                err: err.into(),
2333            })?
2334            .set_sparse_patterns(new_sparse_patterns, options)?;
2335        self.tree_state_dirty = true;
2336        Ok(stats)
2337    }
2338
2339    #[instrument(skip_all)]
2340    fn finish(
2341        mut self: Box<Self>,
2342        operation_id: OperationId,
2343    ) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
2344        assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.tree_id()?);
2345        if self.tree_state_dirty {
2346            self.wc
2347                .tree_state_mut()?
2348                .save()
2349                .map_err(|err| WorkingCopyStateError {
2350                    message: "Failed to write working copy state".to_string(),
2351                    err: Box::new(err),
2352                })?;
2353        }
2354        if self.old_operation_id != operation_id || self.new_workspace_name.is_some() {
2355            if let Some(new_name) = self.new_workspace_name {
2356                self.wc.checkout_state_mut().workspace_name = new_name;
2357            }
2358            self.wc.checkout_state_mut().operation_id = operation_id;
2359            self.wc.save();
2360        }
2361        // TODO: Clear the "pending_checkout" file here.
2362        Ok(Box::new(self.wc))
2363    }
2364}
2365
2366impl LockedLocalWorkingCopy {
2367    pub fn reset_watchman(&mut self) -> Result<(), SnapshotError> {
2368        self.wc
2369            .tree_state_mut()
2370            .map_err(|err| SnapshotError::Other {
2371                message: "Failed to read the working copy state".to_string(),
2372                err: err.into(),
2373            })?
2374            .reset_watchman();
2375        self.tree_state_dirty = true;
2376        Ok(())
2377    }
2378}
2379
#[cfg(test)]
mod tests {
    use maplit::hashset;

    use super::*;

    // Shorthand for building a `RepoPath` from a literal in the fixtures.
    fn repo_path(value: &str) -> &RepoPath {
        RepoPath::from_internal_string(value)
    }

    // Verifies that FileStatesMap::merge_in() applies changes, additions, and
    // deletions onto an existing sorted state list and keeps the result
    // sorted. The fixtures deliberately include "b#" because '#' sorts before
    // '/' byte-wise, which exercises the path-aware ordering.
    #[test]
    fn test_file_states_merge() {
        // Minimal FileState differing only by `size`, so entries are easy to
        // tell apart in assertions.
        let new_state = |size| FileState {
            file_type: FileType::Normal {
                executable: FileExecutableFlag::default(),
            },
            mtime: MillisSinceEpoch(0),
            size,
            materialized_conflict_data: None,
        };
        let new_static_entry = |path: &'static str, size| (repo_path(path), new_state(size));
        let new_owned_entry = |path: &str, size| (repo_path(path).to_owned(), new_state(size));
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        // Base state, sorted in path order (directories split at '/').
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("bc", 5),
        ];
        let mut file_states = FileStatesMap::from_proto(data, false);

        let changed_file_states = vec![
            new_owned_entry("aa", 10),    // change
            new_owned_entry("b/d/f", 11), // add
            new_owned_entry("b/e", 12),   // change
            new_owned_entry("c", 13),     // add
        ];
        let deleted_files = hashset! {
            repo_path("b/c").to_owned(),
            repo_path("b#").to_owned(),
        };
        file_states.merge_in(changed_file_states, &deleted_files);
        // Expected: changes applied, additions inserted in order, deletions
        // removed, untouched entries ("b/d/e", "bc") preserved.
        assert_eq!(
            file_states.all().iter().collect_vec(),
            vec![
                new_static_entry("aa", 10),
                new_static_entry("b/d/e", 2),
                new_static_entry("b/d/f", 11),
                new_static_entry("b/e", 12),
                new_static_entry("bc", 5),
                new_static_entry("c", 13),
            ],
        );
    }

    // Verifies FileStates lookups: prefixed() must match whole path
    // components (so "b" matches "b/..." but not "bc" or "b#"), and
    // contains_path()/get() must match exact file paths only.
    #[test]
    fn test_file_states_lookup() {
        let new_state = |size| FileState {
            file_type: FileType::Normal {
                executable: FileExecutableFlag::default(),
            },
            mtime: MillisSinceEpoch(0),
            size,
            materialized_conflict_data: None,
        };
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("aa", 0),
            new_proto_entry("b/c", 1),
            new_proto_entry("b/d/e", 2),
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
            new_proto_entry("bc", 5),
        ];
        let file_states = FileStates::from_sorted(&data);

        // The empty prefix matches everything.
        assert_eq!(
            file_states.prefixed(repo_path("")).paths().collect_vec(),
            ["aa", "b/c", "b/d/e", "b/e", "b#", "bc"].map(repo_path)
        );
        // "a" is a proper prefix of "aa" but not a path component, so no match.
        assert!(file_states.prefixed(repo_path("a")).is_empty());
        assert_eq!(
            file_states.prefixed(repo_path("aa")).paths().collect_vec(),
            ["aa"].map(repo_path)
        );
        // Directory prefix "b" matches files under "b/" but not "b#" or "bc".
        assert_eq!(
            file_states.prefixed(repo_path("b")).paths().collect_vec(),
            ["b/c", "b/d/e", "b/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b/d")).paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("b#")).paths().collect_vec(),
            ["b#"].map(repo_path)
        );
        assert_eq!(
            file_states.prefixed(repo_path("bc")).paths().collect_vec(),
            ["bc"].map(repo_path)
        );
        assert!(file_states.prefixed(repo_path("z")).is_empty());

        // contains_path() is exact-match: directories ("b/d") don't count.
        assert!(!file_states.contains_path(repo_path("a")));
        assert!(file_states.contains_path(repo_path("aa")));
        assert!(file_states.contains_path(repo_path("b/d/e")));
        assert!(!file_states.contains_path(repo_path("b/d")));
        assert!(file_states.contains_path(repo_path("b#")));
        assert!(file_states.contains_path(repo_path("bc")));
        assert!(!file_states.contains_path(repo_path("z")));

        // get() returns the stored state for exact file paths, None otherwise.
        assert_eq!(file_states.get(repo_path("a")), None);
        assert_eq!(file_states.get(repo_path("aa")), Some(new_state(0)));
        assert_eq!(file_states.get(repo_path("b/d/e")), Some(new_state(2)));
        assert_eq!(file_states.get(repo_path("bc")), Some(new_state(5)));
        assert_eq!(file_states.get(repo_path("z")), None);
    }

    // Verifies the *_at() variants, which look up a single component relative
    // to a directory path, including narrowing with prefixed_at() and nesting
    // those narrowed views.
    #[test]
    fn test_file_states_lookup_at() {
        let new_state = |size| FileState {
            file_type: FileType::Normal {
                executable: FileExecutableFlag::default(),
            },
            mtime: MillisSinceEpoch(0),
            size,
            materialized_conflict_data: None,
        };
        let new_proto_entry = |path: &str, size| {
            file_state_entry_to_proto(repo_path(path).to_owned(), &new_state(size))
        };
        let data = vec![
            new_proto_entry("b/c", 0),
            new_proto_entry("b/d/e", 1),
            new_proto_entry("b/d#", 2), // '#' < '/'
            new_proto_entry("b/e", 3),
            new_proto_entry("b#", 4), // '#' < '/'
        ];
        let file_states = FileStates::from_sorted(&data);

        // At root
        // "b" is a directory here (only "b/..." entries exist), so get_at()
        // finds no file named "b"; "b#" is a real file.
        assert_eq!(
            file_states.get_at(RepoPath::root(), RepoPathComponent::new("b")),
            None
        );
        assert_eq!(
            file_states.get_at(RepoPath::root(), RepoPathComponent::new("b#")),
            Some(new_state(4))
        );

        // At prefixed dir
        let prefixed_states =
            file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/c", "b/d/e", "b/d#", "b/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("c")),
            Some(new_state(0))
        );
        // "d" is a directory under "b", "d#" is a file — only the file hits.
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d")),
            None
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b"), RepoPathComponent::new("d#")),
            Some(new_state(2))
        );

        // At nested prefixed dir
        let prefixed_states =
            prefixed_states.prefixed_at(repo_path("b"), RepoPathComponent::new("d"));
        assert_eq!(
            prefixed_states.paths().collect_vec(),
            ["b/d/e"].map(repo_path)
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("e")),
            Some(new_state(1))
        );
        assert_eq!(
            prefixed_states.get_at(repo_path("b/d"), RepoPathComponent::new("#")),
            None
        );

        // At prefixed file
        // Narrowing to a file path yields just that file; nothing lives
        // "inside" it, so a component lookup below it finds nothing.
        let prefixed_states =
            file_states.prefixed_at(RepoPath::root(), RepoPathComponent::new("b#"));
        assert_eq!(prefixed_states.paths().collect_vec(), ["b#"].map(repo_path));
        assert_eq!(
            prefixed_states.get_at(repo_path("b#"), RepoPathComponent::new("#")),
            None
        );
    }
}