jujutsu_lib/working_copy.rs

// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, HashSet};
use std::ffi::OsString;
use std::fs;
use std::fs::{DirEntry, File, Metadata, OpenOptions};
use std::io::{Read, Write};
use std::ops::Bound;
#[cfg(unix)]
use std::os::unix::fs::symlink;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::UNIX_EPOCH;

use once_cell::unsync::OnceCell;
use prost::Message;
use tempfile::NamedTempFile;
use thiserror::Error;

use crate::backend::{
    BackendError, ConflictId, FileId, MillisSinceEpoch, ObjectId, SymlinkId, TreeId, TreeValue,
};
use crate::conflicts::{materialize_conflict, update_conflict_from_content};
use crate::gitignore::GitIgnoreFile;
use crate::lock::FileLock;
use crate::matchers::{DifferenceMatcher, Matcher, PrefixMatcher};
use crate::op_store::{OperationId, WorkspaceId};
use crate::repo_path::{RepoPath, RepoPathComponent, RepoPathJoin};
use crate::store::Store;
use crate::tree::{Diff, Tree};
use crate::tree_builder::TreeBuilder;

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FileType {
    Normal { executable: bool },
    Symlink,
    GitSubmodule,
    Conflict { id: ConflictId },
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FileState {
    pub file_type: FileType,
    pub mtime: MillisSinceEpoch,
    pub size: u64,
    /* TODO: What else do we need here? Git stores a lot of fields.
     * TODO: Could possibly handle case-insensitive file systems keeping an
     *       Option<PathBuf> with the actual path here. */
}

impl FileState {
    fn for_file(executable: bool, size: u64, metadata: &Metadata) -> Self {
        FileState {
            file_type: FileType::Normal { executable },
            mtime: mtime_from_metadata(metadata),
            size,
        }
    }

    fn for_symlink(metadata: &Metadata) -> Self {
        // When using fscrypt, the reported size is not the content size. So if
        // we were to record the content size here (like we do for regular files), we
        // would end up thinking the file has changed every time we snapshot.
        FileState {
            file_type: FileType::Symlink,
            mtime: mtime_from_metadata(metadata),
            size: metadata.len(),
        }
    }

    fn for_conflict(id: ConflictId, size: u64, metadata: &Metadata) -> Self {
        FileState {
            file_type: FileType::Conflict { id },
            mtime: mtime_from_metadata(metadata),
            size,
        }
    }

    fn for_gitsubmodule() -> Self {
        FileState {
            file_type: FileType::GitSubmodule,
            mtime: MillisSinceEpoch(0),
            size: 0,
        }
    }

    #[cfg_attr(unix, allow(dead_code))]
    fn is_executable(&self) -> bool {
        if let FileType::Normal { executable } = &self.file_type {
            *executable
        } else {
            false
        }
    }

    fn mark_executable(&mut self, executable: bool) {
        if let FileType::Normal { .. } = &self.file_type {
            self.file_type = FileType::Normal { executable }
        }
    }
}

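/// Tracks the state of the files in the working copy: the tree that was last
/// checked out or snapshotted (`tree_id`), a per-path `FileState` map used to
/// detect changes on disk, and the sparse patterns that limit which paths are
/// materialized. Persisted in the "tree_state" file under `state_path`.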
pub struct TreeState {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    tree_id: TreeId,
    file_states: BTreeMap<RepoPath, FileState>,
    // Currently only path prefixes
    sparse_patterns: Vec<RepoPath>,
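    // mtime of the "tree_state" file when it was last read or written. A file
    // whose recorded mtime is not older than this cannot safely be assumed
    // unchanged, so it is re-examined on the next snapshot (see
    // update_file_state()).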
    own_mtime: MillisSinceEpoch,
}

fn file_state_from_proto(proto: crate::protos::working_copy::FileState) -> FileState {
    let file_type = match proto.file_type() {
        crate::protos::working_copy::FileType::Normal => FileType::Normal { executable: false },
        crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: true },
        crate::protos::working_copy::FileType::Symlink => FileType::Symlink,
        crate::protos::working_copy::FileType::Conflict => {
            let id = ConflictId::new(proto.conflict_id);
            FileType::Conflict { id }
        }
        crate::protos::working_copy::FileType::GitSubmodule => FileType::GitSubmodule,
    };
    FileState {
        file_type,
        mtime: MillisSinceEpoch(proto.mtime_millis_since_epoch),
        size: proto.size,
    }
}

fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::FileState {
    let mut proto = crate::protos::working_copy::FileState::default();
    let file_type = match &file_state.file_type {
        FileType::Normal { executable: false } => crate::protos::working_copy::FileType::Normal,
        FileType::Normal { executable: true } => crate::protos::working_copy::FileType::Executable,
        FileType::Symlink => crate::protos::working_copy::FileType::Symlink,
        FileType::Conflict { id } => {
            proto.conflict_id = id.to_bytes();
            crate::protos::working_copy::FileType::Conflict
        }
        FileType::GitSubmodule => crate::protos::working_copy::FileType::GitSubmodule,
    };
    proto.file_type = file_type as i32;
    proto.mtime_millis_since_epoch = file_state.mtime.0;
    proto.size = file_state.size;
    proto
}

fn file_states_from_proto(
    proto: &crate::protos::working_copy::TreeState,
) -> BTreeMap<RepoPath, FileState> {
    let mut file_states = BTreeMap::new();
    for (path_str, proto_file_state) in &proto.file_states {
        let path = RepoPath::from_internal_string(path_str.as_str());
        file_states.insert(path, file_state_from_proto(proto_file_state.clone()));
    }
    file_states
}

fn sparse_patterns_from_proto(proto: &crate::protos::working_copy::TreeState) -> Vec<RepoPath> {
    let mut sparse_patterns = vec![];
    if let Some(proto_sparse_patterns) = proto.sparse_patterns.as_ref() {
        for prefix in &proto_sparse_patterns.prefixes {
            sparse_patterns.push(RepoPath::from_internal_string(prefix.as_str()));
        }
    } else {
        // For compatibility with old working copies.
        // TODO: Delete this in late 2022 or so.
        sparse_patterns.push(RepoPath::root());
    }
    sparse_patterns
}

/// Creates intermediate directories from the `working_copy_path` to the
/// `repo_path` parent.
///
/// If an intermediate directory exists and if it is a symlink, this function
/// will return an error. The `working_copy_path` directory may be a symlink.
///
/// Note that this does not prevent TOCTOU bugs caused by concurrent checkouts.
/// Another process may remove the directory created by this function and put a
/// symlink there.
fn create_parent_dirs(working_copy_path: &Path, repo_path: &RepoPath) -> Result<(), CheckoutError> {
    let (_, dir_components) = repo_path
        .components()
        .split_last()
        .expect("repo path shouldn't be root");
    let mut dir_path = working_copy_path.to_owned();
    for c in dir_components {
        dir_path.push(c.as_str());
        match fs::create_dir(&dir_path) {
            Ok(()) => {}
            Err(_)
                if dir_path
                    .symlink_metadata()
                    .map(|m| m.is_dir())
                    .unwrap_or(false) => {}
            Err(err) => {
                return Err(CheckoutError::IoError {
                    message: format!(
                        "Failed to create parent directories for {}",
                        repo_path.to_fs_path(working_copy_path).display(),
                    ),
                    err,
                });
            }
        }
    }
    Ok(())
}

fn mtime_from_metadata(metadata: &Metadata) -> MillisSinceEpoch {
    let time = metadata
        .modified()
        .expect("File mtime not supported on this platform?");
    let since_epoch = time
        .duration_since(UNIX_EPOCH)
        .expect("mtime before unix epoch");

    MillisSinceEpoch(
        i64::try_from(since_epoch.as_millis())
            .expect("mtime billions of years into the future or past"),
    )
}

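/// Derives the `FileState` to record for a path from its filesystem metadata.
/// Returns `None` for paths that are not tracked as files (directories and
/// special files such as sockets or FIFOs).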
fn file_state(metadata: &Metadata) -> Option<FileState> {
    let metadata_file_type = metadata.file_type();
    let file_type = if metadata_file_type.is_dir() {
        None
    } else if metadata_file_type.is_symlink() {
        Some(FileType::Symlink)
    } else if metadata_file_type.is_file() {
        #[cfg(unix)]
        let mode = metadata.permissions().mode();
        #[cfg(windows)]
        let mode = 0;
        if mode & 0o111 != 0 {
            Some(FileType::Normal { executable: true })
        } else {
            Some(FileType::Normal { executable: false })
        }
    } else {
        None
    };
    file_type.map(|file_type| {
        let mtime = mtime_from_metadata(metadata);
        let size = metadata.len();
        FileState {
            file_type,
            mtime,
            size,
        }
    })
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CheckoutStats {
    pub updated_files: u32,
    pub added_files: u32,
    pub removed_files: u32,
}

#[derive(Debug, Error)]
pub enum SnapshotError {
    #[error("{message}: {err}")]
    IoError {
        message: String,
        #[source]
        err: std::io::Error,
    },
    #[error("Working copy path {} is not valid UTF-8", path.to_string_lossy())]
    InvalidUtf8Path { path: OsString },
    #[error("Symlink {path} target is not valid UTF-8")]
    InvalidUtf8SymlinkTarget { path: PathBuf, target: PathBuf },
    #[error("Internal backend error: {0}")]
    InternalBackendError(#[from] BackendError),
}

#[derive(Debug, Error)]
pub enum CheckoutError {
    // The current working-copy commit was deleted, maybe by an overly aggressive GC that happened
    // while the current process was running.
    #[error("Current working-copy commit not found: {source}")]
    SourceNotFound {
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    // Another process checked out a commit while the current process was running (after the
    // working copy was read by the current process).
    #[error("Concurrent checkout")]
    ConcurrentCheckout,
    #[error("{message}: {err:?}")]
    IoError {
        message: String,
        #[source]
        err: std::io::Error,
    },
    #[error("Internal error: {0}")]
    InternalBackendError(#[from] BackendError),
}

impl CheckoutError {
    fn for_stat_error(err: std::io::Error, path: &Path) -> Self {
        CheckoutError::IoError {
            message: format!("Failed to stat file {}", path.display()),
            err,
        }
    }
}

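/// Converts "file already exists" errors into success. Used when writing files
/// for newly included sparse-pattern paths, where an untracked file may
/// already exist on disk and should be kept.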
fn suppress_file_exists_error(orig_err: CheckoutError) -> Result<(), CheckoutError> {
    match orig_err {
        CheckoutError::IoError { err, .. } if err.kind() == std::io::ErrorKind::AlreadyExists => {
            Ok(())
        }
        _ => Err(orig_err),
    }
}

#[derive(Debug, Error)]
pub enum ResetError {
    // The current working-copy commit was deleted, maybe by an overly aggressive GC that happened
    // while the current process was running.
    #[error("Current working-copy commit not found: {source}")]
    SourceNotFound {
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error("Internal error: {0}")]
    InternalBackendError(#[from] BackendError),
}

impl TreeState {
    pub fn current_tree_id(&self) -> &TreeId {
        &self.tree_id
    }

    pub fn file_states(&self) -> &BTreeMap<RepoPath, FileState> {
        &self.file_states
    }

    pub fn sparse_patterns(&self) -> &Vec<RepoPath> {
        &self.sparse_patterns
    }

    fn sparse_matcher(&self) -> Box<dyn Matcher> {
        Box::new(PrefixMatcher::new(&self.sparse_patterns))
    }

    pub fn init(store: Arc<Store>, working_copy_path: PathBuf, state_path: PathBuf) -> TreeState {
        let mut wc = TreeState::empty(store, working_copy_path, state_path);
        wc.save();
        wc
    }

    fn empty(store: Arc<Store>, working_copy_path: PathBuf, state_path: PathBuf) -> TreeState {
        let tree_id = store.empty_tree_id().clone();
        // Canonicalize the working copy path because "repo/." makes libgit2 think that
        // everything should be ignored
        TreeState {
            store,
            working_copy_path: working_copy_path.canonicalize().unwrap(),
            state_path,
            tree_id,
            file_states: BTreeMap::new(),
            sparse_patterns: vec![RepoPath::root()],
            own_mtime: MillisSinceEpoch(0),
        }
    }

    pub fn load(store: Arc<Store>, working_copy_path: PathBuf, state_path: PathBuf) -> TreeState {
        let maybe_file = File::open(state_path.join("tree_state"));
        let file = match maybe_file {
            Err(ref err) if err.kind() == std::io::ErrorKind::NotFound => {
                return TreeState::init(store, working_copy_path, state_path);
            }
            result => result.unwrap(),
        };

        let mut wc = TreeState::empty(store, working_copy_path, state_path);
        wc.read(file);
        wc
    }

    fn update_own_mtime(&mut self) {
        if let Ok(metadata) = self.state_path.join("tree_state").symlink_metadata() {
            self.own_mtime = mtime_from_metadata(&metadata);
        } else {
            self.own_mtime = MillisSinceEpoch(0);
        }
    }

    fn read(&mut self, mut file: File) {
        self.update_own_mtime();
        let mut buf = Vec::new();
        file.read_to_end(&mut buf).unwrap();
        let proto = crate::protos::working_copy::TreeState::decode(&*buf).unwrap();
        self.tree_id = TreeId::new(proto.tree_id.clone());
        self.file_states = file_states_from_proto(&proto);
        self.sparse_patterns = sparse_patterns_from_proto(&proto);
    }

    fn save(&mut self) {
        let mut proto = crate::protos::working_copy::TreeState {
            tree_id: self.tree_id.to_bytes(),
            ..Default::default()
        };
        for (file, file_state) in &self.file_states {
            proto.file_states.insert(
                file.to_internal_file_string(),
                file_state_to_proto(file_state),
            );
        }
        let mut sparse_patterns = crate::protos::working_copy::SparsePatterns::default();
        for path in &self.sparse_patterns {
            sparse_patterns
                .prefixes
                .push(path.to_internal_file_string());
        }
        proto.sparse_patterns = Some(sparse_patterns);

        let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .unwrap();
        // Update our own write time before we rename the file into place, so we
        // know there is no unknown data in it.
        self.update_own_mtime();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        temp_file
            .persist(self.state_path.join("tree_state"))
            .unwrap();
    }

    fn write_file_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<FileId, SnapshotError> {
        let file = File::open(disk_path).map_err(|err| SnapshotError::IoError {
            message: format!("Failed to open file {}", disk_path.display()),
            err,
        })?;
        Ok(self.store.write_file(path, &mut Box::new(file))?)
    }

    fn write_symlink_to_store(
        &self,
        path: &RepoPath,
        disk_path: &Path,
    ) -> Result<SymlinkId, SnapshotError> {
        let target = disk_path
            .read_link()
            .map_err(|err| SnapshotError::IoError {
                message: format!("Failed to read symlink {}", disk_path.display()),
                err,
            })?;
        let str_target =
            target
                .to_str()
                .ok_or_else(|| SnapshotError::InvalidUtf8SymlinkTarget {
                    path: disk_path.to_path_buf(),
                    target: target.clone(),
                })?;
        Ok(self.store.write_symlink(path, str_target)?)
    }

    /// Look for changes to the working copy. If there are any changes, create
    /// a new tree from it.
    pub fn snapshot(&mut self, base_ignores: Arc<GitIgnoreFile>) -> Result<bool, SnapshotError> {
        let sparse_matcher = self.sparse_matcher();
        let mut work = vec![(
            RepoPath::root(),
            self.working_copy_path.clone(),
            base_ignores,
        )];

        let mut tree_builder = self.store.tree_builder(self.tree_id.clone());
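        // Start by assuming that every tracked path (except git submodules) has
        // been deleted, then remove paths from this set as we find them on disk.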
        let mut deleted_files: HashSet<_> = self
            .file_states
            .iter()
            .filter_map(|(path, state)| {
                (state.file_type != FileType::GitSubmodule).then(|| path.clone())
            })
            .collect();
        while let Some((dir, disk_dir, git_ignore)) = work.pop() {
            if sparse_matcher.visit(&dir).is_nothing() {
                continue;
            }
            let git_ignore = git_ignore
                .chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"));
            for maybe_entry in disk_dir.read_dir().unwrap() {
                let entry = maybe_entry.unwrap();
                let file_type = entry.file_type().unwrap();
                let file_name = entry.file_name();
                let name = file_name
                    .to_str()
                    .ok_or_else(|| SnapshotError::InvalidUtf8Path {
                        path: file_name.clone(),
                    })?;
                if name == ".jj" || name == ".git" {
                    continue;
                }
                let sub_path = dir.join(&RepoPathComponent::from(name));
                if let Some(file_state) = self.file_states.get(&sub_path) {
                    if file_state.file_type == FileType::GitSubmodule {
                        continue;
                    }
                }

                if file_type.is_dir() {
                    // If the whole directory is ignored, skip it unless we're already tracking
                    // some file in it.
                    if git_ignore.matches_all_files_in(&sub_path.to_internal_dir_string())
                        && !self.has_files_under(&sub_path)
                    {
                        continue;
                    }
                    work.push((sub_path, entry.path(), git_ignore.clone()));
                } else {
                    deleted_files.remove(&sub_path);
                    if sparse_matcher.matches(&sub_path) {
                        self.update_file_state(
                            sub_path,
                            &entry,
                            git_ignore.as_ref(),
                            &mut tree_builder,
                        )?;
                    }
                }
            }
        }

        for file in &deleted_files {
            self.file_states.remove(file);
            tree_builder.remove(file.clone());
        }
        let changed = tree_builder.has_overrides();
        self.tree_id = tree_builder.write_tree();
        Ok(changed)
    }

    fn has_files_under(&self, dir: &RepoPath) -> bool {
        // TODO: This is pretty ugly... Also, we should
        // optimize it to check exactly the already-tracked files (we know that
        // we won't have to consider new files in the directory).
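        // "\0" sorts before any valid path component, so this range starts at
        // the first tracked path that could possibly be under `dir`.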
        let first_file_in_dir = dir.join(&RepoPathComponent::from("\0"));
        match self
            .file_states
            .range((Bound::Included(&first_file_in_dir), Bound::Unbounded))
            .next()
        {
            Some((subdir_file, _)) => dir.contains(subdir_file),
            None => {
                // There are no tracked paths at all after `dir/` in alphabetical order, so
                // there are no paths under `dir/`.
                false
            }
        }
    }

    fn update_file_state(
        &mut self,
        repo_path: RepoPath,
        dir_entry: &DirEntry,
        git_ignore: &GitIgnoreFile,
        tree_builder: &mut TreeBuilder,
    ) -> Result<(), SnapshotError> {
        let maybe_current_file_state = self.file_states.get_mut(&repo_path);
        if maybe_current_file_state.is_none()
            && git_ignore.matches_file(&repo_path.to_internal_file_string())
        {
            // If it wasn't already tracked and it matches the ignored paths, then
            // ignore it.
            return Ok(());
        }

        let disk_path = dir_entry.path();
        let metadata = dir_entry.metadata().map_err(|err| SnapshotError::IoError {
            message: format!("Failed to stat file {}", disk_path.display()),
            err,
        })?;
        let maybe_new_file_state = file_state(&metadata);
        match (maybe_current_file_state, maybe_new_file_state) {
            (None, None) => {
                // Untracked Unix socket or such
            }
            (Some(_), None) => {
                // Tracked file replaced by Unix socket or such
                self.file_states.remove(&repo_path);
                tree_builder.remove(repo_path);
            }
            (None, Some(new_file_state)) => {
                // untracked
                let file_type = new_file_state.file_type.clone();
                self.file_states.insert(repo_path.clone(), new_file_state);
                let file_value = self.write_path_to_store(&repo_path, &disk_path, file_type)?;
                tree_builder.set(repo_path, file_value);
            }
            (Some(current_file_state), Some(mut new_file_state)) => {
                #[cfg(windows)]
                {
                    // On Windows, we preserve the state we had recorded
                    // when we wrote the file.
                    new_file_state.mark_executable(current_file_state.is_executable());
                }
                // If the file's mtime was set at the same time as this state file's own mtime,
                // then we don't know if the file was modified before or after this state file.
                // We set the file's mtime to 0 to simplify later code.
                if current_file_state.mtime >= self.own_mtime {
                    current_file_state.mtime = MillisSinceEpoch(0);
                }
                let mut clean = current_file_state == &new_file_state;
                // Because the file system doesn't have a built-in way of indicating a conflict,
                // we look at the current state instead. If that indicates that the path has a
                // conflict and the contents are now a file, then we interpret that as if it is
                // still a conflict.
                if !clean
                    && matches!(current_file_state.file_type, FileType::Conflict { .. })
                    && matches!(new_file_state.file_type, FileType::Normal { .. })
                {
                    // If the only change is that the type changed from conflict to regular file,
                    // then we consider it clean (the same as a regular file being clean, it's
                    // just that the file system doesn't have a conflict type).
                    if new_file_state.mtime == current_file_state.mtime
                        && new_file_state.size == current_file_state.size
                    {
                        clean = true;
                    } else {
                        // If the file contained a conflict before and is now a normal file on disk
                        // (new_file_state cannot be a Conflict at this point), we try to parse
                        // any conflict markers in the file into a conflict.
                        if let (FileType::Conflict { id }, FileType::Normal { executable: _ }) =
                            (&current_file_state.file_type, &new_file_state.file_type)
                        {
                            let mut file = File::open(&disk_path).unwrap();
                            let mut content = vec![];
                            file.read_to_end(&mut content).unwrap();
                            if let Some(new_conflict_id) = update_conflict_from_content(
                                self.store.as_ref(),
                                &repo_path,
                                id,
                                &content,
                            )
                            .unwrap()
                            {
                                new_file_state.file_type = FileType::Conflict {
                                    id: new_conflict_id.clone(),
                                };
                                *current_file_state = new_file_state;
                                tree_builder.set(repo_path, TreeValue::Conflict(new_conflict_id));
                                return Ok(());
                            }
                        }
                    }
                }
                if !clean {
                    let file_type = new_file_state.file_type.clone();
                    *current_file_state = new_file_state;
                    let file_value = self.write_path_to_store(&repo_path, &disk_path, file_type)?;
                    tree_builder.set(repo_path, file_value);
                }
            }
        };
        Ok(())
    }

    fn write_path_to_store(
        &self,
        repo_path: &RepoPath,
        disk_path: &Path,
        file_type: FileType,
    ) -> Result<TreeValue, SnapshotError> {
        match file_type {
            FileType::Normal { executable } => {
                let id = self.write_file_to_store(repo_path, disk_path)?;
                Ok(TreeValue::File { id, executable })
            }
            FileType::Symlink => {
                let id = self.write_symlink_to_store(repo_path, disk_path)?;
                Ok(TreeValue::Symlink(id))
            }
            FileType::Conflict { .. } => panic!("conflicts should be handled by the caller"),
            FileType::GitSubmodule => panic!("git submodule cannot be written to store"),
        }
    }

    fn write_file(
        &self,
        disk_path: &Path,
        path: &RepoPath,
        id: &FileId,
        executable: bool,
    ) -> Result<FileState, CheckoutError> {
        create_parent_dirs(&self.working_copy_path, path)?;
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::IoError {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err,
            })?;
        let mut contents = self.store.read_file(path, id)?;
        let size =
            std::io::copy(&mut contents, &mut file).map_err(|err| CheckoutError::IoError {
                message: format!("Failed to write file {}", disk_path.display()),
                err,
            })?;
        self.set_executable(disk_path, executable)?;
        // Read the file state from the file descriptor. That way, we know that the
        // file exists and is of the expected type, and the stat information is most
        // likely accurate, except for other processes modifying the file concurrently.
        // (The mtime is set at write time and won't change when we close the file.)
        let metadata = file
            .metadata()
            .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        Ok(FileState::for_file(executable, size, &metadata))
    }

    #[cfg_attr(windows, allow(unused_variables))]
    fn write_symlink(
        &self,
        disk_path: &Path,
        path: &RepoPath,
        id: &SymlinkId,
    ) -> Result<FileState, CheckoutError> {
        create_parent_dirs(&self.working_copy_path, path)?;
        let target = self.store.read_symlink(path, id)?;
        #[cfg(windows)]
        {
            println!("ignoring symlink at {:?}", path);
        }
        #[cfg(unix)]
        {
            let target = PathBuf::from(&target);
            symlink(&target, disk_path).map_err(|err| CheckoutError::IoError {
                message: format!(
                    "Failed to create symlink from {} to {}",
                    disk_path.display(),
                    target.display()
                ),
                err,
            })?;
        }
        let metadata = disk_path
            .symlink_metadata()
            .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        Ok(FileState::for_symlink(&metadata))
    }

    fn write_conflict(
        &self,
        disk_path: &Path,
        path: &RepoPath,
        id: &ConflictId,
    ) -> Result<FileState, CheckoutError> {
        create_parent_dirs(&self.working_copy_path, path)?;
        let conflict = self.store.read_conflict(path, id)?;
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true) // Don't overwrite un-ignored file. Don't follow symlink.
            .open(disk_path)
            .map_err(|err| CheckoutError::IoError {
                message: format!("Failed to open file {} for writing", disk_path.display()),
                err,
            })?;
        let mut conflict_data = vec![];
        materialize_conflict(self.store.as_ref(), path, &conflict, &mut conflict_data)
            .expect("Failed to materialize conflict to in-memory buffer");
        file.write_all(&conflict_data)
            .map_err(|err| CheckoutError::IoError {
                message: format!("Failed to write conflict to file {}", disk_path.display()),
                err,
            })?;
        let size = conflict_data.len() as u64;
        // TODO: Set the executable bit correctly (when possible) and preserve that on
        // Windows like we do with the executable bit for regular files.
        let metadata = file
            .metadata()
            .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        Ok(FileState::for_conflict(id.clone(), size, &metadata))
    }

    #[cfg_attr(windows, allow(unused_variables))]
    fn set_executable(&self, disk_path: &Path, executable: bool) -> Result<(), CheckoutError> {
        #[cfg(unix)]
        {
            let mode = if executable { 0o755 } else { 0o644 };
            fs::set_permissions(disk_path, fs::Permissions::from_mode(mode))
                .map_err(|err| CheckoutError::for_stat_error(err, disk_path))?;
        }
        Ok(())
    }

    pub fn check_out(&mut self, new_tree: &Tree) -> Result<CheckoutStats, CheckoutError> {
        let old_tree = self
            .store
            .get_tree(&RepoPath::root(), &self.tree_id)
            .map_err(|err| match err {
                err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                    source: Box::new(err),
                },
                other => CheckoutError::InternalBackendError(other),
            })?;
        let stats = self.update(&old_tree, new_tree, self.sparse_matcher().as_ref(), Err)?;
        self.tree_id = new_tree.id().clone();
        Ok(stats)
    }

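    /// Switches to the given sparse patterns: paths that become included are
    /// written out to disk and paths that become excluded are removed, without
    /// changing the checked-out tree itself.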
    pub fn set_sparse_patterns(
        &mut self,
        sparse_patterns: Vec<RepoPath>,
    ) -> Result<CheckoutStats, CheckoutError> {
        let tree = self
            .store
            .get_tree(&RepoPath::root(), &self.tree_id)
            .map_err(|err| match err {
                err @ BackendError::ObjectNotFound { .. } => CheckoutError::SourceNotFound {
                    source: Box::new(err),
                },
                other => CheckoutError::InternalBackendError(other),
            })?;
        let old_matcher = PrefixMatcher::new(&self.sparse_patterns);
        let new_matcher = PrefixMatcher::new(&sparse_patterns);
        let added_matcher = DifferenceMatcher::new(&new_matcher, &old_matcher);
        let removed_matcher = DifferenceMatcher::new(&old_matcher, &new_matcher);
        let empty_tree = Tree::null(self.store.clone(), RepoPath::root());
        let added_stats = self.update(
            &empty_tree,
            &tree,
            &added_matcher,
            suppress_file_exists_error, // Keep un-ignored file and mark it as modified
        )?;
        let removed_stats = self.update(&tree, &empty_tree, &removed_matcher, Err)?;
        self.sparse_patterns = sparse_patterns;
        assert_eq!(added_stats.updated_files, 0);
        assert_eq!(added_stats.removed_files, 0);
        assert_eq!(removed_stats.updated_files, 0);
        assert_eq!(removed_stats.added_files, 0);
        Ok(CheckoutStats {
            updated_files: 0,
            added_files: added_stats.added_files,
            removed_files: removed_stats.removed_files,
        })
    }

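    /// Applies the diff between `old_tree` and `new_tree` to the working copy,
    /// restricted to paths matched by `matcher`. `handle_error` decides what to
    /// do with per-path errors: pass `Err` to fail fast, or e.g.
    /// `suppress_file_exists_error` to tolerate files that already exist on
    /// disk.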
    fn update(
        &mut self,
        old_tree: &Tree,
        new_tree: &Tree,
        matcher: &dyn Matcher,
        mut handle_error: impl FnMut(CheckoutError) -> Result<(), CheckoutError>,
    ) -> Result<CheckoutStats, CheckoutError> {
        let mut stats = CheckoutStats {
            updated_files: 0,
            added_files: 0,
            removed_files: 0,
        };
        let mut apply_diff = |path: RepoPath, diff: Diff<TreeValue>| -> Result<(), CheckoutError> {
            let disk_path = path.to_fs_path(&self.working_copy_path);

            // TODO: Check that the file has not changed before overwriting/removing it.
            match diff {
                Diff::Removed(_before) => {
                    fs::remove_file(&disk_path).ok();
                    let mut parent_dir = disk_path.parent().unwrap();
                    loop {
                        if fs::remove_dir(parent_dir).is_err() {
                            break;
                        }
                        parent_dir = parent_dir.parent().unwrap();
                    }
                    self.file_states.remove(&path);
                    stats.removed_files += 1;
                }
                Diff::Added(after) => {
                    let file_state = match after {
                        TreeValue::File { id, executable } => {
                            self.write_file(&disk_path, &path, &id, executable)?
                        }
                        TreeValue::Symlink(id) => self.write_symlink(&disk_path, &path, &id)?,
                        TreeValue::Conflict(id) => self.write_conflict(&disk_path, &path, &id)?,
                        TreeValue::GitSubmodule(_id) => {
                            println!("ignoring git submodule at {path:?}");
                            FileState::for_gitsubmodule()
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };
                    self.file_states.insert(path, file_state);
                    stats.added_files += 1;
                }
                Diff::Modified(
                    TreeValue::File {
                        id: old_id,
                        executable: old_executable,
                    },
                    TreeValue::File { id, executable },
                ) if id == old_id => {
                    // Optimization for when only the executable bit changed
                    assert_ne!(executable, old_executable);
                    self.set_executable(&disk_path, executable)?;
                    let file_state = self.file_states.get_mut(&path).unwrap();
                    file_state.mark_executable(executable);
                    stats.updated_files += 1;
                }
                Diff::Modified(before, after) => {
                    fs::remove_file(&disk_path).ok();
                    let file_state = match (before, after) {
                        (_, TreeValue::File { id, executable }) => {
                            self.write_file(&disk_path, &path, &id, executable)?
                        }
                        (_, TreeValue::Symlink(id)) => {
                            self.write_symlink(&disk_path, &path, &id)?
                        }
                        (_, TreeValue::Conflict(id)) => {
                            self.write_conflict(&disk_path, &path, &id)?
                        }
                        (_, TreeValue::GitSubmodule(_id)) => {
                            println!("ignoring git submodule at {path:?}");
                            FileState::for_gitsubmodule()
                        }
                        (_, TreeValue::Tree(_id)) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };

                    self.file_states.insert(path, file_state);
                    stats.updated_files += 1;
                }
            }
            Ok(())
        };

        for (path, diff) in old_tree.diff(new_tree, matcher) {
            apply_diff(path, diff).or_else(&mut handle_error)?;
        }
        Ok(stats)
    }

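    /// Records `new_tree` as the current tree without touching the files on
    /// disk. File states are reset to a placeholder (mtime 0, size 0), so the
    /// next snapshot will re-read the affected paths from disk.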
    pub fn reset(&mut self, new_tree: &Tree) -> Result<(), ResetError> {
        let old_tree = self
            .store
            .get_tree(&RepoPath::root(), &self.tree_id)
            .map_err(|err| match err {
                err @ BackendError::ObjectNotFound { .. } => ResetError::SourceNotFound {
                    source: Box::new(err),
                },
                other => ResetError::InternalBackendError(other),
            })?;

        for (path, diff) in old_tree.diff(new_tree, self.sparse_matcher().as_ref()) {
            match diff {
                Diff::Removed(_before) => {
                    self.file_states.remove(&path);
                }
                Diff::Added(after) | Diff::Modified(_, after) => {
                    let file_type = match after {
                        TreeValue::File { id: _, executable } => FileType::Normal { executable },
                        TreeValue::Symlink(_id) => FileType::Symlink,
                        TreeValue::Conflict(id) => FileType::Conflict { id },
                        TreeValue::GitSubmodule(_id) => {
                            println!("ignoring git submodule at {path:?}");
                            FileType::GitSubmodule
                        }
                        TreeValue::Tree(_id) => {
                            panic!("unexpected tree entry in diff at {path:?}");
                        }
                    };
                    let file_state = FileState {
                        file_type,
                        mtime: MillisSinceEpoch(0),
                        size: 0,
                    };
                    self.file_states.insert(path.clone(), file_state);
                }
            }
        }
        self.tree_id = new_tree.id().clone();
        Ok(())
    }
}

/// Working copy state stored in "checkout" file.
#[derive(Clone, Debug)]
struct CheckoutState {
    operation_id: OperationId,
    workspace_id: WorkspaceId,
}

pub struct WorkingCopy {
    store: Arc<Store>,
    working_copy_path: PathBuf,
    state_path: PathBuf,
    checkout_state: OnceCell<CheckoutState>,
    tree_state: OnceCell<TreeState>,
}

impl WorkingCopy {
    /// Initializes a new working copy at `working_copy_path`. The working
    /// copy's state will be stored in the `state_path` directory. The working
    /// copy will have the empty tree checked out.
    pub fn init(
        store: Arc<Store>,
        working_copy_path: PathBuf,
        state_path: PathBuf,
        operation_id: OperationId,
        workspace_id: WorkspaceId,
    ) -> WorkingCopy {
        let proto = crate::protos::working_copy::Checkout {
            operation_id: operation_id.to_bytes(),
            workspace_id: workspace_id.as_str().to_string(),
            ..Default::default()
        };
        let mut file = OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(state_path.join("checkout"))
            .unwrap();
        file.write_all(&proto.encode_to_vec()).unwrap();
        let tree_state =
            TreeState::init(store.clone(), working_copy_path.clone(), state_path.clone());
        WorkingCopy {
            store,
            working_copy_path,
            state_path,
            checkout_state: OnceCell::new(),
            tree_state: OnceCell::with_value(tree_state),
        }
    }

    pub fn load(store: Arc<Store>, working_copy_path: PathBuf, state_path: PathBuf) -> WorkingCopy {
        WorkingCopy {
            store,
            working_copy_path,
            state_path,
            checkout_state: OnceCell::new(),
            tree_state: OnceCell::new(),
        }
    }

    pub fn working_copy_path(&self) -> &Path {
        &self.working_copy_path
    }

    pub fn state_path(&self) -> &Path {
        &self.state_path
    }

    fn write_proto(&self, proto: crate::protos::working_copy::Checkout) {
        let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
        temp_file
            .as_file_mut()
            .write_all(&proto.encode_to_vec())
            .unwrap();
        // TODO: Retry if persisting fails (it will on Windows if the file happened to
        // be open for read).
        temp_file.persist(self.state_path.join("checkout")).unwrap();
    }

    fn checkout_state(&self) -> &CheckoutState {
        self.checkout_state.get_or_init(|| {
            let buf = fs::read(self.state_path.join("checkout")).unwrap();
            let proto = crate::protos::working_copy::Checkout::decode(&*buf).unwrap();
            CheckoutState {
                operation_id: OperationId::new(proto.operation_id),
                workspace_id: if proto.workspace_id.is_empty() {
                    // For compatibility with old working copies.
                    // TODO: Delete in mid 2022 or so
                    WorkspaceId::default()
                } else {
                    WorkspaceId::new(proto.workspace_id)
                },
            }
        })
    }

    fn checkout_state_mut(&mut self) -> &mut CheckoutState {
        self.checkout_state(); // ensure loaded
        self.checkout_state.get_mut().unwrap()
    }

    pub fn operation_id(&self) -> &OperationId {
        &self.checkout_state().operation_id
    }

    pub fn workspace_id(&self) -> &WorkspaceId {
        &self.checkout_state().workspace_id
    }

    fn tree_state(&self) -> &TreeState {
        self.tree_state.get_or_init(|| {
            TreeState::load(
                self.store.clone(),
                self.working_copy_path.clone(),
                self.state_path.clone(),
            )
        })
    }

    fn tree_state_mut(&mut self) -> &mut TreeState {
        self.tree_state(); // ensure loaded
        self.tree_state.get_mut().unwrap()
    }

    pub fn current_tree_id(&self) -> &TreeId {
        self.tree_state().current_tree_id()
    }

    pub fn file_states(&self) -> &BTreeMap<RepoPath, FileState> {
        self.tree_state().file_states()
    }

    pub fn sparse_patterns(&self) -> &[RepoPath] {
        self.tree_state().sparse_patterns()
    }

    fn save(&mut self) {
        self.write_proto(crate::protos::working_copy::Checkout {
            operation_id: self.operation_id().to_bytes(),
            workspace_id: self.workspace_id().as_str().to_string(),
            ..Default::default()
        });
    }

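    /// Locks the working copy on disk and returns a `LockedWorkingCopy` for
    /// making changes. The lock is released by calling `finish()` or
    /// `discard()`. A typical snapshot looks roughly like this (a sketch;
    /// `wc`, `base_ignores`, and `operation_id` are assumed to come from the
    /// caller):
    ///
    /// ```ignore
    /// let mut locked_wc = wc.start_mutation();
    /// let new_tree_id = locked_wc.snapshot(base_ignores)?;
    /// locked_wc.finish(operation_id);
    /// ```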
    pub fn start_mutation(&mut self) -> LockedWorkingCopy {
        let lock_path = self.state_path.join("working_copy.lock");
        let lock = FileLock::lock(lock_path);

        // Re-read from disk after taking the lock
        self.checkout_state.take();
        // TODO: It's expensive to reload the whole tree. We should first check if it
        // has changed.
        self.tree_state.take();
        let old_operation_id = self.operation_id().clone();
        let old_tree_id = self.current_tree_id().clone();

        LockedWorkingCopy {
            wc: self,
            lock,
            old_operation_id,
            old_tree_id,
            tree_state_dirty: false,
            closed: false,
        }
    }

    pub fn check_out(
        &mut self,
        operation_id: OperationId,
        old_tree_id: Option<&TreeId>,
        new_tree: &Tree,
    ) -> Result<CheckoutStats, CheckoutError> {
        let mut locked_wc = self.start_mutation();
        // Check if the current working-copy commit has changed on disk compared to what
        // the caller expected. It's safe to check out another commit
        // regardless, but it's probably not what the caller wanted, so we let
        // them know.
        if let Some(old_tree_id) = old_tree_id {
            if *old_tree_id != locked_wc.old_tree_id {
                locked_wc.discard();
                return Err(CheckoutError::ConcurrentCheckout);
            }
        }
        let stats = locked_wc.check_out(new_tree)?;
        locked_wc.finish(operation_id);
        Ok(stats)
    }
}

/// A working copy that's locked on disk. The lock is held until you call
/// `finish()` or `discard()`.
pub struct LockedWorkingCopy<'a> {
    wc: &'a mut WorkingCopy,
    #[allow(dead_code)]
    lock: FileLock,
    old_operation_id: OperationId,
    old_tree_id: TreeId,
    tree_state_dirty: bool,
    closed: bool,
}

impl LockedWorkingCopy<'_> {
    /// The operation at the time the lock was taken
    pub fn old_operation_id(&self) -> &OperationId {
        &self.old_operation_id
    }

    /// The tree at the time the lock was taken
    pub fn old_tree_id(&self) -> &TreeId {
        &self.old_tree_id
    }

    // The base_ignores are passed in here rather than being set on the TreeState
    // because the TreeState may be long-lived if the library is used in a
    // long-lived process.
    pub fn snapshot(&mut self, base_ignores: Arc<GitIgnoreFile>) -> Result<TreeId, SnapshotError> {
        let tree_state = self.wc.tree_state_mut();
        self.tree_state_dirty |= tree_state.snapshot(base_ignores)?;
        Ok(tree_state.current_tree_id().clone())
    }

    pub fn check_out(&mut self, new_tree: &Tree) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with the new TreeId so we can
        // continue an interrupted update if we find such a file.
        let stats = self.wc.tree_state_mut().check_out(new_tree)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    pub fn reset(&mut self, new_tree: &Tree) -> Result<(), ResetError> {
        self.wc.tree_state_mut().reset(new_tree)?;
        self.tree_state_dirty = true;
        Ok(())
    }

    pub fn sparse_patterns(&self) -> &[RepoPath] {
        self.wc.sparse_patterns()
    }

    pub fn set_sparse_patterns(
        &mut self,
        new_sparse_patterns: Vec<RepoPath>,
    ) -> Result<CheckoutStats, CheckoutError> {
        // TODO: Write a "pending_checkout" file with new sparse patterns so we can
        // continue an interrupted update if we find such a file.
        let stats = self
            .wc
            .tree_state_mut()
            .set_sparse_patterns(new_sparse_patterns)?;
        self.tree_state_dirty = true;
        Ok(stats)
    }

    pub fn finish(mut self, operation_id: OperationId) {
        assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.current_tree_id());
        if self.tree_state_dirty {
            self.wc.tree_state_mut().save();
        }
        if self.old_operation_id != operation_id {
            self.wc.checkout_state_mut().operation_id = operation_id;
            self.wc.save();
        }
        // TODO: Clear the "pending_checkout" file here.
        self.tree_state_dirty = false;
        self.closed = true;
    }

    pub fn discard(mut self) {
        // Undo the changes in memory
        self.wc.tree_state.take();
        self.tree_state_dirty = false;
        self.closed = true;
    }
}

impl Drop for LockedWorkingCopy<'_> {
    fn drop(&mut self) {
        if !self.closed && !std::thread::panicking() {
            eprintln!("BUG: Working copy lock was dropped without being closed.");
        }
    }
}