xvc_file/recheck/
mod.rs

1//! Data structures and functions for `xvc file recheck`.
2//!
3//! - [RecheckCLI] describes the command line options.
4//! - [cmd_recheck] is the entry point for the command line.
5use std::collections::HashSet;
6use std::thread::JoinHandle;
7use std::{fs, thread};
8
9use crate::common::compare::{diff_content_digest, diff_recheck_method, diff_xvc_path_metadata};
10use crate::common::gitignore::{make_ignore_handler, IgnoreOp};
11use crate::common::{
12    load_targets_from_store, only_file_targets, xvc_path_metadata_map_from_disk, FileTextOrBinary,
13};
14use crate::{common::recheck_from_cache, Result};
15use clap::Parser;
16use clap_complete::ArgValueCompleter;
17use crossbeam_channel::Sender;
18use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
19use xvc_core::{FromConfigKey, UpdateFromXvcConfig, XvcConfig, XvcConfigResult};
20
21use xvc_core::util::completer::{strum_variants_completer, xvc_path_completer};
22use xvc_core::{
23    apply_diff, ContentDigest, Diff, DiffStore, HashAlgorithm, RecheckMethod, XvcCachePath,
24    XvcMetadata, XvcPath, XvcRoot,
25};
26use xvc_core::{error, info, uwr, warn, XvcOutputSender};
27use xvc_core::{HStore, XvcEntity, XvcStore};
28
/// Check out file from cache by a copy or link
///
/// There are three conditions to recheck a file:
///
/// - If the workspace copy is missing.
/// - If the workspace copy is not changed but the user wants to change recheck method. (e.g. from copy
///   to symlink.)
/// - If the `--force` is set.
///
/// If the workspace copy of a file is changed, this command doesn't overwrite it by default. Set
/// `--force` to do so.
// NOTE: clap's derive renders the `///` comments on this struct and on its fields as the
// command's help text, so they are user-facing strings. Maintainer notes go in `//` comments.
#[derive(Debug, Clone, PartialEq, Eq, Parser)]
#[command(rename_all = "kebab-case", author, version)]
pub struct RecheckCLI {
    /// How to track the file contents in cache: One of copy, symlink, hardlink, reflink.
    ///
    /// Note: Reflink support requires "reflink" feature to be enabled and uses copy if the
    /// underlying file system doesn't support it.
    // Optional on the command line and also reachable through the `as` alias. When it is
    // `None`, `update_from_conf` replaces it with `RecheckMethod::from_conf`, so the value is
    // always `Some` after that call.
    #[arg(long, alias = "as", add = ArgValueCompleter::new(strum_variants_completer::<RecheckMethod>) )]
    pub recheck_method: Option<RecheckMethod>,

    /// Don't use parallelism
    // `update_from_conf` also sets this when the `file.track.no_parallel` configuration value
    // is true.
    #[arg(long)]
    pub no_parallel: bool,

    /// Force even if target exists.
    // Taken from the command line only; `update_from_conf` copies it unchanged.
    #[arg(long)]
    pub force: bool,

    /// Files/directories to recheck
    // Positional arguments; `None` when the user gives none. `cmd_recheck` hands the value to
    // `load_targets_from_store`, which decides what an absent list means.
    #[arg(add = ArgValueCompleter::new(xvc_path_completer))]
    pub targets: Option<Vec<String>>,
}
62
63impl UpdateFromXvcConfig for RecheckCLI {
64    fn update_from_conf(self, conf: &XvcConfig) -> XvcConfigResult<Box<Self>> {
65        let recheck_method = self
66            .recheck_method
67            .unwrap_or_else(|| RecheckMethod::from_conf(conf));
68        let no_parallel = self.no_parallel || conf.get_bool("file.track.no_parallel")?.option;
69
70        let force = self.force;
71
72        Ok(Box::new(Self {
73            targets: self.targets,
74            recheck_method: Some(recheck_method),
75            force,
76            no_parallel,
77        }))
78    }
79}
80
81/// Run `xvc file recheck` command on the repository `xvc_root` with `cli_opts` options.
82///
83/// If [`RecheckCLI.targets`] is empty, uses all paths in the repository as targets.
84///
85/// Uses [PathComparisonParams] to get the overview of all elements in the repository.
86/// After getting the list of file targets, runs either [recheck_serial] or [recheck_parallel].
87pub fn cmd_recheck(
88    output_snd: &XvcOutputSender,
89    xvc_root: &XvcRoot,
90    cli_opts: RecheckCLI,
91) -> Result<()> {
92    let conf = xvc_root.config();
93    // We copy this before
94    let requested_recheck_method = cli_opts.recheck_method;
95
96    let opts = cli_opts.update_from_conf(conf)?;
97    let current_dir = conf.current_dir()?;
98    let targets = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?;
99
100    let stored_xvc_path_store = xvc_root.load_store::<XvcPath>()?;
101    let stored_xvc_metadata_store = xvc_root.load_store::<XvcMetadata>()?;
102    let target_files = only_file_targets(&stored_xvc_metadata_store, &targets)?;
103    let target_xvc_path_metadata_map = xvc_path_metadata_map_from_disk(xvc_root, &target_files);
104
105    let stored_recheck_method_store = xvc_root.load_store::<RecheckMethod>()?;
106    let stored_content_digest_store = xvc_root.load_store::<ContentDigest>()?;
107    let entities: HashSet<XvcEntity> = target_files.keys().copied().collect();
108    let default_recheck_method = RecheckMethod::from_conf(xvc_root.config());
109    let recheck_method_diff = diff_recheck_method(
110        default_recheck_method,
111        &stored_recheck_method_store,
112        requested_recheck_method,
113        &entities,
114    );
115    let mut recheck_method_targets = recheck_method_diff.filter(|_, d| d.changed());
116
117    let xvc_path_metadata_diff = diff_xvc_path_metadata(
118        xvc_root,
119        &stored_xvc_path_store,
120        &stored_xvc_metadata_store,
121        &target_xvc_path_metadata_map,
122    );
123    let xvc_path_diff: DiffStore<XvcPath> = xvc_path_metadata_diff.0;
124    let xvc_metadata_diff: DiffStore<XvcMetadata> = xvc_path_metadata_diff.1;
125
126    let algorithm = HashAlgorithm::from_conf(conf);
127    let stored_text_or_binary_store = xvc_root.load_store::<FileTextOrBinary>()?;
128
129    let content_digest_diff = diff_content_digest(
130        output_snd,
131        xvc_root,
132        &stored_xvc_path_store,
133        &stored_xvc_metadata_store,
134        &stored_content_digest_store,
135        &stored_text_or_binary_store,
136        &xvc_path_diff,
137        &xvc_metadata_diff,
138        None,
139        Some(algorithm),
140        !opts.no_parallel,
141    );
142
143    recheck_method_targets.retain(|xe, _| {
144        if content_digest_diff.contains_key(xe)
145            && matches!(
146                content_digest_diff[xe],
147                Diff::<ContentDigest>::Different { .. }
148            )
149        {
150            let output_snd = output_snd.clone();
151            let xp = &stored_xvc_path_store[xe];
152            error!(
153                output_snd,
154                "{} has changed on disk. Either carry in, force, or delete the target to recheck. ",
155                xp
156            );
157            false
158        } else {
159            true
160        }
161    });
162
163    let no_digest_targets =
164        content_digest_diff.filter(|_, d| matches!(d, Diff::ActualMissing { .. }));
165
166    // We recheck files
167    // - if they are not in the workspace
168    // - if their recheck method is different from the current recheck method
169    // - if they are in the workspace but force is set
170
171    let files_to_recheck = target_files.filter(|xe, _| {
172        opts.force || recheck_method_targets.contains_key(xe) || no_digest_targets.contains_key(xe)
173    });
174
175    // We only record the diffs if they are in files to recheck
176    let recordable_recheck_method_diff =
177        recheck_method_diff.subset(files_to_recheck.keys().copied())?;
178    let recordable_content_digest_diff =
179        content_digest_diff.subset(files_to_recheck.keys().copied())?;
180
181    let updated_recheck_method_store = apply_diff(
182        &stored_recheck_method_store,
183        &recordable_recheck_method_diff,
184        true,
185        false,
186    )?;
187
188    let updated_content_digest_store = apply_diff(
189        &stored_content_digest_store,
190        &recordable_content_digest_diff,
191        true,
192        false,
193    )?;
194
195    recheck(
196        output_snd,
197        xvc_root,
198        &files_to_recheck,
199        &updated_recheck_method_store,
200        &updated_content_digest_store,
201        opts.no_parallel,
202    )?;
203
204    xvc_root.save_store(&updated_recheck_method_store)?;
205    xvc_root.save_store(&updated_content_digest_store)?;
206
207    Ok(())
208}
209
/// Recheck messages to be sent to the channel created by [`make_recheck_handler`].
///
/// The handler thread matches on the variant of every message it receives and runs the
/// corresponding operation.
pub enum RecheckOperation {
    /// Recheck message to copy/link the cached content described by `content_digest` to `xvc_path`.
    ///
    /// The handler builds the cache path from `xvc_path` and `content_digest` with
    /// [`XvcCachePath::new`] and passes both paths to `recheck_from_cache`.
    Recheck {
        /// The workspace path to recheck, i.e. the destination of the operation.
        xvc_path: XvcPath,
        /// The content digest of the file to recheck. Together with `xvc_path`, it determines
        /// the cache path the content is taken from.
        content_digest: ContentDigest,
        /// The recheck method that defines whether to recheck by copy, hardlink, symlink, reflink.
        recheck_method: RecheckMethod,
    },
}
222
/// The message type carried by the recheck channel.
///
/// Messages are wrapped in `Option`: `Some(op)` asks the handler to run `op`, and `None` ends
/// the receiving loop in [`make_recheck_handler`] so that the handler thread can be joined.
pub type RecheckOp = Option<RecheckOperation>;
225
226/// Build a recheck handler in a separate thread and connect it with a channel.
227/// You must build an ignore writer with [`make_ignore_handler`] before building this.
228/// All rechecked files are gitignored using given `ignore_handler`.
229/// Use the returned channel to send [`RecheckOp`] messages to recheck files, then send `None` to the channel to exit
230/// from the loop and join the returned thread.
231pub fn make_recheck_handler(
232    output_snd: &XvcOutputSender,
233    xvc_root: &XvcRoot,
234    ignore_writer: &Sender<IgnoreOp>,
235) -> Result<(Sender<RecheckOp>, JoinHandle<()>)> {
236    let (recheck_op_snd, recheck_op_rvc) = crossbeam_channel::bounded(crate::CHANNEL_CAPACITY);
237    let output_snd = output_snd.clone();
238    let xvc_root = xvc_root.clone();
239    let ignore_handler = ignore_writer.clone();
240
241    let handle = thread::spawn(move || {
242        while let Ok(Some(op)) = recheck_op_rvc.recv() {
243            match op {
244                RecheckOperation::Recheck {
245                    xvc_path,
246                    content_digest,
247                    recheck_method,
248                } => {
249                    let cache_path = XvcCachePath::new(&xvc_path, &content_digest).unwrap();
250                    uwr!(
251                        recheck_from_cache(
252                            &output_snd,
253                            &xvc_root,
254                            &xvc_path,
255                            &cache_path,
256                            recheck_method,
257                            &ignore_handler,
258                        ),
259                        output_snd
260                    );
261                }
262            }
263        }
264    });
265
266    Ok((recheck_op_snd, handle))
267}
268
269fn recheck(
270    output_snd: &XvcOutputSender,
271    xvc_root: &XvcRoot,
272    files_to_recheck: &HStore<&XvcPath>,
273    recheck_method_store: &XvcStore<RecheckMethod>,
274    content_digest_store: &XvcStore<ContentDigest>,
275    parallel: bool,
276) -> Result<()> {
277    let (ignore_writer, ignore_thread) = make_ignore_handler(output_snd, xvc_root)?;
278
279    let inner = |xe, xvc_path: &XvcPath| -> Result<()> {
280        let content_digest = content_digest_store[&xe];
281        let cache_path = XvcCachePath::new(xvc_path, &content_digest)?;
282        if cache_path.to_absolute_path(xvc_root).exists() {
283            let target_path = xvc_path.to_absolute_path(xvc_root);
284            if target_path.exists() {
285                info!(output_snd, "[EXISTS] {target_path}");
286                fs::remove_file(&target_path)?;
287                info!(output_snd, "[REMOVE] {target_path}");
288            }
289            let recheck_method = recheck_method_store[&xe];
290            recheck_from_cache(
291                output_snd,
292                xvc_root,
293                xvc_path,
294                &cache_path,
295                recheck_method,
296                &ignore_writer,
297            )
298        } else {
299            error!(
300                output_snd,
301                "{} cannot found in cache: {}", xvc_path, cache_path
302            );
303            Ok(())
304        }
305    };
306
307    if parallel {
308        files_to_recheck.par_iter().for_each(|(xe, xp)| {
309            inner(*xe, xp).unwrap_or_else(|e| warn!(output_snd, "{}", e));
310        });
311    } else {
312        files_to_recheck.iter().for_each(|(xe, xp)| {
313            inner(*xe, xp).unwrap_or_else(|e| warn!(output_snd, "{}", e));
314        });
315    }
316
317    ignore_writer.send(None).unwrap();
318    ignore_thread.join().unwrap();
319
320    Ok(())
321}