xvc_file/common/
compare.rs

1//! File comparison utilities.
2use crate::error::Error;
3use crate::Result;
4use anyhow::anyhow;
5use crossbeam_channel::{Receiver, Sender};
6
7use itertools::Itertools;
8use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
9
10use std::collections::HashSet;
11use std::path::PathBuf;
12use std::thread::{self, JoinHandle};
13
14use xvc_core::types::xvcdigest::{content_digest::ContentDigest, DIGEST_LENGTH};
15use xvc_core::FromConfigKey;
16use xvc_core::{SharedXStore, XvcEcsError};
17
18use xvc_core::{
19    diff_store, Diff, DiffStore, DiffStore2, HashAlgorithm, RecheckMethod, XvcDigest, XvcFileType,
20    XvcMetadata, XvcPath, XvcPathMetadataMap, XvcRoot,
21};
22
23use xvc_core::{debug, error, panic, XvcOutputSender};
24use xvc_core::{HStore, XvcEntity, XvcStore};
25
26use super::FileTextOrBinary;
27
28/// Compare the records and the actual info from `pmm` to find the differences
29/// in paths.
30/// This is used to detect changes between actual paths and our records.
31/// New entities are created for those paths missing from the records.
32pub fn diff_xvc_path_metadata(
33    xvc_root: &XvcRoot,
34    stored_xvc_path_store: &XvcStore<XvcPath>,
35    stored_xvc_metadata_store: &XvcStore<XvcMetadata>,
36    pmm: &XvcPathMetadataMap,
37) -> DiffStore2<XvcPath, XvcMetadata> {
38    let actual_xvc_path_store: HStore<XvcPath> = HStore::from_storable(
39        pmm.keys().cloned(),
40        stored_xvc_path_store,
41        xvc_root.entity_generator(),
42    );
43
44    let entities: HashSet<XvcEntity> = actual_xvc_path_store.keys().copied().collect();
45
46    let actual_xvc_metadata_store: HStore<XvcMetadata> = actual_xvc_path_store
47        .iter()
48        .map(|(xe, xp)| (*xe, pmm[xp]))
49        .collect();
50
51    let xvc_path_diff = diff_store(
52        stored_xvc_path_store,
53        &actual_xvc_path_store,
54        Some(&entities),
55    );
56
57    let xvc_metadata_diff = diff_store(
58        stored_xvc_metadata_store,
59        &actual_xvc_metadata_store,
60        Some(&entities),
61    );
62
63    DiffStore2(xvc_path_diff, xvc_metadata_diff)
64}
65
66/// For each command, we have a single requested_recheck_method.
67/// We build an actual store by repeating it for all entities we have.
68///
69/// If there is no requested_recheck_method, we use the stored one and if there is nothing in the
70/// store, we use the default from config.
71pub fn diff_recheck_method(
72    default_recheck_method: RecheckMethod,
73    stored_recheck_method_store: &XvcStore<RecheckMethod>,
74    requested_recheck_method: Option<RecheckMethod>,
75    entities: &HashSet<XvcEntity>,
76) -> DiffStore<RecheckMethod> {
77    let requested_recheck_method_store: HStore<RecheckMethod> =
78        HStore::from_iter(entities.iter().map(|x| {
79            if let Some(recheck_method) = requested_recheck_method {
80                (*x, recheck_method)
81            } else if stored_recheck_method_store.contains_key(x) {
82                (*x, *stored_recheck_method_store.get(x).unwrap())
83            } else {
84                (*x, default_recheck_method)
85            }
86        }));
87
88    diff_store(
89        stored_recheck_method_store,
90        &requested_recheck_method_store,
91        Some(entities),
92    )
93}
94
95/// For each command, we have a single requested_text_or_binary.
96/// We build an actual store by repeating it for all entities we have.
97/// This is used to find when the user wants to change recheck method.
98pub fn diff_text_or_binary(
99    stored_text_or_binary_store: &XvcStore<FileTextOrBinary>,
100    requested_text_or_binary: FileTextOrBinary,
101    entities: &HashSet<XvcEntity>,
102) -> DiffStore<FileTextOrBinary> {
103    let requested_text_or_binary_store: HStore<FileTextOrBinary> = entities
104        .iter()
105        .map(|x| (*x, requested_text_or_binary))
106        .collect();
107
108    diff_store(
109        stored_text_or_binary_store,
110        &requested_text_or_binary_store,
111        Some(entities),
112    )
113}
114
115/// Compare the content of a file with the stored content digest.
116///
117/// The file is defined by the entity `xe`. The comparison is done only when the path (`xvc_path_diff`) or the metadata
118/// (`xvc_metadata_diff`) of the file has changed.
119#[allow(clippy::too_many_arguments)]
120pub fn diff_file_content_digest(
121    output_snd: &XvcOutputSender,
122    xvc_root: &XvcRoot,
123    xe: XvcEntity,
124    xvc_path_diff: &Diff<XvcPath>,
125    xvc_metadata_diff: &Diff<XvcMetadata>,
126    stored_xvc_path_store: &XvcStore<XvcPath>,
127    stored_content_digest_store: &XvcStore<ContentDigest>,
128    algorithm: HashAlgorithm,
129    text_or_binary: FileTextOrBinary,
130) -> Result<(XvcEntity, Diff<ContentDigest>)> {
131    let anything_changed = xvc_path_diff.changed() || xvc_metadata_diff.changed();
132
133    if anything_changed {
134        let stored_content_digest = stored_content_digest_store.get(&xe);
135
136        let path_from_store = || -> Result<PathBuf> {
137            let xvc_path = stored_xvc_path_store
138                .get(&xe)
139                .ok_or(XvcEcsError::CannotFindEntityInStore { entity: xe })?;
140            let path = xvc_path.to_absolute_path(xvc_root).to_path_buf();
141            Ok(path)
142        };
143        let compare_with_stored_digest = |actual| -> Diff<ContentDigest> {
144            match stored_content_digest {
145                Some(record) => {
146                    if actual != *record {
147                        Diff::Different {
148                            record: *record,
149                            actual,
150                        }
151                    } else {
152                        Diff::Identical
153                    }
154                }
155                None => Diff::RecordMissing { actual },
156            }
157        };
158
159        let diff_content_digest = match xvc_path_diff {
160            Diff::Identical | Diff::Skipped => {
161                match xvc_metadata_diff {
162                    // text_or_binary should have changed.
163                    Diff::Skipped | Diff::Identical => {
164                        let path = path_from_store()?;
165                        let actual = ContentDigest::new(&path, algorithm, text_or_binary.0)?;
166                        compare_with_stored_digest(actual)
167                    }
168                    Diff::RecordMissing { .. } => {
169                        panic!(
170                            output_snd,
171                            "We have path but no metadata for entity {xe}. This shouldn't happen."
172                        );
173                    }
174                    Diff::ActualMissing { .. } => Diff::ActualMissing {
175                        record: *stored_content_digest.unwrap(),
176                    },
177                    // Either the metadata has changed, or the file is deleted.
178                    Diff::Different { actual, .. } => match actual.file_type {
179                        xvc_core::XvcFileType::Missing => Diff::ActualMissing {
180                            record: *stored_content_digest.unwrap(),
181                        },
182                        xvc_core::XvcFileType::File => {
183                            let path = path_from_store()?;
184                            let actual = ContentDigest::new(&path, algorithm, text_or_binary.0)?;
185                            compare_with_stored_digest(actual)
186                        }
187                        xvc_core::XvcFileType::Reflink
188                        | xvc_core::XvcFileType::Hardlink
189                        | xvc_core::XvcFileType::Directory
190                        | xvc_core::XvcFileType::Symlink => {
191                            let path = path_from_store()?;
192                            return Err(Error::ContentDigestNotSupported { path });
193                        }
194                    },
195                }
196            }
197            // The path is not recorded before.
198            Diff::RecordMissing { actual } => {
199                let path = actual.to_absolute_path(xvc_root);
200                let actual_digest = ContentDigest::new(&path, algorithm, text_or_binary.0)?;
201                compare_with_stored_digest(actual_digest)
202            }
203            // The path is changed. This can happen after a move
204            // operation, for example.
205            Diff::Different { actual, .. } => {
206                let path = actual.to_absolute_path(xvc_root);
207                let actual = ContentDigest::new(&path, algorithm, text_or_binary.0)?;
208                compare_with_stored_digest(actual)
209            }
210            // We have a record, but the path on disk is missing.
211            // We can't calculate the digest. We'll use the recorded
212            // one.
213            Diff::ActualMissing { .. } => {
214                match stored_content_digest {
215                    Some(record) => Diff::ActualMissing { record: *record },
216                    // if the both actual and the record are
217                    // missing, they are identical in their inexistence.
218                    // how can a man without hands clap?
219                    None => Diff::Identical,
220                }
221            }
222        };
223
224        Ok((xe, diff_content_digest))
225    } else {
226        Ok((xe, Diff::Skipped))
227    }
228}
229
230/// Used to signal diff channels to calculate diffs of the requested entity.
231pub struct DiffRequest {
232    xe: XvcEntity,
233}
234
235type FileContentDigestDiffHandlers = (
236    Sender<Option<DiffRequest>>,
237    Receiver<Option<Diff<ContentDigest>>>,
238    JoinHandle<()>,
239);
240
241/// This is a channel version of [diff_file_content_digest]. It creates a thread that listens to requests
242/// diff_request channel and sends the calculated diffs to the diff_result channel.
243///
244/// The thread will exit when the other ends of channel is dropped or when the diff_request_rcv gets a None.
245/// It sends a None to the diff_result_snd when it exits.
246#[allow(clippy::too_many_arguments)]
247pub fn make_file_content_digest_diff_handler(
248    output_snd: &XvcOutputSender,
249    xvc_root: &XvcRoot,
250    stored_xvc_path_store: &SharedXStore<XvcPath>,
251    stored_xvc_metadata_store: &SharedXStore<XvcMetadata>,
252    stored_content_digest_store: &SharedXStore<ContentDigest>,
253    stored_text_or_binary_store: &SharedXStore<FileTextOrBinary>,
254    requested_text_or_binary: Option<FileTextOrBinary>,
255    requested_hash_algorithm: Option<HashAlgorithm>,
256) -> Result<FileContentDigestDiffHandlers> {
257    let algorithm: HashAlgorithm =
258        requested_hash_algorithm.unwrap_or_else(|| HashAlgorithm::from_conf(xvc_root.config()));
259
260    let (diff_request_snd, diff_request_rcv) =
261        crossbeam_channel::bounded::<Option<DiffRequest>>(crate::CHANNEL_CAPACITY);
262    let (diff_result_snd, diff_result_rcv) = crossbeam_channel::bounded(crate::CHANNEL_CAPACITY);
263
264    let output_snd = output_snd.clone();
265    let xvc_root = xvc_root.clone();
266    let stored_xvc_path_store = stored_xvc_path_store.clone();
267    let stored_xvc_metadata_store = stored_xvc_metadata_store.clone();
268    let stored_content_digest_store = stored_content_digest_store.clone();
269    let stored_text_or_binary_store = stored_text_or_binary_store.clone();
270
271    let handle = thread::spawn(move || {
272        while let Ok(Some(diff_request)) = diff_request_rcv.recv() {
273            let stored_xvc_path_store = stored_xvc_path_store.read().unwrap();
274            let stored_xvc_metadata_store = stored_xvc_metadata_store.read().unwrap();
275            let stored_content_digest_store = stored_content_digest_store.read().unwrap();
276            let stored_text_or_binary_store = stored_text_or_binary_store.read().unwrap();
277            let xe = diff_request.xe;
278            let xvc_path = stored_xvc_path_store.get(&xe).unwrap();
279            let xvc_metadata = stored_xvc_metadata_store.get(&xe).unwrap();
280            if xvc_metadata.is_file() {
281                let stored_content_digest = stored_content_digest_store.get(&xe);
282                let text_or_binary = requested_text_or_binary.unwrap_or_else(|| {
283                    stored_text_or_binary_store
284                        .get(&xe)
285                        .cloned()
286                        .unwrap_or_default()
287                });
288                let path = xvc_path.to_absolute_path(&xvc_root);
289
290                if path.is_file() {
291                    let actual_content_digest_res =
292                        ContentDigest::new(&path, algorithm, text_or_binary.as_inner());
293                    match (actual_content_digest_res, stored_content_digest) {
294                        (Ok(actual), Some(stored)) => {
295                            if actual == *stored {
296                                diff_result_snd.send(Some(Diff::Identical)).unwrap();
297                            } else {
298                                diff_result_snd
299                                    .send(Some(Diff::Different {
300                                        actual,
301                                        record: *stored,
302                                    }))
303                                    .unwrap();
304                            }
305                        }
306                        (Err(e), _) => {
307                            debug!(
308                                output_snd,
309                                "Failed to calculate content digest of {:?}: {}", path, e
310                            );
311                        }
312                        (Ok(actual), None) => {
313                            diff_result_snd
314                                .send(Some(Diff::RecordMissing { actual }))
315                                .unwrap();
316                        }
317                    }
318                } else if let Some(stored_content_digest) = stored_content_digest {
319                    diff_result_snd
320                        .send(Some(Diff::ActualMissing {
321                            record: *stored_content_digest,
322                        }))
323                        .unwrap();
324                } else {
325                    diff_result_snd.send(Some(Diff::Identical)).unwrap();
326                }
327            }
328        }
329
330        // Send None to indicate that the thread is exiting.
331        diff_result_snd.send(None).unwrap();
332    });
333
334    Ok((diff_request_snd, diff_result_rcv, handle))
335}
336
337/// Check whether content digests of files and directories in xvc_path_store has
338/// changed.
339///
340/// This is used to identify the files that requires attention in several
341/// commands, like recheck or carry-in.
342#[allow(clippy::too_many_arguments)]
343pub fn diff_content_digest(
344    output_snd: &XvcOutputSender,
345    xvc_root: &XvcRoot,
346    stored_xvc_path_store: &XvcStore<XvcPath>,
347    stored_xvc_metadata_store: &XvcStore<XvcMetadata>,
348    stored_content_digest_store: &XvcStore<ContentDigest>,
349    stored_text_or_binary_store: &XvcStore<FileTextOrBinary>,
350    xvc_path_diff_store: &DiffStore<XvcPath>,
351    xvc_metadata_diff_store: &DiffStore<XvcMetadata>,
352    requested_text_or_binary: Option<FileTextOrBinary>,
353    requested_hash_algorithm: Option<HashAlgorithm>,
354    parallel: bool,
355) -> DiffStore<ContentDigest> {
356    let entities: HashSet<XvcEntity> = xvc_path_diff_store.keys().copied().collect();
357    let algorithm: HashAlgorithm =
358        requested_hash_algorithm.unwrap_or_else(|| HashAlgorithm::from_conf(xvc_root.config()));
359
360    let diff_file = |xe| -> Result<(XvcEntity, Diff<ContentDigest>)> {
361        let xvc_path_diff = xvc_path_diff_store
362            .get(&xe)
363            .unwrap_or(&Diff::<XvcPath>::Skipped);
364        let xvc_metadata_diff = xvc_metadata_diff_store
365            .get(&xe)
366            .unwrap_or(&Diff::<XvcMetadata>::Skipped);
367
368        let text_or_binary = requested_text_or_binary.unwrap_or_else(|| {
369            stored_text_or_binary_store
370                .get(&xe)
371                .copied()
372                .unwrap_or_else(|| FileTextOrBinary::from_conf(xvc_root.config()))
373        });
374
375        diff_file_content_digest(
376            output_snd,
377            xvc_root,
378            xe,
379            xvc_path_diff,
380            xvc_metadata_diff,
381            stored_xvc_path_store,
382            stored_content_digest_store,
383            algorithm,
384            text_or_binary,
385        )
386    };
387
388    let diff_dir = |xe,
389                    dir_entities: &HashSet<XvcEntity>,
390                    file_content_digest_store: &DiffStore<ContentDigest>| {
391        let from_store = |xe| stored_xvc_path_store.get(xe).unwrap();
392        let the_dir = match xvc_path_diff_store.get(xe) {
393            None | Some(Diff::Identical) | Some(Diff::Skipped) => from_store(xe),
394            Some(Diff::RecordMissing { actual }) => actual,
395            Some(Diff::ActualMissing { record }) => record,
396            Some(Diff::Different { actual, .. }) => actual,
397        };
398
399        let child_path_entities = entities
400            .iter()
401            .filter_map(|xe| {
402                // We don't consider directories in directories
403                if dir_entities.contains(xe) {
404                    return None;
405                }
406
407                let xvc_path = match xvc_path_diff_store.get(xe) {
408                    None | Some(Diff::Identical) | Some(Diff::Skipped) => from_store(xe),
409                    Some(Diff::RecordMissing { actual }) => actual,
410                    Some(Diff::ActualMissing { record }) => record,
411                    Some(Diff::Different { actual, .. }) => actual,
412                };
413
414                if xvc_path.starts_with(the_dir) {
415                    Some(*xe)
416                } else {
417                    None
418                }
419            })
420            .sorted()
421            .collect::<Vec<XvcEntity>>();
422
423        diff_dir_content_digest(
424            stored_content_digest_store.get(xe),
425            stored_content_digest_store,
426            file_content_digest_store,
427            &child_path_entities,
428        )
429    };
430
431    let file_type = |xe| {
432        stored_xvc_metadata_store
433            .get(&xe)
434            .map(|xmd| Ok(xmd.file_type))
435            .unwrap_or_else(|| match xvc_metadata_diff_store.get(&xe) {
436                None | Some(Diff::Identical) | Some(Diff::Skipped) => Err(anyhow!(
437                    "Cannot determine file type for path {} (entity {})",
438                    stored_xvc_path_store.get(&xe).unwrap(),
439                    xe
440                )),
441                Some(Diff::RecordMissing { actual }) => Ok(actual.file_type),
442                Some(Diff::ActualMissing { record }) => Ok(record.file_type),
443                Some(Diff::Different { record, actual }) => match actual.file_type {
444                    XvcFileType::Missing => Ok(record.file_type),
445                    _ => Ok(actual.file_type),
446                },
447            })
448    };
449
450    let file_entities = entities
451        .iter()
452        .filter(|xe| {
453            file_type(**xe)
454                .map(|ft| ft == XvcFileType::File)
455                .unwrap_or(false)
456        })
457        .copied()
458        .collect::<HashSet<XvcEntity>>();
459
460    let dir_entities = entities
461        .iter()
462        .filter(|xe| {
463            file_type(**xe)
464                .map(|ft| ft == XvcFileType::Directory)
465                .unwrap_or(false)
466        })
467        .copied()
468        .collect::<HashSet<XvcEntity>>();
469
470    entities
471        .difference(&file_entities)
472        .copied()
473        .collect::<HashSet<_>>()
474        .difference(&dir_entities)
475        .for_each(|xe| {
476            let ep = stored_xvc_path_store
477                .get(xe)
478                .map(|xp| xp.to_string())
479                .unwrap_or_else(|| format!("{:?}", xvc_path_diff_store.get(xe).unwrap()));
480            error!(
481                output_snd,
482                "Skipping {} because it is neither a file nor a directory", ep
483            );
484        });
485
486    let (file_content_digest_diff_store, dir_content_digest_diff_store) = if parallel {
487        let file_content_digest_diff_store = file_entities
488            .par_iter()
489            .filter_map(|xe| match diff_file(*xe) {
490                Ok((_, diff)) => Some((*xe, diff)),
491                Err(e) => {
492                    error!(output_snd, "{}", e);
493                    None
494                }
495            })
496            .collect::<DiffStore<ContentDigest>>();
497
498        let dir_content_digest_diff_store = dir_entities
499            .par_iter()
500            .filter_map(
501                |e| match diff_dir(e, &dir_entities, &file_content_digest_diff_store) {
502                    Ok(d) => Some((*e, d)),
503                    Err(e) => {
504                        error!(output_snd, "{}", e);
505                        None
506                    }
507                },
508            )
509            .collect::<DiffStore<ContentDigest>>();
510
511        (
512            file_content_digest_diff_store,
513            dir_content_digest_diff_store,
514        )
515    } else {
516        let file_content_digest_diff_store = file_entities
517            .iter()
518            .filter_map(|xe| match diff_file(*xe) {
519                Ok((_, diff)) => Some((*xe, diff)),
520                Err(e) => {
521                    error!(output_snd, "{}", e);
522                    None
523                }
524            })
525            .collect::<DiffStore<ContentDigest>>();
526
527        let dir_content_digest_diff_store = dir_entities
528            .iter()
529            .filter_map(
530                |e| match diff_dir(e, &dir_entities, &file_content_digest_diff_store) {
531                    Ok(d) => Some((*e, d)),
532                    Err(e) => {
533                        error!(output_snd, "{}", e);
534                        None
535                    }
536                },
537            )
538            .collect::<DiffStore<ContentDigest>>();
539
540        (
541            file_content_digest_diff_store,
542            dir_content_digest_diff_store,
543        )
544    };
545
546    let mut diff_store = DiffStore::with_capacity(
547        file_content_digest_diff_store.len() + dir_content_digest_diff_store.len(),
548    );
549
550    diff_store.extend(file_content_digest_diff_store);
551    diff_store.extend(dir_content_digest_diff_store);
552    diff_store
553}
554
555/// This is used to detect content changes in elements of path collections,
556/// e.g., directories or globs. When the content of these elements change, their
557/// content digests also change. We collect them together and calculate their
558/// digest to detect changes in the collection.
559pub fn diff_dir_content_digest(
560    stored_content_digest: Option<&ContentDigest>,
561    stored_xvc_content_store: &XvcStore<ContentDigest>,
562    content_diffs: &DiffStore<ContentDigest>,
563    sorted_entities: &[XvcEntity],
564) -> Result<Diff<ContentDigest>> {
565    let xvc_content_diff = content_diffs.subset(sorted_entities.iter().copied())?;
566    let mut content_digest_bytes = Vec::<u8>::with_capacity(sorted_entities.len() * DIGEST_LENGTH);
567
568    for xe in sorted_entities {
569        let xvc_content_diff =
570            xvc_content_diff
571                .get(xe)
572                .ok_or(XvcEcsError::CannotFindKeyInStore {
573                    key: xe.to_string(),
574                })?;
575        match xvc_content_diff {
576            Diff::Identical | Diff::Skipped => {
577                let content =
578                    stored_xvc_content_store
579                        .get(xe)
580                        .ok_or(XvcEcsError::CannotFindKeyInStore {
581                            key: xe.to_string(),
582                        })?;
583                content_digest_bytes.extend(content.digest().digest);
584            }
585            Diff::RecordMissing { actual } => {
586                content_digest_bytes.extend(actual.digest().digest);
587            }
588            Diff::Different { actual, .. } => {
589                content_digest_bytes.extend(actual.digest().digest);
590            }
591            Diff::ActualMissing { .. } => {
592                // This is to make sure the content digest is different when
593                // all records are missing or their order has changed.
594                let entity_bytes: u128 = (*xe).into();
595                let mut entity_bytes_as_digest = Vec::from([0u8; DIGEST_LENGTH]);
596                entity_bytes_as_digest.copy_from_slice(&entity_bytes.to_le_bytes());
597                content_digest_bytes.extend(
598                    &XvcDigest::from_bytes(&entity_bytes_as_digest, HashAlgorithm::AsIs).digest,
599                );
600            }
601        }
602    }
603
604    // We always use Blake3 to keep the content digest consistent.
605    let actual = ContentDigest::from(XvcDigest::from_bytes(
606        &content_digest_bytes,
607        HashAlgorithm::Blake3,
608    ));
609
610    let digest = match stored_content_digest {
611        Some(record) => {
612            if actual != *record {
613                Diff::Different {
614                    record: *record,
615                    actual,
616                }
617            } else {
618                Diff::Identical
619            }
620        }
621        None => Diff::RecordMissing { actual },
622    };
623
624    Ok(digest)
625}