Skip to main content

common/
cmp.rs

1use anyhow::{Context, Result};
2use async_recursion::async_recursion;
3use enum_map::{Enum, EnumMap};
4use tokio::io::AsyncWriteExt;
5use tracing::instrument;
6
7use crate::copy::is_file_type_same;
8use crate::filecmp;
9use crate::progress;
10
/// Outcome of comparing one source object against its destination counterpart.
///
/// Used as the inner key of [`Mismatch`] and as the `result` label in logged records.
#[derive(Copy, Clone, Debug, Enum)]
pub enum CompareResult {
    /// Both sides exist, have the same file type and compare equal under the
    /// configured metadata settings.
    Same,
    /// Both sides exist but differ in file type or in the compared metadata.
    Different,
    SrcMissing, // object missing in src but present in dst
    DstMissing, // object missing in dst but present in src
}
18
/// Coarse classification of a filesystem object, used as the key for
/// per-type comparison settings and per-type summary counters.
#[derive(Copy, Clone, Debug, Enum)]
pub enum ObjType {
    /// Regular file.
    File,
    /// Directory.
    Dir,
    /// Symbolic link (metadata is read without following the link).
    Symlink,
    Other, // sockets, block devices, character devices, FIFOs, etc.
}
26
/// Metadata comparison settings, one entry per [`ObjType`].
pub type ObjSettings = EnumMap<ObjType, filecmp::MetadataCmpSettings>;
28
/// Options controlling a compare run.
#[derive(Debug, Clone)]
pub struct Settings {
    /// Which metadata attributes are compared, selected by the source object's type.
    pub compare: ObjSettings,
    /// Stop collecting results from child tasks as soon as one of them fails.
    pub fail_early: bool,
    /// Return right after recording a difference for an object instead of
    /// descending into it.
    pub exit_early: bool,
    /// When a directory exists on one side only, walk it and report every
    /// entry beneath it as missing instead of reporting just the directory.
    pub expand_missing: bool,
    /// Optional filter; entries it does not include are counted as skipped
    /// rather than compared.
    pub filter: Option<crate::filter::FilterSettings>,
}
37
/// Count of objects per object type and per comparison outcome.
pub type Mismatch = EnumMap<ObjType, EnumMap<CompareResult, u64>>;
39
/// Count of skipped items (entries excluded by the filter) per object type.
pub type Skipped = EnumMap<ObjType, u64>;
42
/// Output format for comparison results and summary.
///
/// `Json` is the default variant.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum OutputFormat {
    /// JSON output: one JSON object per line (NDJSON) for each logged
    /// difference, and a single JSON object for the summary.
    #[default]
    Json,
    /// Human-readable text output (legacy format)
    Text,
}
52
53fn compare_result_name(cr: CompareResult) -> &'static str {
54    match cr {
55        CompareResult::Same => "same",
56        CompareResult::Different => "different",
57        CompareResult::SrcMissing => "src_missing",
58        CompareResult::DstMissing => "dst_missing",
59    }
60}
61
62fn obj_type_name(ot: ObjType) -> &'static str {
63    match ot {
64        ObjType::File => "file",
65        ObjType::Dir => "dir",
66        ObjType::Symlink => "symlink",
67        ObjType::Other => "other",
68    }
69}
70
/// Renders `path` as a string that survives JSON encoding and can be decoded
/// back to the exact original bytes of any Unix path.
///
/// Encoding rules:
/// - a literal backslash becomes `\\`;
/// - every byte that is not part of valid UTF-8 becomes `\xHH` (lowercase hex);
/// - everything else is copied through unchanged.
///
/// To decode, parse the JSON string first, then scan left-to-right:
/// `\\` → literal `\`, `\xHH` → raw byte, all other characters are literal UTF-8.
fn path_to_json_string(path: &std::path::Path) -> String {
    use std::fmt::Write as _;
    use std::os::unix::ffi::OsStrExt;

    let raw = path.as_os_str().as_bytes();
    let mut encoded = String::with_capacity(raw.len());
    for piece in raw.utf8_chunks() {
        // valid UTF-8 is copied verbatim, with each backslash separator doubled
        let mut segments = piece.valid().split('\\');
        if let Some(head) = segments.next() {
            encoded.push_str(head);
        }
        for tail in segments {
            encoded.push_str("\\\\");
            encoded.push_str(tail);
        }
        // bytes that broke UTF-8 decoding are emitted as hex escapes
        for &byte in piece.invalid() {
            write!(encoded, "\\x{byte:02x}").expect("writing to a String cannot fail");
        }
    }
    encoded
}
95
/// Aggregated result of a compare run (or of one subtree of it).
#[derive(Default)]
pub struct Summary {
    /// Number of objects per object type and comparison outcome.
    pub mismatch: Mismatch,
    /// Number of entries per object type that were excluded by the filter.
    pub skipped: Skipped,
    /// Total size of regular files compared on the source side, in bytes.
    pub src_bytes: u64,
    /// Total size of regular files compared on the destination side, in bytes.
    pub dst_bytes: u64,
}
105
106impl std::ops::Add for Summary {
107    type Output = Self;
108    fn add(self, other: Self) -> Self {
109        let mut mismatch = self.mismatch;
110        for (obj_type, &cmp_res_map) in &other.mismatch {
111            for (cmp_res, &count) in &cmp_res_map {
112                mismatch[obj_type][cmp_res] += count;
113            }
114        }
115        let mut skipped = self.skipped;
116        for (obj_type, &count) in &other.skipped {
117            skipped[obj_type] += count;
118        }
119        Self {
120            mismatch,
121            skipped,
122            src_bytes: self.src_bytes + other.src_bytes,
123            dst_bytes: self.dst_bytes + other.dst_bytes,
124        }
125    }
126}
127
128impl std::fmt::Display for Summary {
129    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
130        writeln!(
131            f,
132            "src size (compared): {}",
133            bytesize::ByteSize(self.src_bytes)
134        )?;
135        writeln!(
136            f,
137            "dst size (compared): {}",
138            bytesize::ByteSize(self.dst_bytes)
139        )?;
140        for (obj_type, &cmp_res_map) in &self.mismatch {
141            for (cmp_res, &count) in &cmp_res_map {
142                writeln!(f, "{obj_type:?} {cmp_res:?}: {count}")?;
143            }
144        }
145        for (obj_type, &count) in &self.skipped {
146            if count > 0 {
147                writeln!(f, "{obj_type:?} Skipped: {count}")?;
148            }
149        }
150        Ok(())
151    }
152}
153
/// Wraps a [`Summary`] with an [`OutputFormat`] so that [`Display`](std::fmt::Display)
/// renders either human-readable text or JSON.
pub struct FormattedSummary {
    /// The counters and byte totals to render.
    pub summary: Summary,
    /// Selects the text or JSON rendering.
    pub format: OutputFormat,
}
160
161impl std::fmt::Display for FormattedSummary {
162    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
163        match self.format {
164            OutputFormat::Text => write!(f, "{}", self.summary),
165            OutputFormat::Json => {
166                let mut mismatch = serde_json::Map::new();
167                for (obj_type, &cmp_res_map) in &self.summary.mismatch {
168                    let mut counts = serde_json::Map::new();
169                    for (cmp_res, &count) in &cmp_res_map {
170                        counts.insert(
171                            compare_result_name(cmp_res).to_string(),
172                            serde_json::Value::Number(count.into()),
173                        );
174                    }
175                    mismatch.insert(
176                        obj_type_name(obj_type).to_string(),
177                        serde_json::Value::Object(counts),
178                    );
179                }
180                let mut skipped = serde_json::Map::new();
181                for (obj_type, &count) in &self.summary.skipped {
182                    if count > 0 {
183                        skipped.insert(
184                            obj_type_name(obj_type).to_string(),
185                            serde_json::Value::Number(count.into()),
186                        );
187                    }
188                }
189                let stats = crate::collect_runtime_stats();
190                let walltime = crate::get_progress().get_duration();
191                let obj = serde_json::json!({
192                    "src_bytes": self.summary.src_bytes,
193                    "dst_bytes": self.summary.dst_bytes,
194                    "mismatch": serde_json::Value::Object(mismatch),
195                    "skipped": serde_json::Value::Object(skipped),
196                    "walltime_ms": walltime.as_millis() as u64,
197                    "cpu_time_user_ms": stats.cpu_time_user_ms,
198                    "cpu_time_kernel_ms": stats.cpu_time_kernel_ms,
199                    "peak_rss_bytes": stats.peak_rss_bytes,
200                });
201                write!(f, "{obj}")
202            }
203        }
204    }
205}
206
/// Sink for per-object mismatch records.
///
/// Cloning is cheap: clones share the same underlying writers through `Arc`.
#[derive(Clone)]
pub struct LogWriter {
    /// Buffered log-file writer, present when a log path was given.
    file: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::fs::File>>>>,
    /// Buffered stdout writer, present when stdout output was requested and no log path was given.
    stdout: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::io::Stdout>>>>,
    /// Format used when rendering each record.
    format: OutputFormat,
}
213
214impl std::fmt::Debug for LogWriter {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        f.debug_struct("LogWriter")
217            .field("file", &self.file.is_some())
218            .field("stdout", &self.stdout.is_some())
219            .field("format", &self.format)
220            .finish()
221    }
222}
223
224impl LogWriter {
225    /// Creates a new LogWriter.
226    ///
227    /// If `log_path_opt` is provided, output goes to that file.
228    /// Otherwise, if `use_stdout` is true, output goes to stdout.
229    /// If both are false/None, no output is produced.
230    pub async fn new(
231        log_path_opt: Option<&std::path::Path>,
232        use_stdout: bool,
233        format: OutputFormat,
234    ) -> Result<Self> {
235        if let Some(log_path) = log_path_opt {
236            let log_file = tokio::fs::OpenOptions::new()
237                .write(true)
238                .create_new(true)
239                .open(log_path)
240                .await
241                .with_context(|| format!("Failed to open log file: {log_path:?}"))?;
242            let log =
243                std::sync::Arc::new(tokio::sync::Mutex::new(tokio::io::BufWriter::new(log_file)));
244            Ok(Self {
245                file: Some(log),
246                stdout: None,
247                format,
248            })
249        } else if use_stdout {
250            Ok(Self {
251                file: None,
252                stdout: Some(std::sync::Arc::new(tokio::sync::Mutex::new(
253                    tokio::io::BufWriter::new(tokio::io::stdout()),
254                ))),
255                format,
256            })
257        } else {
258            Ok(Self {
259                file: None,
260                stdout: None,
261                format,
262            })
263        }
264    }
265    /// Creates a silent LogWriter that produces no output, using the default format.
266    /// Convenience constructor primarily for tests.
267    pub async fn silent() -> Result<Self> {
268        Self::new(None, false, OutputFormat::default()).await
269    }
270
271    pub async fn log_mismatch(
272        &self,
273        cmp_result: CompareResult,
274        src_obj_type: Option<ObjType>,
275        src: &std::path::Path,
276        dst_obj_type: Option<ObjType>,
277        dst: &std::path::Path,
278    ) -> Result<()> {
279        let msg = match self.format {
280            OutputFormat::Text => {
281                format!(
282                    "[{cmp_result:?}]\n\t[{src_obj_type:?}]\t{src:?}\n\t[{dst_obj_type:?}]\t{dst:?}\n"
283                )
284            }
285            OutputFormat::Json => {
286                let src_type_val = match src_obj_type {
287                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
288                    None => serde_json::Value::Null,
289                };
290                let dst_type_val = match dst_obj_type {
291                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
292                    None => serde_json::Value::Null,
293                };
294                let obj = serde_json::json!({
295                    "result": compare_result_name(cmp_result),
296                    "src_type": src_type_val,
297                    "src": path_to_json_string(src),
298                    "dst_type": dst_type_val,
299                    "dst": path_to_json_string(dst),
300                });
301                format!("{obj}\n")
302            }
303        };
304        self.write(&msg).await
305    }
306
307    async fn write(&self, msg: &str) -> Result<()> {
308        if let Some(log) = &self.file {
309            let mut log = log.lock().await;
310            log.write_all(msg.as_bytes())
311                .await
312                .context("Failed to write to log file")?;
313        }
314        if let Some(stdout) = &self.stdout {
315            let mut stdout = stdout.lock().await;
316            stdout
317                .write_all(msg.as_bytes())
318                .await
319                .context("Failed to write to stdout")?;
320        }
321        Ok(())
322    }
323
324    pub async fn flush(&self) -> Result<()> {
325        if let Some(log) = &self.file {
326            let mut log = log.lock().await;
327            log.flush().await.context("Failed to flush log file")?;
328        }
329        if let Some(stdout) = &self.stdout {
330            let mut stdout = stdout.lock().await;
331            stdout.flush().await.context("Failed to flush stdout")?;
332        }
333        Ok(())
334    }
335}
336
337fn obj_type(metadata: &std::fs::Metadata) -> ObjType {
338    if metadata.is_file() {
339        ObjType::File
340    } else if metadata.is_dir() {
341        ObjType::Dir
342    } else if metadata.is_symlink() {
343        ObjType::Symlink
344    } else {
345        // sockets, block devices, character devices, FIFOs, etc.
346        ObjType::Other
347    }
348}
349
/// Public entry point for compare operations.
///
/// Compares `src` against `dst`, writing each mismatch to `log`, and returns
/// the aggregated [`Summary`].
///
/// Internally delegates to `cmp_internal`, passing `src` and `dst` as both the
/// current paths and the source/destination roots, so that filter matching is
/// done on paths relative to the top of the comparison.
///
/// # Errors
/// Propagates any error returned by `cmp_internal`.
#[instrument(skip(prog_track))]
pub async fn cmp(
    prog_track: &'static progress::Progress,
    src: &std::path::Path,
    dst: &std::path::Path,
    log: &LogWriter,
    settings: &Settings,
) -> Result<Summary> {
    cmp_internal(prog_track, src, dst, src, dst, log, settings).await
}
362
/// Recursively walks a directory tree on the existing side and records every entry as missing
/// on the other side.
///
/// * `existing_path` - the object that exists; it is counted and logged itself before
///   any of its children are visited.
/// * `mirror_path` - the corresponding (absent) path on the other side, used only for logging
///   and for deriving the children's mirror paths.
/// * `existing_root` - root of the existing side; filter matching uses entry paths relative to it.
/// * `result` - [`CompareResult::DstMissing`] when enumerating the source side,
///   [`CompareResult::SrcMissing`] when enumerating the destination side.
///
/// Regular-file sizes are added to `src_bytes` or `dst_bytes` depending on `result`.
/// With `settings.exit_early` set, only `existing_path` itself is recorded.
///
/// # Errors
/// Fails when metadata or directory reads fail, when logging fails, or when any child
/// task fails; child errors are gathered in an error collector and returned together.
#[instrument(skip(prog_track))]
#[async_recursion]
async fn expand_missing_tree(
    prog_track: &'static progress::Progress,
    existing_path: &std::path::Path,
    mirror_path: &std::path::Path,
    existing_root: &std::path::Path,
    result: CompareResult,
    log: &LogWriter,
    settings: &Settings,
) -> Result<Summary> {
    let _prog_guard = prog_track.ops.guard();
    // The side we probe against is fully determined by which tree is
    // missing: `DstMissing` means we're enumerating src, `SrcMissing`
    // means we're enumerating dst. `Same` / `Different` are never
    // passed in (the only two call sites pass the `*Missing` variants),
    // but be defensive and default to source.
    let side = match result {
        CompareResult::DstMissing => congestion::Side::Source,
        CompareResult::SrcMissing => congestion::Side::Destination,
        CompareResult::Same | CompareResult::Different => congestion::Side::Source,
    };
    let metadata = crate::walk::run_metadata_probed(
        side,
        congestion::MetadataOp::Stat,
        tokio::fs::symlink_metadata(existing_path),
    )
    .await
    .with_context(|| format!("failed reading metadata from {:?}", &existing_path))?;
    let existing_obj_type = obj_type(&metadata);
    let mut summary = Summary::default();
    summary.mismatch[existing_obj_type][result] += 1;
    // track file sizes on the appropriate side
    if metadata.is_file() {
        match result {
            CompareResult::DstMissing => summary.src_bytes += metadata.len(),
            CompareResult::SrcMissing => summary.dst_bytes += metadata.len(),
            _ => {}
        }
    }
    // log with the existing object's type on its own side and `None` on the missing side
    match result {
        CompareResult::DstMissing => {
            log.log_mismatch(
                result,
                Some(existing_obj_type),
                existing_path,
                None,
                mirror_path,
            )
            .await?;
        }
        CompareResult::SrcMissing => {
            log.log_mismatch(
                result,
                None,
                mirror_path,
                Some(existing_obj_type),
                existing_path,
            )
            .await?;
        }
        _ => {}
    }
    if settings.exit_early {
        return Ok(summary);
    }
    // only directories have children to expand
    if !metadata.is_dir() {
        return Ok(summary);
    }
    let mut entries = tokio::fs::read_dir(existing_path)
        .await
        .with_context(|| format!("cannot open directory {:?} for reading", &existing_path))?;
    let mut join_set = tokio::task::JoinSet::new();
    let errors = crate::error_collector::ErrorCollector::default();
    loop {
        let Some((entry, entry_file_type)) =
            crate::walk::next_entry_probed(&mut entries, side, || {
                format!("failed traversing directory {:?}", &existing_path)
            })
            .await?
        else {
            break;
        };
        let entry_path = entry.path();
        let entry_name = entry_path.file_name().unwrap();
        // apply filter if configured
        if let Some(ref filter) = settings.filter {
            // fall back to the full entry path if it is not under existing_root
            let relative_path = entry_path
                .strip_prefix(existing_root)
                .unwrap_or(&entry_path);
            // an unknown file type is treated as "not a directory" for filtering
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a known directory nor a known symlink counts as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        let child_mirror = mirror_path.join(entry_name);
        // owned copies so the spawned task does not borrow from this frame
        let log = log.clone();
        let settings = settings.clone();
        let existing_root = existing_root.to_owned();
        // for positively-known leaf entries (file/symlink/special), acquire
        // the pending-meta permit BEFORE spawning so we don't create
        // unbounded tasks. We deliberately skip pre-acquire when
        // `entry_file_type` is None: the entry could actually be a directory,
        // and chained unknown-typed directories holding permits while
        // recursing would deadlock the pending-meta pool. Known directories
        // also skip pre-acquire. We use the pending-meta semaphore (not
        // open-files) because cmp doesn't hold fds; decoupling avoids
        // contention with concurrent copy paths that hold open-files permits.
        let known_leaf = entry_file_type.is_some_and(|ft| !ft.is_dir());
        let pending_guard = if known_leaf {
            Some(throttle::pending_meta_permit().await)
        } else {
            None
        };
        join_set.spawn(async move {
            // keep the permit (if any) alive for the lifetime of the child task
            let _pending_guard = pending_guard;
            expand_missing_tree(
                prog_track,
                &entry_path,
                &child_mirror,
                &existing_root,
                result,
                &log,
                &settings,
            )
            .await
        });
    }
    // release the directory handle before waiting on the children
    drop(entries);
    while let Some(res) = join_set.join_next().await {
        // `res?` propagates a task join failure immediately
        match res? {
            Ok(child_summary) => summary = summary + child_summary,
            Err(error) => {
                tracing::error!(
                    "expand_missing_tree: {:?} failed with: {:#}",
                    existing_path,
                    &error
                );
                errors.push(error);
                if settings.fail_early {
                    break;
                }
            }
        }
    }
    if let Some(err) = errors.into_error() {
        return Err(err);
    }
    Ok(summary)
}
527
/// Recursive worker behind [`cmp`]: compares `src` against `dst` and, when both are
/// directories, their contents.
///
/// * `src` / `dst` - the pair of paths compared by this call.
/// * `source_root` / `dest_root` - the roots of the whole comparison; filter matching
///   uses entry paths relative to these, and the root-item filter is applied only when
///   `src == source_root`.
///
/// Steps:
/// 1. Stat `src` (without following symlinks); apply the root-item filter on the initial call.
/// 2. Stat `dst`; if it does not exist, record `DstMissing` (expanding the source
///    directory when `settings.expand_missing` is set) and return.
/// 3. Record `Same` or `Different` for the pair, attributed to the source object type.
/// 4. If both sides are directories, spawn a comparison task per source entry, then scan
///    the destination for entries not seen in the source and record them as `SrcMissing`.
/// 5. Merge the child summaries and return any collected errors.
///
/// # Errors
/// Fails when metadata or directory reads fail (other than `dst` not existing), when
/// logging fails, or when child comparisons fail.
#[instrument(skip(prog_track))]
#[async_recursion]
async fn cmp_internal(
    prog_track: &'static progress::Progress,
    src: &std::path::Path,
    dst: &std::path::Path,
    source_root: &std::path::Path,
    dest_root: &std::path::Path,
    log: &LogWriter,
    settings: &Settings,
) -> Result<Summary> {
    let _prog_guard = prog_track.ops.guard();
    tracing::debug!("reading source metadata");
    // src can only be missing if the user passed an invalid path (which is an error)
    let src_metadata = crate::walk::run_metadata_probed(
        congestion::Side::Source,
        congestion::MetadataOp::Stat,
        tokio::fs::symlink_metadata(src),
    )
    .await
    .with_context(|| format!("failed reading metadata from {:?}", &src))?;
    // apply filter to root item (when src == source_root, this is the initial call)
    if src == source_root
        && let Some(filter) = &settings.filter
        && let Some(name) = src.file_name()
    {
        let is_dir = src_metadata.is_dir();
        if !matches!(
            filter.should_include_root_item(name.as_ref(), is_dir),
            crate::filter::FilterResult::Included
        ) {
            // root item filtered out, return summary with skipped count
            let src_obj_type = obj_type(&src_metadata);
            let mut summary = Summary::default();
            summary.skipped[src_obj_type] += 1;
            return Ok(summary);
        }
    }
    let mut cmp_summary = Summary::default();
    let src_obj_type = obj_type(&src_metadata);
    // track file sizes for the summary
    if src_metadata.is_file() {
        cmp_summary.src_bytes += src_metadata.len();
    }
    let dst_metadata = {
        let probed = crate::walk::run_metadata_probed(
            congestion::Side::Destination,
            congestion::MetadataOp::Stat,
            tokio::fs::symlink_metadata(dst),
        )
        .await;
        match probed {
            Ok(metadata) => metadata,
            Err(err) => {
                // a missing destination is a comparison result, not a failure
                if err.kind() == std::io::ErrorKind::NotFound {
                    if settings.expand_missing && src_metadata.is_dir() {
                        let expanded = expand_missing_tree(
                            prog_track,
                            src,
                            dst,
                            source_root,
                            CompareResult::DstMissing,
                            log,
                            settings,
                        )
                        .await?;
                        cmp_summary = cmp_summary + expanded;
                    } else {
                        cmp_summary.mismatch[src_obj_type][CompareResult::DstMissing] += 1;
                        log.log_mismatch(
                            CompareResult::DstMissing,
                            Some(src_obj_type),
                            src,
                            None,
                            dst,
                        )
                        .await?;
                    }
                    return Ok(cmp_summary);
                }
                // any other stat failure on dst is a real error
                return Err(err).context(format!("failed reading metadata from {:?}", &dst));
            }
        }
    };
    if dst_metadata.is_file() {
        cmp_summary.dst_bytes += dst_metadata.len();
    }
    if !is_file_type_same(&src_metadata, &dst_metadata)
        || !filecmp::metadata_equal(
            &settings.compare[src_obj_type],
            &src_metadata,
            &dst_metadata,
        )
    {
        // we use the src type for the summary attribution
        cmp_summary.mismatch[src_obj_type][CompareResult::Different] += 1;
        let dst_obj_type = obj_type(&dst_metadata);
        log.log_mismatch(
            CompareResult::Different,
            Some(src_obj_type),
            src,
            Some(dst_obj_type),
            dst,
        )
        .await?;
        if settings.exit_early {
            return Ok(cmp_summary);
        }
    } else {
        cmp_summary.mismatch[src_obj_type][CompareResult::Same] += 1;
    }
    if !src_metadata.is_dir() || !dst_metadata.is_dir() {
        // nothing more to do
        return Ok(cmp_summary);
    }
    tracing::debug!("process contents of 'src' directory");
    let mut src_entries = tokio::fs::read_dir(src)
        .await
        .with_context(|| format!("cannot open directory {src:?} for reading"))?;
    let mut join_set = tokio::task::JoinSet::new();
    let errors = crate::error_collector::ErrorCollector::default();
    // create a set of all the files we already processed
    let mut processed_files = std::collections::HashSet::new();
    // iterate through src entries and recursively call "cmp" on each one
    loop {
        let Some((src_entry, entry_file_type)) =
            crate::walk::next_entry_probed(&mut src_entries, congestion::Side::Source, || {
                format!("failed traversing directory {:?}", &src)
            })
            .await?
        else {
            break;
        };
        let entry_path = src_entry.path();
        let entry_name = entry_path.file_name().unwrap();
        // apply filter if configured
        if let Some(ref filter) = settings.filter {
            // compute relative path from source_root for filter matching
            let relative_path = crate::walk::relative_to_root(&entry_path, source_root);
            // an unknown file type is treated as "not a directory" for filtering
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a known directory nor a known symlink counts as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                cmp_summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        // filtered-out src entries are deliberately NOT added here; the dst pass
        // applies the filter itself
        processed_files.insert(entry_name.to_owned());
        let dst_path = dst.join(entry_name);
        // owned copies so the spawned task does not borrow from this frame
        let log = log.clone();
        let settings = settings.clone();
        let source_root = source_root.to_owned();
        let dest_root = dest_root.to_owned();
        // for positively-known leaf entries (file/symlink/special), acquire
        // the pending-meta permit BEFORE spawning so we don't create
        // unbounded tasks. We deliberately skip pre-acquire when
        // `entry_file_type` is None: the entry could actually be a directory,
        // and chained unknown-typed directories holding permits while
        // recursing would deadlock the pending-meta pool. Known directories
        // also skip pre-acquire. We use the pending-meta semaphore (not
        // open-files) because cmp doesn't hold fds; decoupling avoids
        // contention with concurrent copy paths that hold open-files permits.
        let known_leaf = entry_file_type.is_some_and(|ft| !ft.is_dir());
        let pending_guard = if known_leaf {
            Some(throttle::pending_meta_permit().await)
        } else {
            None
        };
        let do_cmp = || async move {
            // keep the permit (if any) alive for the lifetime of the child task
            let _pending_guard = pending_guard;
            cmp_internal(
                prog_track,
                &entry_path,
                &dst_path,
                &source_root,
                &dest_root,
                &log,
                &settings,
            )
            .await
        };
        join_set.spawn(do_cmp());
    }
    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
    // one thing we CAN do however is to drop it as soon as we're done with it
    drop(src_entries);
    tracing::debug!("process contents of 'dst' directory");
    let mut dst_entries = tokio::fs::read_dir(dst)
        .await
        .with_context(|| format!("cannot open directory {:?} for reading", &dst))?;
    // iterate through dst entries and log each one that's not present in src
    loop {
        let Some((dst_entry, entry_file_type)) =
            crate::walk::next_entry_probed(&mut dst_entries, congestion::Side::Destination, || {
                format!("failed traversing directory {:?}", &dst)
            })
            .await?
        else {
            break;
        };
        let entry_path = dst_entry.path();
        let entry_name = entry_path.file_name().unwrap();
        if processed_files.contains(entry_name) {
            // we already must have considered this file, skip it
            continue;
        }
        // apply filter if configured - if this entry would be filtered, don't report as missing
        if let Some(ref filter) = settings.filter {
            // compute relative path from dest_root for filter matching
            let relative_path = crate::walk::relative_to_root(&entry_path, dest_root);
            // an unknown file type is treated as "not a directory" for filtering
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a known directory nor a known symlink counts as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                cmp_summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        tracing::debug!("found a new entry in the 'dst' directory");
        let dst_path = dst.join(entry_name);
        let dst_entry_metadata = crate::walk::run_metadata_probed(
            congestion::Side::Destination,
            congestion::MetadataOp::Stat,
            tokio::fs::symlink_metadata(&dst_path),
        )
        .await
        .with_context(|| format!("failed reading metadata from {:?}", &dst_path))?;
        let dst_obj_type = obj_type(&dst_entry_metadata);
        if settings.expand_missing && dst_entry_metadata.is_dir() {
            // dst-only directory: report every entry beneath it; the expansion
            // accounts for file sizes itself
            match expand_missing_tree(
                prog_track,
                &dst_path,
                &src.join(entry_name),
                dest_root,
                CompareResult::SrcMissing,
                log,
                settings,
            )
            .await
            {
                Ok(expanded) => cmp_summary = cmp_summary + expanded,
                Err(error) => {
                    tracing::error!(
                        "expand_missing_tree: {:?} failed with: {:#}",
                        &dst_path,
                        &error
                    );
                    errors.push(error);
                    if settings.fail_early {
                        // unwrap is safe: we just pushed an error
                        return Err(errors.into_error().unwrap());
                    }
                }
            }
        } else {
            if dst_entry_metadata.is_file() {
                cmp_summary.dst_bytes += dst_entry_metadata.len();
            }
            cmp_summary.mismatch[dst_obj_type][CompareResult::SrcMissing] += 1;
            log.log_mismatch(
                CompareResult::SrcMissing,
                None,
                &src.join(entry_name),
                Some(dst_obj_type),
                &dst_path,
            )
            .await?;
        }
    }
    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
    // one thing we CAN do however is to drop it as soon as we're done with it
    drop(dst_entries);
    while let Some(res) = join_set.join_next().await {
        // `res?` propagates a task join failure immediately
        match res? {
            Ok(summary) => cmp_summary = cmp_summary + summary,
            Err(error) => {
                tracing::error!("cmp: {:?} vs {:?} failed with: {:#}", src, dst, &error);
                errors.push(error);
                if settings.fail_early {
                    break;
                }
            }
        }
    }
    if let Some(err) = errors.into_error() {
        return Err(err);
    }
    Ok(cmp_summary)
}
835
836#[cfg(test)]
837mod cmp_tests {
838    use crate::copy;
839    use crate::preserve;
840    use crate::testutils;
841    use enum_map::enum_map;
842    use tracing_test::traced_test;
843
844    use super::*;
845
    /// Progress tracker shared by the tests in this module; constructed lazily on first
    /// use via `progress::Progress::new`.
    static PROGRESS: std::sync::LazyLock<progress::Progress> =
        std::sync::LazyLock::new(progress::Progress::new);
    /// Preserve settings built by `preserve::preserve_none`, selected by
    /// `setup_test_dirs(false)`.
    static NO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
        std::sync::LazyLock::new(preserve::preserve_none);
    /// Preserve settings built by `preserve::preserve_all`, selected by
    /// `setup_test_dirs(true)`.
    static DO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
        std::sync::LazyLock::new(preserve::preserve_all);
852
853    async fn setup_test_dirs(preserve: bool) -> Result<std::path::PathBuf> {
854        let tmp_dir = testutils::setup_test_dir().await?;
855        let test_path = tmp_dir.as_path();
856        copy::copy(
857            &PROGRESS,
858            &test_path.join("foo"),
859            &test_path.join("bar"),
860            &copy::Settings {
861                dereference: false,
862                fail_early: false,
863                overwrite: false,
864                overwrite_compare: filecmp::MetadataCmpSettings {
865                    size: true,
866                    mtime: true,
867                    ..Default::default()
868                },
869                overwrite_filter: None,
870                ignore_existing: false,
871                chunk_size: 0,
872                skip_specials: false,
873                remote_copy_buffer_size: 0,
874                filter: None,
875                dry_run: None,
876                delete: None,
877            },
878            if preserve {
879                &DO_PRESERVE_SETTINGS
880            } else {
881                &NO_PRESERVE_SETTINGS
882            },
883            false,
884        )
885        .await?;
886        Ok(tmp_dir)
887    }
888
889    async fn truncate_file(path: &str) -> Result<()> {
890        let file = tokio::fs::File::create(path).await?;
891        file.set_len(0).await?;
892        Ok(())
893    }
894
895    #[tokio::test]
896    #[traced_test]
897    async fn check_basic_cmp() -> Result<()> {
898        let tmp_dir = setup_test_dirs(true).await?;
899        // drop 1 file from src
900        tokio::fs::remove_file(&tmp_dir.join("foo").join("bar").join("1.txt")).await?;
901        // sleep to ensure mtime is different, this acts as a poor-mans barrier
902        tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
903        // modify 1 file in dst
904        truncate_file(
905            tmp_dir
906                .join("bar")
907                .join("baz")
908                .join("4.txt")
909                .to_str()
910                .unwrap(),
911        )
912        .await?;
913        // drop 1 (other) file from dst
914        tokio::fs::remove_file(&tmp_dir.join("bar").join("bar").join("2.txt")).await?;
915        // create one more file in dst -- this will also modify the mtime of the directory
916        tokio::fs::File::create(&tmp_dir.join("bar").join("baz").join("7.txt")).await?;
917        let compare_settings = Settings {
918            fail_early: false,
919            exit_early: false,
920            expand_missing: false,
921            compare: enum_map! {
922                ObjType::File => filecmp::MetadataCmpSettings {
923                    size: true,
924                    mtime: true,
925                    ..Default::default()
926                },
927                ObjType::Dir => filecmp::MetadataCmpSettings {
928                    mtime: true,
929                    ..Default::default()
930                },
931                ObjType::Symlink => filecmp::MetadataCmpSettings {
932                    mtime: true,
933                    ..Default::default()
934                },
935                ObjType::Other => filecmp::MetadataCmpSettings {
936                    mtime: true,
937                    ..Default::default()
938                },
939            },
940            filter: None,
941        };
942        let summary = cmp(
943            &PROGRESS,
944            &tmp_dir.join("foo"),
945            &tmp_dir.join("bar"),
946            &LogWriter::new(
947                Some(tmp_dir.join("cmp.log").as_path()),
948                false,
949                OutputFormat::Text,
950            )
951            .await?,
952            &compare_settings,
953        )
954        .await?;
955        let mismatch: Mismatch = enum_map! {
956            ObjType::File => enum_map! {
957                CompareResult::Different => 1,
958                CompareResult::Same => 2,
959                CompareResult::SrcMissing => 2,
960                CompareResult::DstMissing => 1,
961            },
962            ObjType::Dir => enum_map! {
963                CompareResult::Different => 2,
964                CompareResult::Same => 1,
965                CompareResult::SrcMissing => 0,
966                CompareResult::DstMissing => 0,
967            },
968            ObjType::Symlink => enum_map! {
969                CompareResult::Different => 0,
970                CompareResult::Same => 2,
971                CompareResult::SrcMissing => 0,
972                CompareResult::DstMissing => 0,
973            },
974            ObjType::Other => enum_map! {
975                CompareResult::Different => 0,
976                CompareResult::Same => 0,
977                CompareResult::SrcMissing => 0,
978                CompareResult::DstMissing => 0,
979            },
980        };
981        assert_eq!(summary.mismatch, mismatch);
982        // src has 4 regular files of 1 byte each (0.txt, bar/2.txt, bar/3.txt, baz/4.txt)
983        assert_eq!(summary.src_bytes, 4);
984        // dst has: 0.txt(1B), bar/1.txt(1B, SrcMissing), bar/3.txt(1B), baz/4.txt(0B, truncated), baz/7.txt(0B, SrcMissing)
985        assert_eq!(summary.dst_bytes, 3);
986        Ok(())
987    }
988
989    #[tokio::test]
990    #[traced_test]
991    async fn cmp_with_filter_excludes_files() -> Result<()> {
992        let tmp_dir = setup_test_dirs(true).await?;
993        // setup: src=foo, dst=bar (identical at this point)
994        // add a file to dst that would be reported as SrcMissing
995        tokio::fs::write(&tmp_dir.join("bar").join("extra.txt"), "extra").await?;
996        // without filter, should report extra.txt as SrcMissing
997        let compare_settings_no_filter = Settings {
998            fail_early: false,
999            exit_early: false,
1000            expand_missing: false,
1001            compare: enum_map! {
1002                ObjType::File => filecmp::MetadataCmpSettings {
1003                    size: true,
1004                    mtime: true,
1005                    ..Default::default()
1006                },
1007                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1008                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1009                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1010            },
1011            filter: None,
1012        };
1013        let summary = cmp(
1014            &PROGRESS,
1015            &tmp_dir.join("foo"),
1016            &tmp_dir.join("bar"),
1017            &LogWriter::silent().await?,
1018            &compare_settings_no_filter,
1019        )
1020        .await?;
1021        assert_eq!(
1022            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1023            1
1024        );
1025        // with filter excluding extra.txt, should not report it
1026        let mut filter = crate::filter::FilterSettings::new();
1027        filter.add_exclude("extra.txt")?;
1028        let compare_settings_with_filter = Settings {
1029            fail_early: false,
1030            exit_early: false,
1031            expand_missing: false,
1032            compare: enum_map! {
1033                ObjType::File => filecmp::MetadataCmpSettings {
1034                    size: true,
1035                    mtime: true,
1036                    ..Default::default()
1037                },
1038                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1039                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1040                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1041            },
1042            filter: Some(filter),
1043        };
1044        let summary = cmp(
1045            &PROGRESS,
1046            &tmp_dir.join("foo"),
1047            &tmp_dir.join("bar"),
1048            &LogWriter::silent().await?,
1049            &compare_settings_with_filter,
1050        )
1051        .await?;
1052        assert_eq!(
1053            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1054            0
1055        );
1056        Ok(())
1057    }
1058
1059    #[tokio::test]
1060    #[traced_test]
1061    async fn cmp_with_include_only_compares_matching() -> Result<()> {
1062        let tmp_dir = setup_test_dirs(true).await?;
1063        // setup: src=foo, dst=bar (identical at this point)
1064        // modify a file that won't be included
1065        tokio::fs::write(&tmp_dir.join("bar").join("bar").join("1.txt"), "modified").await?;
1066        // with include pattern for only *.rs files, the .txt modification shouldn't appear
1067        let mut filter = crate::filter::FilterSettings::new();
1068        filter.add_include("*.rs")?;
1069        let compare_settings = Settings {
1070            fail_early: false,
1071            exit_early: false,
1072            expand_missing: false,
1073            compare: enum_map! {
1074                ObjType::File => filecmp::MetadataCmpSettings {
1075                    size: true,
1076                    mtime: true,
1077                    ..Default::default()
1078                },
1079                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1080                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1081                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1082            },
1083            filter: Some(filter),
1084        };
1085        let summary = cmp(
1086            &PROGRESS,
1087            &tmp_dir.join("foo"),
1088            &tmp_dir.join("bar"),
1089            &LogWriter::silent().await?,
1090            &compare_settings,
1091        )
1092        .await?;
1093        // no differences should be reported since all .txt files are excluded
1094        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1095        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1096        assert_eq!(
1097            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1098            0
1099        );
1100        assert_eq!(
1101            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1102            0
1103        );
1104        Ok(())
1105    }
1106
1107    #[tokio::test]
1108    #[traced_test]
1109    async fn cmp_with_path_pattern_filters_nested() -> Result<()> {
1110        // test that path-based patterns like "bar/*.txt" work correctly when recursing
1111        // this verifies source_root tracking is working properly
1112        let tmp_dir = setup_test_dirs(true).await?;
1113        // test structure:
1114        // foo/bar/1.txt, foo/bar/2.txt, foo/bar/3.txt
1115        // foo/baz/4.txt, foo/baz/5.txt (symlink), foo/baz/6.txt (symlink)
1116        // filter: only include bar/*.txt
1117        let mut filter = crate::filter::FilterSettings::new();
1118        filter.add_include("bar/*.txt")?;
1119        let compare_settings = Settings {
1120            fail_early: false,
1121            exit_early: false,
1122            expand_missing: false,
1123            compare: enum_map! {
1124                ObjType::File => filecmp::MetadataCmpSettings {
1125                    size: true,
1126                    ..Default::default()
1127                },
1128                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1129                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1130                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1131            },
1132            filter: Some(filter),
1133        };
1134        let summary = cmp(
1135            &PROGRESS,
1136            &tmp_dir.join("foo"),
1137            &tmp_dir.join("bar"),
1138            &LogWriter::silent().await?,
1139            &compare_settings,
1140        )
1141        .await?;
1142        // should only compare files in bar/ subdirectory (3 files: 1.txt, 2.txt, 3.txt)
1143        // all should be "Same" since we copied foo to bar earlier
1144        assert_eq!(
1145            summary.mismatch[ObjType::File][CompareResult::Same],
1146            3,
1147            "should have 3 same files from bar/*.txt pattern"
1148        );
1149        // files in baz/ should not be compared (filtered out)
1150        // 0.txt at root should not be compared
1151        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1152        Ok(())
1153    }
1154
1155    #[tokio::test]
1156    #[traced_test]
1157    async fn cmp_filter_applies_to_root_file() -> Result<()> {
1158        // test that filters apply to the root item itself
1159        let tmp_dir = testutils::create_temp_dir().await?;
1160        // create two different files
1161        tokio::fs::write(tmp_dir.join("test.txt"), "content1").await?;
1162        tokio::fs::write(tmp_dir.join("test2.txt"), "content2").await?;
1163        // filter: only include *.rs files
1164        let mut filter = crate::filter::FilterSettings::new();
1165        filter.add_include("*.rs")?;
1166        let compare_settings = Settings {
1167            fail_early: false,
1168            exit_early: false,
1169            expand_missing: false,
1170            compare: enum_map! {
1171                ObjType::File => filecmp::MetadataCmpSettings {
1172                    size: true,
1173                    ..Default::default()
1174                },
1175                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1176                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1177                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1178            },
1179            filter: Some(filter),
1180        };
1181        // compare test.txt vs test2.txt - should be filtered out (not *.rs)
1182        let summary = cmp(
1183            &PROGRESS,
1184            &tmp_dir.join("test.txt"),
1185            &tmp_dir.join("test2.txt"),
1186            &LogWriter::silent().await?,
1187            &compare_settings,
1188        )
1189        .await?;
1190        // should return empty summary since root file is filtered
1191        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1192        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1193        Ok(())
1194    }
1195
1196    #[tokio::test]
1197    #[traced_test]
1198    async fn cmp_filter_excludes_root_directory() -> Result<()> {
1199        // test that filters apply to root directories
1200        let tmp_dir = testutils::setup_test_dir().await?;
1201        // filter: exclude directories named "foo"
1202        let mut filter = crate::filter::FilterSettings::new();
1203        filter.add_exclude("foo")?;
1204        let compare_settings = Settings {
1205            fail_early: false,
1206            exit_early: false,
1207            expand_missing: false,
1208            compare: enum_map! {
1209                ObjType::File => filecmp::MetadataCmpSettings::default(),
1210                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1211                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1212                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1213            },
1214            filter: Some(filter),
1215        };
1216        // compare foo vs bar - foo should be filtered out
1217        let summary = cmp(
1218            &PROGRESS,
1219            &tmp_dir.join("foo"),
1220            &tmp_dir.join("bar"),
1221            &LogWriter::silent().await?,
1222            &compare_settings,
1223        )
1224        .await?;
1225        // should return empty summary since root dir is excluded
1226        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Same], 0);
1227        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Different], 0);
1228        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1229        Ok(())
1230    }
1231
1232    #[tokio::test]
1233    #[traced_test]
1234    async fn cmp_combined_include_exclude_patterns() -> Result<()> {
1235        let tmp_dir = setup_test_dirs(true).await?;
1236        // include all .txt files, but exclude bar/2.txt specifically
1237        let mut filter = crate::filter::FilterSettings::new();
1238        filter.add_include("**/*.txt")?;
1239        filter.add_exclude("bar/2.txt")?;
1240        let compare_settings = Settings {
1241            fail_early: false,
1242            exit_early: false,
1243            expand_missing: false,
1244            compare: enum_map! {
1245                ObjType::File => filecmp::MetadataCmpSettings {
1246                    size: true,
1247                    ..Default::default()
1248                },
1249                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1250                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1251                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1252            },
1253            filter: Some(filter),
1254        };
1255        let summary = cmp(
1256            &PROGRESS,
1257            &tmp_dir.join("foo"),
1258            &tmp_dir.join("bar"),
1259            &LogWriter::silent().await?,
1260            &compare_settings,
1261        )
1262        .await?;
1263        // should compare: 0.txt, bar/1.txt, bar/3.txt, baz/4.txt = 4 files (same)
1264        // should skip: bar/2.txt (excluded by pattern), 5.txt and 6.txt (symlinks, no match for *.txt in src dir) = 1 file + 2 symlinks
1265        // note: the pattern **/*.txt only matches files with .txt extension, but 5.txt and 6.txt in baz are symlinks
1266        assert_eq!(
1267            summary.mismatch[ObjType::File][CompareResult::Same],
1268            4,
1269            "should compare 4 .txt files as same"
1270        );
1271        // bar/2.txt is skipped for both src and dst traversal = 2 skipped
1272        assert_eq!(
1273            summary.skipped[ObjType::File],
1274            2,
1275            "should skip 2 files (bar/2.txt on src and dst)"
1276        );
1277        Ok(())
1278    }
1279
1280    #[tokio::test]
1281    #[traced_test]
1282    async fn cmp_skipped_counts_comprehensive() -> Result<()> {
1283        let tmp_dir = setup_test_dirs(true).await?;
1284        // exclude bar/ directory entirely
1285        let mut filter = crate::filter::FilterSettings::new();
1286        filter.add_exclude("bar/")?;
1287        let compare_settings = Settings {
1288            fail_early: false,
1289            exit_early: false,
1290            expand_missing: false,
1291            compare: enum_map! {
1292                ObjType::File => filecmp::MetadataCmpSettings {
1293                    size: true,
1294                    ..Default::default()
1295                },
1296                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1297                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1298                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1299            },
1300            filter: Some(filter),
1301        };
1302        let summary = cmp(
1303            &PROGRESS,
1304            &tmp_dir.join("foo"),
1305            &tmp_dir.join("bar"),
1306            &LogWriter::silent().await?,
1307            &compare_settings,
1308        )
1309        .await?;
1310        // compared: 0.txt (same), baz/4.txt (same) = 2 files
1311        // compared: baz/5.txt symlink (same), baz/6.txt symlink (same) = 2 symlinks
1312        // skipped: bar directory in src and dst = 2 dirs (cmp traverses both)
1313        assert_eq!(
1314            summary.mismatch[ObjType::File][CompareResult::Same],
1315            2,
1316            "should compare 2 files as same"
1317        );
1318        assert_eq!(
1319            summary.mismatch[ObjType::Symlink][CompareResult::Same],
1320            2,
1321            "should compare 2 symlinks as same"
1322        );
1323        assert_eq!(
1324            summary.skipped[ObjType::Dir],
1325            2,
1326            "should skip 2 directories (bar in src + bar in dst)"
1327        );
1328        Ok(())
1329    }
1330
1331    #[tokio::test]
1332    #[traced_test]
1333    async fn expand_missing_dst_reports_all_entries() -> Result<()> {
1334        let tmp_dir = setup_test_dirs(true).await?;
1335        // remove bar/bar directory entirely from dst
1336        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1337        let compare_settings = Settings {
1338            fail_early: false,
1339            exit_early: false,
1340            expand_missing: true,
1341            compare: enum_map! {
1342                ObjType::File => filecmp::MetadataCmpSettings::default(),
1343                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1344                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1345                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1346            },
1347            filter: None,
1348        };
1349        let summary = cmp(
1350            &PROGRESS,
1351            &tmp_dir.join("foo"),
1352            &tmp_dir.join("bar"),
1353            &LogWriter::silent().await?,
1354            &compare_settings,
1355        )
1356        .await?;
1357        // bar/bar dir has: bar/ (1 dir) + 1.txt, 2.txt, 3.txt (3 files)
1358        assert_eq!(
1359            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1360            1,
1361            "should report 1 directory as DstMissing"
1362        );
1363        assert_eq!(
1364            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1365            3,
1366            "should report 3 files as DstMissing"
1367        );
1368        Ok(())
1369    }
1370
1371    #[tokio::test]
1372    #[traced_test]
1373    async fn expand_missing_src_reports_all_entries() -> Result<()> {
1374        let tmp_dir = setup_test_dirs(true).await?;
1375        // create a new subdir in dst with files
1376        let newdir = tmp_dir.join("bar").join("newdir");
1377        tokio::fs::create_dir(&newdir).await?;
1378        tokio::fs::write(newdir.join("a.txt"), "a").await?;
1379        tokio::fs::write(newdir.join("b.txt"), "b").await?;
1380        let compare_settings = Settings {
1381            fail_early: false,
1382            exit_early: false,
1383            expand_missing: true,
1384            compare: enum_map! {
1385                ObjType::File => filecmp::MetadataCmpSettings::default(),
1386                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1387                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1388                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1389            },
1390            filter: None,
1391        };
1392        let summary = cmp(
1393            &PROGRESS,
1394            &tmp_dir.join("foo"),
1395            &tmp_dir.join("bar"),
1396            &LogWriter::silent().await?,
1397            &compare_settings,
1398        )
1399        .await?;
1400        assert_eq!(
1401            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1402            1,
1403            "should report 1 directory as SrcMissing"
1404        );
1405        assert_eq!(
1406            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1407            2,
1408            "should report 2 files as SrcMissing"
1409        );
1410        Ok(())
1411    }
1412
1413    #[tokio::test]
1414    #[traced_test]
1415    async fn expand_missing_dst_deeply_nested() -> Result<()> {
1416        // verify expansion recurses through multiple directory levels
1417        let tmp_dir = testutils::create_temp_dir().await?;
1418        let src = tmp_dir.join("src");
1419        let dst = tmp_dir.join("dst");
1420        tokio::fs::create_dir(&src).await?;
1421        tokio::fs::create_dir(&dst).await?;
1422        // create src/a/b/c/d.txt -- 3 dirs deep
1423        let deep = src.join("a").join("b").join("c");
1424        tokio::fs::create_dir_all(&deep).await?;
1425        tokio::fs::write(deep.join("d.txt"), "d").await?;
1426        // also add a sibling file at an intermediate level
1427        tokio::fs::write(src.join("a").join("b").join("mid.txt"), "m").await?;
1428        // dst exists but is empty -- everything in src is DstMissing
1429        let compare_settings = Settings {
1430            fail_early: false,
1431            exit_early: false,
1432            expand_missing: true,
1433            compare: enum_map! {
1434                ObjType::File => filecmp::MetadataCmpSettings::default(),
1435                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1436                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1437                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1438            },
1439            filter: None,
1440        };
1441        let summary = cmp(
1442            &PROGRESS,
1443            &src,
1444            &dst,
1445            &LogWriter::silent().await?,
1446            &compare_settings,
1447        )
1448        .await?;
1449        // dirs: a, a/b, a/b/c = 3 DstMissing dirs
1450        assert_eq!(
1451            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1452            3,
1453            "should report 3 nested directories as DstMissing"
1454        );
1455        // files: a/b/c/d.txt, a/b/mid.txt = 2 DstMissing files
1456        assert_eq!(
1457            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1458            2,
1459            "should report 2 files as DstMissing"
1460        );
1461        // src_bytes: d.txt(1) + mid.txt(1) = 2
1462        assert_eq!(
1463            summary.src_bytes, 2,
1464            "should track bytes for expanded files"
1465        );
1466        Ok(())
1467    }
1468
1469    #[tokio::test]
1470    #[traced_test]
1471    async fn expand_missing_src_deeply_nested() -> Result<()> {
1472        // verify expansion recurses for SrcMissing through multiple levels
1473        let tmp_dir = testutils::create_temp_dir().await?;
1474        let src = tmp_dir.join("src");
1475        let dst = tmp_dir.join("dst");
1476        tokio::fs::create_dir(&src).await?;
1477        tokio::fs::create_dir(&dst).await?;
1478        // create dst/x/y/z.txt -- dirs only in dst
1479        let deep = dst.join("x").join("y");
1480        tokio::fs::create_dir_all(&deep).await?;
1481        tokio::fs::write(deep.join("z.txt"), "zz").await?;
1482        let compare_settings = Settings {
1483            fail_early: false,
1484            exit_early: false,
1485            expand_missing: true,
1486            compare: enum_map! {
1487                ObjType::File => filecmp::MetadataCmpSettings::default(),
1488                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1489                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1490                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1491            },
1492            filter: None,
1493        };
1494        let summary = cmp(
1495            &PROGRESS,
1496            &src,
1497            &dst,
1498            &LogWriter::silent().await?,
1499            &compare_settings,
1500        )
1501        .await?;
1502        // dirs: x, x/y = 2 SrcMissing dirs
1503        assert_eq!(
1504            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1505            2,
1506            "should report 2 nested directories as SrcMissing"
1507        );
1508        // files: x/y/z.txt = 1 SrcMissing file
1509        assert_eq!(
1510            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1511            1,
1512            "should report 1 file as SrcMissing"
1513        );
1514        // dst_bytes: z.txt(2)
1515        assert_eq!(
1516            summary.dst_bytes, 2,
1517            "should track bytes for expanded files"
1518        );
1519        Ok(())
1520    }
1521
1522    #[tokio::test]
1523    #[traced_test]
1524    async fn expand_missing_with_exclude_filter() -> Result<()> {
1525        // verify that filters are applied during expansion. exclude *.log files
1526        // from the missing subtree
1527        let tmp_dir = testutils::create_temp_dir().await?;
1528        let src = tmp_dir.join("src");
1529        let dst = tmp_dir.join("dst");
1530        tokio::fs::create_dir(&src).await?;
1531        tokio::fs::create_dir(&dst).await?;
1532        // src/missing_dir/ has mixed files
1533        let missing = src.join("missing_dir");
1534        tokio::fs::create_dir(&missing).await?;
1535        tokio::fs::write(missing.join("keep.txt"), "k").await?;
1536        tokio::fs::write(missing.join("skip.log"), "s").await?;
1537        tokio::fs::write(missing.join("also_keep.txt"), "a").await?;
1538        let mut filter = crate::filter::FilterSettings::new();
1539        filter.add_exclude("*.log")?;
1540        let compare_settings = Settings {
1541            fail_early: false,
1542            exit_early: false,
1543            expand_missing: true,
1544            compare: enum_map! {
1545                ObjType::File => filecmp::MetadataCmpSettings::default(),
1546                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1547                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1548                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1549            },
1550            filter: Some(filter),
1551        };
1552        let summary = cmp(
1553            &PROGRESS,
1554            &src,
1555            &dst,
1556            &LogWriter::silent().await?,
1557            &compare_settings,
1558        )
1559        .await?;
1560        // missing_dir itself = 1 DstMissing dir
1561        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1562        // only keep.txt and also_keep.txt should be reported. skip.log is filtered
1563        assert_eq!(
1564            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1565            2,
1566            "should report only non-excluded files as DstMissing"
1567        );
1568        // skip.log should be counted as skipped
1569        assert_eq!(
1570            summary.skipped[ObjType::File],
1571            1,
1572            "should count excluded file as skipped"
1573        );
1574        Ok(())
1575    }
1576
1577    #[tokio::test]
1578    #[traced_test]
1579    async fn expand_missing_with_include_filter() -> Result<()> {
1580        // verify that include filters restrict which children are reported during expansion
1581        let tmp_dir = testutils::create_temp_dir().await?;
1582        let src = tmp_dir.join("src");
1583        let dst = tmp_dir.join("dst");
1584        tokio::fs::create_dir(&src).await?;
1585        tokio::fs::create_dir(&dst).await?;
1586        // src/data/ has a mix of file types
1587        let data = src.join("data");
1588        tokio::fs::create_dir(&data).await?;
1589        tokio::fs::write(data.join("a.rs"), "fn main() {}").await?;
1590        tokio::fs::write(data.join("b.txt"), "hello").await?;
1591        tokio::fs::write(data.join("c.rs"), "fn test() {}").await?;
1592        let mut filter = crate::filter::FilterSettings::new();
1593        filter.add_include("**/*.rs")?;
1594        let compare_settings = Settings {
1595            fail_early: false,
1596            exit_early: false,
1597            expand_missing: true,
1598            compare: enum_map! {
1599                ObjType::File => filecmp::MetadataCmpSettings::default(),
1600                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1601                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1602                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1603            },
1604            filter: Some(filter),
1605        };
1606        let summary = cmp(
1607            &PROGRESS,
1608            &src,
1609            &dst,
1610            &LogWriter::silent().await?,
1611            &compare_settings,
1612        )
1613        .await?;
1614        // data dir = 1 DstMissing dir
1615        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1616        // only a.rs and c.rs should be reported. b.txt is filtered out
1617        assert_eq!(
1618            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1619            2,
1620            "should report only included files as DstMissing"
1621        );
1622        Ok(())
1623    }
1624
1625    #[tokio::test]
1626    #[traced_test]
1627    async fn expand_missing_with_nested_path_filter() -> Result<()> {
1628        // verify path-based patterns work correctly during expansion.
1629        // only include files under a specific nested path
1630        let tmp_dir = testutils::create_temp_dir().await?;
1631        let src = tmp_dir.join("src");
1632        let dst = tmp_dir.join("dst");
1633        tokio::fs::create_dir(&src).await?;
1634        tokio::fs::create_dir(&dst).await?;
1635        // src/top/ has two subdirs: keep/ and skip/
1636        let top = src.join("top");
1637        let keep = top.join("keep");
1638        let skip = top.join("skip");
1639        tokio::fs::create_dir_all(&keep).await?;
1640        tokio::fs::create_dir_all(&skip).await?;
1641        tokio::fs::write(keep.join("1.txt"), "1").await?;
1642        tokio::fs::write(keep.join("2.txt"), "2").await?;
1643        tokio::fs::write(skip.join("3.txt"), "3").await?;
1644        let mut filter = crate::filter::FilterSettings::new();
1645        filter.add_include("top/keep/**")?;
1646        let compare_settings = Settings {
1647            fail_early: false,
1648            exit_early: false,
1649            expand_missing: true,
1650            compare: enum_map! {
1651                ObjType::File => filecmp::MetadataCmpSettings::default(),
1652                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1653                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1654                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1655            },
1656            filter: Some(filter),
1657        };
1658        let summary = cmp(
1659            &PROGRESS,
1660            &src,
1661            &dst,
1662            &LogWriter::silent().await?,
1663            &compare_settings,
1664        )
1665        .await?;
1666        // only keep/ subtree: keep dir(1) + top dir(1) = 2 dirs. skip dir is filtered
1667        assert_eq!(
1668            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1669            2,
1670            "should report top and keep dirs as DstMissing"
1671        );
1672        // only 1.txt and 2.txt from keep/
1673        assert_eq!(
1674            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1675            2,
1676            "should report only files under keep/ as DstMissing"
1677        );
1678        Ok(())
1679    }
1680
1681    #[tokio::test]
1682    #[traced_test]
1683    async fn expand_missing_false_preserves_original_behavior() -> Result<()> {
1684        let tmp_dir = setup_test_dirs(true).await?;
1685        // remove bar/bar directory entirely from dst
1686        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1687        let compare_settings = Settings {
1688            fail_early: false,
1689            exit_early: false,
1690            expand_missing: false,
1691            compare: enum_map! {
1692                ObjType::File => filecmp::MetadataCmpSettings::default(),
1693                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1694                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1695                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1696            },
1697            filter: None,
1698        };
1699        let summary = cmp(
1700            &PROGRESS,
1701            &tmp_dir.join("foo"),
1702            &tmp_dir.join("bar"),
1703            &LogWriter::silent().await?,
1704            &compare_settings,
1705        )
1706        .await?;
1707        // without expand_missing, only the top-level dir is reported
1708        assert_eq!(
1709            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1710            1,
1711            "should report only 1 directory as DstMissing"
1712        );
1713        assert_eq!(
1714            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1715            0,
1716            "should not report individual files as DstMissing"
1717        );
1718        Ok(())
1719    }
1720
/// A path made only of valid UTF-8 is encoded as-is.
#[test]
fn path_to_json_string_utf8() {
    let encoded = path_to_json_string(std::path::Path::new("/foo/bar/baz.txt"));
    assert_eq!(encoded, "/foo/bar/baz.txt");
}
1726
/// A single non-UTF-8 byte is rendered as a `\xHH` escape.
#[test]
fn path_to_json_string_non_utf8() {
    use std::os::unix::ffi::OsStrExt;
    // one 0xFF byte sits in the middle of the file name
    let raw_bytes: &[u8] = b"/tmp/bad\xffname.txt";
    let encoded = path_to_json_string(std::path::Path::new(std::ffi::OsStr::from_bytes(
        raw_bytes,
    )));
    assert_eq!(encoded, "/tmp/bad\\xffname.txt");
}
1736
/// Every invalid byte gets its own `\xHH` escape, including adjacent ones and
/// one at the very start of the path.
#[test]
fn path_to_json_string_multiple_bad_bytes() {
    use std::os::unix::ffi::OsStrExt;
    let raw_bytes: &[u8] = b"\x80/ok/\xfe\xff/end";
    let encoded = path_to_json_string(std::path::Path::new(std::ffi::OsStr::from_bytes(
        raw_bytes,
    )));
    assert_eq!(encoded, "\\x80/ok/\\xfe\\xff/end");
}
1745
/// A literal backslash in a path is doubled, so it cannot be mistaken for the
/// start of a `\xHH` byte escape.
#[test]
fn path_to_json_string_escapes_backslashes() {
    let encoded = path_to_json_string(std::path::Path::new("/tmp/bad\\xffname.txt"));
    assert_eq!(encoded, "/tmp/bad\\\\xffname.txt");
}
1753
/// A file name containing the literal text `\xff` and one containing a raw
/// 0xFF byte must not encode to the same string.
#[test]
fn path_to_json_string_no_collision() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;
    // file name with the four characters backslash, x, f, f
    let encoded_literal = path_to_json_string(std::path::Path::new("/tmp/bad\\xffname.txt"));
    // file name with an actual 0xFF byte
    let raw_name = OsStr::from_bytes(b"/tmp/bad\xffname.txt");
    let encoded_raw = path_to_json_string(std::path::Path::new(raw_name));
    // the two encodings have to differ
    assert_ne!(encoded_literal, encoded_raw);
}
1765
1766    /// Stress tests exercising max-open-files saturation during cmp.
1767    mod max_open_files_tests {
1768        use super::*;
1769        use anyhow::Context;
1770
1771        /// deep + wide cmp: directory tree deeper than the open-files limit, with files
1772        /// at every level. verifies no deadlock occurs (directories don't consume permits).
1773        #[tokio::test]
1774        #[traced_test]
1775        async fn deep_tree_no_deadlock_under_open_files_saturation() -> Result<()> {
1776            let tmp_dir = testutils::create_temp_dir().await?;
1777            let src = tmp_dir.join("src");
1778            let dst = tmp_dir.join("dst");
1779            let depth = 20;
1780            let files_per_level = 5;
1781            let limit = 4;
1782            // create matching directory chains in src and dst, deeper than the permit limit
1783            let mut src_dir = src.clone();
1784            let mut dst_dir = dst.clone();
1785            for level in 0..depth {
1786                tokio::fs::create_dir_all(&src_dir).await?;
1787                tokio::fs::create_dir_all(&dst_dir).await?;
1788                for f in 0..files_per_level {
1789                    let name = format!("f{}_{}.txt", level, f);
1790                    let content = format!("L{}F{}", level, f);
1791                    tokio::fs::write(src_dir.join(&name), &content).await?;
1792                    tokio::fs::write(dst_dir.join(&name), &content).await?;
1793                }
1794                src_dir = src_dir.join(format!("d{}", level));
1795                dst_dir = dst_dir.join(format!("d{}", level));
1796            }
1797            throttle::set_max_open_files(limit);
1798            let compare_settings = Settings {
1799                fail_early: false,
1800                exit_early: false,
1801                expand_missing: false,
1802                compare: enum_map::enum_map! {
1803                    ObjType::File => filecmp::MetadataCmpSettings { size: true, ..Default::default() },
1804                    ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1805                    ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1806                    ObjType::Other => filecmp::MetadataCmpSettings::default(),
1807                },
1808                filter: None,
1809            };
1810            let summary = tokio::time::timeout(
1811                std::time::Duration::from_secs(30),
1812                cmp(
1813                    &PROGRESS,
1814                    &src,
1815                    &dst,
1816                    &LogWriter::silent().await?,
1817                    &compare_settings,
1818                ),
1819            )
1820            .await
1821            .context("cmp timed out — possible deadlock")?
1822            .context("cmp failed")?;
1823            assert_eq!(
1824                summary.mismatch[ObjType::File][CompareResult::Same],
1825                depth * files_per_level
1826            );
1827            Ok(())
1828        }
1829
1830        /// expand_missing under saturation: dst is empty, src is a deep tree.
1831        /// verifies expand_missing_tree's recursion bounds tasks under the permit cap.
1832        #[tokio::test]
1833        #[traced_test]
1834        async fn expand_missing_under_open_files_saturation() -> Result<()> {
1835            let tmp_dir = testutils::create_temp_dir().await?;
1836            let src = tmp_dir.join("src");
1837            let dst = tmp_dir.join("dst");
1838            let depth = 10;
1839            let files_per_level = 5;
1840            let limit = 4;
1841            // create a deep tree in src; dst stays empty
1842            let mut dir = src.clone();
1843            for level in 0..depth {
1844                tokio::fs::create_dir_all(&dir).await?;
1845                for f in 0..files_per_level {
1846                    tokio::fs::write(dir.join(format!("f{}_{}.txt", level, f)), "x").await?;
1847                }
1848                dir = dir.join(format!("d{}", level));
1849            }
1850            tokio::fs::create_dir(&dst).await?;
1851            throttle::set_max_open_files(limit);
1852            let compare_settings = Settings {
1853                fail_early: false,
1854                exit_early: false,
1855                expand_missing: true,
1856                compare: enum_map::enum_map! {
1857                    ObjType::File => filecmp::MetadataCmpSettings::default(),
1858                    ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1859                    ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1860                    ObjType::Other => filecmp::MetadataCmpSettings::default(),
1861                },
1862                filter: None,
1863            };
1864            let summary = tokio::time::timeout(
1865                std::time::Duration::from_secs(30),
1866                cmp(
1867                    &PROGRESS,
1868                    &src,
1869                    &dst,
1870                    &LogWriter::silent().await?,
1871                    &compare_settings,
1872                ),
1873            )
1874            .await
1875            .context("cmp timed out — possible deadlock")?
1876            .context("cmp failed")?;
1877            // every file under src is missing on dst
1878            assert_eq!(
1879                summary.mismatch[ObjType::File][CompareResult::DstMissing],
1880                depth * files_per_level
1881            );
1882            Ok(())
1883        }
1884    }
1885}