Skip to main content

common/
cmp.rs

1use anyhow::{Context, Result};
2use async_recursion::async_recursion;
3use enum_map::{Enum, EnumMap};
4use tokio::io::AsyncWriteExt;
5use tracing::instrument;
6
7use crate::copy::is_file_type_same;
8use crate::filecmp;
9use crate::progress;
10
/// Outcome of comparing one object between the `src` and `dst` trees.
///
/// Used as an [`EnumMap`] key (see [`Mismatch`]), so the variant order determines the
/// order in which counters are iterated and printed.
#[derive(Copy, Clone, Debug, Enum)]
pub enum CompareResult {
    /// Object exists on both sides, has the same file type and passes the configured
    /// metadata comparison.
    Same,
    /// Object exists on both sides but the file type or the compared metadata differs.
    Different,
    SrcMissing, // object missing in src but present in dst
    DstMissing, // object missing in dst but present in src
}
18
/// Coarse classification of a filesystem object, derived from its metadata.
///
/// Used as an [`EnumMap`] key for per-type comparison settings and per-type counters.
#[derive(Copy, Clone, Debug, Enum)]
pub enum ObjType {
    /// Regular file.
    File,
    /// Directory.
    Dir,
    /// Symbolic link (metadata is read without following the link).
    Symlink,
    Other, // sockets, block devices, character devices, FIFOs, etc.
}
26
/// Metadata comparison settings, one entry per [`ObjType`].
pub type ObjSettings = EnumMap<ObjType, filecmp::MetadataCmpSettings>;
28
/// Options controlling a compare run.
#[derive(Debug, Clone)]
pub struct Settings {
    /// Which metadata fields are compared; looked up by the *source* object's type.
    pub compare: ObjSettings,
    /// Stop collecting results from child tasks after the first error.
    pub fail_early: bool,
    /// Return right after a `Different` result (or after recording an entry while
    /// expanding a missing tree) instead of descending further.
    pub exit_early: bool,
    /// When a directory exists on one side only, walk it and report every entry inside
    /// as missing rather than reporting just the directory itself.
    pub expand_missing: bool,
    /// Optional include/exclude filter; entries it does not include are counted as skipped.
    pub filter: Option<crate::filter::FilterSettings>,
}
37
/// Counters indexed first by [`ObjType`] and then by [`CompareResult`].
///
/// Despite the name, this also holds the count of objects that compared as `Same`.
pub type Mismatch = EnumMap<ObjType, EnumMap<CompareResult, u64>>;
39
/// Count of skipped items (entries excluded by the filter) per object type.
pub type Skipped = EnumMap<ObjType, u64>;
42
/// Output format for comparison results and summary.
// NOTE(review): this type derives `clap::ValueEnum`; clap typically surfaces variant doc
// comments as CLI help text, so treat the `///` lines on the variants as user-facing strings
// and edit them with care.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum OutputFormat {
    /// JSON output (NDJSON for differences, JSON object for summary)
    #[default]
    Json,
    /// Human-readable text output (legacy format)
    Text,
}
52
53fn compare_result_name(cr: CompareResult) -> &'static str {
54    match cr {
55        CompareResult::Same => "same",
56        CompareResult::Different => "different",
57        CompareResult::SrcMissing => "src_missing",
58        CompareResult::DstMissing => "dst_missing",
59    }
60}
61
62fn obj_type_name(ot: ObjType) -> &'static str {
63    match ot {
64        ObjType::File => "file",
65        ObjType::Dir => "dir",
66        ObjType::Symlink => "symlink",
67        ObjType::Other => "other",
68    }
69}
70
/// Renders a path as text that survives a JSON round trip for arbitrary Unix paths.
///
/// Encoding rules:
/// * a literal backslash becomes `\\`;
/// * every byte that is not part of valid UTF-8 becomes `\xHH` (two lowercase hex digits);
/// * all other characters are copied through unchanged.
///
/// To decode, parse the JSON string first, then scan left to right: `\\` is a literal
/// backslash, `\xHH` is one raw byte, and anything else is literal UTF-8.
fn path_to_json_string(path: &std::path::Path) -> String {
    use std::fmt::Write as _;
    use std::os::unix::ffi::OsStrExt as _;

    let raw = path.as_os_str().as_bytes();
    let mut encoded = String::with_capacity(raw.len());
    for piece in raw.utf8_chunks() {
        // Copy the valid text, doubling each backslash that separated two segments.
        let mut segments = piece.valid().split('\\');
        if let Some(head) = segments.next() {
            encoded.push_str(head);
        }
        for segment in segments {
            encoded.push_str("\\\\");
            encoded.push_str(segment);
        }
        // Bytes that broke UTF-8 decoding are emitted as hex escapes.
        for byte in piece.invalid() {
            write!(encoded, "\\x{byte:02x}").unwrap();
        }
    }
    encoded
}
95
/// Aggregated result of a compare run (or of one subtree of it).
///
/// Summaries from subtrees are combined with `+`.
#[derive(Default)]
pub struct Summary {
    /// Per object type, how many objects ended up with each [`CompareResult`].
    pub mismatch: Mismatch,
    /// Per object type, how many entries were excluded by the filter.
    pub skipped: Skipped,
    /// Total size of regular files compared on the source side, in bytes.
    pub src_bytes: u64,
    /// Total size of regular files compared on the destination side, in bytes.
    pub dst_bytes: u64,
}
105
106impl std::ops::Add for Summary {
107    type Output = Self;
108    fn add(self, other: Self) -> Self {
109        let mut mismatch = self.mismatch;
110        for (obj_type, &cmp_res_map) in &other.mismatch {
111            for (cmp_res, &count) in &cmp_res_map {
112                mismatch[obj_type][cmp_res] += count;
113            }
114        }
115        let mut skipped = self.skipped;
116        for (obj_type, &count) in &other.skipped {
117            skipped[obj_type] += count;
118        }
119        Self {
120            mismatch,
121            skipped,
122            src_bytes: self.src_bytes + other.src_bytes,
123            dst_bytes: self.dst_bytes + other.dst_bytes,
124        }
125    }
126}
127
128impl std::fmt::Display for Summary {
129    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
130        writeln!(
131            f,
132            "src size (compared): {}",
133            bytesize::ByteSize(self.src_bytes)
134        )?;
135        writeln!(
136            f,
137            "dst size (compared): {}",
138            bytesize::ByteSize(self.dst_bytes)
139        )?;
140        for (obj_type, &cmp_res_map) in &self.mismatch {
141            for (cmp_res, &count) in &cmp_res_map {
142                writeln!(f, "{obj_type:?} {cmp_res:?}: {count}")?;
143            }
144        }
145        for (obj_type, &count) in &self.skipped {
146            if count > 0 {
147                writeln!(f, "{obj_type:?} Skipped: {count}")?;
148            }
149        }
150        Ok(())
151    }
152}
153
/// Wraps a [`Summary`] with an [`OutputFormat`] so that [`Display`](std::fmt::Display)
/// renders either human-readable text or JSON.
pub struct FormattedSummary {
    /// The summary to render.
    pub summary: Summary,
    /// Selects the text or JSON rendering.
    pub format: OutputFormat,
}
160
161impl std::fmt::Display for FormattedSummary {
162    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
163        match self.format {
164            OutputFormat::Text => write!(f, "{}", self.summary),
165            OutputFormat::Json => {
166                let mut mismatch = serde_json::Map::new();
167                for (obj_type, &cmp_res_map) in &self.summary.mismatch {
168                    let mut counts = serde_json::Map::new();
169                    for (cmp_res, &count) in &cmp_res_map {
170                        counts.insert(
171                            compare_result_name(cmp_res).to_string(),
172                            serde_json::Value::Number(count.into()),
173                        );
174                    }
175                    mismatch.insert(
176                        obj_type_name(obj_type).to_string(),
177                        serde_json::Value::Object(counts),
178                    );
179                }
180                let mut skipped = serde_json::Map::new();
181                for (obj_type, &count) in &self.summary.skipped {
182                    if count > 0 {
183                        skipped.insert(
184                            obj_type_name(obj_type).to_string(),
185                            serde_json::Value::Number(count.into()),
186                        );
187                    }
188                }
189                let stats = crate::collect_runtime_stats();
190                let walltime = crate::get_progress().get_duration();
191                let obj = serde_json::json!({
192                    "src_bytes": self.summary.src_bytes,
193                    "dst_bytes": self.summary.dst_bytes,
194                    "mismatch": serde_json::Value::Object(mismatch),
195                    "skipped": serde_json::Value::Object(skipped),
196                    "walltime_ms": walltime.as_millis() as u64,
197                    "cpu_time_user_ms": stats.cpu_time_user_ms,
198                    "cpu_time_kernel_ms": stats.cpu_time_kernel_ms,
199                    "peak_rss_bytes": stats.peak_rss_bytes,
200                });
201                write!(f, "{obj}")
202            }
203        }
204    }
205}
206
/// Sink for per-object mismatch records.
///
/// Each destination is held behind an `Arc<tokio::sync::Mutex<..>>`, so clones of a
/// `LogWriter` share the same underlying buffered writer.
#[derive(Clone)]
pub struct LogWriter {
    /// Buffered log file, when logging to a file.
    file: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::fs::File>>>>,
    /// Buffered stdout, when logging to stdout.
    stdout: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::io::Stdout>>>>,
    /// Format used to render each record.
    format: OutputFormat,
}
213
214impl std::fmt::Debug for LogWriter {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        f.debug_struct("LogWriter")
217            .field("file", &self.file.is_some())
218            .field("stdout", &self.stdout.is_some())
219            .field("format", &self.format)
220            .finish()
221    }
222}
223
224impl LogWriter {
225    /// Creates a new LogWriter.
226    ///
227    /// If `log_path_opt` is provided, output goes to that file.
228    /// Otherwise, if `use_stdout` is true, output goes to stdout.
229    /// If both are false/None, no output is produced.
230    pub async fn new(
231        log_path_opt: Option<&std::path::Path>,
232        use_stdout: bool,
233        format: OutputFormat,
234    ) -> Result<Self> {
235        if let Some(log_path) = log_path_opt {
236            let log_file = tokio::fs::OpenOptions::new()
237                .write(true)
238                .create_new(true)
239                .open(log_path)
240                .await
241                .with_context(|| format!("Failed to open log file: {log_path:?}"))?;
242            let log =
243                std::sync::Arc::new(tokio::sync::Mutex::new(tokio::io::BufWriter::new(log_file)));
244            Ok(Self {
245                file: Some(log),
246                stdout: None,
247                format,
248            })
249        } else if use_stdout {
250            Ok(Self {
251                file: None,
252                stdout: Some(std::sync::Arc::new(tokio::sync::Mutex::new(
253                    tokio::io::BufWriter::new(tokio::io::stdout()),
254                ))),
255                format,
256            })
257        } else {
258            Ok(Self {
259                file: None,
260                stdout: None,
261                format,
262            })
263        }
264    }
265    /// Creates a silent LogWriter that produces no output, using the default format.
266    /// Convenience constructor primarily for tests.
267    pub async fn silent() -> Result<Self> {
268        Self::new(None, false, OutputFormat::default()).await
269    }
270
271    pub async fn log_mismatch(
272        &self,
273        cmp_result: CompareResult,
274        src_obj_type: Option<ObjType>,
275        src: &std::path::Path,
276        dst_obj_type: Option<ObjType>,
277        dst: &std::path::Path,
278    ) -> Result<()> {
279        let msg = match self.format {
280            OutputFormat::Text => {
281                format!(
282                    "[{cmp_result:?}]\n\t[{src_obj_type:?}]\t{src:?}\n\t[{dst_obj_type:?}]\t{dst:?}\n"
283                )
284            }
285            OutputFormat::Json => {
286                let src_type_val = match src_obj_type {
287                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
288                    None => serde_json::Value::Null,
289                };
290                let dst_type_val = match dst_obj_type {
291                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
292                    None => serde_json::Value::Null,
293                };
294                let obj = serde_json::json!({
295                    "result": compare_result_name(cmp_result),
296                    "src_type": src_type_val,
297                    "src": path_to_json_string(src),
298                    "dst_type": dst_type_val,
299                    "dst": path_to_json_string(dst),
300                });
301                format!("{obj}\n")
302            }
303        };
304        self.write(&msg).await
305    }
306
307    async fn write(&self, msg: &str) -> Result<()> {
308        if let Some(log) = &self.file {
309            let mut log = log.lock().await;
310            log.write_all(msg.as_bytes())
311                .await
312                .context("Failed to write to log file")?;
313        }
314        if let Some(stdout) = &self.stdout {
315            let mut stdout = stdout.lock().await;
316            stdout
317                .write_all(msg.as_bytes())
318                .await
319                .context("Failed to write to stdout")?;
320        }
321        Ok(())
322    }
323
324    pub async fn flush(&self) -> Result<()> {
325        if let Some(log) = &self.file {
326            let mut log = log.lock().await;
327            log.flush().await.context("Failed to flush log file")?;
328        }
329        if let Some(stdout) = &self.stdout {
330            let mut stdout = stdout.lock().await;
331            stdout.flush().await.context("Failed to flush stdout")?;
332        }
333        Ok(())
334    }
335}
336
337fn obj_type(metadata: &std::fs::Metadata) -> ObjType {
338    if metadata.is_file() {
339        ObjType::File
340    } else if metadata.is_dir() {
341        ObjType::Dir
342    } else if metadata.is_symlink() {
343        ObjType::Symlink
344    } else {
345        // sockets, block devices, character devices, FIFOs, etc.
346        ObjType::Other
347    }
348}
349
350/// Public entry point for compare operations.
351/// Internally delegates to cmp_internal with source_root/dest_root tracking for proper filter matching.
352#[instrument(skip(prog_track))]
353pub async fn cmp(
354    prog_track: &'static progress::Progress,
355    src: &std::path::Path,
356    dst: &std::path::Path,
357    log: &LogWriter,
358    settings: &Settings,
359) -> Result<Summary> {
360    cmp_internal(prog_track, src, dst, src, dst, log, settings).await
361}
362
/// Recursively walks a directory tree on the existing side and records every entry as missing
/// on the other side.
///
/// * `existing_path` - the object that exists; its metadata is read without following symlinks.
/// * `mirror_path` - where the object would be on the other side; used for the log record and
///   as the base for the children's mirror paths.
/// * `existing_root` - root that entry paths are made relative to before filter matching.
/// * `result` - expected to be `DstMissing` or `SrcMissing`. Any other value is still counted
///   in the summary, but nothing is logged and no bytes are added.
///
/// Returns the summary for `existing_path` and, unless `settings.exit_early` is set, everything
/// below it. Errors from children are collected and returned as one error after the children
/// have been joined; with `settings.fail_early`, joining stops at the first child error.
#[instrument(skip(prog_track))]
#[async_recursion]
async fn expand_missing_tree(
    prog_track: &'static progress::Progress,
    existing_path: &std::path::Path,
    mirror_path: &std::path::Path,
    existing_root: &std::path::Path,
    result: CompareResult,
    log: &LogWriter,
    settings: &Settings,
) -> Result<Summary> {
    let _prog_guard = prog_track.ops.guard();
    // symlink_metadata: a symlink is classified as a symlink, not as its target
    let metadata = tokio::fs::symlink_metadata(existing_path)
        .await
        .with_context(|| format!("failed reading metadata from {:?}", &existing_path))?;
    let existing_obj_type = obj_type(&metadata);
    let mut summary = Summary::default();
    summary.mismatch[existing_obj_type][result] += 1;
    // track file sizes on the appropriate side
    if metadata.is_file() {
        match result {
            CompareResult::DstMissing => summary.src_bytes += metadata.len(),
            CompareResult::SrcMissing => summary.dst_bytes += metadata.len(),
            _ => {}
        }
    }
    // log with (src, dst) argument order: the existing side carries the object type,
    // the missing side gets `None`
    match result {
        CompareResult::DstMissing => {
            log.log_mismatch(
                result,
                Some(existing_obj_type),
                existing_path,
                None,
                mirror_path,
            )
            .await?;
        }
        CompareResult::SrcMissing => {
            log.log_mismatch(
                result,
                None,
                mirror_path,
                Some(existing_obj_type),
                existing_path,
            )
            .await?;
        }
        _ => {}
    }
    // with exit_early, this entry has been recorded and we do not descend
    if settings.exit_early {
        return Ok(summary);
    }
    // only directories have children to expand
    if !metadata.is_dir() {
        return Ok(summary);
    }
    let mut entries = tokio::fs::read_dir(existing_path)
        .await
        .with_context(|| format!("cannot open directory {:?} for reading", &existing_path))?;
    let mut join_set = tokio::task::JoinSet::new();
    let errors = crate::error_collector::ErrorCollector::default();
    while let Some(entry) = entries
        .next_entry()
        .await
        .with_context(|| format!("failed traversing directory {:?}", &existing_path))?
    {
        // wait for an ops token before processing the entry (same pattern as in `cmp_internal`)
        throttle::get_ops_token().await;
        let entry_path = entry.path();
        let entry_name = entry_path.file_name().unwrap();
        // apply filter if configured
        if let Some(ref filter) = settings.filter {
            // the filter matches against the path relative to the root; fall back to the
            // full path if the prefix cannot be stripped
            let relative_path = entry_path
                .strip_prefix(existing_root)
                .unwrap_or(&entry_path);
            // a failed file_type() lookup is treated as "not a directory"
            let entry_file_type = entry.file_type().await.ok();
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a directory nor a symlink is counted as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        let child_mirror = mirror_path.join(entry_name);
        // the spawned task needs owned copies of everything it captures
        let log = log.clone();
        let settings = settings.clone();
        let existing_root = existing_root.to_owned();
        join_set.spawn(async move {
            expand_missing_tree(
                prog_track,
                &entry_path,
                &child_mirror,
                &existing_root,
                result,
                &log,
                &settings,
            )
            .await
        });
    }
    // release the directory handle before waiting on the children
    drop(entries);
    while let Some(res) = join_set.join_next().await {
        // `res?` propagates a task join failure immediately
        match res? {
            Ok(child_summary) => summary = summary + child_summary,
            Err(error) => {
                tracing::error!(
                    "expand_missing_tree: {:?} failed with: {:#}",
                    existing_path,
                    &error
                );
                errors.push(error);
                if settings.fail_early {
                    break;
                }
            }
        }
    }
    if let Some(err) = errors.into_error() {
        return Err(err);
    }
    Ok(summary)
}
495
/// Recursive worker behind [`cmp`]: compares `src` against `dst` and, when both are
/// directories, their contents.
///
/// * `src` / `dst` - the pair of paths compared by this call.
/// * `source_root` / `dest_root` - the roots of the whole comparison; entry paths are made
///   relative to them before filter matching, and `src == source_root` identifies the initial
///   call.
///
/// Steps, in order:
/// 1. Read `src` metadata (without following symlinks); failure is an error.
/// 2. On the initial call, apply the filter's root-item check; a filtered-out root yields a
///    summary with a single skipped entry.
/// 3. Read `dst` metadata. If `dst` is not found, record `DstMissing` (expanding the whole
///    `src` directory when `settings.expand_missing` is set) and return.
/// 4. Record `Different` or `Same`, attributed to the *source* object type. With
///    `settings.exit_early`, return right after a `Different`.
/// 5. If both sides are directories, spawn one task per included `src` entry, then scan `dst`
///    for entries not seen in `src` and record them as `SrcMissing`.
/// 6. Join the spawned tasks, summing their summaries and collecting their errors.
///
/// Returns the combined summary, or the collected error if any child failed.
#[instrument(skip(prog_track))]
#[async_recursion]
async fn cmp_internal(
    prog_track: &'static progress::Progress,
    src: &std::path::Path,
    dst: &std::path::Path,
    source_root: &std::path::Path,
    dest_root: &std::path::Path,
    log: &LogWriter,
    settings: &Settings,
) -> Result<Summary> {
    let _prog_guard = prog_track.ops.guard();
    tracing::debug!("reading source metadata");
    // src can only be missing if the user passed an invalid path, which is an error
    let src_metadata = tokio::fs::symlink_metadata(src)
        .await
        .with_context(|| format!("failed reading metadata from {:?}", &src))?;
    // apply filter to root item (when src == source_root, this is the initial call)
    if src == source_root {
        if let Some(ref filter) = settings.filter {
            // a path without a final component (no file name) is never filtered here
            if let Some(name) = src.file_name() {
                let is_dir = src_metadata.is_dir();
                if !matches!(
                    filter.should_include_root_item(name.as_ref(), is_dir),
                    crate::filter::FilterResult::Included
                ) {
                    // root item filtered out, return summary with skipped count
                    let src_obj_type = obj_type(&src_metadata);
                    let mut summary = Summary::default();
                    summary.skipped[src_obj_type] += 1;
                    return Ok(summary);
                }
            }
        }
    }
    let mut cmp_summary = Summary::default();
    let src_obj_type = obj_type(&src_metadata);
    // track file sizes for the summary
    if src_metadata.is_file() {
        cmp_summary.src_bytes += src_metadata.len();
    }
    let dst_metadata = {
        match tokio::fs::symlink_metadata(dst).await {
            Ok(metadata) => metadata,
            Err(err) => {
                // "not found" is a comparison result, any other error is a failure
                if err.kind() == std::io::ErrorKind::NotFound {
                    if settings.expand_missing && src_metadata.is_dir() {
                        // report every entry under src as missing in dst
                        let expanded = expand_missing_tree(
                            prog_track,
                            src,
                            dst,
                            source_root,
                            CompareResult::DstMissing,
                            log,
                            settings,
                        )
                        .await?;
                        cmp_summary = cmp_summary + expanded;
                    } else {
                        cmp_summary.mismatch[src_obj_type][CompareResult::DstMissing] += 1;
                        log.log_mismatch(
                            CompareResult::DstMissing,
                            Some(src_obj_type),
                            src,
                            None,
                            dst,
                        )
                        .await?;
                    }
                    return Ok(cmp_summary);
                }
                return Err(err).context(format!("failed reading metadata from {:?}", &dst));
            }
        }
    };
    if dst_metadata.is_file() {
        cmp_summary.dst_bytes += dst_metadata.len();
    }
    // the metadata comparison is only evaluated when the file types match (short-circuit `||`)
    if !is_file_type_same(&src_metadata, &dst_metadata)
        || !filecmp::metadata_equal(
            &settings.compare[src_obj_type],
            &src_metadata,
            &dst_metadata,
        )
    {
        // we use the src type for the summary attribution
        cmp_summary.mismatch[src_obj_type][CompareResult::Different] += 1;
        let dst_obj_type = obj_type(&dst_metadata);
        log.log_mismatch(
            CompareResult::Different,
            Some(src_obj_type),
            src,
            Some(dst_obj_type),
            dst,
        )
        .await?;
        if settings.exit_early {
            return Ok(cmp_summary);
        }
    } else {
        cmp_summary.mismatch[src_obj_type][CompareResult::Same] += 1;
    }
    if !src_metadata.is_dir() || !dst_metadata.is_dir() {
        // nothing more to do
        return Ok(cmp_summary);
    }
    tracing::debug!("process contents of 'src' directory");
    let mut src_entries = tokio::fs::read_dir(src)
        .await
        .with_context(|| format!("cannot open directory {src:?} for reading"))?;
    let mut join_set = tokio::task::JoinSet::new();
    let errors = crate::error_collector::ErrorCollector::default();
    // names of the src entries we have spawned a comparison for; used below to find
    // dst-only entries
    let mut processed_files = std::collections::HashSet::new();
    // iterate through src entries and recursively call "cmp" on each one
    while let Some(src_entry) = src_entries
        .next_entry()
        .await
        .with_context(|| format!("failed traversing directory {:?}", &src))?
    {
        // it's better to await the token here so that we throttle the syscalls generated by the
        // DirEntry call. the ops-throttle will never cause a deadlock (unlike max-open-files limit)
        // so it's safe to do here.
        throttle::get_ops_token().await;
        let entry_path = src_entry.path();
        let entry_name = entry_path.file_name().unwrap();
        // apply filter if configured
        if let Some(ref filter) = settings.filter {
            // compute relative path from source_root for filter matching
            let relative_path = entry_path.strip_prefix(source_root).unwrap_or(&entry_path);
            // a failed file_type() lookup is treated as "not a directory"
            let entry_file_type = src_entry.file_type().await.ok();
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a directory nor a symlink is counted as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                cmp_summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        // filtered-out src entries are NOT added here, so a same-named dst entry goes
        // through the dst-side filter check below
        processed_files.insert(entry_name.to_owned());
        let dst_path = dst.join(entry_name);
        // the spawned task needs owned copies of everything it captures
        let log = log.clone();
        let settings = settings.clone();
        let source_root = source_root.to_owned();
        let dest_root = dest_root.to_owned();
        let do_cmp = || async move {
            cmp_internal(
                prog_track,
                &entry_path,
                &dst_path,
                &source_root,
                &dest_root,
                &log,
                &settings,
            )
            .await
        };
        join_set.spawn(do_cmp());
    }
    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
    // one thing we CAN do however is to drop it as soon as we're done with it
    drop(src_entries);
    tracing::debug!("process contents of 'dst' directory");
    let mut dst_entries = tokio::fs::read_dir(dst)
        .await
        .with_context(|| format!("cannot open directory {:?} for reading", &dst))?;
    // iterate through dst entries and log each one that's not present in src
    while let Some(dst_entry) = dst_entries
        .next_entry()
        .await
        .with_context(|| format!("failed traversing directory {:?}", &dst))?
    {
        let entry_path = dst_entry.path();
        let entry_name = entry_path.file_name().unwrap();
        if processed_files.contains(entry_name) {
            // we already must have considered this file, skip it
            continue;
        }
        // apply filter if configured - if this entry would be filtered, don't report as missing
        if let Some(ref filter) = settings.filter {
            // compute relative path from dest_root for filter matching
            let relative_path = entry_path.strip_prefix(dest_root).unwrap_or(&entry_path);
            // a failed file_type() lookup is treated as "not a directory"
            let entry_file_type = dst_entry.file_type().await.ok();
            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
            if !matches!(
                filter.should_include(relative_path, is_dir),
                crate::filter::FilterResult::Included
            ) {
                // increment skipped counter based on entry type
                // (anything that is neither a directory nor a symlink is counted as a file)
                let entry_obj_type = if is_dir {
                    ObjType::Dir
                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
                    ObjType::Symlink
                } else {
                    ObjType::File
                };
                cmp_summary.skipped[entry_obj_type] += 1;
                continue;
            }
        }
        tracing::debug!("found a new entry in the 'dst' directory");
        let dst_path = dst.join(entry_name);
        let dst_entry_metadata = tokio::fs::symlink_metadata(&dst_path)
            .await
            .with_context(|| format!("failed reading metadata from {:?}", &dst_path))?;
        let dst_obj_type = obj_type(&dst_entry_metadata);
        if settings.expand_missing && dst_entry_metadata.is_dir() {
            // report every entry under the dst-only directory as missing in src;
            // this runs inline (awaited here), not as a spawned task
            match expand_missing_tree(
                prog_track,
                &dst_path,
                &src.join(entry_name),
                dest_root,
                CompareResult::SrcMissing,
                log,
                settings,
            )
            .await
            {
                Ok(expanded) => cmp_summary = cmp_summary + expanded,
                Err(error) => {
                    tracing::error!(
                        "expand_missing_tree: {:?} failed with: {:#}",
                        &dst_path,
                        &error
                    );
                    errors.push(error);
                    if settings.fail_early {
                        // unwrap is safe: we just pushed an error
                        return Err(errors.into_error().unwrap());
                    }
                }
            }
        } else {
            if dst_entry_metadata.is_file() {
                cmp_summary.dst_bytes += dst_entry_metadata.len();
            }
            cmp_summary.mismatch[dst_obj_type][CompareResult::SrcMissing] += 1;
            log.log_mismatch(
                CompareResult::SrcMissing,
                None,
                &src.join(entry_name),
                Some(dst_obj_type),
                &dst_path,
            )
            .await?;
        }
    }
    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
    // one thing we CAN do however is to drop it as soon as we're done with it
    drop(dst_entries);
    // collect the results of the per-entry comparisons spawned above
    while let Some(res) = join_set.join_next().await {
        // `res?` propagates a task join failure immediately
        match res? {
            Ok(summary) => cmp_summary = cmp_summary + summary,
            Err(error) => {
                tracing::error!("cmp: {:?} vs {:?} failed with: {:#}", src, dst, &error);
                errors.push(error);
                if settings.fail_early {
                    break;
                }
            }
        }
    }
    if let Some(err) = errors.into_error() {
        return Err(err);
    }
    Ok(cmp_summary)
}
772
773#[cfg(test)]
774mod cmp_tests {
775    use crate::copy;
776    use crate::preserve;
777    use crate::testutils;
778    use enum_map::enum_map;
779    use tracing_test::traced_test;
780
781    use super::*;
782
783    static PROGRESS: std::sync::LazyLock<progress::Progress> =
784        std::sync::LazyLock::new(progress::Progress::new);
785    static NO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
786        std::sync::LazyLock::new(preserve::preserve_none);
787    static DO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
788        std::sync::LazyLock::new(preserve::preserve_all);
789
790    async fn setup_test_dirs(preserve: bool) -> Result<std::path::PathBuf> {
791        let tmp_dir = testutils::setup_test_dir().await?;
792        let test_path = tmp_dir.as_path();
793        copy::copy(
794            &PROGRESS,
795            &test_path.join("foo"),
796            &test_path.join("bar"),
797            &copy::Settings {
798                dereference: false,
799                fail_early: false,
800                overwrite: false,
801                overwrite_compare: filecmp::MetadataCmpSettings {
802                    size: true,
803                    mtime: true,
804                    ..Default::default()
805                },
806                overwrite_filter: None,
807                ignore_existing: false,
808                chunk_size: 0,
809                remote_copy_buffer_size: 0,
810                filter: None,
811                dry_run: None,
812            },
813            if preserve {
814                &DO_PRESERVE_SETTINGS
815            } else {
816                &NO_PRESERVE_SETTINGS
817            },
818            false,
819        )
820        .await?;
821        Ok(tmp_dir)
822    }
823
824    async fn truncate_file(path: &str) -> Result<()> {
825        let file = tokio::fs::File::create(path).await?;
826        file.set_len(0).await?;
827        Ok(())
828    }
829
830    #[tokio::test]
831    #[traced_test]
832    async fn check_basic_cmp() -> Result<()> {
833        let tmp_dir = setup_test_dirs(true).await?;
834        // drop 1 file from src
835        tokio::fs::remove_file(&tmp_dir.join("foo").join("bar").join("1.txt")).await?;
836        // sleep to ensure mtime is different, this acts as a poor-mans barrier
837        tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
838        // modify 1 file in dst
839        truncate_file(
840            tmp_dir
841                .join("bar")
842                .join("baz")
843                .join("4.txt")
844                .to_str()
845                .unwrap(),
846        )
847        .await?;
848        // drop 1 (other) file from dst
849        tokio::fs::remove_file(&tmp_dir.join("bar").join("bar").join("2.txt")).await?;
850        // create one more file in dst -- this will also modify the mtime of the directory
851        tokio::fs::File::create(&tmp_dir.join("bar").join("baz").join("7.txt")).await?;
852        let compare_settings = Settings {
853            fail_early: false,
854            exit_early: false,
855            expand_missing: false,
856            compare: enum_map! {
857                ObjType::File => filecmp::MetadataCmpSettings {
858                    size: true,
859                    mtime: true,
860                    ..Default::default()
861                },
862                ObjType::Dir => filecmp::MetadataCmpSettings {
863                    mtime: true,
864                    ..Default::default()
865                },
866                ObjType::Symlink => filecmp::MetadataCmpSettings {
867                    mtime: true,
868                    ..Default::default()
869                },
870                ObjType::Other => filecmp::MetadataCmpSettings {
871                    mtime: true,
872                    ..Default::default()
873                },
874            },
875            filter: None,
876        };
877        let summary = cmp(
878            &PROGRESS,
879            &tmp_dir.join("foo"),
880            &tmp_dir.join("bar"),
881            &LogWriter::new(
882                Some(tmp_dir.join("cmp.log").as_path()),
883                false,
884                OutputFormat::Text,
885            )
886            .await?,
887            &compare_settings,
888        )
889        .await?;
890        let mismatch: Mismatch = enum_map! {
891            ObjType::File => enum_map! {
892                CompareResult::Different => 1,
893                CompareResult::Same => 2,
894                CompareResult::SrcMissing => 2,
895                CompareResult::DstMissing => 1,
896            },
897            ObjType::Dir => enum_map! {
898                CompareResult::Different => 2,
899                CompareResult::Same => 1,
900                CompareResult::SrcMissing => 0,
901                CompareResult::DstMissing => 0,
902            },
903            ObjType::Symlink => enum_map! {
904                CompareResult::Different => 0,
905                CompareResult::Same => 2,
906                CompareResult::SrcMissing => 0,
907                CompareResult::DstMissing => 0,
908            },
909            ObjType::Other => enum_map! {
910                CompareResult::Different => 0,
911                CompareResult::Same => 0,
912                CompareResult::SrcMissing => 0,
913                CompareResult::DstMissing => 0,
914            },
915        };
916        assert_eq!(summary.mismatch, mismatch);
917        // src has 4 regular files of 1 byte each (0.txt, bar/2.txt, bar/3.txt, baz/4.txt)
918        assert_eq!(summary.src_bytes, 4);
919        // dst has: 0.txt(1B), bar/1.txt(1B, SrcMissing), bar/3.txt(1B), baz/4.txt(0B, truncated), baz/7.txt(0B, SrcMissing)
920        assert_eq!(summary.dst_bytes, 3);
921        Ok(())
922    }
923
924    #[tokio::test]
925    #[traced_test]
926    async fn cmp_with_filter_excludes_files() -> Result<()> {
927        let tmp_dir = setup_test_dirs(true).await?;
928        // setup: src=foo, dst=bar (identical at this point)
929        // add a file to dst that would be reported as SrcMissing
930        tokio::fs::write(&tmp_dir.join("bar").join("extra.txt"), "extra").await?;
931        // without filter, should report extra.txt as SrcMissing
932        let compare_settings_no_filter = Settings {
933            fail_early: false,
934            exit_early: false,
935            expand_missing: false,
936            compare: enum_map! {
937                ObjType::File => filecmp::MetadataCmpSettings {
938                    size: true,
939                    mtime: true,
940                    ..Default::default()
941                },
942                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
943                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
944                ObjType::Other => filecmp::MetadataCmpSettings::default(),
945            },
946            filter: None,
947        };
948        let summary = cmp(
949            &PROGRESS,
950            &tmp_dir.join("foo"),
951            &tmp_dir.join("bar"),
952            &LogWriter::silent().await?,
953            &compare_settings_no_filter,
954        )
955        .await?;
956        assert_eq!(
957            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
958            1
959        );
960        // with filter excluding extra.txt, should not report it
961        let mut filter = crate::filter::FilterSettings::new();
962        filter.add_exclude("extra.txt")?;
963        let compare_settings_with_filter = Settings {
964            fail_early: false,
965            exit_early: false,
966            expand_missing: false,
967            compare: enum_map! {
968                ObjType::File => filecmp::MetadataCmpSettings {
969                    size: true,
970                    mtime: true,
971                    ..Default::default()
972                },
973                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
974                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
975                ObjType::Other => filecmp::MetadataCmpSettings::default(),
976            },
977            filter: Some(filter),
978        };
979        let summary = cmp(
980            &PROGRESS,
981            &tmp_dir.join("foo"),
982            &tmp_dir.join("bar"),
983            &LogWriter::silent().await?,
984            &compare_settings_with_filter,
985        )
986        .await?;
987        assert_eq!(
988            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
989            0
990        );
991        Ok(())
992    }
993
994    #[tokio::test]
995    #[traced_test]
996    async fn cmp_with_include_only_compares_matching() -> Result<()> {
997        let tmp_dir = setup_test_dirs(true).await?;
998        // setup: src=foo, dst=bar (identical at this point)
999        // modify a file that won't be included
1000        tokio::fs::write(&tmp_dir.join("bar").join("bar").join("1.txt"), "modified").await?;
1001        // with include pattern for only *.rs files, the .txt modification shouldn't appear
1002        let mut filter = crate::filter::FilterSettings::new();
1003        filter.add_include("*.rs")?;
1004        let compare_settings = Settings {
1005            fail_early: false,
1006            exit_early: false,
1007            expand_missing: false,
1008            compare: enum_map! {
1009                ObjType::File => filecmp::MetadataCmpSettings {
1010                    size: true,
1011                    mtime: true,
1012                    ..Default::default()
1013                },
1014                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1015                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1016                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1017            },
1018            filter: Some(filter),
1019        };
1020        let summary = cmp(
1021            &PROGRESS,
1022            &tmp_dir.join("foo"),
1023            &tmp_dir.join("bar"),
1024            &LogWriter::silent().await?,
1025            &compare_settings,
1026        )
1027        .await?;
1028        // no differences should be reported since all .txt files are excluded
1029        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1030        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1031        assert_eq!(
1032            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1033            0
1034        );
1035        assert_eq!(
1036            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1037            0
1038        );
1039        Ok(())
1040    }
1041
1042    #[tokio::test]
1043    #[traced_test]
1044    async fn cmp_with_path_pattern_filters_nested() -> Result<()> {
1045        // test that path-based patterns like "bar/*.txt" work correctly when recursing
1046        // this verifies source_root tracking is working properly
1047        let tmp_dir = setup_test_dirs(true).await?;
1048        // test structure:
1049        // foo/bar/1.txt, foo/bar/2.txt, foo/bar/3.txt
1050        // foo/baz/4.txt, foo/baz/5.txt (symlink), foo/baz/6.txt (symlink)
1051        // filter: only include bar/*.txt
1052        let mut filter = crate::filter::FilterSettings::new();
1053        filter.add_include("bar/*.txt")?;
1054        let compare_settings = Settings {
1055            fail_early: false,
1056            exit_early: false,
1057            expand_missing: false,
1058            compare: enum_map! {
1059                ObjType::File => filecmp::MetadataCmpSettings {
1060                    size: true,
1061                    ..Default::default()
1062                },
1063                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1064                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1065                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1066            },
1067            filter: Some(filter),
1068        };
1069        let summary = cmp(
1070            &PROGRESS,
1071            &tmp_dir.join("foo"),
1072            &tmp_dir.join("bar"),
1073            &LogWriter::silent().await?,
1074            &compare_settings,
1075        )
1076        .await?;
1077        // should only compare files in bar/ subdirectory (3 files: 1.txt, 2.txt, 3.txt)
1078        // all should be "Same" since we copied foo to bar earlier
1079        assert_eq!(
1080            summary.mismatch[ObjType::File][CompareResult::Same],
1081            3,
1082            "should have 3 same files from bar/*.txt pattern"
1083        );
1084        // files in baz/ should not be compared (filtered out)
1085        // 0.txt at root should not be compared
1086        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1087        Ok(())
1088    }
1089
1090    #[tokio::test]
1091    #[traced_test]
1092    async fn cmp_filter_applies_to_root_file() -> Result<()> {
1093        // test that filters apply to the root item itself
1094        let tmp_dir = testutils::create_temp_dir().await?;
1095        // create two different files
1096        tokio::fs::write(tmp_dir.join("test.txt"), "content1").await?;
1097        tokio::fs::write(tmp_dir.join("test2.txt"), "content2").await?;
1098        // filter: only include *.rs files
1099        let mut filter = crate::filter::FilterSettings::new();
1100        filter.add_include("*.rs")?;
1101        let compare_settings = Settings {
1102            fail_early: false,
1103            exit_early: false,
1104            expand_missing: false,
1105            compare: enum_map! {
1106                ObjType::File => filecmp::MetadataCmpSettings {
1107                    size: true,
1108                    ..Default::default()
1109                },
1110                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1111                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1112                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1113            },
1114            filter: Some(filter),
1115        };
1116        // compare test.txt vs test2.txt - should be filtered out (not *.rs)
1117        let summary = cmp(
1118            &PROGRESS,
1119            &tmp_dir.join("test.txt"),
1120            &tmp_dir.join("test2.txt"),
1121            &LogWriter::silent().await?,
1122            &compare_settings,
1123        )
1124        .await?;
1125        // should return empty summary since root file is filtered
1126        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1127        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1128        Ok(())
1129    }
1130
1131    #[tokio::test]
1132    #[traced_test]
1133    async fn cmp_filter_excludes_root_directory() -> Result<()> {
1134        // test that filters apply to root directories
1135        let tmp_dir = testutils::setup_test_dir().await?;
1136        // filter: exclude directories named "foo"
1137        let mut filter = crate::filter::FilterSettings::new();
1138        filter.add_exclude("foo")?;
1139        let compare_settings = Settings {
1140            fail_early: false,
1141            exit_early: false,
1142            expand_missing: false,
1143            compare: enum_map! {
1144                ObjType::File => filecmp::MetadataCmpSettings::default(),
1145                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1146                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1147                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1148            },
1149            filter: Some(filter),
1150        };
1151        // compare foo vs bar - foo should be filtered out
1152        let summary = cmp(
1153            &PROGRESS,
1154            &tmp_dir.join("foo"),
1155            &tmp_dir.join("bar"),
1156            &LogWriter::silent().await?,
1157            &compare_settings,
1158        )
1159        .await?;
1160        // should return empty summary since root dir is excluded
1161        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Same], 0);
1162        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Different], 0);
1163        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1164        Ok(())
1165    }
1166
1167    #[tokio::test]
1168    #[traced_test]
1169    async fn cmp_combined_include_exclude_patterns() -> Result<()> {
1170        let tmp_dir = setup_test_dirs(true).await?;
1171        // include all .txt files, but exclude bar/2.txt specifically
1172        let mut filter = crate::filter::FilterSettings::new();
1173        filter.add_include("**/*.txt")?;
1174        filter.add_exclude("bar/2.txt")?;
1175        let compare_settings = Settings {
1176            fail_early: false,
1177            exit_early: false,
1178            expand_missing: false,
1179            compare: enum_map! {
1180                ObjType::File => filecmp::MetadataCmpSettings {
1181                    size: true,
1182                    ..Default::default()
1183                },
1184                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1185                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1186                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1187            },
1188            filter: Some(filter),
1189        };
1190        let summary = cmp(
1191            &PROGRESS,
1192            &tmp_dir.join("foo"),
1193            &tmp_dir.join("bar"),
1194            &LogWriter::silent().await?,
1195            &compare_settings,
1196        )
1197        .await?;
1198        // should compare: 0.txt, bar/1.txt, bar/3.txt, baz/4.txt = 4 files (same)
1199        // should skip: bar/2.txt (excluded by pattern), 5.txt and 6.txt (symlinks, no match for *.txt in src dir) = 1 file + 2 symlinks
1200        // note: the pattern **/*.txt only matches files with .txt extension, but 5.txt and 6.txt in baz are symlinks
1201        assert_eq!(
1202            summary.mismatch[ObjType::File][CompareResult::Same],
1203            4,
1204            "should compare 4 .txt files as same"
1205        );
1206        // bar/2.txt is skipped for both src and dst traversal = 2 skipped
1207        assert_eq!(
1208            summary.skipped[ObjType::File],
1209            2,
1210            "should skip 2 files (bar/2.txt on src and dst)"
1211        );
1212        Ok(())
1213    }
1214
1215    #[tokio::test]
1216    #[traced_test]
1217    async fn cmp_skipped_counts_comprehensive() -> Result<()> {
1218        let tmp_dir = setup_test_dirs(true).await?;
1219        // exclude bar/ directory entirely
1220        let mut filter = crate::filter::FilterSettings::new();
1221        filter.add_exclude("bar/")?;
1222        let compare_settings = Settings {
1223            fail_early: false,
1224            exit_early: false,
1225            expand_missing: false,
1226            compare: enum_map! {
1227                ObjType::File => filecmp::MetadataCmpSettings {
1228                    size: true,
1229                    ..Default::default()
1230                },
1231                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1232                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1233                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1234            },
1235            filter: Some(filter),
1236        };
1237        let summary = cmp(
1238            &PROGRESS,
1239            &tmp_dir.join("foo"),
1240            &tmp_dir.join("bar"),
1241            &LogWriter::silent().await?,
1242            &compare_settings,
1243        )
1244        .await?;
1245        // compared: 0.txt (same), baz/4.txt (same) = 2 files
1246        // compared: baz/5.txt symlink (same), baz/6.txt symlink (same) = 2 symlinks
1247        // skipped: bar directory in src and dst = 2 dirs (cmp traverses both)
1248        assert_eq!(
1249            summary.mismatch[ObjType::File][CompareResult::Same],
1250            2,
1251            "should compare 2 files as same"
1252        );
1253        assert_eq!(
1254            summary.mismatch[ObjType::Symlink][CompareResult::Same],
1255            2,
1256            "should compare 2 symlinks as same"
1257        );
1258        assert_eq!(
1259            summary.skipped[ObjType::Dir],
1260            2,
1261            "should skip 2 directories (bar in src + bar in dst)"
1262        );
1263        Ok(())
1264    }
1265
1266    #[tokio::test]
1267    #[traced_test]
1268    async fn expand_missing_dst_reports_all_entries() -> Result<()> {
1269        let tmp_dir = setup_test_dirs(true).await?;
1270        // remove bar/bar directory entirely from dst
1271        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1272        let compare_settings = Settings {
1273            fail_early: false,
1274            exit_early: false,
1275            expand_missing: true,
1276            compare: enum_map! {
1277                ObjType::File => filecmp::MetadataCmpSettings::default(),
1278                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1279                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1280                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1281            },
1282            filter: None,
1283        };
1284        let summary = cmp(
1285            &PROGRESS,
1286            &tmp_dir.join("foo"),
1287            &tmp_dir.join("bar"),
1288            &LogWriter::silent().await?,
1289            &compare_settings,
1290        )
1291        .await?;
1292        // bar/bar dir has: bar/ (1 dir) + 1.txt, 2.txt, 3.txt (3 files)
1293        assert_eq!(
1294            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1295            1,
1296            "should report 1 directory as DstMissing"
1297        );
1298        assert_eq!(
1299            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1300            3,
1301            "should report 3 files as DstMissing"
1302        );
1303        Ok(())
1304    }
1305
1306    #[tokio::test]
1307    #[traced_test]
1308    async fn expand_missing_src_reports_all_entries() -> Result<()> {
1309        let tmp_dir = setup_test_dirs(true).await?;
1310        // create a new subdir in dst with files
1311        let newdir = tmp_dir.join("bar").join("newdir");
1312        tokio::fs::create_dir(&newdir).await?;
1313        tokio::fs::write(newdir.join("a.txt"), "a").await?;
1314        tokio::fs::write(newdir.join("b.txt"), "b").await?;
1315        let compare_settings = Settings {
1316            fail_early: false,
1317            exit_early: false,
1318            expand_missing: true,
1319            compare: enum_map! {
1320                ObjType::File => filecmp::MetadataCmpSettings::default(),
1321                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1322                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1323                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1324            },
1325            filter: None,
1326        };
1327        let summary = cmp(
1328            &PROGRESS,
1329            &tmp_dir.join("foo"),
1330            &tmp_dir.join("bar"),
1331            &LogWriter::silent().await?,
1332            &compare_settings,
1333        )
1334        .await?;
1335        assert_eq!(
1336            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1337            1,
1338            "should report 1 directory as SrcMissing"
1339        );
1340        assert_eq!(
1341            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1342            2,
1343            "should report 2 files as SrcMissing"
1344        );
1345        Ok(())
1346    }
1347
1348    #[tokio::test]
1349    #[traced_test]
1350    async fn expand_missing_dst_deeply_nested() -> Result<()> {
1351        // verify expansion recurses through multiple directory levels
1352        let tmp_dir = testutils::create_temp_dir().await?;
1353        let src = tmp_dir.join("src");
1354        let dst = tmp_dir.join("dst");
1355        tokio::fs::create_dir(&src).await?;
1356        tokio::fs::create_dir(&dst).await?;
1357        // create src/a/b/c/d.txt -- 3 dirs deep
1358        let deep = src.join("a").join("b").join("c");
1359        tokio::fs::create_dir_all(&deep).await?;
1360        tokio::fs::write(deep.join("d.txt"), "d").await?;
1361        // also add a sibling file at an intermediate level
1362        tokio::fs::write(src.join("a").join("b").join("mid.txt"), "m").await?;
1363        // dst exists but is empty -- everything in src is DstMissing
1364        let compare_settings = Settings {
1365            fail_early: false,
1366            exit_early: false,
1367            expand_missing: true,
1368            compare: enum_map! {
1369                ObjType::File => filecmp::MetadataCmpSettings::default(),
1370                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1371                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1372                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1373            },
1374            filter: None,
1375        };
1376        let summary = cmp(
1377            &PROGRESS,
1378            &src,
1379            &dst,
1380            &LogWriter::silent().await?,
1381            &compare_settings,
1382        )
1383        .await?;
1384        // dirs: a, a/b, a/b/c = 3 DstMissing dirs
1385        assert_eq!(
1386            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1387            3,
1388            "should report 3 nested directories as DstMissing"
1389        );
1390        // files: a/b/c/d.txt, a/b/mid.txt = 2 DstMissing files
1391        assert_eq!(
1392            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1393            2,
1394            "should report 2 files as DstMissing"
1395        );
1396        // src_bytes: d.txt(1) + mid.txt(1) = 2
1397        assert_eq!(
1398            summary.src_bytes, 2,
1399            "should track bytes for expanded files"
1400        );
1401        Ok(())
1402    }
1403
1404    #[tokio::test]
1405    #[traced_test]
1406    async fn expand_missing_src_deeply_nested() -> Result<()> {
1407        // verify expansion recurses for SrcMissing through multiple levels
1408        let tmp_dir = testutils::create_temp_dir().await?;
1409        let src = tmp_dir.join("src");
1410        let dst = tmp_dir.join("dst");
1411        tokio::fs::create_dir(&src).await?;
1412        tokio::fs::create_dir(&dst).await?;
1413        // create dst/x/y/z.txt -- dirs only in dst
1414        let deep = dst.join("x").join("y");
1415        tokio::fs::create_dir_all(&deep).await?;
1416        tokio::fs::write(deep.join("z.txt"), "zz").await?;
1417        let compare_settings = Settings {
1418            fail_early: false,
1419            exit_early: false,
1420            expand_missing: true,
1421            compare: enum_map! {
1422                ObjType::File => filecmp::MetadataCmpSettings::default(),
1423                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1424                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1425                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1426            },
1427            filter: None,
1428        };
1429        let summary = cmp(
1430            &PROGRESS,
1431            &src,
1432            &dst,
1433            &LogWriter::silent().await?,
1434            &compare_settings,
1435        )
1436        .await?;
1437        // dirs: x, x/y = 2 SrcMissing dirs
1438        assert_eq!(
1439            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1440            2,
1441            "should report 2 nested directories as SrcMissing"
1442        );
1443        // files: x/y/z.txt = 1 SrcMissing file
1444        assert_eq!(
1445            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1446            1,
1447            "should report 1 file as SrcMissing"
1448        );
1449        // dst_bytes: z.txt(2)
1450        assert_eq!(
1451            summary.dst_bytes, 2,
1452            "should track bytes for expanded files"
1453        );
1454        Ok(())
1455    }
1456
1457    #[tokio::test]
1458    #[traced_test]
1459    async fn expand_missing_with_exclude_filter() -> Result<()> {
1460        // verify that filters are applied during expansion. exclude *.log files
1461        // from the missing subtree
1462        let tmp_dir = testutils::create_temp_dir().await?;
1463        let src = tmp_dir.join("src");
1464        let dst = tmp_dir.join("dst");
1465        tokio::fs::create_dir(&src).await?;
1466        tokio::fs::create_dir(&dst).await?;
1467        // src/missing_dir/ has mixed files
1468        let missing = src.join("missing_dir");
1469        tokio::fs::create_dir(&missing).await?;
1470        tokio::fs::write(missing.join("keep.txt"), "k").await?;
1471        tokio::fs::write(missing.join("skip.log"), "s").await?;
1472        tokio::fs::write(missing.join("also_keep.txt"), "a").await?;
1473        let mut filter = crate::filter::FilterSettings::new();
1474        filter.add_exclude("*.log")?;
1475        let compare_settings = Settings {
1476            fail_early: false,
1477            exit_early: false,
1478            expand_missing: true,
1479            compare: enum_map! {
1480                ObjType::File => filecmp::MetadataCmpSettings::default(),
1481                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1482                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1483                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1484            },
1485            filter: Some(filter),
1486        };
1487        let summary = cmp(
1488            &PROGRESS,
1489            &src,
1490            &dst,
1491            &LogWriter::silent().await?,
1492            &compare_settings,
1493        )
1494        .await?;
1495        // missing_dir itself = 1 DstMissing dir
1496        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1497        // only keep.txt and also_keep.txt should be reported. skip.log is filtered
1498        assert_eq!(
1499            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1500            2,
1501            "should report only non-excluded files as DstMissing"
1502        );
1503        // skip.log should be counted as skipped
1504        assert_eq!(
1505            summary.skipped[ObjType::File],
1506            1,
1507            "should count excluded file as skipped"
1508        );
1509        Ok(())
1510    }
1511
1512    #[tokio::test]
1513    #[traced_test]
1514    async fn expand_missing_with_include_filter() -> Result<()> {
1515        // verify that include filters restrict which children are reported during expansion
1516        let tmp_dir = testutils::create_temp_dir().await?;
1517        let src = tmp_dir.join("src");
1518        let dst = tmp_dir.join("dst");
1519        tokio::fs::create_dir(&src).await?;
1520        tokio::fs::create_dir(&dst).await?;
1521        // src/data/ has a mix of file types
1522        let data = src.join("data");
1523        tokio::fs::create_dir(&data).await?;
1524        tokio::fs::write(data.join("a.rs"), "fn main() {}").await?;
1525        tokio::fs::write(data.join("b.txt"), "hello").await?;
1526        tokio::fs::write(data.join("c.rs"), "fn test() {}").await?;
1527        let mut filter = crate::filter::FilterSettings::new();
1528        filter.add_include("**/*.rs")?;
1529        let compare_settings = Settings {
1530            fail_early: false,
1531            exit_early: false,
1532            expand_missing: true,
1533            compare: enum_map! {
1534                ObjType::File => filecmp::MetadataCmpSettings::default(),
1535                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1536                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1537                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1538            },
1539            filter: Some(filter),
1540        };
1541        let summary = cmp(
1542            &PROGRESS,
1543            &src,
1544            &dst,
1545            &LogWriter::silent().await?,
1546            &compare_settings,
1547        )
1548        .await?;
1549        // data dir = 1 DstMissing dir
1550        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1551        // only a.rs and c.rs should be reported. b.txt is filtered out
1552        assert_eq!(
1553            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1554            2,
1555            "should report only included files as DstMissing"
1556        );
1557        Ok(())
1558    }
1559
1560    #[tokio::test]
1561    #[traced_test]
1562    async fn expand_missing_with_nested_path_filter() -> Result<()> {
1563        // verify path-based patterns work correctly during expansion.
1564        // only include files under a specific nested path
1565        let tmp_dir = testutils::create_temp_dir().await?;
1566        let src = tmp_dir.join("src");
1567        let dst = tmp_dir.join("dst");
1568        tokio::fs::create_dir(&src).await?;
1569        tokio::fs::create_dir(&dst).await?;
1570        // src/top/ has two subdirs: keep/ and skip/
1571        let top = src.join("top");
1572        let keep = top.join("keep");
1573        let skip = top.join("skip");
1574        tokio::fs::create_dir_all(&keep).await?;
1575        tokio::fs::create_dir_all(&skip).await?;
1576        tokio::fs::write(keep.join("1.txt"), "1").await?;
1577        tokio::fs::write(keep.join("2.txt"), "2").await?;
1578        tokio::fs::write(skip.join("3.txt"), "3").await?;
1579        let mut filter = crate::filter::FilterSettings::new();
1580        filter.add_include("top/keep/**")?;
1581        let compare_settings = Settings {
1582            fail_early: false,
1583            exit_early: false,
1584            expand_missing: true,
1585            compare: enum_map! {
1586                ObjType::File => filecmp::MetadataCmpSettings::default(),
1587                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1588                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1589                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1590            },
1591            filter: Some(filter),
1592        };
1593        let summary = cmp(
1594            &PROGRESS,
1595            &src,
1596            &dst,
1597            &LogWriter::silent().await?,
1598            &compare_settings,
1599        )
1600        .await?;
1601        // only keep/ subtree: keep dir(1) + top dir(1) = 2 dirs. skip dir is filtered
1602        assert_eq!(
1603            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1604            2,
1605            "should report top and keep dirs as DstMissing"
1606        );
1607        // only 1.txt and 2.txt from keep/
1608        assert_eq!(
1609            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1610            2,
1611            "should report only files under keep/ as DstMissing"
1612        );
1613        Ok(())
1614    }
1615
1616    #[tokio::test]
1617    #[traced_test]
1618    async fn expand_missing_false_preserves_original_behavior() -> Result<()> {
1619        let tmp_dir = setup_test_dirs(true).await?;
1620        // remove bar/bar directory entirely from dst
1621        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1622        let compare_settings = Settings {
1623            fail_early: false,
1624            exit_early: false,
1625            expand_missing: false,
1626            compare: enum_map! {
1627                ObjType::File => filecmp::MetadataCmpSettings::default(),
1628                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1629                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1630                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1631            },
1632            filter: None,
1633        };
1634        let summary = cmp(
1635            &PROGRESS,
1636            &tmp_dir.join("foo"),
1637            &tmp_dir.join("bar"),
1638            &LogWriter::silent().await?,
1639            &compare_settings,
1640        )
1641        .await?;
1642        // without expand_missing, only the top-level dir is reported
1643        assert_eq!(
1644            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1645            1,
1646            "should report only 1 directory as DstMissing"
1647        );
1648        assert_eq!(
1649            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1650            0,
1651            "should not report individual files as DstMissing"
1652        );
1653        Ok(())
1654    }
1655
1656    #[test]
1657    fn path_to_json_string_utf8() {
1658        let path = std::path::Path::new("/foo/bar/baz.txt");
1659        assert_eq!(path_to_json_string(path), "/foo/bar/baz.txt");
1660    }
1661
1662    #[test]
1663    fn path_to_json_string_non_utf8() {
1664        use std::ffi::OsStr;
1665        use std::os::unix::ffi::OsStrExt;
1666        // embed 0xFF byte in the middle
1667        let os_str = OsStr::from_bytes(b"/tmp/bad\xffname.txt");
1668        let path = std::path::Path::new(os_str);
1669        assert_eq!(path_to_json_string(path), "/tmp/bad\\xffname.txt");
1670    }
1671
1672    #[test]
1673    fn path_to_json_string_multiple_bad_bytes() {
1674        use std::ffi::OsStr;
1675        use std::os::unix::ffi::OsStrExt;
1676        let os_str = OsStr::from_bytes(b"\x80/ok/\xfe\xff/end");
1677        let path = std::path::Path::new(os_str);
1678        assert_eq!(path_to_json_string(path), "\\x80/ok/\\xfe\\xff/end");
1679    }
1680
1681    #[test]
1682    fn path_to_json_string_escapes_backslashes() {
1683        // a path with a literal backslash must be escaped so it doesn't
1684        // collide with \xHH byte escapes
1685        let path = std::path::Path::new("/tmp/bad\\xffname.txt");
1686        assert_eq!(path_to_json_string(path), "/tmp/bad\\\\xffname.txt");
1687    }
1688
1689    #[test]
1690    fn path_to_json_string_no_collision() {
1691        use std::ffi::OsStr;
1692        use std::os::unix::ffi::OsStrExt;
1693        // literal backslash-x-f-f in the filename
1694        let literal = std::path::Path::new("/tmp/bad\\xffname.txt");
1695        // actual 0xFF byte in the filename
1696        let raw = std::path::Path::new(OsStr::from_bytes(b"/tmp/bad\xffname.txt"));
1697        // these must produce different output
1698        assert_ne!(path_to_json_string(literal), path_to_json_string(raw));
1699    }
1700}