Skip to main content

common/
cmp.rs

1use anyhow::{Context, Result};
2use async_recursion::async_recursion;
3use enum_map::{Enum, EnumMap};
4use tokio::io::AsyncWriteExt;
5use tracing::instrument;
6
7use crate::copy::is_file_type_same;
8use crate::filecmp;
9use crate::progress;
10
/// Outcome of comparing one object between the source and destination trees.
///
/// Used as the inner key of [`Mismatch`] and as the `result` field of logged records.
#[derive(Copy, Clone, Debug, Enum)]
pub enum CompareResult {
    /// The object exists on both sides, has the same file type, and passed the metadata check.
    Same,
    /// The object exists on both sides but its file type or compared metadata differs.
    Different,
    SrcMissing, // object missing in src but present in dst
    DstMissing, // same as above but flipped
}
18
/// Kind of filesystem object; keys the per-type comparison settings and summary counters.
#[derive(Copy, Clone, Debug, Enum)]
pub enum ObjType {
    /// Regular file.
    File,
    /// Directory.
    Dir,
    /// Symbolic link (metadata is read without following links).
    Symlink,
    Other, // sockets, block devices, character devices, FIFOs, etc.
}
26
/// Metadata comparison settings, one entry per [`ObjType`].
pub type ObjSettings = EnumMap<ObjType, filecmp::MetadataCmpSettings>;
28
/// Options controlling a compare run.
#[derive(Debug, Clone)]
pub struct Settings {
    /// Which metadata fields are compared, selected by the source object's type.
    pub compare: ObjSettings,
    /// When set, stop collecting results after the first error instead of processing the
    /// remaining work.
    pub fail_early: bool,
    /// When set, an object that was reported as different or missing is not descended into
    /// any further.
    pub exit_early: bool,
    /// When set, a directory that exists on only one side is walked recursively and every
    /// entry in it is reported as missing, instead of reporting just the directory.
    pub expand_missing: bool,
    /// Optional filter; entries it does not include are counted as skipped, not compared.
    pub filter: Option<crate::filter::FilterSettings>,
}
37
/// Number of objects seen for each ([`ObjType`], [`CompareResult`]) combination.
pub type Mismatch = EnumMap<ObjType, EnumMap<CompareResult, u64>>;
39
/// Count of skipped items per object type.
///
/// Incremented for entries that the configured filter does not include.
pub type Skipped = EnumMap<ObjType, u64>;
42
/// Output format for comparison results and summary.
// NOTE(review): this enum derives `clap::ValueEnum`; clap is expected to surface the variant
// doc comments below as CLI help text, so treat them as user-facing strings -- confirm before
// rewording them.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum OutputFormat {
    /// JSON output (NDJSON for differences, JSON object for summary)
    #[default]
    Json,
    /// Human-readable text output (legacy format)
    Text,
}
52
53fn compare_result_name(cr: CompareResult) -> &'static str {
54    match cr {
55        CompareResult::Same => "same",
56        CompareResult::Different => "different",
57        CompareResult::SrcMissing => "src_missing",
58        CompareResult::DstMissing => "dst_missing",
59    }
60}
61
62fn obj_type_name(ot: ObjType) -> &'static str {
63    match ot {
64        ObjType::File => "file",
65        ObjType::Dir => "dir",
66        ObjType::Symlink => "symlink",
67        ObjType::Other => "other",
68    }
69}
70
/// Produces a lossless textual form of an arbitrary Unix path, suitable for embedding in a
/// JSON string.
///
/// Valid UTF-8 is copied through unchanged except that every literal backslash is doubled
/// (`\` becomes `\\`); each byte that is not part of valid UTF-8 is written as `\xHH` with two
/// lowercase hex digits. A consumer reverses this after JSON-parsing the string by scanning
/// left-to-right: `\\` → literal `\`, `\xHH` → raw byte, everything else is literal UTF-8.
fn path_to_json_string(path: &std::path::Path) -> String {
    use std::fmt::Write as _;
    use std::os::unix::ffi::OsStrExt as _;

    let raw = path.as_os_str().as_bytes();
    let mut encoded = String::with_capacity(raw.len());
    for piece in raw.utf8_chunks() {
        // splitting on the backslash and re-joining with the doubled form escapes every
        // occurrence, including leading, trailing and consecutive ones
        let mut segments = piece.valid().split('\\');
        if let Some(head) = segments.next() {
            encoded.push_str(head);
        }
        for segment in segments {
            encoded.push_str("\\\\");
            encoded.push_str(segment);
        }
        for byte in piece.invalid() {
            // formatting into a String never returns an error
            write!(encoded, "\\x{byte:02x}").unwrap();
        }
    }
    encoded
}
95
/// Aggregated result of a compare run. Summaries of subtrees are combined with `+`.
#[derive(Default)]
pub struct Summary {
    /// Object counts per type and comparison result (matching objects are counted as `Same`).
    pub mismatch: Mismatch,
    /// Number of entries excluded by the filter, per object type.
    pub skipped: Skipped,
    /// Total size of regular files compared on the source side, in bytes.
    pub src_bytes: u64,
    /// Total size of regular files compared on the destination side, in bytes.
    pub dst_bytes: u64,
}
105
106impl std::ops::Add for Summary {
107    type Output = Self;
108    fn add(self, other: Self) -> Self {
109        let mut mismatch = self.mismatch;
110        for (obj_type, &cmp_res_map) in &other.mismatch {
111            for (cmp_res, &count) in &cmp_res_map {
112                mismatch[obj_type][cmp_res] += count;
113            }
114        }
115        let mut skipped = self.skipped;
116        for (obj_type, &count) in &other.skipped {
117            skipped[obj_type] += count;
118        }
119        Self {
120            mismatch,
121            skipped,
122            src_bytes: self.src_bytes + other.src_bytes,
123            dst_bytes: self.dst_bytes + other.dst_bytes,
124        }
125    }
126}
127
128impl std::fmt::Display for Summary {
129    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
130        writeln!(
131            f,
132            "src size (compared): {}",
133            bytesize::ByteSize(self.src_bytes)
134        )?;
135        writeln!(
136            f,
137            "dst size (compared): {}",
138            bytesize::ByteSize(self.dst_bytes)
139        )?;
140        for (obj_type, &cmp_res_map) in &self.mismatch {
141            for (cmp_res, &count) in &cmp_res_map {
142                writeln!(f, "{obj_type:?} {cmp_res:?}: {count}")?;
143            }
144        }
145        for (obj_type, &count) in &self.skipped {
146            if count > 0 {
147                writeln!(f, "{obj_type:?} Skipped: {count}")?;
148            }
149        }
150        Ok(())
151    }
152}
153
/// Wraps a [`Summary`] with an [`OutputFormat`] so that [`Display`](std::fmt::Display)
/// renders either human-readable text or JSON.
pub struct FormattedSummary {
    /// The summary to render.
    pub summary: Summary,
    /// Selects the text or JSON rendering.
    pub format: OutputFormat,
}
160
161impl std::fmt::Display for FormattedSummary {
162    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
163        match self.format {
164            OutputFormat::Text => write!(f, "{}", self.summary),
165            OutputFormat::Json => {
166                let mut mismatch = serde_json::Map::new();
167                for (obj_type, &cmp_res_map) in &self.summary.mismatch {
168                    let mut counts = serde_json::Map::new();
169                    for (cmp_res, &count) in &cmp_res_map {
170                        counts.insert(
171                            compare_result_name(cmp_res).to_string(),
172                            serde_json::Value::Number(count.into()),
173                        );
174                    }
175                    mismatch.insert(
176                        obj_type_name(obj_type).to_string(),
177                        serde_json::Value::Object(counts),
178                    );
179                }
180                let mut skipped = serde_json::Map::new();
181                for (obj_type, &count) in &self.summary.skipped {
182                    if count > 0 {
183                        skipped.insert(
184                            obj_type_name(obj_type).to_string(),
185                            serde_json::Value::Number(count.into()),
186                        );
187                    }
188                }
189                let stats = crate::collect_runtime_stats();
190                let walltime = crate::get_progress().get_duration();
191                let obj = serde_json::json!({
192                    "src_bytes": self.summary.src_bytes,
193                    "dst_bytes": self.summary.dst_bytes,
194                    "mismatch": serde_json::Value::Object(mismatch),
195                    "skipped": serde_json::Value::Object(skipped),
196                    "walltime_ms": walltime.as_millis() as u64,
197                    "cpu_time_user_ms": stats.cpu_time_user_ms,
198                    "cpu_time_kernel_ms": stats.cpu_time_kernel_ms,
199                    "peak_rss_bytes": stats.peak_rss_bytes,
200                });
201                write!(f, "{obj}")
202            }
203        }
204    }
205}
206
/// Sink for per-object mismatch records.
///
/// Both destinations are held behind `Arc`s, so clones write to the same underlying buffered
/// writer. Output is buffered; call [`LogWriter::flush`] to push it out.
#[derive(Clone)]
pub struct LogWriter {
    /// Buffered log file, present when a log path was supplied.
    file: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::fs::File>>>>,
    /// Buffered stdout, present when stdout output was requested and no log path was supplied.
    stdout: Option<std::sync::Arc<tokio::sync::Mutex<tokio::io::BufWriter<tokio::io::Stdout>>>>,
    /// Format used to render each record.
    format: OutputFormat,
}
213
214impl std::fmt::Debug for LogWriter {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        f.debug_struct("LogWriter")
217            .field("file", &self.file.is_some())
218            .field("stdout", &self.stdout.is_some())
219            .field("format", &self.format)
220            .finish()
221    }
222}
223
224impl LogWriter {
225    /// Creates a new LogWriter.
226    ///
227    /// If `log_path_opt` is provided, output goes to that file.
228    /// Otherwise, if `use_stdout` is true, output goes to stdout.
229    /// If both are false/None, no output is produced.
230    pub async fn new(
231        log_path_opt: Option<&std::path::Path>,
232        use_stdout: bool,
233        format: OutputFormat,
234    ) -> Result<Self> {
235        if let Some(log_path) = log_path_opt {
236            let log_file = tokio::fs::OpenOptions::new()
237                .write(true)
238                .create_new(true)
239                .open(log_path)
240                .await
241                .with_context(|| format!("Failed to open log file: {log_path:?}"))?;
242            let log =
243                std::sync::Arc::new(tokio::sync::Mutex::new(tokio::io::BufWriter::new(log_file)));
244            Ok(Self {
245                file: Some(log),
246                stdout: None,
247                format,
248            })
249        } else if use_stdout {
250            Ok(Self {
251                file: None,
252                stdout: Some(std::sync::Arc::new(tokio::sync::Mutex::new(
253                    tokio::io::BufWriter::new(tokio::io::stdout()),
254                ))),
255                format,
256            })
257        } else {
258            Ok(Self {
259                file: None,
260                stdout: None,
261                format,
262            })
263        }
264    }
265    /// Creates a silent LogWriter that produces no output, using the default format.
266    /// Convenience constructor primarily for tests.
267    pub async fn silent() -> Result<Self> {
268        Self::new(None, false, OutputFormat::default()).await
269    }
270
271    pub async fn log_mismatch(
272        &self,
273        cmp_result: CompareResult,
274        src_obj_type: Option<ObjType>,
275        src: &std::path::Path,
276        dst_obj_type: Option<ObjType>,
277        dst: &std::path::Path,
278    ) -> Result<()> {
279        let msg = match self.format {
280            OutputFormat::Text => {
281                format!(
282                    "[{cmp_result:?}]\n\t[{src_obj_type:?}]\t{src:?}\n\t[{dst_obj_type:?}]\t{dst:?}\n"
283                )
284            }
285            OutputFormat::Json => {
286                let src_type_val = match src_obj_type {
287                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
288                    None => serde_json::Value::Null,
289                };
290                let dst_type_val = match dst_obj_type {
291                    Some(ot) => serde_json::Value::String(obj_type_name(ot).to_string()),
292                    None => serde_json::Value::Null,
293                };
294                let obj = serde_json::json!({
295                    "result": compare_result_name(cmp_result),
296                    "src_type": src_type_val,
297                    "src": path_to_json_string(src),
298                    "dst_type": dst_type_val,
299                    "dst": path_to_json_string(dst),
300                });
301                format!("{obj}\n")
302            }
303        };
304        self.write(&msg).await
305    }
306
307    async fn write(&self, msg: &str) -> Result<()> {
308        if let Some(log) = &self.file {
309            let mut log = log.lock().await;
310            log.write_all(msg.as_bytes())
311                .await
312                .context("Failed to write to log file")?;
313        }
314        if let Some(stdout) = &self.stdout {
315            let mut stdout = stdout.lock().await;
316            stdout
317                .write_all(msg.as_bytes())
318                .await
319                .context("Failed to write to stdout")?;
320        }
321        Ok(())
322    }
323
324    pub async fn flush(&self) -> Result<()> {
325        if let Some(log) = &self.file {
326            let mut log = log.lock().await;
327            log.flush().await.context("Failed to flush log file")?;
328        }
329        if let Some(stdout) = &self.stdout {
330            let mut stdout = stdout.lock().await;
331            stdout.flush().await.context("Failed to flush stdout")?;
332        }
333        Ok(())
334    }
335}
336
337fn obj_type(metadata: &std::fs::Metadata) -> ObjType {
338    if metadata.is_file() {
339        ObjType::File
340    } else if metadata.is_dir() {
341        ObjType::Dir
342    } else if metadata.is_symlink() {
343        ObjType::Symlink
344    } else {
345        // sockets, block devices, character devices, FIFOs, etc.
346        ObjType::Other
347    }
348}
349
350/// Public entry point for compare operations.
351/// Internally delegates to cmp_internal with source_root/dest_root tracking for proper filter matching.
352#[instrument(skip(prog_track))]
353pub async fn cmp(
354    prog_track: &'static progress::Progress,
355    src: &std::path::Path,
356    dst: &std::path::Path,
357    log: &LogWriter,
358    settings: &Settings,
359) -> Result<Summary> {
360    cmp_internal(prog_track, src, dst, src, dst, log, settings).await
361}
362
363/// Recursively walks a directory tree on the existing side and records every entry as missing
364/// on the other side.
365#[instrument(skip(prog_track))]
366#[async_recursion]
367async fn expand_missing_tree(
368    prog_track: &'static progress::Progress,
369    existing_path: &std::path::Path,
370    mirror_path: &std::path::Path,
371    existing_root: &std::path::Path,
372    result: CompareResult,
373    log: &LogWriter,
374    settings: &Settings,
375) -> Result<Summary> {
376    let _prog_guard = prog_track.ops.guard();
377    let metadata = tokio::fs::symlink_metadata(existing_path)
378        .await
379        .with_context(|| format!("failed reading metadata from {:?}", &existing_path))?;
380    let existing_obj_type = obj_type(&metadata);
381    let mut summary = Summary::default();
382    summary.mismatch[existing_obj_type][result] += 1;
383    // track file sizes on the appropriate side
384    if metadata.is_file() {
385        match result {
386            CompareResult::DstMissing => summary.src_bytes += metadata.len(),
387            CompareResult::SrcMissing => summary.dst_bytes += metadata.len(),
388            _ => {}
389        }
390    }
391    match result {
392        CompareResult::DstMissing => {
393            log.log_mismatch(
394                result,
395                Some(existing_obj_type),
396                existing_path,
397                None,
398                mirror_path,
399            )
400            .await?;
401        }
402        CompareResult::SrcMissing => {
403            log.log_mismatch(
404                result,
405                None,
406                mirror_path,
407                Some(existing_obj_type),
408                existing_path,
409            )
410            .await?;
411        }
412        _ => {}
413    }
414    if settings.exit_early {
415        return Ok(summary);
416    }
417    if !metadata.is_dir() {
418        return Ok(summary);
419    }
420    let mut entries = tokio::fs::read_dir(existing_path)
421        .await
422        .with_context(|| format!("cannot open directory {:?} for reading", &existing_path))?;
423    let mut join_set = tokio::task::JoinSet::new();
424    let errors = crate::error_collector::ErrorCollector::default();
425    while let Some(entry) = entries
426        .next_entry()
427        .await
428        .with_context(|| format!("failed traversing directory {:?}", &existing_path))?
429    {
430        throttle::get_ops_token().await;
431        let entry_path = entry.path();
432        let entry_name = entry_path.file_name().unwrap();
433        // apply filter if configured
434        if let Some(ref filter) = settings.filter {
435            let relative_path = entry_path
436                .strip_prefix(existing_root)
437                .unwrap_or(&entry_path);
438            let entry_file_type = entry.file_type().await.ok();
439            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
440            if !matches!(
441                filter.should_include(relative_path, is_dir),
442                crate::filter::FilterResult::Included
443            ) {
444                // increment skipped counter based on entry type
445                let entry_obj_type = if is_dir {
446                    ObjType::Dir
447                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
448                    ObjType::Symlink
449                } else {
450                    ObjType::File
451                };
452                summary.skipped[entry_obj_type] += 1;
453                continue;
454            }
455        }
456        let child_mirror = mirror_path.join(entry_name);
457        let log = log.clone();
458        let settings = settings.clone();
459        let existing_root = existing_root.to_owned();
460        join_set.spawn(async move {
461            expand_missing_tree(
462                prog_track,
463                &entry_path,
464                &child_mirror,
465                &existing_root,
466                result,
467                &log,
468                &settings,
469            )
470            .await
471        });
472    }
473    drop(entries);
474    while let Some(res) = join_set.join_next().await {
475        match res? {
476            Ok(child_summary) => summary = summary + child_summary,
477            Err(error) => {
478                tracing::error!(
479                    "expand_missing_tree: {:?} failed with: {:#}",
480                    existing_path,
481                    &error
482                );
483                errors.push(error);
484                if settings.fail_early {
485                    break;
486                }
487            }
488        }
489    }
490    if let Some(err) = errors.into_error() {
491        return Err(err);
492    }
493    Ok(summary)
494}
495
496#[instrument(skip(prog_track))]
497#[async_recursion]
498async fn cmp_internal(
499    prog_track: &'static progress::Progress,
500    src: &std::path::Path,
501    dst: &std::path::Path,
502    source_root: &std::path::Path,
503    dest_root: &std::path::Path,
504    log: &LogWriter,
505    settings: &Settings,
506) -> Result<Summary> {
507    let _prog_guard = prog_track.ops.guard();
508    tracing::debug!("reading source metadata");
509    // it is impossible for src not exist other than user passing invalid path (which is an error)
510    let src_metadata = tokio::fs::symlink_metadata(src)
511        .await
512        .with_context(|| format!("failed reading metadata from {:?}", &src))?;
513    // apply filter to root item (when src == source_root, this is the initial call)
514    if src == source_root {
515        if let Some(ref filter) = settings.filter {
516            if let Some(name) = src.file_name() {
517                let is_dir = src_metadata.is_dir();
518                if !matches!(
519                    filter.should_include_root_item(name.as_ref(), is_dir),
520                    crate::filter::FilterResult::Included
521                ) {
522                    // root item filtered out, return summary with skipped count
523                    let src_obj_type = obj_type(&src_metadata);
524                    let mut summary = Summary::default();
525                    summary.skipped[src_obj_type] += 1;
526                    return Ok(summary);
527                }
528            }
529        }
530    }
531    let mut cmp_summary = Summary::default();
532    let src_obj_type = obj_type(&src_metadata);
533    // track file sizes for the summary
534    if src_metadata.is_file() {
535        cmp_summary.src_bytes += src_metadata.len();
536    }
537    let dst_metadata = {
538        match tokio::fs::symlink_metadata(dst).await {
539            Ok(metadata) => metadata,
540            Err(err) => {
541                if err.kind() == std::io::ErrorKind::NotFound {
542                    if settings.expand_missing && src_metadata.is_dir() {
543                        let expanded = expand_missing_tree(
544                            prog_track,
545                            src,
546                            dst,
547                            source_root,
548                            CompareResult::DstMissing,
549                            log,
550                            settings,
551                        )
552                        .await?;
553                        cmp_summary = cmp_summary + expanded;
554                    } else {
555                        cmp_summary.mismatch[src_obj_type][CompareResult::DstMissing] += 1;
556                        log.log_mismatch(
557                            CompareResult::DstMissing,
558                            Some(src_obj_type),
559                            src,
560                            None,
561                            dst,
562                        )
563                        .await?;
564                    }
565                    return Ok(cmp_summary);
566                }
567                return Err(err).context(format!("failed reading metadata from {:?}", &dst));
568            }
569        }
570    };
571    if dst_metadata.is_file() {
572        cmp_summary.dst_bytes += dst_metadata.len();
573    }
574    if !is_file_type_same(&src_metadata, &dst_metadata)
575        || !filecmp::metadata_equal(
576            &settings.compare[src_obj_type],
577            &src_metadata,
578            &dst_metadata,
579        )
580    {
581        // we use the src type for the summary attribution
582        cmp_summary.mismatch[src_obj_type][CompareResult::Different] += 1;
583        let dst_obj_type = obj_type(&dst_metadata);
584        log.log_mismatch(
585            CompareResult::Different,
586            Some(src_obj_type),
587            src,
588            Some(dst_obj_type),
589            dst,
590        )
591        .await?;
592        if settings.exit_early {
593            return Ok(cmp_summary);
594        }
595    } else {
596        cmp_summary.mismatch[src_obj_type][CompareResult::Same] += 1;
597    }
598    if !src_metadata.is_dir() || !dst_metadata.is_dir() {
599        // nothing more to do
600        return Ok(cmp_summary);
601    }
602    tracing::debug!("process contents of 'src' directory");
603    let mut src_entries = tokio::fs::read_dir(src)
604        .await
605        .with_context(|| format!("cannot open directory {src:?} for reading"))?;
606    let mut join_set = tokio::task::JoinSet::new();
607    let errors = crate::error_collector::ErrorCollector::default();
608    // create a set of all the files we already processed
609    let mut processed_files = std::collections::HashSet::new();
610    // iterate through src entries and recursively call "cmp" on each one
611    while let Some(src_entry) = src_entries
612        .next_entry()
613        .await
614        .with_context(|| format!("failed traversing directory {:?}", &src))?
615    {
616        // it's better to await the token here so that we throttle the syscalls generated by the
617        // DirEntry call. the ops-throttle will never cause a deadlock (unlike max-open-files limit)
618        // so it's safe to do here.
619        throttle::get_ops_token().await;
620        let entry_path = src_entry.path();
621        let entry_name = entry_path.file_name().unwrap();
622        // apply filter if configured
623        if let Some(ref filter) = settings.filter {
624            // compute relative path from source_root for filter matching
625            let relative_path = entry_path.strip_prefix(source_root).unwrap_or(&entry_path);
626            let entry_file_type = src_entry.file_type().await.ok();
627            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
628            if !matches!(
629                filter.should_include(relative_path, is_dir),
630                crate::filter::FilterResult::Included
631            ) {
632                // increment skipped counter based on entry type
633                let entry_obj_type = if is_dir {
634                    ObjType::Dir
635                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
636                    ObjType::Symlink
637                } else {
638                    ObjType::File
639                };
640                cmp_summary.skipped[entry_obj_type] += 1;
641                continue;
642            }
643        }
644        processed_files.insert(entry_name.to_owned());
645        let dst_path = dst.join(entry_name);
646        let log = log.clone();
647        let settings = settings.clone();
648        let source_root = source_root.to_owned();
649        let dest_root = dest_root.to_owned();
650        let do_cmp = || async move {
651            cmp_internal(
652                prog_track,
653                &entry_path,
654                &dst_path,
655                &source_root,
656                &dest_root,
657                &log,
658                &settings,
659            )
660            .await
661        };
662        join_set.spawn(do_cmp());
663    }
664    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
665    // one thing we CAN do however is to drop it as soon as we're done with it
666    drop(src_entries);
667    tracing::debug!("process contents of 'dst' directory");
668    let mut dst_entries = tokio::fs::read_dir(dst)
669        .await
670        .with_context(|| format!("cannot open directory {:?} for reading", &dst))?;
671    // iterate through update entries and log each one that's not present in src
672    while let Some(dst_entry) = dst_entries
673        .next_entry()
674        .await
675        .with_context(|| format!("failed traversing directory {:?}", &dst))?
676    {
677        let entry_path = dst_entry.path();
678        let entry_name = entry_path.file_name().unwrap();
679        if processed_files.contains(entry_name) {
680            // we already must have considered this file, skip it
681            continue;
682        }
683        // apply filter if configured - if this entry would be filtered, don't report as missing
684        if let Some(ref filter) = settings.filter {
685            // compute relative path from dest_root for filter matching
686            let relative_path = entry_path.strip_prefix(dest_root).unwrap_or(&entry_path);
687            let entry_file_type = dst_entry.file_type().await.ok();
688            let is_dir = entry_file_type.map(|ft| ft.is_dir()).unwrap_or(false);
689            if !matches!(
690                filter.should_include(relative_path, is_dir),
691                crate::filter::FilterResult::Included
692            ) {
693                // increment skipped counter based on entry type
694                let entry_obj_type = if is_dir {
695                    ObjType::Dir
696                } else if entry_file_type.map(|ft| ft.is_symlink()).unwrap_or(false) {
697                    ObjType::Symlink
698                } else {
699                    ObjType::File
700                };
701                cmp_summary.skipped[entry_obj_type] += 1;
702                continue;
703            }
704        }
705        tracing::debug!("found a new entry in the 'dst' directory");
706        let dst_path = dst.join(entry_name);
707        let dst_entry_metadata = tokio::fs::symlink_metadata(&dst_path)
708            .await
709            .with_context(|| format!("failed reading metadata from {:?}", &dst_path))?;
710        let dst_obj_type = obj_type(&dst_entry_metadata);
711        if settings.expand_missing && dst_entry_metadata.is_dir() {
712            match expand_missing_tree(
713                prog_track,
714                &dst_path,
715                &src.join(entry_name),
716                dest_root,
717                CompareResult::SrcMissing,
718                log,
719                settings,
720            )
721            .await
722            {
723                Ok(expanded) => cmp_summary = cmp_summary + expanded,
724                Err(error) => {
725                    tracing::error!(
726                        "expand_missing_tree: {:?} failed with: {:#}",
727                        &dst_path,
728                        &error
729                    );
730                    errors.push(error);
731                    if settings.fail_early {
732                        // unwrap is safe: we just pushed an error
733                        return Err(errors.into_error().unwrap());
734                    }
735                }
736            }
737        } else {
738            if dst_entry_metadata.is_file() {
739                cmp_summary.dst_bytes += dst_entry_metadata.len();
740            }
741            cmp_summary.mismatch[dst_obj_type][CompareResult::SrcMissing] += 1;
742            log.log_mismatch(
743                CompareResult::SrcMissing,
744                None,
745                &src.join(entry_name),
746                Some(dst_obj_type),
747                &dst_path,
748            )
749            .await?;
750        }
751    }
752    // unfortunately ReadDir is opening file-descriptors and there's not a good way to limit this,
753    // one thing we CAN do however is to drop it as soon as we're done with it
754    drop(dst_entries);
755    while let Some(res) = join_set.join_next().await {
756        match res? {
757            Ok(summary) => cmp_summary = cmp_summary + summary,
758            Err(error) => {
759                tracing::error!("cmp: {:?} vs {:?} failed with: {:#}", src, dst, &error);
760                errors.push(error);
761                if settings.fail_early {
762                    break;
763                }
764            }
765        }
766    }
767    if let Some(err) = errors.into_error() {
768        return Err(err);
769    }
770    Ok(cmp_summary)
771}
772
773#[cfg(test)]
774mod cmp_tests {
775    use crate::copy;
776    use crate::preserve;
777    use crate::testutils;
778    use enum_map::enum_map;
779    use tracing_test::traced_test;
780
781    use super::*;
782
783    static PROGRESS: std::sync::LazyLock<progress::Progress> =
784        std::sync::LazyLock::new(progress::Progress::new);
785    static NO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
786        std::sync::LazyLock::new(preserve::preserve_none);
787    static DO_PRESERVE_SETTINGS: std::sync::LazyLock<preserve::Settings> =
788        std::sync::LazyLock::new(preserve::preserve_all);
789
790    async fn setup_test_dirs(preserve: bool) -> Result<std::path::PathBuf> {
791        let tmp_dir = testutils::setup_test_dir().await?;
792        let test_path = tmp_dir.as_path();
793        copy::copy(
794            &PROGRESS,
795            &test_path.join("foo"),
796            &test_path.join("bar"),
797            &copy::Settings {
798                dereference: false,
799                fail_early: false,
800                overwrite: false,
801                overwrite_compare: filecmp::MetadataCmpSettings {
802                    size: true,
803                    mtime: true,
804                    ..Default::default()
805                },
806                chunk_size: 0,
807                remote_copy_buffer_size: 0,
808                filter: None,
809                dry_run: None,
810            },
811            if preserve {
812                &DO_PRESERVE_SETTINGS
813            } else {
814                &NO_PRESERVE_SETTINGS
815            },
816            false,
817        )
818        .await?;
819        Ok(tmp_dir)
820    }
821
822    async fn truncate_file(path: &str) -> Result<()> {
823        let file = tokio::fs::File::create(path).await?;
824        file.set_len(0).await?;
825        Ok(())
826    }
827
828    #[tokio::test]
829    #[traced_test]
830    async fn check_basic_cmp() -> Result<()> {
831        let tmp_dir = setup_test_dirs(true).await?;
832        // drop 1 file from src
833        tokio::fs::remove_file(&tmp_dir.join("foo").join("bar").join("1.txt")).await?;
834        // sleep to ensure mtime is different, this acts as a poor-mans barrier
835        tokio::time::sleep(std::time::Duration::from_millis(1000)).await;
836        // modify 1 file in dst
837        truncate_file(
838            tmp_dir
839                .join("bar")
840                .join("baz")
841                .join("4.txt")
842                .to_str()
843                .unwrap(),
844        )
845        .await?;
846        // drop 1 (other) file from dst
847        tokio::fs::remove_file(&tmp_dir.join("bar").join("bar").join("2.txt")).await?;
848        // create one more file in dst -- this will also modify the mtime of the directory
849        tokio::fs::File::create(&tmp_dir.join("bar").join("baz").join("7.txt")).await?;
850        let compare_settings = Settings {
851            fail_early: false,
852            exit_early: false,
853            expand_missing: false,
854            compare: enum_map! {
855                ObjType::File => filecmp::MetadataCmpSettings {
856                    size: true,
857                    mtime: true,
858                    ..Default::default()
859                },
860                ObjType::Dir => filecmp::MetadataCmpSettings {
861                    mtime: true,
862                    ..Default::default()
863                },
864                ObjType::Symlink => filecmp::MetadataCmpSettings {
865                    mtime: true,
866                    ..Default::default()
867                },
868                ObjType::Other => filecmp::MetadataCmpSettings {
869                    mtime: true,
870                    ..Default::default()
871                },
872            },
873            filter: None,
874        };
875        let summary = cmp(
876            &PROGRESS,
877            &tmp_dir.join("foo"),
878            &tmp_dir.join("bar"),
879            &LogWriter::new(
880                Some(tmp_dir.join("cmp.log").as_path()),
881                false,
882                OutputFormat::Text,
883            )
884            .await?,
885            &compare_settings,
886        )
887        .await?;
888        let mismatch: Mismatch = enum_map! {
889            ObjType::File => enum_map! {
890                CompareResult::Different => 1,
891                CompareResult::Same => 2,
892                CompareResult::SrcMissing => 2,
893                CompareResult::DstMissing => 1,
894            },
895            ObjType::Dir => enum_map! {
896                CompareResult::Different => 2,
897                CompareResult::Same => 1,
898                CompareResult::SrcMissing => 0,
899                CompareResult::DstMissing => 0,
900            },
901            ObjType::Symlink => enum_map! {
902                CompareResult::Different => 0,
903                CompareResult::Same => 2,
904                CompareResult::SrcMissing => 0,
905                CompareResult::DstMissing => 0,
906            },
907            ObjType::Other => enum_map! {
908                CompareResult::Different => 0,
909                CompareResult::Same => 0,
910                CompareResult::SrcMissing => 0,
911                CompareResult::DstMissing => 0,
912            },
913        };
914        assert_eq!(summary.mismatch, mismatch);
915        // src has 4 regular files of 1 byte each (0.txt, bar/2.txt, bar/3.txt, baz/4.txt)
916        assert_eq!(summary.src_bytes, 4);
917        // dst has: 0.txt(1B), bar/1.txt(1B, SrcMissing), bar/3.txt(1B), baz/4.txt(0B, truncated), baz/7.txt(0B, SrcMissing)
918        assert_eq!(summary.dst_bytes, 3);
919        Ok(())
920    }
921
922    #[tokio::test]
923    #[traced_test]
924    async fn cmp_with_filter_excludes_files() -> Result<()> {
925        let tmp_dir = setup_test_dirs(true).await?;
926        // setup: src=foo, dst=bar (identical at this point)
927        // add a file to dst that would be reported as SrcMissing
928        tokio::fs::write(&tmp_dir.join("bar").join("extra.txt"), "extra").await?;
929        // without filter, should report extra.txt as SrcMissing
930        let compare_settings_no_filter = Settings {
931            fail_early: false,
932            exit_early: false,
933            expand_missing: false,
934            compare: enum_map! {
935                ObjType::File => filecmp::MetadataCmpSettings {
936                    size: true,
937                    mtime: true,
938                    ..Default::default()
939                },
940                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
941                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
942                ObjType::Other => filecmp::MetadataCmpSettings::default(),
943            },
944            filter: None,
945        };
946        let summary = cmp(
947            &PROGRESS,
948            &tmp_dir.join("foo"),
949            &tmp_dir.join("bar"),
950            &LogWriter::silent().await?,
951            &compare_settings_no_filter,
952        )
953        .await?;
954        assert_eq!(
955            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
956            1
957        );
958        // with filter excluding extra.txt, should not report it
959        let mut filter = crate::filter::FilterSettings::new();
960        filter.add_exclude("extra.txt")?;
961        let compare_settings_with_filter = Settings {
962            fail_early: false,
963            exit_early: false,
964            expand_missing: false,
965            compare: enum_map! {
966                ObjType::File => filecmp::MetadataCmpSettings {
967                    size: true,
968                    mtime: true,
969                    ..Default::default()
970                },
971                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
972                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
973                ObjType::Other => filecmp::MetadataCmpSettings::default(),
974            },
975            filter: Some(filter),
976        };
977        let summary = cmp(
978            &PROGRESS,
979            &tmp_dir.join("foo"),
980            &tmp_dir.join("bar"),
981            &LogWriter::silent().await?,
982            &compare_settings_with_filter,
983        )
984        .await?;
985        assert_eq!(
986            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
987            0
988        );
989        Ok(())
990    }
991
992    #[tokio::test]
993    #[traced_test]
994    async fn cmp_with_include_only_compares_matching() -> Result<()> {
995        let tmp_dir = setup_test_dirs(true).await?;
996        // setup: src=foo, dst=bar (identical at this point)
997        // modify a file that won't be included
998        tokio::fs::write(&tmp_dir.join("bar").join("bar").join("1.txt"), "modified").await?;
999        // with include pattern for only *.rs files, the .txt modification shouldn't appear
1000        let mut filter = crate::filter::FilterSettings::new();
1001        filter.add_include("*.rs")?;
1002        let compare_settings = Settings {
1003            fail_early: false,
1004            exit_early: false,
1005            expand_missing: false,
1006            compare: enum_map! {
1007                ObjType::File => filecmp::MetadataCmpSettings {
1008                    size: true,
1009                    mtime: true,
1010                    ..Default::default()
1011                },
1012                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1013                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1014                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1015            },
1016            filter: Some(filter),
1017        };
1018        let summary = cmp(
1019            &PROGRESS,
1020            &tmp_dir.join("foo"),
1021            &tmp_dir.join("bar"),
1022            &LogWriter::silent().await?,
1023            &compare_settings,
1024        )
1025        .await?;
1026        // no differences should be reported since all .txt files are excluded
1027        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1028        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1029        assert_eq!(
1030            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1031            0
1032        );
1033        assert_eq!(
1034            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1035            0
1036        );
1037        Ok(())
1038    }
1039
1040    #[tokio::test]
1041    #[traced_test]
1042    async fn cmp_with_path_pattern_filters_nested() -> Result<()> {
1043        // test that path-based patterns like "bar/*.txt" work correctly when recursing
1044        // this verifies source_root tracking is working properly
1045        let tmp_dir = setup_test_dirs(true).await?;
1046        // test structure:
1047        // foo/bar/1.txt, foo/bar/2.txt, foo/bar/3.txt
1048        // foo/baz/4.txt, foo/baz/5.txt (symlink), foo/baz/6.txt (symlink)
1049        // filter: only include bar/*.txt
1050        let mut filter = crate::filter::FilterSettings::new();
1051        filter.add_include("bar/*.txt")?;
1052        let compare_settings = Settings {
1053            fail_early: false,
1054            exit_early: false,
1055            expand_missing: false,
1056            compare: enum_map! {
1057                ObjType::File => filecmp::MetadataCmpSettings {
1058                    size: true,
1059                    ..Default::default()
1060                },
1061                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1062                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1063                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1064            },
1065            filter: Some(filter),
1066        };
1067        let summary = cmp(
1068            &PROGRESS,
1069            &tmp_dir.join("foo"),
1070            &tmp_dir.join("bar"),
1071            &LogWriter::silent().await?,
1072            &compare_settings,
1073        )
1074        .await?;
1075        // should only compare files in bar/ subdirectory (3 files: 1.txt, 2.txt, 3.txt)
1076        // all should be "Same" since we copied foo to bar earlier
1077        assert_eq!(
1078            summary.mismatch[ObjType::File][CompareResult::Same],
1079            3,
1080            "should have 3 same files from bar/*.txt pattern"
1081        );
1082        // files in baz/ should not be compared (filtered out)
1083        // 0.txt at root should not be compared
1084        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1085        Ok(())
1086    }
1087
1088    #[tokio::test]
1089    #[traced_test]
1090    async fn cmp_filter_applies_to_root_file() -> Result<()> {
1091        // test that filters apply to the root item itself
1092        let tmp_dir = testutils::create_temp_dir().await?;
1093        // create two different files
1094        tokio::fs::write(tmp_dir.join("test.txt"), "content1").await?;
1095        tokio::fs::write(tmp_dir.join("test2.txt"), "content2").await?;
1096        // filter: only include *.rs files
1097        let mut filter = crate::filter::FilterSettings::new();
1098        filter.add_include("*.rs")?;
1099        let compare_settings = Settings {
1100            fail_early: false,
1101            exit_early: false,
1102            expand_missing: false,
1103            compare: enum_map! {
1104                ObjType::File => filecmp::MetadataCmpSettings {
1105                    size: true,
1106                    ..Default::default()
1107                },
1108                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1109                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1110                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1111            },
1112            filter: Some(filter),
1113        };
1114        // compare test.txt vs test2.txt - should be filtered out (not *.rs)
1115        let summary = cmp(
1116            &PROGRESS,
1117            &tmp_dir.join("test.txt"),
1118            &tmp_dir.join("test2.txt"),
1119            &LogWriter::silent().await?,
1120            &compare_settings,
1121        )
1122        .await?;
1123        // should return empty summary since root file is filtered
1124        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1125        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Different], 0);
1126        Ok(())
1127    }
1128
1129    #[tokio::test]
1130    #[traced_test]
1131    async fn cmp_filter_excludes_root_directory() -> Result<()> {
1132        // test that filters apply to root directories
1133        let tmp_dir = testutils::setup_test_dir().await?;
1134        // filter: exclude directories named "foo"
1135        let mut filter = crate::filter::FilterSettings::new();
1136        filter.add_exclude("foo")?;
1137        let compare_settings = Settings {
1138            fail_early: false,
1139            exit_early: false,
1140            expand_missing: false,
1141            compare: enum_map! {
1142                ObjType::File => filecmp::MetadataCmpSettings::default(),
1143                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1144                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1145                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1146            },
1147            filter: Some(filter),
1148        };
1149        // compare foo vs bar - foo should be filtered out
1150        let summary = cmp(
1151            &PROGRESS,
1152            &tmp_dir.join("foo"),
1153            &tmp_dir.join("bar"),
1154            &LogWriter::silent().await?,
1155            &compare_settings,
1156        )
1157        .await?;
1158        // should return empty summary since root dir is excluded
1159        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Same], 0);
1160        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::Different], 0);
1161        assert_eq!(summary.mismatch[ObjType::File][CompareResult::Same], 0);
1162        Ok(())
1163    }
1164
1165    #[tokio::test]
1166    #[traced_test]
1167    async fn cmp_combined_include_exclude_patterns() -> Result<()> {
1168        let tmp_dir = setup_test_dirs(true).await?;
1169        // include all .txt files, but exclude bar/2.txt specifically
1170        let mut filter = crate::filter::FilterSettings::new();
1171        filter.add_include("**/*.txt")?;
1172        filter.add_exclude("bar/2.txt")?;
1173        let compare_settings = Settings {
1174            fail_early: false,
1175            exit_early: false,
1176            expand_missing: false,
1177            compare: enum_map! {
1178                ObjType::File => filecmp::MetadataCmpSettings {
1179                    size: true,
1180                    ..Default::default()
1181                },
1182                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1183                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1184                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1185            },
1186            filter: Some(filter),
1187        };
1188        let summary = cmp(
1189            &PROGRESS,
1190            &tmp_dir.join("foo"),
1191            &tmp_dir.join("bar"),
1192            &LogWriter::silent().await?,
1193            &compare_settings,
1194        )
1195        .await?;
1196        // should compare: 0.txt, bar/1.txt, bar/3.txt, baz/4.txt = 4 files (same)
1197        // should skip: bar/2.txt (excluded by pattern), 5.txt and 6.txt (symlinks, no match for *.txt in src dir) = 1 file + 2 symlinks
1198        // note: the pattern **/*.txt only matches files with .txt extension, but 5.txt and 6.txt in baz are symlinks
1199        assert_eq!(
1200            summary.mismatch[ObjType::File][CompareResult::Same],
1201            4,
1202            "should compare 4 .txt files as same"
1203        );
1204        // bar/2.txt is skipped for both src and dst traversal = 2 skipped
1205        assert_eq!(
1206            summary.skipped[ObjType::File],
1207            2,
1208            "should skip 2 files (bar/2.txt on src and dst)"
1209        );
1210        Ok(())
1211    }
1212
1213    #[tokio::test]
1214    #[traced_test]
1215    async fn cmp_skipped_counts_comprehensive() -> Result<()> {
1216        let tmp_dir = setup_test_dirs(true).await?;
1217        // exclude bar/ directory entirely
1218        let mut filter = crate::filter::FilterSettings::new();
1219        filter.add_exclude("bar/")?;
1220        let compare_settings = Settings {
1221            fail_early: false,
1222            exit_early: false,
1223            expand_missing: false,
1224            compare: enum_map! {
1225                ObjType::File => filecmp::MetadataCmpSettings {
1226                    size: true,
1227                    ..Default::default()
1228                },
1229                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1230                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1231                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1232            },
1233            filter: Some(filter),
1234        };
1235        let summary = cmp(
1236            &PROGRESS,
1237            &tmp_dir.join("foo"),
1238            &tmp_dir.join("bar"),
1239            &LogWriter::silent().await?,
1240            &compare_settings,
1241        )
1242        .await?;
1243        // compared: 0.txt (same), baz/4.txt (same) = 2 files
1244        // compared: baz/5.txt symlink (same), baz/6.txt symlink (same) = 2 symlinks
1245        // skipped: bar directory in src and dst = 2 dirs (cmp traverses both)
1246        assert_eq!(
1247            summary.mismatch[ObjType::File][CompareResult::Same],
1248            2,
1249            "should compare 2 files as same"
1250        );
1251        assert_eq!(
1252            summary.mismatch[ObjType::Symlink][CompareResult::Same],
1253            2,
1254            "should compare 2 symlinks as same"
1255        );
1256        assert_eq!(
1257            summary.skipped[ObjType::Dir],
1258            2,
1259            "should skip 2 directories (bar in src + bar in dst)"
1260        );
1261        Ok(())
1262    }
1263
1264    #[tokio::test]
1265    #[traced_test]
1266    async fn expand_missing_dst_reports_all_entries() -> Result<()> {
1267        let tmp_dir = setup_test_dirs(true).await?;
1268        // remove bar/bar directory entirely from dst
1269        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1270        let compare_settings = Settings {
1271            fail_early: false,
1272            exit_early: false,
1273            expand_missing: true,
1274            compare: enum_map! {
1275                ObjType::File => filecmp::MetadataCmpSettings::default(),
1276                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1277                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1278                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1279            },
1280            filter: None,
1281        };
1282        let summary = cmp(
1283            &PROGRESS,
1284            &tmp_dir.join("foo"),
1285            &tmp_dir.join("bar"),
1286            &LogWriter::silent().await?,
1287            &compare_settings,
1288        )
1289        .await?;
1290        // bar/bar dir has: bar/ (1 dir) + 1.txt, 2.txt, 3.txt (3 files)
1291        assert_eq!(
1292            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1293            1,
1294            "should report 1 directory as DstMissing"
1295        );
1296        assert_eq!(
1297            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1298            3,
1299            "should report 3 files as DstMissing"
1300        );
1301        Ok(())
1302    }
1303
1304    #[tokio::test]
1305    #[traced_test]
1306    async fn expand_missing_src_reports_all_entries() -> Result<()> {
1307        let tmp_dir = setup_test_dirs(true).await?;
1308        // create a new subdir in dst with files
1309        let newdir = tmp_dir.join("bar").join("newdir");
1310        tokio::fs::create_dir(&newdir).await?;
1311        tokio::fs::write(newdir.join("a.txt"), "a").await?;
1312        tokio::fs::write(newdir.join("b.txt"), "b").await?;
1313        let compare_settings = Settings {
1314            fail_early: false,
1315            exit_early: false,
1316            expand_missing: true,
1317            compare: enum_map! {
1318                ObjType::File => filecmp::MetadataCmpSettings::default(),
1319                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1320                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1321                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1322            },
1323            filter: None,
1324        };
1325        let summary = cmp(
1326            &PROGRESS,
1327            &tmp_dir.join("foo"),
1328            &tmp_dir.join("bar"),
1329            &LogWriter::silent().await?,
1330            &compare_settings,
1331        )
1332        .await?;
1333        assert_eq!(
1334            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1335            1,
1336            "should report 1 directory as SrcMissing"
1337        );
1338        assert_eq!(
1339            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1340            2,
1341            "should report 2 files as SrcMissing"
1342        );
1343        Ok(())
1344    }
1345
1346    #[tokio::test]
1347    #[traced_test]
1348    async fn expand_missing_dst_deeply_nested() -> Result<()> {
1349        // verify expansion recurses through multiple directory levels
1350        let tmp_dir = testutils::create_temp_dir().await?;
1351        let src = tmp_dir.join("src");
1352        let dst = tmp_dir.join("dst");
1353        tokio::fs::create_dir(&src).await?;
1354        tokio::fs::create_dir(&dst).await?;
1355        // create src/a/b/c/d.txt -- 3 dirs deep
1356        let deep = src.join("a").join("b").join("c");
1357        tokio::fs::create_dir_all(&deep).await?;
1358        tokio::fs::write(deep.join("d.txt"), "d").await?;
1359        // also add a sibling file at an intermediate level
1360        tokio::fs::write(src.join("a").join("b").join("mid.txt"), "m").await?;
1361        // dst exists but is empty -- everything in src is DstMissing
1362        let compare_settings = Settings {
1363            fail_early: false,
1364            exit_early: false,
1365            expand_missing: true,
1366            compare: enum_map! {
1367                ObjType::File => filecmp::MetadataCmpSettings::default(),
1368                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1369                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1370                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1371            },
1372            filter: None,
1373        };
1374        let summary = cmp(
1375            &PROGRESS,
1376            &src,
1377            &dst,
1378            &LogWriter::silent().await?,
1379            &compare_settings,
1380        )
1381        .await?;
1382        // dirs: a, a/b, a/b/c = 3 DstMissing dirs
1383        assert_eq!(
1384            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1385            3,
1386            "should report 3 nested directories as DstMissing"
1387        );
1388        // files: a/b/c/d.txt, a/b/mid.txt = 2 DstMissing files
1389        assert_eq!(
1390            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1391            2,
1392            "should report 2 files as DstMissing"
1393        );
1394        // src_bytes: d.txt(1) + mid.txt(1) = 2
1395        assert_eq!(
1396            summary.src_bytes, 2,
1397            "should track bytes for expanded files"
1398        );
1399        Ok(())
1400    }
1401
1402    #[tokio::test]
1403    #[traced_test]
1404    async fn expand_missing_src_deeply_nested() -> Result<()> {
1405        // verify expansion recurses for SrcMissing through multiple levels
1406        let tmp_dir = testutils::create_temp_dir().await?;
1407        let src = tmp_dir.join("src");
1408        let dst = tmp_dir.join("dst");
1409        tokio::fs::create_dir(&src).await?;
1410        tokio::fs::create_dir(&dst).await?;
1411        // create dst/x/y/z.txt -- dirs only in dst
1412        let deep = dst.join("x").join("y");
1413        tokio::fs::create_dir_all(&deep).await?;
1414        tokio::fs::write(deep.join("z.txt"), "zz").await?;
1415        let compare_settings = Settings {
1416            fail_early: false,
1417            exit_early: false,
1418            expand_missing: true,
1419            compare: enum_map! {
1420                ObjType::File => filecmp::MetadataCmpSettings::default(),
1421                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1422                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1423                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1424            },
1425            filter: None,
1426        };
1427        let summary = cmp(
1428            &PROGRESS,
1429            &src,
1430            &dst,
1431            &LogWriter::silent().await?,
1432            &compare_settings,
1433        )
1434        .await?;
1435        // dirs: x, x/y = 2 SrcMissing dirs
1436        assert_eq!(
1437            summary.mismatch[ObjType::Dir][CompareResult::SrcMissing],
1438            2,
1439            "should report 2 nested directories as SrcMissing"
1440        );
1441        // files: x/y/z.txt = 1 SrcMissing file
1442        assert_eq!(
1443            summary.mismatch[ObjType::File][CompareResult::SrcMissing],
1444            1,
1445            "should report 1 file as SrcMissing"
1446        );
1447        // dst_bytes: z.txt(2)
1448        assert_eq!(
1449            summary.dst_bytes, 2,
1450            "should track bytes for expanded files"
1451        );
1452        Ok(())
1453    }
1454
1455    #[tokio::test]
1456    #[traced_test]
1457    async fn expand_missing_with_exclude_filter() -> Result<()> {
1458        // verify that filters are applied during expansion. exclude *.log files
1459        // from the missing subtree
1460        let tmp_dir = testutils::create_temp_dir().await?;
1461        let src = tmp_dir.join("src");
1462        let dst = tmp_dir.join("dst");
1463        tokio::fs::create_dir(&src).await?;
1464        tokio::fs::create_dir(&dst).await?;
1465        // src/missing_dir/ has mixed files
1466        let missing = src.join("missing_dir");
1467        tokio::fs::create_dir(&missing).await?;
1468        tokio::fs::write(missing.join("keep.txt"), "k").await?;
1469        tokio::fs::write(missing.join("skip.log"), "s").await?;
1470        tokio::fs::write(missing.join("also_keep.txt"), "a").await?;
1471        let mut filter = crate::filter::FilterSettings::new();
1472        filter.add_exclude("*.log")?;
1473        let compare_settings = Settings {
1474            fail_early: false,
1475            exit_early: false,
1476            expand_missing: true,
1477            compare: enum_map! {
1478                ObjType::File => filecmp::MetadataCmpSettings::default(),
1479                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1480                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1481                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1482            },
1483            filter: Some(filter),
1484        };
1485        let summary = cmp(
1486            &PROGRESS,
1487            &src,
1488            &dst,
1489            &LogWriter::silent().await?,
1490            &compare_settings,
1491        )
1492        .await?;
1493        // missing_dir itself = 1 DstMissing dir
1494        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1495        // only keep.txt and also_keep.txt should be reported. skip.log is filtered
1496        assert_eq!(
1497            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1498            2,
1499            "should report only non-excluded files as DstMissing"
1500        );
1501        // skip.log should be counted as skipped
1502        assert_eq!(
1503            summary.skipped[ObjType::File],
1504            1,
1505            "should count excluded file as skipped"
1506        );
1507        Ok(())
1508    }
1509
1510    #[tokio::test]
1511    #[traced_test]
1512    async fn expand_missing_with_include_filter() -> Result<()> {
1513        // verify that include filters restrict which children are reported during expansion
1514        let tmp_dir = testutils::create_temp_dir().await?;
1515        let src = tmp_dir.join("src");
1516        let dst = tmp_dir.join("dst");
1517        tokio::fs::create_dir(&src).await?;
1518        tokio::fs::create_dir(&dst).await?;
1519        // src/data/ has a mix of file types
1520        let data = src.join("data");
1521        tokio::fs::create_dir(&data).await?;
1522        tokio::fs::write(data.join("a.rs"), "fn main() {}").await?;
1523        tokio::fs::write(data.join("b.txt"), "hello").await?;
1524        tokio::fs::write(data.join("c.rs"), "fn test() {}").await?;
1525        let mut filter = crate::filter::FilterSettings::new();
1526        filter.add_include("**/*.rs")?;
1527        let compare_settings = Settings {
1528            fail_early: false,
1529            exit_early: false,
1530            expand_missing: true,
1531            compare: enum_map! {
1532                ObjType::File => filecmp::MetadataCmpSettings::default(),
1533                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1534                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1535                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1536            },
1537            filter: Some(filter),
1538        };
1539        let summary = cmp(
1540            &PROGRESS,
1541            &src,
1542            &dst,
1543            &LogWriter::silent().await?,
1544            &compare_settings,
1545        )
1546        .await?;
1547        // data dir = 1 DstMissing dir
1548        assert_eq!(summary.mismatch[ObjType::Dir][CompareResult::DstMissing], 1,);
1549        // only a.rs and c.rs should be reported. b.txt is filtered out
1550        assert_eq!(
1551            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1552            2,
1553            "should report only included files as DstMissing"
1554        );
1555        Ok(())
1556    }
1557
1558    #[tokio::test]
1559    #[traced_test]
1560    async fn expand_missing_with_nested_path_filter() -> Result<()> {
1561        // verify path-based patterns work correctly during expansion.
1562        // only include files under a specific nested path
1563        let tmp_dir = testutils::create_temp_dir().await?;
1564        let src = tmp_dir.join("src");
1565        let dst = tmp_dir.join("dst");
1566        tokio::fs::create_dir(&src).await?;
1567        tokio::fs::create_dir(&dst).await?;
1568        // src/top/ has two subdirs: keep/ and skip/
1569        let top = src.join("top");
1570        let keep = top.join("keep");
1571        let skip = top.join("skip");
1572        tokio::fs::create_dir_all(&keep).await?;
1573        tokio::fs::create_dir_all(&skip).await?;
1574        tokio::fs::write(keep.join("1.txt"), "1").await?;
1575        tokio::fs::write(keep.join("2.txt"), "2").await?;
1576        tokio::fs::write(skip.join("3.txt"), "3").await?;
1577        let mut filter = crate::filter::FilterSettings::new();
1578        filter.add_include("top/keep/**")?;
1579        let compare_settings = Settings {
1580            fail_early: false,
1581            exit_early: false,
1582            expand_missing: true,
1583            compare: enum_map! {
1584                ObjType::File => filecmp::MetadataCmpSettings::default(),
1585                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1586                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1587                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1588            },
1589            filter: Some(filter),
1590        };
1591        let summary = cmp(
1592            &PROGRESS,
1593            &src,
1594            &dst,
1595            &LogWriter::silent().await?,
1596            &compare_settings,
1597        )
1598        .await?;
1599        // only keep/ subtree: keep dir(1) + top dir(1) = 2 dirs. skip dir is filtered
1600        assert_eq!(
1601            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1602            2,
1603            "should report top and keep dirs as DstMissing"
1604        );
1605        // only 1.txt and 2.txt from keep/
1606        assert_eq!(
1607            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1608            2,
1609            "should report only files under keep/ as DstMissing"
1610        );
1611        Ok(())
1612    }
1613
1614    #[tokio::test]
1615    #[traced_test]
1616    async fn expand_missing_false_preserves_original_behavior() -> Result<()> {
1617        let tmp_dir = setup_test_dirs(true).await?;
1618        // remove bar/bar directory entirely from dst
1619        tokio::fs::remove_dir_all(&tmp_dir.join("bar").join("bar")).await?;
1620        let compare_settings = Settings {
1621            fail_early: false,
1622            exit_early: false,
1623            expand_missing: false,
1624            compare: enum_map! {
1625                ObjType::File => filecmp::MetadataCmpSettings::default(),
1626                ObjType::Dir => filecmp::MetadataCmpSettings::default(),
1627                ObjType::Symlink => filecmp::MetadataCmpSettings::default(),
1628                ObjType::Other => filecmp::MetadataCmpSettings::default(),
1629            },
1630            filter: None,
1631        };
1632        let summary = cmp(
1633            &PROGRESS,
1634            &tmp_dir.join("foo"),
1635            &tmp_dir.join("bar"),
1636            &LogWriter::silent().await?,
1637            &compare_settings,
1638        )
1639        .await?;
1640        // without expand_missing, only the top-level dir is reported
1641        assert_eq!(
1642            summary.mismatch[ObjType::Dir][CompareResult::DstMissing],
1643            1,
1644            "should report only 1 directory as DstMissing"
1645        );
1646        assert_eq!(
1647            summary.mismatch[ObjType::File][CompareResult::DstMissing],
1648            0,
1649            "should not report individual files as DstMissing"
1650        );
1651        Ok(())
1652    }
1653
1654    #[test]
1655    fn path_to_json_string_utf8() {
1656        let path = std::path::Path::new("/foo/bar/baz.txt");
1657        assert_eq!(path_to_json_string(path), "/foo/bar/baz.txt");
1658    }
1659
1660    #[test]
1661    fn path_to_json_string_non_utf8() {
1662        use std::ffi::OsStr;
1663        use std::os::unix::ffi::OsStrExt;
1664        // embed 0xFF byte in the middle
1665        let os_str = OsStr::from_bytes(b"/tmp/bad\xffname.txt");
1666        let path = std::path::Path::new(os_str);
1667        assert_eq!(path_to_json_string(path), "/tmp/bad\\xffname.txt");
1668    }
1669
1670    #[test]
1671    fn path_to_json_string_multiple_bad_bytes() {
1672        use std::ffi::OsStr;
1673        use std::os::unix::ffi::OsStrExt;
1674        let os_str = OsStr::from_bytes(b"\x80/ok/\xfe\xff/end");
1675        let path = std::path::Path::new(os_str);
1676        assert_eq!(path_to_json_string(path), "\\x80/ok/\\xfe\\xff/end");
1677    }
1678
1679    #[test]
1680    fn path_to_json_string_escapes_backslashes() {
1681        // a path with a literal backslash must be escaped so it doesn't
1682        // collide with \xHH byte escapes
1683        let path = std::path::Path::new("/tmp/bad\\xffname.txt");
1684        assert_eq!(path_to_json_string(path), "/tmp/bad\\\\xffname.txt");
1685    }
1686
1687    #[test]
1688    fn path_to_json_string_no_collision() {
1689        use std::ffi::OsStr;
1690        use std::os::unix::ffi::OsStrExt;
1691        // literal backslash-x-f-f in the filename
1692        let literal = std::path::Path::new("/tmp/bad\\xffname.txt");
1693        // actual 0xFF byte in the filename
1694        let raw = std::path::Path::new(OsStr::from_bytes(b"/tmp/bad\xffname.txt"));
1695        // these must produce different output
1696        assert_ne!(path_to_json_string(literal), path_to_json_string(raw));
1697    }
1698}