Skip to main content

common/
delete.rs

1//! rsync-style `--delete` (mirror) support: remove destination entries that
2//! have no counterpart in the source directory.
3
4use crate::copy::DeleteSettings;
5use crate::progress;
6
7/// Remove entries in `dst` whose names are not in `keep` (the source entry
8/// names that passed the filter for this directory).
9///
10/// `relative_dir` is this directory's path relative to the source root, used to
11/// match destination entries against `filter` for exclude-protection. Excluded
12/// destination entries are protected (kept) unless `delete_settings.delete_excluded`
13/// is set. Honors `dry_run` (reports without removing, via [`crate::rm::rm`]).
14#[allow(clippy::too_many_arguments)]
15pub async fn prune_extraneous(
16    prog_track: &'static progress::Progress,
17    dst: &std::path::Path,
18    relative_dir: &std::path::Path,
19    keep: &std::collections::HashSet<std::ffi::OsString>,
20    filter: Option<&crate::filter::FilterSettings>,
21    delete_settings: &DeleteSettings,
22    fail_early: bool,
23    dry_run: Option<crate::config::DryRunMode>,
24) -> Result<crate::rm::Summary, crate::rm::Error> {
25    let mut summary = crate::rm::Summary::default();
26    // Destination root: `dst` with this directory's source-relative path stripped. Removed
27    // descendants are matched against the filter relative to this root, so their full (mirror)
28    // relative paths are used — making path/anchored excludes like `cache/*.log` protect
29    // descendants correctly, not just simple basename patterns.
30    let mut dest_root = dst;
31    for _ in relative_dir.components() {
32        dest_root = dest_root.parent().unwrap_or(dest_root);
33    }
34    // In --dry-run the create-or-overwrite step is skipped, so `dst` may still be a file,
35    // symlink, or even a symlink-to-directory at this point. `read_dir` follows symlinks, so
36    // without a `symlink_metadata` pre-check it would walk the symlink's target and preview
37    // deletions OUTSIDE the destination tree. In a real run this can't happen — upstream
38    // create_dir/overwrite guarantees `dst` is a real directory by the time prune runs — so
39    // skip the extra stat there to keep the hot path cheap.
40    if dry_run.is_some() {
41        match tokio::fs::symlink_metadata(dst).await {
42            Ok(meta) if meta.file_type().is_dir() => { /* real directory: fall through */ }
43            Ok(_) => {
44                // not a real directory (file, symlink, special) — nothing to prune
45                return Ok(summary);
46            }
47            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
48                return Ok(summary);
49            }
50            Err(err) => {
51                return Err(crate::rm::Error::new(
52                    anyhow::Error::new(err)
53                        .context(format!("cannot stat destination {dst:?} for delete scan")),
54                    summary,
55                ));
56            }
57        }
58    }
59    let mut entries = match tokio::fs::read_dir(dst).await {
60        Ok(entries) => entries,
61        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
62            // destination directory absent (e.g. dry-run never created it): nothing to prune
63            return Ok(summary);
64        }
65        Err(err)
66            if err.kind() == std::io::ErrorKind::NotADirectory
67                || err.raw_os_error() == Some(20) =>
68        {
69            // destination counterpart is not a directory: e.g. `--dry-run --delete` where the real
70            // run would replace a file/symlink at this path with a directory before pruning, but
71            // dry-run skips that overwrite. There is nothing to prune. (In dry-run we already
72            // handled the non-dir case via symlink_metadata above; this arm remains as defense in
73            // depth against a race between that stat and the read_dir below.)
74            return Ok(summary);
75        }
76        Err(err) => {
77            return Err(crate::rm::Error::new(
78                anyhow::Error::new(err)
79                    .context(format!("cannot open destination {dst:?} for delete scan")),
80                summary,
81            ));
82        }
83    };
84    let errors = crate::error_collector::ErrorCollector::default();
85    loop {
86        let (entry, entry_file_type) = match crate::walk::next_entry_probed(
87            &mut entries,
88            congestion::Side::Destination,
89            || format!("failed scanning destination directory {dst:?} for deletion"),
90        )
91        .await
92        {
93            Ok(Some(value)) => value,
94            Ok(None) => break,
95            Err(err) => {
96                errors.push(err);
97                break;
98            }
99        };
100        let name = entry.file_name();
101        if keep.contains(&name) {
102            continue;
103        }
104        let is_dir = entry_file_type.as_ref().is_some_and(|ft| ft.is_dir());
105        // exclude-protection: keep destination entries the filter would exclude,
106        // unless --delete-excluded was requested.
107        if !delete_settings.delete_excluded
108            && let Some(filter) = filter
109        {
110            let entry_relative = relative_dir.join(&name);
111            if !matches!(
112                filter.should_include(&entry_relative, is_dir),
113                crate::filter::FilterResult::Included
114            ) {
115                tracing::debug!("protecting excluded destination entry {:?}", entry.path());
116                continue;
117            }
118        }
119        // Protect excluded descendants when removing an extraneous directory: rm::rm applies
120        // the filter recursively (skipping excluded entries), so an extra dir containing e.g.
121        // `*.log` files keeps them and survives non-empty — upholding the documented
122        // "excluded files are protected by default" guarantee (and matching rsync). With
123        // --delete-excluded we pass no filter so the whole subtree is removed. (rm matches
124        // relative to the entry being removed, so simple patterns protect by basename.)
125        let rm_settings = crate::rm::Settings {
126            fail_early,
127            filter: if delete_settings.delete_excluded {
128                None
129            } else {
130                filter.cloned()
131            },
132            time_filter: None,
133            dry_run,
134        };
135        match crate::rm::rm_with_filter_root(prog_track, &entry.path(), dest_root, &rm_settings)
136            .await
137        {
138            Ok(rm_summary) => {
139                summary = summary + rm_summary;
140            }
141            Err(err) => {
142                summary = summary + err.summary;
143                if fail_early {
144                    return Err(crate::rm::Error::new(err.source, summary));
145                }
146                errors.push(err.source);
147            }
148        }
149    }
150    if let Some(err) = errors.into_error() {
151        return Err(crate::rm::Error::new(err, summary));
152    }
153    Ok(summary)
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use tracing_test::traced_test;

    // Shared progress tracker: `prune_extraneous` requires a `&'static` reference.
    static PROGRESS: std::sync::LazyLock<progress::Progress> =
        std::sync::LazyLock::new(progress::Progress::new);

    /// Builds a `DeleteSettings` with only the `delete_excluded` flag chosen by the test.
    fn delete_settings(delete_excluded: bool) -> DeleteSettings {
        DeleteSettings { delete_excluded }
    }

    /// Runs `prune_extraneous` with `dst` as the destination root (empty relative dir)
    /// and `fail_early` off, unwrapping the error to its source so tests can use `?`.
    async fn prune(
        dst: &std::path::Path,
        keep: &HashSet<std::ffi::OsString>,
        filter: Option<&crate::filter::FilterSettings>,
        delete_excluded: bool,
        dry_run: Option<crate::config::DryRunMode>,
    ) -> anyhow::Result<crate::rm::Summary> {
        prune_extraneous(
            &PROGRESS,
            dst,
            std::path::Path::new(""),
            keep,
            filter,
            &delete_settings(delete_excluded),
            false,
            dry_run,
        )
        .await
        .map_err(|e| e.source)
    }

    #[tokio::test]
    #[traced_test]
    async fn removes_entries_not_in_keep_set() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dst = tmp.path().join("dst");
        tokio::fs::create_dir(&dst).await?;
        tokio::fs::write(dst.join("keep.txt"), b"x").await?;
        tokio::fs::write(dst.join("extra.txt"), b"x").await?;
        tokio::fs::create_dir(dst.join("extra_dir")).await?;
        tokio::fs::write(dst.join("extra_dir").join("nested.txt"), b"x").await?;

        let mut keep = HashSet::new();
        keep.insert(std::ffi::OsString::from("keep.txt"));

        let summary = prune(&dst, &keep, None, false, None).await?;

        assert_eq!(summary.files_removed, 2); // extra.txt + extra_dir/nested.txt
        assert_eq!(summary.directories_removed, 1); // extra_dir
        assert!(dst.join("keep.txt").exists());
        assert!(!dst.join("extra.txt").exists());
        assert!(!dst.join("extra_dir").exists());
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn protects_excluded_entries_unless_delete_excluded() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dst = tmp.path().join("dst");
        tokio::fs::create_dir(&dst).await?;
        tokio::fs::write(dst.join("data.bin"), b"x").await?; // extra, not excluded
        tokio::fs::write(dst.join("note.log"), b"x").await?; // extra, excluded by *.log

        let mut filter = crate::filter::FilterSettings::new();
        filter.add_exclude("*.log")?;
        let keep = HashSet::new(); // both are extraneous

        // default: *.log is protected, data.bin is removed
        let summary = prune(&dst, &keep, Some(&filter), false, None).await?;
        assert_eq!(summary.files_removed, 1);
        assert!(!dst.join("data.bin").exists());
        assert!(
            dst.join("note.log").exists(),
            "*.log must be protected by default"
        );

        // with delete_excluded: note.log is also removed
        let summary = prune(&dst, &keep, Some(&filter), true, None).await?;
        assert_eq!(summary.files_removed, 1);
        assert!(!dst.join("note.log").exists());
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn protects_excluded_descendants_of_extraneous_dir() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dst = tmp.path().join("dst");
        tokio::fs::create_dir(&dst).await?;
        // an extraneous directory (no source counterpart) with an excluded and a normal file
        tokio::fs::create_dir(dst.join("extra_dir")).await?;
        tokio::fs::write(dst.join("extra_dir").join("keep.log"), b"x").await?; // excluded by *.log
        tokio::fs::write(dst.join("extra_dir").join("gone.txt"), b"x").await?; // not excluded

        let mut filter = crate::filter::FilterSettings::new();
        filter.add_exclude("*.log")?;
        let keep = HashSet::new(); // extra_dir is extraneous

        // default --delete: the excluded descendant is protected, so the dir survives non-empty
        let summary = prune(&dst, &keep, Some(&filter), false, None).await?;
        assert_eq!(summary.files_removed, 1); // gone.txt
        assert!(!dst.join("extra_dir").join("gone.txt").exists());
        assert!(
            dst.join("extra_dir").join("keep.log").exists(),
            "excluded descendant of an extraneous dir must be protected"
        );

        // --delete-excluded: the whole extraneous directory is removed
        let summary = prune(&dst, &keep, Some(&filter), true, None).await?;
        assert_eq!(summary.files_removed, 1); // keep.log
        assert!(!dst.join("extra_dir").exists());
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn protects_path_excluded_descendants_of_extraneous_dir() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dst = tmp.path().join("dst");
        tokio::fs::create_dir(&dst).await?;
        // an extraneous directory whose descendants are targeted by a PATH-based exclude
        tokio::fs::create_dir(dst.join("cache")).await?;
        tokio::fs::write(dst.join("cache").join("foo.log"), b"x").await?; // matches cache/*.log -> protected
        tokio::fs::write(dst.join("cache").join("data.txt"), b"x").await?; // not matched -> removed

        let mut filter = crate::filter::FilterSettings::new();
        filter.add_exclude("cache/*.log")?;
        let keep = HashSet::new();

        let summary = prune(&dst, &keep, Some(&filter), false, None).await?;

        assert_eq!(summary.files_removed, 1); // data.txt
        assert!(!dst.join("cache").join("data.txt").exists());
        assert!(
            dst.join("cache").join("foo.log").exists(),
            "path-based exclude must protect the descendant of an extraneous dir"
        );
        Ok(())
    }

    // Gated on unix: the fixture is built with `std::os::unix::fs::symlink`, which does not
    // exist on other targets and would otherwise break compilation of the whole test module.
    #[cfg(unix)]
    #[tokio::test]
    #[traced_test]
    async fn dry_run_does_not_follow_dst_symlink_to_directory() -> anyhow::Result<()> {
        // In a real --delete run, the create-or-overwrite step replaces any non-directory
        // destination (including a symlink) before prune runs. In --dry-run that overwrite
        // is skipped, so prune_extraneous can be called with a dst that is still a symlink
        // pointing to a directory. `tokio::fs::read_dir` follows symlinks, so without a
        // pre-check it would walk the symlink's target and preview deletions OUTSIDE the
        // destination tree.
        let tmp = tempfile::tempdir()?;
        let dst_parent = tmp.path().join("dst_parent");
        let outside = tmp.path().join("outside"); // outside the destination tree
        tokio::fs::create_dir(&dst_parent).await?;
        tokio::fs::create_dir(&outside).await?;
        tokio::fs::write(outside.join("precious.txt"), b"keep me").await?;
        // dst is a symlink-to-directory living under the parent we'd prune.
        let dst = dst_parent.join("link_dir");
        std::os::unix::fs::symlink(&outside, &dst)?;

        // keep set empty: anything `read_dir(dst)` returns would look extraneous.
        let keep = HashSet::new();
        let summary = prune(
            &dst,
            &keep,
            None,
            false,
            Some(crate::config::DryRunMode::Brief),
        )
        .await?;
        assert_eq!(
            summary.files_removed, 0,
            "dry-run must not preview deletions reached by following a dst symlink"
        );
        assert_eq!(summary.directories_removed, 0);
        // dry-run never deletes, but assert anyway as a belt-and-braces guard.
        assert!(outside.join("precious.txt").exists());
        Ok(())
    }
}