Skip to main content

btrfs_cli/filesystem/
du.rs

1//! Implementation of `btrfs filesystem du`.
2//!
3//! Uses [`btrfs_uapi::fiemap::file_extents`] to query the physical extent
4//! layout of each file.  For each file we report:
5//!
6//! * **Total** — sum of all non-skipped extent lengths
7//! * **Exclusive** — bytes not marked `FIEMAP_EXTENT_SHARED`
8//! * **Set shared** — at the top-level only: physical bytes shared with at
9//!   least one other file in the same argument's subtree, computed by
10//!   collecting all shared physical ranges, sorting, and merging overlaps.
11//!   Always `-` for non-top-level lines.
12//!
13//! The output format and semantics match `btrfs-progs filesystem du`.
14
15use super::UnitMode;
16use crate::{
17    Format, Runnable,
18    util::{SizeFormat, fmt_size},
19};
20use anyhow::{Context, Result};
21use btrfs_uapi::fiemap::file_extents;
22use clap::Parser;
23use std::{
24    collections::HashSet,
25    fs::{self, File},
26    os::unix::{fs::MetadataExt, io::AsFd},
27    path::{Path, PathBuf},
28};
29
30/// Summarize disk usage of each file, showing shared extents
31///
32/// For each path, prints three columns:
33///
34///   Total      — logical bytes used by non-inline extents
35///   Exclusive  — bytes not shared with any other file
36///   Set shared — (top-level only) physical bytes shared within this subtree
37#[derive(Parser, Debug)]
38pub struct FilesystemDuCommand {
39    /// Display only a total for each argument, not per-file lines
40    #[clap(long, short)]
41    pub summarize: bool,
42
43    #[clap(flatten)]
44    pub units: UnitMode,
45
46    /// One or more files or directories to summarize
47    #[clap(required = true)]
48    pub paths: Vec<PathBuf>,
49}
50
51impl Runnable for FilesystemDuCommand {
52    fn run(&self, _format: Format, _dry_run: bool) -> Result<()> {
53        let mode = self.units.resolve();
54        println!(
55            "{:>10}  {:>10}  {:>10}  Filename",
56            "Total", "Exclusive", "Set shared"
57        );
58
59        for path in &self.paths {
60            process_top_level(path, self.summarize, &mode).with_context(
61                || format!("cannot check space of '{}'", path.display()),
62            )?;
63        }
64        Ok(())
65    }
66}
67
68fn process_top_level(
69    path: &Path,
70    summarize: bool,
71    mode: &SizeFormat,
72) -> Result<()> {
73    let mut seen: HashSet<(u64, u64)> = HashSet::new();
74    // Physical (start, end_exclusive) ranges of all shared extents in this subtree.
75    let mut shared_ranges: Vec<(u64, u64)> = Vec::new();
76
77    let meta = fs::symlink_metadata(path)
78        .with_context(|| format!("cannot stat '{}'", path.display()))?;
79
80    let root_dev = meta.dev();
81
82    let total = if meta.is_file() {
83        let file = File::open(path)
84            .with_context(|| format!("cannot open '{}'", path.display()))?;
85        let info = file_extents(file.as_fd()).map_err(|e| {
86            anyhow::anyhow!("fiemap failed on '{}': {e}", path.display())
87        })?;
88        shared_ranges.extend_from_slice(&info.shared_extents);
89        info.total_bytes
90    } else if meta.is_dir() {
91        walk_dir(
92            path,
93            root_dev,
94            &mut seen,
95            &mut shared_ranges,
96            summarize,
97            mode,
98        )?
99    } else {
100        0
101    };
102
103    let set_shared = compute_set_shared(&mut shared_ranges);
104    let exclusive = total.saturating_sub(set_shared);
105
106    println!(
107        "{:>10}  {:>10}  {:>10}  {}",
108        fmt_size(total, mode),
109        fmt_size(exclusive, mode),
110        fmt_size(set_shared, mode),
111        path.display()
112    );
113
114    Ok(())
115}
116
117/// Walk `dir` recursively, printing one line per entry (unless `summarize`),
118/// and return the total bytes for the whole subtree.
119///
120/// `root_dev` is the device number of the top-level path.  Subdirectories on
121/// a different device (i.e. other mounted filesystems) are silently skipped so
122/// that the walk stays within a single filesystem, matching the behaviour of
123/// the C reference implementation.
124fn walk_dir(
125    dir: &Path,
126    root_dev: u64,
127    seen: &mut HashSet<(u64, u64)>,
128    shared_ranges: &mut Vec<(u64, u64)>,
129    summarize: bool,
130    mode: &SizeFormat,
131) -> Result<u64> {
132    let mut dir_total: u64 = 0;
133
134    let entries = fs::read_dir(dir).with_context(|| {
135        format!("cannot read directory '{}'", dir.display())
136    })?;
137
138    for entry in entries {
139        let entry = entry.with_context(|| {
140            format!("error reading entry in '{}'", dir.display())
141        })?;
142        let entry_path = entry.path();
143
144        let meta = match fs::symlink_metadata(&entry_path) {
145            Ok(m) => m,
146            Err(e) => {
147                eprintln!(
148                    "warning: cannot stat '{}': {e}",
149                    entry_path.display()
150                );
151                continue;
152            }
153        };
154
155        if !meta.is_file() && !meta.is_dir() {
156            continue;
157        }
158
159        // Don't cross mount boundaries.
160        if meta.dev() != root_dev {
161            continue;
162        }
163
164        let key = (meta.dev(), meta.ino());
165        if !seen.insert(key) {
166            continue; // hard-linked inode already counted
167        }
168
169        if meta.is_file() {
170            let file = match File::open(&entry_path) {
171                Ok(f) => f,
172                Err(e) => {
173                    eprintln!(
174                        "warning: cannot open '{}': {e}",
175                        entry_path.display()
176                    );
177                    continue;
178                }
179            };
180
181            let info = match file_extents(file.as_fd()) {
182                Ok(v) => v,
183                Err(e) => {
184                    eprintln!(
185                        "warning: fiemap failed on '{}': {e}",
186                        entry_path.display()
187                    );
188                    continue;
189                }
190            };
191
192            if !summarize {
193                let excl = info.total_bytes.saturating_sub(info.shared_bytes);
194                println!(
195                    "{:>10}  {:>10}  {:>10}  {}",
196                    fmt_size(info.total_bytes, mode),
197                    fmt_size(excl, mode),
198                    "-",
199                    entry_path.display()
200                );
201            }
202
203            shared_ranges.extend_from_slice(&info.shared_extents);
204            dir_total += info.total_bytes;
205        } else {
206            let sub_total = walk_dir(
207                &entry_path,
208                root_dev,
209                seen,
210                shared_ranges,
211                summarize,
212                mode,
213            )?;
214
215            if !summarize {
216                // For non-top-level directories, set shared is shown as "-".
217                // The set-shared total is only computed at the top level.
218                println!(
219                    "{:>10}  {:>10}  {:>10}  {}",
220                    fmt_size(sub_total, mode),
221                    "-",
222                    "-",
223                    entry_path.display()
224                );
225            }
226
227            dir_total += sub_total;
228        }
229    }
230
231    Ok(dir_total)
232}
233
234/// Merge `ranges` in place and return the total bytes covered by the union.
235///
236/// This gives the "set shared" value: physical bytes referenced by at least
237/// one `FIEMAP_EXTENT_SHARED` extent anywhere in the subtree.
238fn compute_set_shared(ranges: &mut [(u64, u64)]) -> u64 {
239    if ranges.is_empty() {
240        return 0;
241    }
242    ranges.sort_unstable();
243
244    let mut total = 0u64;
245    let (mut cur_start, mut cur_end) = ranges[0];
246
247    for &(start, end) in &ranges[1..] {
248        if start <= cur_end {
249            if end > cur_end {
250                cur_end = end;
251            }
252        } else {
253            total += cur_end - cur_start;
254            cur_start = start;
255            cur_end = end;
256        }
257    }
258    total += cur_end - cur_start;
259    total
260}