Skip to main content

btrfs_cli/filesystem/
du.rs

1//! Implementation of `btrfs filesystem du`.
2//!
3//! Uses [`btrfs_uapi::fiemap::file_extents`] to query the physical extent
4//! layout of each file.  For each file we report:
5//!
6//! * **Total** — sum of all non-skipped extent lengths
7//! * **Exclusive** — bytes not marked `FIEMAP_EXTENT_SHARED`
8//! * **Set shared** — at the top-level only: physical bytes shared with at
9//!   least one other file in the same argument's subtree, computed by
10//!   collecting all shared physical ranges, sorting, and merging overlaps.
11//!   Always `-` for non-top-level lines.
12//!
13//! The output format and semantics match `btrfs-progs filesystem du`.
14
15use super::UnitMode;
16use crate::{Format, Runnable, util::human_bytes};
17use anyhow::{Context, Result};
18use btrfs_uapi::fiemap::file_extents;
19use clap::Parser;
20use std::{
21    collections::HashSet,
22    fs::{self, File},
23    os::unix::{fs::MetadataExt, io::AsFd},
24    path::{Path, PathBuf},
25};
26
27/// Summarize disk usage of each file, showing shared extents
28///
29/// For each path, prints three columns:
30///
31///   Total      — logical bytes used by non-inline extents
32///   Exclusive  — bytes not shared with any other file
33///   Set shared — (top-level only) physical bytes shared within this subtree
34#[derive(Parser, Debug)]
35pub struct FilesystemDuCommand {
36    /// Display only a total for each argument, not per-file lines
37    #[clap(long, short)]
38    pub summarize: bool,
39
40    #[clap(flatten)]
41    pub units: UnitMode,
42
43    /// One or more files or directories to summarize
44    #[clap(required = true)]
45    pub paths: Vec<PathBuf>,
46}
47
48impl Runnable for FilesystemDuCommand {
49    fn run(&self, _format: Format, _dry_run: bool) -> Result<()> {
50        println!(
51            "{:>10}  {:>10}  {:>10}  {}",
52            "Total", "Exclusive", "Set shared", "Filename"
53        );
54
55        for path in &self.paths {
56            process_top_level(path, self.summarize).with_context(|| {
57                format!("cannot check space of '{}'", path.display())
58            })?;
59        }
60        Ok(())
61    }
62}
63
64fn process_top_level(path: &Path, summarize: bool) -> Result<()> {
65    let mut seen: HashSet<(u64, u64)> = HashSet::new();
66    // Physical (start, end_exclusive) ranges of all shared extents in this subtree.
67    let mut shared_ranges: Vec<(u64, u64)> = Vec::new();
68
69    let meta = fs::symlink_metadata(path)
70        .with_context(|| format!("cannot stat '{}'", path.display()))?;
71
72    let root_dev = meta.dev();
73
74    let total = if meta.is_file() {
75        let file = File::open(path)
76            .with_context(|| format!("cannot open '{}'", path.display()))?;
77        let info = file_extents(file.as_fd()).map_err(|e| {
78            anyhow::anyhow!("fiemap failed on '{}': {e}", path.display())
79        })?;
80        shared_ranges.extend_from_slice(&info.shared_extents);
81        info.total_bytes
82    } else if meta.is_dir() {
83        walk_dir(path, root_dev, &mut seen, &mut shared_ranges, summarize)?
84    } else {
85        0
86    };
87
88    let set_shared = compute_set_shared(&mut shared_ranges);
89    let exclusive = total.saturating_sub(set_shared);
90
91    println!(
92        "{:>10}  {:>10}  {:>10}  {}",
93        human_bytes(total),
94        human_bytes(exclusive),
95        human_bytes(set_shared),
96        path.display()
97    );
98
99    Ok(())
100}
101
102/// Walk `dir` recursively, printing one line per entry (unless `summarize`),
103/// and return the total bytes for the whole subtree.
104///
105/// `root_dev` is the device number of the top-level path.  Subdirectories on
106/// a different device (i.e. other mounted filesystems) are silently skipped so
107/// that the walk stays within a single filesystem, matching the behaviour of
108/// the C reference implementation.
109fn walk_dir(
110    dir: &Path,
111    root_dev: u64,
112    seen: &mut HashSet<(u64, u64)>,
113    shared_ranges: &mut Vec<(u64, u64)>,
114    summarize: bool,
115) -> Result<u64> {
116    let mut dir_total: u64 = 0;
117
118    let entries = fs::read_dir(dir).with_context(|| {
119        format!("cannot read directory '{}'", dir.display())
120    })?;
121
122    for entry in entries {
123        let entry = entry.with_context(|| {
124            format!("error reading entry in '{}'", dir.display())
125        })?;
126        let entry_path = entry.path();
127
128        let meta = match fs::symlink_metadata(&entry_path) {
129            Ok(m) => m,
130            Err(e) => {
131                eprintln!(
132                    "warning: cannot stat '{}': {e}",
133                    entry_path.display()
134                );
135                continue;
136            }
137        };
138
139        if !meta.is_file() && !meta.is_dir() {
140            continue;
141        }
142
143        // Don't cross mount boundaries.
144        if meta.dev() != root_dev {
145            continue;
146        }
147
148        let key = (meta.dev(), meta.ino());
149        if !seen.insert(key) {
150            continue; // hard-linked inode already counted
151        }
152
153        if meta.is_file() {
154            let file = match File::open(&entry_path) {
155                Ok(f) => f,
156                Err(e) => {
157                    eprintln!(
158                        "warning: cannot open '{}': {e}",
159                        entry_path.display()
160                    );
161                    continue;
162                }
163            };
164
165            let info = match file_extents(file.as_fd()) {
166                Ok(v) => v,
167                Err(e) => {
168                    eprintln!(
169                        "warning: fiemap failed on '{}': {e}",
170                        entry_path.display()
171                    );
172                    continue;
173                }
174            };
175
176            if !summarize {
177                let excl = info.total_bytes.saturating_sub(info.shared_bytes);
178                println!(
179                    "{:>10}  {:>10}  {:>10}  {}",
180                    human_bytes(info.total_bytes),
181                    human_bytes(excl),
182                    "-",
183                    entry_path.display()
184                );
185            }
186
187            shared_ranges.extend_from_slice(&info.shared_extents);
188            dir_total += info.total_bytes;
189        } else {
190            let sub_total = walk_dir(
191                &entry_path,
192                root_dev,
193                seen,
194                shared_ranges,
195                summarize,
196            )?;
197
198            if !summarize {
199                // For non-top-level directories, set shared is shown as "-".
200                // The set-shared total is only computed at the top level.
201                println!(
202                    "{:>10}  {:>10}  {:>10}  {}",
203                    human_bytes(sub_total),
204                    "-",
205                    "-",
206                    entry_path.display()
207                );
208            }
209
210            dir_total += sub_total;
211        }
212    }
213
214    Ok(dir_total)
215}
216
/// Return the number of bytes covered by the union of `ranges`.
///
/// Each element is a half-open `(start, end_exclusive)` range.  Ranges that
/// overlap or touch are counted once, which yields the "set shared" value:
/// bytes referenced by at least one shared extent anywhere in the subtree.
///
/// `ranges` is sorted in place as a side effect; the elements themselves
/// are not modified or removed.  Empty and inverted ranges (`end <= start`)
/// contribute nothing, so malformed input cannot underflow the subtraction.
/// An empty slice yields `0`.
fn compute_set_shared(ranges: &mut [(u64, u64)]) -> u64 {
    // After sorting by start, every range that can extend the current run
    // is adjacent to it, so a single pass is enough.
    ranges.sort_unstable();

    let mut covered = 0u64;
    // The merged run currently being extended, if any.
    let mut run: Option<(u64, u64)> = None;

    for &(start, end) in ranges.iter() {
        if end <= start {
            continue;
        }
        run = match run {
            // Overlaps or touches the current run: extend it if longer.
            Some((lo, hi)) if start <= hi => Some((lo, hi.max(end))),
            // Gap before this range: close the current run and start anew.
            Some((lo, hi)) => {
                covered += hi - lo;
                Some((start, end))
            }
            None => Some((start, end)),
        };
    }

    covered + run.map_or(0, |(lo, hi)| hi - lo)
}