// btrfs_cli/rescue/fix_device_size.rs

1use crate::{RunContext, Runnable, util::is_mounted};
2use anyhow::{Context, Result, bail};
3use btrfs_disk::{
4    items::{DeviceExtent, DeviceItem},
5    raw,
6    tree::{DiskKey, KeyType},
7};
8use btrfs_transaction::{
9    filesystem::Filesystem,
10    path::BtrfsPath,
11    search::{self, SearchIntent, next_leaf},
12    transaction::Transaction,
13};
14use clap::Parser;
15use std::{
16    fs::OpenOptions,
17    io::{Read, Seek, Write},
18    os::{fd::AsFd, unix::fs::FileTypeExt},
19    path::PathBuf,
20};
21
/// Tree id of the chunk tree; searched below for `DEV_ITEM` keys.
const CHUNK_TREE_OBJECTID: u64 = raw::BTRFS_CHUNK_TREE_OBJECTID as u64;
/// Tree id of the dev tree; searched below for `DEV_EXTENT` keys.
const DEV_TREE_OBJECTID: u64 = raw::BTRFS_DEV_TREE_OBJECTID as u64;
/// Special objectid that holds DEV_ITEM keys in the chunk tree.
const DEV_ITEMS_OBJECTID: u64 = raw::BTRFS_DEV_ITEMS_OBJECTID as u64;

/// Byte offset of `total_bytes` inside the on-disk `btrfs_dev_item`
/// (after the leading u64 `devid`).
///
/// `run` overwrites the 8 bytes starting at this offset with the
/// corrected value encoded as little-endian.
const DEV_ITEM_TOTAL_BYTES_OFFSET: usize = 8;
31
// NOTE(review): with `#[derive(Parser)]` the `///` text below is presumably
// surfaced by clap as this subcommand's help output, so treat it as
// user-facing text — confirm against clap's derive docs before rewording.
/// Re-align device and super block sizes
///
/// Recomputes each device's `total_bytes` from its physical size and
/// the device extent layout, updates the corresponding `DEV_ITEM` in
/// the chunk tree (and the embedded `dev_item` in the superblock),
/// and rewrites the superblock's `total_bytes` to match the sum.
///
/// Cases handled:
///
/// - `dev_item.total_bytes` is misaligned to `sectorsize`: round it
///   down.
/// - `dev_item.total_bytes` is larger than the underlying block
///   device or backing file: shrink it to the actual size, but only
///   if no `DEV_EXTENT` covers or extends past that boundary
///   (otherwise we'd lose data).
///
/// The device must not be mounted.
#[derive(Parser, Debug)]
pub struct RescueFixDeviceSizeCommand {
    // `run` checks this path with `is_mounted` and then opens it
    // read/write.
    /// Path to the btrfs device
    device: PathBuf,
}
54
/// One device's worth of state collected during the read pass.
///
/// An entry is only created when `new_total` differs from `old_total`,
/// so every `DeviceFix` represents a change that still has to be written.
struct DeviceFix {
    /// `devid` taken from the parsed `DEV_ITEM`.
    devid: u64,
    /// `total_bytes` as currently stored in the `DEV_ITEM`.
    old_total: u64,
    /// Corrected `total_bytes` to write back.
    new_total: u64,
}
61
62/// Read the size in bytes of a block device or regular file.
63fn underlying_size(file: &std::fs::File) -> Result<u64> {
64    let meta = file.metadata().context("failed to stat device")?;
65    if meta.file_type().is_block_device() {
66        let size = btrfs_uapi::blkdev::device_size(file.as_fd())
67            .context("BLKGETSIZE64 failed")?;
68        Ok(size)
69    } else {
70        Ok(meta.len())
71    }
72}
73
74/// Find the largest `(offset + length)` of any DEV_EXTENT belonging
75/// to `devid` in the dev tree, or 0 if there are no extents.
76fn last_dev_extent_end<R: Read + Write + Seek>(
77    fs: &mut Filesystem<R>,
78    devid: u64,
79) -> Result<u64> {
80    let start = DiskKey {
81        objectid: devid,
82        key_type: KeyType::DeviceExtent,
83        offset: 0,
84    };
85    let mut path = BtrfsPath::new();
86    let _ = search::search_slot(
87        None,
88        fs,
89        DEV_TREE_OBJECTID,
90        &start,
91        &mut path,
92        SearchIntent::ReadOnly,
93        false,
94    )
95    .context("failed to search dev tree")?;
96
97    let mut max_end: u64 = 0;
98    'outer: loop {
99        {
100            let Some(leaf) = path.nodes[0].as_ref() else {
101                break;
102            };
103            let nritems = leaf.nritems() as usize;
104            while path.slots[0] < nritems {
105                let key = leaf.item_key(path.slots[0]);
106                if key.objectid != devid
107                    || key.key_type != KeyType::DeviceExtent
108                {
109                    break 'outer;
110                }
111                let dext = DeviceExtent::parse(leaf.item_data(path.slots[0]))
112                    .ok_or_else(|| {
113                    anyhow::anyhow!(
114                        "failed to parse DEV_EXTENT at devid {devid} offset {}",
115                        key.offset
116                    )
117                })?;
118                let end = key.offset.saturating_add(dext.length);
119                if end > max_end {
120                    max_end = end;
121                }
122                path.slots[0] += 1;
123            }
124        }
125        if !next_leaf(fs, &mut path).context("next_leaf failed")? {
126            break;
127        }
128    }
129    Ok(max_end)
130}
131
132/// Read pass: collect every DEV_ITEM in the chunk tree, decide
133/// whether each needs fixing.
134fn collect_device_fixes<R: Read + Write + Seek>(
135    fs: &mut Filesystem<R>,
136    actual_size: u64,
137    sectorsize: u64,
138) -> Result<Vec<DeviceFix>> {
139    let start = DiskKey {
140        objectid: DEV_ITEMS_OBJECTID,
141        key_type: KeyType::DeviceItem,
142        offset: 0,
143    };
144    let mut path = BtrfsPath::new();
145    let _ = search::search_slot(
146        None,
147        fs,
148        CHUNK_TREE_OBJECTID,
149        &start,
150        &mut path,
151        SearchIntent::ReadOnly,
152        false,
153    )
154    .context("failed to search chunk tree for DEV_ITEMs")?;
155
156    let mut raw_items: Vec<(u64, u64)> = Vec::new();
157    'outer: loop {
158        {
159            let Some(leaf) = path.nodes[0].as_ref() else {
160                break;
161            };
162            let nritems = leaf.nritems() as usize;
163            while path.slots[0] < nritems {
164                let key = leaf.item_key(path.slots[0]);
165                if key.objectid != DEV_ITEMS_OBJECTID {
166                    break 'outer;
167                }
168                if key.key_type == KeyType::DeviceItem {
169                    let di = DeviceItem::parse(leaf.item_data(path.slots[0]))
170                        .ok_or_else(|| {
171                        anyhow::anyhow!(
172                            "failed to parse DEV_ITEM for devid {}",
173                            key.offset
174                        )
175                    })?;
176                    raw_items.push((di.devid, di.total_bytes));
177                }
178                path.slots[0] += 1;
179            }
180        }
181        if !next_leaf(fs, &mut path).context("next_leaf failed")? {
182            break;
183        }
184    }
185    path.release();
186
187    let mut out = Vec::new();
188    for (devid, old_total) in raw_items {
189        let mut new_total = old_total;
190
191        if new_total % sectorsize != 0 {
192            new_total -= new_total % sectorsize;
193        }
194
195        if new_total > actual_size {
196            let extent_end = last_dev_extent_end(fs, devid)?;
197            if extent_end > actual_size {
198                bail!(
199                    "devid {devid}: cannot shrink total_bytes from {old_total} \
200                     to {actual_size}: a DEV_EXTENT covers up to {extent_end}, \
201                     which is past the actual device size",
202                );
203            }
204            new_total = actual_size - (actual_size % sectorsize);
205        }
206
207        if new_total != old_total {
208            out.push(DeviceFix {
209                devid,
210                old_total,
211                new_total,
212            });
213        }
214    }
215
216    Ok(out)
217}
218
219/// Sum the (corrected) `total_bytes` of every device in the chunk
220/// tree. For devices listed in `fixes`, use the new value; for the
221/// rest, use the on-disk value as-is.
222fn sum_corrected_total_bytes<R: Read + Write + Seek>(
223    fs: &mut Filesystem<R>,
224    fixes: &[DeviceFix],
225) -> Result<u64> {
226    let start = DiskKey {
227        objectid: DEV_ITEMS_OBJECTID,
228        key_type: KeyType::DeviceItem,
229        offset: 0,
230    };
231    let mut path = BtrfsPath::new();
232    let _ = search::search_slot(
233        None,
234        fs,
235        CHUNK_TREE_OBJECTID,
236        &start,
237        &mut path,
238        SearchIntent::ReadOnly,
239        false,
240    )?;
241
242    let mut total: u64 = 0;
243    'outer: loop {
244        {
245            let Some(leaf) = path.nodes[0].as_ref() else {
246                break;
247            };
248            let nritems = leaf.nritems() as usize;
249            while path.slots[0] < nritems {
250                let key = leaf.item_key(path.slots[0]);
251                if key.objectid != DEV_ITEMS_OBJECTID {
252                    break 'outer;
253                }
254                if key.key_type == KeyType::DeviceItem {
255                    let di = DeviceItem::parse(leaf.item_data(path.slots[0]))
256                        .ok_or_else(|| {
257                        anyhow::anyhow!(
258                            "failed to parse DEV_ITEM for devid {}",
259                            key.offset
260                        )
261                    })?;
262                    let value = fixes
263                        .iter()
264                        .find(|f| f.devid == di.devid)
265                        .map_or(di.total_bytes, |f| f.new_total);
266                    total = total.saturating_add(value);
267                }
268                path.slots[0] += 1;
269            }
270        }
271        if !next_leaf(fs, &mut path)? {
272            break;
273        }
274    }
275    path.release();
276    Ok(total)
277}
278
279impl Runnable for RescueFixDeviceSizeCommand {
280    fn run(&self, _ctx: &RunContext) -> Result<()> {
281        if is_mounted(&self.device) {
282            bail!("{} is currently mounted", self.device.display());
283        }
284
285        let file = OpenOptions::new()
286            .read(true)
287            .write(true)
288            .open(&self.device)
289            .with_context(|| {
290                format!("failed to open '{}'", self.device.display())
291            })?;
292
293        let actual_size =
294            underlying_size(&file).context("failed to get device size")?;
295
296        let mut fs = Filesystem::open(file).with_context(|| {
297            format!("failed to open filesystem on '{}'", self.device.display())
298        })?;
299
300        let sectorsize = u64::from(fs.superblock.sectorsize);
301        let actual_aligned = actual_size - (actual_size % sectorsize);
302
303        let fixes = collect_device_fixes(&mut fs, actual_aligned, sectorsize)?;
304
305        let new_super_total = sum_corrected_total_bytes(&mut fs, &fixes)?;
306        let old_super_total = fs.superblock.total_bytes;
307
308        if fixes.is_empty() && new_super_total == old_super_total {
309            println!("no device size related problem found");
310            return Ok(());
311        }
312
313        let mut trans = Transaction::start(&mut fs)
314            .context("failed to start transaction")?;
315
316        // Apply each per-device fix.
317        for fix in &fixes {
318            let key = DiskKey {
319                objectid: DEV_ITEMS_OBJECTID,
320                key_type: KeyType::DeviceItem,
321                offset: fix.devid,
322            };
323            let mut path = BtrfsPath::new();
324            let found = search::search_slot(
325                Some(&mut trans),
326                &mut fs,
327                CHUNK_TREE_OBJECTID,
328                &key,
329                &mut path,
330                SearchIntent::ReadOnly,
331                true,
332            )
333            .with_context(|| {
334                format!("failed to search DEV_ITEM for devid {}", fix.devid)
335            })?;
336            if !found {
337                bail!("DEV_ITEM for devid {} disappeared", fix.devid);
338            }
339            {
340                let leaf = path.nodes[0].as_mut().unwrap();
341                let data = leaf.item_data_mut(path.slots[0]);
342                let off = DEV_ITEM_TOTAL_BYTES_OFFSET;
343                data[off..off + 8]
344                    .copy_from_slice(&fix.new_total.to_le_bytes());
345                fs.mark_dirty(leaf);
346            }
347            path.release();
348
349            // Mirror the change into the superblock's embedded
350            // dev_item if this devid matches.
351            if fs.superblock.dev_item.devid == fix.devid {
352                fs.superblock.dev_item.total_bytes = fix.new_total;
353            }
354            println!(
355                "devid {}: total_bytes {} -> {}",
356                fix.devid, fix.old_total, fix.new_total
357            );
358        }
359
360        if new_super_total != old_super_total {
361            fs.superblock.total_bytes = new_super_total;
362            println!(
363                "superblock total_bytes {old_super_total} -> {new_super_total}"
364            );
365        }
366
367        trans
368            .commit(&mut fs)
369            .context("failed to commit transaction")?;
370        fs.sync().context("failed to sync to disk")?;
371
372        println!("device size fix-up complete");
373        Ok(())
374    }
375}