Skip to main content

btrfs_uapi/
replace.rs

1//! # Device replacement: replacing a device with another while the filesystem is online
2//!
3//! A replace operation copies all data from a source device to a target device,
4//! then swaps the target into the filesystem in place of the source. The
5//! filesystem remains mounted and usable throughout.
6//!
7//! Requires `CAP_SYS_ADMIN`.
8
9use crate::raw::{
10    BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL, BTRFS_IOCTL_DEV_REPLACE_CMD_START,
11    BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS,
12    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS,
13    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID,
14    BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED,
15    BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR,
16    BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED,
17    BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS,
18    BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED,
19    BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED,
20    BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED,
21    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED,
22    BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED, btrfs_ioc_dev_replace,
23    btrfs_ioctl_dev_replace_args,
24};
25use nix::errno::Errno;
26use std::{
27    ffi::CStr,
28    mem,
29    os::{fd::AsRawFd, unix::io::BorrowedFd},
30    time::{Duration, SystemTime, UNIX_EPOCH},
31};
32
/// Current state of a device replace operation.
///
/// Decoded from the raw `replace_state` value that the kernel reports in
/// response to a status query (see [`replace_status`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceState {
    /// No replace operation has ever been started on this filesystem.
    NeverStarted,
    /// A replace operation is currently running.
    Started,
    /// The replace operation completed successfully.
    Finished,
    /// The replace operation was canceled before completion.
    Canceled,
    /// The replace operation was suspended (e.g. by unmount) and can be resumed.
    Suspended,
}
47
48impl ReplaceState {
49    fn from_raw(val: u64) -> Option<ReplaceState> {
50        match val {
51            x if x
52                == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED) =>
53            {
54                Some(ReplaceState::NeverStarted)
55            }
56            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) => {
57                Some(ReplaceState::Started)
58            }
59            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED) => {
60                Some(ReplaceState::Finished)
61            }
62            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED) => {
63                Some(ReplaceState::Canceled)
64            }
65            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED) => {
66                Some(ReplaceState::Suspended)
67            }
68            _ => None,
69        }
70    }
71}
72
/// Status of a device replace operation, as returned by the status query.
///
/// Built by [`replace_status`] from the status data the kernel fills in.
#[derive(Debug, Clone)]
pub struct ReplaceStatus {
    /// Current state of the replace operation.
    pub state: ReplaceState,
    /// Progress in tenths of a percent (0..=1000).
    pub progress_1000: u64,
    /// Time the replace operation was started.
    ///
    /// `None` when the kernel reports a timestamp of 0.
    pub time_started: Option<SystemTime>,
    /// Time the replace operation stopped (finished, canceled, or suspended).
    ///
    /// `None` when the kernel reports a timestamp of 0.
    pub time_stopped: Option<SystemTime>,
    /// Number of write errors encountered during the replace.
    pub num_write_errors: u64,
    /// Number of uncorrectable read errors encountered during the replace.
    pub num_uncorrectable_read_errors: u64,
}
89
/// Convert a timestamp expressed as whole seconds since the Unix epoch into a
/// [`SystemTime`].
///
/// Returns `None` when `secs` is 0, which this module treats as "no
/// timestamp recorded". Also returns `None` when `secs` is too large to be
/// represented as a `SystemTime` on this platform, so an implausible value
/// coming back from the kernel cannot trigger the overflow panic that plain
/// `SystemTime + Duration` addition would.
fn epoch_to_systemtime(secs: u64) -> Option<SystemTime> {
    if secs == 0 {
        return None;
    }
    // `checked_add` yields `None` on overflow instead of panicking.
    UNIX_EPOCH.checked_add(Duration::from_secs(secs))
}
97
/// How to identify the source device for a replace operation.
///
/// Passed to [`replace_start`] to select the device whose data is copied to
/// the target. The type is cheap to copy: it holds either an integer or a
/// borrowed C string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceSource<'a> {
    /// Source device identified by its btrfs device ID.
    DevId(u64),
    /// Source device identified by its block device path.
    Path(&'a CStr),
}
105
106/// Query the status of a device replace operation on the filesystem referred
107/// to by `fd`.
108///
109/// # Errors
110///
111/// Returns `Err` if the ioctl fails.
112pub fn replace_status(fd: BorrowedFd) -> nix::Result<ReplaceStatus> {
113    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
114    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS);
115
116    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
117
118    // SAFETY: we issued CMD_STATUS so the status union member is active.
119    let status = unsafe { &args.__bindgen_anon_1.status };
120    let state =
121        ReplaceState::from_raw(status.replace_state).ok_or(Errno::EINVAL)?;
122
123    Ok(ReplaceStatus {
124        state,
125        progress_1000: status.progress_1000,
126        time_started: epoch_to_systemtime(status.time_started),
127        time_stopped: epoch_to_systemtime(status.time_stopped),
128        num_write_errors: status.num_write_errors,
129        num_uncorrectable_read_errors: status.num_uncorrectable_read_errors,
130    })
131}
132
/// Application-level reason the kernel declined to start a replace.
///
/// These are reported when the ioctl itself succeeds but the `result` field
/// it fills in signals a problem.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceStartError {
    /// A replace operation is already in progress.
    AlreadyStarted,
    /// A scrub is in progress and must finish before replace can start.
    ScrubInProgress,
}

impl std::fmt::Display for ReplaceStartError {
    /// Write the human-readable description of this rejection.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match *self {
            Self::AlreadyStarted => {
                "a device replace operation is already in progress"
            }
            Self::ScrubInProgress => "a scrub is in progress; cancel it first",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ReplaceStartError {}
158
159/// Start a device replace operation, copying all data from `source` to the
160/// target device at `tgtdev_path`.
161///
162/// When `avoid_srcdev` is true, the kernel will only read from the source
163/// device when no other zero-defect mirror is available (useful for replacing
164/// a device with known read errors).
165///
166/// Returns a two-level Result: the outer `nix::Result` covers ioctl-level
167/// failures (EPERM, EINVAL, etc.), while the inner `Result` covers
168/// application-level rejections reported by the kernel in the `result` field.
169/// `Ok(Ok(()))` means the replace started successfully.
170/// `Ok(Err(AlreadyStarted))` means another replace is in progress.
171/// `Ok(Err(ScrubInProgress))` means a scrub must finish or be cancelled first.
172///
173/// # Errors
174///
175/// Returns `Err` if the ioctl fails. `ENAMETOOLONG` if device paths exceed
176/// the kernel buffer size.
177pub fn replace_start(
178    fd: BorrowedFd,
179    source: &ReplaceSource<'_>,
180    tgtdev_path: &CStr,
181    avoid_srcdev: bool,
182) -> nix::Result<Result<(), ReplaceStartError>> {
183    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
184    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_START);
185
186    // SAFETY: we are filling in the start union member before issuing CMD_START.
187    let start = unsafe { &mut args.__bindgen_anon_1.start };
188
189    match *source {
190        ReplaceSource::DevId(devid) => {
191            start.srcdevid = devid;
192        }
193        ReplaceSource::Path(path) => {
194            start.srcdevid = 0;
195            let bytes = path.to_bytes();
196            if bytes.len() >= start.srcdev_name.len() {
197                return Err(Errno::ENAMETOOLONG);
198            }
199            start.srcdev_name[..bytes.len()].copy_from_slice(bytes);
200        }
201    }
202
203    let tgt_bytes = tgtdev_path.to_bytes();
204    if tgt_bytes.len() >= start.tgtdev_name.len() {
205        return Err(Errno::ENAMETOOLONG);
206    }
207    start.tgtdev_name[..tgt_bytes.len()].copy_from_slice(tgt_bytes);
208
209    start.cont_reading_from_srcdev_mode = if avoid_srcdev {
210        u64::from(BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID)
211    } else {
212        u64::from(BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS)
213    };
214
215    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
216
217    match args.result {
218        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) => {
219            Ok(Ok(()))
220        }
221        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED) => {
222            Ok(Err(ReplaceStartError::AlreadyStarted))
223        }
224        x if x
225            == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS) =>
226        {
227            Ok(Err(ReplaceStartError::ScrubInProgress))
228        }
229        _ => Err(Errno::EINVAL),
230    }
231}
232
233/// Cancel a running device replace operation on the filesystem referred to
234/// by `fd`.
235///
236/// Returns `Ok(true)` if the replace was successfully cancelled, or
237/// `Ok(false)` if no replace operation was in progress.
238///
239/// # Errors
240///
241/// Returns `Err` if the ioctl fails.
242pub fn replace_cancel(fd: BorrowedFd) -> nix::Result<bool> {
243    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
244    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL);
245
246    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
247
248    match args.result {
249        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) => {
250            Ok(true)
251        }
252        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED) => {
253            Ok(false)
254        }
255        _ => Err(Errno::EINVAL),
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    // --- epoch_to_systemtime ---

    /// A zero timestamp maps to `None`.
    #[test]
    fn epoch_zero_is_none() {
        assert_eq!(epoch_to_systemtime(0), None);
    }

    /// A non-zero timestamp maps to that many seconds past the epoch.
    #[test]
    fn epoch_nonzero_is_some() {
        let secs = 1_700_000_000;
        let expected = UNIX_EPOCH + Duration::from_secs(secs);
        assert_eq!(epoch_to_systemtime(secs), Some(expected));
    }

    // --- ReplaceState::from_raw ---

    /// Every known raw state constant decodes to its matching variant.
    #[test]
    fn replace_state_from_raw_all_variants() {
        let cases = [
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED),
                ReplaceState::NeverStarted,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED),
                ReplaceState::Started,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED),
                ReplaceState::Finished,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED),
                ReplaceState::Canceled,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED),
                ReplaceState::Suspended,
            ),
        ];

        for (raw, expected) in cases {
            assert_eq!(ReplaceState::from_raw(raw), Some(expected));
        }
    }

    /// A value matching no known constant decodes to `None`.
    #[test]
    fn replace_state_from_raw_unknown() {
        assert_eq!(ReplaceState::from_raw(9999), None);
    }

    // --- ReplaceStartError Display ---

    /// The user-facing messages are stable.
    #[test]
    fn replace_start_error_display() {
        assert_eq!(
            ReplaceStartError::AlreadyStarted.to_string(),
            "a device replace operation is already in progress"
        );
        assert_eq!(
            ReplaceStartError::ScrubInProgress.to_string(),
            "a scrub is in progress; cancel it first"
        );
    }
}