Skip to main content

btrfs_uapi/
replace.rs

1//! # Device replacement: replacing a device with another while the filesystem is online
2//!
3//! A replace operation copies all data from a source device to a target device,
4//! then swaps the target into the filesystem in place of the source. The
5//! filesystem remains mounted and usable throughout.
6//!
7//! Requires `CAP_SYS_ADMIN`.
8
9use crate::raw::{
10    BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL, BTRFS_IOCTL_DEV_REPLACE_CMD_START,
11    BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS,
12    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS,
13    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID,
14    BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED,
15    BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR,
16    BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED,
17    BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS,
18    BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED,
19    BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED,
20    BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED,
21    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED,
22    BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED, btrfs_ioc_dev_replace,
23    btrfs_ioctl_dev_replace_args,
24};
25use nix::errno::Errno;
26use std::{
27    ffi::CStr,
28    mem,
29    os::{fd::AsRawFd, unix::io::BorrowedFd},
30    time::{Duration, SystemTime, UNIX_EPOCH},
31};
32
33/// Current state of a device replace operation.
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub enum ReplaceState {
36    NeverStarted,
37    Started,
38    Finished,
39    Canceled,
40    Suspended,
41}
42
43impl ReplaceState {
44    fn from_raw(val: u64) -> Option<ReplaceState> {
45        match val {
46            x if x
47                == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED) =>
48            {
49                Some(ReplaceState::NeverStarted)
50            }
51            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) => {
52                Some(ReplaceState::Started)
53            }
54            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED) => {
55                Some(ReplaceState::Finished)
56            }
57            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED) => {
58                Some(ReplaceState::Canceled)
59            }
60            x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED) => {
61                Some(ReplaceState::Suspended)
62            }
63            _ => None,
64        }
65    }
66}
67
68/// Status of a device replace operation, as returned by the status query.
69#[derive(Debug, Clone)]
70pub struct ReplaceStatus {
71    /// Current state of the replace operation.
72    pub state: ReplaceState,
73    /// Progress in tenths of a percent (0..=1000).
74    pub progress_1000: u64,
75    /// Time the replace operation was started.
76    pub time_started: Option<SystemTime>,
77    /// Time the replace operation stopped (finished, canceled, or suspended).
78    pub time_stopped: Option<SystemTime>,
79    /// Number of write errors encountered during the replace.
80    pub num_write_errors: u64,
81    /// Number of uncorrectable read errors encountered during the replace.
82    pub num_uncorrectable_read_errors: u64,
83}
84
/// Convert a timestamp in whole seconds since the Unix epoch into a
/// [`SystemTime`].
///
/// Returns `None` when `secs` is zero, which this module treats as "no
/// timestamp", and also when `secs` is too large to be represented as a
/// `SystemTime` on the current platform.
fn epoch_to_systemtime(secs: u64) -> Option<SystemTime> {
    if secs == 0 {
        return None;
    }
    // `UNIX_EPOCH + duration` panics on overflow. The value comes straight
    // from the kernel, so an out-of-range number must not abort the caller;
    // `checked_add` reports it as `None` instead.
    UNIX_EPOCH.checked_add(Duration::from_secs(secs))
}
92
/// How to identify the source device for a replace operation.
///
/// The type only holds a `u64` or a borrowed `&CStr`, so it is cheap to copy
/// and derives the same common traits as the other public types in this
/// module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceSource<'a> {
    /// Source device identified by its btrfs device ID.
    DevId(u64),
    /// Source device identified by its block device path.
    Path(&'a CStr),
}
100
101/// Query the status of a device replace operation on the filesystem referred
102/// to by `fd`.
103pub fn replace_status(fd: BorrowedFd) -> nix::Result<ReplaceStatus> {
104    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
105    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS);
106
107    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
108
109    // SAFETY: we issued CMD_STATUS so the status union member is active.
110    let status = unsafe { &args.__bindgen_anon_1.status };
111    let state =
112        ReplaceState::from_raw(status.replace_state).ok_or(Errno::EINVAL)?;
113
114    Ok(ReplaceStatus {
115        state,
116        progress_1000: status.progress_1000,
117        time_started: epoch_to_systemtime(status.time_started),
118        time_stopped: epoch_to_systemtime(status.time_stopped),
119        num_write_errors: status.num_write_errors,
120        num_uncorrectable_read_errors: status.num_uncorrectable_read_errors,
121    })
122}
123
/// Application-level rejection of a replace start request: the ioctl itself
/// succeeded, but the `result` field it filled in signals a problem.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceStartError {
    /// Another replace operation is already in progress.
    AlreadyStarted,
    /// A scrub is in progress and has to finish before replace can start.
    ScrubInProgress,
}

impl std::fmt::Display for ReplaceStartError {
    /// Write the human-readable description of the rejection.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::AlreadyStarted => {
                "a device replace operation is already in progress"
            }
            Self::ScrubInProgress => "a scrub is in progress; cancel it first",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ReplaceStartError {}
149
150/// Start a device replace operation, copying all data from `source` to the
151/// target device at `tgtdev_path`.
152///
153/// When `avoid_srcdev` is true, the kernel will only read from the source
154/// device when no other zero-defect mirror is available (useful for replacing
155/// a device with known read errors).
156///
157/// Returns a two-level Result: the outer `nix::Result` covers ioctl-level
158/// failures (EPERM, EINVAL, etc.), while the inner `Result` covers
159/// application-level rejections reported by the kernel in the `result` field.
160/// `Ok(Ok(()))` means the replace started successfully.
161/// `Ok(Err(AlreadyStarted))` means another replace is in progress.
162/// `Ok(Err(ScrubInProgress))` means a scrub must finish or be cancelled first.
163///
164/// Errors: ENAMETOOLONG if source or target device paths exceed the kernel
165/// buffer size.
166pub fn replace_start(
167    fd: BorrowedFd,
168    source: ReplaceSource,
169    tgtdev_path: &CStr,
170    avoid_srcdev: bool,
171) -> nix::Result<Result<(), ReplaceStartError>> {
172    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
173    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_START);
174
175    // SAFETY: we are filling in the start union member before issuing CMD_START.
176    let start = unsafe { &mut args.__bindgen_anon_1.start };
177
178    match source {
179        ReplaceSource::DevId(devid) => {
180            start.srcdevid = devid;
181        }
182        ReplaceSource::Path(path) => {
183            start.srcdevid = 0;
184            let bytes = path.to_bytes();
185            if bytes.len() >= start.srcdev_name.len() {
186                return Err(Errno::ENAMETOOLONG);
187            }
188            start.srcdev_name[..bytes.len()].copy_from_slice(bytes);
189        }
190    }
191
192    let tgt_bytes = tgtdev_path.to_bytes();
193    if tgt_bytes.len() >= start.tgtdev_name.len() {
194        return Err(Errno::ENAMETOOLONG);
195    }
196    start.tgtdev_name[..tgt_bytes.len()].copy_from_slice(tgt_bytes);
197
198    start.cont_reading_from_srcdev_mode = if avoid_srcdev {
199        u64::from(BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID)
200    } else {
201        u64::from(BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS)
202    };
203
204    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
205
206    match args.result {
207        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) => {
208            Ok(Ok(()))
209        }
210        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED) => {
211            Ok(Err(ReplaceStartError::AlreadyStarted))
212        }
213        x if x
214            == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS) =>
215        {
216            Ok(Err(ReplaceStartError::ScrubInProgress))
217        }
218        _ => Err(Errno::EINVAL),
219    }
220}
221
222/// Cancel a running device replace operation on the filesystem referred to
223/// by `fd`.
224///
225/// Returns `Ok(true)` if the replace was successfully cancelled, or
226/// `Ok(false)` if no replace operation was in progress.
227pub fn replace_cancel(fd: BorrowedFd) -> nix::Result<bool> {
228    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
229    args.cmd = u64::from(BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL);
230
231    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &raw mut args) }?;
232
233    match args.result {
234        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) => {
235            Ok(true)
236        }
237        x if x == u64::from(BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED) => {
238            Ok(false)
239        }
240        _ => Err(Errno::EINVAL),
241    }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    // --- epoch_to_systemtime ---

    /// A zero timestamp converts to `None`.
    #[test]
    fn epoch_zero_is_none() {
        assert_eq!(epoch_to_systemtime(0), None);
    }

    /// A non-zero timestamp converts to that many seconds past the epoch.
    #[test]
    fn epoch_nonzero_is_some() {
        let secs = 1700000000;
        let expected = UNIX_EPOCH + Duration::from_secs(secs);
        assert_eq!(epoch_to_systemtime(secs).unwrap(), expected);
    }

    // --- ReplaceState::from_raw ---

    /// Every known raw state constant decodes to its matching variant.
    #[test]
    fn replace_state_from_raw_all_variants() {
        let cases = [
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED),
                ReplaceState::NeverStarted,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED),
                ReplaceState::Started,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED),
                ReplaceState::Finished,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED),
                ReplaceState::Canceled,
            ),
            (
                u64::from(BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED),
                ReplaceState::Suspended,
            ),
        ];

        for &(raw, expected) in &cases {
            assert_eq!(ReplaceState::from_raw(raw), Some(expected));
        }
    }

    /// A raw value matching no known constant decodes to `None`.
    #[test]
    fn replace_state_from_raw_unknown() {
        assert_eq!(ReplaceState::from_raw(9999), None);
    }

    // --- ReplaceStartError Display ---

    /// Each start error renders its fixed human-readable message.
    #[test]
    fn replace_start_error_display() {
        let already = ReplaceStartError::AlreadyStarted.to_string();
        assert_eq!(
            already,
            "a device replace operation is already in progress"
        );

        let scrub = ReplaceStartError::ScrubInProgress.to_string();
        assert_eq!(scrub, "a scrub is in progress; cancel it first");
    }
}