Skip to main content

btrfs_uapi/
replace.rs

1//! # Device replacement: replacing a device with another while the filesystem is online
2//!
3//! A replace operation copies all data from a source device to a target device,
4//! then swaps the target into the filesystem in place of the source. The
5//! filesystem remains mounted and usable throughout.
6//!
7//! Requires `CAP_SYS_ADMIN`.
8
9use crate::raw::{
10    BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL, BTRFS_IOCTL_DEV_REPLACE_CMD_START,
11    BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS,
12    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS,
13    BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID,
14    BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED,
15    BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR,
16    BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED,
17    BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS,
18    BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED,
19    BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED,
20    BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED,
21    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED,
22    BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED, btrfs_ioc_dev_replace,
23    btrfs_ioctl_dev_replace_args,
24};
25use nix::errno::Errno;
26use std::{
27    ffi::CStr,
28    mem,
29    os::{fd::AsRawFd, unix::io::BorrowedFd},
30    time::{Duration, SystemTime, UNIX_EPOCH},
31};
32
33/// Current state of a device replace operation.
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub enum ReplaceState {
36    NeverStarted,
37    Started,
38    Finished,
39    Canceled,
40    Suspended,
41}
42
43impl ReplaceState {
44    fn from_raw(val: u64) -> Option<ReplaceState> {
45        match val {
46            x if x == BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED as u64 => {
47                Some(ReplaceState::NeverStarted)
48            }
49            x if x == BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED as u64 => {
50                Some(ReplaceState::Started)
51            }
52            x if x == BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED as u64 => {
53                Some(ReplaceState::Finished)
54            }
55            x if x == BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED as u64 => {
56                Some(ReplaceState::Canceled)
57            }
58            x if x == BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED as u64 => {
59                Some(ReplaceState::Suspended)
60            }
61            _ => None,
62        }
63    }
64}
65
66/// Status of a device replace operation, as returned by the status query.
67#[derive(Debug, Clone)]
68pub struct ReplaceStatus {
69    /// Current state of the replace operation.
70    pub state: ReplaceState,
71    /// Progress in tenths of a percent (0..=1000).
72    pub progress_1000: u64,
73    /// Time the replace operation was started.
74    pub time_started: Option<SystemTime>,
75    /// Time the replace operation stopped (finished, canceled, or suspended).
76    pub time_stopped: Option<SystemTime>,
77    /// Number of write errors encountered during the replace.
78    pub num_write_errors: u64,
79    /// Number of uncorrectable read errors encountered during the replace.
80    pub num_uncorrectable_read_errors: u64,
81}
82
/// Convert a kernel timestamp (whole seconds since the Unix epoch) into a
/// [`SystemTime`].
///
/// Returns `None` when `secs` is zero, which this module treats as "no
/// timestamp recorded". Also returns `None` when the value is too large to be
/// represented as a `SystemTime`, rather than panicking on the overflow.
fn epoch_to_systemtime(secs: u64) -> Option<SystemTime> {
    if secs == 0 {
        return None;
    }
    // `UNIX_EPOCH + duration` panics on overflow; `checked_add` reports it as
    // `None` so a bogus kernel value cannot abort the caller.
    UNIX_EPOCH.checked_add(Duration::from_secs(secs))
}
90
/// How to identify the source device for a replace operation.
///
/// The type only holds a `u64` or a borrowed `&CStr`, so it is cheap to copy
/// and derives the same value-type traits as the other public enums here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceSource<'a> {
    /// Source device identified by its btrfs device ID.
    DevId(u64),
    /// Source device identified by its block device path.
    Path(&'a CStr),
}
98
99/// Query the status of a device replace operation on the filesystem referred
100/// to by `fd`.
101pub fn replace_status(fd: BorrowedFd) -> nix::Result<ReplaceStatus> {
102    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
103    args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS as u64;
104
105    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &mut args) }?;
106
107    // SAFETY: we issued CMD_STATUS so the status union member is active.
108    let status = unsafe { &args.__bindgen_anon_1.status };
109    let state =
110        ReplaceState::from_raw(status.replace_state).ok_or(Errno::EINVAL)?;
111
112    Ok(ReplaceStatus {
113        state,
114        progress_1000: status.progress_1000,
115        time_started: epoch_to_systemtime(status.time_started),
116        time_stopped: epoch_to_systemtime(status.time_stopped),
117        num_write_errors: status.num_write_errors,
118        num_uncorrectable_read_errors: status.num_uncorrectable_read_errors,
119    })
120}
121
/// Reason the kernel refused to start a replace even though the ioctl call
/// itself succeeded; decoded from the `result` field of the ioctl arguments.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReplaceStartError {
    /// A replace operation is already in progress.
    AlreadyStarted,
    /// A scrub is in progress and must finish before replace can start.
    ScrubInProgress,
}

impl std::fmt::Display for ReplaceStartError {
    /// Write a one-line, human-readable description of the rejection.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match *self {
            ReplaceStartError::AlreadyStarted => {
                "a device replace operation is already in progress"
            }
            ReplaceStartError::ScrubInProgress => {
                "a scrub is in progress; cancel it first"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for ReplaceStartError {}
147
148/// Start a device replace operation, copying all data from `source` to the
149/// target device at `tgtdev_path`.
150///
151/// When `avoid_srcdev` is true, the kernel will only read from the source
152/// device when no other zero-defect mirror is available (useful for replacing
153/// a device with known read errors).
154///
155/// Returns a two-level Result: the outer `nix::Result` covers ioctl-level
156/// failures (EPERM, EINVAL, etc.), while the inner `Result` covers
157/// application-level rejections reported by the kernel in the `result` field.
158/// `Ok(Ok(()))` means the replace started successfully.
159/// `Ok(Err(AlreadyStarted))` means another replace is in progress.
160/// `Ok(Err(ScrubInProgress))` means a scrub must finish or be cancelled first.
161///
162/// Errors: ENAMETOOLONG if source or target device paths exceed the kernel
163/// buffer size.
164pub fn replace_start(
165    fd: BorrowedFd,
166    source: ReplaceSource,
167    tgtdev_path: &CStr,
168    avoid_srcdev: bool,
169) -> nix::Result<Result<(), ReplaceStartError>> {
170    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
171    args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_START as u64;
172
173    // SAFETY: we are filling in the start union member before issuing CMD_START.
174    let start = unsafe { &mut args.__bindgen_anon_1.start };
175
176    match source {
177        ReplaceSource::DevId(devid) => {
178            start.srcdevid = devid;
179        }
180        ReplaceSource::Path(path) => {
181            start.srcdevid = 0;
182            let bytes = path.to_bytes();
183            if bytes.len() >= start.srcdev_name.len() {
184                return Err(Errno::ENAMETOOLONG);
185            }
186            start.srcdev_name[..bytes.len()].copy_from_slice(bytes);
187        }
188    }
189
190    let tgt_bytes = tgtdev_path.to_bytes();
191    if tgt_bytes.len() >= start.tgtdev_name.len() {
192        return Err(Errno::ENAMETOOLONG);
193    }
194    start.tgtdev_name[..tgt_bytes.len()].copy_from_slice(tgt_bytes);
195
196    start.cont_reading_from_srcdev_mode = if avoid_srcdev {
197        BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID as u64
198    } else {
199        BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS as u64
200    };
201
202    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &mut args) }?;
203
204    match args.result {
205        x if x == BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR as u64 => Ok(Ok(())),
206        x if x == BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED as u64 => {
207            Ok(Err(ReplaceStartError::AlreadyStarted))
208        }
209        x if x == BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS as u64 => {
210            Ok(Err(ReplaceStartError::ScrubInProgress))
211        }
212        _ => Err(Errno::EINVAL),
213    }
214}
215
216/// Cancel a running device replace operation on the filesystem referred to
217/// by `fd`.
218///
219/// Returns `Ok(true)` if the replace was successfully cancelled, or
220/// `Ok(false)` if no replace operation was in progress.
221pub fn replace_cancel(fd: BorrowedFd) -> nix::Result<bool> {
222    let mut args: btrfs_ioctl_dev_replace_args = unsafe { mem::zeroed() };
223    args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL as u64;
224
225    unsafe { btrfs_ioc_dev_replace(fd.as_raw_fd(), &mut args) }?;
226
227    match args.result {
228        x if x == BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR as u64 => Ok(true),
229        x if x == BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED as u64 => {
230            Ok(false)
231        }
232        _ => Err(Errno::EINVAL),
233    }
234}
235
#[cfg(test)]
mod tests {
    //! Unit tests for the pure helpers in this module; nothing here issues an
    //! ioctl.

    use super::*;

    // --- epoch_to_systemtime ---

    /// A zero timestamp maps to `None`.
    #[test]
    fn epoch_zero_is_none() {
        assert_eq!(epoch_to_systemtime(0), None);
    }

    /// A non-zero timestamp is that many seconds after the Unix epoch.
    #[test]
    fn epoch_nonzero_is_some() {
        let expected = UNIX_EPOCH + Duration::from_secs(1700000000);
        assert_eq!(epoch_to_systemtime(1700000000), Some(expected));
    }

    // --- ReplaceState::from_raw ---

    /// Every kernel state constant decodes to its matching variant.
    #[test]
    fn replace_state_from_raw_all_variants() {
        let cases = [
            (
                BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED as u64,
                ReplaceState::NeverStarted,
            ),
            (
                BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED as u64,
                ReplaceState::Started,
            ),
            (
                BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED as u64,
                ReplaceState::Finished,
            ),
            (
                BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED as u64,
                ReplaceState::Canceled,
            ),
            (
                BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED as u64,
                ReplaceState::Suspended,
            ),
        ];

        for (raw, expected) in cases.iter() {
            assert_eq!(ReplaceState::from_raw(*raw), Some(*expected));
        }
    }

    /// A value that matches no constant is rejected.
    #[test]
    fn replace_state_from_raw_unknown() {
        assert_eq!(ReplaceState::from_raw(9999), None);
    }

    // --- ReplaceStartError Display ---

    /// The user-facing messages are stable.
    #[test]
    fn replace_start_error_display() {
        assert_eq!(
            ReplaceStartError::AlreadyStarted.to_string(),
            "a device replace operation is already in progress"
        );
        assert_eq!(
            ReplaceStartError::ScrubInProgress.to_string(),
            "a scrub is in progress; cancel it first"
        );
    }
}