Skip to main content

btrfs_uapi/
tree_search.rs

1//! # Generic B-tree search: walking any internal btrfs tree via `BTRFS_IOC_TREE_SEARCH`
2//!
3//! The kernel's tree search ioctl lets userspace read any internal btrfs tree
4//! (chunk, root, quota, …) by specifying a key range.  Items are returned in
5//! batches; [`tree_search`] advances the cursor automatically and calls a
6//! closure once per item until the range is exhausted.
7//!
8//! # Byte order
9//!
10//! [`SearchHeader`] fields (objectid, offset, type) are in host byte order:
11//! the kernel fills them in through the ioctl layer.  The `data` slice passed
12//! to the callback contains the raw on-disk item payload, which is
13//! **little-endian**; callers must use `u64::from_le_bytes` and friends when
14//! interpreting it.
15//!
16//! # Ioctl version
17//!
18//! This module uses `BTRFS_IOC_TREE_SEARCH` (v1) with its fixed 3992-byte
19//! result buffer.  This is sufficient for all item types used by this crate;
20//! the v2 variant with a configurable buffer size is not needed.
21
22use crate::raw::{
23    btrfs_ioc_tree_search, btrfs_ioctl_search_args, btrfs_ioctl_search_header,
24    btrfs_ioctl_search_key,
25};
26use std::{
27    mem,
28    os::{fd::AsRawFd, unix::io::BorrowedFd},
29};
30
31/// Parameters specifying which items to return from a tree search.
32///
33/// The kernel searches a 136-bit key space ordered as
34/// `(objectid << 72) | (type << 64) | offset`.
35/// All items whose key falls in the inclusive range `[min_key, max_key]` are
36/// returned.
37///
38/// Build a key for common cases with [`SearchKey::for_type`] or
39/// [`SearchKey::for_objectid_range`].
40///
41/// Tree IDs and item type codes are the `BTRFS_*_OBJECTID` and `BTRFS_*_KEY`
42/// constants from `crate::raw`, cast to `u64` and `u32` respectively at the
43/// call site.
44#[derive(Debug, Clone)]
45pub struct SearchKey {
46    /// Tree to search — use a `BTRFS_*_TREE_OBJECTID` constant from `crate::raw`.
47    pub tree_id: u64,
48    pub min_objectid: u64,
49    pub max_objectid: u64,
50    /// Item type — use a `BTRFS_*_KEY` constant from `crate::raw`.
51    pub min_type: u32,
52    pub max_type: u32,
53    pub min_offset: u64,
54    pub max_offset: u64,
55    /// Filter on the transaction ID of the *metadata block* that holds the
56    /// item, not the transaction that created the item itself.
57    pub min_transid: u64,
58    pub max_transid: u64,
59}
60
61impl SearchKey {
62    /// Return all items of `item_type` in `tree_id`, across every objectid
63    /// and offset.
64    pub fn for_type(tree_id: u64, item_type: u32) -> Self {
65        Self {
66            tree_id,
67            min_objectid: 0,
68            max_objectid: u64::MAX,
69            min_type: item_type,
70            max_type: item_type,
71            min_offset: 0,
72            max_offset: u64::MAX,
73            min_transid: 0,
74            max_transid: u64::MAX,
75        }
76    }
77
78    /// Return all items of `item_type` in `tree_id` whose objectid falls in
79    /// `[min_objectid, max_objectid]`.
80    pub fn for_objectid_range(
81        tree_id: u64,
82        item_type: u32,
83        min_objectid: u64,
84        max_objectid: u64,
85    ) -> Self {
86        Self {
87            min_objectid,
88            max_objectid,
89            ..Self::for_type(tree_id, item_type)
90        }
91    }
92}
93
94/// Metadata returned for each item found by [`tree_search`].
95///
96/// The header fields are in host byte order (the kernel fills them in through
97/// the ioctl layer).  The accompanying `data` slice passed to the callback is
98/// the raw on-disk item payload and is in **little-endian** byte order.
99#[derive(Debug, Clone, Copy)]
100pub struct SearchHeader {
101    pub transid: u64,
102    pub objectid: u64,
103    pub offset: u64,
104    /// Item type (the `type` field of the btrfs key).
105    pub item_type: u32,
106    /// Length in bytes of the item's data payload.
107    pub len: u32,
108}
109
110/// Number of items to request per ioctl call.
111const ITEMS_PER_BATCH: u32 = 4096;
112
113/// Size of `btrfs_ioctl_search_header` as a compile-time constant.
114const SEARCH_HEADER_SIZE: usize = mem::size_of::<btrfs_ioctl_search_header>();
115
116/// Walk every item in the tree that falls within `key`, calling `f` once for
117/// each one.
118///
119/// `f` receives:
120/// * a reference to the item's [`SearchHeader`] (objectid, offset, type, …)
121/// * a byte slice of the item's raw on-disk data payload (little-endian)
122///
123/// The search stops early and the error is returned if:
124/// * `f` returns `Err(_)`
125/// * the underlying `BTRFS_IOC_TREE_SEARCH` ioctl fails
126///
127/// Returns `Ok(())` when the entire requested range has been scanned.
128///
129/// # Privileges
130///
131/// Most trees require `CAP_SYS_ADMIN`.  The subvolume tree of the inode
132/// belonging to `fd` can be searched without elevated privileges by setting
133/// `key.tree_id = 0`.
134pub fn tree_search(
135    fd: BorrowedFd,
136    key: SearchKey,
137    mut f: impl FnMut(&SearchHeader, &[u8]) -> nix::Result<()>,
138) -> nix::Result<()> {
139    let mut args: btrfs_ioctl_search_args = unsafe { mem::zeroed() };
140
141    fill_search_key(&mut args.key, &key);
142
143    loop {
144        args.key.nr_items = ITEMS_PER_BATCH;
145
146        unsafe { btrfs_ioc_tree_search(fd.as_raw_fd(), &mut args) }?;
147
148        let nr = args.key.nr_items;
149        if nr == 0 {
150            break;
151        }
152
153        // Walk the result buffer.  We use raw pointer reads to avoid borrow
154        // checker conflicts: args.buf (read-only after the ioctl) and
155        // args.key (mutated below for cursor advancement) are separate fields,
156        // but a Rust reference to one would prevent mutation of the other.
157        //
158        // SAFETY:
159        // * The ioctl has returned successfully, so args.buf contains nr valid
160        //   (header, data) pairs totalling at most args.buf.len() bytes.
161        // * buf_base is derived from args.buf which lives for the entire loop
162        //   body; it is not invalidated until args is dropped.
163        // * All raw reads are bounds-checked before dereferencing.
164        // * The `data` slices passed to `f` do not outlive this loop
165        //   iteration — `f` takes `&[u8]`, not `&'static [u8]`.
166        let buf_base: *const u8 = args.buf.as_ptr().cast();
167        let buf_cap = args.buf.len();
168
169        let mut off = 0usize;
170        let mut last = SearchHeader {
171            transid: 0,
172            objectid: 0,
173            offset: 0,
174            item_type: 0,
175            len: 0,
176        };
177
178        for _ in 0..nr {
179            if off + SEARCH_HEADER_SIZE > buf_cap {
180                return Err(nix::errno::Errno::EOVERFLOW);
181            }
182            let raw_hdr: btrfs_ioctl_search_header = unsafe {
183                (buf_base.add(off) as *const btrfs_ioctl_search_header)
184                    .read_unaligned()
185            };
186            let hdr = SearchHeader {
187                transid: raw_hdr.transid,
188                objectid: raw_hdr.objectid,
189                offset: raw_hdr.offset,
190                item_type: raw_hdr.type_,
191                len: raw_hdr.len,
192            };
193            off += SEARCH_HEADER_SIZE;
194
195            let data_len = hdr.len as usize;
196            if off + data_len > buf_cap {
197                return Err(nix::errno::Errno::EOVERFLOW);
198            }
199            let data: &[u8] = unsafe {
200                std::slice::from_raw_parts(buf_base.add(off), data_len)
201            };
202            off += data_len;
203
204            f(&hdr, data)?;
205            last = hdr;
206        }
207
208        if !advance_cursor(&mut args.key, &last) {
209            break;
210        }
211    }
212
213    Ok(())
214}
215
216fn fill_search_key(sk: &mut btrfs_ioctl_search_key, key: &SearchKey) {
217    sk.tree_id = key.tree_id;
218    sk.min_objectid = key.min_objectid;
219    sk.max_objectid = key.max_objectid;
220    sk.min_type = key.min_type;
221    sk.max_type = key.max_type;
222    sk.min_offset = key.min_offset;
223    sk.max_offset = key.max_offset;
224    sk.min_transid = key.min_transid;
225    sk.max_transid = key.max_transid;
226}
227
228/// Advance the search cursor past `last` so the next batch begins from the
229/// item immediately after it in the 136-bit key space
230/// `(objectid << 72) | (type << 64) | offset`.
231///
232/// The kernel interprets `(min_objectid, min_type, min_offset)` as a compound
233/// tuple key, so all three fields must be updated together to point past the
234/// last returned item.  Advancing only `min_offset` while leaving
235/// `min_objectid` at its original value would cause items from lower objectids
236/// that were already returned to satisfy the new minimum and be yielded again.
237///
238/// Returns `false` when the objectid also overflows, meaning the full key
239/// space has been exhausted.
240fn advance_cursor(
241    sk: &mut btrfs_ioctl_search_key,
242    last: &SearchHeader,
243) -> bool {
244    let (new_offset, offset_overflow) = last.offset.overflowing_add(1);
245    if !offset_overflow {
246        sk.min_objectid = last.objectid;
247        sk.min_type = last.item_type;
248        sk.min_offset = new_offset;
249        return true;
250    }
251
252    sk.min_offset = 0;
253    let (new_type, type_overflow) = last.item_type.overflowing_add(1);
254    if !type_overflow {
255        sk.min_objectid = last.objectid;
256        sk.min_type = new_type;
257        return true;
258    }
259
260    sk.min_type = 0;
261    let (new_oid, oid_overflow) = last.objectid.overflowing_add(1);
262    if oid_overflow {
263        return false;
264    }
265    sk.min_objectid = new_oid;
266    true
267}
268
269#[cfg(test)]
270mod tests {
271    use super::*;
272
273    fn header(objectid: u64, item_type: u32, offset: u64) -> SearchHeader {
274        SearchHeader {
275            transid: 0,
276            objectid,
277            offset,
278            item_type,
279            len: 0,
280        }
281    }
282
283    fn zeroed_search_key() -> btrfs_ioctl_search_key {
284        unsafe { mem::zeroed() }
285    }
286
287    // --- SearchKey constructors ---
288
289    #[test]
290    fn for_type_covers_all_objectids_and_offsets() {
291        let sk = SearchKey::for_type(5, 132);
292        assert_eq!(sk.tree_id, 5);
293        assert_eq!(sk.min_objectid, 0);
294        assert_eq!(sk.max_objectid, u64::MAX);
295        assert_eq!(sk.min_type, 132);
296        assert_eq!(sk.max_type, 132);
297        assert_eq!(sk.min_offset, 0);
298        assert_eq!(sk.max_offset, u64::MAX);
299        assert_eq!(sk.min_transid, 0);
300        assert_eq!(sk.max_transid, u64::MAX);
301    }
302
303    #[test]
304    fn for_objectid_range_restricts_objectids() {
305        let sk = SearchKey::for_objectid_range(1, 84, 100, 200);
306        assert_eq!(sk.tree_id, 1);
307        assert_eq!(sk.min_objectid, 100);
308        assert_eq!(sk.max_objectid, 200);
309        assert_eq!(sk.min_type, 84);
310        assert_eq!(sk.max_type, 84);
311        assert_eq!(sk.min_offset, 0);
312        assert_eq!(sk.max_offset, u64::MAX);
313    }
314
315    // --- fill_search_key ---
316
317    #[test]
318    fn fill_search_key_copies_all_fields() {
319        let key = SearchKey {
320            tree_id: 1,
321            min_objectid: 10,
322            max_objectid: 20,
323            min_type: 30,
324            max_type: 40,
325            min_offset: 50,
326            max_offset: 60,
327            min_transid: 70,
328            max_transid: 80,
329        };
330        let mut sk = zeroed_search_key();
331        fill_search_key(&mut sk, &key);
332        assert_eq!(sk.tree_id, 1);
333        assert_eq!(sk.min_objectid, 10);
334        assert_eq!(sk.max_objectid, 20);
335        assert_eq!(sk.min_type, 30);
336        assert_eq!(sk.max_type, 40);
337        assert_eq!(sk.min_offset, 50);
338        assert_eq!(sk.max_offset, 60);
339        assert_eq!(sk.min_transid, 70);
340        assert_eq!(sk.max_transid, 80);
341    }
342
343    // --- advance_cursor: normal case ---
344
345    #[test]
346    fn advance_increments_offset() {
347        let mut sk = zeroed_search_key();
348        let last = header(256, 132, 100);
349        assert!(advance_cursor(&mut sk, &last));
350        assert_eq!(sk.min_objectid, 256);
351        assert_eq!(sk.min_type, 132);
352        assert_eq!(sk.min_offset, 101);
353    }
354
355    #[test]
356    fn advance_tracks_objectid_from_last_item() {
357        // This is the bug that was fixed: the cursor must track the last
358        // item's objectid, not leave min_objectid at its original value.
359        let mut sk = zeroed_search_key();
360        sk.min_objectid = 100; // original search started at 100
361        let last = header(300, 132, 50); // batch ended at objectid 300
362        assert!(advance_cursor(&mut sk, &last));
363        assert_eq!(sk.min_objectid, 300, "must track last item's objectid");
364        assert_eq!(sk.min_type, 132);
365        assert_eq!(sk.min_offset, 51);
366    }
367
368    #[test]
369    fn advance_tracks_type_from_last_item() {
370        let mut sk = zeroed_search_key();
371        let last = header(256, 180, 42);
372        assert!(advance_cursor(&mut sk, &last));
373        assert_eq!(sk.min_objectid, 256);
374        assert_eq!(sk.min_type, 180);
375        assert_eq!(sk.min_offset, 43);
376    }
377
378    // --- advance_cursor: offset overflow ---
379
380    #[test]
381    fn advance_offset_overflow_bumps_type() {
382        let mut sk = zeroed_search_key();
383        let last = header(256, 132, u64::MAX);
384        assert!(advance_cursor(&mut sk, &last));
385        assert_eq!(sk.min_objectid, 256);
386        assert_eq!(sk.min_type, 133);
387        assert_eq!(sk.min_offset, 0);
388    }
389
390    // --- advance_cursor: type overflow ---
391
392    #[test]
393    fn advance_type_overflow_bumps_objectid() {
394        let mut sk = zeroed_search_key();
395        let last = header(256, u32::MAX, u64::MAX);
396        assert!(advance_cursor(&mut sk, &last));
397        assert_eq!(sk.min_objectid, 257);
398        assert_eq!(sk.min_type, 0);
399        assert_eq!(sk.min_offset, 0);
400    }
401
402    // --- advance_cursor: full keyspace exhaustion ---
403
404    #[test]
405    fn advance_all_overflow_returns_false() {
406        let mut sk = zeroed_search_key();
407        let last = header(u64::MAX, u32::MAX, u64::MAX);
408        assert!(!advance_cursor(&mut sk, &last));
409    }
410
411    // --- advance_cursor: edge cases ---
412
413    #[test]
414    fn advance_zero_key() {
415        let mut sk = zeroed_search_key();
416        let last = header(0, 0, 0);
417        assert!(advance_cursor(&mut sk, &last));
418        assert_eq!(sk.min_objectid, 0);
419        assert_eq!(sk.min_type, 0);
420        assert_eq!(sk.min_offset, 1);
421    }
422
423    #[test]
424    fn advance_objectid_max_type_zero_offset_max() {
425        // offset overflows, type bumps to 1
426        let mut sk = zeroed_search_key();
427        let last = header(u64::MAX, 0, u64::MAX);
428        assert!(advance_cursor(&mut sk, &last));
429        assert_eq!(sk.min_objectid, u64::MAX);
430        assert_eq!(sk.min_type, 1);
431        assert_eq!(sk.min_offset, 0);
432    }
433
434    #[test]
435    fn advance_preserves_unrelated_search_key_fields() {
436        let mut sk = zeroed_search_key();
437        sk.max_objectid = 999;
438        sk.max_type = 888;
439        sk.max_offset = 777;
440        sk.max_transid = 666;
441        let last = header(10, 20, 30);
442        advance_cursor(&mut sk, &last);
443        assert_eq!(sk.max_objectid, 999);
444        assert_eq!(sk.max_type, 888);
445        assert_eq!(sk.max_offset, 777);
446        assert_eq!(sk.max_transid, 666);
447    }
448}