btrfs_uapi/tree_search.rs
1//! # Generic B-tree search: walking any internal btrfs tree via `BTRFS_IOC_TREE_SEARCH`
2//!
3//! The kernel's tree search ioctl lets userspace read any internal btrfs tree
4//! (chunk, root, quota, …) by specifying a key range. Items are returned in
5//! batches; [`tree_search`] advances the cursor automatically and calls a
6//! closure once per item until the range is exhausted.
7//!
8//! # Byte order
9//!
10//! [`SearchHeader`] fields (objectid, offset, type) are in host byte order:
11//! the kernel fills them in through the ioctl layer. The `data` slice passed
12//! to the callback contains the raw on-disk item payload, which is
13//! **little-endian**; callers must use `u64::from_le_bytes` and friends when
14//! interpreting it.
15//!
16//! # Ioctl version
17//!
18//! This module uses `BTRFS_IOC_TREE_SEARCH` (v1) with its fixed 3992-byte
19//! result buffer. This is sufficient for all item types used by this crate;
20//! the v2 variant with a configurable buffer size is not needed.
21
22use crate::raw::{
23 btrfs_ioc_tree_search, btrfs_ioctl_search_args, btrfs_ioctl_search_header,
24 btrfs_ioctl_search_key,
25};
26use std::{
27 mem,
28 os::{fd::AsRawFd, unix::io::BorrowedFd},
29};
30
31/// Parameters specifying which items to return from a tree search.
32///
33/// The kernel searches a 136-bit key space ordered as
34/// `(objectid << 72) | (type << 64) | offset`.
35/// All items whose key falls in the inclusive range `[min_key, max_key]` are
36/// returned.
37///
38/// Build a key for common cases with [`SearchKey::for_type`] or
39/// [`SearchKey::for_objectid_range`].
40///
41/// Tree IDs and item type codes are the `BTRFS_*_OBJECTID` and `BTRFS_*_KEY`
42/// constants from `crate::raw`, cast to `u64` and `u32` respectively at the
43/// call site.
44#[derive(Debug, Clone)]
45pub struct SearchKey {
46 /// Tree to search — use a `BTRFS_*_TREE_OBJECTID` constant from `crate::raw`.
47 pub tree_id: u64,
48 pub min_objectid: u64,
49 pub max_objectid: u64,
50 /// Item type — use a `BTRFS_*_KEY` constant from `crate::raw`.
51 pub min_type: u32,
52 pub max_type: u32,
53 pub min_offset: u64,
54 pub max_offset: u64,
55 /// Filter on the transaction ID of the *metadata block* that holds the
56 /// item, not the transaction that created the item itself.
57 pub min_transid: u64,
58 pub max_transid: u64,
59}
60
61impl SearchKey {
62 /// Return all items of `item_type` in `tree_id`, across every objectid
63 /// and offset.
64 pub fn for_type(tree_id: u64, item_type: u32) -> Self {
65 Self {
66 tree_id,
67 min_objectid: 0,
68 max_objectid: u64::MAX,
69 min_type: item_type,
70 max_type: item_type,
71 min_offset: 0,
72 max_offset: u64::MAX,
73 min_transid: 0,
74 max_transid: u64::MAX,
75 }
76 }
77
78 /// Return all items of `item_type` in `tree_id` whose objectid falls in
79 /// `[min_objectid, max_objectid]`.
80 pub fn for_objectid_range(
81 tree_id: u64,
82 item_type: u32,
83 min_objectid: u64,
84 max_objectid: u64,
85 ) -> Self {
86 Self {
87 min_objectid,
88 max_objectid,
89 ..Self::for_type(tree_id, item_type)
90 }
91 }
92}
93
94/// Metadata returned for each item found by [`tree_search`].
95///
96/// The header fields are in host byte order (the kernel fills them in through
97/// the ioctl layer). The accompanying `data` slice passed to the callback is
98/// the raw on-disk item payload and is in **little-endian** byte order.
99#[derive(Debug, Clone, Copy)]
100pub struct SearchHeader {
101 pub transid: u64,
102 pub objectid: u64,
103 pub offset: u64,
104 /// Item type (the `type` field of the btrfs key).
105 pub item_type: u32,
106 /// Length in bytes of the item's data payload.
107 pub len: u32,
108}
109
110/// Number of items to request per ioctl call.
111const ITEMS_PER_BATCH: u32 = 4096;
112
113/// Size of `btrfs_ioctl_search_header` as a compile-time constant.
114const SEARCH_HEADER_SIZE: usize = mem::size_of::<btrfs_ioctl_search_header>();
115
116/// Walk every item in the tree that falls within `key`, calling `f` once for
117/// each one.
118///
119/// `f` receives:
120/// * a reference to the item's [`SearchHeader`] (objectid, offset, type, …)
121/// * a byte slice of the item's raw on-disk data payload (little-endian)
122///
123/// The search stops early and the error is returned if:
124/// * `f` returns `Err(_)`
125/// * the underlying `BTRFS_IOC_TREE_SEARCH` ioctl fails
126///
127/// Returns `Ok(())` when the entire requested range has been scanned.
128///
129/// # Privileges
130///
131/// Most trees require `CAP_SYS_ADMIN`. The subvolume tree of the inode
132/// belonging to `fd` can be searched without elevated privileges by setting
133/// `key.tree_id = 0`.
134pub fn tree_search(
135 fd: BorrowedFd,
136 key: SearchKey,
137 mut f: impl FnMut(&SearchHeader, &[u8]) -> nix::Result<()>,
138) -> nix::Result<()> {
139 let mut args: btrfs_ioctl_search_args = unsafe { mem::zeroed() };
140
141 fill_search_key(&mut args.key, &key);
142
143 loop {
144 args.key.nr_items = ITEMS_PER_BATCH;
145
146 unsafe { btrfs_ioc_tree_search(fd.as_raw_fd(), &mut args) }?;
147
148 let nr = args.key.nr_items;
149 if nr == 0 {
150 break;
151 }
152
153 // Walk the result buffer. We use raw pointer reads to avoid borrow
154 // checker conflicts: args.buf (read-only after the ioctl) and
155 // args.key (mutated below for cursor advancement) are separate fields,
156 // but a Rust reference to one would prevent mutation of the other.
157 //
158 // SAFETY:
159 // * The ioctl has returned successfully, so args.buf contains nr valid
160 // (header, data) pairs totalling at most args.buf.len() bytes.
161 // * buf_base is derived from args.buf which lives for the entire loop
162 // body; it is not invalidated until args is dropped.
163 // * All raw reads are bounds-checked before dereferencing.
164 // * The `data` slices passed to `f` do not outlive this loop
165 // iteration — `f` takes `&[u8]`, not `&'static [u8]`.
166 let buf_base: *const u8 = args.buf.as_ptr().cast();
167 let buf_cap = args.buf.len();
168
169 let mut off = 0usize;
170 let mut last = SearchHeader {
171 transid: 0,
172 objectid: 0,
173 offset: 0,
174 item_type: 0,
175 len: 0,
176 };
177
178 for _ in 0..nr {
179 if off + SEARCH_HEADER_SIZE > buf_cap {
180 return Err(nix::errno::Errno::EOVERFLOW);
181 }
182 let raw_hdr: btrfs_ioctl_search_header =
183 unsafe { (buf_base.add(off) as *const btrfs_ioctl_search_header).read_unaligned() };
184 let hdr = SearchHeader {
185 transid: raw_hdr.transid,
186 objectid: raw_hdr.objectid,
187 offset: raw_hdr.offset,
188 item_type: raw_hdr.type_,
189 len: raw_hdr.len,
190 };
191 off += SEARCH_HEADER_SIZE;
192
193 let data_len = hdr.len as usize;
194 if off + data_len > buf_cap {
195 return Err(nix::errno::Errno::EOVERFLOW);
196 }
197 let data: &[u8] = unsafe { std::slice::from_raw_parts(buf_base.add(off), data_len) };
198 off += data_len;
199
200 f(&hdr, data)?;
201 last = hdr;
202 }
203
204 if !advance_cursor(&mut args.key, &last) {
205 break;
206 }
207 }
208
209 Ok(())
210}
211
212fn fill_search_key(sk: &mut btrfs_ioctl_search_key, key: &SearchKey) {
213 sk.tree_id = key.tree_id;
214 sk.min_objectid = key.min_objectid;
215 sk.max_objectid = key.max_objectid;
216 sk.min_type = key.min_type;
217 sk.max_type = key.max_type;
218 sk.min_offset = key.min_offset;
219 sk.max_offset = key.max_offset;
220 sk.min_transid = key.min_transid;
221 sk.max_transid = key.max_transid;
222}
223
224/// Advance the search cursor past `last` so the next batch begins from the
225/// item immediately after it in the 136-bit key space
226/// `(objectid << 72) | (type << 64) | offset`.
227///
228/// The kernel interprets `(min_objectid, min_type, min_offset)` as a compound
229/// tuple key, so all three fields must be updated together to point past the
230/// last returned item. Advancing only `min_offset` while leaving
231/// `min_objectid` at its original value would cause items from lower objectids
232/// that were already returned to satisfy the new minimum and be yielded again.
233///
234/// Returns `false` when the objectid also overflows, meaning the full key
235/// space has been exhausted.
236fn advance_cursor(sk: &mut btrfs_ioctl_search_key, last: &SearchHeader) -> bool {
237 let (new_offset, offset_overflow) = last.offset.overflowing_add(1);
238 if !offset_overflow {
239 sk.min_objectid = last.objectid;
240 sk.min_type = last.item_type;
241 sk.min_offset = new_offset;
242 return true;
243 }
244
245 sk.min_offset = 0;
246 let (new_type, type_overflow) = last.item_type.overflowing_add(1);
247 if !type_overflow {
248 sk.min_objectid = last.objectid;
249 sk.min_type = new_type;
250 return true;
251 }
252
253 sk.min_type = 0;
254 let (new_oid, oid_overflow) = last.objectid.overflowing_add(1);
255 if oid_overflow {
256 return false;
257 }
258 sk.min_objectid = new_oid;
259 true
260}