hexz_server/nbd.rs
1//! Network Block Device (NBD) protocol server implementation.
2//!
3//! This module implements the NBD protocol (version 3.0+, fixed newstyle negotiation)
4//! to expose Hexz snapshots as block devices over TCP. Clients can mount the snapshot
5//! using standard NBD client tools like `nbd-client` (Linux) or connect directly via
6//! the NBD protocol.
7//!
8//! # Protocol Overview
9//!
10//! The NBD protocol consists of three phases:
11//!
12//! 1. **Handshake**: Server announces capabilities (flags) and magic values
13//! 2. **Option Negotiation**: Client requests export info and flags
14//! 3. **Transmission**: Client sends read/write/flush/trim commands
15//!
16//! This implementation follows the "fixed newstyle" negotiation introduced in NBD 3.0,
17//! which is more robust than the legacy "oldstyle" protocol.
18//!
19//! # Protocol Reference
20//!
21//! - NBD Protocol Specification: <https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md>
22//! - RFC (draft): <https://www.ietf.org/archive/id/draft-ietf-nbd-protocol-00.html>
23//!
24//! # Security Considerations
25//!
26//! - **Read-only mode**: This implementation always exports snapshots as read-only
27//! to prevent accidental modification
//! - **No encryption**: This server does not implement NBD's optional TLS upgrade
//!   (`NBD_OPT_STARTTLS`), so all traffic is sent in cleartext.
//!   For secure access over untrusted networks, use an SSH tunnel or VPN.
30//! - **No authentication**: NBD does not provide authentication. Access control
31//! must be implemented at the network level (firewall, localhost-only binding).
32//!
33//! # Performance Characteristics
34//!
35//! - **Throughput**: Typically limited by snapshot decompression (~500-2000 MB/s)
36//! rather than network bandwidth for local connections
37//! - **Latency**: Read latency includes network RTT + decompression time (~1-5 ms total)
38//! - **Concurrency**: Each client connection is handled by a separate Tokio task
39//!
40//! # Example Usage
41//!
42//! ```no_run
43//! # use std::sync::Arc;
44//! # use hexz_core::File;
45//! # use hexz_server::nbd::handle_client;
46//! # use tokio::net::TcpListener;
47//! # #[tokio::main]
48//! # async fn main() -> anyhow::Result<()> {
49//! // Server-side (in hexz-server)
50//! let listener = TcpListener::bind("127.0.0.1:10809").await?;
51//! // ... load snapshot into Arc<File> ...
52//! # let snap: Arc<File> = Arc::new(todo!());
53//!
54//! loop {
55//! let (socket, _) = listener.accept().await?;
56//! let snap = snap.clone();
57//! tokio::spawn(async move {
58//! if let Err(e) = handle_client(socket, snap).await {
59//! eprintln!("NBD client error: {}", e);
60//! }
61//! });
62//! }
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! # Client-Side Usage (Linux)
68//!
69//! ```bash
70//! # Connect NBD client to server
71//! sudo nbd-client localhost 10809 /dev/nbd0
72//!
73//! # Mount the block device
74//! sudo mount -o ro /dev/nbd0 /mnt/snapshot
75//!
76//! # Disconnect when done
77//! sudo nbd-client -d /dev/nbd0
78//! ```
79
80use anyhow::Result;
81use hexz_core::{File, SnapshotStream};
82use std::sync::Arc;
83use tokio::io::{AsyncReadExt, AsyncWriteExt};
84use tokio::net::TcpStream;
85
// --- Negotiation magics -------------------------------------------------------

/// First 64-bit value of the server greeting; the bytes spell ASCII `NBDMAGIC`.
const NBD_MAGIC: u64 = 0x4e42_444d_4147_4943;
/// Second greeting value and prefix of every client option; ASCII `IHAVEOPT`.
const NBD_OPT_MAGIC: u64 = 0x4948_4156_454f_5054;
/// Prefix of every server reply to a client option.
const NBD_REP_MAGIC: u64 = 0x0003_e889_0455_65a9;

// --- Handshake flags (bit 0 / bit 1 of the 16-bit server flags field) ----------

const NBD_FLAG_FIXED_NEWSTYLE: u16 = 0x0001;
/// Also reused (widened to `u32`) to test the same bit in the client flags.
const NBD_FLAG_NO_ZEROES: u16 = 0x0002;

// --- Per-export transmission flags ---------------------------------------------

const NBD_FLAG_HAS_FLAGS: u16 = 0x0001;
const NBD_FLAG_READ_ONLY: u16 = 0x0002;

// --- Option identifiers sent by the client -------------------------------------

const NBD_OPT_EXPORT_NAME: u32 = 1;
const NBD_OPT_ABORT: u32 = 2;
const NBD_OPT_INFO: u32 = 6;
const NBD_OPT_GO: u32 = 7;

// --- Option reply types sent by the server -------------------------------------

const NBD_REP_ACK: u32 = 1;
const NBD_REP_INFO: u32 = 3;

/// Information type carried inside an `NBD_REP_INFO` reply: export size + flags.
const NBD_INFO_EXPORT: u16 = 0;

// --- Transmission-phase command types ------------------------------------------

const NBD_CMD_READ: u16 = 0;
const NBD_CMD_WRITE: u16 = 1;
const NBD_CMD_DISC: u16 = 2;
const NBD_CMD_FLUSH: u16 = 3;
const NBD_CMD_TRIM: u16 = 4;

// --- Transmission-phase magics -------------------------------------------------

/// Prefix of every request header sent by the client.
const NBD_REQUEST_MAGIC: u32 = 0x2560_9513;
/// Prefix of every simple reply header sent by the server.
const NBD_REPLY_MAGIC: u32 = 0x6744_6698;

/// Upper bound (32 MiB) on any client-supplied length: option payloads and
/// request lengths above this are rejected before a buffer is allocated, so a
/// crafted packet cannot force an arbitrarily large allocation.
const NBD_MAX_BUFFER_SIZE: u32 = 32 << 20;
118
119/// Handle a single NBD client connection.
120///
121/// This function implements the complete NBD server lifecycle for one client:
122/// 1. Performs the NBD handshake and option negotiation
123/// 2. Enters the transmission phase to serve read requests
124/// 3. Handles disconnect when the client sends NBD_CMD_DISC
125///
126/// The connection is read-only and blocks are served directly from the snapshot's
127/// primary stream. Write, flush, and trim commands return error responses.
128///
129/// # Connection Lifecycle
130///
131/// ```text
132/// Client connects → Handshake → Option Negotiation → Transmission → Disconnect
133/// ↓ ↓ ↓
134/// Send magic Send export info Serve read commands
135/// ```
136///
137/// # Arguments
138///
139/// - `socket`: TCP connection to the NBD client
140/// - `snap`: Shared reference to the Hexz snapshot file
141///
142/// # Returns
143///
144/// Returns `Ok(())` when the client disconnects normally, or an error if the protocol
145/// is violated or I/O fails.
146///
147/// # Errors
148///
149/// This function returns an error if:
150/// - The client sends invalid magic values or malformed requests
151/// - Socket I/O fails (connection reset, timeout, etc.)
152/// - The snapshot cannot be read (decompression errors, backend failures)
153pub async fn handle_client(mut socket: TcpStream, snap: Arc<File>) -> Result<()> {
154 // --- Handshake (Fixed Newstyle) ---
155
156 // 1. Send Init Pass
157 socket.write_u64(NBD_MAGIC).await?;
158 socket.write_u64(NBD_OPT_MAGIC).await?;
159 // Global flags: FIXED_NEWSTYLE | NO_ZEROES
160 socket
161 .write_u16(NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES)
162 .await?;
163
164 // 2. Receive Client Flags
165 let client_flags = socket.read_u32().await?;
166 let client_supports_no_zeroes = (client_flags & (NBD_FLAG_NO_ZEROES as u32)) != 0;
167
168 // 3. Option Negotiation Loop
169 loop {
170 let magic = socket.read_u64().await?;
171 if magic != NBD_OPT_MAGIC {
172 anyhow::bail!("Invalid option magic");
173 }
174
175 let opt_id = socket.read_u32().await?;
176 let opt_len = socket.read_u32().await?;
177
178 if opt_len > NBD_MAX_BUFFER_SIZE {
179 anyhow::bail!("NBD option data too large: {} bytes", opt_len);
180 }
181
182 // Read option data
183 let mut opt_data = vec![0u8; opt_len as usize];
184 socket.read_exact(&mut opt_data).await?;
185
186 match opt_id {
187 NBD_OPT_ABORT => return Ok(()),
188 NBD_OPT_EXPORT_NAME => {
189 // Old-style negotiation finish.
190 let size = snap.size(SnapshotStream::Primary);
191 let export_flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY;
192
193 socket.write_u64(size).await?;
194 socket.write_u16(export_flags).await?;
195 // Only skip 124 zero bytes if client supports NO_ZEROES
196 if !client_supports_no_zeroes {
197 socket.write_all(&[0u8; 124]).await?;
198 }
199 break;
200 }
201 NBD_OPT_INFO | NBD_OPT_GO => {
202 let size = snap.size(SnapshotStream::Primary);
203 let export_flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY;
204
205 // Reply NBD_INFO_EXPORT
206 socket.write_u64(NBD_REP_MAGIC).await?;
207 socket.write_u32(opt_id).await?;
208 socket.write_u32(NBD_REP_INFO).await?;
209 socket.write_u32(12).await?; // Length of info block
210 socket.write_u16(NBD_INFO_EXPORT).await?;
211 socket.write_u64(size).await?;
212 socket.write_u16(export_flags).await?;
213
214 // Reply ACK
215 socket.write_u64(NBD_REP_MAGIC).await?;
216 socket.write_u32(opt_id).await?;
217 socket.write_u32(NBD_REP_ACK).await?;
218 socket.write_u32(0).await?;
219
220 if opt_id == NBD_OPT_GO {
221 break;
222 }
223 }
224 _ => {
225 // Unsupported option: Reply ERR_UNSUP (0x80000001 = 2^31 + 1)
226 socket.write_u64(NBD_REP_MAGIC).await?;
227 socket.write_u32(opt_id).await?;
228 socket.write_u32(0x80000001).await?;
229 socket.write_u32(0).await?;
230 }
231 }
232 }
233
234 // --- Transmission Phase ---
235
236 loop {
237 let magic = socket.read_u32().await?;
238 if magic != NBD_REQUEST_MAGIC {
239 anyhow::bail!("Invalid request magic: {:x}", magic);
240 }
241
242 let _flags = socket.read_u16().await?;
243 let type_ = socket.read_u16().await?;
244 let handle = socket.read_u64().await?;
245 let offset = socket.read_u64().await?;
246 let length = socket.read_u32().await?;
247
248 if length > NBD_MAX_BUFFER_SIZE {
249 anyhow::bail!("NBD request length too large: {} bytes", length);
250 }
251
252 match type_ {
253 NBD_CMD_READ => {
254 let mut error = 0u32;
255 let data = match snap.read_at(SnapshotStream::Primary, offset, length as usize) {
256 Ok(d) => d,
257 Err(e) => {
258 tracing::error!("Read error: {}", e);
259 error = 5; // EIO
260 Vec::new()
261 }
262 };
263
264 // Reply header
265 socket.write_u32(NBD_REPLY_MAGIC).await?;
266 socket.write_u32(error).await?;
267 socket.write_u64(handle).await?;
268
269 // Payload: NBD protocol requires `length` bytes regardless of error
270 if error == 0 {
271 socket.write_all(&data).await?;
272 if data.len() < length as usize {
273 let padding = vec![0u8; length as usize - data.len()];
274 socket.write_all(&padding).await?;
275 }
276 } else {
277 let padding = vec![0u8; length as usize];
278 socket.write_all(&padding).await?;
279 }
280 }
281 NBD_CMD_DISC => {
282 return Ok(());
283 }
284 NBD_CMD_WRITE | NBD_CMD_TRIM | NBD_CMD_FLUSH => {
285 // We are read-only. Read payload if write to drain socket
286 if type_ == NBD_CMD_WRITE {
287 let mut buf = vec![0u8; length as usize];
288 socket.read_exact(&mut buf).await?;
289 }
290
291 // Return EPERM (1)
292 let error = 1u32;
293 socket.write_u32(NBD_REPLY_MAGIC).await?;
294 socket.write_u32(error).await?;
295 socket.write_u64(handle).await?;
296 }
297 _ => {
298 // Unknown command: EINVAL (22)
299 let error = 22u32;
300 socket.write_u32(NBD_REPLY_MAGIC).await?;
301 socket.write_u32(error).await?;
302 socket.write_u64(handle).await?;
303 }
304 }
305 }
306}