Skip to main content

hexz_server/
nbd.rs

1//! Network Block Device (NBD) protocol server implementation.
2//!
3//! This module implements the NBD protocol (version 3.0+, fixed newstyle negotiation)
4//! to expose Hexz snapshots as block devices over TCP. Clients can mount the snapshot
5//! using standard NBD client tools like `nbd-client` (Linux) or connect directly via
6//! the NBD protocol.
7//!
8//! # Protocol Overview
9//!
10//! The NBD protocol consists of three phases:
11//!
12//! 1. **Handshake**: Server announces capabilities (flags) and magic values
13//! 2. **Option Negotiation**: Client requests export info and flags
14//! 3. **Transmission**: Client sends read/write/flush/trim commands
15//!
16//! This implementation follows the "fixed newstyle" negotiation introduced in NBD 3.0,
17//! which is more robust than the legacy "oldstyle" protocol.
18//!
19//! # Protocol Reference
20//!
21//! - NBD Protocol Specification: <https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md>
22//! - RFC (draft): <https://www.ietf.org/archive/id/draft-ietf-nbd-protocol-00.html>
23//!
24//! # Security Considerations
25//!
26//! - **Read-only mode**: This implementation always exports snapshots as read-only
27//!   to prevent accidental modification
28//! - **No encryption**: The NBD protocol does not include built-in encryption.
29//!   For secure access over untrusted networks, use an SSH tunnel or VPN.
30//! - **No authentication**: NBD does not provide authentication. Access control
31//!   must be implemented at the network level (firewall, localhost-only binding).
32//!
33//! # Performance Characteristics
34//!
35//! - **Throughput**: Typically limited by snapshot decompression (~500-2000 MB/s)
36//!   rather than network bandwidth for local connections
37//! - **Latency**: Read latency includes network RTT + decompression time (~1-5 ms total)
38//! - **Concurrency**: Each client connection is handled by a separate Tokio task
39//!
40//! # Example Usage
41//!
42//! ```no_run
43//! # use std::sync::Arc;
44//! # use hexz_core::File;
45//! # use hexz_server::nbd::handle_client;
46//! # use tokio::net::TcpListener;
47//! # #[tokio::main]
48//! # async fn main() -> anyhow::Result<()> {
49//! // Server-side (in hexz-server)
50//! let listener = TcpListener::bind("127.0.0.1:10809").await?;
51//! // ... load snapshot into Arc<File> ...
52//! # let snap: Arc<File> = Arc::new(todo!());
53//!
54//! loop {
55//!     let (socket, _) = listener.accept().await?;
56//!     let snap = snap.clone();
57//!     tokio::spawn(async move {
58//!         if let Err(e) = handle_client(socket, snap).await {
59//!             eprintln!("NBD client error: {}", e);
60//!         }
61//!     });
62//! }
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! # Client-Side Usage (Linux)
68//!
69//! ```bash
70//! # Connect NBD client to server
71//! sudo nbd-client localhost 10809 /dev/nbd0
72//!
73//! # Mount the block device
74//! sudo mount -o ro /dev/nbd0 /mnt/snapshot
75//!
76//! # Disconnect when done
77//! sudo nbd-client -d /dev/nbd0
78//! ```
79
80use anyhow::Result;
81use hexz_core::{File, SnapshotStream};
82use std::sync::Arc;
83use tokio::io::{AsyncReadExt, AsyncWriteExt};
84use tokio::net::TcpStream;
85
// --- Handshake magics -------------------------------------------------------

/// First value the server sends after accept: ASCII `NBDMAGIC`.
const NBD_MAGIC: u64 = u64::from_be_bytes(*b"NBDMAGIC");
/// Sent right after `NBD_MAGIC` and expected in front of every client option:
/// ASCII `IHAVEOPT`.
const NBD_OPT_MAGIC: u64 = u64::from_be_bytes(*b"IHAVEOPT");
/// Prefix of every server reply to a client option.
const NBD_REP_MAGIC: u64 = 0x0003_e889_0455_65a9;

// --- Handshake flags (sent by the server, echoed back by the client) --------

const NBD_FLAG_FIXED_NEWSTYLE: u16 = 0x0001;
const NBD_FLAG_NO_ZEROES: u16 = 0x0002;

// --- Per-export transmission flags ------------------------------------------

const NBD_FLAG_HAS_FLAGS: u16 = 0x0001;
const NBD_FLAG_READ_ONLY: u16 = 0x0002;

// --- Option identifiers sent by the client during negotiation ---------------

const NBD_OPT_EXPORT_NAME: u32 = 1;
const NBD_OPT_ABORT: u32 = 2;
const NBD_OPT_INFO: u32 = 6;
const NBD_OPT_GO: u32 = 7;

// --- Option reply types sent by the server ----------------------------------

const NBD_REP_ACK: u32 = 1;
const NBD_REP_INFO: u32 = 3;

// --- Information types carried inside an `NBD_REP_INFO` reply ---------------

const NBD_INFO_EXPORT: u16 = 0;

// --- Transmission-phase command types ---------------------------------------

const NBD_CMD_READ: u16 = 0;
const NBD_CMD_WRITE: u16 = 1;
const NBD_CMD_DISC: u16 = 2;
const NBD_CMD_FLUSH: u16 = 3;
const NBD_CMD_TRIM: u16 = 4;

// --- Transmission-phase magics ----------------------------------------------

/// Magic expected at the start of every client request.
const NBD_REQUEST_MAGIC: u32 = 0x2560_9513;
/// Magic placed at the start of every server reply.
const NBD_REPLY_MAGIC: u32 = 0x6744_6698;

/// Upper bound (32 MiB) on option payloads and request lengths accepted from
/// a client, so that a crafted packet cannot force an arbitrarily large
/// allocation.
const NBD_MAX_BUFFER_SIZE: u32 = 32 << 20;
118
119/// Handle a single NBD client connection.
120///
121/// This function implements the complete NBD server lifecycle for one client:
122/// 1. Performs the NBD handshake and option negotiation
123/// 2. Enters the transmission phase to serve read requests
124/// 3. Handles disconnect when the client sends NBD_CMD_DISC
125///
126/// The connection is read-only and blocks are served directly from the snapshot's
127/// primary stream. Write, flush, and trim commands return error responses.
128///
129/// # Connection Lifecycle
130///
131/// ```text
132/// Client connects → Handshake → Option Negotiation → Transmission → Disconnect
133///                      ↓              ↓                    ↓
134///                  Send magic    Send export info    Serve read commands
135/// ```
136///
137/// # Arguments
138///
139/// - `socket`: TCP connection to the NBD client
140/// - `snap`: Shared reference to the Hexz snapshot file
141///
142/// # Returns
143///
144/// Returns `Ok(())` when the client disconnects normally, or an error if the protocol
145/// is violated or I/O fails.
146///
147/// # Errors
148///
149/// This function returns an error if:
150/// - The client sends invalid magic values or malformed requests
151/// - Socket I/O fails (connection reset, timeout, etc.)
152/// - The snapshot cannot be read (decompression errors, backend failures)
153pub async fn handle_client(mut socket: TcpStream, snap: Arc<File>) -> Result<()> {
154    // --- Handshake (Fixed Newstyle) ---
155
156    // 1. Send Init Pass
157    socket.write_u64(NBD_MAGIC).await?;
158    socket.write_u64(NBD_OPT_MAGIC).await?;
159    // Global flags: FIXED_NEWSTYLE | NO_ZEROES
160    socket
161        .write_u16(NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES)
162        .await?;
163
164    // 2. Receive Client Flags
165    let client_flags = socket.read_u32().await?;
166    let client_supports_no_zeroes = (client_flags & (NBD_FLAG_NO_ZEROES as u32)) != 0;
167
168    // 3. Option Negotiation Loop
169    loop {
170        let magic = socket.read_u64().await?;
171        if magic != NBD_OPT_MAGIC {
172            anyhow::bail!("Invalid option magic");
173        }
174
175        let opt_id = socket.read_u32().await?;
176        let opt_len = socket.read_u32().await?;
177
178        if opt_len > NBD_MAX_BUFFER_SIZE {
179            anyhow::bail!("NBD option data too large: {} bytes", opt_len);
180        }
181
182        // Read option data
183        let mut opt_data = vec![0u8; opt_len as usize];
184        socket.read_exact(&mut opt_data).await?;
185
186        match opt_id {
187            NBD_OPT_ABORT => return Ok(()),
188            NBD_OPT_EXPORT_NAME => {
189                // Old-style negotiation finish.
190                let size = snap.size(SnapshotStream::Primary);
191                let export_flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY;
192
193                socket.write_u64(size).await?;
194                socket.write_u16(export_flags).await?;
195                // Only skip 124 zero bytes if client supports NO_ZEROES
196                if !client_supports_no_zeroes {
197                    socket.write_all(&[0u8; 124]).await?;
198                }
199                break;
200            }
201            NBD_OPT_INFO | NBD_OPT_GO => {
202                let size = snap.size(SnapshotStream::Primary);
203                let export_flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY;
204
205                // Reply NBD_INFO_EXPORT
206                socket.write_u64(NBD_REP_MAGIC).await?;
207                socket.write_u32(opt_id).await?;
208                socket.write_u32(NBD_REP_INFO).await?;
209                socket.write_u32(12).await?; // Length of info block
210                socket.write_u16(NBD_INFO_EXPORT).await?;
211                socket.write_u64(size).await?;
212                socket.write_u16(export_flags).await?;
213
214                // Reply ACK
215                socket.write_u64(NBD_REP_MAGIC).await?;
216                socket.write_u32(opt_id).await?;
217                socket.write_u32(NBD_REP_ACK).await?;
218                socket.write_u32(0).await?;
219
220                if opt_id == NBD_OPT_GO {
221                    break;
222                }
223            }
224            _ => {
225                // Unsupported option: Reply ERR_UNSUP (0x80000001 = 2^31 + 1)
226                socket.write_u64(NBD_REP_MAGIC).await?;
227                socket.write_u32(opt_id).await?;
228                socket.write_u32(0x80000001).await?;
229                socket.write_u32(0).await?;
230            }
231        }
232    }
233
234    // --- Transmission Phase ---
235
236    loop {
237        let magic = socket.read_u32().await?;
238        if magic != NBD_REQUEST_MAGIC {
239            anyhow::bail!("Invalid request magic: {:x}", magic);
240        }
241
242        let _flags = socket.read_u16().await?;
243        let type_ = socket.read_u16().await?;
244        let handle = socket.read_u64().await?;
245        let offset = socket.read_u64().await?;
246        let length = socket.read_u32().await?;
247
248        if length > NBD_MAX_BUFFER_SIZE {
249            anyhow::bail!("NBD request length too large: {} bytes", length);
250        }
251
252        match type_ {
253            NBD_CMD_READ => {
254                let mut error = 0u32;
255                let data = match snap.read_at(SnapshotStream::Primary, offset, length as usize) {
256                    Ok(d) => d,
257                    Err(e) => {
258                        tracing::error!("Read error: {}", e);
259                        error = 5; // EIO
260                        Vec::new()
261                    }
262                };
263
264                // Reply header
265                socket.write_u32(NBD_REPLY_MAGIC).await?;
266                socket.write_u32(error).await?;
267                socket.write_u64(handle).await?;
268
269                // Payload: NBD protocol requires `length` bytes regardless of error
270                if error == 0 {
271                    socket.write_all(&data).await?;
272                    if data.len() < length as usize {
273                        let padding = vec![0u8; length as usize - data.len()];
274                        socket.write_all(&padding).await?;
275                    }
276                } else {
277                    let padding = vec![0u8; length as usize];
278                    socket.write_all(&padding).await?;
279                }
280            }
281            NBD_CMD_DISC => {
282                return Ok(());
283            }
284            NBD_CMD_WRITE | NBD_CMD_TRIM | NBD_CMD_FLUSH => {
285                // We are read-only. Read payload if write to drain socket
286                if type_ == NBD_CMD_WRITE {
287                    let mut buf = vec![0u8; length as usize];
288                    socket.read_exact(&mut buf).await?;
289                }
290
291                // Return EPERM (1)
292                let error = 1u32;
293                socket.write_u32(NBD_REPLY_MAGIC).await?;
294                socket.write_u32(error).await?;
295                socket.write_u64(handle).await?;
296            }
297            _ => {
298                // Unknown command: EINVAL (22)
299                let error = 22u32;
300                socket.write_u32(NBD_REPLY_MAGIC).await?;
301                socket.write_u32(error).await?;
302                socket.write_u64(handle).await?;
303            }
304        }
305    }
306}