1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
//! Inbound event handling: socket-readable, cross-core ring drain, and
//! connection teardown. The event loop (`run`), transport setup
//! (`accept_ready`, `flush_conn`, `flush_dirty`, `maybe_auto_rewrite_aof`), and
//! cross-shard send/backlog plumbing live in [`crate::shard`]; the
//! *semantics* (routing, execution, reduction) live in [`crate::exec`].
//! Split out so each file stays under the 500-LOC house rule without
//! breaking the established two-impl-block layering.
use std::io;
use kevy_resp::parse_command_borrowed;
use crate::Commands;
use crate::message::Inbound;
use crate::shard::Shard;
/// What [`Shard::dispatch_batch`] saw: how far the parse cursor got,
/// whether it stopped on malformed input, and whether the conn was
/// closed by one of its own commands (QUIT) mid-batch.
///
/// The struct is plain `usize`/`bool` data, so it derives the cheap
/// value-type traits: `Debug` for log / assertion output, and
/// `Copy` + `Eq` so outcomes can be passed and compared by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct BatchOutcome {
    /// Number of bytes at the front of the buffer that were covered by
    /// dispatched commands; the caller drains exactly this many.
    pub(crate) consumed: usize,
    /// `true` when parsing stopped because the next command was malformed.
    pub(crate) protocol_error: bool,
    /// `true` when the connection was no longer registered after one of
    /// its commands ran.
    pub(crate) conn_gone: bool,
}
impl<C: Commands> Shard<C> {
/// Run every complete RESP command found at the front of `buf` through
/// `handle_command`, parsing with the borrowed-argv parser (the hot path
/// shared by both reactors).
///
/// The walk ends when the parser reports no further complete command,
/// when it reports malformed input, or as soon as `conn_id` is no longer
/// present in `self.conns` after a command ran. In every case the
/// returned `consumed` counts only the bytes of commands that were
/// actually dispatched. Tail retention in the buffer and the AOF
/// group-commit window around the batch remain the caller's job.
pub(crate) fn dispatch_batch(&mut self, conn_id: u64, buf: &[u8]) -> BatchOutcome {
    let mut cursor = 0usize;
    // The loop yields the two flags; `cursor` carries the byte count.
    let (protocol_error, conn_gone) = loop {
        let (argv, used) = match parse_command_borrowed(&buf[cursor..]) {
            Ok(Some(parsed)) => parsed,
            // Nothing complete left at the cursor: clean stop.
            Ok(None) => break (false, false),
            // Malformed input: stop and let the caller report it.
            Err(_) => break (true, false),
        };
        // Prefetch for the argument in slot 1 (treated as the key)
        // before the command itself executes.
        if let Some(key) = argv.get(1) {
            self.store.prefetch_for_key(key);
        }
        self.handle_command(conn_id, &argv);
        drop(argv);
        cursor += used;
        // The command may have closed its own connection; the rest of
        // the batch is then not dispatched.
        if !self.conns.contains_key(&conn_id) {
            break (false, true);
        }
    };
    BatchOutcome {
        consumed: cursor,
        protocol_error,
        conn_gone,
    }
}
/// Socket readable: read until WouldBlock, then parse out every full
/// RESP command and dispatch it.
///
/// The local fast path dispatches straight from an `ArgvBorrowed` view
/// into the connection's read buffer — no per-cmd memcpy. We swap
/// `conn.input` onto the stack (`mem::take`) for the parse-and-dispatch
/// loop so the borrowed argv doesn't conflict with `&mut self`; a
/// cursor advances past each command and ONE final `drain` moves the
/// (usually empty) unparsed tail to the front before the buf is
/// swapped back into the connection (if it still exists). Cross-
/// shard / MULTI queue / AOF call `args.to_argv()` at the handoff
/// juncture; only those paths still materialise an owned `Argv`.
pub(crate) fn conn_readable(&mut self, conn_id: u64) -> io::Result<()> {
{
let Some(conn) = self.conns.get_mut(&conn_id) else {
return Ok(());
};
loop {
match conn.sock.read(&mut self.read_buf) {
Ok(0) => {
conn.closing = true;
break;
}
Ok(n) => conn.input.extend_from_slice(&self.read_buf[..n]),
Err(e) if e.kind() == io::ErrorKind::WouldBlock => break,
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(_) => {
conn.closing = true;
break;
}
}
}
}
// Swap conn.input onto the stack so parse_command_borrowed can lend
// it to ArgvBorrowed without colliding with &mut self in dispatch.
let mut input_buf = match self.conns.get_mut(&conn_id) {
Some(c) => std::mem::take(&mut c.input),
None => return Ok(()),
};
// Group-commit window for `appendfsync always`: the writes dispatched
// from this pipelined read batch buffer their AOF appends and fsync
// once in `aof_end_group`, BEFORE `flush_conn` sends their replies.
self.aof_begin_group();
let outcome = self.dispatch_batch(conn_id, &input_buf);
// fsync the batch's buffered writes before any reply leaves the shard.
self.aof_end_group()?;
if outcome.conn_gone {
// Connection was closed mid-batch; drop the rest of the buf.
return Ok(());
}
// ONE tail drain (usually empty) — `dispatch_batch`'s cursor already
// walked the cmds, so nothing memmoves per command.
input_buf.drain(..outcome.consumed);
if let Some(c) = self.conns.get_mut(&conn_id) {
c.input = input_buf;
}
if outcome.protocol_error {
self.protocol_error(conn_id);
}
self.flush_conn(conn_id)
}
/// Start an AOF group-commit window by forwarding to the AOF handle's
/// `begin_group`; does nothing when this shard has no AOF handle.
///
/// Pair with [`Self::aof_end_group`] around a batch of writes so that an
/// `always` fsync policy syncs once per batch rather than once per
/// command, while still syncing before the batch's replies are sent.
#[inline]
pub(crate) fn aof_begin_group(&mut self) {
    if let Some(aof) = self.aof.as_mut() {
        aof.begin_group();
    }
}
/// End the AOF group-commit window by forwarding to the AOF handle's
/// `end_group` (one fsync for the batch, if any writes were buffered),
/// before replies leave. Does nothing when this shard has no AOF handle.
///
/// # Errors
/// Propagates whatever `end_group` reports, like other flush failures.
#[inline]
pub(crate) fn aof_end_group(&mut self) -> io::Result<()> {
    let Some(aof) = self.aof.as_mut() else {
        return Ok(());
    };
    aof.end_group()?;
    Ok(())
}
/// Pop and handle every pending cross-core message from each peer's
/// inbound ring (this shard's own slot is skipped).
///
/// Returns `Ok(true)` when at least one message was handled.
///
/// # Errors
/// Propagates failures from `flush_conn` and `aof_end_group`; messages
/// not yet popped are left in their rings.
///
/// # Panics
/// Panics with "peer inbox" if a peer's inbox slot is `None`.
pub(crate) fn drain_inbound(&mut self) -> io::Result<bool> {
    let mut progressed = false;
    for peer in 0..self.nshards {
        if peer == self.id {
            // A shard has no ring to itself.
            continue;
        }
        loop {
            let next = self.inboxes[peer].as_mut().expect("peer inbox").pop();
            let Some(msg) = next else {
                break;
            };
            progressed = true;
            match msg {
                // Single request: execute here, send the part back.
                Inbound::Request {
                    origin,
                    conn,
                    seq,
                    op,
                } => {
                    let part = self.exec_op(op);
                    let reply = Inbound::Response { conn, seq, part };
                    self.send_to(origin, reply);
                }
                // Single reply: fold it in, then flush that conn.
                Inbound::Response { conn, seq, part } => {
                    self.fold(conn, seq, part);
                    self.flush_conn(conn)?;
                }
                // Batched single-key dispatches to this (owning) shard:
                // run each one locally and answer the origin with one
                // `ResponseBatch`.
                Inbound::RequestBatch { origin, reqs } => {
                    let mut replies = Vec::with_capacity(reqs.len());
                    self.aof_begin_group();
                    for (conn, seq, argv, proto, meta) in reqs {
                        let part = self.run_dispatch(&argv, proto, meta);
                        // The spent argv husk travels back with the
                        // reply so the origin can pool it (see
                        // `RespBatch`).
                        replies.push((conn, seq, part, argv));
                    }
                    // The forwarded writes are fsynced before replying.
                    self.aof_end_group()?;
                    self.send_to(origin, Inbound::ResponseBatch(replies));
                }
                // Batched replies: fold each by seq, remember every
                // distinct conn in first-seen order (pipelined replies
                // share a conn), then flush each of them once.
                Inbound::ResponseBatch(resps) => {
                    let mut touched: Vec<u64> = Vec::new();
                    for (conn, seq, part, husk) in resps {
                        self.argv_pool.put(husk);
                        self.fold(conn, seq, part);
                        if touched.iter().all(|&seen| seen != conn) {
                            touched.push(conn);
                        }
                    }
                    touched
                        .into_iter()
                        .try_for_each(|conn| self.flush_conn(conn))?;
                }
                // Fire-and-forget batched pub/sub delivery; the output
                // appended for subscribers is flushed via `flush_dirty`.
                Inbound::DeliverPublish(batch) => {
                    for entry in &batch {
                        self.deliver_publish(&entry.0, &entry.1);
                    }
                }
                // ── Cross-shard BLOCK arbiter (see `block_xshard`) ──
                Inbound::BlockArm {
                    origin,
                    conn,
                    key,
                    kind,
                    serve_argv,
                    proto,
                } => {
                    self.target_arm(origin, conn, key, kind, serve_argv, proto);
                }
                Inbound::BlockReady { conn, key } => {
                    self.origin_on_ready(conn, &key);
                }
                Inbound::BlockServeReq { origin, conn, key } => {
                    let reply = self.target_serve(origin, conn, &key);
                    let resp = Inbound::BlockServeResp { conn, key, reply };
                    self.send_to(origin, resp);
                }
                Inbound::BlockServeResp { conn, key, reply } => {
                    self.origin_on_serve_resp(conn, key, reply);
                    self.flush_conn(conn)?;
                }
                Inbound::BlockCancel { origin, conn } => {
                    self.target_cancel(origin, conn);
                }
            }
        }
    }
    Ok(progressed)
}
/// Tear down connection `conn_id`; a no-op when it is not registered.
///
/// Removes the conn from the poller and the fd map, drops any blocked
/// waiters it holds, cancels cross-shard blocks it originated, and
/// removes its channel and pattern subscriptions from the shared
/// registries and the per-shard tables. The conn's `Socket` is released
/// when this function returns, which closes the fd.
pub(crate) fn close_conn(&mut self, conn_id: u64) {
    let Some(conn) = self.conns.remove(&conn_id) else {
        return;
    };

    let fd = conn.sock.raw();
    // Best-effort: a failed poller delete is deliberately ignored.
    let _ = self.poller.delete(fd);
    self.fd_to_conn.remove(&fd);

    // Remove any BLPOP / BRPOP / XREAD BLOCK waiter this conn was parked
    // in, across all of its watched keys. This is a cheap fast-out when
    // nothing is blocked, which is the common case.
    self.blocked.drop_for_conn(conn_id);
    // If this conn was the origin of a cross-shard block, cancel it so
    // the target shards drop their registrations.
    self.cancel_xshard_on_close(conn_id);

    self.unregister_subs(&conn.sub);

    // The local psub table must be cleaned BEFORE `unregister_psubs`,
    // which reads `psub_local` to decide whether our shard bit should be
    // cleared.
    for pat in &conn.psub {
        let emptied = match self.psub_local.get_mut(pat) {
            Some(ids) => {
                ids.retain(|&id| id != conn_id);
                ids.is_empty()
            }
            None => false,
        };
        if emptied {
            self.psub_local.remove(pat);
        }
    }
    self.unregister_psubs(&conn.psub);
    // `conn` (and its Socket) goes out of scope here → fd closed.
}
}