ts_netstack_smoltcp_core 0.3.3

command-channel-based userspace netstack built on smoltcp (core functionality)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
#![doc = include_str!("../README.md")]
#![no_std]

extern crate alloc;

#[cfg(feature = "std")]
extern crate std;

pub extern crate flume;
pub extern crate smoltcp;

use alloc::{
    collections::{BTreeMap, VecDeque},
    vec,
    vec::Vec,
};
use core::{
    net::IpAddr,
    pin::Pin,
    task::{Context, Poll},
};

use smoltcp::{
    iface::{PollIngressSingleResult, PollResult, SocketHandle},
    wire::{HardwareAddress, IpAddress},
};

mod command;
mod config;
mod pipe;
mod socket_impl;
mod stack_control_impl;
mod util;
mod wake_device;

#[doc(inline)]
pub use command::{
    Channel, ChannelClosedError, Command, Error, HasChannel, InternalErrorKind, Request, Response,
    raw, request, request_blocking, request_nonblocking, stack_control, tcp, udp,
};
pub use config::Config;
pub use pipe::{Pipe, PipeDev};
pub use socket_impl::tcp::ListenerHandle as TcpListenerHandle;
pub use stack_control_impl::NetstackControl;
use util::NoopCapDev;
pub use util::{DisplayExt, DisplayToDebug, OptionExt, ResultExt};
pub use wake_device::AsyncWakeDevice;

/// Internally i/o free userspace network stack built around `smoltcp`.
///
/// The stack never touches a network device on its own: commands arrive over the
/// command channel, and packets are exchanged only when a caller hands a device to one
/// of the `poll_device_*` methods (or to [`Netstack::wait_io_async`]).
pub struct Netstack {
    /// Configuration this stack was constructed with.
    config: Config,

    /// The `smoltcp` interface used for all ingress/egress processing.
    iface: smoltcp::iface::Interface,
    /// Socket storage handed to `iface` on every poll.
    socket_set: smoltcp::iface::SocketSet<'static>,

    /// Receiving end of the command channel; drained by [`Netstack::process_cmds`] and
    /// the `wait_for_cmd*` methods.
    command_rx: flume::Receiver<Request>,
    // Need to hold the sender to avoid closing the channel.
    command_tx: flume::Sender<Request>,

    /// Commands pending in a wouldblock state: to be processed again in the future for
    /// completion.
    blocked_commands: VecDeque<Request>,

    /// Set of TCP socket handles that are expected to close in the future, held onto for
    /// graceful shutdown.
    pending_tcp_closes: Vec<SocketHandle>,

    /// Active TCP listeners.
    ///
    /// These are registered here so that they can be polled to accept incoming connections
    /// without an explicit accept command: internally accepted connections are stored in
    /// a queue in the state, and the accept command just dequeues and returns the first
    /// ready one.
    tcp_listeners: BTreeMap<socket_impl::tcp::ListenerHandle, socket_impl::tcp::TcpListenerState>,
    // NOTE(review): presumably the id handed to the next registered listener; starts at 0
    // in `Netstack::new` -- its use is outside this file, confirm there.
    next_tcp_listener_id: usize,
}

impl Netstack {
    /// Construct a netstack with the given config and starting instant.
    ///
    /// The interface is created with an IP-level hardware address and the MTU from
    /// `ns_config.mtu`. The command channel is bounded when
    /// `ns_config.command_channel_capacity` is `Some`, and unbounded otherwise.
    ///
    /// # Panics
    ///
    /// If `ns_config.loopback` is set and smoltcp's `iface-max-addr-count` (feature flag)
    /// is less than 2.
    pub fn new(ns_config: Config, now: smoltcp::time::Instant) -> Netstack {
        let config = smoltcp::iface::Config::new(HardwareAddress::Ip);

        let mut iface = smoltcp::iface::Interface::new(
            config,
            // Placeholder device: only its capabilities (the MTU) are configured here.
            &mut NoopCapDev::with_caps(|caps| {
                caps.max_transmission_unit = ns_config.mtu;
            }),
            now,
        );

        if ns_config.loopback {
            iface.update_ip_addrs(|addrs| {
                if !set_loopback(addrs) {
                    // Say why we are aborting: a bare `panic!()` leaves the user guessing.
                    panic!(
                        "cannot install loopback addresses: smoltcp's `iface-max-addr-count` \
                         feature must allow at least 2 interface addresses"
                    );
                }
            });
        }

        let (tx, rx) = match ns_config.command_channel_capacity {
            Some(cap) => flume::bounded(cap),
            None => flume::unbounded(),
        };

        Netstack {
            iface,
            socket_set: smoltcp::iface::SocketSet::new(vec![]),
            command_tx: tx,
            command_rx: rx,
            config: ns_config,
            blocked_commands: Default::default(),
            pending_tcp_closes: Default::default(),
            tcp_listeners: Default::default(),
            next_tcp_listener_id: 0,
        }
    }

    /// Report the next time the netstack should be polled.
    pub fn poll_at(&mut self, now: smoltcp::time::Instant) -> Option<smoltcp::time::Instant> {
        self.iface.poll_at(now, &self.socket_set)
    }

    /// Ask the interface how long to wait, starting from `now`, before the netstack next
    /// wants to be polled, converted to a [`core::time::Duration`].
    pub fn poll_delay(&mut self, now: smoltcp::time::Instant) -> Option<core::time::Duration> {
        let delay = self.iface.poll_delay(now, &self.socket_set)?;

        Some(delay.into())
    }

    /// Drain the command queue, handling each request via [`Netstack::process_one_cmd`].
    ///
    /// Never blocks: stops at the first non-blocking receive that yields no command.
    #[tracing::instrument(skip_all)]
    pub fn process_cmds(&mut self) {
        loop {
            let Ok(request) = self.command_rx.try_recv() else {
                break;
            };

            self.process_one_cmd(request);
        }
    }

    /// Block the current thread until one command arrives on the channel.
    ///
    /// With `Some(timeout)` the wait is bounded by that duration; with `None` it waits
    /// indefinitely, and a disconnected channel is reported as
    /// [`flume::RecvTimeoutError::Disconnected`].
    #[tracing::instrument(skip_all, fields(?timeout))]
    pub fn wait_for_cmd_blocking(
        &mut self,
        timeout: Option<core::time::Duration>,
    ) -> Result<Request, flume::RecvTimeoutError> {
        match timeout {
            Some(limit) => self.command_rx.recv_timeout(limit),
            None => match self.command_rx.recv() {
                Ok(request) => Ok(request),
                Err(flume::RecvError::Disconnected) => Err(flume::RecvTimeoutError::Disconnected),
            },
        }
    }

    /// Build a future that resolves with the next command from the channel.
    ///
    /// The future owns its own clone of the receiver, so it does not borrow `self`. It
    /// resolves to `None` if the asynchronous receive fails.
    #[tracing::instrument(skip_all)]
    pub fn wait_for_cmd(&self) -> impl Future<Output = Option<Request>> + use<> {
        let commands = self.command_rx.clone();

        async move {
            match commands.recv_async().await {
                Ok(request) => Some(request),
                Err(_) => None,
            }
        }
    }

    /// Replace the interface's IP addresses with `ips`.
    ///
    /// Each address is stored as a host route (`/32` for IPv4, `/128` for IPv6). Loopback
    /// addresses are automatically appended if indicated by [`Config::loopback`].
    ///
    /// Returns `true` on success. `false` means smoltcp's address storage (sized by its
    /// feature flags) could not hold every submitted address plus the loopback entries;
    /// the previous addresses have already been cleared at that point.
    pub fn direct_set_ips(&mut self, ips: impl IntoIterator<Item = IpAddr>) -> bool {
        let want_loopback = self.config.loopback;
        let mut ok = true;

        self.iface.update_ip_addrs(|stored_ips| {
            stored_ips.clear();

            // `all` stops at the first address that does not fit.
            let all_stored = ips.into_iter().all(|ip| {
                let prefix_len = if ip.is_ipv4() { 32 } else { 128 };
                let cidr = smoltcp::wire::IpCidr::new(ip.into(), prefix_len);

                stored_ips.push(cidr).is_ok()
            });

            // Loopback is only attempted once every caller-supplied address is stored.
            ok = all_stored && (!want_loopback || set_loopback(stored_ips));
        });

        ok
    }

    /// Process a single command.
    #[tracing::instrument(skip_all, fields(?command, ?handle))]
    pub fn process_one_cmd(
        &mut self,
        Request {
            command,
            handle,
            resp,
        }: Request,
    ) {
        let cmd_resp = match command {
            Command::StackControl(cmd) => self.process_stack_control(cmd),
            Command::Udp(udp) => self.process_udp(udp, handle),
            Command::TcpStream(tcp) => self.process_tcp_stream(tcp, handle),
            Command::TcpListen(listen) => self.process_tcp_listen(listen, handle),
            Command::Raw(raw) => self.process_raw(raw, handle),
        };
        tracing::trace!(?cmd_resp, "command processed");

        match cmd_resp {
            Response::WouldBlock { command, handle } => {
                self.blocked_commands.push_back(Request {
                    command,
                    handle,
                    resp,
                });
            }
            otherwise => {
                if let Response::Error(e) = &otherwise {
                    tracing::debug!(error = %e, "command error");
                }

                if let Err(resp) = resp.send(otherwise) {
                    tracing::debug!(resp = ?resp.0, "response channel closed");
                }
            }
        }
    }

    /// Poll the lower device to send and receive packets, and attempt to complete any
    /// blocked socket commands.
    ///
    /// `now` is the timestamp passed to every `smoltcp` poll call made here; `dev` is the
    /// device packets are read from and written to.
    ///
    /// Ingress and egress are alternated until egress reports that it has nothing left to
    /// do. If anything changed, pending TCP closes are drained before returning.
    ///
    /// Returns whether there were any updates to socket state.
    #[tracing::instrument(skip_all, fields(%now))]
    pub fn poll_device_io(
        &mut self,
        now: smoltcp::time::Instant,
        dev: &mut impl smoltcp::phy::Device,
    ) -> bool {
        use smoltcp::iface::{PollIngressSingleResult, PollResult};

        // Goal of this function: complete all _synchronously_ available work given what is queued
        // in the underlying device and the netstack state. It may be a long time until this
        // function is called again, so leaving any available work for the next iteration could
        // potentially stall out sockets with I/O in flight until the next poll.
        //
        // It requires a bit of care to determine when it's possible that the netstack made
        // progress and should try to communicate with the lower dev. Specifically, while (as of
        // smoltcp 0.12) packet egress does not itself perform loopback (outgoing packets are always
        // emitted to the underlying device, even if the address belongs to this node or a route
        // points back at us), it's possible that `dev` could synchronously perform loopback after
        // egressing packets, i.e. they would immediately (synchronously) become available for
        // ingress again. This means that after we perform egress and any sockets make progress, we
        // need to try ingress. And after any ingress, TCP state machines may have (synchronously)
        // made progress and want to emit packets -- hence, we need to poll egress again.
        //
        // This is why this function is structured as a loop: we need to keep polling until both
        // ingress and egress report that they are done and no sockets have changed state.
        // Unfortunately poll_egress is O(n) in the number of sockets, but that's an unavoidable
        // cost of correctness given the design of smoltcp.

        // Accumulates over the whole call; this is the function's return value.
        let mut changed = false;

        // Give blocked commands and listeners a chance to progress before touching `dev`.
        self.pump_waiters();

        'outer: loop {
            // Tracks ingress-driven changes for this pass only, to decide whether waiters
            // need pumping before egress.
            let mut changed_this_iter = false;

            let span_ingress = tracing::trace_span!("ingress");

            span_ingress.in_scope(|| {
                // Ingest one packet at a time until the interface reports nothing left.
                loop {
                    match self
                        .iface
                        .poll_ingress_single(now, dev, &mut self.socket_set)
                    {
                        PollIngressSingleResult::None => {
                            break;
                        }
                        PollIngressSingleResult::PacketProcessed => {}
                        PollIngressSingleResult::SocketStateChanged => {
                            changed = true;
                            changed_this_iter = true;
                            self.pump_tcp_accept();

                            tracing::trace!("socket state changed");
                        }
                    }
                }
            });

            if changed_this_iter {
                // TODO(npry): need to validate through more thorough inspection of smoltcp
                //  source, but I don't _think_ the below comment is true: since receive()
                //  provides a TxToken, TCP devices should actually not need to be pumped. We
                //  may still want to call pump_blocked_commands to ready receives. Leaving
                //  this in for now as a conservative measure to ensure correctness, even if
                //  it costs a bit of performance.

                // Ingress can cause egress: TCP state machines may have advanced and want to send
                // packets now.
                //
                // Also unblocks any sockets that were waiting for a recv.
                self.pump_waiters();
            }

            let _span = tracing::trace_span!("egress").entered();

            match self.iface.poll_egress(now, dev, &mut self.socket_set) {
                PollResult::SocketStateChanged => {
                    changed = true;
                    tracing::trace!("socket state changed");

                    // Egress may have opened capacity in packet buffers for
                    // e.g. pending TCP or UDP sends, which we may be able to complete + send now.
                    //
                    // Need to fall through to the top of the loop in this case to ensure that
                    // if the underlying device synchronously looped back any packets, they're
                    // ingressed and processed by any waiting sockets.
                    self.pump_waiters();
                }
                // Egress had nothing to do: this is the only exit from the outer loop.
                PollResult::None => break 'outer,
            }
        }

        if changed {
            self.drain_tcp_closes();
        }

        changed
    }

    /// Poll the device for all synchronously-available packets and process them.
    ///
    /// The return value indicates whether packets were consumed and if sockets made
    /// progress:
    ///
    /// - `Poll::Pending`: no packets were processed because we were waiting for them to
    ///   arrive from `dev`.
    /// - `Poll::Ready(false)`: `dev` reported receive readiness but socket state did not
    ///   make progress
    /// - `Poll::Ready(true)`: we received packets and socket state _did_ make progress
    ///
    /// Whenever socket state made progress, blocked commands and listeners are pumped
    /// before returning, regardless of which condition ended the receive loop.
    ///
    /// When this function returns, it is guaranteed that all packets currently available to
    /// receive from `dev` have been processed.
    #[tracing::instrument(skip_all, fields(%now), ret, level = "trace")]
    pub fn poll_device_ingress_async(
        &mut self,
        cx: &mut core::task::Context<'_>,
        now: smoltcp::time::Instant,
        mut dev: Pin<&mut (impl AsyncWakeDevice + smoltcp::phy::Device + Unpin)>,
    ) -> Poll<bool> {
        let mut changed = false;
        let mut polled_successfully = false;

        loop {
            match dev.as_mut().poll_rx(cx) {
                Poll::Ready(()) => {
                    polled_successfully = true;
                }
                // The device ran dry after at least one successful poll: don't report
                // pending (we need to report that we made progress). Leave the loop rather
                // than returning so that the waiter pump below still runs when sockets
                // changed state.
                Poll::Pending if polled_successfully => break,
                // Nothing was ever ready, so nothing was processed and nothing changed.
                Poll::Pending => return Poll::Pending,
            }

            let _span = tracing::trace_span!("poll_ingress_single").entered();
            match self
                .iface
                .poll_ingress_single(now, dev.as_mut().get_mut(), &mut self.socket_set)
            {
                PollIngressSingleResult::None => {
                    break;
                }
                PollIngressSingleResult::PacketProcessed => {}
                PollIngressSingleResult::SocketStateChanged => {
                    changed = true;
                    self.pump_tcp_accept();
                    tracing::trace!("socket state changed");
                }
            }
        }

        if changed {
            // TODO(npry): need to validate through more thorough inspection of smoltcp
            //  source, but I don't _think_ the below comment is true: since receive()
            //  provides a TxToken, TCP devices should actually not need to be pumped. We may
            //  still want to call pump_blocked_commands to ready receives. Leaving this in
            //  for now as a conservative measure to ensure correctness, even if it costs a
            //  bit of performance.

            // Ingress can cause egress: TCP state machines may have advanced and want to send
            // packets now.
            //
            // Also unblocks any sockets that were waiting for a recv.
            self.pump_waiters();
        }

        Poll::Ready(changed)
    }

    /// Transmit every packet the netstack currently wants to put on the network.
    ///
    /// Returns:
    ///
    /// - `Poll::Pending` if _no_ packets could be sent because `dev` wasn't ready (if
    ///   [`AsyncWakeDevice::poll_tx`] returns `Poll::Pending`)
    /// - `Poll::Ready(false)` if `dev` was ready but no packets needed to be sent
    /// - `Poll::Ready(true)` if `dev` was ready and packets were sent
    #[tracing::instrument(skip_all, fields(%now), ret, level = "trace")]
    pub fn poll_device_egress_async(
        &mut self,
        cx: &mut core::task::Context<'_>,
        now: smoltcp::time::Instant,
        mut dev: Pin<&mut (impl AsyncWakeDevice + smoltcp::phy::Device + Unpin)>,
    ) -> Poll<bool> {
        // No transmit capacity yet; `poll_tx` was given `cx` for the wakeup.
        if dev.as_mut().poll_tx(cx).is_pending() {
            return Poll::Pending;
        }

        let outcome = self
            .iface
            .poll_egress(now, dev.as_mut().get_mut(), &mut self.socket_set);

        let sent = match outcome {
            PollResult::SocketStateChanged => true,
            PollResult::None => false,
        };

        if sent {
            tracing::trace!("socket state changed");

            // Sending may have freed room in socket buffers, so sends that were blocked
            // on that capacity get another chance right away.
            self.pump_waiters();
        }

        Poll::Ready(sent)
    }

    /// Attempt to make progress on internal state that is blocking on I/O.
    ///
    /// Calls [`Netstack::pump_tcp_accept`] followed by
    /// [`Netstack::pump_blocked_commands`], in that order.
    #[tracing::instrument(skip_all, level = "trace")]
    fn pump_waiters(&mut self) {
        // Pump accept first, then commands: blocked commands will have tried to run once, so they
        // will have created any listeners already (i.e. they can't affect the TCP accept loop).
        // Accepts however can unblock waiting commands.
        self.pump_tcp_accept();
        self.pump_blocked_commands();
    }

    /// Reprocess all blocked socket commands.
    ///
    /// This is `O(n)` in the number of socket commands in the queue: it attempts to run
    /// all of them. Any that return [`Response::WouldBlock`] are requeued as normal.
    #[tracing::instrument(skip_all, level = "trace")]
    fn pump_blocked_commands(&mut self) {
        // NB: we pop_front here and push_back in process_one_cmd: since we're taking len()
        // elements, we see everything that is currently in the deque exactly once, no matter
        // how many of them end up still blocked and pushed onto the back.
        for _ in 0..self.blocked_commands.len() {
            // The queue is not expected to shrink underneath us, but if a command handler
            // ever removes entries, stop cleanly instead of panicking.
            let Some(cmd) = self.blocked_commands.pop_front() else {
                break;
            };

            self.process_one_cmd(cmd);
        }
    }

    /// Create a future that exchanges packets with `dev`.
    ///
    /// The returned [`IoPoller`] becomes ready when `dev` sends or receives packets on
    /// the network. All synchronously-available network I/O is always performed.
    ///
    /// Assumes that alarms are handled separately and that `now` does not advance
    /// over the course of the polled future.
    pub fn wait_io_async<'stack, 'dev, D>(
        &'stack mut self,
        now: smoltcp::time::Instant,
        dev: &'dev mut D,
    ) -> IoPoller<'stack, 'dev, D>
    where
        D: AsyncWakeDevice + smoltcp::phy::Device + Unpin,
    {
        // `D: Unpin`, so pinning the plain mutable borrow is safe.
        let dev = Pin::new(dev);

        IoPoller {
            stack: self,
            dev,
            now,
            egress_done: false,
        }
    }
}

/// A future that becomes ready when the contained `smoltcp::phy::Device` sends or receives
/// packets on the network.
///
/// The future always completes _all_ synchronously-available network I/O, i.e. it polls
/// until `dev.poll_rx` and `dev.poll_tx` return `Poll::Pending`, and/or until the netstack
/// reports that there are no more packets to be sent.
///
/// Created by [`Netstack::wait_io_async`].
///
/// # Cancel safety
///
/// This future is completely cancel-safe.
pub struct IoPoller<'stack, 'dev, D> {
    /// The netstack whose sockets and blocked commands this future drives.
    stack: &'stack mut Netstack,
    /// Whether egress has returned `Poll::Ready(false)`, indicating that it is not waiting
    /// for transmit capacity in `dev` but just has no more work to do with
    /// currently-available data.
    ///
    /// This state should not change with repeated calls to `poll` because socket commands
    /// are the only reason `smoltcp` calls `dev.transmit()`, and we don't expect any new
    /// commands to be processed while this future is being `poll`ed.
    ///
    /// We don't store a similar flag for ingress because ingress readiness is controlled
    /// by the network: if new packets arrive between calls to `poll`, `poll_ingress` may
    /// return `Poll::Ready(true)` where it had previously returned `Poll::Ready(false)`.
    ///
    /// By contrast, while send capacity may become available in the future
    /// (`Poll::Pending` -> `Poll::Ready(_)`), having _things to send_ (`Poll::Ready(true)`)
    /// will not arise from a `Poll::Ready(false)` state without commands being processed.
    egress_done: bool,

    /// The current instant to use while polling this future.
    ///
    /// Alarms (e.g. for TCP retransmit) must be set externally.
    now: smoltcp::time::Instant,

    /// The device polled for receive/transmit readiness and used for packet I/O.
    dev: Pin<&'dev mut D>,
}

impl<D> Future for IoPoller<'_, '_, D>
where
    D: AsyncWakeDevice + smoltcp::phy::Device + Unpin,
{
    type Output = ();

    /// Drive ingress and egress on the device until neither can make further progress.
    ///
    /// Resolves to `Poll::Ready(())` if any ingress or egress pass changed socket state
    /// during this call (after draining pending TCP closes), and `Poll::Pending` otherwise.
    #[tracing::instrument(skip_all, fields(%self.now), ret, level = "trace")]
    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        // Logic here: future resolves when _either_ egress or ingress has made progress.
        // If neither makes forward progress (either because pending or nothing to do), return
        // pending.

        let Self {
            now,
            stack,
            dev,
            egress_done,
        } = self.get_mut();

        // Retry blocked commands and accepts before touching the device.
        stack.pump_waiters();

        let now = *now;

        let mut ingress_pending = false;
        let mut egress_pending = false;
        let mut progress_made = false;

        // Keep going until ingress is exhausted AND egress is either finished or blocked.
        while !(ingress_pending && (*egress_done || egress_pending)) {
            // Enter the span so that everything logged during this iteration is attributed
            // to it; a span that is created but never entered scopes nothing.
            let _span =
                tracing::trace_span!("poll_loop", ingress_pending, egress_done, egress_pending)
                    .entered();

            if !ingress_pending {
                match stack.poll_device_ingress_async(cx, now, dev.as_mut()) {
                    // `dev` doesn't have any packets to receive
                    Poll::Pending => ingress_pending = true,
                    // `dev` received packets but no sockets made progress. This means that
                    // `dev` is completely drained, so polling ingress again should return
                    // `Poll::Pending`.
                    Poll::Ready(false) => {
                        ingress_pending = true;
                        egress_pending = false;
                    }
                    // We received packets and socket state was updated as a result. Reset
                    // egress_pending as well to cover the possibility that successful receives
                    // made transmit capacity available (loopback case).
                    Poll::Ready(true) => {
                        ingress_pending = false;
                        egress_pending = false;
                        progress_made = true;
                    }
                }
            }

            if !(*egress_done || egress_pending) {
                match stack.poll_device_egress_async(cx, now, dev.as_mut()) {
                    // `dev` isn't ready to accept transmits, don't bother trying egress again this
                    // poll.
                    Poll::Pending => egress_pending = true,
                    // Not blocked, we just have no packets to send. Don't bother polling egress
                    // again, we should not be able to make progress.
                    Poll::Ready(false) => {
                        *egress_done = true;
                        egress_pending = false;
                    }
                    // We successfully sent packet(s). Reset ingress_pending as well to cover
                    // possibility of synchronous loopback.
                    Poll::Ready(true) => {
                        egress_pending = false;
                        ingress_pending = false;
                        progress_made = true;
                    }
                }
            }
        }

        if progress_made {
            stack.drain_tcp_closes();
            Poll::Ready(())
        } else {
            Poll::Pending
        }
    }
}

/// Append the IPv4 (`127.0.0.1/8`) and IPv6 (`::1/128`) loopback addresses to `ips`.
///
/// Returns `true` if both addresses were stored. The operation is all-or-nothing: when
/// fewer than two free slots remain, `ips` is left untouched and `false` is returned,
/// rather than storing one loopback address and failing on the other.
fn set_loopback<const N: usize>(ips: &mut heapless::Vec<smoltcp::wire::IpCidr, N>) -> bool {
    let loopbacks = [
        smoltcp::wire::IpCidr::new(IpAddress::v4(127, 0, 0, 1), 8),
        smoltcp::wire::IpCidr::new(IpAddress::v6(0, 0, 0, 0, 0, 0, 0, 1), 128),
    ];

    // Check the *remaining* room up front so a failure never leaves a half-applied list.
    // `len() <= capacity()` always holds, so the subtraction cannot underflow.
    if ips.capacity() - ips.len() < loopbacks.len() {
        return false;
    }

    loopbacks.into_iter().all(|cidr| ips.push(cidr).is_ok())
}