s2n_quic_platform/
syscall.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4// some platforms contain empty implementations so disable any warnings from those
5#![allow(unused_variables, unused_macros, unused_mut, clippy::let_and_return)]
6
7use crate::socket::stats;
8use core::ops::ControlFlow;
9use socket2::{Domain, Protocol, Socket, Type};
10use std::io;
11
/// Invokes `libc::$fn` with the supplied arguments and converts the C-style return value
/// into an `io::Result`.
///
/// A negative return value yields `Err` carrying `std::io::Error::last_os_error()`; any
/// other value is handed back unchanged inside `Ok`.
macro_rules! libc {
    ($fn: ident ( $($arg: expr),* $(,)* ) ) => {{
        // The call is wrapped in `unsafe` by the macro itself, so every call site is
        // responsible for passing arguments that are valid for the libc function.
        match unsafe { libc::$fn($($arg),*) } {
            code if code < 0 => Err(std::io::Error::last_os_error()),
            code => Ok(code),
        }
    }};
}
23
24#[cfg(s2n_quic_platform_socket_mmsg)]
25pub mod mmsg;
26#[cfg(s2n_quic_platform_socket_msg)]
27pub mod msg;
28
/// Identifies whether a socket is operated in blocking or non-blocking mode.
#[allow(dead_code)] // not every platform constructs this type
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SocketType {
    /// The socket is used in blocking mode
    Blocking,
    /// The socket is used in non-blocking mode
    NonBlocking,
}
35
/// Callbacks through which a socket syscall reports completed packets and errors.
pub trait SocketEvents {
    /// Invoked once `count` packets have been completed.
    ///
    /// Returning `Continue` tells the socket that the packets were acceptable and that it
    /// should carry on with the remaining packets. Returning `Break` makes the syscall stop
    /// looping and yield to the caller.
    fn on_complete(&mut self, count: usize) -> ControlFlow<()>;

    /// Invoked when an error occurs on a socket.
    ///
    /// Returning `Continue` tells the socket to discard the packet and carry on with the
    /// remaining packets. Returning `Break` tells the syscall to assume the current packet
    /// can be retried and to yield to the caller.
    fn on_error(&mut self, error: io::Error) -> ControlFlow<()>;
}
51
52#[cfg(unix)]
53pub trait UnixMessage: crate::message::Message {
54    fn send<E: SocketEvents>(
55        fd: std::os::unix::io::RawFd,
56        entries: &mut [Self],
57        events: &mut E,
58        stats: &stats::Sender,
59    );
60    fn recv<E: SocketEvents>(
61        fd: std::os::unix::io::RawFd,
62        ty: SocketType,
63        entries: &mut [Self],
64        events: &mut E,
65        stats: &stats::Sender,
66    );
67}
68
69pub fn udp_socket(addr: std::net::SocketAddr, only_v6: bool) -> io::Result<Socket> {
70    let domain = Domain::for_address(addr);
71    let socket_type = Type::DGRAM;
72    let protocol = Some(Protocol::UDP);
73
74    let socket = Socket::new(domain, socket_type, protocol)?;
75
76    let _ = socket.set_only_v6(only_v6);
77
78    Ok(socket)
79}
80
81/// Creates a UDP socket bound to the provided address
82pub fn bind_udp<A: std::net::ToSocketAddrs>(
83    addr: A,
84    reuse_address: bool,
85    reuse_port: bool,
86    only_v6: bool,
87) -> io::Result<Socket> {
88    let addr = addr.to_socket_addrs()?.next().ok_or_else(|| {
89        std::io::Error::new(
90            io::ErrorKind::InvalidInput,
91            "the provided bind address was empty",
92        )
93    })?;
94    let socket = udp_socket(addr, only_v6)?;
95
96    socket.set_reuse_address(reuse_address)?;
97
98    #[cfg(unix)]
99    socket.set_reuse_port(reuse_port)?;
100
101    // mark the variable as "used" regardless of platform support
102    let _ = reuse_port;
103
104    socket.bind(&addr.into())?;
105
106    Ok(socket)
107}
108
/// Binds a socket to a specified interface by name
///
/// Sets `SO_BINDTODEVICE` (level `SOL_SOCKET`) on `socket` with `ifname` as the option
/// value.
///
/// # Errors
///
/// Returns the OS error reported by `setsockopt` if the option cannot be applied.
#[cfg(feature = "xdp")]
pub fn bind_to_interface<F: std::os::unix::io::AsRawFd>(
    socket: &F,
    ifname: &std::ffi::CStr,
) -> io::Result<()> {
    // Only describe the bytes that actually belong to `ifname` (including its NUL
    // terminator). Always claiming `IF_NAMESIZE` bytes would ask the kernel to read past the
    // end of the buffer for names shorter than that. The length stays capped at
    // `IF_NAMESIZE`, so longer names are passed with the same option length as before.
    let name = ifname.to_bytes_with_nul();
    let len = name.len().min(libc::IF_NAMESIZE);

    libc!(setsockopt(
        socket.as_raw_fd(),
        libc::SOL_SOCKET,
        libc::SO_BINDTODEVICE,
        name.as_ptr() as *const _,
        len as _
    ))?;
    Ok(())
}
124
125/// Disables MTU discovery and fragmentation on the socket
126pub fn configure_mtu_disc(tx_socket: &Socket) -> bool {
127    let mut success = false;
128
129    //= https://www.rfc-editor.org/rfc/rfc9000#section-14
130    //# UDP datagrams MUST NOT be fragmented at the IP layer.
131
132    //= https://www.rfc-editor.org/rfc/rfc9000#section-14
133    //# In IPv4 [IPv4], the Don't Fragment (DF) bit MUST be set if possible, to
134    //# prevent fragmentation on the path.
135
136    //= https://www.rfc-editor.org/rfc/rfc8899#section-3
137    //# In IPv4, a probe packet MUST be sent with the Don't
138    //# Fragment (DF) bit set in the IP header and without network layer
139    //# endpoint fragmentation.
140
141    //= https://www.rfc-editor.org/rfc/rfc8899#section-4.5
142    //# A PL implementing this specification MUST suspend network layer
143    //# processing of outgoing packets that enforces a PMTU
144    //# [RFC1191][RFC8201] for each flow utilizing DPLPMTUD and instead use
145    //# DPLPMTUD to control the size of packets that are sent by a flow.
146    #[cfg(s2n_quic_platform_mtu_disc)]
147    {
148        use std::os::unix::io::AsRawFd;
149
150        // IP_PMTUDISC_PROBE setting will set the DF (Don't Fragment) flag
151        // while also ignoring the Path MTU. This means packets will not
152        // be fragmented, and the EMSGSIZE error will not be returned for
153        // packets larger than the Path MTU according to the kernel.
154        success |= libc!(setsockopt(
155            tx_socket.as_raw_fd(),
156            libc::IPPROTO_IP,
157            libc::IP_MTU_DISCOVER,
158            &libc::IP_PMTUDISC_PROBE as *const _ as _,
159            core::mem::size_of_val(&libc::IP_PMTUDISC_PROBE) as _,
160        ))
161        .is_ok();
162
163        success |= libc!(setsockopt(
164            tx_socket.as_raw_fd(),
165            libc::IPPROTO_IPV6,
166            libc::IPV6_MTU_DISCOVER,
167            &libc::IP_PMTUDISC_PROBE as *const _ as _,
168            core::mem::size_of_val(&libc::IP_PMTUDISC_PROBE) as _,
169        ))
170        .is_ok();
171    }
172
173    success
174}
175
176/// Configures the socket to return TOS/ECN information as part of the ancillary data
177pub fn configure_tos(rx_socket: &Socket) -> bool {
178    let mut success = false;
179
180    #[cfg(unix)]
181    {
182        use std::os::unix::io::AsRawFd;
183        let enabled: libc::c_int = 1;
184
185        if let Some((level, ty)) = crate::features::tos_v4::SOCKOPT {
186            success |= libc!(setsockopt(
187                rx_socket.as_raw_fd(),
188                level,
189                ty,
190                &enabled as *const _ as _,
191                core::mem::size_of_val(&enabled) as _,
192            ))
193            .is_ok();
194        }
195
196        if let Some((level, ty)) = crate::features::tos_v6::SOCKOPT {
197            success |= libc!(setsockopt(
198                rx_socket.as_raw_fd(),
199                level,
200                ty,
201                &enabled as *const _ as _,
202                core::mem::size_of_val(&enabled) as _,
203            ))
204            .is_ok();
205        }
206    }
207
208    success
209}
210
211/// Configures the socket to return local address and interface information as part of the
212/// ancillary data
213pub fn configure_pktinfo(rx_socket: &Socket) -> bool {
214    let mut success = false;
215
216    // Set up the RX socket to pass information about the local address and interface
217    #[cfg(unix)]
218    {
219        use std::os::unix::io::AsRawFd;
220        let enabled: libc::c_int = 1;
221
222        if let Some((level, ty)) = crate::features::pktinfo_v4::SOCKOPT {
223            success |= libc!(setsockopt(
224                rx_socket.as_raw_fd(),
225                level,
226                ty,
227                &enabled as *const _ as _,
228                core::mem::size_of_val(&enabled) as _,
229            ))
230            .is_ok();
231        }
232
233        if let Some((level, ty)) = crate::features::pktinfo_v6::SOCKOPT {
234            success |= libc!(setsockopt(
235                rx_socket.as_raw_fd(),
236                level,
237                ty,
238                &enabled as *const _ as _,
239                core::mem::size_of_val(&enabled) as _,
240            ))
241            .is_ok();
242        }
243    }
244
245    success
246}
247
248pub fn configure_gro(rx_socket: &Socket) -> bool {
249    let mut success = false;
250
251    #[cfg(unix)]
252    if let Some((level, ty)) = crate::features::gro::SOCKOPT {
253        use std::os::unix::io::AsRawFd;
254        let enabled: libc::c_int = 1;
255
256        success |= libc!(setsockopt(
257            rx_socket.as_raw_fd(),
258            level as _,
259            ty as _,
260            &enabled as *const _ as _,
261            core::mem::size_of_val(&enabled) as _
262        ))
263        .is_ok();
264    }
265
266    success
267}