1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// SPDX-License-Identifier: MIT

use anyhow::Context;
use byteorder::{ByteOrder, NativeEndian};
use netlink_packet_utils::{
    buffer,
    nla::{self, DefaultNla, NlaBuffer},
    parsers::{parse_string, parse_u32, parse_u8},
    traits::{Emitable, Parseable},
    DecodeError,
};

use crate::constants::*;

#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Nla {
    /// Path to which the socket was bound. This attribute is known as
    /// `UNIX_DIAG_NAME` in the kernel.
    Name(String),
    /// VFS information for this socket. This attribute is known as
    /// `UNIX_DIAG_VFS` in the kernel.
    Vfs(Vfs),
    /// Inode number of the socket's peer. This attribute is reported
    /// for connected socket only. This attribute is known as
    /// `UNIX_DIAG_PEER` in the kernel.
    Peer(u32),
    /// The payload associated with this attribute is an array of
    /// inode numbers of sockets that have passed the `connect(2)`
    /// call, but haven't been processed with `accept(2)` yet. This
    /// attribute is reported for listening sockets only. This
    /// attribute is known as `UNIX_DIAG_ICONS` in the kernel.
    PendingConnections(Vec<u32>),
    /// This attribute corresponds to the `UNIX_DIAG_RQLEN`. It
    /// reports the length of the socket receive queue, and the queue
    /// size limit. Note that for **listening** sockets the receive
    /// queue is used to store actual data sent by other sockets. It
    /// is used to store pending connections. So the meaning of this
    /// attribute differs for listening sockets.
    ///
    /// For **listening** sockets:
    ///
    /// - the first the number is the number of pending connections. It should
    ///   be equal to `Nla::PendingConnections` value's length.
    /// - the second number is the backlog queue maximum length, which equals
    ///   to the value passed as the second argument to `listen(2)`
    ///
    /// For other sockets:
    ///
    /// - the first number is the amount of data in receive queue (**note**: I
    ///   am not sure if it is the actual amount of data or the amount of
    ///   memory allocated. The two might differ because of memory allocation
    ///   strategies: more memory than strictly necessary may be allocated for
    ///   a given `sk_buff`)
    /// - the second number is the memory used by outgoing data. Note that
    ///   strictly UNIX sockets don't have a send queue, since the data they
    ///   send is directly written into the destination socket receive queue.
    ///   But the memory allocated for this data is still counted from the
    ///   sender point of view.
    ReceiveQueueLength(u32, u32),
    /// Socket memory information. See [`MemInfo`] for more details.
    MemInfo(MemInfo),
    /// Shutown state: one of [`SHUT_RD`], [`SHUT_WR`] or [`SHUT_RDWR`]
    Shutdown(u8),
    /// Unknown attribute
    Other(DefaultNla),
}

pub const VFS_LEN: usize = 8;

buffer!(VfsBuffer(8) {
    inode: (u32, 0..4),
    device: (u32, 4..8),
});

#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Vfs {
    /// Inode number
    inode: u32,
    /// Device number
    device: u32,
}

impl<T: AsRef<[u8]>> Parseable<VfsBuffer<T>> for Vfs {
    fn parse(buf: &VfsBuffer<T>) -> Result<Self, DecodeError> {
        Ok(Self {
            inode: buf.inode(),
            device: buf.device(),
        })
    }
}

impl Emitable for Vfs {
    fn buffer_len(&self) -> usize {
        VFS_LEN
    }

    fn emit(&self, buf: &mut [u8]) {
        let mut buf = VfsBuffer::new(buf);
        buf.set_inode(self.inode);
        buf.set_device(self.device);
    }
}

pub const MEM_INFO_LEN: usize = 36;

buffer!(MemInfoBuffer(MEM_INFO_LEN) {
    unused_sk_rmem_alloc: (u32, 0..4),
    so_rcvbuf: (u32, 4..8),
    unused_sk_wmem_queued: (u32, 8..12),
    max_datagram_size: (u32, 12..16),
    unused_sk_fwd_alloc: (u32, 16..20),
    alloc: (u32, 20..24),
    unused_sk_optmem: (u32, 24..28),
    unused_backlog: (u32, 28..32),
    unused_drops: (u32, 32..36),
});

/// # Warning
///
/// I don't have a good understanding of the Unix Domain Sockets, thus
/// take the following documentation with a *huge* grain of salt.
///
/// # Documentation
///
/// ## `UNIX_DIAG_MEMINFO` vs `INET_DIAG_SK_MEMINFO`
///
/// `MemInfo` represent an `UNIX_DIAG_MEMINFO` NLA. This NLA has the
/// same structure than `INET_DIAG_SKMEMINFO`, but since Unix sockets
/// don't actually use the network stack, many fields are not relevant
/// and are always set to 0. According to iproute2 commit
/// [51ff9f2453d066933f24170f0106a7deeefa02d9](https://patchwork.ozlabs.org/patch/222700/), only three attributes can have non-zero values.
///
/// ## Particularities of UNIX sockets
///
/// One particularity of UNIX sockets is that they don't really have a
/// send queue: when sending data, the kernel finds the destination
/// socket and enqueues the data directly in its receive queue (which
/// [see also this StackOverflow
/// answer](https://stackoverflow.com/questions/9644251/how-do-unix-domain-sockets-differentiate-between-multiple-clients)). For
/// instance in `unix_dgram_sendmsg()` in `net/unix/af_unix.c` we
/// have:
///
/// ```c
/// // `other` refers to the peer socket here
/// skb_queue_tail(&other->sk_receive_queue, skb);
/// ```
///
/// Another particularity is that the kernel keeps track of the memory
/// using the sender's `sock.sk_wmem_alloc` attribute. The receiver's
/// `sock.sk_rmem_alloc` is always zero. Memory is allocated when data
/// is written to a socket, and is reclaimed when the data is read
/// from the peer's socket.
///
/// Last but not least, the way unix sockets handle incoming
/// connection differs from the TCP sockets. For TCP sockets, the
/// queue used to store pending connections is
/// `sock.sk_ack_backlog`. But UNIX sockets use the receive queue to
/// store them. They can do that because a listening socket only
/// receive connections, they do not receive actual data from other
/// socket, so there is no ambiguity about the nature of the data
/// stored in the receive queue.
// /// We can see that in `unix_stream_sendmsg()` for instance we have
// /// the follownig function calls:
// ///
// /// ```
// /// unix_stream_sendmsg()
// ///     -> sock_alloc_send_pskb()
// ///     -> skb_set_owner_w()
// ///     -> refcount_add(size, &sk->sk_wmem_alloc);
/// ```
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct MemInfo {
    /// Value of `SO_RCVBUF`, although it does not have any effect on
    /// Unix Domain Sockets. As per `man unix(7)`:
    ///
    /// > The `SO_SNDBUF` socket option does have an effect for UNIX
    /// > domain sockets, but the `SO_RCVBUF` option does not.
    ///
    /// This attribute corresponds to `sock.sk_rcvbuf` in the kernel.
    pub so_rcvbuf: u32,
    /// Maximum size in in bytes of a datagram, as set by
    /// `SO_SNDBUF`. As per `man unix(7)`:
    ///
    /// > For datagram sockets, the `SO_SNDBUF` value imposes an upper
    /// > limit on the size of outgoing datagrams. This limit is
    /// > calculated as the doubled (see `socket(7)`) option value
    /// > less 32 bytes used for overhead.
    ///
    /// This attribute corresponds to `sock.sk_sndbuf` in the kernel.
    pub max_datagram_size: u32,
    /// Memory currently allocated for the data sent but not yet read
    /// from the receiving socket(s). The memory is tracked using the
    /// sending socket `sock.sk_wmem_queued` attribute in the kernel.
    ///
    /// Note that this quantity is a little larger than the actual
    /// data being sent because it takes into account the overhead of
    /// the `sk_buff`s used internally:
    ///
    /// ```c
    /// /* in net/core/sock.c, sk_wmem_alloc is set in
    ///    skb_set_owner_w() with: */
    /// refcount_add(skb->truesize, &sk->sk_wmem_alloc);
    ///
    /// /* truesize is set by __alloc_skb() in net/core/skbuff.c
    ///    by: */
    /// skb->truesize = SKB_TRUESIZE(size);
    ///
    /// /* and SKB_TRUESIZE is defined as: */
    /// #define SKB_TRUESIZE(X) ((X) +                        \
    ///     SKB_DATA_ALIGN(sizeof(struct sk_buff)) +          \
    ///     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
    /// ```
    pub alloc: u32,
}

impl<T: AsRef<[u8]>> Parseable<MemInfoBuffer<T>> for MemInfo {
    fn parse(buf: &MemInfoBuffer<T>) -> Result<Self, DecodeError> {
        Ok(Self {
            so_rcvbuf: buf.so_rcvbuf(),
            max_datagram_size: buf.max_datagram_size(),
            alloc: buf.alloc(),
        })
    }
}

impl Emitable for MemInfo {
    fn buffer_len(&self) -> usize {
        MEM_INFO_LEN
    }

    fn emit(&self, buf: &mut [u8]) {
        let mut buf = MemInfoBuffer::new(buf);
        buf.set_unused_sk_rmem_alloc(0);
        buf.set_so_rcvbuf(self.so_rcvbuf);
        buf.set_unused_sk_wmem_queued(0);
        buf.set_max_datagram_size(self.max_datagram_size);
        buf.set_unused_sk_fwd_alloc(0);
        buf.set_alloc(self.alloc);
        buf.set_unused_sk_optmem(0);
        buf.set_unused_backlog(0);
        buf.set_unused_drops(0);
    }
}

impl nla::Nla for Nla {
    fn value_len(&self) -> usize {
        use self::Nla::*;
        match *self {
            // +1 because we need to append a null byte
            Name(ref s) => s.as_bytes().len() + 1,
            Vfs(_) => VFS_LEN,
            Peer(_) => 4,
            PendingConnections(ref v) => 4 * v.len(),
            ReceiveQueueLength(_, _) => 8,
            MemInfo(_) => MEM_INFO_LEN,
            Shutdown(_) => 1,
            Other(ref attr) => attr.value_len(),
        }
    }

    fn emit_value(&self, buffer: &mut [u8]) {
        use self::Nla::*;
        match *self {
            Name(ref s) => {
                buffer[..s.len()].copy_from_slice(s.as_bytes());
                buffer[s.len()] = 0;
            }
            Vfs(ref value) => value.emit(buffer),
            Peer(value) => NativeEndian::write_u32(buffer, value),
            PendingConnections(ref values) => {
                for (i, v) in values.iter().enumerate() {
                    NativeEndian::write_u32(&mut buffer[i * 4..], *v);
                }
            }
            ReceiveQueueLength(v1, v2) => {
                NativeEndian::write_u32(buffer, v1);
                NativeEndian::write_u32(&mut buffer[4..], v2);
            }
            MemInfo(ref value) => value.emit(buffer),
            Shutdown(value) => buffer[0] = value,
            Other(ref attr) => attr.emit_value(buffer),
        }
    }

    fn kind(&self) -> u16 {
        use self::Nla::*;
        match *self {
            Name(_) => UNIX_DIAG_NAME,
            Vfs(_) => UNIX_DIAG_VFS,
            Peer(_) => UNIX_DIAG_PEER,
            PendingConnections(_) => UNIX_DIAG_ICONS,
            ReceiveQueueLength(_, _) => UNIX_DIAG_RQLEN,
            MemInfo(_) => UNIX_DIAG_MEMINFO,
            Shutdown(_) => UNIX_DIAG_SHUTDOWN,
            Other(ref attr) => attr.kind(),
        }
    }
}

impl<'a, T: AsRef<[u8]> + ?Sized> Parseable<NlaBuffer<&'a T>> for Nla {
    fn parse(buf: &NlaBuffer<&'a T>) -> Result<Self, DecodeError> {
        let payload = buf.value();
        Ok(match buf.kind() {
            UNIX_DIAG_NAME => {
                let err = "invalid UNIX_DIAG_NAME value";
                Self::Name(parse_string(payload).context(err)?)
            }
            UNIX_DIAG_VFS => {
                let err = "invalid UNIX_DIAG_VFS value";
                let buf = VfsBuffer::new_checked(payload).context(err)?;
                Self::Vfs(Vfs::parse(&buf).context(err)?)
            }
            UNIX_DIAG_PEER => Self::Peer(
                parse_u32(payload).context("invalid UNIX_DIAG_PEER value")?,
            ),
            UNIX_DIAG_ICONS => {
                if payload.len() % 4 != 0 {
                    return Err(DecodeError::from("invalid UNIX_DIAG_ICONS"));
                }
                Self::PendingConnections(
                    payload.chunks(4).map(NativeEndian::read_u32).collect(),
                )
            }
            UNIX_DIAG_RQLEN => {
                if payload.len() != 8 {
                    return Err(DecodeError::from("invalid UNIX_DIAG_RQLEN"));
                }
                Self::ReceiveQueueLength(
                    NativeEndian::read_u32(&payload[..4]),
                    NativeEndian::read_u32(&payload[4..]),
                )
            }
            UNIX_DIAG_MEMINFO => {
                let err = "invalid UNIX_DIAG_MEMINFO value";
                let buf = MemInfoBuffer::new_checked(payload).context(err)?;
                Self::MemInfo(MemInfo::parse(&buf).context(err)?)
            }
            UNIX_DIAG_SHUTDOWN => Self::Shutdown(
                parse_u8(payload)
                    .context("invalid UNIX_DIAG_SHUTDOWN value")?,
            ),
            kind => Self::Other(
                DefaultNla::parse(buf)
                    .context(format!("unknown NLA type {kind}"))?,
            ),
        })
    }
}