netavark 1.9.0

A container network stack
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
use crate::error::{ErrorWrap, NetavarkError, NetavarkResult};
use crate::network::{constants, internal_types, types};
use crate::wrap;
use ipnet::IpNet;
use log::debug;
use netlink_packet_route::{
    MACVLAN_MODE_BRIDGE, MACVLAN_MODE_PASSTHRU, MACVLAN_MODE_PRIVATE, MACVLAN_MODE_SOURCE,
    MACVLAN_MODE_VEPA,
};
use nix::sched;
use sha2::{Digest, Sha512};
use std::collections::HashMap;
use std::env;
use std::fmt::Display;
use std::fs::File;
use std::io::{self, Error};
use std::net::IpAddr;
use std::net::Ipv4Addr;
use std::net::Ipv6Addr;
use std::os::unix::prelude::*;
use std::str::FromStr;
use sysctl::{Sysctl, SysctlError};

use super::netlink;

// Numeric ipvlan mode values returned by `CoreUtils::get_ipvlan_mode_from_string`.
// NOTE(review): presumably these mirror the kernel's IPVLAN_MODE_* values — confirm.

/// Mode returned for "l2", and also when no mode (or an empty one) is given.
pub const IPVLAN_MODE_L2: u16 = 0;
/// Mode returned for "l3".
pub const IPVLAN_MODE_L3: u16 = 1;
/// Mode returned for "l3s".
pub const IPVLAN_MODE_L3S: u16 = 2;

/// Type that carries the helper functions defined in its `impl` block
/// (address encoding/decoding, macvlan/ipvlan mode parsing, hashing, sysctl).
pub struct CoreUtils {
    /// NOTE(review): not read by any code visible in this file; presumably the
    /// network namespace this instance refers to — confirm against callers.
    pub networkns: String,
}

/// Returns the DNS port configured through the `NETAVARK_DNS_PORT` environment
/// variable.
///
/// Falls back to port 53 whenever the variable cannot be read.
///
/// # Errors
///
/// Returns `NetavarkError::Message` when the variable is set but its value
/// does not parse as a `u16`.
pub fn get_netavark_dns_port() -> Result<u16, NetavarkError> {
    match env::var("NETAVARK_DNS_PORT") {
        // unset (or unreadable) variable: use the default port
        Err(_) => Ok(53),
        Ok(port_string) => port_string.parse::<u16>().map_err(|e| {
            NetavarkError::Message(format!("Invalid NETAVARK_DNS_PORT {port_string}: {e}"))
        }),
    }
}

/// Looks up `name` in the optional option map and parses its value as `T`.
///
/// Returns `Ok(None)` when `opts` is `None` or does not contain `name`, and
/// `Ok(Some(value))` when the entry exists and parses successfully.
///
/// # Errors
///
/// Returns `NetavarkError::Message` naming the option when the stored string
/// cannot be parsed as `T`.
pub fn parse_option<T>(
    opts: &Option<HashMap<String, String>>,
    name: &str,
) -> NetavarkResult<Option<T>>
where
    T: FromStr,
    <T as FromStr>::Err: Display,
{
    match opts.as_ref().and_then(|map| map.get(name)) {
        // option not set at all
        None => Ok(None),
        Some(raw) => raw.parse::<T>().map(Some).map_err(|err| {
            NetavarkError::Message(format!("unable to parse \"{name}\": {err}"))
        }),
    }
}

pub fn get_ipam_addresses<'a>(
    per_network_opts: &'a types::PerNetworkOptions,
    network: &'a types::Network,
) -> Result<internal_types::IPAMAddresses, std::io::Error> {
    let addresses = match network
        .ipam_options
        .as_ref()
        .and_then(|map| map.get("driver").cloned())
        .as_deref()
    {
        // when option is none default to host local
        Some(constants::IPAM_HOSTLOCAL) | None => {
            // static ip vector
            let mut container_addresses = Vec::new();
            // gateway ip vector
            let mut gateway_addresses = Vec::new();
            // network addresses for response
            let mut net_addresses: Vec<types::NetAddress> = Vec::new();
            // bool for ipv6
            let mut ipv6_enabled = false;

            // nameservers which can be configured for this container
            let mut nameservers: Vec<IpAddr> = Vec::new();

            let static_ips = match per_network_opts.static_ips.as_ref() {
                None => {
                    return Err(std::io::Error::new(
                        std::io::ErrorKind::Other,
                        "no static ips provided",
                    ))
                }
                Some(i) => i,
            };

            // prepare a vector of static aps with appropriate cidr
            for (idx, subnet) in network.subnets.iter().flatten().enumerate() {
                let subnet_mask_cidr = subnet.subnet.prefix_len();
                if let Some(gw) = subnet.gateway {
                    let gw_net = match ipnet::IpNet::new(gw, subnet_mask_cidr) {
                        Ok(dest) => dest,
                        Err(err) => {
                            return Err(std::io::Error::new(
                                std::io::ErrorKind::Other,
                                format!("failed to parse address {gw}/{subnet_mask_cidr}: {err}"),
                            ))
                        }
                    };
                    gateway_addresses.push(gw_net);
                    nameservers.push(gw);
                }

                // for dual-stack network.ipv6_enabled could be false do explicit check
                if subnet.subnet.addr().is_ipv6() {
                    ipv6_enabled = true;
                }

                // Build up response information
                let container_address: ipnet::IpNet =
                    match format!("{}/{}", static_ips[idx], subnet_mask_cidr).parse() {
                        Ok(i) => i,
                        Err(e) => {
                            return Err(Error::new(std::io::ErrorKind::Other, e));
                        }
                    };
                // Add the IP to the address_vector
                container_addresses.push(container_address);
                net_addresses.push(types::NetAddress {
                    gateway: subnet.gateway,
                    ipnet: container_address,
                });
            }

            let routes: Vec<netlink::Route> = match create_route_list(&network.routes) {
                Ok(r) => r,
                Err(e) => {
                    return Err(Error::new(std::io::ErrorKind::Other, e));
                }
            };

            internal_types::IPAMAddresses {
                container_addresses,
                dhcp_enabled: false,
                gateway_addresses,
                routes,
                net_addresses,
                nameservers,
                ipv6_enabled,
            }
        }
        Some(constants::IPAM_NONE) => {
            // no ipam just return empty vectors
            internal_types::IPAMAddresses {
                container_addresses: vec![],
                dhcp_enabled: false,
                gateway_addresses: vec![],
                routes: vec![],
                net_addresses: vec![],
                nameservers: vec![],
                ipv6_enabled: false,
            }
        }
        Some(constants::IPAM_DHCP) => internal_types::IPAMAddresses {
            container_addresses: vec![],
            dhcp_enabled: true,
            gateway_addresses: vec![],
            routes: vec![],
            ipv6_enabled: false,
            net_addresses: vec![],
            nameservers: vec![],
        },
        Some(driver) => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("unsupported ipam driver {driver}"),
            ));
        }
    };

    Ok(addresses)
}

impl CoreUtils {
    /// Formats `bytes` as two-digit lowercase hex groups joined by `:`,
    /// e.g. `[0x0a, 0xff]` becomes `"0a:ff"`. An empty slice yields `""`.
    pub fn encode_address_to_hex(bytes: &[u8]) -> String {
        bytes
            .iter()
            .map(|x| format!("{x:02x}"))
            .collect::<Vec<String>>()
            .join(":")
    }

    /// Parses a MAC address written as six hex groups separated by `:` or `-`.
    ///
    /// Each group is parsed with `u8::from_str_radix(_, 16)`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` when a group is not a valid hex
    /// byte or when the number of groups is not exactly six.
    pub fn decode_address_from_hex(input: &str) -> Result<Vec<u8>, std::io::Error> {
        let bytes = input
            .split(|c| c == ':' || c == '-')
            .map(|b| u8::from_str_radix(b, 16))
            .collect::<Result<Vec<u8>, _>>()
            .map_err(|e| {
                std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("unable to parse mac address {input}: {e}"),
                )
            })?;

        if bytes.len() != 6 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("invalid mac length for address: {input}"),
            ));
        }

        Ok(bytes)
    }

    /// Maps a macvlan mode name to the matching `MACVLAN_MODE_*` constant.
    ///
    /// `None` and the empty string select bridge mode.
    ///
    /// # Errors
    ///
    /// Returns an error naming the mode for any unknown mode string.
    pub fn get_macvlan_mode_from_string(mode: Option<&str>) -> NetavarkResult<u32> {
        match mode {
            // default to bridge when unset
            None | Some("") | Some("bridge") => Ok(MACVLAN_MODE_BRIDGE),
            Some("private") => Ok(MACVLAN_MODE_PRIVATE),
            Some("vepa") => Ok(MACVLAN_MODE_VEPA),
            Some("passthru") => Ok(MACVLAN_MODE_PASSTHRU),
            Some("source") => Ok(MACVLAN_MODE_SOURCE),
            // anything else is rejected rather than silently mapped to a default
            Some(name) => Err(NetavarkError::msg(format!(
                "invalid macvlan mode \"{name}\""
            ))),
        }
    }

    /// Maps an ipvlan mode name to the matching `IPVLAN_MODE_*` constant.
    ///
    /// `None` and the empty string select l2 mode.
    ///
    /// # Errors
    ///
    /// Returns an error naming the mode for any unknown mode string.
    pub fn get_ipvlan_mode_from_string(mode: Option<&str>) -> NetavarkResult<u16> {
        match mode {
            // default to l2 when unset
            None | Some("") | Some("l2") => Ok(IPVLAN_MODE_L2),
            Some("l3") => Ok(IPVLAN_MODE_L3),
            Some("l3s") => Ok(IPVLAN_MODE_L3S),
            // anything else is rejected rather than silently mapped to a default
            Some(name) => Err(NetavarkError::msg(format!(
                "invalid ipvlan mode \"{name}\""
            ))),
        }
    }

    /// Returns the first `length` characters of the uppercase hex SHA-512
    /// digest of `network_name`.
    ///
    /// When `length` is larger than the hex digest (128 characters for
    /// SHA-512) the whole digest is returned instead of panicking.
    pub fn create_network_hash(network_name: &str, length: usize) -> String {
        let mut hasher = Sha512::new();
        hasher.update(network_name.as_bytes());
        let digest = hasher.finalize();
        let hash_string = format!("{digest:X}");
        // The hex string is pure ASCII, so byte offsets are char offsets;
        // clamp so an oversized `length` cannot index past the end.
        let end = length.min(hash_string.len());
        hash_string[..end].to_string()
    }

    /// Set a sysctl value by value's namespace.
    ///
    /// `ns_value` is handed to `sysctl::Ctl::new` to select the sysctl. If
    /// the current value already equals `val` nothing is written and the
    /// current value is returned; otherwise the result of writing `val` is
    /// returned.
    ///
    /// # Errors
    ///
    /// Propagates any `SysctlError` from looking up, reading or writing the
    /// sysctl.
    pub fn apply_sysctl_value(
        ns_value: impl AsRef<str>,
        val: impl AsRef<str>,
    ) -> Result<String, SysctlError> {
        let ns_value = ns_value.as_ref();
        let val = val.as_ref();
        debug!("Setting sysctl value for {} to {}", ns_value, val);
        let ctl = sysctl::Ctl::new(ns_value)?;
        let current = ctl.value_string()?;
        // skip the write when the value is already what we want
        if current == val {
            return Ok(current);
        }
        ctl.set_value_string(val)
    }
}

pub fn join_netns<Fd: AsFd>(fd: Fd) -> NetavarkResult<()> {
    match sched::setns(fd, sched::CloneFlags::CLONE_NEWNET) {
        Ok(_) => Ok(()),
        Err(e) => Err(NetavarkError::wrap(
            "setns",
            NetavarkError::Io(io::Error::from(e)),
        )),
    }
}

/// Runs an expression inside a network namespace and switches back to the
/// host namespace afterwards.
///
/// Arguments, in order:
/// 1. `$host`: fd of the host netns, joined again after the expression ran
/// 2. `$netns`: fd of the container netns, joined before the expression runs
/// 3. `$result`: identifier of the variable that receives the expression's value
/// 4. `$exec`: the expression that should be evaluated in the netns
///
/// The macro expands to statements that use `?` on `join_netns`, so it can
/// only be used inside a function whose return type accepts that error, and
/// `join_netns` must be in scope at the call site.
///
/// Note that `$exec` must not leave the enclosing function early (for example
/// via `?` or `return`): the switch back to the host namespace would then be
/// skipped. Store the `Result` in `$result` and check it after the macro.
#[macro_export]
macro_rules! exec_netns {
    ($host:expr, $netns:expr, $result:ident, $exec:expr) => {
        // enter the target namespace
        join_netns($netns)?;
        let $result = $exec;
        // go back to the host namespace
        join_netns($host)?;
    };
}

/// An opened network namespace file together with a netlink socket.
/// `open_netlink_sockets` returns one of these for the host and one for the
/// container namespace.
pub struct NamespaceOptions {
    /// Note we have to return the File object since the fd is only valid
    /// as long as the File object is valid
    pub file: File,
    /// Netlink socket paired with `file`; `open_netlink_sockets` creates the
    /// container one while the container namespace is joined.
    pub netlink: netlink::Socket,
}

pub fn open_netlink_sockets(
    netns_path: &str,
) -> NetavarkResult<(NamespaceOptions, NamespaceOptions)> {
    let netns = open_netlink_socket(netns_path).wrap("open container netns")?;
    let hostns = open_netlink_socket("/proc/self/ns/net").wrap("open host netns")?;

    let host_socket = netlink::Socket::new().wrap("host netlink socket")?;
    exec_netns!(
        hostns.as_fd(),
        netns.as_fd(),
        res,
        netlink::Socket::new().wrap("netns netlink socket")
    );

    let netns_sock = res?;
    Ok((
        NamespaceOptions {
            file: hostns,
            netlink: host_socket,
        },
        NamespaceOptions {
            file: netns,
            netlink: netns_sock,
        },
    ))
}

/// Opens the namespace file at `netns_path` and returns the `File`.
///
/// Despite the name no socket is created here; the returned file is what
/// `open_netlink_sockets` later uses as the namespace fd.
/// An open failure is passed through the `wrap!` macro with the message
/// `open {netns_path}`.
fn open_netlink_socket(netns_path: &str) -> NetavarkResult<File> {
    wrap!(File::open(netns_path), format!("open {netns_path}"))
}

/// Adds a default route through the first IPv4 and the first IPv6 gateway
/// found in `gws`.
///
/// Only one default route per address family is added; further gateways of a
/// family that was already handled are skipped. `metric` is passed on to every
/// route that is created.
///
/// # Errors
///
/// Returns an error when a route cannot be built or when adding it through
/// `sock` fails.
pub fn add_default_routes(
    sock: &mut netlink::Socket,
    gws: &[ipnet::IpNet],
    metric: Option<u32>,
) -> NetavarkResult<()> {
    let mut seen_v4 = false;
    let mut seen_v6 = false;
    for gateway in gws {
        let route = match gateway {
            ipnet::IpNet::V4(net) if !seen_v4 => {
                seen_v4 = true;
                netlink::Route::Ipv4 {
                    dest: ipnet::Ipv4Net::new(Ipv4Addr::UNSPECIFIED, 0)?,
                    gw: net.addr(),
                    metric,
                }
            }
            ipnet::IpNet::V6(net) if !seen_v6 => {
                seen_v6 = true;
                netlink::Route::Ipv6 {
                    dest: ipnet::Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 0)?,
                    gw: net.addr(),
                    metric,
                }
            }
            // a default route for this address family was already added
            _ => continue,
        };
        sock.add_route(&route)
            .wrap(format!("add default route {}", &route))?;
    }
    Ok(())
}

/// Converts the optional list of configured routes into `netlink::Route`s.
///
/// `None` results in an empty list. Gateway, destination and metric of every
/// route are copied over unchanged.
///
/// # Errors
///
/// Returns `NetavarkError::Message` for the first route whose gateway and
/// destination belong to different address families.
pub fn create_route_list(
    routes: &Option<Vec<types::Route>>,
) -> NetavarkResult<Vec<netlink::Route>> {
    routes
        .iter()
        .flatten()
        .map(|route| {
            let metric = route.metric;
            match (route.gateway, route.destination) {
                (IpAddr::V4(gw4), IpNet::V4(dst4)) => Ok(netlink::Route::Ipv4 {
                    dest: dst4,
                    gw: gw4,
                    metric,
                }),
                (IpAddr::V6(gw6), IpNet::V6(dst6)) => Ok(netlink::Route::Ipv6 {
                    dest: dst6,
                    gw: gw6,
                    metric,
                }),
                // mixed address families cannot form a valid route
                (IpAddr::V4(gw4), IpNet::V6(dst6)) => Err(NetavarkError::Message(format!(
                    "Route with ipv6 destination and ipv4 gateway ({dst6} via {gw4})"
                ))),
                (IpAddr::V6(gw6), IpNet::V4(dst4)) => Err(NetavarkError::Message(format!(
                    "Route with ipv4 destination and ipv6 gateway ({dst4} via {gw6})"
                ))),
            }
        })
        .collect()
}

/// Turns off IPv6 autoconf for the interface `if_name` by writing `0` to its
/// `autoconf` sysctl, since only manual configuration is wanted.
///
/// A missing sysctl (most likely a system without ipv6) and a read-only
/// `/proc` are both ignored.
///
/// # Errors
///
/// Any other sysctl failure is returned as `NetavarkError::Sysctl` wrapped
/// with the context `"failed to set autoconf sysctl"`.
pub fn disable_ipv6_autoconf(if_name: &str) -> NetavarkResult<()> {
    let autoconf_path = format!("/proc/sys/net/ipv6/conf/{if_name}/autoconf");
    match CoreUtils::apply_sysctl_value(autoconf_path, "0") {
        Ok(_) => Ok(()),
        // sysctl not found: we likely run on a system without ipv6, ignore
        Err(SysctlError::NotFound(_)) => Ok(()),
        // a read only /proc is ignored as well
        Err(SysctlError::IoError(ref e)) if e.raw_os_error() == Some(libc::EROFS) => Ok(()),
        Err(err) => Err(NetavarkError::wrap(
            "failed to set autoconf sysctl",
            NetavarkError::Sysctl(err),
        )),
    }
}