use std::{collections::HashMap, net::IpAddr, os::fd::BorrowedFd, sync::Once};
use ipnet::IpNet;
use log::{debug, error};
use netlink_packet_route::{
nlas::link::{Info, InfoData, InfoKind, Nla, VethInfo},
LinkMessage,
};
use crate::{
dns::aardvark::AardvarkEntry,
error::{ErrorWrap, NetavarkError, NetavarkErrorList, NetavarkResult},
exec_netns,
firewall::{
iptables::MAX_HASH_SIZE,
state::{remove_fw_config, write_fw_config},
},
network::{constants, core_utils::disable_ipv6_autoconf, types},
};
use super::{
constants::{
ISOLATE_OPTION_FALSE, ISOLATE_OPTION_STRICT, ISOLATE_OPTION_TRUE,
NO_CONTAINER_INTERFACE_ERROR, OPTION_ISOLATE, OPTION_METRIC, OPTION_MTU,
OPTION_NO_DEFAULT_ROUTE, OPTION_VRF,
},
core_utils::{self, get_ipam_addresses, join_netns, parse_option, CoreUtils},
driver::{self, DriverInfo},
internal_types::{
IPAMAddresses, IsolateOption, PortForwardConfig, SetupNetwork, TearDownNetwork,
TeardownPortForward,
},
netlink,
types::StatusBlock,
};
/// Error message used by `get_interface_name` when the bridge interface name is missing or empty.
const NO_BRIDGE_NAME_ERROR: &str = "no bridge interface name given";
/// Parsed and validated settings for one bridge setup; built by `validate()` and read by `setup()`.
struct InternalData {
/// Name of the interface to create for the container (from the per-network options).
container_interface_name: String,
/// Name of the bridge interface on the host (from the network config).
bridge_interface_name: String,
/// Static MAC for the container interface, decoded from its hex form; `None` when not requested.
mac_address: Option<Vec<u8>>,
/// Addressing information as returned by `get_ipam_addresses`.
ipam: IPAMAddresses,
/// Value of the MTU network option; 0 when the option is not set.
mtu: u32,
/// Isolation mode parsed from the isolate network option.
isolate: IsolateOption,
/// Metric handed to `add_default_routes`; `validate()` always stores `Some`, defaulting to 100.
metric: Option<u32>,
/// When true, no default routes are added for the container.
no_default_route: bool,
/// Name of a VRF interface looked up when the bridge has to be created.
vrf: Option<String>,
}
/// Bridge network driver: sets up and tears down a host bridge plus a veth pair for a container.
pub struct Bridge<'a> {
/// Driver input (network config, per-network options, firewall driver, namespaces, ...).
info: DriverInfo<'a>,
/// Validated settings; `None` until `validate()` has succeeded.
data: Option<InternalData>,
}
impl<'a> Bridge<'a> {
pub fn new(info: DriverInfo<'a>) -> Self {
Bridge { info, data: None }
}
}
impl driver::NetworkDriver for Bridge<'_> {
/// Returns an owned copy of the configured network's name.
fn network_name(&self) -> String {
    self.info.network.name.to_owned()
}
/// Parses and checks all driver input and caches the result for `setup()`.
///
/// # Errors
/// Fails when the bridge or container interface name is missing/empty, when the
/// IPAM data cannot be resolved, when a network option cannot be parsed, or when
/// the static MAC is not valid hex.
fn validate(&mut self) -> NetavarkResult<()> {
    let bridge_interface_name =
        get_interface_name(self.info.network.network_interface.clone())?;

    let container_interface_name = &self.info.per_network_opts.interface_name;
    if container_interface_name.is_empty() {
        return Err(NetavarkError::msg(NO_CONTAINER_INTERFACE_ERROR));
    }

    let ipam = get_ipam_addresses(self.info.per_network_opts, self.info.network)?;

    // All remaining settings come from the network's option map.
    let options = &self.info.network.options;
    let mtu: u32 = parse_option(options, OPTION_MTU)?.unwrap_or(0);
    let isolate: IsolateOption = get_isolate_option(options)?;
    let metric: u32 = parse_option(options, OPTION_METRIC)?.unwrap_or(100);
    let no_default_route: bool =
        parse_option(options, OPTION_NO_DEFAULT_ROUTE)?.unwrap_or(false);
    let vrf: Option<String> = parse_option(options, OPTION_VRF)?;

    // Decode the optional static MAC; a malformed value is an error, absence is not.
    let mac_address = self
        .info
        .per_network_opts
        .static_mac
        .as_ref()
        .map(|mac| CoreUtils::decode_address_from_hex(mac))
        .transpose()?;

    self.data = Some(InternalData {
        container_interface_name: container_interface_name.clone(),
        bridge_interface_name,
        mac_address,
        ipam,
        mtu,
        isolate,
        metric: Some(metric),
        no_default_route,
        vrf,
    });
    Ok(())
}
/// Creates the bridge/veth interfaces and (for non-internal networks) the firewall rules.
///
/// `netlink_sockets` is `(host socket, container namespace socket)`.
///
/// Returns the status block describing the container interface and DNS settings,
/// plus an aardvark entry when DNS is enabled for the network.
///
/// # Errors
/// Fails when `validate()` was not called first, or when any sysctl, netlink or
/// firewall step fails.
fn setup(
    &self,
    netlink_sockets: (&mut netlink::Socket, &mut netlink::Socket),
) -> NetavarkResult<(StatusBlock, Option<AardvarkEntry>)> {
    let data = self
        .data
        .as_ref()
        .ok_or_else(|| NetavarkError::msg("must call validate() before setup()"))?;

    debug!("Setup network {}", self.info.network.name);
    debug!(
        "Container interface name: {} with IP addresses {:?}",
        data.container_interface_name, data.ipam.container_addresses
    );
    debug!(
        "Bridge name: {} with IP addresses {:?}",
        data.bridge_interface_name, data.ipam.gateway_addresses
    );

    // Forwarding has to be enabled on the host before any interface is created.
    setup_ipv4_fw_sysctl()?;
    if data.ipam.ipv6_enabled {
        setup_ipv6_fw_sysctl()?;
    }

    let (host_sock, netns_sock) = netlink_sockets;
    let container_veth_mac = create_interfaces(
        host_sock,
        netns_sock,
        data,
        self.info.network.internal,
        self.info.netns_host,
        self.info.netns_container,
    )?;

    // The status block reports exactly one interface: the container side of the veth.
    let container_interface = types::NetInterface {
        mac_address: container_veth_mac,
        subnets: Some(data.ipam.net_addresses.clone()),
    };
    let mut response = types::StatusBlock {
        dns_server_ips: Some(Vec::<IpAddr>::new()),
        dns_search_domains: Some(Vec::<String>::new()),
        interfaces: Some(HashMap::from([(
            data.container_interface_name.clone(),
            container_interface,
        )])),
    };

    let aardvark_entry = if self.info.network.dns_enabled {
        response.dns_server_ips = Some(data.ipam.nameservers.clone());
        response.dns_search_domains =
            Some(vec![constants::PODMAN_DEFAULT_SEARCH_DOMAIN.to_string()]);

        // Split the container addresses by address family.
        let mut container_ips_v4 = Vec::new();
        let mut container_ips_v6 = Vec::new();
        for addr in data.ipam.container_addresses.iter().map(|net| net.addr()) {
            match addr {
                IpAddr::V4(v4) => container_ips_v4.push(v4),
                IpAddr::V6(v6) => container_ips_v6.push(v6),
            }
        }

        // The container name comes first, followed by any aliases.
        let mut container_names = vec![self.info.container_name.to_string()];
        if let Some(aliases) = &self.info.per_network_opts.aliases {
            container_names.extend(aliases.iter().cloned());
        }

        Some(AardvarkEntry {
            network_name: &self.info.network.name,
            container_id: self.info.container_id,
            network_gateways: data
                .ipam
                .gateway_addresses
                .iter()
                .map(|gateway| gateway.addr())
                .collect(),
            network_dns_servers: &self.info.network.network_dns_servers,
            container_ips_v4,
            container_ips_v6,
            container_names,
            container_dns_servers: self.info.container_dns_servers,
        })
    } else {
        // Without network DNS, report the container's own DNS servers if it has any.
        if let Some(servers) = self.info.container_dns_servers {
            response.dns_server_ips = Some(servers.clone());
        }
        None
    };

    if self.info.network.internal {
        // Internal networks: turn forwarding off on the bridge and skip the firewall setup.
        CoreUtils::apply_sysctl_value(
            format!(
                "/proc/sys/net/ipv4/conf/{}/forwarding",
                data.bridge_interface_name
            ),
            "0",
        )?;
        if data.ipam.ipv6_enabled {
            CoreUtils::apply_sysctl_value(
                format!(
                    "/proc/sys/net/ipv6/conf/{}/forwarding",
                    data.bridge_interface_name
                ),
                "0",
            )?;
        }
    } else {
        self.setup_firewall(data)?;
    }

    Ok((response, aardvark_entry))
}
/// Removes the routes, the container veth, the bridge (when unused) and the firewall rules.
///
/// `netlink_sockets` is `(host socket, container namespace socket)`.
///
/// Failures of the individual removal steps are collected and returned together
/// as `NetavarkError::List`. A route list that cannot be built or a missing
/// bridge name aborts immediately.
fn teardown(
    &self,
    netlink_sockets: (&mut netlink::Socket, &mut netlink::Socket),
) -> NetavarkResult<()> {
    let (host_sock, netns_sock) = netlink_sockets;
    let mut error_list = NetavarkErrorList::new();

    let routes = core_utils::create_route_list(&self.info.network.routes)?;
    for route in routes.iter() {
        if let Err(err) = netns_sock.del_route(route) {
            error_list.push(err);
        }
    }

    let bridge_name = get_interface_name(self.info.network.network_interface.clone())?;

    // A failed link removal is recorded and treated as "bridge still in use".
    let complete_teardown = remove_link(
        host_sock,
        netns_sock,
        &bridge_name,
        &self.info.per_network_opts.interface_name,
    )
    .unwrap_or_else(|err| {
        error_list.push(err);
        false
    });

    // Internal networks never had firewall rules set up, so there is nothing to remove.
    if !self.info.network.internal {
        if let Err(err) = self.teardown_firewall(complete_teardown, bridge_name) {
            error_list.push(err);
        }
    }

    if error_list.is_empty() {
        Ok(())
    } else {
        Err(NetavarkError::List(error_list))
    }
}
}
/// Returns the bridge interface name, rejecting a missing or empty value.
///
/// # Errors
/// Returns a `NO_BRIDGE_NAME_ERROR` message error when `name` is `None` or `""`.
fn get_interface_name(name: Option<String>) -> NetavarkResult<String> {
    match name {
        Some(bridge) if !bridge.is_empty() => Ok(bridge),
        _ => Err(NetavarkError::msg(NO_BRIDGE_NAME_ERROR)),
    }
}
impl<'a> Bridge<'a> {
/// Builds the firewall network config and the port-forward config for this network.
///
/// Only the first IPv4 and the first IPv6 entry of `container_addresses` are used;
/// for each, the container IP and its enclosing subnet are recorded.
///
/// # Errors
/// Fails when a subnet cannot be constructed from an address' network and prefix length.
fn get_firewall_conf(
    &'a self,
    container_addresses: &Vec<IpNet>,
    nameservers: &'a Vec<IpAddr>,
    isolate: IsolateOption,
    bridge_name: String,
) -> NetavarkResult<(SetupNetwork, PortForwardConfig)> {
    let network_hash = CoreUtils::create_network_hash(&self.info.network.name, MAX_HASH_SIZE);

    let network_conf = SetupNetwork {
        subnets: self
            .info
            .network
            .subnets
            .as_ref()
            .map(|nets| nets.iter().map(|n| n.subnet).collect()),
        bridge_name,
        network_hash_name: network_hash.clone(),
        isolation: isolate,
        dns_port: self.info.dns_port,
    };

    let mut container_ip_v4: Option<IpAddr> = None;
    let mut container_ip_v6: Option<IpAddr> = None;
    let mut subnet_v4: Option<IpNet> = None;
    let mut subnet_v6: Option<IpNet> = None;
    for address in container_addresses {
        match address {
            // Keep only the first address of each family; later ones are skipped.
            IpNet::V4(v4) if container_ip_v4.is_none() => {
                container_ip_v4 = Some(IpAddr::V4(v4.addr()));
                subnet_v4 = Some(IpNet::new(v4.network().into(), v4.prefix_len())?);
            }
            IpNet::V6(v6) if container_ip_v6.is_none() => {
                container_ip_v6 = Some(IpAddr::V6(v6.addr()));
                subnet_v6 = Some(IpNet::new(v6.network().into(), v6.prefix_len())?);
            }
            _ => {}
        }
    }

    let port_forward_conf = PortForwardConfig {
        container_id: self.info.container_id.clone(),
        port_mappings: self.info.port_mappings,
        network_name: self.info.network.name.clone(),
        network_hash_name: network_hash,
        container_ip_v4,
        subnet_v4,
        container_ip_v6,
        subnet_v6,
        dns_port: self.info.dns_port,
        dns_server_ips: nameservers,
    };

    Ok((network_conf, port_forward_conf))
}
/// Installs the network and port-forwarding firewall rules for the validated `data`.
///
/// When not running rootless, the firewall config is written to the config dir
/// before any rule is applied. `route_localnet` is enabled on the bridge only
/// when port mappings are present.
fn setup_firewall(&self, data: &InternalData) -> NetavarkResult<()> {
    let info = &self.info;
    let (network_conf, port_forward_conf) = self.get_firewall_conf(
        &data.ipam.container_addresses,
        &data.ipam.nameservers,
        data.isolate,
        data.bridge_interface_name.clone(),
    )?;

    if !info.rootless {
        write_fw_config(
            info.config_dir,
            &info.network.id,
            info.container_id,
            info.firewall.driver_name(),
            &network_conf,
            &port_forward_conf,
        )?;
    }

    info.firewall.setup_network(network_conf)?;

    if port_forward_conf.port_mappings.is_some() {
        let route_localnet = format!(
            "net.ipv4.conf.{}.route_localnet",
            data.bridge_interface_name
        );
        CoreUtils::apply_sysctl_value(route_localnet, "1")?;
    }

    info.firewall.setup_port_forward(port_forward_conf)?;
    Ok(())
}
/// Removes the firewall rules belonging to this container/network.
///
/// `complete_teardown` tells whether the bridge itself was removed; only then are
/// the network-level rules torn down. Port-forward rules are always torn down.
/// `bridge_name` is forwarded into the firewall config.
///
/// Works both with and without a prior `validate()`: without cached data the
/// isolate option and IPAM addresses are re-parsed, and parse failures are only
/// logged and replaced by defaults.
fn teardown_firewall(
&self,
complete_teardown: bool,
bridge_name: String,
) -> NetavarkResult<()> {
// Declared (uninitialised) in the outer scope so that references into these
// vectors, taken inside the `None` arm below, stay valid after the match.
let (container_addresses, nameservers);
let (container_addresses_ref, nameservers_ref, isolate) = match &self.data {
// validate() ran: reuse the cached values.
Some(d) => (&d.ipam.container_addresses, &d.ipam.nameservers, d.isolate),
None => {
// Best effort: an unparsable isolate option falls back to `Never`.
let isolate = get_isolate_option(&self.info.network.options).unwrap_or_else(|e| {
error!("failed to parse {} option: {}", OPTION_ISOLATE, e);
IsolateOption::Never
});
// Best effort: unparsable IPAM options fall back to empty address lists.
(container_addresses, nameservers) =
match get_ipam_addresses(self.info.per_network_opts, self.info.network) {
Ok(i) => (i.container_addresses, i.nameservers),
Err(e) => {
error!("failed to parse ipam options: {}", e);
(Vec::new(), Vec::new())
}
};
(&container_addresses, &nameservers, isolate)
}
};
let (sn, spf) = self.get_firewall_conf(
container_addresses_ref,
nameservers_ref,
isolate,
bridge_name,
)?;
let tn = TearDownNetwork {
config: sn,
complete_teardown,
};
// The on-disk firewall config is only handled when not rootless.
// NOTE(review): a failure here returns early, before any firewall rule is
// removed — confirm this is intended.
if !self.info.rootless {
remove_fw_config(
self.info.config_dir,
&self.info.network.id,
self.info.container_id,
complete_teardown,
)?;
}
// Network-wide rules are removed only when the bridge was removed as well.
if complete_teardown {
self.info.firewall.teardown_network(tn)?;
}
let tpf = TeardownPortForward {
config: spf,
complete_teardown,
};
self.info.firewall.teardown_port_forward(tpf)?;
Ok(())
}
}
// One-time guards: each forwarding sysctl is attempted at most once per process
// (see `setup_ipv4_fw_sysctl` / `setup_ipv6_fw_sysctl`).
static IPV4_FORWARD_ONCE: Once = Once::new();
static IPV6_FORWARD_ONCE: Once = Once::new();
// Names of the sysctls that get set to "1" by the functions below.
const IPV4_FORWARD: &str = "net.ipv4.ip_forward";
const IPV6_FORWARD: &str = "net.ipv6.conf.all.forwarding";
/// Sets the `IPV4_FORWARD` sysctl to "1", at most once per process.
///
/// Only the first call attempts the write and reports its outcome; every later
/// call is a no-op that returns `Ok(())`, even if the first attempt failed.
fn setup_ipv4_fw_sysctl() -> NetavarkResult<()> {
    let mut outcome = Ok(String::new());
    IPV4_FORWARD_ONCE.call_once(|| {
        outcome = CoreUtils::apply_sysctl_value(IPV4_FORWARD, "1");
    });
    if let Err(e) = outcome {
        return Err(e.into());
    }
    Ok(())
}
/// Sets the `IPV6_FORWARD` sysctl to "1", at most once per process.
///
/// Only the first call attempts the write and reports its outcome; every later
/// call is a no-op that returns `Ok(())`, even if the first attempt failed.
fn setup_ipv6_fw_sysctl() -> NetavarkResult<()> {
    let mut outcome = Ok(String::new());
    IPV6_FORWARD_ONCE.call_once(|| {
        outcome = CoreUtils::apply_sysctl_value(IPV6_FORWARD, "1");
    });
    if let Err(e) = outcome {
        return Err(e.into());
    }
    Ok(())
}
/// Makes sure the bridge exists on the host and then creates the veth pair for the container.
///
/// * `host` / `netns` - netlink sockets for the host and the container namespace.
/// * `data` - validated settings (interface names, MTU, VRF, IPAM data).
/// * `internal`, `hostns_fd`, `netns_fd` - passed through to `create_veth_pair`.
///
/// An existing link with the bridge name is reused after checking that it is a
/// bridge. If the lookup fails with `ENODEV` the bridge is created, gets the
/// gateway addresses assigned and is set up. Any other lookup error is returned.
///
/// Returns the value of `create_veth_pair` (the container veth MAC as a string).
fn create_interfaces(
host: &mut netlink::Socket,
netns: &mut netlink::Socket,
data: &InternalData,
internal: bool,
hostns_fd: BorrowedFd<'_>,
netns_fd: BorrowedFd<'_>,
) -> NetavarkResult<String> {
// `mac` is only `Some` when the bridge was created by this call.
let (bridge_index, mac) = match host.get_link(netlink::LinkID::Name(
data.bridge_interface_name.to_string(),
)) {
// Link already exists: it must be a bridge, otherwise this is an error.
Ok(bridge) => (
check_link_is_bridge(bridge, &data.bridge_interface_name)?
.header
.index,
None,
),
// NOTE(review): `unwrap()` here is a method on the error type, presumably
// exposing the underlying error beneath added context — confirm.
Err(err) => match err.unwrap() {
NetavarkError::Netlink(e) => {
// Anything other than ENODEV is an unexpected failure; ENODEV falls
// through to bridge creation.
if -e.raw_code() != libc::ENODEV {
return Err(err).wrap("get bridge interface");
}
let mut create_link_opts = netlink::CreateLinkOptions::new(
data.bridge_interface_name.to_string(),
InfoKind::Bridge,
);
create_link_opts.mtu = data.mtu;
// With a VRF configured, the named link must exist and be a VRF; its
// index is set as the new bridge's primary index.
if let Some(vrf_name) = &data.vrf {
let vrf = match host.get_link(netlink::LinkID::Name(vrf_name.to_string())) {
Ok(vrf) => check_link_is_vrf(vrf, vrf_name)?,
Err(err) => return Err(err).wrap("get vrf to set up bridge interface"),
};
create_link_opts.primary_index = vrf.header.index;
}
host.create_link(create_link_opts).wrap("create bridge")?;
// For IPv6 networks, set accept_dad and accept_ra to 0 on the new bridge.
if data.ipam.ipv6_enabled {
let br_accept_dad = format!(
"/proc/sys/net/ipv6/conf/{}/accept_dad",
&data.bridge_interface_name
);
let br_accept_ra =
format!("net/ipv6/conf/{}/accept_ra", &data.bridge_interface_name);
CoreUtils::apply_sysctl_value(br_accept_dad, "0")?;
CoreUtils::apply_sysctl_value(br_accept_ra, "0")?;
}
// Look the bridge up again to learn its index and MAC address.
let link = host
.get_link(netlink::LinkID::Name(
data.bridge_interface_name.to_string(),
))
.wrap("get bridge interface")?;
let mut mac = None;
for nla in link.nlas.into_iter() {
if let Nla::Address(addr) = nla {
mac = Some(addr);
}
}
if mac.is_none() {
return Err(NetavarkError::msg(
"failed to get the mac address from the bridge interface",
));
}
for addr in &data.ipam.gateway_addresses {
host.add_addr(link.header.index, addr)
.wrap("add ip addr to bridge")?;
}
host.set_up(netlink::LinkID::ID(link.header.index))
.wrap("set bridge up")?;
(link.header.index, mac)
}
// Non-netlink errors are passed on unchanged.
_ => return Err(err),
},
};
// The MAC read above (if any) is re-applied to the bridge by create_veth_pair
// after the veth has been attached — presumably to keep it stable; confirm.
create_veth_pair(
host,
netns,
data,
bridge_index,
mac,
internal,
hostns_fd,
netns_fd,
)
}
/// Creates the veth pair between the host bridge and the container namespace and
/// configures the container side.
///
/// * `host` / `netns` - netlink sockets for the host and the container namespace.
/// * `data` - validated settings (names, MAC, MTU, IPAM data, route options).
/// * `primary_index` - link index set as the host veth's primary index (callers pass the bridge index).
/// * `bridge_mac` - when `Some`, this MAC is set on link `primary_index` after the host veth is up.
/// * `internal` - when true, no default routes are added in the container.
/// * `hostns_fd` / `netns_fd` - namespace fds handed to `exec_netns!`; `netns_fd` is
///   also set on the peer options of the veth.
///
/// Returns the MAC address of the container veth, hex-encoded by
/// `CoreUtils::encode_address_to_hex`.
#[allow(clippy::too_many_arguments)]
fn create_veth_pair<'fd>(
host: &mut netlink::Socket,
netns: &mut netlink::Socket,
data: &InternalData,
primary_index: u32,
bridge_mac: Option<Vec<u8>>,
internal: bool,
hostns_fd: BorrowedFd<'fd>,
netns_fd: BorrowedFd<'fd>,
) -> NetavarkResult<String> {
// Peer (container) end: named as requested, placed in the container netns.
// Without a static MAC the `mac` field is left as an empty vector.
let mut peer_opts =
netlink::CreateLinkOptions::new(data.container_interface_name.to_string(), InfoKind::Veth);
peer_opts.mac = data.mac_address.clone().unwrap_or_default();
peer_opts.mtu = data.mtu;
peer_opts.netns = Some(netns_fd);
let mut peer = LinkMessage::default();
netlink::parse_create_link_options(&mut peer, peer_opts);
// Host end: created with an empty name (presumably the kernel picks one — confirm).
let mut host_veth = netlink::CreateLinkOptions::new(String::from(""), InfoKind::Veth);
host_veth.mtu = data.mtu;
host_veth.primary_index = primary_index;
host_veth.info_data = Some(InfoData::Veth(VethInfo::Peer(peer)));
// EEXIST gets a more specific message naming the container interface.
host.create_link(host_veth).map_err(|err| match err {
NetavarkError::Netlink(ref e) if -e.raw_code() == libc::EEXIST => NetavarkError::wrap(
format!(
"create veth pair: interface {} already exists on container namespace",
data.container_interface_name
),
err,
),
_ => NetavarkError::wrap("create veth pair", err),
})?;
let veth = netns
.get_link(netlink::LinkID::Name(
data.container_interface_name.to_string(),
))
.wrap("get container veth")?;
// From the container veth read its MAC and its `Link` attribute; the latter is
// used below as the link ID of the host-side veth.
let mut mac = String::from("");
let mut host_link = 0;
for nla in veth.nlas.into_iter() {
if let Nla::Address(ref addr) = nla {
mac = CoreUtils::encode_address_to_hex(addr);
}
if let Nla::Link(link) = nla {
host_link = link;
}
}
if mac.is_empty() {
return Err(NetavarkError::Message(
"failed to get the mac address from the container veth interface".to_string(),
));
}
// NOTE(review): `exec_netns!` is defined elsewhere; it presumably runs the block
// inside the container namespace and binds its result to `res` — confirm.
exec_netns!(hostns_fd, netns_fd, res, {
disable_ipv6_autoconf(&data.container_interface_name)?;
if data.ipam.ipv6_enabled {
let disable_dad_in_container = format!(
"/proc/sys/net/ipv6/conf/{}/accept_dad",
&data.container_interface_name
);
core_utils::CoreUtils::apply_sysctl_value(disable_dad_in_container, "0")?;
}
let enable_arp_notify = format!(
"/proc/sys/net/ipv4/conf/{}/arp_notify",
&data.container_interface_name
);
core_utils::CoreUtils::apply_sysctl_value(enable_arp_notify, "1")?;
Ok::<(), NetavarkError>(())
});
res?;
// Also set accept_dad to 0 for the host-side veth; despite the variable name,
// `name` here comes from the link fetched through the host socket.
if data.ipam.ipv6_enabled {
let host_veth = host.get_link(netlink::LinkID::ID(host_link))?;
for nla in host_veth.nlas.into_iter() {
if let Nla::IfName(name) = nla {
let disable_dad_in_container = format!("/proc/sys/net/ipv6/conf/{name}/accept_dad");
core_utils::CoreUtils::apply_sysctl_value(disable_dad_in_container, "0")?;
}
}
}
host.set_up(netlink::LinkID::ID(host_link))
.wrap("failed to set host veth up")?;
// Applied only after the host veth is up; `bridge_mac` is `Some` only when the
// caller captured a MAC to re-apply.
if let Some(m) = bridge_mac {
host.set_mac_address(netlink::LinkID::ID(primary_index), m)
.wrap("set static mac on bridge")?;
}
// Container side: assign addresses, bring the link up, then add routes.
for addr in &data.ipam.container_addresses {
netns
.add_addr(veth.header.index, addr)
.wrap("add ip addr to container veth")?;
}
netns
.set_up(netlink::LinkID::ID(veth.header.index))
.wrap("set container veth up")?;
// Default routes are skipped for internal networks and when explicitly disabled.
if !internal && !data.no_default_route {
core_utils::add_default_routes(netns, &data.ipam.gateway_addresses, data.metric)?;
}
for route in data.ipam.routes.iter() {
netns.add_route(route)?
}
Ok(mac)
}
/// Verifies that the link described by `msg` is of kind bridge and hands it back.
///
/// The first kind attribute found in the link's info attributes decides the outcome.
///
/// # Errors
/// Returns a message error when that kind is not `Bridge`, or when the link
/// carries no kind attribute at all.
fn check_link_is_bridge(msg: LinkMessage, br_name: &str) -> NetavarkResult<LinkMessage> {
    let first_kind = msg
        .nlas
        .iter()
        .filter_map(|nla| match nla {
            Nla::Info(infos) => Some(infos.iter()),
            _ => None,
        })
        .flatten()
        .find_map(|inf| match inf {
            Info::Kind(kind) => Some(kind),
            _ => None,
        });

    match first_kind {
        Some(kind) if *kind == InfoKind::Bridge => Ok(msg),
        Some(kind) => Err(NetavarkError::Message(format!(
            "bridge interface {br_name} already exists but is a {kind:?} interface"
        ))),
        None => Err(NetavarkError::Message(format!(
            "could not determine namespace link kind for bridge {br_name}"
        ))),
    }
}
/// Verifies that the link described by `msg` is of kind VRF and hands it back.
///
/// The first kind attribute found in the link's info attributes decides the outcome.
///
/// # Errors
/// Returns a message error when that kind is not `Vrf`, or when the link carries
/// no kind attribute at all.
fn check_link_is_vrf(msg: LinkMessage, vrf_name: &str) -> NetavarkResult<LinkMessage> {
    let first_kind = msg
        .nlas
        .iter()
        .filter_map(|nla| match nla {
            Nla::Info(infos) => Some(infos.iter()),
            _ => None,
        })
        .flatten()
        .find_map(|inf| match inf {
            Info::Kind(kind) => Some(kind),
            _ => None,
        });

    match first_kind {
        Some(kind) if *kind == InfoKind::Vrf => Ok(msg),
        Some(kind) => Err(NetavarkError::Message(format!(
            "vrf {} already exists but is a {:?} interface",
            vrf_name, kind
        ))),
        None => Err(NetavarkError::Message(format!(
            "could not determine namespace link kind for vrf {}",
            vrf_name
        ))),
    }
}
/// Deletes the container veth and, if no other link is attached to it any more,
/// the bridge as well.
///
/// * `host` / `netns` - netlink sockets for the host and the container namespace.
/// * `br_name` - name of the bridge on the host.
/// * `container_veth_name` - name of the veth inside the container namespace.
///
/// Returns `Ok(true)` when the bridge was removed too (complete teardown) and
/// `Ok(false)` when other links still have the bridge as master.
///
/// # Errors
/// Fails when the veth cannot be deleted, the bridge cannot be looked up, the
/// attached links cannot be listed, or the bridge cannot be deleted.
fn remove_link(
    host: &mut netlink::Socket,
    netns: &mut netlink::Socket,
    br_name: &str,
    container_veth_name: &str,
) -> NetavarkResult<bool> {
    netns
        .del_link(netlink::LinkID::Name(container_veth_name.to_string()))
        .wrap(format!(
            "failed to delete container veth {container_veth_name}"
        ))?;

    let br = host
        .get_link(netlink::LinkID::Name(br_name.to_string()))
        .wrap("failed to get bridge interface")?;

    // List every link whose master is this bridge.
    let links = host
        .dump_links(&mut vec![Nla::Master(br.header.index)])
        .wrap("failed to get connected bridge interfaces")?;
    if !links.is_empty() {
        // Other interfaces still use the bridge; leave it in place.
        return Ok(false);
    }

    log::info!("removing bridge {}", br_name);
    // Fix: the error context must name the bridge, not the container veth.
    host.del_link(netlink::LinkID::ID(br.header.index))
        .wrap(format!("failed to delete bridge {br_name}"))?;
    Ok(true)
}
/// Reads the isolate option from the network options.
///
/// The strict and true values select the matching isolation modes; a missing
/// option, the false value and any unrecognised value all yield `IsolateOption::Never`.
///
/// # Errors
/// Propagates any error from `parse_option`.
fn get_isolate_option(opts: &Option<HashMap<String, String>>) -> NetavarkResult<IsolateOption> {
    let requested: Option<String> = parse_option(opts, OPTION_ISOLATE)?;
    Ok(match requested.as_deref() {
        Some(ISOLATE_OPTION_STRICT) => IsolateOption::Strict,
        Some(ISOLATE_OPTION_TRUE) => IsolateOption::Nomal,
        _ => IsolateOption::Never,
    })
}