#[cfg(target_pointer_width = "16")]
compile_error!("cannot build on 16 bit systems");
use std::{cmp, error, fmt, io};
use std::io::BufReader;
use std::str::FromStr;
use libflate::gzip;
use crate::api::roa::{AsNumber, Ipv4Prefix, Ipv6Prefix, TypedPrefix};
use crate::api::bgp::Announcement;
const MINIMUM_SEEN_BY: u32 = 13;
pub struct RisWhoisLoader {
v4_url: String,
v6_url: String,
}
impl RisWhoisLoader {
pub fn new(v4_url: String, v6_url: String) -> Self {
Self { v4_url, v6_url }
}
pub async fn load(&self) -> Result<RisWhois, RisWhoisError> {
Ok(RisWhois::new(
Self::load_tree(&self.v4_url).await?,
Self::load_tree(&self.v6_url).await?,
))
}
async fn load_tree<P: FromStr + RoutePrefix>(
uri: &str
) -> Result<RouteOriginCollection<P>, RisWhoisError>
where <P as FromStr>::Err: error::Error + Send + Sync + 'static {
Self::parse_gz_data(
&reqwest::get(uri).await.map_err(|err| {
RisWhoisError::new(uri, io::Error::other(err))
})?.bytes().await.map_err(|err| {
RisWhoisError::new(uri, io::Error::other(err))
})?
).map_err(|err| RisWhoisError::new(uri, err))
}
fn parse_gz_data<P: FromStr + RoutePrefix>(
data: &[u8]
) -> Result<RouteOriginCollection<P>, io::Error>
where <P as FromStr>::Err: error::Error + Send + Sync + 'static {
let data = BufReader::new(
gzip::Decoder::new(data)?
);
Self::parse_data(data)
}
pub(super) fn parse_data<P: FromStr + RoutePrefix>(
data: impl io::BufRead,
) -> Result<RouteOriginCollection<P>, io::Error>
where <P as FromStr>::Err: error::Error + Send + Sync + 'static {
let mut res = Vec::new();
for line in data.lines() {
let line = line?;
if line.is_empty() || line.starts_with('%') {
continue;
}
let mut values = line.split_whitespace();
let asn_str = values.next().ok_or(
io::Error::other("missing column")
)?;
let prefix_str = values.next().ok_or(
io::Error::other("missing column")
)?;
let peers = values.next().ok_or(
io::Error::other("missing column")
)?;
if u32::from_str(peers).map_err(io::Error::other)?
< MINIMUM_SEEN_BY
{
continue;
}
if asn_str.contains('{') {
continue; }
let origin = AsNumber::from_str(asn_str).map_err(io::Error::other)?;
let prefix = P::from_str(prefix_str).map_err(|err| {
io::Error::other(err)
})?;
res.push(RouteOrigin { prefix, origin });
}
Ok(RouteOriginCollection::new(res).unwrap())
}
}
#[derive(Clone, Default)]
pub struct RisWhois {
v4: RouteOriginCollection<Ipv4Prefix>,
v6: RouteOriginCollection<Ipv6Prefix>,
}
impl RisWhois {
pub fn new(
v4: RouteOriginCollection<Ipv4Prefix>,
v6: RouteOriginCollection<Ipv6Prefix>,
) -> Self {
Self { v4, v6 }
}
pub fn v4(&self) -> &RouteOriginCollection<Ipv4Prefix> {
&self.v4
}
pub fn v6(&self) -> &RouteOriginCollection<Ipv6Prefix> {
&self.v6
}
}
#[derive(Clone, Debug)]
pub struct RouteOriginCollection<P> {
tree: Box<[TreeNode]>,
tree_root_idx: TreeIndex,
data: RouteOriginBox<P>,
no_data: Box<[P]>,
}
impl<P: RoutePrefix> RouteOriginCollection<P> {
pub fn new(data: Vec<RouteOrigin<P>>) -> Result<Self, LargeDataset> {
CollectionBuilder::new(data).process()
}
pub fn eq_or_more_specific(&self, prefix: P) -> TreeIter<'_, P> {
TreeIter::more_specific(self, prefix)
}
}
impl<P: RoutePrefix> RouteOriginCollection<P> {
fn get_tree_node(&self, tree_idx: TreeIndex) -> Option<TreeNode> {
self.tree.get(tree_idx.into_usize()?).copied()
}
fn get_data_prefix(&self, data_idx: DataIndex) -> Option<P> {
match data_idx.into_data() {
Ok(data_idx) => Some(self.data.0.get(data_idx)?.prefix),
Err(no_data_idx) => self.no_data.get(no_data_idx).copied(),
}
}
}
impl<P> Default for RouteOriginCollection<P> {
fn default() -> Self {
Self {
tree: Box::new([]),
tree_root_idx: TreeIndex::none(),
data: RouteOriginBox::default(),
no_data: Box::new([]),
}
}
}
struct CollectionBuilder<P> {
tree: Vec<TreeNode>,
no_data: Vec<P>,
data: RouteOriginBox<P>,
next_data_idx: usize,
}
impl<P: RoutePrefix> CollectionBuilder<P> {
fn new(data: Vec<RouteOrigin<P>>) -> Self {
let mut data = data.into_boxed_slice();
data.sort();
let data = RouteOriginBox(data);
Self {
tree: Vec::new(),
no_data: Vec::new(),
data,
next_data_idx: 0,
}
}
fn process(mut self) -> Result<RouteOriginCollection<P>, LargeDataset> {
let Some(prefix) = self.next_prefix() else {
return Ok(RouteOriginCollection {
tree: Box::new([]),
tree_root_idx: TreeIndex::none(),
data: RouteOriginBox(Box::new([])),
no_data: Box::new([]),
})
};
let node = if prefix == P::default() {
self.advance_data();
self.process_node(
prefix, TreeNode::new(DataIndex::data(0)?)
)?
}
else {
let data = self.push_no_data(P::default())?;
self.process_node(
P::default(), TreeNode::new(data)
)?
};
let tree_root_idx = self.push_node(node)?;
Ok(RouteOriginCollection {
tree: self.tree.into_boxed_slice(),
tree_root_idx,
data: self.data,
no_data: self.no_data.into_boxed_slice()
})
}
fn process_node(
&mut self,
prefix: P,
mut node: TreeNode
) -> Result<TreeNode, LargeDataset> {
loop {
let Some(next_prefix) = self.next_prefix() else {
return Ok(node)
};
if !prefix.covers(next_prefix) {
return Ok(node)
}
if !next_prefix.bit(prefix.addr_len()) {
if let Some(left_idx) = node.left.into_usize() {
let ancestor_prefix = self.node_prefix(
left_idx
).closest_ancestor(next_prefix);
let data = self.push_no_data(ancestor_prefix)?;
let inter_node = self.process_node(
ancestor_prefix,
TreeNode::with_children(
data, node.left, TreeIndex::none()
)
)?;
node.left = self.push_node(inter_node)?;
}
else {
let left_node = TreeNode::new(
DataIndex::data(self.next_data_idx)?
);
self.advance_data();
let left_node = self.process_node(
next_prefix, left_node,
)?;
node.left = self.push_node(left_node)?;
}
}
else {
if let Some(right_idx) = node.right.into_usize() {
let ancestor_prefix = self.node_prefix(
right_idx
).closest_ancestor(next_prefix);
let data = self.push_no_data(ancestor_prefix)?;
let inter_node = self.process_node(
ancestor_prefix,
TreeNode::with_children(
data, node.right, TreeIndex::none()
)
)?;
node.right = self.push_node(inter_node)?;
}
else {
let right_node = TreeNode::new(
DataIndex::data(self.next_data_idx)?
);
self.advance_data();
let right_node = self.process_node(
next_prefix, right_node
)?;
node.right = self.push_node(right_node)?;
}
}
}
}
fn next_prefix(&self) -> Option<P> {
self.data.0.get(self.next_data_idx).map(|item| item.prefix)
}
fn advance_data(&mut self) {
self.next_data_idx = self.data.next_prefix(self.next_data_idx);
}
fn node_prefix(&self, node_idx: usize) -> P {
match self.tree[node_idx].data.into_data() {
Ok(idx) => self.data.0[idx].prefix,
Err(idx) => self.no_data[idx]
}
}
fn push_node(&mut self, node: TreeNode) -> Result<TreeIndex, LargeDataset> {
let res = self.tree.len().try_into()?;
self.tree.push(node);
Ok(res)
}
fn push_no_data(&mut self, prefix: P) -> Result<DataIndex, LargeDataset> {
let res = DataIndex::no_data(self.no_data.len())?;
self.no_data.push(prefix);
Ok(res)
}
}
pub trait RoutePrefix: Clone + Copy + Default + fmt::Debug + Eq + Ord {
fn covers(self, other: Self) -> bool;
fn closest_ancestor(self, other: Self) -> Self;
fn addr_len(self) -> u8;
fn bit(self, idx: u8) -> bool;
fn into_typed_prefix(self) -> TypedPrefix;
}
impl RoutePrefix for Ipv4Prefix {
fn covers(self, other: Self) -> bool {
if self.addr_len() > other.addr_len() {
return false
}
if self.addr_len() == 32 {
return self.addr() == other.addr()
}
self.addr().to_bits()
== other.addr().to_bits() & !(u32::MAX >> self.addr_len())
}
fn closest_ancestor(self, other: Self) -> Self {
self.resize(
cmp::min(
(self.addr().to_bits() ^ other.addr().to_bits())
.leading_zeros() as u8,
cmp::min(self.addr_len(), other.addr_len())
)
)
}
fn addr_len(self) -> u8 {
self.addr_len()
}
fn bit(self, idx: u8) -> bool {
let Some(mask) = 0x8000_0000u32.checked_shr(idx.into()) else {
return false
};
(self.addr().to_bits() & mask) != 0
}
fn into_typed_prefix(self) -> TypedPrefix {
TypedPrefix::V4(self)
}
}
impl RoutePrefix for Ipv6Prefix {
fn covers(self, other: Self) -> bool {
if self.addr_len() > other.addr_len() {
return false
}
if self.addr_len() == 128 {
return self.addr() == other.addr()
}
self.addr().to_bits()
== other.addr().to_bits() & !(u128::MAX >> self.addr_len())
}
fn closest_ancestor(self, other: Self) -> Self {
self.resize(
cmp::min(
(self.addr().to_bits() ^ other.addr().to_bits())
.leading_zeros() as u8,
cmp::min(self.addr_len(), other.addr_len())
)
)
}
fn addr_len(self) -> u8 {
self.addr_len()
}
fn bit(self, idx: u8) -> bool {
let Some(mask) = const { 1u128 << 127 }.checked_shr(idx.into()) else {
return false
};
(self.addr().to_bits() & mask) != 0
}
fn into_typed_prefix(self) -> TypedPrefix {
TypedPrefix::V6(self)
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct TreeIndex(u32);
impl TreeIndex {
const fn none() -> Self {
Self(u32::MAX)
}
fn into_usize(self) -> Option<usize> {
self.into()
}
}
impl Default for TreeIndex {
fn default() -> Self {
Self::none()
}
}
impl TryFrom<Option<usize>> for TreeIndex {
type Error = LargeDataset;
fn try_from(src: Option<usize>) -> Result<Self, Self::Error> {
match src {
Some(src) => {
match u32::try_from(src) {
Ok(src) if src == u32::MAX => Err(LargeDataset(())),
Ok(src) => Ok(Self(src)),
Err(_) => Err(LargeDataset(())),
}
}
None => Ok(Self(u32::MAX))
}
}
}
impl TryFrom<usize> for TreeIndex {
type Error = LargeDataset;
fn try_from(src: usize) -> Result<Self, Self::Error> {
Some(src).try_into()
}
}
impl From<TreeIndex> for Option<usize> {
fn from(src: TreeIndex) -> Self {
if src.0 == u32::MAX {
None
}
else {
Some(src.0 as usize)
}
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct DataIndex(u32);
impl DataIndex {
fn usize_to_u31(idx: usize) -> Result<u32, LargeDataset> {
match u32::try_from(idx) {
Ok(idx) if idx & 0x8000_0000 != 0 => Err(LargeDataset(())),
Ok(idx) => Ok(idx),
Err(_) => Err(LargeDataset(())),
}
}
fn data(idx: usize) -> Result<Self, LargeDataset> {
Ok(Self(Self::usize_to_u31(idx)? | 0x8000_0000))
}
fn no_data(idx: usize) -> Result<Self, LargeDataset> {
Ok(Self(Self::usize_to_u31(idx)?))
}
fn into_data(self) -> Result<usize, usize> {
if self.0 & 0x8000_0000 != 0 {
Ok((self.0 & 0x7FFF_FFFF) as usize)
}
else {
Err(self.0 as usize)
}
}
}
#[derive(Clone, Copy, Debug)]
struct TreeNode {
data: DataIndex,
left: TreeIndex,
right: TreeIndex,
}
impl TreeNode {
fn new(data: DataIndex) -> Self {
Self {
data,
left: TreeIndex::none(),
right: TreeIndex::none(),
}
}
fn with_children(
data: DataIndex, left: TreeIndex, right: TreeIndex
) -> Self {
Self { data, left, right }
}
}
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct RouteOrigin<P> {
pub prefix: P,
pub origin: AsNumber,
}
impl<P: RoutePrefix> From<RouteOrigin<P>> for Announcement {
fn from(src: RouteOrigin<P>) -> Self {
Self {
asn: src.origin,
prefix: src.prefix.into_typed_prefix()
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct RouteOriginSet<'a, P> {
slice: &'a [RouteOrigin<P>],
}
impl<'a, P: RoutePrefix> RouteOriginSet<'a, P> {
fn new(slice: &'a [RouteOrigin<P>]) -> Self {
debug_assert!(!slice.is_empty());
Self { slice }
}
pub fn prefix(self) -> P {
self.slice[0].prefix
}
pub fn iter(self) -> impl Iterator<Item = RouteOrigin<P>> + 'a {
self.slice.iter().copied()
}
}
#[derive(Clone, Debug)]
pub struct RouteOriginBox<P>(Box<[RouteOrigin<P>]>);
impl<P: RoutePrefix> RouteOriginBox<P> {
fn next_prefix(&self, idx: usize) -> usize {
let mut next_idx = idx;
loop {
next_idx = match next_idx.checked_add(1) {
Some(idx) => idx,
None => return usize::MAX,
};
if next_idx >= self.0.len() {
return next_idx;
}
if self.0[next_idx].prefix != self.0[idx].prefix {
return next_idx;
}
}
}
fn origin_set(&self, idx: usize) -> Option<RouteOriginSet<'_, P>> {
let next = cmp::min(self.next_prefix(idx), self.0.len());
self.0.get(idx..next).map(RouteOriginSet::new)
}
}
impl<P> Default for RouteOriginBox<P> {
fn default() -> Self {
Self(Box::new([]))
}
}
pub struct TreeIter<'a, P> {
collection: &'a RouteOriginCollection<P>,
tree_idx_stack: Vec<usize>,
}
impl<'a, P: RoutePrefix> TreeIter<'a, P> {
#[cfg(test)]
fn new(collection: &'a RouteOriginCollection<P>) -> Self {
Self {
collection,
tree_idx_stack: match collection.tree_root_idx.into_usize() {
Some(idx) => vec![idx],
None => Vec::new()
}
}
}
fn more_specific(
collection: &'a RouteOriginCollection<P>,
root_prefix: P
) -> Self {
let mut tree_idx = collection.tree_root_idx;
while let Some(node) = collection.get_tree_node(tree_idx) {
let Some(prefix) = collection.get_data_prefix(node.data) else {
break;
};
if prefix.addr_len() >= root_prefix.addr_len()
&& (root_prefix == prefix || root_prefix.covers(prefix))
{
let Some(tree_idx) = tree_idx.into_usize() else {
break
};
return Self {
collection,
tree_idx_stack: vec![tree_idx],
}
}
if !root_prefix.bit(prefix.addr_len()) {
tree_idx = node.left
}
else {
tree_idx = node.right
}
}
Self {
collection,
tree_idx_stack: Vec::new()
}
}
}
impl<'a, P: RoutePrefix> Iterator for TreeIter<'a, P> {
type Item = RouteOriginSet<'a, P>;
fn next(&mut self) -> Option<Self::Item> {
loop {
let node_idx = *self.tree_idx_stack.last()?;
let node = self.collection.tree.get(node_idx)?;
self.tree_idx_stack.pop();
if let Some(idx) = node.right.into_usize() {
self.tree_idx_stack.push(idx);
}
if let Some(idx) = node.left.into_usize() {
self.tree_idx_stack.push(idx);
}
if let Ok(idx) = node.data.into_data() {
return self.collection.data.origin_set(idx)
}
}
}
}
#[derive(Debug)]
pub struct RisWhoisError {
uri: String,
err: io::Error,
}
impl RisWhoisError {
fn new(uri: &str, err: io::Error) -> Self {
Self { uri: uri.into(), err }
}
}
impl fmt::Display for RisWhoisError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f,
"Failed to download RISwhois file `{}`: {}",
self.uri, self.err
)
}
}
#[derive(Debug)]
pub struct LargeDataset(());
impl fmt::Display for LargeDataset {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.write_str("RISwhois dataset too large")
}
}
impl error::Error for LargeDataset { }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_bgp_ris_dumps() {
let v4 = RisWhoisLoader::parse_data(include_bytes!(
"../../../test-resources/bgp/riswhoisdump.IPv4"
).as_ref()).unwrap();
let v6 = RisWhoisLoader::parse_data(include_bytes!(
"../../../test-resources/bgp/riswhoisdump.IPv6"
).as_ref()).unwrap();
let ris = RisWhois { v4, v6 };
let v4 = TreeIter::new(&ris.v4).map(|item| {
for origin in item.iter() {
assert_eq!(origin.prefix, item.prefix());
}
item.prefix()
}).collect::<Vec<_>>();
for item in v4.windows(2) {
assert!(item[0] < item[1])
}
let v6 = TreeIter::new(&ris.v6).map(|item| {
for origin in item.iter() {
assert_eq!(origin.prefix, item.prefix());
}
item.prefix()
}).collect::<Vec<_>>();
for item in v6.windows(2) {
assert!(item[0] < item[1])
}
}
#[test]
fn ipv6_subprefixes() {
fn origin(prefix: &str, origin: u32) -> RouteOrigin<Ipv6Prefix>
{
RouteOrigin {
prefix: Ipv6Prefix::from_str(prefix).unwrap(),
origin: AsNumber::from_u32(origin)
}
}
fn prefix(prefix: &str) -> Ipv6Prefix
{
Ipv6Prefix::from_str(prefix).unwrap()
}
let data = vec![
origin("2a0f::/16", 123),
origin("2a0f:1cc5:f00::/48", 456),
];
let collection = RouteOriginCollection::new(data).unwrap();
assert_eq!(
0,
collection.eq_or_more_specific(
prefix("2a0f:1cc5:2d00::/40")
).count()
);
assert_eq!(
1,
collection.eq_or_more_specific(
prefix("2a0f:1cc5:f00::/48")
).count()
);
assert_eq!(
0,
collection.eq_or_more_specific(
prefix("2a0f:1cc5:f01::/48")
).count()
);
assert_eq!(
2,
collection.eq_or_more_specific(
prefix("2a0f::/16")
).count()
);
}
}