1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
//! # CAIDA as2org utility.
//!
//! ## Data source
//! * The CAIDA [AS Organizations Dataset](http://www.caida.org/data/as-organizations).
//!
//! ## Data structure
//!
//! `As2orgAsInfo`:
//! * `asn`: the AS number
//! * `name`: the name provided for the individual AS number
//! * `country_code`: the country code of the organization's registration country
//! * `org_id`: maps to an organization entry
//! * `org_name`: the name of the organization
//! * `source`: the RIR or NIR database that contained this entry
//!
//! ## Examples
//!
//! ```rust
//! use as2org_rs::As2org;
//!
//! let as2org = As2org::new(None).unwrap();
//! dbg!(as2org.get_as_info(400644).unwrap());
//! dbg!(as2org.get_siblings(15169).unwrap());
//! assert!(as2org.are_siblings(15169, 36040));
//! ```
use anyhow::{anyhow, Result};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Organization record as it appears in the as2org JSON-lines data.
///
/// Organization fields:
///
/// * `org_id`: unique ID for the given organization; some are created by the
///   WHOIS entry and others are generated by scripts
/// * `changed`: the changed date provided by its WHOIS entry
/// * `name`: may be selected from the AUT entry tied to the organization, the
///   AUT entry with the largest customer cone, the name listed for the
///   organization (if a stand-alone organization existed), or a
///   human-maintained file
/// * `country`: some WHOIS sources provide it as an individual field; in other
///   cases it was inferred from the addresses
/// * `source`: the RIR or NIR database that contained this entry
#[derive(Debug, Clone, Serialize, Deserialize)]
struct As2orgJsonOrg {
// Accepts the JSON key `organizationId` in addition to `org_id` when deserializing.
#[serde(alias = "organizationId")]
org_id: String,
// Optional: deserializes to `None` when the key is absent or null.
changed: Option<String>,
// Falls back to an empty string when the key is absent.
#[serde(default)]
name: String,
country: String,
/// The RIR or NIR database that contained this entry
source: String,
// Accepts the JSON key `type` when deserializing.
// NOTE(review): `As2orgJsonAs` uses `rename = "type"` for the same field; `alias` only
// affects deserialization, so serializing this struct emits `data_type` — confirm intended.
#[serde(alias = "type")]
data_type: String,
}
/// AS record as it appears in the as2org JSON-lines data.
///
/// AS fields:
///
/// * `asn`: the AS number
/// * `changed`: the changed date provided by its WHOIS entry
/// * `name`: the name provided for the individual AS number
/// * `org_id`: maps to an organization entry
/// * `opaque_id`: opaque identifier used by the RIR extended delegation format
/// * `source`: the RIR or NIR database that contained this entry
#[derive(Debug, Clone, Serialize, Deserialize)]
struct As2orgJsonAs {
// Kept as a string here; `As2org::new` parses it into a `u32` key.
asn: String,
// Optional: deserializes to `None` when the key is absent or null.
changed: Option<String>,
// Falls back to an empty string when the key is absent.
#[serde(default)]
name: String,
// Accepts the JSON key `opaqueId` in addition to `opaque_id` when deserializing.
#[serde(alias = "opaqueId")]
opaque_id: Option<String>,
// Accepts the JSON key `organizationId` in addition to `org_id` when deserializing.
#[serde(alias = "organizationId")]
org_id: String,
/// The RIR or NIR database that contained this entry
source: String,
// Read from and written to the JSON key `type`.
#[serde(rename = "type")]
data_type: String,
}
/// A single parsed record from the data file: either an organization or an AS.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum As2orgJsonEntry {
/// An organization record.
Org(As2orgJsonOrg),
/// An AS record.
As(As2orgJsonAs),
}
/// Combined view of an AS and the organization it belongs to, as returned by
/// `As2org::get_as_info` and `As2org::get_siblings`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct As2orgAsInfo {
/// The AS number.
pub asn: u32,
/// The name provided for the individual AS number.
pub name: String,
/// The `country` value of the organization record.
pub country_code: String,
/// ID of the organization record this AS maps to.
pub org_id: String,
/// The `name` value of the organization record.
pub org_name: String,
/// The `source` value of the organization record (the RIR or NIR database that contained it).
pub source: String,
}
/// In-memory lookup tables built from an as2org data file.
///
/// Construct with `As2org::new`, then query with `get_as_info`, `get_siblings`
/// and `are_siblings`.
pub struct As2org {
// AS records keyed by AS number.
as_map: HashMap<u32, As2orgJsonAs>,
// Organization records keyed by organization ID.
org_map: HashMap<String, As2orgJsonOrg>,
// AS number -> organization ID.
as_to_org: HashMap<u32, String>,
// Organization ID -> AS numbers that reference it.
org_to_as: HashMap<String, Vec<u32>>,
}
impl As2org {
pub fn new(data_file_path: Option<String>) -> Result<Self> {
let entries = match data_file_path {
Some(path) => parse_as2org_file(path.as_str())?,
None => {
let url = get_most_recent_data()?;
parse_as2org_file(url.as_str())?
}
};
let mut as_map: HashMap<u32, As2orgJsonAs> = HashMap::new();
let mut org_map: HashMap<String, As2orgJsonOrg> = HashMap::new();
for entry in entries {
match entry {
As2orgJsonEntry::As(as_entry) => {
as_map.insert(as_entry.asn.parse::<u32>().unwrap(), as_entry);
}
As2orgJsonEntry::Org(org_entry) => {
org_map.insert(org_entry.org_id.clone(), org_entry);
}
}
}
let mut as_to_org: HashMap<u32, String> = HashMap::new();
let mut org_to_as: HashMap<String, Vec<u32>> = HashMap::new();
for (asn, as_entry) in as_map.iter() {
as_to_org.insert(*asn, as_entry.org_id.clone());
let org_asn = org_to_as.entry(as_entry.org_id.clone()).or_default();
org_asn.push(*asn);
}
Ok(Self {
as_map,
org_map,
as_to_org,
org_to_as,
})
}
pub fn get_as_info(&self, asn: u32) -> Option<As2orgAsInfo> {
let as_entry = self.as_map.get(&asn)?;
let org_id = as_entry.org_id.as_str();
let org_entry = self.org_map.get(org_id)?;
Some(As2orgAsInfo {
asn,
name: as_entry.name.clone(),
country_code: org_entry.country.clone(),
org_id: org_id.to_string(),
org_name: org_entry.name.clone(),
source: org_entry.source.clone(),
})
}
pub fn get_siblings(&self, asn: u32) -> Option<Vec<As2orgAsInfo>> {
let org_id = self.as_to_org.get(&asn)?;
let org_asns = self.org_to_as.get(org_id)?.to_vec();
Some(
org_asns
.iter()
.map(|asn| self.get_as_info(*asn).unwrap())
.collect(),
)
}
pub fn are_siblings(&self, asn1: u32, asn2: u32) -> bool {
let org1 = match self.as_to_org.get(&asn1) {
None => return false,
Some(o) => o,
};
let org2 = match self.as_to_org.get(&asn2) {
None => return false,
Some(o) => o,
};
org1 == org2
}
}
/// parse remote AS2Org file into Vec of DataEntry
fn parse_as2org_file(path: &str) -> Result<Vec<As2orgJsonEntry>> {
let mut res: Vec<As2orgJsonEntry> = vec![];
for line in oneio::read_lines(path)? {
let line = line?;
if line.contains(r#""type":"ASN""#) {
let data = serde_json::from_str::<As2orgJsonAs>(line.as_str());
match data {
Ok(data) => {
res.push(As2orgJsonEntry::As(data));
}
Err(e) => {
eprintln!("error parsing line:\n{}", line.as_str());
return Err(anyhow!(e));
}
}
} else {
let data = serde_json::from_str::<As2orgJsonOrg>(line.as_str());
match data {
Ok(data) => {
res.push(As2orgJsonEntry::Org(data));
}
Err(e) => {
eprintln!("error parsing line:\n{}", line.as_str());
return Err(anyhow!(e));
}
}
}
}
Ok(res)
}
/// Get the most recent AS2Org data file from CAIDA
fn get_most_recent_data() -> Result<String> {
let data_link: Regex = Regex::new(r".*(........\.as-org2info\.jsonl\.gz).*")?;
let content = oneio::read_to_string("https://publicdata.caida.org/datasets/as-organizations/")?;
let res: Vec<String> = data_link
.captures_iter(content.as_str())
.map(|cap| cap[1].to_owned())
.collect();
let file = res.last().unwrap().to_string();
Ok(format!(
"https://publicdata.caida.org/datasets/as-organizations/{file}"
))
}
#[cfg(test)]
mod tests {
use super::*;
/// Smoke test: builds `As2org` with no explicit data file (so the most recent data
/// file is looked up and fetched) and prints the results of a few lookups.
/// The only failure condition is the load itself returning an error; the lookup
/// results are printed, not asserted.
#[test]
fn test_load_entries() {
let as2org = As2org::new(None).unwrap();
dbg!(as2org.get_as_info(400644));
dbg!(as2org.get_siblings(400644));
dbg!(as2org.get_siblings(13335));
}
}