as2org_rs/
lib.rs

1//! # CAIDA as2org utility.
2//!
3//! ## Data source
4//! * The CAIDA [AS Organizations Dataset](http://www.caida.org/data/as-organizations).
5//!
6//! ## Data structure
7//!
8//! `As2orgAsInfo`:
9//! * `asn`: the AS number
//! * `name`: the name provided for the individual AS number
11//! * `country_code`: the country code of the organization's registration country
12//! * `org_id`: maps to an organization entry
13//! * `org_name`: the name of the organization
//! * `source`: the RIR or NIR database that contained this entry
15//!
16//! ## Examples
17//!
18//! ```rust
19//! use as2org_rs::As2org;
20//!
21//! let as2org = As2org::new(None).unwrap();
22//! dbg!(as2org.get_as_info(400644).unwrap());
23//! dbg!(as2org.get_siblings(15169).unwrap());
24//! assert!(as2org.are_siblings(15169, 36040));
25//! ```
26
27use anyhow::{anyhow, Result};
28use regex::Regex;
29use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31
32/// Organization JSON format
33///
34/// --------------------
35/// Organization fields
36/// --------------------
37/// org_id  : unique ID for the given organization
38///            some will be created by the WHOIS entry and others will be
39///            created by our scripts
40/// changed : the changed date provided by its WHOIS entry
41/// name    : name could be selected from the AUT entry tied to the
42///            organization, the AUT entry with the largest customer cone,
43///           listed for the organization (if there existed an stand alone
44///            organization), or a human maintained file.
45/// country : some WHOIS provide as a individual field. In other cases
46///            we inferred it from the addresses
47/// source  : the RIR or NIR database which was contained this entry
48#[derive(Debug, Clone, Serialize, Deserialize)]
49struct As2orgJsonOrg {
50    #[serde(alias = "organizationId")]
51    org_id: String,
52
53    changed: Option<String>,
54
55    #[serde(default)]
56    name: String,
57
58    country: String,
59
60    /// The RIR or NIR database that contained this entry
61    source: String,
62
63    #[serde(alias = "type")]
64    data_type: String,
65}
66
67/// AS Json format
68///
69/// ----------
70/// AS fields
71/// ----------
72/// asn     : the AS number
73/// changed : the changed date provided by its WHOIS entry
74/// name    : the name provide for the individual AS number
75/// org_id  : maps to an organization entry
76/// opaque_id   : opaque identifier used by RIR extended delegation format
77/// source  : the RIR or NIR database which was contained this entry
78#[derive(Debug, Clone, Serialize, Deserialize)]
79struct As2orgJsonAs {
80    asn: String,
81
82    changed: Option<String>,
83
84    #[serde(default)]
85    name: String,
86
87    #[serde(alias = "opaqueId")]
88    opaque_id: Option<String>,
89
90    #[serde(alias = "organizationId")]
91    org_id: String,
92
93    /// The RIR or NIR database that contained this entry
94    source: String,
95
96    #[serde(rename = "type")]
97    data_type: String,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101enum As2orgJsonEntry {
102    Org(As2orgJsonOrg),
103    As(As2orgJsonAs),
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct As2orgAsInfo {
108    pub asn: u32,
109    pub name: String,
110    pub country_code: String,
111    pub org_id: String,
112    pub org_name: String,
113    pub source: String,
114}
115
116pub struct As2org {
117    as_map: HashMap<u32, As2orgJsonAs>,
118    org_map: HashMap<String, As2orgJsonOrg>,
119    as_to_org: HashMap<u32, String>,
120    org_to_as: HashMap<String, Vec<u32>>,
121}
122
123impl As2org {
124    pub fn new(data_file_path: Option<String>) -> Result<Self> {
125        let entries = match data_file_path {
126            Some(path) => parse_as2org_file(path.as_str())?,
127            None => {
128                let url = get_most_recent_data()?;
129                parse_as2org_file(url.as_str())?
130            }
131        };
132
133        let mut as_map: HashMap<u32, As2orgJsonAs> = HashMap::new();
134        let mut org_map: HashMap<String, As2orgJsonOrg> = HashMap::new();
135
136        for entry in entries {
137            match entry {
138                As2orgJsonEntry::As(as_entry) => {
139                    as_map.insert(as_entry.asn.parse::<u32>().unwrap(), as_entry);
140                }
141                As2orgJsonEntry::Org(org_entry) => {
142                    org_map.insert(org_entry.org_id.clone(), org_entry);
143                }
144            }
145        }
146
147        let mut as_to_org: HashMap<u32, String> = HashMap::new();
148        let mut org_to_as: HashMap<String, Vec<u32>> = HashMap::new();
149
150        for (asn, as_entry) in as_map.iter() {
151            as_to_org.insert(*asn, as_entry.org_id.clone());
152            let org_asn = org_to_as.entry(as_entry.org_id.clone()).or_default();
153            org_asn.push(*asn);
154        }
155
156        Ok(Self {
157            as_map,
158            org_map,
159            as_to_org,
160            org_to_as,
161        })
162    }
163
164    pub fn get_as_info(&self, asn: u32) -> Option<As2orgAsInfo> {
165        let as_entry = self.as_map.get(&asn)?;
166        let org_id = as_entry.org_id.as_str();
167        let org_entry = self.org_map.get(org_id)?;
168        Some(As2orgAsInfo {
169            asn,
170            name: as_entry.name.clone(),
171            country_code: org_entry.country.clone(),
172            org_id: org_id.to_string(),
173            org_name: org_entry.name.clone(),
174            source: org_entry.source.clone(),
175        })
176    }
177
178    pub fn get_siblings(&self, asn: u32) -> Option<Vec<As2orgAsInfo>> {
179        let org_id = self.as_to_org.get(&asn)?;
180        let org_asns = self.org_to_as.get(org_id)?.to_vec();
181        Some(
182            org_asns
183                .iter()
184                .map(|asn| self.get_as_info(*asn).unwrap())
185                .collect(),
186        )
187    }
188
189    pub fn are_siblings(&self, asn1: u32, asn2: u32) -> bool {
190        let org1 = match self.as_to_org.get(&asn1) {
191            None => return false,
192            Some(o) => o,
193        };
194        let org2 = match self.as_to_org.get(&asn2) {
195            None => return false,
196            Some(o) => o,
197        };
198        org1 == org2
199    }
200}
201
/// Repairs text in which a two-byte UTF-8 sequence starting with `0xC3` shows up as two
/// separate Latin-1 characters.
///
/// Such text contains `Ã` (U+00C3) followed by a character in `U+0080..=U+00BF` where a
/// single character in `U+00C0..=U+00FF` was meant, e.g. `Ã©` instead of `é`. Every
/// such pair is replaced by the character it stands for; all other input is copied
/// unchanged.
///
/// # Arguments
///
/// * `input` - A string slice that may contain incorrectly decoded characters.
///
/// # Returns
///
/// The corrected string. An `Ã` that is not followed by a character in
/// `U+0080..=U+00BF` is kept as is, and the character after it is still examined on its
/// own, so `ÃÃ©` becomes `Ãé`.
fn fix_latin1_misinterpretation(input: &str) -> String {
    // What the UTF-8 lead byte 0xC3 turns into when it is read as Latin-1.
    const LEAD: char = '\u{00C3}';

    // The output is never longer than the input in bytes.
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        if c == LEAD {
            // Only consume the following character when it really is a continuation
            // byte; otherwise it must stay available for the next iteration.
            let trail = chars.next_if(|next| ('\u{0080}'..='\u{00BF}').contains(next));
            if let Some(trail) = trail {
                // `trail` is within U+0080..=U+00BF, so the cast is lossless and the sum
                // stays within 0xC0..=0xFF, which `char::from(u8)` maps to
                // U+00C0..=U+00FF.
                result.push(char::from(trail as u8 + 0x40));
                continue;
            }
        }
        result.push(c);
    }

    result
}
253
254/// parse remote AS2Org file into Vec of DataEntry
255fn parse_as2org_file(path: &str) -> Result<Vec<As2orgJsonEntry>> {
256    let mut res: Vec<As2orgJsonEntry> = vec![];
257
258    for line in oneio::read_lines(path)? {
259        let line = fix_latin1_misinterpretation(&line?);
260        if line.contains(r#""type":"ASN""#) {
261            let data = serde_json::from_str::<As2orgJsonAs>(line.as_str());
262            match data {
263                Ok(data) => {
264                    res.push(As2orgJsonEntry::As(data));
265                }
266                Err(e) => {
267                    eprintln!("error parsing line:\n{}", line.as_str());
268                    return Err(anyhow!(e));
269                }
270            }
271        } else {
272            let data = serde_json::from_str::<As2orgJsonOrg>(line.as_str());
273            match data {
274                Ok(data) => {
275                    res.push(As2orgJsonEntry::Org(data));
276                }
277                Err(e) => {
278                    eprintln!("error parsing line:\n{}", line.as_str());
279                    return Err(anyhow!(e));
280                }
281            }
282        }
283    }
284    Ok(res)
285}
286
287/// Get the most recent AS2Org data file from CAIDA
288fn get_most_recent_data() -> Result<String> {
289    let data_link: Regex = Regex::new(r".*(........\.as-org2info\.jsonl\.gz).*")?;
290    let content = oneio::read_to_string("https://publicdata.caida.org/datasets/as-organizations/")?;
291    let res: Vec<String> = data_link
292        .captures_iter(content.as_str())
293        .map(|cap| cap[1].to_owned())
294        .collect();
295    let file = res.last().unwrap().to_string();
296
297    Ok(format!(
298        "https://publicdata.caida.org/datasets/as-organizations/{file}"
299    ))
300}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the most recent dataset from CAIDA (requires network access) and checks
    /// the lookups shown in the crate-level example.
    #[test]
    fn test_load_entries() {
        let as2org = As2org::new(None).unwrap();
        dbg!(as2org.get_as_info(400644));
        dbg!(as2org.get_siblings(400644));
        dbg!(as2org.get_siblings(13335));
        dbg!(as2org.get_siblings(61786));

        // Without assertions the test could only fail on a panic.
        assert!(as2org.get_as_info(400644).is_some());
        assert!(as2org.are_siblings(15169, 36040));

        // An AS is always part of its own sibling list.
        let siblings = as2org
            .get_siblings(15169)
            .expect("AS15169 should be present in the dataset");
        assert!(siblings.iter().any(|info| info.asn == 15169));

        // Unknown AS numbers yield no information instead of panicking.
        assert!(!as2org.are_siblings(15169, u32::MAX));
    }
}
314}