1use anyhow::{anyhow, Result};
28use regex::Regex;
29use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
49struct As2orgJsonOrg {
50 #[serde(alias = "organizationId")]
51 org_id: String,
52
53 changed: Option<String>,
54
55 #[serde(default)]
56 name: String,
57
58 country: String,
59
60 source: String,
62
63 #[serde(alias = "type")]
64 data_type: String,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
79struct As2orgJsonAs {
80 asn: String,
81
82 changed: Option<String>,
83
84 #[serde(default)]
85 name: String,
86
87 #[serde(alias = "opaqueId")]
88 opaque_id: Option<String>,
89
90 #[serde(alias = "organizationId")]
91 org_id: String,
92
93 source: String,
95
96 #[serde(rename = "type")]
97 data_type: String,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101enum As2orgJsonEntry {
102 Org(As2orgJsonOrg),
103 As(As2orgJsonAs),
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct As2orgAsInfo {
108 pub asn: u32,
109 pub name: String,
110 pub country_code: String,
111 pub org_id: String,
112 pub org_name: String,
113 pub source: String,
114}
115
116pub struct As2org {
117 as_map: HashMap<u32, As2orgJsonAs>,
118 org_map: HashMap<String, As2orgJsonOrg>,
119 as_to_org: HashMap<u32, String>,
120 org_to_as: HashMap<String, Vec<u32>>,
121}
122
123impl As2org {
124 pub fn new(data_file_path: Option<String>) -> Result<Self> {
125 let entries = match data_file_path {
126 Some(path) => parse_as2org_file(path.as_str())?,
127 None => {
128 let url = get_most_recent_data()?;
129 parse_as2org_file(url.as_str())?
130 }
131 };
132
133 let mut as_map: HashMap<u32, As2orgJsonAs> = HashMap::new();
134 let mut org_map: HashMap<String, As2orgJsonOrg> = HashMap::new();
135
136 for entry in entries {
137 match entry {
138 As2orgJsonEntry::As(as_entry) => {
139 as_map.insert(as_entry.asn.parse::<u32>().unwrap(), as_entry);
140 }
141 As2orgJsonEntry::Org(org_entry) => {
142 org_map.insert(org_entry.org_id.clone(), org_entry);
143 }
144 }
145 }
146
147 let mut as_to_org: HashMap<u32, String> = HashMap::new();
148 let mut org_to_as: HashMap<String, Vec<u32>> = HashMap::new();
149
150 for (asn, as_entry) in as_map.iter() {
151 as_to_org.insert(*asn, as_entry.org_id.clone());
152 let org_asn = org_to_as.entry(as_entry.org_id.clone()).or_default();
153 org_asn.push(*asn);
154 }
155
156 Ok(Self {
157 as_map,
158 org_map,
159 as_to_org,
160 org_to_as,
161 })
162 }
163
164 pub fn get_as_info(&self, asn: u32) -> Option<As2orgAsInfo> {
165 let as_entry = self.as_map.get(&asn)?;
166 let org_id = as_entry.org_id.as_str();
167 let org_entry = self.org_map.get(org_id)?;
168 Some(As2orgAsInfo {
169 asn,
170 name: as_entry.name.clone(),
171 country_code: org_entry.country.clone(),
172 org_id: org_id.to_string(),
173 org_name: org_entry.name.clone(),
174 source: org_entry.source.clone(),
175 })
176 }
177
178 pub fn get_siblings(&self, asn: u32) -> Option<Vec<As2orgAsInfo>> {
179 let org_id = self.as_to_org.get(&asn)?;
180 let org_asns = self.org_to_as.get(org_id)?.to_vec();
181 Some(
182 org_asns
183 .iter()
184 .map(|asn| self.get_as_info(*asn).unwrap())
185 .collect(),
186 )
187 }
188
189 pub fn are_siblings(&self, asn1: u32, asn2: u32) -> bool {
190 let org1 = match self.as_to_org.get(&asn1) {
191 None => return false,
192 Some(o) => o,
193 };
194 let org2 = match self.as_to_org.get(&asn2) {
195 None => return false,
196 Some(o) => o,
197 };
198 org1 == org2
199 }
200}
201
/// Repairs text in which two-byte UTF-8 sequences were decoded as Latin-1.
///
/// A UTF-8 sequence `0xC3 0x80..=0xBF` encodes a character in
/// `U+00C0..=U+00FF`. When such bytes are decoded one byte per character
/// they show up as `Ã` (U+00C3) followed by a character in
/// `U+0080..=U+00BF`. Each such pair is replaced by the character it was
/// meant to be; all other text is copied through unchanged.
///
/// An `Ã` that is not followed by a character in that range is kept as is,
/// and the following character is examined on its own, so that it may itself
/// start a repairable pair.
fn fix_latin1_misinterpretation(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\u{00C3}' {
            // Only consume the next character when it really is the second
            // half of a mis-decoded pair.
            let continuation = chars.next_if(|next| ('\u{0080}'..='\u{00BF}').contains(next));
            if let Some(next_char) = continuation {
                let code_point = 0xC0 + (next_char as u32 - 0x0080);
                let fixed = char::from_u32(code_point)
                    .expect("U+00C0..=U+00FF are valid Unicode scalar values");
                result.push(fixed);
                continue;
            }
        }
        result.push(c);
    }

    result
}
253
254fn parse_as2org_file(path: &str) -> Result<Vec<As2orgJsonEntry>> {
256 let mut res: Vec<As2orgJsonEntry> = vec![];
257
258 for line in oneio::read_lines(path)? {
259 let line = fix_latin1_misinterpretation(&line?);
260 if line.contains(r#""type":"ASN""#) {
261 let data = serde_json::from_str::<As2orgJsonAs>(line.as_str());
262 match data {
263 Ok(data) => {
264 res.push(As2orgJsonEntry::As(data));
265 }
266 Err(e) => {
267 eprintln!("error parsing line:\n{}", line.as_str());
268 return Err(anyhow!(e));
269 }
270 }
271 } else {
272 let data = serde_json::from_str::<As2orgJsonOrg>(line.as_str());
273 match data {
274 Ok(data) => {
275 res.push(As2orgJsonEntry::Org(data));
276 }
277 Err(e) => {
278 eprintln!("error parsing line:\n{}", line.as_str());
279 return Err(anyhow!(e));
280 }
281 }
282 }
283 }
284 Ok(res)
285}
286
287fn get_most_recent_data() -> Result<String> {
289 let data_link: Regex = Regex::new(r".*(........\.as-org2info\.jsonl\.gz).*")?;
290 let content = oneio::read_to_string("https://publicdata.caida.org/datasets/as-organizations/")?;
291 let res: Vec<String> = data_link
292 .captures_iter(content.as_str())
293 .map(|cap| cap[1].to_owned())
294 .collect();
295 let file = res.last().unwrap().to_string();
296
297 Ok(format!(
298 "https://publicdata.caida.org/datasets/as-organizations/{file}"
299 ))
300}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the most recent published dataset (requires network access) and
    /// checks structural invariants of the lookups for a few AS numbers.
    ///
    /// The dataset content is outside this crate's control, so the test only
    /// asserts properties that must hold whenever an AS number is present.
    #[test]
    fn test_load_entries() {
        let as2org = As2org::new(None).unwrap();

        for asn in [400644, 13335, 61786] {
            if let Some(siblings) = as2org.get_siblings(asn) {
                // A sibling list always contains the queried AS itself.
                assert!(siblings.iter().any(|s| s.asn == asn));

                // Every sibling shares the organization of the queried AS.
                let info = as2org.get_as_info(asn).unwrap();
                assert!(siblings.iter().all(|s| s.org_id == info.org_id));

                // `are_siblings` must agree with `get_siblings`.
                assert!(as2org.are_siblings(asn, asn));
                for sibling in &siblings {
                    assert!(as2org.are_siblings(asn, sibling.asn));
                }
            }
        }
    }
}