bgpkit-commons 0.10.3

A library for common BGP-related data and functions.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
//! `asinfo` is a module for simple Autonomous System (AS) name and country lookup.
//!
//! # Data source
//!
//! - RIPE NCC asinfo: <https://ftp.ripe.net/ripe/asnames/asn.txt>
//! - (Optional) CAIDA as-to-organization mapping: <https://www.caida.org/catalog/datasets/as-organizations/>
//! - (Optional) APNIC AS population data: <https://stats.labs.apnic.net/cgi-bin/aspop>
//! - (Optional) IIJ IHR Hegemony data: <https://ihr-archive.iijlab.net/>
//! - (Optional) PeeringDB data: <https://www.peeringdb.com>
//!
//! # Data structure
//!
//! ```rust,no_run
//! use serde::{Deserialize, Serialize};
//! #[derive(Debug, Clone, Serialize, Deserialize)]
//! pub struct AsInfo {
//!     pub asn: u32,
//!     pub name: String,
//!     pub country: String,
//!     pub as2org: Option<As2orgInfo>,
//!     pub population: Option<AsnPopulationData>,
//!     pub hegemony: Option<HegemonyData>,
//!     pub peeringdb: Option<PeeringdbData>,
//! }
//! #[derive(Debug, Clone, Serialize, Deserialize)]
//! pub struct As2orgInfo {
//!     pub name: String,
//!     pub country: String,
//!     pub org_id: String,
//!     pub org_name: String,
//! }
//! #[derive(Debug, Clone, Serialize, Deserialize)]
//! pub struct AsnPopulationData {
//!     pub user_count: i64,
//!     pub percent_country: f64,
//!     pub percent_global: f64,
//!     pub sample_count: i64,
//! }
//! #[derive(Debug, Clone, Serialize, Deserialize)]
//! pub struct HegemonyData {
//!     pub asn: u32,
//!     pub ipv4: f64,
//!     pub ipv6: f64,
//! }
//! #[derive(Debug, Clone, Serialize, Deserialize)]
//! pub struct PeeringdbData {
//!     pub asn: u32,
//!     pub name: Option<String>,
//!     pub name_long: Option<String>,
//!     pub aka: Option<String>,
//!     pub irr_as_set: Option<String>,
//! }
//! ```
//!
//! # Example
//!
//! Call with `BgpkitCommons` instance:
//!
//! ```rust,no_run
//! use bgpkit_commons::BgpkitCommons;
//!
//! let mut bgpkit = BgpkitCommons::new();
//! bgpkit.load_asinfo(false, false, false, false).unwrap();
//! let asinfo = bgpkit.asinfo_get(3333).unwrap().unwrap();
//! assert_eq!(asinfo.name, "RIPE-NCC-AS Reseaux IP Europeens Network Coordination Centre (RIPE NCC)");
//! ```
//!
//! Directly call the module:
//!
//! ```rust,no_run
//! use std::collections::HashMap;
//! use bgpkit_commons::asinfo::{AsInfo, get_asinfo_map};
//!
//! let asinfo: HashMap<u32, AsInfo> = get_asinfo_map(false, false, false, false).unwrap();
//! assert_eq!(asinfo.get(&3333).unwrap().name, "RIPE-NCC-AS Reseaux IP Europeens Network Coordination Centre (RIPE NCC)");
//! assert_eq!(asinfo.get(&400644).unwrap().name, "BGPKIT-LLC");
//! assert_eq!(asinfo.get(&400644).unwrap().country, "US");
//! ```
//!
//! Retrieve all previously generated and cached AS information:
//! ```rust,no_run
//! use std::collections::HashMap;
//! use bgpkit_commons::asinfo::{get_asinfo_map_cached, AsInfo};
//! let asinfo: HashMap<u32, AsInfo> = get_asinfo_map_cached().unwrap();
//! assert_eq!(asinfo.get(&3333).unwrap().name, "RIPE-NCC-AS Reseaux IP Europeens Network Coordination Centre (RIPE NCC)");
//! assert_eq!(asinfo.get(&400644).unwrap().name, "BGPKIT-LLC");
//! assert_eq!(asinfo.get(&400644).unwrap().country, "US");
//! ```
//!
//! Or with `BgpkitCommons` instance:
//! ```rust,no_run
//!
//! use std::collections::HashMap;
//! use bgpkit_commons::asinfo::AsInfo;
//! use bgpkit_commons::BgpkitCommons;
//!
//! let mut commons = BgpkitCommons::new();
//! commons.load_asinfo_cached().unwrap();
//! let asinfo: HashMap<u32, AsInfo> = commons.asinfo_all().unwrap();
//! assert_eq!(asinfo.get(&3333).unwrap().name, "RIPE-NCC-AS Reseaux IP Europeens Network Coordination Centre (RIPE NCC)");
//! assert_eq!(asinfo.get(&400644).unwrap().name, "BGPKIT-LLC");
//! assert_eq!(asinfo.get(&400644).unwrap().country, "US");
//! ```
//!
//! Check if two ASNs are siblings:
//!
//! ```rust,no_run
//! use bgpkit_commons::BgpkitCommons;
//!
//! let mut bgpkit = BgpkitCommons::new();
//! bgpkit.load_asinfo(true, false, false, false).unwrap();
//! let are_siblings = bgpkit.asinfo_are_siblings(3333, 3334).unwrap();
//! ```

mod as2org;
mod hegemony;
mod peeringdb;
mod population;
mod sibling_orgs;

use crate::errors::{data_sources, load_methods, modules};
use crate::{BgpkitCommons, BgpkitCommonsError, LazyLoadable, Result};
use serde::{Deserialize, Serialize};
use sibling_orgs::SiblingOrgsUtils;
use std::collections::HashMap;
use tracing::info;

pub use hegemony::HegemonyData;
pub use peeringdb::PeeringdbData;
pub use population::AsnPopulationData;

/// Aggregated information about a single Autonomous System (AS).
///
/// `asn`, `name` and `country` are always present. When built by
/// `get_asinfo_map`, each optional field is `Some` only if the matching
/// `load_*` flag was set and that data source had an entry for the ASN.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AsInfo {
    /// Autonomous System Number; also the key used in the maps built by this module.
    pub asn: u32,
    /// AS name as listed in the `asn.txt` file (the text before the last `", "`).
    pub name: String,
    /// Country field from the `asn.txt` file (the text after the last `", "`).
    pub country: String,
    /// Organization details taken from the as2org data set.
    pub as2org: Option<As2orgInfo>,
    /// Population data for this AS.
    pub population: Option<AsnPopulationData>,
    /// Hegemony score data for this AS.
    pub hegemony: Option<HegemonyData>,
    /// PeeringDB record for this AS.
    pub peeringdb: Option<PeeringdbData>,
}

impl AsInfo {
    /// Picks the most descriptive name known for this AS.
    ///
    /// Candidates are tried in this order and the first match wins:
    ///
    /// 1. `peeringdb.name`, whenever it is `Some`
    /// 2. `as2org.org_name`, whenever it is a non-empty string
    /// 3. the plain `name` field
    ///
    /// The chosen name is returned as an owned `String`.
    pub fn get_preferred_name(&self) -> String {
        let from_peeringdb = self
            .peeringdb
            .as_ref()
            .and_then(|record| record.name.as_ref());
        let from_as2org = self
            .as2org
            .as_ref()
            .map(|org| &org.org_name)
            .filter(|org_name| !org_name.is_empty());

        from_peeringdb
            .or(from_as2org)
            .unwrap_or(&self.name)
            .clone()
    }
}

/// Organization-level details for an AS, copied from the as2org data set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct As2orgInfo {
    /// AS name as reported by the as2org data.
    pub name: String,
    /// Country code as reported by the as2org data.
    pub country: String,
    /// Organization identifier; `asinfo_are_siblings` compares this value.
    pub org_id: String,
    /// Organization name; may be empty, in which case `get_preferred_name` skips it.
    pub org_name: String,
}

/// Original RIPE NCC `asn.txt` location; `get_asinfo_map` falls back to it when
/// the BGPKIT mirror cannot be read.
const RIPE_RIS_ASN_TXT_URL: &str = "https://ftp.ripe.net/ripe/asnames/asn.txt";
/// BGPKIT-hosted copy of `asn.txt`; `get_asinfo_map` tries this URL first.
const BGPKIT_ASN_TXT_MIRROR_URL: &str = "https://data.bgpkit.com/commons/asn.txt";
/// Pre-generated JSON-lines file of `AsInfo` records read by `get_asinfo_map_cached`.
const BGPKIT_ASNINFO_URL: &str = "https://data.bgpkit.com/commons/asinfo.jsonl";

/// Builder for configuring which data sources to load for AS information.
///
/// All flags start as `false` (via `Default`); each `with_*` method switches
/// one of them on, and `build` passes the four flags to `AsInfoUtils::new`.
///
/// # Example
///
/// ```rust,no_run
/// use bgpkit_commons::asinfo::AsInfoBuilder;
///
/// let asinfo = AsInfoBuilder::new()
///     .with_as2org()
///     .with_peeringdb()
///     .build()
///     .unwrap();
/// ```
#[derive(Default)]
pub struct AsInfoBuilder {
    /// Load the CAIDA AS-to-organization mapping.
    load_as2org: bool,
    /// Load the APNIC AS population data.
    load_population: bool,
    /// Load the IIJ IHR hegemony score data.
    load_hegemony: bool,
    /// Load the PeeringDB data.
    load_peeringdb: bool,
}

impl AsInfoBuilder {
    /// Start from a builder in which no optional data source is selected.
    pub fn new() -> Self {
        Default::default()
    }

    /// Select the CAIDA AS-to-Organization mapping data.
    pub fn with_as2org(self) -> Self {
        Self {
            load_as2org: true,
            ..self
        }
    }

    /// Select the APNIC AS population data.
    pub fn with_population(self) -> Self {
        Self {
            load_population: true,
            ..self
        }
    }

    /// Select the IIJ IHR hegemony score data.
    pub fn with_hegemony(self) -> Self {
        Self {
            load_hegemony: true,
            ..self
        }
    }

    /// Select the PeeringDB data.
    pub fn with_peeringdb(self) -> Self {
        Self {
            load_peeringdb: true,
            ..self
        }
    }

    /// Select every optional data source at once.
    pub fn with_all(self) -> Self {
        self.with_as2org()
            .with_population()
            .with_hegemony()
            .with_peeringdb()
    }

    /// Consume the builder and construct an `AsInfoUtils` from the selected sources.
    ///
    /// Any error returned by `AsInfoUtils::new` is passed through unchanged.
    pub fn build(self) -> Result<AsInfoUtils> {
        let Self {
            load_as2org,
            load_population,
            load_hegemony,
            load_peeringdb,
        } = self;
        AsInfoUtils::new(load_as2org, load_population, load_hegemony, load_peeringdb)
    }
}

/// Loaded AS information together with the settings used to load it.
pub struct AsInfoUtils {
    /// AS records keyed by ASN.
    pub asinfo_map: HashMap<u32, AsInfo>,
    /// Sibling-organization lookup; `Some` when built with `load_as2org = true`
    /// or from the cached data, `None` otherwise.
    pub sibling_orgs: Option<SiblingOrgsUtils>,
    /// Whether as2org data was requested; `reload` reuses this flag.
    pub load_as2org: bool,
    /// Whether population data was requested; `reload` reuses this flag.
    pub load_population: bool,
    /// Whether hegemony data was requested; `reload` reuses this flag.
    pub load_hegemony: bool,
    /// Whether PeeringDB data was requested; `reload` reuses this flag.
    pub load_peeringdb: bool,
}

impl AsInfoUtils {
    /// Loads AS information with the requested optional data sources.
    ///
    /// The AS map is fetched first through `get_asinfo_map`. The sibling
    /// organization lookup is only constructed when `load_as2org` is `true`.
    /// The four flags are stored so that `reload` can repeat the same fetch.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_asinfo_map` or `SiblingOrgsUtils::new`.
    pub fn new(
        load_as2org: bool,
        load_population: bool,
        load_hegemony: bool,
        load_peeringdb: bool,
    ) -> Result<Self> {
        let asinfo_map =
            get_asinfo_map(load_as2org, load_population, load_hegemony, load_peeringdb)?;
        let sibling_orgs = load_as2org.then(SiblingOrgsUtils::new).transpose()?;

        Ok(Self {
            asinfo_map,
            sibling_orgs,
            load_as2org,
            load_population,
            load_hegemony,
            load_peeringdb,
        })
    }

    /// Loads AS information from the pre-generated cache file.
    ///
    /// The sibling organization lookup is always constructed and all four
    /// `load_*` flags are recorded as `true`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_asinfo_map_cached` or `SiblingOrgsUtils::new`.
    pub fn new_from_cached() -> Result<Self> {
        // Fields are evaluated top to bottom, so the map is fetched before the sibling data.
        Ok(Self {
            asinfo_map: get_asinfo_map_cached()?,
            sibling_orgs: Some(SiblingOrgsUtils::new()?),
            load_as2org: true,
            load_population: true,
            load_hegemony: true,
            load_peeringdb: true,
        })
    }

    /// Re-fetches the AS map using the flags stored on this instance.
    ///
    /// Only `asinfo_map` is replaced; `sibling_orgs` is left as it was. If the
    /// fetch fails, the existing map is kept and the error is returned.
    pub fn reload(&mut self) -> Result<()> {
        let refreshed = get_asinfo_map(
            self.load_as2org,
            self.load_population,
            self.load_hegemony,
            self.load_peeringdb,
        )?;
        self.asinfo_map = refreshed;
        Ok(())
    }

    /// Looks up the record for `asn`, returning `None` when the ASN is unknown.
    pub fn get(&self, asn: u32) -> Option<&AsInfo> {
        self.asinfo_map.get(&asn)
    }
}

impl LazyLoadable for AsInfoUtils {
    fn reload(&mut self) -> Result<()> {
        self.reload()
    }

    fn is_loaded(&self) -> bool {
        !self.asinfo_map.is_empty()
    }

    fn loading_status(&self) -> &'static str {
        if self.is_loaded() {
            "ASInfo data loaded"
        } else {
            "ASInfo data not loaded"
        }
    }
}

/// Loads the pre-generated AS information file and indexes it by ASN.
///
/// Every non-blank line of the file is parsed as one JSON-encoded `AsInfo`.
/// If an ASN appears more than once, the last record wins.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read, or when a
/// non-blank line is not a valid `AsInfo` JSON object.
pub fn get_asinfo_map_cached() -> Result<HashMap<u32, AsInfo>> {
    info!("loading asinfo from previously generated BGPKIT cache file...");
    let mut cached = HashMap::new();
    for raw in oneio::read_lines(BGPKIT_ASNINFO_URL)? {
        let raw = raw?;
        // Blank lines carry no record; anything else must parse.
        if !raw.trim().is_empty() {
            let record: AsInfo = serde_json::from_str(&raw)?;
            cached.insert(record.asn, record);
        }
    }
    Ok(cached)
}

/// Splits one `asn.txt` line into `(asn, name, country)`.
///
/// The ASN is the text before the first space, the country is the text after
/// the last `", "`, and the name is everything in between. Returns `None`
/// when either separator is missing or the ASN is not a valid `u32`.
fn parse_asn_txt_line(line: &str) -> Option<(u32, &str, &str)> {
    let (asn_str, name_country_str) = line.split_once(' ')?;
    let (name_str, country_str) = name_country_str.rsplit_once(", ")?;
    // The file comes from a remote server: skip a malformed ASN instead of panicking.
    let asn = asn_str.parse::<u32>().ok()?;
    Some((asn, name_str, country_str))
}

/// Builds the ASN-to-`AsInfo` map from the `asn.txt` file and the optional data sources.
///
/// The BGPKIT mirror of `asn.txt` is tried first and the original RIPE NCC
/// URL is used as a fallback. Each `load_*` flag enables one extra data
/// source; its data is attached to every AS that source knows about. Lines
/// that cannot be parsed are skipped. If an ASN appears more than once, the
/// last line wins.
///
/// # Arguments
///
/// * `load_as2org` - attach organization details (`AsInfo::as2org`).
/// * `load_population` - attach population data (`AsInfo::population`).
/// * `load_hegemony` - attach hegemony scores (`AsInfo::hegemony`).
/// * `load_peeringdb` - attach PeeringDB records (`AsInfo::peeringdb`).
///
/// # Errors
///
/// Returns a data-source error when neither `asn.txt` URL can be read, and
/// propagates any error raised while loading an enabled optional source.
pub fn get_asinfo_map(
    load_as2org: bool,
    load_population: bool,
    load_hegemony: bool,
    load_peeringdb: bool,
) -> Result<HashMap<u32, AsInfo>> {
    info!("loading asinfo from RIPE NCC...");
    // Only the error of the fallback read is reported; the mirror's error is discarded.
    let text = oneio::read_to_string(BGPKIT_ASN_TXT_MIRROR_URL)
        .or_else(|_| oneio::read_to_string(RIPE_RIS_ASN_TXT_URL))
        .map_err(|e| {
            BgpkitCommonsError::data_source_error(
                data_sources::BGPKIT,
                format!(
                    "error reading asinfo (neither mirror or original works): {}",
                    e
                ),
            )
        })?;

    let as2org_utils = if load_as2org {
        info!("loading as2org data from CAIDA...");
        Some(as2org::As2org::new(None)?)
    } else {
        None
    };
    let population_utils = if load_population {
        info!("loading ASN population data from APNIC...");
        Some(population::AsnPopulation::new()?)
    } else {
        None
    };
    let hegemony_utils = if load_hegemony {
        info!("loading IIJ IHR hegemony score data from BGPKIT mirror...");
        Some(hegemony::Hegemony::new()?)
    } else {
        None
    };
    let peeringdb_utils = if load_peeringdb {
        info!("loading peeringdb data...");
        Some(peeringdb::Peeringdb::new()?)
    } else {
        None
    };

    // Collect straight into the map; a later duplicate ASN overwrites an earlier one.
    let asnames_map = text
        .lines()
        .filter_map(|line| {
            let (asn, name_str, country_str) = parse_asn_txt_line(line)?;
            let as2org = as2org_utils.as_ref().and_then(|as2org_data| {
                as2org_data.get_as_info(asn).map(|org| As2orgInfo {
                    name: org.name.clone(),
                    country: org.country_code.clone(),
                    org_id: org.org_id.clone(),
                    org_name: org.org_name.clone(),
                })
            });
            let population = population_utils.as_ref().and_then(|p| p.get(asn));
            let hegemony = hegemony_utils
                .as_ref()
                .and_then(|h| h.get_score(asn).cloned());
            let peeringdb = peeringdb_utils
                .as_ref()
                .and_then(|p| p.get_data(asn).cloned());
            Some((
                asn,
                AsInfo {
                    asn,
                    name: name_str.to_string(),
                    country: country_str.to_string(),
                    as2org,
                    population,
                    hegemony,
                    peeringdb,
                },
            ))
        })
        .collect::<HashMap<u32, AsInfo>>();

    Ok(asnames_map)
}

impl BgpkitCommons {
    /// Returns a HashMap containing all AS information.
    ///
    /// The whole map is cloned, so the result is independent of this instance.
    ///
    /// # Returns
    ///
    /// - `Ok(HashMap<u32, AsInfo>)`: A HashMap where the key is the ASN and the value is the corresponding AsInfo.
    /// - `Err`: If the asinfo is not loaded.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use bgpkit_commons::BgpkitCommons;
    ///
    /// let mut bgpkit = BgpkitCommons::new();
    /// bgpkit.load_asinfo(false, false, false, false).unwrap();
    /// let all_asinfo = bgpkit.asinfo_all().unwrap();
    /// ```
    pub fn asinfo_all(&self) -> Result<HashMap<u32, AsInfo>> {
        let utils = self.asinfo.as_ref().ok_or_else(|| {
            BgpkitCommonsError::module_not_loaded(modules::ASINFO, load_methods::LOAD_ASINFO)
        })?;

        Ok(utils.asinfo_map.clone())
    }

    /// Retrieves AS information for a specific ASN.
    ///
    /// # Arguments
    ///
    /// * `asn` - The Autonomous System Number to look up.
    ///
    /// # Returns
    ///
    /// - `Ok(Some(AsInfo))`: The AS information if found.
    /// - `Ok(None)`: If the ASN is not found in the database.
    /// - `Err`: If the asinfo is not loaded.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use bgpkit_commons::BgpkitCommons;
    ///
    /// let mut bgpkit = BgpkitCommons::new();
    /// bgpkit.load_asinfo(false, false, false, false).unwrap();
    /// let asinfo = bgpkit.asinfo_get(3333).unwrap();
    /// ```
    pub fn asinfo_get(&self, asn: u32) -> Result<Option<AsInfo>> {
        let utils = self.asinfo.as_ref().ok_or_else(|| {
            BgpkitCommonsError::module_not_loaded(modules::ASINFO, load_methods::LOAD_ASINFO)
        })?;

        Ok(utils.get(asn).cloned())
    }

    /// Checks if two ASNs are siblings (belong to the same organization).
    ///
    /// Two ASNs are siblings when both have as2org data and either their
    /// organization IDs are equal or the loaded sibling-organization data
    /// reports the two organizations as siblings. If either ASN is unknown or
    /// has no as2org data, the result is `false`.
    ///
    /// # Arguments
    ///
    /// * `asn1` - The first Autonomous System Number.
    /// * `asn2` - The second Autonomous System Number.
    ///
    /// # Returns
    ///
    /// - `Ok(bool)`: True if the ASNs are siblings, false otherwise.
    /// - `Err`: If the asinfo is not loaded or not loaded with as2org data.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use bgpkit_commons::BgpkitCommons;
    ///
    /// let mut bgpkit = BgpkitCommons::new();
    /// bgpkit.load_asinfo(true, false, false, false).unwrap();
    /// let are_siblings = bgpkit.asinfo_are_siblings(3333, 3334).unwrap();
    /// ```
    ///
    /// # Note
    ///
    /// This function requires the asinfo to be loaded with as2org data.
    pub fn asinfo_are_siblings(&self, asn1: u32, asn2: u32) -> Result<bool> {
        let utils = self.asinfo.as_ref().ok_or_else(|| {
            BgpkitCommonsError::module_not_loaded(modules::ASINFO, load_methods::LOAD_ASINFO)
        })?;
        if !utils.load_as2org {
            return Err(BgpkitCommonsError::module_not_configured(
                modules::ASINFO,
                "as2org data",
                "load_asinfo() with as2org=true",
            ));
        }

        // Borrow the organization records in place; only the org IDs are compared.
        let org_1 = utils.get(asn1).and_then(|info| info.as2org.as_ref());
        let org_2 = utils.get(asn2).and_then(|info| info.as2org.as_ref());

        match (org_1, org_2) {
            (Some(org_1), Some(org_2)) => {
                let org_id_1 = org_1.org_id.as_str();
                let org_id_2 = org_2.org_id.as_str();

                Ok(org_id_1 == org_id_2
                    || utils
                        .sibling_orgs
                        .as_ref()
                        .map(|s| s.are_sibling_orgs(org_id_1, org_id_2))
                        .unwrap_or(false))
            }
            _ => Ok(false),
        }
    }
}