wayback_rpki/
lib.rs

1#![allow(clippy::nonminimal_bool)]
2
3// pub mod roas_table;
4mod api;
5mod roas_trie;
6
7// pub use crate::roas_table::*;
8
9use anyhow::{anyhow, Result};
10use chrono::{Datelike, NaiveDate};
11use ipnet::IpNet;
12use rayon::prelude::*;
13use regex::Regex;
14use std::collections::{HashMap, HashSet};
15use std::str::FromStr;
16use tracing::{debug, info, warn};
17
18pub use api::*;
19pub use roas_trie::*;
20
/// A single ROA record as produced by `parse_roas_csv`.
///
/// The derived `Hash`/`Eq` implementations cover every field, so two entries are
/// considered duplicates only when TAL, prefix, max length, ASN and date all match.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct RoaEntry {
    /// Trust anchor name, taken from the archive URL (e.g. `ripencc` from `ripencc.tal`).
    tal: String,
    /// IP prefix covered by the ROA.
    prefix: IpNet,
    /// Maximum prefix length; the parser falls back to the prefix's own length
    /// when the CSV value cannot be parsed.
    max_len: i32,
    /// Origin AS number, parsed from the CSV column with the leading `AS` removed.
    asn: u32,
    /// Date of the archive file this entry was read from (taken from the URL path).
    date: NaiveDate,
}
29
/// Metadata describing one daily `roas.csv.xz` archive file discovered by the crawler.
#[derive(Debug)]
pub struct RoaFile {
    /// Full URL of the `roas.csv.xz` file.
    pub url: String,
    /// Trust anchor name derived from the crawled TAL URL.
    pub tal: String,
    /// Calendar date encoded in the file's `<year>/<month>/<day>` path.
    pub file_date: NaiveDate,
    /// Row counter; the crawler initializes it to 0.
    /// NOTE(review): presumably filled in by whoever ingests the file — confirm against callers.
    pub rows_count: i32,
    /// Processing flag; the crawler initializes it to `false`.
    /// NOTE(review): presumably set once the file has been ingested — confirm against callers.
    pub processed: bool,
}
38
39fn __crawl_years(tal_url: &str) -> Vec<String> {
40    let year_pattern: Regex = Regex::new(r#"<a href=".*">\s*(\d\d\d\d)/</a>.*"#).unwrap();
41
42    // get all years
43    let body = match oneio::read_to_string(tal_url) {
44        Ok(b) => b,
45        Err(e) => {
46            warn!("failed to fetch years listing {}: {}", tal_url, e);
47            return Vec::new();
48        }
49    };
50    let years: Vec<String> = year_pattern
51        .captures_iter(body.as_str())
52        .map(|cap| cap[1].to_owned())
53        .collect();
54
55    years
56}
57
58fn __crawl_months_days(months_days_url: &str) -> Vec<String> {
59    let month_day_pattern: Regex = Regex::new(r#"<a href=".*">\s*(\d\d)/</a>.*"#).unwrap();
60
61    let body = match oneio::read_to_string(months_days_url) {
62        Ok(b) => b,
63        Err(e) => {
64            warn!(
65                "failed to fetch months/days listing {}: {}",
66                months_days_url, e
67            );
68            return Vec::new();
69        }
70    };
71    let months_days: Vec<String> = month_day_pattern
72        .captures_iter(body.as_str())
73        .map(|cap| cap[1].to_owned())
74        .collect();
75
76    months_days
77}
78
79fn check_date(
80    date: NaiveDate,
81    from: Option<NaiveDate>,
82    until: Option<NaiveDate>,
83    check_month: bool,
84    check_day: bool,
85) -> bool {
86    let from_match = match from {
87        Some(from_date) => {
88            date.year() >= from_date.year()
89                && (check_month && date.month() >= from_date.month() || !check_month)
90                && (check_day && date >= from_date || !check_day)
91        }
92        None => true,
93    };
94    let until_match = match until {
95        Some(until_date) => {
96            date.year() <= until_date.year()
97                && (check_month && date.month() <= until_date.month() || !check_month)
98                && (check_day && date <= until_date || !check_day)
99        }
100        None => true,
101    };
102
103    from_match && until_match
104}
105
106/// Crawl and return all RIPE ROA file metadata after a given date
107///
108/// The ROA files URLs has the following format:
109/// https://ftp.ripe.net/ripe/rpki/ripencc.tal/2022/08/28/roas.csv.xz
110pub fn crawl_tal_after(
111    tal_url: &str,
112    from: Option<NaiveDate>,
113    until: Option<NaiveDate>,
114) -> Vec<RoaFile> {
115    let fields: Vec<&str> = tal_url.split('/').collect();
116    let tal = fields[4].split('.').collect::<Vec<&str>>()[0].to_owned();
117
118    // get all years
119    let years: Vec<i32> = __crawl_years(tal_url)
120        .into_iter()
121        .map(|y| y.parse::<i32>().unwrap())
122        .filter(|y| {
123            let date = NaiveDate::from_ymd_opt(*y, 1, 1).unwrap();
124            check_date(date, from, until, false, false)
125        })
126        .collect();
127
128    years
129        .par_iter()
130        .map(|year| {
131            info!("scanning roas.csv.xz files for {}/{} ...", &tal_url, &year);
132            let year_url = format!("{}/{}", tal_url, year);
133
134            let months: Vec<u32> = __crawl_months_days(year_url.as_str())
135                .into_iter()
136                .map(|m| m.parse::<u32>().unwrap())
137                .filter(|m| {
138                    let date = NaiveDate::from_ymd_opt(*year, *m, 1).unwrap();
139                    check_date(date, from, until, true, false)
140                })
141                .collect();
142
143            months
144                .par_iter()
145                .map(|month| {
146                    debug!("scraping data for {}/{:02} ...", &year_url, &month);
147                    let month_url = format!("{}/{:02}", year_url, month);
148
149                    let days: Vec<u32> = __crawl_months_days(month_url.as_str())
150                        .into_iter()
151                        .map(|d| d.parse::<u32>().unwrap())
152                        .filter(|d| {
153                            let date = NaiveDate::from_ymd_opt(*year, *month, *d).unwrap();
154                            check_date(date, from, until, true, true)
155                        })
156                        .collect();
157
158                    days.into_iter()
159                        .map(|day| {
160                            let url = format!("{}/{:02}/roas.csv.xz", month_url, day);
161                            let file_date = NaiveDate::from_ymd_opt(*year, *month, day).unwrap();
162                            RoaFile {
163                                tal: tal.clone(),
164                                url,
165                                file_date,
166                                rows_count: 0,
167                                processed: false,
168                            }
169                        })
170                        .collect::<Vec<RoaFile>>()
171                })
172                .flat_map(|x| x)
173                .collect::<Vec<RoaFile>>()
174        })
175        .flat_map(|x| x)
176        .collect::<Vec<RoaFile>>()
177}
178
179/// Parse a RIPE ROA CSV file and return a set of ROA entries.
180pub fn parse_roas_csv(csv_url: &str) -> Result<Vec<RoaEntry>> {
181    // parse csv url for auxiliary fields
182    let fields: Vec<&str> = csv_url.split('/').collect();
183
184    let tal = fields[4].split('.').collect::<Vec<&str>>()[0].to_owned();
185    let year = fields[5].parse::<i32>()?;
186    let month = fields[6].parse::<u32>()?;
187    let day = fields[7].parse::<u32>()?;
188    let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
189
190    let mut roas = HashSet::new();
191
192    let mut file_ok = false;
193
194    for line in oneio::read_lines(csv_url)? {
195        let line = line.unwrap();
196
197        if line.starts_with("URI") {
198            file_ok = true;
199            continue;
200        }
201
202        if !file_ok {
203            return Err(anyhow!("file format incorrect!"));
204        }
205
206        let fields = line.split(',').collect::<Vec<&str>>();
207        let asn = fields[1].trim_start_matches("AS").parse::<u32>().unwrap();
208        let prefix = IpNet::from_str(fields[2].to_owned().as_str()).unwrap();
209        let max_len = match fields[3].to_owned().parse::<i32>() {
210            Ok(l) => l,
211            Err(_e) => prefix.prefix_len() as i32,
212        };
213
214        let entry = RoaEntry {
215            prefix,
216            asn,
217            max_len,
218            tal: tal.to_owned(),
219            date,
220        };
221
222        roas.insert(entry);
223    }
224
225    Ok(roas.into_iter().collect::<Vec<RoaEntry>>())
226}
227
/// Return the archive base URL(s) for the requested trust anchor.
///
/// * `None` - all known TAL URLs, always in the same (declaration) order.
/// * `Some(name)` - a one-element vector with the URL registered for `name`.
///
/// # Panics
///
/// Panics when `name` is not one of `"ripencc"`, `"afrinic"`, `"apnic"`, `"arin"`
/// or `"lacnic"`.
pub fn get_tal_urls(tal: Option<String>) -> Vec<String> {
    // Kept as an ordered table so the `None` case does not depend on hash-map
    // iteration order, which differs between runs.
    const TAL_URLS: [(&str, &str); 5] = [
        ("afrinic", "https://ftp.ripe.net/rpki/afrinic.tal"),
        ("lacnic", "https://ftp.ripe.net/rpki/lacnic.tal"),
        ("apnic", "https://ftp.ripe.net/rpki/apnic.tal"),
        ("ripencc", "https://ftp.ripe.net/rpki/ripencc.tal"),
        ("arin", "https://ftp.ripe.net/rpki/arin.tal"),
    ];

    match tal {
        None => TAL_URLS.iter().map(|(_, url)| url.to_string()).collect(),
        Some(tal) => {
            let tal_map: HashMap<&str, &str> = HashMap::from(TAL_URLS);
            let url = tal_map
                .get(tal.as_str())
                .expect(r#"can only be one of the following "ripencc"|"afrinic"|"apnic"|"arin"|"lacnic""#)
                .to_string();
            vec![url]
        }
    }
}
248
// These tests fetch live data from ftp.ripe.net and therefore need network access.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses one archived file and prints the first few entries.
    #[test]
    fn test_parse() {
        let parsed =
            parse_roas_csv("https://ftp.ripe.net/rpki/ripencc.tal/2022/01/15/roas.csv.xz").unwrap();
        parsed
            .iter()
            .take(10)
            .for_each(|roa| println!("{} {} {}", roa.asn, roa.prefix, roa.max_len));
    }

    /// Crawling with a lower bound must start exactly at that date.
    #[test]
    fn test_crawl_after() {
        let lower_bound = NaiveDate::from_ymd_opt(2023, 3, 31).unwrap();
        let files = crawl_tal_after(
            "https://ftp.ripe.net/rpki/ripencc.tal",
            Some(lower_bound),
            None,
        );
        assert!(!files.is_empty());
        let first = &files[0];
        assert_eq!(first.file_date, lower_bound);
    }

    /// Crawling without bounds must start at the first archived day.
    #[test]
    fn test_crawl_after_bootstrap() {
        let files = crawl_tal_after("https://ftp.ripe.net/rpki/ripencc.tal", None, None);
        assert!(!files.is_empty());
        let earliest = NaiveDate::from_ymd_opt(2011, 1, 21).unwrap();
        assert_eq!(files[0].file_date, earliest);
    }

    /// Dumps every entry for one specific prefix from a given day's file.
    #[test]
    fn test_missing_prefix() {
        let parsed =
            parse_roas_csv("https://ftp.ripe.net/rpki/ripencc.tal/2024/06/02/roas.csv.xz").unwrap();
        let matching = parsed
            .into_iter()
            .filter(|roa| roa.prefix.to_string().as_str() == "193.0.14.0/24");
        for entry in matching {
            dbg!(entry);
        }
    }
}