1#![allow(clippy::nonminimal_bool)]
2
3mod api;
5mod roas_trie;
6
7use anyhow::{anyhow, Result};
10use chrono::{Datelike, NaiveDate};
11use ipnet::IpNet;
12use rayon::prelude::*;
13use regex::Regex;
14use std::collections::{HashMap, HashSet};
15use std::str::FromStr;
16use tracing::{debug, info, warn};
17
18pub use api::*;
19pub use roas_trie::*;
20
21#[derive(Debug, Clone, Hash, Eq, PartialEq)]
22pub struct RoaEntry {
23 tal: String,
24 prefix: IpNet,
25 max_len: i32,
26 asn: u32,
27 date: NaiveDate,
28}
29
30#[derive(Debug)]
31pub struct RoaFile {
32 pub url: String,
33 pub tal: String,
34 pub file_date: NaiveDate,
35 pub rows_count: i32,
36 pub processed: bool,
37}
38
39fn __crawl_years(tal_url: &str) -> Vec<String> {
40 let year_pattern: Regex = Regex::new(r#"<a href=".*">\s*(\d\d\d\d)/</a>.*"#).unwrap();
41
42 let body = match oneio::read_to_string(tal_url) {
44 Ok(b) => b,
45 Err(e) => {
46 warn!("failed to fetch years listing {}: {}", tal_url, e);
47 return Vec::new();
48 }
49 };
50 let years: Vec<String> = year_pattern
51 .captures_iter(body.as_str())
52 .map(|cap| cap[1].to_owned())
53 .collect();
54
55 years
56}
57
58fn __crawl_months_days(months_days_url: &str) -> Vec<String> {
59 let month_day_pattern: Regex = Regex::new(r#"<a href=".*">\s*(\d\d)/</a>.*"#).unwrap();
60
61 let body = match oneio::read_to_string(months_days_url) {
62 Ok(b) => b,
63 Err(e) => {
64 warn!(
65 "failed to fetch months/days listing {}: {}",
66 months_days_url, e
67 );
68 return Vec::new();
69 }
70 };
71 let months_days: Vec<String> = month_day_pattern
72 .captures_iter(body.as_str())
73 .map(|cap| cap[1].to_owned())
74 .collect();
75
76 months_days
77}
78
79fn check_date(
80 date: NaiveDate,
81 from: Option<NaiveDate>,
82 until: Option<NaiveDate>,
83 check_month: bool,
84 check_day: bool,
85) -> bool {
86 let from_match = match from {
87 Some(from_date) => {
88 date.year() >= from_date.year()
89 && (check_month && date.month() >= from_date.month() || !check_month)
90 && (check_day && date >= from_date || !check_day)
91 }
92 None => true,
93 };
94 let until_match = match until {
95 Some(until_date) => {
96 date.year() <= until_date.year()
97 && (check_month && date.month() <= until_date.month() || !check_month)
98 && (check_day && date <= until_date || !check_day)
99 }
100 None => true,
101 };
102
103 from_match && until_match
104}
105
106pub fn crawl_tal_after(
111 tal_url: &str,
112 from: Option<NaiveDate>,
113 until: Option<NaiveDate>,
114) -> Vec<RoaFile> {
115 let fields: Vec<&str> = tal_url.split('/').collect();
116 let tal = fields[4].split('.').collect::<Vec<&str>>()[0].to_owned();
117
118 let years: Vec<i32> = __crawl_years(tal_url)
120 .into_iter()
121 .map(|y| y.parse::<i32>().unwrap())
122 .filter(|y| {
123 let date = NaiveDate::from_ymd_opt(*y, 1, 1).unwrap();
124 check_date(date, from, until, false, false)
125 })
126 .collect();
127
128 years
129 .par_iter()
130 .map(|year| {
131 info!("scanning roas.csv.xz files for {}/{} ...", &tal_url, &year);
132 let year_url = format!("{}/{}", tal_url, year);
133
134 let months: Vec<u32> = __crawl_months_days(year_url.as_str())
135 .into_iter()
136 .map(|m| m.parse::<u32>().unwrap())
137 .filter(|m| {
138 let date = NaiveDate::from_ymd_opt(*year, *m, 1).unwrap();
139 check_date(date, from, until, true, false)
140 })
141 .collect();
142
143 months
144 .par_iter()
145 .map(|month| {
146 debug!("scraping data for {}/{:02} ...", &year_url, &month);
147 let month_url = format!("{}/{:02}", year_url, month);
148
149 let days: Vec<u32> = __crawl_months_days(month_url.as_str())
150 .into_iter()
151 .map(|d| d.parse::<u32>().unwrap())
152 .filter(|d| {
153 let date = NaiveDate::from_ymd_opt(*year, *month, *d).unwrap();
154 check_date(date, from, until, true, true)
155 })
156 .collect();
157
158 days.into_iter()
159 .map(|day| {
160 let url = format!("{}/{:02}/roas.csv.xz", month_url, day);
161 let file_date = NaiveDate::from_ymd_opt(*year, *month, day).unwrap();
162 RoaFile {
163 tal: tal.clone(),
164 url,
165 file_date,
166 rows_count: 0,
167 processed: false,
168 }
169 })
170 .collect::<Vec<RoaFile>>()
171 })
172 .flat_map(|x| x)
173 .collect::<Vec<RoaFile>>()
174 })
175 .flat_map(|x| x)
176 .collect::<Vec<RoaFile>>()
177}
178
179pub fn parse_roas_csv(csv_url: &str) -> Result<Vec<RoaEntry>> {
181 let fields: Vec<&str> = csv_url.split('/').collect();
183
184 let tal = fields[4].split('.').collect::<Vec<&str>>()[0].to_owned();
185 let year = fields[5].parse::<i32>()?;
186 let month = fields[6].parse::<u32>()?;
187 let day = fields[7].parse::<u32>()?;
188 let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
189
190 let mut roas = HashSet::new();
191
192 let mut file_ok = false;
193
194 for line in oneio::read_lines(csv_url)? {
195 let line = line.unwrap();
196
197 if line.starts_with("URI") {
198 file_ok = true;
199 continue;
200 }
201
202 if !file_ok {
203 return Err(anyhow!("file format incorrect!"));
204 }
205
206 let fields = line.split(',').collect::<Vec<&str>>();
207 let asn = fields[1].trim_start_matches("AS").parse::<u32>().unwrap();
208 let prefix = IpNet::from_str(fields[2].to_owned().as_str()).unwrap();
209 let max_len = match fields[3].to_owned().parse::<i32>() {
210 Ok(l) => l,
211 Err(_e) => prefix.prefix_len() as i32,
212 };
213
214 let entry = RoaEntry {
215 prefix,
216 asn,
217 max_len,
218 tal: tal.to_owned(),
219 date,
220 };
221
222 roas.insert(entry);
223 }
224
225 Ok(roas.into_iter().collect::<Vec<RoaEntry>>())
226}
227
/// Returns the archive root URL(s) for the requested trust anchor.
///
/// * `None`: the URLs of all five known trust anchors, sorted
///   lexicographically so the result is the same on every call (plain
///   `HashMap` iteration order is not stable between runs).
/// * `Some(name)`: a one-element vector with the URL for `name`, which must be
///   one of `"ripencc"`, `"afrinic"`, `"apnic"`, `"arin"` or `"lacnic"`.
///
/// # Panics
///
/// Panics if `tal` is `Some(name)` and `name` is not a known trust anchor.
pub fn get_tal_urls(tal: Option<String>) -> Vec<String> {
    let tal_map = HashMap::from([
        ("afrinic", "https://ftp.ripe.net/rpki/afrinic.tal"),
        ("lacnic", "https://ftp.ripe.net/rpki/lacnic.tal"),
        ("apnic", "https://ftp.ripe.net/rpki/apnic.tal"),
        ("ripencc", "https://ftp.ripe.net/rpki/ripencc.tal"),
        ("arin", "https://ftp.ripe.net/rpki/arin.tal"),
    ]);

    match tal {
        None => {
            let mut urls: Vec<String> = tal_map.values().map(|url| url.to_string()).collect();
            // Sort so that callers see a reproducible order.
            urls.sort_unstable();
            urls
        }
        Some(tal) => {
            let url = tal_map
                .get(tal.as_str())
                .expect(r#"can only be one of the following "ripencc"|"afrinic"|"apnic"|"arin"|"lacnic""#)
                .to_string();
            vec![url]
        }
    }
}
248
// NOTE: every test below reads from https://ftp.ripe.net and therefore needs
// network access to pass.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses one known daily file and prints the first few entries.
    #[test]
    fn test_parse() {
        let csv_url = "https://ftp.ripe.net/rpki/ripencc.tal/2022/01/15/roas.csv.xz";
        let roas = parse_roas_csv(csv_url).unwrap();
        roas.iter()
            .take(10)
            .for_each(|roa| println!("{} {} {}", roa.asn, roa.prefix, roa.max_len));
    }

    /// Crawling with a lower bound must start exactly at that bound.
    #[test]
    fn test_crawl_after() {
        let after_date = NaiveDate::from_ymd_opt(2023, 3, 31).unwrap();
        let tal_url = "https://ftp.ripe.net/rpki/ripencc.tal";

        let roa_files = crawl_tal_after(tal_url, Some(after_date), None);

        assert!(!roa_files.is_empty());
        assert_eq!(roa_files[0].file_date, after_date);
    }

    /// Crawling without bounds must start at the first archived day.
    #[test]
    fn test_crawl_after_bootstrap() {
        let first_archived_day = NaiveDate::from_ymd_opt(2011, 1, 21).unwrap();

        let roa_files = crawl_tal_after("https://ftp.ripe.net/rpki/ripencc.tal", None, None);

        assert!(!roa_files.is_empty());
        assert_eq!(roa_files[0].file_date, first_archived_day);
    }

    /// Dumps every entry for one specific prefix from a known daily file.
    #[test]
    fn test_missing_prefix() {
        let csv_url = "https://ftp.ripe.net/rpki/ripencc.tal/2024/06/02/roas.csv.xz";
        let roas = parse_roas_csv(csv_url).unwrap();
        roas.into_iter()
            .filter(|entry| entry.prefix.to_string().as_str() == "193.0.14.0/24")
            .for_each(|entry| {
                dbg!(entry);
            });
    }
}