route_verification_irr/
lib.rs

//! Parse multiple Internet Routing Registries (IRR) databases and
//! merge them into a single [`Ir`].
use std::{
    collections::BTreeMap,
    fmt::Display,
    io::{BufRead, BufReader, Read},
    process::ChildStdout,
    sync::mpsc::Sender,
};

use anyhow::{Context, Result};
use ir::Ir;
use lazy_regex::regex;
use lex::*;
use log::{debug, error, warn};
use parse::parse_lexed;
use rayon::prelude::*;

pub mod mbrs;
#[cfg(test)]
mod tests;
pub mod worker;

use mbrs::*;
use worker::{spawn_aut_num_worker, spawn_filter_set_worker, spawn_peering_set_worker};

/// Gather `members` and `mp-members` expressions.
/// Translate `mbrs-by-ref` expressions to pseudo sets.
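///
/// # Example
///
/// A minimal sketch, marked `ignore` because it assumes `RPSLObject` can be
/// built as a plain struct literal with public `class`, `name`, and `body`
/// fields (the only fields this module touches) and that the `lex` helpers
/// split each `key: value` line as their names suggest; neither is shown in
/// this file.
///
/// ```ignore
/// let obj = RPSLObject {
///     class: "as-set".into(),
///     name: "AS-EXAMPLE".into(), // hypothetical set name
///     body: "members: AS1, AS2\nmp-members: AS3\n".into(),
/// };
/// assert_eq!(gather_members(&obj), ["AS1", "AS2", "AS3"]);
/// ```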
pub fn gather_members(obj: &RPSLObject) -> Vec<String> {
    let mut members = Vec::new();
    for RpslExpr { key, expr } in expressions(lines_continued(obj.body.lines())) {
        match key.as_str() {
            "members" | "mp-members" => {
                members.extend(split_commas(&expr).map(Into::into));
            }
            "mbrs-by-ref" => match expr.as_str() {
                "ANY" => members.push(ref_set(&obj.name)),
                _ => members
                    .extend(split_commas(&expr).map(|mntner| mntner_ref_set(mntner, &obj.name))),
            },
            _ => (),
        }
    }
    members
}
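/// Block until a full line is read from the child process's stdout,
/// reading one byte at a time; the trailing newline is not included.
///
/// # Example
///
/// A minimal sketch (`no_run`), assuming this crate is named
/// `route_verification_irr` as the directory suggests, and a Unix-like
/// system where `echo` is available.
///
/// ```no_run
/// use route_verification_irr::read_line_wait; // assumed crate name
/// use std::{
///     io::BufReader,
///     process::{Command, Stdio},
/// };
/// let mut child = Command::new("echo")
///     .arg("hello")
///     .stdout(Stdio::piped())
///     .spawn()
///     .unwrap();
/// let mut reader = BufReader::new(child.stdout.take().unwrap());
/// assert_eq!(read_line_wait(&mut reader).unwrap(), "hello");
/// ```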
pub fn read_line_wait(reader: &mut BufReader<ChildStdout>) -> Result<String> {
    let mut line = Vec::new();
    loop {
        let mut buf = [0];
        reader.read_exact(&mut buf)?;
        match buf[0] {
            b'\n' => break,
            b => line.push(b),
        }
    }
    Ok(String::from_utf8(line)?)
}

pub fn parse_object(obj: RPSLObject, pa: &mut PreAst) -> Result<()> {
    match obj.class.as_str() {
        "aut-num" => pa.send_aut_num.send(obj).context("sending aut-num")?,
        "as-set" => parse_as_set(obj, &mut pa.as_sets),
        "route" | "route6" => parse_route(obj, pa),
        "route-set" => parse_route_set(obj, &mut pa.route_sets),
        "filter-set" => pa.send_filter_set.send(obj).context("sending filter-set")?,
        "peering-set" => pa
            .send_peering_set
            .send(obj)
            .context("sending peering-set")?,
        _ => (),
    }
    Ok(())
}

fn parse_as_set(obj: RPSLObject, as_sets: &mut Vec<AsOrRouteSet>) {
    let members = gather_members(&obj);
    as_sets.push(AsOrRouteSet::new(obj.name, obj.body, members));
    match as_sets.len() {
        l if l % 0xFF == 0 => debug!("Parsed {l} as_sets."),
        _ => (),
    }
}

fn parse_route(obj: RPSLObject, pa: &mut PreAst) {
    gather_ref(&obj, &mut pa.pseudo_route_sets);
    for RpslExpr {
        key,
        expr, /*AS*/
    } in expressions(lines_continued(obj.body.lines()))
    {
        if key == "origin" {
            pa.as_routes
                .entry(expr.to_uppercase())
                .or_default()
                .push(obj.name /*The route*/);
            return;
        }
    }
    pa.counts.unknown_lex_err += 1;
    error!("Route object {} does not have an `origin` field.", obj.name);
}

fn parse_route_set(obj: RPSLObject, route_sets: &mut Vec<AsOrRouteSet>) {
    let members = gather_members(&obj);
    route_sets.push(AsOrRouteSet::new(obj.name, obj.body, members));
    match route_sets.len() {
        l if l % 0xFF == 0 => debug!("Parsed {l} route_sets."),
        _ => (),
    }
}

/// Read and lex an RPSL database.
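///
/// # Example
///
/// A minimal sketch (`no_run`), assuming this crate is named
/// `route_verification_irr` as the directory suggests; the RPSL snippet is
/// hypothetical, and any other `BufRead` source (such as a file reader)
/// works the same way.
///
/// ```no_run
/// use route_verification_irr::read_db; // assumed crate name
/// let db = &b"as-set: AS-EXAMPLE\nmembers: AS1, AS2\n\n"[..];
/// let (ast, counts) = read_db(db).unwrap();
/// println!("{} as-sets parsed; counts: {counts}.", ast.as_sets.len());
/// ```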
pub fn read_db(db: impl BufRead) -> Result<(Ast, Counts)> {
    let (as_sets, route_sets, pseudo_route_sets, as_routes) =
        (Vec::new(), Vec::new(), BTreeMap::new(), BTreeMap::new());
    let (send_aut_num, aut_num_worker) = spawn_aut_num_worker().context("spawn_aut_num_worker")?;
    let (send_peering_set, peering_set_worker) =
        spawn_peering_set_worker().context("spawn_peering_set_worker")?;
    let (send_filter_set, filter_set_worker) =
        spawn_filter_set_worker().context("spawn_filter_set_worker")?;

    let mut pa = PreAst {
        as_sets,
        route_sets,
        pseudo_route_sets,
        send_aut_num,
        send_peering_set,
        send_filter_set,
        as_routes,
        counts: Default::default(),
    };
    let process_output = process_db(db, &mut pa);

    drop((pa.send_aut_num, pa.send_peering_set, pa.send_filter_set));
    let an_out = aut_num_worker
        .join()
        .unwrap()
        .context("aut_num_worker_output")?;
    let peering_sets = peering_set_worker
        .join()
        .unwrap()
        .context("peering_set_worker_output")?;
    let filter_sets = filter_set_worker
        .join()
        .unwrap()
        .context("filter_set_worker_output")?;

    process_output?;

    pa.route_sets.extend(conclude_set(pa.pseudo_route_sets));
    pa.as_sets.extend(an_out.pseudo_as_sets);

    let counts = pa.counts + an_out.counts;
    debug!("read_db counts: {counts}.");

    Ok((
        Ast {
            aut_nums: an_out.aut_nums,
            as_sets: pa.as_sets,
            route_sets: pa.route_sets,
            peering_sets,
            filter_sets,
            as_routes: pa.as_routes,
        },
        counts,
    ))
}

fn process_db(db: impl BufRead, pa: &mut PreAst) -> Result<()> {
    for obj in rpsl_objects(io_wrapper_lines(db)) {
        parse_object(obj, pa)?;
    }

    Ok(())
}

pub struct PreAst {
    pub as_sets: Vec<AsOrRouteSet>,
    pub route_sets: Vec<AsOrRouteSet>,
    pub pseudo_route_sets: Map2DStringVec,
    pub send_aut_num: Sender<RPSLObject>,
    pub send_peering_set: Sender<RPSLObject>,
    pub send_filter_set: Sender<RPSLObject>,
    pub as_routes: BTreeMap<String, Vec<String>>,
    pub counts: Counts,
}

/// Read, lex and parse a single DB.
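///
/// # Example
///
/// A minimal sketch (`no_run`) of parsing one IRR dump into an [`Ir`] plus
/// counts, assuming this crate is named `route_verification_irr` as the
/// directory suggests; `ripe.db` is a hypothetical path to a downloaded dump.
///
/// ```no_run
/// use route_verification_irr::parse_db; // assumed crate name
/// use std::{fs::File, io::BufReader};
/// let db = BufReader::new(File::open("ripe.db").unwrap());
/// let (ir, counts) = parse_db("RIPE", db).unwrap();
/// println!("{ir}; {counts}.");
/// ```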
pub fn parse_db(tag: impl Display, db: impl BufRead) -> Result<(Ir, Counts)> {
    debug!("Starting to read and lex RPSL in `{tag}`.");
    let (parsed, l_counts) = read_db(db).with_context(|| format!("reading DB `{tag}`"))?;
    debug!("Starting to parse lexed `{tag}`.");
    let (ir, p_counts) = parse_lexed(parsed);
    let (n_import, n_export) = ir
        .aut_nums
        .values()
        .fold((0, 0), |(i, e), an| (i + an.n_import, e + an.n_export));
    debug!(
        "Read `{tag}`: {ir}; {n_import} imports, {n_export} exports. Lexing: {l_counts}. Parsing: {p_counts}.",
    );
    Ok((ir, l_counts + p_counts))
}

/// Merge the IRs and sum the counts in parallel.
/// No guarantee is made about the priorities of the IRs.
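///
/// # Example
///
/// A minimal sketch (`no_run`), assuming this crate is named
/// `route_verification_irr` as the directory suggests; the file names are
/// hypothetical IRR dumps that are parsed independently, then merged.
///
/// ```no_run
/// use route_verification_irr::{merge_ir_and_counts, parse_db}; // assumed crate name
/// use std::{fs::File, io::BufReader};
/// let parsed: Vec<_> = ["ripe.db", "apnic.db"]
///     .iter()
///     .map(|path| {
///         let db = BufReader::new(File::open(path).unwrap());
///         parse_db(path, db).unwrap()
///     })
///     .collect();
/// let (ir, counts) = merge_ir_and_counts(parsed);
/// println!("{ir}; {counts}.");
/// ```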
pub fn merge_ir_and_counts<I>(ir_and_counts: I) -> (Ir, Counts)
where
    I: IntoParallelIterator<Item = (Ir, Counts)>,
{
    ir_and_counts
        .into_par_iter()
        .reduce(Default::default, |(ir_acc, counts_acc), (ir, counts)| {
            (ir_acc.merge(ir), counts_acc + counts)
        })
}

/// Prioritize the IRs yielded later.
/// # Panics
/// If `ir_and_counts` is empty.
pub fn merge_ir_and_counts_ordered<I>(ir_and_counts: I) -> (Ir, Counts)
where
    I: IntoIterator<Item = (Ir, Counts)>,
{
    ir_and_counts
        .into_iter()
        .reduce(|(backup, b_counts), (priority, p_counts)| {
            (backup.merge(priority), p_counts + b_counts)
        })
        .expect("ir_and_counts should not be empty")
}

/// Split on `,` followed by any amount of whitespace.
/// Ignore empty parts.
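///
/// # Example
///
/// Assuming this crate is named `route_verification_irr` as the directory
/// suggests:
///
/// ```
/// use route_verification_irr::split_commas; // assumed crate name
/// let parts: Vec<_> = split_commas("AS1, AS2, ,AS3").collect();
/// assert_eq!(parts, ["AS1", "AS2", "AS3"]);
/// ```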
pub fn split_commas(expr: &str) -> impl Iterator<Item = &str> {
    regex!(r",\s*").split(expr).filter_map(|s| {
        let r = s.trim();
        (!r.is_empty()).then_some(r)
    })
}

pub type Map2DStringVec = BTreeMap<String, BTreeMap<String, Vec<String>>>;