Skip to main content

bgpkit_parser/parser/
mod.rs

1/*!
The parser module contains the main logic for processing BGP, BMP, and MRT messages.
3*/
4use std::io::Read;
5
6#[macro_use]
7pub mod utils;
8pub mod bgp;
9pub mod bmp;
10pub mod filter;
11pub mod iters;
12pub mod mrt;
13pub mod rpki;
14
15#[cfg(feature = "rislive")]
16pub mod rislive;
17
18pub(crate) use self::utils::*;
19
20use crate::models::MrtRecord;
21pub use mrt::mrt_elem::{BgpUpdateElemIter, ElemError, Elementor, RecordElemIter};
22#[cfg(feature = "oneio")]
23use oneio::{get_cache_reader, get_reader};
24
25pub use crate::error::{ParserError, ParserErrorWithBytes};
26pub use bmp::{parse_bmp_msg, parse_openbmp_header, parse_openbmp_msg};
27pub use filter::*;
28pub use iters::*;
29pub use mrt::*;
30
31#[cfg(feature = "rislive")]
32pub use rislive::parse_ris_live_message;
33
34pub struct BgpkitParser<R> {
35    reader: R,
36    core_dump: bool,
37    filters: Vec<Filter>,
38    options: ParserOptions,
39}
40
/// Settings carried by every `BgpkitParser`.
pub(crate) struct ParserOptions {
    /// Whether warnings are shown; cleared by `BgpkitParser::disable_warnings`.
    show_warnings: bool,
}

impl Default for ParserOptions {
    /// Default settings: warnings are shown.
    fn default() -> Self {
        Self {
            show_warnings: true,
        }
    }
}
51
#[cfg(feature = "oneio")]
impl BgpkitParser<Box<dyn Read + Send>> {
    /// Creates a new parser for the data located at `path` (for example a remote URL).
    ///
    /// The reader is opened with `oneio::get_reader` and then handed to
    /// [`BgpkitParser::from_reader`], so the parser starts with core dump disabled,
    /// no filters, and default options.
    ///
    /// # Errors
    ///
    /// Returns a [`ParserErrorWithBytes`] when the reader for `path` cannot be opened.
    pub fn new(path: &str) -> Result<Self, ParserErrorWithBytes> {
        Ok(Self::from_reader(get_reader(path)?))
    }

    /// Creates a new parser that also caches the remote content in a local cache directory.
    ///
    /// The cache file name is `cache-` followed by the last `/`-separated component of
    /// `path`, with the value of `crc32(path)` (computed over the whole path, not just the
    /// file name) inserted in front of the final extension. If the file name contains no
    /// `.`, the checksum is appended as `.<crc32>` instead.
    ///
    /// For example, the remote file
    /// `http://archive.routeviews.org/route-views.chile/bgpdata/2023.03/RIBS/rib.20230326.0600.bz2`
    /// is cached as `cache-rib.20230326.0600.<crc32>.bz2` in the cache directory.
    ///
    /// # Errors
    ///
    /// Returns a [`ParserErrorWithBytes`] when the cached reader cannot be opened.
    pub fn new_cached(path: &str, cache_dir: &str) -> Result<Self, ParserErrorWithBytes> {
        // `rsplit` always yields at least one item, so the fallback is never taken;
        // it only spares us an `unwrap`.
        let file_name = path.rsplit('/').next().unwrap_or(path);
        let cache_file_name = format!(
            "cache-{}",
            add_suffix_to_filename(file_name, crc32(path).as_str())
        );
        // NOTE(review): the trailing `false` is presumably oneio's "force re-download"
        // switch -- confirm against the oneio documentation.
        let reader = get_cache_reader(path, cache_dir, Some(cache_file_name), false)?;
        Ok(Self::from_reader(reader))
    }
}
85
86#[cfg(feature = "oneio")]
/// Inserts `suffix` into `filename` in front of its last dot-separated component.
///
/// * `"example.tar.gz"` + `"suffix"` becomes `"example.tar.suffix.gz"`.
/// * A name without any dot gets the suffix appended: `"example"` becomes
///   `"example.suffix"` (and the empty name becomes `".suffix"`).
/// * An empty suffix still adds a separator: `"example.txt"` becomes `"example..txt"`.
fn add_suffix_to_filename(filename: &str, suffix: &str) -> String {
    // Splitting once at the last dot yields exactly the "everything before" and
    // "last component" pieces, without collecting every component into a `Vec`.
    match filename.rsplit_once('.') {
        Some((stem, extension)) => format!("{stem}.{suffix}.{extension}"),
        None => format!("{filename}.{suffix}"),
    }
}
99
100impl<R: Read> BgpkitParser<R> {
101    /// Creating a new parser from an object that implements [Read] trait.
102    pub fn from_reader(reader: R) -> Self {
103        BgpkitParser {
104            reader,
105            core_dump: false,
106            filters: vec![],
107            options: ParserOptions::default(),
108        }
109    }
110
111    /// This is used in for loop `for item in parser{}`
112    pub fn next_record(&mut self) -> Result<MrtRecord, ParserErrorWithBytes> {
113        parse_mrt_record(&mut self.reader)
114    }
115}
116
117impl<R> BgpkitParser<R> {
118    pub fn enable_core_dump(self) -> Self {
119        BgpkitParser {
120            reader: self.reader,
121            core_dump: true,
122            filters: self.filters,
123            options: self.options,
124        }
125    }
126
127    pub fn disable_warnings(self) -> Self {
128        let mut options = self.options;
129        options.show_warnings = false;
130        BgpkitParser {
131            reader: self.reader,
132            core_dump: self.core_dump,
133            filters: self.filters,
134            options,
135        }
136    }
137
138    /// Add a filter to the parser by specifying filter type and value as strings.
139    ///
140    /// This method parses the filter type and value strings to create a [`Filter`] and adds it
141    /// to the parser's filter list. For the full list of available filter types and their
142    /// formats, see the [`Filter`] struct documentation.
143    ///
144    /// # Available Filter Types
145    ///
146    /// - `origin_asn` - Origin AS number (e.g., "12345")
147    /// - `origin_asns` - Multiple origin AS numbers, comma-separated (e.g., "12345,67890")
148    /// - `prefix` - Exact prefix match (e.g., "192.168.1.0/24")
149    /// - `prefix_super` - Match prefix and super-prefixes
150    /// - `prefix_sub` - Match prefix and sub-prefixes
151    /// - `prefix_super_sub` - Match prefix, super-prefixes, and sub-prefixes
152    /// - `prefixes` - Multiple prefixes (e.g., "1.1.1.0/24,8.8.8.0/24")
153    /// - `peer_ip` - Peer IP address (e.g., "192.168.1.1")
154    /// - `peer_ips` - Multiple peer IPs (e.g., "192.168.1.1,192.168.1.2")
155    /// - `peer_asn` - Peer AS number (e.g., "12345")
156    /// - `peer_asns` - Multiple peer AS numbers (e.g., "12345,67890")
157    /// - `type` - Message type: "a"/"announce" or "w"/"withdraw"
158    /// - `ts_start` - Start timestamp (unix timestamp or RFC3339)
159    /// - `ts_end` - End timestamp (unix timestamp or RFC3339)
160    /// - `as_path` - AS path regex pattern
161    /// - `community` - Community regex pattern
162    /// - `ip_version` - IP version: "4"/"ipv4" or "6"/"ipv6"
163    ///
164    /// # Negative Filters
165    ///
166    /// Most filters support negation by prefixing the value with `!`. For example:
167    /// - `origin_asn=!13335` matches elements where origin AS is NOT 13335
168    /// - `prefix=!10.0.0.0/8` matches elements where prefix is NOT 10.0.0.0/8
169    ///
170    /// # Example
171    ///
172    /// ```no_run
173    /// use bgpkit_parser::BgpkitParser;
174    ///
175    /// let parser = BgpkitParser::new("https://spaces.bgpkit.org/parser/update-example.gz")
176    ///     .unwrap()
177    ///     .add_filter("peer_ip", "185.1.8.65")
178    ///     .unwrap()
179    ///     .add_filter("type", "w")
180    ///     .unwrap();
181    ///
182    /// for elem in parser {
183    ///     println!("{}", elem);
184    /// }
185    /// ```
186    pub fn add_filter(
187        self,
188        filter_type: &str,
189        filter_value: &str,
190    ) -> Result<Self, ParserErrorWithBytes> {
191        let mut filters = self.filters;
192        filters.push(Filter::new(filter_type, filter_value)?);
193        Ok(BgpkitParser {
194            reader: self.reader,
195            core_dump: self.core_dump,
196            filters,
197            options: self.options,
198        })
199    }
200
201    /// Add multiple filters to the parser.
202    ///
203    /// This method extends the existing filters with the provided slice of filters.
204    ///
205    /// # Example
206    ///
207    /// ```no_run
208    /// use bgpkit_parser::BgpkitParser;
209    /// use bgpkit_parser::parser::Filter;
210    ///
211    /// let filters = vec![
212    ///     Filter::new("peer_ip", "185.1.8.65").unwrap(),
213    ///     Filter::new("type", "w").unwrap(),
214    /// ];
215    ///
216    /// let parser = BgpkitParser::new("https://spaces.bgpkit.org/parser/update-example.gz")
217    ///     .unwrap()
218    ///     .add_filters(&filters);
219    /// ```
220    pub fn add_filters(mut self, filters: &[Filter]) -> Self {
221        self.filters.extend(filters.iter().cloned());
222        self
223    }
224
225    /// Set filters directly, replacing any existing filters.
226    ///
227    /// This method allows passing a pre-built `Vec<Filter>` directly to the parser,
228    /// bypassing the need to parse filter strings. This is useful when you want to
229    /// build filter specifications independently and reuse them across multiple parsers.
230    ///
231    /// # Example
232    ///
233    /// ```no_run
234    /// use bgpkit_parser::BgpkitParser;
235    /// use bgpkit_parser::parser::Filter;
236    ///
237    /// // Build filters independently
238    /// let filters = vec![
239    ///     Filter::new("peer_ip", "185.1.8.65").unwrap(),
240    ///     Filter::new("type", "w").unwrap(),
241    /// ];
242    ///
243    /// // Apply to multiple parsers (no manual clone needed)
244    /// let parser1 = BgpkitParser::new("https://spaces.bgpkit.org/parser/update-example.gz")
245    ///     .unwrap()
246    ///     .with_filters(&filters);
247    ///
248    /// let parser2 = BgpkitParser::new("https://spaces.bgpkit.org/parser/update-example.gz")
249    ///     .unwrap()
250    ///     .with_filters(&filters);
251    /// ```
252    pub fn with_filters(mut self, filters: &[Filter]) -> Self {
253        self.filters = filters.to_vec();
254        self
255    }
256}
257
// Every test below relies on items that only exist when the `oneio` feature is enabled
// (`BgpkitParser::new`, `BgpkitParser::new_cached`, `add_suffix_to_filename`, and the
// `oneio` crate itself), so the module is compiled only with that feature. Without this
// gate, building the tests with the feature disabled fails to compile.
//
// Apart from `test_add_suffix_to_filename`, the tests fetch sample data over the network.
#[cfg(all(test, feature = "oneio"))]
mod tests {
    use super::*;

    /// Sample updates file fetched by most tests; it contains 8160 elements in total.
    const EXAMPLE_URL: &str = "https://spaces.bgpkit.org/parser/update-example.gz";

    /// Local directory used by the caching test.
    const CACHE_DIR: &str = "/tmp/bgpkit-parser-tests";

    /// `from_reader` works with readers for compressed and uncompressed remote files.
    #[test]
    fn test_new_with_reader() {
        // bzip2 reader for a compressed file
        let reader = oneio::get_reader("http://archive.routeviews.org/route-views.ny/bgpdata/2023.02/UPDATES/updates.20230215.0630.bz2").unwrap();
        assert_eq!(
            12683,
            BgpkitParser::from_reader(reader).into_elem_iter().count()
        );

        // remote reader for an uncompressed updates file
        let reader = oneio::get_reader("https://spaces.bgpkit.org/parser/update-example").unwrap();
        assert_eq!(
            8160,
            BgpkitParser::from_reader(reader).into_elem_iter().count()
        );
    }

    /// `new_cached` yields the same elements on the first and on a repeated call.
    #[test]
    fn test_new_cached_with_reader() {
        let parser = BgpkitParser::new_cached(EXAMPLE_URL, CACHE_DIR)
            .unwrap()
            .enable_core_dump()
            .disable_warnings();
        let count = parser.into_elem_iter().count();
        assert_eq!(8160, count);

        // Second construction with the same URL and cache directory.
        let parser = BgpkitParser::new_cached(EXAMPLE_URL, CACHE_DIR).unwrap();
        let count = parser.into_elem_iter().count();
        assert_eq!(8160, count);
    }

    /// The suffix is inserted before the last dot-separated component.
    #[test]
    fn test_add_suffix_to_filename() {
        // Test with a filename that has dots
        let filename = "example.txt";
        let suffix = "suffix";
        let result = add_suffix_to_filename(filename, suffix);
        assert_eq!(result, "example.suffix.txt");

        // Test with a filename that has multiple dots
        let filename = "example.tar.gz";
        let suffix = "suffix";
        let result = add_suffix_to_filename(filename, suffix);
        assert_eq!(result, "example.tar.suffix.gz");

        // Test with a filename that has no dots
        let filename = "example";
        let suffix = "suffix";
        let result = add_suffix_to_filename(filename, suffix);
        assert_eq!(result, "example.suffix");

        // Test with an empty filename
        let filename = "";
        let suffix = "suffix";
        let result = add_suffix_to_filename(filename, suffix);
        assert_eq!(result, ".suffix");

        // Test with an empty suffix
        let filename = "example.txt";
        let suffix = "";
        let result = add_suffix_to_filename(filename, suffix);
        assert_eq!(result, "example..txt");
    }

    /// `with_filters` applies the given filters and replaces earlier ones.
    #[test]
    fn test_with_filters() {
        // Build filters independently
        let filters = vec![
            Filter::new("peer_ip", "185.1.8.65").unwrap(),
            Filter::new("type", "w").unwrap(),
        ];

        // Test with_filters - sets filters directly
        let parser = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .with_filters(&filters);
        let count = parser.into_elem_iter().count();

        // peer 185.1.8.65 has 3393 total, 132 withdrawals
        assert_eq!(count, 132);

        // Test that with_filters replaces existing filters
        let filters1 = vec![Filter::new("peer_ip", "185.1.8.65").unwrap()];
        let filters2 = vec![Filter::new("peer_ip", "185.1.8.50").unwrap()];

        let parser = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .with_filters(&filters1)
            .with_filters(&filters2); // Should replace filters1
        let count = parser.into_elem_iter().count();

        // peer 185.1.8.50 has 1563 elements
        assert_eq!(count, 1563);
    }

    /// `add_filters` extends the filter list and composes with `add_filter`.
    #[test]
    fn test_add_filters() {
        // Build filters independently
        let filters = vec![
            Filter::new("peer_ip", "185.1.8.65").unwrap(),
            Filter::new("type", "w").unwrap(),
        ];

        // Test add_filters - extends existing filters
        let parser = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .add_filters(&filters);
        let count = parser.into_elem_iter().count();

        // peer 185.1.8.65 has 3393 total, 132 withdrawals
        assert_eq!(count, 132);

        // Test combining add_filter and add_filters
        let parser = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .add_filter("peer_ip", "185.1.8.65")
            .unwrap()
            .add_filters(&[Filter::new("type", "w").unwrap()]);
        let count = parser.into_elem_iter().count();
        assert_eq!(count, 132);
    }

    /// An empty filter set passed to `with_filters` lets every element through.
    #[test]
    fn test_with_filters_empty() {
        // Test with empty filters - should return all elements
        let parser = BgpkitParser::new(EXAMPLE_URL).unwrap().with_filters(&[]);
        let count = parser.into_elem_iter().count();

        // Total elements in the file
        assert_eq!(count, 8160);
    }

    /// Adding an empty slice of filters leaves the existing filters untouched.
    #[test]
    fn test_add_filters_empty() {
        // Test adding empty filters - should not change behavior
        let parser = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .add_filter("peer_ip", "185.1.8.65")
            .unwrap()
            .add_filters(&[]);
        let count = parser.into_elem_iter().count();

        // peer 185.1.8.65 has 3393 elements
        assert_eq!(count, 3393);
    }

    /// One filter set can be applied to several parsers.
    #[test]
    fn test_with_filters_reuse() {
        // Build filters once
        let filters = vec![
            Filter::new("peer_ip", "185.1.8.65").unwrap(),
            Filter::new("type", "w").unwrap(),
        ];

        // Apply to multiple parsers (simulating reuse pattern - no clone needed)
        let parser1 = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .with_filters(&filters);
        let count1 = parser1.into_elem_iter().count();

        let parser2 = BgpkitParser::new(EXAMPLE_URL)
            .unwrap()
            .with_filters(&filters);
        let count2 = parser2.into_elem_iter().count();

        // Both should have same count: 132 withdrawals from peer 185.1.8.65
        assert_eq!(count1, 132);
        assert_eq!(count2, 132);
    }
}
433}