asn_db2/
lib.rs

1//! Rust Library for parsing and indexing [ASN-DB](https://iptoasn.com/) files.
2//! For the single ip type files see [Ipv4Database] and [Ipv6Database].
3//! If you want to parse the combined file, you want to use [Database].
4use ipnet::{IpNet, Ipv4Net, Ipv4Subnets, Ipv6Net, Ipv6Subnets};
5use serde::{
6    de::{self, DeserializeOwned, Visitor},
7    Deserialize, Deserializer,
8};
9use std::{
10    borrow::Borrow,
11    fmt::Debug,
12    io::Read,
13    marker::PhantomData,
14    net::{IpAddr, Ipv4Addr, Ipv6Addr},
15};
16use thiserror::Error;
17
/// Raw row of the tsv data, exactly as it comes out of the deserializer.
/// This is an internal type; please don't try to use it, thx :)
#[doc(hidden)]
#[derive(Debug)]
pub struct Record<A> {
    /// First column: lower bound of the address range.
    start: A,
    /// Second column: upper bound of the address range.
    end: A,
    /// Third column: the AS number.
    as_number: u32,
    /// Fourth column: the country.
    country: String,
    /// Fifth column: the owner.
    owner: String,
}

impl From<Record<Ipv4Addr>> for Record<IpAddr> {
    /// Widens an ipv4 record into the version-agnostic representation.
    fn from(value: Record<Ipv4Addr>) -> Self {
        let Record {
            start,
            end,
            as_number,
            country,
            owner,
        } = value;
        Self {
            start: IpAddr::V4(start),
            end: IpAddr::V4(end),
            as_number,
            country,
            owner,
        }
    }
}

impl From<Record<Ipv6Addr>> for Record<IpAddr> {
    /// Widens an ipv6 record into the version-agnostic representation.
    fn from(value: Record<Ipv6Addr>) -> Self {
        let Record {
            start,
            end,
            as_number,
            country,
            owner,
        } = value;
        Self {
            start: IpAddr::V6(start),
            end: IpAddr::V6(end),
            as_number,
            country,
            owner,
        }
    }
}
53
/// An ipv4 entry in the database: an [Entry] keyed by an [Ipv4Net] subnet.
pub type Ipv4Entry = Entry<Ipv4Net>;
/// An ipv6 entry in the database: an [Entry] keyed by an [Ipv6Net] subnet.
pub type Ipv6Entry = Entry<Ipv6Net>;
58
/// An entry in the database.
///
/// Note that equality (derived) compares all fields, while the ordering
/// implemented for this type only looks at [Entry::subnet].
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Entry<T> {
    /// The subnet this entry covers.
    pub subnet: T,
    /// The AS number column of the originating row.
    pub as_number: u32,
    /// The country column of the originating row.
    pub country: String,
    /// The owner column of the originating row.
    pub owner: String,
}
67
68impl From<Entry<Ipv4Net>> for Entry<IpNet> {
69    fn from(value: Entry<Ipv4Net>) -> Self {
70        Entry {
71            subnet: value.subnet.into(),
72            as_number: value.as_number,
73            country: value.country,
74            owner: value.owner,
75        }
76    }
77}
78
79impl From<Entry<Ipv6Net>> for Entry<IpNet> {
80    fn from(value: Entry<Ipv6Net>) -> Self {
81        Entry {
82            subnet: value.subnet.into(),
83            as_number: value.as_number,
84            country: value.country,
85            owner: value.owner,
86        }
87    }
88}
89
90impl<T: PartialOrd> PartialOrd for Entry<T> {
91    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
92        self.subnet.partial_cmp(&other.subnet)
93    }
94}
95
96impl<T: Ord> Ord for Entry<T> {
97    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
98        self.subnet.cmp(&other.subnet)
99    }
100}
101
102impl<'de, A: Deserialize<'de>> Deserialize<'de> for Record<A> {
103    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
104    where
105        D: Deserializer<'de>,
106    {
107        deserializer.deserialize_seq(RecordVisitor {
108            _phantom: PhantomData::default(),
109        })
110    }
111}
112
/// Serde visitor which builds a [Record] with address type `A` from a sequence.
struct RecordVisitor<A> {
    // Only ties the visitor to the address type; no value of `A` is stored.
    _phantom: PhantomData<A>,
}
116
117impl<'de, A: Deserialize<'de>> Visitor<'de> for RecordVisitor<A> {
118    type Value = Record<A>;
119
120    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
121        formatter.write_str("a valid ipv4 entry")
122    }
123
124    fn visit_seq<S>(self, mut seq: S) -> std::result::Result<Self::Value, S::Error>
125    where
126        S: serde::de::SeqAccess<'de>,
127    {
128        let start: A = seq
129            .next_element()?
130            .ok_or_else(|| de::Error::invalid_length(0, &self))?;
131        let end: A = seq
132            .next_element()?
133            .ok_or_else(|| de::Error::invalid_length(1, &self))?;
134        let as_number = seq
135            .next_element()?
136            .ok_or_else(|| de::Error::invalid_length(2, &self))?;
137        let country = seq
138            .next_element()?
139            .ok_or_else(|| de::Error::invalid_length(3, &self))?;
140        let owner = seq
141            .next_element()?
142            .ok_or_else(|| de::Error::invalid_length(4, &self))?;
143        Ok(Record {
144            start,
145            end,
146            as_number,
147            country,
148            owner,
149        })
150    }
151}
152
/// An error which can occur when using this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// An error reported by the `csv` crate; displayed transparently and
    /// convertible from [csv::Error] via `?`.
    #[error(transparent)]
    Csv(#[from] csv::Error),
}
159
/// A custom [Result] type whose error variant is always a [crate::Error].
pub type Result<T> = std::result::Result<T, crate::Error>;
162
/// Database for parsing the `ip2asn-v4.tsv` file; looked up with [Ipv4Addr].
pub type Ipv4Database = IpDatabase<Ipv4Addr>;
/// Database for parsing the `ip2asn-v6.tsv` file; looked up with [Ipv6Addr].
pub type Ipv6Database = IpDatabase<Ipv6Addr>;
167
/// IP -> ASN Database for a single IP version.
/// See [Ipv4Database] and [Ipv6Database].
pub struct IpDatabase<A: MapSubnets> {
    // One entry per subnet. `lookup` binary-searches this vector by subnet,
    // so it is expected to be ordered by subnet.
    entries: Vec<Entry<A::Net>>,
}
173
174impl<A: MapSubnets> Debug for IpDatabase<A> {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        write!(
177            f,
178            "IpDatabase<{}>{{ {} entries }}",
179            std::any::type_name::<A>(),
180            self.entries.len()
181        )
182    }
183}
184
/// This is used internally but must be public.
/// Please don't try to use it, thx :)
///
/// Implemented for the address types a database can be keyed by; it connects
/// an address type with its subnet type.
#[doc(hidden)]
pub trait MapSubnets: Sized {
    /// The subnet type stored in the database entries for this address type.
    type Net: Ord + Debug + Copy;

    /// Turns one parsed record into an iterator of entries,
    /// one per subnet derived from the record's start and end address.
    fn map(record: Record<Self>) -> Box<dyn std::iter::Iterator<Item = Entry<Self::Net>>>;

    /// Returns whether `net` contains `address`.
    fn contains(net: &Self::Net, address: &Self) -> bool;
}
195
196impl MapSubnets for Ipv4Addr {
197    type Net = Ipv4Net;
198
199    fn map(record: Record<Self>) -> Box<dyn std::iter::Iterator<Item = Entry<Self::Net>>> {
200        let iter = Ipv4Subnets::new(record.start, record.end, 8).map(move |subnet| Ipv4Entry {
201            subnet,
202            as_number: record.as_number,
203            country: record.country.clone(),
204            owner: record.owner.clone(),
205        });
206        Box::new(iter)
207    }
208
209    fn contains(net: &Self::Net, address: &Self) -> bool {
210        net.contains(address)
211    }
212}
213
214impl MapSubnets for Ipv6Addr {
215    type Net = Ipv6Net;
216
217    fn map(record: Record<Self>) -> Box<dyn std::iter::Iterator<Item = Entry<Self::Net>>> {
218        let iter = Ipv6Subnets::new(record.start, record.end, 8).map(move |subnet| Ipv6Entry {
219            subnet,
220            as_number: record.as_number,
221            country: record.country.clone(),
222            owner: record.owner.clone(),
223        });
224        Box::new(iter)
225    }
226
227    fn contains(net: &Self::Net, address: &Self) -> bool {
228        net.contains(address)
229    }
230}
231
// Builds a lazy iterator of `Result<Entry<_>, csv::Error>` from a csv reader.
//
// `$reader` is the csv reader expression, `$type` the address type the rows
// are deserialized with (it must implement `MapSubnets`).
// Every successfully parsed row is expanded into its subnets, a failed row
// is passed through as a single `Err` item.
macro_rules! read_entries {
    ($reader:expr, $type:tt) => {
        $reader
            .deserialize::<Record<$type>>()
            // Drop rows owned by "Not routed"; errors are kept so they surface later.
            .filter(|value| match value {
                Ok(record) => record.owner != "Not routed",
                Err(_) => true,
            })
            // Ok(record) -> Ok(iterator over the record's entries)
            .map(|value| value.map(<$type>::map))
            .flat_map(|subnets| {
                // Exactly one of the two options is filled, so both match arms
                // produce the same iterator type.
                let mut records = None;
                let mut error = None;

                match subnets {
                    Ok(subnets) => records = Some(subnets),
                    Err(err) => error = Some(err),
                }
                // Yields every entry wrapped in `Ok`, or the single `Err`.
                records
                    .into_iter()
                    .flatten()
                    .map(Ok)
                    .chain(error.into_iter().map(Err))
            })
    };
}
257
258impl<A> IpDatabase<A>
259where
260    A: DeserializeOwned + MapSubnets + Debug + Into<A::Net> + Copy,
261{
262    /// Reads a new database from a reader.
263    ///
264    /// This can be memory data using [std::io::Cursor],
265    /// or a [std::fs::File].
266    pub fn from_reader(reader: impl Read) -> std::result::Result<Self, crate::Error> {
267        tracing::debug!("Parsing tsv data.");
268        let mut reader = create_reader(reader);
269        let entries = read_entries!(reader, A).collect::<std::result::Result<_, _>>()?;
270        Ok(Self { entries })
271    }
272
273    /// Looks up the given address in the database.
274    pub fn lookup(&self, address: A) -> Option<&Entry<A::Net>> {
275        tracing::debug!(?address, "Looking up ip address");
276        match self
277            .entries
278            .binary_search_by_key(&address.into(), |entry| entry.subnet)
279        {
280            Ok(index) => return Some(&self.entries[index]),
281            Err(index) => {
282                if index != 0 {
283                    let entry = &self.entries[index - 1];
284                    if A::contains(&entry.subnet, &address) {
285                        return Some(entry);
286                    }
287                }
288            }
289        }
290        None
291    }
292}
293
294fn create_reader(reader: impl Read) -> csv::Reader<impl Read> {
295    csv::ReaderBuilder::new()
296        .has_headers(false)
297        .delimiter(b'\t')
298        .from_reader(reader)
299}
300
301impl MapSubnets for IpAddr {
302    type Net = IpNet;
303
304    fn map(record: Record<Self>) -> Box<dyn std::iter::Iterator<Item = Entry<Self::Net>>> {
305        match (record.start, record.end) {
306            (IpAddr::V4(start), IpAddr::V4(end)) => Box::new(
307                Ipv4Addr::map(Record {
308                    start,
309                    end,
310                    as_number: record.as_number,
311                    country: record.country,
312                    owner: record.owner,
313                })
314                .map(|entry| Entry {
315                    subnet: IpNet::from(entry.subnet),
316                    as_number: entry.as_number,
317                    country: entry.country,
318                    owner: entry.owner,
319                }),
320            ),
321            (IpAddr::V6(start), IpAddr::V6(end)) => Box::new(
322                Ipv6Addr::map(Record {
323                    start,
324                    end,
325                    as_number: record.as_number,
326                    country: record.country,
327                    owner: record.owner,
328                })
329                .map(|entry| Entry {
330                    subnet: IpNet::from(entry.subnet),
331                    as_number: entry.as_number,
332                    country: entry.country,
333                    owner: entry.owner,
334                }),
335            ),
336            _ => panic!("invalid ip combination"),
337        }
338    }
339
340    fn contains(net: &Self::Net, address: &Self) -> bool {
341        net.contains(address)
342    }
343}
344
/// Database for indexing and searching combined list of ipv4 and ipv6 entries.
///
/// Internally the entries are kept in one database per ip version.
#[derive(Debug)]
pub struct Database {
    // All ipv4 entries of the combined data.
    v4: Ipv4Database,
    // All ipv6 entries of the combined data.
    v6: Ipv6Database,
}
351
352// Taken from itertools and modified for this use case.
353fn partition_map<I, A, B>(iter: I) -> (A, B)
354where
355    I: Sized,
356    I: Iterator<Item = Entry<IpNet>>,
357    A: Default + Extend<Entry<Ipv4Net>>,
358    B: Default + Extend<Entry<Ipv6Net>>,
359{
360    let mut left = A::default();
361    let mut right = B::default();
362
363    iter.for_each(|val| match val.subnet {
364        IpNet::V4(v4) => left.extend(Some(Entry {
365            subnet: v4,
366            as_number: val.as_number,
367            country: val.country,
368            owner: val.owner,
369        })),
370        IpNet::V6(v6) => right.extend(Some(Entry {
371            subnet: v6,
372            as_number: val.as_number,
373            country: val.country,
374            owner: val.owner,
375        })),
376    });
377    (left, right)
378}
379
380impl Database {
381    /// Reads a new database from a reader.
382    ///
383    /// This can be memory data using [std::io::Cursor],
384    /// or a [std::fs::File].
385    pub fn from_reader(reader: impl Read) -> std::result::Result<Self, crate::Error> {
386        let mut reader = create_reader(reader);
387        let entries = read_entries!(reader, IpAddr).collect::<std::result::Result<Vec<_>, _>>()?;
388        let (v4, v6): (Vec<_>, Vec<_>) = partition_map(entries.into_iter());
389        Ok(Self {
390            v4: Ipv4Database { entries: v4 },
391            v6: Ipv6Database { entries: v6 },
392        })
393    }
394
395    /// Looks up a [IpAddr] in the database.
396    pub fn lookup(&self, address: IpAddr) -> Option<IpEntry<'_>> {
397        match address {
398            IpAddr::V4(v4) => self.v4.lookup(v4).map(IpEntry::V4),
399            IpAddr::V6(v6) => self.v6.lookup(v6).map(IpEntry::V6),
400        }
401    }
402
403    /// Looks up a [Ipv4Addr] in the database.
404    #[inline(always)]
405    pub fn lookup_v4(&self, address: Ipv4Addr) -> Option<&Ipv4Entry> {
406        self.v4.lookup(address)
407    }
408
409    /// Looks up a [Ipv6Addr] in the database.
410    #[inline(always)]
411    pub fn lookup_v6(&self, address: Ipv6Addr) -> Option<&Ipv6Entry> {
412        self.v6.lookup(address)
413    }
414}
415
/// A lookup result which is either a reference to [Ipv4Entry] or [Ipv6Entry].
///
/// Use [IpEntry::to_ip_entry_buf] to get an owned copy.
#[derive(Debug, PartialEq, Clone)]
pub enum IpEntry<'a> {
    /// A borrowed ipv4 entry.
    V4(&'a Ipv4Entry),
    /// A borrowed ipv6 entry.
    V6(&'a Ipv6Entry),
}
422
/// Owned version of [IpEntry]
#[derive(Debug, PartialEq, Clone)]
pub enum IpEntryBuf {
    /// An owned ipv4 entry.
    V4(Ipv4Entry),
    /// An owned ipv6 entry.
    V6(Ipv6Entry),
}
429
430impl IpEntry<'_> {
431    /// Converts the [IpEntry] into an owned [IpEntryBuf] by cloning.
432    pub fn to_ip_entry_buf(self) -> IpEntryBuf {
433        match self {
434            IpEntry::V4(v4) => IpEntryBuf::V4(v4.clone()),
435            IpEntry::V6(v6) => IpEntryBuf::V6(v6.clone()),
436        }
437    }
438}
439
// These tests read `ip2asn-v4.tsv` / `ip2asn-v6.tsv` via a relative path, so
// the files have to be present in the directory the tests are run from.
#[cfg(test)]
mod tests {
    use crate::{Ipv4Database, Ipv6Database};
    use std::{
        fs::File,
        io::BufReader,
        net::{Ipv4Addr, Ipv6Addr},
        str::FromStr,
    };

    // Parses the ipv4 file and expects a lookup of 1.1.1.1 to find an entry.
    #[test]
    fn test_database_v4() -> Result<(), Box<dyn std::error::Error>> {
        let reader = BufReader::new(File::open("ip2asn-v4.tsv")?);
        let db = Ipv4Database::from_reader(reader)?;
        println!("{:#?}", db);
        let record = db.lookup(Ipv4Addr::from_str("1.1.1.1")?).unwrap();
        println!("{:#?}", record);
        Ok(())
    }

    // Parses the ipv6 file and expects 2a05:dfc2:: to resolve to AS number 200242.
    #[test]
    fn test_database_v6() -> Result<(), Box<dyn std::error::Error>> {
        let reader = BufReader::new(File::open("ip2asn-v6.tsv")?);
        let db = Ipv6Database::from_reader(reader)?;
        println!("{:#?}", db);
        let record = db.lookup(Ipv6Addr::from_str("2a05:dfc2::")?).unwrap();
        println!("{:#?}", record);
        assert_eq!(record.as_number, 200242);
        Ok(())
    }
}