ip_alloc_lookup/
database.rs

//! In-memory IP range database and lookup logic.
//!
//! This module contains the core data structures used for fast, allocation-based
//! IP classification. It is intentionally minimal and avoids external dependencies
//! at runtime.
//!
//! ## Structure
//!
//! - [`GeoIpDb`] owns sorted IPv4 and IPv6 range tables
//! - [`GeoInfo`] stores the classification result for a range
//! - [`Region`] provides a coarse regional grouping abstraction
//!
//! IPv4 and IPv6 are handled separately to keep lookup logic simple and fast.
//! All lookups are performed using binary search over pre-sorted ranges.
//!
//! ## Performance characteristics
//!
//! - Lookups are `O(log n)`
//! - No heap allocation during lookup
//! - Suitable for hot paths (e.g. request filtering, logging, metrics)
//!
//! ## Safety and correctness
//!
//! The database assumes that input ranges are:
//!
//! - Non-overlapping
//! - Sorted by start address
//!
//! The embedded tables are expected to arrive from the build script already in
//! this form. The runtime constructors sort the ranges they parse, but they do
//! not check for overlaps, so overlapping input yields unspecified matches.
//!
//! ## Regional classification
//!
//! Region grouping (e.g. EU vs non-EU) is derived from the country code using a
//! fixed mapping. This mapping is a policy decision and may evolve over time.
35
36use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
37use std::{fs, io, path::Path};
38
/// URL of RIPE NCC's "delegated extended latest" statistics file.
///
/// Used as the download source by [`GeoIpDb::update_cache`].
///
/// # Feature
/// Available only when the crate is built with the `download` feature.
#[cfg(feature = "download")]
pub const RIPE_EXTENDED_LATEST_URL: &str =
    "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest";
42
/// Compact classification result for a single IP range.
///
/// The country code is stored as two ASCII bytes (e.g. `b'D', b'E'`), and `is_eu`
/// is a convenience flag derived from a built-in EU membership list.
///
/// `region` is stored as a small numeric code; use [`GeoInfo::region_enum`]
/// for a typed view.
///
/// The type is a plain value: it is `Copy`, comparable with `==`, and hashable,
/// so it can be used directly in assertions and as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub struct GeoInfo {
    /// Two-byte country code, e.g. `*b"DE"`; `*b"??"` is used as a placeholder.
    pub country_code: [u8; 2],
    /// `true` when the country is in the built-in EU membership list.
    pub is_eu: bool,
    /// Numeric region code; see [`GeoInfo::region_enum`] for the typed form.
    pub region: u8,
}
57
/// Coarse region bucket derived from a country code.
///
/// This is a policy-style grouping (e.g. "EU vs non-EU"), not a geolocation
/// signal. Each variant has a fixed `u8` discriminant, which is the value
/// stored in [`GeoInfo::region`].
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Region {
    EuropeanUnion = 1,
    EuropeNonEu = 2,
    EasternEurope = 3,
    Turkey = 4,
    MiddleEast = 5,
    NorthAfrica = 6,
    CentralAsia = 7,
    GulfStates = 8,
    Other = 255,
}

impl Region {
    /// Human-readable name of the region, suitable for display and logging.
    pub fn as_str(self) -> &'static str {
        let label = match self {
            Self::EuropeanUnion => "European Union",
            Self::EuropeNonEu => "Europe (non-EU)",
            Self::EasternEurope => "Eastern Europe",
            Self::Turkey => "Turkey",
            Self::MiddleEast => "Middle East",
            Self::NorthAfrica => "North Africa",
            Self::CentralAsia => "Central Asia",
            Self::GulfStates => "Gulf States",
            Self::Other => "Other",
        };
        label
    }
}
92
/// Pack the first two bytes of a country string (e.g. "DE") into `[b'D', b'E']`.
///
/// Anything past the second byte is ignored. Inputs shorter than two bytes
/// yield the placeholder `*b"??"`.
fn cc2(country: &str) -> [u8; 2] {
    match country.as_bytes() {
        [first, second, ..] => [*first, *second],
        // RIPE data should always carry 2-letter codes; this is the fallback.
        _ => *b"??",
    }
}
99
100/// For display/testing convenience.
101impl GeoInfo {
102	/// Return the ISO-3166 alpha-2 country code as a string slice.
103	///
104	/// This is intended for display/logging and should always be valid ASCII.
105	/// If the stored bytes are not valid UTF-8 (unexpected), this falls back to `"??"`.
106    pub fn country_code_str(&self) -> &str {
107        // Always valid for ASCII 2-letter codes; fallback if somehow invalid.
108        std::str::from_utf8(&self.country_code).unwrap_or("??")
109    }
110	
111	/// Interpret the stored numeric `region` code as a [`Region`] enum.
112	///
113	/// Unknown or unsupported codes map to [`Region::Other`].
114    pub fn region_enum(&self) -> Region {
115        match self.region {
116            1 => Region::EuropeanUnion,
117            2 => Region::EuropeNonEu,
118            3 => Region::EasternEurope,
119            4 => Region::Turkey,
120            5 => Region::MiddleEast,
121            6 => Region::NorthAfrica,
122            7 => Region::CentralAsia,
123            8 => Region::GulfStates,
124            _ => Region::Other,
125        }
126    }
127}
128
129
130/// Offline, in-memory lookup database for allocation-based IP classification.
131///
132/// The default constructor (`new`) uses range tables generated at build time.
133/// Lookups are performed with binary search and do not allocate.
134pub struct GeoIpDb {
135    v4_ranges: Vec<(u32, u32, GeoInfo)>,
136    v6_ranges: Vec<(u128, u128, GeoInfo)>,
137}
138
/// Two-letter country codes of the EU member states (27 countries as of 2025).
const EU_COUNTRIES: &[&str] = &[
    "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI",
    "FR", "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU",
    "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE",
];
145
146// Include the generated data from build.rs
147include!(concat!(env!("OUT_DIR"), "/generated_data.rs"));
148
149impl GeoIpDb {
150    /// Construct a database using the embedded range tables generated at build time.
151	///
152	/// This is the fastest and most predictable option: no I/O and no parsing at runtime.
153	///
154	/// # Examples
155	/// ```
156	/// use offline_ripe_geoip::GeoIpDb;
157	///
158	/// let db = GeoIpDb::new();
159	/// let info = db.lookup("46.4.0.1".parse().unwrap());
160	/// assert!(info.is_some());
161	/// ```
162    pub fn new() -> Self {
163        let mut v4_ranges = Vec::with_capacity(IPV4_RANGES.len());
164        let mut v6_ranges = Vec::with_capacity(IPV6_RANGES.len());
165
166        // Process IPv4 ranges
167        for &(start, end, country) in IPV4_RANGES {
168            let is_eu = EU_COUNTRIES.contains(&country);
169            let region = determine_region(country);
170
171            let geo_info = GeoInfo {
172				country_code: cc2(country),
173				is_eu,
174				region: region as u8,
175			};
176
177            v4_ranges.push((start, end, geo_info));
178        }
179
180        // Process IPv6 ranges
181        for &(start, end, country) in IPV6_RANGES {
182            let is_eu = EU_COUNTRIES.contains(&country);
183            let region = determine_region(country);
184
185            let geo_info = GeoInfo {
186				country_code: cc2(country),
187				is_eu,
188				region: region as u8,
189			};
190
191            v6_ranges.push((start, end, geo_info));
192        }
193
194        // Data should already be sorted from build.rs, but let's be safe
195        //v4_ranges.sort_by_key(|r| r.0);
196        //v6_ranges.sort_by_key(|r| r.0);
197
198        GeoIpDb { v4_ranges, v6_ranges }
199    }
200	
201	/// Build a database by parsing RIPE delegated stats content at runtime.
202	///
203	/// This is useful when you want to load newer data from a cache or ship your own
204	/// dataset. The resulting ranges are sorted for efficient lookup.
205	///
206	/// # Examples
207	/// ```
208	/// use offline_ripe_geoip::GeoIpDb;
209	///
210	/// let data = "ripencc|DE|ipv4|46.4.0.0|256|20250101|allocated\n";
211	/// let db = GeoIpDb::from_ripe_delegated_str(data);
212	/// assert!(db.lookup("46.4.0.1".parse().unwrap()).is_some());
213	/// ```
214    pub fn from_ripe_delegated_str(content: &str) -> Self {
215        let parsed = crate::parse_ripe_delegated(content);
216
217        let mut v4_ranges: Vec<(u32, u32, GeoInfo)> = Vec::new();
218        let mut v6_ranges: Vec<(u128, u128, GeoInfo)> = Vec::new();
219
220        for r in parsed {
221            let is_eu = EU_COUNTRIES.contains(&r.country.as_str());
222            let region = determine_region(&r.country);
223
224            let geo = GeoInfo {
225                country_code: cc2(&r.country),
226                is_eu,
227                region: region as u8,
228            };
229
230            if let Some(v4) = r.start_v4 {
231                let start: u32 = v4.into();
232                let end = start.saturating_add((r.count as u32).saturating_sub(1));
233                v4_ranges.push((start, end, geo));
234            } else if let Some(v6) = r.start_v6 {
235                let start: u128 = v6.into();
236                let end = start.saturating_add(r.count.saturating_sub(1));
237                v6_ranges.push((start, end, geo));
238            }
239        }
240
241        v4_ranges.sort_by_key(|r| r.0);
242        v6_ranges.sort_by_key(|r| r.0);
243
244        GeoIpDb { v4_ranges, v6_ranges }
245    }
246
247    /// Load RIPE delegated stats content from a file and build a database.
248	///
249	/// # Errors
250	/// Returns an error if the file cannot be read.
251    pub fn from_ripe_delegated_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
252        let content = fs::read_to_string(path)?;
253        Ok(Self::from_ripe_delegated_str(&content))
254    }
255
256    /// Try to load the database from a cache file, falling back to embedded data.
257	///
258	/// This is a convenience helper for "use cache if present, otherwise use the
259	/// built-in tables".
260    pub fn from_cache_or_embedded<P: AsRef<Path>>(cache_path: P) -> Self {
261        match Self::from_ripe_delegated_file(cache_path) {
262            Ok(db) => db,
263            Err(_) => Self::new(),
264        }
265    }
266
267    /// Look up a single IPv4 address.
268	///
269	/// Returns [`None`] if the address is not covered by the embedded/loaded ranges.
270	#[inline]
271    pub fn lookup_v4(&self, ip: Ipv4Addr) -> Option<&GeoInfo> {
272		let ip_u32: u32 = ip.into();
273		
274		match self.v4_ranges.binary_search_by_key(&ip_u32, |&(start, _, _)| start) {
275			Ok(idx) => Some(&self.v4_ranges[idx].2),
276			Err(idx) => {
277				if idx > 0 {
278					let (start, end, geo) = &self.v4_ranges[idx - 1];
279					if ip_u32 >= *start && ip_u32 <= *end {
280						return Some(geo);
281					}
282				}
283				None
284			}
285		}
286	}
287
288    /// Look up a single IPv6 address.
289	///
290	/// Returns [`None`] if the address is not covered by the embedded/loaded ranges.
291	#[inline]
292	pub fn lookup_v6(&self, ip: Ipv6Addr) -> Option<&GeoInfo> {
293		let ip_u128: u128 = ip.into();
294		let ranges = &self.v6_ranges;
295
296		if ranges.is_empty() {
297			return None;
298		}
299
300		// upper_bound: first index where start > ip
301		let mut lo: usize = 0;
302		let mut hi: usize = ranges.len();
303		while lo < hi {
304			let mid = lo + (hi - lo) / 2;
305			if ip_u128 < ranges[mid].0 {
306				hi = mid;
307			} else {
308				lo = mid + 1;
309			}
310		}
311
312		if lo == 0 {
313			return None;
314		}
315
316		let (start, end, geo) = &ranges[lo - 1];
317		if ip_u128 >= *start && ip_u128 <= *end {
318			Some(geo)
319		} else {
320			None
321		}
322	}
323
324    /// Look up an IP address (IPv4 or IPv6).
325	///
326	/// # Examples
327	/// ```
328	/// use offline_ripe_geoip::GeoIpDb;
329	///
330	/// let db = GeoIpDb::new();
331	/// let info = db.lookup("46.4.0.1".parse().unwrap()).unwrap();
332	/// assert_eq!(info.country_code_str(), "DE");
333	/// ```
334    pub fn lookup(&self, ip: IpAddr) -> Option<&GeoInfo> {
335        match ip {
336            IpAddr::V4(v4) => self.lookup_v4(v4),
337            IpAddr::V6(v6) => self.lookup_v6(v6),
338        }
339    }
340
341    /// Return `true` if the IP is covered by the database and classified as EU.
342	///
343	/// Addresses not found in the database return `false`.
344	#[inline]
345    pub fn is_eu(&self, ip: IpAddr) -> bool {
346        self.lookup(ip).map(|info| info.is_eu).unwrap_or(false)
347    }
348
349    /// Return basic statistics about the loaded database.
350	///
351	/// This can be useful for sanity checks (e.g., validating that data loaded correctly).
352    pub fn stats(&self) -> DbStats {
353        let total_v4_ranges = self.v4_ranges.len();
354        let total_v6_ranges = self.v6_ranges.len();
355        let eu_v4_ranges = self.v4_ranges.iter().filter(|(_, _, info)| info.is_eu).count();
356        let eu_v6_ranges = self.v6_ranges.iter().filter(|(_, _, info)| info.is_eu).count();
357
358        DbStats {
359            total_v4_ranges,
360            total_v6_ranges,
361            eu_v4_ranges,
362            eu_v6_ranges,
363            non_eu_v4_ranges: total_v4_ranges - eu_v4_ranges,
364            non_eu_v6_ranges: total_v6_ranges - eu_v6_ranges,
365        }
366    }
367}
368
#[cfg(feature = "download")]
impl GeoIpDb {
    /// Download RIPE delegated data from `url` and replace `cache_path` with it.
    ///
    /// The response body is written and synced to a temporary file next to the
    /// destination, named `<file name>.tmp`, and then renamed over the
    /// destination. The existing cache is never removed beforehand, so a failed
    /// update leaves the previous cache in place. The temporary file is removed
    /// again (best effort) if writing or renaming fails.
    ///
    /// Returns the number of bytes downloaded.
    ///
    /// # Errors
    /// Returns an error if `cache_path` has no file name, if the download fails
    /// (including a non-success HTTP status), or if the cache file cannot be
    /// written or renamed into place.
    ///
    /// # Feature
    /// Available only when the crate is built with the `download` feature.
    pub fn update_cache_from_url<P: AsRef<Path>>(cache_path: P, url: &str) -> io::Result<u64> {
        use std::io::Write;

        let cache_path = cache_path.as_ref();

        // Append ".tmp" to the whole file name rather than swapping the
        // extension: swapping would make the temp path equal to the
        // destination for a cache that already ends in ".tmp", and would make
        // "a.txt" and "a.dat" share one temp file.
        let mut tmp_name = cache_path
            .file_name()
            .ok_or_else(|| {
                io::Error::new(io::ErrorKind::InvalidInput, "cache path has no file name")
            })?
            .to_os_string();
        tmp_name.push(".tmp");
        let tmp_path = cache_path.with_file_name(tmp_name);

        // Ensure the parent directory exists.
        if let Some(parent) = cache_path.parent() {
            fs::create_dir_all(parent)?;
        }

        // Download the whole body before touching the file system.
        let resp = reqwest::blocking::get(url)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
            .error_for_status()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        let bytes = resp
            .bytes()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        // Write, sync and close the temp file, then rename it over the
        // destination. `fs::rename` replaces an existing destination file, so
        // no prior removal is needed.
        let published = fs::File::create(&tmp_path)
            .and_then(|mut file| {
                file.write_all(&bytes)?;
                file.sync_all()
            })
            .and_then(|()| fs::rename(&tmp_path, cache_path));

        if let Err(err) = published {
            // Best effort: do not leave a partial temp file behind.
            let _ = fs::remove_file(&tmp_path);
            return Err(err);
        }

        Ok(bytes.len() as u64)
    }

    /// Convenience wrapper around [`GeoIpDb::update_cache_from_url`] using the
    /// RIPE "extended latest" endpoint ([`RIPE_EXTENDED_LATEST_URL`]).
    ///
    /// # Errors
    /// Same as [`GeoIpDb::update_cache_from_url`].
    ///
    /// # Feature
    /// Available only when the crate is built with the `download` feature.
    pub fn update_cache<P: AsRef<Path>>(cache_path: P) -> io::Result<u64> {
        Self::update_cache_from_url(cache_path, RIPE_EXTENDED_LATEST_URL)
    }
}
427
428impl Default for GeoIpDb {
429    fn default() -> Self {
430        Self::new()
431    }
432}
433
/// Summary counts for the database contents, as returned by `GeoIpDb::stats`.
///
/// A plain bag of counters: it is `Copy`, comparable with `==`, and
/// `Default` yields all zeros.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct DbStats {
    /// Number of IPv4 ranges in the database.
    pub total_v4_ranges: usize,
    /// Number of IPv6 ranges in the database.
    pub total_v6_ranges: usize,
    /// Number of IPv4 ranges flagged as EU.
    pub eu_v4_ranges: usize,
    /// Number of IPv6 ranges flagged as EU.
    pub eu_v6_ranges: usize,
    /// Number of IPv4 ranges not flagged as EU.
    pub non_eu_v4_ranges: usize,
    /// Number of IPv6 ranges not flagged as EU.
    pub non_eu_v6_ranges: usize,
}
444
445/// Map a country code to a coarse [`Region`] bucket.
446///
447/// This mapping is a policy-oriented heuristic and may be adjusted over time.
448fn determine_region(country_code: &str) -> Region {
449    if EU_COUNTRIES.contains(&country_code) {
450        Region::EuropeanUnion
451    } else {
452        match country_code {
453            "GB" | "NO" | "CH" | "IS" | "LI" => Region::EuropeNonEu,
454            "RU" | "UA" | "BY" | "MD" => Region::EasternEurope,
455            "TR" => Region::Turkey,
456            "IL" | "PS" => Region::MiddleEast,
457            "EG" | "TN" | "MA" | "DZ" => Region::NorthAfrica,
458            "KZ" | "UZ" | "TM" | "KG" | "TJ" => Region::CentralAsia,
459            "AE" | "SA" | "QA" | "KW" | "BH" | "OM" => Region::GulfStates,
460            _ => Region::Other,
461        }
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded tables must contain IPv4 data; also prints a summary.
    #[test]
    fn test_embedded_db() {
        let stats = GeoIpDb::new().stats();

        println!("\n📊 Embedded Database Stats:");
        println!(
            "  IPv4 ranges: {} (EU: {}, non-EU: {})",
            stats.total_v4_ranges, stats.eu_v4_ranges, stats.non_eu_v4_ranges
        );
        println!(
            "  IPv6 ranges: {} (EU: {}, non-EU: {})",
            stats.total_v6_ranges, stats.eu_v6_ranges, stats.non_eu_v6_ranges
        );

        assert!(stats.total_v4_ranges > 0, "Should have IPv4 ranges");
    }

    /// A known German IPv4 address resolves to DE and is flagged as EU.
    #[test]
    fn test_lookup_german_ipv4() {
        let db = GeoIpDb::new();
        let ip: Ipv4Addr = "46.4.0.1".parse().unwrap();

        let info = db.lookup_v4(ip).expect("German IP should be found");
        assert_eq!(info.country_code_str(), "DE");
        assert!(info.is_eu);
    }

    /// IPv6 lookup must not panic; the result depends on the embedded data.
    #[test]
    fn test_lookup_german_ipv6() {
        let db = GeoIpDb::new();
        // Example German IPv6 address (2a00::/12 is typically EU)
        let ip: Ipv6Addr = "2a01:4f8::1".parse().unwrap();

        if let Some(info) = db.lookup_v6(ip) {
            // Just verify we can look it up, actual country depends on data
            println!("Found IPv6: {} in {}", ip, info.country_code_str());
        }
    }

    /// The family-agnostic entry point dispatches to the right table.
    #[test]
    fn test_lookup_any_ip() {
        let db = GeoIpDb::new();

        // `test_lookup_german_ipv4` already requires this address to be
        // present, so assert on it rather than skipping when it is missing.
        let ipv4: IpAddr = "46.4.0.1".parse().unwrap();
        let info = db.lookup(ipv4).expect("46.4.0.1 should be found via lookup()");
        assert_eq!(info.country_code_str(), "DE");

        // IPv6 coverage depends on the data; only check the call itself.
        let ipv6: IpAddr = "2a01:4f8::1".parse().unwrap();
        let _ = db.lookup(ipv6);
    }

    /// `is_eu` reports `true` for the known German address.
    #[test]
    fn test_is_eu_method() {
        let db = GeoIpDb::new();

        let ipv4: IpAddr = "46.4.0.1".parse().unwrap();
        assert!(db.is_eu(ipv4));
    }

    /// Start a throwaway HTTP server on a free local port that answers exactly
    /// one request with `body`, and return its base URL.
    #[cfg(feature = "download")]
    fn serve_once(body: &'static str) -> String {
        use std::io::{Read, Write};
        use std::net::TcpListener;

        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
        let addr = listener.local_addr().unwrap();

        std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().unwrap();

            // read request (ignore contents)
            let mut buf = [0u8; 1024];
            let _ = stream.read(&mut buf);

            let resp = format!(
                "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
                body.len(),
                body
            );
            let _ = stream.write_all(resp.as_bytes());
            let _ = stream.flush();
        });

        format!("http://{}", addr)
    }

    /// Download into an empty directory, then load and query the cache.
    #[test]
    #[cfg(feature = "download")]
    fn test_update_cache_and_load() {
        use std::net::IpAddr;

        // Minimal delegated content:
        // - one IPv4 block: 46.4.0.0/24 (256 addrs)
        // - one IPv6 block: 2a01:4f8::/32
        // Data lines after the first keep their leading tab, written as `\t`.
        let delegated = "# comment\n\
            \t2|ripencc|20250101|0000|summary|whatever\n\
            \tripencc|DE|ipv4|46.4.0.0|256|20250101|allocated\n\
            \tripencc|DE|ipv6|2a01:4f8::|32|20250101|allocated\n\
            \t";

        let url = serve_once(delegated);

        let dir = tempfile::tempdir().unwrap();
        let cache_path = dir.path().join("ripe-cache.txt");

        let bytes = GeoIpDb::update_cache_from_url(&cache_path, &url).unwrap();
        assert!(bytes > 0);
        assert!(cache_path.exists());

        let db = GeoIpDb::from_ripe_delegated_file(&cache_path).unwrap();

        let ip: IpAddr = "46.4.0.1".parse().unwrap();
        let info = db.lookup(ip).expect("should find 46.4.0.1");
        assert_eq!(info.country_code_str(), "DE");
        assert!(info.is_eu);
    }

    /// An existing cache file is replaced by the downloaded content.
    #[test]
    #[cfg(feature = "download")]
    fn test_update_cache_replaces_existing_file() {
        let old = "ripencc|FR|ipv4|46.4.0.0|256|20250101|allocated\n\t";
        let new = "ripencc|DE|ipv4|46.4.0.0|256|20250101|allocated\n\t";

        let url = serve_once(new);

        let dir = tempfile::tempdir().unwrap();
        let cache_path = dir.path().join("ripe-cache.txt");

        std::fs::write(&cache_path, old).unwrap();

        GeoIpDb::update_cache_from_url(&cache_path, &url).unwrap();

        let db = GeoIpDb::from_ripe_delegated_file(&cache_path).unwrap();
        let info = db.lookup("46.4.0.1".parse().unwrap()).unwrap();
        assert_eq!(info.country_code_str(), "DE");
    }

    /// Network smoke test against the real RIPE endpoint (ignored by default).
    #[test]
    #[ignore]
    #[cfg(feature = "download")]
    fn smoke_test_real_ripe_download_and_lookup() {
        // Use a throwaway directory like the other download tests instead of a
        // hard-coded /tmp path, which does not exist on every platform.
        let dir = tempfile::tempdir().unwrap();
        let cache = dir.path().join("ripe-cache.txt");

        // Download real RIPE data
        let bytes = GeoIpDb::update_cache(&cache).unwrap();
        assert!(bytes > 1_000_000, "too small, download probably failed");

        // Load from cache
        let db = GeoIpDb::from_ripe_delegated_file(&cache).unwrap();

        // Known Hetzner range is commonly DE
        let ip: std::net::IpAddr = "88.198.0.1".parse().unwrap();
        let info = db.lookup(ip).unwrap();
        println!("88.198.0.1 -> {}", info.country_code_str());
    }
}