xdb-parse 0.2.0

High-performance, zero-copy xdb IP geolocation parser (ip2region-compatible)
Documentation
//! IP geolocation lookup functions.
//!
//! Provides four search entry points with different trade-offs:
//!
//! | Function | Input | When to use |
//! |----------|-------|-------------|
//! | [`search_by_uint`] | `u32` | You have a raw IPv4 integer |
//! | [`search_by_u128`] | `u128` | You have a raw IPv6 integer |
//! | [`search_by_ipaddr`] | `IpAddr` | You already have a parsed `IpAddr` |
//! | [`search_ip`] | `impl IntoIpAddr` | You have an IP string (dotted-decimal, colon-hex, or numeric) or any other [`IntoIpAddr`] value |
//!
//! All functions return `&str` borrowing from the loaded xdb data — zero heap allocation per query.

use std::net::IpAddr;

use crate::error::XdbError;
use crate::ip::IntoIpAddr;
use crate::{
    IPV4_SEGMENT_INDEX_BLOCK_SIZE, IPV6_SEGMENT_INDEX_BLOCK_SIZE, TOTAL_HEADER_SIZE,
    TOTAL_VECTOR_INDEX_SIZE, VECTOR_COL_SIZE, VECTOR_INDEX_BLOCK_SIZE,
};

/// Search by raw `u32` IPv4 address.
///
/// This is the fastest path — no string parsing, no `IpAddr` conversion.
/// Returns a `&str` borrowing from the loaded xdb buffer, or an empty string
/// when no segment in the index contains `ip`.
///
/// # Errors
///
/// * [`XdbError::InvalidUtf8`] if the matched region bytes are not valid UTF-8.
/// * The `XdbError` converted from a slice-length error if the vector index,
///   a segment index block, or the region data lies outside `data`
///   (truncated or corrupt buffer). Such input is reported as an error
///   instead of panicking.
///
/// # Example
///
/// ```no_run
/// let data = xdb_parse::load_file("./assets/ip2region_v4.xdb".into())?;
/// let ip: u32 = 0x4a7d_2b63; // 74.125.43.99
/// let location = xdb_parse::search_by_uint(ip, &data)?;
/// println!("{}", location);
/// # Ok::<(), xdb_parse::error::XdbError>(())
/// ```
pub fn search_by_uint(ip: u32, data: &[u8]) -> Result<&str, XdbError> {
    /// Borrows `len` bytes of `buf` starting at `start`, reporting an
    /// out-of-range request as a slice-length error rather than panicking.
    fn window(
        buf: &[u8],
        start: usize,
        len: usize,
    ) -> Result<&[u8], std::array::TryFromSliceError> {
        match start.checked_add(len).and_then(|end| buf.get(start..end)) {
            Some(bytes) => Ok(bytes),
            None => {
                // An empty slice can never convert into a one-byte array, so
                // this always yields the standard slice-length error.
                let empty: &[u8] = &[];
                <[u8; 1]>::try_from(empty).map(|_| empty)
            }
        }
    }

    const BLOCK: usize = IPV4_SEGMENT_INDEX_BLOCK_SIZE as usize;

    // The two most significant bytes of the address select the vector index entry.
    let il0 = ((ip >> 24) & 0xFF) as usize;
    let il1 = ((ip >> 16) & 0xFF) as usize;

    let idx = (il0 as u32 * VECTOR_COL_SIZE as u32 * VECTOR_INDEX_BLOCK_SIZE as u32
        + il1 as u32 * VECTOR_INDEX_BLOCK_SIZE as u32) as usize;

    // The vector index sits directly after the header; each entry stores the
    // start and end pointers of the segment index range for this prefix.
    let vec_segment = window(data, TOTAL_HEADER_SIZE, TOTAL_VECTOR_INDEX_SIZE)?;
    let entry = window(vec_segment, idx, VECTOR_INDEX_BLOCK_SIZE)?;
    let start_ptr = u32::from_le_bytes(window(entry, 0, 4)?.try_into()?);
    let end_ptr = u32::from_le_bytes(window(entry, 4, 4)?.try_into()?);

    // An inverted pointer pair is treated as an empty range instead of
    // underflowing the subtraction.
    let mut left: usize = 0;
    let mut right: usize =
        (end_ptr.saturating_sub(start_ptr) / IPV4_SEGMENT_INDEX_BLOCK_SIZE) as usize;

    // Binary search over the half-open block range [left, right).
    while left < right {
        let mid = left + (right - left) / 2;
        let offset = start_ptr as usize + mid * BLOCK;
        let block: [u8; BLOCK] = window(data, offset, BLOCK)?.try_into()?;

        // Block layout: start_ip (u32) | end_ip (u32) | data_len (u16) | data_ptr (u32).
        let start_ip = read_u32(&block, 0);
        if ip < start_ip {
            right = mid;
            continue;
        }
        let end_ip = read_u32(&block, 4);
        if ip > end_ip {
            left = mid + 1;
        } else {
            let data_len = read_u16(&block, 8) as usize;
            let data_ptr = read_u32(&block, 10) as usize;
            let bytes = window(data, data_ptr, data_len)?;
            return std::str::from_utf8(bytes).map_err(|_| XdbError::InvalidUtf8);
        }
    }
    Ok("")
}

/// Search by raw `u128` IPv6 address.
///
/// This is the fastest path for IPv6 — no string parsing, no `IpAddr` conversion.
/// Returns a `&str` borrowing from the loaded xdb buffer, or an empty string
/// when no segment in the index contains `ip`.
///
/// # Errors
///
/// * [`XdbError::InvalidUtf8`] if the matched region bytes are not valid UTF-8.
/// * The `XdbError` converted from a slice-length error if the vector index,
///   a segment index block, or the region data lies outside `data`
///   (truncated or corrupt buffer). Such input is reported as an error
///   instead of panicking.
///
/// # Example
///
/// ```no_run
/// let data = xdb_parse::load_file("./assets/ip2region_v6.xdb".into())?;
/// let ip: u128 = 0x2001_0db8_85a3_0000_0000_8a2e_0370_7334;
/// let location = xdb_parse::search_by_u128(ip, &data)?;
/// println!("{}", location);
/// # Ok::<(), xdb_parse::error::XdbError>(())
/// ```
pub fn search_by_u128(ip: u128, data: &[u8]) -> Result<&str, XdbError> {
    /// Borrows `len` bytes of `buf` starting at `start`, reporting an
    /// out-of-range request as a slice-length error rather than panicking.
    fn window(
        buf: &[u8],
        start: usize,
        len: usize,
    ) -> Result<&[u8], std::array::TryFromSliceError> {
        match start.checked_add(len).and_then(|end| buf.get(start..end)) {
            Some(bytes) => Ok(bytes),
            None => {
                // An empty slice can never convert into a one-byte array, so
                // this always yields the standard slice-length error.
                let empty: &[u8] = &[];
                <[u8; 1]>::try_from(empty).map(|_| empty)
            }
        }
    }

    const BLOCK: usize = IPV6_SEGMENT_INDEX_BLOCK_SIZE as usize;

    // The two most significant bytes of the address select the vector index entry.
    let il0 = ((ip >> 120) & 0xFF) as usize;
    let il1 = ((ip >> 112) & 0xFF) as usize;

    let idx = (il0 as u32 * VECTOR_COL_SIZE as u32 * VECTOR_INDEX_BLOCK_SIZE as u32
        + il1 as u32 * VECTOR_INDEX_BLOCK_SIZE as u32) as usize;

    // The vector index sits directly after the header; each entry stores the
    // start and end pointers of the segment index range for this prefix.
    let vec_segment = window(data, TOTAL_HEADER_SIZE, TOTAL_VECTOR_INDEX_SIZE)?;
    let entry = window(vec_segment, idx, VECTOR_INDEX_BLOCK_SIZE)?;
    let start_ptr = u32::from_le_bytes(window(entry, 0, 4)?.try_into()?);
    let end_ptr = u32::from_le_bytes(window(entry, 4, 4)?.try_into()?);

    // An inverted pointer pair is treated as an empty range instead of
    // underflowing the subtraction.
    let mut left: usize = 0;
    let mut right: usize =
        (end_ptr.saturating_sub(start_ptr) / IPV6_SEGMENT_INDEX_BLOCK_SIZE) as usize;

    // Binary search over the half-open block range [left, right).
    while left < right {
        let mid = left + (right - left) / 2;
        let offset = start_ptr as usize + mid * BLOCK;
        let block: [u8; BLOCK] = window(data, offset, BLOCK)?.try_into()?;

        // Block layout: start_ip (u128) | end_ip (u128) | data_len (u16) | data_ptr (u32).
        let start_ip = read_u128(&block, 0);
        if ip < start_ip {
            right = mid;
            continue;
        }
        let end_ip = read_u128(&block, 16);
        if ip > end_ip {
            left = mid + 1;
        } else {
            let data_len = read_u16(&block, 32) as usize;
            let data_ptr = read_u32(&block, 34) as usize;
            let bytes = window(data, data_ptr, data_len)?;
            return std::str::from_utf8(bytes).map_err(|_| XdbError::InvalidUtf8);
        }
    }
    Ok("")
}

/// Look up the region for any value that can be turned into an IP address.
///
/// The input is first converted through [`IntoIpAddr`] and the resulting
/// address is then handed to [`search_by_ipaddr`]. Supported inputs:
///
/// | Type | Example |
/// |------|---------|
/// | `&str` / `String` | `"192.168.1.1"`, `"::1"`, `"3232235777"` |
/// | `u32` | `0xC0A80101`, `3232235777` |
/// | `u128` | `0x2001_0db8_...` |
/// | `IpAddr` / `Ipv4Addr` / `Ipv6Addr` | from socket addresses |
///
/// # Errors
///
/// Propagates the conversion error from [`IntoIpAddr`] as well as any error
/// returned by [`search_by_ipaddr`].
///
/// # Example
///
/// ```no_run
/// let data = xdb_parse::load_file("./assets/ip2region_v4.xdb".into())?;
///
/// // string
/// let loc = xdb_parse::search_ip("73.24.63.66", &data)?;
///
/// // raw u32 (fastest)
/// let loc = xdb_parse::search_ip(0x4918_3F42u32, &data)?;
///
/// // IpAddr from socket
/// let addr = "73.24.63.66".parse::<std::net::IpAddr>()?;
/// let loc = xdb_parse::search_ip(addr, &data)?;
/// # Ok::<(), xdb_parse::error::XdbError>(())
/// ```
pub fn search_ip(ip: impl IntoIpAddr, data: &[u8]) -> Result<&str, XdbError> {
    let addr = ip.into_ip_addr()?;
    search_by_ipaddr(addr, data)
}

/// Look up the region for an already-parsed [`std::net::IpAddr`].
///
/// IPv4 addresses are converted to their `u32` value and passed to
/// [`search_by_uint`]; IPv6 addresses are converted to their `u128` value and
/// passed to [`search_by_u128`]. Handy when the address comes from another
/// source such as a socket address.
///
/// # Errors
///
/// Returns whatever error the selected search function returns.
///
/// # Example
///
/// ```no_run
/// use std::net::IpAddr;
///
/// let data = xdb_parse::load_file("./assets/ip2region_v4.xdb".into())?;
/// let ip: IpAddr = "73.24.63.66".parse()?;
/// let location = xdb_parse::search_by_ipaddr(ip, &data)?;
/// # Ok::<(), xdb_parse::error::XdbError>(())
/// ```
pub fn search_by_ipaddr(ip: IpAddr, data: &[u8]) -> Result<&str, XdbError> {
    match ip {
        IpAddr::V6(addr) => search_by_u128(u128::from(addr), data),
        IpAddr::V4(addr) => search_by_uint(u32::from(addr), data),
    }
}

/// Reads a little-endian `u32` from the four bytes of `block` starting at `offset`.
///
/// # Panics
///
/// Panics if `offset..offset + 4` is not within `block`. The read is
/// bounds-checked so a bad offset can never touch memory outside the array.
#[inline(always)]
fn read_u32<const N: usize>(block: &[u8; N], offset: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&block[offset..offset + 4]);
    u32::from_le_bytes(raw)
}

/// Reads a little-endian `u16` from the two bytes of `block` starting at `offset`.
///
/// # Panics
///
/// Panics if `offset..offset + 2` is not within `block`. The read is
/// bounds-checked so a bad offset can never touch memory outside the array.
#[inline(always)]
fn read_u16<const N: usize>(block: &[u8; N], offset: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&block[offset..offset + 2]);
    u16::from_le_bytes(raw)
}

/// Reads a little-endian `u128` from the sixteen bytes of `block` starting at `offset`.
///
/// # Panics
///
/// Panics if `offset..offset + 16` is not within `block`. The read is
/// bounds-checked so a bad offset can never touch memory outside the array.
#[inline(always)]
fn read_u128<const N: usize>(block: &[u8; N], offset: usize) -> u128 {
    let mut raw = [0u8; 16];
    raw.copy_from_slice(&block[offset..offset + 16]);
    u128::from_le_bytes(raw)
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::thread;
    use std::time::Instant;

    use anyhow::Result;

    use super::*;
    use crate::ip::parse_ip;
    use crate::load_file;

    // Database fixtures used by the lookups below; the files must exist
    // relative to the directory the tests are run from.
    const V4_DB: &str = "./assets/ip2region_v4.xdb";
    const V6_DB: &str = "./assets/ip2region_v6.xdb";

    /// A dotted-decimal IPv4 string resolves to a non-empty region.
    #[test]
    fn test_search_ipv4() -> Result<()> {
        let db = load_file(V4_DB.into())?;
        let region = search_ip("73.24.63.66", &db)?;
        println!("result: {region}");
        assert!(!region.is_empty());
        Ok(())
    }

    /// A fully written-out IPv6 string resolves to a non-empty region.
    #[test]
    fn test_search_ipv6() -> Result<()> {
        let db = load_file(V6_DB.into())?;
        let region = search_ip("2001:0db8:85a3:0000:0000:8a2e:0370:7334", &db)?;
        println!("result: {region}");
        assert!(!region.is_empty());
        Ok(())
    }

    /// The raw `u32` entry point resolves 74.125.43.99 to a non-empty region.
    #[test]
    fn test_search_by_uint() -> Result<()> {
        let db = load_file(V4_DB.into())?;
        let raw: u32 = 0x4a7d_2b63; // 74.125.43.99
        let region = search_by_uint(raw, &db)?;
        println!("result: {region}");
        assert!(!region.is_empty());
        Ok(())
    }

    /// The raw `u128` entry point resolves an IPv6 value to a non-empty region.
    #[test]
    fn test_search_by_u128() -> Result<()> {
        let db = load_file(V6_DB.into())?;
        let raw: u128 = 0x2001_0db8_85a3_0000_0000_8a2e_0370_7334;
        let region = search_by_u128(raw, &db)?;
        println!("result: {region}");
        assert!(!region.is_empty());
        Ok(())
    }

    /// The loaded buffer can be shared through an `Arc` and queried from two
    /// threads at once.
    #[test]
    fn test_multi_thread() -> Result<()> {
        let started = Instant::now();
        let shared = Arc::new(load_file(V6_DB.into())?);

        let for_worker = Arc::clone(&shared);
        let worker = thread::spawn(move || {
            search_ip("2408:8352:da10:1ad:c283:c9ff:fec6:4046", &for_worker).unwrap();
        });
        search_ip("2408:8352:da10:1ad:c283:c9ff:fec6:4046", &shared).unwrap();
        println!("use time:{:?}", started.elapsed());
        worker.join().unwrap();
        Ok(())
    }

    /// `parse_ip` accepts dotted-decimal, colon-hex and plain numeric strings.
    #[test]
    fn test_parse_ip() -> Result<()> {
        let dotted = parse_ip("192.168.1.1")?;
        println!("{:?}", dotted);
        let colon_hex = parse_ip("2400:3200::1")?;
        println!("{:?}", colon_hex);
        let numeric = parse_ip("3232235776")?;
        println!("{:?}", numeric);
        Ok(())
    }
}