lucisearch 0.8.1

Embeddable, in-process search engine — the SQLite/DuckDB of search
Documentation
//! IP address parsing and normalization for the `ip` field type.
//!
//! Supports IPv4 and IPv6. IPv4 addresses (including IPv4-mapped IPv6
//! addresses) are also stored as i64 column values for range queries. Both
//! address families are stored as normalized keyword strings for term queries.

use std::net::{IpAddr, Ipv4Addr};

/// Produce the canonical textual form of an IP address for keyword indexing.
///
/// Surrounding whitespace is trimmed, and anything from the first `/` onward
/// (a CIDR prefix length) is discarded without being validated. The remaining
/// text is parsed as an IPv4 or IPv6 address and re-rendered through the
/// standard library's `Display` implementation.
///
/// Returns an empty string when the address part does not parse.
pub fn normalize_ip(s: &str) -> String {
    let trimmed = s.trim();
    // Keep only the address; the prefix length is not stored in the keyword.
    let host = trimmed.split_once('/').map_or(trimmed, |(addr, _)| addr);
    host.parse::<IpAddr>()
        .map(|addr| addr.to_string())
        .unwrap_or_default()
}

/// Map an IP address string to the i64 value used for columnar storage.
///
/// Surrounding whitespace is trimmed and anything from the first `/` onward
/// (a CIDR prefix length) is ignored. The result is:
///
/// * IPv4 — the address as its 32-bit numeric value, widened to i64.
/// * IPv4-mapped IPv6 (`::ffff:a.b.c.d`) — the numeric value of the embedded
///   IPv4 address.
/// * Any other IPv6 address, or unparseable input — `None`.
pub fn ip_to_i64(s: &str) -> Option<i64> {
    let trimmed = s.trim();
    let host = match trimmed.split_once('/') {
        Some((addr, _)) => addr,
        None => trimmed,
    };
    let v4 = match host.parse::<IpAddr>().ok()? {
        IpAddr::V4(v4) => v4,
        // Only IPv4-mapped IPv6 addresses have a 32-bit representation.
        IpAddr::V6(v6) => v6.to_ipv4_mapped()?,
    };
    Some(i64::from(u32::from(v4)))
}

/// Parse an IPv4 CIDR string into its inclusive `(start_ip, end_ip)` range.
///
/// Both bounds are the numeric 32-bit address values widened to i64. The
/// start has all host bits cleared and the end has all host bits set, so
/// `"192.168.1.77/24"` yields the range `192.168.1.0 ..= 192.168.1.255`.
///
/// Returns `None` when:
///
/// * the input does not contain exactly one `/`,
/// * the address part is not a valid IPv4 address (IPv6 CIDRs are not
///   supported),
/// * the prefix length is not a plain run of ASCII digits (a sign such as
///   `+24` is rejected), or
/// * the prefix length is greater than 32.
pub fn cidr_to_range(s: &str) -> Option<(i64, i64)> {
    let (addr_part, prefix_part) = s.trim().split_once('/')?;
    let ip: Ipv4Addr = addr_part.parse().ok()?;

    // `u32::from_str` accepts a leading '+', which is not valid CIDR syntax,
    // so insist on digits only. An empty or extra-'/' prefix fails here or in
    // the parse below.
    if !prefix_part.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let prefix_len: u32 = prefix_part.parse().ok()?;
    if prefix_len > 32 {
        return None;
    }

    // A /0 prefix would shift by the full 32 bits, which `checked_shl`
    // reports as `None`; the correct mask in that case is all zeros.
    let mask = u32::MAX.checked_shl(32 - prefix_len).unwrap_or(0);
    let start = u32::from(ip) & mask;
    let end = start | !mask;
    Some((i64::from(start), i64::from(end)))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Numeric value of an IPv4 literal that is known to be valid.
    fn v4(addr: &str) -> i64 {
        ip_to_i64(addr).unwrap()
    }

    /// Plain and whitespace-padded IPv4 input normalizes to dotted-quad form.
    #[test]
    fn normalize_ipv4() {
        let cases = [("192.168.1.1", "192.168.1.1"), (" 10.0.0.1 ", "10.0.0.1")];
        for (input, expected) in cases {
            assert_eq!(normalize_ip(input), expected);
        }
    }

    /// Already-canonical IPv6 input is returned unchanged.
    #[test]
    fn normalize_ipv6() {
        for addr in ["::1", "2001:db8::1"] {
            assert_eq!(normalize_ip(addr), addr);
        }
    }

    /// The CIDR prefix length is dropped from the normalized keyword.
    #[test]
    fn normalize_cidr_strips_prefix() {
        let normalized = normalize_ip("192.168.0.0/16");
        assert_eq!(normalized, "192.168.0.0");
    }

    /// Unparseable input normalizes to the empty string.
    #[test]
    fn normalize_invalid() {
        let normalized = normalize_ip("not_an_ip");
        assert_eq!(normalized, "");
    }

    /// IPv4 addresses map to their 32-bit numeric value.
    #[test]
    fn ipv4_to_i64() {
        let cases = [
            ("0.0.0.0", 0),
            ("0.0.0.1", 1),
            ("192.168.1.1", 0xC0A80101),
            ("255.255.255.255", 0xFFFFFFFF),
        ];
        for (input, expected) in cases {
            assert_eq!(ip_to_i64(input), Some(expected));
        }
    }

    /// A regular IPv6 address has no i64 representation.
    #[test]
    fn ipv6_returns_none() {
        let converted = ip_to_i64("2001:db8::1");
        assert_eq!(converted, None);
    }

    /// An IPv4-mapped IPv6 address converts like the embedded IPv4 address.
    #[test]
    fn ipv4_mapped_ipv6() {
        let converted = ip_to_i64("::ffff:192.168.1.1");
        assert_eq!(converted, Some(0xC0A80101));
    }

    /// A /24 covers the 256 addresses sharing the first three octets.
    #[test]
    fn cidr_range() {
        let range = cidr_to_range("192.168.0.0/24").unwrap();
        assert_eq!(range.0, v4("192.168.0.0"));
        assert_eq!(range.1, v4("192.168.0.255"));
    }

    /// A /16 covers every address sharing the first two octets.
    #[test]
    fn cidr_16() {
        let range = cidr_to_range("10.0.0.0/16").unwrap();
        assert_eq!(range.0, v4("10.0.0.0"));
        assert_eq!(range.1, v4("10.0.255.255"));
    }

    /// A /32 is a single-address range.
    #[test]
    fn cidr_32() {
        let (first, last) = cidr_to_range("1.2.3.4/32").unwrap();
        assert_eq!(first, last);
        assert_eq!(first, v4("1.2.3.4"));
    }

    /// Malformed CIDR strings yield `None`.
    #[test]
    fn cidr_invalid() {
        let rejected = [
            "not_cidr",
            "192.168.0.0",    // no prefix
            "192.168.0.0/33", // prefix too large
        ];
        for input in rejected {
            assert!(cidr_to_range(input).is_none());
        }
    }
}