lucisearch 0.8.1

Embeddable, in-process search engine — the SQLite/DuckDB of search
Documentation
//! Geo point types and distance computation.
//!
//! Uses the Haversine formula for accurate great-circle distance.

/// A geographic point (latitude, longitude in degrees).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct GeoPoint {
    pub lat: f64,
    pub lon: f64,
}

impl GeoPoint {
    pub fn new(lat: f64, lon: f64) -> Self {
        Self { lat, lon }
    }

    /// Parse from JSON. Accepts:
    /// - `{"lat": 40.7, "lon": -73.9}`
    /// - `[lon, lat]` (GeoJSON order)
    /// - `"lat,lon"` string
    pub fn from_json(value: &serde_json::Value) -> Option<Self> {
        match value {
            serde_json::Value::Object(obj) => {
                let lat = obj.get("lat").and_then(|v| v.as_f64())?;
                let lon = obj.get("lon").and_then(|v| v.as_f64())?;
                Some(Self { lat, lon })
            }
            serde_json::Value::Array(arr) if arr.len() == 2 => {
                // GeoJSON: [lon, lat]
                let lon = arr[0].as_f64()?;
                let lat = arr[1].as_f64()?;
                Some(Self { lat, lon })
            }
            serde_json::Value::String(s) => {
                let parts: Vec<&str> = s.split(',').collect();
                if parts.len() == 2 {
                    let lat = parts[0].trim().parse().ok()?;
                    let lon = parts[1].trim().parse().ok()?;
                    Some(Self { lat, lon })
                } else {
                    None
                }
            }
            _ => None,
        }
    }
}

/// Earth radius in kilometers.
const EARTH_RADIUS_KM: f64 = 6371.0;

/// Haversine distance between two geo points in kilometers.
pub fn haversine_km(a: GeoPoint, b: GeoPoint) -> f64 {
    let lat1 = a.lat.to_radians();
    let lat2 = b.lat.to_radians();
    let dlat = (b.lat - a.lat).to_radians();
    let dlon = (b.lon - a.lon).to_radians();

    let h = (dlat / 2.0).sin().powi(2) + lat1.cos() * lat2.cos() * (dlon / 2.0).sin().powi(2);
    let c = 2.0 * h.sqrt().asin();

    EARTH_RADIUS_KM * c
}

/// Check if a point is within a bounding box.
pub fn in_bounding_box(point: GeoPoint, top_left: GeoPoint, bottom_right: GeoPoint) -> bool {
    point.lat <= top_left.lat
        && point.lat >= bottom_right.lat
        && point.lon >= top_left.lon
        && point.lon <= bottom_right.lon
}

/// Per-segment geo point storage.
///
/// Stores (lat, lon) pairs as parallel f64 arrays indexed by doc_id.
/// On deserialization, builds a latitude-sorted index for O(log n) range queries.
///
/// See [[optimization-bkd-tree-geo]] for the sorted scan optimization.
#[derive(Clone)]
pub struct GeoPointStore {
    lats: Vec<f64>,
    lons: Vec<f64>,
    /// Doc IDs sorted by latitude (built on deserialize for query-time use).
    lat_order: Vec<u32>,
}

impl GeoPointStore {
    pub fn new() -> Self {
        Self {
            lats: Vec::new(),
            lons: Vec::new(),
            lat_order: Vec::new(),
        }
    }

    pub fn add(&mut self, point: GeoPoint) {
        self.lats.push(point.lat);
        self.lons.push(point.lon);
    }

    pub fn add_null(&mut self) {
        self.lats.push(f64::NAN);
        self.lons.push(f64::NAN);
    }

    pub fn len(&self) -> usize {
        self.lats.len()
    }
    pub fn is_empty(&self) -> bool {
        self.lats.is_empty()
    }

    pub fn get(&self, doc_id: u32) -> Option<GeoPoint> {
        let i = doc_id as usize;
        if i < self.lats.len() && !self.lats[i].is_nan() {
            Some(GeoPoint::new(self.lats[i], self.lons[i]))
        } else {
            None
        }
    }

    /// Serialize to bytes.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(&(self.lats.len() as u32).to_le_bytes());
        for &lat in &self.lats {
            buf.extend_from_slice(&lat.to_le_bytes());
        }
        for &lon in &self.lons {
            buf.extend_from_slice(&lon.to_le_bytes());
        }
        buf
    }

    /// Returns doc IDs with latitude in [min_lat, max_lat], sorted by doc_id.
    ///
    /// Uses binary search on the pre-sorted latitude index for O(log n + k)
    /// where k is the number of matching documents.
    pub fn docs_in_lat_range(&self, min_lat: f64, max_lat: f64) -> Vec<u32> {
        if self.lat_order.is_empty() {
            return Vec::new();
        }
        let start = self
            .lat_order
            .partition_point(|&id| self.lats[id as usize] < min_lat);
        let end = self
            .lat_order
            .partition_point(|&id| self.lats[id as usize] <= max_lat);
        let mut result: Vec<u32> = self.lat_order[start..end].to_vec();
        result.sort_unstable(); // sort by doc_id for scorer ordering
        result
    }

    fn build_lat_order(lats: &[f64]) -> Vec<u32> {
        let mut order: Vec<u32> = (0..lats.len() as u32)
            .filter(|&i| !lats[i as usize].is_nan())
            .collect();
        order.sort_unstable_by(|&a, &b| lats[a as usize].partial_cmp(&lats[b as usize]).unwrap());
        order
    }

    /// Deserialize from bytes.
    pub fn from_bytes(data: &[u8]) -> Self {
        let count = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
        let mut pos = 4;
        let mut lats = Vec::with_capacity(count);
        for _ in 0..count {
            lats.push(f64::from_le_bytes(data[pos..pos + 8].try_into().unwrap()));
            pos += 8;
        }
        let mut lons = Vec::with_capacity(count);
        for _ in 0..count {
            lons.push(f64::from_le_bytes(data[pos..pos + 8].try_into().unwrap()));
            pos += 8;
        }
        let lat_order = Self::build_lat_order(&lats);
        Self {
            lats,
            lons,
            lat_order,
        }
    }
}

impl Default for GeoPointStore {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn haversine_known_distance() {
        // NYC to London ≈ 5570 km
        let nyc = GeoPoint::new(40.7128, -74.0060);
        let london = GeoPoint::new(51.5074, -0.1278);
        let d = haversine_km(nyc, london);
        assert!((d - 5570.0).abs() < 50.0, "NYC-London ≈ 5570km, got {d}");
    }

    #[test]
    fn haversine_same_point() {
        let p = GeoPoint::new(0.0, 0.0);
        assert!(haversine_km(p, p) < 0.001);
    }

    #[test]
    fn bounding_box_contains() {
        let tl = GeoPoint::new(41.0, -74.5);
        let br = GeoPoint::new(40.0, -73.5);
        let inside = GeoPoint::new(40.5, -74.0);
        let outside = GeoPoint::new(42.0, -74.0);
        assert!(in_bounding_box(inside, tl, br));
        assert!(!in_bounding_box(outside, tl, br));
    }

    #[test]
    fn geo_point_from_json_object() {
        let v = serde_json::json!({"lat": 40.7, "lon": -73.9});
        let p = GeoPoint::from_json(&v).unwrap();
        assert!((p.lat - 40.7).abs() < 0.001);
    }

    #[test]
    fn geo_point_from_json_array() {
        let v = serde_json::json!([-73.9, 40.7]); // GeoJSON: [lon, lat]
        let p = GeoPoint::from_json(&v).unwrap();
        assert!((p.lat - 40.7).abs() < 0.001);
        assert!((p.lon - (-73.9)).abs() < 0.001);
    }

    #[test]
    fn geo_point_from_json_string() {
        let v = serde_json::json!("40.7,-73.9");
        let p = GeoPoint::from_json(&v).unwrap();
        assert!((p.lat - 40.7).abs() < 0.001);
    }

    #[test]
    fn geo_store_round_trip() {
        let mut store = GeoPointStore::new();
        store.add(GeoPoint::new(40.7, -73.9));
        store.add_null();
        store.add(GeoPoint::new(51.5, -0.1));

        let bytes = store.to_bytes();
        let restored = GeoPointStore::from_bytes(&bytes);

        assert_eq!(restored.len(), 3);
        let p0 = restored.get(0).unwrap();
        assert!((p0.lat - 40.7).abs() < 0.001);
        assert!(restored.get(1).is_none()); // null
        let p2 = restored.get(2).unwrap();
        assert!((p2.lat - 51.5).abs() < 0.001);
    }
}