use std::collections::{BTreeMap, HashMap};
use nodedb_types::geometry::Geometry;
use serde::{Deserialize, Serialize};
use zerompk::{FromMessagePack, ToMessagePack};
use crate::geohash::{geohash_encode, geohash_neighbors};
pub const DEFAULT_PRECISION: u8 = 6;
pub struct GeohashIndex {
pub collection: String,
pub field: String,
pub precision: u8,
entries: HashMap<String, String>,
prefix_index: BTreeMap<String, Vec<String>>,
}
impl GeohashIndex {
pub fn new(collection: &str, field: &str, precision: u8) -> Self {
Self {
collection: collection.to_string(),
field: field.to_string(),
precision: precision.clamp(1, 12),
entries: HashMap::new(),
prefix_index: BTreeMap::new(),
}
}
pub fn index_document(&mut self, doc_id: &str, geometry: &Geometry) -> Option<String> {
let Geometry::Point { coordinates } = geometry else {
return None;
};
let lng = coordinates[0];
let lat = coordinates[1];
self.remove_document(doc_id);
let hash = geohash_encode(lng, lat, self.precision);
self.entries.insert(doc_id.to_string(), hash.clone());
self.prefix_index
.entry(hash.clone())
.or_default()
.push(doc_id.to_string());
Some(hash)
}
pub fn remove_document(&mut self, doc_id: &str) {
if let Some(old_hash) = self.entries.remove(doc_id)
&& let Some(ids) = self.prefix_index.get_mut(&old_hash)
{
ids.retain(|id| id != doc_id);
if ids.is_empty() {
self.prefix_index.remove(&old_hash);
}
}
}
pub fn prefix_search(&self, prefix: &str) -> Vec<&str> {
let mut results = Vec::new();
let end = prefix_successor(prefix);
let range = match &end {
Some(e) => self.prefix_index.range(prefix.to_string()..e.clone()),
None => self.prefix_index.range(prefix.to_string()..),
};
for (_hash, doc_ids) in range {
for id in doc_ids {
results.push(id.as_str());
}
}
results
}
pub fn nearby_search(&self, lng: f64, lat: f64, search_precision: Option<u8>) -> Vec<&str> {
let precision = search_precision.unwrap_or(self.precision);
let center_hash = geohash_encode(lng, lat, precision);
let mut results = Vec::new();
results.extend(self.prefix_search(¢er_hash));
let neighbors = geohash_neighbors(¢er_hash);
for (_, neighbor_hash) in &neighbors {
results.extend(self.prefix_search(neighbor_hash));
}
results.sort_unstable();
results.dedup();
results
}
pub fn get_geohash(&self, doc_id: &str) -> Option<&str> {
self.entries.get(doc_id).map(|s| s.as_str())
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn checkpoint_to_bytes(&self) -> Result<Vec<u8>, crate::persist::RTreeCheckpointError> {
let snap = GeohashSnapshot {
collection: self.collection.clone(),
field: self.field.clone(),
precision: self.precision,
entries: self.entries.clone(),
};
zerompk::to_msgpack_vec(&snap).map_err(crate::persist::RTreeCheckpointError::Serialize)
}
pub fn from_checkpoint(bytes: &[u8]) -> Result<Self, crate::persist::RTreeCheckpointError> {
let snap: GeohashSnapshot = zerompk::from_msgpack(bytes)
.map_err(crate::persist::RTreeCheckpointError::Deserialize)?;
let mut index = Self::new(&snap.collection, &snap.field, snap.precision);
for (doc_id, hash) in &snap.entries {
index.entries.insert(doc_id.clone(), hash.clone());
index
.prefix_index
.entry(hash.clone())
.or_default()
.push(doc_id.clone());
}
Ok(index)
}
}
fn prefix_successor(prefix: &str) -> Option<String> {
let mut bytes = prefix.as_bytes().to_vec();
for i in (0..bytes.len()).rev() {
if bytes[i] < 0xFF {
bytes[i] += 1;
bytes.truncate(i + 1);
return String::from_utf8(bytes).ok();
}
}
None
}
#[derive(Serialize, Deserialize, ToMessagePack, FromMessagePack)]
struct GeohashSnapshot {
collection: String,
field: String,
precision: u8,
entries: HashMap<String, String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn index_point() {
let mut idx = GeohashIndex::new("places", "location", 6);
let hash = idx
.index_document("doc1", &Geometry::point(-73.9857, 40.758))
.unwrap();
assert_eq!(hash.len(), 6);
assert_eq!(idx.len(), 1);
}
#[test]
fn skip_non_point() {
let mut idx = GeohashIndex::new("areas", "boundary", 6);
let poly = Geometry::polygon(vec![vec![[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]]]);
assert!(idx.index_document("doc1", &poly).is_none());
assert_eq!(idx.len(), 0);
}
#[test]
fn prefix_search_finds_nearby() {
let mut idx = GeohashIndex::new("places", "loc", 6);
idx.index_document("a", &Geometry::point(-73.985, 40.758));
idx.index_document("b", &Geometry::point(-73.986, 40.759));
idx.index_document("c", &Geometry::point(139.69, 35.69));
let hash_a = idx.get_geohash("a").unwrap();
let prefix = &hash_a[..4];
let results = idx.prefix_search(prefix);
assert!(results.contains(&"a"));
assert!(results.contains(&"b"));
assert!(!results.contains(&"c"));
}
#[test]
fn nearby_search_includes_neighbors() {
let mut idx = GeohashIndex::new("places", "loc", 6);
for i in 0..20 {
let lng = -73.98 + (i as f64) * 0.001;
idx.index_document(&format!("d{i}"), &Geometry::point(lng, 40.758));
}
let results = idx.nearby_search(-73.98, 40.758, None);
assert!(!results.is_empty());
}
#[test]
fn upsert_removes_old() {
let mut idx = GeohashIndex::new("places", "loc", 6);
idx.index_document("doc1", &Geometry::point(0.0, 0.0));
idx.index_document("doc1", &Geometry::point(50.0, 50.0));
assert_eq!(idx.len(), 1);
let old_hash = geohash_encode(0.0, 0.0, 6);
let old_results = idx.prefix_search(&old_hash);
assert!(!old_results.contains(&"doc1"));
}
#[test]
fn remove_document() {
let mut idx = GeohashIndex::new("places", "loc", 6);
idx.index_document("doc1", &Geometry::point(10.0, 20.0));
idx.remove_document("doc1");
assert_eq!(idx.len(), 0);
}
#[test]
fn checkpoint_roundtrip() {
let mut idx = GeohashIndex::new("places", "loc", 6);
for i in 0..50 {
idx.index_document(&format!("d{i}"), &Geometry::point(i as f64, i as f64));
}
let bytes = idx.checkpoint_to_bytes().unwrap();
let restored = GeohashIndex::from_checkpoint(&bytes).unwrap();
assert_eq!(restored.len(), 50);
assert_eq!(restored.collection, "places");
assert_eq!(restored.precision, 6);
let hash = restored.get_geohash("d25").unwrap();
assert_eq!(hash.len(), 6);
}
}