use anyhow::{Context, Result};
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
pub const DEFAULT_GEOHASH_PRECISION: usize = 7;
#[derive(Clone)]
pub struct GeohashIndex {
index: Arc<RwLock<HashMap<String, HashSet<String>>>>,
doc_locations: Arc<RwLock<HashMap<String, String>>>,
precision: usize,
}
impl GeohashIndex {
pub fn new(precision: usize) -> Self {
Self {
index: Arc::new(RwLock::new(HashMap::new())),
doc_locations: Arc::new(RwLock::new(HashMap::new())),
precision,
}
}
pub fn default_precision() -> Self {
Self::new(DEFAULT_GEOHASH_PRECISION)
}
pub fn precision(&self) -> usize {
self.precision
}
pub fn insert(&self, doc_id: &str, lat: f64, lon: f64) -> Result<()> {
let geohash = self.encode(lat, lon)?;
{
let mut doc_locs = self
.doc_locations
.write()
.unwrap_or_else(|e| e.into_inner());
if let Some(old_geohash) = doc_locs.remove(doc_id) {
let mut idx = self.index.write().unwrap_or_else(|e| e.into_inner());
if let Some(cell) = idx.get_mut(&old_geohash) {
cell.remove(doc_id);
if cell.is_empty() {
idx.remove(&old_geohash);
}
}
}
}
{
let mut idx = self.index.write().unwrap_or_else(|e| e.into_inner());
idx.entry(geohash.clone())
.or_default()
.insert(doc_id.to_string());
}
{
let mut doc_locs = self
.doc_locations
.write()
.unwrap_or_else(|e| e.into_inner());
doc_locs.insert(doc_id.to_string(), geohash);
}
Ok(())
}
pub fn find_near(&self, lat: f64, lon: f64) -> Result<Vec<String>> {
let center_hash = self.encode(lat, lon)?;
let neighbors = self.get_neighbors(¢er_hash)?;
let idx = self.index.read().unwrap_or_else(|e| e.into_inner());
let mut results = HashSet::new();
if let Some(docs) = idx.get(¢er_hash) {
results.extend(docs.iter().cloned());
}
for neighbor_hash in neighbors {
if let Some(docs) = idx.get(&neighbor_hash) {
results.extend(docs.iter().cloned());
}
}
Ok(results.into_iter().collect())
}
pub fn find_in_cell(&self, geohash: &str) -> Vec<String> {
let idx = self.index.read().unwrap_or_else(|e| e.into_inner());
idx.get(geohash)
.map(|docs| docs.iter().cloned().collect())
.unwrap_or_default()
}
pub fn remove(&self, doc_id: &str) -> Result<bool> {
let mut doc_locs = self
.doc_locations
.write()
.unwrap_or_else(|e| e.into_inner());
if let Some(geohash) = doc_locs.remove(doc_id) {
let mut idx = self.index.write().unwrap_or_else(|e| e.into_inner());
if let Some(cell) = idx.get_mut(&geohash) {
cell.remove(doc_id);
if cell.is_empty() {
idx.remove(&geohash);
}
}
Ok(true)
} else {
Ok(false)
}
}
pub fn update(
&self,
doc_id: &str,
_old_lat: f64,
_old_lon: f64,
new_lat: f64,
new_lon: f64,
) -> Result<()> {
self.insert(doc_id, new_lat, new_lon)
}
pub fn contains(&self, doc_id: &str) -> bool {
self.doc_locations
.read()
.unwrap_or_else(|e| e.into_inner())
.contains_key(doc_id)
}
pub fn get_cell(&self, doc_id: &str) -> Option<String> {
self.doc_locations
.read()
.unwrap_or_else(|e| e.into_inner())
.get(doc_id)
.cloned()
}
pub fn len(&self) -> usize {
self.doc_locations
.read()
.unwrap_or_else(|e| e.into_inner())
.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn cell_count(&self) -> usize {
self.index.read().unwrap_or_else(|e| e.into_inner()).len()
}
pub fn clear(&self) {
self.index
.write()
.unwrap_or_else(|e| e.into_inner())
.clear();
self.doc_locations
.write()
.unwrap_or_else(|e| e.into_inner())
.clear();
}
pub fn all_doc_ids(&self) -> Vec<String> {
self.doc_locations
.read()
.unwrap_or_else(|e| e.into_inner())
.keys()
.cloned()
.collect()
}
fn encode(&self, lat: f64, lon: f64) -> Result<String> {
crate::geohash::encode(lon, lat, self.precision).context("Failed to encode geohash")
}
fn get_neighbors(&self, geohash: &str) -> Result<Vec<String>> {
let neighbors =
crate::geohash::neighbors(geohash).context("Failed to get geohash neighbors")?;
Ok(vec![
neighbors.n,
neighbors.ne,
neighbors.e,
neighbors.se,
neighbors.s,
neighbors.sw,
neighbors.w,
neighbors.nw,
])
}
}
impl Default for GeohashIndex {
fn default() -> Self {
Self::default_precision()
}
}
#[cfg(test)]
mod tests {
use super::*;
const SF_LAT: f64 = 37.7749;
const SF_LON: f64 = -122.4194;
const SF_LAT_NEAR: f64 = 37.7750;
const SF_LON_NEAR: f64 = -122.4193;
const NY_LAT: f64 = 40.7128;
const NY_LON: f64 = -74.0060;
#[test]
fn test_new_index() {
let index = GeohashIndex::new(7);
assert_eq!(index.precision(), 7);
assert!(index.is_empty());
assert_eq!(index.len(), 0);
assert_eq!(index.cell_count(), 0);
}
#[test]
fn test_default_precision() {
let index = GeohashIndex::default_precision();
assert_eq!(index.precision(), DEFAULT_GEOHASH_PRECISION);
}
#[test]
fn test_insert_and_find_near() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
index.insert("beacon2", SF_LAT_NEAR, SF_LON_NEAR).unwrap();
let nearby = index.find_near(SF_LAT, SF_LON).unwrap();
assert!(nearby.contains(&"beacon1".to_string()));
assert!(nearby.contains(&"beacon2".to_string()));
assert_eq!(nearby.len(), 2);
}
#[test]
fn test_find_near_includes_neighbors() {
let index = GeohashIndex::new(7);
index.insert("sf_beacon", SF_LAT, SF_LON).unwrap();
let offset_lat = SF_LAT + 0.001; let offset_lon = SF_LON + 0.001; index
.insert("neighbor_beacon", offset_lat, offset_lon)
.unwrap();
let nearby_sf = index.find_near(SF_LAT, SF_LON).unwrap();
assert!(nearby_sf.contains(&"sf_beacon".to_string()));
let nearby_offset = index.find_near(offset_lat, offset_lon).unwrap();
assert!(nearby_offset.contains(&"neighbor_beacon".to_string()));
}
#[test]
fn test_find_near_excludes_distant() {
let index = GeohashIndex::new(7);
index.insert("sf_beacon", SF_LAT, SF_LON).unwrap();
index.insert("ny_beacon", NY_LAT, NY_LON).unwrap();
let nearby_sf = index.find_near(SF_LAT, SF_LON).unwrap();
assert!(nearby_sf.contains(&"sf_beacon".to_string()));
assert!(!nearby_sf.contains(&"ny_beacon".to_string()));
let nearby_ny = index.find_near(NY_LAT, NY_LON).unwrap();
assert!(nearby_ny.contains(&"ny_beacon".to_string()));
assert!(!nearby_ny.contains(&"sf_beacon".to_string()));
}
#[test]
fn test_find_in_cell() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
let cell = index.get_cell("beacon1").unwrap();
let in_cell = index.find_in_cell(&cell);
assert!(in_cell.contains(&"beacon1".to_string()));
}
#[test]
fn test_remove() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
assert!(index.contains("beacon1"));
assert_eq!(index.len(), 1);
let removed = index.remove("beacon1").unwrap();
assert!(removed);
assert!(!index.contains("beacon1"));
assert_eq!(index.len(), 0);
let removed_again = index.remove("beacon1").unwrap();
assert!(!removed_again);
}
#[test]
fn test_update_location() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
let old_cell = index.get_cell("beacon1").unwrap();
index
.update("beacon1", SF_LAT, SF_LON, NY_LAT, NY_LON)
.unwrap();
let new_cell = index.get_cell("beacon1").unwrap();
assert_ne!(old_cell, new_cell);
assert!(index.contains("beacon1"));
assert_eq!(index.len(), 1);
let nearby_ny = index.find_near(NY_LAT, NY_LON).unwrap();
assert!(nearby_ny.contains(&"beacon1".to_string()));
let nearby_sf = index.find_near(SF_LAT, SF_LON).unwrap();
assert!(!nearby_sf.contains(&"beacon1".to_string()));
}
#[test]
fn test_insert_updates_existing() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
assert_eq!(index.len(), 1);
index.insert("beacon1", NY_LAT, NY_LON).unwrap();
assert_eq!(index.len(), 1);
let nearby_ny = index.find_near(NY_LAT, NY_LON).unwrap();
assert!(nearby_ny.contains(&"beacon1".to_string()));
}
#[test]
fn test_clear() {
let index = GeohashIndex::new(7);
index.insert("beacon1", SF_LAT, SF_LON).unwrap();
index.insert("beacon2", NY_LAT, NY_LON).unwrap();
assert_eq!(index.len(), 2);
index.clear();
assert!(index.is_empty());
assert_eq!(index.cell_count(), 0);
}
#[test]
fn test_all_doc_ids() {
let index = GeohashIndex::new(7);
index.insert("a", SF_LAT, SF_LON).unwrap();
index.insert("b", NY_LAT, NY_LON).unwrap();
index.insert("c", SF_LAT_NEAR, SF_LON_NEAR).unwrap();
let all_ids = index.all_doc_ids();
assert_eq!(all_ids.len(), 3);
assert!(all_ids.contains(&"a".to_string()));
assert!(all_ids.contains(&"b".to_string()));
assert!(all_ids.contains(&"c".to_string()));
}
#[test]
fn test_concurrent_access() {
use std::thread;
let index = Arc::new(GeohashIndex::new(7));
let handles: Vec<_> = (0..10)
.map(|i| {
let idx = Arc::clone(&index);
thread::spawn(move || {
let lat = SF_LAT + (i as f64 * 0.001);
let lon = SF_LON + (i as f64 * 0.001);
idx.insert(&format!("beacon{}", i), lat, lon).unwrap();
})
})
.collect();
for handle in handles {
handle.join().unwrap();
}
assert_eq!(index.len(), 10);
}
#[test]
fn test_different_precisions() {
let index_5 = GeohashIndex::new(5);
let index_7 = GeohashIndex::new(7);
let index_9 = GeohashIndex::new(9);
index_5.insert("p5", SF_LAT, SF_LON).unwrap();
index_7.insert("p7", SF_LAT, SF_LON).unwrap();
index_9.insert("p9", SF_LAT, SF_LON).unwrap();
let cell_5 = index_5.get_cell("p5").unwrap();
let cell_7 = index_7.get_cell("p7").unwrap();
let cell_9 = index_9.get_cell("p9").unwrap();
assert_eq!(cell_5.len(), 5);
assert_eq!(cell_7.len(), 7);
assert_eq!(cell_9.len(), 9);
assert!(cell_7.starts_with(&cell_5));
assert!(cell_9.starts_with(&cell_7));
}
#[test]
fn test_empty_find_near() {
let index = GeohashIndex::new(7);
let nearby = index.find_near(SF_LAT, SF_LON).unwrap();
assert!(nearby.is_empty());
}
#[test]
fn test_get_cell_not_found() {
let index = GeohashIndex::new(7);
assert!(index.get_cell("nonexistent").is_none());
}
}