use dashmap::DashMap;
use geo::{Distance, Haversine};
use geohash::{Coord, encode};
use rstar::{Envelope, RTree};
use std::cmp::Ordering;
use std::thread;
use std::time::SystemTime;
use std::{path::PathBuf, sync::Arc, vec};
use thiserror::Error;
pub use rstar::{AABB, PointDistance, RTreeObject};
mod utils;
pub trait GeohashRTreeObject:
RTreeObject
+ PointDistance
+ PartialEq
+ Clone
+ Send
+ Sync
+ bincode::Decode<()>
+ bincode::Encode
+ 'static
{
fn unique_id(&self) -> String;
fn x_y(&self) -> (f64, f64);
fn gen_geohash_str(&self, geohash_precision: usize) -> Result<String, GeohashRTreeError> {
let (x, y) = self.x_y();
let geohash_str = encode(Coord { x, y }, geohash_precision)?;
Ok(geohash_str)
}
}
#[derive(Error, Debug)]
pub enum GeohashRTreeError {
#[error("persistence file not found, {0:?}")]
LoadError(#[from] sled::Error),
#[error("geohash error, {0:?}")]
GeohashError(#[from] geohash::GeohashError),
#[error("bincode decode error, {0:?}")]
BincodeDecodeError(#[from] bincode::error::DecodeError),
#[error("bincode encode error, {0:?}")]
BincodeEncodeError(#[from] bincode::error::EncodeError),
#[error("unknown error")]
Unknown,
}
pub struct GeohashRTree<T>
where
T: GeohashRTreeObject,
T::Envelope: Send + Sync,
{
arc_dashmap: Arc<DashMap<String, RTree<T>>>,
geohash_precision: usize,
db: Option<Arc<sled::Db>>,
}
impl<T> GeohashRTree<T>
where
T: GeohashRTreeObject,
T::Envelope: Send + Sync,
{
const DEFAULT_SLED_CACHE_CAPACITY: u64 = 1024 * 1024 * 100;
pub fn new(
geohash_precision: usize,
persistence_path: Option<PathBuf>,
) -> Result<Self, GeohashRTreeError> {
let mut s = Self {
arc_dashmap: Arc::new(DashMap::new()),
geohash_precision,
db: None,
};
if let Some(persistence_path) = persistence_path {
let db: sled::Db = sled::Config::default()
.cache_capacity(Self::DEFAULT_SLED_CACHE_CAPACITY)
.path(persistence_path)
.open()?;
s.db = Some(Arc::new(db));
}
Ok(s)
}
pub fn load(
geohash_precision: usize,
persistence_path: PathBuf,
) -> Result<Self, GeohashRTreeError> {
let db: sled::Db = sled::Config::default()
.cache_capacity(Self::DEFAULT_SLED_CACHE_CAPACITY)
.path(persistence_path)
.open()?;
let hrt = Self {
arc_dashmap: Arc::new(DashMap::new()),
geohash_precision,
db: Some(Arc::new(db)),
};
let config = bincode::config::standard();
if let Some(db) = &hrt.db {
let now = SystemTime::now();
let mut itr = db.iter();
while let Some(entry) = itr.next() {
let (_, value) = entry?;
let (t, _) = bincode::decode_from_slice::<T, _>(&value, config)?;
let geohash_str = t.gen_geohash_str(geohash_precision)?;
hrt.arc_dashmap
.entry(geohash_str)
.or_insert(RTree::new())
.insert(t);
}
println!(
"loaded elapsed time: {:?}, total: {}",
now.elapsed().unwrap(),
db.len()
);
}
Ok(hrt)
}
pub fn load_async(
geohash_precision: usize,
persistence_path: PathBuf,
) -> Result<Self, GeohashRTreeError> {
let db: sled::Db = sled::Config::default().path(persistence_path).open()?;
let hrt = Self {
arc_dashmap: Arc::new(DashMap::new()),
geohash_precision,
db: Some(Arc::new(db)),
};
let acr_dashmap = Arc::clone(&hrt.arc_dashmap);
if let Some(db) = hrt.db.clone() {
thread::spawn(move || {
let config = bincode::config::standard();
let mut itr = db.iter();
let now = SystemTime::now();
while let Some(entry) = itr.next() {
let (_, value) = match entry {
Ok((uid, value)) => (uid, value),
Err(e) => {
println!("error: {}", e);
return ();
}
};
let t = match bincode::decode_from_slice::<T, _>(&value, config) {
Ok((t, _)) => t,
Err(e) => {
println!("error: {}", e);
return ();
}
};
let geohash_str = match t.gen_geohash_str(geohash_precision) {
Ok(h) => h,
Err(e) => {
println!("error: {}", e);
return ();
}
};
acr_dashmap
.entry(geohash_str)
.or_insert(RTree::new())
.insert(t);
}
println!(
"loaded elapsed time: {:?}, total: {}",
now.elapsed().unwrap(),
db.len()
);
});
}
Ok(hrt)
}
pub fn insert(&self, t: T) -> Result<(), GeohashRTreeError> {
let geohash_str = t.gen_geohash_str(self.geohash_precision)?;
if let Some(db) = &self.db {
let enc = bincode::encode_to_vec(&t, bincode::config::standard())?;
db.insert(t.unique_id(), enc)?;
}
let mut rtree = self.arc_dashmap.entry(geohash_str).or_insert(RTree::new());
rtree.insert(t);
Ok(())
}
pub fn remove(&self, t: &T) -> Result<Option<T>, GeohashRTreeError> {
let geohash_str = t.gen_geohash_str(self.geohash_precision)?;
if let Some(mut rtree) = self.arc_dashmap.get_mut(&geohash_str) {
if let Some(rm) = rtree.remove(t) {
if let Some(db) = &self.db {
db.remove(rm.unique_id())?;
}
return Ok(Some(rm));
}
}
Ok(None)
}
pub fn delete(&self, id: &str) -> Result<Option<T>, GeohashRTreeError> {
if let Some(db) = &self.db {
if let Some(rm) = db.remove(id)? {
let (t, _) = bincode::decode_from_slice::<T, _>(&rm, bincode::config::standard())?;
let geohash_str = t.gen_geohash_str(self.geohash_precision)?;
if let Some(mut rtree) = self.arc_dashmap.get_mut(&geohash_str) {
let _ = rtree.remove(&t);
}
return Ok(Some(t));
}
}
Ok(None)
}
pub fn adjacent_cells_nearest(&self, query_point: &T) -> Result<Option<T>, GeohashRTreeError> {
let geohash_str = query_point.gen_geohash_str(self.geohash_precision)?;
let mut nearests = vec![];
if let Some(nearest) = self.nearest(query_point, &geohash_str)? {
nearests.push(nearest);
}
let neighbors = geohash::neighbors(&geohash_str)?;
if let Some(nearest) = self.nearest(query_point, &neighbors.n)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.ne)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.e)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.se)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.s)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.sw)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.w)? {
nearests.push(nearest);
}
if let Some(nearest) = self.nearest(query_point, &neighbors.nw)? {
nearests.push(nearest);
}
nearests.sort_by(|a, b| {
let a_distance = a.distance_2(&query_point.envelope().center());
let b_distance = b.distance_2(&query_point.envelope().center());
a_distance
.partial_cmp(&b_distance)
.unwrap_or(Ordering::Equal)
});
Ok(nearests.first().cloned())
}
fn nearest(&self, query_point: &T, geohash_str: &str) -> Result<Option<T>, GeohashRTreeError> {
if let Some(rtree) = self.arc_dashmap.get(geohash_str) {
let nearest = rtree.nearest_neighbor(&query_point.envelope().center());
return Ok(nearest.cloned());
}
Ok(None)
}
pub fn sorted_cells_nearest(&self, query: &T) -> Result<Option<T>, GeohashRTreeError> {
let (x, y) = query.x_y();
let point = geo::point!(x: x, y: y);
let sorted_geohash_cells = utils::sort_geohash_neighbors(point, self.geohash_precision)?;
for idx in 0..sorted_geohash_cells.len() {
let cursor = sorted_geohash_cells.get(idx).unwrap();
if let Some(nearest) = self.nearest(query, &cursor.0)? {
if let Some(next) = sorted_geohash_cells.get(idx + 1) {
let nearest_p = nearest.x_y();
let dist =
Haversine::distance(point, geo::point!(x: nearest_p.0 , y: nearest_p.1));
if dist <= next.1 {
return Ok(Some(nearest));
}
} else {
return Ok(Some(nearest));
}
}
}
Ok(None)
}
pub fn len(&self) -> usize {
self.arc_dashmap.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
use rstar::AABB;
use rstar::RTreeObject;
#[derive(Debug, PartialEq, Clone, bincode::Encode, bincode::Decode)]
struct Location {
id: String,
x_coordinate: f64,
y_coordinate: f64,
}
impl GeohashRTreeObject for Location {
fn unique_id(&self) -> String {
self.id.clone()
}
fn x_y(&self) -> (f64, f64) {
(self.x_coordinate, self.y_coordinate)
}
}
impl RTreeObject for Location {
type Envelope = AABB<[f64; 2]>;
fn envelope(&self) -> Self::Envelope {
AABB::from_point([self.x_coordinate, self.y_coordinate])
}
}
impl PointDistance for Location {
fn distance_2(&self, point: &[f64; 2]) -> f64 {
let self_geo_point = geo::point!(x: self.x_coordinate, y: self.y_coordinate);
let target_geo_point = geo::point!(x: point[0], y: point[1]);
Haversine::distance(self_geo_point, target_geo_point)
}
}
#[test]
fn test_insert() {
let hrt: GeohashRTree<Location> = GeohashRTree::new(5, None).unwrap();
let locations = vec![
Location {
id: "1".into(),
x_coordinate: 116.400357,
y_coordinate: 39.906453,
},
Location {
id: "2".into(),
x_coordinate: 116.401633,
y_coordinate: 39.906302,
},
Location {
id: "3".into(),
x_coordinate: 116.401645,
y_coordinate: 39.904753,
},
];
hrt.insert(locations[0].clone()).unwrap();
hrt.insert(locations[1].clone()).unwrap();
hrt.insert(locations[2].clone()).unwrap();
let nearest = hrt
.adjacent_cells_nearest(&Location {
id: "1".into(),
x_coordinate: 116.400357,
y_coordinate: 39.906453,
})
.unwrap()
.unwrap();
assert_eq!(locations[0], nearest);
}
#[test]
fn test_remove() {
let hrt: GeohashRTree<Location> = GeohashRTree::new(5, None).unwrap();
let locations = vec![
Location {
id: "1".into(),
x_coordinate: 116.400357,
y_coordinate: 39.906453,
},
Location {
id: "2".into(),
x_coordinate: 116.401633,
y_coordinate: 39.906302,
},
Location {
id: "3".into(),
x_coordinate: 116.401645,
y_coordinate: 39.904753,
},
];
hrt.insert(locations[0].clone()).unwrap();
hrt.insert(locations[1].clone()).unwrap();
hrt.insert(locations[2].clone()).unwrap();
assert_eq!(locations[0], hrt.remove(&locations[0]).unwrap().unwrap());
assert_eq!(locations[1], hrt.remove(&locations[1]).unwrap().unwrap());
assert_eq!(locations[2], hrt.remove(&locations[2]).unwrap().unwrap());
}
#[test]
fn test_sorted_cells_nearest() {
let hrt: GeohashRTree<Location> = GeohashRTree::new(5, None).unwrap();
let locations = vec![
Location {
id: "1".into(),
x_coordinate: 116.400357,
y_coordinate: 39.906453,
},
Location {
id: "2".into(),
x_coordinate: 116.401633,
y_coordinate: 39.906302,
},
Location {
id: "3".into(),
x_coordinate: 116.401645,
y_coordinate: 39.904753,
},
];
hrt.insert(locations[0].clone()).unwrap();
hrt.insert(locations[1].clone()).unwrap();
hrt.insert(locations[2].clone()).unwrap();
let nearest = hrt
.sorted_cells_nearest(&Location {
id: "1".into(),
x_coordinate: 116.400357,
y_coordinate: 39.906453,
})
.unwrap()
.unwrap();
assert_eq!(locations[0], nearest);
}
}