use crate::types::{BoundingBox, Geometry, Point};
use crate::{Result, StorageError};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, BinaryHeap};
use std::cmp::Ordering;
use std::path::{Path, PathBuf};
use parking_lot::RwLock;
use std::sync::Arc;
use lru::LruCache;
use std::num::NonZeroUsize;
use memmap2::MmapMut;
use std::fs::OpenOptions;
const MAX_RTREE_ENTRIES: usize = 256;
const DEFAULT_GRID_SIZE: usize = 32;
const DEFAULT_CACHE_SIZE: usize = 128;
const ADAPTIVE_THRESHOLD: f32 = 0.95;
const AUTO_FLUSH_THRESHOLD: usize = 2000;
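/// Configuration for [`SpatialHybridIndex`].
///
/// A typical setup goes through the builder methods. A minimal sketch (the
/// bounds and sizes below are illustrative, not recommendations):
///
/// ```ignore
/// let config = SpatialHybridConfig::new(BoundingBoxF32::new(-180.0, -90.0, 180.0, 90.0))
///     .with_grid_size(64)
///     .with_cache_size(256)
///     .with_mmap(true, Some(PathBuf::from("/tmp/spatial")))
///     .with_adaptive(true);
/// let index = SpatialHybridIndex::new(config);
/// ```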
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialHybridConfig {
pub grid_size: usize,
pub world_bounds: BoundingBoxF32,
pub hot_cache_size: usize,
pub enable_mmap: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub data_dir: Option<PathBuf>,
pub enable_compression: bool,
pub enable_adaptive: bool,
pub enable_simd: bool,
#[serde(default = "default_mmap_initial_size")]
pub mmap_initial_size: u64,
}
fn default_mmap_initial_size() -> u64 {
100 * 1024 * 1024
}
impl Default for SpatialHybridConfig {
fn default() -> Self {
Self {
grid_size: DEFAULT_GRID_SIZE,
world_bounds: BoundingBoxF32::new(-180.0, -90.0, 180.0, 90.0),
hot_cache_size: DEFAULT_CACHE_SIZE,
enable_mmap: true,
data_dir: None,
enable_compression: true,
enable_adaptive: true,
enable_simd: cfg!(target_arch = "x86_64") || cfg!(target_arch = "aarch64"),
mmap_initial_size: default_mmap_initial_size(),
}
}
}
impl SpatialHybridConfig {
pub fn new(world_bounds: BoundingBoxF32) -> Self {
Self {
world_bounds,
..Default::default()
}
}
pub fn with_grid_size(mut self, size: usize) -> Self {
self.grid_size = size.clamp(16, 256);
self
}
pub fn with_cache_size(mut self, size: usize) -> Self {
self.hot_cache_size = size;
self
}
pub fn with_mmap(mut self, enabled: bool, data_dir: Option<PathBuf>) -> Self {
self.enable_mmap = enabled;
self.data_dir = data_dir;
self
}
pub fn with_adaptive(mut self, enabled: bool) -> Self {
self.enable_adaptive = enabled;
self
}
pub fn for_edge() -> Self {
Self {
grid_size: 16,
hot_cache_size: 32,
enable_mmap: true,
mmap_initial_size: 4 * 1024 * 1024,
enable_compression: true,
enable_adaptive: true,
enable_simd: cfg!(target_arch = "x86_64") || cfg!(target_arch = "aarch64"),
..Default::default()
}
}
pub fn with_simd(mut self, enabled: bool) -> Self {
self.enable_simd = enabled;
self
}
}
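/// Axis-aligned bounding box stored as four `f32`s (16 bytes), the compact
/// representation used throughout the index. Conversion from the crate's
/// `f64` `BoundingBox` is lossy by design; the cell mapping only needs
/// approximate extents.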
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub struct BoundingBoxF32 {
pub min_x: f32,
pub min_y: f32,
pub max_x: f32,
pub max_y: f32,
}
impl BoundingBoxF32 {
pub fn new(min_x: f32, min_y: f32, max_x: f32, max_y: f32) -> Self {
Self { min_x, min_y, max_x, max_y }
}
pub fn from_f64(bbox: &BoundingBox) -> Self {
Self {
min_x: bbox.min_x as f32,
min_y: bbox.min_y as f32,
max_x: bbox.max_x as f32,
max_y: bbox.max_y as f32,
}
}
pub fn to_f64(&self) -> BoundingBox {
BoundingBox::new(
self.min_x as f64,
self.min_y as f64,
self.max_x as f64,
self.max_y as f64,
)
}
#[inline]
pub fn contains_point(&self, x: f32, y: f32) -> bool {
x >= self.min_x && x <= self.max_x && y >= self.min_y && y <= self.max_y
}
#[inline]
pub fn intersects(&self, other: &BoundingBoxF32) -> bool {
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
{
simd_intersects_x86(self, other)
}
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
{
!(self.max_x < other.min_x || self.min_x > other.max_x ||
self.max_y < other.min_y || self.min_y > other.max_y)
}
}
#[inline]
pub fn area(&self) -> f32 {
(self.max_x - self.min_x) * (self.max_y - self.min_y)
}
pub fn expand(&mut self, x: f32, y: f32) {
self.min_x = self.min_x.min(x);
self.min_y = self.min_y.min(y);
self.max_x = self.max_x.max(x);
self.max_y = self.max_y.max(y);
}
pub fn merge(&self, other: &BoundingBoxF32) -> BoundingBoxF32 {
BoundingBoxF32::new(
self.min_x.min(other.min_x),
self.min_y.min(other.min_y),
self.max_x.max(other.max_x),
self.max_y.max(other.max_y),
)
}
}
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
fn simd_intersects_x86(a: &BoundingBoxF32, b: &BoundingBoxF32) -> bool {
use std::arch::x86_64::*;
// SAFETY: this function only exists when compiled with `sse2`, which is part
// of the x86_64 baseline.
unsafe {
let a_min = _mm_set_ps(0.0, 0.0, a.min_y, a.min_x);
let a_max = _mm_set_ps(0.0, 0.0, a.max_y, a.max_x);
let b_min = _mm_set_ps(0.0, 0.0, b.min_y, b.min_x);
let b_max = _mm_set_ps(0.0, 0.0, b.max_y, b.max_x);
let cmp1 = _mm_cmpge_ps(a_max, b_min);
let cmp2 = _mm_cmple_ps(a_min, b_max);
let result = _mm_and_ps(cmp1, cmp2);
let mask = _mm_movemask_ps(result);
(mask & 0b11) == 0b11
}
}
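/// Grid cell address packed into 32 bits: row in the high 16 bits, column in
/// the low 16 bits, so `GridCellId::new(1, 2)` is stored as `0x0001_0002`.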
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
struct GridCellId(u32);
impl GridCellId {
#[inline]
fn new(row: u16, col: u16) -> Self {
Self(((row as u32) << 16) | (col as u32))
}
#[inline]
fn row(&self) -> u16 {
(self.0 >> 16) as u16
}
#[inline]
fn col(&self) -> u16 {
(self.0 & 0xFFFF) as u16
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CompactRTreeEntry {
bbox: BoundingBoxF32,
data_id: u64,
}
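/// Per-cell entry list. Despite the name this is a flat array scanned
/// linearly, not a tree: the surrounding grid keeps cells small enough that a
/// linear scan is competitive with tree traversal, and the flat layout
/// serializes compactly.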
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MiniRTree {
entries: Vec<CompactRTreeEntry>,
#[serde(skip)]
is_dirty: bool,
#[serde(skip)]
access_count: u64,
}
impl MiniRTree {
fn new() -> Self {
Self {
entries: Vec::new(),
is_dirty: false,
access_count: 0,
}
}
fn insert(&mut self, bbox: BoundingBoxF32, data_id: u64) {
self.entries.push(CompactRTreeEntry { bbox, data_id });
self.is_dirty = true;
self.access_count += 1;
}
fn delete(&mut self, data_id: u64) -> bool {
if let Some(pos) = self.entries.iter().position(|e| e.data_id == data_id) {
self.entries.swap_remove(pos);
self.is_dirty = true;
true
} else {
false
}
}
fn range_query(&mut self, query_bbox: &BoundingBoxF32, results: &mut Vec<u64>) {
self.access_count += 1;
for entry in &self.entries {
if entry.bbox.intersects(query_bbox) {
results.push(entry.data_id);
}
}
}
fn knn_search(&mut self, point: &PointF32, _k: usize, heap: &mut BinaryHeap<DistanceEntry>) {
// All candidates in the cell are pushed; trimming to k happens in the caller
// once every candidate cell has been visited.
self.access_count += 1;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
{
simd_knn_search(&self.entries, point, heap);
}
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
{
for entry in &self.entries {
let dist = bbox_min_dist_f32(&entry.bbox, point);
heap.push(DistanceEntry {
dist,
data_id: entry.data_id,
});
}
}
}
#[allow(dead_code)]
fn len(&self) -> usize {
self.entries.len()
}
#[allow(dead_code)]
fn is_empty(&self) -> bool {
self.entries.is_empty()
}
#[allow(dead_code)]
fn memory_usage(&self) -> usize {
self.entries.capacity() * std::mem::size_of::<CompactRTreeEntry>()
}
}
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
fn simd_knn_search(entries: &[CompactRTreeEntry], point: &PointF32, heap: &mut BinaryHeap<DistanceEntry>) {
// Currently a scalar loop; kept as a separate function so a batched SIMD
// distance kernel can be slotted in without touching the caller.
for entry in entries {
let dist = bbox_min_dist_f32(&entry.bbox, point);
heap.push(DistanceEntry {
dist,
data_id: entry.data_id,
});
}
}
#[derive(Debug, Clone, Copy)]
struct PointF32 {
x: f32,
y: f32,
}
impl PointF32 {
fn from_f64(point: &Point) -> Self {
Self {
x: point.x as f32,
y: point.y as f32,
}
}
}
// `_mm_hadd_ps` is an SSE3 instruction, so this helper is gated on `sse3`
// rather than `sse2`. It is not wired into the query path yet.
#[cfg(all(target_arch = "x86_64", target_feature = "sse3"))]
#[allow(dead_code)]
#[inline]
fn simd_distance_x86(a: &PointF32, b: &PointF32) -> f32 {
use std::arch::x86_64::*;
// SAFETY: gated on `target_feature = "sse3"` above.
unsafe {
let a_vec = _mm_set_ps(0.0, 0.0, a.y, a.x);
let b_vec = _mm_set_ps(0.0, 0.0, b.y, b.x);
let diff = _mm_sub_ps(a_vec, b_vec);
let squared = _mm_mul_ps(diff, diff);
let sum = _mm_hadd_ps(squared, squared);
let result = _mm_sqrt_ss(sum);
_mm_cvtss_f32(result)
}
}
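/// Candidate for kNN ranking. The `Ord` impl below is intentionally reversed
/// (smaller distance compares as greater), so a `BinaryHeap<DistanceEntry>`
/// behaves as a min-heap: `pop()` yields the nearest remaining candidate.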
#[derive(Debug, Clone, Copy)]
struct DistanceEntry {
dist: f32,
data_id: u64,
}
impl PartialEq for DistanceEntry {
fn eq(&self, other: &Self) -> bool {
self.dist == other.dist
}
}
impl Eq for DistanceEntry {}
impl PartialOrd for DistanceEntry {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for DistanceEntry {
fn cmp(&self, other: &Self) -> Ordering {
// Reversed on distance: smaller distances sort as "greater", turning the
// max-heap `BinaryHeap` into a min-heap by distance.
other.dist.partial_cmp(&self.dist).unwrap_or(Ordering::Equal)
}
}
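/// Uniform grid over the configured world bounds. With `enable_adaptive`, the
/// grid doubles (up to 64×64) under high occupancy and halves (down to 16×16)
/// when sparse; see the note in `maybe_resize` about existing cells.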
struct AdaptiveGrid {
config: SpatialHybridConfig,
cell_width: f32,
cell_height: f32,
current_grid_size: usize,
resize_counter: usize,
}
impl AdaptiveGrid {
fn new(config: SpatialHybridConfig) -> Self {
let cell_width = (config.world_bounds.max_x - config.world_bounds.min_x)
/ config.grid_size as f32;
let cell_height = (config.world_bounds.max_y - config.world_bounds.min_y)
/ config.grid_size as f32;
let current_grid_size = config.grid_size;
Self {
config,
cell_width,
cell_height,
current_grid_size,
resize_counter: 0,
}
}
fn maybe_resize(&mut self, active_cells: usize, total_entries: usize) -> bool {
if !self.config.enable_adaptive {
return false;
}
self.resize_counter += 1;
if self.resize_counter < 1000 {
return false;
}
self.resize_counter = 0;
let total_cells = self.current_grid_size * self.current_grid_size;
let occupancy = active_cells as f32 / total_cells as f32;
if occupancy > ADAPTIVE_THRESHOLD && self.current_grid_size < 64 {
// NOTE: resizing only changes the cell mapping for future inserts and
// queries; cells already written are not re-bucketed.
self.current_grid_size = (self.current_grid_size * 2).min(64);
self.recalculate_cell_size();
true
}
else if occupancy < 0.2 && self.current_grid_size > 16 {
let avg_entries = total_entries / active_cells.max(1);
if avg_entries < MAX_RTREE_ENTRIES / 4 {
self.current_grid_size = (self.current_grid_size / 2).max(16);
self.recalculate_cell_size();
true
} else {
false
}
} else {
false
}
}
fn recalculate_cell_size(&mut self) {
self.cell_width = (self.config.world_bounds.max_x - self.config.world_bounds.min_x)
/ self.current_grid_size as f32;
self.cell_height = (self.config.world_bounds.max_y - self.config.world_bounds.min_y)
/ self.current_grid_size as f32;
}
#[inline]
fn get_cell(&self, x: f32, y: f32) -> Option<GridCellId> {
if !self.config.world_bounds.contains_point(x, y) {
return None;
}
let col = ((x - self.config.world_bounds.min_x) / self.cell_width) as u16;
let row = ((y - self.config.world_bounds.min_y) / self.cell_height) as u16;
let col = col.min((self.current_grid_size - 1) as u16);
let row = row.min((self.current_grid_size - 1) as u16);
Some(GridCellId::new(row, col))
}
fn get_cells_in_bbox(&self, bbox: &BoundingBoxF32) -> Vec<GridCellId> {
let mut cells = Vec::new();
// A box entirely outside the world bounds must yield no cells; without this
// guard the clamping below would pin it onto the border cells.
if !bbox.intersects(&self.config.world_bounds) {
return cells;
}
let min_col = ((bbox.min_x - self.config.world_bounds.min_x) / self.cell_width)
.floor()
.max(0.0) as u16;
let max_col = ((bbox.max_x - self.config.world_bounds.min_x) / self.cell_width)
.floor()
.max(0.0)
.min((self.current_grid_size - 1) as f32) as u16;
let min_row = ((bbox.min_y - self.config.world_bounds.min_y) / self.cell_height)
.floor()
.max(0.0) as u16;
let max_row = ((bbox.max_y - self.config.world_bounds.min_y) / self.cell_height)
.floor()
.max(0.0)
.min((self.current_grid_size - 1) as f32) as u16;
for row in min_row..=max_row {
for col in min_col..=max_col {
cells.push(GridCellId::new(row, col));
}
}
cells
}
fn spiral_search(&self, center: &PointF32, max_radius: usize) -> Vec<GridCellId> {
let mut cells = Vec::new();
if let Some(center_cell) = self.get_cell(center.x, center.y) {
let center_row = center_cell.row() as i32;
let center_col = center_cell.col() as i32;
cells.push(center_cell);
for radius in 1..=max_radius as i32 {
for dr in -radius..=radius {
for dc in -radius..=radius {
if dr.abs().max(dc.abs()) != radius {
continue;
}
let row = center_row + dr;
let col = center_col + dc;
if row >= 0 && row < self.current_grid_size as i32 &&
col >= 0 && col < self.current_grid_size as i32 {
cells.push(GridCellId::new(row as u16, col as u16));
}
}
}
}
}
cells
}
}
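/// Two-tier cell store: an LRU cache of live `MiniRTree`s in front of an
/// optional append-only mmap region holding bincode-serialized (and, when
/// enabled, snappy-compressed) cell snapshots. `cell_offsets` maps each cell
/// to the `(offset, length)` of its most recent snapshot; superseded snapshots
/// are never reclaimed, so the region grows monotonically.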
struct CellStorage {
hot_cache: LruCache<GridCellId, MiniRTree>,
mmap_file: Option<MmapMut>,
cell_offsets: HashMap<GridCellId, (u64, u32)>,
next_offset: u64,
config: SpatialHybridConfig,
}
impl CellStorage {
fn new(config: SpatialHybridConfig) -> Result<Self> {
let cache_size = NonZeroUsize::new(config.hot_cache_size)
.unwrap_or(NonZeroUsize::new(DEFAULT_CACHE_SIZE).unwrap());
let mmap_file = if config.enable_mmap {
if let Some(ref dir) = config.data_dir {
std::fs::create_dir_all(dir)?;
let path = dir.join("spatial_cells.mmap");
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(path)?;
// Only grow the file: an unconditional set_len would truncate an existing
// region when the index is reloaded.
if file.metadata()?.len() < config.mmap_initial_size {
file.set_len(config.mmap_initial_size)?;
}
let mmap = unsafe { MmapMut::map_mut(&file)? };
Some(mmap)
} else {
None
}
} else {
None
};
Ok(Self {
hot_cache: LruCache::new(cache_size),
mmap_file,
cell_offsets: HashMap::new(),
next_offset: 0,
config,
})
}
fn get(&mut self, cell_id: GridCellId) -> Option<&mut MiniRTree> {
// `contains` + `get_mut` instead of a single `get_mut`: returning the borrow
// from an early branch would keep `self` borrowed for the rest of the
// function under current borrow-checker rules.
if self.hot_cache.contains(&cell_id) {
return self.hot_cache.get_mut(&cell_id);
}
if let Some((offset, size)) = self.cell_offsets.get(&cell_id).copied() {
if let Some(ref mmap) = self.mmap_file {
let data = &mmap[offset as usize..(offset + size as u64) as usize];
let decompressed = if self.config.enable_compression {
snap::raw::Decoder::new()
.decompress_vec(data)
.ok()?
} else {
data.to_vec()
};
let tree: MiniRTree = bincode::deserialize(&decompressed).ok()?;
// Route through `put` so that a dirty LRU victim is written back to the
// mmap instead of being dropped silently.
self.put(cell_id, tree);
return self.hot_cache.get_mut(&cell_id);
}
}
None
}
fn put(&mut self, cell_id: GridCellId, tree: MiniRTree) {
if self.hot_cache.len() >= self.hot_cache.cap().get() {
// Persist the victim before it is dropped. Writes are append-only, so
// re-writing a cell orphans its previous region in the mmap.
if let Some((evict_id, mut evict_tree)) = self.hot_cache.pop_lru() {
if self.mmap_file.is_some() {
let _ = self.write_to_mmap(evict_id, &evict_tree);
evict_tree.is_dirty = false;
}
}
}
self.hot_cache.put(cell_id, tree);
}
fn write_to_mmap(&mut self, cell_id: GridCellId, tree: &MiniRTree) -> Result<()> {
if let Some(ref mut mmap) = self.mmap_file {
let serialized = bincode::serialize(tree)
.map_err(|e| StorageError::Serialization(e.to_string()))?;
let data = if self.config.enable_compression {
snap::raw::Encoder::new()
.compress_vec(&serialized)
.map_err(|e| StorageError::Serialization(e.to_string()))?
} else {
serialized
};
let size = data.len() as u32;
let offset = self.next_offset;
if (offset + size as u64) as usize <= mmap.len() {
mmap[offset as usize..(offset + size as u64) as usize].copy_from_slice(&data);
self.cell_offsets.insert(cell_id, (offset, size));
self.next_offset += size as u64;
} else {
// Surface exhaustion instead of dropping the cell silently.
return Err(StorageError::InvalidData(
"mmap region full; increase mmap_initial_size".into(),
));
}
}
Ok(())
}
fn flush(&mut self) -> Result<()> {
let mut serialized_cells = Vec::new();
for (cell_id, tree) in self.hot_cache.iter() {
if tree.is_dirty {
let serialized = bincode::serialize(&*tree)
.map_err(|e| StorageError::Serialization(e.to_string()))?;
let data = if self.config.enable_compression {
snap::raw::Encoder::new()
.compress_vec(&serialized)
.map_err(|e| StorageError::Serialization(e.to_string()))?
} else {
serialized
};
serialized_cells.push((*cell_id, data));
}
}
if let Some(ref mut mmap) = self.mmap_file {
for (cell_id, data) in serialized_cells {
let size = data.len() as u32;
let offset = self.next_offset;
// Append-only region: a cell that no longer fits is skipped and keeps its
// dirty flag so a later flush can retry.
if (offset + size as u64) as usize <= mmap.len() {
mmap[offset as usize..(offset + size as u64) as usize].copy_from_slice(&data);
self.cell_offsets.insert(cell_id, (offset, size));
self.next_offset += size as u64;
if let Some(tree) = self.hot_cache.get_mut(&cell_id) {
tree.is_dirty = false;
}
}
}
}
if let Some(ref mut mmap) = self.mmap_file {
mmap.flush()?;
}
// Trim the hot cache to roughly a quarter of its capacity, persisting any
// cells that have never been written to the mmap.
let target_size = (self.hot_cache.len() / 4).max(self.hot_cache.cap().get() / 4);
while self.hot_cache.len() > target_size {
if let Some((cell_id, mut tree)) = self.hot_cache.pop_lru() {
tree.entries.shrink_to_fit();
if !self.cell_offsets.contains_key(&cell_id) && self.mmap_file.is_some() {
let _ = self.write_to_mmap(cell_id, &tree);
}
} else {
break;
}
}
for (_cell_id, tree) in self.hot_cache.iter_mut() {
if tree.entries.capacity() > tree.entries.len() * 2 {
tree.entries.shrink_to_fit();
}
}
Ok(())
}
fn len(&self) -> usize {
// Approximate count of active cells: a cell resident in both tiers is
// counted twice, which is acceptable for the occupancy heuristic.
self.hot_cache.len() + self.cell_offsets.len()
}
fn iter_all_cells(&self) -> Vec<GridCellId> {
let mut cells: Vec<_> = self.hot_cache.iter().map(|(k, _)| *k).collect();
cells.extend(self.cell_offsets.keys().copied());
cells.sort_unstable_by_key(|c| c.0);
cells.dedup();
cells
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SpatialIndexMetadata {
config: SpatialHybridConfig,
grid_size: usize,
cell_width: f32,
cell_height: f32,
total_entries: usize,
cell_offsets: HashMap<u32, (u64, u32)>,
next_offset: u64,
}
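/// Hybrid spatial index: an (optionally adaptive) uniform grid whose cells
/// each hold a small flat R-tree, with LRU caching and mmap spill via
/// [`CellStorage`].
///
/// A minimal usage sketch (coordinates are illustrative):
///
/// ```ignore
/// let mut index = SpatialHybridIndex::new_default();
/// index.insert(1, Geometry::Point(Point::new(12.5, 41.9)))?;
/// let ids = index.range_query(&BoundingBox::new(10.0, 40.0, 15.0, 45.0));
/// let nearest = index.knn_query(&Point::new(12.0, 42.0), 5);
/// ```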
pub struct SpatialHybridIndex {
grid: Arc<RwLock<AdaptiveGrid>>,
storage: Arc<RwLock<CellStorage>>,
size: Arc<RwLock<usize>>,
config: SpatialHybridConfig,
}
impl SpatialHybridIndex {
pub fn new(config: SpatialHybridConfig) -> Self {
let grid = AdaptiveGrid::new(config.clone());
// NOTE: panics if the mmap data directory or file cannot be created.
let storage = CellStorage::new(config.clone()).expect("Failed to create storage");
Self {
grid: Arc::new(RwLock::new(grid)),
storage: Arc::new(RwLock::new(storage)),
size: Arc::new(RwLock::new(0)),
config,
}
}
pub fn new_default() -> Self {
Self::new(SpatialHybridConfig::default())
}
pub fn len(&self) -> usize {
*self.size.read()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn insert(&mut self, id: u64, geometry: Geometry) -> Result<()> {
let bbox = BoundingBoxF32::from_f64(&geometry.bounding_box());
let cells_to_insert = {
let grid = self.grid.read();
grid.get_cells_in_bbox(&bbox)
};
if cells_to_insert.is_empty() {
return Err(StorageError::InvalidData(
"Geometry outside world bounds".into()
));
}
let mut storage = self.storage.write();
for cell_id in cells_to_insert {
// Create the cell on first touch. Written as two lookups because the borrow
// checker rejects returning a `&mut` from one branch while mutating in the
// other (the classic get-or-insert limitation).
if storage.get(cell_id).is_none() {
storage.put(cell_id, MiniRTree::new());
}
if let Some(tree) = storage.get(cell_id) {
tree.insert(bbox, id);
}
}
*self.size.write() += 1;
let active_cells = storage.len();
let total_entries = self.len();
let mut grid = self.grid.write();
grid.maybe_resize(active_cells, total_entries);
if total_entries.is_multiple_of(AUTO_FLUSH_THRESHOLD) {
// Best-effort auto-flush; both locks must be released first because
// flush() re-acquires the storage lock.
drop(storage);
drop(grid);
let _ = self.flush();
}
Ok(())
}
pub fn batch_insert(&mut self, items: Vec<(u64, Geometry)>) -> Result<()> {
for (i, (id, geometry)) in items.into_iter().enumerate() {
self.insert(id, geometry)?;
if (i + 1) % AUTO_FLUSH_THRESHOLD == 0 {
let _ = self.flush();
}
}
Ok(())
}
pub fn delete(&mut self, id: u64) -> Result<bool> {
let mut storage = self.storage.write();
let mut deleted = false;
let all_cells = storage.iter_all_cells();
for cell_id in all_cells {
if let Some(tree) = storage.get(cell_id) {
if tree.delete(id) {
deleted = true;
}
}
}
if deleted {
*self.size.write() -= 1;
}
Ok(deleted)
}
pub fn update(&mut self, id: u64, new_geometry: Geometry) -> Result<bool> {
if self.delete(id)? {
self.insert(id, new_geometry)?;
Ok(true)
} else {
Ok(false)
}
}
pub fn range_query(&self, query_bbox: &BoundingBox) -> Vec<u64> {
let bbox_f32 = BoundingBoxF32::from_f64(query_bbox);
let cells_to_query = {
let grid = self.grid.read();
grid.get_cells_in_bbox(&bbox_f32)
};
let estimated_capacity = cells_to_query.len() * 10;
let mut results = Vec::with_capacity(estimated_capacity);
let mut storage = self.storage.write();
for cell_id in cells_to_query {
if let Some(tree) = storage.get(cell_id) {
tree.range_query(&bbox_f32, &mut results);
}
}
results.sort_unstable();
results.dedup();
results
}
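/// k-nearest-neighbour query. Cells are visited in a spiral outward from the
/// query point and candidates are ranked by bbox min-distance (exact for
/// point geometries). The spiral radius and the `k * 4` candidate cutoff are
/// heuristics, so results near the search frontier can be approximate.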
pub fn knn_query(&self, point: &Point, k: usize) -> Vec<(u64, f64)> {
if self.is_empty() || k == 0 {
return Vec::new();
}
let point_f32 = PointF32::from_f64(point);
let mut heap = BinaryHeap::new();
let cells_to_search = {
let grid = self.grid.read();
let max_spiral_radius = (grid.current_grid_size / 4).max(2);
grid.spiral_search(&point_f32, max_spiral_radius)
};
let mut storage = self.storage.write();
for cell_id in cells_to_search {
if let Some(tree) = storage.get(cell_id) {
tree.knn_search(&point_f32, k, &mut heap);
}
if heap.len() >= k * 4 {
break;
}
}
// The reversed `Ord` on DistanceEntry makes the heap a min-heap by distance,
// so popping yields candidates nearest-first. (Sorting the heap and taking
// from the front would return the *farthest* k instead.)
let mut results = Vec::with_capacity(k);
while results.len() < k {
match heap.pop() {
Some(e) => results.push((e.data_id, e.dist as f64)),
None => break,
}
}
results
}
pub fn flush(&mut self) -> Result<()> {
let mut storage = self.storage.write();
storage.flush()
}
pub fn memory_usage(&self) -> MemoryStats {
let storage = self.storage.read();
let grid = self.grid.read();
let grid_overhead = std::mem::size_of::<AdaptiveGrid>();
let mut rtree_memory = 0;
for (_id, tree) in storage.hot_cache.iter() {
rtree_memory += std::mem::size_of::<MiniRTree>();
// Charge the full Vec capacity (live entries plus slack).
rtree_memory += tree.entries.capacity() * std::mem::size_of::<CompactRTreeEntry>();
}
let hashmap_memory = storage.cell_offsets.len()
* (std::mem::size_of::<GridCellId>() + std::mem::size_of::<(u64, u32)>());
let cache_memory = rtree_memory + hashmap_memory;
let mmap_cells = storage.cell_offsets.len();
let total_cells = storage.len();
MemoryStats {
grid_overhead,
rtree_memory: cache_memory,
total_cells,
total_entries: self.len(),
bytes_per_entry: if !self.is_empty() {
(grid_overhead + cache_memory) / self.len()
} else {
0
},
cache_hit_rate: 0.0, // hit tracking not implemented yet
mmap_cells,
grid_size: grid.current_grid_size,
}
}
pub fn debug_memory_usage(&self) {
let storage = self.storage.read();
let _grid = self.grid.read();
debug_log!("╭─────────────────────────────────────╮");
debug_log!("│ 空间索引内存详细分析 │");
debug_log!("╰─────────────────────────────────────╯");
let grid_size = std::mem::size_of::<AdaptiveGrid>();
debug_log!(" Grid 结构: {} bytes", grid_size);
let cache_len = storage.hot_cache.len();
let cache_cap = storage.hot_cache.cap().get();
debug_log!("\n Hot Cache:");
debug_log!(" ├─ 容量: {}", cache_cap);
debug_log!(" ├─ 当前: {}", cache_len);
let mut total_entries = 0;
let mut total_vec_mem = 0;
for (_id, tree) in storage.hot_cache.iter() {
total_entries += tree.entries.len();
total_vec_mem += tree.entries.capacity() * std::mem::size_of::<CompactRTreeEntry>();
}
let struct_mem = cache_len * std::mem::size_of::<MiniRTree>();
debug_log!(" ├─ 结构体内存: {:.2} MB", struct_mem as f64 / 1024.0 / 1024.0);
debug_log!(" ├─ Vec 堆内存: {:.2} MB", total_vec_mem as f64 / 1024.0 / 1024.0);
debug_log!(" └─ 总条目数: {}", total_entries);
let hashmap_len = storage.cell_offsets.len();
let hashmap_mem = hashmap_len * (std::mem::size_of::<GridCellId>() + std::mem::size_of::<(u64, u32)>());
debug_log!("\n Cell Offsets HashMap:");
debug_log!(" ├─ 条目数: {}", hashmap_len);
debug_log!(" └─ 内存: {:.2} MB", hashmap_mem as f64 / 1024.0 / 1024.0);
let total_mem = grid_size + struct_mem + total_vec_mem + hashmap_mem;
debug_log!("\n 总计: {:.2} MB", total_mem as f64 / 1024.0 / 1024.0);
debug_log!(" 每条数据: {:.1} bytes\n", total_mem as f64 / self.len() as f64);
}
pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> {
let path = path.as_ref();
std::fs::create_dir_all(path)?;
let mut storage = self.storage.write();
storage.flush()?;
let grid = self.grid.read();
let cell_offsets: HashMap<u32, (u64, u32)> = storage.cell_offsets
.iter()
.map(|(k, v)| (k.0, *v))
.collect();
let metadata = SpatialIndexMetadata {
config: self.config.clone(),
grid_size: grid.current_grid_size,
cell_width: grid.cell_width,
cell_height: grid.cell_height,
total_entries: *self.size.read(),
cell_offsets,
next_offset: storage.next_offset,
};
let metadata_path = path.join("metadata.bin");
let data = bincode::serialize(&metadata)
.map_err(|e| StorageError::Serialization(e.to_string()))?;
std::fs::write(metadata_path, data)?;
if let Some(ref mut mmap) = storage.mmap_file {
mmap.flush()?;
}
Ok(())
}
pub fn load<P: AsRef<Path>>(path: P, config: SpatialHybridConfig) -> Result<Self> {
let path = path.as_ref();
let metadata_path = path.join("metadata.bin");
if !metadata_path.exists() {
return Ok(Self::new(config));
}
let data = std::fs::read(metadata_path)?;
let metadata: SpatialIndexMetadata = bincode::deserialize(&data)
.map_err(|e| StorageError::Serialization(e.to_string()))?;
let use_config = SpatialHybridConfig {
data_dir: config.data_dir.or(metadata.config.data_dir),
..metadata.config
};
let index = Self::new(use_config);
{
let mut grid = index.grid.write();
grid.current_grid_size = metadata.grid_size;
grid.cell_width = metadata.cell_width;
grid.cell_height = metadata.cell_height;
}
{
let mut storage = index.storage.write();
storage.cell_offsets = metadata.cell_offsets
.into_iter()
.map(|(k, v)| (GridCellId(k), v))
.collect();
storage.next_offset = metadata.next_offset;
}
*index.size.write() = metadata.total_entries;
Ok(index)
}
}
impl Default for SpatialHybridIndex {
fn default() -> Self {
Self::new_default()
}
}
#[derive(Debug)]
pub struct MemoryStats {
pub grid_overhead: usize,
pub rtree_memory: usize,
pub total_cells: usize,
pub total_entries: usize,
pub bytes_per_entry: usize,
pub cache_hit_rate: f64,
pub mmap_cells: usize,
pub grid_size: usize,
}
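/// Minimum Euclidean distance from `point` to `bbox`; zero when the point
/// lies inside the box.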
#[inline]
fn bbox_min_dist_f32(bbox: &BoundingBoxF32, point: &PointF32) -> f32 {
let dx = if point.x < bbox.min_x {
bbox.min_x - point.x
} else if point.x > bbox.max_x {
point.x - bbox.max_x
} else {
0.0
};
let dy = if point.y < bbox.min_y {
bbox.min_y - point.y
} else if point.y > bbox.max_y {
point.y - bbox.max_y
} else {
0.0
};
(dx * dx + dy * dy).sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compact_bbox() {
let bbox_f64 = BoundingBox::new(0.0, 0.0, 10.0, 10.0);
let bbox_f32 = BoundingBoxF32::from_f64(&bbox_f64);
assert_eq!(bbox_f32.min_x, 0.0);
assert_eq!(bbox_f32.max_x, 10.0);
assert_eq!(std::mem::size_of::<BoundingBoxF32>(), 16);
}
#[test]
fn test_grid_cell_mapping() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 160.0, 160.0))
.with_grid_size(16);
let grid = AdaptiveGrid::new(config);
let cell = grid.get_cell(5.0, 5.0).unwrap();
assert_eq!(cell.row(), 0);
assert_eq!(cell.col(), 0);
let cell = grid.get_cell(159.0, 159.0).unwrap();
assert_eq!(cell.row(), 15);
assert_eq!(cell.col(), 15);
}
#[test]
fn test_hybrid_insert_and_query() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 200.0, 200.0))
.with_adaptive(false);
let mut index = SpatialHybridIndex::new(config);
index.insert(1, Geometry::Point(Point::new(10.0, 10.0))).unwrap();
index.insert(2, Geometry::Point(Point::new(20.0, 20.0))).unwrap();
index.insert(3, Geometry::Point(Point::new(90.0, 90.0))).unwrap();
assert_eq!(index.len(), 3);
let bbox = BoundingBox::new(0.0, 0.0, 50.0, 50.0);
let mut results = index.range_query(&bbox);
results.sort();
assert_eq!(results, vec![1, 2]);
}
#[test]
fn test_knn_query() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 200.0, 200.0))
.with_adaptive(false);
let mut index = SpatialHybridIndex::new(config);
for i in 0..10 {
let x = (i * 10) as f64;
let y = (i * 10) as f64;
index.insert(i, Geometry::Point(Point::new(x, y))).unwrap();
}
let query_point = Point::new(25.0, 25.0);
let results = index.knn_query(&query_point, 3);
assert_eq!(results.len(), 3);
// (20, 20) and (30, 30) are the two nearest points to (25, 25).
assert!(results[0].0 == 2 || results[0].0 == 3);
}
#[test]
fn test_lru_cache() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 200.0, 200.0))
.with_cache_size(10)
.with_adaptive(false)
.with_mmap(true, Some(temp_dir.path().to_path_buf()));
let mut index = SpatialHybridIndex::new(config);
for i in 0..100 {
let x = (i % 10) as f64 * 10.0;
let y = (i / 10) as f64 * 10.0;
index.insert(i, Geometry::Point(Point::new(x, y))).unwrap();
}
assert_eq!(index.len(), 100);
index.flush().unwrap();
let bbox = BoundingBox::new(0.0, 0.0, 30.0, 30.0);
let results = index.range_query(&bbox);
assert!(results.len() > 0, "Should find points in range");
}
#[test]
fn test_adaptive_grid() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 1000.0, 1000.0))
.with_grid_size(16)
.with_adaptive(true);
let mut index = SpatialHybridIndex::new(config);
for i in 0..2000 {
let x = (i % 100) as f64 * 10.0;
let y = (i / 100) as f64 * 10.0;
index.insert(i, Geometry::Point(Point::new(x, y))).unwrap();
}
let stats = index.memory_usage();
debug_log!("Adaptive grid size: {}", stats.grid_size);
assert!(stats.grid_size >= 16);
}
#[test]
fn test_memory_efficiency() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 2000.0, 2000.0))
.with_adaptive(false);
let mut index = SpatialHybridIndex::new(config);
for i in 0..1000 {
let x = (i % 100) as f64 * 10.0;
let y = (i / 100) as f64 * 10.0;
index.insert(i, Geometry::Point(Point::new(x, y))).unwrap();
}
let stats = index.memory_usage();
debug_log!("Memory stats: {:?}", stats);
let total_memory = stats.grid_overhead + stats.rtree_memory;
assert!(total_memory < 200_000);
assert!(stats.bytes_per_entry < 200);
}
#[test]
fn test_save_and_load() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 100.0, 100.0))
.with_cache_size(10)
.with_adaptive(false)
.with_mmap(true, Some(temp_dir.path().to_path_buf()));
let mut index = SpatialHybridIndex::new(config.clone());
for i in 0..100 {
let x = (i % 10) as f64 * 10.0;
let y = (i / 10) as f64 * 10.0;
index.insert(i, Geometry::Point(Point::new(x, y))).unwrap();
}
assert_eq!(index.len(), 100);
index.flush().unwrap();
let bbox_before = BoundingBox::new(0.0, 0.0, 20.0, 20.0);
let results_before = index.range_query(&bbox_before);
assert!(results_before.len() > 0, "Should find points before save");
let save_path = temp_dir.path().join("index");
index.save(&save_path).unwrap();
assert!(save_path.join("metadata.bin").exists(), "Metadata file should exist");
drop(index);
let config_load = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 100.0, 100.0))
.with_mmap(true, Some(temp_dir.path().to_path_buf()));
let index2 = SpatialHybridIndex::load(&save_path, config_load).unwrap();
assert_eq!(index2.len(), 100, "Should have 100 entries after load");
let bbox_after = BoundingBox::new(0.0, 0.0, 20.0, 20.0);
let results_after = index2.range_query(&bbox_after);
assert!(results_after.len() > 0, "Should find points after load");
assert_eq!(results_before.len(), results_after.len(), "Results should match");
}
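// Added coverage sketch: a delete() and update() round-trip, which the tests
// above don't exercise.
#[test]
fn test_delete_and_update() {
let config = SpatialHybridConfig::new(BoundingBoxF32::new(0.0, 0.0, 100.0, 100.0))
.with_adaptive(false);
let mut index = SpatialHybridIndex::new(config);
index.insert(1, Geometry::Point(Point::new(10.0, 10.0))).unwrap();
index.insert(2, Geometry::Point(Point::new(20.0, 20.0))).unwrap();
assert!(index.delete(1).unwrap());
assert!(!index.delete(1).unwrap());
assert_eq!(index.len(), 1);
// update() moves id 2 to the opposite corner.
assert!(index.update(2, Geometry::Point(Point::new(90.0, 90.0))).unwrap());
let results = index.range_query(&BoundingBox::new(80.0, 80.0, 100.0, 100.0));
assert_eq!(results, vec![2]);
}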
}
use crate::index::builder::{IndexBuilder, BuildStats};
use crate::types::{Row, Value, RowId};
impl IndexBuilder for SpatialHybridIndex {
fn build_from_memtable(&mut self, rows: &[(RowId, Row)]) -> Result<()> {
use std::time::Instant;
let start = Instant::now();
let mut geometries: Vec<(u64, Geometry)> = Vec::with_capacity(rows.len());
for (row_id, row) in rows {
for value in row.iter() {
if let Value::Spatial(geom) = value {
geometries.push((*row_id, geom.clone()));
// Only the first spatial column in a row is indexed.
break;
}
}
}
if geometries.is_empty() {
return Ok(());
}
debug_log!("[SpatialIndex] Batch building {} geometries", geometries.len());
self.str_bulk_load(&geometries)?;
let duration = start.elapsed();
debug_log!("[SpatialIndex] Batch build complete in {:?}", duration);
Ok(())
}
fn persist(&mut self) -> Result<()> {
use std::time::Instant;
let start = Instant::now();
self.flush()?;
let duration = start.elapsed();
debug_log!("[SpatialIndex] Persist complete in {:?}", duration);
Ok(())
}
fn name(&self) -> &str {
"SpatialHybridIndex"
}
fn stats(&self) -> BuildStats {
let stats = self.memory_usage();
let total_memory = stats.grid_overhead + stats.rtree_memory;
BuildStats {
rows_processed: self.len(),
build_time_ms: 0,
persist_time_ms: 0,
index_size_bytes: total_memory,
}
}
}
impl SpatialHybridIndex {
fn str_bulk_load(&mut self, geometries: &[(u64, Geometry)]) -> Result<()> {
// Despite the STR-inspired name, this is a Z-order (Morton) sorted insert:
// ordering by the Morton code of each bbox's lower-left corner gives the
// grid good spatial locality during the bulk load.
let mut sorted_geoms = geometries.to_vec();
sorted_geoms.sort_by(|a, b| {
let bbox_a = a.1.bounding_box();
let bbox_b = b.1.bounding_box();
let z_a = morton_encode(bbox_a.min_x as f32, bbox_a.min_y as f32);
let z_b = morton_encode(bbox_b.min_x as f32, bbox_b.min_y as f32);
z_a.cmp(&z_b)
});
for (id, geom) in sorted_geoms {
self.insert(id, geom)?;
}
Ok(())
}
}
fn morton_encode(x: f32, y: f32) -> u64 {
// Bias the coordinates into the positive range before quantizing: an `as`
// cast of a negative float saturates to 0, which would collapse every
// negative coordinate onto the same key. The +180/+90 bias assumes
// geographic-style ranges; out-of-range values are clamped to 16 bits,
// which only degrades ordering quality, never correctness.
let x = (((x as f64 + 180.0) * 100.0).max(0.0) as u32).min(0xFFFF);
let y = (((y as f64 + 90.0) * 100.0).max(0.0) as u32).min(0xFFFF);
let mut z: u64 = 0;
// Interleave: bit i of x lands at bit 2i, bit i of y at bit 2i + 1.
for i in 0..16 {
z |= ((x & (1 << i)) as u64) << i | ((y & (1 << i)) as u64) << (i + 1);
}
z
}
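#[cfg(test)]
mod morton_tests {
use super::morton_encode;
// Sanity sketch for the bias above: with one axis fixed, the Morton code
// must be monotonic in the other axis, including for negative inputs.
#[test]
fn test_morton_monotonic_per_axis() {
assert!(morton_encode(-10.0, 0.0) < morton_encode(0.0, 0.0));
assert!(morton_encode(0.0, 0.0) < morton_encode(1.0, 0.0));
assert!(morton_encode(0.0, -10.0) < morton_encode(0.0, 0.0));
assert!(morton_encode(0.0, 0.0) < morton_encode(0.0, 1.0));
}
}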