1use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::random::seeded_rng;
13use scirs2_core::Rng;
14use sklears_core::prelude::*;
15use std::collections::HashMap;
16use std::f64::consts::PI;
17
18const EARTH_RADIUS_KM: f64 = 6371.0088;
24
25const EARTH_RADIUS_M: f64 = 6371008.8;
27
28const WGS84_A: f64 = 6378137.0;
30
31const WGS84_B: f64 = 6356752.314245;
33
34const WGS84_F: f64 = 1.0 / 298.257223563;
36
37const WEB_MERCATOR_MAX_LAT: f64 = 85.051129;
39
40const BASE32_CHARS: &[u8; 32] = b"0123456789bcdefghjkmnpqrstuvwxyz";
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
49pub enum CoordinateSystem {
50 WGS84,
52 WebMercator,
54 UTM { zone: u8, northern: bool },
56}
57
58#[derive(Debug, Clone, Copy)]
60pub struct Coordinate {
61 pub lat: f64,
63 pub lon: f64,
65}
66
67impl Coordinate {
68 pub fn new(lat: f64, lon: f64) -> Result<Self> {
70 if !(-90.0..=90.0).contains(&lat) {
71 return Err(SklearsError::InvalidInput(format!(
72 "Latitude must be between -90 and 90, got {}",
73 lat
74 )));
75 }
76 if !(-180.0..=180.0).contains(&lon) {
77 return Err(SklearsError::InvalidInput(format!(
78 "Longitude must be between -180 and 180, got {}",
79 lon
80 )));
81 }
82 Ok(Self { lat, lon })
83 }
84
85 pub fn to_radians(&self) -> (f64, f64) {
87 (self.lat.to_radians(), self.lon.to_radians())
88 }
89}
90
91#[derive(Debug, Clone, Copy, PartialEq, Eq)]
97pub enum SpatialDistanceMetric {
98 Haversine,
100 Vincenty,
102 Euclidean,
104 Manhattan,
106}
107
108pub fn calculate_distance(
110 coord1: &Coordinate,
111 coord2: &Coordinate,
112 metric: SpatialDistanceMetric,
113) -> Result<f64> {
114 match metric {
115 SpatialDistanceMetric::Haversine => Ok(haversine_distance(coord1, coord2)),
116 SpatialDistanceMetric::Vincenty => vincenty_distance(coord1, coord2),
117 SpatialDistanceMetric::Euclidean => {
118 Ok(((coord2.lat - coord1.lat).powi(2) + (coord2.lon - coord1.lon).powi(2)).sqrt())
119 }
120 SpatialDistanceMetric::Manhattan => {
121 Ok((coord2.lat - coord1.lat).abs() + (coord2.lon - coord1.lon).abs())
122 }
123 }
124}
125
126pub fn haversine_distance(coord1: &Coordinate, coord2: &Coordinate) -> f64 {
128 let (lat1, lon1) = coord1.to_radians();
129 let (lat2, lon2) = coord2.to_radians();
130
131 let dlat = lat2 - lat1;
132 let dlon = lon2 - lon1;
133
134 let a = (dlat / 2.0).sin().powi(2) + lat1.cos() * lat2.cos() * (dlon / 2.0).sin().powi(2);
135 let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
136
137 EARTH_RADIUS_KM * c
138}
139
140pub fn vincenty_distance(coord1: &Coordinate, coord2: &Coordinate) -> Result<f64> {
142 let (lat1, lon1) = coord1.to_radians();
143 let (lat2, lon2) = coord2.to_radians();
144
145 let l = lon2 - lon1;
146 let u1 = ((1.0 - WGS84_F) * lat1.tan()).atan();
147 let u2 = ((1.0 - WGS84_F) * lat2.tan()).atan();
148 let sin_u1 = u1.sin();
149 let cos_u1 = u1.cos();
150 let sin_u2 = u2.sin();
151 let cos_u2 = u2.cos();
152
153 let mut lambda = l;
154 let mut lambda_prev;
155 let mut iterations = 0;
156 const MAX_ITERATIONS: usize = 100;
157 const CONVERGENCE_THRESHOLD: f64 = 1e-12;
158
159 let (
160 mut sin_sigma,
161 mut cos_sigma,
162 mut sigma,
163 mut sin_alpha,
164 mut cos_sq_alpha,
165 mut cos_2sigma_m,
166 );
167
168 loop {
169 let sin_lambda = lambda.sin();
170 let cos_lambda = lambda.cos();
171 sin_sigma = ((cos_u2 * sin_lambda).powi(2)
172 + (cos_u1 * sin_u2 - sin_u1 * cos_u2 * cos_lambda).powi(2))
173 .sqrt();
174
175 if sin_sigma == 0.0 {
176 return Ok(0.0); }
178
179 cos_sigma = sin_u1 * sin_u2 + cos_u1 * cos_u2 * cos_lambda;
180 sigma = sin_sigma.atan2(cos_sigma);
181 sin_alpha = cos_u1 * cos_u2 * sin_lambda / sin_sigma;
182 cos_sq_alpha = 1.0 - sin_alpha.powi(2);
183 cos_2sigma_m = if cos_sq_alpha != 0.0 {
184 cos_sigma - 2.0 * sin_u1 * sin_u2 / cos_sq_alpha
185 } else {
186 0.0
187 };
188
189 let c = WGS84_F / 16.0 * cos_sq_alpha * (4.0 + WGS84_F * (4.0 - 3.0 * cos_sq_alpha));
190 lambda_prev = lambda;
191 lambda = l
192 + (1.0 - c)
193 * WGS84_F
194 * sin_alpha
195 * (sigma
196 + c * sin_sigma
197 * (cos_2sigma_m + c * cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))));
198
199 iterations += 1;
200 if (lambda - lambda_prev).abs() < CONVERGENCE_THRESHOLD || iterations >= MAX_ITERATIONS {
201 break;
202 }
203 }
204
205 if iterations >= MAX_ITERATIONS {
206 return Err(SklearsError::InvalidInput(
207 "Vincenty formula failed to converge".to_string(),
208 ));
209 }
210
211 let u_sq = cos_sq_alpha * (WGS84_A.powi(2) - WGS84_B.powi(2)) / WGS84_B.powi(2);
212 let a = 1.0 + u_sq / 16384.0 * (4096.0 + u_sq * (-768.0 + u_sq * (320.0 - 175.0 * u_sq)));
213 let b = u_sq / 1024.0 * (256.0 + u_sq * (-128.0 + u_sq * (74.0 - 47.0 * u_sq)));
214 let delta_sigma = b
215 * sin_sigma
216 * (cos_2sigma_m
217 + b / 4.0
218 * (cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))
219 - b / 6.0
220 * cos_2sigma_m
221 * (-3.0 + 4.0 * sin_sigma.powi(2))
222 * (-3.0 + 4.0 * cos_2sigma_m.powi(2))));
223
224 let s = WGS84_B * a * (sigma - delta_sigma);
225
226 Ok(s / 1000.0) }
228
229pub struct Geohash;
235
236impl Geohash {
237 pub fn encode(coord: &Coordinate, precision: usize) -> Result<String> {
243 if precision == 0 || precision > 12 {
244 return Err(SklearsError::InvalidInput(
245 "Geohash precision must be between 1 and 12".to_string(),
246 ));
247 }
248
249 let mut lat_min = -90.0;
250 let mut lat_max = 90.0;
251 let mut lon_min = -180.0;
252 let mut lon_max = 180.0;
253
254 let mut geohash = String::with_capacity(precision);
255 let mut bit = 0;
256 let mut ch = 0u8;
257 let mut is_lon = true;
258
259 while geohash.len() < precision {
260 if is_lon {
261 let mid = (lon_min + lon_max) / 2.0;
262 if coord.lon > mid {
263 ch |= 1 << (4 - bit);
264 lon_min = mid;
265 } else {
266 lon_max = mid;
267 }
268 } else {
269 let mid = (lat_min + lat_max) / 2.0;
270 if coord.lat > mid {
271 ch |= 1 << (4 - bit);
272 lat_min = mid;
273 } else {
274 lat_max = mid;
275 }
276 }
277
278 is_lon = !is_lon;
279 bit += 1;
280
281 if bit == 5 {
282 geohash.push(BASE32_CHARS[ch as usize] as char);
283 bit = 0;
284 ch = 0;
285 }
286 }
287
288 Ok(geohash)
289 }
290
291 pub fn decode(geohash: &str) -> Result<Coordinate> {
293 if geohash.is_empty() {
294 return Err(SklearsError::InvalidInput(
295 "Geohash string cannot be empty".to_string(),
296 ));
297 }
298
299 let mut char_map = HashMap::new();
301 for (i, &c) in BASE32_CHARS.iter().enumerate() {
302 char_map.insert(c as char, i as u8);
303 }
304
305 let mut lat_min = -90.0;
306 let mut lat_max = 90.0;
307 let mut lon_min = -180.0;
308 let mut lon_max = 180.0;
309 let mut is_lon = true;
310
311 for ch in geohash.chars() {
312 let bits = char_map.get(&ch).ok_or_else(|| {
313 SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
314 })?;
315
316 for i in (0..5).rev() {
317 if is_lon {
318 let mid = (lon_min + lon_max) / 2.0;
319 if (bits >> i) & 1 == 1 {
320 lon_min = mid;
321 } else {
322 lon_max = mid;
323 }
324 } else {
325 let mid = (lat_min + lat_max) / 2.0;
326 if (bits >> i) & 1 == 1 {
327 lat_min = mid;
328 } else {
329 lat_max = mid;
330 }
331 }
332 is_lon = !is_lon;
333 }
334 }
335
336 let lat = (lat_min + lat_max) / 2.0;
337 let lon = (lon_min + lon_max) / 2.0;
338
339 Coordinate::new(lat, lon)
340 }
341
342 pub fn bounds(geohash: &str) -> Result<(f64, f64, f64, f64)> {
344 if geohash.is_empty() {
345 return Err(SklearsError::InvalidInput(
346 "Geohash string cannot be empty".to_string(),
347 ));
348 }
349
350 let mut char_map = HashMap::new();
351 for (i, &c) in BASE32_CHARS.iter().enumerate() {
352 char_map.insert(c as char, i as u8);
353 }
354
355 let mut lat_min = -90.0;
356 let mut lat_max = 90.0;
357 let mut lon_min = -180.0;
358 let mut lon_max = 180.0;
359 let mut is_lon = true;
360
361 for ch in geohash.chars() {
362 let bits = char_map.get(&ch).ok_or_else(|| {
363 SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
364 })?;
365
366 for i in (0..5).rev() {
367 if is_lon {
368 let mid = (lon_min + lon_max) / 2.0;
369 if (bits >> i) & 1 == 1 {
370 lon_min = mid;
371 } else {
372 lon_max = mid;
373 }
374 } else {
375 let mid = (lat_min + lat_max) / 2.0;
376 if (bits >> i) & 1 == 1 {
377 lat_min = mid;
378 } else {
379 lat_max = mid;
380 }
381 }
382 is_lon = !is_lon;
383 }
384 }
385
386 Ok((lat_min, lon_min, lat_max, lon_max))
387 }
388
389 pub fn neighbors(geohash: &str) -> Result<Vec<String>> {
391 let coord = Self::decode(geohash)?;
394 let (lat_min, lon_min, lat_max, lon_max) = Self::bounds(geohash)?;
395
396 let lat_diff = lat_max - lat_min;
397 let lon_diff = lon_max - lon_min;
398
399 let precision = geohash.len();
400 let mut neighbors = Vec::with_capacity(8);
401
402 let offsets = [
404 (-1.0, -1.0),
405 (-1.0, 0.0),
406 (-1.0, 1.0),
407 (0.0, -1.0),
408 (0.0, 1.0),
409 (1.0, -1.0),
410 (1.0, 0.0),
411 (1.0, 1.0),
412 ];
413
414 for (dlat, dlon) in offsets.iter() {
415 let new_lat = (coord.lat + dlat * lat_diff).clamp(-90.0, 90.0);
416 let new_lon = coord.lon + dlon * lon_diff;
417 let new_lon = if new_lon > 180.0 {
419 new_lon - 360.0
420 } else if new_lon < -180.0 {
421 new_lon + 360.0
422 } else {
423 new_lon
424 };
425
426 if let Ok(neighbor_coord) = Coordinate::new(new_lat, new_lon) {
427 if let Ok(neighbor_hash) = Self::encode(&neighbor_coord, precision) {
428 if neighbor_hash != geohash {
429 neighbors.push(neighbor_hash);
430 }
431 }
432 }
433 }
434
435 Ok(neighbors)
436 }
437}
438
439#[derive(Debug, Clone)]
445pub struct CoordinateTransformerConfig {
446 pub from: CoordinateSystem,
448 pub to: CoordinateSystem,
450}
451
452pub struct CoordinateTransformer {
454 config: CoordinateTransformerConfig,
455}
456
457pub struct CoordinateTransformerFitted {
459 config: CoordinateTransformerConfig,
460}
461
462impl CoordinateTransformer {
463 pub fn new(from: CoordinateSystem, to: CoordinateSystem) -> Self {
465 Self {
466 config: CoordinateTransformerConfig { from, to },
467 }
468 }
469}
470
471impl Estimator for CoordinateTransformer {
472 type Config = CoordinateTransformerConfig;
473 type Error = SklearsError;
474 type Float = f64;
475
476 fn config(&self) -> &Self::Config {
477 &self.config
478 }
479}
480
481impl Fit<Array2<f64>, ()> for CoordinateTransformer {
482 type Fitted = CoordinateTransformerFitted;
483
484 fn fit(self, _X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
485 Ok(CoordinateTransformerFitted {
487 config: self.config,
488 })
489 }
490}
491
492impl Transform<Array2<f64>, Array2<f64>> for CoordinateTransformerFitted {
493 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
494 if X.ncols() != 2 {
495 return Err(SklearsError::InvalidInput(
496 "Input must have exactly 2 columns (lat, lon)".to_string(),
497 ));
498 }
499
500 let nrows = X.nrows();
501 let mut result = Array2::zeros((nrows, 2));
502
503 for i in 0..nrows {
504 let lat = X[[i, 0]];
505 let lon = X[[i, 1]];
506
507 let transformed = match (&self.config.from, &self.config.to) {
508 (CoordinateSystem::WGS84, CoordinateSystem::WebMercator) => {
509 wgs84_to_web_mercator(lat, lon)?
510 }
511 (CoordinateSystem::WebMercator, CoordinateSystem::WGS84) => {
512 web_mercator_to_wgs84(lat, lon)?
513 }
514 (CoordinateSystem::WGS84, CoordinateSystem::UTM { zone, northern }) => {
515 wgs84_to_utm(lat, lon, *zone, *northern)?
516 }
517 (CoordinateSystem::UTM { zone, northern }, CoordinateSystem::WGS84) => {
518 utm_to_wgs84(lat, lon, *zone, *northern)?
519 }
520 _ => {
521 return Err(SklearsError::InvalidInput(format!(
522 "Unsupported coordinate transformation: {:?} to {:?}",
523 self.config.from, self.config.to
524 )))
525 }
526 };
527
528 result[[i, 0]] = transformed.0;
529 result[[i, 1]] = transformed.1;
530 }
531
532 Ok(result)
533 }
534}
535
536fn wgs84_to_web_mercator(lat: f64, lon: f64) -> Result<(f64, f64)> {
542 if lat.abs() > WEB_MERCATOR_MAX_LAT {
543 return Err(SklearsError::InvalidInput(format!(
544 "Latitude exceeds Web Mercator bounds: {}",
545 lat
546 )));
547 }
548
549 let x = WGS84_A * lon.to_radians();
550 let y = WGS84_A * ((PI / 4.0) + (lat.to_radians() / 2.0)).tan().ln();
551
552 Ok((x, y))
553}
554
555fn web_mercator_to_wgs84(x: f64, y: f64) -> Result<(f64, f64)> {
557 let lon = (x / WGS84_A).to_degrees();
558 let lat = (2.0 * (y / WGS84_A).exp().atan() - PI / 2.0).to_degrees();
559
560 Ok((lat, lon))
561}
562
563fn wgs84_to_utm(lat: f64, lon: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
565 let lat_rad = lat.to_radians();
568 let lon_rad = lon.to_radians();
569
570 let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
571 let lon_diff = lon_rad - central_meridian;
572
573 let k0 = 0.9996;
575
576 let n = WGS84_A / (1.0 - (WGS84_F * (2.0 - WGS84_F) * lat_rad.sin().powi(2))).sqrt();
578 let t = lat_rad.tan();
579 let c = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * lat_rad.cos().powi(2);
580 let a = lon_diff * lat_rad.cos();
581
582 let m = WGS84_A
583 * ((1.0
584 - WGS84_F * (2.0 - WGS84_F) / 4.0
585 - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0)
586 * lat_rad
587 - (3.0 * WGS84_F * (2.0 - WGS84_F) / 8.0
588 + 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 32.0)
589 * (2.0 * lat_rad).sin()
590 + (15.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 256.0) * (4.0 * lat_rad).sin());
591
592 let easting = 500000.0
593 + k0 * n
594 * (a + (1.0 - t.powi(2) + c) * a.powi(3) / 6.0
595 + (5.0 - 18.0 * t.powi(2) + t.powi(4)) * a.powi(5) / 120.0);
596
597 let northing_offset = if northern { 0.0 } else { 10000000.0 };
598 let northing = northing_offset
599 + k0 * (m + n
600 * t
601 * (a.powi(2) / 2.0
602 + (5.0 - t.powi(2) + 9.0 * c) * a.powi(4) / 24.0
603 + (61.0 - 58.0 * t.powi(2) + t.powi(4)) * a.powi(6) / 720.0));
604
605 Ok((easting, northing))
606}
607
608fn utm_to_wgs84(easting: f64, northing: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
610 let k0 = 0.9996;
612 let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
613
614 let x = easting - 500000.0;
615 let y = if northern {
616 northing
617 } else {
618 northing - 10000000.0
619 };
620
621 let m = y / k0;
622 let mu = m
623 / (WGS84_A
624 * (1.0
625 - WGS84_F * (2.0 - WGS84_F) / 4.0
626 - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0));
627
628 let e1 = (1.0 - (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt())
630 / (1.0 + (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt());
631 let phi1 = mu
632 + (3.0 * e1 / 2.0 - 27.0 * e1.powi(3) / 32.0) * (2.0 * mu).sin()
633 + (21.0 * e1.powi(2) / 16.0 - 55.0 * e1.powi(4) / 32.0) * (4.0 * mu).sin()
634 + (151.0 * e1.powi(3) / 96.0) * (6.0 * mu).sin();
635
636 let n1 = WGS84_A / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).sqrt();
637 let t1 = phi1.tan();
638 let c1 = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * phi1.cos().powi(2);
639 let r1 = WGS84_A * (1.0 - WGS84_F * (2.0 - WGS84_F))
640 / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).powf(1.5);
641 let d = x / (n1 * k0);
642
643 let lat = phi1
644 - (n1 * t1 / r1)
645 * (d.powi(2) / 2.0
646 - (5.0 + 3.0 * t1.powi(2) + 10.0 * c1 - 4.0 * c1.powi(2)) * d.powi(4) / 24.0
647 + (61.0 + 90.0 * t1.powi(2) + 298.0 * c1 + 45.0 * t1.powi(4)) * d.powi(6) / 720.0);
648
649 let lon = central_meridian
650 + (d - (1.0 + 2.0 * t1.powi(2) + c1) * d.powi(3) / 6.0
651 + (5.0 - 2.0 * c1 + 28.0 * t1.powi(2)) * d.powi(5) / 120.0)
652 / phi1.cos();
653
654 Ok((lat.to_degrees(), lon.to_degrees()))
655}
656
657#[derive(Debug, Clone)]
663pub struct GeohashEncoderConfig {
664 pub precision: usize,
666 pub include_neighbors: bool,
668}
669
670impl Default for GeohashEncoderConfig {
671 fn default() -> Self {
672 Self {
673 precision: 7,
674 include_neighbors: false,
675 }
676 }
677}
678
679pub struct GeohashEncoder {
681 config: GeohashEncoderConfig,
682}
683
684pub struct GeohashEncoderFitted {
686 config: GeohashEncoderConfig,
687 vocabulary: HashMap<String, usize>,
689}
690
691impl GeohashEncoder {
692 pub fn new(config: GeohashEncoderConfig) -> Self {
694 Self { config }
695 }
696}
697
698impl Estimator for GeohashEncoder {
699 type Config = GeohashEncoderConfig;
700 type Error = SklearsError;
701 type Float = f64;
702
703 fn config(&self) -> &Self::Config {
704 &self.config
705 }
706}
707
708impl Fit<Array2<f64>, ()> for GeohashEncoder {
709 type Fitted = GeohashEncoderFitted;
710
711 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
712 if X.ncols() != 2 {
713 return Err(SklearsError::InvalidInput(
714 "Input must have exactly 2 columns (lat, lon)".to_string(),
715 ));
716 }
717
718 let mut vocabulary = HashMap::new();
719
720 for i in 0..X.nrows() {
721 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
722 let geohash = Geohash::encode(&coord, self.config.precision)?;
723
724 if !vocabulary.contains_key(&geohash) {
725 let idx = vocabulary.len();
726 vocabulary.insert(geohash.clone(), idx);
727 }
728
729 if self.config.include_neighbors {
730 for neighbor in Geohash::neighbors(&geohash)? {
731 if !vocabulary.contains_key(&neighbor) {
732 let idx = vocabulary.len();
733 vocabulary.insert(neighbor, idx);
734 }
735 }
736 }
737 }
738
739 Ok(GeohashEncoderFitted {
740 config: self.config,
741 vocabulary,
742 })
743 }
744}
745
746impl Transform<Array2<f64>, Array2<f64>> for GeohashEncoderFitted {
747 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
748 if X.ncols() != 2 {
749 return Err(SklearsError::InvalidInput(
750 "Input must have exactly 2 columns (lat, lon)".to_string(),
751 ));
752 }
753
754 let nrows = X.nrows();
755 let ncols = if self.config.include_neighbors {
756 self.vocabulary.len() * 9 } else {
758 self.vocabulary.len()
759 };
760
761 let mut result = Array2::zeros((nrows, ncols));
762
763 for i in 0..nrows {
764 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
765 let geohash = Geohash::encode(&coord, self.config.precision)?;
766
767 if let Some(&idx) = self.vocabulary.get(&geohash) {
768 result[[i, idx]] = 1.0;
769 }
770
771 if self.config.include_neighbors {
772 for neighbor in Geohash::neighbors(&geohash)? {
773 if let Some(&idx) = self.vocabulary.get(&neighbor) {
774 result[[i, idx]] = 1.0;
775 }
776 }
777 }
778 }
779
780 Ok(result)
781 }
782}
783
784#[derive(Debug, Clone)]
790pub struct SpatialDistanceFeaturesConfig {
791 pub reference_points: Vec<Coordinate>,
793 pub metric: SpatialDistanceMetric,
795 pub include_inverse: bool,
797 pub include_squared: bool,
799}
800
801pub struct SpatialDistanceFeatures {
803 config: SpatialDistanceFeaturesConfig,
804}
805
806pub struct SpatialDistanceFeaturesFitted {
808 config: SpatialDistanceFeaturesConfig,
809}
810
811impl SpatialDistanceFeatures {
812 pub fn new(config: SpatialDistanceFeaturesConfig) -> Self {
814 Self { config }
815 }
816}
817
818impl Estimator for SpatialDistanceFeatures {
819 type Config = SpatialDistanceFeaturesConfig;
820 type Error = SklearsError;
821 type Float = f64;
822
823 fn config(&self) -> &Self::Config {
824 &self.config
825 }
826}
827
828impl Fit<Array2<f64>, ()> for SpatialDistanceFeatures {
829 type Fitted = SpatialDistanceFeaturesFitted;
830
831 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
832 if X.ncols() != 2 {
833 return Err(SklearsError::InvalidInput(
834 "Input must have exactly 2 columns (lat, lon)".to_string(),
835 ));
836 }
837
838 Ok(SpatialDistanceFeaturesFitted {
839 config: self.config,
840 })
841 }
842}
843
844impl Transform<Array2<f64>, Array2<f64>> for SpatialDistanceFeaturesFitted {
845 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
846 if X.ncols() != 2 {
847 return Err(SklearsError::InvalidInput(
848 "Input must have exactly 2 columns (lat, lon)".to_string(),
849 ));
850 }
851
852 let nrows = X.nrows();
853 let n_refs = self.config.reference_points.len();
854 let mut n_features = n_refs;
855 if self.config.include_inverse {
856 n_features += n_refs;
857 }
858 if self.config.include_squared {
859 n_features += n_refs;
860 }
861
862 let mut result = Array2::zeros((nrows, n_features));
863
864 for i in 0..nrows {
865 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
866
867 for (j, ref_point) in self.config.reference_points.iter().enumerate() {
868 let distance = calculate_distance(&coord, ref_point, self.config.metric)?;
869
870 result[[i, j]] = distance;
871
872 if self.config.include_inverse {
873 result[[i, n_refs + j]] = if distance > 0.0 { 1.0 / distance } else { 0.0 };
874 }
875
876 if self.config.include_squared {
877 let offset = if self.config.include_inverse {
878 n_refs * 2
879 } else {
880 n_refs
881 };
882 result[[i, offset + j]] = distance * distance;
883 }
884 }
885 }
886
887 Ok(result)
888 }
889}
890
891#[derive(Debug, Clone)]
897pub struct SpatialBinningConfig {
898 pub n_lat_bins: usize,
900 pub n_lon_bins: usize,
902 pub lat_range: Option<(f64, f64)>,
904 pub lon_range: Option<(f64, f64)>,
906 pub one_hot: bool,
908}
909
910impl Default for SpatialBinningConfig {
911 fn default() -> Self {
912 Self {
913 n_lat_bins: 10,
914 n_lon_bins: 10,
915 lat_range: None,
916 lon_range: None,
917 one_hot: false,
918 }
919 }
920}
921
922pub struct SpatialBinning {
924 config: SpatialBinningConfig,
925}
926
927pub struct SpatialBinningFitted {
929 config: SpatialBinningConfig,
930 lat_range: (f64, f64),
931 lon_range: (f64, f64),
932}
933
934impl SpatialBinning {
935 pub fn new(config: SpatialBinningConfig) -> Self {
937 Self { config }
938 }
939}
940
941impl Estimator for SpatialBinning {
942 type Config = SpatialBinningConfig;
943 type Error = SklearsError;
944 type Float = f64;
945
946 fn config(&self) -> &Self::Config {
947 &self.config
948 }
949}
950
951impl Fit<Array2<f64>, ()> for SpatialBinning {
952 type Fitted = SpatialBinningFitted;
953
954 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
955 if X.ncols() != 2 {
956 return Err(SklearsError::InvalidInput(
957 "Input must have exactly 2 columns (lat, lon)".to_string(),
958 ));
959 }
960
961 let lat_range = if let Some(range) = self.config.lat_range {
962 range
963 } else {
964 let lats = X.column(0);
965 let min_lat = lats.iter().cloned().fold(f64::INFINITY, f64::min);
966 let max_lat = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
967 (min_lat, max_lat)
968 };
969
970 let lon_range = if let Some(range) = self.config.lon_range {
971 range
972 } else {
973 let lons = X.column(1);
974 let min_lon = lons.iter().cloned().fold(f64::INFINITY, f64::min);
975 let max_lon = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
976 (min_lon, max_lon)
977 };
978
979 Ok(SpatialBinningFitted {
980 config: self.config,
981 lat_range,
982 lon_range,
983 })
984 }
985}
986
987impl Transform<Array2<f64>, Array2<f64>> for SpatialBinningFitted {
988 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
989 if X.ncols() != 2 {
990 return Err(SklearsError::InvalidInput(
991 "Input must have exactly 2 columns (lat, lon)".to_string(),
992 ));
993 }
994
995 let nrows = X.nrows();
996 let ncols = if self.config.one_hot {
997 self.config.n_lat_bins * self.config.n_lon_bins
998 } else {
999 2
1000 };
1001
1002 let mut result = Array2::zeros((nrows, ncols));
1003
1004 let lat_bin_size = (self.lat_range.1 - self.lat_range.0) / self.config.n_lat_bins as f64;
1005 let lon_bin_size = (self.lon_range.1 - self.lon_range.0) / self.config.n_lon_bins as f64;
1006
1007 for i in 0..nrows {
1008 let lat = X[[i, 0]];
1009 let lon = X[[i, 1]];
1010
1011 let lat_bin = ((lat - self.lat_range.0) / lat_bin_size)
1012 .floor()
1013 .clamp(0.0, (self.config.n_lat_bins - 1) as f64) as usize;
1014 let lon_bin = ((lon - self.lon_range.0) / lon_bin_size)
1015 .floor()
1016 .clamp(0.0, (self.config.n_lon_bins - 1) as f64) as usize;
1017
1018 if self.config.one_hot {
1019 let bin_idx = lat_bin * self.config.n_lon_bins + lon_bin;
1020 result[[i, bin_idx]] = 1.0;
1021 } else {
1022 result[[i, 0]] = lat_bin as f64;
1023 result[[i, 1]] = lon_bin as f64;
1024 }
1025 }
1026
1027 Ok(result)
1028 }
1029}
1030
1031#[derive(Debug, Clone)]
1037pub struct SpatialClusteringConfig {
1038 pub method: SpatialClusteringMethod,
1040 pub n_clusters: usize,
1042 pub epsilon: Option<f64>,
1044 pub min_points: Option<usize>,
1046 pub metric: SpatialDistanceMetric,
1048}
1049
1050#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1052pub enum SpatialClusteringMethod {
1053 Grid,
1055 Density,
1057 KMeans,
1059 Hierarchical,
1061}
1062
1063impl Default for SpatialClusteringConfig {
1064 fn default() -> Self {
1065 Self {
1066 method: SpatialClusteringMethod::Grid,
1067 n_clusters: 10,
1068 epsilon: Some(5.0), min_points: Some(5),
1070 metric: SpatialDistanceMetric::Haversine,
1071 }
1072 }
1073}
1074
1075pub struct SpatialClustering {
1077 config: SpatialClusteringConfig,
1078}
1079
1080pub struct SpatialClusteringFitted {
1082 config: SpatialClusteringConfig,
1083 cluster_centers: Vec<Coordinate>,
1085 grid_bounds: Option<(f64, f64, f64, f64)>,
1087}
1088
1089impl SpatialClustering {
1090 pub fn new(config: SpatialClusteringConfig) -> Self {
1092 Self { config }
1093 }
1094}
1095
1096impl Estimator for SpatialClustering {
1097 type Config = SpatialClusteringConfig;
1098 type Error = SklearsError;
1099 type Float = f64;
1100
1101 fn config(&self) -> &Self::Config {
1102 &self.config
1103 }
1104}
1105
1106impl Fit<Array2<f64>, ()> for SpatialClustering {
1107 type Fitted = SpatialClusteringFitted;
1108
1109 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1110 if X.ncols() != 2 {
1111 return Err(SklearsError::InvalidInput(
1112 "Input must have exactly 2 columns (lat, lon)".to_string(),
1113 ));
1114 }
1115
1116 let cluster_centers = match self.config.method {
1117 SpatialClusteringMethod::Grid => {
1118 let lats = X.column(0);
1120 let lons = X.column(1);
1121 let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1122 let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1123 let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1124 let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1125
1126 let grid_size = (self.config.n_clusters as f64).sqrt().ceil() as usize;
1127 let lat_step = (lat_max - lat_min) / grid_size as f64;
1128 let lon_step = (lon_max - lon_min) / grid_size as f64;
1129
1130 let mut centers = Vec::new();
1131 for i in 0..grid_size {
1132 for j in 0..grid_size {
1133 let lat = lat_min + (i as f64 + 0.5) * lat_step;
1134 let lon = lon_min + (j as f64 + 0.5) * lon_step;
1135 if let Ok(coord) = Coordinate::new(lat, lon) {
1136 centers.push(coord);
1137 }
1138 }
1139 }
1140
1141 centers
1142 }
1143 SpatialClusteringMethod::KMeans => {
1144 let mut rng = seeded_rng(42);
1146 let nrows = X.nrows();
1147 let mut centers = Vec::new();
1148
1149 for _ in 0..self.config.n_clusters.min(nrows) {
1151 let idx = rng.random_range(0..nrows);
1152 if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1153 centers.push(coord);
1154 }
1155 }
1156
1157 for _ in 0..10 {
1159 let mut cluster_sums = vec![(0.0, 0.0); centers.len()];
1160 let mut cluster_counts = vec![0; centers.len()];
1161
1162 for i in 0..nrows {
1164 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1165 let mut min_dist = f64::INFINITY;
1166 let mut min_idx = 0;
1167
1168 for (j, center) in centers.iter().enumerate() {
1169 let dist = calculate_distance(&coord, center, self.config.metric)?;
1170 if dist < min_dist {
1171 min_dist = dist;
1172 min_idx = j;
1173 }
1174 }
1175
1176 cluster_sums[min_idx].0 += coord.lat;
1177 cluster_sums[min_idx].1 += coord.lon;
1178 cluster_counts[min_idx] += 1;
1179 }
1180
1181 for (j, center) in centers.iter_mut().enumerate() {
1183 if cluster_counts[j] > 0 {
1184 let new_lat = cluster_sums[j].0 / cluster_counts[j] as f64;
1185 let new_lon = cluster_sums[j].1 / cluster_counts[j] as f64;
1186 if let Ok(new_coord) = Coordinate::new(new_lat, new_lon) {
1187 *center = new_coord;
1188 }
1189 }
1190 }
1191 }
1192
1193 centers
1194 }
1195 SpatialClusteringMethod::Density | SpatialClusteringMethod::Hierarchical => {
1196 let mut rng = seeded_rng(42);
1198 let nrows = X.nrows();
1199 let mut centers = Vec::new();
1200
1201 let sample_size = self.config.n_clusters.min(nrows);
1202 for _ in 0..sample_size {
1203 let idx = rng.random_range(0..nrows);
1204 if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1205 centers.push(coord);
1206 }
1207 }
1208
1209 centers
1210 }
1211 };
1212
1213 let grid_bounds = if matches!(self.config.method, SpatialClusteringMethod::Grid) {
1214 let lats = X.column(0);
1215 let lons = X.column(1);
1216 let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1217 let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1218 let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1219 let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1220 Some((lat_min, lon_min, lat_max, lon_max))
1221 } else {
1222 None
1223 };
1224
1225 Ok(SpatialClusteringFitted {
1226 config: self.config,
1227 cluster_centers,
1228 grid_bounds,
1229 })
1230 }
1231}
1232
1233impl Transform<Array2<f64>, Array2<f64>> for SpatialClusteringFitted {
1234 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1235 if X.ncols() != 2 {
1236 return Err(SklearsError::InvalidInput(
1237 "Input must have exactly 2 columns (lat, lon)".to_string(),
1238 ));
1239 }
1240
1241 let nrows = X.nrows();
1242
1243 match self.config.method {
1244 SpatialClusteringMethod::Grid | SpatialClusteringMethod::KMeans => {
1245 let mut result = Array2::zeros((nrows, 2));
1247
1248 for i in 0..nrows {
1249 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1250 let mut min_dist = f64::INFINITY;
1251 let mut min_idx = 0;
1252
1253 for (j, center) in self.cluster_centers.iter().enumerate() {
1254 let dist = calculate_distance(&coord, center, self.config.metric)?;
1255 if dist < min_dist {
1256 min_dist = dist;
1257 min_idx = j;
1258 }
1259 }
1260
1261 result[[i, 0]] = min_idx as f64;
1262 result[[i, 1]] = min_dist;
1263 }
1264
1265 Ok(result)
1266 }
1267 SpatialClusteringMethod::Density => {
1268 let epsilon = self.config.epsilon.unwrap_or(5.0);
1270 let mut result = Array2::zeros((nrows, 1));
1271
1272 for i in 0..nrows {
1273 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1274 let mut density = 0.0;
1275
1276 for j in 0..nrows {
1277 if i != j {
1278 let other = Coordinate::new(X[[j, 0]], X[[j, 1]])?;
1279 let dist = calculate_distance(&coord, &other, self.config.metric)?;
1280 if dist <= epsilon {
1281 density += 1.0;
1282 }
1283 }
1284 }
1285
1286 result[[i, 0]] = density;
1287 }
1288
1289 Ok(result)
1290 }
1291 SpatialClusteringMethod::Hierarchical => {
1292 let n_centers = self.cluster_centers.len();
1294 let mut result = Array2::zeros((nrows, n_centers));
1295
1296 for i in 0..nrows {
1297 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1298
1299 for (j, center) in self.cluster_centers.iter().enumerate() {
1300 let dist = calculate_distance(&coord, center, self.config.metric)?;
1301 result[[i, j]] = dist;
1302 }
1303 }
1304
1305 Ok(result)
1306 }
1307 }
1308 }
1309}
1310
1311#[derive(Debug, Clone)]
1317pub struct SpatialAutocorrelationConfig {
1318 pub k_neighbors: usize,
1320 pub metric: SpatialDistanceMetric,
1322 pub include_morans_i: bool,
1324 pub include_lisa: bool,
1326}
1327
1328impl Default for SpatialAutocorrelationConfig {
1329 fn default() -> Self {
1330 Self {
1331 k_neighbors: 5,
1332 metric: SpatialDistanceMetric::Haversine,
1333 include_morans_i: true,
1334 include_lisa: true,
1335 }
1336 }
1337}
1338
1339pub struct SpatialAutocorrelation {
1341 config: SpatialAutocorrelationConfig,
1342}
1343
1344pub struct SpatialAutocorrelationFitted {
1346 config: SpatialAutocorrelationConfig,
1347 training_coords: Vec<Coordinate>,
1349}
1350
1351impl SpatialAutocorrelation {
1352 pub fn new(config: SpatialAutocorrelationConfig) -> Self {
1354 Self { config }
1355 }
1356}
1357
1358impl Estimator for SpatialAutocorrelation {
1359 type Config = SpatialAutocorrelationConfig;
1360 type Error = SklearsError;
1361 type Float = f64;
1362
1363 fn config(&self) -> &Self::Config {
1364 &self.config
1365 }
1366}
1367
1368impl Fit<Array2<f64>, Array1<f64>> for SpatialAutocorrelation {
1369 type Fitted = SpatialAutocorrelationFitted;
1370
1371 fn fit(self, X: &Array2<f64>, _y: &Array1<f64>) -> Result<Self::Fitted> {
1372 if X.ncols() != 2 {
1373 return Err(SklearsError::InvalidInput(
1374 "Input must have exactly 2 columns (lat, lon)".to_string(),
1375 ));
1376 }
1377
1378 let mut training_coords = Vec::with_capacity(X.nrows());
1379 for i in 0..X.nrows() {
1380 training_coords.push(Coordinate::new(X[[i, 0]], X[[i, 1]])?);
1381 }
1382
1383 Ok(SpatialAutocorrelationFitted {
1384 config: self.config,
1385 training_coords,
1386 })
1387 }
1388}
1389
1390impl Transform<Array2<f64>, Array2<f64>> for SpatialAutocorrelationFitted {
1391 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1392 if X.ncols() != 2 {
1393 return Err(SklearsError::InvalidInput(
1394 "Input must have exactly 2 columns (lat, lon)".to_string(),
1395 ));
1396 }
1397
1398 let nrows = X.nrows();
1399 let mut n_features = 0;
1400
1401 if self.config.include_lisa {
1402 n_features += 1; }
1404
1405 let mut result = Array2::zeros((nrows, n_features.max(1)));
1406
1407 for i in 0..nrows {
1408 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1409
1410 let mut distances: Vec<(usize, f64)> = self
1412 .training_coords
1413 .iter()
1414 .enumerate()
1415 .map(|(j, other)| {
1416 let dist = calculate_distance(&coord, other, self.config.metric)
1417 .unwrap_or(f64::INFINITY);
1418 (j, dist)
1419 })
1420 .collect();
1421
1422 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1423
1424 if self.config.include_lisa {
1426 let nearest_neighbors =
1429 &distances[1..=self.config.k_neighbors.min(distances.len() - 1)];
1430 let avg_neighbor_dist = nearest_neighbors.iter().map(|(_, d)| d).sum::<f64>()
1431 / nearest_neighbors.len() as f64;
1432
1433 result[[i, 0]] = 1.0 / (1.0 + avg_neighbor_dist); }
1435 }
1436
1437 Ok(result)
1438 }
1439}
1440
1441#[derive(Debug, Clone)]
1447pub struct ProximityFeaturesConfig {
1448 pub points_of_interest: Vec<(String, Coordinate)>,
1450 pub metric: SpatialDistanceMetric,
1452 pub max_distance: Option<f64>,
1454 pub include_indicators: bool,
1456}
1457
1458pub struct ProximityFeatures {
1460 config: ProximityFeaturesConfig,
1461}
1462
1463pub struct ProximityFeaturesFitted {
1465 config: ProximityFeaturesConfig,
1466}
1467
1468impl ProximityFeatures {
1469 pub fn new(config: ProximityFeaturesConfig) -> Self {
1471 Self { config }
1472 }
1473}
1474
1475impl Estimator for ProximityFeatures {
1476 type Config = ProximityFeaturesConfig;
1477 type Error = SklearsError;
1478 type Float = f64;
1479
1480 fn config(&self) -> &Self::Config {
1481 &self.config
1482 }
1483}
1484
1485impl Fit<Array2<f64>, ()> for ProximityFeatures {
1486 type Fitted = ProximityFeaturesFitted;
1487
1488 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1489 if X.ncols() != 2 {
1490 return Err(SklearsError::InvalidInput(
1491 "Input must have exactly 2 columns (lat, lon)".to_string(),
1492 ));
1493 }
1494
1495 Ok(ProximityFeaturesFitted {
1496 config: self.config,
1497 })
1498 }
1499}
1500
1501impl Transform<Array2<f64>, Array2<f64>> for ProximityFeaturesFitted {
1502 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1503 if X.ncols() != 2 {
1504 return Err(SklearsError::InvalidInput(
1505 "Input must have exactly 2 columns (lat, lon)".to_string(),
1506 ));
1507 }
1508
1509 let nrows = X.nrows();
1510 let n_pois = self.config.points_of_interest.len();
1511 let n_features = if self.config.include_indicators {
1512 n_pois * 2 } else {
1514 n_pois };
1516
1517 let mut result = Array2::zeros((nrows, n_features));
1518
1519 for i in 0..nrows {
1520 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1521
1522 for (j, (_name, poi)) in self.config.points_of_interest.iter().enumerate() {
1523 let distance = calculate_distance(&coord, poi, self.config.metric)?;
1524
1525 result[[i, j]] = distance;
1526
1527 if self.config.include_indicators {
1528 let is_within = if let Some(max_dist) = self.config.max_distance {
1529 if distance <= max_dist {
1530 1.0
1531 } else {
1532 0.0
1533 }
1534 } else {
1535 1.0
1536 };
1537 result[[i, n_pois + j]] = is_within;
1538 }
1539 }
1540 }
1541
1542 Ok(result)
1543 }
1544}
1545
1546#[cfg(test)]
1551mod tests {
1552 use super::*;
1553 use approx::assert_relative_eq;
1554
1555 #[test]
1556 fn test_coordinate_creation() {
1557 let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1558 assert_eq!(coord.lat, 40.7128);
1559 assert_eq!(coord.lon, -74.0060);
1560
1561 assert!(Coordinate::new(91.0, 0.0).is_err());
1563 assert!(Coordinate::new(0.0, 181.0).is_err());
1564 }
1565
1566 #[test]
1567 fn test_haversine_distance() {
1568 let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1570 let london = Coordinate::new(51.5074, -0.1278).unwrap();
1571
1572 let distance = haversine_distance(&nyc, &london);
1573 assert!((distance - 5570.0).abs() < 50.0);
1575 }
1576
1577 #[test]
1578 fn test_vincenty_distance() {
1579 let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1580 let london = Coordinate::new(51.5074, -0.1278).unwrap();
1581
1582 let distance = vincenty_distance(&nyc, &london).unwrap();
1583 assert!((distance - 5570.0).abs() < 50.0);
1585 }
1586
1587 #[test]
1588 fn test_geohash_encode_decode() {
1589 let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1590 let geohash = Geohash::encode(&coord, 7).unwrap();
1591 assert_eq!(geohash.len(), 7);
1592
1593 let decoded = Geohash::decode(&geohash).unwrap();
1594 assert!((decoded.lat - coord.lat).abs() < 0.01);
1595 assert!((decoded.lon - coord.lon).abs() < 0.01);
1596 }
1597
1598 #[test]
1599 fn test_geohash_bounds() {
1600 let geohash = "dr5ru7";
1601 let (lat_min, lon_min, lat_max, lon_max) = Geohash::bounds(geohash).unwrap();
1602
1603 assert!(lat_min < lat_max);
1604 assert!(lon_min < lon_max);
1605 assert!(lat_min >= -90.0 && lat_max <= 90.0);
1606 assert!(lon_min >= -180.0 && lon_max <= 180.0);
1607 }
1608
1609 #[test]
1610 fn test_geohash_neighbors() {
1611 let geohash = "dr5ru7";
1612 let neighbors = Geohash::neighbors(geohash).unwrap();
1613
1614 assert!(neighbors.len() <= 8);
1616 assert!(neighbors.len() > 0);
1617
1618 for neighbor in neighbors.iter() {
1620 assert_eq!(neighbor.len(), geohash.len());
1621 }
1622 }
1623
1624 #[test]
1625 fn test_wgs84_to_web_mercator() {
1626 let (x, y) = wgs84_to_web_mercator(40.7128, -74.0060).unwrap();
1627
1628 assert!(x.abs() < 20037508.34); assert!(y.abs() < 20037508.34);
1631
1632 let (lat, lon) = web_mercator_to_wgs84(x, y).unwrap();
1634 assert_relative_eq!(lat, 40.7128, epsilon = 0.0001);
1635 assert_relative_eq!(lon, -74.0060, epsilon = 0.0001);
1636 }
1637
1638 #[test]
1639 fn test_coordinate_transformer() {
1640 use scirs2_core::ndarray::array;
1641
1642 let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1643
1644 let transformer =
1645 CoordinateTransformer::new(CoordinateSystem::WGS84, CoordinateSystem::WebMercator);
1646 let fitted = transformer.fit(&X, &()).unwrap();
1647 let result = fitted.transform(&X).unwrap();
1648
1649 assert_eq!(result.nrows(), 2);
1650 assert_eq!(result.ncols(), 2);
1651
1652 assert!(result[[0, 0]].abs() < 20037508.34);
1654 assert!(result[[0, 1]].abs() < 20037508.34);
1655 }
1656
1657 #[test]
1658 fn test_geohash_encoder() {
1659 use scirs2_core::ndarray::array;
1660
1661 let X = array![[40.7128, -74.0060], [40.7589, -73.9851], [51.5074, -0.1278]];
1662
1663 let config = GeohashEncoderConfig {
1664 precision: 5,
1665 include_neighbors: false,
1666 };
1667
1668 let encoder = GeohashEncoder::new(config);
1669 let fitted = encoder.fit(&X, &()).unwrap();
1670 let result = fitted.transform(&X).unwrap();
1671
1672 assert_eq!(result.nrows(), 3);
1673 assert!(result.ncols() > 0);
1675 }
1676
1677 #[test]
1678 fn test_spatial_distance_features() {
1679 use scirs2_core::ndarray::array;
1680
1681 let X = array![[40.7128, -74.0060], [40.7589, -73.9851]];
1682
1683 let reference_points = vec![
1684 Coordinate::new(40.7128, -74.0060).unwrap(), Coordinate::new(51.5074, -0.1278).unwrap(), ];
1687
1688 let config = SpatialDistanceFeaturesConfig {
1689 reference_points,
1690 metric: SpatialDistanceMetric::Haversine,
1691 include_inverse: false,
1692 include_squared: false,
1693 };
1694
1695 let transformer = SpatialDistanceFeatures::new(config);
1696 let fitted = transformer.fit(&X, &()).unwrap();
1697 let result = fitted.transform(&X).unwrap();
1698
1699 assert_eq!(result.nrows(), 2);
1700 assert_eq!(result.ncols(), 2); assert!(result[[0, 0]] < 1.0);
1704 }
1705
1706 #[test]
1707 fn test_spatial_binning() {
1708 use scirs2_core::ndarray::array;
1709
1710 let X = array![[40.0, -74.0], [41.0, -73.0], [42.0, -72.0], [43.0, -71.0]];
1711
1712 let config = SpatialBinningConfig {
1713 n_lat_bins: 2,
1714 n_lon_bins: 2,
1715 lat_range: Some((40.0, 44.0)),
1716 lon_range: Some((-75.0, -70.0)),
1717 one_hot: false,
1718 };
1719
1720 let binning = SpatialBinning::new(config);
1721 let fitted = binning.fit(&X, &()).unwrap();
1722 let result = fitted.transform(&X).unwrap();
1723
1724 assert_eq!(result.nrows(), 4);
1725 assert_eq!(result.ncols(), 2);
1726
1727 for i in 0..result.nrows() {
1729 assert!(result[[i, 0]] >= 0.0 && result[[i, 0]] < 2.0);
1730 assert!(result[[i, 1]] >= 0.0 && result[[i, 1]] < 2.0);
1731 }
1732 }
1733
1734 #[test]
1735 fn test_spatial_binning_one_hot() {
1736 use scirs2_core::ndarray::array;
1737
1738 let X = array![[40.0, -74.0], [41.0, -73.0],];
1739
1740 let config = SpatialBinningConfig {
1741 n_lat_bins: 2,
1742 n_lon_bins: 2,
1743 lat_range: Some((40.0, 42.0)),
1744 lon_range: Some((-75.0, -72.0)),
1745 one_hot: true,
1746 };
1747
1748 let binning = SpatialBinning::new(config);
1749 let fitted = binning.fit(&X, &()).unwrap();
1750 let result = fitted.transform(&X).unwrap();
1751
1752 assert_eq!(result.nrows(), 2);
1753 assert_eq!(result.ncols(), 4); for i in 0..result.nrows() {
1757 let sum: f64 = result.row(i).sum();
1758 assert_relative_eq!(sum, 1.0, epsilon = 1e-10);
1759 }
1760 }
1761
1762 #[test]
1763 fn test_spatial_clustering_grid() {
1764 use scirs2_core::ndarray::array;
1765
1766 let X = array![[40.0, -74.0], [40.5, -73.5], [41.0, -73.0], [41.5, -72.5]];
1767
1768 let config = SpatialClusteringConfig {
1769 method: SpatialClusteringMethod::Grid,
1770 n_clusters: 4,
1771 epsilon: None,
1772 min_points: None,
1773 metric: SpatialDistanceMetric::Haversine,
1774 };
1775
1776 let clustering = SpatialClustering::new(config);
1777 let fitted = clustering.fit(&X, &()).unwrap();
1778 let result = fitted.transform(&X).unwrap();
1779
1780 assert_eq!(result.nrows(), 4);
1781 assert_eq!(result.ncols(), 2); for i in 0..result.nrows() {
1785 assert!(result[[i, 0]] >= 0.0);
1786 assert!(result[[i, 1]] >= 0.0); }
1788 }
1789
1790 #[test]
1791 fn test_spatial_clustering_kmeans() {
1792 use scirs2_core::ndarray::array;
1793
1794 let X = array![[40.0, -74.0], [40.1, -74.1], [42.0, -72.0], [42.1, -72.1]];
1795
1796 let config = SpatialClusteringConfig {
1797 method: SpatialClusteringMethod::KMeans,
1798 n_clusters: 2,
1799 epsilon: None,
1800 min_points: None,
1801 metric: SpatialDistanceMetric::Haversine,
1802 };
1803
1804 let clustering = SpatialClustering::new(config);
1805 let fitted = clustering.fit(&X, &()).unwrap();
1806 let result = fitted.transform(&X).unwrap();
1807
1808 assert_eq!(result.nrows(), 4);
1809 assert_eq!(result.ncols(), 2);
1810
1811 assert_eq!(result[[0, 0]], result[[1, 0]]);
1813 assert_eq!(result[[2, 0]], result[[3, 0]]);
1814 }
1815
1816 #[test]
1817 fn test_spatial_clustering_density() {
1818 use scirs2_core::ndarray::array;
1819
1820 let X = array![
1821 [40.0, -74.0],
1822 [40.001, -74.001], [42.0, -72.0] ];
1825
1826 let config = SpatialClusteringConfig {
1827 method: SpatialClusteringMethod::Density,
1828 n_clusters: 2,
1829 epsilon: Some(1.0), min_points: Some(1),
1831 metric: SpatialDistanceMetric::Haversine,
1832 };
1833
1834 let clustering = SpatialClustering::new(config);
1835 let fitted = clustering.fit(&X, &()).unwrap();
1836 let result = fitted.transform(&X).unwrap();
1837
1838 assert_eq!(result.nrows(), 3);
1839 assert_eq!(result.ncols(), 1); assert!(result[[0, 0]] > result[[2, 0]]);
1843 assert!(result[[1, 0]] > result[[2, 0]]);
1844 }
1845
1846 #[test]
1847 fn test_proximity_features() {
1848 use scirs2_core::ndarray::array;
1849
1850 let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1851
1852 let pois = vec![
1853 (
1854 "NYC".to_string(),
1855 Coordinate::new(40.7128, -74.0060).unwrap(),
1856 ),
1857 (
1858 "London".to_string(),
1859 Coordinate::new(51.5074, -0.1278).unwrap(),
1860 ),
1861 ];
1862
1863 let config = ProximityFeaturesConfig {
1864 points_of_interest: pois,
1865 metric: SpatialDistanceMetric::Haversine,
1866 max_distance: Some(100.0),
1867 include_indicators: true,
1868 };
1869
1870 let proximity = ProximityFeatures::new(config);
1871 let fitted = proximity.fit(&X, &()).unwrap();
1872 let result = fitted.transform(&X).unwrap();
1873
1874 assert_eq!(result.nrows(), 2);
1875 assert_eq!(result.ncols(), 4); assert!(result[[0, 0]] < 1.0);
1879 assert_relative_eq!(result[[0, 2]], 1.0, epsilon = 1e-10); assert!(result[[1, 1]] < 1.0);
1883 assert_relative_eq!(result[[1, 3]], 1.0, epsilon = 1e-10); }
1885
1886 #[test]
1887 fn test_spatial_autocorrelation() {
1888 use scirs2_core::ndarray::array;
1889
1890 let X = array![[40.0, -74.0], [40.1, -74.0], [40.2, -74.0], [42.0, -72.0]];
1891
1892 let config = SpatialAutocorrelationConfig {
1893 k_neighbors: 2,
1894 metric: SpatialDistanceMetric::Haversine,
1895 include_morans_i: true,
1896 include_lisa: true,
1897 };
1898
1899 let autocorr = SpatialAutocorrelation::new(config);
1900 let y = array![1.0, 2.0, 3.0, 4.0]; let fitted = autocorr.fit(&X, &y).unwrap();
1902 let result = fitted.transform(&X).unwrap();
1903
1904 assert_eq!(result.nrows(), 4);
1905 assert!(result.ncols() >= 1);
1906
1907 for i in 0..result.nrows() {
1909 for j in 0..result.ncols() {
1910 assert!(result[[i, j]].is_finite());
1911 assert!(result[[i, j]] >= 0.0);
1912 }
1913 }
1914 }
1915
1916 #[test]
1917 fn test_utm_transformation() {
1918 let (easting, northing) = wgs84_to_utm(40.7128, -74.0060, 18, true).unwrap();
1919
1920 assert!(easting > 0.0);
1922 assert!(northing > 0.0);
1923
1924 let (lat, lon) = utm_to_wgs84(easting, northing, 18, true).unwrap();
1926 assert_relative_eq!(lat, 40.7128, epsilon = 0.01);
1927 assert_relative_eq!(lon, -74.0060, epsilon = 0.01);
1928 }
1929
1930 #[test]
1931 fn test_coordinate_systems_enum() {
1932 let wgs84 = CoordinateSystem::WGS84;
1933 let web_merc = CoordinateSystem::WebMercator;
1934 let utm = CoordinateSystem::UTM {
1935 zone: 18,
1936 northern: true,
1937 };
1938
1939 assert_eq!(wgs84, CoordinateSystem::WGS84);
1940 assert_eq!(web_merc, CoordinateSystem::WebMercator);
1941 assert_ne!(wgs84, web_merc);
1942
1943 match utm {
1944 CoordinateSystem::UTM { zone, northern } => {
1945 assert_eq!(zone, 18);
1946 assert!(northern);
1947 }
1948 _ => panic!("Expected UTM"),
1949 }
1950 }
1951
1952 #[test]
1953 fn test_distance_metrics() {
1954 let coord1 = Coordinate::new(40.7128, -74.0060).unwrap();
1955 let coord2 = Coordinate::new(40.7589, -73.9851).unwrap();
1956
1957 let haversine =
1958 calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Haversine).unwrap();
1959 let vincenty =
1960 calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Vincenty).unwrap();
1961
1962 assert!((haversine - vincenty).abs() < 0.1);
1964
1965 assert!(haversine > 0.0 && haversine < 10.0);
1967 }
1968}