1use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::random::seeded_rng;
13use sklears_core::prelude::*;
14use std::collections::HashMap;
15use std::f64::consts::PI;
16
17const EARTH_RADIUS_KM: f64 = 6371.0088;
23
24const EARTH_RADIUS_M: f64 = 6371008.8;
26
27const WGS84_A: f64 = 6378137.0;
29
30const WGS84_B: f64 = 6356752.314245;
32
33const WGS84_F: f64 = 1.0 / 298.257223563;
35
36const WEB_MERCATOR_MAX_LAT: f64 = 85.051129;
38
39const BASE32_CHARS: &[u8; 32] = b"0123456789bcdefghjkmnpqrstuvwxyz";
41
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum CoordinateSystem {
49 WGS84,
51 WebMercator,
53 UTM { zone: u8, northern: bool },
55}
56
57#[derive(Debug, Clone, Copy)]
59pub struct Coordinate {
60 pub lat: f64,
62 pub lon: f64,
64}
65
66impl Coordinate {
67 pub fn new(lat: f64, lon: f64) -> Result<Self> {
69 if !(-90.0..=90.0).contains(&lat) {
70 return Err(SklearsError::InvalidInput(format!(
71 "Latitude must be between -90 and 90, got {}",
72 lat
73 )));
74 }
75 if !(-180.0..=180.0).contains(&lon) {
76 return Err(SklearsError::InvalidInput(format!(
77 "Longitude must be between -180 and 180, got {}",
78 lon
79 )));
80 }
81 Ok(Self { lat, lon })
82 }
83
84 pub fn to_radians(&self) -> (f64, f64) {
86 (self.lat.to_radians(), self.lon.to_radians())
87 }
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
96pub enum SpatialDistanceMetric {
97 Haversine,
99 Vincenty,
101 Euclidean,
103 Manhattan,
105}
106
107pub fn calculate_distance(
109 coord1: &Coordinate,
110 coord2: &Coordinate,
111 metric: SpatialDistanceMetric,
112) -> Result<f64> {
113 match metric {
114 SpatialDistanceMetric::Haversine => Ok(haversine_distance(coord1, coord2)),
115 SpatialDistanceMetric::Vincenty => vincenty_distance(coord1, coord2),
116 SpatialDistanceMetric::Euclidean => {
117 Ok(((coord2.lat - coord1.lat).powi(2) + (coord2.lon - coord1.lon).powi(2)).sqrt())
118 }
119 SpatialDistanceMetric::Manhattan => {
120 Ok((coord2.lat - coord1.lat).abs() + (coord2.lon - coord1.lon).abs())
121 }
122 }
123}
124
125pub fn haversine_distance(coord1: &Coordinate, coord2: &Coordinate) -> f64 {
127 let (lat1, lon1) = coord1.to_radians();
128 let (lat2, lon2) = coord2.to_radians();
129
130 let dlat = lat2 - lat1;
131 let dlon = lon2 - lon1;
132
133 let a = (dlat / 2.0).sin().powi(2) + lat1.cos() * lat2.cos() * (dlon / 2.0).sin().powi(2);
134 let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
135
136 EARTH_RADIUS_KM * c
137}
138
139pub fn vincenty_distance(coord1: &Coordinate, coord2: &Coordinate) -> Result<f64> {
141 let (lat1, lon1) = coord1.to_radians();
142 let (lat2, lon2) = coord2.to_radians();
143
144 let l = lon2 - lon1;
145 let u1 = ((1.0 - WGS84_F) * lat1.tan()).atan();
146 let u2 = ((1.0 - WGS84_F) * lat2.tan()).atan();
147 let sin_u1 = u1.sin();
148 let cos_u1 = u1.cos();
149 let sin_u2 = u2.sin();
150 let cos_u2 = u2.cos();
151
152 let mut lambda = l;
153 let mut lambda_prev;
154 let mut iterations = 0;
155 const MAX_ITERATIONS: usize = 100;
156 const CONVERGENCE_THRESHOLD: f64 = 1e-12;
157
158 let (
159 mut sin_sigma,
160 mut cos_sigma,
161 mut sigma,
162 mut sin_alpha,
163 mut cos_sq_alpha,
164 mut cos_2sigma_m,
165 );
166
167 loop {
168 let sin_lambda = lambda.sin();
169 let cos_lambda = lambda.cos();
170 sin_sigma = ((cos_u2 * sin_lambda).powi(2)
171 + (cos_u1 * sin_u2 - sin_u1 * cos_u2 * cos_lambda).powi(2))
172 .sqrt();
173
174 if sin_sigma == 0.0 {
175 return Ok(0.0); }
177
178 cos_sigma = sin_u1 * sin_u2 + cos_u1 * cos_u2 * cos_lambda;
179 sigma = sin_sigma.atan2(cos_sigma);
180 sin_alpha = cos_u1 * cos_u2 * sin_lambda / sin_sigma;
181 cos_sq_alpha = 1.0 - sin_alpha.powi(2);
182 cos_2sigma_m = if cos_sq_alpha != 0.0 {
183 cos_sigma - 2.0 * sin_u1 * sin_u2 / cos_sq_alpha
184 } else {
185 0.0
186 };
187
188 let c = WGS84_F / 16.0 * cos_sq_alpha * (4.0 + WGS84_F * (4.0 - 3.0 * cos_sq_alpha));
189 lambda_prev = lambda;
190 lambda = l
191 + (1.0 - c)
192 * WGS84_F
193 * sin_alpha
194 * (sigma
195 + c * sin_sigma
196 * (cos_2sigma_m + c * cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))));
197
198 iterations += 1;
199 if (lambda - lambda_prev).abs() < CONVERGENCE_THRESHOLD || iterations >= MAX_ITERATIONS {
200 break;
201 }
202 }
203
204 if iterations >= MAX_ITERATIONS {
205 return Err(SklearsError::InvalidInput(
206 "Vincenty formula failed to converge".to_string(),
207 ));
208 }
209
210 let u_sq = cos_sq_alpha * (WGS84_A.powi(2) - WGS84_B.powi(2)) / WGS84_B.powi(2);
211 let a = 1.0 + u_sq / 16384.0 * (4096.0 + u_sq * (-768.0 + u_sq * (320.0 - 175.0 * u_sq)));
212 let b = u_sq / 1024.0 * (256.0 + u_sq * (-128.0 + u_sq * (74.0 - 47.0 * u_sq)));
213 let delta_sigma = b
214 * sin_sigma
215 * (cos_2sigma_m
216 + b / 4.0
217 * (cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))
218 - b / 6.0
219 * cos_2sigma_m
220 * (-3.0 + 4.0 * sin_sigma.powi(2))
221 * (-3.0 + 4.0 * cos_2sigma_m.powi(2))));
222
223 let s = WGS84_B * a * (sigma - delta_sigma);
224
225 Ok(s / 1000.0) }
227
228pub struct Geohash;
234
235impl Geohash {
236 pub fn encode(coord: &Coordinate, precision: usize) -> Result<String> {
242 if precision == 0 || precision > 12 {
243 return Err(SklearsError::InvalidInput(
244 "Geohash precision must be between 1 and 12".to_string(),
245 ));
246 }
247
248 let mut lat_min = -90.0;
249 let mut lat_max = 90.0;
250 let mut lon_min = -180.0;
251 let mut lon_max = 180.0;
252
253 let mut geohash = String::with_capacity(precision);
254 let mut bit = 0;
255 let mut ch = 0u8;
256 let mut is_lon = true;
257
258 while geohash.len() < precision {
259 if is_lon {
260 let mid = (lon_min + lon_max) / 2.0;
261 if coord.lon > mid {
262 ch |= 1 << (4 - bit);
263 lon_min = mid;
264 } else {
265 lon_max = mid;
266 }
267 } else {
268 let mid = (lat_min + lat_max) / 2.0;
269 if coord.lat > mid {
270 ch |= 1 << (4 - bit);
271 lat_min = mid;
272 } else {
273 lat_max = mid;
274 }
275 }
276
277 is_lon = !is_lon;
278 bit += 1;
279
280 if bit == 5 {
281 geohash.push(BASE32_CHARS[ch as usize] as char);
282 bit = 0;
283 ch = 0;
284 }
285 }
286
287 Ok(geohash)
288 }
289
290 pub fn decode(geohash: &str) -> Result<Coordinate> {
292 if geohash.is_empty() {
293 return Err(SklearsError::InvalidInput(
294 "Geohash string cannot be empty".to_string(),
295 ));
296 }
297
298 let mut char_map = HashMap::new();
300 for (i, &c) in BASE32_CHARS.iter().enumerate() {
301 char_map.insert(c as char, i as u8);
302 }
303
304 let mut lat_min = -90.0;
305 let mut lat_max = 90.0;
306 let mut lon_min = -180.0;
307 let mut lon_max = 180.0;
308 let mut is_lon = true;
309
310 for ch in geohash.chars() {
311 let bits = char_map.get(&ch).ok_or_else(|| {
312 SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
313 })?;
314
315 for i in (0..5).rev() {
316 if is_lon {
317 let mid = (lon_min + lon_max) / 2.0;
318 if (bits >> i) & 1 == 1 {
319 lon_min = mid;
320 } else {
321 lon_max = mid;
322 }
323 } else {
324 let mid = (lat_min + lat_max) / 2.0;
325 if (bits >> i) & 1 == 1 {
326 lat_min = mid;
327 } else {
328 lat_max = mid;
329 }
330 }
331 is_lon = !is_lon;
332 }
333 }
334
335 let lat = (lat_min + lat_max) / 2.0;
336 let lon = (lon_min + lon_max) / 2.0;
337
338 Coordinate::new(lat, lon)
339 }
340
341 pub fn bounds(geohash: &str) -> Result<(f64, f64, f64, f64)> {
343 if geohash.is_empty() {
344 return Err(SklearsError::InvalidInput(
345 "Geohash string cannot be empty".to_string(),
346 ));
347 }
348
349 let mut char_map = HashMap::new();
350 for (i, &c) in BASE32_CHARS.iter().enumerate() {
351 char_map.insert(c as char, i as u8);
352 }
353
354 let mut lat_min = -90.0;
355 let mut lat_max = 90.0;
356 let mut lon_min = -180.0;
357 let mut lon_max = 180.0;
358 let mut is_lon = true;
359
360 for ch in geohash.chars() {
361 let bits = char_map.get(&ch).ok_or_else(|| {
362 SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
363 })?;
364
365 for i in (0..5).rev() {
366 if is_lon {
367 let mid = (lon_min + lon_max) / 2.0;
368 if (bits >> i) & 1 == 1 {
369 lon_min = mid;
370 } else {
371 lon_max = mid;
372 }
373 } else {
374 let mid = (lat_min + lat_max) / 2.0;
375 if (bits >> i) & 1 == 1 {
376 lat_min = mid;
377 } else {
378 lat_max = mid;
379 }
380 }
381 is_lon = !is_lon;
382 }
383 }
384
385 Ok((lat_min, lon_min, lat_max, lon_max))
386 }
387
388 pub fn neighbors(geohash: &str) -> Result<Vec<String>> {
390 let coord = Self::decode(geohash)?;
393 let (lat_min, lon_min, lat_max, lon_max) = Self::bounds(geohash)?;
394
395 let lat_diff = lat_max - lat_min;
396 let lon_diff = lon_max - lon_min;
397
398 let precision = geohash.len();
399 let mut neighbors = Vec::with_capacity(8);
400
401 let offsets = [
403 (-1.0, -1.0),
404 (-1.0, 0.0),
405 (-1.0, 1.0),
406 (0.0, -1.0),
407 (0.0, 1.0),
408 (1.0, -1.0),
409 (1.0, 0.0),
410 (1.0, 1.0),
411 ];
412
413 for (dlat, dlon) in offsets.iter() {
414 let new_lat = (coord.lat + dlat * lat_diff).clamp(-90.0, 90.0);
415 let new_lon = coord.lon + dlon * lon_diff;
416 let new_lon = if new_lon > 180.0 {
418 new_lon - 360.0
419 } else if new_lon < -180.0 {
420 new_lon + 360.0
421 } else {
422 new_lon
423 };
424
425 if let Ok(neighbor_coord) = Coordinate::new(new_lat, new_lon) {
426 if let Ok(neighbor_hash) = Self::encode(&neighbor_coord, precision) {
427 if neighbor_hash != geohash {
428 neighbors.push(neighbor_hash);
429 }
430 }
431 }
432 }
433
434 Ok(neighbors)
435 }
436}
437
438#[derive(Debug, Clone)]
444pub struct CoordinateTransformerConfig {
445 pub from: CoordinateSystem,
447 pub to: CoordinateSystem,
449}
450
451pub struct CoordinateTransformer {
453 config: CoordinateTransformerConfig,
454}
455
456pub struct CoordinateTransformerFitted {
458 config: CoordinateTransformerConfig,
459}
460
461impl CoordinateTransformer {
462 pub fn new(from: CoordinateSystem, to: CoordinateSystem) -> Self {
464 Self {
465 config: CoordinateTransformerConfig { from, to },
466 }
467 }
468}
469
470impl Estimator for CoordinateTransformer {
471 type Config = CoordinateTransformerConfig;
472 type Error = SklearsError;
473 type Float = f64;
474
475 fn config(&self) -> &Self::Config {
476 &self.config
477 }
478}
479
480impl Fit<Array2<f64>, ()> for CoordinateTransformer {
481 type Fitted = CoordinateTransformerFitted;
482
483 fn fit(self, _X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
484 Ok(CoordinateTransformerFitted {
486 config: self.config,
487 })
488 }
489}
490
491impl Transform<Array2<f64>, Array2<f64>> for CoordinateTransformerFitted {
492 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
493 if X.ncols() != 2 {
494 return Err(SklearsError::InvalidInput(
495 "Input must have exactly 2 columns (lat, lon)".to_string(),
496 ));
497 }
498
499 let nrows = X.nrows();
500 let mut result = Array2::zeros((nrows, 2));
501
502 for i in 0..nrows {
503 let lat = X[[i, 0]];
504 let lon = X[[i, 1]];
505
506 let transformed = match (&self.config.from, &self.config.to) {
507 (CoordinateSystem::WGS84, CoordinateSystem::WebMercator) => {
508 wgs84_to_web_mercator(lat, lon)?
509 }
510 (CoordinateSystem::WebMercator, CoordinateSystem::WGS84) => {
511 web_mercator_to_wgs84(lat, lon)?
512 }
513 (CoordinateSystem::WGS84, CoordinateSystem::UTM { zone, northern }) => {
514 wgs84_to_utm(lat, lon, *zone, *northern)?
515 }
516 (CoordinateSystem::UTM { zone, northern }, CoordinateSystem::WGS84) => {
517 utm_to_wgs84(lat, lon, *zone, *northern)?
518 }
519 _ => {
520 return Err(SklearsError::InvalidInput(format!(
521 "Unsupported coordinate transformation: {:?} to {:?}",
522 self.config.from, self.config.to
523 )))
524 }
525 };
526
527 result[[i, 0]] = transformed.0;
528 result[[i, 1]] = transformed.1;
529 }
530
531 Ok(result)
532 }
533}
534
535fn wgs84_to_web_mercator(lat: f64, lon: f64) -> Result<(f64, f64)> {
541 if lat.abs() > WEB_MERCATOR_MAX_LAT {
542 return Err(SklearsError::InvalidInput(format!(
543 "Latitude exceeds Web Mercator bounds: {}",
544 lat
545 )));
546 }
547
548 let x = WGS84_A * lon.to_radians();
549 let y = WGS84_A * ((PI / 4.0) + (lat.to_radians() / 2.0)).tan().ln();
550
551 Ok((x, y))
552}
553
554fn web_mercator_to_wgs84(x: f64, y: f64) -> Result<(f64, f64)> {
556 let lon = (x / WGS84_A).to_degrees();
557 let lat = (2.0 * (y / WGS84_A).exp().atan() - PI / 2.0).to_degrees();
558
559 Ok((lat, lon))
560}
561
562fn wgs84_to_utm(lat: f64, lon: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
564 let lat_rad = lat.to_radians();
567 let lon_rad = lon.to_radians();
568
569 let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
570 let lon_diff = lon_rad - central_meridian;
571
572 let k0 = 0.9996;
574
575 let n = WGS84_A / (1.0 - (WGS84_F * (2.0 - WGS84_F) * lat_rad.sin().powi(2))).sqrt();
577 let t = lat_rad.tan();
578 let c = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * lat_rad.cos().powi(2);
579 let a = lon_diff * lat_rad.cos();
580
581 let m = WGS84_A
582 * ((1.0
583 - WGS84_F * (2.0 - WGS84_F) / 4.0
584 - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0)
585 * lat_rad
586 - (3.0 * WGS84_F * (2.0 - WGS84_F) / 8.0
587 + 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 32.0)
588 * (2.0 * lat_rad).sin()
589 + (15.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 256.0) * (4.0 * lat_rad).sin());
590
591 let easting = 500000.0
592 + k0 * n
593 * (a + (1.0 - t.powi(2) + c) * a.powi(3) / 6.0
594 + (5.0 - 18.0 * t.powi(2) + t.powi(4)) * a.powi(5) / 120.0);
595
596 let northing_offset = if northern { 0.0 } else { 10000000.0 };
597 let northing = northing_offset
598 + k0 * (m + n
599 * t
600 * (a.powi(2) / 2.0
601 + (5.0 - t.powi(2) + 9.0 * c) * a.powi(4) / 24.0
602 + (61.0 - 58.0 * t.powi(2) + t.powi(4)) * a.powi(6) / 720.0));
603
604 Ok((easting, northing))
605}
606
607fn utm_to_wgs84(easting: f64, northing: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
609 let k0 = 0.9996;
611 let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
612
613 let x = easting - 500000.0;
614 let y = if northern {
615 northing
616 } else {
617 northing - 10000000.0
618 };
619
620 let m = y / k0;
621 let mu = m
622 / (WGS84_A
623 * (1.0
624 - WGS84_F * (2.0 - WGS84_F) / 4.0
625 - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0));
626
627 let e1 = (1.0 - (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt())
629 / (1.0 + (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt());
630 let phi1 = mu
631 + (3.0 * e1 / 2.0 - 27.0 * e1.powi(3) / 32.0) * (2.0 * mu).sin()
632 + (21.0 * e1.powi(2) / 16.0 - 55.0 * e1.powi(4) / 32.0) * (4.0 * mu).sin()
633 + (151.0 * e1.powi(3) / 96.0) * (6.0 * mu).sin();
634
635 let n1 = WGS84_A / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).sqrt();
636 let t1 = phi1.tan();
637 let c1 = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * phi1.cos().powi(2);
638 let r1 = WGS84_A * (1.0 - WGS84_F * (2.0 - WGS84_F))
639 / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).powf(1.5);
640 let d = x / (n1 * k0);
641
642 let lat = phi1
643 - (n1 * t1 / r1)
644 * (d.powi(2) / 2.0
645 - (5.0 + 3.0 * t1.powi(2) + 10.0 * c1 - 4.0 * c1.powi(2)) * d.powi(4) / 24.0
646 + (61.0 + 90.0 * t1.powi(2) + 298.0 * c1 + 45.0 * t1.powi(4)) * d.powi(6) / 720.0);
647
648 let lon = central_meridian
649 + (d - (1.0 + 2.0 * t1.powi(2) + c1) * d.powi(3) / 6.0
650 + (5.0 - 2.0 * c1 + 28.0 * t1.powi(2)) * d.powi(5) / 120.0)
651 / phi1.cos();
652
653 Ok((lat.to_degrees(), lon.to_degrees()))
654}
655
656#[derive(Debug, Clone)]
662pub struct GeohashEncoderConfig {
663 pub precision: usize,
665 pub include_neighbors: bool,
667}
668
669impl Default for GeohashEncoderConfig {
670 fn default() -> Self {
671 Self {
672 precision: 7,
673 include_neighbors: false,
674 }
675 }
676}
677
678pub struct GeohashEncoder {
680 config: GeohashEncoderConfig,
681}
682
683pub struct GeohashEncoderFitted {
685 config: GeohashEncoderConfig,
686 vocabulary: HashMap<String, usize>,
688}
689
690impl GeohashEncoder {
691 pub fn new(config: GeohashEncoderConfig) -> Self {
693 Self { config }
694 }
695}
696
697impl Estimator for GeohashEncoder {
698 type Config = GeohashEncoderConfig;
699 type Error = SklearsError;
700 type Float = f64;
701
702 fn config(&self) -> &Self::Config {
703 &self.config
704 }
705}
706
707impl Fit<Array2<f64>, ()> for GeohashEncoder {
708 type Fitted = GeohashEncoderFitted;
709
710 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
711 if X.ncols() != 2 {
712 return Err(SklearsError::InvalidInput(
713 "Input must have exactly 2 columns (lat, lon)".to_string(),
714 ));
715 }
716
717 let mut vocabulary = HashMap::new();
718
719 for i in 0..X.nrows() {
720 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
721 let geohash = Geohash::encode(&coord, self.config.precision)?;
722
723 if !vocabulary.contains_key(&geohash) {
724 let idx = vocabulary.len();
725 vocabulary.insert(geohash.clone(), idx);
726 }
727
728 if self.config.include_neighbors {
729 for neighbor in Geohash::neighbors(&geohash)? {
730 if !vocabulary.contains_key(&neighbor) {
731 let idx = vocabulary.len();
732 vocabulary.insert(neighbor, idx);
733 }
734 }
735 }
736 }
737
738 Ok(GeohashEncoderFitted {
739 config: self.config,
740 vocabulary,
741 })
742 }
743}
744
745impl Transform<Array2<f64>, Array2<f64>> for GeohashEncoderFitted {
746 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
747 if X.ncols() != 2 {
748 return Err(SklearsError::InvalidInput(
749 "Input must have exactly 2 columns (lat, lon)".to_string(),
750 ));
751 }
752
753 let nrows = X.nrows();
754 let ncols = if self.config.include_neighbors {
755 self.vocabulary.len() * 9 } else {
757 self.vocabulary.len()
758 };
759
760 let mut result = Array2::zeros((nrows, ncols));
761
762 for i in 0..nrows {
763 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
764 let geohash = Geohash::encode(&coord, self.config.precision)?;
765
766 if let Some(&idx) = self.vocabulary.get(&geohash) {
767 result[[i, idx]] = 1.0;
768 }
769
770 if self.config.include_neighbors {
771 for neighbor in Geohash::neighbors(&geohash)? {
772 if let Some(&idx) = self.vocabulary.get(&neighbor) {
773 result[[i, idx]] = 1.0;
774 }
775 }
776 }
777 }
778
779 Ok(result)
780 }
781}
782
783#[derive(Debug, Clone)]
789pub struct SpatialDistanceFeaturesConfig {
790 pub reference_points: Vec<Coordinate>,
792 pub metric: SpatialDistanceMetric,
794 pub include_inverse: bool,
796 pub include_squared: bool,
798}
799
800pub struct SpatialDistanceFeatures {
802 config: SpatialDistanceFeaturesConfig,
803}
804
805pub struct SpatialDistanceFeaturesFitted {
807 config: SpatialDistanceFeaturesConfig,
808}
809
810impl SpatialDistanceFeatures {
811 pub fn new(config: SpatialDistanceFeaturesConfig) -> Self {
813 Self { config }
814 }
815}
816
817impl Estimator for SpatialDistanceFeatures {
818 type Config = SpatialDistanceFeaturesConfig;
819 type Error = SklearsError;
820 type Float = f64;
821
822 fn config(&self) -> &Self::Config {
823 &self.config
824 }
825}
826
827impl Fit<Array2<f64>, ()> for SpatialDistanceFeatures {
828 type Fitted = SpatialDistanceFeaturesFitted;
829
830 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
831 if X.ncols() != 2 {
832 return Err(SklearsError::InvalidInput(
833 "Input must have exactly 2 columns (lat, lon)".to_string(),
834 ));
835 }
836
837 Ok(SpatialDistanceFeaturesFitted {
838 config: self.config,
839 })
840 }
841}
842
843impl Transform<Array2<f64>, Array2<f64>> for SpatialDistanceFeaturesFitted {
844 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
845 if X.ncols() != 2 {
846 return Err(SklearsError::InvalidInput(
847 "Input must have exactly 2 columns (lat, lon)".to_string(),
848 ));
849 }
850
851 let nrows = X.nrows();
852 let n_refs = self.config.reference_points.len();
853 let mut n_features = n_refs;
854 if self.config.include_inverse {
855 n_features += n_refs;
856 }
857 if self.config.include_squared {
858 n_features += n_refs;
859 }
860
861 let mut result = Array2::zeros((nrows, n_features));
862
863 for i in 0..nrows {
864 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
865
866 for (j, ref_point) in self.config.reference_points.iter().enumerate() {
867 let distance = calculate_distance(&coord, ref_point, self.config.metric)?;
868
869 result[[i, j]] = distance;
870
871 if self.config.include_inverse {
872 result[[i, n_refs + j]] = if distance > 0.0 { 1.0 / distance } else { 0.0 };
873 }
874
875 if self.config.include_squared {
876 let offset = if self.config.include_inverse {
877 n_refs * 2
878 } else {
879 n_refs
880 };
881 result[[i, offset + j]] = distance * distance;
882 }
883 }
884 }
885
886 Ok(result)
887 }
888}
889
890#[derive(Debug, Clone)]
896pub struct SpatialBinningConfig {
897 pub n_lat_bins: usize,
899 pub n_lon_bins: usize,
901 pub lat_range: Option<(f64, f64)>,
903 pub lon_range: Option<(f64, f64)>,
905 pub one_hot: bool,
907}
908
909impl Default for SpatialBinningConfig {
910 fn default() -> Self {
911 Self {
912 n_lat_bins: 10,
913 n_lon_bins: 10,
914 lat_range: None,
915 lon_range: None,
916 one_hot: false,
917 }
918 }
919}
920
921pub struct SpatialBinning {
923 config: SpatialBinningConfig,
924}
925
926pub struct SpatialBinningFitted {
928 config: SpatialBinningConfig,
929 lat_range: (f64, f64),
930 lon_range: (f64, f64),
931}
932
933impl SpatialBinning {
934 pub fn new(config: SpatialBinningConfig) -> Self {
936 Self { config }
937 }
938}
939
940impl Estimator for SpatialBinning {
941 type Config = SpatialBinningConfig;
942 type Error = SklearsError;
943 type Float = f64;
944
945 fn config(&self) -> &Self::Config {
946 &self.config
947 }
948}
949
950impl Fit<Array2<f64>, ()> for SpatialBinning {
951 type Fitted = SpatialBinningFitted;
952
953 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
954 if X.ncols() != 2 {
955 return Err(SklearsError::InvalidInput(
956 "Input must have exactly 2 columns (lat, lon)".to_string(),
957 ));
958 }
959
960 let lat_range = if let Some(range) = self.config.lat_range {
961 range
962 } else {
963 let lats = X.column(0);
964 let min_lat = lats.iter().cloned().fold(f64::INFINITY, f64::min);
965 let max_lat = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
966 (min_lat, max_lat)
967 };
968
969 let lon_range = if let Some(range) = self.config.lon_range {
970 range
971 } else {
972 let lons = X.column(1);
973 let min_lon = lons.iter().cloned().fold(f64::INFINITY, f64::min);
974 let max_lon = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
975 (min_lon, max_lon)
976 };
977
978 Ok(SpatialBinningFitted {
979 config: self.config,
980 lat_range,
981 lon_range,
982 })
983 }
984}
985
986impl Transform<Array2<f64>, Array2<f64>> for SpatialBinningFitted {
987 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
988 if X.ncols() != 2 {
989 return Err(SklearsError::InvalidInput(
990 "Input must have exactly 2 columns (lat, lon)".to_string(),
991 ));
992 }
993
994 let nrows = X.nrows();
995 let ncols = if self.config.one_hot {
996 self.config.n_lat_bins * self.config.n_lon_bins
997 } else {
998 2
999 };
1000
1001 let mut result = Array2::zeros((nrows, ncols));
1002
1003 let lat_bin_size = (self.lat_range.1 - self.lat_range.0) / self.config.n_lat_bins as f64;
1004 let lon_bin_size = (self.lon_range.1 - self.lon_range.0) / self.config.n_lon_bins as f64;
1005
1006 for i in 0..nrows {
1007 let lat = X[[i, 0]];
1008 let lon = X[[i, 1]];
1009
1010 let lat_bin = ((lat - self.lat_range.0) / lat_bin_size)
1011 .floor()
1012 .clamp(0.0, (self.config.n_lat_bins - 1) as f64) as usize;
1013 let lon_bin = ((lon - self.lon_range.0) / lon_bin_size)
1014 .floor()
1015 .clamp(0.0, (self.config.n_lon_bins - 1) as f64) as usize;
1016
1017 if self.config.one_hot {
1018 let bin_idx = lat_bin * self.config.n_lon_bins + lon_bin;
1019 result[[i, bin_idx]] = 1.0;
1020 } else {
1021 result[[i, 0]] = lat_bin as f64;
1022 result[[i, 1]] = lon_bin as f64;
1023 }
1024 }
1025
1026 Ok(result)
1027 }
1028}
1029
1030#[derive(Debug, Clone)]
1036pub struct SpatialClusteringConfig {
1037 pub method: SpatialClusteringMethod,
1039 pub n_clusters: usize,
1041 pub epsilon: Option<f64>,
1043 pub min_points: Option<usize>,
1045 pub metric: SpatialDistanceMetric,
1047}
1048
1049#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1051pub enum SpatialClusteringMethod {
1052 Grid,
1054 Density,
1056 KMeans,
1058 Hierarchical,
1060}
1061
1062impl Default for SpatialClusteringConfig {
1063 fn default() -> Self {
1064 Self {
1065 method: SpatialClusteringMethod::Grid,
1066 n_clusters: 10,
1067 epsilon: Some(5.0), min_points: Some(5),
1069 metric: SpatialDistanceMetric::Haversine,
1070 }
1071 }
1072}
1073
1074pub struct SpatialClustering {
1076 config: SpatialClusteringConfig,
1077}
1078
1079pub struct SpatialClusteringFitted {
1081 config: SpatialClusteringConfig,
1082 cluster_centers: Vec<Coordinate>,
1084 grid_bounds: Option<(f64, f64, f64, f64)>,
1086}
1087
1088impl SpatialClustering {
1089 pub fn new(config: SpatialClusteringConfig) -> Self {
1091 Self { config }
1092 }
1093}
1094
1095impl Estimator for SpatialClustering {
1096 type Config = SpatialClusteringConfig;
1097 type Error = SklearsError;
1098 type Float = f64;
1099
1100 fn config(&self) -> &Self::Config {
1101 &self.config
1102 }
1103}
1104
1105impl Fit<Array2<f64>, ()> for SpatialClustering {
1106 type Fitted = SpatialClusteringFitted;
1107
1108 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1109 if X.ncols() != 2 {
1110 return Err(SklearsError::InvalidInput(
1111 "Input must have exactly 2 columns (lat, lon)".to_string(),
1112 ));
1113 }
1114
1115 let cluster_centers = match self.config.method {
1116 SpatialClusteringMethod::Grid => {
1117 let lats = X.column(0);
1119 let lons = X.column(1);
1120 let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1121 let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1122 let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1123 let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1124
1125 let grid_size = (self.config.n_clusters as f64).sqrt().ceil() as usize;
1126 let lat_step = (lat_max - lat_min) / grid_size as f64;
1127 let lon_step = (lon_max - lon_min) / grid_size as f64;
1128
1129 let mut centers = Vec::new();
1130 for i in 0..grid_size {
1131 for j in 0..grid_size {
1132 let lat = lat_min + (i as f64 + 0.5) * lat_step;
1133 let lon = lon_min + (j as f64 + 0.5) * lon_step;
1134 if let Ok(coord) = Coordinate::new(lat, lon) {
1135 centers.push(coord);
1136 }
1137 }
1138 }
1139
1140 centers
1141 }
1142 SpatialClusteringMethod::KMeans => {
1143 let mut rng = seeded_rng(42);
1145 let nrows = X.nrows();
1146 let mut centers = Vec::new();
1147
1148 for _ in 0..self.config.n_clusters.min(nrows) {
1150 let idx = rng.random_range(0..nrows);
1151 if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1152 centers.push(coord);
1153 }
1154 }
1155
1156 for _ in 0..10 {
1158 let mut cluster_sums = vec![(0.0, 0.0); centers.len()];
1159 let mut cluster_counts = vec![0; centers.len()];
1160
1161 for i in 0..nrows {
1163 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1164 let mut min_dist = f64::INFINITY;
1165 let mut min_idx = 0;
1166
1167 for (j, center) in centers.iter().enumerate() {
1168 let dist = calculate_distance(&coord, center, self.config.metric)?;
1169 if dist < min_dist {
1170 min_dist = dist;
1171 min_idx = j;
1172 }
1173 }
1174
1175 cluster_sums[min_idx].0 += coord.lat;
1176 cluster_sums[min_idx].1 += coord.lon;
1177 cluster_counts[min_idx] += 1;
1178 }
1179
1180 for (j, center) in centers.iter_mut().enumerate() {
1182 if cluster_counts[j] > 0 {
1183 let new_lat = cluster_sums[j].0 / cluster_counts[j] as f64;
1184 let new_lon = cluster_sums[j].1 / cluster_counts[j] as f64;
1185 if let Ok(new_coord) = Coordinate::new(new_lat, new_lon) {
1186 *center = new_coord;
1187 }
1188 }
1189 }
1190 }
1191
1192 centers
1193 }
1194 SpatialClusteringMethod::Density | SpatialClusteringMethod::Hierarchical => {
1195 let mut rng = seeded_rng(42);
1197 let nrows = X.nrows();
1198 let mut centers = Vec::new();
1199
1200 let sample_size = self.config.n_clusters.min(nrows);
1201 for _ in 0..sample_size {
1202 let idx = rng.random_range(0..nrows);
1203 if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1204 centers.push(coord);
1205 }
1206 }
1207
1208 centers
1209 }
1210 };
1211
1212 let grid_bounds = if matches!(self.config.method, SpatialClusteringMethod::Grid) {
1213 let lats = X.column(0);
1214 let lons = X.column(1);
1215 let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1216 let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1217 let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1218 let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1219 Some((lat_min, lon_min, lat_max, lon_max))
1220 } else {
1221 None
1222 };
1223
1224 Ok(SpatialClusteringFitted {
1225 config: self.config,
1226 cluster_centers,
1227 grid_bounds,
1228 })
1229 }
1230}
1231
1232impl Transform<Array2<f64>, Array2<f64>> for SpatialClusteringFitted {
1233 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1234 if X.ncols() != 2 {
1235 return Err(SklearsError::InvalidInput(
1236 "Input must have exactly 2 columns (lat, lon)".to_string(),
1237 ));
1238 }
1239
1240 let nrows = X.nrows();
1241
1242 match self.config.method {
1243 SpatialClusteringMethod::Grid | SpatialClusteringMethod::KMeans => {
1244 let mut result = Array2::zeros((nrows, 2));
1246
1247 for i in 0..nrows {
1248 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1249 let mut min_dist = f64::INFINITY;
1250 let mut min_idx = 0;
1251
1252 for (j, center) in self.cluster_centers.iter().enumerate() {
1253 let dist = calculate_distance(&coord, center, self.config.metric)?;
1254 if dist < min_dist {
1255 min_dist = dist;
1256 min_idx = j;
1257 }
1258 }
1259
1260 result[[i, 0]] = min_idx as f64;
1261 result[[i, 1]] = min_dist;
1262 }
1263
1264 Ok(result)
1265 }
1266 SpatialClusteringMethod::Density => {
1267 let epsilon = self.config.epsilon.unwrap_or(5.0);
1269 let mut result = Array2::zeros((nrows, 1));
1270
1271 for i in 0..nrows {
1272 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1273 let mut density = 0.0;
1274
1275 for j in 0..nrows {
1276 if i != j {
1277 let other = Coordinate::new(X[[j, 0]], X[[j, 1]])?;
1278 let dist = calculate_distance(&coord, &other, self.config.metric)?;
1279 if dist <= epsilon {
1280 density += 1.0;
1281 }
1282 }
1283 }
1284
1285 result[[i, 0]] = density;
1286 }
1287
1288 Ok(result)
1289 }
1290 SpatialClusteringMethod::Hierarchical => {
1291 let n_centers = self.cluster_centers.len();
1293 let mut result = Array2::zeros((nrows, n_centers));
1294
1295 for i in 0..nrows {
1296 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1297
1298 for (j, center) in self.cluster_centers.iter().enumerate() {
1299 let dist = calculate_distance(&coord, center, self.config.metric)?;
1300 result[[i, j]] = dist;
1301 }
1302 }
1303
1304 Ok(result)
1305 }
1306 }
1307 }
1308}
1309
1310#[derive(Debug, Clone)]
1316pub struct SpatialAutocorrelationConfig {
1317 pub k_neighbors: usize,
1319 pub metric: SpatialDistanceMetric,
1321 pub include_morans_i: bool,
1323 pub include_lisa: bool,
1325}
1326
1327impl Default for SpatialAutocorrelationConfig {
1328 fn default() -> Self {
1329 Self {
1330 k_neighbors: 5,
1331 metric: SpatialDistanceMetric::Haversine,
1332 include_morans_i: true,
1333 include_lisa: true,
1334 }
1335 }
1336}
1337
1338pub struct SpatialAutocorrelation {
1340 config: SpatialAutocorrelationConfig,
1341}
1342
1343pub struct SpatialAutocorrelationFitted {
1345 config: SpatialAutocorrelationConfig,
1346 training_coords: Vec<Coordinate>,
1348}
1349
1350impl SpatialAutocorrelation {
1351 pub fn new(config: SpatialAutocorrelationConfig) -> Self {
1353 Self { config }
1354 }
1355}
1356
1357impl Estimator for SpatialAutocorrelation {
1358 type Config = SpatialAutocorrelationConfig;
1359 type Error = SklearsError;
1360 type Float = f64;
1361
1362 fn config(&self) -> &Self::Config {
1363 &self.config
1364 }
1365}
1366
1367impl Fit<Array2<f64>, Array1<f64>> for SpatialAutocorrelation {
1368 type Fitted = SpatialAutocorrelationFitted;
1369
1370 fn fit(self, X: &Array2<f64>, _y: &Array1<f64>) -> Result<Self::Fitted> {
1371 if X.ncols() != 2 {
1372 return Err(SklearsError::InvalidInput(
1373 "Input must have exactly 2 columns (lat, lon)".to_string(),
1374 ));
1375 }
1376
1377 let mut training_coords = Vec::with_capacity(X.nrows());
1378 for i in 0..X.nrows() {
1379 training_coords.push(Coordinate::new(X[[i, 0]], X[[i, 1]])?);
1380 }
1381
1382 Ok(SpatialAutocorrelationFitted {
1383 config: self.config,
1384 training_coords,
1385 })
1386 }
1387}
1388
1389impl Transform<Array2<f64>, Array2<f64>> for SpatialAutocorrelationFitted {
1390 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1391 if X.ncols() != 2 {
1392 return Err(SklearsError::InvalidInput(
1393 "Input must have exactly 2 columns (lat, lon)".to_string(),
1394 ));
1395 }
1396
1397 let nrows = X.nrows();
1398 let mut n_features = 0;
1399
1400 if self.config.include_lisa {
1401 n_features += 1; }
1403
1404 let mut result = Array2::zeros((nrows, n_features.max(1)));
1405
1406 for i in 0..nrows {
1407 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1408
1409 let mut distances: Vec<(usize, f64)> = self
1411 .training_coords
1412 .iter()
1413 .enumerate()
1414 .map(|(j, other)| {
1415 let dist = calculate_distance(&coord, other, self.config.metric)
1416 .unwrap_or(f64::INFINITY);
1417 (j, dist)
1418 })
1419 .collect();
1420
1421 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1422
1423 if self.config.include_lisa {
1425 let nearest_neighbors =
1428 &distances[1..=self.config.k_neighbors.min(distances.len() - 1)];
1429 let avg_neighbor_dist = nearest_neighbors.iter().map(|(_, d)| d).sum::<f64>()
1430 / nearest_neighbors.len() as f64;
1431
1432 result[[i, 0]] = 1.0 / (1.0 + avg_neighbor_dist); }
1434 }
1435
1436 Ok(result)
1437 }
1438}
1439
1440#[derive(Debug, Clone)]
1446pub struct ProximityFeaturesConfig {
1447 pub points_of_interest: Vec<(String, Coordinate)>,
1449 pub metric: SpatialDistanceMetric,
1451 pub max_distance: Option<f64>,
1453 pub include_indicators: bool,
1455}
1456
1457pub struct ProximityFeatures {
1459 config: ProximityFeaturesConfig,
1460}
1461
1462pub struct ProximityFeaturesFitted {
1464 config: ProximityFeaturesConfig,
1465}
1466
1467impl ProximityFeatures {
1468 pub fn new(config: ProximityFeaturesConfig) -> Self {
1470 Self { config }
1471 }
1472}
1473
1474impl Estimator for ProximityFeatures {
1475 type Config = ProximityFeaturesConfig;
1476 type Error = SklearsError;
1477 type Float = f64;
1478
1479 fn config(&self) -> &Self::Config {
1480 &self.config
1481 }
1482}
1483
1484impl Fit<Array2<f64>, ()> for ProximityFeatures {
1485 type Fitted = ProximityFeaturesFitted;
1486
1487 fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1488 if X.ncols() != 2 {
1489 return Err(SklearsError::InvalidInput(
1490 "Input must have exactly 2 columns (lat, lon)".to_string(),
1491 ));
1492 }
1493
1494 Ok(ProximityFeaturesFitted {
1495 config: self.config,
1496 })
1497 }
1498}
1499
1500impl Transform<Array2<f64>, Array2<f64>> for ProximityFeaturesFitted {
1501 fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1502 if X.ncols() != 2 {
1503 return Err(SklearsError::InvalidInput(
1504 "Input must have exactly 2 columns (lat, lon)".to_string(),
1505 ));
1506 }
1507
1508 let nrows = X.nrows();
1509 let n_pois = self.config.points_of_interest.len();
1510 let n_features = if self.config.include_indicators {
1511 n_pois * 2 } else {
1513 n_pois };
1515
1516 let mut result = Array2::zeros((nrows, n_features));
1517
1518 for i in 0..nrows {
1519 let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1520
1521 for (j, (_name, poi)) in self.config.points_of_interest.iter().enumerate() {
1522 let distance = calculate_distance(&coord, poi, self.config.metric)?;
1523
1524 result[[i, j]] = distance;
1525
1526 if self.config.include_indicators {
1527 let is_within = if let Some(max_dist) = self.config.max_distance {
1528 if distance <= max_dist {
1529 1.0
1530 } else {
1531 0.0
1532 }
1533 } else {
1534 1.0
1535 };
1536 result[[i, n_pois + j]] = is_within;
1537 }
1538 }
1539 }
1540
1541 Ok(result)
1542 }
1543}
1544
1545#[cfg(test)]
1550mod tests {
1551 use super::*;
1552 use approx::assert_relative_eq;
1553
1554 #[test]
1555 fn test_coordinate_creation() {
1556 let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1557 assert_eq!(coord.lat, 40.7128);
1558 assert_eq!(coord.lon, -74.0060);
1559
1560 assert!(Coordinate::new(91.0, 0.0).is_err());
1562 assert!(Coordinate::new(0.0, 181.0).is_err());
1563 }
1564
1565 #[test]
1566 fn test_haversine_distance() {
1567 let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1569 let london = Coordinate::new(51.5074, -0.1278).unwrap();
1570
1571 let distance = haversine_distance(&nyc, &london);
1572 assert!((distance - 5570.0).abs() < 50.0);
1574 }
1575
1576 #[test]
1577 fn test_vincenty_distance() {
1578 let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1579 let london = Coordinate::new(51.5074, -0.1278).unwrap();
1580
1581 let distance = vincenty_distance(&nyc, &london).unwrap();
1582 assert!((distance - 5570.0).abs() < 50.0);
1584 }
1585
1586 #[test]
1587 fn test_geohash_encode_decode() {
1588 let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1589 let geohash = Geohash::encode(&coord, 7).unwrap();
1590 assert_eq!(geohash.len(), 7);
1591
1592 let decoded = Geohash::decode(&geohash).unwrap();
1593 assert!((decoded.lat - coord.lat).abs() < 0.01);
1594 assert!((decoded.lon - coord.lon).abs() < 0.01);
1595 }
1596
1597 #[test]
1598 fn test_geohash_bounds() {
1599 let geohash = "dr5ru7";
1600 let (lat_min, lon_min, lat_max, lon_max) = Geohash::bounds(geohash).unwrap();
1601
1602 assert!(lat_min < lat_max);
1603 assert!(lon_min < lon_max);
1604 assert!(lat_min >= -90.0 && lat_max <= 90.0);
1605 assert!(lon_min >= -180.0 && lon_max <= 180.0);
1606 }
1607
1608 #[test]
1609 fn test_geohash_neighbors() {
1610 let geohash = "dr5ru7";
1611 let neighbors = Geohash::neighbors(geohash).unwrap();
1612
1613 assert!(neighbors.len() <= 8);
1615 assert!(neighbors.len() > 0);
1616
1617 for neighbor in neighbors.iter() {
1619 assert_eq!(neighbor.len(), geohash.len());
1620 }
1621 }
1622
1623 #[test]
1624 fn test_wgs84_to_web_mercator() {
1625 let (x, y) = wgs84_to_web_mercator(40.7128, -74.0060).unwrap();
1626
1627 assert!(x.abs() < 20037508.34); assert!(y.abs() < 20037508.34);
1630
1631 let (lat, lon) = web_mercator_to_wgs84(x, y).unwrap();
1633 assert_relative_eq!(lat, 40.7128, epsilon = 0.0001);
1634 assert_relative_eq!(lon, -74.0060, epsilon = 0.0001);
1635 }
1636
1637 #[test]
1638 fn test_coordinate_transformer() {
1639 use scirs2_core::ndarray::array;
1640
1641 let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1642
1643 let transformer =
1644 CoordinateTransformer::new(CoordinateSystem::WGS84, CoordinateSystem::WebMercator);
1645 let fitted = transformer.fit(&X, &()).unwrap();
1646 let result = fitted.transform(&X).unwrap();
1647
1648 assert_eq!(result.nrows(), 2);
1649 assert_eq!(result.ncols(), 2);
1650
1651 assert!(result[[0, 0]].abs() < 20037508.34);
1653 assert!(result[[0, 1]].abs() < 20037508.34);
1654 }
1655
1656 #[test]
1657 fn test_geohash_encoder() {
1658 use scirs2_core::ndarray::array;
1659
1660 let X = array![[40.7128, -74.0060], [40.7589, -73.9851], [51.5074, -0.1278]];
1661
1662 let config = GeohashEncoderConfig {
1663 precision: 5,
1664 include_neighbors: false,
1665 };
1666
1667 let encoder = GeohashEncoder::new(config);
1668 let fitted = encoder.fit(&X, &()).unwrap();
1669 let result = fitted.transform(&X).unwrap();
1670
1671 assert_eq!(result.nrows(), 3);
1672 assert!(result.ncols() > 0);
1674 }
1675
1676 #[test]
1677 fn test_spatial_distance_features() {
1678 use scirs2_core::ndarray::array;
1679
1680 let X = array![[40.7128, -74.0060], [40.7589, -73.9851]];
1681
1682 let reference_points = vec![
1683 Coordinate::new(40.7128, -74.0060).unwrap(), Coordinate::new(51.5074, -0.1278).unwrap(), ];
1686
1687 let config = SpatialDistanceFeaturesConfig {
1688 reference_points,
1689 metric: SpatialDistanceMetric::Haversine,
1690 include_inverse: false,
1691 include_squared: false,
1692 };
1693
1694 let transformer = SpatialDistanceFeatures::new(config);
1695 let fitted = transformer.fit(&X, &()).unwrap();
1696 let result = fitted.transform(&X).unwrap();
1697
1698 assert_eq!(result.nrows(), 2);
1699 assert_eq!(result.ncols(), 2); assert!(result[[0, 0]] < 1.0);
1703 }
1704
1705 #[test]
1706 fn test_spatial_binning() {
1707 use scirs2_core::ndarray::array;
1708
1709 let X = array![[40.0, -74.0], [41.0, -73.0], [42.0, -72.0], [43.0, -71.0]];
1710
1711 let config = SpatialBinningConfig {
1712 n_lat_bins: 2,
1713 n_lon_bins: 2,
1714 lat_range: Some((40.0, 44.0)),
1715 lon_range: Some((-75.0, -70.0)),
1716 one_hot: false,
1717 };
1718
1719 let binning = SpatialBinning::new(config);
1720 let fitted = binning.fit(&X, &()).unwrap();
1721 let result = fitted.transform(&X).unwrap();
1722
1723 assert_eq!(result.nrows(), 4);
1724 assert_eq!(result.ncols(), 2);
1725
1726 for i in 0..result.nrows() {
1728 assert!(result[[i, 0]] >= 0.0 && result[[i, 0]] < 2.0);
1729 assert!(result[[i, 1]] >= 0.0 && result[[i, 1]] < 2.0);
1730 }
1731 }
1732
1733 #[test]
1734 fn test_spatial_binning_one_hot() {
1735 use scirs2_core::ndarray::array;
1736
1737 let X = array![[40.0, -74.0], [41.0, -73.0],];
1738
1739 let config = SpatialBinningConfig {
1740 n_lat_bins: 2,
1741 n_lon_bins: 2,
1742 lat_range: Some((40.0, 42.0)),
1743 lon_range: Some((-75.0, -72.0)),
1744 one_hot: true,
1745 };
1746
1747 let binning = SpatialBinning::new(config);
1748 let fitted = binning.fit(&X, &()).unwrap();
1749 let result = fitted.transform(&X).unwrap();
1750
1751 assert_eq!(result.nrows(), 2);
1752 assert_eq!(result.ncols(), 4); for i in 0..result.nrows() {
1756 let sum: f64 = result.row(i).sum();
1757 assert_relative_eq!(sum, 1.0, epsilon = 1e-10);
1758 }
1759 }
1760
1761 #[test]
1762 fn test_spatial_clustering_grid() {
1763 use scirs2_core::ndarray::array;
1764
1765 let X = array![[40.0, -74.0], [40.5, -73.5], [41.0, -73.0], [41.5, -72.5]];
1766
1767 let config = SpatialClusteringConfig {
1768 method: SpatialClusteringMethod::Grid,
1769 n_clusters: 4,
1770 epsilon: None,
1771 min_points: None,
1772 metric: SpatialDistanceMetric::Haversine,
1773 };
1774
1775 let clustering = SpatialClustering::new(config);
1776 let fitted = clustering.fit(&X, &()).unwrap();
1777 let result = fitted.transform(&X).unwrap();
1778
1779 assert_eq!(result.nrows(), 4);
1780 assert_eq!(result.ncols(), 2); for i in 0..result.nrows() {
1784 assert!(result[[i, 0]] >= 0.0);
1785 assert!(result[[i, 1]] >= 0.0); }
1787 }
1788
1789 #[test]
1790 fn test_spatial_clustering_kmeans() {
1791 use scirs2_core::ndarray::array;
1792
1793 let X = array![[40.0, -74.0], [40.1, -74.1], [42.0, -72.0], [42.1, -72.1]];
1794
1795 let config = SpatialClusteringConfig {
1796 method: SpatialClusteringMethod::KMeans,
1797 n_clusters: 2,
1798 epsilon: None,
1799 min_points: None,
1800 metric: SpatialDistanceMetric::Haversine,
1801 };
1802
1803 let clustering = SpatialClustering::new(config);
1804 let fitted = clustering.fit(&X, &()).unwrap();
1805 let result = fitted.transform(&X).unwrap();
1806
1807 assert_eq!(result.nrows(), 4);
1808 assert_eq!(result.ncols(), 2);
1809
1810 assert_eq!(result[[0, 0]], result[[1, 0]]);
1812 assert_eq!(result[[2, 0]], result[[3, 0]]);
1813 }
1814
1815 #[test]
1816 fn test_spatial_clustering_density() {
1817 use scirs2_core::ndarray::array;
1818
1819 let X = array![
1820 [40.0, -74.0],
1821 [40.001, -74.001], [42.0, -72.0] ];
1824
1825 let config = SpatialClusteringConfig {
1826 method: SpatialClusteringMethod::Density,
1827 n_clusters: 2,
1828 epsilon: Some(1.0), min_points: Some(1),
1830 metric: SpatialDistanceMetric::Haversine,
1831 };
1832
1833 let clustering = SpatialClustering::new(config);
1834 let fitted = clustering.fit(&X, &()).unwrap();
1835 let result = fitted.transform(&X).unwrap();
1836
1837 assert_eq!(result.nrows(), 3);
1838 assert_eq!(result.ncols(), 1); assert!(result[[0, 0]] > result[[2, 0]]);
1842 assert!(result[[1, 0]] > result[[2, 0]]);
1843 }
1844
1845 #[test]
1846 fn test_proximity_features() {
1847 use scirs2_core::ndarray::array;
1848
1849 let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1850
1851 let pois = vec![
1852 (
1853 "NYC".to_string(),
1854 Coordinate::new(40.7128, -74.0060).unwrap(),
1855 ),
1856 (
1857 "London".to_string(),
1858 Coordinate::new(51.5074, -0.1278).unwrap(),
1859 ),
1860 ];
1861
1862 let config = ProximityFeaturesConfig {
1863 points_of_interest: pois,
1864 metric: SpatialDistanceMetric::Haversine,
1865 max_distance: Some(100.0),
1866 include_indicators: true,
1867 };
1868
1869 let proximity = ProximityFeatures::new(config);
1870 let fitted = proximity.fit(&X, &()).unwrap();
1871 let result = fitted.transform(&X).unwrap();
1872
1873 assert_eq!(result.nrows(), 2);
1874 assert_eq!(result.ncols(), 4); assert!(result[[0, 0]] < 1.0);
1878 assert_relative_eq!(result[[0, 2]], 1.0, epsilon = 1e-10); assert!(result[[1, 1]] < 1.0);
1882 assert_relative_eq!(result[[1, 3]], 1.0, epsilon = 1e-10); }
1884
1885 #[test]
1886 fn test_spatial_autocorrelation() {
1887 use scirs2_core::ndarray::array;
1888
1889 let X = array![[40.0, -74.0], [40.1, -74.0], [40.2, -74.0], [42.0, -72.0]];
1890
1891 let config = SpatialAutocorrelationConfig {
1892 k_neighbors: 2,
1893 metric: SpatialDistanceMetric::Haversine,
1894 include_morans_i: true,
1895 include_lisa: true,
1896 };
1897
1898 let autocorr = SpatialAutocorrelation::new(config);
1899 let y = array![1.0, 2.0, 3.0, 4.0]; let fitted = autocorr.fit(&X, &y).unwrap();
1901 let result = fitted.transform(&X).unwrap();
1902
1903 assert_eq!(result.nrows(), 4);
1904 assert!(result.ncols() >= 1);
1905
1906 for i in 0..result.nrows() {
1908 for j in 0..result.ncols() {
1909 assert!(result[[i, j]].is_finite());
1910 assert!(result[[i, j]] >= 0.0);
1911 }
1912 }
1913 }
1914
1915 #[test]
1916 fn test_utm_transformation() {
1917 let (easting, northing) = wgs84_to_utm(40.7128, -74.0060, 18, true).unwrap();
1918
1919 assert!(easting > 0.0);
1921 assert!(northing > 0.0);
1922
1923 let (lat, lon) = utm_to_wgs84(easting, northing, 18, true).unwrap();
1925 assert_relative_eq!(lat, 40.7128, epsilon = 0.01);
1926 assert_relative_eq!(lon, -74.0060, epsilon = 0.01);
1927 }
1928
1929 #[test]
1930 fn test_coordinate_systems_enum() {
1931 let wgs84 = CoordinateSystem::WGS84;
1932 let web_merc = CoordinateSystem::WebMercator;
1933 let utm = CoordinateSystem::UTM {
1934 zone: 18,
1935 northern: true,
1936 };
1937
1938 assert_eq!(wgs84, CoordinateSystem::WGS84);
1939 assert_eq!(web_merc, CoordinateSystem::WebMercator);
1940 assert_ne!(wgs84, web_merc);
1941
1942 match utm {
1943 CoordinateSystem::UTM { zone, northern } => {
1944 assert_eq!(zone, 18);
1945 assert!(northern);
1946 }
1947 _ => panic!("Expected UTM"),
1948 }
1949 }
1950
1951 #[test]
1952 fn test_distance_metrics() {
1953 let coord1 = Coordinate::new(40.7128, -74.0060).unwrap();
1954 let coord2 = Coordinate::new(40.7589, -73.9851).unwrap();
1955
1956 let haversine =
1957 calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Haversine).unwrap();
1958 let vincenty =
1959 calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Vincenty).unwrap();
1960
1961 assert!((haversine - vincenty).abs() < 0.1);
1963
1964 assert!(haversine > 0.0 && haversine < 10.0);
1966 }
1967}