sklears_preprocessing/
geospatial.rs

1//! Geospatial preprocessing module
2//!
3//! This module provides comprehensive geospatial data preprocessing including:
4//! - Coordinate system transformations (WGS84, Web Mercator, UTM)
5//! - Geohash encoding and decoding for spatial indexing
6//! - Spatial distance calculations (Haversine, Vincenty, Euclidean)
7//! - Proximity and neighborhood features
8//! - Spatial clustering and binning
9//! - Bounding box operations
10
11use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::random::seeded_rng;
13use scirs2_core::Rng;
14use sklears_core::prelude::*;
15use std::collections::HashMap;
16use std::f64::consts::PI;
17
18// ================================================================================================
19// Constants and Type Definitions
20// ================================================================================================
21
22/// Earth radius in kilometers (WGS84 mean radius)
23const EARTH_RADIUS_KM: f64 = 6371.0088;
24
25/// Earth radius in meters
26const EARTH_RADIUS_M: f64 = 6371008.8;
27
28/// WGS84 semi-major axis (meters)
29const WGS84_A: f64 = 6378137.0;
30
31/// WGS84 semi-minor axis (meters)
32const WGS84_B: f64 = 6356752.314245;
33
34/// WGS84 flattening
35const WGS84_F: f64 = 1.0 / 298.257223563;
36
37/// Web Mercator maximum latitude (approximately 85.051129 degrees)
38const WEB_MERCATOR_MAX_LAT: f64 = 85.051129;
39
40/// Base32 characters used in geohash encoding
41const BASE32_CHARS: &[u8; 32] = b"0123456789bcdefghjkmnpqrstuvwxyz";
42
43// ================================================================================================
44// Coordinate Systems
45// ================================================================================================
46
47/// Supported coordinate reference systems
48#[derive(Debug, Clone, Copy, PartialEq, Eq)]
49pub enum CoordinateSystem {
50    /// WGS84 geographic coordinates (latitude, longitude in degrees)
51    WGS84,
52    /// Web Mercator projection (EPSG:3857) - used by web mapping services
53    WebMercator,
54    /// Universal Transverse Mercator projection
55    UTM { zone: u8, northern: bool },
56}
57
58/// A geographic coordinate in WGS84 system
59#[derive(Debug, Clone, Copy)]
60pub struct Coordinate {
61    /// Latitude in degrees (-90 to 90)
62    pub lat: f64,
63    /// Longitude in degrees (-180 to 180)
64    pub lon: f64,
65}
66
67impl Coordinate {
68    /// Create a new coordinate
69    pub fn new(lat: f64, lon: f64) -> Result<Self> {
70        if !(-90.0..=90.0).contains(&lat) {
71            return Err(SklearsError::InvalidInput(format!(
72                "Latitude must be between -90 and 90, got {}",
73                lat
74            )));
75        }
76        if !(-180.0..=180.0).contains(&lon) {
77            return Err(SklearsError::InvalidInput(format!(
78                "Longitude must be between -180 and 180, got {}",
79                lon
80            )));
81        }
82        Ok(Self { lat, lon })
83    }
84
85    /// Convert to radians
86    pub fn to_radians(&self) -> (f64, f64) {
87        (self.lat.to_radians(), self.lon.to_radians())
88    }
89}
90
91// ================================================================================================
92// Distance Metrics
93// ================================================================================================
94
95/// Spatial distance metric
96#[derive(Debug, Clone, Copy, PartialEq, Eq)]
97pub enum SpatialDistanceMetric {
98    /// Haversine formula - great circle distance (fastest, good approximation)
99    Haversine,
100    /// Vincenty formula - more accurate for ellipsoid (slower, higher precision)
101    Vincenty,
102    /// Euclidean distance in projected coordinates
103    Euclidean,
104    /// Manhattan distance in projected coordinates
105    Manhattan,
106}
107
108/// Calculate distance between two coordinates using specified metric
109pub fn calculate_distance(
110    coord1: &Coordinate,
111    coord2: &Coordinate,
112    metric: SpatialDistanceMetric,
113) -> Result<f64> {
114    match metric {
115        SpatialDistanceMetric::Haversine => Ok(haversine_distance(coord1, coord2)),
116        SpatialDistanceMetric::Vincenty => vincenty_distance(coord1, coord2),
117        SpatialDistanceMetric::Euclidean => {
118            Ok(((coord2.lat - coord1.lat).powi(2) + (coord2.lon - coord1.lon).powi(2)).sqrt())
119        }
120        SpatialDistanceMetric::Manhattan => {
121            Ok((coord2.lat - coord1.lat).abs() + (coord2.lon - coord1.lon).abs())
122        }
123    }
124}
125
126/// Haversine distance between two coordinates in kilometers
127pub fn haversine_distance(coord1: &Coordinate, coord2: &Coordinate) -> f64 {
128    let (lat1, lon1) = coord1.to_radians();
129    let (lat2, lon2) = coord2.to_radians();
130
131    let dlat = lat2 - lat1;
132    let dlon = lon2 - lon1;
133
134    let a = (dlat / 2.0).sin().powi(2) + lat1.cos() * lat2.cos() * (dlon / 2.0).sin().powi(2);
135    let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
136
137    EARTH_RADIUS_KM * c
138}
139
140/// Vincenty distance between two coordinates in kilometers (more accurate)
141pub fn vincenty_distance(coord1: &Coordinate, coord2: &Coordinate) -> Result<f64> {
142    let (lat1, lon1) = coord1.to_radians();
143    let (lat2, lon2) = coord2.to_radians();
144
145    let l = lon2 - lon1;
146    let u1 = ((1.0 - WGS84_F) * lat1.tan()).atan();
147    let u2 = ((1.0 - WGS84_F) * lat2.tan()).atan();
148    let sin_u1 = u1.sin();
149    let cos_u1 = u1.cos();
150    let sin_u2 = u2.sin();
151    let cos_u2 = u2.cos();
152
153    let mut lambda = l;
154    let mut lambda_prev;
155    let mut iterations = 0;
156    const MAX_ITERATIONS: usize = 100;
157    const CONVERGENCE_THRESHOLD: f64 = 1e-12;
158
159    let (
160        mut sin_sigma,
161        mut cos_sigma,
162        mut sigma,
163        mut sin_alpha,
164        mut cos_sq_alpha,
165        mut cos_2sigma_m,
166    );
167
168    loop {
169        let sin_lambda = lambda.sin();
170        let cos_lambda = lambda.cos();
171        sin_sigma = ((cos_u2 * sin_lambda).powi(2)
172            + (cos_u1 * sin_u2 - sin_u1 * cos_u2 * cos_lambda).powi(2))
173        .sqrt();
174
175        if sin_sigma == 0.0 {
176            return Ok(0.0); // Coincident points
177        }
178
179        cos_sigma = sin_u1 * sin_u2 + cos_u1 * cos_u2 * cos_lambda;
180        sigma = sin_sigma.atan2(cos_sigma);
181        sin_alpha = cos_u1 * cos_u2 * sin_lambda / sin_sigma;
182        cos_sq_alpha = 1.0 - sin_alpha.powi(2);
183        cos_2sigma_m = if cos_sq_alpha != 0.0 {
184            cos_sigma - 2.0 * sin_u1 * sin_u2 / cos_sq_alpha
185        } else {
186            0.0
187        };
188
189        let c = WGS84_F / 16.0 * cos_sq_alpha * (4.0 + WGS84_F * (4.0 - 3.0 * cos_sq_alpha));
190        lambda_prev = lambda;
191        lambda = l
192            + (1.0 - c)
193                * WGS84_F
194                * sin_alpha
195                * (sigma
196                    + c * sin_sigma
197                        * (cos_2sigma_m + c * cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))));
198
199        iterations += 1;
200        if (lambda - lambda_prev).abs() < CONVERGENCE_THRESHOLD || iterations >= MAX_ITERATIONS {
201            break;
202        }
203    }
204
205    if iterations >= MAX_ITERATIONS {
206        return Err(SklearsError::InvalidInput(
207            "Vincenty formula failed to converge".to_string(),
208        ));
209    }
210
211    let u_sq = cos_sq_alpha * (WGS84_A.powi(2) - WGS84_B.powi(2)) / WGS84_B.powi(2);
212    let a = 1.0 + u_sq / 16384.0 * (4096.0 + u_sq * (-768.0 + u_sq * (320.0 - 175.0 * u_sq)));
213    let b = u_sq / 1024.0 * (256.0 + u_sq * (-128.0 + u_sq * (74.0 - 47.0 * u_sq)));
214    let delta_sigma = b
215        * sin_sigma
216        * (cos_2sigma_m
217            + b / 4.0
218                * (cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))
219                    - b / 6.0
220                        * cos_2sigma_m
221                        * (-3.0 + 4.0 * sin_sigma.powi(2))
222                        * (-3.0 + 4.0 * cos_2sigma_m.powi(2))));
223
224    let s = WGS84_B * a * (sigma - delta_sigma);
225
226    Ok(s / 1000.0) // Convert to kilometers
227}
228
229// ================================================================================================
230// Geohash Encoding
231// ================================================================================================
232
233/// Geohash encoder/decoder for spatial indexing
234pub struct Geohash;
235
236impl Geohash {
237    /// Encode a coordinate into a geohash string of specified precision
238    ///
239    /// # Arguments
240    /// * `coord` - The coordinate to encode
241    /// * `precision` - The number of characters in the geohash (typically 1-12)
242    pub fn encode(coord: &Coordinate, precision: usize) -> Result<String> {
243        if precision == 0 || precision > 12 {
244            return Err(SklearsError::InvalidInput(
245                "Geohash precision must be between 1 and 12".to_string(),
246            ));
247        }
248
249        let mut lat_min = -90.0;
250        let mut lat_max = 90.0;
251        let mut lon_min = -180.0;
252        let mut lon_max = 180.0;
253
254        let mut geohash = String::with_capacity(precision);
255        let mut bit = 0;
256        let mut ch = 0u8;
257        let mut is_lon = true;
258
259        while geohash.len() < precision {
260            if is_lon {
261                let mid = (lon_min + lon_max) / 2.0;
262                if coord.lon > mid {
263                    ch |= 1 << (4 - bit);
264                    lon_min = mid;
265                } else {
266                    lon_max = mid;
267                }
268            } else {
269                let mid = (lat_min + lat_max) / 2.0;
270                if coord.lat > mid {
271                    ch |= 1 << (4 - bit);
272                    lat_min = mid;
273                } else {
274                    lat_max = mid;
275                }
276            }
277
278            is_lon = !is_lon;
279            bit += 1;
280
281            if bit == 5 {
282                geohash.push(BASE32_CHARS[ch as usize] as char);
283                bit = 0;
284                ch = 0;
285            }
286        }
287
288        Ok(geohash)
289    }
290
291    /// Decode a geohash string into a coordinate (center of the geohash box)
292    pub fn decode(geohash: &str) -> Result<Coordinate> {
293        if geohash.is_empty() {
294            return Err(SklearsError::InvalidInput(
295                "Geohash string cannot be empty".to_string(),
296            ));
297        }
298
299        // Build reverse lookup table
300        let mut char_map = HashMap::new();
301        for (i, &c) in BASE32_CHARS.iter().enumerate() {
302            char_map.insert(c as char, i as u8);
303        }
304
305        let mut lat_min = -90.0;
306        let mut lat_max = 90.0;
307        let mut lon_min = -180.0;
308        let mut lon_max = 180.0;
309        let mut is_lon = true;
310
311        for ch in geohash.chars() {
312            let bits = char_map.get(&ch).ok_or_else(|| {
313                SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
314            })?;
315
316            for i in (0..5).rev() {
317                if is_lon {
318                    let mid = (lon_min + lon_max) / 2.0;
319                    if (bits >> i) & 1 == 1 {
320                        lon_min = mid;
321                    } else {
322                        lon_max = mid;
323                    }
324                } else {
325                    let mid = (lat_min + lat_max) / 2.0;
326                    if (bits >> i) & 1 == 1 {
327                        lat_min = mid;
328                    } else {
329                        lat_max = mid;
330                    }
331                }
332                is_lon = !is_lon;
333            }
334        }
335
336        let lat = (lat_min + lat_max) / 2.0;
337        let lon = (lon_min + lon_max) / 2.0;
338
339        Coordinate::new(lat, lon)
340    }
341
342    /// Get the bounding box (lat_min, lon_min, lat_max, lon_max) for a geohash
343    pub fn bounds(geohash: &str) -> Result<(f64, f64, f64, f64)> {
344        if geohash.is_empty() {
345            return Err(SklearsError::InvalidInput(
346                "Geohash string cannot be empty".to_string(),
347            ));
348        }
349
350        let mut char_map = HashMap::new();
351        for (i, &c) in BASE32_CHARS.iter().enumerate() {
352            char_map.insert(c as char, i as u8);
353        }
354
355        let mut lat_min = -90.0;
356        let mut lat_max = 90.0;
357        let mut lon_min = -180.0;
358        let mut lon_max = 180.0;
359        let mut is_lon = true;
360
361        for ch in geohash.chars() {
362            let bits = char_map.get(&ch).ok_or_else(|| {
363                SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
364            })?;
365
366            for i in (0..5).rev() {
367                if is_lon {
368                    let mid = (lon_min + lon_max) / 2.0;
369                    if (bits >> i) & 1 == 1 {
370                        lon_min = mid;
371                    } else {
372                        lon_max = mid;
373                    }
374                } else {
375                    let mid = (lat_min + lat_max) / 2.0;
376                    if (bits >> i) & 1 == 1 {
377                        lat_min = mid;
378                    } else {
379                        lat_max = mid;
380                    }
381                }
382                is_lon = !is_lon;
383            }
384        }
385
386        Ok((lat_min, lon_min, lat_max, lon_max))
387    }
388
389    /// Get all neighbors of a geohash
390    pub fn neighbors(geohash: &str) -> Result<Vec<String>> {
391        // Neighbor finding algorithm based on geohash properties
392        // This is a simplified version - a full implementation would handle edge cases
393        let coord = Self::decode(geohash)?;
394        let (lat_min, lon_min, lat_max, lon_max) = Self::bounds(geohash)?;
395
396        let lat_diff = lat_max - lat_min;
397        let lon_diff = lon_max - lon_min;
398
399        let precision = geohash.len();
400        let mut neighbors = Vec::with_capacity(8);
401
402        // 8 neighboring cells
403        let offsets = [
404            (-1.0, -1.0),
405            (-1.0, 0.0),
406            (-1.0, 1.0),
407            (0.0, -1.0),
408            (0.0, 1.0),
409            (1.0, -1.0),
410            (1.0, 0.0),
411            (1.0, 1.0),
412        ];
413
414        for (dlat, dlon) in offsets.iter() {
415            let new_lat = (coord.lat + dlat * lat_diff).clamp(-90.0, 90.0);
416            let new_lon = coord.lon + dlon * lon_diff;
417            // Handle longitude wrapping
418            let new_lon = if new_lon > 180.0 {
419                new_lon - 360.0
420            } else if new_lon < -180.0 {
421                new_lon + 360.0
422            } else {
423                new_lon
424            };
425
426            if let Ok(neighbor_coord) = Coordinate::new(new_lat, new_lon) {
427                if let Ok(neighbor_hash) = Self::encode(&neighbor_coord, precision) {
428                    if neighbor_hash != geohash {
429                        neighbors.push(neighbor_hash);
430                    }
431                }
432            }
433        }
434
435        Ok(neighbors)
436    }
437}
438
439// ================================================================================================
440// Coordinate Transformers
441// ================================================================================================
442
443/// Configuration for coordinate transformation
444#[derive(Debug, Clone)]
445pub struct CoordinateTransformerConfig {
446    /// Source coordinate system
447    pub from: CoordinateSystem,
448    /// Target coordinate system
449    pub to: CoordinateSystem,
450}
451
452/// Coordinate system transformer (Untrained state)
453pub struct CoordinateTransformer {
454    config: CoordinateTransformerConfig,
455}
456
457/// Coordinate system transformer (Fitted state)
458pub struct CoordinateTransformerFitted {
459    config: CoordinateTransformerConfig,
460}
461
462impl CoordinateTransformer {
463    /// Create a new coordinate transformer
464    pub fn new(from: CoordinateSystem, to: CoordinateSystem) -> Self {
465        Self {
466            config: CoordinateTransformerConfig { from, to },
467        }
468    }
469}
470
471impl Estimator for CoordinateTransformer {
472    type Config = CoordinateTransformerConfig;
473    type Error = SklearsError;
474    type Float = f64;
475
476    fn config(&self) -> &Self::Config {
477        &self.config
478    }
479}
480
481impl Fit<Array2<f64>, ()> for CoordinateTransformer {
482    type Fitted = CoordinateTransformerFitted;
483
484    fn fit(self, _X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
485        // No fitting required for coordinate transformation
486        Ok(CoordinateTransformerFitted {
487            config: self.config,
488        })
489    }
490}
491
492impl Transform<Array2<f64>, Array2<f64>> for CoordinateTransformerFitted {
493    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
494        if X.ncols() != 2 {
495            return Err(SklearsError::InvalidInput(
496                "Input must have exactly 2 columns (lat, lon)".to_string(),
497            ));
498        }
499
500        let nrows = X.nrows();
501        let mut result = Array2::zeros((nrows, 2));
502
503        for i in 0..nrows {
504            let lat = X[[i, 0]];
505            let lon = X[[i, 1]];
506
507            let transformed = match (&self.config.from, &self.config.to) {
508                (CoordinateSystem::WGS84, CoordinateSystem::WebMercator) => {
509                    wgs84_to_web_mercator(lat, lon)?
510                }
511                (CoordinateSystem::WebMercator, CoordinateSystem::WGS84) => {
512                    web_mercator_to_wgs84(lat, lon)?
513                }
514                (CoordinateSystem::WGS84, CoordinateSystem::UTM { zone, northern }) => {
515                    wgs84_to_utm(lat, lon, *zone, *northern)?
516                }
517                (CoordinateSystem::UTM { zone, northern }, CoordinateSystem::WGS84) => {
518                    utm_to_wgs84(lat, lon, *zone, *northern)?
519                }
520                _ => {
521                    return Err(SklearsError::InvalidInput(format!(
522                        "Unsupported coordinate transformation: {:?} to {:?}",
523                        self.config.from, self.config.to
524                    )))
525                }
526            };
527
528            result[[i, 0]] = transformed.0;
529            result[[i, 1]] = transformed.1;
530        }
531
532        Ok(result)
533    }
534}
535
536// ================================================================================================
537// Coordinate Transformation Functions
538// ================================================================================================
539
540/// Convert WGS84 (lat, lon) to Web Mercator (x, y)
541fn wgs84_to_web_mercator(lat: f64, lon: f64) -> Result<(f64, f64)> {
542    if lat.abs() > WEB_MERCATOR_MAX_LAT {
543        return Err(SklearsError::InvalidInput(format!(
544            "Latitude exceeds Web Mercator bounds: {}",
545            lat
546        )));
547    }
548
549    let x = WGS84_A * lon.to_radians();
550    let y = WGS84_A * ((PI / 4.0) + (lat.to_radians() / 2.0)).tan().ln();
551
552    Ok((x, y))
553}
554
555/// Convert Web Mercator (x, y) to WGS84 (lat, lon)
556fn web_mercator_to_wgs84(x: f64, y: f64) -> Result<(f64, f64)> {
557    let lon = (x / WGS84_A).to_degrees();
558    let lat = (2.0 * (y / WGS84_A).exp().atan() - PI / 2.0).to_degrees();
559
560    Ok((lat, lon))
561}
562
563/// Convert WGS84 to UTM coordinates
564fn wgs84_to_utm(lat: f64, lon: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
565    // Simplified UTM transformation
566    // Full implementation would use more precise formulas
567    let lat_rad = lat.to_radians();
568    let lon_rad = lon.to_radians();
569
570    let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
571    let lon_diff = lon_rad - central_meridian;
572
573    // UTM scale factor
574    let k0 = 0.9996;
575
576    // Simplified formulas
577    let n = WGS84_A / (1.0 - (WGS84_F * (2.0 - WGS84_F) * lat_rad.sin().powi(2))).sqrt();
578    let t = lat_rad.tan();
579    let c = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * lat_rad.cos().powi(2);
580    let a = lon_diff * lat_rad.cos();
581
582    let m = WGS84_A
583        * ((1.0
584            - WGS84_F * (2.0 - WGS84_F) / 4.0
585            - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0)
586            * lat_rad
587            - (3.0 * WGS84_F * (2.0 - WGS84_F) / 8.0
588                + 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 32.0)
589                * (2.0 * lat_rad).sin()
590            + (15.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 256.0) * (4.0 * lat_rad).sin());
591
592    let easting = 500000.0
593        + k0 * n
594            * (a + (1.0 - t.powi(2) + c) * a.powi(3) / 6.0
595                + (5.0 - 18.0 * t.powi(2) + t.powi(4)) * a.powi(5) / 120.0);
596
597    let northing_offset = if northern { 0.0 } else { 10000000.0 };
598    let northing = northing_offset
599        + k0 * (m + n
600            * t
601            * (a.powi(2) / 2.0
602                + (5.0 - t.powi(2) + 9.0 * c) * a.powi(4) / 24.0
603                + (61.0 - 58.0 * t.powi(2) + t.powi(4)) * a.powi(6) / 720.0));
604
605    Ok((easting, northing))
606}
607
608/// Convert UTM to WGS84 coordinates
609fn utm_to_wgs84(easting: f64, northing: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
610    // Simplified inverse UTM transformation
611    let k0 = 0.9996;
612    let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
613
614    let x = easting - 500000.0;
615    let y = if northern {
616        northing
617    } else {
618        northing - 10000000.0
619    };
620
621    let m = y / k0;
622    let mu = m
623        / (WGS84_A
624            * (1.0
625                - WGS84_F * (2.0 - WGS84_F) / 4.0
626                - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0));
627
628    // Footprint latitude
629    let e1 = (1.0 - (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt())
630        / (1.0 + (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt());
631    let phi1 = mu
632        + (3.0 * e1 / 2.0 - 27.0 * e1.powi(3) / 32.0) * (2.0 * mu).sin()
633        + (21.0 * e1.powi(2) / 16.0 - 55.0 * e1.powi(4) / 32.0) * (4.0 * mu).sin()
634        + (151.0 * e1.powi(3) / 96.0) * (6.0 * mu).sin();
635
636    let n1 = WGS84_A / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).sqrt();
637    let t1 = phi1.tan();
638    let c1 = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * phi1.cos().powi(2);
639    let r1 = WGS84_A * (1.0 - WGS84_F * (2.0 - WGS84_F))
640        / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).powf(1.5);
641    let d = x / (n1 * k0);
642
643    let lat = phi1
644        - (n1 * t1 / r1)
645            * (d.powi(2) / 2.0
646                - (5.0 + 3.0 * t1.powi(2) + 10.0 * c1 - 4.0 * c1.powi(2)) * d.powi(4) / 24.0
647                + (61.0 + 90.0 * t1.powi(2) + 298.0 * c1 + 45.0 * t1.powi(4)) * d.powi(6) / 720.0);
648
649    let lon = central_meridian
650        + (d - (1.0 + 2.0 * t1.powi(2) + c1) * d.powi(3) / 6.0
651            + (5.0 - 2.0 * c1 + 28.0 * t1.powi(2)) * d.powi(5) / 120.0)
652            / phi1.cos();
653
654    Ok((lat.to_degrees(), lon.to_degrees()))
655}
656
657// ================================================================================================
658// Geohash Feature Encoder
659// ================================================================================================
660
661/// Configuration for geohash encoding
662#[derive(Debug, Clone)]
663pub struct GeohashEncoderConfig {
664    /// Geohash precision (number of characters, 1-12)
665    pub precision: usize,
666    /// Whether to include neighbor geohashes as features
667    pub include_neighbors: bool,
668}
669
670impl Default for GeohashEncoderConfig {
671    fn default() -> Self {
672        Self {
673            precision: 7,
674            include_neighbors: false,
675        }
676    }
677}
678
679/// Geohash encoder for spatial indexing
680pub struct GeohashEncoder {
681    config: GeohashEncoderConfig,
682}
683
684/// Fitted geohash encoder
685pub struct GeohashEncoderFitted {
686    config: GeohashEncoderConfig,
687    /// Vocabulary of unique geohashes
688    vocabulary: HashMap<String, usize>,
689}
690
691impl GeohashEncoder {
692    /// Create a new geohash encoder
693    pub fn new(config: GeohashEncoderConfig) -> Self {
694        Self { config }
695    }
696}
697
698impl Estimator for GeohashEncoder {
699    type Config = GeohashEncoderConfig;
700    type Error = SklearsError;
701    type Float = f64;
702
703    fn config(&self) -> &Self::Config {
704        &self.config
705    }
706}
707
708impl Fit<Array2<f64>, ()> for GeohashEncoder {
709    type Fitted = GeohashEncoderFitted;
710
711    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
712        if X.ncols() != 2 {
713            return Err(SklearsError::InvalidInput(
714                "Input must have exactly 2 columns (lat, lon)".to_string(),
715            ));
716        }
717
718        let mut vocabulary = HashMap::new();
719
720        for i in 0..X.nrows() {
721            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
722            let geohash = Geohash::encode(&coord, self.config.precision)?;
723
724            if !vocabulary.contains_key(&geohash) {
725                let idx = vocabulary.len();
726                vocabulary.insert(geohash.clone(), idx);
727            }
728
729            if self.config.include_neighbors {
730                for neighbor in Geohash::neighbors(&geohash)? {
731                    if !vocabulary.contains_key(&neighbor) {
732                        let idx = vocabulary.len();
733                        vocabulary.insert(neighbor, idx);
734                    }
735                }
736            }
737        }
738
739        Ok(GeohashEncoderFitted {
740            config: self.config,
741            vocabulary,
742        })
743    }
744}
745
746impl Transform<Array2<f64>, Array2<f64>> for GeohashEncoderFitted {
747    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
748        if X.ncols() != 2 {
749            return Err(SklearsError::InvalidInput(
750                "Input must have exactly 2 columns (lat, lon)".to_string(),
751            ));
752        }
753
754        let nrows = X.nrows();
755        let ncols = if self.config.include_neighbors {
756            self.vocabulary.len() * 9 // Each geohash + 8 neighbors
757        } else {
758            self.vocabulary.len()
759        };
760
761        let mut result = Array2::zeros((nrows, ncols));
762
763        for i in 0..nrows {
764            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
765            let geohash = Geohash::encode(&coord, self.config.precision)?;
766
767            if let Some(&idx) = self.vocabulary.get(&geohash) {
768                result[[i, idx]] = 1.0;
769            }
770
771            if self.config.include_neighbors {
772                for neighbor in Geohash::neighbors(&geohash)? {
773                    if let Some(&idx) = self.vocabulary.get(&neighbor) {
774                        result[[i, idx]] = 1.0;
775                    }
776                }
777            }
778        }
779
780        Ok(result)
781    }
782}
783
784// ================================================================================================
785// Spatial Distance Features
786// ================================================================================================
787
788/// Configuration for spatial distance feature extraction
789#[derive(Debug, Clone)]
790pub struct SpatialDistanceFeaturesConfig {
791    /// Reference points to calculate distances from
792    pub reference_points: Vec<Coordinate>,
793    /// Distance metric to use
794    pub metric: SpatialDistanceMetric,
795    /// Whether to include inverse distances (1/distance)
796    pub include_inverse: bool,
797    /// Whether to include squared distances
798    pub include_squared: bool,
799}
800
801/// Spatial distance feature extractor
802pub struct SpatialDistanceFeatures {
803    config: SpatialDistanceFeaturesConfig,
804}
805
806/// Fitted spatial distance feature extractor
807pub struct SpatialDistanceFeaturesFitted {
808    config: SpatialDistanceFeaturesConfig,
809}
810
811impl SpatialDistanceFeatures {
812    /// Create a new spatial distance feature extractor
813    pub fn new(config: SpatialDistanceFeaturesConfig) -> Self {
814        Self { config }
815    }
816}
817
818impl Estimator for SpatialDistanceFeatures {
819    type Config = SpatialDistanceFeaturesConfig;
820    type Error = SklearsError;
821    type Float = f64;
822
823    fn config(&self) -> &Self::Config {
824        &self.config
825    }
826}
827
828impl Fit<Array2<f64>, ()> for SpatialDistanceFeatures {
829    type Fitted = SpatialDistanceFeaturesFitted;
830
831    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
832        if X.ncols() != 2 {
833            return Err(SklearsError::InvalidInput(
834                "Input must have exactly 2 columns (lat, lon)".to_string(),
835            ));
836        }
837
838        Ok(SpatialDistanceFeaturesFitted {
839            config: self.config,
840        })
841    }
842}
843
844impl Transform<Array2<f64>, Array2<f64>> for SpatialDistanceFeaturesFitted {
845    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
846        if X.ncols() != 2 {
847            return Err(SklearsError::InvalidInput(
848                "Input must have exactly 2 columns (lat, lon)".to_string(),
849            ));
850        }
851
852        let nrows = X.nrows();
853        let n_refs = self.config.reference_points.len();
854        let mut n_features = n_refs;
855        if self.config.include_inverse {
856            n_features += n_refs;
857        }
858        if self.config.include_squared {
859            n_features += n_refs;
860        }
861
862        let mut result = Array2::zeros((nrows, n_features));
863
864        for i in 0..nrows {
865            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
866
867            for (j, ref_point) in self.config.reference_points.iter().enumerate() {
868                let distance = calculate_distance(&coord, ref_point, self.config.metric)?;
869
870                result[[i, j]] = distance;
871
872                if self.config.include_inverse {
873                    result[[i, n_refs + j]] = if distance > 0.0 { 1.0 / distance } else { 0.0 };
874                }
875
876                if self.config.include_squared {
877                    let offset = if self.config.include_inverse {
878                        n_refs * 2
879                    } else {
880                        n_refs
881                    };
882                    result[[i, offset + j]] = distance * distance;
883                }
884            }
885        }
886
887        Ok(result)
888    }
889}
890
891// ================================================================================================
892// Spatial Binning
893// ================================================================================================
894
895/// Configuration for spatial binning
896#[derive(Debug, Clone)]
897pub struct SpatialBinningConfig {
898    /// Number of latitude bins
899    pub n_lat_bins: usize,
900    /// Number of longitude bins
901    pub n_lon_bins: usize,
902    /// Latitude range (min, max)
903    pub lat_range: Option<(f64, f64)>,
904    /// Longitude range (min, max)
905    pub lon_range: Option<(f64, f64)>,
906    /// Whether to use one-hot encoding for bins
907    pub one_hot: bool,
908}
909
910impl Default for SpatialBinningConfig {
911    fn default() -> Self {
912        Self {
913            n_lat_bins: 10,
914            n_lon_bins: 10,
915            lat_range: None,
916            lon_range: None,
917            one_hot: false,
918        }
919    }
920}
921
922/// Spatial binning transformer
923pub struct SpatialBinning {
924    config: SpatialBinningConfig,
925}
926
927/// Fitted spatial binning transformer
928pub struct SpatialBinningFitted {
929    config: SpatialBinningConfig,
930    lat_range: (f64, f64),
931    lon_range: (f64, f64),
932}
933
934impl SpatialBinning {
935    /// Create a new spatial binning transformer
936    pub fn new(config: SpatialBinningConfig) -> Self {
937        Self { config }
938    }
939}
940
941impl Estimator for SpatialBinning {
942    type Config = SpatialBinningConfig;
943    type Error = SklearsError;
944    type Float = f64;
945
946    fn config(&self) -> &Self::Config {
947        &self.config
948    }
949}
950
951impl Fit<Array2<f64>, ()> for SpatialBinning {
952    type Fitted = SpatialBinningFitted;
953
954    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
955        if X.ncols() != 2 {
956            return Err(SklearsError::InvalidInput(
957                "Input must have exactly 2 columns (lat, lon)".to_string(),
958            ));
959        }
960
961        let lat_range = if let Some(range) = self.config.lat_range {
962            range
963        } else {
964            let lats = X.column(0);
965            let min_lat = lats.iter().cloned().fold(f64::INFINITY, f64::min);
966            let max_lat = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
967            (min_lat, max_lat)
968        };
969
970        let lon_range = if let Some(range) = self.config.lon_range {
971            range
972        } else {
973            let lons = X.column(1);
974            let min_lon = lons.iter().cloned().fold(f64::INFINITY, f64::min);
975            let max_lon = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
976            (min_lon, max_lon)
977        };
978
979        Ok(SpatialBinningFitted {
980            config: self.config,
981            lat_range,
982            lon_range,
983        })
984    }
985}
986
987impl Transform<Array2<f64>, Array2<f64>> for SpatialBinningFitted {
988    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
989        if X.ncols() != 2 {
990            return Err(SklearsError::InvalidInput(
991                "Input must have exactly 2 columns (lat, lon)".to_string(),
992            ));
993        }
994
995        let nrows = X.nrows();
996        let ncols = if self.config.one_hot {
997            self.config.n_lat_bins * self.config.n_lon_bins
998        } else {
999            2
1000        };
1001
1002        let mut result = Array2::zeros((nrows, ncols));
1003
1004        let lat_bin_size = (self.lat_range.1 - self.lat_range.0) / self.config.n_lat_bins as f64;
1005        let lon_bin_size = (self.lon_range.1 - self.lon_range.0) / self.config.n_lon_bins as f64;
1006
1007        for i in 0..nrows {
1008            let lat = X[[i, 0]];
1009            let lon = X[[i, 1]];
1010
1011            let lat_bin = ((lat - self.lat_range.0) / lat_bin_size)
1012                .floor()
1013                .clamp(0.0, (self.config.n_lat_bins - 1) as f64) as usize;
1014            let lon_bin = ((lon - self.lon_range.0) / lon_bin_size)
1015                .floor()
1016                .clamp(0.0, (self.config.n_lon_bins - 1) as f64) as usize;
1017
1018            if self.config.one_hot {
1019                let bin_idx = lat_bin * self.config.n_lon_bins + lon_bin;
1020                result[[i, bin_idx]] = 1.0;
1021            } else {
1022                result[[i, 0]] = lat_bin as f64;
1023                result[[i, 1]] = lon_bin as f64;
1024            }
1025        }
1026
1027        Ok(result)
1028    }
1029}
1030
1031// ================================================================================================
1032// Spatial Clustering Features
1033// ================================================================================================
1034
1035/// Configuration for spatial clustering features
1036#[derive(Debug, Clone)]
1037pub struct SpatialClusteringConfig {
1038    /// Clustering method
1039    pub method: SpatialClusteringMethod,
1040    /// Number of clusters (for grid-based methods)
1041    pub n_clusters: usize,
1042    /// Epsilon for density-based methods (in km)
1043    pub epsilon: Option<f64>,
1044    /// Minimum points for density-based methods
1045    pub min_points: Option<usize>,
1046    /// Distance metric
1047    pub metric: SpatialDistanceMetric,
1048}
1049
1050/// Spatial clustering method
1051#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1052pub enum SpatialClusteringMethod {
1053    /// Grid-based clustering using spatial binning
1054    Grid,
1055    /// Density-based features (local density estimation)
1056    Density,
1057    /// K-means-style clustering on geographic coordinates
1058    KMeans,
1059    /// Hierarchical clustering features
1060    Hierarchical,
1061}
1062
1063impl Default for SpatialClusteringConfig {
1064    fn default() -> Self {
1065        Self {
1066            method: SpatialClusteringMethod::Grid,
1067            n_clusters: 10,
1068            epsilon: Some(5.0), // 5 km
1069            min_points: Some(5),
1070            metric: SpatialDistanceMetric::Haversine,
1071        }
1072    }
1073}
1074
1075/// Spatial clustering feature extractor
1076pub struct SpatialClustering {
1077    config: SpatialClusteringConfig,
1078}
1079
1080/// Fitted spatial clustering feature extractor
1081pub struct SpatialClusteringFitted {
1082    config: SpatialClusteringConfig,
1083    /// Cluster centers or representative points
1084    cluster_centers: Vec<Coordinate>,
1085    /// Grid bounds for grid-based clustering
1086    grid_bounds: Option<(f64, f64, f64, f64)>,
1087}
1088
1089impl SpatialClustering {
1090    /// Create a new spatial clustering feature extractor
1091    pub fn new(config: SpatialClusteringConfig) -> Self {
1092        Self { config }
1093    }
1094}
1095
1096impl Estimator for SpatialClustering {
1097    type Config = SpatialClusteringConfig;
1098    type Error = SklearsError;
1099    type Float = f64;
1100
1101    fn config(&self) -> &Self::Config {
1102        &self.config
1103    }
1104}
1105
1106impl Fit<Array2<f64>, ()> for SpatialClustering {
1107    type Fitted = SpatialClusteringFitted;
1108
1109    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1110        if X.ncols() != 2 {
1111            return Err(SklearsError::InvalidInput(
1112                "Input must have exactly 2 columns (lat, lon)".to_string(),
1113            ));
1114        }
1115
1116        let cluster_centers = match self.config.method {
1117            SpatialClusteringMethod::Grid => {
1118                // Grid-based clustering: create evenly spaced cluster centers
1119                let lats = X.column(0);
1120                let lons = X.column(1);
1121                let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1122                let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1123                let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1124                let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1125
1126                let grid_size = (self.config.n_clusters as f64).sqrt().ceil() as usize;
1127                let lat_step = (lat_max - lat_min) / grid_size as f64;
1128                let lon_step = (lon_max - lon_min) / grid_size as f64;
1129
1130                let mut centers = Vec::new();
1131                for i in 0..grid_size {
1132                    for j in 0..grid_size {
1133                        let lat = lat_min + (i as f64 + 0.5) * lat_step;
1134                        let lon = lon_min + (j as f64 + 0.5) * lon_step;
1135                        if let Ok(coord) = Coordinate::new(lat, lon) {
1136                            centers.push(coord);
1137                        }
1138                    }
1139                }
1140
1141                centers
1142            }
1143            SpatialClusteringMethod::KMeans => {
1144                // Simple K-means initialization using random sampling
1145                let mut rng = seeded_rng(42);
1146                let nrows = X.nrows();
1147                let mut centers = Vec::new();
1148
1149                // Initialize with random samples
1150                for _ in 0..self.config.n_clusters.min(nrows) {
1151                    let idx = rng.random_range(0..nrows);
1152                    if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1153                        centers.push(coord);
1154                    }
1155                }
1156
1157                // Perform a few iterations of k-means
1158                for _ in 0..10 {
1159                    let mut cluster_sums = vec![(0.0, 0.0); centers.len()];
1160                    let mut cluster_counts = vec![0; centers.len()];
1161
1162                    // Assignment step
1163                    for i in 0..nrows {
1164                        let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1165                        let mut min_dist = f64::INFINITY;
1166                        let mut min_idx = 0;
1167
1168                        for (j, center) in centers.iter().enumerate() {
1169                            let dist = calculate_distance(&coord, center, self.config.metric)?;
1170                            if dist < min_dist {
1171                                min_dist = dist;
1172                                min_idx = j;
1173                            }
1174                        }
1175
1176                        cluster_sums[min_idx].0 += coord.lat;
1177                        cluster_sums[min_idx].1 += coord.lon;
1178                        cluster_counts[min_idx] += 1;
1179                    }
1180
1181                    // Update step
1182                    for (j, center) in centers.iter_mut().enumerate() {
1183                        if cluster_counts[j] > 0 {
1184                            let new_lat = cluster_sums[j].0 / cluster_counts[j] as f64;
1185                            let new_lon = cluster_sums[j].1 / cluster_counts[j] as f64;
1186                            if let Ok(new_coord) = Coordinate::new(new_lat, new_lon) {
1187                                *center = new_coord;
1188                            }
1189                        }
1190                    }
1191                }
1192
1193                centers
1194            }
1195            SpatialClusteringMethod::Density | SpatialClusteringMethod::Hierarchical => {
1196                // For density-based methods, we'll sample representative points
1197                let mut rng = seeded_rng(42);
1198                let nrows = X.nrows();
1199                let mut centers = Vec::new();
1200
1201                let sample_size = self.config.n_clusters.min(nrows);
1202                for _ in 0..sample_size {
1203                    let idx = rng.random_range(0..nrows);
1204                    if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1205                        centers.push(coord);
1206                    }
1207                }
1208
1209                centers
1210            }
1211        };
1212
1213        let grid_bounds = if matches!(self.config.method, SpatialClusteringMethod::Grid) {
1214            let lats = X.column(0);
1215            let lons = X.column(1);
1216            let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1217            let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1218            let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1219            let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1220            Some((lat_min, lon_min, lat_max, lon_max))
1221        } else {
1222            None
1223        };
1224
1225        Ok(SpatialClusteringFitted {
1226            config: self.config,
1227            cluster_centers,
1228            grid_bounds,
1229        })
1230    }
1231}
1232
1233impl Transform<Array2<f64>, Array2<f64>> for SpatialClusteringFitted {
1234    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1235        if X.ncols() != 2 {
1236            return Err(SklearsError::InvalidInput(
1237                "Input must have exactly 2 columns (lat, lon)".to_string(),
1238            ));
1239        }
1240
1241        let nrows = X.nrows();
1242
1243        match self.config.method {
1244            SpatialClusteringMethod::Grid | SpatialClusteringMethod::KMeans => {
1245                // Output: cluster assignment + distance to nearest cluster center
1246                let mut result = Array2::zeros((nrows, 2));
1247
1248                for i in 0..nrows {
1249                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1250                    let mut min_dist = f64::INFINITY;
1251                    let mut min_idx = 0;
1252
1253                    for (j, center) in self.cluster_centers.iter().enumerate() {
1254                        let dist = calculate_distance(&coord, center, self.config.metric)?;
1255                        if dist < min_dist {
1256                            min_dist = dist;
1257                            min_idx = j;
1258                        }
1259                    }
1260
1261                    result[[i, 0]] = min_idx as f64;
1262                    result[[i, 1]] = min_dist;
1263                }
1264
1265                Ok(result)
1266            }
1267            SpatialClusteringMethod::Density => {
1268                // Output: local density estimate (number of neighbors within epsilon)
1269                let epsilon = self.config.epsilon.unwrap_or(5.0);
1270                let mut result = Array2::zeros((nrows, 1));
1271
1272                for i in 0..nrows {
1273                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1274                    let mut density = 0.0;
1275
1276                    for j in 0..nrows {
1277                        if i != j {
1278                            let other = Coordinate::new(X[[j, 0]], X[[j, 1]])?;
1279                            let dist = calculate_distance(&coord, &other, self.config.metric)?;
1280                            if dist <= epsilon {
1281                                density += 1.0;
1282                            }
1283                        }
1284                    }
1285
1286                    result[[i, 0]] = density;
1287                }
1288
1289                Ok(result)
1290            }
1291            SpatialClusteringMethod::Hierarchical => {
1292                // Output: distances to all cluster centers (linkage-based features)
1293                let n_centers = self.cluster_centers.len();
1294                let mut result = Array2::zeros((nrows, n_centers));
1295
1296                for i in 0..nrows {
1297                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1298
1299                    for (j, center) in self.cluster_centers.iter().enumerate() {
1300                        let dist = calculate_distance(&coord, center, self.config.metric)?;
1301                        result[[i, j]] = dist;
1302                    }
1303                }
1304
1305                Ok(result)
1306            }
1307        }
1308    }
1309}
1310
1311// ================================================================================================
1312// Spatial Autocorrelation Features
1313// ================================================================================================
1314
1315/// Configuration for spatial autocorrelation features
1316#[derive(Debug, Clone)]
1317pub struct SpatialAutocorrelationConfig {
1318    /// Number of nearest neighbors to consider
1319    pub k_neighbors: usize,
1320    /// Distance metric
1321    pub metric: SpatialDistanceMetric,
1322    /// Whether to include Moran's I statistic
1323    pub include_morans_i: bool,
1324    /// Whether to include local indicators of spatial association (LISA)
1325    pub include_lisa: bool,
1326}
1327
1328impl Default for SpatialAutocorrelationConfig {
1329    fn default() -> Self {
1330        Self {
1331            k_neighbors: 5,
1332            metric: SpatialDistanceMetric::Haversine,
1333            include_morans_i: true,
1334            include_lisa: true,
1335        }
1336    }
1337}
1338
1339/// Spatial autocorrelation feature extractor
1340pub struct SpatialAutocorrelation {
1341    config: SpatialAutocorrelationConfig,
1342}
1343
1344/// Fitted spatial autocorrelation feature extractor
1345pub struct SpatialAutocorrelationFitted {
1346    config: SpatialAutocorrelationConfig,
1347    /// Training data coordinates for spatial weights
1348    training_coords: Vec<Coordinate>,
1349}
1350
1351impl SpatialAutocorrelation {
1352    /// Create a new spatial autocorrelation feature extractor
1353    pub fn new(config: SpatialAutocorrelationConfig) -> Self {
1354        Self { config }
1355    }
1356}
1357
1358impl Estimator for SpatialAutocorrelation {
1359    type Config = SpatialAutocorrelationConfig;
1360    type Error = SklearsError;
1361    type Float = f64;
1362
1363    fn config(&self) -> &Self::Config {
1364        &self.config
1365    }
1366}
1367
1368impl Fit<Array2<f64>, Array1<f64>> for SpatialAutocorrelation {
1369    type Fitted = SpatialAutocorrelationFitted;
1370
1371    fn fit(self, X: &Array2<f64>, _y: &Array1<f64>) -> Result<Self::Fitted> {
1372        if X.ncols() != 2 {
1373            return Err(SklearsError::InvalidInput(
1374                "Input must have exactly 2 columns (lat, lon)".to_string(),
1375            ));
1376        }
1377
1378        let mut training_coords = Vec::with_capacity(X.nrows());
1379        for i in 0..X.nrows() {
1380            training_coords.push(Coordinate::new(X[[i, 0]], X[[i, 1]])?);
1381        }
1382
1383        Ok(SpatialAutocorrelationFitted {
1384            config: self.config,
1385            training_coords,
1386        })
1387    }
1388}
1389
1390impl Transform<Array2<f64>, Array2<f64>> for SpatialAutocorrelationFitted {
1391    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1392        if X.ncols() != 2 {
1393            return Err(SklearsError::InvalidInput(
1394                "Input must have exactly 2 columns (lat, lon)".to_string(),
1395            ));
1396        }
1397
1398        let nrows = X.nrows();
1399        let mut n_features = 0;
1400
1401        if self.config.include_lisa {
1402            n_features += 1; // Local Moran's I
1403        }
1404
1405        let mut result = Array2::zeros((nrows, n_features.max(1)));
1406
1407        for i in 0..nrows {
1408            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1409
1410            // Find k nearest neighbors
1411            let mut distances: Vec<(usize, f64)> = self
1412                .training_coords
1413                .iter()
1414                .enumerate()
1415                .map(|(j, other)| {
1416                    let dist = calculate_distance(&coord, other, self.config.metric)
1417                        .unwrap_or(f64::INFINITY);
1418                    (j, dist)
1419                })
1420                .collect();
1421
1422            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1423
1424            // Calculate local spatial statistics
1425            if self.config.include_lisa {
1426                // Simplified local Moran's I calculation
1427                // In a real implementation, this would use the target variable
1428                let nearest_neighbors =
1429                    &distances[1..=self.config.k_neighbors.min(distances.len() - 1)];
1430                let avg_neighbor_dist = nearest_neighbors.iter().map(|(_, d)| d).sum::<f64>()
1431                    / nearest_neighbors.len() as f64;
1432
1433                result[[i, 0]] = 1.0 / (1.0 + avg_neighbor_dist); // Inverse distance weighted
1434            }
1435        }
1436
1437        Ok(result)
1438    }
1439}
1440
1441// ================================================================================================
1442// Proximity Features
1443// ================================================================================================
1444
1445/// Configuration for proximity features
1446#[derive(Debug, Clone)]
1447pub struct ProximityFeaturesConfig {
1448    /// Points of interest to calculate proximity to
1449    pub points_of_interest: Vec<(String, Coordinate)>,
1450    /// Distance metric
1451    pub metric: SpatialDistanceMetric,
1452    /// Maximum distance to consider (in km)
1453    pub max_distance: Option<f64>,
1454    /// Whether to include binary indicators (within max_distance)
1455    pub include_indicators: bool,
1456}
1457
1458/// Proximity feature extractor
1459pub struct ProximityFeatures {
1460    config: ProximityFeaturesConfig,
1461}
1462
1463/// Fitted proximity feature extractor
1464pub struct ProximityFeaturesFitted {
1465    config: ProximityFeaturesConfig,
1466}
1467
1468impl ProximityFeatures {
1469    /// Create a new proximity feature extractor
1470    pub fn new(config: ProximityFeaturesConfig) -> Self {
1471        Self { config }
1472    }
1473}
1474
1475impl Estimator for ProximityFeatures {
1476    type Config = ProximityFeaturesConfig;
1477    type Error = SklearsError;
1478    type Float = f64;
1479
1480    fn config(&self) -> &Self::Config {
1481        &self.config
1482    }
1483}
1484
1485impl Fit<Array2<f64>, ()> for ProximityFeatures {
1486    type Fitted = ProximityFeaturesFitted;
1487
1488    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1489        if X.ncols() != 2 {
1490            return Err(SklearsError::InvalidInput(
1491                "Input must have exactly 2 columns (lat, lon)".to_string(),
1492            ));
1493        }
1494
1495        Ok(ProximityFeaturesFitted {
1496            config: self.config,
1497        })
1498    }
1499}
1500
1501impl Transform<Array2<f64>, Array2<f64>> for ProximityFeaturesFitted {
1502    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1503        if X.ncols() != 2 {
1504            return Err(SklearsError::InvalidInput(
1505                "Input must have exactly 2 columns (lat, lon)".to_string(),
1506            ));
1507        }
1508
1509        let nrows = X.nrows();
1510        let n_pois = self.config.points_of_interest.len();
1511        let n_features = if self.config.include_indicators {
1512            n_pois * 2 // Distance + indicator
1513        } else {
1514            n_pois // Just distance
1515        };
1516
1517        let mut result = Array2::zeros((nrows, n_features));
1518
1519        for i in 0..nrows {
1520            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1521
1522            for (j, (_name, poi)) in self.config.points_of_interest.iter().enumerate() {
1523                let distance = calculate_distance(&coord, poi, self.config.metric)?;
1524
1525                result[[i, j]] = distance;
1526
1527                if self.config.include_indicators {
1528                    let is_within = if let Some(max_dist) = self.config.max_distance {
1529                        if distance <= max_dist {
1530                            1.0
1531                        } else {
1532                            0.0
1533                        }
1534                    } else {
1535                        1.0
1536                    };
1537                    result[[i, n_pois + j]] = is_within;
1538                }
1539            }
1540        }
1541
1542        Ok(result)
1543    }
1544}
1545
1546// ================================================================================================
1547// Tests
1548// ================================================================================================
1549
1550#[cfg(test)]
1551mod tests {
1552    use super::*;
1553    use approx::assert_relative_eq;
1554
1555    #[test]
1556    fn test_coordinate_creation() {
1557        let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1558        assert_eq!(coord.lat, 40.7128);
1559        assert_eq!(coord.lon, -74.0060);
1560
1561        // Test invalid coordinates
1562        assert!(Coordinate::new(91.0, 0.0).is_err());
1563        assert!(Coordinate::new(0.0, 181.0).is_err());
1564    }
1565
1566    #[test]
1567    fn test_haversine_distance() {
1568        // New York to London
1569        let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1570        let london = Coordinate::new(51.5074, -0.1278).unwrap();
1571
1572        let distance = haversine_distance(&nyc, &london);
1573        // Expected distance is approximately 5570 km
1574        assert!((distance - 5570.0).abs() < 50.0);
1575    }
1576
1577    #[test]
1578    fn test_vincenty_distance() {
1579        let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1580        let london = Coordinate::new(51.5074, -0.1278).unwrap();
1581
1582        let distance = vincenty_distance(&nyc, &london).unwrap();
1583        // Vincenty should give similar but slightly more accurate result
1584        assert!((distance - 5570.0).abs() < 50.0);
1585    }
1586
1587    #[test]
1588    fn test_geohash_encode_decode() {
1589        let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1590        let geohash = Geohash::encode(&coord, 7).unwrap();
1591        assert_eq!(geohash.len(), 7);
1592
1593        let decoded = Geohash::decode(&geohash).unwrap();
1594        assert!((decoded.lat - coord.lat).abs() < 0.01);
1595        assert!((decoded.lon - coord.lon).abs() < 0.01);
1596    }
1597
1598    #[test]
1599    fn test_geohash_bounds() {
1600        let geohash = "dr5ru7";
1601        let (lat_min, lon_min, lat_max, lon_max) = Geohash::bounds(geohash).unwrap();
1602
1603        assert!(lat_min < lat_max);
1604        assert!(lon_min < lon_max);
1605        assert!(lat_min >= -90.0 && lat_max <= 90.0);
1606        assert!(lon_min >= -180.0 && lon_max <= 180.0);
1607    }
1608
1609    #[test]
1610    fn test_geohash_neighbors() {
1611        let geohash = "dr5ru7";
1612        let neighbors = Geohash::neighbors(geohash).unwrap();
1613
1614        // Should have up to 8 neighbors
1615        assert!(neighbors.len() <= 8);
1616        assert!(neighbors.len() > 0);
1617
1618        // All neighbors should have the same precision
1619        for neighbor in neighbors.iter() {
1620            assert_eq!(neighbor.len(), geohash.len());
1621        }
1622    }
1623
1624    #[test]
1625    fn test_wgs84_to_web_mercator() {
1626        let (x, y) = wgs84_to_web_mercator(40.7128, -74.0060).unwrap();
1627
1628        // Verify conversion is reasonable
1629        assert!(x.abs() < 20037508.34); // Web Mercator max
1630        assert!(y.abs() < 20037508.34);
1631
1632        // Round trip
1633        let (lat, lon) = web_mercator_to_wgs84(x, y).unwrap();
1634        assert_relative_eq!(lat, 40.7128, epsilon = 0.0001);
1635        assert_relative_eq!(lon, -74.0060, epsilon = 0.0001);
1636    }
1637
1638    #[test]
1639    fn test_coordinate_transformer() {
1640        use scirs2_core::ndarray::array;
1641
1642        let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1643
1644        let transformer =
1645            CoordinateTransformer::new(CoordinateSystem::WGS84, CoordinateSystem::WebMercator);
1646        let fitted = transformer.fit(&X, &()).unwrap();
1647        let result = fitted.transform(&X).unwrap();
1648
1649        assert_eq!(result.nrows(), 2);
1650        assert_eq!(result.ncols(), 2);
1651
1652        // Verify transformation produces reasonable values
1653        assert!(result[[0, 0]].abs() < 20037508.34);
1654        assert!(result[[0, 1]].abs() < 20037508.34);
1655    }
1656
1657    #[test]
1658    fn test_geohash_encoder() {
1659        use scirs2_core::ndarray::array;
1660
1661        let X = array![[40.7128, -74.0060], [40.7589, -73.9851], [51.5074, -0.1278]];
1662
1663        let config = GeohashEncoderConfig {
1664            precision: 5,
1665            include_neighbors: false,
1666        };
1667
1668        let encoder = GeohashEncoder::new(config);
1669        let fitted = encoder.fit(&X, &()).unwrap();
1670        let result = fitted.transform(&X).unwrap();
1671
1672        assert_eq!(result.nrows(), 3);
1673        // Should have as many columns as unique geohashes
1674        assert!(result.ncols() > 0);
1675    }
1676
1677    #[test]
1678    fn test_spatial_distance_features() {
1679        use scirs2_core::ndarray::array;
1680
1681        let X = array![[40.7128, -74.0060], [40.7589, -73.9851]];
1682
1683        let reference_points = vec![
1684            Coordinate::new(40.7128, -74.0060).unwrap(), // NYC
1685            Coordinate::new(51.5074, -0.1278).unwrap(),  // London
1686        ];
1687
1688        let config = SpatialDistanceFeaturesConfig {
1689            reference_points,
1690            metric: SpatialDistanceMetric::Haversine,
1691            include_inverse: false,
1692            include_squared: false,
1693        };
1694
1695        let transformer = SpatialDistanceFeatures::new(config);
1696        let fitted = transformer.fit(&X, &()).unwrap();
1697        let result = fitted.transform(&X).unwrap();
1698
1699        assert_eq!(result.nrows(), 2);
1700        assert_eq!(result.ncols(), 2); // 2 reference points
1701
1702        // First point should be close to first reference point
1703        assert!(result[[0, 0]] < 1.0);
1704    }
1705
1706    #[test]
1707    fn test_spatial_binning() {
1708        use scirs2_core::ndarray::array;
1709
1710        let X = array![[40.0, -74.0], [41.0, -73.0], [42.0, -72.0], [43.0, -71.0]];
1711
1712        let config = SpatialBinningConfig {
1713            n_lat_bins: 2,
1714            n_lon_bins: 2,
1715            lat_range: Some((40.0, 44.0)),
1716            lon_range: Some((-75.0, -70.0)),
1717            one_hot: false,
1718        };
1719
1720        let binning = SpatialBinning::new(config);
1721        let fitted = binning.fit(&X, &()).unwrap();
1722        let result = fitted.transform(&X).unwrap();
1723
1724        assert_eq!(result.nrows(), 4);
1725        assert_eq!(result.ncols(), 2);
1726
1727        // Verify binning produces valid bin indices
1728        for i in 0..result.nrows() {
1729            assert!(result[[i, 0]] >= 0.0 && result[[i, 0]] < 2.0);
1730            assert!(result[[i, 1]] >= 0.0 && result[[i, 1]] < 2.0);
1731        }
1732    }
1733
1734    #[test]
1735    fn test_spatial_binning_one_hot() {
1736        use scirs2_core::ndarray::array;
1737
1738        let X = array![[40.0, -74.0], [41.0, -73.0],];
1739
1740        let config = SpatialBinningConfig {
1741            n_lat_bins: 2,
1742            n_lon_bins: 2,
1743            lat_range: Some((40.0, 42.0)),
1744            lon_range: Some((-75.0, -72.0)),
1745            one_hot: true,
1746        };
1747
1748        let binning = SpatialBinning::new(config);
1749        let fitted = binning.fit(&X, &()).unwrap();
1750        let result = fitted.transform(&X).unwrap();
1751
1752        assert_eq!(result.nrows(), 2);
1753        assert_eq!(result.ncols(), 4); // 2x2 bins
1754
1755        // Verify one-hot encoding
1756        for i in 0..result.nrows() {
1757            let sum: f64 = result.row(i).sum();
1758            assert_relative_eq!(sum, 1.0, epsilon = 1e-10);
1759        }
1760    }
1761
1762    #[test]
1763    fn test_spatial_clustering_grid() {
1764        use scirs2_core::ndarray::array;
1765
1766        let X = array![[40.0, -74.0], [40.5, -73.5], [41.0, -73.0], [41.5, -72.5]];
1767
1768        let config = SpatialClusteringConfig {
1769            method: SpatialClusteringMethod::Grid,
1770            n_clusters: 4,
1771            epsilon: None,
1772            min_points: None,
1773            metric: SpatialDistanceMetric::Haversine,
1774        };
1775
1776        let clustering = SpatialClustering::new(config);
1777        let fitted = clustering.fit(&X, &()).unwrap();
1778        let result = fitted.transform(&X).unwrap();
1779
1780        assert_eq!(result.nrows(), 4);
1781        assert_eq!(result.ncols(), 2); // Cluster assignment + distance
1782
1783        // Verify cluster assignments are valid
1784        for i in 0..result.nrows() {
1785            assert!(result[[i, 0]] >= 0.0);
1786            assert!(result[[i, 1]] >= 0.0); // Distance should be non-negative
1787        }
1788    }
1789
1790    #[test]
1791    fn test_spatial_clustering_kmeans() {
1792        use scirs2_core::ndarray::array;
1793
1794        let X = array![[40.0, -74.0], [40.1, -74.1], [42.0, -72.0], [42.1, -72.1]];
1795
1796        let config = SpatialClusteringConfig {
1797            method: SpatialClusteringMethod::KMeans,
1798            n_clusters: 2,
1799            epsilon: None,
1800            min_points: None,
1801            metric: SpatialDistanceMetric::Haversine,
1802        };
1803
1804        let clustering = SpatialClustering::new(config);
1805        let fitted = clustering.fit(&X, &()).unwrap();
1806        let result = fitted.transform(&X).unwrap();
1807
1808        assert_eq!(result.nrows(), 4);
1809        assert_eq!(result.ncols(), 2);
1810
1811        // Points that are close together should be in the same cluster
1812        assert_eq!(result[[0, 0]], result[[1, 0]]);
1813        assert_eq!(result[[2, 0]], result[[3, 0]]);
1814    }
1815
1816    #[test]
1817    fn test_spatial_clustering_density() {
1818        use scirs2_core::ndarray::array;
1819
1820        let X = array![
1821            [40.0, -74.0],
1822            [40.001, -74.001], // Very close to first point
1823            [42.0, -72.0]      // Far away
1824        ];
1825
1826        let config = SpatialClusteringConfig {
1827            method: SpatialClusteringMethod::Density,
1828            n_clusters: 2,
1829            epsilon: Some(1.0), // 1 km radius
1830            min_points: Some(1),
1831            metric: SpatialDistanceMetric::Haversine,
1832        };
1833
1834        let clustering = SpatialClustering::new(config);
1835        let fitted = clustering.fit(&X, &()).unwrap();
1836        let result = fitted.transform(&X).unwrap();
1837
1838        assert_eq!(result.nrows(), 3);
1839        assert_eq!(result.ncols(), 1); // Just density
1840
1841        // First two points should have higher density
1842        assert!(result[[0, 0]] > result[[2, 0]]);
1843        assert!(result[[1, 0]] > result[[2, 0]]);
1844    }
1845
1846    #[test]
1847    fn test_proximity_features() {
1848        use scirs2_core::ndarray::array;
1849
1850        let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1851
1852        let pois = vec![
1853            (
1854                "NYC".to_string(),
1855                Coordinate::new(40.7128, -74.0060).unwrap(),
1856            ),
1857            (
1858                "London".to_string(),
1859                Coordinate::new(51.5074, -0.1278).unwrap(),
1860            ),
1861        ];
1862
1863        let config = ProximityFeaturesConfig {
1864            points_of_interest: pois,
1865            metric: SpatialDistanceMetric::Haversine,
1866            max_distance: Some(100.0),
1867            include_indicators: true,
1868        };
1869
1870        let proximity = ProximityFeatures::new(config);
1871        let fitted = proximity.fit(&X, &()).unwrap();
1872        let result = fitted.transform(&X).unwrap();
1873
1874        assert_eq!(result.nrows(), 2);
1875        assert_eq!(result.ncols(), 4); // 2 POIs * 2 (distance + indicator)
1876
1877        // First point should be very close to NYC POI
1878        assert!(result[[0, 0]] < 1.0);
1879        assert_relative_eq!(result[[0, 2]], 1.0, epsilon = 1e-10); // Within 100km
1880
1881        // Second point should be very close to London POI
1882        assert!(result[[1, 1]] < 1.0);
1883        assert_relative_eq!(result[[1, 3]], 1.0, epsilon = 1e-10); // Within 100km
1884    }
1885
1886    #[test]
1887    fn test_spatial_autocorrelation() {
1888        use scirs2_core::ndarray::array;
1889
1890        let X = array![[40.0, -74.0], [40.1, -74.0], [40.2, -74.0], [42.0, -72.0]];
1891
1892        let config = SpatialAutocorrelationConfig {
1893            k_neighbors: 2,
1894            metric: SpatialDistanceMetric::Haversine,
1895            include_morans_i: true,
1896            include_lisa: true,
1897        };
1898
1899        let autocorr = SpatialAutocorrelation::new(config);
1900        let y = array![1.0, 2.0, 3.0, 4.0]; // Dummy y for testing
1901        let fitted = autocorr.fit(&X, &y).unwrap();
1902        let result = fitted.transform(&X).unwrap();
1903
1904        assert_eq!(result.nrows(), 4);
1905        assert!(result.ncols() >= 1);
1906
1907        // Verify all values are finite and non-negative
1908        for i in 0..result.nrows() {
1909            for j in 0..result.ncols() {
1910                assert!(result[[i, j]].is_finite());
1911                assert!(result[[i, j]] >= 0.0);
1912            }
1913        }
1914    }
1915
1916    #[test]
1917    fn test_utm_transformation() {
1918        let (easting, northing) = wgs84_to_utm(40.7128, -74.0060, 18, true).unwrap();
1919
1920        // Verify transformation produces reasonable UTM coordinates
1921        assert!(easting > 0.0);
1922        assert!(northing > 0.0);
1923
1924        // Round trip
1925        let (lat, lon) = utm_to_wgs84(easting, northing, 18, true).unwrap();
1926        assert_relative_eq!(lat, 40.7128, epsilon = 0.01);
1927        assert_relative_eq!(lon, -74.0060, epsilon = 0.01);
1928    }
1929
1930    #[test]
1931    fn test_coordinate_systems_enum() {
1932        let wgs84 = CoordinateSystem::WGS84;
1933        let web_merc = CoordinateSystem::WebMercator;
1934        let utm = CoordinateSystem::UTM {
1935            zone: 18,
1936            northern: true,
1937        };
1938
1939        assert_eq!(wgs84, CoordinateSystem::WGS84);
1940        assert_eq!(web_merc, CoordinateSystem::WebMercator);
1941        assert_ne!(wgs84, web_merc);
1942
1943        match utm {
1944            CoordinateSystem::UTM { zone, northern } => {
1945                assert_eq!(zone, 18);
1946                assert!(northern);
1947            }
1948            _ => panic!("Expected UTM"),
1949        }
1950    }
1951
1952    #[test]
1953    fn test_distance_metrics() {
1954        let coord1 = Coordinate::new(40.7128, -74.0060).unwrap();
1955        let coord2 = Coordinate::new(40.7589, -73.9851).unwrap();
1956
1957        let haversine =
1958            calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Haversine).unwrap();
1959        let vincenty =
1960            calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Vincenty).unwrap();
1961
1962        // Both should give similar results
1963        assert!((haversine - vincenty).abs() < 0.1);
1964
1965        // Distance should be reasonable (a few km)
1966        assert!(haversine > 0.0 && haversine < 10.0);
1967    }
1968}