sklears_preprocessing/
geospatial.rs

1//! Geospatial preprocessing module
2//!
3//! This module provides comprehensive geospatial data preprocessing including:
4//! - Coordinate system transformations (WGS84, Web Mercator, UTM)
5//! - Geohash encoding and decoding for spatial indexing
6//! - Spatial distance calculations (Haversine, Vincenty, Euclidean)
7//! - Proximity and neighborhood features
8//! - Spatial clustering and binning
9//! - Bounding box operations
10
11use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::random::seeded_rng;
13use sklears_core::prelude::*;
14use std::collections::HashMap;
15use std::f64::consts::PI;
16
17// ================================================================================================
18// Constants and Type Definitions
19// ================================================================================================
20
21/// Earth radius in kilometers (WGS84 mean radius)
22const EARTH_RADIUS_KM: f64 = 6371.0088;
23
24/// Earth radius in meters
25const EARTH_RADIUS_M: f64 = 6371008.8;
26
27/// WGS84 semi-major axis (meters)
28const WGS84_A: f64 = 6378137.0;
29
30/// WGS84 semi-minor axis (meters)
31const WGS84_B: f64 = 6356752.314245;
32
33/// WGS84 flattening
34const WGS84_F: f64 = 1.0 / 298.257223563;
35
36/// Web Mercator maximum latitude (approximately 85.051129 degrees)
37const WEB_MERCATOR_MAX_LAT: f64 = 85.051129;
38
39/// Base32 characters used in geohash encoding
40const BASE32_CHARS: &[u8; 32] = b"0123456789bcdefghjkmnpqrstuvwxyz";
41
42// ================================================================================================
43// Coordinate Systems
44// ================================================================================================
45
46/// Supported coordinate reference systems
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum CoordinateSystem {
49    /// WGS84 geographic coordinates (latitude, longitude in degrees)
50    WGS84,
51    /// Web Mercator projection (EPSG:3857) - used by web mapping services
52    WebMercator,
53    /// Universal Transverse Mercator projection
54    UTM { zone: u8, northern: bool },
55}
56
57/// A geographic coordinate in WGS84 system
58#[derive(Debug, Clone, Copy)]
59pub struct Coordinate {
60    /// Latitude in degrees (-90 to 90)
61    pub lat: f64,
62    /// Longitude in degrees (-180 to 180)
63    pub lon: f64,
64}
65
66impl Coordinate {
67    /// Create a new coordinate
68    pub fn new(lat: f64, lon: f64) -> Result<Self> {
69        if !(-90.0..=90.0).contains(&lat) {
70            return Err(SklearsError::InvalidInput(format!(
71                "Latitude must be between -90 and 90, got {}",
72                lat
73            )));
74        }
75        if !(-180.0..=180.0).contains(&lon) {
76            return Err(SklearsError::InvalidInput(format!(
77                "Longitude must be between -180 and 180, got {}",
78                lon
79            )));
80        }
81        Ok(Self { lat, lon })
82    }
83
84    /// Convert to radians
85    pub fn to_radians(&self) -> (f64, f64) {
86        (self.lat.to_radians(), self.lon.to_radians())
87    }
88}
89
90// ================================================================================================
91// Distance Metrics
92// ================================================================================================
93
94/// Spatial distance metric
95#[derive(Debug, Clone, Copy, PartialEq, Eq)]
96pub enum SpatialDistanceMetric {
97    /// Haversine formula - great circle distance (fastest, good approximation)
98    Haversine,
99    /// Vincenty formula - more accurate for ellipsoid (slower, higher precision)
100    Vincenty,
101    /// Euclidean distance in projected coordinates
102    Euclidean,
103    /// Manhattan distance in projected coordinates
104    Manhattan,
105}
106
107/// Calculate distance between two coordinates using specified metric
108pub fn calculate_distance(
109    coord1: &Coordinate,
110    coord2: &Coordinate,
111    metric: SpatialDistanceMetric,
112) -> Result<f64> {
113    match metric {
114        SpatialDistanceMetric::Haversine => Ok(haversine_distance(coord1, coord2)),
115        SpatialDistanceMetric::Vincenty => vincenty_distance(coord1, coord2),
116        SpatialDistanceMetric::Euclidean => {
117            Ok(((coord2.lat - coord1.lat).powi(2) + (coord2.lon - coord1.lon).powi(2)).sqrt())
118        }
119        SpatialDistanceMetric::Manhattan => {
120            Ok((coord2.lat - coord1.lat).abs() + (coord2.lon - coord1.lon).abs())
121        }
122    }
123}
124
125/// Haversine distance between two coordinates in kilometers
126pub fn haversine_distance(coord1: &Coordinate, coord2: &Coordinate) -> f64 {
127    let (lat1, lon1) = coord1.to_radians();
128    let (lat2, lon2) = coord2.to_radians();
129
130    let dlat = lat2 - lat1;
131    let dlon = lon2 - lon1;
132
133    let a = (dlat / 2.0).sin().powi(2) + lat1.cos() * lat2.cos() * (dlon / 2.0).sin().powi(2);
134    let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
135
136    EARTH_RADIUS_KM * c
137}
138
139/// Vincenty distance between two coordinates in kilometers (more accurate)
140pub fn vincenty_distance(coord1: &Coordinate, coord2: &Coordinate) -> Result<f64> {
141    let (lat1, lon1) = coord1.to_radians();
142    let (lat2, lon2) = coord2.to_radians();
143
144    let l = lon2 - lon1;
145    let u1 = ((1.0 - WGS84_F) * lat1.tan()).atan();
146    let u2 = ((1.0 - WGS84_F) * lat2.tan()).atan();
147    let sin_u1 = u1.sin();
148    let cos_u1 = u1.cos();
149    let sin_u2 = u2.sin();
150    let cos_u2 = u2.cos();
151
152    let mut lambda = l;
153    let mut lambda_prev;
154    let mut iterations = 0;
155    const MAX_ITERATIONS: usize = 100;
156    const CONVERGENCE_THRESHOLD: f64 = 1e-12;
157
158    let (
159        mut sin_sigma,
160        mut cos_sigma,
161        mut sigma,
162        mut sin_alpha,
163        mut cos_sq_alpha,
164        mut cos_2sigma_m,
165    );
166
167    loop {
168        let sin_lambda = lambda.sin();
169        let cos_lambda = lambda.cos();
170        sin_sigma = ((cos_u2 * sin_lambda).powi(2)
171            + (cos_u1 * sin_u2 - sin_u1 * cos_u2 * cos_lambda).powi(2))
172        .sqrt();
173
174        if sin_sigma == 0.0 {
175            return Ok(0.0); // Coincident points
176        }
177
178        cos_sigma = sin_u1 * sin_u2 + cos_u1 * cos_u2 * cos_lambda;
179        sigma = sin_sigma.atan2(cos_sigma);
180        sin_alpha = cos_u1 * cos_u2 * sin_lambda / sin_sigma;
181        cos_sq_alpha = 1.0 - sin_alpha.powi(2);
182        cos_2sigma_m = if cos_sq_alpha != 0.0 {
183            cos_sigma - 2.0 * sin_u1 * sin_u2 / cos_sq_alpha
184        } else {
185            0.0
186        };
187
188        let c = WGS84_F / 16.0 * cos_sq_alpha * (4.0 + WGS84_F * (4.0 - 3.0 * cos_sq_alpha));
189        lambda_prev = lambda;
190        lambda = l
191            + (1.0 - c)
192                * WGS84_F
193                * sin_alpha
194                * (sigma
195                    + c * sin_sigma
196                        * (cos_2sigma_m + c * cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))));
197
198        iterations += 1;
199        if (lambda - lambda_prev).abs() < CONVERGENCE_THRESHOLD || iterations >= MAX_ITERATIONS {
200            break;
201        }
202    }
203
204    if iterations >= MAX_ITERATIONS {
205        return Err(SklearsError::InvalidInput(
206            "Vincenty formula failed to converge".to_string(),
207        ));
208    }
209
210    let u_sq = cos_sq_alpha * (WGS84_A.powi(2) - WGS84_B.powi(2)) / WGS84_B.powi(2);
211    let a = 1.0 + u_sq / 16384.0 * (4096.0 + u_sq * (-768.0 + u_sq * (320.0 - 175.0 * u_sq)));
212    let b = u_sq / 1024.0 * (256.0 + u_sq * (-128.0 + u_sq * (74.0 - 47.0 * u_sq)));
213    let delta_sigma = b
214        * sin_sigma
215        * (cos_2sigma_m
216            + b / 4.0
217                * (cos_sigma * (-1.0 + 2.0 * cos_2sigma_m.powi(2))
218                    - b / 6.0
219                        * cos_2sigma_m
220                        * (-3.0 + 4.0 * sin_sigma.powi(2))
221                        * (-3.0 + 4.0 * cos_2sigma_m.powi(2))));
222
223    let s = WGS84_B * a * (sigma - delta_sigma);
224
225    Ok(s / 1000.0) // Convert to kilometers
226}
227
228// ================================================================================================
229// Geohash Encoding
230// ================================================================================================
231
232/// Geohash encoder/decoder for spatial indexing
233pub struct Geohash;
234
235impl Geohash {
236    /// Encode a coordinate into a geohash string of specified precision
237    ///
238    /// # Arguments
239    /// * `coord` - The coordinate to encode
240    /// * `precision` - The number of characters in the geohash (typically 1-12)
241    pub fn encode(coord: &Coordinate, precision: usize) -> Result<String> {
242        if precision == 0 || precision > 12 {
243            return Err(SklearsError::InvalidInput(
244                "Geohash precision must be between 1 and 12".to_string(),
245            ));
246        }
247
248        let mut lat_min = -90.0;
249        let mut lat_max = 90.0;
250        let mut lon_min = -180.0;
251        let mut lon_max = 180.0;
252
253        let mut geohash = String::with_capacity(precision);
254        let mut bit = 0;
255        let mut ch = 0u8;
256        let mut is_lon = true;
257
258        while geohash.len() < precision {
259            if is_lon {
260                let mid = (lon_min + lon_max) / 2.0;
261                if coord.lon > mid {
262                    ch |= 1 << (4 - bit);
263                    lon_min = mid;
264                } else {
265                    lon_max = mid;
266                }
267            } else {
268                let mid = (lat_min + lat_max) / 2.0;
269                if coord.lat > mid {
270                    ch |= 1 << (4 - bit);
271                    lat_min = mid;
272                } else {
273                    lat_max = mid;
274                }
275            }
276
277            is_lon = !is_lon;
278            bit += 1;
279
280            if bit == 5 {
281                geohash.push(BASE32_CHARS[ch as usize] as char);
282                bit = 0;
283                ch = 0;
284            }
285        }
286
287        Ok(geohash)
288    }
289
290    /// Decode a geohash string into a coordinate (center of the geohash box)
291    pub fn decode(geohash: &str) -> Result<Coordinate> {
292        if geohash.is_empty() {
293            return Err(SklearsError::InvalidInput(
294                "Geohash string cannot be empty".to_string(),
295            ));
296        }
297
298        // Build reverse lookup table
299        let mut char_map = HashMap::new();
300        for (i, &c) in BASE32_CHARS.iter().enumerate() {
301            char_map.insert(c as char, i as u8);
302        }
303
304        let mut lat_min = -90.0;
305        let mut lat_max = 90.0;
306        let mut lon_min = -180.0;
307        let mut lon_max = 180.0;
308        let mut is_lon = true;
309
310        for ch in geohash.chars() {
311            let bits = char_map.get(&ch).ok_or_else(|| {
312                SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
313            })?;
314
315            for i in (0..5).rev() {
316                if is_lon {
317                    let mid = (lon_min + lon_max) / 2.0;
318                    if (bits >> i) & 1 == 1 {
319                        lon_min = mid;
320                    } else {
321                        lon_max = mid;
322                    }
323                } else {
324                    let mid = (lat_min + lat_max) / 2.0;
325                    if (bits >> i) & 1 == 1 {
326                        lat_min = mid;
327                    } else {
328                        lat_max = mid;
329                    }
330                }
331                is_lon = !is_lon;
332            }
333        }
334
335        let lat = (lat_min + lat_max) / 2.0;
336        let lon = (lon_min + lon_max) / 2.0;
337
338        Coordinate::new(lat, lon)
339    }
340
341    /// Get the bounding box (lat_min, lon_min, lat_max, lon_max) for a geohash
342    pub fn bounds(geohash: &str) -> Result<(f64, f64, f64, f64)> {
343        if geohash.is_empty() {
344            return Err(SklearsError::InvalidInput(
345                "Geohash string cannot be empty".to_string(),
346            ));
347        }
348
349        let mut char_map = HashMap::new();
350        for (i, &c) in BASE32_CHARS.iter().enumerate() {
351            char_map.insert(c as char, i as u8);
352        }
353
354        let mut lat_min = -90.0;
355        let mut lat_max = 90.0;
356        let mut lon_min = -180.0;
357        let mut lon_max = 180.0;
358        let mut is_lon = true;
359
360        for ch in geohash.chars() {
361            let bits = char_map.get(&ch).ok_or_else(|| {
362                SklearsError::InvalidInput(format!("Invalid geohash character: {}", ch))
363            })?;
364
365            for i in (0..5).rev() {
366                if is_lon {
367                    let mid = (lon_min + lon_max) / 2.0;
368                    if (bits >> i) & 1 == 1 {
369                        lon_min = mid;
370                    } else {
371                        lon_max = mid;
372                    }
373                } else {
374                    let mid = (lat_min + lat_max) / 2.0;
375                    if (bits >> i) & 1 == 1 {
376                        lat_min = mid;
377                    } else {
378                        lat_max = mid;
379                    }
380                }
381                is_lon = !is_lon;
382            }
383        }
384
385        Ok((lat_min, lon_min, lat_max, lon_max))
386    }
387
388    /// Get all neighbors of a geohash
389    pub fn neighbors(geohash: &str) -> Result<Vec<String>> {
390        // Neighbor finding algorithm based on geohash properties
391        // This is a simplified version - a full implementation would handle edge cases
392        let coord = Self::decode(geohash)?;
393        let (lat_min, lon_min, lat_max, lon_max) = Self::bounds(geohash)?;
394
395        let lat_diff = lat_max - lat_min;
396        let lon_diff = lon_max - lon_min;
397
398        let precision = geohash.len();
399        let mut neighbors = Vec::with_capacity(8);
400
401        // 8 neighboring cells
402        let offsets = [
403            (-1.0, -1.0),
404            (-1.0, 0.0),
405            (-1.0, 1.0),
406            (0.0, -1.0),
407            (0.0, 1.0),
408            (1.0, -1.0),
409            (1.0, 0.0),
410            (1.0, 1.0),
411        ];
412
413        for (dlat, dlon) in offsets.iter() {
414            let new_lat = (coord.lat + dlat * lat_diff).clamp(-90.0, 90.0);
415            let new_lon = coord.lon + dlon * lon_diff;
416            // Handle longitude wrapping
417            let new_lon = if new_lon > 180.0 {
418                new_lon - 360.0
419            } else if new_lon < -180.0 {
420                new_lon + 360.0
421            } else {
422                new_lon
423            };
424
425            if let Ok(neighbor_coord) = Coordinate::new(new_lat, new_lon) {
426                if let Ok(neighbor_hash) = Self::encode(&neighbor_coord, precision) {
427                    if neighbor_hash != geohash {
428                        neighbors.push(neighbor_hash);
429                    }
430                }
431            }
432        }
433
434        Ok(neighbors)
435    }
436}
437
438// ================================================================================================
439// Coordinate Transformers
440// ================================================================================================
441
442/// Configuration for coordinate transformation
443#[derive(Debug, Clone)]
444pub struct CoordinateTransformerConfig {
445    /// Source coordinate system
446    pub from: CoordinateSystem,
447    /// Target coordinate system
448    pub to: CoordinateSystem,
449}
450
451/// Coordinate system transformer (Untrained state)
452pub struct CoordinateTransformer {
453    config: CoordinateTransformerConfig,
454}
455
456/// Coordinate system transformer (Fitted state)
457pub struct CoordinateTransformerFitted {
458    config: CoordinateTransformerConfig,
459}
460
461impl CoordinateTransformer {
462    /// Create a new coordinate transformer
463    pub fn new(from: CoordinateSystem, to: CoordinateSystem) -> Self {
464        Self {
465            config: CoordinateTransformerConfig { from, to },
466        }
467    }
468}
469
470impl Estimator for CoordinateTransformer {
471    type Config = CoordinateTransformerConfig;
472    type Error = SklearsError;
473    type Float = f64;
474
475    fn config(&self) -> &Self::Config {
476        &self.config
477    }
478}
479
480impl Fit<Array2<f64>, ()> for CoordinateTransformer {
481    type Fitted = CoordinateTransformerFitted;
482
483    fn fit(self, _X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
484        // No fitting required for coordinate transformation
485        Ok(CoordinateTransformerFitted {
486            config: self.config,
487        })
488    }
489}
490
491impl Transform<Array2<f64>, Array2<f64>> for CoordinateTransformerFitted {
492    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
493        if X.ncols() != 2 {
494            return Err(SklearsError::InvalidInput(
495                "Input must have exactly 2 columns (lat, lon)".to_string(),
496            ));
497        }
498
499        let nrows = X.nrows();
500        let mut result = Array2::zeros((nrows, 2));
501
502        for i in 0..nrows {
503            let lat = X[[i, 0]];
504            let lon = X[[i, 1]];
505
506            let transformed = match (&self.config.from, &self.config.to) {
507                (CoordinateSystem::WGS84, CoordinateSystem::WebMercator) => {
508                    wgs84_to_web_mercator(lat, lon)?
509                }
510                (CoordinateSystem::WebMercator, CoordinateSystem::WGS84) => {
511                    web_mercator_to_wgs84(lat, lon)?
512                }
513                (CoordinateSystem::WGS84, CoordinateSystem::UTM { zone, northern }) => {
514                    wgs84_to_utm(lat, lon, *zone, *northern)?
515                }
516                (CoordinateSystem::UTM { zone, northern }, CoordinateSystem::WGS84) => {
517                    utm_to_wgs84(lat, lon, *zone, *northern)?
518                }
519                _ => {
520                    return Err(SklearsError::InvalidInput(format!(
521                        "Unsupported coordinate transformation: {:?} to {:?}",
522                        self.config.from, self.config.to
523                    )))
524                }
525            };
526
527            result[[i, 0]] = transformed.0;
528            result[[i, 1]] = transformed.1;
529        }
530
531        Ok(result)
532    }
533}
534
535// ================================================================================================
536// Coordinate Transformation Functions
537// ================================================================================================
538
539/// Convert WGS84 (lat, lon) to Web Mercator (x, y)
540fn wgs84_to_web_mercator(lat: f64, lon: f64) -> Result<(f64, f64)> {
541    if lat.abs() > WEB_MERCATOR_MAX_LAT {
542        return Err(SklearsError::InvalidInput(format!(
543            "Latitude exceeds Web Mercator bounds: {}",
544            lat
545        )));
546    }
547
548    let x = WGS84_A * lon.to_radians();
549    let y = WGS84_A * ((PI / 4.0) + (lat.to_radians() / 2.0)).tan().ln();
550
551    Ok((x, y))
552}
553
554/// Convert Web Mercator (x, y) to WGS84 (lat, lon)
555fn web_mercator_to_wgs84(x: f64, y: f64) -> Result<(f64, f64)> {
556    let lon = (x / WGS84_A).to_degrees();
557    let lat = (2.0 * (y / WGS84_A).exp().atan() - PI / 2.0).to_degrees();
558
559    Ok((lat, lon))
560}
561
562/// Convert WGS84 to UTM coordinates
563fn wgs84_to_utm(lat: f64, lon: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
564    // Simplified UTM transformation
565    // Full implementation would use more precise formulas
566    let lat_rad = lat.to_radians();
567    let lon_rad = lon.to_radians();
568
569    let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
570    let lon_diff = lon_rad - central_meridian;
571
572    // UTM scale factor
573    let k0 = 0.9996;
574
575    // Simplified formulas
576    let n = WGS84_A / (1.0 - (WGS84_F * (2.0 - WGS84_F) * lat_rad.sin().powi(2))).sqrt();
577    let t = lat_rad.tan();
578    let c = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * lat_rad.cos().powi(2);
579    let a = lon_diff * lat_rad.cos();
580
581    let m = WGS84_A
582        * ((1.0
583            - WGS84_F * (2.0 - WGS84_F) / 4.0
584            - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0)
585            * lat_rad
586            - (3.0 * WGS84_F * (2.0 - WGS84_F) / 8.0
587                + 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 32.0)
588                * (2.0 * lat_rad).sin()
589            + (15.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 256.0) * (4.0 * lat_rad).sin());
590
591    let easting = 500000.0
592        + k0 * n
593            * (a + (1.0 - t.powi(2) + c) * a.powi(3) / 6.0
594                + (5.0 - 18.0 * t.powi(2) + t.powi(4)) * a.powi(5) / 120.0);
595
596    let northing_offset = if northern { 0.0 } else { 10000000.0 };
597    let northing = northing_offset
598        + k0 * (m + n
599            * t
600            * (a.powi(2) / 2.0
601                + (5.0 - t.powi(2) + 9.0 * c) * a.powi(4) / 24.0
602                + (61.0 - 58.0 * t.powi(2) + t.powi(4)) * a.powi(6) / 720.0));
603
604    Ok((easting, northing))
605}
606
607/// Convert UTM to WGS84 coordinates
608fn utm_to_wgs84(easting: f64, northing: f64, zone: u8, northern: bool) -> Result<(f64, f64)> {
609    // Simplified inverse UTM transformation
610    let k0 = 0.9996;
611    let central_meridian = ((zone as f64 - 1.0) * 6.0 - 180.0 + 3.0).to_radians();
612
613    let x = easting - 500000.0;
614    let y = if northern {
615        northing
616    } else {
617        northing - 10000000.0
618    };
619
620    let m = y / k0;
621    let mu = m
622        / (WGS84_A
623            * (1.0
624                - WGS84_F * (2.0 - WGS84_F) / 4.0
625                - 3.0 * (WGS84_F * (2.0 - WGS84_F)).powi(2) / 64.0));
626
627    // Footprint latitude
628    let e1 = (1.0 - (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt())
629        / (1.0 + (1.0 - WGS84_F * (2.0 - WGS84_F)).sqrt());
630    let phi1 = mu
631        + (3.0 * e1 / 2.0 - 27.0 * e1.powi(3) / 32.0) * (2.0 * mu).sin()
632        + (21.0 * e1.powi(2) / 16.0 - 55.0 * e1.powi(4) / 32.0) * (4.0 * mu).sin()
633        + (151.0 * e1.powi(3) / 96.0) * (6.0 * mu).sin();
634
635    let n1 = WGS84_A / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).sqrt();
636    let t1 = phi1.tan();
637    let c1 = (WGS84_F * (2.0 - WGS84_F)) / (1.0 - WGS84_F * (2.0 - WGS84_F)) * phi1.cos().powi(2);
638    let r1 = WGS84_A * (1.0 - WGS84_F * (2.0 - WGS84_F))
639        / (1.0 - WGS84_F * (2.0 - WGS84_F) * phi1.sin().powi(2)).powf(1.5);
640    let d = x / (n1 * k0);
641
642    let lat = phi1
643        - (n1 * t1 / r1)
644            * (d.powi(2) / 2.0
645                - (5.0 + 3.0 * t1.powi(2) + 10.0 * c1 - 4.0 * c1.powi(2)) * d.powi(4) / 24.0
646                + (61.0 + 90.0 * t1.powi(2) + 298.0 * c1 + 45.0 * t1.powi(4)) * d.powi(6) / 720.0);
647
648    let lon = central_meridian
649        + (d - (1.0 + 2.0 * t1.powi(2) + c1) * d.powi(3) / 6.0
650            + (5.0 - 2.0 * c1 + 28.0 * t1.powi(2)) * d.powi(5) / 120.0)
651            / phi1.cos();
652
653    Ok((lat.to_degrees(), lon.to_degrees()))
654}
655
656// ================================================================================================
657// Geohash Feature Encoder
658// ================================================================================================
659
660/// Configuration for geohash encoding
661#[derive(Debug, Clone)]
662pub struct GeohashEncoderConfig {
663    /// Geohash precision (number of characters, 1-12)
664    pub precision: usize,
665    /// Whether to include neighbor geohashes as features
666    pub include_neighbors: bool,
667}
668
669impl Default for GeohashEncoderConfig {
670    fn default() -> Self {
671        Self {
672            precision: 7,
673            include_neighbors: false,
674        }
675    }
676}
677
678/// Geohash encoder for spatial indexing
679pub struct GeohashEncoder {
680    config: GeohashEncoderConfig,
681}
682
683/// Fitted geohash encoder
684pub struct GeohashEncoderFitted {
685    config: GeohashEncoderConfig,
686    /// Vocabulary of unique geohashes
687    vocabulary: HashMap<String, usize>,
688}
689
690impl GeohashEncoder {
691    /// Create a new geohash encoder
692    pub fn new(config: GeohashEncoderConfig) -> Self {
693        Self { config }
694    }
695}
696
697impl Estimator for GeohashEncoder {
698    type Config = GeohashEncoderConfig;
699    type Error = SklearsError;
700    type Float = f64;
701
702    fn config(&self) -> &Self::Config {
703        &self.config
704    }
705}
706
707impl Fit<Array2<f64>, ()> for GeohashEncoder {
708    type Fitted = GeohashEncoderFitted;
709
710    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
711        if X.ncols() != 2 {
712            return Err(SklearsError::InvalidInput(
713                "Input must have exactly 2 columns (lat, lon)".to_string(),
714            ));
715        }
716
717        let mut vocabulary = HashMap::new();
718
719        for i in 0..X.nrows() {
720            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
721            let geohash = Geohash::encode(&coord, self.config.precision)?;
722
723            if !vocabulary.contains_key(&geohash) {
724                let idx = vocabulary.len();
725                vocabulary.insert(geohash.clone(), idx);
726            }
727
728            if self.config.include_neighbors {
729                for neighbor in Geohash::neighbors(&geohash)? {
730                    if !vocabulary.contains_key(&neighbor) {
731                        let idx = vocabulary.len();
732                        vocabulary.insert(neighbor, idx);
733                    }
734                }
735            }
736        }
737
738        Ok(GeohashEncoderFitted {
739            config: self.config,
740            vocabulary,
741        })
742    }
743}
744
745impl Transform<Array2<f64>, Array2<f64>> for GeohashEncoderFitted {
746    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
747        if X.ncols() != 2 {
748            return Err(SklearsError::InvalidInput(
749                "Input must have exactly 2 columns (lat, lon)".to_string(),
750            ));
751        }
752
753        let nrows = X.nrows();
754        let ncols = if self.config.include_neighbors {
755            self.vocabulary.len() * 9 // Each geohash + 8 neighbors
756        } else {
757            self.vocabulary.len()
758        };
759
760        let mut result = Array2::zeros((nrows, ncols));
761
762        for i in 0..nrows {
763            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
764            let geohash = Geohash::encode(&coord, self.config.precision)?;
765
766            if let Some(&idx) = self.vocabulary.get(&geohash) {
767                result[[i, idx]] = 1.0;
768            }
769
770            if self.config.include_neighbors {
771                for neighbor in Geohash::neighbors(&geohash)? {
772                    if let Some(&idx) = self.vocabulary.get(&neighbor) {
773                        result[[i, idx]] = 1.0;
774                    }
775                }
776            }
777        }
778
779        Ok(result)
780    }
781}
782
783// ================================================================================================
784// Spatial Distance Features
785// ================================================================================================
786
787/// Configuration for spatial distance feature extraction
788#[derive(Debug, Clone)]
789pub struct SpatialDistanceFeaturesConfig {
790    /// Reference points to calculate distances from
791    pub reference_points: Vec<Coordinate>,
792    /// Distance metric to use
793    pub metric: SpatialDistanceMetric,
794    /// Whether to include inverse distances (1/distance)
795    pub include_inverse: bool,
796    /// Whether to include squared distances
797    pub include_squared: bool,
798}
799
800/// Spatial distance feature extractor
801pub struct SpatialDistanceFeatures {
802    config: SpatialDistanceFeaturesConfig,
803}
804
805/// Fitted spatial distance feature extractor
806pub struct SpatialDistanceFeaturesFitted {
807    config: SpatialDistanceFeaturesConfig,
808}
809
810impl SpatialDistanceFeatures {
811    /// Create a new spatial distance feature extractor
812    pub fn new(config: SpatialDistanceFeaturesConfig) -> Self {
813        Self { config }
814    }
815}
816
817impl Estimator for SpatialDistanceFeatures {
818    type Config = SpatialDistanceFeaturesConfig;
819    type Error = SklearsError;
820    type Float = f64;
821
822    fn config(&self) -> &Self::Config {
823        &self.config
824    }
825}
826
827impl Fit<Array2<f64>, ()> for SpatialDistanceFeatures {
828    type Fitted = SpatialDistanceFeaturesFitted;
829
830    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
831        if X.ncols() != 2 {
832            return Err(SklearsError::InvalidInput(
833                "Input must have exactly 2 columns (lat, lon)".to_string(),
834            ));
835        }
836
837        Ok(SpatialDistanceFeaturesFitted {
838            config: self.config,
839        })
840    }
841}
842
843impl Transform<Array2<f64>, Array2<f64>> for SpatialDistanceFeaturesFitted {
844    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
845        if X.ncols() != 2 {
846            return Err(SklearsError::InvalidInput(
847                "Input must have exactly 2 columns (lat, lon)".to_string(),
848            ));
849        }
850
851        let nrows = X.nrows();
852        let n_refs = self.config.reference_points.len();
853        let mut n_features = n_refs;
854        if self.config.include_inverse {
855            n_features += n_refs;
856        }
857        if self.config.include_squared {
858            n_features += n_refs;
859        }
860
861        let mut result = Array2::zeros((nrows, n_features));
862
863        for i in 0..nrows {
864            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
865
866            for (j, ref_point) in self.config.reference_points.iter().enumerate() {
867                let distance = calculate_distance(&coord, ref_point, self.config.metric)?;
868
869                result[[i, j]] = distance;
870
871                if self.config.include_inverse {
872                    result[[i, n_refs + j]] = if distance > 0.0 { 1.0 / distance } else { 0.0 };
873                }
874
875                if self.config.include_squared {
876                    let offset = if self.config.include_inverse {
877                        n_refs * 2
878                    } else {
879                        n_refs
880                    };
881                    result[[i, offset + j]] = distance * distance;
882                }
883            }
884        }
885
886        Ok(result)
887    }
888}
889
890// ================================================================================================
891// Spatial Binning
892// ================================================================================================
893
894/// Configuration for spatial binning
895#[derive(Debug, Clone)]
896pub struct SpatialBinningConfig {
897    /// Number of latitude bins
898    pub n_lat_bins: usize,
899    /// Number of longitude bins
900    pub n_lon_bins: usize,
901    /// Latitude range (min, max)
902    pub lat_range: Option<(f64, f64)>,
903    /// Longitude range (min, max)
904    pub lon_range: Option<(f64, f64)>,
905    /// Whether to use one-hot encoding for bins
906    pub one_hot: bool,
907}
908
909impl Default for SpatialBinningConfig {
910    fn default() -> Self {
911        Self {
912            n_lat_bins: 10,
913            n_lon_bins: 10,
914            lat_range: None,
915            lon_range: None,
916            one_hot: false,
917        }
918    }
919}
920
921/// Spatial binning transformer
922pub struct SpatialBinning {
923    config: SpatialBinningConfig,
924}
925
926/// Fitted spatial binning transformer
927pub struct SpatialBinningFitted {
928    config: SpatialBinningConfig,
929    lat_range: (f64, f64),
930    lon_range: (f64, f64),
931}
932
933impl SpatialBinning {
934    /// Create a new spatial binning transformer
935    pub fn new(config: SpatialBinningConfig) -> Self {
936        Self { config }
937    }
938}
939
940impl Estimator for SpatialBinning {
941    type Config = SpatialBinningConfig;
942    type Error = SklearsError;
943    type Float = f64;
944
945    fn config(&self) -> &Self::Config {
946        &self.config
947    }
948}
949
950impl Fit<Array2<f64>, ()> for SpatialBinning {
951    type Fitted = SpatialBinningFitted;
952
953    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
954        if X.ncols() != 2 {
955            return Err(SklearsError::InvalidInput(
956                "Input must have exactly 2 columns (lat, lon)".to_string(),
957            ));
958        }
959
960        let lat_range = if let Some(range) = self.config.lat_range {
961            range
962        } else {
963            let lats = X.column(0);
964            let min_lat = lats.iter().cloned().fold(f64::INFINITY, f64::min);
965            let max_lat = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
966            (min_lat, max_lat)
967        };
968
969        let lon_range = if let Some(range) = self.config.lon_range {
970            range
971        } else {
972            let lons = X.column(1);
973            let min_lon = lons.iter().cloned().fold(f64::INFINITY, f64::min);
974            let max_lon = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
975            (min_lon, max_lon)
976        };
977
978        Ok(SpatialBinningFitted {
979            config: self.config,
980            lat_range,
981            lon_range,
982        })
983    }
984}
985
986impl Transform<Array2<f64>, Array2<f64>> for SpatialBinningFitted {
987    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
988        if X.ncols() != 2 {
989            return Err(SklearsError::InvalidInput(
990                "Input must have exactly 2 columns (lat, lon)".to_string(),
991            ));
992        }
993
994        let nrows = X.nrows();
995        let ncols = if self.config.one_hot {
996            self.config.n_lat_bins * self.config.n_lon_bins
997        } else {
998            2
999        };
1000
1001        let mut result = Array2::zeros((nrows, ncols));
1002
1003        let lat_bin_size = (self.lat_range.1 - self.lat_range.0) / self.config.n_lat_bins as f64;
1004        let lon_bin_size = (self.lon_range.1 - self.lon_range.0) / self.config.n_lon_bins as f64;
1005
1006        for i in 0..nrows {
1007            let lat = X[[i, 0]];
1008            let lon = X[[i, 1]];
1009
1010            let lat_bin = ((lat - self.lat_range.0) / lat_bin_size)
1011                .floor()
1012                .clamp(0.0, (self.config.n_lat_bins - 1) as f64) as usize;
1013            let lon_bin = ((lon - self.lon_range.0) / lon_bin_size)
1014                .floor()
1015                .clamp(0.0, (self.config.n_lon_bins - 1) as f64) as usize;
1016
1017            if self.config.one_hot {
1018                let bin_idx = lat_bin * self.config.n_lon_bins + lon_bin;
1019                result[[i, bin_idx]] = 1.0;
1020            } else {
1021                result[[i, 0]] = lat_bin as f64;
1022                result[[i, 1]] = lon_bin as f64;
1023            }
1024        }
1025
1026        Ok(result)
1027    }
1028}
1029
1030// ================================================================================================
1031// Spatial Clustering Features
1032// ================================================================================================
1033
1034/// Configuration for spatial clustering features
1035#[derive(Debug, Clone)]
1036pub struct SpatialClusteringConfig {
1037    /// Clustering method
1038    pub method: SpatialClusteringMethod,
1039    /// Number of clusters (for grid-based methods)
1040    pub n_clusters: usize,
1041    /// Epsilon for density-based methods (in km)
1042    pub epsilon: Option<f64>,
1043    /// Minimum points for density-based methods
1044    pub min_points: Option<usize>,
1045    /// Distance metric
1046    pub metric: SpatialDistanceMetric,
1047}
1048
1049/// Spatial clustering method
1050#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1051pub enum SpatialClusteringMethod {
1052    /// Grid-based clustering using spatial binning
1053    Grid,
1054    /// Density-based features (local density estimation)
1055    Density,
1056    /// K-means-style clustering on geographic coordinates
1057    KMeans,
1058    /// Hierarchical clustering features
1059    Hierarchical,
1060}
1061
1062impl Default for SpatialClusteringConfig {
1063    fn default() -> Self {
1064        Self {
1065            method: SpatialClusteringMethod::Grid,
1066            n_clusters: 10,
1067            epsilon: Some(5.0), // 5 km
1068            min_points: Some(5),
1069            metric: SpatialDistanceMetric::Haversine,
1070        }
1071    }
1072}
1073
1074/// Spatial clustering feature extractor
1075pub struct SpatialClustering {
1076    config: SpatialClusteringConfig,
1077}
1078
1079/// Fitted spatial clustering feature extractor
1080pub struct SpatialClusteringFitted {
1081    config: SpatialClusteringConfig,
1082    /// Cluster centers or representative points
1083    cluster_centers: Vec<Coordinate>,
1084    /// Grid bounds for grid-based clustering
1085    grid_bounds: Option<(f64, f64, f64, f64)>,
1086}
1087
1088impl SpatialClustering {
1089    /// Create a new spatial clustering feature extractor
1090    pub fn new(config: SpatialClusteringConfig) -> Self {
1091        Self { config }
1092    }
1093}
1094
1095impl Estimator for SpatialClustering {
1096    type Config = SpatialClusteringConfig;
1097    type Error = SklearsError;
1098    type Float = f64;
1099
1100    fn config(&self) -> &Self::Config {
1101        &self.config
1102    }
1103}
1104
1105impl Fit<Array2<f64>, ()> for SpatialClustering {
1106    type Fitted = SpatialClusteringFitted;
1107
1108    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1109        if X.ncols() != 2 {
1110            return Err(SklearsError::InvalidInput(
1111                "Input must have exactly 2 columns (lat, lon)".to_string(),
1112            ));
1113        }
1114
1115        let cluster_centers = match self.config.method {
1116            SpatialClusteringMethod::Grid => {
1117                // Grid-based clustering: create evenly spaced cluster centers
1118                let lats = X.column(0);
1119                let lons = X.column(1);
1120                let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1121                let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1122                let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1123                let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1124
1125                let grid_size = (self.config.n_clusters as f64).sqrt().ceil() as usize;
1126                let lat_step = (lat_max - lat_min) / grid_size as f64;
1127                let lon_step = (lon_max - lon_min) / grid_size as f64;
1128
1129                let mut centers = Vec::new();
1130                for i in 0..grid_size {
1131                    for j in 0..grid_size {
1132                        let lat = lat_min + (i as f64 + 0.5) * lat_step;
1133                        let lon = lon_min + (j as f64 + 0.5) * lon_step;
1134                        if let Ok(coord) = Coordinate::new(lat, lon) {
1135                            centers.push(coord);
1136                        }
1137                    }
1138                }
1139
1140                centers
1141            }
1142            SpatialClusteringMethod::KMeans => {
1143                // Simple K-means initialization using random sampling
1144                let mut rng = seeded_rng(42);
1145                let nrows = X.nrows();
1146                let mut centers = Vec::new();
1147
1148                // Initialize with random samples
1149                for _ in 0..self.config.n_clusters.min(nrows) {
1150                    let idx = rng.random_range(0..nrows);
1151                    if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1152                        centers.push(coord);
1153                    }
1154                }
1155
1156                // Perform a few iterations of k-means
1157                for _ in 0..10 {
1158                    let mut cluster_sums = vec![(0.0, 0.0); centers.len()];
1159                    let mut cluster_counts = vec![0; centers.len()];
1160
1161                    // Assignment step
1162                    for i in 0..nrows {
1163                        let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1164                        let mut min_dist = f64::INFINITY;
1165                        let mut min_idx = 0;
1166
1167                        for (j, center) in centers.iter().enumerate() {
1168                            let dist = calculate_distance(&coord, center, self.config.metric)?;
1169                            if dist < min_dist {
1170                                min_dist = dist;
1171                                min_idx = j;
1172                            }
1173                        }
1174
1175                        cluster_sums[min_idx].0 += coord.lat;
1176                        cluster_sums[min_idx].1 += coord.lon;
1177                        cluster_counts[min_idx] += 1;
1178                    }
1179
1180                    // Update step
1181                    for (j, center) in centers.iter_mut().enumerate() {
1182                        if cluster_counts[j] > 0 {
1183                            let new_lat = cluster_sums[j].0 / cluster_counts[j] as f64;
1184                            let new_lon = cluster_sums[j].1 / cluster_counts[j] as f64;
1185                            if let Ok(new_coord) = Coordinate::new(new_lat, new_lon) {
1186                                *center = new_coord;
1187                            }
1188                        }
1189                    }
1190                }
1191
1192                centers
1193            }
1194            SpatialClusteringMethod::Density | SpatialClusteringMethod::Hierarchical => {
1195                // For density-based methods, we'll sample representative points
1196                let mut rng = seeded_rng(42);
1197                let nrows = X.nrows();
1198                let mut centers = Vec::new();
1199
1200                let sample_size = self.config.n_clusters.min(nrows);
1201                for _ in 0..sample_size {
1202                    let idx = rng.random_range(0..nrows);
1203                    if let Ok(coord) = Coordinate::new(X[[idx, 0]], X[[idx, 1]]) {
1204                        centers.push(coord);
1205                    }
1206                }
1207
1208                centers
1209            }
1210        };
1211
1212        let grid_bounds = if matches!(self.config.method, SpatialClusteringMethod::Grid) {
1213            let lats = X.column(0);
1214            let lons = X.column(1);
1215            let lat_min = lats.iter().cloned().fold(f64::INFINITY, f64::min);
1216            let lat_max = lats.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1217            let lon_min = lons.iter().cloned().fold(f64::INFINITY, f64::min);
1218            let lon_max = lons.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1219            Some((lat_min, lon_min, lat_max, lon_max))
1220        } else {
1221            None
1222        };
1223
1224        Ok(SpatialClusteringFitted {
1225            config: self.config,
1226            cluster_centers,
1227            grid_bounds,
1228        })
1229    }
1230}
1231
1232impl Transform<Array2<f64>, Array2<f64>> for SpatialClusteringFitted {
1233    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1234        if X.ncols() != 2 {
1235            return Err(SklearsError::InvalidInput(
1236                "Input must have exactly 2 columns (lat, lon)".to_string(),
1237            ));
1238        }
1239
1240        let nrows = X.nrows();
1241
1242        match self.config.method {
1243            SpatialClusteringMethod::Grid | SpatialClusteringMethod::KMeans => {
1244                // Output: cluster assignment + distance to nearest cluster center
1245                let mut result = Array2::zeros((nrows, 2));
1246
1247                for i in 0..nrows {
1248                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1249                    let mut min_dist = f64::INFINITY;
1250                    let mut min_idx = 0;
1251
1252                    for (j, center) in self.cluster_centers.iter().enumerate() {
1253                        let dist = calculate_distance(&coord, center, self.config.metric)?;
1254                        if dist < min_dist {
1255                            min_dist = dist;
1256                            min_idx = j;
1257                        }
1258                    }
1259
1260                    result[[i, 0]] = min_idx as f64;
1261                    result[[i, 1]] = min_dist;
1262                }
1263
1264                Ok(result)
1265            }
1266            SpatialClusteringMethod::Density => {
1267                // Output: local density estimate (number of neighbors within epsilon)
1268                let epsilon = self.config.epsilon.unwrap_or(5.0);
1269                let mut result = Array2::zeros((nrows, 1));
1270
1271                for i in 0..nrows {
1272                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1273                    let mut density = 0.0;
1274
1275                    for j in 0..nrows {
1276                        if i != j {
1277                            let other = Coordinate::new(X[[j, 0]], X[[j, 1]])?;
1278                            let dist = calculate_distance(&coord, &other, self.config.metric)?;
1279                            if dist <= epsilon {
1280                                density += 1.0;
1281                            }
1282                        }
1283                    }
1284
1285                    result[[i, 0]] = density;
1286                }
1287
1288                Ok(result)
1289            }
1290            SpatialClusteringMethod::Hierarchical => {
1291                // Output: distances to all cluster centers (linkage-based features)
1292                let n_centers = self.cluster_centers.len();
1293                let mut result = Array2::zeros((nrows, n_centers));
1294
1295                for i in 0..nrows {
1296                    let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1297
1298                    for (j, center) in self.cluster_centers.iter().enumerate() {
1299                        let dist = calculate_distance(&coord, center, self.config.metric)?;
1300                        result[[i, j]] = dist;
1301                    }
1302                }
1303
1304                Ok(result)
1305            }
1306        }
1307    }
1308}
1309
1310// ================================================================================================
1311// Spatial Autocorrelation Features
1312// ================================================================================================
1313
1314/// Configuration for spatial autocorrelation features
1315#[derive(Debug, Clone)]
1316pub struct SpatialAutocorrelationConfig {
1317    /// Number of nearest neighbors to consider
1318    pub k_neighbors: usize,
1319    /// Distance metric
1320    pub metric: SpatialDistanceMetric,
1321    /// Whether to include Moran's I statistic
1322    pub include_morans_i: bool,
1323    /// Whether to include local indicators of spatial association (LISA)
1324    pub include_lisa: bool,
1325}
1326
1327impl Default for SpatialAutocorrelationConfig {
1328    fn default() -> Self {
1329        Self {
1330            k_neighbors: 5,
1331            metric: SpatialDistanceMetric::Haversine,
1332            include_morans_i: true,
1333            include_lisa: true,
1334        }
1335    }
1336}
1337
1338/// Spatial autocorrelation feature extractor
1339pub struct SpatialAutocorrelation {
1340    config: SpatialAutocorrelationConfig,
1341}
1342
1343/// Fitted spatial autocorrelation feature extractor
1344pub struct SpatialAutocorrelationFitted {
1345    config: SpatialAutocorrelationConfig,
1346    /// Training data coordinates for spatial weights
1347    training_coords: Vec<Coordinate>,
1348}
1349
1350impl SpatialAutocorrelation {
1351    /// Create a new spatial autocorrelation feature extractor
1352    pub fn new(config: SpatialAutocorrelationConfig) -> Self {
1353        Self { config }
1354    }
1355}
1356
1357impl Estimator for SpatialAutocorrelation {
1358    type Config = SpatialAutocorrelationConfig;
1359    type Error = SklearsError;
1360    type Float = f64;
1361
1362    fn config(&self) -> &Self::Config {
1363        &self.config
1364    }
1365}
1366
1367impl Fit<Array2<f64>, Array1<f64>> for SpatialAutocorrelation {
1368    type Fitted = SpatialAutocorrelationFitted;
1369
1370    fn fit(self, X: &Array2<f64>, _y: &Array1<f64>) -> Result<Self::Fitted> {
1371        if X.ncols() != 2 {
1372            return Err(SklearsError::InvalidInput(
1373                "Input must have exactly 2 columns (lat, lon)".to_string(),
1374            ));
1375        }
1376
1377        let mut training_coords = Vec::with_capacity(X.nrows());
1378        for i in 0..X.nrows() {
1379            training_coords.push(Coordinate::new(X[[i, 0]], X[[i, 1]])?);
1380        }
1381
1382        Ok(SpatialAutocorrelationFitted {
1383            config: self.config,
1384            training_coords,
1385        })
1386    }
1387}
1388
1389impl Transform<Array2<f64>, Array2<f64>> for SpatialAutocorrelationFitted {
1390    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1391        if X.ncols() != 2 {
1392            return Err(SklearsError::InvalidInput(
1393                "Input must have exactly 2 columns (lat, lon)".to_string(),
1394            ));
1395        }
1396
1397        let nrows = X.nrows();
1398        let mut n_features = 0;
1399
1400        if self.config.include_lisa {
1401            n_features += 1; // Local Moran's I
1402        }
1403
1404        let mut result = Array2::zeros((nrows, n_features.max(1)));
1405
1406        for i in 0..nrows {
1407            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1408
1409            // Find k nearest neighbors
1410            let mut distances: Vec<(usize, f64)> = self
1411                .training_coords
1412                .iter()
1413                .enumerate()
1414                .map(|(j, other)| {
1415                    let dist = calculate_distance(&coord, other, self.config.metric)
1416                        .unwrap_or(f64::INFINITY);
1417                    (j, dist)
1418                })
1419                .collect();
1420
1421            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
1422
1423            // Calculate local spatial statistics
1424            if self.config.include_lisa {
1425                // Simplified local Moran's I calculation
1426                // In a real implementation, this would use the target variable
1427                let nearest_neighbors =
1428                    &distances[1..=self.config.k_neighbors.min(distances.len() - 1)];
1429                let avg_neighbor_dist = nearest_neighbors.iter().map(|(_, d)| d).sum::<f64>()
1430                    / nearest_neighbors.len() as f64;
1431
1432                result[[i, 0]] = 1.0 / (1.0 + avg_neighbor_dist); // Inverse distance weighted
1433            }
1434        }
1435
1436        Ok(result)
1437    }
1438}
1439
1440// ================================================================================================
1441// Proximity Features
1442// ================================================================================================
1443
1444/// Configuration for proximity features
1445#[derive(Debug, Clone)]
1446pub struct ProximityFeaturesConfig {
1447    /// Points of interest to calculate proximity to
1448    pub points_of_interest: Vec<(String, Coordinate)>,
1449    /// Distance metric
1450    pub metric: SpatialDistanceMetric,
1451    /// Maximum distance to consider (in km)
1452    pub max_distance: Option<f64>,
1453    /// Whether to include binary indicators (within max_distance)
1454    pub include_indicators: bool,
1455}
1456
1457/// Proximity feature extractor
1458pub struct ProximityFeatures {
1459    config: ProximityFeaturesConfig,
1460}
1461
1462/// Fitted proximity feature extractor
1463pub struct ProximityFeaturesFitted {
1464    config: ProximityFeaturesConfig,
1465}
1466
1467impl ProximityFeatures {
1468    /// Create a new proximity feature extractor
1469    pub fn new(config: ProximityFeaturesConfig) -> Self {
1470        Self { config }
1471    }
1472}
1473
1474impl Estimator for ProximityFeatures {
1475    type Config = ProximityFeaturesConfig;
1476    type Error = SklearsError;
1477    type Float = f64;
1478
1479    fn config(&self) -> &Self::Config {
1480        &self.config
1481    }
1482}
1483
1484impl Fit<Array2<f64>, ()> for ProximityFeatures {
1485    type Fitted = ProximityFeaturesFitted;
1486
1487    fn fit(self, X: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
1488        if X.ncols() != 2 {
1489            return Err(SklearsError::InvalidInput(
1490                "Input must have exactly 2 columns (lat, lon)".to_string(),
1491            ));
1492        }
1493
1494        Ok(ProximityFeaturesFitted {
1495            config: self.config,
1496        })
1497    }
1498}
1499
1500impl Transform<Array2<f64>, Array2<f64>> for ProximityFeaturesFitted {
1501    fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
1502        if X.ncols() != 2 {
1503            return Err(SklearsError::InvalidInput(
1504                "Input must have exactly 2 columns (lat, lon)".to_string(),
1505            ));
1506        }
1507
1508        let nrows = X.nrows();
1509        let n_pois = self.config.points_of_interest.len();
1510        let n_features = if self.config.include_indicators {
1511            n_pois * 2 // Distance + indicator
1512        } else {
1513            n_pois // Just distance
1514        };
1515
1516        let mut result = Array2::zeros((nrows, n_features));
1517
1518        for i in 0..nrows {
1519            let coord = Coordinate::new(X[[i, 0]], X[[i, 1]])?;
1520
1521            for (j, (_name, poi)) in self.config.points_of_interest.iter().enumerate() {
1522                let distance = calculate_distance(&coord, poi, self.config.metric)?;
1523
1524                result[[i, j]] = distance;
1525
1526                if self.config.include_indicators {
1527                    let is_within = if let Some(max_dist) = self.config.max_distance {
1528                        if distance <= max_dist {
1529                            1.0
1530                        } else {
1531                            0.0
1532                        }
1533                    } else {
1534                        1.0
1535                    };
1536                    result[[i, n_pois + j]] = is_within;
1537                }
1538            }
1539        }
1540
1541        Ok(result)
1542    }
1543}
1544
1545// ================================================================================================
1546// Tests
1547// ================================================================================================
1548
1549#[cfg(test)]
1550mod tests {
1551    use super::*;
1552    use approx::assert_relative_eq;
1553
1554    #[test]
1555    fn test_coordinate_creation() {
1556        let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1557        assert_eq!(coord.lat, 40.7128);
1558        assert_eq!(coord.lon, -74.0060);
1559
1560        // Test invalid coordinates
1561        assert!(Coordinate::new(91.0, 0.0).is_err());
1562        assert!(Coordinate::new(0.0, 181.0).is_err());
1563    }
1564
1565    #[test]
1566    fn test_haversine_distance() {
1567        // New York to London
1568        let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1569        let london = Coordinate::new(51.5074, -0.1278).unwrap();
1570
1571        let distance = haversine_distance(&nyc, &london);
1572        // Expected distance is approximately 5570 km
1573        assert!((distance - 5570.0).abs() < 50.0);
1574    }
1575
1576    #[test]
1577    fn test_vincenty_distance() {
1578        let nyc = Coordinate::new(40.7128, -74.0060).unwrap();
1579        let london = Coordinate::new(51.5074, -0.1278).unwrap();
1580
1581        let distance = vincenty_distance(&nyc, &london).unwrap();
1582        // Vincenty should give similar but slightly more accurate result
1583        assert!((distance - 5570.0).abs() < 50.0);
1584    }
1585
1586    #[test]
1587    fn test_geohash_encode_decode() {
1588        let coord = Coordinate::new(40.7128, -74.0060).unwrap();
1589        let geohash = Geohash::encode(&coord, 7).unwrap();
1590        assert_eq!(geohash.len(), 7);
1591
1592        let decoded = Geohash::decode(&geohash).unwrap();
1593        assert!((decoded.lat - coord.lat).abs() < 0.01);
1594        assert!((decoded.lon - coord.lon).abs() < 0.01);
1595    }
1596
1597    #[test]
1598    fn test_geohash_bounds() {
1599        let geohash = "dr5ru7";
1600        let (lat_min, lon_min, lat_max, lon_max) = Geohash::bounds(geohash).unwrap();
1601
1602        assert!(lat_min < lat_max);
1603        assert!(lon_min < lon_max);
1604        assert!(lat_min >= -90.0 && lat_max <= 90.0);
1605        assert!(lon_min >= -180.0 && lon_max <= 180.0);
1606    }
1607
1608    #[test]
1609    fn test_geohash_neighbors() {
1610        let geohash = "dr5ru7";
1611        let neighbors = Geohash::neighbors(geohash).unwrap();
1612
1613        // Should have up to 8 neighbors
1614        assert!(neighbors.len() <= 8);
1615        assert!(neighbors.len() > 0);
1616
1617        // All neighbors should have the same precision
1618        for neighbor in neighbors.iter() {
1619            assert_eq!(neighbor.len(), geohash.len());
1620        }
1621    }
1622
1623    #[test]
1624    fn test_wgs84_to_web_mercator() {
1625        let (x, y) = wgs84_to_web_mercator(40.7128, -74.0060).unwrap();
1626
1627        // Verify conversion is reasonable
1628        assert!(x.abs() < 20037508.34); // Web Mercator max
1629        assert!(y.abs() < 20037508.34);
1630
1631        // Round trip
1632        let (lat, lon) = web_mercator_to_wgs84(x, y).unwrap();
1633        assert_relative_eq!(lat, 40.7128, epsilon = 0.0001);
1634        assert_relative_eq!(lon, -74.0060, epsilon = 0.0001);
1635    }
1636
1637    #[test]
1638    fn test_coordinate_transformer() {
1639        use scirs2_core::ndarray::array;
1640
1641        let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1642
1643        let transformer =
1644            CoordinateTransformer::new(CoordinateSystem::WGS84, CoordinateSystem::WebMercator);
1645        let fitted = transformer.fit(&X, &()).unwrap();
1646        let result = fitted.transform(&X).unwrap();
1647
1648        assert_eq!(result.nrows(), 2);
1649        assert_eq!(result.ncols(), 2);
1650
1651        // Verify transformation produces reasonable values
1652        assert!(result[[0, 0]].abs() < 20037508.34);
1653        assert!(result[[0, 1]].abs() < 20037508.34);
1654    }
1655
1656    #[test]
1657    fn test_geohash_encoder() {
1658        use scirs2_core::ndarray::array;
1659
1660        let X = array![[40.7128, -74.0060], [40.7589, -73.9851], [51.5074, -0.1278]];
1661
1662        let config = GeohashEncoderConfig {
1663            precision: 5,
1664            include_neighbors: false,
1665        };
1666
1667        let encoder = GeohashEncoder::new(config);
1668        let fitted = encoder.fit(&X, &()).unwrap();
1669        let result = fitted.transform(&X).unwrap();
1670
1671        assert_eq!(result.nrows(), 3);
1672        // Should have as many columns as unique geohashes
1673        assert!(result.ncols() > 0);
1674    }
1675
1676    #[test]
1677    fn test_spatial_distance_features() {
1678        use scirs2_core::ndarray::array;
1679
1680        let X = array![[40.7128, -74.0060], [40.7589, -73.9851]];
1681
1682        let reference_points = vec![
1683            Coordinate::new(40.7128, -74.0060).unwrap(), // NYC
1684            Coordinate::new(51.5074, -0.1278).unwrap(),  // London
1685        ];
1686
1687        let config = SpatialDistanceFeaturesConfig {
1688            reference_points,
1689            metric: SpatialDistanceMetric::Haversine,
1690            include_inverse: false,
1691            include_squared: false,
1692        };
1693
1694        let transformer = SpatialDistanceFeatures::new(config);
1695        let fitted = transformer.fit(&X, &()).unwrap();
1696        let result = fitted.transform(&X).unwrap();
1697
1698        assert_eq!(result.nrows(), 2);
1699        assert_eq!(result.ncols(), 2); // 2 reference points
1700
1701        // First point should be close to first reference point
1702        assert!(result[[0, 0]] < 1.0);
1703    }
1704
1705    #[test]
1706    fn test_spatial_binning() {
1707        use scirs2_core::ndarray::array;
1708
1709        let X = array![[40.0, -74.0], [41.0, -73.0], [42.0, -72.0], [43.0, -71.0]];
1710
1711        let config = SpatialBinningConfig {
1712            n_lat_bins: 2,
1713            n_lon_bins: 2,
1714            lat_range: Some((40.0, 44.0)),
1715            lon_range: Some((-75.0, -70.0)),
1716            one_hot: false,
1717        };
1718
1719        let binning = SpatialBinning::new(config);
1720        let fitted = binning.fit(&X, &()).unwrap();
1721        let result = fitted.transform(&X).unwrap();
1722
1723        assert_eq!(result.nrows(), 4);
1724        assert_eq!(result.ncols(), 2);
1725
1726        // Verify binning produces valid bin indices
1727        for i in 0..result.nrows() {
1728            assert!(result[[i, 0]] >= 0.0 && result[[i, 0]] < 2.0);
1729            assert!(result[[i, 1]] >= 0.0 && result[[i, 1]] < 2.0);
1730        }
1731    }
1732
1733    #[test]
1734    fn test_spatial_binning_one_hot() {
1735        use scirs2_core::ndarray::array;
1736
1737        let X = array![[40.0, -74.0], [41.0, -73.0],];
1738
1739        let config = SpatialBinningConfig {
1740            n_lat_bins: 2,
1741            n_lon_bins: 2,
1742            lat_range: Some((40.0, 42.0)),
1743            lon_range: Some((-75.0, -72.0)),
1744            one_hot: true,
1745        };
1746
1747        let binning = SpatialBinning::new(config);
1748        let fitted = binning.fit(&X, &()).unwrap();
1749        let result = fitted.transform(&X).unwrap();
1750
1751        assert_eq!(result.nrows(), 2);
1752        assert_eq!(result.ncols(), 4); // 2x2 bins
1753
1754        // Verify one-hot encoding
1755        for i in 0..result.nrows() {
1756            let sum: f64 = result.row(i).sum();
1757            assert_relative_eq!(sum, 1.0, epsilon = 1e-10);
1758        }
1759    }
1760
1761    #[test]
1762    fn test_spatial_clustering_grid() {
1763        use scirs2_core::ndarray::array;
1764
1765        let X = array![[40.0, -74.0], [40.5, -73.5], [41.0, -73.0], [41.5, -72.5]];
1766
1767        let config = SpatialClusteringConfig {
1768            method: SpatialClusteringMethod::Grid,
1769            n_clusters: 4,
1770            epsilon: None,
1771            min_points: None,
1772            metric: SpatialDistanceMetric::Haversine,
1773        };
1774
1775        let clustering = SpatialClustering::new(config);
1776        let fitted = clustering.fit(&X, &()).unwrap();
1777        let result = fitted.transform(&X).unwrap();
1778
1779        assert_eq!(result.nrows(), 4);
1780        assert_eq!(result.ncols(), 2); // Cluster assignment + distance
1781
1782        // Verify cluster assignments are valid
1783        for i in 0..result.nrows() {
1784            assert!(result[[i, 0]] >= 0.0);
1785            assert!(result[[i, 1]] >= 0.0); // Distance should be non-negative
1786        }
1787    }
1788
1789    #[test]
1790    fn test_spatial_clustering_kmeans() {
1791        use scirs2_core::ndarray::array;
1792
1793        let X = array![[40.0, -74.0], [40.1, -74.1], [42.0, -72.0], [42.1, -72.1]];
1794
1795        let config = SpatialClusteringConfig {
1796            method: SpatialClusteringMethod::KMeans,
1797            n_clusters: 2,
1798            epsilon: None,
1799            min_points: None,
1800            metric: SpatialDistanceMetric::Haversine,
1801        };
1802
1803        let clustering = SpatialClustering::new(config);
1804        let fitted = clustering.fit(&X, &()).unwrap();
1805        let result = fitted.transform(&X).unwrap();
1806
1807        assert_eq!(result.nrows(), 4);
1808        assert_eq!(result.ncols(), 2);
1809
1810        // Points that are close together should be in the same cluster
1811        assert_eq!(result[[0, 0]], result[[1, 0]]);
1812        assert_eq!(result[[2, 0]], result[[3, 0]]);
1813    }
1814
1815    #[test]
1816    fn test_spatial_clustering_density() {
1817        use scirs2_core::ndarray::array;
1818
1819        let X = array![
1820            [40.0, -74.0],
1821            [40.001, -74.001], // Very close to first point
1822            [42.0, -72.0]      // Far away
1823        ];
1824
1825        let config = SpatialClusteringConfig {
1826            method: SpatialClusteringMethod::Density,
1827            n_clusters: 2,
1828            epsilon: Some(1.0), // 1 km radius
1829            min_points: Some(1),
1830            metric: SpatialDistanceMetric::Haversine,
1831        };
1832
1833        let clustering = SpatialClustering::new(config);
1834        let fitted = clustering.fit(&X, &()).unwrap();
1835        let result = fitted.transform(&X).unwrap();
1836
1837        assert_eq!(result.nrows(), 3);
1838        assert_eq!(result.ncols(), 1); // Just density
1839
1840        // First two points should have higher density
1841        assert!(result[[0, 0]] > result[[2, 0]]);
1842        assert!(result[[1, 0]] > result[[2, 0]]);
1843    }
1844
1845    #[test]
1846    fn test_proximity_features() {
1847        use scirs2_core::ndarray::array;
1848
1849        let X = array![[40.7128, -74.0060], [51.5074, -0.1278]];
1850
1851        let pois = vec![
1852            (
1853                "NYC".to_string(),
1854                Coordinate::new(40.7128, -74.0060).unwrap(),
1855            ),
1856            (
1857                "London".to_string(),
1858                Coordinate::new(51.5074, -0.1278).unwrap(),
1859            ),
1860        ];
1861
1862        let config = ProximityFeaturesConfig {
1863            points_of_interest: pois,
1864            metric: SpatialDistanceMetric::Haversine,
1865            max_distance: Some(100.0),
1866            include_indicators: true,
1867        };
1868
1869        let proximity = ProximityFeatures::new(config);
1870        let fitted = proximity.fit(&X, &()).unwrap();
1871        let result = fitted.transform(&X).unwrap();
1872
1873        assert_eq!(result.nrows(), 2);
1874        assert_eq!(result.ncols(), 4); // 2 POIs * 2 (distance + indicator)
1875
1876        // First point should be very close to NYC POI
1877        assert!(result[[0, 0]] < 1.0);
1878        assert_relative_eq!(result[[0, 2]], 1.0, epsilon = 1e-10); // Within 100km
1879
1880        // Second point should be very close to London POI
1881        assert!(result[[1, 1]] < 1.0);
1882        assert_relative_eq!(result[[1, 3]], 1.0, epsilon = 1e-10); // Within 100km
1883    }
1884
1885    #[test]
1886    fn test_spatial_autocorrelation() {
1887        use scirs2_core::ndarray::array;
1888
1889        let X = array![[40.0, -74.0], [40.1, -74.0], [40.2, -74.0], [42.0, -72.0]];
1890
1891        let config = SpatialAutocorrelationConfig {
1892            k_neighbors: 2,
1893            metric: SpatialDistanceMetric::Haversine,
1894            include_morans_i: true,
1895            include_lisa: true,
1896        };
1897
1898        let autocorr = SpatialAutocorrelation::new(config);
1899        let y = array![1.0, 2.0, 3.0, 4.0]; // Dummy y for testing
1900        let fitted = autocorr.fit(&X, &y).unwrap();
1901        let result = fitted.transform(&X).unwrap();
1902
1903        assert_eq!(result.nrows(), 4);
1904        assert!(result.ncols() >= 1);
1905
1906        // Verify all values are finite and non-negative
1907        for i in 0..result.nrows() {
1908            for j in 0..result.ncols() {
1909                assert!(result[[i, j]].is_finite());
1910                assert!(result[[i, j]] >= 0.0);
1911            }
1912        }
1913    }
1914
1915    #[test]
1916    fn test_utm_transformation() {
1917        let (easting, northing) = wgs84_to_utm(40.7128, -74.0060, 18, true).unwrap();
1918
1919        // Verify transformation produces reasonable UTM coordinates
1920        assert!(easting > 0.0);
1921        assert!(northing > 0.0);
1922
1923        // Round trip
1924        let (lat, lon) = utm_to_wgs84(easting, northing, 18, true).unwrap();
1925        assert_relative_eq!(lat, 40.7128, epsilon = 0.01);
1926        assert_relative_eq!(lon, -74.0060, epsilon = 0.01);
1927    }
1928
1929    #[test]
1930    fn test_coordinate_systems_enum() {
1931        let wgs84 = CoordinateSystem::WGS84;
1932        let web_merc = CoordinateSystem::WebMercator;
1933        let utm = CoordinateSystem::UTM {
1934            zone: 18,
1935            northern: true,
1936        };
1937
1938        assert_eq!(wgs84, CoordinateSystem::WGS84);
1939        assert_eq!(web_merc, CoordinateSystem::WebMercator);
1940        assert_ne!(wgs84, web_merc);
1941
1942        match utm {
1943            CoordinateSystem::UTM { zone, northern } => {
1944                assert_eq!(zone, 18);
1945                assert!(northern);
1946            }
1947            _ => panic!("Expected UTM"),
1948        }
1949    }
1950
1951    #[test]
1952    fn test_distance_metrics() {
1953        let coord1 = Coordinate::new(40.7128, -74.0060).unwrap();
1954        let coord2 = Coordinate::new(40.7589, -73.9851).unwrap();
1955
1956        let haversine =
1957            calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Haversine).unwrap();
1958        let vincenty =
1959            calculate_distance(&coord1, &coord2, SpatialDistanceMetric::Vincenty).unwrap();
1960
1961        // Both should give similar results
1962        assert!((haversine - vincenty).abs() < 0.1);
1963
1964        // Distance should be reasonable (a few km)
1965        assert!(haversine > 0.0 && haversine < 10.0);
1966    }
1967}