ruvector_data_framework/
geospatial_clients.rs

1//! Geospatial & Mapping API integrations
2//!
3//! This module provides async clients for:
4//! - Nominatim (OpenStreetMap geocoding)
5//! - Overpass API (OSM data queries)
6//! - GeoNames (place name database)
7//! - Open Elevation (elevation data)
8//!
9//! All responses are converted to SemanticVector format for RuVector discovery.
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14
15use chrono::Utc;
16use reqwest::{Client, StatusCode};
17use serde::{Deserialize, Serialize};
18use tokio::sync::Mutex;
19use tokio::time::sleep;
20
21use crate::api_clients::SimpleEmbedder;
22use crate::physics_clients::GeoUtils;
23use crate::ruvector_native::{Domain, SemanticVector};
24use crate::{FrameworkError, Result};
25
/// Minimum spacing between Nominatim requests, in milliseconds.
const NOMINATIM_RATE_LIMIT_MS: u64 = 1000; // STRICT: 1 request/second
/// Fixed delay slept before each Overpass request, in milliseconds.
const OVERPASS_RATE_LIMIT_MS: u64 = 500; // Conservative: 2 requests/second
/// Fixed delay slept before each GeoNames request, in milliseconds.
const GEONAMES_RATE_LIMIT_MS: u64 = 2000; // Conservative for free tier: ~0.5/sec (2000/hour limit)
/// Fixed delay slept before each Open Elevation request, in milliseconds.
const OPEN_ELEVATION_RATE_LIMIT_MS: u64 = 200; // ~5 requests/second
/// Maximum number of retries performed by the `fetch_with_retry` helpers.
const MAX_RETRIES: u32 = 3;
/// Base retry back-off in milliseconds; it is multiplied by the retry count.
const RETRY_DELAY_MS: u64 = 2000;

// User-Agent for OSM services (required by policy).
// Sent by the Nominatim and Overpass clients.
const USER_AGENT: &str = "RuVector-Data-Framework/1.0 (https://github.com/ruvnet/ruvector)";
36
37// ============================================================================
38// Nominatim Client (OpenStreetMap Geocoding)
39// ============================================================================
40
/// One place record from a Nominatim geocoding response.
///
/// Every field is `#[serde(default)]`, so a payload that lacks a field still
/// deserializes and leaves that field at its default value.
#[derive(Debug, Deserialize)]
struct NominatimPlace {
    /// Nominatim place identifier.
    #[serde(default)]
    place_id: u64,
    /// Licence text from the response (deserialized but not used in this file's visible code).
    #[serde(default)]
    licence: String,
    /// OSM object type; combined with `osm_id` to build the vector id.
    #[serde(default)]
    osm_type: String,
    /// OSM object id.
    #[serde(default)]
    osm_id: u64,
    /// Latitude as a decimal string; parsed to `f64` during conversion.
    #[serde(default)]
    lat: String,
    /// Longitude as a decimal string; parsed to `f64` during conversion.
    #[serde(default)]
    lon: String,
    /// Display name of the place.
    #[serde(default)]
    display_name: String,
    /// Place type; stored in metadata under `place_type`.
    #[serde(default)]
    r#type: String,
    /// Importance value reported by Nominatim.
    #[serde(default)]
    importance: f64,
    /// Structured address, present when the response includes one.
    #[serde(default)]
    address: Option<NominatimAddress>,
    /// Raw GeoJSON value, if the response includes one (not used in this file's visible code).
    #[serde(default)]
    geojson: Option<serde_json::Value>,
}
67
/// Structured address breakdown attached to a [`NominatimPlace`].
///
/// Every component is optional; absent components deserialize to `None`.
#[derive(Debug, Deserialize, Default)]
struct NominatimAddress {
    /// House number (not used in this file's visible code).
    #[serde(default)]
    house_number: Option<String>,
    /// Road name.
    #[serde(default)]
    road: Option<String>,
    /// City name.
    #[serde(default)]
    city: Option<String>,
    /// State name.
    #[serde(default)]
    state: Option<String>,
    /// Postal code (not used in this file's visible code).
    #[serde(default)]
    postcode: Option<String>,
    /// Country name.
    #[serde(default)]
    country: Option<String>,
    /// Country code; copied to metadata under `country_code`.
    #[serde(default)]
    country_code: Option<String>,
}
85
/// Client for Nominatim (OpenStreetMap Geocoding)
///
/// Provides access to:
/// - Address to coordinates (geocoding)
/// - Coordinates to address (reverse geocoding)
/// - Place name search
///
/// Results are returned as `SemanticVector`s.
///
/// **IMPORTANT**: STRICT rate limit of 1 request/second is enforced.
/// See: https://operations.osmfoundation.org/policies/nominatim/
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::NominatimClient;
///
/// let client = NominatimClient::new()?;
/// let coords = client.geocode("1600 Pennsylvania Avenue, Washington DC").await?;
/// let address = client.reverse_geocode(38.8977, -77.0365).await?;
/// let places = client.search("Eiffel Tower", 5).await?;
/// ```
pub struct NominatimClient {
    /// HTTP client configured with a timeout and the crate's User-Agent.
    client: Client,
    /// Base URL of the Nominatim service, without a trailing slash.
    base_url: String,
    /// Minimum time between two consecutive requests.
    rate_limit_delay: Duration,
    /// Embedder used to turn place descriptions into vectors.
    embedder: Arc<SimpleEmbedder>,
    /// Last request time for STRICT rate limiting
    last_request: Arc<Mutex<Option<Instant>>>,
}
113
114impl NominatimClient {
115    /// Create a new Nominatim client
116    pub fn new() -> Result<Self> {
117        let client = Client::builder()
118            .timeout(Duration::from_secs(30))
119            .user_agent(USER_AGENT)
120            .build()
121            .map_err(FrameworkError::Network)?;
122
123        Ok(Self {
124            client,
125            base_url: "https://nominatim.openstreetmap.org".to_string(),
126            rate_limit_delay: Duration::from_millis(NOMINATIM_RATE_LIMIT_MS),
127            embedder: Arc::new(SimpleEmbedder::new(256)),
128            last_request: Arc::new(Mutex::new(None)),
129        })
130    }
131
132    /// Enforce STRICT rate limiting (1 request/second)
133    async fn enforce_rate_limit(&self) {
134        let mut last = self.last_request.lock().await;
135
136        if let Some(last_time) = *last {
137            let elapsed = last_time.elapsed();
138            if elapsed < self.rate_limit_delay {
139                let wait_time = self.rate_limit_delay - elapsed;
140                sleep(wait_time).await;
141            }
142        }
143
144        *last = Some(Instant::now());
145    }
146
147    /// Geocode an address to coordinates
148    ///
149    /// # Arguments
150    /// * `address` - Address string (e.g., "1600 Pennsylvania Avenue, Washington DC")
151    ///
152    /// # Example
153    /// ```rust,ignore
154    /// let coords = client.geocode("Eiffel Tower, Paris").await?;
155    /// ```
156    pub async fn geocode(&self, address: &str) -> Result<Vec<SemanticVector>> {
157        self.enforce_rate_limit().await;
158
159        let url = format!(
160            "{}/search?q={}&format=json&addressdetails=1&limit=1",
161            self.base_url,
162            urlencoding::encode(address)
163        );
164
165        let response = self.fetch_with_retry(&url).await?;
166        let places: Vec<NominatimPlace> = response.json().await?;
167
168        self.convert_places(places)
169    }
170
171    /// Reverse geocode coordinates to address
172    ///
173    /// # Arguments
174    /// * `lat` - Latitude
175    /// * `lon` - Longitude
176    ///
177    /// # Example
178    /// ```rust,ignore
179    /// let address = client.reverse_geocode(48.8584, 2.2945).await?;
180    /// ```
181    pub async fn reverse_geocode(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
182        self.enforce_rate_limit().await;
183
184        let url = format!(
185            "{}/reverse?lat={}&lon={}&format=json&addressdetails=1",
186            self.base_url, lat, lon
187        );
188
189        let response = self.fetch_with_retry(&url).await?;
190        let place: NominatimPlace = response.json().await?;
191
192        self.convert_places(vec![place])
193    }
194
195    /// Search for places by name
196    ///
197    /// # Arguments
198    /// * `query` - Search query (e.g., "Central Park")
199    /// * `limit` - Maximum number of results (max 50)
200    ///
201    /// # Example
202    /// ```rust,ignore
203    /// let places = client.search("Times Square", 5).await?;
204    /// ```
205    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SemanticVector>> {
206        self.enforce_rate_limit().await;
207
208        let limit = limit.min(50); // Nominatim max is 50
209        let url = format!(
210            "{}/search?q={}&format=json&addressdetails=1&limit={}",
211            self.base_url,
212            urlencoding::encode(query),
213            limit
214        );
215
216        let response = self.fetch_with_retry(&url).await?;
217        let places: Vec<NominatimPlace> = response.json().await?;
218
219        self.convert_places(places)
220    }
221
222    /// Convert Nominatim places to SemanticVectors
223    fn convert_places(&self, places: Vec<NominatimPlace>) -> Result<Vec<SemanticVector>> {
224        let mut vectors = Vec::new();
225
226        for place in places {
227            let lat = place.lat.parse::<f64>().unwrap_or(0.0);
228            let lon = place.lon.parse::<f64>().unwrap_or(0.0);
229
230            // Build address string
231            let address_str = if let Some(addr) = &place.address {
232                format!(
233                    "{}, {}, {}, {}",
234                    addr.road.as_deref().unwrap_or(""),
235                    addr.city.as_deref().unwrap_or(""),
236                    addr.state.as_deref().unwrap_or(""),
237                    addr.country.as_deref().unwrap_or("")
238                )
239            } else {
240                place.display_name.clone()
241            };
242
243            // Create text for embedding
244            let text = format!(
245                "{} at lat: {}, lon: {} - {} (OSM type: {})",
246                place.display_name, lat, lon, address_str, place.osm_type
247            );
248            let embedding = self.embedder.embed_text(&text);
249
250            let mut metadata = HashMap::new();
251            metadata.insert("place_id".to_string(), place.place_id.to_string());
252            metadata.insert("osm_type".to_string(), place.osm_type.clone());
253            metadata.insert("osm_id".to_string(), place.osm_id.to_string());
254            metadata.insert("latitude".to_string(), lat.to_string());
255            metadata.insert("longitude".to_string(), lon.to_string());
256            metadata.insert("display_name".to_string(), place.display_name.clone());
257            metadata.insert("place_type".to_string(), place.r#type.clone());
258            metadata.insert("importance".to_string(), place.importance.to_string());
259
260            if let Some(addr) = &place.address {
261                if let Some(city) = &addr.city {
262                    metadata.insert("city".to_string(), city.clone());
263                }
264                if let Some(country) = &addr.country {
265                    metadata.insert("country".to_string(), country.clone());
266                }
267                if let Some(country_code) = &addr.country_code {
268                    metadata.insert("country_code".to_string(), country_code.clone());
269                }
270            }
271            metadata.insert("source".to_string(), "nominatim".to_string());
272
273            vectors.push(SemanticVector {
274                id: format!("NOMINATIM:{}:{}", place.osm_type, place.osm_id),
275                embedding,
276                domain: Domain::CrossDomain, // Geographic data spans multiple domains
277                timestamp: Utc::now(),
278                metadata,
279            });
280        }
281
282        Ok(vectors)
283    }
284
285    /// Fetch with retry logic
286    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
287        let mut retries = 0;
288        loop {
289            match self.client.get(url).send().await {
290                Ok(response) => {
291                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
292                        retries += 1;
293                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
294                        continue;
295                    }
296                    return Ok(response);
297                }
298                Err(_) if retries < MAX_RETRIES => {
299                    retries += 1;
300                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
301                }
302                Err(e) => return Err(FrameworkError::Network(e)),
303            }
304        }
305    }
306}
307
308impl Default for NominatimClient {
309    fn default() -> Self {
310        Self::new().expect("Failed to create Nominatim client")
311    }
312}
313
314// ============================================================================
315// Overpass API Client (OSM Data Queries)
316// ============================================================================
317
/// Overpass API response element
///
/// Coordinates come either from `lat`/`lon` or from `center`; elements that
/// have neither are dropped during conversion.
#[derive(Debug, Deserialize)]
struct OverpassElement {
    /// OSM element type; used in the vector id and in the fallback name.
    #[serde(default)]
    r#type: String,
    /// OSM element id.
    #[serde(default)]
    id: u64,
    /// Latitude, when the element carries its own coordinates.
    #[serde(default)]
    lat: Option<f64>,
    /// Longitude, when the element carries its own coordinates.
    #[serde(default)]
    lon: Option<f64>,
    /// OSM tags as key/value pairs; empty when the element has none.
    #[serde(default)]
    tags: HashMap<String, String>,
    /// Center point, used when `lat`/`lon` are absent.
    #[serde(default)]
    center: Option<OverpassCenter>,
}
334
/// Center point of an Overpass element, used as its position when the
/// element has no `lat`/`lon` of its own.
#[derive(Debug, Deserialize)]
struct OverpassCenter {
    /// Latitude of the center point.
    lat: f64,
    /// Longitude of the center point.
    lon: f64,
}
340
/// Overpass API response
///
/// Only the `elements` array is deserialized; it defaults to empty when the
/// payload has no such field.
#[derive(Debug, Deserialize)]
struct OverpassResponse {
    /// Elements returned by the query.
    #[serde(default)]
    elements: Vec<OverpassElement>,
}
347
/// Client for Overpass API (OSM Data Queries)
///
/// Provides access to:
/// - Custom Overpass QL queries
/// - Nearby POI (Points of Interest) search
/// - Road network extraction
/// - OSM tag-based queries
///
/// Results are returned as `SemanticVector`s.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::OverpassClient;
///
/// let client = OverpassClient::new()?;
/// let pois = client.get_nearby_pois(48.8584, 2.2945, 500.0, "restaurant").await?;
/// let roads = client.get_roads(48.85, 2.29, 48.86, 2.30).await?;
/// ```
pub struct OverpassClient {
    /// HTTP client configured with a timeout and the crate's User-Agent.
    client: Client,
    /// URL of the Overpass interpreter endpoint that queries are POSTed to.
    base_url: String,
    /// Fixed delay slept before every query.
    rate_limit_delay: Duration,
    /// Embedder used to turn element descriptions into vectors.
    embedder: Arc<SimpleEmbedder>,
}
370
371impl OverpassClient {
372    /// Create a new Overpass API client
373    pub fn new() -> Result<Self> {
374        let client = Client::builder()
375            .timeout(Duration::from_secs(60)) // Overpass can be slow
376            .user_agent(USER_AGENT)
377            .build()
378            .map_err(FrameworkError::Network)?;
379
380        Ok(Self {
381            client,
382            base_url: "https://overpass-api.de/api/interpreter".to_string(),
383            rate_limit_delay: Duration::from_millis(OVERPASS_RATE_LIMIT_MS),
384            embedder: Arc::new(SimpleEmbedder::new(256)),
385        })
386    }
387
388    /// Execute a custom Overpass QL query
389    ///
390    /// # Arguments
391    /// * `query` - Overpass QL query string
392    ///
393    /// # Example
394    /// ```rust,ignore
395    /// let query = r#"
396    ///     [out:json];
397    ///     node["amenity"="cafe"](around:1000,48.8584,2.2945);
398    ///     out;
399    /// "#;
400    /// let results = client.query(query).await?;
401    /// ```
402    pub async fn query(&self, query: &str) -> Result<Vec<SemanticVector>> {
403        sleep(self.rate_limit_delay).await;
404
405        let response = self.client
406            .post(&self.base_url)
407            .body(query.to_string())
408            .send()
409            .await?;
410
411        let overpass_response: OverpassResponse = response.json().await?;
412        self.convert_elements(overpass_response.elements)
413    }
414
415    /// Get nearby POIs (Points of Interest)
416    ///
417    /// # Arguments
418    /// * `lat` - Center latitude
419    /// * `lon` - Center longitude
420    /// * `radius` - Search radius in meters
421    /// * `amenity_type` - OSM amenity type (e.g., "restaurant", "cafe", "hospital")
422    ///
423    /// # Example
424    /// ```rust,ignore
425    /// let cafes = client.get_nearby_pois(48.8584, 2.2945, 1000.0, "cafe").await?;
426    /// ```
427    pub async fn get_nearby_pois(
428        &self,
429        lat: f64,
430        lon: f64,
431        radius: f64,
432        amenity_type: &str,
433    ) -> Result<Vec<SemanticVector>> {
434        let query = format!(
435            r#"[out:json];node["amenity"="{}"](around:{},{},{});out;"#,
436            amenity_type, radius, lat, lon
437        );
438
439        self.query(&query).await
440    }
441
442    /// Get road network in a bounding box
443    ///
444    /// # Arguments
445    /// * `south` - Southern latitude
446    /// * `west` - Western longitude
447    /// * `north` - Northern latitude
448    /// * `east` - Eastern longitude
449    ///
450    /// # Example
451    /// ```rust,ignore
452    /// let roads = client.get_roads(48.85, 2.29, 48.86, 2.30).await?;
453    /// ```
454    pub async fn get_roads(
455        &self,
456        south: f64,
457        west: f64,
458        north: f64,
459        east: f64,
460    ) -> Result<Vec<SemanticVector>> {
461        let query = format!(
462            r#"[out:json];way["highway"]({},{},{},{});out geom;"#,
463            south, west, north, east
464        );
465
466        self.query(&query).await
467    }
468
469    /// Convert Overpass elements to SemanticVectors
470    fn convert_elements(&self, elements: Vec<OverpassElement>) -> Result<Vec<SemanticVector>> {
471        let mut vectors = Vec::new();
472
473        for element in elements {
474            // Get coordinates (from element or center)
475            let (lat, lon) = if let (Some(lat), Some(lon)) = (element.lat, element.lon) {
476                (lat, lon)
477            } else if let Some(center) = element.center {
478                (center.lat, center.lon)
479            } else {
480                continue; // Skip elements without coordinates
481            };
482
483            // Extract name and tags
484            let name = element.tags.get("name").cloned().unwrap_or_else(|| {
485                format!("OSM {} {}", element.r#type, element.id)
486            });
487
488            let amenity = element.tags.get("amenity").cloned().unwrap_or_default();
489            let highway = element.tags.get("highway").cloned().unwrap_or_default();
490
491            // Create text for embedding
492            let text = format!(
493                "{} at lat: {}, lon: {} - amenity: {}, highway: {}, tags: {:?}",
494                name, lat, lon, amenity, highway, element.tags
495            );
496            let embedding = self.embedder.embed_text(&text);
497
498            let mut metadata = HashMap::new();
499            metadata.insert("osm_id".to_string(), element.id.to_string());
500            metadata.insert("osm_type".to_string(), element.r#type.clone());
501            metadata.insert("latitude".to_string(), lat.to_string());
502            metadata.insert("longitude".to_string(), lon.to_string());
503            metadata.insert("name".to_string(), name);
504
505            if !amenity.is_empty() {
506                metadata.insert("amenity".to_string(), amenity);
507            }
508            if !highway.is_empty() {
509                metadata.insert("highway".to_string(), highway);
510            }
511
512            // Add all OSM tags
513            for (key, value) in element.tags {
514                metadata.insert(format!("osm_tag_{}", key), value);
515            }
516            metadata.insert("source".to_string(), "overpass".to_string());
517
518            vectors.push(SemanticVector {
519                id: format!("OVERPASS:{}:{}", element.r#type, element.id),
520                embedding,
521                domain: Domain::CrossDomain,
522                timestamp: Utc::now(),
523                metadata,
524            });
525        }
526
527        Ok(vectors)
528    }
529}
530
531impl Default for OverpassClient {
532    fn default() -> Self {
533        Self::new().expect("Failed to create Overpass client")
534    }
535}
536
537// ============================================================================
538// GeoNames Client
539// ============================================================================
540
/// GeoNames search result
///
/// Envelope used for both the search and the nearby-places responses. The
/// list defaults to empty when the payload has no `geonames` field.
#[derive(Debug, Deserialize)]
struct GeoNamesSearchResult {
    /// Matching place records.
    #[serde(default)]
    geonames: Vec<GeoName>,
}
547
/// One GeoNames place record.
///
/// Field names mirror the camelCase JSON keys so that serde matches them
/// without rename attributes. Every field has a serde default.
#[derive(Debug, Deserialize)]
struct GeoName {
    /// GeoNames identifier; used to build the vector id.
    #[serde(default)]
    geonameId: u64,
    /// Place name.
    #[serde(default)]
    name: String,
    /// Latitude as a decimal string; parsed to `f64` during conversion.
    #[serde(default)]
    lat: String,
    /// Longitude as a decimal string; parsed to `f64` during conversion.
    #[serde(default)]
    lng: String,
    /// Country code.
    #[serde(default)]
    countryCode: String,
    /// Country name.
    #[serde(default)]
    countryName: String,
    #[serde(default)]
    fcl: String, // feature class
    #[serde(default)]
    fcode: String, // feature code
    /// Population count.
    #[serde(default)]
    population: u64,
    #[serde(default)]
    adminName1: String, // state/province
    /// Toponym name of the place.
    #[serde(default)]
    toponymName: String,
}
573
/// GeoNames timezone result
///
/// Every field has a serde default, so a payload without timezone data
/// deserializes with an empty `timezoneId`.
#[derive(Debug, Deserialize)]
struct GeoNamesTimezone {
    /// Timezone identifier; used to build the vector id.
    #[serde(default)]
    timezoneId: String,
    /// Country code of the location.
    #[serde(default)]
    countryCode: String,
    /// Latitude from the response (not used in this file's visible code).
    #[serde(default)]
    lat: f64,
    /// Longitude from the response (not used in this file's visible code).
    #[serde(default)]
    lng: f64,
}
586
/// GeoNames country info
///
/// Envelope for the country-info response. The list defaults to empty when
/// the payload has no `geonames` field.
#[derive(Debug, Deserialize)]
struct GeoNamesCountryInfo {
    /// Country records returned for the request.
    #[serde(default)]
    geonames: Vec<GeoNamesCountry>,
}
593
/// One country record from the GeoNames country-info response.
///
/// Field names mirror the camelCase JSON keys. Every field has a serde default.
#[derive(Debug, Deserialize)]
struct GeoNamesCountry {
    /// Country code; used to build the vector id.
    #[serde(default)]
    countryCode: String,
    /// Country name.
    #[serde(default)]
    countryName: String,
    /// Capital city name.
    #[serde(default)]
    capital: String,
    /// Population count.
    // NOTE(review): the service may send this value as a JSON string rather
    // than a number; if so, deserializing into `u64` fails. Confirm against
    // a live response.
    #[serde(default)]
    population: u64,
    /// Area in square kilometres, kept as the string the service sends.
    #[serde(default)]
    areaInSqKm: String,
    /// Continent value from the response.
    #[serde(default)]
    continent: String,
}
609
/// Client for GeoNames
///
/// Provides access to:
/// - Place name search
/// - Nearby places lookup
/// - Timezone information
/// - Country details
///
/// **Note**: Requires a GeoNames username, passed to [`GeonamesClient::new`].
/// The client does not read the `GEONAMES_USERNAME` env var itself; callers
/// that keep the username there must load it and pass it in.
/// Free tier: 2000 requests/hour, 30000/day
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::GeonamesClient;
///
/// let client = GeonamesClient::new("your_username".to_string())?;
/// let places = client.search("Paris", 10).await?;
/// let nearby = client.get_nearby(48.8566, 2.3522).await?;
/// let tz = client.get_timezone(40.7128, -74.0060).await?;
/// ```
pub struct GeonamesClient {
    /// HTTP client configured with a request timeout.
    client: Client,
    /// Base URL of the GeoNames web service, without a trailing slash.
    base_url: String,
    /// GeoNames account name, sent as the `username` query parameter.
    username: String,
    /// Fixed delay slept before every request.
    rate_limit_delay: Duration,
    /// Embedder used to turn place descriptions into vectors.
    embedder: Arc<SimpleEmbedder>,
}
637
638impl GeonamesClient {
639    /// Create a new GeoNames client
640    ///
641    /// # Arguments
642    /// * `username` - GeoNames username (register at geonames.org)
643    pub fn new(username: String) -> Result<Self> {
644        let client = Client::builder()
645            .timeout(Duration::from_secs(30))
646            .build()
647            .map_err(FrameworkError::Network)?;
648
649        Ok(Self {
650            client,
651            base_url: "http://api.geonames.org".to_string(),
652            username,
653            rate_limit_delay: Duration::from_millis(GEONAMES_RATE_LIMIT_MS),
654            embedder: Arc::new(SimpleEmbedder::new(256)),
655        })
656    }
657
658    /// Search for places by name
659    ///
660    /// # Arguments
661    /// * `query` - Place name to search
662    /// * `limit` - Maximum number of results
663    ///
664    /// # Example
665    /// ```rust,ignore
666    /// let results = client.search("New York", 10).await?;
667    /// ```
668    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SemanticVector>> {
669        sleep(self.rate_limit_delay).await;
670
671        let url = format!(
672            "{}/searchJSON?q={}&maxRows={}&username={}",
673            self.base_url,
674            urlencoding::encode(query),
675            limit,
676            self.username
677        );
678
679        let response = self.fetch_with_retry(&url).await?;
680        let result: GeoNamesSearchResult = response.json().await?;
681
682        self.convert_geonames(result.geonames)
683    }
684
685    /// Get nearby places
686    ///
687    /// # Arguments
688    /// * `lat` - Latitude
689    /// * `lon` - Longitude
690    ///
691    /// # Example
692    /// ```rust,ignore
693    /// let nearby = client.get_nearby(40.7128, -74.0060).await?;
694    /// ```
695    pub async fn get_nearby(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
696        sleep(self.rate_limit_delay).await;
697
698        let url = format!(
699            "{}/findNearbyJSON?lat={}&lng={}&username={}",
700            self.base_url, lat, lon, self.username
701        );
702
703        let response = self.fetch_with_retry(&url).await?;
704        let result: GeoNamesSearchResult = response.json().await?;
705
706        self.convert_geonames(result.geonames)
707    }
708
709    /// Get timezone for coordinates
710    ///
711    /// # Arguments
712    /// * `lat` - Latitude
713    /// * `lon` - Longitude
714    ///
715    /// # Example
716    /// ```rust,ignore
717    /// let tz = client.get_timezone(51.5074, -0.1278).await?;
718    /// ```
719    pub async fn get_timezone(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
720        sleep(self.rate_limit_delay).await;
721
722        let url = format!(
723            "{}/timezoneJSON?lat={}&lng={}&username={}",
724            self.base_url, lat, lon, self.username
725        );
726
727        let response = self.fetch_with_retry(&url).await?;
728        let tz: GeoNamesTimezone = response.json().await?;
729
730        let text = format!(
731            "Timezone {} for coordinates ({}, {}), country: {}",
732            tz.timezoneId, lat, lon, tz.countryCode
733        );
734        let embedding = self.embedder.embed_text(&text);
735
736        let mut metadata = HashMap::new();
737        metadata.insert("timezone_id".to_string(), tz.timezoneId.clone());
738        metadata.insert("country_code".to_string(), tz.countryCode);
739        metadata.insert("latitude".to_string(), lat.to_string());
740        metadata.insert("longitude".to_string(), lon.to_string());
741        metadata.insert("source".to_string(), "geonames".to_string());
742
743        Ok(vec![SemanticVector {
744            id: format!("GEONAMES:TZ:{}", tz.timezoneId),
745            embedding,
746            domain: Domain::CrossDomain,
747            timestamp: Utc::now(),
748            metadata,
749        }])
750    }
751
752    /// Get country information
753    ///
754    /// # Arguments
755    /// * `country_code` - ISO 2-letter country code (e.g., "US", "FR")
756    ///
757    /// # Example
758    /// ```rust,ignore
759    /// let info = client.get_country_info("US").await?;
760    /// ```
761    pub async fn get_country_info(&self, country_code: &str) -> Result<Vec<SemanticVector>> {
762        sleep(self.rate_limit_delay).await;
763
764        let url = format!(
765            "{}/countryInfoJSON?country={}&username={}",
766            self.base_url, country_code, self.username
767        );
768
769        let response = self.fetch_with_retry(&url).await?;
770        let result: GeoNamesCountryInfo = response.json().await?;
771
772        let mut vectors = Vec::new();
773        for country in result.geonames {
774            let text = format!(
775                "{} ({}) - Capital: {}, Population: {}, Area: {} sq km, Continent: {}",
776                country.countryName,
777                country.countryCode,
778                country.capital,
779                country.population,
780                country.areaInSqKm,
781                country.continent
782            );
783            let embedding = self.embedder.embed_text(&text);
784
785            let mut metadata = HashMap::new();
786            metadata.insert("country_code".to_string(), country.countryCode.clone());
787            metadata.insert("country_name".to_string(), country.countryName);
788            metadata.insert("capital".to_string(), country.capital);
789            metadata.insert("population".to_string(), country.population.to_string());
790            metadata.insert("area_sq_km".to_string(), country.areaInSqKm);
791            metadata.insert("continent".to_string(), country.continent);
792            metadata.insert("source".to_string(), "geonames".to_string());
793
794            vectors.push(SemanticVector {
795                id: format!("GEONAMES:COUNTRY:{}", country.countryCode),
796                embedding,
797                domain: Domain::CrossDomain,
798                timestamp: Utc::now(),
799                metadata,
800            });
801        }
802
803        Ok(vectors)
804    }
805
806    /// Convert GeoNames results to SemanticVectors
807    fn convert_geonames(&self, geonames: Vec<GeoName>) -> Result<Vec<SemanticVector>> {
808        let mut vectors = Vec::new();
809
810        for place in geonames {
811            let lat = place.lat.parse::<f64>().unwrap_or(0.0);
812            let lon = place.lng.parse::<f64>().unwrap_or(0.0);
813
814            let text = format!(
815                "{} ({}) in {}, {} - lat: {}, lon: {}, population: {}",
816                place.name,
817                place.toponymName,
818                place.adminName1,
819                place.countryName,
820                lat,
821                lon,
822                place.population
823            );
824            let embedding = self.embedder.embed_text(&text);
825
826            let mut metadata = HashMap::new();
827            metadata.insert("geoname_id".to_string(), place.geonameId.to_string());
828            metadata.insert("name".to_string(), place.name);
829            metadata.insert("toponym_name".to_string(), place.toponymName);
830            metadata.insert("latitude".to_string(), lat.to_string());
831            metadata.insert("longitude".to_string(), lon.to_string());
832            metadata.insert("country_code".to_string(), place.countryCode);
833            metadata.insert("country_name".to_string(), place.countryName);
834            metadata.insert("admin_name1".to_string(), place.adminName1);
835            metadata.insert("feature_class".to_string(), place.fcl);
836            metadata.insert("feature_code".to_string(), place.fcode);
837            metadata.insert("population".to_string(), place.population.to_string());
838            metadata.insert("source".to_string(), "geonames".to_string());
839
840            vectors.push(SemanticVector {
841                id: format!("GEONAMES:{}", place.geonameId),
842                embedding,
843                domain: Domain::CrossDomain,
844                timestamp: Utc::now(),
845                metadata,
846            });
847        }
848
849        Ok(vectors)
850    }
851
852    /// Fetch with retry logic
853    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
854        let mut retries = 0;
855        loop {
856            match self.client.get(url).send().await {
857                Ok(response) => {
858                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
859                        retries += 1;
860                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
861                        continue;
862                    }
863                    return Ok(response);
864                }
865                Err(_) if retries < MAX_RETRIES => {
866                    retries += 1;
867                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
868                }
869                Err(e) => return Err(FrameworkError::Network(e)),
870            }
871        }
872    }
873}
874
875// ============================================================================
876// Open Elevation Client
877// ============================================================================
878
/// Open Elevation result
///
/// Response envelope; `results` defaults to empty when the payload has no
/// such field.
#[derive(Debug, Deserialize)]
struct OpenElevationResponse {
    /// One entry per returned location.
    #[serde(default)]
    results: Vec<ElevationPoint>,
}
885
/// A location together with its elevation, as found in
/// [`OpenElevationResponse`]. All three fields are required when deserializing.
#[derive(Debug, Deserialize, Serialize)]
struct ElevationPoint {
    /// Latitude of the point.
    latitude: f64,
    /// Longitude of the point.
    longitude: f64,
    /// Elevation value reported for the point.
    elevation: f64,
}
892
/// Request for batch elevation lookup
///
/// Serialized as an object with a `locations` array.
#[derive(Debug, Serialize)]
struct ElevationRequest {
    /// Points whose elevation is requested.
    locations: Vec<ElevationLocation>,
}
898
/// A single coordinate pair inside an [`ElevationRequest`].
#[derive(Debug, Serialize)]
struct ElevationLocation {
    /// Latitude of the point.
    latitude: f64,
    /// Longitude of the point.
    longitude: f64,
}
904
/// Client for Open Elevation API
///
/// Provides access to:
/// - Single point elevation lookup
/// - Batch elevation lookups
/// - Worldwide coverage using SRTM data
///
/// No authentication required. Free and open service.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::OpenElevationClient;
///
/// let client = OpenElevationClient::new()?;
/// let elevation = client.get_elevation(46.9480, 7.4474).await?; // Bern, Switzerland
/// let elevations = client.get_elevations(vec![(40.7128, -74.0060), (48.8566, 2.3522)]).await?;
/// ```
pub struct OpenElevationClient {
    /// HTTP client configured with a request timeout.
    client: Client,
    /// Base URL of the Open Elevation API, without a trailing slash.
    base_url: String,
    /// Fixed delay slept before each lookup request.
    rate_limit_delay: Duration,
    /// Embedder used to turn elevation descriptions into vectors.
    embedder: Arc<SimpleEmbedder>,
}
928
929impl OpenElevationClient {
930    /// Create a new Open Elevation client
931    pub fn new() -> Result<Self> {
932        let client = Client::builder()
933            .timeout(Duration::from_secs(30))
934            .build()
935            .map_err(FrameworkError::Network)?;
936
937        Ok(Self {
938            client,
939            base_url: "https://api.open-elevation.com/api/v1".to_string(),
940            rate_limit_delay: Duration::from_millis(OPEN_ELEVATION_RATE_LIMIT_MS),
941            embedder: Arc::new(SimpleEmbedder::new(256)),
942        })
943    }
944
945    /// Get elevation for a single point
946    ///
947    /// # Arguments
948    /// * `lat` - Latitude
949    /// * `lon` - Longitude
950    ///
951    /// # Example
952    /// ```rust,ignore
953    /// let elevation = client.get_elevation(27.9881, 86.9250).await?; // Mt. Everest
954    /// ```
955    pub async fn get_elevation(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
956        self.get_elevations(vec![(lat, lon)]).await
957    }
958
959    /// Get elevations for multiple points
960    ///
961    /// # Arguments
962    /// * `locations` - Vec of (latitude, longitude) tuples
963    ///
964    /// # Example
965    /// ```rust,ignore
966    /// let elevations = client.get_elevations(vec![
967    ///     (40.7128, -74.0060), // NYC
968    ///     (48.8566, 2.3522),   // Paris
969    /// ]).await?;
970    /// ```
971    pub async fn get_elevations(&self, locations: Vec<(f64, f64)>) -> Result<Vec<SemanticVector>> {
972        sleep(self.rate_limit_delay).await;
973
974        let request = ElevationRequest {
975            locations: locations
976                .iter()
977                .map(|(lat, lon)| ElevationLocation {
978                    latitude: *lat,
979                    longitude: *lon,
980                })
981                .collect(),
982        };
983
984        let url = format!("{}/lookup", self.base_url);
985
986        let response = self.client
987            .post(&url)
988            .json(&request)
989            .send()
990            .await?;
991
992        let elevation_response: OpenElevationResponse = response.json().await?;
993        self.convert_elevations(elevation_response.results)
994    }
995
996    /// Convert elevation points to SemanticVectors
997    fn convert_elevations(&self, points: Vec<ElevationPoint>) -> Result<Vec<SemanticVector>> {
998        let mut vectors = Vec::new();
999
1000        for point in points {
1001            let text = format!(
1002                "Elevation {} meters at lat: {}, lon: {}",
1003                point.elevation, point.latitude, point.longitude
1004            );
1005            let embedding = self.embedder.embed_text(&text);
1006
1007            let mut metadata = HashMap::new();
1008            metadata.insert("latitude".to_string(), point.latitude.to_string());
1009            metadata.insert("longitude".to_string(), point.longitude.to_string());
1010            metadata.insert("elevation_m".to_string(), point.elevation.to_string());
1011            metadata.insert("source".to_string(), "open_elevation".to_string());
1012
1013            vectors.push(SemanticVector {
1014                id: format!("ELEVATION:{}:{}", point.latitude, point.longitude),
1015                embedding,
1016                domain: Domain::CrossDomain,
1017                timestamp: Utc::now(),
1018                metadata,
1019            });
1020        }
1021
1022        Ok(vectors)
1023    }
1024}
1025
1026impl Default for OpenElevationClient {
1027    fn default() -> Self {
1028        Self::new().expect("Failed to create OpenElevation client")
1029    }
1030}
1031
1032// ============================================================================
1033// Tests
1034// ============================================================================
1035
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built Nominatim client carries the configured rate-limit delay.
    #[tokio::test]
    async fn test_nominatim_client_creation() {
        let created = NominatimClient::new();
        assert!(created.is_ok());

        let expected_delay = Duration::from_millis(NOMINATIM_RATE_LIMIT_MS);
        assert_eq!(created.unwrap().rate_limit_delay, expected_delay);
    }

    /// Two back-to-back calls: the first is fast, the second is held back.
    #[tokio::test]
    async fn test_nominatim_rate_limiting() {
        let client = NominatimClient::new().unwrap();

        // The very first call is expected to return without waiting.
        let timer = Instant::now();
        client.enforce_rate_limit().await;
        assert!(timer.elapsed() < Duration::from_millis(100));

        // An immediate second call must be delayed; 900 ms leaves some
        // tolerance below the nominal delay.
        let timer = Instant::now();
        client.enforce_rate_limit().await;
        assert!(timer.elapsed() >= Duration::from_millis(900));
    }

    #[tokio::test]
    async fn test_overpass_client_creation() {
        assert!(OverpassClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_geonames_client_creation() {
        assert!(GeonamesClient::new(String::from("test_user")).is_ok());
    }

    #[tokio::test]
    async fn test_open_elevation_client_creation() {
        assert!(OpenElevationClient::new().is_ok());
    }

    /// A Nominatim place keeps its OSM identity and address parts after conversion.
    #[test]
    fn test_nominatim_place_conversion() {
        let client = NominatimClient::new().unwrap();

        let address = NominatimAddress {
            house_number: None,
            road: Some("Champ de Mars".to_string()),
            city: Some("Paris".to_string()),
            state: Some("Île-de-France".to_string()),
            postcode: Some("75007".to_string()),
            country: Some("France".to_string()),
            country_code: Some("fr".to_string()),
        };
        let eiffel_tower = NominatimPlace {
            place_id: 12345,
            licence: "ODbL".to_string(),
            osm_type: "way".to_string(),
            osm_id: 67890,
            lat: "48.8584".to_string(),
            lon: "2.2945".to_string(),
            display_name: "Eiffel Tower, Paris, France".to_string(),
            r#type: "attraction".to_string(),
            importance: 0.9,
            address: Some(address),
            geojson: None,
        };

        let vectors = client.convert_places(vec![eiffel_tower]).unwrap();
        assert_eq!(vectors.len(), 1);

        let first = &vectors[0];
        assert_eq!(first.id, "NOMINATIM:way:67890");
        assert_eq!(first.metadata["city"], "Paris");
        assert_eq!(first.metadata["country"], "France");
        assert_eq!(first.domain, Domain::CrossDomain);
    }

    /// OSM tags of an Overpass element are carried over into the metadata.
    #[test]
    fn test_overpass_element_conversion() {
        let client = OverpassClient::new().unwrap();

        let tags: HashMap<String, String> = [("name", "Central Park"), ("amenity", "park")]
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect();

        let central_park = OverpassElement {
            r#type: "node".to_string(),
            id: 123456,
            lat: Some(40.7829),
            lon: Some(-73.9654),
            tags,
            center: None,
        };

        let vectors = client.convert_elements(vec![central_park]).unwrap();
        assert_eq!(vectors.len(), 1);

        let first = &vectors[0];
        assert_eq!(first.id, "OVERPASS:node:123456");
        assert_eq!(first.metadata["name"], "Central Park");
        assert_eq!(first.metadata["amenity"], "park");
    }

    /// A GeoNames record is keyed by its geoname id and exposes its core fields.
    #[test]
    fn test_geonames_conversion() {
        let client = GeonamesClient::new("test".to_string()).unwrap();

        let paris = GeoName {
            geonameId: 2988507,
            name: "Paris".to_string(),
            lat: "48.85341".to_string(),
            lng: "2.3488".to_string(),
            countryCode: "FR".to_string(),
            countryName: "France".to_string(),
            fcl: "P".to_string(),
            fcode: "PPLC".to_string(),
            population: 2138551,
            adminName1: "Île-de-France".to_string(),
            toponymName: "Paris".to_string(),
        };

        let vectors = client.convert_geonames(vec![paris]).unwrap();
        assert_eq!(vectors.len(), 1);

        let first = &vectors[0];
        assert_eq!(first.id, "GEONAMES:2988507");
        assert_eq!(first.metadata["name"], "Paris");
        assert_eq!(first.metadata["country_code"], "FR");
        assert_eq!(first.metadata["population"], "2138551");
    }

    /// Elevations are stored in metadata using the default float formatting.
    #[test]
    fn test_elevation_conversion() {
        let client = OpenElevationClient::new().unwrap();

        // (latitude, longitude, elevation)
        let points: Vec<ElevationPoint> = [(27.9881, 86.9250, 8848.86), (40.7128, -74.0060, 10.0)]
            .iter()
            .map(|&(latitude, longitude, elevation)| ElevationPoint {
                latitude,
                longitude,
                elevation,
            })
            .collect();

        let vectors = client.convert_elevations(points).unwrap();
        assert_eq!(vectors.len(), 2);

        let elevations: Vec<&str> = vectors
            .iter()
            .map(|vector| vector.metadata["elevation_m"].as_str())
            .collect();
        assert_eq!(elevations, ["8848.86", "10"]);
    }

    /// Guards the rate-limit constants against accidental edits.
    #[test]
    fn test_rate_limits() {
        // Nominatim: exactly one request per second.
        assert_eq!(NOMINATIM_RATE_LIMIT_MS, 1000);
        // Overpass and Open Elevation: at least 2/sec and 5/sec respectively.
        assert!(OVERPASS_RATE_LIMIT_MS <= 500);
        assert!(OPEN_ELEVATION_RATE_LIMIT_MS <= 200);
        // GeoNames: stays conservative for the free tier.
        assert!(GEONAMES_RATE_LIMIT_MS >= 1800);
    }

    #[test]
    fn test_user_agent_constant() {
        for needle in &["RuVector", "github"] {
            assert!(USER_AGENT.contains(*needle));
        }
    }

    /// Sanity check of the GeoUtils distance helper (from physics_clients).
    #[test]
    fn test_geo_utils_integration() {
        let (paris_lat, paris_lon) = (48.8566, 2.3522);
        let (london_lat, london_lon) = (51.5074, -0.1278);

        let km = GeoUtils::distance_km(paris_lat, paris_lon, london_lat, london_lon);

        // Paris to London is approximately 344 km.
        assert!((km - 344.0).abs() < 50.0);
    }

    /// The same pair of points is inside a 5 km radius but outside a 1 km one.
    #[test]
    fn test_geo_utils_within_radius() {
        let (center_lat, center_lon) = (48.8566, 2.3522);
        // The Eiffel Tower lies roughly 4 km west of this center point.
        let (eiffel_lat, eiffel_lon) = (48.8584, 2.2945);

        let eiffel_within = |radius_km| {
            GeoUtils::within_radius(center_lat, center_lon, eiffel_lat, eiffel_lon, radius_km)
        };

        assert!(eiffel_within(5.0));
        assert!(!eiffel_within(1.0));
    }
}