ruvector_data_framework/
geospatial_clients.rs

1//! Geospatial & Mapping API integrations
2//!
3//! This module provides async clients for:
4//! - Nominatim (OpenStreetMap geocoding)
5//! - Overpass API (OSM data queries)
6//! - GeoNames (place name database)
7//! - Open Elevation (elevation data)
8//!
9//! All responses are converted to SemanticVector format for RuVector discovery.
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14
15use chrono::Utc;
16use reqwest::{Client, StatusCode};
17use serde::{Deserialize, Serialize};
18use tokio::sync::Mutex;
19use tokio::time::sleep;
20
21use crate::api_clients::SimpleEmbedder;
22use crate::ruvector_native::{Domain, SemanticVector};
23use crate::{FrameworkError, Result};
24
// Pacing and retry settings shared by the clients in this module.
// All delays are expressed in milliseconds.

/// Nominatim: one request per second (STRICT).
const NOMINATIM_RATE_LIMIT_MS: u64 = 1_000;
/// Overpass: conservative two requests per second.
const OVERPASS_RATE_LIMIT_MS: u64 = 500;
/// GeoNames: conservative pacing for the free tier (2000 requests/hour limit).
const GEONAMES_RATE_LIMIT_MS: u64 = 2_000;
/// Open Elevation: roughly five requests per second.
const OPEN_ELEVATION_RATE_LIMIT_MS: u64 = 200;
/// Upper bound on retries performed by the `fetch_with_retry` helpers.
const MAX_RETRIES: u32 = 3;
/// Base retry delay; the helpers multiply it by the retry count.
const RETRY_DELAY_MS: u64 = 2_000;

/// `User-Agent` sent to OSM services (required by their usage policy).
const USER_AGENT: &str = "RuVector-Data-Framework/1.0 (https://github.com/ruvnet/ruvector)";
35
36// ============================================================================
37// Nominatim Client (OpenStreetMap Geocoding)
38// ============================================================================
39
40/// Nominatim geocoding response
41#[derive(Debug, Deserialize)]
42struct NominatimPlace {
43    #[serde(default)]
44    place_id: u64,
45    #[serde(default)]
46    licence: String,
47    #[serde(default)]
48    osm_type: String,
49    #[serde(default)]
50    osm_id: u64,
51    #[serde(default)]
52    lat: String,
53    #[serde(default)]
54    lon: String,
55    #[serde(default)]
56    display_name: String,
57    #[serde(default)]
58    r#type: String,
59    #[serde(default)]
60    importance: f64,
61    #[serde(default)]
62    address: Option<NominatimAddress>,
63    #[serde(default)]
64    geojson: Option<serde_json::Value>,
65}
66
67#[derive(Debug, Deserialize, Default)]
68struct NominatimAddress {
69    #[serde(default)]
70    house_number: Option<String>,
71    #[serde(default)]
72    road: Option<String>,
73    #[serde(default)]
74    city: Option<String>,
75    #[serde(default)]
76    state: Option<String>,
77    #[serde(default)]
78    postcode: Option<String>,
79    #[serde(default)]
80    country: Option<String>,
81    #[serde(default)]
82    country_code: Option<String>,
83}
84
85/// Client for Nominatim (OpenStreetMap Geocoding)
86///
87/// Provides access to:
88/// - Address to coordinates (geocoding)
89/// - Coordinates to address (reverse geocoding)
90/// - Place name search
91///
92/// **IMPORTANT**: STRICT rate limit of 1 request/second is enforced.
93/// See: https://operations.osmfoundation.org/policies/nominatim/
94///
95/// # Example
96/// ```rust,ignore
97/// use ruvector_data_framework::NominatimClient;
98///
99/// let client = NominatimClient::new()?;
100/// let coords = client.geocode("1600 Pennsylvania Avenue, Washington DC").await?;
101/// let address = client.reverse_geocode(38.8977, -77.0365).await?;
102/// let places = client.search("Eiffel Tower", 5).await?;
103/// ```
104pub struct NominatimClient {
105    client: Client,
106    base_url: String,
107    rate_limit_delay: Duration,
108    embedder: Arc<SimpleEmbedder>,
109    /// Last request time for STRICT rate limiting
110    last_request: Arc<Mutex<Option<Instant>>>,
111}
112
113impl NominatimClient {
114    /// Create a new Nominatim client
115    pub fn new() -> Result<Self> {
116        let client = Client::builder()
117            .timeout(Duration::from_secs(30))
118            .user_agent(USER_AGENT)
119            .build()
120            .map_err(FrameworkError::Network)?;
121
122        Ok(Self {
123            client,
124            base_url: "https://nominatim.openstreetmap.org".to_string(),
125            rate_limit_delay: Duration::from_millis(NOMINATIM_RATE_LIMIT_MS),
126            embedder: Arc::new(SimpleEmbedder::new(256)),
127            last_request: Arc::new(Mutex::new(None)),
128        })
129    }
130
131    /// Enforce STRICT rate limiting (1 request/second)
132    async fn enforce_rate_limit(&self) {
133        let mut last = self.last_request.lock().await;
134
135        if let Some(last_time) = *last {
136            let elapsed = last_time.elapsed();
137            if elapsed < self.rate_limit_delay {
138                let wait_time = self.rate_limit_delay - elapsed;
139                sleep(wait_time).await;
140            }
141        }
142
143        *last = Some(Instant::now());
144    }
145
146    /// Geocode an address to coordinates
147    ///
148    /// # Arguments
149    /// * `address` - Address string (e.g., "1600 Pennsylvania Avenue, Washington DC")
150    ///
151    /// # Example
152    /// ```rust,ignore
153    /// let coords = client.geocode("Eiffel Tower, Paris").await?;
154    /// ```
155    pub async fn geocode(&self, address: &str) -> Result<Vec<SemanticVector>> {
156        self.enforce_rate_limit().await;
157
158        let url = format!(
159            "{}/search?q={}&format=json&addressdetails=1&limit=1",
160            self.base_url,
161            urlencoding::encode(address)
162        );
163
164        let response = self.fetch_with_retry(&url).await?;
165        let places: Vec<NominatimPlace> = response.json().await?;
166
167        self.convert_places(places)
168    }
169
170    /// Reverse geocode coordinates to address
171    ///
172    /// # Arguments
173    /// * `lat` - Latitude
174    /// * `lon` - Longitude
175    ///
176    /// # Example
177    /// ```rust,ignore
178    /// let address = client.reverse_geocode(48.8584, 2.2945).await?;
179    /// ```
180    pub async fn reverse_geocode(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
181        self.enforce_rate_limit().await;
182
183        let url = format!(
184            "{}/reverse?lat={}&lon={}&format=json&addressdetails=1",
185            self.base_url, lat, lon
186        );
187
188        let response = self.fetch_with_retry(&url).await?;
189        let place: NominatimPlace = response.json().await?;
190
191        self.convert_places(vec![place])
192    }
193
194    /// Search for places by name
195    ///
196    /// # Arguments
197    /// * `query` - Search query (e.g., "Central Park")
198    /// * `limit` - Maximum number of results (max 50)
199    ///
200    /// # Example
201    /// ```rust,ignore
202    /// let places = client.search("Times Square", 5).await?;
203    /// ```
204    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SemanticVector>> {
205        self.enforce_rate_limit().await;
206
207        let limit = limit.min(50); // Nominatim max is 50
208        let url = format!(
209            "{}/search?q={}&format=json&addressdetails=1&limit={}",
210            self.base_url,
211            urlencoding::encode(query),
212            limit
213        );
214
215        let response = self.fetch_with_retry(&url).await?;
216        let places: Vec<NominatimPlace> = response.json().await?;
217
218        self.convert_places(places)
219    }
220
221    /// Convert Nominatim places to SemanticVectors
222    fn convert_places(&self, places: Vec<NominatimPlace>) -> Result<Vec<SemanticVector>> {
223        let mut vectors = Vec::new();
224
225        for place in places {
226            let lat = place.lat.parse::<f64>().unwrap_or(0.0);
227            let lon = place.lon.parse::<f64>().unwrap_or(0.0);
228
229            // Build address string
230            let address_str = if let Some(addr) = &place.address {
231                format!(
232                    "{}, {}, {}, {}",
233                    addr.road.as_deref().unwrap_or(""),
234                    addr.city.as_deref().unwrap_or(""),
235                    addr.state.as_deref().unwrap_or(""),
236                    addr.country.as_deref().unwrap_or("")
237                )
238            } else {
239                place.display_name.clone()
240            };
241
242            // Create text for embedding
243            let text = format!(
244                "{} at lat: {}, lon: {} - {} (OSM type: {})",
245                place.display_name, lat, lon, address_str, place.osm_type
246            );
247            let embedding = self.embedder.embed_text(&text);
248
249            let mut metadata = HashMap::new();
250            metadata.insert("place_id".to_string(), place.place_id.to_string());
251            metadata.insert("osm_type".to_string(), place.osm_type.clone());
252            metadata.insert("osm_id".to_string(), place.osm_id.to_string());
253            metadata.insert("latitude".to_string(), lat.to_string());
254            metadata.insert("longitude".to_string(), lon.to_string());
255            metadata.insert("display_name".to_string(), place.display_name.clone());
256            metadata.insert("place_type".to_string(), place.r#type.clone());
257            metadata.insert("importance".to_string(), place.importance.to_string());
258
259            if let Some(addr) = &place.address {
260                if let Some(city) = &addr.city {
261                    metadata.insert("city".to_string(), city.clone());
262                }
263                if let Some(country) = &addr.country {
264                    metadata.insert("country".to_string(), country.clone());
265                }
266                if let Some(country_code) = &addr.country_code {
267                    metadata.insert("country_code".to_string(), country_code.clone());
268                }
269            }
270            metadata.insert("source".to_string(), "nominatim".to_string());
271
272            vectors.push(SemanticVector {
273                id: format!("NOMINATIM:{}:{}", place.osm_type, place.osm_id),
274                embedding,
275                domain: Domain::CrossDomain, // Geographic data spans multiple domains
276                timestamp: Utc::now(),
277                metadata,
278            });
279        }
280
281        Ok(vectors)
282    }
283
284    /// Fetch with retry logic
285    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
286        let mut retries = 0;
287        loop {
288            match self.client.get(url).send().await {
289                Ok(response) => {
290                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
291                        retries += 1;
292                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
293                        continue;
294                    }
295                    return Ok(response);
296                }
297                Err(_) if retries < MAX_RETRIES => {
298                    retries += 1;
299                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
300                }
301                Err(e) => return Err(FrameworkError::Network(e)),
302            }
303        }
304    }
305}
306
307impl Default for NominatimClient {
308    fn default() -> Self {
309        Self::new().expect("Failed to create Nominatim client")
310    }
311}
312
313// ============================================================================
314// Overpass API Client (OSM Data Queries)
315// ============================================================================
316
317/// Overpass API response element
318#[derive(Debug, Deserialize)]
319struct OverpassElement {
320    #[serde(default)]
321    r#type: String,
322    #[serde(default)]
323    id: u64,
324    #[serde(default)]
325    lat: Option<f64>,
326    #[serde(default)]
327    lon: Option<f64>,
328    #[serde(default)]
329    tags: HashMap<String, String>,
330    #[serde(default)]
331    center: Option<OverpassCenter>,
332}
333
334#[derive(Debug, Deserialize)]
335struct OverpassCenter {
336    lat: f64,
337    lon: f64,
338}
339
340/// Overpass API response
341#[derive(Debug, Deserialize)]
342struct OverpassResponse {
343    #[serde(default)]
344    elements: Vec<OverpassElement>,
345}
346
347/// Client for Overpass API (OSM Data Queries)
348///
349/// Provides access to:
350/// - Custom Overpass QL queries
351/// - Nearby POI (Points of Interest) search
352/// - Road network extraction
353/// - OSM tag-based queries
354///
355/// # Example
356/// ```rust,ignore
357/// use ruvector_data_framework::OverpassClient;
358///
359/// let client = OverpassClient::new()?;
360/// let pois = client.get_nearby_pois(48.8584, 2.2945, 500.0, "restaurant").await?;
361/// let roads = client.get_roads(48.85, 2.29, 48.86, 2.30).await?;
362/// ```
363pub struct OverpassClient {
364    client: Client,
365    base_url: String,
366    rate_limit_delay: Duration,
367    embedder: Arc<SimpleEmbedder>,
368}
369
370impl OverpassClient {
371    /// Create a new Overpass API client
372    pub fn new() -> Result<Self> {
373        let client = Client::builder()
374            .timeout(Duration::from_secs(60)) // Overpass can be slow
375            .user_agent(USER_AGENT)
376            .build()
377            .map_err(FrameworkError::Network)?;
378
379        Ok(Self {
380            client,
381            base_url: "https://overpass-api.de/api/interpreter".to_string(),
382            rate_limit_delay: Duration::from_millis(OVERPASS_RATE_LIMIT_MS),
383            embedder: Arc::new(SimpleEmbedder::new(256)),
384        })
385    }
386
387    /// Execute a custom Overpass QL query
388    ///
389    /// # Arguments
390    /// * `query` - Overpass QL query string
391    ///
392    /// # Example
393    /// ```rust,ignore
394    /// let query = r#"
395    ///     [out:json];
396    ///     node["amenity"="cafe"](around:1000,48.8584,2.2945);
397    ///     out;
398    /// "#;
399    /// let results = client.query(query).await?;
400    /// ```
401    pub async fn query(&self, query: &str) -> Result<Vec<SemanticVector>> {
402        sleep(self.rate_limit_delay).await;
403
404        let response = self.client
405            .post(&self.base_url)
406            .body(query.to_string())
407            .send()
408            .await?;
409
410        let overpass_response: OverpassResponse = response.json().await?;
411        self.convert_elements(overpass_response.elements)
412    }
413
414    /// Get nearby POIs (Points of Interest)
415    ///
416    /// # Arguments
417    /// * `lat` - Center latitude
418    /// * `lon` - Center longitude
419    /// * `radius` - Search radius in meters
420    /// * `amenity_type` - OSM amenity type (e.g., "restaurant", "cafe", "hospital")
421    ///
422    /// # Example
423    /// ```rust,ignore
424    /// let cafes = client.get_nearby_pois(48.8584, 2.2945, 1000.0, "cafe").await?;
425    /// ```
426    pub async fn get_nearby_pois(
427        &self,
428        lat: f64,
429        lon: f64,
430        radius: f64,
431        amenity_type: &str,
432    ) -> Result<Vec<SemanticVector>> {
433        let query = format!(
434            r#"[out:json];node["amenity"="{}"](around:{},{},{});out;"#,
435            amenity_type, radius, lat, lon
436        );
437
438        self.query(&query).await
439    }
440
441    /// Get road network in a bounding box
442    ///
443    /// # Arguments
444    /// * `south` - Southern latitude
445    /// * `west` - Western longitude
446    /// * `north` - Northern latitude
447    /// * `east` - Eastern longitude
448    ///
449    /// # Example
450    /// ```rust,ignore
451    /// let roads = client.get_roads(48.85, 2.29, 48.86, 2.30).await?;
452    /// ```
453    pub async fn get_roads(
454        &self,
455        south: f64,
456        west: f64,
457        north: f64,
458        east: f64,
459    ) -> Result<Vec<SemanticVector>> {
460        let query = format!(
461            r#"[out:json];way["highway"]({},{},{},{});out geom;"#,
462            south, west, north, east
463        );
464
465        self.query(&query).await
466    }
467
468    /// Convert Overpass elements to SemanticVectors
469    fn convert_elements(&self, elements: Vec<OverpassElement>) -> Result<Vec<SemanticVector>> {
470        let mut vectors = Vec::new();
471
472        for element in elements {
473            // Get coordinates (from element or center)
474            let (lat, lon) = if let (Some(lat), Some(lon)) = (element.lat, element.lon) {
475                (lat, lon)
476            } else if let Some(center) = element.center {
477                (center.lat, center.lon)
478            } else {
479                continue; // Skip elements without coordinates
480            };
481
482            // Extract name and tags
483            let name = element.tags.get("name").cloned().unwrap_or_else(|| {
484                format!("OSM {} {}", element.r#type, element.id)
485            });
486
487            let amenity = element.tags.get("amenity").cloned().unwrap_or_default();
488            let highway = element.tags.get("highway").cloned().unwrap_or_default();
489
490            // Create text for embedding
491            let text = format!(
492                "{} at lat: {}, lon: {} - amenity: {}, highway: {}, tags: {:?}",
493                name, lat, lon, amenity, highway, element.tags
494            );
495            let embedding = self.embedder.embed_text(&text);
496
497            let mut metadata = HashMap::new();
498            metadata.insert("osm_id".to_string(), element.id.to_string());
499            metadata.insert("osm_type".to_string(), element.r#type.clone());
500            metadata.insert("latitude".to_string(), lat.to_string());
501            metadata.insert("longitude".to_string(), lon.to_string());
502            metadata.insert("name".to_string(), name);
503
504            if !amenity.is_empty() {
505                metadata.insert("amenity".to_string(), amenity);
506            }
507            if !highway.is_empty() {
508                metadata.insert("highway".to_string(), highway);
509            }
510
511            // Add all OSM tags
512            for (key, value) in element.tags {
513                metadata.insert(format!("osm_tag_{}", key), value);
514            }
515            metadata.insert("source".to_string(), "overpass".to_string());
516
517            vectors.push(SemanticVector {
518                id: format!("OVERPASS:{}:{}", element.r#type, element.id),
519                embedding,
520                domain: Domain::CrossDomain,
521                timestamp: Utc::now(),
522                metadata,
523            });
524        }
525
526        Ok(vectors)
527    }
528}
529
530impl Default for OverpassClient {
531    fn default() -> Self {
532        Self::new().expect("Failed to create Overpass client")
533    }
534}
535
536// ============================================================================
537// GeoNames Client
538// ============================================================================
539
540/// GeoNames search result
541#[derive(Debug, Deserialize)]
542struct GeoNamesSearchResult {
543    #[serde(default)]
544    geonames: Vec<GeoName>,
545}
546
547#[derive(Debug, Deserialize)]
548struct GeoName {
549    #[serde(default)]
550    geonameId: u64,
551    #[serde(default)]
552    name: String,
553    #[serde(default)]
554    lat: String,
555    #[serde(default)]
556    lng: String,
557    #[serde(default)]
558    countryCode: String,
559    #[serde(default)]
560    countryName: String,
561    #[serde(default)]
562    fcl: String, // feature class
563    #[serde(default)]
564    fcode: String, // feature code
565    #[serde(default)]
566    population: u64,
567    #[serde(default)]
568    adminName1: String, // state/province
569    #[serde(default)]
570    toponymName: String,
571}
572
573/// GeoNames timezone result
574#[derive(Debug, Deserialize)]
575struct GeoNamesTimezone {
576    #[serde(default)]
577    timezoneId: String,
578    #[serde(default)]
579    countryCode: String,
580    #[serde(default)]
581    lat: f64,
582    #[serde(default)]
583    lng: f64,
584}
585
586/// GeoNames country info
587#[derive(Debug, Deserialize)]
588struct GeoNamesCountryInfo {
589    #[serde(default)]
590    geonames: Vec<GeoNamesCountry>,
591}
592
593#[derive(Debug, Deserialize)]
594struct GeoNamesCountry {
595    #[serde(default)]
596    countryCode: String,
597    #[serde(default)]
598    countryName: String,
599    #[serde(default)]
600    capital: String,
601    #[serde(default)]
602    population: u64,
603    #[serde(default)]
604    areaInSqKm: String,
605    #[serde(default)]
606    continent: String,
607}
608
609/// Client for GeoNames
610///
611/// Provides access to:
612/// - Place name search
613/// - Nearby places lookup
614/// - Timezone information
615/// - Country details
616///
617/// **Note**: Requires username (set GEONAMES_USERNAME env var)
618/// Free tier: 2000 requests/hour, 30000/day
619///
620/// # Example
621/// ```rust,ignore
622/// use ruvector_data_framework::GeonamesClient;
623///
624/// let client = GeonamesClient::new("your_username".to_string())?;
625/// let places = client.search("Paris", 10).await?;
626/// let nearby = client.get_nearby(48.8566, 2.3522).await?;
627/// let tz = client.get_timezone(40.7128, -74.0060).await?;
628/// ```
629pub struct GeonamesClient {
630    client: Client,
631    base_url: String,
632    username: String,
633    rate_limit_delay: Duration,
634    embedder: Arc<SimpleEmbedder>,
635}
636
637impl GeonamesClient {
638    /// Create a new GeoNames client
639    ///
640    /// # Arguments
641    /// * `username` - GeoNames username (register at geonames.org)
642    pub fn new(username: String) -> Result<Self> {
643        let client = Client::builder()
644            .timeout(Duration::from_secs(30))
645            .build()
646            .map_err(FrameworkError::Network)?;
647
648        Ok(Self {
649            client,
650            base_url: "http://api.geonames.org".to_string(),
651            username,
652            rate_limit_delay: Duration::from_millis(GEONAMES_RATE_LIMIT_MS),
653            embedder: Arc::new(SimpleEmbedder::new(256)),
654        })
655    }
656
657    /// Search for places by name
658    ///
659    /// # Arguments
660    /// * `query` - Place name to search
661    /// * `limit` - Maximum number of results
662    ///
663    /// # Example
664    /// ```rust,ignore
665    /// let results = client.search("New York", 10).await?;
666    /// ```
667    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SemanticVector>> {
668        sleep(self.rate_limit_delay).await;
669
670        let url = format!(
671            "{}/searchJSON?q={}&maxRows={}&username={}",
672            self.base_url,
673            urlencoding::encode(query),
674            limit,
675            self.username
676        );
677
678        let response = self.fetch_with_retry(&url).await?;
679        let result: GeoNamesSearchResult = response.json().await?;
680
681        self.convert_geonames(result.geonames)
682    }
683
684    /// Get nearby places
685    ///
686    /// # Arguments
687    /// * `lat` - Latitude
688    /// * `lon` - Longitude
689    ///
690    /// # Example
691    /// ```rust,ignore
692    /// let nearby = client.get_nearby(40.7128, -74.0060).await?;
693    /// ```
694    pub async fn get_nearby(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
695        sleep(self.rate_limit_delay).await;
696
697        let url = format!(
698            "{}/findNearbyJSON?lat={}&lng={}&username={}",
699            self.base_url, lat, lon, self.username
700        );
701
702        let response = self.fetch_with_retry(&url).await?;
703        let result: GeoNamesSearchResult = response.json().await?;
704
705        self.convert_geonames(result.geonames)
706    }
707
708    /// Get timezone for coordinates
709    ///
710    /// # Arguments
711    /// * `lat` - Latitude
712    /// * `lon` - Longitude
713    ///
714    /// # Example
715    /// ```rust,ignore
716    /// let tz = client.get_timezone(51.5074, -0.1278).await?;
717    /// ```
718    pub async fn get_timezone(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
719        sleep(self.rate_limit_delay).await;
720
721        let url = format!(
722            "{}/timezoneJSON?lat={}&lng={}&username={}",
723            self.base_url, lat, lon, self.username
724        );
725
726        let response = self.fetch_with_retry(&url).await?;
727        let tz: GeoNamesTimezone = response.json().await?;
728
729        let text = format!(
730            "Timezone {} for coordinates ({}, {}), country: {}",
731            tz.timezoneId, lat, lon, tz.countryCode
732        );
733        let embedding = self.embedder.embed_text(&text);
734
735        let mut metadata = HashMap::new();
736        metadata.insert("timezone_id".to_string(), tz.timezoneId.clone());
737        metadata.insert("country_code".to_string(), tz.countryCode);
738        metadata.insert("latitude".to_string(), lat.to_string());
739        metadata.insert("longitude".to_string(), lon.to_string());
740        metadata.insert("source".to_string(), "geonames".to_string());
741
742        Ok(vec![SemanticVector {
743            id: format!("GEONAMES:TZ:{}", tz.timezoneId),
744            embedding,
745            domain: Domain::CrossDomain,
746            timestamp: Utc::now(),
747            metadata,
748        }])
749    }
750
751    /// Get country information
752    ///
753    /// # Arguments
754    /// * `country_code` - ISO 2-letter country code (e.g., "US", "FR")
755    ///
756    /// # Example
757    /// ```rust,ignore
758    /// let info = client.get_country_info("US").await?;
759    /// ```
760    pub async fn get_country_info(&self, country_code: &str) -> Result<Vec<SemanticVector>> {
761        sleep(self.rate_limit_delay).await;
762
763        let url = format!(
764            "{}/countryInfoJSON?country={}&username={}",
765            self.base_url, country_code, self.username
766        );
767
768        let response = self.fetch_with_retry(&url).await?;
769        let result: GeoNamesCountryInfo = response.json().await?;
770
771        let mut vectors = Vec::new();
772        for country in result.geonames {
773            let text = format!(
774                "{} ({}) - Capital: {}, Population: {}, Area: {} sq km, Continent: {}",
775                country.countryName,
776                country.countryCode,
777                country.capital,
778                country.population,
779                country.areaInSqKm,
780                country.continent
781            );
782            let embedding = self.embedder.embed_text(&text);
783
784            let mut metadata = HashMap::new();
785            metadata.insert("country_code".to_string(), country.countryCode.clone());
786            metadata.insert("country_name".to_string(), country.countryName);
787            metadata.insert("capital".to_string(), country.capital);
788            metadata.insert("population".to_string(), country.population.to_string());
789            metadata.insert("area_sq_km".to_string(), country.areaInSqKm);
790            metadata.insert("continent".to_string(), country.continent);
791            metadata.insert("source".to_string(), "geonames".to_string());
792
793            vectors.push(SemanticVector {
794                id: format!("GEONAMES:COUNTRY:{}", country.countryCode),
795                embedding,
796                domain: Domain::CrossDomain,
797                timestamp: Utc::now(),
798                metadata,
799            });
800        }
801
802        Ok(vectors)
803    }
804
805    /// Convert GeoNames results to SemanticVectors
806    fn convert_geonames(&self, geonames: Vec<GeoName>) -> Result<Vec<SemanticVector>> {
807        let mut vectors = Vec::new();
808
809        for place in geonames {
810            let lat = place.lat.parse::<f64>().unwrap_or(0.0);
811            let lon = place.lng.parse::<f64>().unwrap_or(0.0);
812
813            let text = format!(
814                "{} ({}) in {}, {} - lat: {}, lon: {}, population: {}",
815                place.name,
816                place.toponymName,
817                place.adminName1,
818                place.countryName,
819                lat,
820                lon,
821                place.population
822            );
823            let embedding = self.embedder.embed_text(&text);
824
825            let mut metadata = HashMap::new();
826            metadata.insert("geoname_id".to_string(), place.geonameId.to_string());
827            metadata.insert("name".to_string(), place.name);
828            metadata.insert("toponym_name".to_string(), place.toponymName);
829            metadata.insert("latitude".to_string(), lat.to_string());
830            metadata.insert("longitude".to_string(), lon.to_string());
831            metadata.insert("country_code".to_string(), place.countryCode);
832            metadata.insert("country_name".to_string(), place.countryName);
833            metadata.insert("admin_name1".to_string(), place.adminName1);
834            metadata.insert("feature_class".to_string(), place.fcl);
835            metadata.insert("feature_code".to_string(), place.fcode);
836            metadata.insert("population".to_string(), place.population.to_string());
837            metadata.insert("source".to_string(), "geonames".to_string());
838
839            vectors.push(SemanticVector {
840                id: format!("GEONAMES:{}", place.geonameId),
841                embedding,
842                domain: Domain::CrossDomain,
843                timestamp: Utc::now(),
844                metadata,
845            });
846        }
847
848        Ok(vectors)
849    }
850
851    /// Fetch with retry logic
852    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
853        let mut retries = 0;
854        loop {
855            match self.client.get(url).send().await {
856                Ok(response) => {
857                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
858                        retries += 1;
859                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
860                        continue;
861                    }
862                    return Ok(response);
863                }
864                Err(_) if retries < MAX_RETRIES => {
865                    retries += 1;
866                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
867                }
868                Err(e) => return Err(FrameworkError::Network(e)),
869            }
870        }
871    }
872}
873
874// ============================================================================
875// Open Elevation Client
876// ============================================================================
877
878/// Open Elevation result
879#[derive(Debug, Deserialize)]
880struct OpenElevationResponse {
881    #[serde(default)]
882    results: Vec<ElevationPoint>,
883}
884
885#[derive(Debug, Deserialize, Serialize)]
886struct ElevationPoint {
887    latitude: f64,
888    longitude: f64,
889    elevation: f64,
890}
891
892/// Request for batch elevation lookup
893#[derive(Debug, Serialize)]
894struct ElevationRequest {
895    locations: Vec<ElevationLocation>,
896}
897
898#[derive(Debug, Serialize)]
899struct ElevationLocation {
900    latitude: f64,
901    longitude: f64,
902}
903
/// Client for Open Elevation API
///
/// Provides access to:
/// - Single point elevation lookup
/// - Batch elevation lookups
/// - Worldwide coverage using SRTM data
///
/// No authentication required. Free and open service.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::OpenElevationClient;
///
/// let client = OpenElevationClient::new()?;
/// let elevation = client.get_elevation(46.9480, 7.4474).await?; // Bern, Switzerland
/// let elevations = client.get_elevations(vec![(40.7128, -74.0060), (48.8566, 2.3522)]).await?;
/// ```
pub struct OpenElevationClient {
    /// HTTP client used for every request.
    client: Client,
    /// Base URL of the API; the `/lookup` path is appended to it.
    base_url: String,
    /// Delay slept before each lookup request is sent.
    rate_limit_delay: Duration,
    /// Embedder that turns each result's text description into an embedding.
    embedder: Arc<SimpleEmbedder>,
}
927
928impl OpenElevationClient {
929    /// Create a new Open Elevation client
930    pub fn new() -> Result<Self> {
931        let client = Client::builder()
932            .timeout(Duration::from_secs(30))
933            .build()
934            .map_err(FrameworkError::Network)?;
935
936        Ok(Self {
937            client,
938            base_url: "https://api.open-elevation.com/api/v1".to_string(),
939            rate_limit_delay: Duration::from_millis(OPEN_ELEVATION_RATE_LIMIT_MS),
940            embedder: Arc::new(SimpleEmbedder::new(256)),
941        })
942    }
943
944    /// Get elevation for a single point
945    ///
946    /// # Arguments
947    /// * `lat` - Latitude
948    /// * `lon` - Longitude
949    ///
950    /// # Example
951    /// ```rust,ignore
952    /// let elevation = client.get_elevation(27.9881, 86.9250).await?; // Mt. Everest
953    /// ```
954    pub async fn get_elevation(&self, lat: f64, lon: f64) -> Result<Vec<SemanticVector>> {
955        self.get_elevations(vec![(lat, lon)]).await
956    }
957
958    /// Get elevations for multiple points
959    ///
960    /// # Arguments
961    /// * `locations` - Vec of (latitude, longitude) tuples
962    ///
963    /// # Example
964    /// ```rust,ignore
965    /// let elevations = client.get_elevations(vec![
966    ///     (40.7128, -74.0060), // NYC
967    ///     (48.8566, 2.3522),   // Paris
968    /// ]).await?;
969    /// ```
970    pub async fn get_elevations(&self, locations: Vec<(f64, f64)>) -> Result<Vec<SemanticVector>> {
971        sleep(self.rate_limit_delay).await;
972
973        let request = ElevationRequest {
974            locations: locations
975                .iter()
976                .map(|(lat, lon)| ElevationLocation {
977                    latitude: *lat,
978                    longitude: *lon,
979                })
980                .collect(),
981        };
982
983        let url = format!("{}/lookup", self.base_url);
984
985        let response = self.client
986            .post(&url)
987            .json(&request)
988            .send()
989            .await?;
990
991        let elevation_response: OpenElevationResponse = response.json().await?;
992        self.convert_elevations(elevation_response.results)
993    }
994
995    /// Convert elevation points to SemanticVectors
996    fn convert_elevations(&self, points: Vec<ElevationPoint>) -> Result<Vec<SemanticVector>> {
997        let mut vectors = Vec::new();
998
999        for point in points {
1000            let text = format!(
1001                "Elevation {} meters at lat: {}, lon: {}",
1002                point.elevation, point.latitude, point.longitude
1003            );
1004            let embedding = self.embedder.embed_text(&text);
1005
1006            let mut metadata = HashMap::new();
1007            metadata.insert("latitude".to_string(), point.latitude.to_string());
1008            metadata.insert("longitude".to_string(), point.longitude.to_string());
1009            metadata.insert("elevation_m".to_string(), point.elevation.to_string());
1010            metadata.insert("source".to_string(), "open_elevation".to_string());
1011
1012            vectors.push(SemanticVector {
1013                id: format!("ELEVATION:{}:{}", point.latitude, point.longitude),
1014                embedding,
1015                domain: Domain::CrossDomain,
1016                timestamp: Utc::now(),
1017                metadata,
1018            });
1019        }
1020
1021        Ok(vectors)
1022    }
1023}
1024
1025impl Default for OpenElevationClient {
1026    fn default() -> Self {
1027        Self::new().expect("Failed to create OpenElevation client")
1028    }
1029}
1030
1031// ============================================================================
1032// Tests
1033// ============================================================================
1034
#[cfg(test)]
mod tests {
    use super::*;

    /// Building a Nominatim client succeeds and applies the configured delay.
    #[tokio::test]
    async fn test_nominatim_client_creation() {
        let built = NominatimClient::new();
        assert!(built.is_ok());

        let expected_delay = Duration::from_millis(NOMINATIM_RATE_LIMIT_MS);
        assert_eq!(built.unwrap().rate_limit_delay, expected_delay);
    }

    /// Two back-to-back limiter calls: the first returns quickly, the second waits.
    #[tokio::test]
    async fn test_nominatim_rate_limiting() {
        let nominatim = NominatimClient::new().unwrap();

        // The very first call is expected to return (almost) immediately.
        let first_started = Instant::now();
        nominatim.enforce_rate_limit().await;
        assert!(first_started.elapsed() < Duration::from_millis(100));

        // An immediate follow-up call must be held back; 900 ms leaves some
        // tolerance below the nominal delay.
        let second_started = Instant::now();
        nominatim.enforce_rate_limit().await;
        assert!(second_started.elapsed() >= Duration::from_millis(900));
    }

    /// The Overpass client can be constructed.
    #[tokio::test]
    async fn test_overpass_client_creation() {
        assert!(OverpassClient::new().is_ok());
    }

    /// The GeoNames client can be constructed from a username.
    #[tokio::test]
    async fn test_geonames_client_creation() {
        let username = "test_user".to_string();
        assert!(GeonamesClient::new(username).is_ok());
    }

    /// The Open Elevation client can be constructed.
    #[tokio::test]
    async fn test_open_elevation_client_creation() {
        assert!(OpenElevationClient::new().is_ok());
    }

    /// A Nominatim place is converted into one vector with id, address
    /// metadata and domain filled in.
    #[test]
    fn test_nominatim_place_conversion() {
        let nominatim = NominatimClient::new().unwrap();

        let eiffel_address = NominatimAddress {
            house_number: None,
            road: Some("Champ de Mars".to_string()),
            city: Some("Paris".to_string()),
            state: Some("Île-de-France".to_string()),
            postcode: Some("75007".to_string()),
            country: Some("France".to_string()),
            country_code: Some("fr".to_string()),
        };
        let eiffel_tower = NominatimPlace {
            place_id: 12345,
            licence: "ODbL".to_string(),
            osm_type: "way".to_string(),
            osm_id: 67890,
            lat: "48.8584".to_string(),
            lon: "2.2945".to_string(),
            display_name: "Eiffel Tower, Paris, France".to_string(),
            r#type: "attraction".to_string(),
            importance: 0.9,
            address: Some(eiffel_address),
            geojson: None,
        };

        let converted = nominatim.convert_places(vec![eiffel_tower]).unwrap();
        assert_eq!(converted.len(), 1);

        let first = &converted[0];
        assert_eq!(first.id, "NOMINATIM:way:67890");
        for (key, expected) in [("city", "Paris"), ("country", "France")].iter() {
            assert_eq!(first.metadata.get(*key).unwrap(), expected);
        }
        assert_eq!(first.domain, Domain::CrossDomain);
    }

    /// An Overpass node is converted into one vector whose metadata carries its tags.
    #[test]
    fn test_overpass_element_conversion() {
        let overpass = OverpassClient::new().unwrap();

        let tags = [("name", "Central Park"), ("amenity", "park")]
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();

        let central_park = OverpassElement {
            r#type: "node".to_string(),
            id: 123456,
            lat: Some(40.7829),
            lon: Some(-73.9654),
            tags,
            center: None,
        };

        let converted = overpass.convert_elements(vec![central_park]).unwrap();
        assert_eq!(converted.len(), 1);

        let first = &converted[0];
        assert_eq!(first.id, "OVERPASS:node:123456");
        for (key, expected) in [("name", "Central Park"), ("amenity", "park")].iter() {
            assert_eq!(first.metadata.get(*key).unwrap(), expected);
        }
    }

    /// A GeoNames record is converted into one vector with id and metadata filled in.
    #[test]
    fn test_geonames_conversion() {
        let geonames_client = GeonamesClient::new("test".to_string()).unwrap();

        let paris = GeoName {
            geonameId: 2988507,
            name: "Paris".to_string(),
            lat: "48.85341".to_string(),
            lng: "2.3488".to_string(),
            countryCode: "FR".to_string(),
            countryName: "France".to_string(),
            fcl: "P".to_string(),
            fcode: "PPLC".to_string(),
            population: 2138551,
            adminName1: "Île-de-France".to_string(),
            toponymName: "Paris".to_string(),
        };

        let converted = geonames_client.convert_geonames(vec![paris]).unwrap();
        assert_eq!(converted.len(), 1);

        let first = &converted[0];
        assert_eq!(first.id, "GEONAMES:2988507");
        let expected_metadata = [
            ("name", "Paris"),
            ("country_code", "FR"),
            ("population", "2138551"),
        ];
        for (key, expected) in expected_metadata.iter() {
            assert_eq!(first.metadata.get(*key).unwrap(), expected);
        }
    }

    /// Elevation points keep their order, and `elevation_m` holds the value
    /// formatted with `to_string()` (so `10.0` becomes `"10"`).
    #[test]
    fn test_elevation_conversion() {
        let elevation_client = OpenElevationClient::new().unwrap();

        // (latitude, longitude, elevation)
        let samples = [(27.9881, 86.9250, 8848.86), (40.7128, -74.0060, 10.0)];
        let points: Vec<ElevationPoint> = samples
            .iter()
            .map(|&(latitude, longitude, elevation)| ElevationPoint {
                latitude,
                longitude,
                elevation,
            })
            .collect();

        let converted = elevation_client.convert_elevations(points).unwrap();
        assert_eq!(converted.len(), 2);

        for (vector, expected) in converted.iter().zip(["8848.86", "10"].iter()) {
            assert_eq!(vector.metadata.get("elevation_m").unwrap(), expected);
        }
    }

    /// Guards the per-service delay constants against accidental changes.
    #[test]
    fn test_rate_limits() {
        // Nominatim: exactly one request per second.
        assert_eq!(NOMINATIM_RATE_LIMIT_MS, 1000);
        // Overpass: a delay of at most 500 ms, i.e. two or more requests per second.
        assert!(OVERPASS_RATE_LIMIT_MS <= 500);
        // GeoNames: kept conservative for the free tier.
        assert!(GEONAMES_RATE_LIMIT_MS >= 1800);
        // Open Elevation: a delay of at most 200 ms, i.e. five or more requests per second.
        assert!(OPEN_ELEVATION_RATE_LIMIT_MS <= 200);
    }

    /// The User-Agent string names the project and points at its repository host.
    #[test]
    fn test_user_agent_constant() {
        for needle in ["RuVector", "github"].iter() {
            assert!(USER_AGENT.contains(*needle));
        }
    }

    /// `GeoUtils::distance_km` (reached through `use super::*`) gives a
    /// plausible Paris-London distance.
    #[test]
    fn test_geo_utils_integration() {
        let (paris_lat, paris_lon) = (48.8566, 2.3522);
        let (london_lat, london_lon) = (51.5074, -0.1278);

        let distance = GeoUtils::distance_km(paris_lat, paris_lon, london_lat, london_lon);

        // The great-circle distance is roughly 344 km; accept +/- 50 km.
        assert!((distance - 344.0).abs() < 50.0);
    }

    /// `GeoUtils::within_radius` accepts a generous radius and rejects a tight one.
    #[test]
    fn test_geo_utils_within_radius() {
        let (center_lat, center_lon) = (48.8566, 2.3522);

        // The Eiffel Tower lies about 4.2 km west of this reference point,
        // so it is inside a 5 km radius but outside a 1 km radius.
        let (eiffel_lat, eiffel_lon) = (48.8584, 2.2945);

        let eiffel_within = |radius_km| {
            GeoUtils::within_radius(center_lat, center_lon, eiffel_lat, eiffel_lon, radius_km)
        };

        assert!(eiffel_within(5.0));
        assert!(!eiffel_within(1.0));
    }
}