ruvector_data_framework/
physics_clients.rs

1//! Physics, seismic, and ocean data API integrations
2//!
3//! This module provides async clients for:
4//! - USGS Earthquake Hazards Program
5//! - CERN Open Data Portal
6//! - Argo Float Ocean Data
7//! - Materials Project
8//!
9//! All responses are converted to SemanticVector format for RuVector discovery.
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use chrono::{DateTime, NaiveDateTime, Utc};
16use reqwest::{Client, StatusCode};
17use serde::Deserialize;
18use tokio::time::sleep;
19
20use crate::api_clients::SimpleEmbedder;
21use crate::ruvector_native::{Domain, SemanticVector};
22use crate::{FrameworkError, Result};
23
// Request pacing and retry tuning shared by the clients in this module.

/// Pause configured for the USGS client: 200 ms, roughly 5 requests/second.
const USGS_RATE_LIMIT_MS: u64 = 200;
/// Pause configured for the CERN client: 500 ms, a deliberately conservative rate.
const CERN_RATE_LIMIT_MS: u64 = 500;
/// Pause configured for the Argo client: 300 ms, roughly 3 requests/second.
const ARGO_RATE_LIMIT_MS: u64 = 300;
/// Pause configured for the Materials Project client: 1 request/second for the free tier.
const MATERIALS_PROJECT_RATE_LIMIT_MS: u64 = 1_000;
/// Upper bound on the number of retries performed by the `fetch_with_retry` helpers.
const MAX_RETRIES: u32 = 3;
/// Base back-off in milliseconds; the retry helpers multiply it by the attempt number.
const RETRY_DELAY_MS: u64 = 1_000;
31
32// ============================================================================
33// Geographic Coordinate Utilities
34// ============================================================================
35
/// Geographic coordinate utilities for region-based searches.
pub struct GeoUtils;

impl GeoUtils {
    /// Great-circle distance between two latitude/longitude points (Haversine formula).
    ///
    /// # Arguments
    /// * `lat1`, `lon1` - First point, in decimal degrees
    /// * `lat2`, `lon2` - Second point, in decimal degrees
    ///
    /// # Returns
    /// Distance in kilometers on a sphere of radius 6371 km. `NaN` inputs yield `NaN`.
    pub fn distance_km(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
        const EARTH_RADIUS_KM: f64 = 6371.0;

        let dlat = (lat2 - lat1).to_radians();
        let dlon = (lon2 - lon1).to_radians();
        let haversine = (dlat / 2.0).sin().powi(2)
            + lat1.to_radians().cos() * lat2.to_radians().cos() * (dlon / 2.0).sin().powi(2);
        // Floating-point rounding can push the term a hair above 1.0 for (near-)antipodal
        // points; without the clamp `(1.0 - a).sqrt()` would be NaN and poison the result.
        let a = haversine.clamp(0.0, 1.0);
        let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
        EARTH_RADIUS_KM * c
    }

    /// Check whether a point lies within `radius_km` of a center point.
    ///
    /// The boundary is inclusive: a point exactly `radius_km` away counts as inside.
    /// Distances are computed with [`GeoUtils::distance_km`].
    pub fn within_radius(
        center_lat: f64,
        center_lon: f64,
        point_lat: f64,
        point_lon: f64,
        radius_km: f64,
    ) -> bool {
        Self::distance_km(center_lat, center_lon, point_lat, point_lon) <= radius_km
    }
}
63
64// ============================================================================
65// USGS Earthquake Hazards Program Client
66// ============================================================================
67
68/// USGS GeoJSON response format
69#[derive(Debug, Deserialize)]
70struct UsgsGeoJsonResponse {
71    #[serde(default)]
72    features: Vec<UsgsEarthquakeFeature>,
73    #[serde(default)]
74    metadata: UsgsMetadata,
75}
76
77#[derive(Debug, Deserialize, Default)]
78struct UsgsMetadata {
79    #[serde(default)]
80    count: u32,
81}
82
83#[derive(Debug, Deserialize)]
84struct UsgsEarthquakeFeature {
85    id: String,
86    properties: UsgsProperties,
87    geometry: UsgsGeometry,
88}
89
90#[derive(Debug, Deserialize)]
91struct UsgsProperties {
92    #[serde(default)]
93    mag: Option<f64>,
94    #[serde(default)]
95    place: String,
96    #[serde(default)]
97    time: i64, // Unix timestamp in milliseconds
98    #[serde(default)]
99    updated: i64,
100    #[serde(default)]
101    tz: Option<i32>,
102    #[serde(default)]
103    url: String,
104    #[serde(default)]
105    detail: String,
106    #[serde(default)]
107    felt: Option<u32>,
108    #[serde(default)]
109    cdi: Option<f64>, // Community Decimal Intensity
110    #[serde(default)]
111    mmi: Option<f64>, // Modified Mercalli Intensity
112    #[serde(default)]
113    alert: Option<String>,
114    #[serde(default)]
115    status: String,
116    #[serde(default)]
117    tsunami: u8,
118    #[serde(default)]
119    sig: u32, // Significance
120    #[serde(default)]
121    net: String,
122    #[serde(default)]
123    code: String,
124    #[serde(default)]
125    r#type: String,
126    #[serde(default)]
127    title: String,
128}
129
130#[derive(Debug, Deserialize)]
131struct UsgsGeometry {
132    coordinates: Vec<f64>, // [longitude, latitude, depth]
133}
134
135/// Client for USGS Earthquake Hazards Program
136///
137/// Provides access to:
138/// - Real-time earthquake data worldwide
139/// - Historical earthquake records
140/// - Magnitude, location, depth information
141/// - Tsunami warnings and alerts
142///
143/// # Example
144/// ```rust,ignore
145/// use ruvector_data_framework::UsgsEarthquakeClient;
146///
147/// let client = UsgsEarthquakeClient::new()?;
148/// let recent = client.get_recent(4.5, 7).await?; // Mag 4.5+, last 7 days
149/// let regional = client.search_by_region(35.0, -118.0, 200.0, 30).await?;
150/// let significant = client.get_significant(30).await?;
151/// ```
152pub struct UsgsEarthquakeClient {
153    client: Client,
154    base_url: String,
155    rate_limit_delay: Duration,
156    embedder: Arc<SimpleEmbedder>,
157}
158
159impl UsgsEarthquakeClient {
160    /// Create a new USGS Earthquake client
161    pub fn new() -> Result<Self> {
162        let client = Client::builder()
163            .timeout(Duration::from_secs(30))
164            .build()
165            .map_err(FrameworkError::Network)?;
166
167        Ok(Self {
168            client,
169            base_url: "https://earthquake.usgs.gov/fdsnws/event/1".to_string(),
170            rate_limit_delay: Duration::from_millis(USGS_RATE_LIMIT_MS),
171            embedder: Arc::new(SimpleEmbedder::new(256)),
172        })
173    }
174
175    /// Get recent earthquakes above a minimum magnitude
176    ///
177    /// # Arguments
178    /// * `min_magnitude` - Minimum magnitude (e.g., 4.5)
179    /// * `days` - Number of days to look back (e.g., 7 for last week)
180    ///
181    /// # Example
182    /// ```rust,ignore
183    /// let earthquakes = client.get_recent(5.0, 30).await?;
184    /// ```
185    pub async fn get_recent(
186        &self,
187        min_magnitude: f64,
188        days: u32,
189    ) -> Result<Vec<SemanticVector>> {
190        let now = Utc::now();
191        let start_time = now - chrono::Duration::days(days as i64);
192
193        let url = format!(
194            "{}/query?format=geojson&starttime={}&endtime={}&minmagnitude={}",
195            self.base_url,
196            start_time.format("%Y-%m-%d"),
197            now.format("%Y-%m-%d"),
198            min_magnitude
199        );
200
201        sleep(self.rate_limit_delay).await;
202        let response = self.fetch_with_retry(&url).await?;
203        let geojson: UsgsGeoJsonResponse = response.json().await?;
204
205        self.convert_earthquakes(geojson.features)
206    }
207
208    /// Search earthquakes by geographic region
209    ///
210    /// # Arguments
211    /// * `lat` - Center latitude
212    /// * `lon` - Center longitude
213    /// * `radius_km` - Search radius in kilometers (max 20001.6 km)
214    /// * `days` - Number of days to look back
215    ///
216    /// # Example
217    /// ```rust,ignore
218    /// // Search near Los Angeles
219    /// let la_quakes = client.search_by_region(34.05, -118.25, 100.0, 7).await?;
220    /// ```
221    pub async fn search_by_region(
222        &self,
223        lat: f64,
224        lon: f64,
225        radius_km: f64,
226        days: u32,
227    ) -> Result<Vec<SemanticVector>> {
228        let now = Utc::now();
229        let start_time = now - chrono::Duration::days(days as i64);
230
231        let url = format!(
232            "{}/query?format=geojson&starttime={}&endtime={}&latitude={}&longitude={}&maxradiuskm={}",
233            self.base_url,
234            start_time.format("%Y-%m-%d"),
235            now.format("%Y-%m-%d"),
236            lat,
237            lon,
238            radius_km
239        );
240
241        sleep(self.rate_limit_delay).await;
242        let response = self.fetch_with_retry(&url).await?;
243        let geojson: UsgsGeoJsonResponse = response.json().await?;
244
245        self.convert_earthquakes(geojson.features)
246    }
247
248    /// Get significant earthquakes (as determined by USGS)
249    ///
250    /// # Arguments
251    /// * `days` - Number of days to look back
252    ///
253    /// # Example
254    /// ```rust,ignore
255    /// let significant = client.get_significant(30).await?;
256    /// ```
257    pub async fn get_significant(&self, days: u32) -> Result<Vec<SemanticVector>> {
258        let now = Utc::now();
259        let start_time = now - chrono::Duration::days(days as i64);
260
261        let url = format!(
262            "{}/query?format=geojson&starttime={}&endtime={}&orderby=magnitude&limit=100",
263            self.base_url,
264            start_time.format("%Y-%m-%d"),
265            now.format("%Y-%m-%d")
266        );
267
268        sleep(self.rate_limit_delay).await;
269        let response = self.fetch_with_retry(&url).await?;
270        let geojson: UsgsGeoJsonResponse = response.json().await?;
271
272        // Filter for significant (magnitude >= 6.0 or high significance score)
273        let significant: Vec<_> = geojson
274            .features
275            .into_iter()
276            .filter(|f| {
277                f.properties.mag.unwrap_or(0.0) >= 6.0 || f.properties.sig >= 600
278            })
279            .collect();
280
281        self.convert_earthquakes(significant)
282    }
283
284    /// Get earthquakes within a magnitude range
285    ///
286    /// # Arguments
287    /// * `min` - Minimum magnitude
288    /// * `max` - Maximum magnitude
289    /// * `days` - Number of days to look back
290    ///
291    /// # Example
292    /// ```rust,ignore
293    /// // Get moderate earthquakes (4.0-6.0)
294    /// let moderate = client.get_by_magnitude_range(4.0, 6.0, 7).await?;
295    /// ```
296    pub async fn get_by_magnitude_range(
297        &self,
298        min: f64,
299        max: f64,
300        days: u32,
301    ) -> Result<Vec<SemanticVector>> {
302        let now = Utc::now();
303        let start_time = now - chrono::Duration::days(days as i64);
304
305        let url = format!(
306            "{}/query?format=geojson&starttime={}&endtime={}&minmagnitude={}&maxmagnitude={}",
307            self.base_url,
308            start_time.format("%Y-%m-%d"),
309            now.format("%Y-%m-%d"),
310            min,
311            max
312        );
313
314        sleep(self.rate_limit_delay).await;
315        let response = self.fetch_with_retry(&url).await?;
316        let geojson: UsgsGeoJsonResponse = response.json().await?;
317
318        self.convert_earthquakes(geojson.features)
319    }
320
321    /// Convert USGS earthquake features to SemanticVectors
322    fn convert_earthquakes(&self, features: Vec<UsgsEarthquakeFeature>) -> Result<Vec<SemanticVector>> {
323        let mut vectors = Vec::new();
324
325        for feature in features {
326            let mag = feature.properties.mag.unwrap_or(0.0);
327            let coords = &feature.geometry.coordinates;
328            let lon = coords.get(0).copied().unwrap_or(0.0);
329            let lat = coords.get(1).copied().unwrap_or(0.0);
330            let depth = coords.get(2).copied().unwrap_or(0.0);
331
332            // Convert Unix timestamp (milliseconds) to DateTime
333            let timestamp = DateTime::from_timestamp_millis(feature.properties.time)
334                .unwrap_or_else(Utc::now);
335
336            // Create text for embedding
337            let text = format!(
338                "Magnitude {} earthquake {} at depth {}km (lat: {}, lon: {})",
339                mag, feature.properties.place, depth, lat, lon
340            );
341            let embedding = self.embedder.embed_text(&text);
342
343            let mut metadata = HashMap::new();
344            metadata.insert("magnitude".to_string(), mag.to_string());
345            metadata.insert("place".to_string(), feature.properties.place);
346            metadata.insert("latitude".to_string(), lat.to_string());
347            metadata.insert("longitude".to_string(), lon.to_string());
348            metadata.insert("depth_km".to_string(), depth.to_string());
349            metadata.insert("tsunami".to_string(), feature.properties.tsunami.to_string());
350            metadata.insert("significance".to_string(), feature.properties.sig.to_string());
351            metadata.insert("status".to_string(), feature.properties.status);
352            if let Some(alert) = feature.properties.alert {
353                metadata.insert("alert".to_string(), alert);
354            }
355            metadata.insert("source".to_string(), "usgs".to_string());
356
357            vectors.push(SemanticVector {
358                id: format!("USGS:{}", feature.id),
359                embedding,
360                domain: Domain::Seismic,
361                timestamp,
362                metadata,
363            });
364        }
365
366        Ok(vectors)
367    }
368
369    /// Fetch with retry logic
370    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
371        let mut retries = 0;
372        loop {
373            match self.client.get(url).send().await {
374                Ok(response) => {
375                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
376                        retries += 1;
377                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
378                        continue;
379                    }
380                    return Ok(response);
381                }
382                Err(_) if retries < MAX_RETRIES => {
383                    retries += 1;
384                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
385                }
386                Err(e) => return Err(FrameworkError::Network(e)),
387            }
388        }
389    }
390}
391
392impl Default for UsgsEarthquakeClient {
393    fn default() -> Self {
394        Self::new().expect("Failed to create USGS client")
395    }
396}
397
398// ============================================================================
399// CERN Open Data Portal Client
400// ============================================================================
401
402/// CERN Open Data record
403#[derive(Debug, Deserialize)]
404struct CernRecord {
405    id: u64,
406    #[serde(default)]
407    metadata: CernMetadata,
408}
409
410#[derive(Debug, Deserialize, Default)]
411struct CernMetadata {
412    #[serde(default)]
413    titles: Vec<CernTitle>,
414    #[serde(default)]
415    r#abstract: Option<CernAbstract>,
416    #[serde(default)]
417    experiment: Option<String>,
418    #[serde(default)]
419    collision_information: Option<CernCollisionInfo>,
420    #[serde(default)]
421    date_created: Vec<String>,
422    #[serde(default)]
423    keywords: Vec<String>,
424    #[serde(default)]
425    r#type: CernType,
426}
427
428#[derive(Debug, Deserialize)]
429struct CernTitle {
430    title: String,
431}
432
433#[derive(Debug, Deserialize)]
434struct CernAbstract {
435    description: String,
436}
437
438#[derive(Debug, Deserialize)]
439struct CernCollisionInfo {
440    #[serde(default)]
441    energy: String,
442    #[serde(default)]
443    r#type: String,
444}
445
446#[derive(Debug, Deserialize, Default)]
447struct CernType {
448    #[serde(default)]
449    primary: String,
450    #[serde(default)]
451    secondary: Vec<String>,
452}
453
454/// CERN API search response
455#[derive(Debug, Deserialize)]
456struct CernSearchResponse {
457    #[serde(default)]
458    hits: CernHits,
459}
460
461#[derive(Debug, Deserialize, Default)]
462struct CernHits {
463    #[serde(default)]
464    hits: Vec<CernRecord>,
465    #[serde(default)]
466    total: u32,
467}
468
469/// Client for CERN Open Data Portal
470///
471/// Provides access to:
472/// - LHC experiment data (CMS, ATLAS, LHCb, ALICE)
473/// - Collision events and particle physics datasets
474/// - Education and outreach materials
475///
476/// # Example
477/// ```rust,ignore
478/// use ruvector_data_framework::CernOpenDataClient;
479///
480/// let client = CernOpenDataClient::new()?;
481/// let datasets = client.search_datasets("Higgs").await?;
482/// let cms_data = client.search_by_experiment("CMS").await?;
483/// let dataset = client.get_dataset(12345).await?;
484/// ```
485pub struct CernOpenDataClient {
486    client: Client,
487    base_url: String,
488    rate_limit_delay: Duration,
489    embedder: Arc<SimpleEmbedder>,
490}
491
492impl CernOpenDataClient {
493    /// Create a new CERN Open Data client
494    pub fn new() -> Result<Self> {
495        let client = Client::builder()
496            .timeout(Duration::from_secs(30))
497            .build()
498            .map_err(FrameworkError::Network)?;
499
500        Ok(Self {
501            client,
502            base_url: "https://opendata.cern.ch/api/records".to_string(),
503            rate_limit_delay: Duration::from_millis(CERN_RATE_LIMIT_MS),
504            embedder: Arc::new(SimpleEmbedder::new(256)),
505        })
506    }
507
508    /// Search datasets by query string
509    ///
510    /// # Arguments
511    /// * `query` - Search query (e.g., "Higgs", "top quark", "W boson")
512    ///
513    /// # Example
514    /// ```rust,ignore
515    /// let higgs_data = client.search_datasets("Higgs boson").await?;
516    /// ```
517    pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
518        let url = format!(
519            "{}?q={}&size=50",
520            self.base_url,
521            urlencoding::encode(query)
522        );
523
524        sleep(self.rate_limit_delay).await;
525        let response = self.fetch_with_retry(&url).await?;
526        let search_response: CernSearchResponse = response.json().await?;
527
528        self.convert_records(search_response.hits.hits)
529    }
530
531    /// Get a specific dataset by record ID
532    ///
533    /// # Arguments
534    /// * `recid` - CERN record ID
535    ///
536    /// # Example
537    /// ```rust,ignore
538    /// let dataset = client.get_dataset(5500).await?;
539    /// ```
540    pub async fn get_dataset(&self, recid: u64) -> Result<Vec<SemanticVector>> {
541        let url = format!("{}/{}", self.base_url, recid);
542
543        sleep(self.rate_limit_delay).await;
544        let response = self.fetch_with_retry(&url).await?;
545        let record: CernRecord = response.json().await?;
546
547        self.convert_records(vec![record])
548    }
549
550    /// Search datasets by experiment
551    ///
552    /// # Arguments
553    /// * `experiment` - Experiment name: "CMS", "ATLAS", "LHCb", "ALICE"
554    ///
555    /// # Example
556    /// ```rust,ignore
557    /// let cms_data = client.search_by_experiment("CMS").await?;
558    /// ```
559    pub async fn search_by_experiment(&self, experiment: &str) -> Result<Vec<SemanticVector>> {
560        let url = format!(
561            "{}?experiment={}&size=50",
562            self.base_url,
563            urlencoding::encode(experiment)
564        );
565
566        sleep(self.rate_limit_delay).await;
567        let response = self.fetch_with_retry(&url).await?;
568        let search_response: CernSearchResponse = response.json().await?;
569
570        self.convert_records(search_response.hits.hits)
571    }
572
573    /// Convert CERN records to SemanticVectors
574    fn convert_records(&self, records: Vec<CernRecord>) -> Result<Vec<SemanticVector>> {
575        let mut vectors = Vec::new();
576
577        for record in records {
578            let title = record
579                .metadata
580                .titles
581                .first()
582                .map(|t| t.title.clone())
583                .unwrap_or_else(|| format!("Dataset {}", record.id));
584
585            let description = record
586                .metadata
587                .r#abstract
588                .as_ref()
589                .map(|a| a.description.clone())
590                .unwrap_or_default();
591
592            let experiment = record.metadata.experiment.unwrap_or_default();
593
594            let collision_energy = record
595                .metadata
596                .collision_information
597                .as_ref()
598                .map(|c| c.energy.clone())
599                .unwrap_or_default();
600
601            let collision_type = record
602                .metadata
603                .collision_information
604                .as_ref()
605                .map(|c| c.r#type.clone())
606                .unwrap_or_default();
607
608            // Create text for embedding
609            let text = format!(
610                "{} {} {} {} {}",
611                title,
612                description,
613                experiment,
614                collision_energy,
615                collision_type
616            );
617            let embedding = self.embedder.embed_text(&text);
618
619            let mut metadata = HashMap::new();
620            metadata.insert("recid".to_string(), record.id.to_string());
621            metadata.insert("title".to_string(), title);
622            metadata.insert("experiment".to_string(), experiment);
623            metadata.insert("collision_energy".to_string(), collision_energy);
624            metadata.insert("collision_type".to_string(), collision_type);
625            metadata.insert("data_type".to_string(), record.metadata.r#type.primary);
626            metadata.insert("source".to_string(), "cern".to_string());
627
628            let date = record
629                .metadata
630                .date_created
631                .first()
632                .and_then(|d| NaiveDateTime::parse_from_str(d, "%Y-%m-%d %H:%M:%S").ok())
633                .or_else(|| {
634                    record
635                        .metadata
636                        .date_created
637                        .first()
638                        .and_then(|d| NaiveDateTime::parse_from_str(d, "%Y").ok())
639                })
640                .map(|dt| dt.and_utc())
641                .unwrap_or_else(Utc::now);
642
643            vectors.push(SemanticVector {
644                id: format!("CERN:{}", record.id),
645                embedding,
646                domain: Domain::Physics,
647                timestamp: date,
648                metadata,
649            });
650        }
651
652        Ok(vectors)
653    }
654
655    /// Fetch with retry logic
656    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
657        let mut retries = 0;
658        loop {
659            match self.client.get(url).send().await {
660                Ok(response) => {
661                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
662                        retries += 1;
663                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
664                        continue;
665                    }
666                    return Ok(response);
667                }
668                Err(_) if retries < MAX_RETRIES => {
669                    retries += 1;
670                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
671                }
672                Err(e) => return Err(FrameworkError::Network(e)),
673            }
674        }
675    }
676}
677
678impl Default for CernOpenDataClient {
679    fn default() -> Self {
680        Self::new().expect("Failed to create CERN client")
681    }
682}
683
684// ============================================================================
685// Argo Float Ocean Data Client
686// ============================================================================
687
688/// Argo profile data (simplified structure)
689#[derive(Debug, Deserialize)]
690struct ArgoProfile {
691    #[serde(default)]
692    platform_number: String,
693    #[serde(default)]
694    cycle_number: u32,
695    #[serde(default)]
696    latitude: f64,
697    #[serde(default)]
698    longitude: f64,
699    #[serde(default)]
700    juld: f64, // Julian date
701    #[serde(default)]
702    pres: Vec<f64>, // Pressure levels
703    #[serde(default)]
704    temp: Vec<f64>, // Temperature
705    #[serde(default)]
706    psal: Vec<f64>, // Practical salinity
707}
708
709/// Client for Argo Float Ocean Data
710///
711/// Provides access to:
712/// - Ocean temperature profiles
713/// - Salinity measurements
714/// - Pressure/depth data
715/// - Global ocean coverage
716///
717/// Note: This client uses a simplified Argo data access pattern.
718/// For production use, consider using dedicated Argo APIs or netCDF data.
719///
720/// # Example
721/// ```rust,ignore
722/// use ruvector_data_framework::ArgoClient;
723///
724/// let client = ArgoClient::new()?;
725/// let recent = client.get_recent_profiles(30).await?;
726/// let regional = client.search_by_region(0.0, -30.0, 500.0).await?;
727/// let temp_profiles = client.get_temperature_profiles().await?;
728/// ```
729pub struct ArgoClient {
730    client: Client,
731    base_url: String,
732    rate_limit_delay: Duration,
733    embedder: Arc<SimpleEmbedder>,
734}
735
736impl ArgoClient {
737    /// Create a new Argo client
738    pub fn new() -> Result<Self> {
739        let client = Client::builder()
740            .timeout(Duration::from_secs(30))
741            .build()
742            .map_err(FrameworkError::Network)?;
743
744        Ok(Self {
745            client,
746            // Note: Using Ifremer Argo GDAC as base URL
747            base_url: "https://data-argo.ifremer.fr".to_string(),
748            rate_limit_delay: Duration::from_millis(ARGO_RATE_LIMIT_MS),
749            embedder: Arc::new(SimpleEmbedder::new(256)),
750        })
751    }
752
753    /// Get recent ocean profiles
754    ///
755    /// # Arguments
756    /// * `days` - Number of days to look back
757    ///
758    /// # Example
759    /// ```rust,ignore
760    /// let recent = client.get_recent_profiles(7).await?;
761    /// ```
762    pub async fn get_recent_profiles(&self, _days: u32) -> Result<Vec<SemanticVector>> {
763        // This is a placeholder implementation
764        // In production, you would fetch from Argo GDAC index files or use ArgoVis API
765
766        // For demonstration, return empty vec with a note
767        // Real implementation would parse Argo profile files
768        Ok(Vec::new())
769    }
770
771    /// Search profiles by geographic region
772    ///
773    /// # Arguments
774    /// * `lat` - Center latitude
775    /// * `lon` - Center longitude
776    /// * `radius_km` - Search radius in kilometers
777    ///
778    /// # Example
779    /// ```rust,ignore
780    /// // Search in Atlantic Ocean
781    /// let atlantic = client.search_by_region(0.0, -30.0, 500.0).await?;
782    /// ```
783    pub async fn search_by_region(
784        &self,
785        _lat: f64,
786        _lon: f64,
787        _radius_km: f64,
788    ) -> Result<Vec<SemanticVector>> {
789        // Placeholder implementation
790        // Real implementation would use Argo spatial index
791        Ok(Vec::new())
792    }
793
794    /// Get ocean temperature profiles
795    ///
796    /// # Example
797    /// ```rust,ignore
798    /// let temp_data = client.get_temperature_profiles().await?;
799    /// ```
800    pub async fn get_temperature_profiles(&self) -> Result<Vec<SemanticVector>> {
801        // Placeholder implementation
802        // Real implementation would filter for temperature-focused profiles
803        Ok(Vec::new())
804    }
805
806    /// Create sample Argo data for testing/demonstration
807    ///
808    /// This generates synthetic ocean profile data
809    pub fn create_sample_profiles(&self, count: usize) -> Result<Vec<SemanticVector>> {
810        let mut vectors = Vec::new();
811
812        for i in 0..count {
813            let lat = -60.0 + (120.0 * (i as f64 / count as f64));
814            let lon = -180.0 + (360.0 * ((i * 7) % count) as f64 / count as f64);
815            let temp = 5.0 + (15.0 * (lat.abs() / 90.0));
816            let salinity = 34.0 + (2.0 * (lat / 90.0));
817            let depth = 100.0 * (i % 20) as f64;
818
819            let text = format!(
820                "Ocean profile at lat {} lon {}: temp {}°C, salinity {}, depth {}m",
821                lat, lon, temp, salinity, depth
822            );
823            let embedding = self.embedder.embed_text(&text);
824
825            let mut metadata = HashMap::new();
826            metadata.insert("platform_number".to_string(), format!("{}", 1900000 + i));
827            metadata.insert("latitude".to_string(), lat.to_string());
828            metadata.insert("longitude".to_string(), lon.to_string());
829            metadata.insert("temperature".to_string(), temp.to_string());
830            metadata.insert("salinity".to_string(), salinity.to_string());
831            metadata.insert("depth_m".to_string(), depth.to_string());
832            metadata.insert("source".to_string(), "argo".to_string());
833
834            vectors.push(SemanticVector {
835                id: format!("ARGO:{}", 1900000 + i),
836                embedding,
837                domain: Domain::Ocean,
838                timestamp: Utc::now() - chrono::Duration::days(i as i64 % 30),
839                metadata,
840            });
841        }
842
843        Ok(vectors)
844    }
845}
846
847impl Default for ArgoClient {
848    fn default() -> Self {
849        Self::new().expect("Failed to create Argo client")
850    }
851}
852
853// ============================================================================
854// Materials Project Client
855// ============================================================================
856
857/// Materials Project material data
858#[derive(Debug, Deserialize)]
859struct MaterialsProjectMaterial {
860    material_id: String,
861    #[serde(default)]
862    formula_pretty: String,
863    #[serde(default)]
864    band_gap: Option<f64>,
865    #[serde(default)]
866    density: Option<f64>,
867    #[serde(default)]
868    formation_energy_per_atom: Option<f64>,
869    #[serde(default)]
870    energy_per_atom: Option<f64>,
871    #[serde(default)]
872    volume: Option<f64>,
873    #[serde(default)]
874    nsites: Option<u32>,
875    #[serde(default)]
876    elements: Vec<String>,
877    #[serde(default)]
878    nelements: Option<u32>,
879    #[serde(default)]
880    crystal_system: Option<String>,
881    #[serde(default)]
882    symmetry: Option<MaterialsSymmetry>,
883}
884
885#[derive(Debug, Deserialize)]
886struct MaterialsSymmetry {
887    #[serde(default)]
888    crystal_system: String,
889    #[serde(default)]
890    symbol: String,
891}
892
893/// Materials Project API response
894#[derive(Debug, Deserialize)]
895struct MaterialsProjectResponse {
896    #[serde(default)]
897    data: Vec<MaterialsProjectMaterial>,
898}
899
900/// Client for Materials Project
901///
902/// Provides access to:
903/// - Computational materials science database
904/// - Crystal structures and properties
905/// - Band gaps, formation energies
906/// - Electronic and mechanical properties
907///
908/// **Note**: Requires API key from https://materialsproject.org
909///
910/// # Example
911/// ```rust,ignore
912/// use ruvector_data_framework::MaterialsProjectClient;
913///
914/// let client = MaterialsProjectClient::new("YOUR_API_KEY".to_string())?;
915/// let silicon = client.search_materials("Si").await?;
916/// let material = client.get_material("mp-149").await?;
917/// let semiconductors = client.search_by_property("band_gap", 1.0, 3.0).await?;
918/// ```
919pub struct MaterialsProjectClient {
920    client: Client,
921    base_url: String,
922    api_key: String,
923    rate_limit_delay: Duration,
924    embedder: Arc<SimpleEmbedder>,
925}
926
927impl MaterialsProjectClient {
928    /// Create a new Materials Project client
929    ///
930    /// # Arguments
931    /// * `api_key` - Materials Project API key (get from https://materialsproject.org)
932    pub fn new(api_key: String) -> Result<Self> {
933        let client = Client::builder()
934            .timeout(Duration::from_secs(30))
935            .build()
936            .map_err(FrameworkError::Network)?;
937
938        Ok(Self {
939            client,
940            base_url: "https://api.materialsproject.org".to_string(),
941            api_key,
942            rate_limit_delay: Duration::from_millis(MATERIALS_PROJECT_RATE_LIMIT_MS),
943            embedder: Arc::new(SimpleEmbedder::new(256)),
944        })
945    }
946
947    /// Search materials by chemical formula
948    ///
949    /// # Arguments
950    /// * `formula` - Chemical formula (e.g., "Si", "Fe2O3", "LiFePO4")
951    ///
952    /// # Example
953    /// ```rust,ignore
954    /// let silicon = client.search_materials("Si").await?;
955    /// let iron_oxide = client.search_materials("Fe2O3").await?;
956    /// ```
957    pub async fn search_materials(&self, formula: &str) -> Result<Vec<SemanticVector>> {
958        let url = format!(
959            "{}/materials/summary/?formula={}",
960            self.base_url,
961            urlencoding::encode(formula)
962        );
963
964        sleep(self.rate_limit_delay).await;
965        let response = self.client
966            .get(&url)
967            .header("X-API-KEY", &self.api_key)
968            .send()
969            .await?;
970
971        let mp_response: MaterialsProjectResponse = response.json().await?;
972        self.convert_materials(mp_response.data)
973    }
974
975    /// Get a specific material by Materials Project ID
976    ///
977    /// # Arguments
978    /// * `material_id` - Materials Project ID (e.g., "mp-149" for silicon)
979    ///
980    /// # Example
981    /// ```rust,ignore
982    /// let silicon = client.get_material("mp-149").await?;
983    /// ```
984    pub async fn get_material(&self, material_id: &str) -> Result<Vec<SemanticVector>> {
985        let url = format!(
986            "{}/materials/{}/",
987            self.base_url, material_id
988        );
989
990        sleep(self.rate_limit_delay).await;
991        let response = self.client
992            .get(&url)
993            .header("X-API-KEY", &self.api_key)
994            .send()
995            .await?;
996
997        let material: MaterialsProjectMaterial = response.json().await?;
998        self.convert_materials(vec![material])
999    }
1000
1001    /// Search materials by property range
1002    ///
1003    /// # Arguments
1004    /// * `property` - Property name (e.g., "band_gap", "formation_energy_per_atom")
1005    /// * `min` - Minimum value
1006    /// * `max` - Maximum value
1007    ///
1008    /// # Example
1009    /// ```rust,ignore
1010    /// // Find semiconductors with band gap 1-3 eV
1011    /// let semiconductors = client.search_by_property("band_gap", 1.0, 3.0).await?;
1012    /// ```
1013    pub async fn search_by_property(
1014        &self,
1015        property: &str,
1016        min: f64,
1017        max: f64,
1018    ) -> Result<Vec<SemanticVector>> {
1019        let url = format!(
1020            "{}/materials/summary/?{}_min={}&{}_max={}",
1021            self.base_url, property, min, property, max
1022        );
1023
1024        sleep(self.rate_limit_delay).await;
1025        let response = self.client
1026            .get(&url)
1027            .header("X-API-KEY", &self.api_key)
1028            .send()
1029            .await?;
1030
1031        let mp_response: MaterialsProjectResponse = response.json().await?;
1032        self.convert_materials(mp_response.data)
1033    }
1034
1035    /// Convert Materials Project materials to SemanticVectors
1036    fn convert_materials(&self, materials: Vec<MaterialsProjectMaterial>) -> Result<Vec<SemanticVector>> {
1037        let mut vectors = Vec::new();
1038
1039        for material in materials {
1040            let band_gap = material.band_gap.unwrap_or(0.0);
1041            let density = material.density.unwrap_or(0.0);
1042            let formation_energy = material.formation_energy_per_atom.unwrap_or(0.0);
1043            let crystal_system = material
1044                .crystal_system
1045                .or_else(|| material.symmetry.as_ref().map(|s| s.crystal_system.clone()))
1046                .unwrap_or_default();
1047
1048            // Create text for embedding
1049            let text = format!(
1050                "{} {} crystal system, band gap {} eV, density {} g/cm³, formation energy {} eV/atom",
1051                material.formula_pretty, crystal_system, band_gap, density, formation_energy
1052            );
1053            let embedding = self.embedder.embed_text(&text);
1054
1055            let mut metadata = HashMap::new();
1056            metadata.insert("material_id".to_string(), material.material_id.clone());
1057            metadata.insert("formula".to_string(), material.formula_pretty);
1058            metadata.insert("band_gap".to_string(), band_gap.to_string());
1059            metadata.insert("density".to_string(), density.to_string());
1060            metadata.insert("formation_energy".to_string(), formation_energy.to_string());
1061            metadata.insert("crystal_system".to_string(), crystal_system);
1062            metadata.insert("elements".to_string(), material.elements.join(","));
1063            metadata.insert("source".to_string(), "materials_project".to_string());
1064
1065            vectors.push(SemanticVector {
1066                id: format!("MP:{}", material.material_id),
1067                embedding,
1068                domain: Domain::Physics,
1069                timestamp: Utc::now(),
1070                metadata,
1071            });
1072        }
1073
1074        Ok(vectors)
1075    }
1076}
1077
1078// ============================================================================
1079// Tests
1080// ============================================================================
1081
#[cfg(test)]
mod tests {
    use super::*;

    /// The great-circle distance NYC -> LA is roughly 3936 km.
    #[test]
    fn test_geo_utils_distance() {
        let (nyc_lat, nyc_lon) = (40.7128, -74.0060);
        let (la_lat, la_lon) = (34.0522, -118.2437);

        let km = GeoUtils::distance_km(nyc_lat, nyc_lon, la_lat, la_lon);

        // Accept anything within 100 km of the reference value.
        let deviation = (km - 3936.0).abs();
        assert!(deviation < 100.0);
    }

    /// Radius checks around a point near Los Angeles.
    #[test]
    fn test_geo_utils_within_radius() {
        let (lat0, lon0) = (34.05, -118.25);

        // Roughly 50 km to the north: inside a 100 km radius.
        assert!(GeoUtils::within_radius(lat0, lon0, 34.5, -118.25, 100.0));

        // The other side of the continent: outside a 10 km radius.
        assert!(!GeoUtils::within_radius(lat0, lon0, 40.7, -74.0, 10.0));
    }

    #[tokio::test]
    async fn test_usgs_client_creation() {
        assert!(UsgsEarthquakeClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_cern_client_creation() {
        assert!(CernOpenDataClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_argo_client_creation() {
        assert!(ArgoClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_materials_project_client_creation() {
        assert!(MaterialsProjectClient::new("test_key".to_string()).is_ok());
    }

    /// Sample profile generation yields the requested count in the ocean domain.
    #[tokio::test]
    async fn test_argo_sample_profiles() {
        let argo = ArgoClient::new().unwrap();

        let generated = argo.create_sample_profiles(10);
        assert!(generated.is_ok());

        let profiles = generated.unwrap();
        assert_eq!(profiles.len(), 10);
        assert_eq!(profiles[0].domain, Domain::Ocean);
    }

    /// Each client must be configured with its own rate-limit constant.
    #[test]
    fn test_rate_limiting() {
        assert_eq!(
            UsgsEarthquakeClient::new().unwrap().rate_limit_delay,
            Duration::from_millis(USGS_RATE_LIMIT_MS)
        );
        assert_eq!(
            CernOpenDataClient::new().unwrap().rate_limit_delay,
            Duration::from_millis(CERN_RATE_LIMIT_MS)
        );
        assert_eq!(
            ArgoClient::new().unwrap().rate_limit_delay,
            Duration::from_millis(ARGO_RATE_LIMIT_MS)
        );
        assert_eq!(
            MaterialsProjectClient::new("test".to_string())
                .unwrap()
                .rate_limit_delay,
            Duration::from_millis(MATERIALS_PROJECT_RATE_LIMIT_MS)
        );
    }
}