1use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use chrono::{DateTime, NaiveDateTime, Utc};
16use reqwest::{Client, StatusCode};
17use serde::Deserialize;
18use tokio::time::sleep;
19
20use crate::api_clients::SimpleEmbedder;
21use crate::ruvector_native::{Domain, SemanticVector};
22use crate::{FrameworkError, Result};
23
/// Pause, in milliseconds, taken by the USGS client before each request.
const USGS_RATE_LIMIT_MS: u64 = 200;
/// Pause, in milliseconds, taken by the CERN client before each request.
const CERN_RATE_LIMIT_MS: u64 = 500;
/// Pause, in milliseconds, configured for the Argo client.
const ARGO_RATE_LIMIT_MS: u64 = 300;
/// Pause, in milliseconds, taken by the Materials Project client before each request.
const MATERIALS_PROJECT_RATE_LIMIT_MS: u64 = 1_000;
/// Upper bound on how many times a failed or throttled request is retried.
const MAX_RETRIES: u32 = 3;
/// Base back-off in milliseconds; retry helpers multiply it by the attempt number.
const RETRY_DELAY_MS: u64 = 1_000;
31
/// Stateless namespace for great-circle distance helpers.
pub struct GeoUtils;

impl GeoUtils {
    /// Sphere radius, in kilometres, used by the haversine computation.
    const EARTH_RADIUS_KM: f64 = 6371.0;

    /// Returns the haversine great-circle distance in kilometres between two
    /// points given as latitude/longitude pairs in degrees.
    ///
    /// The result is `NaN` only when one of the inputs is `NaN`.
    pub fn distance_km(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
        let half_dlat = (lat2 - lat1).to_radians() / 2.0;
        let half_dlon = (lon2 - lon1).to_radians() / 2.0;

        let a = half_dlat.sin().powi(2)
            + lat1.to_radians().cos() * lat2.to_radians().cos() * half_dlon.sin().powi(2);
        // Rounding can push `a` a hair above 1.0 for (near-)antipodal points,
        // which would make `(1.0 - a).sqrt()` NaN. Clamp to the valid domain.
        let a = a.clamp(0.0, 1.0);

        let central_angle = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
        Self::EARTH_RADIUS_KM * central_angle
    }

    /// Returns `true` when the point lies at most `radius_km` kilometres from
    /// the center, measured with [`GeoUtils::distance_km`].
    ///
    /// Any `NaN` input yields `false`, because the comparison with `NaN` fails.
    pub fn within_radius(
        center_lat: f64,
        center_lon: f64,
        point_lat: f64,
        point_lon: f64,
        radius_km: f64,
    ) -> bool {
        Self::distance_km(center_lat, center_lon, point_lat, point_lon) <= radius_km
    }
}
63
64#[derive(Debug, Deserialize)]
70struct UsgsGeoJsonResponse {
71 #[serde(default)]
72 features: Vec<UsgsEarthquakeFeature>,
73 #[serde(default)]
74 metadata: UsgsMetadata,
75}
76
77#[derive(Debug, Deserialize, Default)]
78struct UsgsMetadata {
79 #[serde(default)]
80 count: u32,
81}
82
83#[derive(Debug, Deserialize)]
84struct UsgsEarthquakeFeature {
85 id: String,
86 properties: UsgsProperties,
87 geometry: UsgsGeometry,
88}
89
90#[derive(Debug, Deserialize)]
91struct UsgsProperties {
92 #[serde(default)]
93 mag: Option<f64>,
94 #[serde(default)]
95 place: String,
96 #[serde(default)]
97 time: i64, #[serde(default)]
99 updated: i64,
100 #[serde(default)]
101 tz: Option<i32>,
102 #[serde(default)]
103 url: String,
104 #[serde(default)]
105 detail: String,
106 #[serde(default)]
107 felt: Option<u32>,
108 #[serde(default)]
109 cdi: Option<f64>, #[serde(default)]
111 mmi: Option<f64>, #[serde(default)]
113 alert: Option<String>,
114 #[serde(default)]
115 status: String,
116 #[serde(default)]
117 tsunami: u8,
118 #[serde(default)]
119 sig: u32, #[serde(default)]
121 net: String,
122 #[serde(default)]
123 code: String,
124 #[serde(default)]
125 r#type: String,
126 #[serde(default)]
127 title: String,
128}
129
130#[derive(Debug, Deserialize)]
131struct UsgsGeometry {
132 coordinates: Vec<f64>, }
134
135pub struct UsgsEarthquakeClient {
153 client: Client,
154 base_url: String,
155 rate_limit_delay: Duration,
156 embedder: Arc<SimpleEmbedder>,
157}
158
159impl UsgsEarthquakeClient {
160 pub fn new() -> Result<Self> {
162 let client = Client::builder()
163 .timeout(Duration::from_secs(30))
164 .build()
165 .map_err(FrameworkError::Network)?;
166
167 Ok(Self {
168 client,
169 base_url: "https://earthquake.usgs.gov/fdsnws/event/1".to_string(),
170 rate_limit_delay: Duration::from_millis(USGS_RATE_LIMIT_MS),
171 embedder: Arc::new(SimpleEmbedder::new(256)),
172 })
173 }
174
175 pub async fn get_recent(
186 &self,
187 min_magnitude: f64,
188 days: u32,
189 ) -> Result<Vec<SemanticVector>> {
190 let now = Utc::now();
191 let start_time = now - chrono::Duration::days(days as i64);
192
193 let url = format!(
194 "{}/query?format=geojson&starttime={}&endtime={}&minmagnitude={}",
195 self.base_url,
196 start_time.format("%Y-%m-%d"),
197 now.format("%Y-%m-%d"),
198 min_magnitude
199 );
200
201 sleep(self.rate_limit_delay).await;
202 let response = self.fetch_with_retry(&url).await?;
203 let geojson: UsgsGeoJsonResponse = response.json().await?;
204
205 self.convert_earthquakes(geojson.features)
206 }
207
208 pub async fn search_by_region(
222 &self,
223 lat: f64,
224 lon: f64,
225 radius_km: f64,
226 days: u32,
227 ) -> Result<Vec<SemanticVector>> {
228 let now = Utc::now();
229 let start_time = now - chrono::Duration::days(days as i64);
230
231 let url = format!(
232 "{}/query?format=geojson&starttime={}&endtime={}&latitude={}&longitude={}&maxradiuskm={}",
233 self.base_url,
234 start_time.format("%Y-%m-%d"),
235 now.format("%Y-%m-%d"),
236 lat,
237 lon,
238 radius_km
239 );
240
241 sleep(self.rate_limit_delay).await;
242 let response = self.fetch_with_retry(&url).await?;
243 let geojson: UsgsGeoJsonResponse = response.json().await?;
244
245 self.convert_earthquakes(geojson.features)
246 }
247
248 pub async fn get_significant(&self, days: u32) -> Result<Vec<SemanticVector>> {
258 let now = Utc::now();
259 let start_time = now - chrono::Duration::days(days as i64);
260
261 let url = format!(
262 "{}/query?format=geojson&starttime={}&endtime={}&orderby=magnitude&limit=100",
263 self.base_url,
264 start_time.format("%Y-%m-%d"),
265 now.format("%Y-%m-%d")
266 );
267
268 sleep(self.rate_limit_delay).await;
269 let response = self.fetch_with_retry(&url).await?;
270 let geojson: UsgsGeoJsonResponse = response.json().await?;
271
272 let significant: Vec<_> = geojson
274 .features
275 .into_iter()
276 .filter(|f| {
277 f.properties.mag.unwrap_or(0.0) >= 6.0 || f.properties.sig >= 600
278 })
279 .collect();
280
281 self.convert_earthquakes(significant)
282 }
283
284 pub async fn get_by_magnitude_range(
297 &self,
298 min: f64,
299 max: f64,
300 days: u32,
301 ) -> Result<Vec<SemanticVector>> {
302 let now = Utc::now();
303 let start_time = now - chrono::Duration::days(days as i64);
304
305 let url = format!(
306 "{}/query?format=geojson&starttime={}&endtime={}&minmagnitude={}&maxmagnitude={}",
307 self.base_url,
308 start_time.format("%Y-%m-%d"),
309 now.format("%Y-%m-%d"),
310 min,
311 max
312 );
313
314 sleep(self.rate_limit_delay).await;
315 let response = self.fetch_with_retry(&url).await?;
316 let geojson: UsgsGeoJsonResponse = response.json().await?;
317
318 self.convert_earthquakes(geojson.features)
319 }
320
321 fn convert_earthquakes(&self, features: Vec<UsgsEarthquakeFeature>) -> Result<Vec<SemanticVector>> {
323 let mut vectors = Vec::new();
324
325 for feature in features {
326 let mag = feature.properties.mag.unwrap_or(0.0);
327 let coords = &feature.geometry.coordinates;
328 let lon = coords.get(0).copied().unwrap_or(0.0);
329 let lat = coords.get(1).copied().unwrap_or(0.0);
330 let depth = coords.get(2).copied().unwrap_or(0.0);
331
332 let timestamp = DateTime::from_timestamp_millis(feature.properties.time)
334 .unwrap_or_else(Utc::now);
335
336 let text = format!(
338 "Magnitude {} earthquake {} at depth {}km (lat: {}, lon: {})",
339 mag, feature.properties.place, depth, lat, lon
340 );
341 let embedding = self.embedder.embed_text(&text);
342
343 let mut metadata = HashMap::new();
344 metadata.insert("magnitude".to_string(), mag.to_string());
345 metadata.insert("place".to_string(), feature.properties.place);
346 metadata.insert("latitude".to_string(), lat.to_string());
347 metadata.insert("longitude".to_string(), lon.to_string());
348 metadata.insert("depth_km".to_string(), depth.to_string());
349 metadata.insert("tsunami".to_string(), feature.properties.tsunami.to_string());
350 metadata.insert("significance".to_string(), feature.properties.sig.to_string());
351 metadata.insert("status".to_string(), feature.properties.status);
352 if let Some(alert) = feature.properties.alert {
353 metadata.insert("alert".to_string(), alert);
354 }
355 metadata.insert("source".to_string(), "usgs".to_string());
356
357 vectors.push(SemanticVector {
358 id: format!("USGS:{}", feature.id),
359 embedding,
360 domain: Domain::Seismic,
361 timestamp,
362 metadata,
363 });
364 }
365
366 Ok(vectors)
367 }
368
369 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
371 let mut retries = 0;
372 loop {
373 match self.client.get(url).send().await {
374 Ok(response) => {
375 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
376 retries += 1;
377 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
378 continue;
379 }
380 return Ok(response);
381 }
382 Err(_) if retries < MAX_RETRIES => {
383 retries += 1;
384 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
385 }
386 Err(e) => return Err(FrameworkError::Network(e)),
387 }
388 }
389 }
390}
391
392impl Default for UsgsEarthquakeClient {
393 fn default() -> Self {
394 Self::new().expect("Failed to create USGS client")
395 }
396}
397
398#[derive(Debug, Deserialize)]
404struct CernRecord {
405 id: u64,
406 #[serde(default)]
407 metadata: CernMetadata,
408}
409
410#[derive(Debug, Deserialize, Default)]
411struct CernMetadata {
412 #[serde(default)]
413 titles: Vec<CernTitle>,
414 #[serde(default)]
415 r#abstract: Option<CernAbstract>,
416 #[serde(default)]
417 experiment: Option<String>,
418 #[serde(default)]
419 collision_information: Option<CernCollisionInfo>,
420 #[serde(default)]
421 date_created: Vec<String>,
422 #[serde(default)]
423 keywords: Vec<String>,
424 #[serde(default)]
425 r#type: CernType,
426}
427
428#[derive(Debug, Deserialize)]
429struct CernTitle {
430 title: String,
431}
432
433#[derive(Debug, Deserialize)]
434struct CernAbstract {
435 description: String,
436}
437
438#[derive(Debug, Deserialize)]
439struct CernCollisionInfo {
440 #[serde(default)]
441 energy: String,
442 #[serde(default)]
443 r#type: String,
444}
445
446#[derive(Debug, Deserialize, Default)]
447struct CernType {
448 #[serde(default)]
449 primary: String,
450 #[serde(default)]
451 secondary: Vec<String>,
452}
453
454#[derive(Debug, Deserialize)]
456struct CernSearchResponse {
457 #[serde(default)]
458 hits: CernHits,
459}
460
461#[derive(Debug, Deserialize, Default)]
462struct CernHits {
463 #[serde(default)]
464 hits: Vec<CernRecord>,
465 #[serde(default)]
466 total: u32,
467}
468
469pub struct CernOpenDataClient {
486 client: Client,
487 base_url: String,
488 rate_limit_delay: Duration,
489 embedder: Arc<SimpleEmbedder>,
490}
491
492impl CernOpenDataClient {
493 pub fn new() -> Result<Self> {
495 let client = Client::builder()
496 .timeout(Duration::from_secs(30))
497 .build()
498 .map_err(FrameworkError::Network)?;
499
500 Ok(Self {
501 client,
502 base_url: "https://opendata.cern.ch/api/records".to_string(),
503 rate_limit_delay: Duration::from_millis(CERN_RATE_LIMIT_MS),
504 embedder: Arc::new(SimpleEmbedder::new(256)),
505 })
506 }
507
508 pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
518 let url = format!(
519 "{}?q={}&size=50",
520 self.base_url,
521 urlencoding::encode(query)
522 );
523
524 sleep(self.rate_limit_delay).await;
525 let response = self.fetch_with_retry(&url).await?;
526 let search_response: CernSearchResponse = response.json().await?;
527
528 self.convert_records(search_response.hits.hits)
529 }
530
531 pub async fn get_dataset(&self, recid: u64) -> Result<Vec<SemanticVector>> {
541 let url = format!("{}/{}", self.base_url, recid);
542
543 sleep(self.rate_limit_delay).await;
544 let response = self.fetch_with_retry(&url).await?;
545 let record: CernRecord = response.json().await?;
546
547 self.convert_records(vec![record])
548 }
549
550 pub async fn search_by_experiment(&self, experiment: &str) -> Result<Vec<SemanticVector>> {
560 let url = format!(
561 "{}?experiment={}&size=50",
562 self.base_url,
563 urlencoding::encode(experiment)
564 );
565
566 sleep(self.rate_limit_delay).await;
567 let response = self.fetch_with_retry(&url).await?;
568 let search_response: CernSearchResponse = response.json().await?;
569
570 self.convert_records(search_response.hits.hits)
571 }
572
573 fn convert_records(&self, records: Vec<CernRecord>) -> Result<Vec<SemanticVector>> {
575 let mut vectors = Vec::new();
576
577 for record in records {
578 let title = record
579 .metadata
580 .titles
581 .first()
582 .map(|t| t.title.clone())
583 .unwrap_or_else(|| format!("Dataset {}", record.id));
584
585 let description = record
586 .metadata
587 .r#abstract
588 .as_ref()
589 .map(|a| a.description.clone())
590 .unwrap_or_default();
591
592 let experiment = record.metadata.experiment.unwrap_or_default();
593
594 let collision_energy = record
595 .metadata
596 .collision_information
597 .as_ref()
598 .map(|c| c.energy.clone())
599 .unwrap_or_default();
600
601 let collision_type = record
602 .metadata
603 .collision_information
604 .as_ref()
605 .map(|c| c.r#type.clone())
606 .unwrap_or_default();
607
608 let text = format!(
610 "{} {} {} {} {}",
611 title,
612 description,
613 experiment,
614 collision_energy,
615 collision_type
616 );
617 let embedding = self.embedder.embed_text(&text);
618
619 let mut metadata = HashMap::new();
620 metadata.insert("recid".to_string(), record.id.to_string());
621 metadata.insert("title".to_string(), title);
622 metadata.insert("experiment".to_string(), experiment);
623 metadata.insert("collision_energy".to_string(), collision_energy);
624 metadata.insert("collision_type".to_string(), collision_type);
625 metadata.insert("data_type".to_string(), record.metadata.r#type.primary);
626 metadata.insert("source".to_string(), "cern".to_string());
627
628 let date = record
629 .metadata
630 .date_created
631 .first()
632 .and_then(|d| NaiveDateTime::parse_from_str(d, "%Y-%m-%d %H:%M:%S").ok())
633 .or_else(|| {
634 record
635 .metadata
636 .date_created
637 .first()
638 .and_then(|d| NaiveDateTime::parse_from_str(d, "%Y").ok())
639 })
640 .map(|dt| dt.and_utc())
641 .unwrap_or_else(Utc::now);
642
643 vectors.push(SemanticVector {
644 id: format!("CERN:{}", record.id),
645 embedding,
646 domain: Domain::Physics,
647 timestamp: date,
648 metadata,
649 });
650 }
651
652 Ok(vectors)
653 }
654
655 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
657 let mut retries = 0;
658 loop {
659 match self.client.get(url).send().await {
660 Ok(response) => {
661 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
662 retries += 1;
663 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
664 continue;
665 }
666 return Ok(response);
667 }
668 Err(_) if retries < MAX_RETRIES => {
669 retries += 1;
670 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
671 }
672 Err(e) => return Err(FrameworkError::Network(e)),
673 }
674 }
675 }
676}
677
678impl Default for CernOpenDataClient {
679 fn default() -> Self {
680 Self::new().expect("Failed to create CERN client")
681 }
682}
683
684#[derive(Debug, Deserialize)]
690struct ArgoProfile {
691 #[serde(default)]
692 platform_number: String,
693 #[serde(default)]
694 cycle_number: u32,
695 #[serde(default)]
696 latitude: f64,
697 #[serde(default)]
698 longitude: f64,
699 #[serde(default)]
700 juld: f64, #[serde(default)]
702 pres: Vec<f64>, #[serde(default)]
704 temp: Vec<f64>, #[serde(default)]
706 psal: Vec<f64>, }
708
709pub struct ArgoClient {
730 client: Client,
731 base_url: String,
732 rate_limit_delay: Duration,
733 embedder: Arc<SimpleEmbedder>,
734}
735
736impl ArgoClient {
737 pub fn new() -> Result<Self> {
739 let client = Client::builder()
740 .timeout(Duration::from_secs(30))
741 .build()
742 .map_err(FrameworkError::Network)?;
743
744 Ok(Self {
745 client,
746 base_url: "https://data-argo.ifremer.fr".to_string(),
748 rate_limit_delay: Duration::from_millis(ARGO_RATE_LIMIT_MS),
749 embedder: Arc::new(SimpleEmbedder::new(256)),
750 })
751 }
752
753 pub async fn get_recent_profiles(&self, _days: u32) -> Result<Vec<SemanticVector>> {
763 Ok(Vec::new())
769 }
770
771 pub async fn search_by_region(
784 &self,
785 _lat: f64,
786 _lon: f64,
787 _radius_km: f64,
788 ) -> Result<Vec<SemanticVector>> {
789 Ok(Vec::new())
792 }
793
794 pub async fn get_temperature_profiles(&self) -> Result<Vec<SemanticVector>> {
801 Ok(Vec::new())
804 }
805
806 pub fn create_sample_profiles(&self, count: usize) -> Result<Vec<SemanticVector>> {
810 let mut vectors = Vec::new();
811
812 for i in 0..count {
813 let lat = -60.0 + (120.0 * (i as f64 / count as f64));
814 let lon = -180.0 + (360.0 * ((i * 7) % count) as f64 / count as f64);
815 let temp = 5.0 + (15.0 * (lat.abs() / 90.0));
816 let salinity = 34.0 + (2.0 * (lat / 90.0));
817 let depth = 100.0 * (i % 20) as f64;
818
819 let text = format!(
820 "Ocean profile at lat {} lon {}: temp {}°C, salinity {}, depth {}m",
821 lat, lon, temp, salinity, depth
822 );
823 let embedding = self.embedder.embed_text(&text);
824
825 let mut metadata = HashMap::new();
826 metadata.insert("platform_number".to_string(), format!("{}", 1900000 + i));
827 metadata.insert("latitude".to_string(), lat.to_string());
828 metadata.insert("longitude".to_string(), lon.to_string());
829 metadata.insert("temperature".to_string(), temp.to_string());
830 metadata.insert("salinity".to_string(), salinity.to_string());
831 metadata.insert("depth_m".to_string(), depth.to_string());
832 metadata.insert("source".to_string(), "argo".to_string());
833
834 vectors.push(SemanticVector {
835 id: format!("ARGO:{}", 1900000 + i),
836 embedding,
837 domain: Domain::Ocean,
838 timestamp: Utc::now() - chrono::Duration::days(i as i64 % 30),
839 metadata,
840 });
841 }
842
843 Ok(vectors)
844 }
845}
846
847impl Default for ArgoClient {
848 fn default() -> Self {
849 Self::new().expect("Failed to create Argo client")
850 }
851}
852
853#[derive(Debug, Deserialize)]
859struct MaterialsProjectMaterial {
860 material_id: String,
861 #[serde(default)]
862 formula_pretty: String,
863 #[serde(default)]
864 band_gap: Option<f64>,
865 #[serde(default)]
866 density: Option<f64>,
867 #[serde(default)]
868 formation_energy_per_atom: Option<f64>,
869 #[serde(default)]
870 energy_per_atom: Option<f64>,
871 #[serde(default)]
872 volume: Option<f64>,
873 #[serde(default)]
874 nsites: Option<u32>,
875 #[serde(default)]
876 elements: Vec<String>,
877 #[serde(default)]
878 nelements: Option<u32>,
879 #[serde(default)]
880 crystal_system: Option<String>,
881 #[serde(default)]
882 symmetry: Option<MaterialsSymmetry>,
883}
884
885#[derive(Debug, Deserialize)]
886struct MaterialsSymmetry {
887 #[serde(default)]
888 crystal_system: String,
889 #[serde(default)]
890 symbol: String,
891}
892
893#[derive(Debug, Deserialize)]
895struct MaterialsProjectResponse {
896 #[serde(default)]
897 data: Vec<MaterialsProjectMaterial>,
898}
899
900pub struct MaterialsProjectClient {
920 client: Client,
921 base_url: String,
922 api_key: String,
923 rate_limit_delay: Duration,
924 embedder: Arc<SimpleEmbedder>,
925}
926
927impl MaterialsProjectClient {
928 pub fn new(api_key: String) -> Result<Self> {
933 let client = Client::builder()
934 .timeout(Duration::from_secs(30))
935 .build()
936 .map_err(FrameworkError::Network)?;
937
938 Ok(Self {
939 client,
940 base_url: "https://api.materialsproject.org".to_string(),
941 api_key,
942 rate_limit_delay: Duration::from_millis(MATERIALS_PROJECT_RATE_LIMIT_MS),
943 embedder: Arc::new(SimpleEmbedder::new(256)),
944 })
945 }
946
947 pub async fn search_materials(&self, formula: &str) -> Result<Vec<SemanticVector>> {
958 let url = format!(
959 "{}/materials/summary/?formula={}",
960 self.base_url,
961 urlencoding::encode(formula)
962 );
963
964 sleep(self.rate_limit_delay).await;
965 let response = self.client
966 .get(&url)
967 .header("X-API-KEY", &self.api_key)
968 .send()
969 .await?;
970
971 let mp_response: MaterialsProjectResponse = response.json().await?;
972 self.convert_materials(mp_response.data)
973 }
974
975 pub async fn get_material(&self, material_id: &str) -> Result<Vec<SemanticVector>> {
985 let url = format!(
986 "{}/materials/{}/",
987 self.base_url, material_id
988 );
989
990 sleep(self.rate_limit_delay).await;
991 let response = self.client
992 .get(&url)
993 .header("X-API-KEY", &self.api_key)
994 .send()
995 .await?;
996
997 let material: MaterialsProjectMaterial = response.json().await?;
998 self.convert_materials(vec![material])
999 }
1000
1001 pub async fn search_by_property(
1014 &self,
1015 property: &str,
1016 min: f64,
1017 max: f64,
1018 ) -> Result<Vec<SemanticVector>> {
1019 let url = format!(
1020 "{}/materials/summary/?{}_min={}&{}_max={}",
1021 self.base_url, property, min, property, max
1022 );
1023
1024 sleep(self.rate_limit_delay).await;
1025 let response = self.client
1026 .get(&url)
1027 .header("X-API-KEY", &self.api_key)
1028 .send()
1029 .await?;
1030
1031 let mp_response: MaterialsProjectResponse = response.json().await?;
1032 self.convert_materials(mp_response.data)
1033 }
1034
1035 fn convert_materials(&self, materials: Vec<MaterialsProjectMaterial>) -> Result<Vec<SemanticVector>> {
1037 let mut vectors = Vec::new();
1038
1039 for material in materials {
1040 let band_gap = material.band_gap.unwrap_or(0.0);
1041 let density = material.density.unwrap_or(0.0);
1042 let formation_energy = material.formation_energy_per_atom.unwrap_or(0.0);
1043 let crystal_system = material
1044 .crystal_system
1045 .or_else(|| material.symmetry.as_ref().map(|s| s.crystal_system.clone()))
1046 .unwrap_or_default();
1047
1048 let text = format!(
1050 "{} {} crystal system, band gap {} eV, density {} g/cm³, formation energy {} eV/atom",
1051 material.formula_pretty, crystal_system, band_gap, density, formation_energy
1052 );
1053 let embedding = self.embedder.embed_text(&text);
1054
1055 let mut metadata = HashMap::new();
1056 metadata.insert("material_id".to_string(), material.material_id.clone());
1057 metadata.insert("formula".to_string(), material.formula_pretty);
1058 metadata.insert("band_gap".to_string(), band_gap.to_string());
1059 metadata.insert("density".to_string(), density.to_string());
1060 metadata.insert("formation_energy".to_string(), formation_energy.to_string());
1061 metadata.insert("crystal_system".to_string(), crystal_system);
1062 metadata.insert("elements".to_string(), material.elements.join(","));
1063 metadata.insert("source".to_string(), "materials_project".to_string());
1064
1065 vectors.push(SemanticVector {
1066 id: format!("MP:{}", material.material_id),
1067 embedding,
1068 domain: Domain::Physics,
1069 timestamp: Utc::now(),
1070 metadata,
1071 });
1072 }
1073
1074 Ok(vectors)
1075 }
1076}
1077
/// Offline unit tests: geodesic helpers, client construction, synthetic Argo
/// profiles and configured rate limits. None of them performs a request.
#[cfg(test)]
mod tests {
    use super::*;

    /// New York to Los Angeles should be roughly 3936 km.
    #[test]
    fn test_geo_utils_distance() {
        let (ny_lat, ny_lon) = (40.7128, -74.0060);
        let (la_lat, la_lon) = (34.0522, -118.2437);

        let km = GeoUtils::distance_km(ny_lat, ny_lon, la_lat, la_lon);

        assert!((km - 3936.0).abs() < 100.0);
    }

    /// A point about half a degree north is inside 100 km; a cross-country
    /// point is outside 10 km.
    #[test]
    fn test_geo_utils_within_radius() {
        let (origin_lat, origin_lon) = (34.05, -118.25);

        assert!(GeoUtils::within_radius(
            origin_lat, origin_lon, 34.5, -118.25, 100.0
        ));
        assert!(!GeoUtils::within_radius(
            origin_lat, origin_lon, 40.7, -74.0, 10.0
        ));
    }

    #[tokio::test]
    async fn test_usgs_client_creation() {
        assert!(UsgsEarthquakeClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_cern_client_creation() {
        assert!(CernOpenDataClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_argo_client_creation() {
        assert!(ArgoClient::new().is_ok());
    }

    #[tokio::test]
    async fn test_materials_project_client_creation() {
        assert!(MaterialsProjectClient::new("test_key".to_string()).is_ok());
    }

    /// Ten requested sample profiles yield ten ocean-domain vectors.
    #[tokio::test]
    async fn test_argo_sample_profiles() {
        let argo = ArgoClient::new().unwrap();

        let generated = argo.create_sample_profiles(10);
        assert!(generated.is_ok());

        let samples = generated.unwrap();
        assert_eq!(samples.len(), 10);
        assert_eq!(samples[0].domain, Domain::Ocean);
    }

    /// Each client is configured with its own rate-limit constant.
    #[test]
    fn test_rate_limiting() {
        let usgs_delay = UsgsEarthquakeClient::new().unwrap().rate_limit_delay;
        assert_eq!(usgs_delay, Duration::from_millis(USGS_RATE_LIMIT_MS));

        let cern_delay = CernOpenDataClient::new().unwrap().rate_limit_delay;
        assert_eq!(cern_delay, Duration::from_millis(CERN_RATE_LIMIT_MS));

        let argo_delay = ArgoClient::new().unwrap().rate_limit_delay;
        assert_eq!(argo_delay, Duration::from_millis(ARGO_RATE_LIMIT_MS));

        let mp_delay = MaterialsProjectClient::new("test".to_string())
            .unwrap()
            .rate_limit_delay;
        assert_eq!(
            mp_delay,
            Duration::from_millis(MATERIALS_PROJECT_RATE_LIMIT_MS)
        );
    }
}