1use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use chrono::{NaiveDate, Utc};
16use reqwest::{Client, StatusCode};
17use serde::Deserialize;
18use tokio::time::sleep;
19
20use crate::api_clients::SimpleEmbedder;
21use crate::ruvector_native::{Domain, SemanticVector};
22use crate::{FrameworkError, Result};
23
/// Delay in milliseconds slept before each request by `NasaClient` and `ExoplanetClient`.
const NASA_RATE_LIMIT_MS: u64 = 100;
/// Delay in milliseconds slept before each request by `SpaceXClient`.
const SPACEX_RATE_LIMIT_MS: u64 = 100;
/// Delay in milliseconds slept before each request by `AstronomyClient`.
const ASTRONOMY_RATE_LIMIT_MS: u64 = 200;
/// Upper bound on the retries performed by the `fetch_with_retry` helpers.
const MAX_RETRIES: u32 = 3;
/// Base retry back-off in milliseconds; the helpers multiply it by the attempt number.
const RETRY_DELAY_MS: u64 = 1_000;
30
/// JSON body decoded by `NasaClient::get_apod` from the `/planetary/apod` endpoint.
///
/// Every field is `#[serde(default)]`, so a key that is absent from the
/// response becomes an empty string instead of a decoding error.
#[derive(Debug, Deserialize)]
struct ApodResponse {
    /// Date string; `get_apod` parses it with the `%Y-%m-%d` format.
    #[serde(default)]
    date: String,
    /// Title; included in the embedded text and stored as metadata.
    #[serde(default)]
    title: String,
    /// Explanation text; included in the embedded text only.
    #[serde(default)]
    explanation: String,
    /// Stored under the `url` metadata key.
    #[serde(default)]
    url: String,
    /// Stored under the `media_type` metadata key.
    #[serde(default)]
    media_type: String,
    /// Stored under the `copyright` metadata key.
    #[serde(default)]
    copyright: String,
}
51
/// JSON body decoded by `NasaClient::search_neo` from the `/neo/rest/v1/feed` endpoint.
#[derive(Debug, Deserialize)]
struct NeoResponse {
    /// Objects grouped under a string key; `search_neo` treats each key as a
    /// `%Y-%m-%d` date. Defaults to an empty map when the key is absent.
    #[serde(default)]
    near_earth_objects: HashMap<String, Vec<NeoObject>>,
}
58
/// One entry of `NeoResponse::near_earth_objects`.
///
/// `id` and `name` have no serde default, so decoding fails if either is missing.
#[derive(Debug, Deserialize)]
struct NeoObject {
    /// Identifier; used in the vector id and the `neo_id` metadata key.
    id: String,
    /// Name; used in the embedded text and the `name` metadata key.
    name: String,
    /// Not read by the code visible in this file.
    #[serde(default)]
    nasa_jpl_url: String,
    /// Magnitude value; defaults to `0.0` when absent.
    #[serde(default)]
    absolute_magnitude_h: f64,
    /// Hazard flag; defaults to `false` when absent.
    #[serde(default)]
    is_potentially_hazardous_asteroid: bool,
    /// Close-approach records; `search_neo` only reads the first element.
    #[serde(default)]
    close_approach_data: Vec<CloseApproachData>,
}
72
/// One close-approach record attached to a `NeoObject`.
#[derive(Debug, Deserialize)]
struct CloseApproachData {
    /// Not read by the code visible in this file.
    #[serde(default)]
    close_approach_date: String,
    /// String-valued map; `search_neo` looks up the `kilometers_per_hour` key.
    #[serde(default)]
    relative_velocity: HashMap<String, String>,
    /// String-valued map; `search_neo` looks up the `kilometers` key.
    #[serde(default)]
    miss_distance: HashMap<String, String>,
}
82
/// JSON body decoded by `NasaClient::search_mars_photos`.
#[derive(Debug, Deserialize)]
struct MarsPhotosResponse {
    /// Photo records; defaults to an empty list when the key is absent.
    #[serde(default)]
    photos: Vec<MarsPhoto>,
}
89
/// One entry of `MarsPhotosResponse::photos`.
///
/// `id` has no serde default, so decoding fails if it is missing.
#[derive(Debug, Deserialize)]
struct MarsPhoto {
    /// Photo identifier; used in the vector id and the `photo_id` metadata key.
    id: u64,
    /// Sol number as reported by the API; defaults to `0` when absent.
    #[serde(default)]
    sol: u32,
    /// Stored under the `img_src` metadata key.
    #[serde(default)]
    img_src: String,
    /// Date string; `search_mars_photos` parses it with the `%Y-%m-%d` format.
    #[serde(default)]
    earth_date: String,
    /// Camera details; falls back to `MarsCamera::default()` when absent.
    #[serde(default)]
    camera: MarsCamera,
    /// Rover details; falls back to `MarsRover::default()` when absent.
    #[serde(default)]
    rover: MarsRover,
}
104
/// Camera description nested inside a `MarsPhoto`.
///
/// Derives `Default` so `MarsPhoto` can use `#[serde(default)]` on the field.
#[derive(Debug, Deserialize, Default)]
struct MarsCamera {
    /// Stored under the `camera` metadata key.
    #[serde(default)]
    name: String,
    /// Used in the embedded text and the `camera_full_name` metadata key.
    #[serde(default)]
    full_name: String,
}
112
/// Rover description nested inside a `MarsPhoto`.
///
/// Derives `Default` so `MarsPhoto` can use `#[serde(default)]` on the field.
#[derive(Debug, Deserialize, Default)]
struct MarsRover {
    /// Used in the embedded text and the `rover` metadata key.
    #[serde(default)]
    name: String,
    /// Stored under the `rover_status` metadata key.
    #[serde(default)]
    status: String,
}
120
/// One element of the JSON array decoded by `NasaClient::get_donki_events`.
///
/// All fields default to an empty string when their key is absent.
///
/// NOTE(review): only the `activityID` / `startTime` keys are mapped. Some
/// DONKI event types appear to use other key names (for example `flrID` /
/// `beginTime` for flares); confirm against the API before relying on
/// `activity_id` or `start_time` being populated for every event type.
#[derive(Debug, Deserialize)]
struct DonkiEvent {
    /// Read from the `activityID` key; used in the vector id.
    #[serde(default)]
    #[serde(rename = "activityID")]
    activity_id: String,
    /// Read from the `startTime` key; parsed into the vector timestamp.
    #[serde(default)]
    #[serde(rename = "startTime")]
    start_time: String,
    /// Read from the `classType` key; stored as metadata.
    #[serde(default)]
    #[serde(rename = "classType")]
    class_type: String,
    /// Read from the `sourceLocation` key; used in the embedded text and metadata.
    #[serde(default)]
    #[serde(rename = "sourceLocation")]
    source_location: String,
    /// Free-text note; used in the embedded text and metadata.
    #[serde(default)]
    note: String,
}
139
/// Client for the NASA endpoints used in this file (APOD, NEO feed, Mars
/// photos, InSight weather, DONKI); results are returned as `SemanticVector`s.
pub struct NasaClient {
    /// HTTP client; built in `new` with a 30 second timeout and a fixed user agent.
    client: Client,
    /// URL prefix for every endpoint; set to `https://api.nasa.gov` by `new`.
    base_url: String,
    /// Value sent as the `api_key` query parameter; `DEMO_KEY` when none is supplied.
    api_key: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Text embedder; constructed with `SimpleEmbedder::new(384)` in `new`.
    embedder: Arc<SimpleEmbedder>,
}
164
165impl NasaClient {
166 pub fn new(api_key: Option<String>) -> Result<Self> {
172 let client = Client::builder()
173 .timeout(Duration::from_secs(30))
174 .user_agent("ruvector-data-framework/1.0")
175 .build()
176 .map_err(FrameworkError::Network)?;
177
178 Ok(Self {
179 client,
180 base_url: "https://api.nasa.gov".to_string(),
181 api_key: api_key.unwrap_or_else(|| "DEMO_KEY".to_string()),
182 rate_limit_delay: Duration::from_millis(NASA_RATE_LIMIT_MS),
183 embedder: Arc::new(SimpleEmbedder::new(384)),
184 })
185 }
186
187 pub async fn get_apod(&self, date: Option<&str>) -> Result<Vec<SemanticVector>> {
198 let mut url = format!("{}/planetary/apod?api_key={}", self.base_url, self.api_key);
199
200 if let Some(d) = date {
201 url.push_str(&format!("&date={}", d));
202 }
203
204 sleep(self.rate_limit_delay).await;
205 let response = self.fetch_with_retry(&url).await?;
206 let apod: ApodResponse = response.json().await?;
207
208 let text = format!(
210 "Astronomy Picture of the Day {}: {} - {}",
211 apod.date, apod.title, apod.explanation
212 );
213 let embedding = self.embedder.embed_text(&text);
214
215 let timestamp = NaiveDate::parse_from_str(&apod.date, "%Y-%m-%d")
217 .ok()
218 .and_then(|d| d.and_hms_opt(0, 0, 0))
219 .map(|dt| dt.and_utc())
220 .unwrap_or_else(Utc::now);
221
222 let mut metadata = HashMap::new();
223 metadata.insert("title".to_string(), apod.title);
224 metadata.insert("date".to_string(), apod.date.clone());
225 metadata.insert("media_type".to_string(), apod.media_type);
226 metadata.insert("url".to_string(), apod.url);
227 metadata.insert("copyright".to_string(), apod.copyright);
228 metadata.insert("source".to_string(), "nasa_apod".to_string());
229
230 Ok(vec![SemanticVector {
231 id: format!("NASA:APOD:{}", apod.date),
232 embedding,
233 domain: Domain::Space,
234 timestamp,
235 metadata,
236 }])
237 }
238
239 pub async fn search_neo(&self, start_date: &str, end_date: &str) -> Result<Vec<SemanticVector>> {
250 let url = format!(
251 "{}/neo/rest/v1/feed?start_date={}&end_date={}&api_key={}",
252 self.base_url, start_date, end_date, self.api_key
253 );
254
255 sleep(self.rate_limit_delay).await;
256 let response = self.fetch_with_retry(&url).await?;
257 let neo_response: NeoResponse = response.json().await?;
258
259 let mut vectors = Vec::new();
260
261 for (date, objects) in neo_response.near_earth_objects {
262 for obj in objects {
263 let approach = obj.close_approach_data.first();
265 let velocity = approach
266 .and_then(|a| a.relative_velocity.get("kilometers_per_hour"))
267 .map(|v| v.as_str())
268 .unwrap_or("unknown");
269 let miss_distance = approach
270 .and_then(|a| a.miss_distance.get("kilometers"))
271 .map(|d| d.as_str())
272 .unwrap_or("unknown");
273
274 let text = format!(
276 "Near Earth Object {}: magnitude {:.2}, potentially hazardous: {}, velocity {} km/h, miss distance {} km",
277 obj.name,
278 obj.absolute_magnitude_h,
279 obj.is_potentially_hazardous_asteroid,
280 velocity,
281 miss_distance
282 );
283 let embedding = self.embedder.embed_text(&text);
284
285 let timestamp = NaiveDate::parse_from_str(&date, "%Y-%m-%d")
287 .ok()
288 .and_then(|d| d.and_hms_opt(0, 0, 0))
289 .map(|dt| dt.and_utc())
290 .unwrap_or_else(Utc::now);
291
292 let mut metadata = HashMap::new();
293 metadata.insert("neo_id".to_string(), obj.id.clone());
294 metadata.insert("name".to_string(), obj.name.clone());
295 metadata.insert("date".to_string(), date.clone());
296 metadata.insert("magnitude".to_string(), obj.absolute_magnitude_h.to_string());
297 metadata.insert("hazardous".to_string(), obj.is_potentially_hazardous_asteroid.to_string());
298 metadata.insert("velocity_kph".to_string(), velocity.to_string());
299 metadata.insert("miss_distance_km".to_string(), miss_distance.to_string());
300 metadata.insert("source".to_string(), "nasa_neo".to_string());
301
302 vectors.push(SemanticVector {
303 id: format!("NASA:NEO:{}:{}", obj.id, date),
304 embedding,
305 domain: Domain::Space,
306 timestamp,
307 metadata,
308 });
309 }
310 }
311
312 Ok(vectors)
313 }
314
315 pub async fn get_mars_weather(&self) -> Result<Vec<SemanticVector>> {
322 let url = format!("{}/insight_weather/?api_key={}&feedtype=json&ver=1.0",
324 self.base_url, self.api_key);
325
326 sleep(self.rate_limit_delay).await;
327 let response = self.fetch_with_retry(&url).await?;
328
329 Ok(Vec::new())
332 }
333
334 pub async fn search_mars_photos(&self, sol: u32, camera: Option<&str>) -> Result<Vec<SemanticVector>> {
346 let mut url = format!(
347 "{}/mars-photos/api/v1/rovers/curiosity/photos?sol={}&api_key={}",
348 self.base_url, sol, self.api_key
349 );
350
351 if let Some(cam) = camera {
352 url.push_str(&format!("&camera={}", cam));
353 }
354
355 sleep(self.rate_limit_delay).await;
356 let response = self.fetch_with_retry(&url).await?;
357 let photos_response: MarsPhotosResponse = response.json().await?;
358
359 let mut vectors = Vec::new();
360
361 for photo in photos_response.photos.iter().take(50) {
362 let text = format!(
364 "Mars rover {} photo from {} camera on sol {} ({})",
365 photo.rover.name, photo.camera.full_name, photo.sol, photo.earth_date
366 );
367 let embedding = self.embedder.embed_text(&text);
368
369 let timestamp = NaiveDate::parse_from_str(&photo.earth_date, "%Y-%m-%d")
371 .ok()
372 .and_then(|d| d.and_hms_opt(0, 0, 0))
373 .map(|dt| dt.and_utc())
374 .unwrap_or_else(Utc::now);
375
376 let mut metadata = HashMap::new();
377 metadata.insert("photo_id".to_string(), photo.id.to_string());
378 metadata.insert("sol".to_string(), photo.sol.to_string());
379 metadata.insert("camera".to_string(), photo.camera.name.clone());
380 metadata.insert("camera_full_name".to_string(), photo.camera.full_name.clone());
381 metadata.insert("rover".to_string(), photo.rover.name.clone());
382 metadata.insert("rover_status".to_string(), photo.rover.status.clone());
383 metadata.insert("earth_date".to_string(), photo.earth_date.clone());
384 metadata.insert("img_src".to_string(), photo.img_src.clone());
385 metadata.insert("source".to_string(), "nasa_mars_photos".to_string());
386
387 vectors.push(SemanticVector {
388 id: format!("NASA:MARS:{}:{}", photo.id, photo.sol),
389 embedding,
390 domain: Domain::Space,
391 timestamp,
392 metadata,
393 });
394 }
395
396 Ok(vectors)
397 }
398
399 pub async fn get_donki_events(
412 &self,
413 event_type: &str,
414 start_date: &str,
415 end_date: &str,
416 ) -> Result<Vec<SemanticVector>> {
417 let url = format!(
418 "{}/DONKI/{}?startDate={}&endDate={}&api_key={}",
419 self.base_url, event_type, start_date, end_date, self.api_key
420 );
421
422 sleep(self.rate_limit_delay).await;
423 let response = self.fetch_with_retry(&url).await?;
424 let events: Vec<DonkiEvent> = response.json().await?;
425
426 let mut vectors = Vec::new();
427
428 for event in events {
429 let text = format!(
431 "Space weather event {}: {} at {} - {}",
432 event_type, event.activity_id, event.source_location, event.note
433 );
434 let embedding = self.embedder.embed_text(&text);
435
436 let timestamp = chrono::DateTime::parse_from_rfc3339(&event.start_time)
438 .map(|dt| dt.with_timezone(&Utc))
439 .unwrap_or_else(|_| Utc::now());
440
441 let mut metadata = HashMap::new();
442 metadata.insert("activity_id".to_string(), event.activity_id.clone());
443 metadata.insert("event_type".to_string(), event_type.to_string());
444 metadata.insert("start_time".to_string(), event.start_time.clone());
445 metadata.insert("class_type".to_string(), event.class_type);
446 metadata.insert("source_location".to_string(), event.source_location);
447 metadata.insert("note".to_string(), event.note);
448 metadata.insert("source".to_string(), "nasa_donki".to_string());
449
450 vectors.push(SemanticVector {
451 id: format!("NASA:DONKI:{}:{}", event_type, event.activity_id),
452 embedding,
453 domain: Domain::Space,
454 timestamp,
455 metadata,
456 });
457 }
458
459 Ok(vectors)
460 }
461
462 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
464 let mut retries = 0;
465 loop {
466 match self.client.get(url).send().await {
467 Ok(response) => {
468 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
469 retries += 1;
470 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
471 continue;
472 }
473 return Ok(response);
474 }
475 Err(_) if retries < MAX_RETRIES => {
476 retries += 1;
477 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
478 }
479 Err(e) => return Err(FrameworkError::Network(e)),
480 }
481 }
482 }
483}
484
/// One row of the JSON array decoded by `ExoplanetClient::search_exoplanets`.
///
/// Field names match the column names selected in that method's query.
/// String fields default to empty and numeric fields to `None` when absent.
#[derive(Debug, Deserialize)]
struct ExoplanetData {
    /// Planet name; used in the vector id and the `planet_name` metadata key.
    #[serde(default)]
    pl_name: String,
    /// Stored under the `host_star` metadata key.
    #[serde(default)]
    hostname: String,
    /// Stored under the `discovery_method` metadata key.
    #[serde(default)]
    discoverymethod: String,
    /// Discovery year; January 1st of this year becomes the vector timestamp.
    #[serde(default)]
    disc_year: Option<i32>,
    /// Stored under the `orbital_period_days` metadata key.
    #[serde(default)]
    pl_orbper: Option<f64>,
    /// Stored under the `radius_earth` metadata key.
    #[serde(default)]
    pl_rade: Option<f64>,
    /// Stored under the `mass_earth` metadata key.
    #[serde(default)]
    pl_masse: Option<f64>,
    /// Stored under the `temperature_k` metadata key.
    #[serde(default)]
    pl_eqt: Option<f64>,
    /// Stored under the `distance_parsecs` metadata key.
    #[serde(default)]
    sy_dist: Option<f64>,
}
511
/// Client for the Exoplanet Archive TAP endpoint; results are returned as
/// `SemanticVector`s.
pub struct ExoplanetClient {
    /// HTTP client; built in `new` with a 30 second timeout and a fixed user agent.
    client: Client,
    /// TAP sync endpoint URL set by `new`.
    base_url: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Text embedder; constructed with `SimpleEmbedder::new(384)` in `new`.
    embedder: Arc<SimpleEmbedder>,
}
534
535impl ExoplanetClient {
536 pub fn new() -> Result<Self> {
538 let client = Client::builder()
539 .timeout(Duration::from_secs(30))
540 .user_agent("ruvector-data-framework/1.0")
541 .build()
542 .map_err(FrameworkError::Network)?;
543
544 Ok(Self {
545 client,
546 base_url: "https://exoplanetarchive.ipac.caltech.edu/TAP/sync".to_string(),
547 rate_limit_delay: Duration::from_millis(NASA_RATE_LIMIT_MS),
548 embedder: Arc::new(SimpleEmbedder::new(384)),
549 })
550 }
551
552 pub async fn search_exoplanets(&self, query: Option<&str>) -> Result<Vec<SemanticVector>> {
563 let base_query = "SELECT pl_name,hostname,discoverymethod,disc_year,pl_orbper,pl_rade,pl_masse,pl_eqt,sy_dist FROM ps";
564 let full_query = if let Some(q) = query {
565 format!("{} WHERE {}", base_query, q)
566 } else {
567 base_query.to_string()
568 };
569
570 let url = format!(
571 "{}?query={}&format=json",
572 self.base_url,
573 urlencoding::encode(&full_query)
574 );
575
576 sleep(self.rate_limit_delay).await;
577 let response = self.fetch_with_retry(&url).await?;
578 let exoplanets: Vec<ExoplanetData> = response.json().await?;
579
580 let mut vectors = Vec::new();
581
582 for (idx, planet) in exoplanets.iter().take(100).enumerate() {
583 let text = format!(
585 "Exoplanet {} orbiting {}, discovered via {} in {:?}, radius {:.2}R⊕, mass {:.2}M⊕, temp {:.0}K",
586 planet.pl_name,
587 planet.hostname,
588 planet.discoverymethod,
589 planet.disc_year,
590 planet.pl_rade.unwrap_or(0.0),
591 planet.pl_masse.unwrap_or(0.0),
592 planet.pl_eqt.unwrap_or(0.0)
593 );
594 let embedding = self.embedder.embed_text(&text);
595
596 let timestamp = planet.disc_year
598 .and_then(|y| NaiveDate::from_ymd_opt(y, 1, 1))
599 .and_then(|d| d.and_hms_opt(0, 0, 0))
600 .map(|dt| dt.and_utc())
601 .unwrap_or_else(Utc::now);
602
603 let mut metadata = HashMap::new();
604 metadata.insert("planet_name".to_string(), planet.pl_name.clone());
605 metadata.insert("host_star".to_string(), planet.hostname.clone());
606 metadata.insert("discovery_method".to_string(), planet.discoverymethod.clone());
607 metadata.insert("discovery_year".to_string(), planet.disc_year.map(|y| y.to_string()).unwrap_or_default());
608 metadata.insert("orbital_period_days".to_string(), planet.pl_orbper.map(|p| p.to_string()).unwrap_or_default());
609 metadata.insert("radius_earth".to_string(), planet.pl_rade.map(|r| r.to_string()).unwrap_or_default());
610 metadata.insert("mass_earth".to_string(), planet.pl_masse.map(|m| m.to_string()).unwrap_or_default());
611 metadata.insert("temperature_k".to_string(), planet.pl_eqt.map(|t| t.to_string()).unwrap_or_default());
612 metadata.insert("distance_parsecs".to_string(), planet.sy_dist.map(|d| d.to_string()).unwrap_or_default());
613 metadata.insert("source".to_string(), "nasa_exoplanet_archive".to_string());
614
615 vectors.push(SemanticVector {
616 id: format!("EXOPLANET:{}:{}", planet.pl_name, idx),
617 embedding,
618 domain: Domain::Space,
619 timestamp,
620 metadata,
621 });
622 }
623
624 Ok(vectors)
625 }
626
627 pub async fn get_habitable_zone(&self) -> Result<Vec<SemanticVector>> {
634 self.search_exoplanets(Some("pl_eqt>200 and pl_eqt<350")).await
636 }
637
638 pub async fn get_by_discovery_method(&self, method: &str) -> Result<Vec<SemanticVector>> {
649 let query = format!("discoverymethod='{}'", method);
650 self.search_exoplanets(Some(&query)).await
651 }
652
653 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
655 let mut retries = 0;
656 loop {
657 match self.client.get(url).send().await {
658 Ok(response) => {
659 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
660 retries += 1;
661 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
662 continue;
663 }
664 return Ok(response);
665 }
666 Err(_) if retries < MAX_RETRIES => {
667 retries += 1;
668 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
669 }
670 Err(e) => return Err(FrameworkError::Network(e)),
671 }
672 }
673 }
674}
675
676impl Default for ExoplanetClient {
677 fn default() -> Self {
678 Self::new().expect("Failed to create ExoplanetClient")
679 }
680}
681
/// One element of the JSON arrays decoded by `SpaceXClient::get_launches`
/// and `SpaceXClient::get_upcoming_launches`.
///
/// `id` has no serde default, so decoding fails if it is missing.
#[derive(Debug, Deserialize)]
struct SpaceXLaunch {
    /// Launch identifier; used in the vector id and the `launch_id` metadata key.
    id: String,
    /// Launch name; used in the embedded text and metadata.
    #[serde(default)]
    name: String,
    /// Launch time string; parsed as RFC 3339 for the vector timestamp.
    #[serde(default)]
    date_utc: String,
    /// Outcome flag; `get_launches` renders `None` as "pending".
    #[serde(default)]
    success: Option<bool>,
    /// Free-text details; rendered as "No details" when `None`.
    #[serde(default)]
    details: Option<String>,
    /// Flight number; defaults to `0` when absent.
    #[serde(default)]
    flight_number: u32,
    /// Stored under the `rocket_id` metadata key.
    #[serde(default)]
    rocket: String,
    /// Stored under the `launchpad` metadata key by `get_launches`.
    #[serde(default)]
    launchpad: String,
}
705
/// One element of the JSON array decoded by `SpaceXClient::get_rockets`.
///
/// `id` has no serde default, so decoding fails if it is missing.
#[derive(Debug, Deserialize)]
struct SpaceXRocket {
    /// Rocket identifier; used in the vector id and the `rocket_id` metadata key.
    id: String,
    /// Rocket name; used in the embedded text and metadata.
    #[serde(default)]
    name: String,
    /// Description; used in the embedded text and metadata.
    #[serde(default)]
    description: String,
    /// Height record; `get_rockets` reads its `meters` value, using `0.0` when missing.
    #[serde(default)]
    height: Option<SpaceXDimension>,
    /// Mass record; `get_rockets` reads its `kg` value, using `0.0` when missing.
    #[serde(default)]
    mass: Option<SpaceXMass>,
    /// First-flight date string; parsed with the `%Y-%m-%d` format.
    #[serde(default)]
    first_flight: String,
    /// Success rate; `get_rockets` uses `0.0` when missing.
    #[serde(default)]
    success_rate_pct: Option<f64>,
}
723
/// Dimension record nested in `SpaceXRocket::height`.
#[derive(Debug, Deserialize)]
struct SpaceXDimension {
    /// Value read from the `meters` key, if present.
    meters: Option<f64>,
}
728
/// Mass record nested in `SpaceXRocket::mass`.
#[derive(Debug, Deserialize)]
struct SpaceXMass {
    /// Value read from the `kg` key, if present.
    kg: Option<f64>,
}
733
/// One element of the JSON array decoded by `SpaceXClient::get_starlink_satellites`.
///
/// String fields default to empty and numeric fields to `None` when absent.
#[derive(Debug, Deserialize)]
struct StarlinkSatellite {
    /// Satellite identifier; used in the vector id and the `satellite_id` metadata key.
    #[serde(default)]
    id: String,
    /// Version string; used in the embedded text and metadata.
    #[serde(default)]
    version: String,
    /// Stored under the `launch` metadata key.
    #[serde(default)]
    launch: String,
    /// Longitude; `get_starlink_satellites` uses `0.0` when missing.
    #[serde(default)]
    longitude: Option<f64>,
    /// Latitude; `get_starlink_satellites` uses `0.0` when missing.
    #[serde(default)]
    latitude: Option<f64>,
    /// Height value; `get_starlink_satellites` uses `0.0` when missing.
    #[serde(default)]
    height_km: Option<f64>,
}
750
/// Client for the SpaceX data API (launches, rockets, Starlink); results are
/// returned as `SemanticVector`s.
pub struct SpaceXClient {
    /// HTTP client; built in `new` with a 30 second timeout and a fixed user agent.
    client: Client,
    /// URL prefix for every endpoint; set to `https://api.spacexdata.com/v4` by `new`.
    base_url: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Text embedder; constructed with `SimpleEmbedder::new(384)` in `new`.
    embedder: Arc<SimpleEmbedder>,
}
774
775impl SpaceXClient {
776 pub fn new() -> Result<Self> {
778 let client = Client::builder()
779 .timeout(Duration::from_secs(30))
780 .user_agent("ruvector-data-framework/1.0")
781 .build()
782 .map_err(FrameworkError::Network)?;
783
784 Ok(Self {
785 client,
786 base_url: "https://api.spacexdata.com/v4".to_string(),
787 rate_limit_delay: Duration::from_millis(SPACEX_RATE_LIMIT_MS),
788 embedder: Arc::new(SimpleEmbedder::new(384)),
789 })
790 }
791
792 pub async fn get_launches(&self, limit: Option<usize>) -> Result<Vec<SemanticVector>> {
802 let url = format!("{}/launches", self.base_url);
803
804 sleep(self.rate_limit_delay).await;
805 let response = self.fetch_with_retry(&url).await?;
806 let launches: Vec<SpaceXLaunch> = response.json().await?;
807
808 let mut vectors = Vec::new();
809 let launches_to_process = if let Some(lim) = limit {
810 &launches[..launches.len().min(lim)]
811 } else {
812 &launches
813 };
814
815 for launch in launches_to_process {
816 let success_str = match launch.success {
818 Some(true) => "successful",
819 Some(false) => "failed",
820 None => "pending",
821 };
822 let details = launch.details.as_deref().unwrap_or("No details");
823
824 let text = format!(
825 "SpaceX launch {} (flight #{}): {} - {}",
826 launch.name, launch.flight_number, success_str, details
827 );
828 let embedding = self.embedder.embed_text(&text);
829
830 let timestamp = chrono::DateTime::parse_from_rfc3339(&launch.date_utc)
832 .map(|dt| dt.with_timezone(&Utc))
833 .unwrap_or_else(|_| Utc::now());
834
835 let mut metadata = HashMap::new();
836 metadata.insert("launch_id".to_string(), launch.id.clone());
837 metadata.insert("name".to_string(), launch.name.clone());
838 metadata.insert("flight_number".to_string(), launch.flight_number.to_string());
839 metadata.insert("date".to_string(), launch.date_utc.clone());
840 metadata.insert("success".to_string(), launch.success.map(|s| s.to_string()).unwrap_or_default());
841 metadata.insert("rocket_id".to_string(), launch.rocket.clone());
842 metadata.insert("launchpad".to_string(), launch.launchpad.clone());
843 metadata.insert("source".to_string(), "spacex_launches".to_string());
844
845 vectors.push(SemanticVector {
846 id: format!("SPACEX:LAUNCH:{}", launch.id),
847 embedding,
848 domain: Domain::Space,
849 timestamp,
850 metadata,
851 });
852 }
853
854 Ok(vectors)
855 }
856
857 pub async fn get_upcoming_launches(&self) -> Result<Vec<SemanticVector>> {
864 let url = format!("{}/launches/upcoming", self.base_url);
865
866 sleep(self.rate_limit_delay).await;
867 let response = self.fetch_with_retry(&url).await?;
868 let launches: Vec<SpaceXLaunch> = response.json().await?;
869
870 let mut vectors = Vec::new();
871
872 for launch in launches.iter().take(20) {
873 let details = launch.details.as_deref().unwrap_or("No details");
874
875 let text = format!(
876 "Upcoming SpaceX launch {} (flight #{}): {}",
877 launch.name, launch.flight_number, details
878 );
879 let embedding = self.embedder.embed_text(&text);
880
881 let timestamp = chrono::DateTime::parse_from_rfc3339(&launch.date_utc)
882 .map(|dt| dt.with_timezone(&Utc))
883 .unwrap_or_else(|_| Utc::now());
884
885 let mut metadata = HashMap::new();
886 metadata.insert("launch_id".to_string(), launch.id.clone());
887 metadata.insert("name".to_string(), launch.name.clone());
888 metadata.insert("flight_number".to_string(), launch.flight_number.to_string());
889 metadata.insert("date".to_string(), launch.date_utc.clone());
890 metadata.insert("rocket_id".to_string(), launch.rocket.clone());
891 metadata.insert("status".to_string(), "upcoming".to_string());
892 metadata.insert("source".to_string(), "spacex_upcoming".to_string());
893
894 vectors.push(SemanticVector {
895 id: format!("SPACEX:UPCOMING:{}", launch.id),
896 embedding,
897 domain: Domain::Space,
898 timestamp,
899 metadata,
900 });
901 }
902
903 Ok(vectors)
904 }
905
906 pub async fn get_rockets(&self) -> Result<Vec<SemanticVector>> {
913 let url = format!("{}/rockets", self.base_url);
914
915 sleep(self.rate_limit_delay).await;
916 let response = self.fetch_with_retry(&url).await?;
917 let rockets: Vec<SpaceXRocket> = response.json().await?;
918
919 let mut vectors = Vec::new();
920
921 for rocket in rockets {
922 let height = rocket.height
923 .as_ref()
924 .and_then(|h| h.meters)
925 .unwrap_or(0.0);
926 let mass = rocket.mass
927 .as_ref()
928 .and_then(|m| m.kg)
929 .unwrap_or(0.0);
930 let success_rate = rocket.success_rate_pct.unwrap_or(0.0);
931
932 let text = format!(
933 "SpaceX rocket {}: {} - height {:.1}m, mass {:.0}kg, {:.1}% success rate, first flight {}",
934 rocket.name, rocket.description, height, mass, success_rate, rocket.first_flight
935 );
936 let embedding = self.embedder.embed_text(&text);
937
938 let timestamp = NaiveDate::parse_from_str(&rocket.first_flight, "%Y-%m-%d")
940 .ok()
941 .and_then(|d| d.and_hms_opt(0, 0, 0))
942 .map(|dt| dt.and_utc())
943 .unwrap_or_else(Utc::now);
944
945 let mut metadata = HashMap::new();
946 metadata.insert("rocket_id".to_string(), rocket.id.clone());
947 metadata.insert("name".to_string(), rocket.name.clone());
948 metadata.insert("description".to_string(), rocket.description);
949 metadata.insert("height_meters".to_string(), height.to_string());
950 metadata.insert("mass_kg".to_string(), mass.to_string());
951 metadata.insert("first_flight".to_string(), rocket.first_flight);
952 metadata.insert("success_rate_pct".to_string(), success_rate.to_string());
953 metadata.insert("source".to_string(), "spacex_rockets".to_string());
954
955 vectors.push(SemanticVector {
956 id: format!("SPACEX:ROCKET:{}", rocket.id),
957 embedding,
958 domain: Domain::Space,
959 timestamp,
960 metadata,
961 });
962 }
963
964 Ok(vectors)
965 }
966
967 pub async fn get_starlink_satellites(&self) -> Result<Vec<SemanticVector>> {
974 let url = format!("{}/starlink", self.base_url);
975
976 sleep(self.rate_limit_delay).await;
977 let response = self.fetch_with_retry(&url).await?;
978 let satellites: Vec<StarlinkSatellite> = response.json().await?;
979
980 let mut vectors = Vec::new();
981
982 for satellite in satellites.iter().take(100) {
984 let lon = satellite.longitude.unwrap_or(0.0);
985 let lat = satellite.latitude.unwrap_or(0.0);
986 let height = satellite.height_km.unwrap_or(0.0);
987
988 let text = format!(
989 "Starlink satellite {} version {}, orbit: {:.2}°N, {:.2}°E at {:.0}km",
990 satellite.id, satellite.version, lat, lon, height
991 );
992 let embedding = self.embedder.embed_text(&text);
993
994 let mut metadata = HashMap::new();
995 metadata.insert("satellite_id".to_string(), satellite.id.clone());
996 metadata.insert("version".to_string(), satellite.version.clone());
997 metadata.insert("launch".to_string(), satellite.launch.clone());
998 metadata.insert("longitude".to_string(), lon.to_string());
999 metadata.insert("latitude".to_string(), lat.to_string());
1000 metadata.insert("height_km".to_string(), height.to_string());
1001 metadata.insert("source".to_string(), "spacex_starlink".to_string());
1002
1003 vectors.push(SemanticVector {
1004 id: format!("SPACEX:STARLINK:{}", satellite.id),
1005 embedding,
1006 domain: Domain::Space,
1007 timestamp: Utc::now(),
1008 metadata,
1009 });
1010 }
1011
1012 Ok(vectors)
1013 }
1014
1015 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
1017 let mut retries = 0;
1018 loop {
1019 match self.client.get(url).send().await {
1020 Ok(response) => {
1021 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
1022 retries += 1;
1023 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1024 continue;
1025 }
1026 return Ok(response);
1027 }
1028 Err(_) if retries < MAX_RETRIES => {
1029 retries += 1;
1030 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1031 }
1032 Err(e) => return Err(FrameworkError::Network(e)),
1033 }
1034 }
1035 }
1036}
1037
1038impl Default for SpaceXClient {
1039 fn default() -> Self {
1040 Self::new().expect("Failed to create SpaceXClient")
1041 }
1042}
1043
/// One catalog entry decoded by `AstronomyClient::search_supernovae`.
///
/// Every optional value is kept as a string exactly as decoded; the only
/// conversion in this file is `discoveryear` to an integer year.
#[derive(Debug, Deserialize)]
struct SupernovaData {
    /// Entry name; used in the embedded text and the `name` metadata key.
    #[serde(default)]
    name: String,
    /// Stored under the `ra` metadata key.
    #[serde(default)]
    ra: Option<String>,
    /// Stored under the `dec` metadata key.
    #[serde(default)]
    dec: Option<String>,
    /// Discovery year as text; January 1st of this year becomes the timestamp
    /// when it parses as an integer.
    #[serde(default)]
    discoveryear: Option<String>,
    /// Stored under the `type` metadata key.
    #[serde(default)]
    claimedtype: Option<String>,
    /// Stored under the `redshift` metadata key.
    #[serde(default)]
    redshift: Option<String>,
    /// Stored under the `max_magnitude` metadata key.
    #[serde(default)]
    maxappmag: Option<String>,
}
1066
/// Client for the supernova catalog API; results are returned as
/// `SemanticVector`s.
pub struct AstronomyClient {
    /// HTTP client; built in `new` with a 30 second timeout and a fixed user agent.
    client: Client,
    /// URL prefix for every endpoint; set to `https://api.astrocats.space` by `new`.
    base_url: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Text embedder; constructed with `SimpleEmbedder::new(384)` in `new`.
    embedder: Arc<SimpleEmbedder>,
}
1086
1087impl AstronomyClient {
1088 pub fn new() -> Result<Self> {
1090 let client = Client::builder()
1091 .timeout(Duration::from_secs(30))
1092 .user_agent("ruvector-data-framework/1.0")
1093 .build()
1094 .map_err(FrameworkError::Network)?;
1095
1096 Ok(Self {
1097 client,
1098 base_url: "https://api.astrocats.space".to_string(),
1099 rate_limit_delay: Duration::from_millis(ASTRONOMY_RATE_LIMIT_MS),
1100 embedder: Arc::new(SimpleEmbedder::new(384)),
1101 })
1102 }
1103
1104 pub async fn search_supernovae(&self, limit: Option<usize>) -> Result<Vec<SemanticVector>> {
1114 let url = format!("{}/catalog", self.base_url);
1115
1116 sleep(self.rate_limit_delay).await;
1117 let response = self.fetch_with_retry(&url).await?;
1118
1119 let text = response.text().await?;
1122 let data: HashMap<String, SupernovaData> = serde_json::from_str(&text)
1123 .unwrap_or_default();
1124
1125 let mut vectors = Vec::new();
1126 let take_count = limit.unwrap_or(50);
1127
1128 for (id, sn) in data.iter().take(take_count) {
1129 let sn_type = sn.claimedtype.as_deref().unwrap_or("unknown");
1130 let year = sn.discoveryear.as_deref().unwrap_or("unknown");
1131 let redshift = sn.redshift.as_deref().unwrap_or("unknown");
1132
1133 let text = format!(
1134 "Supernova {} (type {}), discovered {}, redshift {}, coords: {} {}",
1135 sn.name,
1136 sn_type,
1137 year,
1138 redshift,
1139 sn.ra.as_deref().unwrap_or("unknown"),
1140 sn.dec.as_deref().unwrap_or("unknown")
1141 );
1142 let embedding = self.embedder.embed_text(&text);
1143
1144 let timestamp = sn.discoveryear
1146 .as_ref()
1147 .and_then(|y| y.parse::<i32>().ok())
1148 .and_then(|y| NaiveDate::from_ymd_opt(y, 1, 1))
1149 .and_then(|d| d.and_hms_opt(0, 0, 0))
1150 .map(|dt| dt.and_utc())
1151 .unwrap_or_else(Utc::now);
1152
1153 let mut metadata = HashMap::new();
1154 metadata.insert("name".to_string(), sn.name.clone());
1155 metadata.insert("type".to_string(), sn_type.to_string());
1156 metadata.insert("discovery_year".to_string(), year.to_string());
1157 metadata.insert("ra".to_string(), sn.ra.clone().unwrap_or_default());
1158 metadata.insert("dec".to_string(), sn.dec.clone().unwrap_or_default());
1159 metadata.insert("redshift".to_string(), redshift.to_string());
1160 metadata.insert("max_magnitude".to_string(), sn.maxappmag.clone().unwrap_or_default());
1161 metadata.insert("source".to_string(), "open_supernova_catalog".to_string());
1162
1163 vectors.push(SemanticVector {
1164 id: format!("SUPERNOVA:{}", id),
1165 embedding,
1166 domain: Domain::Space,
1167 timestamp,
1168 metadata,
1169 });
1170 }
1171
1172 Ok(vectors)
1173 }
1174
1175 pub async fn search_transients(&self) -> Result<Vec<SemanticVector>> {
1182 Ok(Vec::new())
1185 }
1186
1187 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
1189 let mut retries = 0;
1190 loop {
1191 match self.client.get(url).send().await {
1192 Ok(response) => {
1193 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
1194 retries += 1;
1195 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1196 continue;
1197 }
1198 return Ok(response);
1199 }
1200 Err(_) if retries < MAX_RETRIES => {
1201 retries += 1;
1202 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1203 }
1204 Err(e) => return Err(FrameworkError::Network(e)),
1205 }
1206 }
1207 }
1208}
1209
1210impl Default for AstronomyClient {
1211 fn default() -> Self {
1212 Self::new().expect("Failed to create AstronomyClient")
1213 }
1214}
1215
#[cfg(test)]
mod tests {
    use super::*;

    /// A NASA client can be built without an API key.
    #[tokio::test]
    async fn test_nasa_client_creation() {
        assert!(NasaClient::new(None).is_ok());
    }

    /// A NASA client can be built with an explicit API key.
    #[tokio::test]
    async fn test_nasa_client_with_key() {
        let key = String::from("test_key");
        assert!(NasaClient::new(Some(key)).is_ok());
    }

    /// An exoplanet client can be built.
    #[tokio::test]
    async fn test_exoplanet_client_creation() {
        assert!(ExoplanetClient::new().is_ok());
    }

    /// A SpaceX client can be built.
    #[tokio::test]
    async fn test_spacex_client_creation() {
        assert!(SpaceXClient::new().is_ok());
    }

    /// An astronomy client can be built.
    #[tokio::test]
    async fn test_astronomy_client_creation() {
        assert!(AstronomyClient::new().is_ok());
    }

    /// Each client is configured with the delay constant intended for it.
    #[test]
    fn test_rate_limiting() {
        let nasa_delay = NasaClient::new(None).unwrap().rate_limit_delay;
        assert_eq!(nasa_delay, Duration::from_millis(NASA_RATE_LIMIT_MS));

        let exoplanet_delay = ExoplanetClient::new().unwrap().rate_limit_delay;
        assert_eq!(exoplanet_delay, Duration::from_millis(NASA_RATE_LIMIT_MS));

        let spacex_delay = SpaceXClient::new().unwrap().rate_limit_delay;
        assert_eq!(spacex_delay, Duration::from_millis(SPACEX_RATE_LIMIT_MS));

        let astronomy_delay = AstronomyClient::new().unwrap().rate_limit_delay;
        assert_eq!(astronomy_delay, Duration::from_millis(ASTRONOMY_RATE_LIMIT_MS));
    }

    /// A hand-built vector keeps the `Domain::Space` tag it was given.
    #[test]
    fn test_domain_is_space() {
        let embedding = SimpleEmbedder::new(384).embed_text("test");

        let vector = SemanticVector {
            id: String::from("test"),
            embedding,
            domain: Domain::Space,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        };

        assert_eq!(vector.domain, Domain::Space);
    }
}