ruvector_data_framework/
medical_clients.rs

1//! Medical data API integrations for PubMed, ClinicalTrials.gov, and FDA
2//!
3//! This module provides async clients for fetching medical literature, clinical trials,
4//! and FDA data, converting responses to SemanticVector format for RuVector discovery.
5
6use std::collections::HashMap;
7use std::sync::Arc;
8use std::time::Duration;
9
10use chrono::{NaiveDate, Utc};
11use reqwest::{Client, StatusCode};
12use serde::Deserialize;
13use tokio::time::sleep;
14
15use crate::api_clients::SimpleEmbedder;
16use crate::ruvector_native::{Domain, SemanticVector};
17use crate::{FrameworkError, Result};
18
19/// Custom deserializer that handles both string and integer values
20fn deserialize_number_from_string<'de, D>(deserializer: D) -> std::result::Result<Option<i32>, D::Error>
21where
22    D: serde::Deserializer<'de>,
23{
24    use serde::de::{self, Visitor};
25
26    struct NumberOrStringVisitor;
27
28    impl<'de> Visitor<'de> for NumberOrStringVisitor {
29        type Value = Option<i32>;
30
31        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
32            formatter.write_str("a number or numeric string")
33        }
34
35        fn visit_i64<E>(self, v: i64) -> std::result::Result<Self::Value, E>
36        where
37            E: de::Error,
38        {
39            Ok(Some(v as i32))
40        }
41
42        fn visit_u64<E>(self, v: u64) -> std::result::Result<Self::Value, E>
43        where
44            E: de::Error,
45        {
46            Ok(Some(v as i32))
47        }
48
49        fn visit_str<E>(self, v: &str) -> std::result::Result<Self::Value, E>
50        where
51            E: de::Error,
52        {
53            v.parse::<i32>().map(Some).map_err(de::Error::custom)
54        }
55
56        fn visit_none<E>(self) -> std::result::Result<Self::Value, E>
57        where
58            E: de::Error,
59        {
60            Ok(None)
61        }
62
63        fn visit_unit<E>(self) -> std::result::Result<Self::Value, E>
64        where
65            E: de::Error,
66        {
67            Ok(None)
68        }
69    }
70
71    deserializer.deserialize_any(NumberOrStringVisitor)
72}
73
// Rate limiting configuration

/// Delay (ms) applied before each NCBI request when no API key is configured.
const NCBI_RATE_LIMIT_MS: u64 = 334; // ~3 requests/second without API key
/// Delay (ms) applied before each NCBI request when an API key is configured.
const NCBI_WITH_KEY_RATE_LIMIT_MS: u64 = 100; // 10 requests/second with key
/// Delay (ms) applied before each openFDA request.
const FDA_RATE_LIMIT_MS: u64 = 250; // Conservative 4 requests/second
/// Delay (ms) applied before each ClinicalTrials.gov request.
const CLINICALTRIALS_RATE_LIMIT_MS: u64 = 100;
/// Maximum number of retries the `fetch_with_retry` helpers perform.
const MAX_RETRIES: u32 = 3;
/// Base retry delay (ms); the helpers multiply it by the retry attempt number.
const RETRY_DELAY_MS: u64 = 1000;
81
82// ============================================================================
83// PubMed E-utilities Client
84// ============================================================================
85
/// PubMed ESearch API response
///
/// Top-level JSON envelope; only the `esearchresult` object is decoded.
#[derive(Debug, Deserialize)]
struct PubMedSearchResponse {
    /// Search result payload carrying the matching PMIDs.
    esearchresult: ESearchResult,
}
91
/// Contents of the `esearchresult` object in an ESearch response.
///
/// Both fields fall back to their empty defaults when absent from the JSON.
#[derive(Debug, Deserialize)]
struct ESearchResult {
    /// PMIDs matching the query, kept as strings.
    #[serde(default)]
    idlist: Vec<String>,
    /// Raw `count` value as a string; presumably the total hit count.
    /// It is not read anywhere in this file.
    #[serde(default)]
    count: String,
}
99
100/// PubMed EFetch API response (simplified)
101#[derive(Debug, Deserialize)]
102struct PubMedFetchResponse {
103    #[serde(rename = "PubmedArticleSet")]
104    pubmed_article_set: Option<PubmedArticleSet>,
105}
106
/// Collection of `PubmedArticle` elements.
#[derive(Debug, Deserialize)]
struct PubmedArticleSet {
    /// Every `PubmedArticle` child; empty when none are present.
    #[serde(rename = "PubmedArticle", default)]
    articles: Vec<PubmedArticle>,
}
112
113#[derive(Debug, Deserialize)]
114struct PubmedArticle {
115    #[serde(rename = "MedlineCitation")]
116    medline_citation: MedlineCitation,
117}
118
119#[derive(Debug, Deserialize)]
120struct MedlineCitation {
121    #[serde(rename = "PMID")]
122    pmid: PmidObject,
123    #[serde(rename = "Article")]
124    article: Article,
125    #[serde(rename = "DateCompleted", default)]
126    date_completed: Option<DateCompleted>,
127}
128
/// Wrapper around the content of a `PMID` element.
#[derive(Debug, Deserialize)]
struct PmidObject {
    /// Element content (`$value`); empty string when absent.
    #[serde(rename = "$value", default)]
    value: String,
}
134
135#[derive(Debug, Deserialize)]
136struct Article {
137    #[serde(rename = "ArticleTitle", default)]
138    article_title: Option<String>,
139    #[serde(rename = "Abstract", default)]
140    abstract_data: Option<AbstractData>,
141    #[serde(rename = "AuthorList", default)]
142    author_list: Option<AuthorList>,
143}
144
145#[derive(Debug, Deserialize)]
146struct AbstractData {
147    #[serde(rename = "AbstractText", default)]
148    abstract_text: Vec<AbstractText>,
149}
150
/// One `AbstractText` section of an abstract.
#[derive(Debug, Deserialize)]
struct AbstractText {
    /// Element content (`$value`); `None` when the element carries none.
    #[serde(rename = "$value", default)]
    value: Option<String>,
}
156
/// Container for the `Author` elements of an article.
#[derive(Debug, Deserialize)]
struct AuthorList {
    /// Every `Author` child; empty when none are present.
    #[serde(rename = "Author", default)]
    authors: Vec<Author>,
}
162
163#[derive(Debug, Deserialize)]
164struct Author {
165    #[serde(rename = "LastName", default)]
166    last_name: Option<String>,
167    #[serde(rename = "ForeName", default)]
168    fore_name: Option<String>,
169}
170
171#[derive(Debug, Deserialize)]
172struct DateCompleted {
173    #[serde(rename = "Year", default)]
174    year: Option<String>,
175    #[serde(rename = "Month", default)]
176    month: Option<String>,
177    #[serde(rename = "Day", default)]
178    day: Option<String>,
179}
180
/// Client for PubMed medical literature database
///
/// Queries the NCBI E-utilities endpoints and converts the returned articles
/// into `SemanticVector`s.
pub struct PubMedClient {
    /// Shared HTTP client used for every request.
    client: Client,
    /// Base URL of the E-utilities endpoints (no trailing slash).
    base_url: String,
    /// Optional NCBI API key appended to each request URL.
    api_key: Option<String>,
    /// Delay slept before each request; depends on whether an API key is set.
    rate_limit_delay: Duration,
    /// Embedder that turns title + abstract text into a vector.
    embedder: Arc<SimpleEmbedder>,
}
189
190impl PubMedClient {
191    /// Create a new PubMed client
192    ///
193    /// # Arguments
194    /// * `api_key` - Optional NCBI API key (get from https://www.ncbi.nlm.nih.gov/account/)
195    pub fn new(api_key: Option<String>) -> Result<Self> {
196        let client = Client::builder()
197            .timeout(Duration::from_secs(30))
198            .build()
199            .map_err(FrameworkError::Network)?;
200
201        let rate_limit_delay = if api_key.is_some() {
202            Duration::from_millis(NCBI_WITH_KEY_RATE_LIMIT_MS)
203        } else {
204            Duration::from_millis(NCBI_RATE_LIMIT_MS)
205        };
206
207        Ok(Self {
208            client,
209            base_url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils".to_string(),
210            api_key,
211            rate_limit_delay,
212            embedder: Arc::new(SimpleEmbedder::new(384)), // Higher dimension for medical text
213        })
214    }
215
216    /// Search PubMed articles by query
217    ///
218    /// # Arguments
219    /// * `query` - Search query (e.g., "COVID-19 vaccine", "alzheimer's treatment")
220    /// * `max_results` - Maximum number of results to return
221    pub async fn search_articles(
222        &self,
223        query: &str,
224        max_results: usize,
225    ) -> Result<Vec<SemanticVector>> {
226        // Step 1: Search for PMIDs
227        let pmids = self.search_pmids(query, max_results).await?;
228
229        if pmids.is_empty() {
230            return Ok(Vec::new());
231        }
232
233        // Step 2: Fetch full abstracts for PMIDs
234        self.fetch_abstracts(&pmids).await
235    }
236
237    /// Search for PMIDs matching query
238    async fn search_pmids(&self, query: &str, max_results: usize) -> Result<Vec<String>> {
239        let mut url = format!(
240            "{}/esearch.fcgi?db=pubmed&term={}&retmode=json&retmax={}",
241            self.base_url,
242            urlencoding::encode(query),
243            max_results
244        );
245
246        if let Some(key) = &self.api_key {
247            url.push_str(&format!("&api_key={}", key));
248        }
249
250        sleep(self.rate_limit_delay).await;
251        let response = self.fetch_with_retry(&url).await?;
252        let search_response: PubMedSearchResponse = response.json().await?;
253
254        Ok(search_response.esearchresult.idlist)
255    }
256
257    /// Fetch full article abstracts by PMIDs
258    ///
259    /// # Arguments
260    /// * `pmids` - List of PubMed IDs to fetch
261    pub async fn fetch_abstracts(&self, pmids: &[String]) -> Result<Vec<SemanticVector>> {
262        if pmids.is_empty() {
263            return Ok(Vec::new());
264        }
265
266        // Batch PMIDs (max 200 per request)
267        let mut all_vectors = Vec::new();
268
269        for chunk in pmids.chunks(200) {
270            let pmid_list = chunk.join(",");
271            let mut url = format!(
272                "{}/efetch.fcgi?db=pubmed&id={}&retmode=xml",
273                self.base_url, pmid_list
274            );
275
276            if let Some(key) = &self.api_key {
277                url.push_str(&format!("&api_key={}", key));
278            }
279
280            sleep(self.rate_limit_delay).await;
281            let response = self.fetch_with_retry(&url).await?;
282            let xml_text = response.text().await?;
283
284            // Parse XML response
285            let vectors = self.parse_xml_to_vectors(&xml_text)?;
286            all_vectors.extend(vectors);
287        }
288
289        Ok(all_vectors)
290    }
291
292    /// Parse PubMed XML response to SemanticVectors
293    fn parse_xml_to_vectors(&self, xml: &str) -> Result<Vec<SemanticVector>> {
294        // Use quick-xml for parsing
295        let fetch_response: PubMedFetchResponse = quick_xml::de::from_str(xml)
296            .map_err(|e| FrameworkError::Config(format!("XML parse error: {}", e)))?;
297
298        let mut vectors = Vec::new();
299
300        if let Some(article_set) = fetch_response.pubmed_article_set {
301            for pubmed_article in article_set.articles {
302                let citation = pubmed_article.medline_citation;
303                let article = citation.article;
304
305                let pmid = citation.pmid.value;
306                let title = article.article_title.unwrap_or_else(|| "Untitled".to_string());
307
308                // Extract abstract text
309                let abstract_text = article
310                    .abstract_data
311                    .as_ref()
312                    .map(|abs| {
313                        abs.abstract_text
314                            .iter()
315                            .filter_map(|at| at.value.clone())
316                            .collect::<Vec<_>>()
317                            .join(" ")
318                    })
319                    .unwrap_or_default();
320
321                // Create combined text for embedding
322                let text = format!("{} {}", title, abstract_text);
323                let embedding = self.embedder.embed_text(&text);
324
325                // Parse publication date
326                let timestamp = citation
327                    .date_completed
328                    .as_ref()
329                    .and_then(|date| {
330                        let year = date.year.as_ref()?.parse::<i32>().ok()?;
331                        let month = date.month.as_ref()?.parse::<u32>().ok()?;
332                        let day = date.day.as_ref()?.parse::<u32>().ok()?;
333                        NaiveDate::from_ymd_opt(year, month, day)
334                    })
335                    .and_then(|d| d.and_hms_opt(0, 0, 0))
336                    .map(|dt| dt.and_utc())
337                    .unwrap_or_else(Utc::now);
338
339                // Extract author names
340                let authors = article
341                    .author_list
342                    .as_ref()
343                    .map(|al| {
344                        al.authors
345                            .iter()
346                            .filter_map(|a| {
347                                let last = a.last_name.as_deref().unwrap_or("");
348                                let first = a.fore_name.as_deref().unwrap_or("");
349                                if !last.is_empty() {
350                                    Some(format!("{} {}", first, last))
351                                } else {
352                                    None
353                                }
354                            })
355                            .collect::<Vec<_>>()
356                            .join(", ")
357                    })
358                    .unwrap_or_default();
359
360                let mut metadata = HashMap::new();
361                metadata.insert("pmid".to_string(), pmid.clone());
362                metadata.insert("title".to_string(), title);
363                metadata.insert("abstract".to_string(), abstract_text);
364                metadata.insert("authors".to_string(), authors);
365                metadata.insert("source".to_string(), "pubmed".to_string());
366
367                vectors.push(SemanticVector {
368                    id: format!("PMID:{}", pmid),
369                    embedding,
370                    domain: Domain::Medical,
371                    timestamp,
372                    metadata,
373                });
374            }
375        }
376
377        Ok(vectors)
378    }
379
380    /// Fetch with retry logic
381    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
382        let mut retries = 0;
383        loop {
384            match self.client.get(url).send().await {
385                Ok(response) => {
386                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
387                        retries += 1;
388                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
389                        continue;
390                    }
391                    return Ok(response);
392                }
393                Err(_) if retries < MAX_RETRIES => {
394                    retries += 1;
395                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
396                }
397                Err(e) => return Err(FrameworkError::Network(e)),
398            }
399        }
400    }
401}
402
403// ============================================================================
404// ClinicalTrials.gov Client
405// ============================================================================
406
/// ClinicalTrials.gov API response
///
/// Only the `studies` array is decoded.
#[derive(Debug, Deserialize)]
struct ClinicalTrialsResponse {
    /// Returned studies; empty when the key is absent.
    #[serde(default)]
    studies: Vec<ClinicalStudy>,
}
413
414#[derive(Debug, Deserialize)]
415struct ClinicalStudy {
416    #[serde(rename = "protocolSection")]
417    protocol_section: ProtocolSection,
418}
419
/// Protocol section of a study, split into the modules this client reads.
#[derive(Debug, Deserialize)]
struct ProtocolSection {
    /// Required `identificationModule` (NCT id and title).
    #[serde(rename = "identificationModule")]
    identification: IdentificationModule,
    /// Required `statusModule` (overall status and start date).
    #[serde(rename = "statusModule")]
    status: StatusModule,
    /// Optional `descriptionModule` (brief summary).
    #[serde(rename = "descriptionModule", default)]
    description: Option<DescriptionModule>,
    /// Optional `conditionsModule` (list of conditions).
    #[serde(rename = "conditionsModule", default)]
    conditions: Option<ConditionsModule>,
}
431
432#[derive(Debug, Deserialize)]
433struct IdentificationModule {
434    #[serde(rename = "nctId")]
435    nct_id: String,
436    #[serde(rename = "briefTitle", default)]
437    brief_title: Option<String>,
438}
439
440#[derive(Debug, Deserialize)]
441struct StatusModule {
442    #[serde(rename = "overallStatus", default)]
443    overall_status: Option<String>,
444    #[serde(rename = "startDateStruct", default)]
445    start_date: Option<DateStruct>,
446}
447
/// Date wrapper object; the date itself stays a raw string.
#[derive(Debug, Deserialize)]
struct DateStruct {
    /// Raw `date` value; `None` when absent.
    #[serde(default)]
    date: Option<String>,
}
453
454#[derive(Debug, Deserialize)]
455struct DescriptionModule {
456    #[serde(rename = "briefSummary", default)]
457    brief_summary: Option<String>,
458}
459
/// Conditions data of a study.
#[derive(Debug, Deserialize)]
struct ConditionsModule {
    /// Condition names; empty when the key is absent.
    #[serde(default)]
    conditions: Vec<String>,
}
465
/// Client for ClinicalTrials.gov database
///
/// Searches studies by condition and converts them into `SemanticVector`s.
pub struct ClinicalTrialsClient {
    /// Shared HTTP client used for every request.
    client: Client,
    /// Base URL of the API (no trailing slash).
    base_url: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Embedder that turns title, summary and conditions into a vector.
    embedder: Arc<SimpleEmbedder>,
}
473
474impl ClinicalTrialsClient {
475    /// Create a new ClinicalTrials.gov client
476    pub fn new() -> Result<Self> {
477        let client = Client::builder()
478            .timeout(Duration::from_secs(30))
479            .build()
480            .map_err(FrameworkError::Network)?;
481
482        Ok(Self {
483            client,
484            base_url: "https://clinicaltrials.gov/api/v2".to_string(),
485            rate_limit_delay: Duration::from_millis(CLINICALTRIALS_RATE_LIMIT_MS),
486            embedder: Arc::new(SimpleEmbedder::new(384)),
487        })
488    }
489
490    /// Search clinical trials by condition
491    ///
492    /// # Arguments
493    /// * `condition` - Medical condition to search (e.g., "diabetes", "cancer")
494    /// * `status` - Optional recruitment status filter (e.g., "RECRUITING", "COMPLETED")
495    pub async fn search_trials(
496        &self,
497        condition: &str,
498        status: Option<&str>,
499    ) -> Result<Vec<SemanticVector>> {
500        let mut url = format!(
501            "{}/studies?query.cond={}&pageSize=100",
502            self.base_url,
503            urlencoding::encode(condition)
504        );
505
506        if let Some(s) = status {
507            url.push_str(&format!("&filter.overallStatus={}", s));
508        }
509
510        sleep(self.rate_limit_delay).await;
511        let response = self.fetch_with_retry(&url).await?;
512        let trials_response: ClinicalTrialsResponse = response.json().await?;
513
514        let mut vectors = Vec::new();
515        for study in trials_response.studies {
516            let vector = self.study_to_vector(study)?;
517            vectors.push(vector);
518        }
519
520        Ok(vectors)
521    }
522
523    /// Convert clinical study to SemanticVector
524    fn study_to_vector(&self, study: ClinicalStudy) -> Result<SemanticVector> {
525        let protocol = study.protocol_section;
526        let nct_id = protocol.identification.nct_id;
527        let title = protocol
528            .identification
529            .brief_title
530            .unwrap_or_else(|| "Untitled Study".to_string());
531
532        let summary = protocol
533            .description
534            .as_ref()
535            .and_then(|d| d.brief_summary.clone())
536            .unwrap_or_default();
537
538        let conditions = protocol
539            .conditions
540            .as_ref()
541            .map(|c| c.conditions.join(", "))
542            .unwrap_or_default();
543
544        let status = protocol
545            .status
546            .overall_status
547            .unwrap_or_else(|| "UNKNOWN".to_string());
548
549        // Create text for embedding
550        let text = format!("{} {} {}", title, summary, conditions);
551        let embedding = self.embedder.embed_text(&text);
552
553        // Parse start date
554        let timestamp = protocol
555            .status
556            .start_date
557            .as_ref()
558            .and_then(|sd| sd.date.as_ref())
559            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
560            .and_then(|d| d.and_hms_opt(0, 0, 0))
561            .map(|dt| dt.and_utc())
562            .unwrap_or_else(Utc::now);
563
564        let mut metadata = HashMap::new();
565        metadata.insert("nct_id".to_string(), nct_id.clone());
566        metadata.insert("title".to_string(), title);
567        metadata.insert("summary".to_string(), summary);
568        metadata.insert("conditions".to_string(), conditions);
569        metadata.insert("status".to_string(), status);
570        metadata.insert("source".to_string(), "clinicaltrials".to_string());
571
572        Ok(SemanticVector {
573            id: format!("NCT:{}", nct_id),
574            embedding,
575            domain: Domain::Medical,
576            timestamp,
577            metadata,
578        })
579    }
580
581    /// Fetch with retry logic
582    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
583        let mut retries = 0;
584        loop {
585            match self.client.get(url).send().await {
586                Ok(response) => {
587                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
588                        retries += 1;
589                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
590                        continue;
591                    }
592                    return Ok(response);
593                }
594                Err(_) if retries < MAX_RETRIES => {
595                    retries += 1;
596                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
597                }
598                Err(e) => return Err(FrameworkError::Network(e)),
599            }
600        }
601    }
602}
603
604impl Default for ClinicalTrialsClient {
605    fn default() -> Self {
606        Self::new().expect("Failed to create ClinicalTrials client")
607    }
608}
609
610// ============================================================================
611// FDA OpenFDA Client
612// ============================================================================
613
/// OpenFDA drug adverse event response
///
/// Only the `results` array is decoded. The key is required, so a body
/// without it fails to deserialize.
#[derive(Debug, Deserialize)]
struct FdaDrugEventResponse {
    /// Adverse event reports returned by the query.
    results: Vec<FdaDrugEvent>,
}
619
/// One adverse event report.
#[derive(Debug, Deserialize)]
struct FdaDrugEvent {
    /// Required report identifier (`safetyreportid`).
    #[serde(rename = "safetyreportid")]
    safety_report_id: String,
    /// Raw `receivedate` string; parsed as `%Y%m%d` when the vector is built.
    #[serde(rename = "receivedate", default)]
    receive_date: Option<String>,
    /// Patient section holding drugs and reactions.
    #[serde(default)]
    patient: Option<FdaPatient>,
    /// Seriousness flag, accepted as a number or numeric string; the value 1
    /// is treated as "serious" by the conversion code.
    #[serde(default, deserialize_with = "deserialize_number_from_string")]
    serious: Option<i32>,
}
631
/// Patient section of an adverse event report.
#[derive(Debug, Deserialize)]
struct FdaPatient {
    /// Drugs listed in the report; empty when absent.
    #[serde(default)]
    drug: Vec<FdaDrug>,
    /// Reactions listed in the report; empty when absent.
    #[serde(default)]
    reaction: Vec<FdaReaction>,
}
639
/// One drug entry of an adverse event report.
#[derive(Debug, Deserialize)]
struct FdaDrug {
    /// Product name (`medicinalproduct`); `None` when absent.
    #[serde(rename = "medicinalproduct", default)]
    medicinal_product: Option<String>,
}
645
/// One reaction entry of an adverse event report.
#[derive(Debug, Deserialize)]
struct FdaReaction {
    /// Reaction term (`reactionmeddrapt`); `None` when absent.
    #[serde(rename = "reactionmeddrapt", default)]
    reaction_meddra_pt: Option<String>,
}
651
/// OpenFDA device recall response
///
/// Only the `results` array is decoded. The key is required, so a body
/// without it fails to deserialize.
#[derive(Debug, Deserialize)]
struct FdaRecallResponse {
    /// Recall records returned by the query.
    results: Vec<FdaRecall>,
}
657
658#[derive(Debug, Deserialize)]
659struct FdaRecall {
660    #[serde(rename = "recall_number")]
661    recall_number: String,
662    #[serde(default)]
663    reason_for_recall: Option<String>,
664    #[serde(default)]
665    product_description: Option<String>,
666    #[serde(default)]
667    report_date: Option<String>,
668    #[serde(default)]
669    classification: Option<String>,
670}
671
/// Client for FDA OpenFDA API
///
/// Searches drug adverse events and device recalls and converts them into
/// `SemanticVector`s.
pub struct FdaClient {
    /// Shared HTTP client used for every request.
    client: Client,
    /// Base URL of the API (no trailing slash).
    base_url: String,
    /// Delay slept before each request.
    rate_limit_delay: Duration,
    /// Embedder that turns the textual summary of a record into a vector.
    embedder: Arc<SimpleEmbedder>,
}
679
680impl FdaClient {
681    /// Create a new FDA OpenFDA client
682    pub fn new() -> Result<Self> {
683        let client = Client::builder()
684            .timeout(Duration::from_secs(30))
685            .build()
686            .map_err(FrameworkError::Network)?;
687
688        Ok(Self {
689            client,
690            base_url: "https://api.fda.gov".to_string(),
691            rate_limit_delay: Duration::from_millis(FDA_RATE_LIMIT_MS),
692            embedder: Arc::new(SimpleEmbedder::new(384)),
693        })
694    }
695
696    /// Search drug adverse events
697    ///
698    /// # Arguments
699    /// * `drug_name` - Name of drug to search (e.g., "aspirin", "ibuprofen")
700    pub async fn search_drug_events(&self, drug_name: &str) -> Result<Vec<SemanticVector>> {
701        let url = format!(
702            "{}/drug/event.json?search=patient.drug.medicinalproduct:\"{}\"&limit=100",
703            self.base_url,
704            urlencoding::encode(drug_name)
705        );
706
707        sleep(self.rate_limit_delay).await;
708        let response = self.fetch_with_retry(&url).await?;
709
710        // FDA API may return 404 if no results - handle gracefully
711        if response.status() == StatusCode::NOT_FOUND {
712            return Ok(Vec::new());
713        }
714
715        let events_response: FdaDrugEventResponse = response.json().await?;
716
717        let mut vectors = Vec::new();
718        for event in events_response.results {
719            let vector = self.drug_event_to_vector(event)?;
720            vectors.push(vector);
721        }
722
723        Ok(vectors)
724    }
725
726    /// Search device recalls
727    ///
728    /// # Arguments
729    /// * `reason` - Reason for recall to search
730    pub async fn search_recalls(&self, reason: &str) -> Result<Vec<SemanticVector>> {
731        let url = format!(
732            "{}/device/recall.json?search=reason_for_recall:\"{}\"&limit=100",
733            self.base_url,
734            urlencoding::encode(reason)
735        );
736
737        sleep(self.rate_limit_delay).await;
738        let response = self.fetch_with_retry(&url).await?;
739
740        if response.status() == StatusCode::NOT_FOUND {
741            return Ok(Vec::new());
742        }
743
744        let recalls_response: FdaRecallResponse = response.json().await?;
745
746        let mut vectors = Vec::new();
747        for recall in recalls_response.results {
748            let vector = self.recall_to_vector(recall)?;
749            vectors.push(vector);
750        }
751
752        Ok(vectors)
753    }
754
755    /// Convert drug event to SemanticVector
756    fn drug_event_to_vector(&self, event: FdaDrugEvent) -> Result<SemanticVector> {
757        let mut drug_names = Vec::new();
758        let mut reactions = Vec::new();
759
760        if let Some(patient) = &event.patient {
761            for drug in &patient.drug {
762                if let Some(name) = &drug.medicinal_product {
763                    drug_names.push(name.clone());
764                }
765            }
766            for reaction in &patient.reaction {
767                if let Some(r) = &reaction.reaction_meddra_pt {
768                    reactions.push(r.clone());
769                }
770            }
771        }
772
773        let drugs_text = drug_names.join(", ");
774        let reactions_text = reactions.join(", ");
775        let serious = if event.serious == Some(1) {
776            "serious"
777        } else {
778            "non-serious"
779        };
780
781        // Create text for embedding
782        let text = format!("Drug: {} Reactions: {} Severity: {}", drugs_text, reactions_text, serious);
783        let embedding = self.embedder.embed_text(&text);
784
785        // Parse receive date
786        let timestamp = event
787            .receive_date
788            .as_ref()
789            .and_then(|d| NaiveDate::parse_from_str(d, "%Y%m%d").ok())
790            .and_then(|d| d.and_hms_opt(0, 0, 0))
791            .map(|dt| dt.and_utc())
792            .unwrap_or_else(Utc::now);
793
794        let mut metadata = HashMap::new();
795        metadata.insert("report_id".to_string(), event.safety_report_id.clone());
796        metadata.insert("drugs".to_string(), drugs_text);
797        metadata.insert("reactions".to_string(), reactions_text);
798        metadata.insert("serious".to_string(), serious.to_string());
799        metadata.insert("source".to_string(), "fda_drug_events".to_string());
800
801        Ok(SemanticVector {
802            id: format!("FDA_EVENT:{}", event.safety_report_id),
803            embedding,
804            domain: Domain::Medical,
805            timestamp,
806            metadata,
807        })
808    }
809
810    /// Convert recall to SemanticVector
811    fn recall_to_vector(&self, recall: FdaRecall) -> Result<SemanticVector> {
812        let reason = recall.reason_for_recall.unwrap_or_else(|| "Unknown reason".to_string());
813        let product = recall.product_description.unwrap_or_else(|| "Unknown product".to_string());
814        let classification = recall.classification.unwrap_or_else(|| "Unknown".to_string());
815
816        // Create text for embedding
817        let text = format!("Product: {} Reason: {} Classification: {}", product, reason, classification);
818        let embedding = self.embedder.embed_text(&text);
819
820        // Parse report date
821        let timestamp = recall
822            .report_date
823            .as_ref()
824            .and_then(|d| NaiveDate::parse_from_str(d, "%Y%m%d").ok())
825            .and_then(|d| d.and_hms_opt(0, 0, 0))
826            .map(|dt| dt.and_utc())
827            .unwrap_or_else(Utc::now);
828
829        let mut metadata = HashMap::new();
830        metadata.insert("recall_number".to_string(), recall.recall_number.clone());
831        metadata.insert("reason".to_string(), reason);
832        metadata.insert("product".to_string(), product);
833        metadata.insert("classification".to_string(), classification);
834        metadata.insert("source".to_string(), "fda_recalls".to_string());
835
836        Ok(SemanticVector {
837            id: format!("FDA_RECALL:{}", recall.recall_number),
838            embedding,
839            domain: Domain::Medical,
840            timestamp,
841            metadata,
842        })
843    }
844
845    /// Fetch with retry logic
846    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
847        let mut retries = 0;
848        loop {
849            match self.client.get(url).send().await {
850                Ok(response) => {
851                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
852                        retries += 1;
853                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
854                        continue;
855                    }
856                    return Ok(response);
857                }
858                Err(_) if retries < MAX_RETRIES => {
859                    retries += 1;
860                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
861                }
862                Err(e) => return Err(FrameworkError::Network(e)),
863            }
864        }
865    }
866}
867
868impl Default for FdaClient {
869    fn default() -> Self {
870        Self::new().expect("Failed to create FDA client")
871    }
872}
873
874// ============================================================================
875// Tests
876// ============================================================================
877
#[cfg(test)]
mod tests {
    use super::*;

    /// A PubMed client can be built without an API key.
    #[tokio::test]
    async fn test_pubmed_client_creation() {
        assert!(PubMedClient::new(None).is_ok());
    }

    /// A ClinicalTrials.gov client can be built.
    #[tokio::test]
    async fn test_clinical_trials_client_creation() {
        assert!(ClinicalTrialsClient::new().is_ok());
    }

    /// An openFDA client can be built.
    #[tokio::test]
    async fn test_fda_client_creation() {
        assert!(FdaClient::new().is_ok());
    }

    /// The PubMed delay depends on whether an API key was supplied.
    #[test]
    fn test_rate_limiting() {
        let anonymous_delay = PubMedClient::new(None).unwrap().rate_limit_delay;
        assert_eq!(anonymous_delay, Duration::from_millis(NCBI_RATE_LIMIT_MS));

        let keyed_delay = PubMedClient::new(Some(String::from("test_key")))
            .unwrap()
            .rate_limit_delay;
        assert_eq!(
            keyed_delay,
            Duration::from_millis(NCBI_WITH_KEY_RATE_LIMIT_MS)
        );
    }
}