ruvector_data_framework/
government_clients.rs

1//! Government and International Organization API Integrations
2//!
3//! This module provides async clients for fetching data from government agencies
4//! and international organizations, converting responses to SemanticVector format
5//! for RuVector discovery.
6//!
7//! # Supported APIs
8//!
9//! - **US Census Bureau**: Population, demographics, economic surveys
10//! - **Data.gov**: US Government open data catalog
11//! - **EU Open Data Portal**: European Union datasets
12//! - **UK Government Data**: UK public sector information
13//! - **World Bank**: Global development indicators
14//! - **United Nations**: International statistics and indicators
15
16use std::collections::HashMap;
17use std::sync::Arc;
18use std::time::Duration;
19
20use chrono::{Datelike, NaiveDate, Utc};
21use reqwest::{Client, StatusCode};
22use serde::{Deserialize, Serialize};
23use tokio::time::sleep;
24
25use crate::api_clients::SimpleEmbedder;
26use crate::ruvector_native::{Domain, SemanticVector};
27use crate::{FrameworkError, Result};
28
// Rate limiting configuration.
//
// Each `*_RATE_LIMIT_MS` value is a delay in milliseconds. The Census and
// Data.gov clients in this file store their value as `rate_limit_delay` and
// sleep for that long before every request.

/// Delay used by `CensusClient` between requests.
const CENSUS_RATE_LIMIT_MS: u64 = 1200; // 50 req/min
/// Delay used by `DataGovClient` between requests.
const DATAGOV_RATE_LIMIT_MS: u64 = 1000; // 60 req/min
/// Delay presumably used by the EU Open Data Portal client (not shown here).
const EU_OPENDATA_RATE_LIMIT_MS: u64 = 500; // Conservative
/// Delay presumably used by the UK Government Data client (not shown here).
const UK_GOV_RATE_LIMIT_MS: u64 = 500; // Conservative
/// Delay presumably used by the World Bank client (not shown here).
const WORLDBANK_RATE_LIMIT_MS: u64 = 100; // Liberal
/// Delay presumably used by the United Nations data client (not shown here).
const UNDATA_RATE_LIMIT_MS: u64 = 1000; // Conservative
/// Maximum number of retries performed by the `fetch_with_retry` helpers.
const MAX_RETRIES: u32 = 3;
/// Base retry back-off in milliseconds; it is multiplied by the retry count.
const RETRY_DELAY_MS: u64 = 1000;
/// Embedding dimension used by the `new` constructors.
const DEFAULT_EMBEDDING_DIM: usize = 256;
39
40// ============================================================================
41// US Census Bureau Client
42// ============================================================================
43
/// Census Bureau API response for datasets
///
/// Wraps a `dataset` array of JSON rows. The field falls back to an empty
/// list when absent from the payload.
// NOTE(review): none of the CensusClient methods in this part of the file
// deserialize into this type; they parse `serde_json::Value` directly.
#[derive(Debug, Deserialize)]
struct CensusDataResponse {
    /// Rows of loosely typed JSON values.
    #[serde(default)]
    dataset: Vec<Vec<serde_json::Value>>,
}
50
/// Census Bureau dataset info
///
/// Mirrors one catalog entry. Every field is optional on the wire and falls
/// back to its `Default` value when missing.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct CensusDataset {
    /// Dataset vintage, read from the `c_vintage` key.
    // NOTE(review): the catalog appears to publish `c_vintage` as a JSON
    // number; deserializing it into `String` would then fail - confirm
    // before using this struct.
    #[serde(rename = "c_vintage", default)]
    vintage: String,
    /// Dataset path segments, read from the `c_dataset` key.
    #[serde(rename = "c_dataset", default)]
    dataset: Vec<String>,
    /// Human-readable dataset title.
    #[serde(default)]
    title: String,
    /// Longer free-text description of the dataset.
    #[serde(default)]
    description: String,
}
63
64/// Census Bureau variable info
65#[derive(Debug, Deserialize, Clone)]
66struct CensusVariable {
67    #[serde(default)]
68    name: String,
69    #[serde(default)]
70    label: String,
71    #[serde(default)]
72    concept: String,
73    #[serde(default)]
74    predicateType: String,
75}
76
/// Client for US Census Bureau API
///
/// Provides access to:
/// - Decennial Census data
/// - American Community Survey (ACS)
/// - Economic indicators
/// - Population estimates
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::CensusClient;
///
/// let client = CensusClient::new(None);
/// let population = client.get_population(2020, "state:*").await?;
/// let acs_data = client.get_acs5(2021, vec!["B01001_001E"], "county:*").await?;
/// let datasets = client.get_available_datasets().await?;
/// ```
pub struct CensusClient {
    /// HTTP client used for every request.
    client: Client,
    /// Root URL of the Census data API; endpoint paths are appended to it.
    base_url: String,
    /// Optional API key, appended as `&key=...` to data queries.
    api_key: Option<String>,
    /// Pause taken before each outgoing request.
    rate_limit_delay: Duration,
    /// Embedder that converts record text into vectors.
    embedder: Arc<SimpleEmbedder>,
    /// When `true`, the public methods return canned data and skip the network.
    use_mock: bool,
}
102
103impl CensusClient {
104    /// Create a new Census Bureau client
105    ///
106    /// # Arguments
107    /// * `api_key` - Optional Census API key (500 req/day without key, unlimited with key)
108    ///               Get key at: https://api.census.gov/data/key_signup.html
109    pub fn new(api_key: Option<String>) -> Self {
110        Self::with_config(api_key, DEFAULT_EMBEDDING_DIM, false)
111    }
112
113    /// Create a new Census client with custom configuration
114    ///
115    /// # Arguments
116    /// * `api_key` - Optional Census API key
117    /// * `embedding_dim` - Dimension for embeddings
118    /// * `use_mock` - Use mock data when API is unavailable
119    pub fn with_config(api_key: Option<String>, embedding_dim: usize, use_mock: bool) -> Self {
120        Self {
121            client: Client::builder()
122                .user_agent("RuVector-Discovery/1.0")
123                .timeout(Duration::from_secs(30))
124                .build()
125                .expect("Failed to create HTTP client"),
126            base_url: "https://api.census.gov/data".to_string(),
127            api_key,
128            rate_limit_delay: Duration::from_millis(CENSUS_RATE_LIMIT_MS),
129            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
130            use_mock,
131        }
132    }
133
134    /// Get population data for a specific year and geography
135    ///
136    /// # Arguments
137    /// * `year` - Census year (e.g., 2020, 2010)
138    /// * `geography` - Geography filter (e.g., "state:*", "county:06", "us:1")
139    ///
140    /// # Example
141    /// ```rust,ignore
142    /// // Get all states population
143    /// let pop = client.get_population(2020, "state:*").await?;
144    ///
145    /// // Get California counties
146    /// let ca_counties = client.get_population(2020, "county:*&in=state:06").await?;
147    /// ```
148    pub async fn get_population(
149        &self,
150        year: u32,
151        geography: &str,
152    ) -> Result<Vec<SemanticVector>> {
153        let url = if year >= 2020 {
154            format!(
155                "{}/{}/dec/pl?get=NAME,P1_001N&for={}",
156                self.base_url, year, geography
157            )
158        } else {
159            format!(
160                "{}/{}/dec/sf1?get=NAME,P001001&for={}",
161                self.base_url, year, geography
162            )
163        };
164
165        self.fetch_census_data(&url, &format!("population_{}", year))
166            .await
167    }
168
169    /// Get American Community Survey 5-year estimates
170    ///
171    /// # Arguments
172    /// * `year` - Survey year (2009-2021+)
173    /// * `variables` - Variable codes (e.g., ["B01001_001E"] for total population)
174    /// * `geography` - Geography filter
175    ///
176    /// # Example
177    /// ```rust,ignore
178    /// // Get median household income for all states
179    /// let income = client.get_acs5(
180    ///     2021,
181    ///     vec!["B19013_001E"],
182    ///     "state:*"
183    /// ).await?;
184    /// ```
185    pub async fn get_acs5(
186        &self,
187        year: u32,
188        variables: Vec<&str>,
189        geography: &str,
190    ) -> Result<Vec<SemanticVector>> {
191        let vars = variables.join(",");
192        let url = format!(
193            "{}/{}/acs/acs5?get=NAME,{}&for={}",
194            self.base_url, year, vars, geography
195        );
196
197        self.fetch_census_data(&url, &format!("acs5_{}", year))
198            .await
199    }
200
201    /// Get available Census datasets
202    ///
203    /// # Example
204    /// ```rust,ignore
205    /// let datasets = client.get_available_datasets().await?;
206    /// for ds in datasets {
207    ///     println!("Dataset: {}", ds.metadata.get("title").unwrap());
208    /// }
209    /// ```
210    pub async fn get_available_datasets(&self) -> Result<Vec<SemanticVector>> {
211        if self.use_mock {
212            return Ok(self.get_mock_datasets());
213        }
214
215        let url = format!("{}.json", self.base_url);
216
217        sleep(self.rate_limit_delay).await;
218        let response = self.fetch_with_retry(&url).await?;
219        let text = response.text().await?;
220
221        // Parse the complex JSON structure
222        let json: serde_json::Value = serde_json::from_str(&text)?;
223        let mut vectors = Vec::new();
224
225        if let Some(dataset_obj) = json.get("dataset") {
226            if let Some(datasets) = dataset_obj.as_array() {
227                for (idx, ds) in datasets.iter().enumerate() {
228                    if let Some(title) = ds.get("title").and_then(|t| t.as_str()) {
229                        let description = ds
230                            .get("description")
231                            .and_then(|d| d.as_str())
232                            .unwrap_or("");
233                        let vintage = ds.get("c_vintage").and_then(|v| v.as_str()).unwrap_or("");
234
235                        let text = format!("{} {} {}", title, description, vintage);
236                        let embedding = self.embedder.embed_text(&text);
237
238                        let mut metadata = HashMap::new();
239                        metadata.insert("title".to_string(), title.to_string());
240                        metadata.insert("description".to_string(), description.to_string());
241                        metadata.insert("vintage".to_string(), vintage.to_string());
242                        metadata.insert("source".to_string(), "census_catalog".to_string());
243
244                        vectors.push(SemanticVector {
245                            id: format!("CENSUS_DS:{}", idx),
246                            embedding,
247                            domain: Domain::Government,
248                            timestamp: Utc::now(),
249                            metadata,
250                        });
251                    }
252                }
253            }
254        }
255
256        Ok(vectors)
257    }
258
259    /// Search for variables in a dataset
260    ///
261    /// # Arguments
262    /// * `dataset` - Dataset identifier (e.g., "acs/acs5", "dec/pl")
263    /// * `query` - Search query for variable names/labels
264    ///
265    /// # Example
266    /// ```rust,ignore
267    /// let vars = client.search_variables("acs/acs5", "income").await?;
268    /// ```
269    pub async fn search_variables(
270        &self,
271        dataset: &str,
272        query: &str,
273    ) -> Result<Vec<SemanticVector>> {
274        if self.use_mock {
275            return Ok(self.get_mock_variables(query));
276        }
277
278        let url = format!("{}/2021/{}/variables.json", self.base_url, dataset);
279
280        sleep(self.rate_limit_delay).await;
281        let response = self.fetch_with_retry(&url).await?;
282        let text = response.text().await?;
283
284        let json: serde_json::Value = serde_json::from_str(&text)?;
285        let mut vectors = Vec::new();
286
287        if let Some(variables) = json.get("variables").and_then(|v| v.as_object()) {
288            for (var_name, var_data) in variables.iter() {
289                if let Some(label) = var_data.get("label").and_then(|l| l.as_str()) {
290                    // Filter by query
291                    if !label.to_lowercase().contains(&query.to_lowercase())
292                        && !var_name.to_lowercase().contains(&query.to_lowercase())
293                    {
294                        continue;
295                    }
296
297                    let concept = var_data
298                        .get("concept")
299                        .and_then(|c| c.as_str())
300                        .unwrap_or("");
301
302                    let text = format!("{} {} {}", var_name, label, concept);
303                    let embedding = self.embedder.embed_text(&text);
304
305                    let mut metadata = HashMap::new();
306                    metadata.insert("variable_name".to_string(), var_name.clone());
307                    metadata.insert("label".to_string(), label.to_string());
308                    metadata.insert("concept".to_string(), concept.to_string());
309                    metadata.insert("dataset".to_string(), dataset.to_string());
310                    metadata.insert("source".to_string(), "census_variables".to_string());
311
312                    vectors.push(SemanticVector {
313                        id: format!("CENSUS_VAR:{}:{}", dataset, var_name),
314                        embedding,
315                        domain: Domain::Government,
316                        timestamp: Utc::now(),
317                        metadata,
318                    });
319
320                    if vectors.len() >= 50 {
321                        break; // Limit results
322                    }
323                }
324            }
325        }
326
327        Ok(vectors)
328    }
329
330    /// Fetch and parse Census data
331    async fn fetch_census_data(&self, url: &str, dataset_name: &str) -> Result<Vec<SemanticVector>> {
332        if self.use_mock {
333            return Ok(self.get_mock_census_data(dataset_name));
334        }
335
336        let mut full_url = url.to_string();
337        if let Some(key) = &self.api_key {
338            full_url.push_str(&format!("&key={}", key));
339        }
340
341        sleep(self.rate_limit_delay).await;
342        let response = self.fetch_with_retry(&full_url).await?;
343        let text = response.text().await?;
344
345        // Census API returns array of arrays: [["NAME", "P1_001N"], ["California", "39538223"], ...]
346        let data: Vec<Vec<serde_json::Value>> = serde_json::from_str(&text)?;
347
348        if data.is_empty() {
349            return Ok(Vec::new());
350        }
351
352        let headers = &data[0];
353        let mut vectors = Vec::new();
354
355        for (idx, row) in data.iter().skip(1).enumerate() {
356            let mut record = HashMap::new();
357            for (i, value) in row.iter().enumerate() {
358                if let Some(header) = headers.get(i).and_then(|h| h.as_str()) {
359                    record.insert(header.to_string(), value.to_string());
360                }
361            }
362
363            let name = record.get("NAME").map(|s| s.as_str()).unwrap_or("Unknown");
364            let text = format!("{} {}", dataset_name, name);
365            let embedding = self.embedder.embed_text(&text);
366
367            let mut metadata = record.clone();
368            metadata.insert("source".to_string(), "census".to_string());
369            metadata.insert("dataset".to_string(), dataset_name.to_string());
370
371            vectors.push(SemanticVector {
372                id: format!("CENSUS:{}:{}", dataset_name, idx),
373                embedding,
374                domain: Domain::Government,
375                timestamp: Utc::now(),
376                metadata,
377            });
378        }
379
380        Ok(vectors)
381    }
382
383    /// Mock census data for testing
384    fn get_mock_census_data(&self, dataset_name: &str) -> Vec<SemanticVector> {
385        let mut vectors = Vec::new();
386        let items = vec![
387            ("California", "39538223"),
388            ("Texas", "29145505"),
389            ("Florida", "21538187"),
390        ];
391
392        for (idx, (name, population)) in items.iter().enumerate() {
393            let text = format!("{} {} population {}", dataset_name, name, population);
394            let embedding = self.embedder.embed_text(&text);
395
396            let mut metadata = HashMap::new();
397            metadata.insert("NAME".to_string(), name.to_string());
398            metadata.insert("population".to_string(), population.to_string());
399            metadata.insert("source".to_string(), "census_mock".to_string());
400
401            vectors.push(SemanticVector {
402                id: format!("CENSUS_MOCK:{}:{}", dataset_name, idx),
403                embedding,
404                domain: Domain::Government,
405                timestamp: Utc::now(),
406                metadata,
407            });
408        }
409
410        vectors
411    }
412
413    /// Mock datasets for testing
414    fn get_mock_datasets(&self) -> Vec<SemanticVector> {
415        vec![self.create_mock_dataset(
416            "Decennial Census",
417            "Population and housing counts",
418            "2020",
419        )]
420    }
421
422    /// Mock variables for testing
423    fn get_mock_variables(&self, query: &str) -> Vec<SemanticVector> {
424        vec![self.create_mock_variable(
425            "B19013_001E",
426            "Median Household Income",
427            "Income",
428        )]
429    }
430
431    fn create_mock_dataset(&self, title: &str, description: &str, vintage: &str) -> SemanticVector {
432        let text = format!("{} {} {}", title, description, vintage);
433        let embedding = self.embedder.embed_text(&text);
434
435        let mut metadata = HashMap::new();
436        metadata.insert("title".to_string(), title.to_string());
437        metadata.insert("description".to_string(), description.to_string());
438        metadata.insert("vintage".to_string(), vintage.to_string());
439        metadata.insert("source".to_string(), "census_mock".to_string());
440
441        SemanticVector {
442            id: "CENSUS_MOCK_DS:1".to_string(),
443            embedding,
444            domain: Domain::Government,
445            timestamp: Utc::now(),
446            metadata,
447        }
448    }
449
450    fn create_mock_variable(&self, name: &str, label: &str, concept: &str) -> SemanticVector {
451        let text = format!("{} {} {}", name, label, concept);
452        let embedding = self.embedder.embed_text(&text);
453
454        let mut metadata = HashMap::new();
455        metadata.insert("variable_name".to_string(), name.to_string());
456        metadata.insert("label".to_string(), label.to_string());
457        metadata.insert("concept".to_string(), concept.to_string());
458        metadata.insert("source".to_string(), "census_mock".to_string());
459
460        SemanticVector {
461            id: format!("CENSUS_MOCK_VAR:{}", name),
462            embedding,
463            domain: Domain::Government,
464            timestamp: Utc::now(),
465            metadata,
466        }
467    }
468
469    /// Fetch with retry logic
470    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
471        let mut retries = 0;
472        loop {
473            match self.client.get(url).send().await {
474                Ok(response) => {
475                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
476                        retries += 1;
477                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
478                        continue;
479                    }
480                    if !response.status().is_success() && self.use_mock {
481                        return Err(FrameworkError::Network(
482                            reqwest::Error::from(response.error_for_status().unwrap_err()),
483                        ));
484                    }
485                    return Ok(response);
486                }
487                Err(e) if retries < MAX_RETRIES && self.use_mock => {
488                    retries += 1;
489                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
490                }
491                Err(e) => return Err(FrameworkError::Network(e)),
492            }
493        }
494    }
495}
496
497// ============================================================================
498// Data.gov Client
499// ============================================================================
500
/// Data.gov CKAN API response
///
/// Envelope deserialized from the `package_search` response body.
#[derive(Debug, Deserialize)]
struct DataGovSearchResponse {
    /// Success flag from the envelope; `false` when absent.
    #[serde(default)]
    success: bool,
    /// Search payload. Required: deserialization fails if it is missing.
    result: DataGovResult,
}
508
/// Payload of a Data.gov dataset search.
#[derive(Debug, Deserialize)]
struct DataGovResult {
    /// Count reported by the API; `0` when absent.
    #[serde(default)]
    count: u64,
    /// Datasets contained in this response; empty when absent.
    #[serde(default)]
    results: Vec<DataGovDataset>,
}
516
/// One dataset record as returned by Data.gov.
///
/// Only `id` is required; every other field falls back to its `Default`
/// value when missing from the payload.
#[derive(Debug, Deserialize)]
struct DataGovDataset {
    /// Dataset identifier, used to build the `DATAGOV:<id>` vector id.
    id: String,
    /// Dataset title.
    #[serde(default)]
    title: String,
    /// Free-text notes; stored under the `description` metadata key.
    #[serde(default)]
    notes: String,
    /// Publishing organization, if one is attached.
    #[serde(default)]
    organization: Option<DataGovOrganization>,
    /// Tags attached to the dataset.
    #[serde(default)]
    tags: Vec<DataGovTag>,
    /// Creation timestamp, kept as the raw string from the API.
    #[serde(default)]
    metadata_created: String,
    /// Last-modified timestamp, kept as the raw string from the API.
    #[serde(default)]
    metadata_modified: String,
}
533
/// Organization summary embedded in a dataset record.
#[derive(Debug, Deserialize)]
struct DataGovOrganization {
    /// Organization name field; empty when absent.
    #[serde(default)]
    name: String,
    /// Organization title; this is the value copied into dataset metadata.
    #[serde(default)]
    title: String,
}
541
/// Tag attached to a dataset record.
#[derive(Debug, Deserialize)]
struct DataGovTag {
    /// Tag text; empty when absent.
    #[serde(default)]
    name: String,
}
547
/// Envelope deserialized from the `organization_list` response body.
#[derive(Debug, Deserialize)]
struct DataGovOrganizationList {
    /// Success flag from the envelope; `false` when absent.
    #[serde(default)]
    success: bool,
    /// Organizations in the response; empty when absent.
    #[serde(default)]
    result: Vec<DataGovOrganizationInfo>,
}
555
/// Full organization record.
///
/// `id`, `name` and `title` are required; deserialization fails if any of
/// them is missing.
#[derive(Debug, Deserialize)]
struct DataGovOrganizationInfo {
    /// Organization identifier, used to build the `DATAGOV_ORG:<id>` vector id.
    id: String,
    /// Organization name field.
    name: String,
    /// Organization title.
    title: String,
    /// Free-text description; empty when absent.
    #[serde(default)]
    description: String,
    /// Package count reported by the API; `0` when absent.
    #[serde(default)]
    package_count: u64,
}
566
/// Client for Data.gov catalog
///
/// Provides access to:
/// - 250,000+ US Government datasets
/// - Federal, state, and local data
/// - Organization and tag browsing
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::DataGovClient;
///
/// let client = DataGovClient::new();
/// let datasets = client.search_datasets("climate change").await?;
/// let orgs = client.list_organizations().await?;
/// let dataset = client.get_dataset("dataset-id").await?;
/// ```
pub struct DataGovClient {
    /// HTTP client used for every request.
    client: Client,
    /// Root URL of the catalog API; `/action/...` paths are appended to it.
    base_url: String,
    /// Pause taken before each outgoing request.
    rate_limit_delay: Duration,
    /// Embedder that converts record text into vectors.
    embedder: Arc<SimpleEmbedder>,
    /// When `true`, the public methods return canned data and skip the network.
    use_mock: bool,
}
590
591impl DataGovClient {
592    /// Create a new Data.gov client
593    pub fn new() -> Self {
594        Self::with_config(DEFAULT_EMBEDDING_DIM, false)
595    }
596
597    /// Create a new Data.gov client with custom configuration
598    pub fn with_config(embedding_dim: usize, use_mock: bool) -> Self {
599        Self {
600            client: Client::builder()
601                .user_agent("RuVector-Discovery/1.0")
602                .timeout(Duration::from_secs(30))
603                .build()
604                .expect("Failed to create HTTP client"),
605            base_url: "https://catalog.data.gov/api/3".to_string(),
606            rate_limit_delay: Duration::from_millis(DATAGOV_RATE_LIMIT_MS),
607            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
608            use_mock,
609        }
610    }
611
612    /// Search for datasets
613    ///
614    /// # Arguments
615    /// * `query` - Search query string
616    ///
617    /// # Example
618    /// ```rust,ignore
619    /// let results = client.search_datasets("renewable energy").await?;
620    /// ```
621    pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
622        if self.use_mock {
623            return Ok(self.get_mock_datagov_datasets(query));
624        }
625
626        let url = format!(
627            "{}/action/package_search?q={}&rows=50",
628            self.base_url,
629            urlencoding::encode(query)
630        );
631
632        sleep(self.rate_limit_delay).await;
633        let response = self.fetch_with_retry(&url).await?;
634        let search_response: DataGovSearchResponse = response.json().await?;
635
636        let mut vectors = Vec::new();
637        for dataset in search_response.result.results {
638            let org_name = dataset
639                .organization
640                .as_ref()
641                .map(|o| o.title.as_str())
642                .unwrap_or("Unknown");
643
644            let tags = dataset
645                .tags
646                .iter()
647                .map(|t| t.name.clone())
648                .collect::<Vec<_>>()
649                .join(", ");
650
651            let text = format!("{} {} {} {}", dataset.title, dataset.notes, org_name, tags);
652            let embedding = self.embedder.embed_text(&text);
653
654            let mut metadata = HashMap::new();
655            metadata.insert("title".to_string(), dataset.title);
656            metadata.insert("description".to_string(), dataset.notes);
657            metadata.insert("organization".to_string(), org_name.to_string());
658            metadata.insert("tags".to_string(), tags);
659            metadata.insert("created".to_string(), dataset.metadata_created);
660            metadata.insert("modified".to_string(), dataset.metadata_modified);
661            metadata.insert("source".to_string(), "datagov".to_string());
662
663            vectors.push(SemanticVector {
664                id: format!("DATAGOV:{}", dataset.id),
665                embedding,
666                domain: Domain::Government,
667                timestamp: Utc::now(),
668                metadata,
669            });
670        }
671
672        Ok(vectors)
673    }
674
675    /// Get a specific dataset by ID
676    ///
677    /// # Arguments
678    /// * `id` - Dataset ID
679    ///
680    /// # Example
681    /// ```rust,ignore
682    /// let dataset = client.get_dataset("some-dataset-id").await?;
683    /// ```
684    pub async fn get_dataset(&self, id: &str) -> Result<Option<SemanticVector>> {
685        if self.use_mock {
686            return Ok(Some(self.get_mock_datagov_datasets("mock").into_iter().next().unwrap()));
687        }
688
689        let url = format!("{}/action/package_show?id={}", self.base_url, id);
690
691        sleep(self.rate_limit_delay).await;
692        let response = self.fetch_with_retry(&url).await?;
693        let json: serde_json::Value = response.json().await?;
694
695        if let Some(result) = json.get("result") {
696            let dataset: DataGovDataset = serde_json::from_value(result.clone())?;
697
698            let org_name = dataset
699                .organization
700                .as_ref()
701                .map(|o| o.title.as_str())
702                .unwrap_or("Unknown");
703
704            let text = format!("{} {}", dataset.title, dataset.notes);
705            let embedding = self.embedder.embed_text(&text);
706
707            let mut metadata = HashMap::new();
708            metadata.insert("title".to_string(), dataset.title);
709            metadata.insert("description".to_string(), dataset.notes);
710            metadata.insert("organization".to_string(), org_name.to_string());
711            metadata.insert("source".to_string(), "datagov".to_string());
712
713            return Ok(Some(SemanticVector {
714                id: format!("DATAGOV:{}", dataset.id),
715                embedding,
716                domain: Domain::Government,
717                timestamp: Utc::now(),
718                metadata,
719            }));
720        }
721
722        Ok(None)
723    }
724
725    /// List all organizations
726    ///
727    /// # Example
728    /// ```rust,ignore
729    /// let orgs = client.list_organizations().await?;
730    /// ```
731    pub async fn list_organizations(&self) -> Result<Vec<SemanticVector>> {
732        if self.use_mock {
733            return Ok(self.get_mock_organizations());
734        }
735
736        let url = format!("{}/action/organization_list?all_fields=true", self.base_url);
737
738        sleep(self.rate_limit_delay).await;
739        let response = self.fetch_with_retry(&url).await?;
740        let org_list: DataGovOrganizationList = response.json().await?;
741
742        let mut vectors = Vec::new();
743        for org in org_list.result.iter().take(100) {
744            let text = format!("{} {}", org.title, org.description);
745            let embedding = self.embedder.embed_text(&text);
746
747            let mut metadata = HashMap::new();
748            metadata.insert("name".to_string(), org.name.clone());
749            metadata.insert("title".to_string(), org.title.clone());
750            metadata.insert("description".to_string(), org.description.clone());
751            metadata.insert("package_count".to_string(), org.package_count.to_string());
752            metadata.insert("source".to_string(), "datagov_org".to_string());
753
754            vectors.push(SemanticVector {
755                id: format!("DATAGOV_ORG:{}", org.id),
756                embedding,
757                domain: Domain::Government,
758                timestamp: Utc::now(),
759                metadata,
760            });
761        }
762
763        Ok(vectors)
764    }
765
766    /// Get organization details
767    ///
768    /// # Arguments
769    /// * `id` - Organization ID or name
770    ///
771    /// # Example
772    /// ```rust,ignore
773    /// let org = client.get_organization("nasa-gov").await?;
774    /// ```
775    pub async fn get_organization(&self, id: &str) -> Result<Option<SemanticVector>> {
776        if self.use_mock {
777            return Ok(Some(self.get_mock_organizations().into_iter().next().unwrap()));
778        }
779
780        let url = format!("{}/action/organization_show?id={}", self.base_url, id);
781
782        sleep(self.rate_limit_delay).await;
783        let response = self.fetch_with_retry(&url).await?;
784        let json: serde_json::Value = response.json().await?;
785
786        if let Some(result) = json.get("result") {
787            let org: DataGovOrganizationInfo = serde_json::from_value(result.clone())?;
788
789            let text = format!("{} {}", org.title, org.description);
790            let embedding = self.embedder.embed_text(&text);
791
792            let mut metadata = HashMap::new();
793            metadata.insert("name".to_string(), org.name);
794            metadata.insert("title".to_string(), org.title);
795            metadata.insert("description".to_string(), org.description);
796            metadata.insert("source".to_string(), "datagov_org".to_string());
797
798            return Ok(Some(SemanticVector {
799                id: format!("DATAGOV_ORG:{}", org.id),
800                embedding,
801                domain: Domain::Government,
802                timestamp: Utc::now(),
803                metadata,
804            }));
805        }
806
807        Ok(None)
808    }
809
810    /// List popular tags
811    ///
812    /// # Example
813    /// ```rust,ignore
814    /// let tags = client.list_tags().await?;
815    /// ```
816    pub async fn list_tags(&self) -> Result<Vec<SemanticVector>> {
817        if self.use_mock {
818            return Ok(self.get_mock_tags());
819        }
820
821        let url = format!("{}/action/tag_list", self.base_url);
822
823        sleep(self.rate_limit_delay).await;
824        let response = self.fetch_with_retry(&url).await?;
825        let json: serde_json::Value = response.json().await?;
826
827        let mut vectors = Vec::new();
828        if let Some(result) = json.get("result").and_then(|r| r.as_array()) {
829            for (idx, tag) in result.iter().take(100).enumerate() {
830                if let Some(tag_name) = tag.as_str() {
831                    let embedding = self.embedder.embed_text(tag_name);
832
833                    let mut metadata = HashMap::new();
834                    metadata.insert("tag".to_string(), tag_name.to_string());
835                    metadata.insert("source".to_string(), "datagov_tag".to_string());
836
837                    vectors.push(SemanticVector {
838                        id: format!("DATAGOV_TAG:{}", idx),
839                        embedding,
840                        domain: Domain::Government,
841                        timestamp: Utc::now(),
842                        metadata,
843                    });
844                }
845            }
846        }
847
848        Ok(vectors)
849    }
850
851    fn get_mock_datagov_datasets(&self, query: &str) -> Vec<SemanticVector> {
852        let text = format!("Mock dataset for {}", query);
853        let embedding = self.embedder.embed_text(&text);
854
855        let mut metadata = HashMap::new();
856        metadata.insert("title".to_string(), "Mock Dataset".to_string());
857        metadata.insert("description".to_string(), "Mock data for testing".to_string());
858        metadata.insert("source".to_string(), "datagov_mock".to_string());
859
860        vec![SemanticVector {
861            id: "DATAGOV_MOCK:1".to_string(),
862            embedding,
863            domain: Domain::Government,
864            timestamp: Utc::now(),
865            metadata,
866        }]
867    }
868
869    fn get_mock_organizations(&self) -> Vec<SemanticVector> {
870        let text = "NASA National Aeronautics and Space Administration";
871        let embedding = self.embedder.embed_text(text);
872
873        let mut metadata = HashMap::new();
874        metadata.insert("name".to_string(), "nasa-gov".to_string());
875        metadata.insert("title".to_string(), "NASA".to_string());
876        metadata.insert("source".to_string(), "datagov_mock".to_string());
877
878        vec![SemanticVector {
879            id: "DATAGOV_ORG_MOCK:1".to_string(),
880            embedding,
881            domain: Domain::Government,
882            timestamp: Utc::now(),
883            metadata,
884        }]
885    }
886
887    fn get_mock_tags(&self) -> Vec<SemanticVector> {
888        let text = "climate";
889        let embedding = self.embedder.embed_text(text);
890
891        let mut metadata = HashMap::new();
892        metadata.insert("tag".to_string(), "climate".to_string());
893        metadata.insert("source".to_string(), "datagov_mock".to_string());
894
895        vec![SemanticVector {
896            id: "DATAGOV_TAG_MOCK:1".to_string(),
897            embedding,
898            domain: Domain::Government,
899            timestamp: Utc::now(),
900            metadata,
901        }]
902    }
903
904    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
905        let mut retries = 0;
906        loop {
907            match self.client.get(url).send().await {
908                Ok(response) => {
909                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
910                        retries += 1;
911                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
912                        continue;
913                    }
914                    return Ok(response);
915                }
916                Err(e) if retries < MAX_RETRIES => {
917                    retries += 1;
918                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
919                }
920                Err(e) => return Err(FrameworkError::Network(e)),
921            }
922        }
923    }
924}
925
926impl Default for DataGovClient {
927    fn default() -> Self {
928        Self::new()
929    }
930}
931
932// ============================================================================
933// EU Open Data Portal Client
934// ============================================================================
935
/// EU Open Data search response
///
/// Top-level envelope whose payload sits under `result`.
/// NOTE(review): the client methods visible in this section parse responses
/// as untyped `serde_json::Value` and do not reference this type — confirm
/// whether it is still needed.
#[derive(Debug, Deserialize)]
struct EuOpenDataResponse {
    // A missing `result` key deserializes to `EuOpenDataResult::default()`.
    #[serde(default)]
    result: EuOpenDataResult,
}
942
/// Payload of an EU Open Data search response: a count plus the dataset list.
///
/// Both fields fall back to their defaults (`0` / empty list) when absent.
#[derive(Debug, Default, Deserialize)]
struct EuOpenDataResult {
    // Presumably the total number of matches reported by the API — TODO confirm.
    #[serde(default)]
    count: u64,
    // Dataset entries returned in this response.
    #[serde(default)]
    results: Vec<EuDataset>,
}
950
/// A single dataset entry in an EU Open Data search result.
///
/// Every field is optional on the wire and defaults to an empty value.
#[derive(Debug, Deserialize)]
struct EuDataset {
    // Dataset identifier.
    #[serde(default)]
    id: String,
    // String-to-string map; presumably language code -> localized title
    // (the parsing code in this file reads an `en` key) — TODO confirm.
    #[serde(default)]
    title: HashMap<String, String>,
    // String-to-string map; presumably language code -> localized description.
    #[serde(default)]
    description: HashMap<String, String>,
    #[serde(default)]
    keywords: Vec<String>,
    // `None` when the key is absent or null.
    #[serde(default)]
    catalog: Option<String>,
}
964
/// Client for EU Open Data Portal
///
/// Provides access to:
/// - European Union datasets
/// - EU institutions data
/// - Member states open data
///
/// Each request sleeps for `rate_limit_delay` first; when `use_mock` is set
/// the public methods return canned data instead of calling the API.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::EuOpenDataClient;
///
/// let client = EuOpenDataClient::new();
/// let datasets = client.search_datasets("environment").await?;
/// let catalogs = client.list_catalogs().await?;
/// ```
pub struct EuOpenDataClient {
    // HTTP client used for every request.
    client: Client,
    // Root of the search API; endpoint paths are appended to it.
    base_url: String,
    // Pause applied before each outgoing request.
    rate_limit_delay: Duration,
    // Turns text into the embedding stored on each `SemanticVector`.
    embedder: Arc<SimpleEmbedder>,
    // When true, methods short-circuit to mock data.
    use_mock: bool,
}
987
988impl EuOpenDataClient {
989    /// Create a new EU Open Data client
990    pub fn new() -> Self {
991        Self::with_config(DEFAULT_EMBEDDING_DIM, false)
992    }
993
994    /// Create a new EU Open Data client with custom configuration
995    pub fn with_config(embedding_dim: usize, use_mock: bool) -> Self {
996        Self {
997            client: Client::builder()
998                .user_agent("RuVector-Discovery/1.0")
999                .timeout(Duration::from_secs(30))
1000                .build()
1001                .expect("Failed to create HTTP client"),
1002            base_url: "https://data.europa.eu/api/hub/search".to_string(),
1003            rate_limit_delay: Duration::from_millis(EU_OPENDATA_RATE_LIMIT_MS),
1004            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
1005            use_mock,
1006        }
1007    }
1008
1009    /// Search for datasets
1010    ///
1011    /// # Arguments
1012    /// * `query` - Search query string
1013    ///
1014    /// # Example
1015    /// ```rust,ignore
1016    /// let results = client.search_datasets("agriculture").await?;
1017    /// ```
1018    pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
1019        if self.use_mock {
1020            return Ok(self.get_mock_eu_datasets(query));
1021        }
1022
1023        let url = format!(
1024            "{}/datasets?q={}&limit=50",
1025            self.base_url,
1026            urlencoding::encode(query)
1027        );
1028
1029        sleep(self.rate_limit_delay).await;
1030        let response = self.fetch_with_retry(&url).await?;
1031        let json: serde_json::Value = response.json().await?;
1032
1033        let mut vectors = Vec::new();
1034
1035        if let Some(results) = json.get("result").and_then(|r| r.get("results")).and_then(|r| r.as_array()) {
1036            for dataset in results {
1037                let id = dataset.get("id").and_then(|i| i.as_str()).unwrap_or("");
1038                let title = dataset
1039                    .get("title")
1040                    .and_then(|t| t.get("en"))
1041                    .and_then(|e| e.as_str())
1042                    .or_else(|| dataset.get("title").and_then(|t| t.as_str()))
1043                    .unwrap_or("");
1044
1045                let description = dataset
1046                    .get("description")
1047                    .and_then(|d| d.get("en"))
1048                    .and_then(|e| e.as_str())
1049                    .or_else(|| dataset.get("description").and_then(|d| d.as_str()))
1050                    .unwrap_or("");
1051
1052                let text = format!("{} {}", title, description);
1053                let embedding = self.embedder.embed_text(&text);
1054
1055                let mut metadata = HashMap::new();
1056                metadata.insert("title".to_string(), title.to_string());
1057                metadata.insert("description".to_string(), description.to_string());
1058                metadata.insert("source".to_string(), "eu_opendata".to_string());
1059
1060                vectors.push(SemanticVector {
1061                    id: format!("EU:{}", id),
1062                    embedding,
1063                    domain: Domain::Government,
1064                    timestamp: Utc::now(),
1065                    metadata,
1066                });
1067            }
1068        }
1069
1070        Ok(vectors)
1071    }
1072
1073    /// Get a specific dataset by ID
1074    ///
1075    /// # Arguments
1076    /// * `id` - Dataset ID
1077    pub async fn get_dataset(&self, id: &str) -> Result<Option<SemanticVector>> {
1078        if self.use_mock {
1079            return Ok(Some(self.get_mock_eu_datasets("mock").into_iter().next().unwrap()));
1080        }
1081
1082        let url = format!("{}/datasets/{}", self.base_url, id);
1083
1084        sleep(self.rate_limit_delay).await;
1085        let response = self.fetch_with_retry(&url).await?;
1086        let dataset: serde_json::Value = response.json().await?;
1087
1088        let title = dataset
1089            .get("title")
1090            .and_then(|t| t.get("en"))
1091            .and_then(|e| e.as_str())
1092            .unwrap_or("");
1093
1094        let text = title.to_string();
1095        let embedding = self.embedder.embed_text(&text);
1096
1097        let mut metadata = HashMap::new();
1098        metadata.insert("title".to_string(), title.to_string());
1099        metadata.insert("source".to_string(), "eu_opendata".to_string());
1100
1101        Ok(Some(SemanticVector {
1102            id: format!("EU:{}", id),
1103            embedding,
1104            domain: Domain::Government,
1105            timestamp: Utc::now(),
1106            metadata,
1107        }))
1108    }
1109
1110    /// List available catalogs
1111    pub async fn list_catalogs(&self) -> Result<Vec<SemanticVector>> {
1112        if self.use_mock {
1113            return Ok(self.get_mock_catalogs());
1114        }
1115
1116        let url = format!("{}/catalogues", self.base_url);
1117
1118        sleep(self.rate_limit_delay).await;
1119        let response = self.fetch_with_retry(&url).await?;
1120        let json: serde_json::Value = response.json().await?;
1121
1122        let mut vectors = Vec::new();
1123
1124        if let Some(catalogs) = json.get("result").and_then(|r| r.as_array()) {
1125            for (idx, catalog) in catalogs.iter().take(50).enumerate() {
1126                let id = catalog.get("id").and_then(|i| i.as_str()).unwrap_or("");
1127                let title = catalog.get("title").and_then(|t| t.as_str()).unwrap_or("");
1128
1129                let embedding = self.embedder.embed_text(title);
1130
1131                let mut metadata = HashMap::new();
1132                metadata.insert("title".to_string(), title.to_string());
1133                metadata.insert("source".to_string(), "eu_catalog".to_string());
1134
1135                vectors.push(SemanticVector {
1136                    id: format!("EU_CAT:{}", id),
1137                    embedding,
1138                    domain: Domain::Government,
1139                    timestamp: Utc::now(),
1140                    metadata,
1141                });
1142            }
1143        }
1144
1145        Ok(vectors)
1146    }
1147
1148    /// Get catalog details
1149    pub async fn get_catalog(&self, id: &str) -> Result<Option<SemanticVector>> {
1150        if self.use_mock {
1151            return Ok(Some(self.get_mock_catalogs().into_iter().next().unwrap()));
1152        }
1153
1154        let url = format!("{}/catalogues/{}", self.base_url, id);
1155
1156        sleep(self.rate_limit_delay).await;
1157        let response = self.fetch_with_retry(&url).await?;
1158        let catalog: serde_json::Value = response.json().await?;
1159
1160        let title = catalog.get("title").and_then(|t| t.as_str()).unwrap_or("");
1161        let embedding = self.embedder.embed_text(title);
1162
1163        let mut metadata = HashMap::new();
1164        metadata.insert("title".to_string(), title.to_string());
1165        metadata.insert("source".to_string(), "eu_catalog".to_string());
1166
1167        Ok(Some(SemanticVector {
1168            id: format!("EU_CAT:{}", id),
1169            embedding,
1170            domain: Domain::Government,
1171            timestamp: Utc::now(),
1172            metadata,
1173        }))
1174    }
1175
1176    /// Search by theme
1177    pub async fn search_by_theme(&self, theme: &str) -> Result<Vec<SemanticVector>> {
1178        if self.use_mock {
1179            return Ok(self.get_mock_eu_datasets(theme));
1180        }
1181
1182        let url = format!(
1183            "{}/datasets?facets[theme][]={}&limit=50",
1184            self.base_url,
1185            urlencoding::encode(theme)
1186        );
1187
1188        sleep(self.rate_limit_delay).await;
1189        let response = self.fetch_with_retry(&url).await?;
1190        let json: serde_json::Value = response.json().await?;
1191
1192        // Similar parsing as search_datasets
1193        self.parse_eu_datasets(&json)
1194    }
1195
1196    fn parse_eu_datasets(&self, json: &serde_json::Value) -> Result<Vec<SemanticVector>> {
1197        let mut vectors = Vec::new();
1198
1199        if let Some(results) = json.get("result").and_then(|r| r.get("results")).and_then(|r| r.as_array()) {
1200            for dataset in results {
1201                let id = dataset.get("id").and_then(|i| i.as_str()).unwrap_or("");
1202                let title = dataset.get("title").and_then(|t| t.as_str()).unwrap_or("");
1203
1204                let embedding = self.embedder.embed_text(title);
1205
1206                let mut metadata = HashMap::new();
1207                metadata.insert("title".to_string(), title.to_string());
1208                metadata.insert("source".to_string(), "eu_opendata".to_string());
1209
1210                vectors.push(SemanticVector {
1211                    id: format!("EU:{}", id),
1212                    embedding,
1213                    domain: Domain::Government,
1214                    timestamp: Utc::now(),
1215                    metadata,
1216                });
1217            }
1218        }
1219
1220        Ok(vectors)
1221    }
1222
1223    fn get_mock_eu_datasets(&self, query: &str) -> Vec<SemanticVector> {
1224        let text = format!("EU dataset about {}", query);
1225        let embedding = self.embedder.embed_text(&text);
1226
1227        let mut metadata = HashMap::new();
1228        metadata.insert("title".to_string(), "Mock EU Dataset".to_string());
1229        metadata.insert("source".to_string(), "eu_mock".to_string());
1230
1231        vec![SemanticVector {
1232            id: "EU_MOCK:1".to_string(),
1233            embedding,
1234            domain: Domain::Government,
1235            timestamp: Utc::now(),
1236            metadata,
1237        }]
1238    }
1239
1240    fn get_mock_catalogs(&self) -> Vec<SemanticVector> {
1241        let text = "European Commission Data Catalog";
1242        let embedding = self.embedder.embed_text(text);
1243
1244        let mut metadata = HashMap::new();
1245        metadata.insert("title".to_string(), "EC Catalog".to_string());
1246        metadata.insert("source".to_string(), "eu_mock".to_string());
1247
1248        vec![SemanticVector {
1249            id: "EU_CAT_MOCK:1".to_string(),
1250            embedding,
1251            domain: Domain::Government,
1252            timestamp: Utc::now(),
1253            metadata,
1254        }]
1255    }
1256
1257    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
1258        let mut retries = 0;
1259        loop {
1260            match self.client.get(url).send().await {
1261                Ok(response) => {
1262                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
1263                        retries += 1;
1264                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1265                        continue;
1266                    }
1267                    return Ok(response);
1268                }
1269                Err(e) if retries < MAX_RETRIES => {
1270                    retries += 1;
1271                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1272                }
1273                Err(e) => return Err(FrameworkError::Network(e)),
1274            }
1275        }
1276    }
1277}
1278
1279impl Default for EuOpenDataClient {
1280    fn default() -> Self {
1281        Self::new()
1282    }
1283}
1284
1285// ============================================================================
1286// UK Government Data Client
1287// ============================================================================
1288
/// UK Gov CKAN API response (similar to Data.gov)
///
/// Envelope of a `package_search` reply as decoded by
/// `UkGovClient::search_datasets`.
#[derive(Debug, Deserialize)]
struct UkGovSearchResponse {
    // Defaults to `false` when absent. Not inspected by the code in this section.
    #[serde(default)]
    success: bool,
    // Required: a response without `result` fails to deserialize.
    result: UkGovResult,
}
1296
/// Payload of a UK Gov search response: a count plus the dataset list.
///
/// Both fields fall back to their defaults (`0` / empty list) when absent.
#[derive(Debug, Deserialize)]
struct UkGovResult {
    // Presumably the total number of matches reported by the API — TODO confirm.
    #[serde(default)]
    count: u64,
    // Dataset entries returned in this response.
    #[serde(default)]
    results: Vec<UkGovDataset>,
}
1304
/// A single UK Gov dataset record.
///
/// `id` is mandatory; the remaining fields default to empty values when the
/// key is absent.
/// NOTE(review): `#[serde(default)]` covers a missing key but not an explicit
/// JSON `null` for `title`/`notes` — confirm the API never sends `null` there.
#[derive(Debug, Deserialize)]
struct UkGovDataset {
    // Dataset identifier; used to build the `UKGOV:{id}` vector id.
    id: String,
    #[serde(default)]
    title: String,
    // Free-text notes; stored as the `description` metadata entry.
    #[serde(default)]
    notes: String,
    // Publishing organization, `None` when absent or null.
    #[serde(default)]
    organization: Option<UkGovOrganization>,
}
1315
/// Publishing organization attached to a UK Gov dataset.
#[derive(Debug, Deserialize)]
struct UkGovOrganization {
    // Display title of the organization; empty when the key is absent.
    #[serde(default)]
    title: String,
}
1321
/// Client for UK Government data
///
/// Provides access to:
/// - UK public sector datasets
/// - Government department data
/// - National statistics
///
/// Each request sleeps for `rate_limit_delay` first; when `use_mock` is set
/// the public methods return canned data instead of calling the API.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::UkGovClient;
///
/// let client = UkGovClient::new();
/// let datasets = client.search_datasets("health").await?;
/// let publishers = client.list_publishers().await?;
/// ```
pub struct UkGovClient {
    // HTTP client used for every request.
    client: Client,
    // Root of the action API; action names are appended to it.
    base_url: String,
    // Pause applied before each outgoing request.
    rate_limit_delay: Duration,
    // Turns text into the embedding stored on each `SemanticVector`.
    embedder: Arc<SimpleEmbedder>,
    // When true, methods short-circuit to mock data.
    use_mock: bool,
}
1344
1345impl UkGovClient {
1346    /// Create a new UK Gov client
1347    pub fn new() -> Self {
1348        Self::with_config(DEFAULT_EMBEDDING_DIM, false)
1349    }
1350
1351    /// Create a new UK Gov client with custom configuration
1352    pub fn with_config(embedding_dim: usize, use_mock: bool) -> Self {
1353        Self {
1354            client: Client::builder()
1355                .user_agent("RuVector-Discovery/1.0")
1356                .timeout(Duration::from_secs(30))
1357                .build()
1358                .expect("Failed to create HTTP client"),
1359            base_url: "https://data.gov.uk/api/action".to_string(),
1360            rate_limit_delay: Duration::from_millis(UK_GOV_RATE_LIMIT_MS),
1361            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
1362            use_mock,
1363        }
1364    }
1365
1366    /// Search for datasets
1367    pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
1368        if self.use_mock {
1369            return Ok(self.get_mock_uk_datasets(query));
1370        }
1371
1372        let url = format!(
1373            "{}/package_search?q={}&rows=50",
1374            self.base_url,
1375            urlencoding::encode(query)
1376        );
1377
1378        sleep(self.rate_limit_delay).await;
1379        let response = self.fetch_with_retry(&url).await?;
1380        let search_response: UkGovSearchResponse = response.json().await?;
1381
1382        let mut vectors = Vec::new();
1383        for dataset in search_response.result.results {
1384            let org_name = dataset
1385                .organization
1386                .as_ref()
1387                .map(|o| o.title.as_str())
1388                .unwrap_or("Unknown");
1389
1390            let text = format!("{} {} {}", dataset.title, dataset.notes, org_name);
1391            let embedding = self.embedder.embed_text(&text);
1392
1393            let mut metadata = HashMap::new();
1394            metadata.insert("title".to_string(), dataset.title);
1395            metadata.insert("description".to_string(), dataset.notes);
1396            metadata.insert("publisher".to_string(), org_name.to_string());
1397            metadata.insert("source".to_string(), "ukgov".to_string());
1398
1399            vectors.push(SemanticVector {
1400                id: format!("UKGOV:{}", dataset.id),
1401                embedding,
1402                domain: Domain::Government,
1403                timestamp: Utc::now(),
1404                metadata,
1405            });
1406        }
1407
1408        Ok(vectors)
1409    }
1410
1411    /// Get a specific dataset by ID
1412    pub async fn get_dataset(&self, id: &str) -> Result<Option<SemanticVector>> {
1413        if self.use_mock {
1414            return Ok(Some(self.get_mock_uk_datasets("mock").into_iter().next().unwrap()));
1415        }
1416
1417        let url = format!("{}/package_show?id={}", self.base_url, id);
1418
1419        sleep(self.rate_limit_delay).await;
1420        let response = self.fetch_with_retry(&url).await?;
1421        let json: serde_json::Value = response.json().await?;
1422
1423        if let Some(result) = json.get("result") {
1424            let dataset: UkGovDataset = serde_json::from_value(result.clone())?;
1425
1426            let text = format!("{} {}", dataset.title, dataset.notes);
1427            let embedding = self.embedder.embed_text(&text);
1428
1429            let mut metadata = HashMap::new();
1430            metadata.insert("title".to_string(), dataset.title);
1431            metadata.insert("description".to_string(), dataset.notes);
1432            metadata.insert("source".to_string(), "ukgov".to_string());
1433
1434            return Ok(Some(SemanticVector {
1435                id: format!("UKGOV:{}", dataset.id),
1436                embedding,
1437                domain: Domain::Government,
1438                timestamp: Utc::now(),
1439                metadata,
1440            }));
1441        }
1442
1443        Ok(None)
1444    }
1445
1446    /// List publishers
1447    pub async fn list_publishers(&self) -> Result<Vec<SemanticVector>> {
1448        if self.use_mock {
1449            return Ok(self.get_mock_publishers());
1450        }
1451
1452        let url = format!("{}/organization_list?all_fields=true", self.base_url);
1453
1454        sleep(self.rate_limit_delay).await;
1455        let response = self.fetch_with_retry(&url).await?;
1456        let json: serde_json::Value = response.json().await?;
1457
1458        let mut vectors = Vec::new();
1459
1460        if let Some(result) = json.get("result").and_then(|r| r.as_array()) {
1461            for (idx, pub_data) in result.iter().take(50).enumerate() {
1462                let title = pub_data.get("title").and_then(|t| t.as_str()).unwrap_or("");
1463                let name = pub_data.get("name").and_then(|n| n.as_str()).unwrap_or("");
1464
1465                let embedding = self.embedder.embed_text(title);
1466
1467                let mut metadata = HashMap::new();
1468                metadata.insert("title".to_string(), title.to_string());
1469                metadata.insert("name".to_string(), name.to_string());
1470                metadata.insert("source".to_string(), "ukgov_publisher".to_string());
1471
1472                vectors.push(SemanticVector {
1473                    id: format!("UKGOV_PUB:{}", idx),
1474                    embedding,
1475                    domain: Domain::Government,
1476                    timestamp: Utc::now(),
1477                    metadata,
1478                });
1479            }
1480        }
1481
1482        Ok(vectors)
1483    }
1484
1485    /// Get publisher details
1486    pub async fn get_publisher(&self, id: &str) -> Result<Option<SemanticVector>> {
1487        if self.use_mock {
1488            return Ok(Some(self.get_mock_publishers().into_iter().next().unwrap()));
1489        }
1490
1491        let url = format!("{}/organization_show?id={}", self.base_url, id);
1492
1493        sleep(self.rate_limit_delay).await;
1494        let response = self.fetch_with_retry(&url).await?;
1495        let json: serde_json::Value = response.json().await?;
1496
1497        if let Some(result) = json.get("result") {
1498            let title = result.get("title").and_then(|t| t.as_str()).unwrap_or("");
1499            let embedding = self.embedder.embed_text(title);
1500
1501            let mut metadata = HashMap::new();
1502            metadata.insert("title".to_string(), title.to_string());
1503            metadata.insert("source".to_string(), "ukgov_publisher".to_string());
1504
1505            return Ok(Some(SemanticVector {
1506                id: format!("UKGOV_PUB:{}", id),
1507                embedding,
1508                domain: Domain::Government,
1509                timestamp: Utc::now(),
1510                metadata,
1511            }));
1512        }
1513
1514        Ok(None)
1515    }
1516
1517    fn get_mock_uk_datasets(&self, query: &str) -> Vec<SemanticVector> {
1518        let text = format!("UK dataset about {}", query);
1519        let embedding = self.embedder.embed_text(&text);
1520
1521        let mut metadata = HashMap::new();
1522        metadata.insert("title".to_string(), "Mock UK Dataset".to_string());
1523        metadata.insert("source".to_string(), "ukgov_mock".to_string());
1524
1525        vec![SemanticVector {
1526            id: "UKGOV_MOCK:1".to_string(),
1527            embedding,
1528            domain: Domain::Government,
1529            timestamp: Utc::now(),
1530            metadata,
1531        }]
1532    }
1533
1534    fn get_mock_publishers(&self) -> Vec<SemanticVector> {
1535        let text = "Department of Health and Social Care";
1536        let embedding = self.embedder.embed_text(text);
1537
1538        let mut metadata = HashMap::new();
1539        metadata.insert("title".to_string(), "DHSC".to_string());
1540        metadata.insert("source".to_string(), "ukgov_mock".to_string());
1541
1542        vec![SemanticVector {
1543            id: "UKGOV_PUB_MOCK:1".to_string(),
1544            embedding,
1545            domain: Domain::Government,
1546            timestamp: Utc::now(),
1547            metadata,
1548        }]
1549    }
1550
1551    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
1552        let mut retries = 0;
1553        loop {
1554            match self.client.get(url).send().await {
1555                Ok(response) => {
1556                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
1557                        retries += 1;
1558                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1559                        continue;
1560                    }
1561                    return Ok(response);
1562                }
1563                Err(e) if retries < MAX_RETRIES => {
1564                    retries += 1;
1565                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1566                }
1567                Err(e) => return Err(FrameworkError::Network(e)),
1568            }
1569        }
1570    }
1571}
1572
1573impl Default for UkGovClient {
1574    fn default() -> Self {
1575        Self::new()
1576    }
1577}
1578
1579// ============================================================================
1580// World Bank Client (Government Data Focus)
1581// ============================================================================
1582
1583/// World Bank country info
1584#[derive(Debug, Deserialize)]
1585struct WbCountry {
1586    id: String,
1587    #[serde(default)]
1588    name: String,
1589    #[serde(default)]
1590    capitalCity: String,
1591    #[serde(default)]
1592    longitude: String,
1593    #[serde(default)]
1594    latitude: String,
1595}
1596
1597/// World Bank indicator info
1598#[derive(Debug, Deserialize)]
1599struct WbIndicator {
1600    id: String,
1601    #[serde(default)]
1602    name: String,
1603    #[serde(default)]
1604    sourceNote: String,
1605}
1606
/// World Bank indicator data
///
/// One observation record. Every field falls back to its default when the
/// key is absent from the response.
#[derive(Debug, Deserialize)]
struct WbIndicatorData {
    // Indicator reference; defaults to empty `id`/`value` when absent.
    #[serde(default)]
    indicator: WbIndicatorInfo,
    // Country reference; defaults to empty `id`/`value` when absent.
    #[serde(default)]
    country: WbCountryInfo,
    #[serde(default)]
    countryiso3code: String,
    // Kept as the string found in the response.
    #[serde(default)]
    date: String,
    // `None` when the key is absent or null.
    #[serde(default)]
    value: Option<f64>,
}
1621
/// Indicator reference embedded in a `WbIndicatorData` record.
///
/// Both keys are required whenever the object itself is present.
#[derive(Debug, Default, Deserialize)]
struct WbIndicatorInfo {
    id: String,
    // Presumably the human-readable indicator name — TODO confirm.
    value: String,
}
1627
/// Country reference embedded in a `WbIndicatorData` record.
///
/// Both keys are required whenever the object itself is present.
#[derive(Debug, Default, Deserialize)]
struct WbCountryInfo {
    id: String,
    // Presumably the human-readable country name — TODO confirm.
    value: String,
}
1633
/// Client for World Bank Open Data API
///
/// Provides access to:
/// - Global development indicators
/// - Country statistics
/// - Economic and social data
/// - Climate and environment metrics
///
/// Each request sleeps for `rate_limit_delay` first; when `use_mock` is set
/// the public methods return canned data instead of calling the API.
///
/// # Example
/// ```rust,ignore
/// use ruvector_data_framework::WorldBankClient;
///
/// let client = WorldBankClient::new();
/// let countries = client.get_countries().await?;
/// let indicators = client.get_indicators("economy").await?;
/// let data = client.get_indicator_data("NY.GDP.MKTP.CD", "USA", "2015:2020").await?;
/// ```
// NOTE(review): `WorldBankClient::new()` returns a `Result`, so the example
// above needs a `?` (or similar) after `new()` — confirm and update.
pub struct WorldBankClient {
    // HTTP client used for every request.
    client: Client,
    // Root of the API; endpoint paths are appended to it.
    base_url: String,
    // Pause applied before each outgoing request.
    rate_limit_delay: Duration,
    // Turns text into the embedding stored on each `SemanticVector`.
    embedder: Arc<SimpleEmbedder>,
    // When true, methods short-circuit to mock data.
    use_mock: bool,
}
1658
1659impl WorldBankClient {
1660    /// Create a new World Bank client
1661    pub fn new() -> Result<Self> {
1662        Ok(Self::with_config(DEFAULT_EMBEDDING_DIM, false)?)
1663    }
1664
1665    /// Create a new World Bank client with custom configuration
1666    pub fn with_config(embedding_dim: usize, use_mock: bool) -> Result<Self> {
1667        let client = Client::builder()
1668            .user_agent("RuVector-Discovery/1.0")
1669            .timeout(Duration::from_secs(30))
1670            .build()
1671            .map_err(FrameworkError::Network)?;
1672
1673        Ok(Self {
1674            client,
1675            base_url: "https://api.worldbank.org/v2".to_string(),
1676            rate_limit_delay: Duration::from_millis(WORLDBANK_RATE_LIMIT_MS),
1677            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
1678            use_mock,
1679        })
1680    }
1681
1682    /// Get list of countries
1683    pub async fn get_countries(&self) -> Result<Vec<SemanticVector>> {
1684        if self.use_mock {
1685            return Ok(self.get_mock_countries());
1686        }
1687
1688        let url = format!("{}/country?format=json&per_page=300", self.base_url);
1689
1690        sleep(self.rate_limit_delay).await;
1691        let response = self.fetch_with_retry(&url).await?;
1692        let text = response.text().await?;
1693
1694        let json_values: Vec<serde_json::Value> = serde_json::from_str(&text)?;
1695        if json_values.len() < 2 {
1696            return Ok(Vec::new());
1697        }
1698
1699        let countries: Vec<WbCountry> = serde_json::from_value(json_values[1].clone())?;
1700
1701        let mut vectors = Vec::new();
1702        for country in countries {
1703            let text = format!("{} {}", country.name, country.capitalCity);
1704            let embedding = self.embedder.embed_text(&text);
1705
1706            let mut metadata = HashMap::new();
1707            metadata.insert("country_code".to_string(), country.id.clone());
1708            metadata.insert("name".to_string(), country.name);
1709            metadata.insert("capital".to_string(), country.capitalCity);
1710            metadata.insert("source".to_string(), "worldbank_country".to_string());
1711
1712            vectors.push(SemanticVector {
1713                id: format!("WB_COUNTRY:{}", country.id),
1714                embedding,
1715                domain: Domain::Government,
1716                timestamp: Utc::now(),
1717                metadata,
1718            });
1719        }
1720
1721        Ok(vectors)
1722    }
1723
1724    /// Get indicators by topic
1725    pub async fn get_indicators(&self, topic: &str) -> Result<Vec<SemanticVector>> {
1726        if self.use_mock {
1727            return Ok(self.get_mock_indicators(topic));
1728        }
1729
1730        let url = format!(
1731            "{}/indicator?format=json&per_page=100&source=2",
1732            self.base_url
1733        );
1734
1735        sleep(self.rate_limit_delay).await;
1736        let response = self.fetch_with_retry(&url).await?;
1737        let text = response.text().await?;
1738
1739        let json_values: Vec<serde_json::Value> = serde_json::from_str(&text)?;
1740        if json_values.len() < 2 {
1741            return Ok(Vec::new());
1742        }
1743
1744        let indicators: Vec<WbIndicator> = serde_json::from_value(json_values[1].clone())?;
1745
1746        let mut vectors = Vec::new();
1747        for indicator in indicators.into_iter().take(50) {
1748            // Filter by topic if specified
1749            if !topic.is_empty()
1750                && !indicator.name.to_lowercase().contains(&topic.to_lowercase())
1751                && !indicator.sourceNote.to_lowercase().contains(&topic.to_lowercase())
1752            {
1753                continue;
1754            }
1755
1756            let text = format!("{} {}", indicator.name, indicator.sourceNote);
1757            let embedding = self.embedder.embed_text(&text);
1758
1759            let mut metadata = HashMap::new();
1760            metadata.insert("indicator_id".to_string(), indicator.id.clone());
1761            metadata.insert("name".to_string(), indicator.name);
1762            metadata.insert("description".to_string(), indicator.sourceNote);
1763            metadata.insert("source".to_string(), "worldbank_indicator".to_string());
1764
1765            vectors.push(SemanticVector {
1766                id: format!("WB_IND:{}", indicator.id),
1767                embedding,
1768                domain: Domain::Government,
1769                timestamp: Utc::now(),
1770                metadata,
1771            });
1772        }
1773
1774        Ok(vectors)
1775    }
1776
1777    /// Get indicator data for a country (compatibility method)
1778    ///
1779    /// # Arguments
1780    /// * `country` - Country code (e.g., "USA", "CHN") or "all"
1781    /// * `indicator` - Indicator code (e.g., "NY.GDP.MKTP.CD")
1782    ///
1783    /// This method fetches the last 10 years of data by default
1784    pub async fn get_indicator(
1785        &self,
1786        country: &str,
1787        indicator: &str,
1788    ) -> Result<Vec<SemanticVector>> {
1789        let current_year = chrono::Utc::now().year();
1790        let start_year = current_year - 10;
1791        let date_range = format!("{}:{}", start_year, current_year);
1792        self.get_indicator_data(indicator, country, &date_range).await
1793    }
1794
1795    /// Get indicator data for a country with custom date range
1796    ///
1797    /// # Arguments
1798    /// * `indicator` - Indicator code (e.g., "NY.GDP.MKTP.CD")
1799    /// * `country` - Country code (e.g., "USA", "CHN") or "all"
1800    /// * `date_range` - Date range (e.g., "2015:2020", "2020")
1801    pub async fn get_indicator_data(
1802        &self,
1803        indicator: &str,
1804        country: &str,
1805        date_range: &str,
1806    ) -> Result<Vec<SemanticVector>> {
1807        if self.use_mock {
1808            return Ok(self.get_mock_indicator_data(indicator, country));
1809        }
1810
1811        let url = format!(
1812            "{}/country/{}/indicator/{}?format=json&date={}&per_page=100",
1813            self.base_url, country, indicator, date_range
1814        );
1815
1816        sleep(self.rate_limit_delay).await;
1817        let response = self.fetch_with_retry(&url).await?;
1818        let text = response.text().await?;
1819
1820        let json_values: Vec<serde_json::Value> = serde_json::from_str(&text)?;
1821        if json_values.len() < 2 {
1822            return Ok(Vec::new());
1823        }
1824
1825        let data_points: Vec<WbIndicatorData> = serde_json::from_value(json_values[1].clone())?;
1826
1827        let mut vectors = Vec::new();
1828        for data in data_points {
1829            let value = match data.value {
1830                Some(v) => v,
1831                None => continue,
1832            };
1833
1834            let year = data.date.parse::<i32>().unwrap_or(2020);
1835            let date = NaiveDate::from_ymd_opt(year, 1, 1)
1836                .and_then(|d| d.and_hms_opt(0, 0, 0))
1837                .map(|dt| dt.and_utc())
1838                .unwrap_or_else(Utc::now);
1839
1840            let text = format!(
1841                "{} {} in {}: {}",
1842                data.country.value, data.indicator.value, data.date, value
1843            );
1844            let embedding = self.embedder.embed_text(&text);
1845
1846            let mut metadata = HashMap::new();
1847            metadata.insert("country".to_string(), data.country.value);
1848            metadata.insert("country_code".to_string(), data.countryiso3code.clone());
1849            metadata.insert("indicator".to_string(), data.indicator.value);
1850            metadata.insert("indicator_id".to_string(), data.indicator.id);
1851            metadata.insert("year".to_string(), data.date.clone());
1852            metadata.insert("value".to_string(), value.to_string());
1853            metadata.insert("source".to_string(), "worldbank".to_string());
1854
1855            vectors.push(SemanticVector {
1856                id: format!("WB:{}:{}:{}", country, indicator, data.date),
1857                embedding,
1858                domain: Domain::Government,
1859                timestamp: date,
1860                metadata,
1861            });
1862        }
1863
1864        Ok(vectors)
1865    }
1866
1867    /// Search for indicators
1868    pub async fn search_indicators(&self, query: &str) -> Result<Vec<SemanticVector>> {
1869        // World Bank doesn't have a direct search API, so we fetch and filter
1870        self.get_indicators(query).await
1871    }
1872
1873    fn get_mock_countries(&self) -> Vec<SemanticVector> {
1874        let text = "United States Washington D.C.";
1875        let embedding = self.embedder.embed_text(text);
1876
1877        let mut metadata = HashMap::new();
1878        metadata.insert("country_code".to_string(), "USA".to_string());
1879        metadata.insert("name".to_string(), "United States".to_string());
1880        metadata.insert("source".to_string(), "worldbank_mock".to_string());
1881
1882        vec![SemanticVector {
1883            id: "WB_COUNTRY_MOCK:USA".to_string(),
1884            embedding,
1885            domain: Domain::Government,
1886            timestamp: Utc::now(),
1887            metadata,
1888        }]
1889    }
1890
1891    fn get_mock_indicators(&self, topic: &str) -> Vec<SemanticVector> {
1892        let text = format!("GDP indicator {}", topic);
1893        let embedding = self.embedder.embed_text(&text);
1894
1895        let mut metadata = HashMap::new();
1896        metadata.insert("indicator_id".to_string(), "NY.GDP.MKTP.CD".to_string());
1897        metadata.insert("name".to_string(), "GDP".to_string());
1898        metadata.insert("source".to_string(), "worldbank_mock".to_string());
1899
1900        vec![SemanticVector {
1901            id: "WB_IND_MOCK:1".to_string(),
1902            embedding,
1903            domain: Domain::Government,
1904            timestamp: Utc::now(),
1905            metadata,
1906        }]
1907    }
1908
1909    fn get_mock_indicator_data(&self, indicator: &str, country: &str) -> Vec<SemanticVector> {
1910        let text = format!("{} {} GDP: 20000000000000", country, indicator);
1911        let embedding = self.embedder.embed_text(&text);
1912
1913        let mut metadata = HashMap::new();
1914        metadata.insert("country_code".to_string(), country.to_string());
1915        metadata.insert("indicator_id".to_string(), indicator.to_string());
1916        metadata.insert("value".to_string(), "20000000000000".to_string());
1917        metadata.insert("source".to_string(), "worldbank_mock".to_string());
1918
1919        vec![SemanticVector {
1920            id: format!("WB_MOCK:{}:{}:2020", country, indicator),
1921            embedding,
1922            domain: Domain::Government,
1923            timestamp: Utc::now(),
1924            metadata,
1925        }]
1926    }
1927
1928    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
1929        let mut retries = 0;
1930        loop {
1931            match self.client.get(url).send().await {
1932                Ok(response) => {
1933                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
1934                        retries += 1;
1935                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1936                        continue;
1937                    }
1938                    return Ok(response);
1939                }
1940                Err(e) if retries < MAX_RETRIES => {
1941                    retries += 1;
1942                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
1943                }
1944                Err(e) => return Err(FrameworkError::Network(e)),
1945            }
1946        }
1947    }
1948}
1949
1950impl Default for WorldBankClient {
1951    fn default() -> Self {
1952        Self::new().expect("Failed to create WorldBank client")
1953    }
1954}
1955
1956// ============================================================================
1957// United Nations Data Client
1958// ============================================================================
1959
1960/// UN Data REST API response structures
1961#[derive(Debug, Deserialize)]
1962struct UnDataResponse {
1963    #[serde(default)]
1964    data: Vec<UnDataRecord>,
1965}
1966
1967#[derive(Debug, Deserialize)]
1968struct UnDataRecord {
1969    #[serde(default)]
1970    indicator: String,
1971    #[serde(default)]
1972    country: String,
1973    #[serde(default)]
1974    year: String,
1975    #[serde(default)]
1976    value: String,
1977}
1978
1979/// Client for United Nations Data API
1980///
1981/// Provides access to:
1982/// - UN Statistics Division data
1983/// - Sustainable Development Goals (SDG) indicators
1984/// - Global economic and social statistics
1985///
1986/// # Example
1987/// ```rust,ignore
1988/// use ruvector_data_framework::UNDataClient;
1989///
1990/// let client = UNDataClient::new();
1991/// let indicators = client.get_indicators().await?;
1992/// let data = client.get_data("population", "USA").await?;
1993/// let search = client.search_datasets("climate").await?;
1994/// ```
1995pub struct UNDataClient {
1996    client: Client,
1997    base_url: String,
1998    rate_limit_delay: Duration,
1999    embedder: Arc<SimpleEmbedder>,
2000    use_mock: bool,
2001}
2002
2003impl UNDataClient {
2004    /// Create a new UN Data client
2005    pub fn new() -> Self {
2006        Self::with_config(DEFAULT_EMBEDDING_DIM, false)
2007    }
2008
2009    /// Create a new UN Data client with custom configuration
2010    pub fn with_config(embedding_dim: usize, use_mock: bool) -> Self {
2011        Self {
2012            client: Client::builder()
2013                .user_agent("RuVector-Discovery/1.0")
2014                .timeout(Duration::from_secs(30))
2015                .build()
2016                .expect("Failed to create HTTP client"),
2017            base_url: "https://data.un.org/ws/rest".to_string(),
2018            rate_limit_delay: Duration::from_millis(UNDATA_RATE_LIMIT_MS),
2019            embedder: Arc::new(SimpleEmbedder::new(embedding_dim)),
2020            use_mock,
2021        }
2022    }
2023
2024    /// Get available indicators
2025    pub async fn get_indicators(&self) -> Result<Vec<SemanticVector>> {
2026        if self.use_mock {
2027            return Ok(self.get_mock_un_indicators());
2028        }
2029
2030        // UN Data API has limited public endpoints, using mock for now
2031        Ok(self.get_mock_un_indicators())
2032    }
2033
2034    /// Get data for an indicator and country
2035    pub async fn get_data(&self, indicator: &str, country: &str) -> Result<Vec<SemanticVector>> {
2036        if self.use_mock {
2037            return Ok(self.get_mock_un_data(indicator, country));
2038        }
2039
2040        // UN Data API requires specific dataset IDs
2041        Ok(self.get_mock_un_data(indicator, country))
2042    }
2043
2044    /// Search datasets
2045    pub async fn search_datasets(&self, query: &str) -> Result<Vec<SemanticVector>> {
2046        if self.use_mock {
2047            return Ok(self.get_mock_un_datasets(query));
2048        }
2049
2050        Ok(self.get_mock_un_datasets(query))
2051    }
2052
2053    fn get_mock_un_indicators(&self) -> Vec<SemanticVector> {
2054        let indicators = vec![
2055            ("Population", "Total population"),
2056            ("GDP", "Gross Domestic Product"),
2057            ("CO2 Emissions", "Carbon dioxide emissions"),
2058        ];
2059
2060        let mut vectors = Vec::new();
2061        for (idx, (name, description)) in indicators.iter().enumerate() {
2062            let text = format!("{} {}", name, description);
2063            let embedding = self.embedder.embed_text(&text);
2064
2065            let mut metadata = HashMap::new();
2066            metadata.insert("indicator".to_string(), name.to_string());
2067            metadata.insert("description".to_string(), description.to_string());
2068            metadata.insert("source".to_string(), "undata_mock".to_string());
2069
2070            vectors.push(SemanticVector {
2071                id: format!("UN_IND_MOCK:{}", idx),
2072                embedding,
2073                domain: Domain::Government,
2074                timestamp: Utc::now(),
2075                metadata,
2076            });
2077        }
2078
2079        vectors
2080    }
2081
2082    fn get_mock_un_data(&self, indicator: &str, country: &str) -> Vec<SemanticVector> {
2083        let text = format!("{} {} 2020: 100000", country, indicator);
2084        let embedding = self.embedder.embed_text(&text);
2085
2086        let mut metadata = HashMap::new();
2087        metadata.insert("indicator".to_string(), indicator.to_string());
2088        metadata.insert("country".to_string(), country.to_string());
2089        metadata.insert("year".to_string(), "2020".to_string());
2090        metadata.insert("value".to_string(), "100000".to_string());
2091        metadata.insert("source".to_string(), "undata_mock".to_string());
2092
2093        vec![SemanticVector {
2094            id: format!("UN_MOCK:{}:{}:2020", country, indicator),
2095            embedding,
2096            domain: Domain::Government,
2097            timestamp: Utc::now(),
2098            metadata,
2099        }]
2100    }
2101
2102    fn get_mock_un_datasets(&self, query: &str) -> Vec<SemanticVector> {
2103        let text = format!("UN dataset about {}", query);
2104        let embedding = self.embedder.embed_text(&text);
2105
2106        let mut metadata = HashMap::new();
2107        metadata.insert("title".to_string(), format!("UN {} Data", query));
2108        metadata.insert("description".to_string(), "Mock UN dataset".to_string());
2109        metadata.insert("source".to_string(), "undata_mock".to_string());
2110
2111        vec![SemanticVector {
2112            id: "UN_DS_MOCK:1".to_string(),
2113            embedding,
2114            domain: Domain::Government,
2115            timestamp: Utc::now(),
2116            metadata,
2117        }]
2118    }
2119
2120    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
2121        let mut retries = 0;
2122        loop {
2123            match self.client.get(url).send().await {
2124                Ok(response) => {
2125                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES {
2126                        retries += 1;
2127                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
2128                        continue;
2129                    }
2130                    return Ok(response);
2131                }
2132                Err(e) if retries < MAX_RETRIES => {
2133                    retries += 1;
2134                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
2135                }
2136                Err(e) => return Err(FrameworkError::Network(e)),
2137            }
2138        }
2139    }
2140}
2141
2142impl Default for UNDataClient {
2143    fn default() -> Self {
2144        Self::new()
2145    }
2146}
2147
2148// ============================================================================
2149// Tests
2150// ============================================================================
2151
/// Unit tests for the government API clients.
///
/// All request-style tests run the clients in mock mode, so no network access is
/// needed. Results are unwrapped with `expect` so that a failure prints the
/// underlying error rather than just "assertion failed".
#[cfg(test)]
mod tests {
    use super::*;

    // Census Client Tests
    #[test]
    fn test_census_client_creation() {
        let client = CensusClient::new(None);
        assert_eq!(client.base_url, "https://api.census.gov/data");
    }

    #[test]
    fn test_census_client_with_key() {
        let client = CensusClient::new(Some("test_key".to_string()));
        assert!(client.api_key.is_some());
    }

    #[tokio::test]
    async fn test_census_mock_population() {
        let client = CensusClient::with_config(None, 256, true);
        let vectors = client
            .get_population(2020, "state:*")
            .await
            .expect("mock population query should succeed");
        assert!(!vectors.is_empty());
        assert_eq!(vectors[0].domain, Domain::Government);
    }

    #[tokio::test]
    async fn test_census_mock_datasets() {
        let client = CensusClient::with_config(None, 256, true);
        let vectors = client
            .get_available_datasets()
            .await
            .expect("mock dataset listing should succeed");
        assert!(!vectors.is_empty());
    }

    #[tokio::test]
    async fn test_census_mock_variables() {
        let client = CensusClient::with_config(None, 256, true);
        client
            .search_variables("acs/acs5", "income")
            .await
            .expect("mock variable search should succeed");
    }

    // Data.gov Client Tests
    #[test]
    fn test_datagov_client_creation() {
        let client = DataGovClient::new();
        assert_eq!(client.base_url, "https://catalog.data.gov/api/3");
    }

    #[tokio::test]
    async fn test_datagov_mock_search() {
        let client = DataGovClient::with_config(256, true);
        let vectors = client
            .search_datasets("climate")
            .await
            .expect("mock dataset search should succeed");
        assert!(!vectors.is_empty());
        assert_eq!(vectors[0].domain, Domain::Government);
    }

    #[tokio::test]
    async fn test_datagov_mock_organizations() {
        let client = DataGovClient::with_config(256, true);
        client
            .list_organizations()
            .await
            .expect("mock organization listing should succeed");
    }

    #[tokio::test]
    async fn test_datagov_mock_tags() {
        let client = DataGovClient::with_config(256, true);
        client
            .list_tags()
            .await
            .expect("mock tag listing should succeed");
    }

    // EU Open Data Client Tests
    #[test]
    fn test_eu_client_creation() {
        let client = EuOpenDataClient::new();
        assert_eq!(client.base_url, "https://data.europa.eu/api/hub/search");
    }

    #[tokio::test]
    async fn test_eu_mock_search() {
        let client = EuOpenDataClient::with_config(256, true);
        let vectors = client
            .search_datasets("environment")
            .await
            .expect("mock dataset search should succeed");
        assert!(!vectors.is_empty());
    }

    #[tokio::test]
    async fn test_eu_mock_catalogs() {
        let client = EuOpenDataClient::with_config(256, true);
        client
            .list_catalogs()
            .await
            .expect("mock catalog listing should succeed");
    }

    // UK Gov Client Tests
    #[test]
    fn test_ukgov_client_creation() {
        let client = UkGovClient::new();
        assert_eq!(client.base_url, "https://data.gov.uk/api/action");
    }

    #[tokio::test]
    async fn test_ukgov_mock_search() {
        let client = UkGovClient::with_config(256, true);
        let vectors = client
            .search_datasets("health")
            .await
            .expect("mock dataset search should succeed");
        assert!(!vectors.is_empty());
    }

    #[tokio::test]
    async fn test_ukgov_mock_publishers() {
        let client = UkGovClient::with_config(256, true);
        client
            .list_publishers()
            .await
            .expect("mock publisher listing should succeed");
    }

    // World Bank Client Tests
    #[test]
    fn test_worldbank_client_creation() {
        let client = WorldBankClient::new().expect("WorldBank client should build");
        assert_eq!(client.base_url, "https://api.worldbank.org/v2");
    }

    #[tokio::test]
    async fn test_worldbank_mock_countries() {
        let client =
            WorldBankClient::with_config(256, true).expect("WorldBank client should build");
        let vectors = client
            .get_countries()
            .await
            .expect("mock country listing should succeed");
        assert!(!vectors.is_empty());
    }

    #[tokio::test]
    async fn test_worldbank_mock_indicators() {
        let client =
            WorldBankClient::with_config(256, true).expect("WorldBank client should build");
        client
            .get_indicators("gdp")
            .await
            .expect("mock indicator listing should succeed");
    }

    #[tokio::test]
    async fn test_worldbank_mock_data() {
        let client =
            WorldBankClient::with_config(256, true).expect("WorldBank client should build");
        client
            .get_indicator_data("NY.GDP.MKTP.CD", "USA", "2020")
            .await
            .expect("mock indicator data query should succeed");
    }

    // UN Data Client Tests
    #[test]
    fn test_undata_client_creation() {
        let client = UNDataClient::new();
        assert_eq!(client.base_url, "https://data.un.org/ws/rest");
    }

    #[tokio::test]
    async fn test_undata_mock_indicators() {
        let client = UNDataClient::with_config(256, true);
        let vectors = client
            .get_indicators()
            .await
            .expect("mock indicator listing should succeed");
        assert!(!vectors.is_empty());
    }

    #[tokio::test]
    async fn test_undata_mock_data() {
        let client = UNDataClient::with_config(256, true);
        client
            .get_data("population", "USA")
            .await
            .expect("mock data query should succeed");
    }

    #[tokio::test]
    async fn test_undata_mock_search() {
        let client = UNDataClient::with_config(256, true);
        client
            .search_datasets("climate")
            .await
            .expect("mock dataset search should succeed");
    }

    // Rate limiting tests
    #[test]
    fn test_rate_limits() {
        let census = CensusClient::new(None);
        assert_eq!(census.rate_limit_delay, Duration::from_millis(CENSUS_RATE_LIMIT_MS));

        let datagov = DataGovClient::new();
        assert_eq!(datagov.rate_limit_delay, Duration::from_millis(DATAGOV_RATE_LIMIT_MS));

        let eu = EuOpenDataClient::new();
        assert_eq!(eu.rate_limit_delay, Duration::from_millis(EU_OPENDATA_RATE_LIMIT_MS));

        let uk = UkGovClient::new();
        assert_eq!(uk.rate_limit_delay, Duration::from_millis(UK_GOV_RATE_LIMIT_MS));

        let wb = WorldBankClient::new().expect("WorldBank client should build");
        assert_eq!(wb.rate_limit_delay, Duration::from_millis(WORLDBANK_RATE_LIMIT_MS));

        let un = UNDataClient::new();
        assert_eq!(un.rate_limit_delay, Duration::from_millis(UNDATA_RATE_LIMIT_MS));
    }
}