1use std::collections::HashMap;
11use std::sync::Arc;
12use std::time::Duration;
13
14use async_trait::async_trait;
15use chrono::Utc;
16use reqwest::{Client, StatusCode};
17use serde::{Deserialize, Serialize};
18use tokio::time::sleep;
19
20use crate::{DataRecord, DataSource, FrameworkError, Relationship, Result};
21use crate::api_clients::SimpleEmbedder;
22
/// Default pause, in milliseconds, that the clients store as their rate-limit delay.
const DEFAULT_RATE_LIMIT_DELAY_MS: u64 = 100;
/// Maximum number of retries a single request may consume.
const MAX_RETRIES: u32 = 3;
/// Base back-off in milliseconds; the retry helpers multiply it by the attempt number.
const RETRY_DELAY_MS: u64 = 1_000;
27
28#[derive(Debug, Deserialize)]
34struct WikiSearchResponse {
35 query: WikiSearchQuery,
36}
37
38#[derive(Debug, Deserialize)]
39struct WikiSearchQuery {
40 search: Vec<WikiSearchResult>,
41}
42
43#[derive(Debug, Deserialize)]
44struct WikiSearchResult {
45 title: String,
46 pageid: u64,
47 snippet: String,
48}
49
50#[derive(Debug, Deserialize)]
52struct WikiPageResponse {
53 query: WikiPageQuery,
54}
55
56#[derive(Debug, Deserialize)]
57struct WikiPageQuery {
58 pages: HashMap<String, WikiPage>,
59}
60
61#[derive(Debug, Deserialize)]
62struct WikiPage {
63 pageid: u64,
64 title: String,
65 #[serde(default)]
66 extract: String,
67 #[serde(default)]
68 categories: Vec<WikiCategory>,
69 #[serde(default)]
70 links: Vec<WikiLink>,
71}
72
73#[derive(Debug, Deserialize)]
74struct WikiCategory {
75 title: String,
76}
77
78#[derive(Debug, Deserialize)]
79struct WikiLink {
80 title: String,
81}
82
83pub struct WikipediaClient {
85 client: Client,
86 base_url: String,
87 language: String,
88 rate_limit_delay: Duration,
89 embedder: Arc<SimpleEmbedder>,
90}
91
92impl WikipediaClient {
93 pub fn new(language: String) -> Result<Self> {
98 let client = Client::builder()
99 .timeout(Duration::from_secs(30))
100 .user_agent("RuVector/1.0 (https://github.com/ruvnet/ruvector)")
101 .build()
102 .map_err(|e| FrameworkError::Network(e))?;
103
104 let base_url = format!("https://{}.wikipedia.org/w/api.php", language);
105
106 Ok(Self {
107 client,
108 base_url,
109 language,
110 rate_limit_delay: Duration::from_millis(DEFAULT_RATE_LIMIT_DELAY_MS),
111 embedder: Arc::new(SimpleEmbedder::new(256)), })
113 }
114
115 pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<DataRecord>> {
121 let url = format!(
122 "{}?action=query&list=search&srsearch={}&srlimit={}&format=json",
123 self.base_url,
124 urlencoding::encode(query),
125 limit.min(500)
126 );
127
128 let response = self.fetch_with_retry(&url).await?;
129 let search_response: WikiSearchResponse = response.json().await?;
130
131 let mut records = Vec::new();
132 for result in search_response.query.search {
133 if let Ok(article) = self.get_article(&result.title).await {
135 records.push(article);
136 sleep(self.rate_limit_delay).await;
137 }
138 }
139
140 Ok(records)
141 }
142
143 pub async fn get_article(&self, title: &str) -> Result<DataRecord> {
148 let url = format!(
149 "{}?action=query&prop=extracts|categories|links&titles={}&exintro=1&explaintext=1&format=json&cllimit=50&pllimit=50",
150 self.base_url,
151 urlencoding::encode(title)
152 );
153
154 let response = self.fetch_with_retry(&url).await?;
155 let page_response: WikiPageResponse = response.json().await?;
156
157 let page = page_response
159 .query
160 .pages
161 .values()
162 .next()
163 .ok_or_else(|| FrameworkError::Discovery("No page found".to_string()))?;
164
165 self.page_to_record(page)
166 }
167
168 pub async fn get_categories(&self, title: &str) -> Result<Vec<String>> {
173 let url = format!(
174 "{}?action=query&prop=categories&titles={}&cllimit=500&format=json",
175 self.base_url,
176 urlencoding::encode(title)
177 );
178
179 let response = self.fetch_with_retry(&url).await?;
180 let page_response: WikiPageResponse = response.json().await?;
181
182 let categories = page_response
183 .query
184 .pages
185 .values()
186 .next()
187 .map(|page| page.categories.iter().map(|c| c.title.clone()).collect())
188 .unwrap_or_default();
189
190 Ok(categories)
191 }
192
193 pub async fn get_links(&self, title: &str) -> Result<Vec<String>> {
198 let url = format!(
199 "{}?action=query&prop=links&titles={}&pllimit=500&format=json",
200 self.base_url,
201 urlencoding::encode(title)
202 );
203
204 let response = self.fetch_with_retry(&url).await?;
205 let page_response: WikiPageResponse = response.json().await?;
206
207 let links = page_response
208 .query
209 .pages
210 .values()
211 .next()
212 .map(|page| page.links.iter().map(|l| l.title.clone()).collect())
213 .unwrap_or_default();
214
215 Ok(links)
216 }
217
218 fn page_to_record(&self, page: &WikiPage) -> Result<DataRecord> {
220 let text = format!("{} {}", page.title, page.extract);
222 let embedding = self.embedder.embed_text(&text);
223
224 let mut relationships = Vec::new();
226 for category in &page.categories {
227 relationships.push(Relationship {
228 target_id: category.title.clone(),
229 rel_type: "in_category".to_string(),
230 weight: 1.0,
231 properties: HashMap::new(),
232 });
233 }
234
235 for link in page.links.iter().take(20) {
237 relationships.push(Relationship {
238 target_id: link.title.clone(),
239 rel_type: "links_to".to_string(),
240 weight: 0.5,
241 properties: HashMap::new(),
242 });
243 }
244
245 let mut data_map = serde_json::Map::new();
246 data_map.insert("title".to_string(), serde_json::json!(page.title));
247 data_map.insert("extract".to_string(), serde_json::json!(page.extract));
248 data_map.insert("pageid".to_string(), serde_json::json!(page.pageid));
249 data_map.insert("language".to_string(), serde_json::json!(self.language));
250 data_map.insert(
251 "url".to_string(),
252 serde_json::json!(format!(
253 "https://{}.wikipedia.org/wiki/{}",
254 self.language,
255 urlencoding::encode(&page.title)
256 )),
257 );
258
259 Ok(DataRecord {
260 id: format!("wikipedia_{}_{}", self.language, page.pageid),
261 source: "wikipedia".to_string(),
262 record_type: "article".to_string(),
263 timestamp: Utc::now(),
264 data: serde_json::Value::Object(data_map),
265 embedding: Some(embedding),
266 relationships,
267 })
268 }
269
270 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
272 let mut retries = 0;
273 loop {
274 match self.client.get(url).send().await {
275 Ok(response) => {
276 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
277 {
278 retries += 1;
279 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
280 continue;
281 }
282 return Ok(response);
283 }
284 Err(_) if retries < MAX_RETRIES => {
285 retries += 1;
286 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
287 }
288 Err(e) => return Err(FrameworkError::Network(e)),
289 }
290 }
291 }
292}
293
294#[async_trait]
295impl DataSource for WikipediaClient {
296 fn source_id(&self) -> &str {
297 "wikipedia"
298 }
299
300 async fn fetch_batch(
301 &self,
302 cursor: Option<String>,
303 batch_size: usize,
304 ) -> Result<(Vec<DataRecord>, Option<String>)> {
305 let query = cursor.as_deref().unwrap_or("machine learning");
307 let records = self.search(query, batch_size).await?;
308 Ok((records, None))
309 }
310
311 async fn total_count(&self) -> Result<Option<u64>> {
312 Ok(None)
313 }
314
315 async fn health_check(&self) -> Result<bool> {
316 let response = self.client.get(&self.base_url).send().await?;
317 Ok(response.status().is_success())
318 }
319}
320
321#[derive(Debug, Deserialize)]
327struct WikidataSearchResponse {
328 search: Vec<WikidataSearchResult>,
329}
330
331#[derive(Debug, Deserialize)]
332struct WikidataSearchResult {
333 id: String,
334 label: String,
335 description: Option<String>,
336}
337
338#[derive(Debug, Deserialize)]
340struct WikidataEntityResponse {
341 entities: HashMap<String, WikidataEntityData>,
342}
343
344#[derive(Debug, Deserialize)]
345struct WikidataEntityData {
346 id: String,
347 labels: HashMap<String, WikidataLabel>,
348 descriptions: HashMap<String, WikidataLabel>,
349 aliases: HashMap<String, Vec<WikidataLabel>>,
350 claims: HashMap<String, Vec<WikidataClaim>>,
351}
352
353#[derive(Debug, Deserialize)]
354struct WikidataLabel {
355 value: String,
356}
357
358#[derive(Debug, Deserialize)]
359struct WikidataClaim {
360 mainsnak: WikidataSnak,
361}
362
363#[derive(Debug, Deserialize)]
364struct WikidataSnak {
365 datavalue: Option<WikidataValue>,
366}
367
368#[derive(Debug, Deserialize)]
369struct WikidataValue {
370 value: serde_json::Value,
371}
372
373#[derive(Debug, Deserialize)]
375struct WikidataSparqlResponse {
376 results: WikidataSparqlResults,
377}
378
379#[derive(Debug, Deserialize)]
380struct WikidataSparqlResults {
381 bindings: Vec<HashMap<String, WikidataSparqlBinding>>,
382}
383
384#[derive(Debug, Deserialize)]
385struct WikidataSparqlBinding {
386 value: String,
387}
388
389#[derive(Debug, Clone, Serialize, Deserialize)]
391pub struct WikidataEntity {
392 pub qid: String,
394 pub label: String,
396 pub description: String,
398 pub aliases: Vec<String>,
400 pub claims: HashMap<String, Vec<String>>,
402}
403
404pub struct WikidataClient {
406 client: Client,
407 api_url: String,
408 sparql_url: String,
409 rate_limit_delay: Duration,
410 embedder: Arc<SimpleEmbedder>,
411}
412
413impl WikidataClient {
414 pub fn new() -> Result<Self> {
416 let client = Client::builder()
417 .timeout(Duration::from_secs(30))
418 .user_agent("RuVector/1.0 (https://github.com/ruvnet/ruvector)")
419 .build()
420 .map_err(|e| FrameworkError::Network(e))?;
421
422 Ok(Self {
423 client,
424 api_url: "https://www.wikidata.org/w/api.php".to_string(),
425 sparql_url: "https://query.wikidata.org/sparql".to_string(),
426 rate_limit_delay: Duration::from_millis(DEFAULT_RATE_LIMIT_DELAY_MS),
427 embedder: Arc::new(SimpleEmbedder::new(256)),
428 })
429 }
430
431 pub async fn search_entities(&self, query: &str) -> Result<Vec<WikidataEntity>> {
436 let url = format!(
437 "{}?action=wbsearchentities&search={}&language=en&format=json&limit=50",
438 self.api_url,
439 urlencoding::encode(query)
440 );
441
442 let response = self.fetch_with_retry(&url).await?;
443 let search_response: WikidataSearchResponse = response.json().await?;
444
445 let mut entities = Vec::new();
446 for result in search_response.search {
447 entities.push(WikidataEntity {
448 qid: result.id,
449 label: result.label,
450 description: result.description.unwrap_or_default(),
451 aliases: Vec::new(),
452 claims: HashMap::new(),
453 });
454 }
455
456 Ok(entities)
457 }
458
459 pub async fn get_entity(&self, qid: &str) -> Result<WikidataEntity> {
464 let url = format!(
465 "{}?action=wbgetentities&ids={}&format=json",
466 self.api_url, qid
467 );
468
469 let response = self.fetch_with_retry(&url).await?;
470 let entity_response: WikidataEntityResponse = response.json().await?;
471
472 let entity_data = entity_response
473 .entities
474 .get(qid)
475 .ok_or_else(|| FrameworkError::Discovery(format!("Entity {} not found", qid)))?;
476
477 self.entity_data_to_entity(entity_data)
478 }
479
480 pub async fn sparql_query(&self, query: &str) -> Result<Vec<HashMap<String, String>>> {
485 let response = self
486 .client
487 .get(&self.sparql_url)
488 .query(&[("query", query), ("format", "json")])
489 .send()
490 .await?;
491
492 let sparql_response: WikidataSparqlResponse = response.json().await?;
493
494 let results = sparql_response
495 .results
496 .bindings
497 .into_iter()
498 .map(|binding| {
499 binding
500 .into_iter()
501 .map(|(k, v)| (k, v.value))
502 .collect::<HashMap<String, String>>()
503 })
504 .collect();
505
506 Ok(results)
507 }
508
509 pub async fn query_climate_entities(&self) -> Result<Vec<DataRecord>> {
511 let query = r#"
512SELECT ?item ?itemLabel ?itemDescription WHERE {
513 {
514 ?item wdt:P31 wd:Q125977. # climate change
515 } UNION {
516 ?item wdt:P279* wd:Q125977. # subclass of climate change
517 } UNION {
518 ?item wdt:P921 wd:Q125977. # main subject climate change
519 }
520 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
521}
522LIMIT 100
523"#;
524
525 self.sparql_to_records(query, "climate").await
526 }
527
528 pub async fn query_pharmaceutical_companies(&self) -> Result<Vec<DataRecord>> {
530 let query = r#"
531SELECT ?item ?itemLabel ?itemDescription ?founded ?employees WHERE {
532 ?item wdt:P31/wdt:P279* wd:Q507443. # pharmaceutical company
533 OPTIONAL { ?item wdt:P571 ?founded. }
534 OPTIONAL { ?item wdt:P1128 ?employees. }
535 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
536}
537LIMIT 100
538"#;
539
540 self.sparql_to_records(query, "pharma").await
541 }
542
543 pub async fn query_disease_outbreaks(&self) -> Result<Vec<DataRecord>> {
545 let query = r#"
546SELECT ?item ?itemLabel ?itemDescription ?disease ?diseaseLabel ?startTime ?location ?locationLabel WHERE {
547 ?item wdt:P31 wd:Q3241045. # epidemic
548 OPTIONAL { ?item wdt:P828 ?disease. }
549 OPTIONAL { ?item wdt:P580 ?startTime. }
550 OPTIONAL { ?item wdt:P276 ?location. }
551 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
552}
553LIMIT 100
554"#;
555
556 self.sparql_to_records(query, "disease").await
557 }
558
559 async fn sparql_to_records(&self, query: &str, category: &str) -> Result<Vec<DataRecord>> {
561 let results = self.sparql_query(query).await?;
562
563 let mut records = Vec::new();
564 for result in results {
565 let item_uri = result.get("item").cloned().unwrap_or_default();
567 let qid = item_uri
568 .split('/')
569 .last()
570 .unwrap_or(&item_uri)
571 .to_string();
572
573 let label = result
574 .get("itemLabel")
575 .cloned()
576 .unwrap_or_else(|| qid.clone());
577 let description = result.get("itemDescription").cloned().unwrap_or_default();
578
579 let text = format!("{} {}", label, description);
581 let embedding = self.embedder.embed_text(&text);
582
583 let mut data_map = serde_json::Map::new();
584 data_map.insert("qid".to_string(), serde_json::json!(qid));
585 data_map.insert("label".to_string(), serde_json::json!(label));
586 data_map.insert("description".to_string(), serde_json::json!(description));
587 data_map.insert("category".to_string(), serde_json::json!(category));
588
589 for (key, value) in result.iter() {
591 if !key.ends_with("Label") && key != "item" && key != "itemDescription" {
592 data_map.insert(key.clone(), serde_json::json!(value));
593 }
594 }
595
596 records.push(DataRecord {
597 id: format!("wikidata_{}", qid),
598 source: "wikidata".to_string(),
599 record_type: category.to_string(),
600 timestamp: Utc::now(),
601 data: serde_json::Value::Object(data_map),
602 embedding: Some(embedding),
603 relationships: Vec::new(),
604 });
605 }
606
607 Ok(records)
608 }
609
610 fn entity_data_to_entity(&self, data: &WikidataEntityData) -> Result<WikidataEntity> {
612 let label = data
613 .labels
614 .get("en")
615 .map(|l| l.value.clone())
616 .unwrap_or_else(|| data.id.clone());
617
618 let description = data
619 .descriptions
620 .get("en")
621 .map(|d| d.value.clone())
622 .unwrap_or_default();
623
624 let aliases = data
625 .aliases
626 .get("en")
627 .map(|aliases| aliases.iter().map(|a| a.value.clone()).collect())
628 .unwrap_or_default();
629
630 let mut claims = HashMap::new();
631 for (property, claim_list) in &data.claims {
632 let values: Vec<String> = claim_list
633 .iter()
634 .filter_map(|claim| {
635 claim
636 .mainsnak
637 .datavalue
638 .as_ref()
639 .map(|dv| dv.value.to_string())
640 })
641 .collect();
642
643 if !values.is_empty() {
644 claims.insert(property.clone(), values);
645 }
646 }
647
648 Ok(WikidataEntity {
649 qid: data.id.clone(),
650 label,
651 description,
652 aliases,
653 claims,
654 })
655 }
656
657 fn entity_to_record(&self, entity: &WikidataEntity) -> Result<DataRecord> {
659 let text = format!(
661 "{} {} {}",
662 entity.label,
663 entity.description,
664 entity.aliases.join(" ")
665 );
666 let embedding = self.embedder.embed_text(&text);
667
668 let mut relationships = Vec::new();
670 for (property, values) in &entity.claims {
671 for value in values {
672 if let Some(qid) = value.strip_prefix("Q") {
674 if qid.chars().all(|c| c.is_ascii_digit()) {
675 relationships.push(Relationship {
676 target_id: value.clone(),
677 rel_type: property.clone(),
678 weight: 1.0,
679 properties: HashMap::new(),
680 });
681 }
682 }
683 }
684 }
685
686 let mut data_map = serde_json::Map::new();
687 data_map.insert("qid".to_string(), serde_json::json!(entity.qid));
688 data_map.insert("label".to_string(), serde_json::json!(entity.label));
689 data_map.insert(
690 "description".to_string(),
691 serde_json::json!(entity.description),
692 );
693 data_map.insert("aliases".to_string(), serde_json::json!(entity.aliases));
694 data_map.insert(
695 "url".to_string(),
696 serde_json::json!(format!(
697 "https://www.wikidata.org/wiki/{}",
698 entity.qid
699 )),
700 );
701
702 let claims_json: serde_json::Value = serde_json::to_value(&entity.claims)?;
704 data_map.insert("claims".to_string(), claims_json);
705
706 Ok(DataRecord {
707 id: format!("wikidata_{}", entity.qid),
708 source: "wikidata".to_string(),
709 record_type: "entity".to_string(),
710 timestamp: Utc::now(),
711 data: serde_json::Value::Object(data_map),
712 embedding: Some(embedding),
713 relationships,
714 })
715 }
716
717 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
719 let mut retries = 0;
720 loop {
721 match self.client.get(url).send().await {
722 Ok(response) => {
723 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
724 {
725 retries += 1;
726 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
727 continue;
728 }
729 return Ok(response);
730 }
731 Err(_) if retries < MAX_RETRIES => {
732 retries += 1;
733 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
734 }
735 Err(e) => return Err(FrameworkError::Network(e)),
736 }
737 }
738 }
739}
740
741impl Default for WikidataClient {
742 fn default() -> Self {
743 Self::new().expect("Failed to create WikidataClient")
744 }
745}
746
747#[async_trait]
748impl DataSource for WikidataClient {
749 fn source_id(&self) -> &str {
750 "wikidata"
751 }
752
753 async fn fetch_batch(
754 &self,
755 cursor: Option<String>,
756 _batch_size: usize,
757 ) -> Result<(Vec<DataRecord>, Option<String>)> {
758 let records = match cursor.as_deref() {
760 Some("climate") => self.query_climate_entities().await?,
761 Some("pharma") => self.query_pharmaceutical_companies().await?,
762 Some("disease") => self.query_disease_outbreaks().await?,
763 _ => {
764 let entities = self.search_entities("artificial intelligence").await?;
766 let mut records = Vec::new();
767 for entity in entities.iter().take(20) {
768 records.push(self.entity_to_record(entity)?);
769 }
770 records
771 }
772 };
773
774 Ok((records, None))
775 }
776
777 async fn total_count(&self) -> Result<Option<u64>> {
778 Ok(None)
779 }
780
781 async fn health_check(&self) -> Result<bool> {
782 let response = self.client.get(&self.api_url).send().await?;
783 Ok(response.status().is_success())
784 }
785}
786
/// Ready-made SPARQL query templates.
///
/// Every template selects `?item` with its label and description, asks the
/// label service for English text and is limited to 100 rows.
pub mod sparql_queries {
    /// Items whose instance-of, subclass-of chain or main subject is `wd:Q125977`
    /// (annotated in the query as climate change).
    pub const CLIMATE_CHANGE: &str = r#"
SELECT ?item ?itemLabel ?itemDescription WHERE {
 {
 ?item wdt:P31 wd:Q125977. # instance of climate change
 } UNION {
 ?item wdt:P279* wd:Q125977. # subclass of climate change
 } UNION {
 ?item wdt:P921 wd:Q125977. # main subject climate change
 }
 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 100
"#;

    /// Items that are instances of `wd:Q507443` or of one of its subclasses
    /// (annotated as pharmaceutical company), with optional `?founded`,
    /// `?employees` and `?headquarters`, ordered by `?employees` descending.
    pub const PHARMACEUTICAL_COMPANIES: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?founded ?employees ?headquarters ?headquartersLabel WHERE {
 ?item wdt:P31/wdt:P279* wd:Q507443. # pharmaceutical company
 OPTIONAL { ?item wdt:P571 ?founded. }
 OPTIONAL { ?item wdt:P1128 ?employees. }
 OPTIONAL { ?item wdt:P159 ?headquarters. }
 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?employees)
LIMIT 100
"#;

    /// Items that are instances of `wd:Q3241045` (annotated as epidemic), with
    /// optional `?disease`, `?startTime`, `?endTime`, `?location` and `?deaths`,
    /// ordered by `?startTime` descending.
    pub const DISEASE_OUTBREAKS: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?disease ?diseaseLabel ?startTime ?endTime ?location ?locationLabel ?deaths WHERE {
 ?item wdt:P31 wd:Q3241045. # epidemic
 OPTIONAL { ?item wdt:P828 ?disease. }
 OPTIONAL { ?item wdt:P580 ?startTime. }
 OPTIONAL { ?item wdt:P582 ?endTime. }
 OPTIONAL { ?item wdt:P276 ?location. }
 OPTIONAL { ?item wdt:P1120 ?deaths. }
 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?startTime)
LIMIT 100
"#;

    /// Items that are instances of `wd:Q31855` or of one of its subclasses
    /// (annotated as research institute), with optional `?country` and `?founded`.
    pub const RESEARCH_INSTITUTIONS: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?country ?countryLabel ?founded WHERE {
 ?item wdt:P31/wdt:P279* wd:Q31855. # research institute
 OPTIONAL { ?item wdt:P17 ?country. }
 OPTIONAL { ?item wdt:P571 ?founded. }
 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 100
"#;

    /// Items holding an award in the subclass chain of `wd:Q7191` (annotated as
    /// Nobel Prize), with optional `?year` and `?field`, ordered by `?year`
    /// descending.
    pub const NOBEL_LAUREATES: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?award ?awardLabel ?year ?field ?fieldLabel WHERE {
 ?item wdt:P166 ?award.
 ?award wdt:P279* wd:Q7191. # Nobel Prize
 OPTIONAL { ?item wdt:P166 ?award. ?award wdt:P585 ?year. }
 OPTIONAL { ?award wdt:P101 ?field. }
 SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?year)
LIMIT 100
"#;
}
860
#[cfg(test)]
mod tests {
    use super::*;

    /// Building a Wikipedia client for the English edition succeeds.
    #[tokio::test]
    async fn test_wikipedia_client_creation() {
        assert!(WikipediaClient::new(String::from("en")).is_ok());
    }

    /// Building a Wikidata client succeeds.
    #[tokio::test]
    async fn test_wikidata_client_creation() {
        assert!(WikidataClient::new().is_ok());
    }

    /// A `WikidataEntity` survives a JSON round trip.
    #[test]
    fn test_wikidata_entity_serialization() {
        let mut claims = HashMap::new();
        claims.insert(String::from("P31"), vec![String::from("Q5")]);

        let original = WikidataEntity {
            qid: String::from("Q42"),
            label: String::from("Douglas Adams"),
            description: String::from("English writer and humorist"),
            aliases: vec![String::from("Douglas Noel Adams")],
            claims,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: WikidataEntity = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.qid, "Q42");
        assert_eq!(decoded.label, "Douglas Adams");
    }

    /// The query templates checked here are non-empty.
    #[test]
    fn test_sparql_query_templates() {
        let templates = [
            sparql_queries::CLIMATE_CHANGE,
            sparql_queries::PHARMACEUTICAL_COMPANIES,
            sparql_queries::DISEASE_OUTBREAKS,
        ];
        for template in templates.iter() {
            assert!(!template.is_empty());
        }
    }
}