ruvector_data_framework/
wiki_clients.rs

//! Wikipedia and Wikidata API clients for knowledge graph building
//!
//! This module provides async clients for:
//! - Wikipedia: Article content, categories, links, and search
//! - Wikidata: Entity lookup, SPARQL queries, and structured knowledge
//!
//! Both clients convert responses into RuVector's DataRecord format with
//! semantic embeddings for vector search and graph analysis.
9
10use std::collections::HashMap;
11use std::sync::Arc;
12use std::time::Duration;
13
14use async_trait::async_trait;
15use chrono::Utc;
16use reqwest::{Client, StatusCode};
17use serde::{Deserialize, Serialize};
18use tokio::time::sleep;
19
20use crate::{DataRecord, DataSource, FrameworkError, Relationship, Result};
21use crate::api_clients::SimpleEmbedder;
22
/// Default pause, in milliseconds, that both clients store as their `rate_limit_delay`.
const DEFAULT_RATE_LIMIT_DELAY_MS: u64 = 100;
/// Upper bound on the number of retries `fetch_with_retry` performs for one request.
const MAX_RETRIES: u32 = 3;
/// Base back-off in milliseconds; `fetch_with_retry` multiplies it by the current retry count.
const RETRY_DELAY_MS: u64 = 1_000;
27
28// ============================================================================
29// Wikipedia API Client
30// ============================================================================
31
32/// Wikipedia API search response
33#[derive(Debug, Deserialize)]
34struct WikiSearchResponse {
35    query: WikiSearchQuery,
36}
37
38#[derive(Debug, Deserialize)]
39struct WikiSearchQuery {
40    search: Vec<WikiSearchResult>,
41}
42
43#[derive(Debug, Deserialize)]
44struct WikiSearchResult {
45    title: String,
46    pageid: u64,
47    snippet: String,
48}
49
50/// Wikipedia API page response
51#[derive(Debug, Deserialize)]
52struct WikiPageResponse {
53    query: WikiPageQuery,
54}
55
56#[derive(Debug, Deserialize)]
57struct WikiPageQuery {
58    pages: HashMap<String, WikiPage>,
59}
60
61#[derive(Debug, Deserialize)]
62struct WikiPage {
63    pageid: u64,
64    title: String,
65    #[serde(default)]
66    extract: String,
67    #[serde(default)]
68    categories: Vec<WikiCategory>,
69    #[serde(default)]
70    links: Vec<WikiLink>,
71}
72
73#[derive(Debug, Deserialize)]
74struct WikiCategory {
75    title: String,
76}
77
78#[derive(Debug, Deserialize)]
79struct WikiLink {
80    title: String,
81}
82
83/// Client for Wikipedia API
84pub struct WikipediaClient {
85    client: Client,
86    base_url: String,
87    language: String,
88    rate_limit_delay: Duration,
89    embedder: Arc<SimpleEmbedder>,
90}
91
92impl WikipediaClient {
93    /// Create a new Wikipedia client
94    ///
95    /// # Arguments
96    /// * `language` - Wikipedia language code (e.g., "en", "de", "fr")
97    pub fn new(language: String) -> Result<Self> {
98        let client = Client::builder()
99            .timeout(Duration::from_secs(30))
100            .user_agent("RuVector/1.0 (https://github.com/ruvnet/ruvector)")
101            .build()
102            .map_err(|e| FrameworkError::Network(e))?;
103
104        let base_url = format!("https://{}.wikipedia.org/w/api.php", language);
105
106        Ok(Self {
107            client,
108            base_url,
109            language,
110            rate_limit_delay: Duration::from_millis(DEFAULT_RATE_LIMIT_DELAY_MS),
111            embedder: Arc::new(SimpleEmbedder::new(256)), // Larger dimension for richer content
112        })
113    }
114
115    /// Search Wikipedia articles
116    ///
117    /// # Arguments
118    /// * `query` - Search query
119    /// * `limit` - Maximum number of results (max 500)
120    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<DataRecord>> {
121        let url = format!(
122            "{}?action=query&list=search&srsearch={}&srlimit={}&format=json",
123            self.base_url,
124            urlencoding::encode(query),
125            limit.min(500)
126        );
127
128        let response = self.fetch_with_retry(&url).await?;
129        let search_response: WikiSearchResponse = response.json().await?;
130
131        let mut records = Vec::new();
132        for result in search_response.query.search {
133            // Get full article for each search result
134            if let Ok(article) = self.get_article(&result.title).await {
135                records.push(article);
136                sleep(self.rate_limit_delay).await;
137            }
138        }
139
140        Ok(records)
141    }
142
143    /// Get a Wikipedia article by title
144    ///
145    /// # Arguments
146    /// * `title` - Article title
147    pub async fn get_article(&self, title: &str) -> Result<DataRecord> {
148        let url = format!(
149            "{}?action=query&prop=extracts|categories|links&titles={}&exintro=1&explaintext=1&format=json&cllimit=50&pllimit=50",
150            self.base_url,
151            urlencoding::encode(title)
152        );
153
154        let response = self.fetch_with_retry(&url).await?;
155        let page_response: WikiPageResponse = response.json().await?;
156
157        // Extract the page (should be only one)
158        let page = page_response
159            .query
160            .pages
161            .values()
162            .next()
163            .ok_or_else(|| FrameworkError::Discovery("No page found".to_string()))?;
164
165        self.page_to_record(page)
166    }
167
168    /// Get categories for an article
169    ///
170    /// # Arguments
171    /// * `title` - Article title
172    pub async fn get_categories(&self, title: &str) -> Result<Vec<String>> {
173        let url = format!(
174            "{}?action=query&prop=categories&titles={}&cllimit=500&format=json",
175            self.base_url,
176            urlencoding::encode(title)
177        );
178
179        let response = self.fetch_with_retry(&url).await?;
180        let page_response: WikiPageResponse = response.json().await?;
181
182        let categories = page_response
183            .query
184            .pages
185            .values()
186            .next()
187            .map(|page| page.categories.iter().map(|c| c.title.clone()).collect())
188            .unwrap_or_default();
189
190        Ok(categories)
191    }
192
193    /// Get links from an article
194    ///
195    /// # Arguments
196    /// * `title` - Article title
197    pub async fn get_links(&self, title: &str) -> Result<Vec<String>> {
198        let url = format!(
199            "{}?action=query&prop=links&titles={}&pllimit=500&format=json",
200            self.base_url,
201            urlencoding::encode(title)
202        );
203
204        let response = self.fetch_with_retry(&url).await?;
205        let page_response: WikiPageResponse = response.json().await?;
206
207        let links = page_response
208            .query
209            .pages
210            .values()
211            .next()
212            .map(|page| page.links.iter().map(|l| l.title.clone()).collect())
213            .unwrap_or_default();
214
215        Ok(links)
216    }
217
218    /// Convert Wikipedia page to DataRecord
219    fn page_to_record(&self, page: &WikiPage) -> Result<DataRecord> {
220        // Create embedding from title and extract
221        let text = format!("{} {}", page.title, page.extract);
222        let embedding = self.embedder.embed_text(&text);
223
224        // Build relationships from categories
225        let mut relationships = Vec::new();
226        for category in &page.categories {
227            relationships.push(Relationship {
228                target_id: category.title.clone(),
229                rel_type: "in_category".to_string(),
230                weight: 1.0,
231                properties: HashMap::new(),
232            });
233        }
234
235        // Build relationships from links (limit to first 20)
236        for link in page.links.iter().take(20) {
237            relationships.push(Relationship {
238                target_id: link.title.clone(),
239                rel_type: "links_to".to_string(),
240                weight: 0.5,
241                properties: HashMap::new(),
242            });
243        }
244
245        let mut data_map = serde_json::Map::new();
246        data_map.insert("title".to_string(), serde_json::json!(page.title));
247        data_map.insert("extract".to_string(), serde_json::json!(page.extract));
248        data_map.insert("pageid".to_string(), serde_json::json!(page.pageid));
249        data_map.insert("language".to_string(), serde_json::json!(self.language));
250        data_map.insert(
251            "url".to_string(),
252            serde_json::json!(format!(
253                "https://{}.wikipedia.org/wiki/{}",
254                self.language,
255                urlencoding::encode(&page.title)
256            )),
257        );
258
259        Ok(DataRecord {
260            id: format!("wikipedia_{}_{}", self.language, page.pageid),
261            source: "wikipedia".to_string(),
262            record_type: "article".to_string(),
263            timestamp: Utc::now(),
264            data: serde_json::Value::Object(data_map),
265            embedding: Some(embedding),
266            relationships,
267        })
268    }
269
270    /// Fetch with retry logic
271    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
272        let mut retries = 0;
273        loop {
274            match self.client.get(url).send().await {
275                Ok(response) => {
276                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
277                    {
278                        retries += 1;
279                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
280                        continue;
281                    }
282                    return Ok(response);
283                }
284                Err(_) if retries < MAX_RETRIES => {
285                    retries += 1;
286                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
287                }
288                Err(e) => return Err(FrameworkError::Network(e)),
289            }
290        }
291    }
292}
293
294#[async_trait]
295impl DataSource for WikipediaClient {
296    fn source_id(&self) -> &str {
297        "wikipedia"
298    }
299
300    async fn fetch_batch(
301        &self,
302        cursor: Option<String>,
303        batch_size: usize,
304    ) -> Result<(Vec<DataRecord>, Option<String>)> {
305        // Default to searching for "machine learning" if no cursor provided
306        let query = cursor.as_deref().unwrap_or("machine learning");
307        let records = self.search(query, batch_size).await?;
308        Ok((records, None))
309    }
310
311    async fn total_count(&self) -> Result<Option<u64>> {
312        Ok(None)
313    }
314
315    async fn health_check(&self) -> Result<bool> {
316        let response = self.client.get(&self.base_url).send().await?;
317        Ok(response.status().is_success())
318    }
319}
320
321// ============================================================================
322// Wikidata API Client
323// ============================================================================
324
325/// Wikidata entity search response
326#[derive(Debug, Deserialize)]
327struct WikidataSearchResponse {
328    search: Vec<WikidataSearchResult>,
329}
330
331#[derive(Debug, Deserialize)]
332struct WikidataSearchResult {
333    id: String,
334    label: String,
335    description: Option<String>,
336}
337
338/// Wikidata entity response
339#[derive(Debug, Deserialize)]
340struct WikidataEntityResponse {
341    entities: HashMap<String, WikidataEntityData>,
342}
343
344#[derive(Debug, Deserialize)]
345struct WikidataEntityData {
346    id: String,
347    labels: HashMap<String, WikidataLabel>,
348    descriptions: HashMap<String, WikidataLabel>,
349    aliases: HashMap<String, Vec<WikidataLabel>>,
350    claims: HashMap<String, Vec<WikidataClaim>>,
351}
352
353#[derive(Debug, Deserialize)]
354struct WikidataLabel {
355    value: String,
356}
357
358#[derive(Debug, Deserialize)]
359struct WikidataClaim {
360    mainsnak: WikidataSnak,
361}
362
363#[derive(Debug, Deserialize)]
364struct WikidataSnak {
365    datavalue: Option<WikidataValue>,
366}
367
368#[derive(Debug, Deserialize)]
369struct WikidataValue {
370    value: serde_json::Value,
371}
372
373/// Wikidata SPARQL response
374#[derive(Debug, Deserialize)]
375struct WikidataSparqlResponse {
376    results: WikidataSparqlResults,
377}
378
379#[derive(Debug, Deserialize)]
380struct WikidataSparqlResults {
381    bindings: Vec<HashMap<String, WikidataSparqlBinding>>,
382}
383
384#[derive(Debug, Deserialize)]
385struct WikidataSparqlBinding {
386    value: String,
387}
388
389/// Structured Wikidata entity
390#[derive(Debug, Clone, Serialize, Deserialize)]
391pub struct WikidataEntity {
392    /// Wikidata Q-identifier
393    pub qid: String,
394    /// Primary label
395    pub label: String,
396    /// Description
397    pub description: String,
398    /// Alternative names
399    pub aliases: Vec<String>,
400    /// Property claims (property ID -> values)
401    pub claims: HashMap<String, Vec<String>>,
402}
403
404/// Client for Wikidata API and SPARQL endpoint
405pub struct WikidataClient {
406    client: Client,
407    api_url: String,
408    sparql_url: String,
409    rate_limit_delay: Duration,
410    embedder: Arc<SimpleEmbedder>,
411}
412
413impl WikidataClient {
414    /// Create a new Wikidata client
415    pub fn new() -> Result<Self> {
416        let client = Client::builder()
417            .timeout(Duration::from_secs(30))
418            .user_agent("RuVector/1.0 (https://github.com/ruvnet/ruvector)")
419            .build()
420            .map_err(|e| FrameworkError::Network(e))?;
421
422        Ok(Self {
423            client,
424            api_url: "https://www.wikidata.org/w/api.php".to_string(),
425            sparql_url: "https://query.wikidata.org/sparql".to_string(),
426            rate_limit_delay: Duration::from_millis(DEFAULT_RATE_LIMIT_DELAY_MS),
427            embedder: Arc::new(SimpleEmbedder::new(256)),
428        })
429    }
430
431    /// Search for Wikidata entities
432    ///
433    /// # Arguments
434    /// * `query` - Search query
435    pub async fn search_entities(&self, query: &str) -> Result<Vec<WikidataEntity>> {
436        let url = format!(
437            "{}?action=wbsearchentities&search={}&language=en&format=json&limit=50",
438            self.api_url,
439            urlencoding::encode(query)
440        );
441
442        let response = self.fetch_with_retry(&url).await?;
443        let search_response: WikidataSearchResponse = response.json().await?;
444
445        let mut entities = Vec::new();
446        for result in search_response.search {
447            entities.push(WikidataEntity {
448                qid: result.id,
449                label: result.label,
450                description: result.description.unwrap_or_default(),
451                aliases: Vec::new(),
452                claims: HashMap::new(),
453            });
454        }
455
456        Ok(entities)
457    }
458
459    /// Get a Wikidata entity by QID
460    ///
461    /// # Arguments
462    /// * `qid` - Wikidata Q-identifier (e.g., "Q42" for Douglas Adams)
463    pub async fn get_entity(&self, qid: &str) -> Result<WikidataEntity> {
464        let url = format!(
465            "{}?action=wbgetentities&ids={}&format=json",
466            self.api_url, qid
467        );
468
469        let response = self.fetch_with_retry(&url).await?;
470        let entity_response: WikidataEntityResponse = response.json().await?;
471
472        let entity_data = entity_response
473            .entities
474            .get(qid)
475            .ok_or_else(|| FrameworkError::Discovery(format!("Entity {} not found", qid)))?;
476
477        self.entity_data_to_entity(entity_data)
478    }
479
480    /// Execute a SPARQL query
481    ///
482    /// # Arguments
483    /// * `query` - SPARQL query string
484    pub async fn sparql_query(&self, query: &str) -> Result<Vec<HashMap<String, String>>> {
485        let response = self
486            .client
487            .get(&self.sparql_url)
488            .query(&[("query", query), ("format", "json")])
489            .send()
490            .await?;
491
492        let sparql_response: WikidataSparqlResponse = response.json().await?;
493
494        let results = sparql_response
495            .results
496            .bindings
497            .into_iter()
498            .map(|binding| {
499                binding
500                    .into_iter()
501                    .map(|(k, v)| (k, v.value))
502                    .collect::<HashMap<String, String>>()
503            })
504            .collect();
505
506        Ok(results)
507    }
508
509    /// Query climate change related entities
510    pub async fn query_climate_entities(&self) -> Result<Vec<DataRecord>> {
511        let query = r#"
512SELECT ?item ?itemLabel ?itemDescription WHERE {
513  {
514    ?item wdt:P31 wd:Q125977.  # climate change
515  } UNION {
516    ?item wdt:P279* wd:Q125977.  # subclass of climate change
517  } UNION {
518    ?item wdt:P921 wd:Q125977.  # main subject climate change
519  }
520  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
521}
522LIMIT 100
523"#;
524
525        self.sparql_to_records(query, "climate").await
526    }
527
528    /// Query pharmaceutical companies
529    pub async fn query_pharmaceutical_companies(&self) -> Result<Vec<DataRecord>> {
530        let query = r#"
531SELECT ?item ?itemLabel ?itemDescription ?founded ?employees WHERE {
532  ?item wdt:P31/wdt:P279* wd:Q507443.  # pharmaceutical company
533  OPTIONAL { ?item wdt:P571 ?founded. }
534  OPTIONAL { ?item wdt:P1128 ?employees. }
535  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
536}
537LIMIT 100
538"#;
539
540        self.sparql_to_records(query, "pharma").await
541    }
542
543    /// Query disease outbreaks
544    pub async fn query_disease_outbreaks(&self) -> Result<Vec<DataRecord>> {
545        let query = r#"
546SELECT ?item ?itemLabel ?itemDescription ?disease ?diseaseLabel ?startTime ?location ?locationLabel WHERE {
547  ?item wdt:P31 wd:Q3241045.  # epidemic
548  OPTIONAL { ?item wdt:P828 ?disease. }
549  OPTIONAL { ?item wdt:P580 ?startTime. }
550  OPTIONAL { ?item wdt:P276 ?location. }
551  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
552}
553LIMIT 100
554"#;
555
556        self.sparql_to_records(query, "disease").await
557    }
558
559    /// Convert SPARQL results to DataRecords
560    async fn sparql_to_records(&self, query: &str, category: &str) -> Result<Vec<DataRecord>> {
561        let results = self.sparql_query(query).await?;
562
563        let mut records = Vec::new();
564        for result in results {
565            // Extract QID from URI
566            let item_uri = result.get("item").cloned().unwrap_or_default();
567            let qid = item_uri
568                .split('/')
569                .last()
570                .unwrap_or(&item_uri)
571                .to_string();
572
573            let label = result
574                .get("itemLabel")
575                .cloned()
576                .unwrap_or_else(|| qid.clone());
577            let description = result.get("itemDescription").cloned().unwrap_or_default();
578
579            // Create embedding from label and description
580            let text = format!("{} {}", label, description);
581            let embedding = self.embedder.embed_text(&text);
582
583            let mut data_map = serde_json::Map::new();
584            data_map.insert("qid".to_string(), serde_json::json!(qid));
585            data_map.insert("label".to_string(), serde_json::json!(label));
586            data_map.insert("description".to_string(), serde_json::json!(description));
587            data_map.insert("category".to_string(), serde_json::json!(category));
588
589            // Add all other SPARQL result fields
590            for (key, value) in result.iter() {
591                if !key.ends_with("Label") && key != "item" && key != "itemDescription" {
592                    data_map.insert(key.clone(), serde_json::json!(value));
593                }
594            }
595
596            records.push(DataRecord {
597                id: format!("wikidata_{}", qid),
598                source: "wikidata".to_string(),
599                record_type: category.to_string(),
600                timestamp: Utc::now(),
601                data: serde_json::Value::Object(data_map),
602                embedding: Some(embedding),
603                relationships: Vec::new(),
604            });
605        }
606
607        Ok(records)
608    }
609
610    /// Convert entity data to WikidataEntity
611    fn entity_data_to_entity(&self, data: &WikidataEntityData) -> Result<WikidataEntity> {
612        let label = data
613            .labels
614            .get("en")
615            .map(|l| l.value.clone())
616            .unwrap_or_else(|| data.id.clone());
617
618        let description = data
619            .descriptions
620            .get("en")
621            .map(|d| d.value.clone())
622            .unwrap_or_default();
623
624        let aliases = data
625            .aliases
626            .get("en")
627            .map(|aliases| aliases.iter().map(|a| a.value.clone()).collect())
628            .unwrap_or_default();
629
630        let mut claims = HashMap::new();
631        for (property, claim_list) in &data.claims {
632            let values: Vec<String> = claim_list
633                .iter()
634                .filter_map(|claim| {
635                    claim
636                        .mainsnak
637                        .datavalue
638                        .as_ref()
639                        .map(|dv| dv.value.to_string())
640                })
641                .collect();
642
643            if !values.is_empty() {
644                claims.insert(property.clone(), values);
645            }
646        }
647
648        Ok(WikidataEntity {
649            qid: data.id.clone(),
650            label,
651            description,
652            aliases,
653            claims,
654        })
655    }
656
657    /// Convert WikidataEntity to DataRecord
658    fn entity_to_record(&self, entity: &WikidataEntity) -> Result<DataRecord> {
659        // Create embedding from label, description, and aliases
660        let text = format!(
661            "{} {} {}",
662            entity.label,
663            entity.description,
664            entity.aliases.join(" ")
665        );
666        let embedding = self.embedder.embed_text(&text);
667
668        // Build relationships from claims
669        let mut relationships = Vec::new();
670        for (property, values) in &entity.claims {
671            for value in values {
672                // Try to extract QID if value is an entity reference
673                if let Some(qid) = value.strip_prefix("Q") {
674                    if qid.chars().all(|c| c.is_ascii_digit()) {
675                        relationships.push(Relationship {
676                            target_id: value.clone(),
677                            rel_type: property.clone(),
678                            weight: 1.0,
679                            properties: HashMap::new(),
680                        });
681                    }
682                }
683            }
684        }
685
686        let mut data_map = serde_json::Map::new();
687        data_map.insert("qid".to_string(), serde_json::json!(entity.qid));
688        data_map.insert("label".to_string(), serde_json::json!(entity.label));
689        data_map.insert(
690            "description".to_string(),
691            serde_json::json!(entity.description),
692        );
693        data_map.insert("aliases".to_string(), serde_json::json!(entity.aliases));
694        data_map.insert(
695            "url".to_string(),
696            serde_json::json!(format!(
697                "https://www.wikidata.org/wiki/{}",
698                entity.qid
699            )),
700        );
701
702        // Add claims as structured data
703        let claims_json: serde_json::Value = serde_json::to_value(&entity.claims)?;
704        data_map.insert("claims".to_string(), claims_json);
705
706        Ok(DataRecord {
707            id: format!("wikidata_{}", entity.qid),
708            source: "wikidata".to_string(),
709            record_type: "entity".to_string(),
710            timestamp: Utc::now(),
711            data: serde_json::Value::Object(data_map),
712            embedding: Some(embedding),
713            relationships,
714        })
715    }
716
717    /// Fetch with retry logic
718    async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
719        let mut retries = 0;
720        loop {
721            match self.client.get(url).send().await {
722                Ok(response) => {
723                    if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
724                    {
725                        retries += 1;
726                        sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
727                        continue;
728                    }
729                    return Ok(response);
730                }
731                Err(_) if retries < MAX_RETRIES => {
732                    retries += 1;
733                    sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
734                }
735                Err(e) => return Err(FrameworkError::Network(e)),
736            }
737        }
738    }
739}
740
741impl Default for WikidataClient {
742    fn default() -> Self {
743        Self::new().expect("Failed to create WikidataClient")
744    }
745}
746
747#[async_trait]
748impl DataSource for WikidataClient {
749    fn source_id(&self) -> &str {
750        "wikidata"
751    }
752
753    async fn fetch_batch(
754        &self,
755        cursor: Option<String>,
756        _batch_size: usize,
757    ) -> Result<(Vec<DataRecord>, Option<String>)> {
758        // Use cursor to determine which query to run
759        let records = match cursor.as_deref() {
760            Some("climate") => self.query_climate_entities().await?,
761            Some("pharma") => self.query_pharmaceutical_companies().await?,
762            Some("disease") => self.query_disease_outbreaks().await?,
763            _ => {
764                // Default: search for "artificial intelligence"
765                let entities = self.search_entities("artificial intelligence").await?;
766                let mut records = Vec::new();
767                for entity in entities.iter().take(20) {
768                    records.push(self.entity_to_record(entity)?);
769                }
770                records
771            }
772        };
773
774        Ok((records, None))
775    }
776
777    async fn total_count(&self) -> Result<Option<u64>> {
778        Ok(None)
779    }
780
781    async fn health_check(&self) -> Result<bool> {
782        let response = self.client.get(&self.api_url).send().await?;
783        Ok(response.status().is_success())
784    }
785}
786
787// ============================================================================
788// Example SPARQL Queries
789// ============================================================================
790
/// Ready-made SPARQL query strings for the Wikidata query service.
///
/// Every template selects `?item`, `?itemLabel` and `?itemDescription`, asks
/// the label service for English labels and is capped at 100 rows.
pub mod sparql_queries {
    /// Entities that are an instance of, a (transitive) subclass of, or have
    /// as main subject the item `wd:Q125977`.
    pub const CLIMATE_CHANGE: &str = r#"
SELECT ?item ?itemLabel ?itemDescription WHERE {
  {
    ?item wdt:P31 wd:Q125977.  # instance of climate change
  } UNION {
    ?item wdt:P279* wd:Q125977.  # subclass of climate change
  } UNION {
    ?item wdt:P921 wd:Q125977.  # main subject climate change
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 100
"#;

    /// Pharmaceutical companies with optional founding date, employee count
    /// and headquarters, ordered by employee count (descending).
    pub const PHARMACEUTICAL_COMPANIES: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?founded ?employees ?headquarters ?headquartersLabel WHERE {
  ?item wdt:P31/wdt:P279* wd:Q507443.  # pharmaceutical company
  OPTIONAL { ?item wdt:P571 ?founded. }
  OPTIONAL { ?item wdt:P1128 ?employees. }
  OPTIONAL { ?item wdt:P159 ?headquarters. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?employees)
LIMIT 100
"#;

    /// Epidemics with optional disease, start/end time, location and death
    /// count, ordered by start time (descending).
    pub const DISEASE_OUTBREAKS: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?disease ?diseaseLabel ?startTime ?endTime ?location ?locationLabel ?deaths WHERE {
  ?item wdt:P31 wd:Q3241045.  # epidemic
  OPTIONAL { ?item wdt:P828 ?disease. }
  OPTIONAL { ?item wdt:P580 ?startTime. }
  OPTIONAL { ?item wdt:P582 ?endTime. }
  OPTIONAL { ?item wdt:P276 ?location. }
  OPTIONAL { ?item wdt:P1120 ?deaths. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?startTime)
LIMIT 100
"#;

    /// Research institutes with optional country and founding date.
    pub const RESEARCH_INSTITUTIONS: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?country ?countryLabel ?founded WHERE {
  ?item wdt:P31/wdt:P279* wd:Q31855.  # research institute
  OPTIONAL { ?item wdt:P17 ?country. }
  OPTIONAL { ?item wdt:P571 ?founded. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 100
"#;

    /// Recipients of an award that is a (transitive) subclass of the Nobel
    /// Prize item, ordered by year (descending).
    ///
    /// NOTE(review): `?year` is read from the award item itself via
    /// `wdt:P585`; if the date is recorded as a qualifier on the award
    /// statement instead, `?year` will stay unbound — confirm against live data.
    pub const NOBEL_LAUREATES: &str = r#"
SELECT ?item ?itemLabel ?itemDescription ?award ?awardLabel ?year ?field ?fieldLabel WHERE {
  ?item wdt:P166 ?award.
  ?award wdt:P279* wd:Q7191.  # Nobel Prize
  OPTIONAL { ?item wdt:P166 ?award. ?award wdt:P585 ?year. }
  OPTIONAL { ?award wdt:P101 ?field. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?year)
LIMIT 100
"#;
}
860
861// ============================================================================
862// Tests
863// ============================================================================
864
#[cfg(test)]
mod tests {
    use super::*;

    /// A Wikipedia client for the English edition can be constructed.
    #[tokio::test]
    async fn test_wikipedia_client_creation() {
        assert!(WikipediaClient::new(String::from("en")).is_ok());
    }

    /// A Wikidata client can be constructed.
    #[tokio::test]
    async fn test_wikidata_client_creation() {
        assert!(WikidataClient::new().is_ok());
    }

    /// A `WikidataEntity` survives a JSON round trip.
    #[test]
    fn test_wikidata_entity_serialization() {
        let entity = WikidataEntity {
            qid: "Q42".to_string(),
            label: "Douglas Adams".to_string(),
            description: "English writer and humorist".to_string(),
            aliases: vec!["Douglas Noel Adams".to_string()],
            claims: HashMap::from([("P31".to_string(), vec!["Q5".to_string()])]),
        };

        let encoded = serde_json::to_string(&entity).unwrap();
        let decoded: WikidataEntity = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.qid, "Q42");
        assert_eq!(decoded.label, "Douglas Adams");
    }

    /// The checked query templates are non-empty.
    #[test]
    fn test_sparql_query_templates() {
        let templates = [
            sparql_queries::CLIMATE_CHANGE,
            sparql_queries::PHARMACEUTICAL_COMPANIES,
            sparql_queries::DISEASE_OUTBREAKS,
        ];
        for template in templates {
            assert!(!template.is_empty());
        }
    }
}