papers/
semanticscholar2wikidata.rs

1extern crate config;
2extern crate mediawiki;
3extern crate serde_json;
4
5use crate::generic_author_info::GenericAuthorInfo;
6use crate::scientific_publication_adapter::ScientificPublicationAdapter;
7use crate::*;
8use semanticscholar::*;
9use std::collections::HashMap;
10
11pub struct Semanticscholar2Wikidata {
12    author_cache: HashMap<String, String>,
13    work_cache: HashMap<String, Work>,
14    client: Client,
15}
16
17impl Semanticscholar2Wikidata {
18    pub fn new() -> Self {
19        Semanticscholar2Wikidata {
20            author_cache: HashMap::new(),
21            work_cache: HashMap::new(),
22            client: Client::new(),
23        }
24    }
25
26    pub fn get_cached_publication_from_id(&self, publication_id: &String) -> Option<&Work> {
27        self.work_cache.get(publication_id)
28    }
29
30    fn publication_ids_from_doi(&mut self, doi: &String) -> Vec<String> {
31        let work = match self.client.work(&doi) {
32            Ok(w) => w,
33            _ => return vec![], // No such work
34        };
35
36        let publication_id = match &work.paper_id {
37            Some(paper_id) => paper_id.to_string(),
38            None => return vec![], // No ID
39        };
40
41        self.work_cache.insert(publication_id.clone(), work);
42        vec![publication_id]
43    }
44
45    fn add_identifiers_from_cached_publication(
46        &mut self,
47        publication_id: &String,
48        ret: &mut Vec<GenericWorkIdentifier>,
49    ) {
50        let my_prop = GenericWorkType::Property(self.publication_property().unwrap());
51
52        let work = match self.get_cached_publication_from_id(&publication_id) {
53            Some(w) => w,
54            None => return,
55        };
56
57        ret.push(GenericWorkIdentifier {
58            work_type: my_prop.clone(),
59            id: publication_id.clone(),
60        });
61
62        match &work.doi {
63            Some(id) => {
64                ret.push(GenericWorkIdentifier {
65                    work_type: GenericWorkType::Property(PROP_DOI.to_string()),
66                    id: id.clone(),
67                });
68            }
69            None => {}
70        }
71
72        /*
73        This works, but might somehow merge separate items for "reviewed publication" and arxiv version
74        match &work.arxiv_id {
75            Some(id) => {
76                ret.push(GenericWorkIdentifier {
77                    work_type: GenericWorkType::Property(PROP_ARXIV.to_string()),
78                    id: id.clone(),
79                });
80            }
81            None => {}
82        }
83        */
84    }
85}
86
87impl ScientificPublicationAdapter for Semanticscholar2Wikidata {
88    fn name(&self) -> &str {
89        "Semanticscholar2Wikidata"
90    }
91
92    fn author_property(&self) -> Option<String> {
93        return Some("P4012".to_string());
94    }
95
96    fn publication_property(&self) -> Option<String> {
97        return Some("P4011".to_string());
98    }
99
100    fn topic_property(&self) -> Option<String> {
101        return Some("P6611".to_string());
102    }
103
104    fn author_cache(&self) -> &HashMap<String, String> {
105        &self.author_cache
106    }
107
108    fn author_cache_mut(&mut self) -> &mut HashMap<String, String> {
109        &mut self.author_cache
110    }
111
112    fn get_identifier_list(
113        &mut self,
114        ids: &Vec<GenericWorkIdentifier>,
115    ) -> Vec<GenericWorkIdentifier> {
116        let mut ret: Vec<GenericWorkIdentifier> = vec![];
117        for id in ids {
118            match &id.work_type {
119                GenericWorkType::Property(prop) => match prop.as_str() {
120                    PROP_DOI => {
121                        for publication_id in self.publication_ids_from_doi(&id.id) {
122                            self.add_identifiers_from_cached_publication(&publication_id, &mut ret);
123                        }
124                    }
125                    _ => {}
126                },
127                _ => {}
128            }
129        }
130        ret
131    }
132
133    fn do_cache_work(&mut self, publication_id: &String) -> Option<String> {
134        let work = match self.client.work(&publication_id) {
135            Ok(w) => w,
136            _ => return None, // No such work
137        };
138
139        let publication_id = match &work.paper_id {
140            Some(paper_id) => paper_id.to_string(),
141            None => return None, // No ID
142        };
143
144        self.work_cache.insert(publication_id.clone(), work);
145        Some(publication_id)
146    }
147
148    fn get_work_titles(&self, publication_id: &String) -> Vec<LocaleString> {
149        match self.get_cached_publication_from_id(publication_id) {
150            Some(work) => match &work.title {
151                Some(title) => vec![LocaleString::new("en", &title)],
152                None => vec![],
153            },
154            None => vec![],
155        }
156    }
157
158    fn update_statements_for_publication_id(&self, publication_id: &String, item: &mut Entity) {
159        let work = match self.get_cached_publication_from_id(publication_id) {
160            Some(w) => w,
161            None => return,
162        };
163
164        if !item.has_claims_with_property("P577") {
165            match work.year {
166                Some(year) => {
167                    let statement =
168                        self.get_wb_time_from_partial("P577".to_string(), year as u32, None, None);
169                    item.add_claim(statement);
170                }
171                None => {}
172            }
173        }
174    }
175
176    fn get_author_list(&mut self, publication_id: &String) -> Vec<GenericAuthorInfo> {
177        let mut ret: Vec<GenericAuthorInfo> = vec![];
178        let work = match self.get_cached_publication_from_id(publication_id) {
179            Some(w) => w.clone(),
180            None => return ret,
181        };
182
183        for num in 0..work.authors.len() {
184            let author = &work.authors[num];
185            let mut entry = GenericAuthorInfo {
186                name: author.name.clone(),
187                prop2id: HashMap::new(),
188                wikidata_item: None,
189                list_number: Some((num + 1).to_string()),
190                alternative_names: vec![],
191            };
192            match &author.author_id {
193                Some(id) => {
194                    entry
195                        .prop2id
196                        .insert(self.author_property().unwrap(), id.to_string());
197                }
198                None => {}
199            }
200            ret.push(entry);
201        }
202
203        ret
204    }
205}