1use chrono::{DateTime, Utc};
52use quick_xml::events::Event;
53use quick_xml::reader::Reader;
54use reqwest as request;
55use serde::{Deserialize, Serialize};
56use urlencoding::encode;
57
/// arXiv computer-science subject categories that can be used in a `cat:`
/// search term (see `QueryParams::subject_category`).
///
/// Each variant is named after its arXiv code: `CsAi` stands for `cs.AI`,
/// `CsLg` for `cs.LG`, and so on.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Category {
    /// `cs.AI`
    CsAi,
    /// `cs.CL`
    CsCl,
    /// `cs.LG`
    CsLg,
    /// `cs.GT`
    CsGt,
    /// `cs.CV`
    CsCv,
    /// `cs.CR`
    CsCr,
    /// `cs.CC`
    CsCc,
    /// `cs.CE`
    CsCe,
    /// `cs.CY`
    CsCy,
    /// `cs.DS`
    CsDs,
    /// `cs.DM`
    CsDm,
    /// `cs.DC`
    CsDc,
    /// `cs.ET`
    CsEt,
    /// `cs.FL`
    CsFl,
    /// `cs.GL`
    CsGl,
    /// `cs.GR`
    CsGr,
    /// `cs.AR`
    CsAr,
    /// `cs.HC`
    CsHc,
    /// `cs.IR`
    CsIr,
}

impl Category {
    /// Returns the arXiv category code (for example `"cs.AI"`) without
    /// allocating.
    pub fn as_str(self) -> &'static str {
        match self {
            Category::CsAi => "cs.AI",
            Category::CsCl => "cs.CL",
            Category::CsLg => "cs.LG",
            Category::CsGt => "cs.GT",
            Category::CsCv => "cs.CV",
            Category::CsCr => "cs.CR",
            Category::CsCc => "cs.CC",
            Category::CsCe => "cs.CE",
            Category::CsCy => "cs.CY",
            Category::CsDs => "cs.DS",
            Category::CsDm => "cs.DM",
            Category::CsDc => "cs.DC",
            Category::CsEt => "cs.ET",
            Category::CsFl => "cs.FL",
            Category::CsGl => "cs.GL",
            Category::CsGr => "cs.GR",
            Category::CsAr => "cs.AR",
            Category::CsHc => "cs.HC",
            Category::CsIr => "cs.IR",
        }
    }
}

/// Formats the category as its arXiv code.
///
/// Implementing `Display` (instead of an inherent `to_string`) keeps
/// `category.to_string()` working through the standard `ToString` blanket
/// impl and additionally allows the type to be used in `format!`.
impl std::fmt::Display for Category {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
105
106#[derive(Clone, Debug)]
107pub enum QueryParams {
108 Title(String),
109 Author(String),
110 Abstract(String),
111 Comment(String),
112 JournalRef(String),
113 SubjectCategory(String),
114 ReportNumber(String),
115 Id(String),
116 All(String),
117 And(String),
118 Or(String),
119 AndNot(String),
120 Group(String),
121 SubmittedDate(String, String),
122}
123
124impl Default for QueryParams {
125 fn default() -> Self {
126 return QueryParams::title("default");
127 }
128}
129
/// Value of the `sortBy` URL parameter of an arXiv API request.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SortBy {
    /// Rendered as `relevance`; this is the default.
    #[default]
    Relevance,
    /// Rendered as `lastUpdatedDate`.
    LastUpdatedDate,
    /// Rendered as `submittedDate`.
    SubmittedDate,
}

impl SortBy {
    /// Returns the exact parameter value sent to the API, without allocating.
    pub fn as_str(self) -> &'static str {
        match self {
            SortBy::Relevance => "relevance",
            SortBy::LastUpdatedDate => "lastUpdatedDate",
            SortBy::SubmittedDate => "submittedDate",
        }
    }
}

/// Formats the criterion as its URL parameter value.
///
/// `sort_by.to_string()` keeps working through the standard `ToString`
/// blanket impl, and the type can now also be used directly in `format!`.
impl std::fmt::Display for SortBy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
147
/// Value of the `sortOrder` URL parameter of an arXiv API request.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SortOrder {
    /// Rendered as `ascending`; this is the default.
    #[default]
    Ascending,
    /// Rendered as `descending`.
    Descending,
}

impl SortOrder {
    /// Returns the exact parameter value sent to the API, without allocating.
    pub fn as_str(self) -> &'static str {
        match self {
            SortOrder::Ascending => "ascending",
            SortOrder::Descending => "descending",
        }
    }
}

/// Formats the order as its URL parameter value.
///
/// `sort_order.to_string()` keeps working through the standard `ToString`
/// blanket impl, and the type can now also be used directly in `format!`.
impl std::fmt::Display for SortOrder {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
163
164impl QueryParams {
165 pub fn title(arg: &str) -> Self {
166 return QueryParams::Title(format!("ti:\"{}\"", encode(arg)));
167 }
168 pub fn author(arg: &str) -> Self {
169 return QueryParams::Author(format!("au:\"{}\"", encode(arg)));
170 }
171 pub fn abstract_text(arg: &str) -> Self {
172 return QueryParams::Abstract(format!("abs:\"{}\"", encode(arg)));
173 }
174 pub fn comment(arg: &str) -> Self {
175 return QueryParams::Comment(format!("co:\"{}\"", encode(arg)));
176 }
177 pub fn journal_ref(arg: &str) -> Self {
178 return QueryParams::JournalRef(format!("jr:\"{}\"", encode(arg)));
179 }
180 pub fn subject_category(arg: Category) -> Self {
181 return QueryParams::SubjectCategory(format!("cat:\"{}\"", encode(&arg.to_string())));
182 }
183 pub fn report_number(arg: &str) -> Self {
184 return QueryParams::ReportNumber(format!("rn:\"{}\"", encode(arg)));
185 }
186 pub fn id(id: &str) -> Self {
187 return QueryParams::Id(format!("id:\"{}\"", encode(id)));
188 }
189 pub fn all(arg: &str) -> Self {
190 return QueryParams::All(format!("all:\"{}\"", encode(arg)));
191 }
192 pub fn to_string(&self) -> String {
193 match self {
194 QueryParams::Title(arg) => arg.to_string(),
195 QueryParams::Author(arg) => arg.to_string(),
196 QueryParams::Abstract(arg) => arg.to_string(),
197 QueryParams::Comment(arg) => arg.to_string(),
198 QueryParams::JournalRef(arg) => arg.to_string(),
199 QueryParams::SubjectCategory(arg) => arg.to_string(),
200 QueryParams::ReportNumber(arg) => arg.to_string(),
201 QueryParams::Id(arg) => arg.to_string(),
202 QueryParams::All(arg) => arg.to_string(),
203 QueryParams::And(arg) => arg.to_string(),
204 QueryParams::Or(arg) => arg.to_string(),
205 QueryParams::AndNot(arg) => arg.to_string(),
206 QueryParams::Group(arg) => arg.to_string(),
207 QueryParams::SubmittedDate(from, to) => {
208 format!("submittedDate:[{}+TO+{}]", from, to)
209 }
210 }
211 }
212 pub fn and(args: Vec<QueryParams>) -> Self {
213 let args = args
214 .iter()
215 .map(|arg| arg.to_string())
216 .collect::<Vec<String>>();
217 let query = args.join("+AND+");
218 return QueryParams::And(query);
219 }
220 pub fn or(args: Vec<QueryParams>) -> Self {
221 let args = args
222 .iter()
223 .map(|arg| arg.to_string())
224 .collect::<Vec<String>>();
225 let query = args.join("+OR+");
226 return QueryParams::Or(query);
227 }
228 pub fn and_not(args: Vec<QueryParams>) -> Self {
229 let args = args
230 .iter()
231 .map(|arg| arg.to_string())
232 .collect::<Vec<String>>();
233 let query = args.join("+ANDNOT+");
234 return QueryParams::Or(query);
235 }
236 pub fn group(args: Vec<QueryParams>) -> Self {
237 let mut args = args
238 .iter()
239 .map(|arg| arg.to_string())
240 .collect::<Vec<String>>();
241 args.insert(0, String::from("%28"));
242 args.push(String::from("%29"));
243 let query = args.join("");
244 return QueryParams::Group(query);
245 }
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct Paper {
250 pub id: String,
251 pub title: String,
252 pub authors: Vec<String>,
253 #[serde(rename = "abstract")]
254 pub abstract_text: String,
255 pub published: String,
256 pub updated: String,
257 pub doi: String,
258 pub comment: Vec<String>,
259 pub journal_ref: String,
260 pub pdf_url: String,
261 pub primary_category: String,
262 pub categories: Vec<String>,
263}
264
265impl Paper {
266 pub fn default() -> Self {
267 return Paper {
268 id: "".to_string(),
269 title: "".to_string(),
270 authors: Vec::new(),
271 abstract_text: "".to_string(),
272 published: "".to_string(),
273 updated: "".to_string(),
274 doi: "".to_string(),
275 comment: Vec::new(),
276 journal_ref: "".to_string(),
277 pdf_url: "".to_string(),
278 primary_category: "".to_string(),
279 categories: Vec::new(),
280 };
281 }
282
283 pub fn published2utc(&self) -> DateTime<Utc> {
284 return DateTime::parse_from_rfc3339(&self.published)
285 .unwrap()
286 .with_timezone(&Utc);
287 }
288
289 pub fn updated2utc(&self) -> DateTime<Utc> {
290 return DateTime::parse_from_rfc3339(&self.updated)
291 .unwrap()
292 .with_timezone(&Utc);
293 }
294}
295
296#[derive(Clone, Debug, Default)]
297pub struct ArXiv {
298 pub args: QueryParams,
299 pub start: Option<u64>,
300 pub max_resutls: Option<u64>,
301 pub sort_by: Option<SortBy>,
302 pub sort_order: Option<SortOrder>,
303}
304
305impl ArXiv {
306 pub fn from_args(args: QueryParams) -> Self {
307 return ArXiv {
308 args: args,
309 max_resutls: None,
310 start: None,
311 sort_by: None,
312 sort_order: None,
313 };
314 }
315
316 pub fn start(&mut self, start: u64) -> &mut Self {
317 self.start = Some(start);
318 return self;
319 }
320 pub fn max_results(&mut self, max_results: u64) -> &mut Self {
321 self.max_resutls = Some(max_results);
322 return self;
323 }
324 pub fn sort_by(&mut self, sort_by: SortBy) -> &mut Self {
325 self.sort_by = Some(sort_by);
326 return self;
327 }
328 pub fn sort_order(&mut self, sort_order: SortOrder) -> &mut Self {
329 self.sort_order = Some(sort_order);
330 return self;
331 }
332
333 fn parse_xml(&self, xml: String) -> Vec<Paper> {
334 let mut reader = Reader::from_str(&xml);
335 let mut buf = Vec::new();
336 let mut in_entry = false;
337 let mut in_id = false;
338 let mut in_title = false;
339 let mut in_author = false;
340 let mut in_name = false;
341 let mut in_abstract = false;
342 let mut in_published = false;
343 let mut in_updated = false;
344 let mut in_comment = false;
345 let mut in_journal_ref = false;
346
347 let mut responses: Vec<Paper> = Vec::new();
348 let mut res = Paper::default();
349 loop {
350 match reader.read_event_into(&mut buf) {
351 Ok(Event::Start(ref e)) => {
352 if e.name().as_ref() == b"entry" {
353 in_entry = true;
354 res = Paper::default();
355 } else if e.name().as_ref() == b"id" {
356 in_id = true;
357 } else if e.name().as_ref() == b"title" {
358 in_title = true;
359 } else if e.name().as_ref() == b"author" {
360 in_author = true;
361 } else if e.name().as_ref() == b"name" {
362 if in_author {
363 in_name = true;
364 }
365 } else if e.name().as_ref() == b"summary" {
366 in_abstract = true;
367 } else if e.name().as_ref() == b"published" {
368 in_published = true;
369 } else if e.name().as_ref() == b"updated" {
370 in_updated = true;
371 } else if e.name().as_ref() == b"arxiv:comment" {
372 in_comment = true;
373 } else if e.name().as_ref() == b"arxiv:journal_ref" {
374 in_journal_ref = true;
375 } else if e.name().as_ref() == b"link" && in_entry {
376 let mut is_pdf = false;
377 let mut is_doi = false;
378 e.attributes().for_each(|attr| {
379 if let Ok(attr) = attr {
380 if attr.key.as_ref() == b"title" && attr.value.as_ref() == b"pdf" {
381 is_pdf = true;
382 } else if attr.key.as_ref() == b"title"
383 && attr.value.as_ref() == b"doi"
384 {
385 is_doi = true;
386 }
387 }
388 });
389 e.attributes().for_each(|attr| {
390 if let Ok(attr) = attr {
391 if attr.key.as_ref() == b"href" {
392 if is_pdf {
393 res.pdf_url = String::from_utf8_lossy(attr.value.as_ref())
394 .to_string();
395 } else if is_doi {
396 res.doi = String::from_utf8_lossy(attr.value.as_ref())
397 .to_string();
398 }
399 }
400 }
401 });
402 } else if e.name().as_ref() == b"arxiv:primary_category" {
403 e.attributes().for_each(|attr| {
404 if let Ok(attr) = attr {
405 if attr.key.as_ref() == b"term" {
406 res.primary_category =
407 String::from_utf8_lossy(attr.value.as_ref()).to_string();
408 }
409 }
410 });
411 } else if e.name().as_ref() == b"category" {
412 if let Some(attr) = e
413 .attributes()
414 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
415 {
416 res.categories.push(
417 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
418 );
419 }
420 } else if e.name().as_ref() == b"category" {
421 if let Some(attr) = e
422 .attributes()
423 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
424 {
425 res.categories.push(
426 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
427 );
428 }
429 }
430 }
431 Ok(Event::End(ref e)) => {
432 if e.name().as_ref() == b"entry" {
433 in_entry = false;
434 responses.push(res.clone());
435 res = Paper::default();
436 } else if e.name().as_ref() == b"id" {
437 in_id = false;
438 } else if e.name().as_ref() == b"title" {
439 in_title = false;
440 } else if e.name().as_ref() == b"author" {
441 in_author = false;
442 } else if e.name().as_ref() == b"name" {
443 if in_author {
444 in_name = false;
445 }
446 } else if e.name().as_ref() == b"summary" {
447 in_abstract = false;
448 } else if e.name().as_ref() == b"published" {
449 in_published = false;
450 } else if e.name().as_ref() == b"updated" {
451 in_updated = false;
452 } else if e.name().as_ref() == b"arxiv:comment" {
453 in_comment = false;
454 } else if e.name().as_ref() == b"arxiv:journal_ref" {
455 in_journal_ref = true;
456 }
457 }
458 Ok(Event::Text(e)) => {
459 if in_entry {
460 if in_id {
461 res.id = e.unescape().unwrap().to_string();
462 } else if in_title {
463 res.title = e.unescape().unwrap().to_string();
464 } else if in_author && in_name {
465 res.authors.push(e.unescape().unwrap().to_string());
466 } else if in_abstract {
467 res.abstract_text =
468 e.unescape().unwrap().to_string().trim().replace("\n", "");
469 } else if in_published {
470 res.published = e.unescape().unwrap().to_string();
471 } else if in_updated {
472 res.updated = e.unescape().unwrap().to_string();
473 } else if in_comment {
474 res.comment.push(e.unescape().unwrap().to_string());
475 } else if in_journal_ref {
476 res.journal_ref = e.unescape().unwrap().to_string();
477 }
478 }
479 }
480 Ok(Event::Empty(ref e)) => {
481 if e.name().as_ref() == b"link" && in_entry {
482 let mut is_pdf = false;
483 let mut is_doi = false;
484 e.attributes().for_each(|attr| {
485 if let Ok(attr) = attr {
486 if attr.key.as_ref() == b"title" && attr.value.as_ref() == b"pdf" {
487 is_pdf = true;
488 } else if attr.key.as_ref() == b"title"
489 && attr.value.as_ref() == b"doi"
490 {
491 is_doi = true;
492 }
493 }
494 });
495 e.attributes().for_each(|attr| {
496 if let Ok(attr) = attr {
497 if attr.key.as_ref() == b"href" {
498 if is_pdf {
499 res.pdf_url = String::from_utf8_lossy(attr.value.as_ref())
500 .to_string();
501 } else if is_doi {
502 res.doi = String::from_utf8_lossy(attr.value.as_ref())
503 .to_string();
504 }
505 }
506 }
507 });
508 } else if e.name().as_ref() == b"arxiv:primary_category" && in_entry {
509 e.attributes().for_each(|attr| {
510 if let Ok(attr) = attr {
511 if attr.key.as_ref() == b"term" {
512 res.primary_category =
513 String::from_utf8_lossy(attr.value.as_ref()).to_string();
514 }
515 }
516 });
517 } else if e.name().as_ref() == b"category" && in_entry {
518 if let Some(attr) = e
519 .attributes()
520 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
521 {
522 res.categories.push(
523 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
524 );
525 }
526 }
527 }
528 Ok(Event::Eof) => break,
529 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
530 _ => (),
531 }
532 buf.clear();
533 }
534 return responses;
535 }
536
537 fn build_query(&self) -> String {
538 let mut query = self.args.to_string();
539 query = query.replace("%20", "+");
540 if let Some(start) = &self.start {
541 query.push_str(&format!("&start={}", start));
542 }
543 if let Some(max_resutls) = &self.max_resutls {
544 query.push_str(&format!("&max_results={}", max_resutls));
545 }
546 if let Some(sort_by) = &self.sort_by {
547 query.push_str(&format!("&sortBy={}", sort_by.to_string()));
548 }
549 if let Some(sort_order) = &self.sort_order {
550 query.push_str(&format!("&sortOrder={}", sort_order.to_string()));
551 }
552
553 return format!("http://export.arxiv.org/api/query?search_query={}", query);
554 }
555
556 pub async fn query(&mut self) -> Vec<Paper> {
557 let url = self.build_query();
558 let body = request::get(&url).await.unwrap().text().await.unwrap();
559 let responses = self.parse_xml(body);
560 return responses;
561 }
562}
563
564#[cfg(test)]
565mod tests;