1use chrono::{DateTime, Utc};
68use quick_xml::events::Event;
69use quick_xml::reader::Reader;
70use reqwest as request;
71use serde::{Deserialize, Serialize};
72use urlencoding::encode;
73
/// Subject categories that can be passed to `QueryParams::subject_category`.
///
/// Each variant renders as the category identifier listed in [`Category::as_str`]
/// (for example `Category::CsAi` renders as `cs.AI`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Category {
    CsAi,
    CsCl,
    CsLg,
    CsGt,
    CsCv,
    CsCr,
    CsCc,
    CsCe,
    CsCy,
    CsDs,
    CsDm,
    CsDc,
    CsEt,
    CsFl,
    CsGl,
    CsGr,
    CsAr,
    CsHc,
    CsIr,
}

impl Category {
    /// Returns the category identifier as a static string, e.g. `"cs.AI"`.
    pub fn as_str(self) -> &'static str {
        match self {
            Category::CsAi => "cs.AI",
            Category::CsCl => "cs.CL",
            Category::CsLg => "cs.LG",
            Category::CsGt => "cs.GT",
            Category::CsCv => "cs.CV",
            Category::CsCr => "cs.CR",
            Category::CsCc => "cs.CC",
            Category::CsCe => "cs.CE",
            Category::CsCy => "cs.CY",
            Category::CsDs => "cs.DS",
            Category::CsDm => "cs.DM",
            Category::CsDc => "cs.DC",
            Category::CsEt => "cs.ET",
            Category::CsFl => "cs.FL",
            Category::CsGl => "cs.GL",
            Category::CsGr => "cs.GR",
            Category::CsAr => "cs.AR",
            Category::CsHc => "cs.HC",
            Category::CsIr => "cs.IR",
        }
    }
}

/// Formats the category as its identifier.
///
/// Implementing `Display` (rather than an inherent `to_string`) keeps
/// `category.to_string()` working through the standard `ToString` blanket impl
/// and additionally allows the type to be used directly in `format!`.
impl std::fmt::Display for Category {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
121
/// One term (or combination of terms) of an arXiv `search_query` expression.
///
/// Every single-`String` variant stores the already rendered query fragment
/// (for example `ti:"..."` for [`QueryParams::Title`]), so two values compare
/// equal exactly when they are the same variant holding the same rendered text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QueryParams {
    /// Rendered title term (`ti:` prefix).
    Title(String),
    /// Rendered author term (`au:` prefix).
    Author(String),
    /// Rendered abstract term (`abs:` prefix).
    Abstract(String),
    /// Rendered comment term (`co:` prefix).
    Comment(String),
    /// Rendered journal-reference term (`jr:` prefix).
    JournalRef(String),
    /// Rendered subject-category term (`cat:` prefix).
    SubjectCategory(String),
    /// Rendered report-number term (`rn:` prefix).
    ReportNumber(String),
    /// Rendered id term (`id:` prefix).
    Id(String),
    /// Rendered all-fields term (`all:` prefix).
    All(String),
    /// Terms joined with `+AND+`.
    And(String),
    /// Terms joined with `+OR+`.
    Or(String),
    /// Terms joined with `+ANDNOT+`.
    AndNot(String),
    /// Terms concatenated and wrapped in `%28` ... `%29` (encoded parentheses).
    Group(String),
    /// Date range `(from, to)`, rendered as `submittedDate:[from+TO+to]`.
    SubmittedDate(String, String),
}
139
140impl Default for QueryParams {
141 fn default() -> Self {
142 return QueryParams::title("default");
143 }
144}
145
/// Sort key sent as the `sortBy` request parameter. Defaults to `Relevance`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SortBy {
    #[default]
    Relevance,
    LastUpdatedDate,
    SubmittedDate,
}

impl SortBy {
    /// Returns the parameter value for this sort key, e.g. `"lastUpdatedDate"`.
    pub fn as_str(self) -> &'static str {
        match self {
            SortBy::Relevance => "relevance",
            SortBy::LastUpdatedDate => "lastUpdatedDate",
            SortBy::SubmittedDate => "submittedDate",
        }
    }
}

/// Formats the sort key as its parameter value.
///
/// `sort_by.to_string()` keeps working through the `ToString` blanket impl.
impl std::fmt::Display for SortBy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
163
/// Sort direction sent as the `sortOrder` request parameter. Defaults to `Ascending`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SortOrder {
    #[default]
    Ascending,
    Descending,
}

impl SortOrder {
    /// Returns the parameter value for this direction: `"ascending"` or `"descending"`.
    pub fn as_str(self) -> &'static str {
        match self {
            SortOrder::Ascending => "ascending",
            SortOrder::Descending => "descending",
        }
    }
}

/// Formats the direction as its parameter value.
///
/// `sort_order.to_string()` keeps working through the `ToString` blanket impl.
impl std::fmt::Display for SortOrder {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
179
180impl QueryParams {
181 pub fn title(arg: &str) -> Self {
182 return QueryParams::Title(format!("ti:\"{}\"", encode(arg)));
183 }
184 pub fn author(arg: &str) -> Self {
185 return QueryParams::Author(format!("au:\"{}\"", encode(arg)));
186 }
187 pub fn abstract_text(arg: &str) -> Self {
188 return QueryParams::Abstract(format!("abs:\"{}\"", encode(arg)));
189 }
190 pub fn comment(arg: &str) -> Self {
191 return QueryParams::Comment(format!("co:\"{}\"", encode(arg)));
192 }
193 pub fn journal_ref(arg: &str) -> Self {
194 return QueryParams::JournalRef(format!("jr:\"{}\"", encode(arg)));
195 }
196 pub fn subject_category(arg: Category) -> Self {
197 return QueryParams::SubjectCategory(format!("cat:\"{}\"", encode(&arg.to_string())));
198 }
199 pub fn report_number(arg: &str) -> Self {
200 return QueryParams::ReportNumber(format!("rn:\"{}\"", encode(arg)));
201 }
202 pub fn id(id: &str) -> Self {
203 return QueryParams::Id(format!("id:\"{}\"", encode(id)));
204 }
205 pub fn all(arg: &str) -> Self {
206 return QueryParams::All(format!("all:\"{}\"", encode(arg)));
207 }
208 pub fn to_string(&self) -> String {
209 match self {
210 QueryParams::Title(arg) => arg.to_string(),
211 QueryParams::Author(arg) => arg.to_string(),
212 QueryParams::Abstract(arg) => arg.to_string(),
213 QueryParams::Comment(arg) => arg.to_string(),
214 QueryParams::JournalRef(arg) => arg.to_string(),
215 QueryParams::SubjectCategory(arg) => arg.to_string(),
216 QueryParams::ReportNumber(arg) => arg.to_string(),
217 QueryParams::Id(arg) => arg.to_string(),
218 QueryParams::All(arg) => arg.to_string(),
219 QueryParams::And(arg) => arg.to_string(),
220 QueryParams::Or(arg) => arg.to_string(),
221 QueryParams::AndNot(arg) => arg.to_string(),
222 QueryParams::Group(arg) => arg.to_string(),
223 QueryParams::SubmittedDate(from, to) => {
224 format!("submittedDate:[{}+TO+{}]", from, to)
225 }
226 }
227 }
228 pub fn and(args: Vec<QueryParams>) -> Self {
229 let args = args
230 .iter()
231 .map(|arg| arg.to_string())
232 .collect::<Vec<String>>();
233 let query = args.join("+AND+");
234 return QueryParams::And(query);
235 }
236 pub fn or(args: Vec<QueryParams>) -> Self {
237 let args = args
238 .iter()
239 .map(|arg| arg.to_string())
240 .collect::<Vec<String>>();
241 let query = args.join("+OR+");
242 return QueryParams::Or(query);
243 }
244 pub fn and_not(args: Vec<QueryParams>) -> Self {
245 let args = args
246 .iter()
247 .map(|arg| arg.to_string())
248 .collect::<Vec<String>>();
249 let query = args.join("+ANDNOT+");
250 return QueryParams::Or(query);
251 }
252 pub fn group(args: Vec<QueryParams>) -> Self {
253 let mut args = args
254 .iter()
255 .map(|arg| arg.to_string())
256 .collect::<Vec<String>>();
257 args.insert(0, String::from("%28"));
258 args.push(String::from("%29"));
259 let query = args.join("");
260 return QueryParams::Group(query);
261 }
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct Paper {
266 pub id: String,
267 pub title: String,
268 pub authors: Vec<String>,
269 #[serde(rename = "abstract")]
270 pub abstract_text: String,
271 pub published: String,
272 pub updated: String,
273 pub doi: String,
274 pub comment: Vec<String>,
275 pub journal_ref: String,
276 pub pdf_url: String,
277 pub primary_category: String,
278 pub categories: Vec<String>,
279}
280
281impl Paper {
282 pub fn default() -> Self {
283 return Paper {
284 id: "".to_string(),
285 title: "".to_string(),
286 authors: Vec::new(),
287 abstract_text: "".to_string(),
288 published: "".to_string(),
289 updated: "".to_string(),
290 doi: "".to_string(),
291 comment: Vec::new(),
292 journal_ref: "".to_string(),
293 pdf_url: "".to_string(),
294 primary_category: "".to_string(),
295 categories: Vec::new(),
296 };
297 }
298
299 pub fn published2utc(&self) -> DateTime<Utc> {
300 return DateTime::parse_from_rfc3339(&self.published)
301 .unwrap()
302 .with_timezone(&Utc);
303 }
304
305 pub fn updated2utc(&self) -> DateTime<Utc> {
306 return DateTime::parse_from_rfc3339(&self.updated)
307 .unwrap()
308 .with_timezone(&Utc);
309 }
310}
311
312#[derive(Clone, Debug, Default)]
313pub struct ArXiv {
314 pub args: QueryParams,
315 pub start: Option<u64>,
316 pub max_resutls: Option<u64>,
317 pub sort_by: Option<SortBy>,
318 pub sort_order: Option<SortOrder>,
319 pub id_list: Option<Vec<String>>,
320}
321
322impl ArXiv {
323 pub fn from_args(args: QueryParams) -> Self {
324 return ArXiv {
325 args: args,
326 max_resutls: None,
327 start: None,
328 sort_by: None,
329 sort_order: None,
330 id_list: None,
331 };
332 }
333
334 pub fn from_id_list(ids: Vec<&str>) -> Self {
346 return ArXiv {
347 args: QueryParams::default(),
348 max_resutls: None,
349 start: None,
350 sort_by: None,
351 sort_order: None,
352 id_list: Some(ids.iter().map(|s| s.to_string()).collect()),
353 };
354 }
355
356 pub fn start(&mut self, start: u64) -> &mut Self {
357 self.start = Some(start);
358 return self;
359 }
360 pub fn max_results(&mut self, max_results: u64) -> &mut Self {
361 self.max_resutls = Some(max_results);
362 return self;
363 }
364 pub fn sort_by(&mut self, sort_by: SortBy) -> &mut Self {
365 self.sort_by = Some(sort_by);
366 return self;
367 }
368 pub fn sort_order(&mut self, sort_order: SortOrder) -> &mut Self {
369 self.sort_order = Some(sort_order);
370 return self;
371 }
372 pub fn id_list(&mut self, ids: Vec<&str>) -> &mut Self {
376 self.id_list = Some(ids.iter().map(|s| s.to_string()).collect());
377 return self;
378 }
379
380 fn parse_xml(&self, xml: String) -> Vec<Paper> {
381 let mut reader = Reader::from_str(&xml);
382 let mut buf = Vec::new();
383 let mut in_entry = false;
384 let mut in_id = false;
385 let mut in_title = false;
386 let mut in_author = false;
387 let mut in_name = false;
388 let mut in_abstract = false;
389 let mut in_published = false;
390 let mut in_updated = false;
391 let mut in_comment = false;
392 let mut in_journal_ref = false;
393
394 let mut responses: Vec<Paper> = Vec::new();
395 let mut res = Paper::default();
396 loop {
397 match reader.read_event_into(&mut buf) {
398 Ok(Event::Start(ref e)) => {
399 if e.name().as_ref() == b"entry" {
400 in_entry = true;
401 res = Paper::default();
402 } else if e.name().as_ref() == b"id" {
403 in_id = true;
404 } else if e.name().as_ref() == b"title" {
405 in_title = true;
406 } else if e.name().as_ref() == b"author" {
407 in_author = true;
408 } else if e.name().as_ref() == b"name" {
409 if in_author {
410 in_name = true;
411 }
412 } else if e.name().as_ref() == b"summary" {
413 in_abstract = true;
414 } else if e.name().as_ref() == b"published" {
415 in_published = true;
416 } else if e.name().as_ref() == b"updated" {
417 in_updated = true;
418 } else if e.name().as_ref() == b"arxiv:comment" {
419 in_comment = true;
420 } else if e.name().as_ref() == b"arxiv:journal_ref" {
421 in_journal_ref = true;
422 } else if e.name().as_ref() == b"link" && in_entry {
423 let mut is_pdf = false;
424 let mut is_doi = false;
425 e.attributes().for_each(|attr| {
426 if let Ok(attr) = attr {
427 if attr.key.as_ref() == b"title" && attr.value.as_ref() == b"pdf" {
428 is_pdf = true;
429 } else if attr.key.as_ref() == b"title"
430 && attr.value.as_ref() == b"doi"
431 {
432 is_doi = true;
433 }
434 }
435 });
436 e.attributes().for_each(|attr| {
437 if let Ok(attr) = attr {
438 if attr.key.as_ref() == b"href" {
439 if is_pdf {
440 res.pdf_url = String::from_utf8_lossy(attr.value.as_ref())
441 .to_string();
442 } else if is_doi {
443 res.doi = String::from_utf8_lossy(attr.value.as_ref())
444 .to_string();
445 }
446 }
447 }
448 });
449 } else if e.name().as_ref() == b"arxiv:primary_category" {
450 e.attributes().for_each(|attr| {
451 if let Ok(attr) = attr {
452 if attr.key.as_ref() == b"term" {
453 res.primary_category =
454 String::from_utf8_lossy(attr.value.as_ref()).to_string();
455 }
456 }
457 });
458 } else if e.name().as_ref() == b"category" {
459 if let Some(attr) = e
460 .attributes()
461 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
462 {
463 res.categories.push(
464 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
465 );
466 }
467 } else if e.name().as_ref() == b"category" {
468 if let Some(attr) = e
469 .attributes()
470 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
471 {
472 res.categories.push(
473 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
474 );
475 }
476 }
477 }
478 Ok(Event::End(ref e)) => {
479 if e.name().as_ref() == b"entry" {
480 in_entry = false;
481 responses.push(res.clone());
482 res = Paper::default();
483 } else if e.name().as_ref() == b"id" {
484 in_id = false;
485 } else if e.name().as_ref() == b"title" {
486 in_title = false;
487 } else if e.name().as_ref() == b"author" {
488 in_author = false;
489 } else if e.name().as_ref() == b"name" {
490 if in_author {
491 in_name = false;
492 }
493 } else if e.name().as_ref() == b"summary" {
494 in_abstract = false;
495 } else if e.name().as_ref() == b"published" {
496 in_published = false;
497 } else if e.name().as_ref() == b"updated" {
498 in_updated = false;
499 } else if e.name().as_ref() == b"arxiv:comment" {
500 in_comment = false;
501 } else if e.name().as_ref() == b"arxiv:journal_ref" {
502 in_journal_ref = true;
503 }
504 }
505 Ok(Event::Text(e)) => {
506 if in_entry {
507 if in_id {
508 res.id = e.unescape().unwrap().to_string();
509 } else if in_title {
510 res.title = e.unescape().unwrap().to_string();
511 } else if in_author && in_name {
512 res.authors.push(e.unescape().unwrap().to_string());
513 } else if in_abstract {
514 res.abstract_text =
515 e.unescape().unwrap().to_string().trim().replace("\n", "");
516 } else if in_published {
517 res.published = e.unescape().unwrap().to_string();
518 } else if in_updated {
519 res.updated = e.unescape().unwrap().to_string();
520 } else if in_comment {
521 res.comment.push(e.unescape().unwrap().to_string());
522 } else if in_journal_ref {
523 res.journal_ref = e.unescape().unwrap().to_string();
524 }
525 }
526 }
527 Ok(Event::Empty(ref e)) => {
528 if e.name().as_ref() == b"link" && in_entry {
529 let mut is_pdf = false;
530 let mut is_doi = false;
531 e.attributes().for_each(|attr| {
532 if let Ok(attr) = attr {
533 if attr.key.as_ref() == b"title" && attr.value.as_ref() == b"pdf" {
534 is_pdf = true;
535 } else if attr.key.as_ref() == b"title"
536 && attr.value.as_ref() == b"doi"
537 {
538 is_doi = true;
539 }
540 }
541 });
542 e.attributes().for_each(|attr| {
543 if let Ok(attr) = attr {
544 if attr.key.as_ref() == b"href" {
545 if is_pdf {
546 res.pdf_url = String::from_utf8_lossy(attr.value.as_ref())
547 .to_string();
548 } else if is_doi {
549 res.doi = String::from_utf8_lossy(attr.value.as_ref())
550 .to_string();
551 }
552 }
553 }
554 });
555 } else if e.name().as_ref() == b"arxiv:primary_category" && in_entry {
556 e.attributes().for_each(|attr| {
557 if let Ok(attr) = attr {
558 if attr.key.as_ref() == b"term" {
559 res.primary_category =
560 String::from_utf8_lossy(attr.value.as_ref()).to_string();
561 }
562 }
563 });
564 } else if e.name().as_ref() == b"category" && in_entry {
565 if let Some(attr) = e
566 .attributes()
567 .find(|attr| attr.as_ref().unwrap().key.as_ref() == b"term")
568 {
569 res.categories.push(
570 String::from_utf8_lossy(attr.unwrap().value.as_ref()).to_string(),
571 );
572 }
573 }
574 }
575 Ok(Event::Eof) => break,
576 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
577 _ => (),
578 }
579 buf.clear();
580 }
581 return responses;
582 }
583
584 fn build_query(&self) -> String {
585 let mut params: Vec<String> = Vec::new();
586
587 if self.id_list.is_none() {
589 let mut search_query = self.args.to_string();
590 search_query = search_query.replace("%20", "+");
591 params.push(format!("search_query={}", search_query));
592 } else if let Some(ref id_list) = self.id_list {
593 let default_query = QueryParams::default().to_string();
595 let current_query = self.args.to_string();
596 if current_query != default_query {
597 let mut search_query = current_query;
598 search_query = search_query.replace("%20", "+");
599 params.push(format!("search_query={}", search_query));
600 }
601 params.push(format!("id_list={}", id_list.join(",")));
602 }
603
604 if let Some(start) = &self.start {
605 params.push(format!("start={}", start));
606 }
607 if let Some(max_resutls) = &self.max_resutls {
608 params.push(format!("max_results={}", max_resutls));
609 }
610 if let Some(sort_by) = &self.sort_by {
611 params.push(format!("sortBy={}", sort_by.to_string()));
612 }
613 if let Some(sort_order) = &self.sort_order {
614 params.push(format!("sortOrder={}", sort_order.to_string()));
615 }
616
617 return format!("https://export.arxiv.org/api/query?{}", params.join("&"));
618 }
619
620 pub async fn query(&mut self) -> Vec<Paper> {
621 let url = self.build_query();
622 let body = request::get(&url).await.unwrap().text().await.unwrap();
623 let responses = self.parse_xml(body);
624 return responses;
625 }
626}
627
628#[cfg(test)]
629mod tests;