use crate::error::{AdsError, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
// NOTE(review): presumably mirrors the API's per-request row cap — confirm against the ADS docs.
/// Largest `rows` value requested per page; `IterDocs::page_size` clamps every request to it.
const MAX_ROWS: u64 = 2000;
#[derive(serde::Serialize, Clone)]
#[must_use]
pub struct Query<'ads> {
#[serde(skip)]
client: &'ads crate::Ads,
q: String,
#[serde(skip_serializing_if = "Option::is_none")]
rows: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
start: Option<u64>,
#[serde(serialize_with = "fl_defaults")]
fl: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
fq: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
#[serde(serialize_with = "comma_separated")]
sort: Vec<Sort>,
}
/// The `response` object of a search reply, as deserialized by `Query::send`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Response {
    /// Value of the `numFound` key in the reply.
    #[serde(rename = "numFound")]
    pub num_found: u64,
    /// Value of the `start` key in the reply.
    pub start: u64,
    /// The documents contained in this page of the reply.
    pub docs: Vec<Document>,
}
/// A single document from a search reply.
///
/// The struct is rewritten by `adsabs_macro::make_optional`; the unit tests in this file
/// unwrap every field as an `Option`, so a field missing from the reply presumably
/// deserializes to `None` (confirm against the macro's implementation).
#[adsabs_macro::make_optional]
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct Document {
    // `abstract` is a reserved word in Rust, so the field is renamed on the Rust side.
    #[serde(rename = "abstract")]
    pub abs: String,
    pub ack: String,
    pub aff: Vec<String>,
    pub aff_id: Vec<String>,
    pub alternate_bibcode: Vec<String>,
    pub alternate_title: Vec<String>,
    pub arxiv_class: Vec<String>,
    pub author: Vec<String>,
    pub author_count: u64,
    pub author_norm: Vec<String>,
    pub bibcode: String,
    pub bibgroup: Vec<String>,
    pub bibstem: Vec<String>,
    pub citation: Vec<String>,
    pub citation_count: u64,
    pub cite_read_boost: f32,
    pub classic_factor: u64,
    pub comment: String,
    pub copyright: String,
    pub data: Vec<String>,
    pub database: Vec<Database>,
    pub date: DateTime<Utc>,
    pub doctype: DocType,
    pub doi: Vec<String>,
    pub eid: String,
    pub entdate: String,
    pub entry_date: DateTime<Utc>,
    pub esources: Vec<String>,
    pub facility: Vec<String>,
    pub first_author: String,
    pub first_author_norm: String,
    pub grant: Vec<String>,
    pub grant_agencies: Vec<String>,
    pub grant_id: Vec<String>,
    pub id: String,
    pub identifier: Vec<String>,
    pub indexstamp: DateTime<Utc>,
    pub inst: Vec<String>,
    pub isbn: Vec<String>,
    pub issn: Vec<String>,
    pub issue: String,
    pub keyword: Vec<String>,
    pub keyword_norm: Vec<String>,
    pub keyword_schema: Vec<String>,
    pub lang: String,
    pub links_data: Vec<String>,
    pub nedid: Vec<String>,
    pub nedtype: Vec<String>,
    pub orcid_pub: Vec<String>,
    pub orcid_other: Vec<String>,
    pub orcid_user: Vec<String>,
    pub page: Vec<String>,
    pub page_count: String,
    pub page_range: String,
    pub property: Vec<String>,
    // `pub` is a Rust keyword, so the field is exposed as `publication`.
    #[serde(rename = "pub")]
    pub publication: String,
    pub pub_raw: String,
    pub pubdate: String,
    pub pubnote: Vec<String>,
    pub read_count: u64,
    pub reference: Vec<String>,
    pub simbid: Vec<String>,
    pub title: Vec<String>,
    pub vizier: Vec<String>,
    pub volume: String,
    pub year: String,
}
/// Values accepted for the entries of `Document::database`.
///
/// Variants are (de)serialized as their lowercase names, e.g. `"astronomy"`.
/// The enum is fieldless, so it also derives `Copy`, `PartialEq`, `Eq` and `Hash`
/// to let callers compare and store values directly.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Database {
    Astronomy,
    Physics,
    General,
}
/// Values accepted for `Document::doctype`.
///
/// Variants are (de)serialized as their lowercase names, e.g. `"phdthesis"`.
/// The enum is fieldless, so it also derives `Copy`, `PartialEq`, `Eq` and `Hash`
/// to let callers compare and store values directly.
// NOTE(review): a doctype string not listed here fails deserialization of the whole
// document — confirm the list covers everything the API can return.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum DocType {
    Article,
    Eprint,
    Inproceedings,
    Inbook,
    Abstract,
    Book,
    Bookreview,
    Catalog,
    Circular,
    Erratum,
    Mastersthesis,
    Newsletter,
    Obituary,
    Phdthesis,
    Pressrelease,
    Proceedings,
    Proposal,
    Software,
    Talk,
    Techreport,
    Misc,
}
impl<'ads> Query<'ads> {
    /// Starts a new query for `query` that will be sent through `client`.
    ///
    /// Every optional parameter starts out unset.
    pub fn new(client: &'ads crate::Ads, query: &str) -> Self {
        Self {
            client,
            q: query.to_owned(),
            rows: None,
            start: None,
            fl: Vec::new(),
            fq: None,
            sort: Vec::new(),
        }
    }

    /// Sets the `start` parameter of the request.
    pub fn start(mut self, start: u64) -> Self {
        self.start = Some(start);
        self
    }

    /// Appends `fl` to the list of requested fields.
    ///
    /// May be called repeatedly; entries are joined with commas when the query is
    /// serialized. The value is stored verbatim, so `"id,author"` is accepted as well.
    pub fn fl(mut self, fl: &str) -> Self {
        self.fl.push(fl.to_owned());
        self
    }

    /// Sets the `fq` parameter of the request, replacing any previous value.
    pub fn fq(mut self, fq: &str) -> Self {
        self.fq = Some(fq.to_owned());
        self
    }

    /// Appends a sort clause.
    ///
    /// Accepts anything convertible into [`Sort`]; a bare `&str` becomes a descending sort.
    pub fn sort<T: Into<Sort>>(mut self, field: T) -> Self {
        self.sort.push(field.into());
        self
    }

    /// Sets the `rows` parameter of the request.
    pub fn rows(mut self, rows: u64) -> Self {
        self.rows = Some(rows);
        self
    }

    /// Sends the query to `search/query` and deserializes the `response` object of the reply.
    ///
    /// # Errors
    ///
    /// Returns [`AdsError::Ads`] carrying the server's message when the reply contains a
    /// string at `error.msg`. Failures from the client call, from decoding the body as
    /// JSON, or from deserializing the `response` object are propagated.
    pub fn send(&self) -> Result<Response> {
        let mut data: serde_json::Value = self.client.get("search/query", Some(self))?.json()?;
        if let Some(serde_json::Value::String(msg)) = data.get("error").and_then(|x| x.get("msg")) {
            return Err(AdsError::Ads(msg.clone()));
        }
        // Move the subtree out instead of deep-cloning it. A missing key (or a non-object
        // reply) yields `Null`, exactly as indexing with `data["response"]` would.
        let response = data
            .get_mut("response")
            .map(serde_json::Value::take)
            .unwrap_or_default();
        Ok(serde_json::from_value(response)?)
    }

    /// Converts the query into an iterator over its documents.
    ///
    /// Iteration begins at the configured `start` (0 when unset); no request is made
    /// until the iterator is first advanced.
    pub fn iter_docs(self) -> IterDocs<'ads> {
        let start = self.start.unwrap_or(0);
        IterDocs {
            query: self,
            num_found: 0,
            start,
            limit: None,
            docs: Vec::new().into_iter(),
        }
    }
}
/// One sort clause of a query: a field name together with a direction.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use]
pub enum Sort {
    /// Sort by the named field, ascending.
    Asc(String),
    /// Sort by the named field, descending.
    Desc(String),
}
impl Sort {
pub fn asc(field: &str) -> Self {
Sort::Asc(field.to_owned())
}
pub fn desc(field: &str) -> Self {
Sort::Desc(field.to_owned())
}
}
impl From<&str> for Sort {
fn from(s: &str) -> Self {
Sort::Desc(s.to_owned())
}
}
/// Formats the clause as `"<field> asc"` or `"<field> desc"`.
///
/// Implemented as `Display` rather than a hand-written `ToString`: the standard blanket
/// impl still provides `to_string()`, and the type additionally works with `format!`/`write!`.
impl std::fmt::Display for Sort {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Sort::Asc(field) => write!(f, "{} asc", field),
            Sort::Desc(field) => write!(f, "{} desc", field),
        }
    }
}
/// Iterator over the documents matching a [`Query`], created by `Query::iter_docs`.
///
/// Documents are requested one page at a time as the iterator is advanced.
#[must_use]
pub struct IterDocs<'ads> {
    /// The query that is re-sent, with adjusted `start`/`rows`, for every page.
    query: Query<'ads>,
    /// `num_found` reported by the most recent reply; 0 before the first request.
    num_found: u64,
    /// Position used as the `start` of the next request.
    start: u64,
    /// Optional cap set through `IterDocs::limit`.
    limit: Option<u64>,
    /// Documents of the current page that have not been handed out yet.
    docs: std::vec::IntoIter<Document>,
}
impl<'ads> IterDocs<'ads> {
    /// Caps the total number of documents the iterator yields at `limit`.
    ///
    /// The cap counts yielded documents, so it is independent of the query's `start` offset.
    pub fn limit(mut self, limit: u64) -> Self {
        self.limit = Some(limit);
        self
    }

    /// Offset configured on the underlying query; iteration begins there.
    ///
    /// `try_next` only ever modifies a clone of the query, so this value stays fixed for
    /// the lifetime of the iterator.
    #[inline]
    fn offset(&self) -> u64 {
        self.query.start.unwrap_or(0)
    }

    /// Number of positions consumed so far, measured from the initial offset.
    #[inline]
    fn yielded(&self) -> u64 {
        self.start.saturating_sub(self.offset())
    }

    /// Number of rows to request for the next page.
    ///
    /// With a limit set this is the number of documents still allowed, otherwise the
    /// query's own `rows` (or `MAX_ROWS`); either way it never exceeds `MAX_ROWS`.
    #[inline]
    fn page_size(&self) -> u64 {
        let wanted = match self.limit {
            Some(limit) => limit.saturating_sub(self.yielded()),
            None => self.query.rows.unwrap_or(MAX_ROWS),
        };
        wanted.min(MAX_ROWS)
    }

    /// Returns the next document, fetching another page when the current one is exhausted.
    ///
    /// Returns `Ok(None)` once the limit is reached or the reported `num_found` has been
    /// passed. A failed request is returned as `Err` and leaves the iterator state
    /// untouched, so a later call issues the same request again.
    fn try_next(&mut self) -> Result<Option<Document>> {
        // Enforce the limit per document, not per page: a limit larger than one page must
        // not let the remainder of the last page slip through.
        if self.limit.map_or(false, |limit| self.yielded() >= limit) {
            return Ok(None);
        }
        if let Some(doc) = self.docs.next() {
            self.start += 1;
            return Ok(Some(doc));
        }
        // `start` moves past the initial offset only after a page has been fetched (see the
        // `+ 1` below), which tells "nothing requested yet" apart from "results exhausted"
        // even when the query begins at a non-zero offset.
        if self.start > self.offset() && self.start >= self.num_found {
            return Ok(None);
        }
        let response = self
            .query
            .clone()
            .start(self.start)
            .rows(self.page_size())
            .send()?;
        self.num_found = response.num_found;
        // Accounts for the document returned just below.
        self.start = response.start + 1;
        self.docs = response.docs.into_iter();
        Ok(self.docs.next())
    }
}
/// Yields `Ok(document)` for every result and `Err(..)` when fetching a page fails.
///
/// The iterator is not fused on error: `try_next` returns before updating any state when
/// a request fails, so advancing again repeats that request.
impl<'ads> Iterator for IterDocs<'ads> {
    type Item = Result<Document>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Result<Option<T>>` -> `Option<Result<T>>`: `Ok(None)` ends the iteration.
        self.try_next().transpose()
    }
}
/// Serializes the `fl` field list as a comma-separated string.
///
/// An empty list is replaced by a fixed default set of fields, so the parameter is
/// always present in the serialized query.
fn fl_defaults<S: serde::Serializer>(items: &[String], serializer: S) -> Result<S::Ok, S::Error> {
    match items {
        [] => serializer.serialize_str("author,first_author,bibcode,id,year,title"),
        requested => comma_separated(requested, serializer),
    }
}
/// Serializes `items` as one string: each item's `to_string()` output joined with `,`.
fn comma_separated<T: ToString, S: serde::Serializer>(
    items: &[T],
    serializer: S,
) -> Result<S::Ok, S::Error> {
    let rendered: Vec<String> = items.iter().map(ToString::to_string).collect();
    let joined = rendered.join(",");
    serializer.serialize_str(&joined)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Datelike;

    /// A partial document deserializes, and the fields that are present come back populated.
    #[test]
    fn deserialize_document() {
        let raw = r#"
            {
                "abstract": "abstract",
                "aff": ["aff1"],
                "database": ["astronomy"],
                "entdate": "2021-09-25",
                "indexstamp":"2021-10-24T07:56:53.361Z"
            }
        "#;
        let doc: Document = serde_json::from_str(raw).unwrap();
        assert_eq!(doc.abs.unwrap(), "abstract");
        assert_eq!(doc.aff.unwrap()[0], "aff1");
        assert!(matches!(doc.database.unwrap()[0], Database::Astronomy));
        assert_eq!(doc.entdate.unwrap(), "2021-09-25");
        assert_eq!(doc.indexstamp.unwrap().year(), 2021);
    }

    /// The paging metadata and the document list of a reply are both read.
    #[test]
    fn deserialize_search_response() {
        let raw = r#"
            {
                "numFound": 194,
                "start": 12,
                "docs": [
                    {
                        "id": "312911"
                    },
                    {
                        "id": "1877482"
                    }
                ]
            }"#;
        let parsed: Response = serde_json::from_str(raw).unwrap();
        assert_eq!(parsed.num_found, 194);
        assert_eq!(parsed.start, 12);
        assert_eq!(parsed.docs.len(), 2);
        assert_eq!(parsed.docs[0].id.as_ref().unwrap(), "312911");
    }

    /// Every builder option shows up in the serialized query.
    #[test]
    fn basic_query() {
        let client = crate::Ads::new("token").unwrap();
        let query = Query::new(&client, "au:foreman-mackey")
            .rows(10)
            .start(5)
            .fl("id")
            .fl("author")
            .fq("au:hogg")
            .sort("citation_count");
        let expected = serde_json::json!({
            "q": "au:foreman-mackey",
            "rows": 10,
            "start": 5,
            "fl": "id,author",
            "fq": "au:hogg",
            "sort": "citation_count desc",
        });
        assert_eq!(serde_json::to_value(query).unwrap(), expected);
    }

    /// A comma-separated `fl` value is passed through untouched and unset options are omitted.
    #[test]
    fn vec_fls() {
        let client = crate::Ads::new("token").unwrap();
        let query = Query::new(&client, "au:foreman-mackey").fl("id,author");
        let expected = serde_json::json!({
            "q": "au:foreman-mackey",
            "fl": "id,author",
        });
        assert_eq!(serde_json::to_value(query).unwrap(), expected);
    }
}