#![doc = include_str!("response/tests/query_doc.txt")]
#[cfg(test)]
mod tests;
mod xml;
use std::{borrow::Cow, fmt::Display};
use chrono::{DateTime, FixedOffset};
pub(crate) use self::xml::ResponseReader;
#[cfg(feature = "serde")]
pub(crate) use self::xml::Term;
use crate::id::ArticleId;
/// A parsed API response: feed-level metadata plus the entries, stored as `T`.
///
/// [`parse`] produces a `Response<Vec<Entry>>`.
#[derive(Debug, Clone, PartialEq)]
pub struct Response<T> {
/// Feed-level timestamp, as returned by `ResponseReader::init` in [`parse`].
pub updated: DateTime<FixedOffset>,
/// Paging counters reported with this response.
pub pagination: Pagination,
/// The entries carried by the response.
pub entries: T,
}
/// Paging counters attached to a [`Response`].
///
/// NOTE(review): the field names suggest the OpenSearch `totalResults`,
/// `startIndex` and `itemsPerPage` elements of the feed; confirm against the
/// `xml` reader, which is where these values are actually extracted.
#[derive(Debug, Clone, PartialEq)]
pub struct Pagination {
/// Presumably the total number of results matching the query (TODO confirm).
pub total_results: u64,
/// Presumably the offset of the first entry of this page (TODO confirm).
pub start_index: u64,
/// Presumably the requested page size (TODO confirm). [`parse`] uses the
/// smaller of this and `total_results` as the expected number of entries.
pub items_per_page: u64,
}
/// Parses the raw bytes of an API response into a [`Response`] holding every entry.
///
/// The header is consumed by `ResponseReader::init`; entries are then read one
/// at a time until `next_id` reports that no further entry exists. Within an
/// entry the reader is queried in a fixed order: id, title, updated, summary,
/// categories, published, comment, primary category, journal reference,
/// authors (name and affiliation each) and finally the DOI.
///
/// Text fields of the returned entries are `Cow`s tied to the lifetime `'r` of
/// `xml`.
///
/// # Errors
///
/// Returns a [`ResponseError`] if the reader reports an error, if an entry
/// identifier is rejected by `ArticleId::parse_bytes`, or if an `updated` /
/// `published` value is not a valid RFC 3339 timestamp.
pub fn parse<'r>(xml: &'r [u8]) -> Result<Response<Vec<Entry<'r>>>, ResponseError> {
    // The paging counters are read from the response itself and therefore must
    // not be trusted as an allocation size: an absurd value would make
    // `Vec::with_capacity` panic (capacity overflow) or abort on allocation
    // failure. They are only a hint, so clamp them; the vector still grows on
    // demand when a response really carries more entries.
    const MAX_PREALLOCATED_ENTRIES: u64 = 1024;

    let (updated, pagination, mut reader) = ResponseReader::init(xml)?;

    // Clamping happens in `u64` so the final cast can never truncate, even on
    // targets where `usize` is narrower than 64 bits.
    let expected_count = pagination
        .items_per_page
        .min(pagination.total_results)
        .min(MAX_PREALLOCATED_ENTRIES) as usize;
    let mut entries = Vec::with_capacity(expected_count);

    while let Some(id) = reader.next_id()? {
        let id = ArticleId::parse_bytes(id)?;
        let title = reader.next_title()?;
        // Named distinctly from the feed-level `updated` returned below.
        let entry_updated = DateTime::parse_from_rfc3339(&reader.next_updated()?)?;
        let summary = reader.next_summary()?;

        let mut categories = Vec::new();
        while let Some(term) = reader.next_category()? {
            categories.push(term.get()?.into_owned().into());
        }

        let published = DateTime::parse_from_rfc3339(&reader.next_published()?)?;
        let comment = reader.next_comment()?;
        let primary_category = reader.next_primary_category()?.get()?.into_owned().into();
        let journal_ref = reader.next_journal_ref()?;

        let mut authors = Vec::new();
        while reader.next_author()? {
            let name = AuthorName::from_arxiv(&reader.next_author_name()?);
            let affiliation = reader.next_author_affiliation()?;
            authors.push(Author { name, affiliation });
        }

        let doi = reader.next_doi()?;

        entries.push(Entry {
            id,
            updated: entry_updated,
            published,
            title,
            summary,
            authors,
            doi,
            comment,
            journal_ref,
            primary_category,
            categories,
        });
    }

    Ok(Response {
        updated,
        pagination,
        entries,
    })
}
/// An author name split into its components.
///
/// Values are normally built with [`AuthorName::from_arxiv`]. The `Display`
/// implementation prints the non-empty components in the order
/// `firstnames keyname suffix`, separated by single spaces.
///
/// The derived ordering compares `keyname` first, then `firstnames`, then
/// `suffix` (field declaration order).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct AuthorName {
/// Last name, including up to two recognized leading prefixes
/// (for example `van der`), joined by single spaces.
pub keyname: String,
/// Everything preceding the key name, joined by single spaces; may be empty.
pub firstnames: String,
/// Recognized trailing suffix (for example `Jr.` or `III`); empty if none.
pub suffix: String,
}
impl Display for AuthorName {
    /// Writes the non-empty components in the order `firstnames`, `keyname`,
    /// `suffix`, separated by single spaces. Nothing is written when all
    /// three components are empty.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let components = [&self.firstnames, &self.keyname, &self.suffix];
        let mut present = components.iter().filter(|part| !part.is_empty());

        // The first non-empty component is written bare; every later one is
        // preceded by a separating space.
        if let Some(head) = present.next() {
            f.write_str(head)?;
            for part in present {
                f.write_str(" ")?;
                f.write_str(part)?;
            }
        }
        Ok(())
    }
}
impl AuthorName {
    /// Splits a whitespace-separated name into key name, first names and suffix.
    ///
    /// Working from the end of the name:
    ///
    /// 1. A trailing word accepted by `is_arxiv_suffix` becomes the suffix.
    ///    If it is the only word, it is treated as the key name instead.
    /// 2. The next word from the end is the last name.
    /// 3. Up to two words directly before it that are accepted by
    ///    `is_arxiv_prefix` are prepended to the last name to form the key name.
    /// 4. All remaining words form the first names.
    ///
    /// Words are split on ASCII whitespace and re-joined with single spaces.
    /// An empty or all-whitespace input yields three empty components.
    pub fn from_arxiv(name: &str) -> Self {
        /// Joins `words` with single spaces.
        fn spaced<'a>(words: impl Iterator<Item = &'a str>) -> String {
            words.collect::<Vec<_>>().join(" ")
        }

        let mut words = name.split_ascii_whitespace();

        let Some(tail) = words.next_back() else {
            return Self {
                keyname: String::new(),
                firstnames: String::new(),
                suffix: String::new(),
            };
        };

        let (surname, suffix) = if Self::is_arxiv_suffix(tail) {
            match words.next_back() {
                Some(word) => (word, tail),
                // A lone suffix-looking word is the whole name.
                None => {
                    return Self {
                        keyname: tail.to_owned(),
                        firstnames: String::new(),
                        suffix: String::new(),
                    };
                }
            }
        } else {
            (tail, "")
        };

        // Walk backwards collecting at most two prefixes. `boundary` holds the
        // first non-prefix word taken off the back, which belongs to the first
        // names and therefore has to be appended after the remaining words.
        let mut prefixes: Vec<&str> = Vec::with_capacity(2);
        let mut boundary = None;
        while prefixes.len() < 2 {
            match words.next_back() {
                Some(word) if Self::is_arxiv_prefix(word) => prefixes.push(word),
                other => {
                    boundary = other;
                    break;
                }
            }
        }

        let firstnames = spaced(words.chain(boundary));
        // Prefixes were gathered back-to-front; restore reading order.
        let keyname = spaced(prefixes.into_iter().rev().chain(Some(surname)));

        Self {
            keyname,
            firstnames,
            suffix: suffix.to_owned(),
        }
    }

    /// Returns `true` if `s` is one of the recognized (lower-case,
    /// case-sensitive) last-name prefixes.
    fn is_arxiv_prefix(s: &str) -> bool {
        const PREFIXES: [&str; 13] = [
            "da", "de", "del", "della", "dem", "der", "di", "la", "mac", "ter", "van", "vaziri",
            "von",
        ];
        PREFIXES.contains(&s)
    }

    /// Returns `true` if `s` is one of the recognized (case-sensitive) name
    /// suffixes.
    fn is_arxiv_suffix(s: &str) -> bool {
        const SUFFIXES: [&str; 9] = ["I", "II", "III", "IV", "Jr", "Jr.", "Sr", "Sr.", "V"];
        SUFFIXES.contains(&s)
    }
}
/// A single entry of a response, as assembled by [`parse`].
///
/// Text fields are `Cow`s bound to `'r`, the lifetime of the byte slice handed
/// to [`parse`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(all(test, feature = "serde"), derive(serde::Deserialize))]
pub struct Entry<'r> {
/// Identifier of the entry, validated by `ArticleId::parse_bytes`.
pub id: ArticleId,
/// The entry's `updated` timestamp, parsed as RFC 3339.
pub updated: DateTime<FixedOffset>,
/// The entry's `published` timestamp, parsed as RFC 3339.
pub published: DateTime<FixedOffset>,
/// Title of the entry.
pub title: Cow<'r, str>,
/// Summary of the entry.
pub summary: Cow<'r, str>,
/// Authors in the order they were read from the response.
pub authors: Vec<Author<'r>>,
/// DOI, if the reader found one.
pub doi: Option<Cow<'r, str>>,
/// Comment, if the reader found one.
pub comment: Option<Cow<'r, str>>,
/// Journal reference, if the reader found one.
pub journal_ref: Option<Cow<'r, str>>,
/// The `term` of the entry's primary category.
pub primary_category: Cow<'r, str>,
/// The `term` of every category read for the entry, in response order.
pub categories: Vec<Cow<'r, str>>,
}
/// Errors that can occur while reading a response.
///
/// The `Display` implementation lives in the `error_impl` module; the
/// descriptions below mirror its messages.
#[derive(Debug)]
pub enum ResponseError {
/// XML parse error reported by `quick_xml`. UTF-8 decoding errors are also
/// converted into this variant (wrapped as an encoding error).
Parse(quick_xml::errors::Error),
/// XML error while reading an attribute.
Attribute(quick_xml::events::attributes::AttrError),
/// A datetime field could not be parsed.
InvalidDateTime(chrono::ParseError),
/// The API answered with an error response; the payload is its message.
Arxiv(String),
/// The response contains trailing entries.
/// NOTE(review): the exact condition is decided by the reader, not visible here.
TrailingEntries,
/// An expected tag is missing; the payload is the tag name.
MissingTag(&'static str),
/// A `category` or `primary_category` tag is missing the `term` attribute.
MissingTerm,
/// The API header was not in the expected shape.
InvalidHeader(String),
/// The API error response was not in the expected format.
InvalidError(String),
/// An entry contains an invalid identifier.
InvalidId(crate::id::IdError),
/// Free-form error; the message is displayed verbatim.
Custom(String),
}
/// An author of an [`Entry`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(all(test, feature = "serde"), derive(serde::Deserialize))]
pub struct Author<'r> {
/// The author's name, split into components by [`AuthorName::from_arxiv`]
/// when produced by [`parse`].
pub name: AuthorName,
/// The author's affiliation, if the reader found one.
pub affiliation: Option<Cow<'r, str>>,
}
mod error_impl {
use super::ResponseError;
impl From<crate::id::IdError> for ResponseError {
fn from(value: crate::id::IdError) -> Self {
Self::InvalidId(value)
}
}
impl From<chrono::ParseError> for ResponseError {
fn from(value: chrono::ParseError) -> Self {
Self::InvalidDateTime(value)
}
}
impl From<std::str::Utf8Error> for ResponseError {
fn from(value: std::str::Utf8Error) -> Self {
ResponseError::Parse(quick_xml::errors::Error::Encoding(
quick_xml::encoding::EncodingError::Utf8(value),
))
}
}
impl From<quick_xml::events::attributes::AttrError> for ResponseError {
fn from(value: quick_xml::events::attributes::AttrError) -> Self {
ResponseError::Attribute(value)
}
}
impl From<quick_xml::errors::Error> for ResponseError {
fn from(value: quick_xml::errors::Error) -> Self {
ResponseError::Parse(value)
}
}
impl std::fmt::Display for ResponseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ResponseError::Parse(error) => write!(f, "XML parse error: {error}"),
ResponseError::Attribute(attr_error) => {
write!(f, "XML error while reading attribute: {attr_error}")
}
ResponseError::Arxiv(error) => {
write!(f, "arXiv API error response: {error}")
}
ResponseError::MissingTag(tag) => {
write!(f, "missing tag `{tag}`")
}
ResponseError::Custom(err) => write!(f, "{err}"),
ResponseError::MissingTerm => f.write_str(
"`category` or `primary_category` tag is missing the `term` attribute",
),
ResponseError::InvalidHeader(msg) => write!(f, "Unexpected API header: {msg}"),
ResponseError::InvalidError(msg) => write!(f, "Unexpected API error format: {msg}"),
ResponseError::InvalidDateTime(parse_error) => {
write!(f, "Error parsing datetime field: {parse_error}")
}
ResponseError::InvalidId(id_error) => {
write!(f, "Entry contains invalid identifier: {id_error}")
}
ResponseError::TrailingEntries => write!(f, "Response contains trailing entries"),
}
}
}
impl std::error::Error for ResponseError {}
}