use std::{
borrow::Cow,
error::Error,
fmt::{Debug, Display},
mem::transmute,
num::NonZero,
str::FromStr,
};
mod archive;
mod parse;
#[cfg(test)]
mod tests;
use self::parse::tri;
pub use archive::{Archive, strip_archive_prefix};
/// Textual scheme of an [`ArticleId`], as reported by [`ArticleId::style`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
/// Identifier that carries an archive; displayed as `archive/YYMMNNN`.
Old,
/// Archive-less identifier dated at most 23 years after [`ARXIV_EPOCH`];
/// displayed as `YYMM.NNNN` (4-digit number).
NewShort,
/// Archive-less identifier dated more than 23 years after [`ARXIV_EPOCH`];
/// displayed as `YYMM.NNNNN` (5-digit number).
NewLong,
}
/// Checks that `s` is a well-formed identifier without keeping the parsed value.
///
/// # Errors
///
/// Returns the [`IdError`] produced by [`ArticleId::parse`] when `s` is rejected.
#[inline]
pub const fn validate(s: &str) -> Result<(), IdError> {
    // Only the failure is interesting; a successfully parsed identifier is discarded.
    if let Err(err) = ArticleId::parse(s) {
        return Err(err);
    }
    Ok(())
}
/// Validates `s` and splits it around its subject class, if it has one.
///
/// On success the result is `Some((head, tail))` when `s` carries a three-byte subject class
/// (a `.` followed by two characters) after its archive: `head` is the text before the class and
/// `tail` the text after it, so concatenating them yields the identifier without the class.
/// `None` means there is nothing to strip.
///
/// # Errors
///
/// Returns the [`IdError`] reported by [`validate`] when `s` is not a valid identifier.
#[inline]
pub const fn normalize(s: &str) -> Result<Option<(&str, &str)>, IdError> {
    tri!(validate(s));
    // SAFETY: `s` was accepted by `validate` just above, which is the precondition the
    // unchecked splitter relies on.
    let parts = unsafe { split_subject_class_unchecked(s) };
    Ok(parts)
}
/// Reasons an identifier can be rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum IdError {
/// The date is not valid for the identifier format (for example a month outside `1..=12`,
/// or a year/month outside the window `ArticleId::new` accepts for that format).
DateOutOfRange,
/// The article number is too large for the identifier format.
NumberOutOfRange,
/// The date portion could not be parsed.
InvalidDate,
/// The number portion could not be parsed.
InvalidNumber,
/// The version portion could not be parsed.
InvalidVersion,
/// The archive portion could not be parsed; also returned by `ArticleId::parse_bytes`
/// when the input has no recognised archive prefix.
InvalidArchive,
}
impl Display for IdError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
IdError::DateOutOfRange => "Date invalid for the given format",
IdError::NumberOutOfRange => "Number invalid for the given format",
IdError::InvalidDate => "Failed to parse the date",
IdError::InvalidNumber => "Failed to parse the number",
IdError::InvalidVersion => "Failed to parse the version",
IdError::InvalidArchive => "Failed to parse the archive",
};
f.write_str(s)
}
}
// `IdError` wraps no underlying error, so the default `Error` methods are used as-is.
impl Error for IdError {}
/// A compact identifier packed into a single `u64`.
///
/// Bit layout of `raw`, most significant byte first:
///
/// - bits 56..64: years since [`ARXIV_EPOCH`]
/// - bits 48..56: month
/// - bits 40..48: archive discriminant, `0` when the identifier has no archive
/// - bits 16..40: article number
/// - bits 0..16: version, `0` when the identifier has no version
///
/// The derived comparison traits operate on `raw` directly, so identifiers order by year,
/// then month, archive, number and finally version.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct ArticleId {
raw: u64,
}
/// Calendar year that a `years_since_epoch` value of `0` corresponds to; see [`ArticleId::year`].
pub const ARXIV_EPOCH: u16 = 1991;
/// Capacity, in bytes, reserved by the default [`Identifier::identifier`] implementation.
///
/// Matches the largest value [`ArticleId::formatted_len`] yields for an identifier with an
/// archive: the largest archive offset (7) plus 9, plus 6 for a `v65535` version suffix.
pub const MAX_ID_FORMATTED_LEN: usize = 22;
impl ArticleId {
    /// Parses an identifier from its textual form.
    ///
    /// Equivalent to calling [`ArticleId::parse_bytes`] on the bytes of `id`.
    ///
    /// # Errors
    ///
    /// Returns the [`IdError`] reported by [`ArticleId::parse_bytes`].
    #[inline]
    pub const fn parse(id: &str) -> Result<Self, IdError> {
        Self::parse_bytes(id.as_bytes())
    }

    /// Parses an identifier from raw bytes.
    ///
    /// Two shapes are recognised:
    ///
    /// * input that starts with an ASCII digit and has `.` as its fifth byte: the first four
    ///   bytes are decoded by `parse::date_new`, and the remainder by
    ///   `parse::number_and_version_len_4` (dates up to 23 years after the epoch) or
    ///   `parse::number_and_version_len_5` (later dates);
    /// * anything else must begin with an archive prefix recognised by
    ///   `archive::strip_archive_prefix_bytes`. A following `/`, or a `.XX/` group with two ASCII
    ///   uppercase letters, is skipped and the rest is decoded by `parse::date_number`.
    ///
    /// # Errors
    ///
    /// Returns [`IdError::InvalidArchive`] when the input has neither shape; every other error
    /// is the one reported by the `parse` helper that rejected the input.
    #[inline]
    pub const fn parse_bytes(id: &[u8]) -> Result<Self, IdError> {
        match id {
            // Identifier without an archive: `YYMM.` followed by the number.
            [y1 @ b'0'..=b'9', y2, m1, m2, b'.', tail @ ..] => {
                let date = [*y1, *y2, *m1, *m2];
                let number: &[u8] = tail;
                let (years_since_epoch, month) = tri!(parse::date_new(date));
                // The number format switches 23 years after the epoch.
                let (number, version) = if years_since_epoch <= 23 {
                    tri!(parse::number_and_version_len_4(number))
                } else {
                    tri!(parse::number_and_version_len_5(number))
                };
                Ok(Self::new_unchecked(
                    years_since_epoch,
                    month,
                    None,
                    number,
                    version,
                ))
            }
            // Identifier with an archive prefix.
            _ => match archive::strip_archive_prefix_bytes(id) {
                Some((archive, tail)) => {
                    // Skip the separator (and the optional two-letter subject class); when
                    // neither is present the tail is parsed as-is.
                    let date_number = match tail {
                        [b'/', tail @ ..]
                        | [b'.', b'A'..=b'Z', b'A'..=b'Z', b'/', tail @ ..]
                        | tail => tail,
                    };
                    let parse::DateNumber {
                        years_since_epoch,
                        month,
                        number,
                        version,
                    } = match parse::date_number(date_number) {
                        Ok(v) => v,
                        Err(e) => return Err(e),
                    };
                    Ok(Self::new_unchecked(
                        years_since_epoch,
                        month,
                        Some(archive),
                        number,
                        version,
                    ))
                }
                None => Err(IdError::InvalidArchive),
            },
        }
    }

    /// Builds an identifier from its components, validating every range.
    ///
    /// # Errors
    ///
    /// * [`IdError::DateOutOfRange`] when `month` is not in `1..=12`, or the date is outside the
    ///   window accepted for the format: August 1991 through March 2007 when `archive` is
    ///   `Some`, April 2007 through March 2107 when it is `None`.
    /// * [`IdError::NumberOutOfRange`] when `number` is at least `1000` (with an archive), at
    ///   least `10_000` (no archive, year up to 2014) or at least `100_000` (no archive, later
    ///   years).
    pub const fn new(
        year: u16,
        month: u8,
        archive: Option<Archive>,
        number: NonZero<u32>,
        version: Option<NonZero<u16>>,
    ) -> Result<Self, IdError> {
        if month == 0 || month > 12 {
            return Err(IdError::DateOutOfRange);
        }
        if archive.is_some() {
            if !(1991 <= year && year <= 2007)
                || (year == 1991 && month <= 7)
                || (year == 2007 && month >= 4)
            {
                return Err(IdError::DateOutOfRange);
            }
            if number.get() >= 1000 {
                return Err(IdError::NumberOutOfRange);
            }
        } else {
            if !(2007 <= year && year <= 2107)
                || (year == 2007 && month < 4)
                || (year == 2107 && month >= 4)
            {
                return Err(IdError::DateOutOfRange);
            }
            let threshold = if year <= 2014 { 10_000 } else { 100_000 };
            if number.get() >= threshold {
                return Err(IdError::NumberOutOfRange);
            }
        }
        // A missing version is stored as 0; no `unsafe` is needed for this conversion.
        let version = match version {
            Some(v) => v.get(),
            None => 0,
        };
        Ok(Self::new_unchecked(
            // The year was checked to lie in 1991..=2107, so the difference fits in a `u8`.
            (year - ARXIV_EPOCH) as u8,
            month,
            archive,
            number,
            version,
        ))
    }

    /// Packs already-validated components into the `u64` representation.
    ///
    /// No range checks are performed. A `version` of `0` means "no version" and a missing
    /// archive is stored as `0`.
    #[must_use]
    const fn new_unchecked(
        years_since_epoch: u8,
        month: u8,
        archive: Option<Archive>,
        number: NonZero<u32>,
        version: u16,
    ) -> Self {
        let archive = match archive {
            Some(archive) => archive as u8,
            None => 0,
        };
        let number = number.get();
        let raw = ((years_since_epoch as u64) << 56)
            | ((month as u64) << 48)
            | ((archive as u64) << 40)
            | ((number as u64) << 16)
            | (version as u64);
        Self { raw }
    }

    /// Number of years between [`ARXIV_EPOCH`] and the identifier's year.
    #[inline]
    #[must_use]
    pub const fn years_since_epoch(self) -> u8 {
        raw::years_since_epoch(self.raw)
    }

    /// Four-digit calendar year of the identifier.
    #[inline]
    #[must_use]
    pub const fn year(self) -> u16 {
        ARXIV_EPOCH + (self.years_since_epoch() as u16)
    }

    /// Month of the identifier, as stored in the packed value.
    #[inline]
    #[must_use]
    pub const fn month(self) -> u8 {
        raw::month(self.raw)
    }

    /// Archive of the identifier, or `None` when the stored archive byte is `0`.
    #[inline]
    #[must_use]
    pub const fn archive(self) -> Option<Archive> {
        let a = raw::archive(self.raw);
        if a == 0 {
            None
        } else {
            // SAFETY: a non-zero archive byte is only ever written from an `Archive` value
            // (`new_unchecked`) or accepted by `deserialize` after a range check.
            // NOTE(review): this assumes `Archive` is a `u8`-sized enum whose discriminants
            // cover exactly the accepted range; confirm against its definition.
            unsafe { Some(transmute::<u8, Archive>(a)) }
        }
    }

    /// Textual scheme this identifier is written in.
    ///
    /// Identifiers with an archive are [`Style::Old`]; the others are [`Style::NewShort`] up to
    /// 23 years after the epoch and [`Style::NewLong`] afterwards.
    #[inline]
    #[must_use]
    pub const fn style(self) -> Style {
        if raw::is_new_style(self.raw) {
            if self.years_since_epoch() <= 23 {
                Style::NewShort
            } else {
                Style::NewLong
            }
        } else {
            Style::Old
        }
    }

    /// Article number of the identifier.
    #[inline]
    #[must_use]
    pub const fn number(self) -> NonZero<u32> {
        let n = raw::number(self.raw);
        // SAFETY: `new_unchecked` stores the value of a `NonZero<u32>`, `deserialize` rejects a
        // zero number, and `deserialize_unchecked` makes a valid number the caller's obligation.
        unsafe { NonZero::new_unchecked(n) }
    }

    /// Version of the identifier, or `None` when no version is stored.
    #[inline]
    #[must_use]
    pub const fn version(self) -> Option<NonZero<u16>> {
        NonZero::new(raw::version(self.raw))
    }

    /// Returns a copy of the identifier with its version replaced by `v`.
    ///
    /// Passing `None` removes the version.
    #[inline]
    #[must_use]
    pub const fn set_version(self, v: Option<NonZero<u16>>) -> Self {
        // A missing version is stored as 0.
        let bits = match v {
            Some(v) => v.get(),
            None => 0,
        };
        Self {
            raw: raw::set_version(self.raw, bits),
        }
    }

    /// Returns a copy of the identifier without a version.
    #[inline]
    #[must_use]
    pub const fn clear_version(self) -> Self {
        self.set_version(None)
    }

    /// Length in bytes of the identifier's textual form, computed without formatting it.
    ///
    /// The result is the sum of a fixed 9 bytes, a per-archive offset, one extra byte for
    /// identifiers dated more than 23 years after the epoch, and the length of the `vN` suffix.
    #[must_use]
    pub const fn formatted_len(self) -> usize {
        /// Length of the `vN` suffix; `0` when there is no version.
        #[inline]
        const fn version_formatted_len(v: u16) -> usize {
            if v == 0 {
                return 0;
            }
            if v <= 9 {
                return 2;
            }
            // SAFETY: `v` is non-zero here, so `checked_ilog10` returns `Some`; the logarithm of
            // a `u16` is at most 4, so adding 2 cannot overflow.
            unsafe { (v.checked_ilog10().unwrap_unchecked() as usize).unchecked_add(2) }
        }

        // Indexed by the raw archive byte; entry 0 is the "no archive" case.
        // NOTE(review): the non-zero entries look like "archive name length minus one";
        // confirm against the `Archive` definition.
        const BODY_OFFSET_LUT: [u8; 35] = [
            0, 7, 7, 7, 5, 7, 6, 7, 7, 6, 5, 7, 7, 1, 4, 7, 4, 5, 6, 5, 5, 3, 6, 6, 3, 6, 6, 7,
            6, 7, 4, 4, 7, 7, 7,
        ];

        let l_version = version_formatted_len(raw::version(self.raw));
        let archive_raw = raw::archive(self.raw) as usize;
        // SAFETY: the archive byte is either 0 or an `Archive` discriminant; `deserialize`
        // enforces the same `<= 34` bound.
        // NOTE(review): confirm that no `Archive` discriminant exceeds 34.
        unsafe { std::hint::assert_unchecked(archive_raw <= 34) };
        let l_body = BODY_OFFSET_LUT[archive_raw] as usize;
        let new_style_offset = (self.years_since_epoch() > 23) as usize;
        // SAFETY: the operands are at most 6, 7, 1 and 9, so the sum cannot overflow `usize`.
        unsafe {
            l_version
                .unchecked_add(l_body)
                .unchecked_add(new_style_offset)
                .unchecked_add(9)
        }
    }

    /// Returns the packed `u64` representation; the inverse of [`ArticleId::deserialize`].
    #[must_use]
    pub const fn serialize(self) -> u64 {
        self.raw
    }

    /// Rebuilds an identifier from its packed representation, validating every field.
    ///
    /// Returns `None` when the month is not in `1..=12`, the number is `0`, the archive byte
    /// is greater than 34, or the date/number is outside the range allowed for the format
    /// (the same ranges [`ArticleId::new`] enforces).
    #[must_use]
    pub const fn deserialize(raw: u64) -> Option<Self> {
        let years_since_epoch = raw::years_since_epoch(raw);
        let month = raw::month(raw);
        let archive = raw::archive(raw);
        let number = raw::number(raw);
        if month == 0 || month > 12 {
            return None;
        }
        if number == 0 {
            return None;
        }
        if archive == 0 {
            // No archive: April 2007 (epoch + 16) through March 2107 (epoch + 116).
            if !(16 <= years_since_epoch && years_since_epoch <= 116)
                || (years_since_epoch == 16 && month < 4)
                || (years_since_epoch == 116 && month >= 4)
            {
                return None;
            }
            let threshold = if years_since_epoch <= 23 {
                10_000
            } else {
                100_000
            };
            if number >= threshold {
                return None;
            }
        } else if archive <= 34 {
            // With an archive: August 1991 (epoch + 0) through March 2007 (epoch + 16).
            if (years_since_epoch > 16)
                || (years_since_epoch == 0 && month <= 7)
                || (years_since_epoch == 16 && month >= 4)
            {
                return None;
            }
            if number >= 1000 {
                return None;
            }
        } else {
            return None;
        }
        Some(Self { raw })
    }

    /// Rebuilds an identifier from its packed representation without any validation.
    ///
    /// # Safety
    ///
    /// `raw` must be a value for which [`ArticleId::deserialize`] returns `Some`, for example
    /// one obtained from [`ArticleId::serialize`]. Other values break the invariants that
    /// `archive`, `number` and `formatted_len` rely on in their `unsafe` blocks.
    #[must_use]
    pub const unsafe fn deserialize_unchecked(raw: u64) -> Self {
        Self { raw }
    }

    /// Mask with the low 7 bits of the year byte, 4 bits of the month byte, 6 bits of the
    /// archive byte, 17 bits of the number and all 16 version bits set.
    ///
    /// Every value accepted by [`ArticleId::deserialize`] has no bits set outside this mask.
    pub const SERIALIZED_BITMASK: u64 =
        0b01111111_00001111_00111111_00000001_11111111_11111111_11111111_11111111;
}
impl FromStr for ArticleId {
type Err = IdError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
/// Formats the identifier in its textual form.
///
/// Identifiers with an archive are written as `archive/YYMMNNN`, the others as `YYMM.NNNN`
/// (up to 23 years after the epoch) or `YYMM.NNNNN`; a `vN` suffix follows when a version is set.
impl Display for ArticleId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The epoch is 1991, so adding 91 and reducing modulo 100 gives the two-digit year.
        let two_digit_year = self.years_since_epoch().wrapping_add(91).rem_euclid(100);
        let month = self.month();
        let number = self.number();

        match self.archive() {
            Some(archive) => {
                f.write_str(archive.to_id())?;
                f.write_str("/")?;
                write!(f, "{:02}{:02}{:03}", two_digit_year, month, number)?;
            }
            None => {
                write!(f, "{:02}{:02}.", two_digit_year, month)?;
                if self.years_since_epoch() > 23 {
                    write!(f, "{:05}", number)?;
                } else {
                    write!(f, "{:04}", number)?;
                }
            }
        }

        match self.version() {
            Some(version) => write!(f, "v{version}"),
            None => Ok(()),
        }
    }
}
/// Shows both the textual form (`id`) and the packed integer (`raw`).
impl Debug for ArticleId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("ArticleId");
        // `format_args!` reuses the `Display` output without allocating a `String`.
        builder.field("id", &format_args!("{}", self));
        builder.field("raw", &self.raw);
        builder.finish()
    }
}
/// Field accessors for the packed `u64` representation.
///
/// Viewed as big-endian bytes the layout is
/// `[years, month, archive, number (3 bytes), version (2 bytes)]`.
mod raw {
    /// Bits occupied by the version field.
    const VERSION_MASK: u64 = u16::MAX as u64;

    /// Byte 0: years since the epoch.
    #[inline]
    pub const fn years_since_epoch(raw: u64) -> u8 {
        let [years, ..] = raw.to_be_bytes();
        years
    }

    /// Byte 1: month.
    #[inline]
    pub const fn month(raw: u64) -> u8 {
        let [_, month, ..] = raw.to_be_bytes();
        month
    }

    /// Byte 2: archive discriminant (`0` when absent).
    #[inline]
    pub const fn archive(raw: u64) -> u8 {
        let [_, _, archive, ..] = raw.to_be_bytes();
        archive
    }

    /// Bytes 3..6: the 24-bit article number.
    #[inline]
    pub const fn number(raw: u64) -> u32 {
        let [_, _, _, high, mid, low, ..] = raw.to_be_bytes();
        u32::from_be_bytes([0, high, mid, low])
    }

    /// Bytes 6..8: the version (`0` when absent).
    #[inline]
    pub const fn version(raw: u64) -> u16 {
        let [.., high, low] = raw.to_be_bytes();
        u16::from_be_bytes([high, low])
    }

    /// Replaces the version field with `v`, leaving every other bit untouched.
    #[inline]
    pub const fn set_version(raw: u64, v: u16) -> u64 {
        (raw & !VERSION_MASK) | (v as u64)
    }

    /// `true` when the archive byte is zero, i.e. the identifier carries no archive.
    #[inline]
    pub const fn is_new_style(raw: u64) -> bool {
        archive(raw) == 0
    }
}
/// A string-like value that is known to hold a valid identifier.
///
/// Values are created through `Validated::parse`, `FromStr` or `From<ArticleId>`; the `unsafe`
/// code operating on `inner` relies on that validation having happened.
#[derive(Debug, Clone)]
pub struct Validated<S> {
// The original text, kept verbatim (including any subject class).
inner: S,
}
/// Compares two validated identifiers while ignoring any subject class they carry.
impl<S: AsRef<str>, T: AsRef<str>> PartialEq<Validated<T>> for Validated<S> {
    fn eq(&self, other: &Validated<T>) -> bool {
        /// `true` when `whole` is exactly `head` followed by `tail`.
        fn is_concatenation(whole: &str, head: &str, tail: &str) -> bool {
            whole.strip_prefix(head).is_some_and(|rest| rest == tail)
        }

        let lhs = self.inner.as_ref();
        let rhs = other.inner.as_ref();
        // SAFETY: both strings are the contents of `Validated` values, which only hold text
        // that passed validation.
        let (lhs_parts, rhs_parts) = unsafe {
            (
                split_subject_class_unchecked(lhs),
                split_subject_class_unchecked(rhs),
            )
        };

        match (lhs_parts, rhs_parts) {
            // Both carry a subject class: compare the pieces around it.
            (Some((l, r)), Some((lp, rp))) => l == lp && r == rp,
            // Only one side carries a class: the other must equal the stripped form.
            (Some((head, tail)), None) => is_concatenation(rhs, head, tail),
            (None, Some((head, tail))) => is_concatenation(lhs, head, tail),
            // Neither carries a class: plain string comparison.
            (None, None) => lhs == rhs,
        }
    }
}
// Marker impl: the `PartialEq` comparison above is declared to be a full equivalence relation.
impl<S: AsRef<str>> Eq for Validated<S> {}
/// Writes the identifier with any subject class removed.
impl<S: AsRef<str>> Display for Validated<S> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.inner.as_ref();
        // SAFETY: `text` is the content of a `Validated`, which only holds validated text.
        let Some((head, tail)) = (unsafe { split_subject_class_unchecked(text) }) else {
            // No subject class: the stored text is already in its final form.
            return f.write_str(text);
        };
        f.write_str(head)?;
        f.write_str(tail)
    }
}
/// Splits an identifier around its subject class without validating the input.
///
/// A subject class is detected when a `.` sits at byte offset 2, at offset 4 followed by an
/// ASCII uppercase letter, or at the first of the offsets 5 through 8 (checked in that order).
/// The function returns the text before the dot and the text starting three bytes after the
/// dot's position, or `None` when no such dot exists.
///
/// # Safety
///
/// When a dot is detected at offset `n`, the string must be at least `n + 3` bytes long and
/// both `n` and `n + 3` must fall on UTF-8 character boundaries. Identifiers accepted by
/// `validate` are expected to satisfy this.
#[inline]
const unsafe fn split_subject_class_unchecked(s: &str) -> Option<(&str, &str)> {
    let bytes = s.as_bytes();
    let len = bytes.len();

    let archive_len = if len > 2 && bytes[2] == b'.' {
        2
    } else if len > 5 && bytes[4] == b'.' && bytes[5].is_ascii_uppercase() {
        // The uppercase check keeps `YYMM.NNNN` identifiers from being treated as
        // a four-letter archive with a subject class.
        4
    } else {
        let mut idx = 5;
        loop {
            if idx > 8 || idx >= len {
                return None;
            }
            if bytes[idx] == b'.' {
                break idx;
            }
            idx += 1;
        }
    };
    let class_end = archive_len + 3;

    // SAFETY: `archive_len < len` was established above; the caller guarantees that
    // `class_end <= len` and that both offsets are character boundaries, so the two pieces
    // are in bounds and valid UTF-8.
    unsafe {
        let (head, _) = bytes.split_at_unchecked(archive_len);
        let (_, tail) = bytes.split_at_unchecked(class_end);
        Some((
            std::str::from_utf8_unchecked(head),
            std::str::from_utf8_unchecked(tail),
        ))
    }
}
/// Error returned by `Validated::parse`; hands the rejected input back to the caller.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError<S> {
/// The value that failed validation, returned unchanged.
pub invalid: S,
/// Why the value was rejected.
pub id_err: IdError,
}
impl<S> From<ValidationError<S>> for IdError {
fn from(value: ValidationError<S>) -> Self {
value.id_err
}
}
/// Formats as `error parsing <input>: <cause>`.
impl<S: AsRef<str>> Display for ValidationError<S> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { invalid, id_err } = self;
        f.write_fmt(format_args!(
            "error parsing {}: {}",
            invalid.as_ref(),
            id_err
        ))
    }
}
// `Error` requires `Debug + Display`; `Display` needs `S: AsRef<str>` and the derived `Debug`
// needs `S: Debug`, hence both bounds.
impl<S: AsRef<str> + Debug> Error for ValidationError<S> {}
impl<S: AsRef<str>> Validated<S> {
    /// Validates `s` and wraps it on success.
    ///
    /// # Errors
    ///
    /// Returns a [`ValidationError`] carrying `s` back to the caller together with the
    /// [`IdError`] reported by [`validate`].
    pub fn parse(s: S) -> Result<Self, ValidationError<S>> {
        if let Err(id_err) = validate(s.as_ref()) {
            return Err(ValidationError { invalid: s, id_err });
        }
        Ok(Self { inner: s })
    }

    /// Splits the identifier around its subject class.
    ///
    /// Returns the text before and after the class, or `None` when the identifier has no
    /// subject class to strip.
    #[inline]
    pub fn normalize(&self) -> Option<(&str, &str)> {
        let text: &str = self.inner.as_ref();
        // SAFETY: `text` is the content of a `Validated`, which only holds validated text.
        unsafe { split_subject_class_unchecked(text) }
    }

    /// Unwraps the value, returning the original text exactly as it was supplied.
    #[inline]
    pub fn into_inner(self) -> S {
        let Self { inner } = self;
        inner
    }
}
/// Re-parses the validated text into its packed form.
impl<S: AsRef<str>> From<&Validated<S>> for ArticleId {
    fn from(value: &Validated<S>) -> Self {
        let parsed = ArticleId::parse(value.inner.as_ref());
        // SAFETY: a `Validated` only holds text that was already accepted by the parser, so
        // parsing it again is expected to succeed.
        unsafe { parsed.unwrap_unchecked() }
    }
}
impl FromStr for Validated<String> {
type Err = IdError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s.to_owned()).map_err(|err| err.id_err)
}
}
impl From<ArticleId> for Validated<String> {
fn from(value: ArticleId) -> Self {
Self {
inner: value.to_string(),
}
}
}
/// Types that can produce the textual form of an identifier.
///
/// The trait is sealed through `private::Sealed`, so it cannot be implemented outside this
/// module tree.
pub trait Identifier: private::Sealed {
    /// Appends the textual identifier to `buffer`.
    fn write_identifier(&self, buffer: &mut String);

    /// Returns the textual identifier.
    ///
    /// The default implementation allocates a `String` with room for
    /// [`MAX_ID_FORMATTED_LEN`] bytes and fills it through
    /// [`Identifier::write_identifier`], so it always yields `Cow::Owned`.
    fn identifier(&self) -> Cow<'_, str> {
        let mut text = String::with_capacity(MAX_ID_FORMATTED_LEN);
        self.write_identifier(&mut text);
        text.into()
    }
}
impl Identifier for ArticleId {
    /// Appends the `Display` form of the identifier to `buffer`.
    fn write_identifier(&self, buffer: &mut String) {
        use std::fmt::Write as _;
        // The formatting result is deliberately discarded.
        let _ = buffer.write_fmt(format_args!("{self}"));
    }
}
impl<S: AsRef<str>> Identifier for Validated<S> {
    /// Returns the identifier without its subject class.
    ///
    /// Borrows the stored text when there is nothing to strip and allocates only when a
    /// subject class has to be removed.
    fn identifier(&self) -> Cow<'_, str> {
        if let Some((head, tail)) = self.normalize() {
            return Cow::Owned([head, tail].concat());
        }
        Cow::Borrowed(self.inner.as_ref())
    }

    /// Appends the identifier, without its subject class, to `buffer`.
    fn write_identifier(&self, buffer: &mut String) {
        let Some((head, tail)) = self.normalize() else {
            buffer.push_str(self.inner.as_ref());
            return;
        };
        buffer.push_str(head);
        buffer.push_str(tail);
    }
}
/// `serde` support, compiled only when the `serde` feature is enabled.
#[cfg(feature = "serde")]
mod serialize {
    use super::ArticleId;
    use serde::{
        Deserializer,
        de::{Deserialize, Visitor},
    };

    /// Accepts the textual form (as bytes or as a string) as well as the packed `u64` form.
    #[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
    impl<'de> Deserialize<'de> for ArticleId {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            /// Visitor that turns bytes, strings or a packed integer into an `ArticleId`.
            struct ArticleIdVisitor;

            impl<'de> Visitor<'de> for ArticleIdVisitor {
                type Value = ArticleId;

                fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                    formatter.write_str("a str representing an arxiv identifier")
                }

                fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
                    let parsed = ArticleId::parse_bytes(v);
                    parsed.map_err(E::custom)
                }

                fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
                    let parsed = ArticleId::parse(v);
                    parsed.map_err(E::custom)
                }

                fn visit_u64<E: serde::de::Error>(self, v: u64) -> Result<Self::Value, E> {
                    // This resolves to the inherent, validating `ArticleId::deserialize`.
                    let Some(id) = ArticleId::deserialize(v) else {
                        return Err(E::custom("invalid binary format for identifier"));
                    };
                    Ok(id)
                }
            }

            deserializer.deserialize_bytes(ArticleIdVisitor)
        }
    }
}
// Sealing module: `Sealed` is public but lives in a private module, so only the types listed
// here can satisfy the `Identifier: private::Sealed` supertrait bound.
mod private {
use super::{ArticleId, Validated};
/// Marker supertrait of `Identifier`.
pub trait Sealed {}
impl Sealed for ArticleId {}
impl<S: AsRef<str>> Sealed for Validated<S> {}
}