use chrono::DurationRound;
use ipld_core::cid;
use langtag::{LanguageTag, LanguageTagBuf};
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error};
use std::{cmp, ops::Deref, str::FromStr, sync::OnceLock};
use super::LimitedU32;
/// Encodes `i` as exactly 13 base-32 digits, most significant digit first.
///
/// Each digit covers five bits of `i` and is drawn from the alphabet
/// `234567abcdefghijklmnopqrstuvwxyz`. Thirteen digits span 65 bits, so the
/// leading digit only ever carries the top four bits of the input.
fn s32_encode(mut i: u64) -> String {
    const ALPHABET: &[u8; 32] = b"234567abcdefghijklmnopqrstuvwxyz";
    // Start with all-zero digits and fill from the least significant end.
    let mut digits = [ALPHABET[0]; 13];
    for slot in digits.iter_mut().rev() {
        *slot = ALPHABET[(i & 0x1F) as usize];
        i >>= 5;
    }
    digits.iter().map(|&byte| char::from(byte)).collect()
}
/// Generates the common trait impls for a validated string newtype.
///
/// `$name` must be a tuple struct whose first field is a `String`, and must
/// provide `fn new(String) -> Result<Self, &'static str>`. The macro adds:
///
/// - `FromStr` and `Deserialize`, both of which validate through `new`;
/// - `From<$name> for String`, which returns the wrapped string;
/// - `AsRef<str>` and `Deref<Target = str>`, which borrow the wrapped string.
macro_rules! string_newtype {
    ($name:ident) => {
        impl FromStr for $name {
            type Err = &'static str;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                Self::new(s.to_owned())
            }
        }

        impl<'de> Deserialize<'de> for $name {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: Deserializer<'de>,
            {
                // Read a plain string first, then run it through the validator.
                let raw = String::deserialize(deserializer)?;
                Self::new(raw).map_err(D::Error::custom)
            }
        }

        impl From<$name> for String {
            fn from(value: $name) -> Self {
                let $name(inner) = value;
                inner
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl Deref for $name {
            type Target = str;

            fn deref(&self) -> &Self::Target {
                self.0.as_str()
            }
        }
    };
}
/// An identifier that is either a [`Did`] or a [`Handle`].
///
/// The enum is `#[serde(untagged)]`, so it is (de)serialized as the bare
/// inner string; on deserialization the `Did` variant is attempted before
/// the `Handle` variant.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Hash)]
#[serde(untagged)]
pub enum AtIdentifier {
    /// The identifier is a DID.
    Did(Did),
    /// The identifier is a handle.
    Handle(Handle),
}

impl From<Did> for AtIdentifier {
    /// Wraps a [`Did`] in the `Did` variant.
    fn from(did: Did) -> Self {
        Self::Did(did)
    }
}

impl From<Handle> for AtIdentifier {
    /// Wraps a [`Handle`] in the `Handle` variant.
    fn from(handle: Handle) -> Self {
        Self::Handle(handle)
    }
}
impl FromStr for AtIdentifier {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Ok(did) = s.parse() {
Ok(AtIdentifier::Did(did))
} else {
s.parse().map(AtIdentifier::Handle)
}
}
}
impl From<AtIdentifier> for String {
    /// Unwraps the identifier into the string held by whichever variant it is.
    fn from(value: AtIdentifier) -> Self {
        match value {
            AtIdentifier::Did(did) => String::from(did),
            AtIdentifier::Handle(handle) => String::from(handle),
        }
    }
}
impl AsRef<str> for AtIdentifier {
fn as_ref(&self) -> &str {
match self {
AtIdentifier::Did(did) => did.as_ref(),
AtIdentifier::Handle(handle) => handle.as_ref(),
}
}
}
/// A thin wrapper around [`cid::Cid`] that is (de)serialized as a string.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Cid(cid::Cid);

impl Cid {
    /// Wraps an already-parsed [`cid::Cid`].
    pub fn new(cid: cid::Cid) -> Self {
        Cid(cid)
    }
}
impl FromStr for Cid {
    type Err = cid::Error;

    /// Parses `s` with the underlying [`cid::Cid`] parser and wraps the result.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let inner: cid::Cid = s.parse()?;
        Ok(Self(inner))
    }
}
impl<'de> Deserialize<'de> for Cid {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
Self::from_str(&value).map_err(D::Error::custom)
}
}
impl Serialize for Cid {
    /// Serializes the CID as its `to_string()` text form.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let text = self.0.to_string();
        serializer.serialize_str(&text)
    }
}

impl AsRef<cid::Cid> for Cid {
    /// Borrows the wrapped [`cid::Cid`].
    fn as_ref(&self) -> &cid::Cid {
        let Self(inner) = self;
        inner
    }
}
/// A timestamp that keeps both the parsed value and the text it is written as.
///
/// Equality and ordering compare only the parsed `dt` field; the
/// `serialized` text takes no part in either.
#[derive(Clone, Debug, Eq)]
pub struct Datetime {
    /// Text form returned by `as_str` and emitted when serializing.
    serialized: String,
    /// Parsed timestamp, used for comparisons.
    dt: chrono::DateTime<chrono::FixedOffset>,
}

impl PartialEq for Datetime {
    /// Two values are equal when their parsed timestamps are equal.
    fn eq(&self, other: &Self) -> bool {
        self.dt.eq(&other.dt)
    }
}

impl Ord for Datetime {
    /// Orders values by their parsed timestamps.
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        Ord::cmp(&self.dt, &other.dt)
    }
}

impl PartialOrd for Datetime {
    /// Delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Datetime {
    /// Returns a `Datetime` for the current UTC time.
    pub fn now() -> Self {
        let utc_now = chrono::Utc::now();
        Self::new(utc_now.fixed_offset())
    }

    /// Builds a `Datetime` from `dt`, rounded to whole microseconds.
    ///
    /// The stored text is the RFC 3339 rendering of the rounded value with
    /// microsecond precision (`SecondsFormat::Micros`, `use_z = true`).
    ///
    /// # Panics
    ///
    /// Panics if `duration_round` reports an error for `dt`.
    pub fn new(dt: chrono::DateTime<chrono::FixedOffset>) -> Self {
        let one_microsecond = chrono::Duration::microseconds(1);
        let rounded = dt.duration_round(one_microsecond).expect("delta does not exceed limits");
        Self {
            // Rendered before `rounded` is moved into the `dt` field below.
            serialized: rounded.to_rfc3339_opts(chrono::SecondsFormat::Micros, true),
            dt: rounded,
        }
    }

    /// Returns the stored text form of this timestamp.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.serialized
    }
}
impl FromStr for Datetime {
    type Err = chrono::ParseError;

    /// Parses a timestamp, keeping `s` verbatim as the serialized form.
    ///
    /// The input must first match a strict pattern:
    /// `YYYY-MM-DDTHH:MM:SS`, an optional `.fraction`, and a mandatory
    /// timezone that is either an uppercase `Z` or a `+HH:MM` / `-HH:MM`
    /// offset. A negative offset whose hour is `00` (e.g. `-00:00`) is
    /// rejected. Input that passes the pattern is then parsed with
    /// `chrono::DateTime::parse_from_rfc3339`, which rejects out-of-range
    /// fields.
    ///
    /// # Errors
    ///
    /// Returns the chrono parse error for input that matches the pattern but
    /// is not a valid RFC 3339 timestamp. For input that fails the pattern,
    /// returns the error chrono produces for the literal string `"invalid"`.
    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        static RE_ISO_8601: OnceLock<Regex> = OnceLock::new();
        // The negative-offset branch accepts every two-digit hour except
        // `00`. The previous form (`\-[0-9][1-9]`) also rejected hours ending
        // in zero, so valid offsets such as `-10:00` failed to parse.
        let pattern = RE_ISO_8601.get_or_init(|| {
            Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|(\+[0-9]{2}|\-([0-9][1-9]|[1-9]0)):[0-9]{2})$").unwrap()
        });
        if pattern.is_match(s) {
            let dt = chrono::DateTime::parse_from_rfc3339(s)?;
            Ok(Self {
                serialized: s.into(),
                dt,
            })
        } else {
            // `chrono::ParseError` has no public constructor, so obtain one
            // by parsing a string that is known to be rejected.
            Err(chrono::DateTime::parse_from_rfc3339("invalid").expect_err("invalid"))
        }
    }
}
impl<'de> Deserialize<'de> for Datetime {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
Self::from_str(&value).map_err(D::Error::custom)
}
}
impl Serialize for Datetime {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&self.serialized)
}
}
impl AsRef<chrono::DateTime<chrono::FixedOffset>> for Datetime {
fn as_ref(&self) -> &chrono::DateTime<chrono::FixedOffset> {
&self.dt
}
}
/// A validated DID string of the form `did:<method>:<identifier>`.
///
/// Serialized transparently as the inner string; deserialization and
/// `FromStr` validate through [`Did::new`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
pub struct Did(String);
string_newtype!(Did);

impl Did {
    /// Validates `did` and wraps it.
    ///
    /// # Errors
    ///
    /// - `"DID too long"` when the string is longer than 2048 bytes.
    /// - `"Invalid DID"` when it does not match
    ///   `^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`.
    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
    pub fn new(did: String) -> Result<Self, &'static str> {
        static RE_DID: OnceLock<Regex> = OnceLock::new();
        if did.len() > 2048 {
            return Err("DID too long");
        }
        let pattern = RE_DID
            .get_or_init(|| Regex::new(r"^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$").unwrap());
        if pattern.is_match(&did) {
            Ok(Self(did))
        } else {
            Err("Invalid DID")
        }
    }

    /// Returns the `did:<method>` prefix, i.e. everything before the second colon.
    pub fn method(&self) -> &str {
        // Skip the leading `did:` (4 bytes); the constructor's pattern
        // guarantees another colon follows the method name.
        let second_colon = self.0[4..].find(':').unwrap();
        &self.0[..4 + second_colon]
    }

    /// Returns the DID as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
/// A validated handle: dot-separated labels in domain-name syntax.
///
/// Serialized transparently as the inner string; deserialization and
/// `FromStr` validate through [`Handle::new`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
pub struct Handle(String);
string_newtype!(Handle);

impl Handle {
    /// Validates `handle` and wraps it.
    ///
    /// The pattern requires at least two labels. Each label is 1 to 63
    /// characters of ASCII letters, digits, and hyphens, and neither starts
    /// nor ends with a hyphen; the final label must start with a letter.
    ///
    /// # Errors
    ///
    /// - `"Handle too long"` when the string is longer than 253 bytes.
    /// - `"Invalid handle"` when it does not match the pattern.
    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
    pub fn new(handle: String) -> Result<Self, &'static str> {
        static RE_HANDLE: OnceLock<Regex> = OnceLock::new();
        if handle.len() > 253 {
            return Err("Handle too long");
        }
        let pattern = RE_HANDLE.get_or_init(|| {
            Regex::new(r"^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$").unwrap()
        });
        if pattern.is_match(&handle) {
            Ok(Self(handle))
        } else {
            Err("Invalid handle")
        }
    }

    /// Returns the handle as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
/// A validated NSID: a dotted domain authority followed by a final name segment.
///
/// Serialized transparently as the inner string; deserialization and
/// `FromStr` validate through [`Nsid::new`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
pub struct Nsid(String);
string_newtype!(Nsid);

impl Nsid {
    /// Validates `nsid` and wraps it.
    ///
    /// The pattern requires at least three dot-separated segments. The first
    /// starts with a letter, and the last is a letter followed by up to 62
    /// letters or digits.
    ///
    /// # Errors
    ///
    /// - `"NSID too long"` when the string is longer than 317 bytes.
    /// - `"Invalid NSID"` when it does not match the pattern.
    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
    pub fn new(nsid: String) -> Result<Self, &'static str> {
        static RE_NSID: OnceLock<Regex> = OnceLock::new();
        if nsid.len() > 317 {
            return Err("NSID too long");
        }
        let pattern = RE_NSID.get_or_init(|| {
            Regex::new(r"^[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(\.[a-zA-Z][a-zA-Z0-9]{0,62})$").unwrap()
        });
        if pattern.is_match(&nsid) {
            Ok(Self(nsid))
        } else {
            Err("Invalid NSID")
        }
    }

    /// Returns everything before the last `.` (the domain authority).
    pub fn domain_authority(&self) -> &str {
        let (authority, _name) = self.0.rsplit_once('.').expect("enforced by constructor");
        authority
    }

    /// Returns everything after the last `.` (the name segment).
    pub fn name(&self) -> &str {
        let (_authority, name) = self.0.rsplit_once('.').expect("enforced by constructor");
        name
    }

    /// Returns the NSID as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
/// A language tag backed by [`LanguageTagBuf`].
///
/// Deserialization is delegated transparently to the wrapped
/// `LanguageTagBuf`.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Hash)]
#[serde(transparent)]
pub struct Language(LanguageTagBuf);

impl Language {
    /// Parses `s` as a language tag.
    ///
    /// # Errors
    ///
    /// Returns the `langtag::Error` reported by `LanguageTagBuf::new`; the
    /// rejected input that accompanies the error is discarded.
    pub fn new(s: String) -> Result<Self, langtag::Error> {
        let parsed = LanguageTagBuf::new(s.into());
        parsed.map_err(|(err, _rejected)| err).map(Self)
    }

    /// Returns a borrowed view of the tag.
    #[inline]
    pub fn as_ref(&self) -> LanguageTag<'_> {
        self.0.as_ref()
    }
}
impl FromStr for Language {
    type Err = langtag::Error;

    /// Copies `s` into an owned string and validates it with [`Language::new`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s.to_owned())
    }
}

impl Serialize for Language {
    /// Serializes the tag as its string form.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let text = self.0.as_str();
        serializer.serialize_str(text)
    }
}
/// A validated 13-character timestamp identifier in the base-32 alphabet
/// `234567abcdefghijklmnopqrstuvwxyz`.
///
/// Serialized transparently as the inner string; deserialization and
/// `FromStr` validate through [`Tid::new`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
pub struct Tid(String);
string_newtype!(Tid);

impl Tid {
    /// Validates `tid` and wraps it.
    ///
    /// # Errors
    ///
    /// - `"TID must be 13 characters"` when the byte length is not 13.
    /// - `"Invalid TID"` when the first character is outside
    ///   `234567abcdefghij` or any other character is outside the alphabet.
    #[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
    pub fn new(tid: String) -> Result<Self, &'static str> {
        static RE_TID: OnceLock<Regex> = OnceLock::new();
        if tid.len() != 13 {
            return Err("TID must be 13 characters");
        }
        let pattern = RE_TID.get_or_init(|| {
            Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap()
        });
        if pattern.is_match(&tid) {
            Ok(Self(tid))
        } else {
            Err("Invalid TID")
        }
    }

    /// Builds a TID from a clock identifier and a UTC timestamp.
    ///
    /// The microsecond timestamp is shifted into bits 10..=62 (the top bit
    /// is masked off) and the low 10 bits of `clkid` fill bits 0..=9. The
    /// combined value is then base-32 encoded.
    pub fn from_datetime(clkid: LimitedU32<1023>, time: chrono::DateTime<chrono::Utc>) -> Self {
        let micros = time.timestamp_micros() as u64;
        let clock: u32 = clkid.into();
        let timestamp_bits = (micros << 10) & 0x7FFF_FFFF_FFFF_FC00;
        let clock_bits = clock as u64 & 0x3FF;
        Self(s32_encode(timestamp_bits | clock_bits))
    }

    /// Builds a TID for the current UTC time.
    pub fn now(clkid: LimitedU32<1023>) -> Self {
        let current = chrono::Utc::now();
        Self::from_datetime(clkid, current)
    }

    /// Returns the TID as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
pub struct RecordKey(String);
string_newtype!(RecordKey);
impl RecordKey {
#[allow(clippy::borrow_interior_mutable_const, clippy::declare_interior_mutable_const)]
pub fn new(s: String) -> Result<Self, &'static str> {
static RE_RKEY: OnceLock<Regex> = OnceLock::new();
if [".", ".."].contains(&s.as_str()) {
Err("Disallowed rkey")
} else if !RE_RKEY
.get_or_init(|| Regex::new(r"^[a-zA-Z0-9.\-_:~]{1,512}$").unwrap())
.is_match(&s)
{
Err("Invalid rkey")
} else {
Ok(Self(s))
}
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
// Unit tests for the string types above. Most cases go through serde_json:
// each sample is wrapped in double quotes and deserialized as a JSON string.
#[cfg(test)]
mod tests {
use serde_json::{from_str, to_string};
use super::*;
// Every sample must deserialize, and serializing it again must reproduce the
// exact input text (including fractional-second digits and offset form).
#[test]
fn valid_datetime() {
for valid in [
"1985-04-12T23:20:50.123Z",
"1985-04-12T23:20:50.123456Z",
"1985-04-12T23:20:50.120Z",
"1985-04-12T23:20:50.120000Z",
"1985-04-12T23:20:50.12345678912345Z",
"1985-04-12T23:20:50Z",
"1985-04-12T23:20:50.0Z",
"1985-04-12T23:20:50.123+00:00",
"1985-04-12T23:20:50.123-07:00",
] {
let json_valid = format!("\"{}\"", valid);
let res = from_str::<Datetime>(&json_valid);
assert!(res.is_ok(), "valid Datetime `{}` parsed as invalid", valid);
let dt = res.unwrap();
assert_eq!(to_string(&dt).unwrap(), json_valid);
}
}
// Every sample must be rejected: missing components, missing or malformed
// timezone, wrong-case `T`/`Z`, `-00:00`, and out-of-range field values.
// NOTE(review): "1985-04-12T23:20:50.123" appears twice in this list.
#[test]
fn invalid_datetime() {
for invalid in [
"1985-04-12",
"1985-04-12T23:20Z",
"1985-04-12T23:20:5Z",
"1985-04-12T23:20:50.123",
"+001985-04-12T23:20:50.123Z",
"23:20:50.123Z",
"-1985-04-12T23:20:50.123Z",
"1985-4-12T23:20:50.123Z",
"01985-04-12T23:20:50.123Z",
"1985-04-12T23:20:50.123+00",
"1985-04-12T23:20:50.123+0000",
"1985-04-12t23:20:50.123Z",
"1985-04-12T23:20:50.123z",
"1985-04-12T23:20:50.123-00:00",
"1985-04-12 23:20:50.123Z",
"1985-04-12T23:20:50.123",
"1985-04-12T23:99:50.123Z",
"1985-00-12T23:20:50.123Z",
] {
assert!(
from_str::<Datetime>(&format!("\"{}\"", invalid)).is_err(),
"invalid Datetime `{}` parsed as valid",
invalid,
);
}
}
// A value produced by `Datetime::now()` must survive a serialize/deserialize
// round trip and compare equal to the original.
#[test]
fn datetime_round_trip() {
let dt = Datetime::now();
let encoded = to_string(&dt).unwrap();
assert_eq!(from_str::<Datetime>(&encoded).unwrap(), dt);
}
// Every sample must deserialize as a `Did`.
#[test]
fn valid_did() {
for valid in [
"did:plc:z72i7hdynmk6r22z27h6tvur",
"did:web:blueskyweb.xyz",
"did:method:val:two",
"did:m:v",
"did:method::::val",
"did:method:-:_:.",
"did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
] {
assert!(
from_str::<Did>(&format!("\"{}\"", valid)).is_ok(),
"valid DID `{}` parsed as invalid",
valid,
);
}
}
// Every sample must be rejected: uppercase or non-letter method names,
// uppercase prefix, empty identifier, and characters outside the allowed set.
#[test]
fn invalid_did() {
for invalid in [
"did:METHOD:val",
"did:m123:val",
"DID:method:val",
"did:method:",
"did:method:val/two",
"did:method:val?two",
"did:method:val#two",
] {
assert!(
from_str::<Did>(&format!("\"{}\"", invalid)).is_err(),
"invalid DID `{}` parsed as valid",
invalid,
);
}
}
// `Did::method` must return the `did:<method>` prefix for each sample.
#[test]
fn did_method() {
for (method, did) in [
("did:plc", "did:plc:z72i7hdynmk6r22z27h6tvur"),
("did:web", "did:web:blueskyweb.xyz"),
("did:method", "did:method:val:two"),
("did:m", "did:m:v"),
("did:method", "did:method::::val"),
("did:method", "did:method:-:_:."),
("did:key", "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N"),
] {
assert_eq!(Did::new(did.to_string()).unwrap().method(), method);
}
}
// Every sample must deserialize as a `Handle`.
#[test]
fn valid_handle() {
for valid in [
"jay.bsky.social",
"8.cn",
"name.t--t", "XX.LCS.MIT.EDU",
"a.co",
"xn--notarealidn.com",
"xn--fiqa61au8b7zsevnm8ak20mc4a87e.xn--fiqs8s",
"xn--ls8h.test",
"example.t", "2gzyxa5ihm7nsggfxnu52rck2vv4rvmdlkiu3zzui5du4xyclen53wid.onion",
"laptop.local",
"blah.arpa",
] {
assert!(
from_str::<Handle>(&format!("\"{}\"", valid)).is_ok(),
"valid handle `{}` parsed as invalid",
valid,
);
}
}
// Every sample must be rejected: disallowed or non-ASCII characters, empty
// labels, a label ending in a hyphen, a final label starting with a digit,
// a single label, and a trailing dot.
#[test]
fn invalid_handle() {
for invalid in [
"jo@hn.test",
"💩.test",
"john..test",
"xn--bcher-.tld",
"john.0",
"cn.8",
"www.masełkowski.pl.com",
"org",
"name.org.",
] {
assert!(
from_str::<Handle>(&format!("\"{}\"", invalid)).is_err(),
"invalid handle `{}` parsed as valid",
invalid,
);
}
}
// Every sample must deserialize as an `Nsid`.
#[test]
fn valid_nsid() {
for valid in [
"com.example.fooBar",
"net.users.bob.ping",
"a-0.b-1.c",
"a.b.c",
"com.example.fooBarV2",
"cn.8.lex.stuff",
] {
assert!(
from_str::<Nsid>(&format!("\"{}\"", valid)).is_ok(),
"valid NSID `{}` parsed as invalid",
valid,
);
}
}
// Every sample must be rejected: non-ASCII characters, only two segments,
// and a final segment that starts with a digit.
#[test]
fn invalid_nsid() {
for invalid in ["com.exa💩ple.thing", "com.example", "com.example.3"] {
assert!(
from_str::<Nsid>(&format!("\"{}\"", invalid)).is_err(),
"invalid NSID `{}` parsed as valid",
invalid,
);
}
}
// `domain_authority` and `name` must split each NSID at its last dot.
#[test]
fn nsid_parts() {
for (nsid, domain_authority, name) in [
("com.example.fooBar", "com.example", "fooBar"),
("net.users.bob.ping", "net.users.bob", "ping"),
("a-0.b-1.c", "a-0.b-1", "c"),
("a.b.c", "a.b", "c"),
("cn.8.lex.stuff", "cn.8.lex", "stuff"),
] {
let nsid = Nsid::new(nsid.to_string()).unwrap();
assert_eq!(nsid.domain_authority(), domain_authority);
assert_eq!(nsid.name(), name);
}
}
// Every sample must deserialize as a `Language` and serialize back to the
// identical text.
#[test]
fn valid_language() {
for valid in [
"de", "fr", "ja", "i-enochian", "zh-Hant", "zh-Hans", "sr-Cyrl", "sr-Latn", "zh-cmn-Hans-CN", "cmn-Hans-CN", "zh-yue-HK", "yue-HK", "zh-Hans-CN", "sr-Latn-RS", "sl-rozaj", "sl-rozaj-biske", "sl-nedis", "de-CH-1901", "sl-IT-nedis", "hy-Latn-IT-arevela", "de-DE", "en-US", "es-419", "de-CH-x-phonebk",
"az-Arab-x-AZE-derbend",
"x-whatever", "qaa-Qaaa-QM-x-southern", "de-Qaaa", "sr-Latn-QM", "sr-Qaaa-RS", "en-US-u-islamcal",
"zh-CN-a-myext-x-private",
"en-a-myext-b-another",
"ar-a-aaa-b-bbb-a-ccc", ] {
let json_valid = format!("\"{}\"", valid);
let res = from_str::<Language>(&json_valid);
assert!(res.is_ok(), "valid language `{}` parsed as invalid", valid);
// NOTE(review): `dt` holds a `Language` here; the name looks carried over
// from the datetime test.
let dt = res.unwrap();
assert_eq!(to_string(&dt).unwrap(), json_valid);
}
}
// Every sample must be rejected as a `Language`.
#[test]
fn invalid_language() {
for invalid in [
"de-419-DE", "a-DE",
] {
assert!(
from_str::<Language>(&format!("\"{}\"", invalid)).is_err(),
"invalid language `{}` parsed as valid",
invalid,
);
}
}
// `s32_encode` pads to 13 digits; the digit for value 0 is '2' and for 1 is '3'.
#[test]
fn tid_encode() {
assert_eq!(s32_encode(0), "2222222222222");
assert_eq!(s32_encode(1), "2222222222223");
}
// A fixed timestamp with clock id 0 must encode to a known TID.
#[test]
fn tid_construct() {
let tid = Tid::from_datetime(
0.try_into().unwrap(),
chrono::DateTime::from_timestamp(1738430999, 0).unwrap(),
);
assert_eq!(tid.as_str(), "3lh5234mwy222");
}
// Every sample must deserialize as a `Tid`.
#[test]
fn valid_tid() {
for valid in ["3jzfcijpj2z2a", "7777777777777", "3zzzzzzzzzzzz"] {
assert!(
from_str::<Tid>(&format!("\"{}\"", valid)).is_ok(),
"valid TID `{}` parsed as invalid",
valid,
);
}
}
// Every sample must be rejected: characters outside the alphabet, wrong
// length, embedded hyphens, and a first character outside `234567abcdefghij`.
#[test]
fn invalid_tid() {
for invalid in [
"3jzfcijpj2z21",
"0000000000000",
"3jzfcijpj2z2aa",
"3jzfcijpj2z2",
"3jzf-cij-pj2z-2a",
"zzzzzzzzzzzzz",
"kjzfcijpj2z2a",
] {
assert!(
from_str::<Tid>(&format!("\"{}\"", invalid)).is_err(),
"invalid TID `{}` parsed as valid",
invalid,
);
}
}
// Every sample must deserialize as a `RecordKey`.
#[test]
fn valid_rkey() {
for valid in [
"3jui7kd54zh2y",
"self",
"literal:self",
"example.com",
"~1.2-3_",
"dHJ1ZQ",
"pre:fix",
"_",
] {
assert!(
from_str::<RecordKey>(&format!("\"{}\"", valid)).is_ok(),
"valid rkey `{}` parsed as invalid",
valid,
);
}
}
// Every sample must be rejected: the reserved keys "." and "..", and any
// key containing a character outside the allowed set.
#[test]
fn invalid_rkey() {
for invalid in [
"alpha/beta",
".",
"..",
"#extra",
"@handle",
"any space",
"any+space",
"number[3]",
"number(3)",
"\"quote\"",
"dHJ1ZQ==",
] {
assert!(
from_str::<RecordKey>(&format!("\"{}\"", invalid)).is_err(),
"invalid rkey `{}` parsed as valid",
invalid,
);
}
}
}