// Documentation-only module: its rustdoc content is the text of the README
// file, pulled in at compile time via `include_str!`.
// NOTE(review): the `allow` presumably silences rustdoc warnings about code
// blocks in the README that are not valid Rust — confirm against the README.
pub mod docs {
#![allow(rustdoc::invalid_rust_codeblocks)]
#![doc = include_str!("../README.md")]
}
use std::{
cmp::Ordering,
fmt,
str::{FromStr, Utf8Error},
sync::OnceLock,
};
use compact_str::format_compact;
use percent_encoding::{percent_decode, percent_encode};
use thiserror::Error;
use url::Url;
pub mod facet;
pub use self::facet::{CompactFacet, Facet, StdFacet};
pub mod label;
pub use self::label::{CompactLabel, Label, StdLabel};
pub mod props;
pub use self::props::{CompactName, CompactProperty, Name, Property, StdName, Value};
/// A single tag composed of an optional label, an optional facet and
/// an optional list of name/value properties.
///
/// The type parameters select the string representations used for the
/// facet (`F`), the label (`L`), property names (`N`) and property
/// values (`V`).
///
/// A missing label or facet is represented by an empty value, see
/// [`Tag::has_label`] and [`Tag::has_facet`]. Not every combination of
/// components is valid, see [`Tag::is_valid`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Tag<F, L, N, V> {
/// The label, encoded as the fragment (after `#`). Empty if absent.
pub label: L,
/// The facet, encoded as the leading path component. Empty if absent.
pub facet: F,
/// The properties, encoded as `name=value` pairs of the query (after `?`).
pub props: Vec<Property<N, V>>,
}
impl<F, L, N, V> Tag<F, L, N, V>
where
    F: Facet,
    L: Label,
    N: Name,
{
    /// Returns `true` if the tag carries a non-empty label.
    #[must_use]
    pub fn has_label(&self) -> bool {
        // `label()` performs the debug-only validity check.
        !self.label().as_ref().is_empty()
    }

    /// Borrows the label.
    ///
    /// The label is expected to be valid, which is only checked in debug builds.
    #[must_use]
    pub fn label(&self) -> &L {
        debug_assert!(self.label.is_valid());
        &self.label
    }

    /// Returns `true` if the tag carries a non-empty facet.
    #[must_use]
    pub fn has_facet(&self) -> bool {
        // `facet()` performs the debug-only validity check.
        !self.facet().as_ref().is_empty()
    }

    /// Borrows the facet.
    ///
    /// The facet is expected to be valid, which is only checked in debug builds.
    #[must_use]
    pub fn facet(&self) -> &F {
        debug_assert!(self.facet.is_valid());
        &self.facet
    }

    /// Returns `true` if the tag has at least one property.
    #[must_use]
    pub fn has_props(&self) -> bool {
        let props = self.props();
        !props.is_empty()
    }

    /// Borrows the properties.
    ///
    /// All properties are expected to be valid, which is only checked in
    /// debug builds.
    #[must_use]
    pub fn props(&self) -> &[Property<N, V>] {
        debug_assert!(self.props.iter().all(Property::is_valid));
        &self.props
    }

    /// Checks whether the combination of components forms a valid tag.
    ///
    /// A tag is valid if it has a label. Without a label it is only valid
    /// if it has a facet and, in addition, either properties or a facet
    /// with a date-like suffix.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        if self.has_label() {
            return true;
        }
        if !self.has_facet() {
            return false;
        }
        self.has_props() || self.facet().has_date_like_suffix()
    }
}
// Sets of ASCII characters that are percent-encoded when writing the
// individual components of a tag.
mod encoding {
use percent_encoding::{AsciiSet, CONTROLS};
// Control characters plus the escape character `%` itself.
const CONTROLS_ESCAPE: &AsciiSet = &CONTROLS.add(b'%');
// NOTE(review): FRAGMENT, QUERY and PATH look like the fragment, query and
// path percent-encode sets of the WHATWG URL standard, each extended by
// `%` — confirm against the specification.
const FRAGMENT: &AsciiSet = &CONTROLS_ESCAPE
.add(b' ')
.add(b'"')
.add(b'<')
.add(b'>')
.add(b'`');
// Characters escaped in an encoded label.
pub(super) const LABEL: &AsciiSet = FRAGMENT;
const QUERY: &AsciiSet = &CONTROLS_ESCAPE
.add(b' ')
.add(b'"')
.add(b'<')
.add(b'>')
.add(b'#');
// Characters escaped in encoded property names and values. The
// separators `&` (between properties) and `=` (between name and value)
// are escaped in addition to the query set.
pub(super) const PROPS: &AsciiSet = &QUERY.add(b'&').add(b'=');
const PATH: &AsciiSet = &QUERY.add(b'`').add(b'?').add(b'{').add(b'}');
// Characters escaped in an encoded facet.
pub(super) const FACET: &AsciiSet = PATH;
}
impl<F, L, N, V> Tag<F, L, N, V>
where
    F: Facet,
    L: Label,
    N: Name,
    V: AsRef<str>,
{
    /// Writes the percent-encoded form of this tag into `write`.
    ///
    /// The layout is `facet[?name=value[&name=value…]][#label]`: the query
    /// part is only written if the tag has properties and the fragment part
    /// only if it has a label.
    ///
    /// The tag is expected to be valid, which is only checked in debug builds.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying writer.
    pub fn encode_into<W: fmt::Write>(&self, write: &mut W) -> fmt::Result {
        debug_assert!(self.is_valid());

        // The (possibly empty) facet always comes first.
        let facet = percent_encode(self.facet().as_ref().as_bytes(), encoding::FACET);
        write!(write, "{facet}")?;

        if self.has_props() {
            let pairs = self.props().iter().map(|prop| {
                let name = percent_encode(prop.name.as_ref().as_bytes(), encoding::PROPS);
                let value = percent_encode(prop.value.as_ref().as_bytes(), encoding::PROPS);
                format_compact!("{name}={value}")
            });
            let query = itertools::join(pairs, "&");
            write!(write, "?{query}")?;
        }

        if self.has_label() {
            let label = percent_encode(self.label().as_ref().as_bytes(), encoding::LABEL);
            write!(write, "#{label}")?;
        }

        Ok(())
    }

    /// Returns the percent-encoded form of this tag as a new string.
    ///
    /// Goes through the `Display` implementation of the tag.
    #[must_use]
    pub fn encode(&self) -> String {
        self.to_string()
    }
}
// Formats a tag by delegating to `Tag::encode_into`, i.e. the displayed
// text is the encoded form of the tag.
impl<F, L, N, V> fmt::Display for Tag<F, L, N, V>
where
F: Facet,
L: Label,
N: Name,
V: AsRef<str>,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.encode_into(f)
}
}
/// Errors reported when decoding a tag from its encoded form.
#[derive(Debug, Error)]
pub enum DecodeError {
/// All components could be decoded, but their combination does not
/// form a valid tag (see `Tag::is_valid`).
#[error("invalid")]
InvalidTag,
/// The encoded input is malformed. The wrapped error carries the details
/// and is displayed transparently.
#[error(transparent)]
Parse(#[from] anyhow::Error),
}
impl From<Utf8Error> for DecodeError {
fn from(from: Utf8Error) -> Self {
anyhow::Error::from(from).into()
}
}
/// Wraps a URL parse failure as a [`DecodeError::Parse`] error.
impl From<url::ParseError> for DecodeError {
    fn from(from: url::ParseError) -> Self {
        Self::Parse(from.into())
    }
}
static DUMMY_BASE_URL_WITH_ABSOLUTE_PATH: OnceLock<Url> = OnceLock::new();
fn dummy_base_url() -> &'static Url {
DUMMY_BASE_URL_WITH_ABSOLUTE_PATH.get_or_init(|| {
"dummy:///".parse().unwrap()
})
}
impl<F, L, N, V> Tag<F, L, N, V>
where
F: Facet,
L: Label,
N: Name,
V: Value,
{
pub fn decode_str(encoded: &str) -> Result<Self, DecodeError> {
let encoded_trimmed = encoded.trim();
if encoded_trimmed != encoded {
return Err(anyhow::anyhow!("leading/trailing whitespace in encoded input").into());
}
if encoded_trimmed.is_empty() {
return Err(anyhow::anyhow!("empty encoded input").into());
}
if encoded_trimmed.as_bytes().first() == Some(&b'/') {
return Err(anyhow::anyhow!("encoded input starts with leading slash `/`").into());
}
let parse_options = Url::options().base_url(Some(dummy_base_url()));
let url: Url = parse_options.parse(encoded)?;
if url.scheme() != dummy_base_url().scheme() || url.has_host() || !url.username().is_empty()
{
return Err(anyhow::anyhow!("invalid encoded input").into());
}
let fragment = url.fragment().unwrap_or_default();
debug_assert_eq!(fragment.trim(), fragment);
let label_encoded = fragment.as_bytes();
let label = percent_decode(label_encoded).decode_utf8()?;
if !label::is_valid(&label) {
return Err(anyhow::anyhow!("invalid label '{label}'").into());
}
let path = url.path();
debug_assert!(!path.is_empty());
debug_assert_eq!(path.trim(), path);
debug_assert_eq!(path.as_bytes()[0], b'/');
let facet_encoded = &url.path().as_bytes()[1..];
let facet = percent_decode(facet_encoded).decode_utf8()?;
if !facet::is_valid(&facet) {
return Err(anyhow::anyhow!("invalid facet '{facet}'").into());
}
if facet::has_invalid_date_like_suffix(&facet) {
return Err(anyhow::anyhow!("facet with invalid date-like suffix '{facet}'").into());
}
let mut props = vec![];
let query = url.query().unwrap_or_default();
debug_assert_eq!(query.trim(), query);
if !query.is_empty() {
let query_encoded = query.as_bytes();
for name_value_encoded in query_encoded.split(|b| *b == b'&') {
let mut name_value_encoded_split = name_value_encoded.split(|b| *b == b'=');
let Some(name_encoded) = name_value_encoded_split.next() else {
return Err(anyhow::anyhow!("missing property name").into());
};
let value_encoded = name_value_encoded_split.next().unwrap_or_default();
if name_value_encoded_split.next().is_some() {
return Err(anyhow::anyhow!(
"malformed name=value property '{name_value}'",
name_value = percent_decode(name_value_encoded)
.decode_utf8()
.unwrap_or_default()
)
.into());
}
let name = percent_decode(name_encoded).decode_utf8()?;
if !props::is_name_valid(&name) {
return Err(anyhow::anyhow!("invalid property name '{name}'").into());
}
let value = percent_decode(value_encoded).decode_utf8()?;
let prop = Property {
name: Name::from_cow_str(name),
value: Value::from_cow_str(value),
};
props.push(prop);
}
}
let tag = Self {
label: <L as Label>::from_cow_str(label),
facet: <F as Facet>::from_cow_str(facet),
props,
};
if !tag.is_valid() {
return Err(DecodeError::InvalidTag);
}
Ok(tag)
}
}
/// Parses a tag from its encoded form.
///
/// Unlike [`Tag::decode_str`], which rejects surrounding whitespace, this
/// implementation trims leading and trailing whitespace before decoding.
impl<F, L, N, V> FromStr for Tag<F, L, N, V>
where
    F: Facet,
    L: Label,
    N: Name,
    V: Value,
{
    type Err = DecodeError;

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let trimmed = input.trim();
        Self::decode_str(trimmed)
    }
}
/// The result of decoding tags from the end of a text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedTags<F, L, N, V> {
/// The decoded tags, in the order in which they appear in the text.
pub tags: Vec<Tag<F, L, N, V>>,
/// The leading part of the text that has not been decoded as tags.
/// Empty if nothing but whitespace precedes the decoded tags.
pub undecoded_prefix: String,
}
/// Separator written between the undecoded prefix and the encoded tags,
/// and between consecutive encoded tags.
const JOIN_ENCODED_TOKENS_CHAR: char = ' ';
impl<F, L, N, V> DecodedTags<F, L, N, V>
where
    F: Facet,
    L: Label,
    N: Name,
    V: Value,
{
    /// Decodes as many tags as possible from the end of `encoded`.
    ///
    /// The text is split into whitespace-separated tokens that are decoded
    /// back to front with [`Tag::decode_str`]. Decoding stops at the first
    /// token that is not a valid tag. This token and everything before it,
    /// including the whitespace that follows it, is returned verbatim as the
    /// undecoded prefix. A prefix that consists only of whitespace is
    /// discarded.
    #[must_use]
    pub fn decode_str(encoded: &str) -> Self {
        let mut undecoded_prefix = encoded;
        let mut tags = vec![];
        while !undecoded_prefix.is_empty() {
            let remainder = undecoded_prefix.trim_end();
            if remainder.is_empty() {
                break;
            }
            let (next_remainder, next_token) = if let Some((i, separator)) =
                remainder.rmatch_indices(char::is_whitespace).next()
            {
                // The separator may be a multi-byte whitespace character,
                // e.g. U+00A0 or U+3000. Advancing by its actual byte length
                // instead of a single byte keeps the split on a character
                // boundary and thereby prevents a slicing panic.
                let token_start = i + separator.len();
                // `remainder` does not end with whitespace.
                debug_assert!(token_start < remainder.len());
                // The separator stays part of the remaining prefix.
                remainder.split_at(token_start)
            } else {
                // First token without any preceding whitespace
                ("", remainder)
            };
            debug_assert!(!next_token.is_empty());
            debug_assert_eq!(next_token.trim(), next_token);
            if let Ok(tag) = Tag::decode_str(next_token) {
                tags.push(tag);
                undecoded_prefix = next_remainder;
            } else {
                break;
            }
        }
        // The tags have been collected back to front.
        tags.reverse();
        if undecoded_prefix.trim().is_empty() {
            // Discard remaining whitespace if all tokens have been decoded.
            undecoded_prefix = "";
        }
        Self {
            tags,
            undecoded_prefix: undecoded_prefix.to_owned(),
        }
    }

    /// Writes the undecoded prefix followed by all encoded tags into `write`.
    ///
    /// Consecutive tags are separated by a single space. A space is also
    /// inserted between the prefix and the first tag, unless the prefix is
    /// empty or already ends with whitespace.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying writer.
    pub fn encode_into<W: fmt::Write>(&self, write: &mut W) -> fmt::Result {
        write.write_str(&self.undecoded_prefix)?;
        let mut append_separator = !self.undecoded_prefix.is_empty()
            && self.undecoded_prefix.trim_end() == self.undecoded_prefix;
        for tag in &self.tags {
            if append_separator {
                write.write_char(JOIN_ENCODED_TOKENS_CHAR)?;
            }
            tag.encode_into(write)?;
            append_separator = true;
        }
        Ok(())
    }

    /// Consumes the decoded tags and returns the re-encoded text.
    ///
    /// Produces the same text as [`Self::encode_into`], but appends to the
    /// owned undecoded prefix instead of copying it.
    ///
    /// # Errors
    ///
    /// Propagates any error reported while encoding a tag.
    pub fn reencode(self) -> Result<String, fmt::Error> {
        let mut reencoded = self.undecoded_prefix;
        let mut append_separator = !reencoded.is_empty() && reencoded.trim_end() == reencoded;
        for tag in &self.tags {
            if append_separator {
                reencoded.push(JOIN_ENCODED_TOKENS_CHAR);
            }
            tag.encode_into(&mut reencoded)?;
            append_separator = true;
        }
        Ok(reencoded)
    }

    /// Sorts the tags into a canonical order and removes consecutive
    /// duplicates.
    ///
    /// Ordering criteria, in decreasing priority:
    ///
    /// 1. Tags whose facet has no date-like suffix precede tags whose facet
    ///    has one. The latter are ordered descending by that suffix.
    /// 2. Tags without a facet precede tags with a facet. The latter are
    ///    ordered ascending by facet.
    /// 3. Tags with a label precede tags without a label. The former are
    ///    ordered ascending by label.
    ///
    /// Properties do not take part in the ordering. The sort is stable and
    /// only adjacent equal tags are removed afterwards.
    // The `unwrap()` calls below are only reached after `has_date_like_suffix()`
    // returned `true` for the respective facet, so splitting off the suffix
    // is not expected to fail.
    #[allow(clippy::missing_panics_doc)]
    pub fn reorder_and_dedup(&mut self) {
        self.tags.sort_by(|lhs, rhs| {
            match (
                lhs.facet().has_date_like_suffix(),
                rhs.facet().has_date_like_suffix(),
            ) {
                (true, true) => {
                    let (_, lhs_suffix) = lhs
                        .facet()
                        .try_split_into_prefix_and_date_like_suffix()
                        .unwrap();
                    let (_, rhs_suffix) = rhs
                        .facet()
                        .try_split_into_prefix_and_date_like_suffix()
                        .unwrap();
                    // Descending order by date-like suffix
                    let ordering = rhs_suffix.cmp(lhs_suffix);
                    if ordering != Ordering::Equal {
                        return ordering;
                    }
                }
                (false, true) => return Ordering::Less,
                (true, false) => return Ordering::Greater,
                (false, false) => {}
            }
            match (lhs.has_facet(), rhs.has_facet()) {
                (true, true) => {
                    let ordering = lhs.facet().cmp(rhs.facet());
                    if ordering != Ordering::Equal {
                        return ordering;
                    }
                }
                (false, true) => return Ordering::Less,
                (true, false) => return Ordering::Greater,
                (false, false) => {}
            }
            // Both facets are either equal or absent from here on.
            debug_assert_eq!(lhs.facet(), rhs.facet());
            match (lhs.has_label(), rhs.has_label()) {
                (true, true) => lhs.label().cmp(rhs.label()),
                (false, true) => Ordering::Greater,
                (true, false) => Ordering::Less,
                (false, false) => Ordering::Equal,
            }
        });
        self.tags.dedup();
    }
}
#[cfg(test)]
mod tests;