gigtag/
lib.rs

1// SPDX-FileCopyrightText: The gigtag authors
2// SPDX-License-Identifier: MPL-2.0
3
4//! A lightweight, textual tagging system aimed at DJs for managing custom metadata.
5//!
6//! Refer to [`docs`] for more information about the idea and the specification.
7
8pub mod docs;
9
10use std::{borrow::Cow, cmp::Ordering, fmt, str::FromStr, sync::OnceLock};
11
12use anyhow::anyhow;
13use derive_more::{Display, Error};
14use percent_encoding::{percent_decode, percent_encode};
15use url::Url;
16
17pub mod facet;
18pub use self::facet::Facet;
19
20pub mod label;
21pub use self::label::Label;
22
23pub mod props;
24pub use self::props::{Name, Property, Value};
25
/// A string-like type that can be created from borrowed, copy-on-write, or
/// formatted text and that can be viewed as a `&str`.
///
/// This file implements the trait for [`String`] and, behind the respective
/// Cargo features, for `CompactString` and `SmolStr`.
pub trait StringTyped: Sized + AsRef<str> + fmt::Debug + fmt::Display {
    /// Creates a value from a borrowed string slice.
    fn from_str(from_str: &str) -> Self;

    /// Creates a value from a [`Cow`] that is either borrowed or owned.
    fn from_cow_str(from_cow: Cow<'_, str>) -> Self;

    /// Creates a value by rendering the given format arguments.
    fn from_format_args(from_format_args: fmt::Arguments<'_>) -> Self;

    /// Returns the contents as a string slice.
    fn as_str(&self) -> &str;
}
35
36impl StringTyped for String {
37    fn from_str(from_str: &str) -> Self {
38        from_str.to_owned()
39    }
40
41    fn from_cow_str(from_cow: Cow<'_, str>) -> Self {
42        from_cow.into_owned()
43    }
44
45    fn from_format_args(from_format_args: fmt::Arguments<'_>) -> Self {
46        std::fmt::format(from_format_args)
47    }
48
49    fn as_str(&self) -> &str {
50        self.as_str()
51    }
52}
53
54#[cfg(feature = "compact_str")]
55pub use compact_str;
56
#[cfg(feature = "compact_str")]
/// [`StringTyped`] backed by `CompactString`.
impl StringTyped for crate::compact_str::CompactString {
    fn from_str(from_str: &str) -> Self {
        Self::from(from_str)
    }

    fn from_cow_str(from_cow: Cow<'_, str>) -> Self {
        Self::from(from_cow)
    }

    fn from_format_args(from_format_args: fmt::Arguments<'_>) -> Self {
        // Mirrors the implementation of format_compact!().
        use crate::compact_str::ToCompactString as _;

        from_format_args.to_compact_string()
    }

    fn as_str(&self) -> &str {
        // Resolves to the inherent `as_str()` of the string type,
        // not to this trait method.
        Self::as_str(self)
    }
}
76
77#[cfg(feature = "smol_str")]
78pub use smol_str;
79
#[cfg(feature = "smol_str")]
/// [`StringTyped`] backed by `SmolStr`.
impl StringTyped for crate::smol_str::SmolStr {
    fn from_str(from_str: &str) -> Self {
        Self::from(from_str)
    }

    fn from_cow_str(from_cow: Cow<'_, str>) -> Self {
        Self::from(from_cow)
    }

    fn from_format_args(from_format_args: fmt::Arguments<'_>) -> Self {
        // Mirrors the implementation of format_smolstr!().
        use std::fmt::Write as _;

        let mut builder = crate::smol_str::SmolStrBuilder::new();
        builder
            .write_fmt(from_format_args)
            .expect("a formatting trait implementation returned an error");
        builder.finish()
    }

    fn as_str(&self) -> &str {
        // Resolves to the inherent `as_str()` of the string type,
        // not to this trait method.
        Self::as_str(self)
    }
}
102
#[derive(Debug, Clone, Default, PartialEq, Eq)]
/// A tag
///
/// A tag is composed of a label, a facet, and a list of properties.
/// The type parameters select the string-like types that are used for
/// the facet (`F`), the label (`L`), and the names (`N`) and values (`V`)
/// of the properties.
pub struct Tag<F, L, N, V> {
    /// The label
    ///
    /// Might be empty, see [`Tag::has_label()`].
    pub label: L,

    /// The facet
    ///
    /// Might be empty, see [`Tag::has_facet()`].
    pub facet: F,

    /// The properties
    ///
    /// Might be empty, see [`Tag::has_props()`].
    pub props: Vec<Property<N, V>>,
}
115
116impl<F, L, N, V> Tag<F, L, N, V>
117where
118    F: Facet,
119    L: Label,
120    N: Name,
121{
122    /// Check for a non-empty label.
123    #[must_use]
124    pub fn has_label(&self) -> bool {
125        debug_assert!(self.label.is_valid());
126        !self.label.is_empty()
127    }
128
129    /// Return the empty or valid label.
130    #[must_use]
131    pub fn label(&self) -> &L {
132        debug_assert!(self.label.is_valid());
133        &self.label
134    }
135
136    /// Check for a non-empty facet.
137    #[must_use]
138    pub fn has_facet(&self) -> bool {
139        debug_assert!(self.facet.is_valid());
140        !self.facet.is_empty()
141    }
142
143    /// Return the empty or valid facet.
144    #[must_use]
145    pub fn facet(&self) -> &F {
146        debug_assert!(self.facet.is_valid());
147        &self.facet
148    }
149
150    /// Check for non-empty properties.
151    #[must_use]
152    pub fn has_props(&self) -> bool {
153        !self.props().is_empty()
154    }
155
156    /// Return the properties.
157    #[must_use]
158    pub fn props(&self) -> &[Property<N, V>] {
159        debug_assert!(self.props.iter().all(Property::is_valid));
160        &self.props
161    }
162
163    /// Check if the tag is valid.
164    #[must_use]
165    pub fn is_valid(&self) -> bool {
166        self.has_label()
167            || (self.has_facet() && (self.has_props() || self.facet().has_date_like_suffix()))
168    }
169}
170
171mod encoding {
172    use percent_encoding::{AsciiSet, CONTROLS};
173
174    const CONTROLS_ESCAPE: &AsciiSet = &CONTROLS.add(b'%');
175
176    /// <https://url.spec.whatwg.org/#fragment-percent-encode-set>
177    const FRAGMENT: &AsciiSet = &CONTROLS_ESCAPE
178        .add(b' ')
179        .add(b'"')
180        .add(b'<')
181        .add(b'>')
182        .add(b'`');
183
184    pub(super) const LABEL: &AsciiSet = FRAGMENT;
185
186    /// <https://url.spec.whatwg.org/#query-percent-encode-set>
187    const QUERY: &AsciiSet = &CONTROLS_ESCAPE
188        .add(b' ')
189        .add(b'"')
190        .add(b'<')
191        .add(b'>')
192        .add(b'#');
193
194    pub(super) const PROPS: &AsciiSet = &QUERY.add(b'&').add(b'=');
195
196    /// <https://url.spec.whatwg.org/#path-percent-encode-set>
197    const PATH: &AsciiSet = &QUERY.add(b'`').add(b'?').add(b'{').add(b'}');
198
199    pub(super) const FACET: &AsciiSet = PATH;
200}
201
202impl<F, L, N, V> Tag<F, L, N, V>
203where
204    F: Facet,
205    L: Label,
206    N: Name,
207    V: Value,
208{
209    /// Encode a tag as a string.
210    ///
211    /// The tag must be valid.
212    ///
213    /// # Errors
214    ///
215    /// Returns an [`fmt::Error`] if writing into the buffer fails.
216    pub fn encode_into<W: fmt::Write>(&self, write: &mut W) -> fmt::Result {
217        debug_assert!(self.is_valid());
218        let encoded_label = percent_encode(self.label().as_str().as_bytes(), encoding::LABEL);
219        let encoded_facet = percent_encode(self.facet().as_str().as_bytes(), encoding::FACET);
220        if !self.has_props() {
221            #[expect(clippy::redundant_else)]
222            if self.has_label() {
223                return write.write_fmt(format_args!("{encoded_facet}#{encoded_label}"));
224            } else {
225                return write.write_fmt(format_args!("{encoded_facet}"));
226            }
227        }
228        let encoded_props_iter = self.props().iter().map(|Property { name, value }| {
229            let encoded_name = percent_encode(name.as_str().as_bytes(), encoding::PROPS);
230            let encoded_value = percent_encode(value.as_ref().as_bytes(), encoding::PROPS);
231            <V as StringTyped>::from_format_args(format_args!("{encoded_name}={encoded_value}"))
232        });
233        let encoded_props = itertools::join(encoded_props_iter, "&");
234        if self.has_label() {
235            write.write_fmt(format_args!(
236                "{encoded_facet}?{encoded_props}#{encoded_label}"
237            ))
238        } else {
239            write.write_fmt(format_args!("{encoded_facet}?{encoded_props}"))
240        }
241    }
242
243    /// Encode a tag as a string.
244    ///
245    /// The tag must be valid.
246    #[must_use]
247    pub fn encode(&self) -> String {
248        self.to_string()
249    }
250}
251
252impl<F, L, N, V> fmt::Display for Tag<F, L, N, V>
253where
254    F: Facet,
255    L: Label,
256    N: Name,
257    V: Value,
258{
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        self.encode_into(f)
261    }
262}
263
/// A decoding error
#[derive(Debug, Display, Error)]
pub enum DecodeError {
    /// Invalid tag.
    ///
    /// The input could be parsed, but the resulting tag is not valid.
    #[display("invalid")]
    InvalidTag,

    /// Parse error.
    ///
    /// The input could not be parsed. The wrapped error describes the cause.
    Parse(anyhow::Error),
}
274
275static DUMMY_BASE_URL_WITH_ABSOLUTE_PATH: OnceLock<Url> = OnceLock::new();
276
277fn dummy_base_url() -> &'static Url {
278    DUMMY_BASE_URL_WITH_ABSOLUTE_PATH.get_or_init(|| {
279        // Workaround to prevent RelativeUrlWithoutBase errors
280        // when parsing relative URLs. The leading slash has to
281        // be skipped in the resulting path.
282        "dummy:///".parse().unwrap()
283    })
284}
285
286impl<F, L, N, V> Tag<F, L, N, V>
287where
288    F: Facet,
289    L: Label,
290    N: Name,
291    V: Value,
292{
293    /// Decode a tag from an encoded token.
294    ///
295    /// The `encoded` input must not contain any leading/trailing whitespace.
296    /// The caller is responsible to ensure that no leading/trailing whitespace
297    /// is present if decoding should not fail because of this. Separating
298    /// whitespace between tokens should already be discarded when tokenizing
299    /// the input text.
300    ///
301    /// # Errors
302    ///
303    /// Returns a [`DecodeError`] if the encoded input cannot be decoded as a valid tag.
304    pub fn decode_str(encoded: &str) -> Result<Self, DecodeError> {
305        let encoded_trimmed = encoded.trim();
306        if encoded_trimmed != encoded {
307            return Err(DecodeError::Parse(anyhow!(
308                "leading/trailing whitespace in encoded input"
309            )));
310        }
311        if encoded_trimmed.is_empty() {
312            return Err(DecodeError::Parse(anyhow!("empty encoded input")));
313        }
314        if encoded_trimmed.as_bytes().first() == Some(&b'/') {
315            return Err(DecodeError::Parse(anyhow!(
316                "encoded input starts with leading slash `/`"
317            )));
318        }
319        let parse_options = Url::options().base_url(Some(dummy_base_url()));
320        let url: Url = parse_options
321            .parse(encoded)
322            .map_err(Into::into)
323            .map_err(DecodeError::Parse)?;
324        if url.scheme() != dummy_base_url().scheme() || url.has_host() || !url.username().is_empty()
325        {
326            return Err(DecodeError::Parse(anyhow!("invalid encoded input")));
327        }
328        let fragment = url.fragment().unwrap_or_default();
329        debug_assert_eq!(fragment.trim(), fragment);
330        let label_encoded = fragment.as_bytes();
331        let label = percent_decode(label_encoded)
332            .decode_utf8()
333            .map_err(Into::into)
334            .map_err(DecodeError::Parse)?;
335        if !label::is_valid(&label) {
336            return Err(DecodeError::Parse(anyhow!("invalid label '{label}'")));
337        }
338        // The leading slash in the path from the dummy base URL needs to be skipped.
339        let path = url.path();
340        debug_assert!(!path.is_empty());
341        debug_assert_eq!(path.trim(), path);
342        debug_assert_eq!(path.as_bytes()[0], b'/');
343        let facet_encoded = &url.path().as_bytes()[1..];
344        let facet = percent_decode(facet_encoded)
345            .decode_utf8()
346            .map_err(Into::into)
347            .map_err(DecodeError::Parse)?;
348        if !facet::is_valid(&facet) {
349            return Err(DecodeError::Parse(anyhow!("invalid facet '{facet}'")));
350        }
351        if facet::has_invalid_date_like_suffix(&facet) {
352            return Err(DecodeError::Parse(anyhow!(
353                "facet with invalid date-like suffix '{facet}'"
354            )));
355        }
356        let mut props = vec![];
357        let query = url.query().unwrap_or_default();
358        debug_assert_eq!(query.trim(), query);
359        if !query.is_empty() {
360            let query_encoded = query.as_bytes();
361            for name_value_encoded in query_encoded.split(|b| *b == b'&') {
362                let mut name_value_encoded_split = name_value_encoded.split(|b| *b == b'=');
363                let Some(name_encoded) = name_value_encoded_split.next() else {
364                    return Err(DecodeError::Parse(anyhow!("missing property name")));
365                };
366                let value_encoded = name_value_encoded_split.next().unwrap_or_default();
367                if name_value_encoded_split.next().is_some() {
368                    return Err(DecodeError::Parse(anyhow!(
369                        "malformed name=value property '{name_value}'",
370                        name_value = percent_decode(name_value_encoded)
371                            .decode_utf8()
372                            .unwrap_or_default()
373                    )));
374                }
375                let name = percent_decode(name_encoded)
376                    .decode_utf8()
377                    .map_err(Into::into)
378                    .map_err(DecodeError::Parse)?;
379                if !props::is_name_valid(&name) {
380                    return Err(DecodeError::Parse(anyhow!(
381                        "invalid property name '{name}'"
382                    )));
383                }
384                let value = percent_decode(value_encoded)
385                    .decode_utf8()
386                    .map_err(Into::into)
387                    .map_err(DecodeError::Parse)?;
388                let prop = Property {
389                    name: <N as StringTyped>::from_cow_str(name),
390                    value: <V as StringTyped>::from_cow_str(value),
391                };
392                props.push(prop);
393            }
394        }
395        let tag = Self {
396            label: <L as StringTyped>::from_cow_str(label),
397            facet: <F as StringTyped>::from_cow_str(facet),
398            props,
399        };
400        if !tag.is_valid() {
401            return Err(DecodeError::InvalidTag);
402        }
403        Ok(tag)
404    }
405}
406
407impl<F, L, N, V> FromStr for Tag<F, L, N, V>
408where
409    F: Facet,
410    L: Label,
411    N: Name,
412    V: Value,
413{
414    type Err = DecodeError;
415
416    fn from_str(input: &str) -> Result<Self, Self::Err> {
417        // This implementation permits leading/trailing whitespace,
418        // other than `Tag::decode_str()` which is more strict.
419        Tag::decode_str(input.trim())
420    }
421}
422
/// Tags decoded from a text field
///
/// Tags are decoded from the end of the text. The part of the text
/// that precedes the decoded tags is preserved verbatim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedTags<F, L, N, V> {
    /// Valid, decoded tags
    ///
    /// The tags appear in the same order as in the decoded text.
    pub tags: Vec<Tag<F, L, N, V>>,

    /// The remaining, undecoded prefix.
    ///
    /// Empty if the text preceding the decoded tags consists only of whitespace.
    pub undecoded_prefix: String,
}
432
/// Separator that is inserted between encoded tokens when (re-)encoding.
const JOIN_ENCODED_TOKENS_CHAR: char = ' ';
434
435impl<F, L, N, V> DecodedTags<F, L, N, V>
436where
437    F: Facet,
438    L: Label,
439    N: Name,
440    V: Value,
441{
442    /// Decode from a string slice.
443    #[must_use]
444    pub fn decode_str(encoded: &str) -> Self {
445        let mut undecoded_prefix = encoded;
446        let mut tags = vec![];
447        while !undecoded_prefix.is_empty() {
448            // Skip trailing whitespace, but stop at the first newline character.
449            let remainder =
450                undecoded_prefix.trim_end_matches(|c: char| c != '\n' && c.is_whitespace());
451            if remainder.is_empty() || remainder.ends_with('\n') {
452                break;
453            }
454            let (next_remainder, next_token) =
455                if let Some((i, _)) = remainder.rmatch_indices(char::is_whitespace).next() {
456                    debug_assert!(i < remainder.len());
457                    // Next token might be preceded by whitespace
458                    (&remainder[..=i], &remainder[i + 1..])
459                } else {
460                    // First token without leading whitespace
461                    ("", remainder)
462                };
463            debug_assert!(!next_token.is_empty());
464            debug_assert_eq!(next_token.trim(), next_token);
465            if let Ok(tag) = Tag::decode_str(next_token) {
466                tags.push(tag);
467                undecoded_prefix = next_remainder;
468            } else {
469                break;
470            }
471        }
472        tags.reverse();
473        if undecoded_prefix.trim().is_empty() {
474            // Discard any preceding whitespace if all tokens have been decoded as tags
475            undecoded_prefix = "";
476        }
477        Self {
478            tags,
479            undecoded_prefix: undecoded_prefix.to_owned(),
480        }
481    }
482
483    /// Encode the contents into a separate buffer.
484    ///
485    /// Adds a space character before the first encoded tag, if the
486    /// `undecodedPrefix` is not empty and does not end with a
487    /// whitespace character.
488    ///
489    /// # Errors
490    ///
491    /// Returns an [`fmt::Error`] if writing into the buffer fails.
492    pub fn encode_into<W: fmt::Write>(&self, write: &mut W) -> fmt::Result {
493        write.write_str(&self.undecoded_prefix)?;
494        // Append a separator before the first encoded tag of the undecoded prefix
495        // is not empty and does not end with a whitespace.
496        let mut append_separator = !self.undecoded_prefix.is_empty()
497            && self.undecoded_prefix.trim_end() == self.undecoded_prefix;
498        for tag in &self.tags {
499            if append_separator {
500                write.write_char(JOIN_ENCODED_TOKENS_CHAR)?;
501            }
502            tag.encode_into(write)?;
503            append_separator = true;
504        }
505        Ok(())
506    }
507
508    /// Re-encode the contents.
509    ///
510    /// # Errors
511    ///
512    /// Returns an [`fmt::Error`] if writing into the buffer fails.
513    pub fn reencode(self) -> Result<String, fmt::Error> {
514        let mut reencoded = self.undecoded_prefix;
515        // Append a separated before the first encoded tag of the undecoded prefix
516        // is not empty and does not end with a whitespace.
517        let mut append_separator = !reencoded.is_empty() && reencoded.trim_end() == reencoded;
518        for tag in &self.tags {
519            if append_separator {
520                reencoded.push(JOIN_ENCODED_TOKENS_CHAR);
521            }
522            tag.encode_into(&mut reencoded)?;
523            append_separator = true;
524        }
525        Ok(reencoded)
526    }
527
528    /// Reorder and deduplicate tags.
529    ///
530    /// Canonical ordering:
531    ///   1. Tags without a facet
532    ///   2. Tags with a non-date-like facet
533    ///   3. Tags with a date-like facet (by descending suffix)
534    ///
535    /// Within each group tags are sorted by facet, then by label. For tags with
536    /// equal facets those with a label are sorted before those without a label.
537    ///
538    /// Tags with a date-like facet are sorted in descending order by their
539    /// date-like suffix, i.e. newer dates are sorted before older dates.
540    #[expect(clippy::missing_panics_doc)]
541    pub fn reorder_and_dedup(&mut self) {
542        self.tags.sort_by(|lhs, rhs| {
543            if rhs.facet().has_date_like_suffix() {
544                if lhs.facet().has_date_like_suffix() {
545                    // Using unwrap() is safe after we already checked that
546                    // the contents of both facets match the date-like format.
547                    let (_, lhs_suffix) = lhs
548                        .facet()
549                        .try_split_into_prefix_and_date_like_suffix()
550                        .unwrap();
551                    let (_, rhs_suffix) = rhs
552                        .facet()
553                        .try_split_into_prefix_and_date_like_suffix()
554                        .unwrap();
555                    // Descending order by decimal digits encoded as ASCII chars
556                    let ordering = rhs_suffix.cmp(lhs_suffix);
557                    if ordering != Ordering::Equal {
558                        return ordering;
559                    }
560                } else {
561                    return Ordering::Less;
562                }
563            } else if lhs.facet().has_date_like_suffix() {
564                return Ordering::Greater;
565            }
566            if rhs.has_facet() {
567                if lhs.has_facet() {
568                    let ordering = lhs.facet().cmp(rhs.facet());
569                    if ordering != Ordering::Equal {
570                        return ordering;
571                    }
572                } else {
573                    return Ordering::Less;
574                }
575            } else if lhs.has_facet() {
576                return Ordering::Greater;
577            }
578            debug_assert_eq!(lhs.facet(), rhs.facet());
579            // Tags with labels before tags without labels
580            debug_assert_eq!(lhs.facet(), rhs.facet());
581            if rhs.has_label() {
582                if lhs.has_label() {
583                    lhs.label().cmp(rhs.label())
584                } else {
585                    Ordering::Greater
586                }
587            } else if lhs.has_label() {
588                Ordering::Less
589            } else {
590                Ordering::Equal
591            }
592        });
593        self.tags.dedup();
594    }
595}
596
597#[cfg(test)]
598mod tests;