Skip to main content

zrx_id/id/
format.rs

1// Copyright (c) 2025-2026 Zensical and contributors
2
3// SPDX-License-Identifier: MIT
4// All contributions are certified under the DCO
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Formatted string.
27
28use std::array;
29use std::borrow::Cow;
30use std::cmp::Ordering;
31use std::fmt::{self, Debug, Display};
32use std::hash::{Hash, Hasher};
33use std::ops::Range;
34use std::str::{from_utf8_unchecked, FromStr};
35
36mod builder;
37mod encoding;
38mod error;
39mod path;
40
41pub use builder::Builder;
42use encoding::decode;
43pub use error::{Error, Result};
44use path::validate;
45
46// ----------------------------------------------------------------------------
47// Structs
48// ----------------------------------------------------------------------------
49
/// Formatted string.
///
/// This is a low-level construct for working efficiently with strings that
/// contain a predefined number of components separated by `:` characters.
/// If a component contains a `:` itself, it is percent-encoded, indicated by
/// a flag. This is slower, but not expected to be common.
///
/// Formatted strings are optimized for very fast conversion with [`FromStr`]
/// or gradual construction through [`Format::builder`], which both produce an
/// immutable instance. To make sure cloning is fast, it's recommended to wrap
/// the encapsulating type, [`Selector`][] or [`Id`][], in an [`Arc`][].
///
/// This implementation is currently limited to 64 spans, as the encoding
/// flags are kept as one bit per span in a single `u64`. That should be
/// sufficient for all use cases that can ever happen. For our means, a `u8`
/// would be more than enough, but since Rust will align the field to 64 bits
/// anyway, there's no point in being cheap.
///
/// Equality, ordering and hashing only consider the string representation,
/// not the spans or flags that were derived from it.
///
/// [`Arc`]: std::sync::Arc
/// [`Id`]: crate::id::Id
/// [`Selector`]: crate::id::selector::Selector
///
/// # Examples
///
/// ```
/// # use std::error::Error;
/// # fn main() -> Result<(), Box<dyn Error>> {
/// use zrx_id::format::Format;
///
/// // Create formatted string builder
/// let mut builder = Format::<3>::builder();
/// builder.set(0, "a");
/// builder.set(1, "b");
/// builder.set(2, "c");
///
/// // Create formatted string from builder
/// let format = builder.build()?;
///
/// // Obtain string representation
/// assert_eq!(format.as_str(), "a:b:c");
/// # Ok(())
/// # }
/// ```
#[derive(Clone)]
pub struct Format<const N: usize> {
    /// String representation, stored as the bytes of the complete string
    /// including separators. Handed out as `&str` without re-validation, so
    /// it must always hold valid UTF-8.
    value: Box<[u8]>,
    /// Set of spans, one per component, each a byte range into `value` that
    /// excludes the surrounding separators.
    spans: [Range<u16>; N],
    /// Encoding flags, where bit `i` being set marks the component at index
    /// `i` as percent-encoded.
    flags: u64,
}
101
102// ----------------------------------------------------------------------------
103// Implementations
104// ----------------------------------------------------------------------------
105
106impl<const N: usize> Format<N> {
107    /// Returns the value at the index.
108    ///
109    /// If the value is not percent-encoded, which means it does not contain a
110    /// `:` character, a borrowed reference is returned which is essentially a
111    /// zero-cost operation and expected to be the common case. Otherwise, the
112    /// value is percent-decoded and an owned value is returned.
113    ///
114    /// # Panics
115    ///
116    /// Panics if the index is out of bounds. Since [`Format`] is a low-level
117    /// construct, we don't expect this to happen, as indices should be known.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// # use std::error::Error;
123    /// # fn main() -> Result<(), Box<dyn Error>> {
124    /// use zrx_id::format::Format;
125    ///
126    /// // Create formatted string builder
127    /// let mut builder = Format::<3>::builder();
128    /// builder.set(0, "a");
129    /// builder.set(1, "b");
130    /// builder.set(2, "c");
131    ///
132    /// // Create formatted string from builder
133    /// let format = builder.build()?;
134    ///
135    /// // Obtain value at index
136    /// assert_eq!(format.get(0), "a");
137    /// # Ok(())
138    /// # }
139    /// ```
140    #[must_use]
141    pub fn get(&self, index: usize) -> Cow<'_, str> {
142        let p = self.spans[index].start as usize;
143        let q = self.spans[index].end as usize;
144        if self.flags & (1 << index) == 0 {
145            // SAFETY: The value is guaranteed to be valid UTF-8, as it was
146            // created from a valid UTF-8 string. Additionally, the value is
147            // not percent-encoded, so we can just return a borrowed reference
148            // to the formatted string value, which is the common fast path.
149            unsafe { Cow::Borrowed(from_utf8_unchecked(&self.value[p..q])) }
150        } else {
151            decode(&self.value[p..q])
152        }
153    }
154
155    /// Returns the string representation.
156    ///
157    /// # Examples
158    ///
159    /// ```
160    /// # use std::error::Error;
161    /// # fn main() -> Result<(), Box<dyn Error>> {
162    /// use zrx_id::format::Format;
163    ///
164    /// // Create formatted string builder
165    /// let mut builder = Format::<3>::builder();
166    /// builder.set(0, "a");
167    /// builder.set(1, "b");
168    /// builder.set(2, "c");
169    ///
170    /// // Create formatted string from builder
171    /// let format = builder.build()?;
172    ///
173    /// // Obtain string representation
174    /// assert_eq!(format.as_str(), "a:b:c");
175    /// # Ok(())
176    /// # }
177    /// ```
178    #[inline]
179    #[must_use]
180    pub fn as_str(&self) -> &str {
181        // SAFETY: The value is guaranteed to be valid UTF-8, as it was created
182        // from valid UTF-8 strings, so we can just return a borrowed reference
183        unsafe { from_utf8_unchecked(&self.value) }
184    }
185}
186
187// ----------------------------------------------------------------------------
188// Trait implementations
189// ----------------------------------------------------------------------------
190
191impl<const N: usize> FromStr for Format<N> {
192    type Err = Error;
193
194    /// Attempts to create a formatted string from a string.
195    ///
196    /// # Errors
197    ///
198    /// If the span number is off, [`Error::Mismatch`] is returned.
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// # use std::error::Error;
204    /// # fn main() -> Result<(), Box<dyn Error>> {
205    /// use zrx_id::format::Format;
206    ///
207    /// // Create formatted string from string
208    /// let format: Format::<3> = "a:b:c".parse()?;
209    /// assert_eq!(format.as_str(), "a:b:c");
210    /// # Ok(())
211    /// # }
212    /// ```
213    fn from_str(value: &str) -> Result<Self> {
214        let mut spans = array::from_fn(|_| 0u16..0u16);
215        let mut flags = 0;
216
217        // Initialize start and span counter
218        let mut start = 0u16;
219        let mut index = 0;
220        let mut shift = 1;
221
222        // Compute spans from characters
223        for (i, char) in value.char_indices() {
224            match char {
225                // If the current character is a separator, finalize the span.
226                // It's very unlikely that any conversion here results in an
227                // error, but since identifiers might potentially contain user
228                // data, we handle it and return an error.
229                ':' => {
230                    let end = u16::try_from(i).map_err(|_| Error::Overflow)?;
231                    validate(&value[start.into()..end.into()])?;
232
233                    // Finalize current span
234                    spans[index] = start..end;
235                    index += 1;
236
237                    // Continue after separator
238                    start = end + 1;
239                    shift = 1 << index;
240                }
241
242                // If the current span contains a percent sign, and we haven't
243                // already marked the span as percent-encoded, check if the next
244                // two characters are valid hexadecimal digits. If so, mark it
245                // as percent-encoded. Otherwise, proceed without modification.
246                '%' if flags & shift == 0 => {
247                    let bytes = value.as_bytes();
248                    if let Some(&[b1, b2]) = bytes.get(i + 1..i + 3) {
249                        if b1.is_ascii_hexdigit() && b2.is_ascii_hexdigit() {
250                            flags |= shift;
251                        }
252                    }
253                }
254
255                // Consume all other characters
256                _ => {}
257            }
258        }
259
260        // Finalize last span
261        let end = u16::try_from(value.len()).map_err(|_| Error::Overflow)?;
262        spans[index] = start..end;
263
264        // Return format or error on number mismatch
265        if index == N - 1 {
266            Ok(Format {
267                value: value.as_bytes().into(),
268                spans,
269                flags,
270            })
271        } else {
272            Err(Error::Mismatch)
273        }
274    }
275}
276
277// ----------------------------------------------------------------------------
278
279impl<const N: usize> Hash for Format<N> {
280    /// Hashes the formatted string.
281    #[inline]
282    fn hash<H: Hasher>(&self, state: &mut H) {
283        self.value.hash(state);
284    }
285}
286
287// ----------------------------------------------------------------------------
288
289impl<const N: usize> PartialEq for Format<N> {
290    /// Compares two formatted strings for equality.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// # use std::error::Error;
296    /// # fn main() -> Result<(), Box<dyn Error>> {
297    /// use zrx_id::format::Format;
298    ///
299    /// // Create and compare formatted strings
300    /// let a: Format::<3> = "a:b:c".parse()?;
301    /// let b: Format::<3> = "a:b:c".parse()?;
302    /// assert_eq!(a, b);
303    /// # Ok(())
304    /// # }
305    /// ```
306    #[inline]
307    fn eq(&self, other: &Self) -> bool {
308        self.value == other.value
309    }
310}
311
312impl<const N: usize> Eq for Format<N> {}
313
314// ----------------------------------------------------------------------------
315
316impl<const N: usize> PartialOrd for Format<N> {
317    /// Orders two formatted strings.
318    ///
319    /// # Examples
320    ///
321    /// ```
322    /// # use std::error::Error;
323    /// # fn main() -> Result<(), Box<dyn Error>> {
324    /// use zrx_id::format::Format;
325    ///
326    /// // Create and compare formatted strings
327    /// let a: Format::<3> = "b:c:d".parse()?;
328    /// let b: Format::<3> = "a:b:c".parse()?;
329    /// assert!(a > b);
330    /// # Ok(())
331    /// # }
332    /// ```
333    #[inline]
334    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
335        Some(self.cmp(other))
336    }
337}
338
339impl<const N: usize> Ord for Format<N> {
340    /// Orders two formatted strings.
341    ///
342    /// # Examples
343    ///
344    /// ```
345    /// # use std::error::Error;
346    /// # fn main() -> Result<(), Box<dyn Error>> {
347    /// use zrx_id::format::Format;
348    ///
349    /// // Create and compare formatted strings
350    /// let a: Format::<3> = "b:c:d".parse()?;
351    /// let b: Format::<3> = "a:b:c".parse()?;
352    /// assert!(a > b);
353    /// # Ok(())
354    /// # }
355    /// ```
356    #[inline]
357    fn cmp(&self, other: &Self) -> Ordering {
358        self.value.cmp(&other.value)
359    }
360}
361
362// ----------------------------------------------------------------------------
363
364impl<const N: usize> Display for Format<N> {
365    /// Formats the formatted string for display.
366    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367        f.write_str(self.as_str())
368    }
369}
370
371impl<const N: usize> Debug for Format<N> {
372    /// Formats the formatted string for debugging.
373    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
374        f.debug_struct("Format")
375            .field("value", &self.as_str())
376            .field("spans", &self.spans)
377            .field("flags", &self.flags)
378            .finish()
379    }
380}