zrx_id/id/format.rs
1// Copyright (c) 2025-2026 Zensical and contributors
2
3// SPDX-License-Identifier: MIT
4// All contributions are certified under the DCO
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Formatted string.
27
28use std::array;
29use std::borrow::Cow;
30use std::cmp::Ordering;
31use std::fmt::{self, Debug, Display};
32use std::hash::{Hash, Hasher};
33use std::ops::Range;
34use std::str::{from_utf8_unchecked, FromStr};
35
36mod builder;
37mod encoding;
38mod error;
39mod path;
40
41pub use builder::Builder;
42use encoding::decode;
43pub use error::{Error, Result};
44use path::validate;
45
46// ----------------------------------------------------------------------------
47// Structs
48// ----------------------------------------------------------------------------
49
/// Formatted string.
///
/// A low-level building block for strings made up of exactly `N` components
/// that are joined by `:` characters. A component that would itself contain
/// a `:` is stored percent-encoded, and a per-component flag records that it
/// must be decoded on access. Decoding is slower than borrowing, but is not
/// expected to be the common case.
///
/// Instances are immutable once created, either by parsing with [`FromStr`]
/// or by assembling components through [`Format::builder`]. Cloning copies
/// the underlying buffer, so it's recommended to wrap the encapsulating
/// type, [`Selector`][] or [`Id`][], in an [`Arc`][] when clones are frequent.
///
/// # Limits
///
/// Encoding flags are kept as one bit per component in a `u64`, so at most
/// 64 components are supported, which should be plenty for every realistic
/// use case. A narrower integer was deliberately not used, as alignment would
/// likely pad the field anyway. Component boundaries are stored as `u16`
/// offsets into the string representation.
///
/// [`Arc`]: std::sync::Arc
/// [`Id`]: crate::id::Id
/// [`Selector`]: crate::id::selector::Selector
///
/// # Examples
///
/// ```
/// # use std::error::Error;
/// # fn main() -> Result<(), Box<dyn Error>> {
/// use zrx_id::format::Format;
///
/// // Create formatted string builder
/// let mut builder = Format::<3>::builder();
/// builder.set(0, "a");
/// builder.set(1, "b");
/// builder.set(2, "c");
///
/// // Create formatted string from builder
/// let format = builder.build()?;
///
/// // Obtain string representation
/// assert_eq!(format.as_str(), "a:b:c");
/// # Ok(())
/// # }
/// ```
#[derive(Clone)]
pub struct Format<const N: usize> {
    /// Bytes of the complete string representation, separators included.
    value: Box<[u8]>,
    /// Byte range of each component within `value`, in component order.
    spans: [Range<u16>; N],
    /// Bit set in which bit `i` marks component `i` as percent-encoded.
    flags: u64,
}
101
102// ----------------------------------------------------------------------------
103// Implementations
104// ----------------------------------------------------------------------------
105
106impl<const N: usize> Format<N> {
107 /// Returns the value at the index.
108 ///
109 /// If the value is not percent-encoded, which means it does not contain a
110 /// `:` character, a borrowed reference is returned which is essentially a
111 /// zero-cost operation and expected to be the common case. Otherwise, the
112 /// value is percent-decoded and an owned value is returned.
113 ///
114 /// # Panics
115 ///
116 /// Panics if the index is out of bounds. Since [`Format`] is a low-level
117 /// construct, we don't expect this to happen, as indices should be known.
118 ///
119 /// # Examples
120 ///
121 /// ```
122 /// # use std::error::Error;
123 /// # fn main() -> Result<(), Box<dyn Error>> {
124 /// use zrx_id::format::Format;
125 ///
126 /// // Create formatted string builder
127 /// let mut builder = Format::<3>::builder();
128 /// builder.set(0, "a");
129 /// builder.set(1, "b");
130 /// builder.set(2, "c");
131 ///
132 /// // Create formatted string from builder
133 /// let format = builder.build()?;
134 ///
135 /// // Obtain value at index
136 /// assert_eq!(format.get(0), "a");
137 /// # Ok(())
138 /// # }
139 /// ```
140 #[must_use]
141 pub fn get(&self, index: usize) -> Cow<'_, str> {
142 let p = self.spans[index].start as usize;
143 let q = self.spans[index].end as usize;
144 if self.flags & (1 << index) == 0 {
145 // SAFETY: The value is guaranteed to be valid UTF-8, as it was
146 // created from a valid UTF-8 string. Additionally, the value is
147 // not percent-encoded, so we can just return a borrowed reference
148 // to the formatted string value, which is the common fast path.
149 unsafe { Cow::Borrowed(from_utf8_unchecked(&self.value[p..q])) }
150 } else {
151 decode(&self.value[p..q])
152 }
153 }
154
155 /// Returns the string representation.
156 ///
157 /// # Examples
158 ///
159 /// ```
160 /// # use std::error::Error;
161 /// # fn main() -> Result<(), Box<dyn Error>> {
162 /// use zrx_id::format::Format;
163 ///
164 /// // Create formatted string builder
165 /// let mut builder = Format::<3>::builder();
166 /// builder.set(0, "a");
167 /// builder.set(1, "b");
168 /// builder.set(2, "c");
169 ///
170 /// // Create formatted string from builder
171 /// let format = builder.build()?;
172 ///
173 /// // Obtain string representation
174 /// assert_eq!(format.as_str(), "a:b:c");
175 /// # Ok(())
176 /// # }
177 /// ```
178 #[inline]
179 #[must_use]
180 pub fn as_str(&self) -> &str {
181 // SAFETY: The value is guaranteed to be valid UTF-8, as it was created
182 // from valid UTF-8 strings, so we can just return a borrowed reference
183 unsafe { from_utf8_unchecked(&self.value) }
184 }
185}
186
187// ----------------------------------------------------------------------------
188// Trait implementations
189// ----------------------------------------------------------------------------
190
191impl<const N: usize> FromStr for Format<N> {
192 type Err = Error;
193
194 /// Attempts to create a formatted string from a string.
195 ///
196 /// # Errors
197 ///
198 /// If the span number is off, [`Error::Mismatch`] is returned.
199 ///
200 /// # Examples
201 ///
202 /// ```
203 /// # use std::error::Error;
204 /// # fn main() -> Result<(), Box<dyn Error>> {
205 /// use zrx_id::format::Format;
206 ///
207 /// // Create formatted string from string
208 /// let format: Format::<3> = "a:b:c".parse()?;
209 /// assert_eq!(format.as_str(), "a:b:c");
210 /// # Ok(())
211 /// # }
212 /// ```
213 fn from_str(value: &str) -> Result<Self> {
214 let mut spans = array::from_fn(|_| 0u16..0u16);
215 let mut flags = 0;
216
217 // Initialize start and span counter
218 let mut start = 0u16;
219 let mut index = 0;
220 let mut shift = 1;
221
222 // Compute spans from characters
223 for (i, char) in value.char_indices() {
224 match char {
225 // If the current character is a separator, finalize the span.
226 // It's very unlikely that any conversion here results in an
227 // error, but since identifiers might potentially contain user
228 // data, we handle it and return an error.
229 ':' => {
230 let end = u16::try_from(i).map_err(|_| Error::Overflow)?;
231 validate(&value[start.into()..end.into()])?;
232
233 // Finalize current span
234 spans[index] = start..end;
235 index += 1;
236
237 // Continue after separator
238 start = end + 1;
239 shift = 1 << index;
240 }
241
242 // If the current span contains a percent sign, and we haven't
243 // already marked the span as percent-encoded, check if the next
244 // two characters are valid hexadecimal digits. If so, mark it
245 // as percent-encoded. Otherwise, proceed without modification.
246 '%' if flags & shift == 0 => {
247 let bytes = value.as_bytes();
248 if let Some(&[b1, b2]) = bytes.get(i + 1..i + 3) {
249 if b1.is_ascii_hexdigit() && b2.is_ascii_hexdigit() {
250 flags |= shift;
251 }
252 }
253 }
254
255 // Consume all other characters
256 _ => {}
257 }
258 }
259
260 // Finalize last span
261 let end = u16::try_from(value.len()).map_err(|_| Error::Overflow)?;
262 spans[index] = start..end;
263
264 // Return format or error on number mismatch
265 if index == N - 1 {
266 Ok(Format {
267 value: value.as_bytes().into(),
268 spans,
269 flags,
270 })
271 } else {
272 Err(Error::Mismatch)
273 }
274 }
275}
276
277// ----------------------------------------------------------------------------
278
279impl<const N: usize> Hash for Format<N> {
280 /// Hashes the formatted string.
281 #[inline]
282 fn hash<H: Hasher>(&self, state: &mut H) {
283 self.value.hash(state);
284 }
285}
286
287// ----------------------------------------------------------------------------
288
289impl<const N: usize> PartialEq for Format<N> {
290 /// Compares two formatted strings for equality.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// # use std::error::Error;
296 /// # fn main() -> Result<(), Box<dyn Error>> {
297 /// use zrx_id::format::Format;
298 ///
299 /// // Create and compare formatted strings
300 /// let a: Format::<3> = "a:b:c".parse()?;
301 /// let b: Format::<3> = "a:b:c".parse()?;
302 /// assert_eq!(a, b);
303 /// # Ok(())
304 /// # }
305 /// ```
306 #[inline]
307 fn eq(&self, other: &Self) -> bool {
308 self.value == other.value
309 }
310}
311
312impl<const N: usize> Eq for Format<N> {}
313
314// ----------------------------------------------------------------------------
315
316impl<const N: usize> PartialOrd for Format<N> {
317 /// Orders two formatted strings.
318 ///
319 /// # Examples
320 ///
321 /// ```
322 /// # use std::error::Error;
323 /// # fn main() -> Result<(), Box<dyn Error>> {
324 /// use zrx_id::format::Format;
325 ///
326 /// // Create and compare formatted strings
327 /// let a: Format::<3> = "b:c:d".parse()?;
328 /// let b: Format::<3> = "a:b:c".parse()?;
329 /// assert!(a > b);
330 /// # Ok(())
331 /// # }
332 /// ```
333 #[inline]
334 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
335 Some(self.cmp(other))
336 }
337}
338
339impl<const N: usize> Ord for Format<N> {
340 /// Orders two formatted strings.
341 ///
342 /// # Examples
343 ///
344 /// ```
345 /// # use std::error::Error;
346 /// # fn main() -> Result<(), Box<dyn Error>> {
347 /// use zrx_id::format::Format;
348 ///
349 /// // Create and compare formatted strings
350 /// let a: Format::<3> = "b:c:d".parse()?;
351 /// let b: Format::<3> = "a:b:c".parse()?;
352 /// assert!(a > b);
353 /// # Ok(())
354 /// # }
355 /// ```
356 #[inline]
357 fn cmp(&self, other: &Self) -> Ordering {
358 self.value.cmp(&other.value)
359 }
360}
361
362// ----------------------------------------------------------------------------
363
364impl<const N: usize> Display for Format<N> {
365 /// Formats the formatted string for display.
366 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367 f.write_str(self.as_str())
368 }
369}
370
371impl<const N: usize> Debug for Format<N> {
372 /// Formats the formatted string for debugging.
373 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
374 f.debug_struct("Format")
375 .field("value", &self.as_str())
376 .field("spans", &self.spans)
377 .field("flags", &self.flags)
378 .finish()
379 }
380}