Skip to main content

jacquard_common/types/
string.rs

1use crate::bos::{Bos, DefaultStr};
2use alloc::string::{String, ToString};
3use alloc::sync::Arc;
4use core::str::FromStr;
5#[cfg(feature = "std")]
6use miette::{Diagnostic, SourceSpan};
7use serde::{Deserialize, Deserializer, Serialize, Serializer};
8use smol_str::SmolStr;
9
/// Source span for error reporting, expressed as a byte offset and a length.
///
/// With the `std` feature this name refers to `miette::SourceSpan`. Without it,
/// this minimal tuple struct stands in so the error types in this module can
/// still carry span information.
///
/// Field `0` is the start offset and field `1` is the length.
#[cfg(not(feature = "std"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceSpan(pub usize, pub usize);

#[cfg(not(feature = "std"))]
impl SourceSpan {
    /// Create a span starting at `offset` and covering `len` bytes.
    pub const fn new(offset: usize, len: usize) -> Self {
        Self(offset, len)
    }

    /// Start offset of the span.
    ///
    /// Mirrors `miette::SourceSpan::offset` so callers compile with or without
    /// the `std` feature.
    pub const fn offset(&self) -> usize {
        self.0
    }

    /// Length of the span.
    ///
    /// Mirrors `miette::SourceSpan::len`.
    pub const fn len(&self) -> usize {
        self.1
    }

    /// Whether the span has zero length.
    ///
    /// Mirrors `miette::SourceSpan::is_empty`.
    pub const fn is_empty(&self) -> bool {
        self.1 == 0
    }
}

#[cfg(not(feature = "std"))]
impl From<(usize, usize)> for SourceSpan {
    /// Build a span from an `(offset, len)` pair.
    fn from((offset, len): (usize, usize)) -> Self {
        Self::new(offset, len)
    }
}
29
30pub use crate::{
31    CowStr,
32    types::{
33        aturi::AtUri,
34        cid::{Cid, CidLink},
35        datetime::Datetime,
36        did::Did,
37        did_service::DidService,
38        handle::Handle,
39        ident::AtIdentifier,
40        language::Language,
41        nsid::Nsid,
42        recordkey::{RecordKey, Rkey},
43        tid::Tid,
44        uri::UriValue,
45    },
46};
47use crate::{
48    IntoStatic,
49    types::{LexiconStringType, UriType},
50};
51
52/// Polymorphic AT Protocol string value
53///
54/// Represents any AT Protocol string type, automatically detecting and parsing
55/// into the appropriate variant. Used internally for generic value handling.
56///
57/// Variants are checked in order from most specific to least specific. Note that
58/// record keys are intentionally NOT parsed from bare strings as the validation
59/// is too permissive and would catch too many values.
60#[derive(Debug, Clone, PartialEq, Eq, Hash)]
61pub enum AtprotoStr<S: Bos<str> + AsRef<str> = DefaultStr> {
62    /// ISO 8601 datetime
63    Datetime(Datetime),
64    /// BCP 47 language tag
65    Language(Language),
66    /// Timestamp identifier
67    Tid(Tid),
68    /// Namespaced identifier
69    Nsid(Nsid<S>),
70    /// Decentralized identifier
71    Did(Did<S>),
72    /// Account handle
73    Handle(Handle<S>),
74    /// Identifier (DID or handle)
75    AtIdentifier(AtIdentifier<S>),
76    /// AT URI
77    AtUri(AtUri<S>),
78    /// Generic URI
79    Uri(UriValue<S>),
80    /// Content identifier
81    Cid(Cid<S>),
82    /// Record key
83    RecordKey(RecordKey<Rkey<S>>),
84    /// Plain string (fallback)
85    String(S),
86}
87
88use crate::types::cid::IpldCid;
89use crate::types::did::validate_did;
90use crate::types::handle::validate_handle;
91use crate::types::nsid::validate_nsid;
92
93impl<S: Bos<str> + AsRef<str>> AtprotoStr<S> {
94    /// Classify and wrap a string value into the appropriate variant.
95    ///
96    /// This is fairly exhaustive and potentially **slow**, prefer using anything
97    /// that narrows down the search field quicker.
98    ///
99    /// Inspects the string content, validates against known AT Protocol types,
100    /// and moves `string` into the matching variant via unchecked constructors
101    /// (safe because we validate first).
102    pub fn new(string: S) -> Self {
103        let s: &str = string.as_ref();
104        // Non-string-backed types first (they don't consume S).
105        if let Ok(datetime) = Datetime::from_str(s) {
106            return Self::Datetime(datetime);
107        }
108        if let Ok(lang) = Language::new(s) {
109            return Self::Language(lang);
110        }
111        if let Ok(tid) = Tid::from_str(s) {
112            return Self::Tid(tid);
113        }
114        // String-backed types: validate then wrap S directly.
115        if validate_did(s).is_ok() {
116            return Self::Did(unsafe { Did::unchecked(string) });
117        }
118        if validate_handle(s).is_ok() {
119            return Self::Handle(unsafe { Handle::unchecked(string) });
120        }
121        if validate_nsid(s).is_ok() {
122            return Self::Nsid(unsafe { Nsid::unchecked(string) });
123        }
124        if crate::types::aturi::validate_and_index(s).is_ok() {
125            return Self::AtUri(unsafe { AtUri::unchecked(string) });
126        }
127        // URI schemes that UriValue handles - check prefix, wrap S directly.
128        if s.starts_with("https://") || s.starts_with("wss://") || s.starts_with("ipld://") {
129            if let Ok(uri) = UriValue::new(s) {
130                // we don't want to always Any here, it's better to fall back to the String variant.
131                match uri {
132                    UriValue::Any(_) => {}
133                    _ => {
134                        return Self::Uri(UriValue::new(string).expect("already checked"));
135                    }
136                }
137            }
138        }
139        let s: &str = string.as_ref();
140        // CID: try to parse as IPLD first, otherwise wrap as string CID.
141        if IpldCid::try_from(s).is_ok() || s.starts_with("bafy") {
142            return Self::Cid(unsafe { Cid::unchecked_str(string) });
143        }
144        // Fallback: plain string.
145        Self::String(string)
146    }
147
148    /// Get the string value regardless of variant.
149    pub fn as_str(&self) -> &str {
150        match self {
151            Self::Datetime(datetime) => datetime.as_str(),
152            Self::Language(lang) => lang.as_str(),
153            Self::Handle(handle) => handle.as_str(),
154            Self::AtIdentifier(atid) => atid.as_str(),
155            Self::Nsid(nsid) => nsid.as_str(),
156            Self::AtUri(aturi) => aturi.as_str(),
157            Self::Uri(uri) => uri.as_str(),
158            Self::Cid(cid) => cid.as_str(),
159            Self::Tid(tid) => tid.as_str(),
160            Self::Did(did) => did.as_str(),
161            Self::RecordKey(rkey) => rkey.as_ref(),
162            Self::String(string) => string.as_ref(),
163        }
164    }
165
166    /// Detailed string type classification.
167    pub fn string_type(&self) -> LexiconStringType {
168        match self {
169            Self::Datetime(_) => LexiconStringType::Datetime,
170            Self::Language(_) => LexiconStringType::Language,
171            Self::Handle(_) => LexiconStringType::Handle,
172            Self::AtIdentifier(_) => LexiconStringType::AtIdentifier,
173            Self::Nsid(_) => LexiconStringType::Nsid,
174            Self::AtUri(_) => LexiconStringType::AtUri,
175            Self::Uri(uri) => LexiconStringType::Uri(match uri {
176                UriValue::Did(_) => UriType::Did,
177                UriValue::At(_) => UriType::At,
178                UriValue::Https(_) => UriType::Https,
179                UriValue::Wss(_) => UriType::Wss,
180                UriValue::Cid(_) => UriType::Cid,
181                UriValue::Any(_) => UriType::Any,
182            }),
183            Self::Cid(_) => LexiconStringType::Cid,
184            Self::Tid(_) => LexiconStringType::Tid,
185            Self::Did(_) => LexiconStringType::Did,
186            Self::RecordKey(_) => LexiconStringType::RecordKey,
187            Self::String(_) => LexiconStringType::String,
188        }
189    }
190}
191
192impl<S: Bos<str> + AsRef<str>> AsRef<str> for AtprotoStr<S> {
193    fn as_ref(&self) -> &str {
194        self.as_str()
195    }
196}
197
198impl<S: Bos<str> + AsRef<str> + Serialize> Serialize for AtprotoStr<S> {
199    fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
200    where
201        Ser: Serializer,
202    {
203        serializer.serialize_str(self.as_str())
204    }
205}
206
207impl<'de, S> Deserialize<'de> for AtprotoStr<S>
208where
209    S: Bos<str> + AsRef<str> + Deserialize<'de>,
210{
211    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
212    where
213        D: Deserializer<'de>,
214    {
215        let value = S::deserialize(deserializer)?;
216        Ok(Self::new(value))
217    }
218}
219
220impl<S: Bos<str> + AsRef<str>> AtprotoStr<S> {
221    /// Convert to an `AtprotoStr` with a different backing type.
222    pub fn convert<B: Bos<str> + AsRef<str> + From<S>>(self) -> AtprotoStr<B> {
223        match self {
224            AtprotoStr::Datetime(dt) => AtprotoStr::Datetime(dt),
225            AtprotoStr::Language(lang) => AtprotoStr::Language(lang),
226            AtprotoStr::Tid(tid) => AtprotoStr::Tid(tid),
227            AtprotoStr::Nsid(nsid) => AtprotoStr::Nsid(nsid.convert()),
228            AtprotoStr::Did(did) => AtprotoStr::Did(did.convert()),
229            AtprotoStr::Handle(handle) => AtprotoStr::Handle(handle.convert()),
230            AtprotoStr::AtIdentifier(ident) => AtprotoStr::AtIdentifier(ident.convert()),
231            AtprotoStr::AtUri(at_uri) => AtprotoStr::AtUri(at_uri.convert()),
232            AtprotoStr::Uri(uri) => AtprotoStr::Uri(uri.convert()),
233            AtprotoStr::Cid(cid) => AtprotoStr::Cid(cid.convert()),
234            AtprotoStr::RecordKey(rkey) => AtprotoStr::RecordKey(RecordKey(rkey.0.convert())),
235            AtprotoStr::String(s) => AtprotoStr::String(B::from(s)),
236        }
237    }
238}
239
240impl<S: Bos<str> + AsRef<str> + IntoStatic> IntoStatic for AtprotoStr<S>
241where
242    S::Output: Bos<str> + AsRef<str>,
243{
244    type Output = AtprotoStr<S::Output>;
245
246    fn into_static(self) -> Self::Output {
247        match self {
248            AtprotoStr::Datetime(datetime) => AtprotoStr::Datetime(datetime),
249            AtprotoStr::Language(language) => AtprotoStr::Language(language),
250            AtprotoStr::Tid(tid) => AtprotoStr::Tid(tid),
251            AtprotoStr::Nsid(nsid) => AtprotoStr::Nsid(nsid.into_static()),
252            AtprotoStr::Did(did) => AtprotoStr::Did(did.into_static()),
253            AtprotoStr::Handle(handle) => AtprotoStr::Handle(handle.into_static()),
254            AtprotoStr::AtIdentifier(ident) => AtprotoStr::AtIdentifier(ident.into_static()),
255            // AtUri and UriValue are already 'static in this enum.
256            AtprotoStr::AtUri(at_uri) => AtprotoStr::AtUri(at_uri.into_static()),
257            AtprotoStr::Uri(uri) => AtprotoStr::Uri(uri.into_static()),
258            AtprotoStr::Cid(cid) => AtprotoStr::Cid(cid.into_static()),
259            AtprotoStr::RecordKey(record_key) => AtprotoStr::RecordKey(record_key.into_static()),
260            AtprotoStr::String(s) => AtprotoStr::String(s.into_static()),
261        }
262    }
263}
264
265impl<S: Bos<str> + AsRef<str> + Clone + Serialize> From<AtprotoStr<S>> for String {
266    fn from(value: AtprotoStr<S>) -> Self {
267        value.as_str().to_string()
268    }
269}
270
271/// Parsing Error for atproto string types which don't have third-party specs
272/// (e.g. datetime, CIDs, language tags).
273///
274/// `spec` refers to the final url path segment on atproto.com/specs,
275/// detailing the specification for the type
276/// `source` is the source string, or part of it
277/// `kind` is the type of parsing error: `[StrParseKind]`
278#[derive(Debug, thiserror::Error, PartialEq, Eq, Clone)]
279#[cfg_attr(feature = "std", derive(Diagnostic))]
280#[error("error in `{source}`: {kind}")]
281#[cfg_attr(
282    feature = "std",
283    diagnostic(
284        url("https://atproto.com/specs/{spec}"),
285        help("if something doesn't match the spec, contact the crate author")
286    )
287)]
288pub struct AtStrError {
289    /// AT Protocol spec name this error relates to
290    pub spec: SmolStr,
291    /// The source string that failed to parse
292    #[cfg_attr(feature = "std", source_code)]
293    pub source: String,
294    /// The specific kind of parsing error
295    #[source]
296    #[cfg_attr(feature = "std", diagnostic_source)]
297    pub kind: StrParseKind,
298}
299
300impl AtStrError {
301    /// Create a new AT string parsing error
302    pub fn new(spec: &'static str, source: String, kind: StrParseKind) -> Self {
303        Self {
304            spec: SmolStr::new_static(spec),
305            source,
306            kind,
307        }
308    }
309
310    /// Wrap an existing error with a new spec context
311    pub fn wrap(spec: &'static str, source: String, error: AtStrError) -> Self {
312        if let Some(span) = match &error.kind {
313            StrParseKind::Disallowed { problem, .. } => problem,
314            StrParseKind::MissingComponent { span, .. } => span,
315            _ => &None,
316        } {
317            Self {
318                spec: SmolStr::new_static(spec),
319                source,
320                kind: StrParseKind::Wrap {
321                    span: Some(*span),
322                    err: Arc::new(error),
323                },
324            }
325        } else {
326            let span = source
327                .find(&error.source)
328                .map(|start| (start, error.source.len()).into());
329            Self {
330                spec: SmolStr::new_static(spec),
331                source,
332                kind: StrParseKind::Wrap {
333                    span,
334                    err: Arc::new(error),
335                },
336            }
337        }
338    }
339
340    /// something on the provided disallowed list was found in the source string
341    /// does a substring search for any of the offending strings to get the span
342    pub fn disallowed(spec: &'static str, source: &str, disallowed: &[&str]) -> Self {
343        for item in disallowed {
344            if let Some(loc) = source.find(item) {
345                return Self {
346                    spec: SmolStr::new_static(spec),
347                    source: source.to_string(),
348                    kind: StrParseKind::Disallowed {
349                        problem: Some(SourceSpan::new(loc.into(), item.len())),
350                        message: smol_str::format_smolstr!("`{item}`"),
351                    },
352                };
353            }
354        }
355        Self {
356            spec: SmolStr::new_static(spec),
357            source: source.to_string(),
358            kind: StrParseKind::Disallowed {
359                problem: None,
360                message: SmolStr::new_static(""),
361            },
362        }
363    }
364
365    /// Create an error for a string that exceeds the maximum length
366    pub fn too_long(spec: &'static str, source: &str, max: usize, actual: usize) -> Self {
367        Self {
368            spec: SmolStr::new_static(spec),
369            source: source.to_string(),
370            kind: StrParseKind::TooLong { max, actual },
371        }
372    }
373
374    /// Create an error for a string below the minimum length
375    pub fn too_short(spec: &'static str, source: &str, min: usize, actual: usize) -> Self {
376        Self {
377            spec: SmolStr::new_static(spec),
378            source: source.to_string(),
379            kind: StrParseKind::TooShort { min, actual },
380        }
381    }
382
383    /// missing component, with what was expected to be found
384    pub fn missing(spec: &'static str, source: &str, expected: &str) -> Self {
385        if let Some(loc) = source.find(expected) {
386            return Self {
387                spec: SmolStr::new_static(spec),
388                source: source.to_string(),
389                kind: StrParseKind::MissingComponent {
390                    span: Some(SourceSpan::new(loc.into(), expected.len())),
391                    message: smol_str::format_smolstr!("`{expected}` found incorrectly here"),
392                },
393            };
394        }
395        Self {
396            spec: SmolStr::new_static(spec),
397            source: source.to_string(),
398            kind: StrParseKind::MissingComponent {
399                span: None,
400                message: SmolStr::new(expected),
401            },
402        }
403    }
404
405    /// missing component, with the span where it was expected to be founf
406    /// Create an error for a missing component at a specific span
407    pub fn missing_from(
408        spec: &'static str,
409        source: &str,
410        expected: &str,
411        span: (usize, usize),
412    ) -> Self {
413        Self {
414            spec: SmolStr::new_static(spec),
415            source: source.to_string(),
416            kind: StrParseKind::MissingComponent {
417                span: Some(span.into()),
418                message: SmolStr::new(expected),
419            },
420        }
421    }
422
423    /// Create an error for a regex validation failure
424    pub fn regex(spec: &'static str, source: &str, message: SmolStr) -> Self {
425        Self {
426            spec: SmolStr::new_static(spec),
427            source: source.to_string(),
428            kind: StrParseKind::RegexFail {
429                span: None,
430                message,
431            },
432        }
433    }
434}
435
436/// Kinds of parsing errors for AT Protocol string types
437#[derive(Debug, thiserror::Error, PartialEq, Eq, Clone)]
438#[cfg_attr(feature = "std", derive(Diagnostic))]
439pub enum StrParseKind {
440    /// Regex pattern validation failed
441    #[error("regex failure - {message}")]
442    #[cfg_attr(feature = "std", diagnostic(code(jacquard::types::string::regex_fail)))]
443    RegexFail {
444        /// Optional span highlighting the problem area
445        #[cfg_attr(feature = "std", label)]
446        span: Option<SourceSpan>,
447        /// Help message explaining the failure
448        #[cfg_attr(feature = "std", help)]
449        message: SmolStr,
450    },
451    /// String exceeds maximum allowed length
452    #[error("string too long (allowed: {max}, actual: {actual})")]
453    #[cfg_attr(
454        feature = "std",
455        diagnostic(code(jacquard::types::string::wrong_length))
456    )]
457    TooLong {
458        /// Maximum allowed length
459        max: usize,
460        /// Actual string length
461        actual: usize,
462    },
463
464    /// String is below minimum required length
465    #[error("string too short (allowed: {min}, actual: {actual})")]
466    #[cfg_attr(
467        feature = "std",
468        diagnostic(code(jacquard::types::string::wrong_length))
469    )]
470    TooShort {
471        /// Minimum required length
472        min: usize,
473        /// Actual string length
474        actual: usize,
475    },
476    /// String contains disallowed values
477    #[error("disallowed - {message}")]
478    #[cfg_attr(feature = "std", diagnostic(code(jacquard::types::string::disallowed)))]
479    Disallowed {
480        /// Optional span highlighting the disallowed content
481        #[cfg_attr(feature = "std", label)]
482        problem: Option<SourceSpan>,
483        /// Help message about what's disallowed
484        #[cfg_attr(feature = "std", help)]
485        message: SmolStr,
486    },
487    /// Required component is missing
488    #[error("missing - {message}")]
489    #[cfg_attr(feature = "std", diagnostic(code(jacquard::atstr::missing_component)))]
490    MissingComponent {
491        /// Optional span where the component should be
492        #[cfg_attr(feature = "std", label)]
493        span: Option<SourceSpan>,
494        /// Help message about what's missing
495        #[cfg_attr(feature = "std", help)]
496        message: SmolStr,
497    },
498    /// Wraps another error with additional context
499    #[error("{err:?}")]
500    #[cfg_attr(feature = "std", diagnostic(code(jacquard::atstr::inner)))]
501    Wrap {
502        /// Optional span in the outer context
503        #[cfg_attr(feature = "std", label)]
504        span: Option<SourceSpan>,
505        /// The wrapped inner error
506        #[source]
507        err: Arc<AtStrError>,
508    },
509    /// Wraps another error with additional context
510    #[error("converting from a string slice")]
511    #[cfg_attr(feature = "std", diagnostic(code(jacquard::atstr::conversion)))]
512    Conversion,
513}