jacquard_common/types/
string.rs

1use miette::SourceSpan;
2use serde::{Deserialize, Deserializer, Serialize, Serializer};
3use smol_str::{SmolStr, ToSmolStr};
4use std::{str::FromStr, sync::Arc};
5
6use crate::IntoStatic;
7pub use crate::{
8    CowStr,
9    types::{
10        aturi::AtUri,
11        cid::{Cid, CidLink},
12        datetime::Datetime,
13        did::Did,
14        handle::Handle,
15        ident::AtIdentifier,
16        language::Language,
17        nsid::Nsid,
18        recordkey::{RecordKey, Rkey},
19        tid::Tid,
20        uri::Uri,
21    },
22};
23
24/// Polymorphic AT Protocol string value
25///
26/// Represents any AT Protocol string type, automatically detecting and parsing
27/// into the appropriate variant. Used internally for generic value handling.
28///
29/// Variants are checked in order from most specific to least specific. Note that
30/// record keys are intentionally NOT parsed from bare strings as the validation
31/// is too permissive and would catch too many values.
32#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33pub enum AtprotoStr<'s> {
34    /// ISO 8601 datetime
35    Datetime(Datetime),
36    /// BCP 47 language tag
37    Language(Language),
38    /// Timestamp identifier
39    Tid(Tid),
40    /// Namespaced identifier
41    Nsid(Nsid<'s>),
42    /// Decentralized identifier
43    Did(Did<'s>),
44    /// Account handle
45    Handle(Handle<'s>),
46    /// Identifier (DID or handle)
47    AtIdentifier(AtIdentifier<'s>),
48    /// AT URI
49    AtUri(AtUri<'s>),
50    /// Generic URI
51    Uri(Uri<'s>),
52    /// Content identifier
53    Cid(Cid<'s>),
54    /// Record key
55    RecordKey(RecordKey<Rkey<'s>>),
56    /// Plain string (fallback)
57    String(CowStr<'s>),
58}
59
60impl<'s> AtprotoStr<'s> {
61    /// Borrowing constructor for bare atproto string values
62    /// This is fairly exhaustive and potentially **slow**, prefer using anything
63    /// that narrows down the search field quicker.
64    ///
65    /// Note: We don't construct record keys from bare strings in this because
66    /// the type is too permissive and too many things would be classified as rkeys.
67    ///
68    /// Value object deserialization checks against the field names for common
69    /// names (uri, cid, did, handle, createdAt, indexedAt, etc.) to improve
70    /// performance of the happy path.
71    pub fn new(string: &'s str) -> Self {
72        // TODO: do some quick prefix checks like in Uri to drop through faster
73        if let Ok(datetime) = Datetime::from_str(string) {
74            Self::Datetime(datetime)
75        } else if let Ok(lang) = Language::new(string) {
76            Self::Language(lang)
77        } else if let Ok(tid) = Tid::from_str(string) {
78            Self::Tid(tid)
79        } else if let Ok(did) = Did::new(string) {
80            Self::Did(did)
81        } else if let Ok(handle) = Handle::new(string) {
82            Self::Handle(handle)
83        } else if let Ok(atid) = AtIdentifier::new(string) {
84            Self::AtIdentifier(atid)
85        } else if let Ok(nsid) = Nsid::new(string) {
86            Self::Nsid(nsid)
87        } else if let Ok(aturi) = AtUri::new(string) {
88            Self::AtUri(aturi)
89        } else if let Ok(uri) = Uri::new(string) {
90            Self::Uri(uri)
91        } else if let Ok(cid) = Cid::new(string.as_bytes()) {
92            Self::Cid(cid)
93        } else {
94            // We don't construct record keys from bare strings because the type is too permissive
95            Self::String(CowStr::Borrowed(string))
96        }
97    }
98
99    /// Get the string value regardless of variant
100    pub fn as_str(&self) -> &str {
101        match self {
102            Self::Datetime(datetime) => datetime.as_str(),
103            Self::Language(lang) => lang.as_str(),
104            Self::Handle(handle) => handle.as_str(),
105            Self::AtIdentifier(atid) => atid.as_str(),
106            Self::Nsid(nsid) => nsid.as_str(),
107            Self::AtUri(aturi) => aturi.as_str(),
108            Self::Uri(uri) => uri.as_str(),
109            Self::Cid(cid) => cid.as_str(),
110            Self::Tid(tid) => tid.as_str(),
111            Self::Did(did) => did.as_str(),
112            Self::RecordKey(rkey) => rkey.as_ref(),
113            Self::String(string) => string.as_ref(),
114        }
115    }
116}
117
118impl AtprotoStr<'static> {
119    /// Owned constructor for bare atproto string values
120    /// This is fairly exhaustive and potentially **slow**, prefer using anything
121    /// that narrows down the search field quicker.
122    ///
123    /// Note: We don't construct record keys from bare strings in this because
124    /// the type is too permissive and too many things would be classified as rkeys.
125    ///
126    /// Value object deserialization checks against the field names for common
127    /// names (uri, cid, did, handle, createdAt, indexedAt, etc.) to improve
128    /// performance of the happy path.
129    pub fn new_owned(string: impl AsRef<str>) -> AtprotoStr<'static> {
130        let string = string.as_ref();
131        // TODO: do some quick prefix checks like in Uri to drop through faster
132        if let Ok(datetime) = Datetime::from_str(string) {
133            Self::Datetime(datetime)
134        } else if let Ok(lang) = Language::new(string) {
135            Self::Language(lang)
136        } else if let Ok(tid) = Tid::from_str(string) {
137            Self::Tid(tid)
138        } else if let Ok(did) = Did::new_owned(string) {
139            Self::Did(did)
140        } else if let Ok(handle) = Handle::new_owned(string) {
141            Self::Handle(handle)
142        } else if let Ok(atid) = AtIdentifier::new_owned(string) {
143            Self::AtIdentifier(atid)
144        } else if let Ok(nsid) = Nsid::new_owned(string) {
145            Self::Nsid(nsid)
146        } else if let Ok(aturi) = AtUri::new_owned(string) {
147            Self::AtUri(aturi)
148        } else if let Ok(uri) = Uri::new_owned(string) {
149            Self::Uri(uri)
150        } else if let Ok(cid) = Cid::new_owned(string.as_bytes()) {
151            Self::Cid(cid)
152        } else {
153            // We don't construct record keys from bare strings because the type is too permissive
154            Self::String(CowStr::Owned(string.to_smolstr()))
155        }
156    }
157}
158
159impl<'s> AsRef<str> for AtprotoStr<'s> {
160    fn as_ref(&self) -> &str {
161        match self {
162            Self::Datetime(datetime) => datetime.as_str(),
163            Self::Language(lang) => lang.as_ref(),
164            Self::Tid(tid) => tid.as_ref(),
165            Self::Did(did) => did.as_ref(),
166            Self::Handle(handle) => handle.as_ref(),
167            Self::AtIdentifier(atid) => atid.as_ref(),
168            Self::Nsid(nsid) => nsid.as_ref(),
169            Self::AtUri(aturi) => aturi.as_ref(),
170            Self::Uri(uri) => uri.as_str(),
171            Self::Cid(cid) => cid.as_ref(),
172            Self::RecordKey(rkey) => rkey.as_ref(),
173            Self::String(string) => string.as_ref(),
174        }
175    }
176}
177
178impl Serialize for AtprotoStr<'_> {
179    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
180    where
181        S: Serializer,
182    {
183        serializer.serialize_str(self.as_ref())
184    }
185}
186
187impl<'de, 'a> Deserialize<'de> for AtprotoStr<'a>
188where
189    'de: 'a,
190{
191    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
192    where
193        D: Deserializer<'de>,
194    {
195        let value = Deserialize::deserialize(deserializer)?;
196        Ok(Self::new(value))
197    }
198}
199
200impl IntoStatic for AtprotoStr<'_> {
201    type Output = AtprotoStr<'static>;
202
203    fn into_static(self) -> Self::Output {
204        match self {
205            AtprotoStr::Datetime(datetime) => AtprotoStr::Datetime(datetime),
206            AtprotoStr::Language(language) => AtprotoStr::Language(language),
207            AtprotoStr::Tid(tid) => AtprotoStr::Tid(tid),
208            AtprotoStr::Nsid(nsid) => AtprotoStr::Nsid(nsid.into_static()),
209            AtprotoStr::Did(did) => AtprotoStr::Did(did.into_static()),
210            AtprotoStr::Handle(handle) => AtprotoStr::Handle(handle.into_static()),
211            AtprotoStr::AtIdentifier(ident) => AtprotoStr::AtIdentifier(ident.into_static()),
212            AtprotoStr::AtUri(at_uri) => AtprotoStr::AtUri(at_uri.into_static()),
213            AtprotoStr::Uri(uri) => AtprotoStr::Uri(uri.into_static()),
214            AtprotoStr::Cid(cid) => AtprotoStr::Cid(cid.into_static()),
215            AtprotoStr::RecordKey(record_key) => AtprotoStr::RecordKey(record_key.into_static()),
216            AtprotoStr::String(cow_str) => AtprotoStr::String(cow_str.into_static()),
217        }
218    }
219}
220
221impl From<AtprotoStr<'_>> for String {
222    fn from(value: AtprotoStr<'_>) -> Self {
223        match value {
224            AtprotoStr::AtIdentifier(ident) => ident.to_string(),
225            AtprotoStr::AtUri(at_uri) => at_uri.to_string(),
226            AtprotoStr::Uri(uri) => match uri {
227                Uri::At(at_uri) => at_uri.to_string(),
228                Uri::Cid(cid) => cid.to_string(),
229                Uri::Did(did) => did.to_string(),
230                Uri::Https(url) => url.to_string(),
231                Uri::Wss(url) => url.to_string(),
232                Uri::Any(cow_str) => cow_str.to_string(),
233            },
234            AtprotoStr::Cid(cid) => cid.to_string(),
235            AtprotoStr::RecordKey(record_key) => record_key.as_ref().to_string(),
236            AtprotoStr::String(cow_str) => cow_str.to_string(),
237            AtprotoStr::Datetime(datetime) => datetime.to_string(),
238            AtprotoStr::Language(language) => language.to_string(),
239            AtprotoStr::Tid(tid) => tid.to_string(),
240            AtprotoStr::Nsid(nsid) => nsid.to_string(),
241            AtprotoStr::Did(did) => did.to_string(),
242            AtprotoStr::Handle(handle) => handle.to_string(),
243        }
244    }
245}
246
247/// Parsing Error for atproto string types which don't have third-party specs
248/// (e.g. datetime, CIDs, language tags).
249///
250/// `spec` refers to the final url path segment on atproto.com/specs,
251/// detailing the specification for the type
252/// `source` is the source string, or part of it
253/// `kind` is the type of parsing error: `[StrParseKind]`
254#[derive(Debug, thiserror::Error, miette::Diagnostic)]
255#[error("error in `{source}`: {kind}")]
256#[diagnostic(
257    url("https://atproto.com/specs/{spec}"),
258    help("if something doesn't match the spec, contact the crate author")
259)]
260pub struct AtStrError {
261    /// AT Protocol spec name this error relates to
262    pub spec: SmolStr,
263    /// The source string that failed to parse
264    #[source_code]
265    pub source: String,
266    /// The specific kind of parsing error
267    #[source]
268    #[diagnostic_source]
269    pub kind: StrParseKind,
270}
271
272impl AtStrError {
273    /// Create a new AT string parsing error
274    pub fn new(spec: &'static str, source: String, kind: StrParseKind) -> Self {
275        Self {
276            spec: SmolStr::new_static(spec),
277            source,
278            kind,
279        }
280    }
281
282    /// Wrap an existing error with a new spec context
283    pub fn wrap(spec: &'static str, source: String, error: AtStrError) -> Self {
284        if let Some(span) = match &error.kind {
285            StrParseKind::Disallowed { problem, .. } => problem,
286            StrParseKind::MissingComponent { span, .. } => span,
287            _ => &None,
288        } {
289            Self {
290                spec: SmolStr::new_static(spec),
291                source,
292                kind: StrParseKind::Wrap {
293                    span: Some(*span),
294                    err: Arc::new(error),
295                },
296            }
297        } else {
298            let span = source
299                .find(&error.source)
300                .map(|start| (start, error.source.len()).into());
301            Self {
302                spec: SmolStr::new_static(spec),
303                source,
304                kind: StrParseKind::Wrap {
305                    span,
306                    err: Arc::new(error),
307                },
308            }
309        }
310    }
311
312    /// something on the provided disallowed list was found in the source string
313    /// does a substring search for any of the offending strings to get the span
314    pub fn disallowed(spec: &'static str, source: &str, disallowed: &[&str]) -> Self {
315        for item in disallowed {
316            if let Some(loc) = source.find(item) {
317                return Self {
318                    spec: SmolStr::new_static(spec),
319                    source: source.to_string(),
320                    kind: StrParseKind::Disallowed {
321                        problem: Some(SourceSpan::new(loc.into(), item.len())),
322                        message: smol_str::format_smolstr!("`{item}`"),
323                    },
324                };
325            }
326        }
327        Self {
328            spec: SmolStr::new_static(spec),
329            source: source.to_string(),
330            kind: StrParseKind::Disallowed {
331                problem: None,
332                message: SmolStr::new_static(""),
333            },
334        }
335    }
336
337    /// Create an error for a string that exceeds the maximum length
338    pub fn too_long(spec: &'static str, source: &str, max: usize, actual: usize) -> Self {
339        Self {
340            spec: SmolStr::new_static(spec),
341            source: source.to_string(),
342            kind: StrParseKind::TooLong { max, actual },
343        }
344    }
345
346    /// Create an error for a string below the minimum length
347    pub fn too_short(spec: &'static str, source: &str, min: usize, actual: usize) -> Self {
348        Self {
349            spec: SmolStr::new_static(spec),
350            source: source.to_string(),
351            kind: StrParseKind::TooShort { min, actual },
352        }
353    }
354
355    /// missing component, with what was expected to be found
356    pub fn missing(spec: &'static str, source: &str, expected: &str) -> Self {
357        if let Some(loc) = source.find(expected) {
358            return Self {
359                spec: SmolStr::new_static(spec),
360                source: source.to_string(),
361                kind: StrParseKind::MissingComponent {
362                    span: Some(SourceSpan::new(loc.into(), expected.len())),
363                    message: smol_str::format_smolstr!("`{expected}` found incorrectly here"),
364                },
365            };
366        }
367        Self {
368            spec: SmolStr::new_static(spec),
369            source: source.to_string(),
370            kind: StrParseKind::MissingComponent {
371                span: None,
372                message: SmolStr::new(expected),
373            },
374        }
375    }
376
377    /// missing component, with the span where it was expected to be founf
378    /// Create an error for a missing component at a specific span
379    pub fn missing_from(
380        spec: &'static str,
381        source: &str,
382        expected: &str,
383        span: (usize, usize),
384    ) -> Self {
385        Self {
386            spec: SmolStr::new_static(spec),
387            source: source.to_string(),
388            kind: StrParseKind::MissingComponent {
389                span: Some(span.into()),
390                message: SmolStr::new(expected),
391            },
392        }
393    }
394
395    /// Create an error for a regex validation failure
396    pub fn regex(spec: &'static str, source: &str, message: SmolStr) -> Self {
397        Self {
398            spec: SmolStr::new_static(spec),
399            source: source.to_string(),
400            kind: StrParseKind::RegexFail {
401                span: None,
402                message,
403            },
404        }
405    }
406}
407
408/// Kinds of parsing errors for AT Protocol string types
409#[derive(Debug, thiserror::Error, miette::Diagnostic)]
410pub enum StrParseKind {
411    /// Regex pattern validation failed
412    #[error("regex failure - {message}")]
413    #[diagnostic(code(jacquard::types::string::regex_fail))]
414    RegexFail {
415        /// Optional span highlighting the problem area
416        #[label]
417        span: Option<SourceSpan>,
418        /// Help message explaining the failure
419        #[help]
420        message: SmolStr,
421    },
422    /// String exceeds maximum allowed length
423    #[error("string too long (allowed: {max}, actual: {actual})")]
424    #[diagnostic(code(jacquard::types::string::wrong_length))]
425    TooLong {
426        /// Maximum allowed length
427        max: usize,
428        /// Actual string length
429        actual: usize,
430    },
431
432    /// String is below minimum required length
433    #[error("string too short (allowed: {min}, actual: {actual})")]
434    #[diagnostic(code(jacquard::types::string::wrong_length))]
435    TooShort {
436        /// Minimum required length
437        min: usize,
438        /// Actual string length
439        actual: usize,
440    },
441    /// String contains disallowed values
442    #[error("disallowed - {message}")]
443    #[diagnostic(code(jacquard::types::string::disallowed))]
444    Disallowed {
445        /// Optional span highlighting the disallowed content
446        #[label]
447        problem: Option<SourceSpan>,
448        /// Help message about what's disallowed
449        #[help]
450        message: SmolStr,
451    },
452    /// Required component is missing
453    #[error("missing - {message}")]
454    #[diagnostic(code(jacquard::atstr::missing_component))]
455    MissingComponent {
456        /// Optional span where the component should be
457        #[label]
458        span: Option<SourceSpan>,
459        /// Help message about what's missing
460        #[help]
461        message: SmolStr,
462    },
463    /// Wraps another error with additional context
464    #[error("{err:?}")]
465    #[diagnostic(code(jacquard::atstr::inner))]
466    Wrap {
467        /// Optional span in the outer context
468        #[label]
469        span: Option<SourceSpan>,
470        /// The wrapped inner error
471        #[source]
472        err: Arc<AtStrError>,
473    },
474}