sfv/
lib.rs

1/*!
2`sfv` is an implementation of *Structured Field Values for HTTP*, as specified in [RFC 9651](https://httpwg.org/specs/rfc9651.html) for parsing and serializing HTTP field values.
3It also exposes a set of types that might be useful for defining new structured fields.
4
5# Data Structures
6
7There are three types of structured fields:
8
9- `Item` -- an `Integer`, `Decimal`, `String`, `Token`, `Byte Sequence`, `Boolean`, `Date`, or `Display String`. It can have associated `Parameters`.
10- `List` -- an array of zero or more members, each of which can be an `Item` or an `InnerList`, both of which can have `Parameters`.
11- `Dictionary` -- an ordered map of name-value pairs, where the names are short textual strings and the values are `Item`s or arrays of `Items` (represented with `InnerList`), both of which can have associated parameters. There can be zero or more members, and their names are unique in the scope of the `Dictionary` they occur within.
12
13There are also a few lower-level types used to construct structured field values:
14- `BareItem` is used as `Item`'s value or as a parameter value in `Parameters`.
- `Parameters` are an ordered map of key-value pairs that are associated with an `Item` or `InnerList`. The keys are unique within the scope of the `Parameters` they occur within, and the values are `BareItem`.
16- `InnerList` is an array of zero or more `Items`. Can have associated `Parameters`.
17- `ListEntry` represents either `Item` or `InnerList` as a member of `List` or as member-value in `Dictionary`.
18
19# Examples
20
21*/
22#![cfg_attr(
23    feature = "parsed-types",
24    doc = r##"
25### Parsing
26
27```
28# use sfv::{Dictionary, Item, List, Parser};
29# fn main() -> Result<(), sfv::Error> {
30// Parsing a structured field value of Item type.
31let input = "12.445;foo=bar";
32let item: Item = Parser::new(input).parse()?;
33println!("{:#?}", item);
34
35// Parsing a structured field value of List type.
36let input = r#"1;a=tok, ("foo" "bar");baz, ()"#;
37let list: List = Parser::new(input).parse()?;
38println!("{:#?}", list);
39
40// Parsing a structured field value of Dictionary type.
41let input = "a=?0, b, c; foo=bar, rating=1.5, fruits=(apple pear)";
42let dict: Dictionary = Parser::new(input).parse()?;
43println!("{:#?}", dict);
44# Ok(())
45# }
46```
47
48### Getting Parsed Value Members
49```
50# use sfv::*;
51# fn main() -> Result<(), sfv::Error> {
52let input = "u=2, n=(* foo 2)";
53let dict: Dictionary = Parser::new(input).parse()?;
54
55match dict.get("u") {
56    Some(ListEntry::Item(item)) => match &item.bare_item {
57        BareItem::Token(val) => { /* ... */ }
58        BareItem::Integer(val) => { /* ... */ }
59        BareItem::Boolean(val) => { /* ... */ }
60        BareItem::Decimal(val) => { /* ... */ }
61        BareItem::String(val) => { /* ... */ }
62        BareItem::ByteSequence(val) => { /* ... */ }
63        BareItem::Date(val) => { /* ... */ }
64        BareItem::DisplayString(val) => { /* ... */ }
65    },
66    Some(ListEntry::InnerList(inner_list)) => { /* ... */ }
67    None => { /* ... */ }
68}
69# Ok(())
70# }
71```
72"##
73)]
74/*!
75### Serialization
76Serializes an `Item`:
77```
78use sfv::{Decimal, ItemSerializer, KeyRef, StringRef};
79
80# fn main() -> Result<(), sfv::Error> {
81let serialized_item = ItemSerializer::new()
82    .bare_item(StringRef::from_str("foo")?)
83    .parameter(KeyRef::from_str("key")?, Decimal::try_from(13.45655)?)
84    .finish();
85
86assert_eq!(serialized_item, r#""foo";key=13.457"#);
87# Ok(())
88# }
89```
90
91Serializes a `List`:
92```
93use sfv::{KeyRef, ListSerializer, StringRef, TokenRef};
94
95# fn main() -> Result<(), sfv::Error> {
96let mut ser = ListSerializer::new();
97
98ser.bare_item(TokenRef::from_str("tok")?);
99
100{
101    let mut ser = ser.inner_list();
102
103    ser.bare_item(99).parameter(KeyRef::from_str("key")?, false);
104
105    ser.bare_item(StringRef::from_str("foo")?);
106
107    ser.finish().parameter(KeyRef::from_str("bar")?, true);
108}
109
110assert_eq!(
111    ser.finish().as_deref(),
112    Some(r#"tok, (99;key=?0 "foo");bar"#),
113);
114# Ok(())
115# }
116```
117
118Serializes a `Dictionary`:
119```
120use sfv::{DictSerializer, KeyRef, StringRef};
121
122# fn main() -> Result<(), sfv::Error> {
123let mut ser = DictSerializer::new();
124
125ser.bare_item(KeyRef::from_str("key1")?, StringRef::from_str("apple")?);
126
127ser.bare_item(KeyRef::from_str("key2")?, true);
128
129ser.bare_item(KeyRef::from_str("key3")?, false);
130
131assert_eq!(
132    ser.finish().as_deref(),
133    Some(r#"key1="apple", key2, key3=?0"#),
134);
135# Ok(())
136# }
137```
138
139# Crate features
140
141- `parsed-types` (enabled by default) -- When enabled, exposes fully owned types
142  `Item`, `Dictionary`, `List`, and their components, which can be obtained from
143  `Parser::parse_item`, etc. These types are implemented using the
144  [`indexmap`](https://crates.io/crates/indexmap) crate, so disabling this
145  feature can avoid that dependency if parsing using a visitor
146  ([`Parser::parse_item_with_visitor`], etc.) is sufficient.
147
148- `arbitrary` -- Implements the
149  [`Arbitrary`](https://docs.rs/arbitrary/1.4.1/arbitrary/trait.Arbitrary.html)
150  trait for this crate's types, making them easier to use with fuzzing.
151*/
152
153#![deny(missing_docs)]
154
155mod date;
156mod decimal;
157mod error;
158mod integer;
159mod key;
160#[cfg(feature = "parsed-types")]
161mod parsed;
162mod parser;
163mod ref_serializer;
164mod serializer;
165mod string;
166mod token;
167mod utils;
168pub mod visitor;
169
170#[cfg(test)]
171mod test_decimal;
172#[cfg(test)]
173mod test_integer;
174#[cfg(test)]
175mod test_key;
176#[cfg(test)]
177mod test_parser;
178#[cfg(test)]
179mod test_ref_serializer;
180#[cfg(test)]
181mod test_serializer;
182#[cfg(test)]
183mod test_string;
184#[cfg(test)]
185mod test_token;
186
187use std::borrow::{Borrow, Cow};
188use std::fmt;
189use std::string::String as StdString;
190
191pub use date::Date;
192pub use decimal::Decimal;
193pub use error::Error;
194pub use integer::{integer, Integer};
195pub use key::{key_ref, Key, KeyRef};
196#[cfg(feature = "parsed-types")]
197pub use parsed::{Dictionary, FieldType, InnerList, Item, List, ListEntry, Parameters};
198pub use parser::Parser;
199pub use ref_serializer::{
200    DictSerializer, InnerListSerializer, ItemSerializer, ListSerializer, ParameterSerializer,
201};
202pub use string::{string_ref, String, StringRef};
203pub use token::{token_ref, Token, TokenRef};
204
/// Crate-internal shorthand for a [`std::result::Result`] whose error type is
/// this crate's [`Error`].
type SFVResult<T> = std::result::Result<T, Error>;
206
207/// An abstraction over multiple kinds of ownership of a [bare item].
208///
209/// In general most users will be interested in:
210/// - [`BareItem`], for completely owned data
211/// - [`RefBareItem`], for completely borrowed data
212/// - [`BareItemFromInput`], for data borrowed from input when possible
213///
214/// [bare item]: <https://httpwg.org/specs/9651.html#item>
215#[derive(Debug, Clone, Copy)]
216#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
217pub enum GenericBareItem<S, B, T, D> {
218    /// A [decimal](https://httpwg.org/specs/rfc9651.html#decimal).
219    // sf-decimal  = ["-"] 1*12DIGIT "." 1*3DIGIT
220    Decimal(Decimal),
221    /// An [integer](https://httpwg.org/specs/rfc9651.html#integer).
222    // sf-integer = ["-"] 1*15DIGIT
223    Integer(Integer),
224    /// A [string](https://httpwg.org/specs/rfc9651.html#string).
225    // sf-string = DQUOTE *chr DQUOTE
226    // chr       = unescaped / escaped
227    // unescaped = %x20-21 / %x23-5B / %x5D-7E
228    // escaped   = "\" ( DQUOTE / "\" )
229    String(S),
230    /// A [byte sequence](https://httpwg.org/specs/rfc9651.html#binary).
231    // ":" *(base64) ":"
232    // base64    = ALPHA / DIGIT / "+" / "/" / "="
233    ByteSequence(B),
234    /// A [boolean](https://httpwg.org/specs/rfc9651.html#boolean).
235    // sf-boolean = "?" boolean
236    // boolean    = "0" / "1"
237    Boolean(bool),
238    /// A [token](https://httpwg.org/specs/rfc9651.html#token).
239    // sf-token = ( ALPHA / "*" ) *( tchar / ":" / "/" )
240    Token(T),
241    /// A [date](https://httpwg.org/specs/rfc9651.html#date).
242    ///
243    /// [`Parser`] will never produce this variant when used with
244    /// [`Version::Rfc8941`].
245    // sf-date = "@" sf-integer
246    Date(Date),
247    /// A [display string](https://httpwg.org/specs/rfc9651.html#displaystring).
248    ///
249    /// Display Strings are similar to [`String`]s, in that they consist of zero
250    /// or more characters, but they allow Unicode scalar values (i.e., all
251    /// Unicode code points except for surrogates), unlike [`String`]s.
252    ///
253    /// [`Parser`] will never produce this variant when used with
254    /// [`Version::Rfc8941`].
255    ///
256    /// [display string]: <https://httpwg.org/specs/rfc9651.html#displaystring>
257    // sf-displaystring = "%" DQUOTE *( unescaped / "\" / pct-encoded ) DQUOTE
258    // pct-encoded      = "%" lc-hexdig lc-hexdig
259    // lc-hexdig        = DIGIT / %x61-66 ; 0-9, a-f
260    DisplayString(D),
261}
262
263impl<S, B, T, D> GenericBareItem<S, B, T, D> {
264    /// If the bare item is a decimal, returns it; otherwise returns `None`.
265    #[must_use]
266    pub fn as_decimal(&self) -> Option<Decimal> {
267        match *self {
268            Self::Decimal(val) => Some(val),
269            _ => None,
270        }
271    }
272
273    /// If the bare item is an integer, returns it; otherwise returns `None`.
274    #[must_use]
275    pub fn as_integer(&self) -> Option<Integer> {
276        match *self {
277            Self::Integer(val) => Some(val),
278            _ => None,
279        }
280    }
281
282    /// If the bare item is a string, returns a reference to it; otherwise returns `None`.
283    #[must_use]
284    pub fn as_string(&self) -> Option<&StringRef>
285    where
286        S: Borrow<StringRef>,
287    {
288        match *self {
289            Self::String(ref val) => Some(val.borrow()),
290            _ => None,
291        }
292    }
293
294    /// If the bare item is a byte sequence, returns a reference to it; otherwise returns `None`.
295    #[must_use]
296    pub fn as_byte_sequence(&self) -> Option<&[u8]>
297    where
298        B: Borrow<[u8]>,
299    {
300        match *self {
301            Self::ByteSequence(ref val) => Some(val.borrow()),
302            _ => None,
303        }
304    }
305
306    /// If the bare item is a boolean, returns it; otherwise returns `None`.
307    #[must_use]
308    pub fn as_boolean(&self) -> Option<bool> {
309        match *self {
310            Self::Boolean(val) => Some(val),
311            _ => None,
312        }
313    }
314
315    /// If the bare item is a token, returns a reference to it; otherwise returns `None`.
316    #[must_use]
317    pub fn as_token(&self) -> Option<&TokenRef>
318    where
319        T: Borrow<TokenRef>,
320    {
321        match *self {
322            Self::Token(ref val) => Some(val.borrow()),
323            _ => None,
324        }
325    }
326
327    /// If the bare item is a date, returns it; otherwise returns `None`.
328    #[must_use]
329    pub fn as_date(&self) -> Option<Date> {
330        match *self {
331            Self::Date(val) => Some(val),
332            _ => None,
333        }
334    }
335
336    /// If the bare item is a display string, returns a reference to it; otherwise returns `None`.
337    #[must_use]
338    pub fn as_display_string(&self) -> Option<&D> {
339        match *self {
340            Self::DisplayString(ref val) => Some(val),
341            _ => None,
342        }
343    }
344}
345
346impl<S, B, T, D> From<Integer> for GenericBareItem<S, B, T, D> {
347    fn from(val: Integer) -> Self {
348        Self::Integer(val)
349    }
350}
351
352impl<S, B, T, D> From<bool> for GenericBareItem<S, B, T, D> {
353    fn from(val: bool) -> Self {
354        Self::Boolean(val)
355    }
356}
357
358impl<S, B, T, D> From<Decimal> for GenericBareItem<S, B, T, D> {
359    fn from(val: Decimal) -> Self {
360        Self::Decimal(val)
361    }
362}
363
364impl<S, B, T, D> From<Date> for GenericBareItem<S, B, T, D> {
365    fn from(val: Date) -> Self {
366        Self::Date(val)
367    }
368}
369
370impl<S, B, T, D> TryFrom<f32> for GenericBareItem<S, B, T, D> {
371    type Error = Error;
372
373    fn try_from(val: f32) -> Result<Self, Error> {
374        Decimal::try_from(val).map(Self::Decimal)
375    }
376}
377
378impl<S, B, T, D> TryFrom<f64> for GenericBareItem<S, B, T, D> {
379    type Error = Error;
380
381    fn try_from(val: f64) -> Result<Self, Error> {
382        Decimal::try_from(val).map(Self::Decimal)
383    }
384}
385
386impl<S, T, D> From<Vec<u8>> for GenericBareItem<S, Vec<u8>, T, D> {
387    fn from(val: Vec<u8>) -> Self {
388        Self::ByteSequence(val)
389    }
390}
391
392impl<S, B, D> From<Token> for GenericBareItem<S, B, Token, D> {
393    fn from(val: Token) -> Self {
394        Self::Token(val)
395    }
396}
397
398impl<B, T, D> From<String> for GenericBareItem<String, B, T, D> {
399    fn from(val: String) -> Self {
400        Self::String(val)
401    }
402}
403
404impl<'a, S, T, D> From<&'a [u8]> for GenericBareItem<S, Vec<u8>, T, D> {
405    fn from(val: &'a [u8]) -> Self {
406        Self::ByteSequence(val.to_owned())
407    }
408}
409
410impl<'a, S, B, D> From<&'a TokenRef> for GenericBareItem<S, B, Token, D> {
411    fn from(val: &'a TokenRef) -> Self {
412        Self::Token(val.to_owned())
413    }
414}
415
416impl<'a, B, T, D> From<&'a StringRef> for GenericBareItem<String, B, T, D> {
417    fn from(val: &'a StringRef) -> Self {
418        Self::String(val.to_owned())
419    }
420}
421
/// A number that is either a [`Decimal`] or an [`Integer`].
///
/// Crate-internal. NOTE(review): presumably what number parsing yields before
/// the value is wrapped in a bare item -- its users are not visible in this
/// file, so confirm against the parser.
#[derive(Debug, PartialEq)]
pub(crate) enum Num {
    /// A decimal value.
    Decimal(Decimal),
    /// An integer value.
    Integer(Integer),
}
427
/// A [bare item] that owns its data.
///
/// [bare item]: <https://httpwg.org/specs/rfc9651.html#item>
#[cfg_attr(
    feature = "parsed-types",
    doc = "Used to construct an [`Item`] or [`Parameters`] values."
)]
///
/// Note: This type deliberately does not implement `From<StdString>` as a
/// shorthand for [`BareItem::DisplayString`] because it is too easy to confuse
/// with conversions from [`String`]:
///
/// ```compile_fail
/// # use sfv::BareItem;
/// let _: BareItem = "x".to_owned().into();
/// ```
///
/// Instead, use:
///
/// ```
/// # use sfv::BareItem;
/// let _ = BareItem::DisplayString("x".to_owned());
/// ```
///
/// This alias instantiates [`GenericBareItem`] with [`String`] for strings,
/// `Vec<u8>` for byte sequences, [`Token`] for tokens, and the standard
/// library's `String` for display strings.
pub type BareItem = GenericBareItem<String, Vec<u8>, Token, StdString>;
452
/// A [bare item] that borrows its data.
///
/// Used to serialize values via [`ItemSerializer`], [`ListSerializer`], and [`DictSerializer`].
///
/// [bare item]: <https://httpwg.org/specs/rfc9651.html#item>
///
/// Note: This type deliberately does not implement `From<&str>` as a shorthand
/// for [`RefBareItem::DisplayString`] because it is too easy to confuse with
/// conversions from [`StringRef`]:
///
/// ```compile_fail
/// # use sfv::RefBareItem;
/// let _: RefBareItem = "x".into();
/// ```
///
/// Instead, use:
///
/// ```
/// # use sfv::RefBareItem;
/// let _ = RefBareItem::DisplayString("x");
/// ```
///
/// This alias instantiates [`GenericBareItem`] with `&StringRef` for strings,
/// `&[u8]` for byte sequences, `&TokenRef` for tokens, and `&str` for display
/// strings, all sharing the lifetime `'a`.
pub type RefBareItem<'a> = GenericBareItem<&'a StringRef, &'a [u8], &'a TokenRef, &'a str>;
475
/// A [bare item] that borrows data from input when possible.
///
/// Used to parse input incrementally in the [`visitor`] module.
///
/// [bare item]: <https://httpwg.org/specs/rfc9651.html#item>
///
/// Note: This type deliberately does not implement `From<Cow<str>>` as a
/// shorthand for [`BareItemFromInput::DisplayString`] because it is too easy to
/// confuse with conversions from [`Cow<StringRef>`]:
///
/// ```compile_fail
/// # use sfv::BareItemFromInput;
/// # use std::borrow::Cow;
/// let _: BareItemFromInput = Cow::Borrowed("x").into();
/// ```
///
/// Instead, use:
///
/// ```
/// # use sfv::BareItemFromInput;
/// # use std::borrow::Cow;
/// let _ = BareItemFromInput::DisplayString(Cow::Borrowed("x"));
/// ```
///
/// This alias instantiates [`GenericBareItem`] with `Cow<StringRef>` for
/// strings, `Vec<u8>` for byte sequences, `&TokenRef` for tokens, and
/// `Cow<str>` for display strings.
pub type BareItemFromInput<'a> =
    GenericBareItem<Cow<'a, StringRef>, Vec<u8>, &'a TokenRef, Cow<'a, str>>;
501
502impl<'a, S, B, T, D> From<&'a GenericBareItem<S, B, T, D>> for RefBareItem<'a>
503where
504    S: Borrow<StringRef>,
505    B: Borrow<[u8]>,
506    T: Borrow<TokenRef>,
507    D: Borrow<str>,
508{
509    fn from(val: &'a GenericBareItem<S, B, T, D>) -> RefBareItem<'a> {
510        match val {
511            GenericBareItem::Integer(val) => RefBareItem::Integer(*val),
512            GenericBareItem::Decimal(val) => RefBareItem::Decimal(*val),
513            GenericBareItem::String(val) => RefBareItem::String(val.borrow()),
514            GenericBareItem::ByteSequence(val) => RefBareItem::ByteSequence(val.borrow()),
515            GenericBareItem::Boolean(val) => RefBareItem::Boolean(*val),
516            GenericBareItem::Token(val) => RefBareItem::Token(val.borrow()),
517            GenericBareItem::Date(val) => RefBareItem::Date(*val),
518            GenericBareItem::DisplayString(val) => RefBareItem::DisplayString(val.borrow()),
519        }
520    }
521}
522
523impl<'a> From<BareItemFromInput<'a>> for BareItem {
524    fn from(val: BareItemFromInput<'a>) -> BareItem {
525        match val {
526            BareItemFromInput::Integer(val) => BareItem::Integer(val),
527            BareItemFromInput::Decimal(val) => BareItem::Decimal(val),
528            BareItemFromInput::String(val) => BareItem::String(val.into_owned()),
529            BareItemFromInput::ByteSequence(val) => BareItem::ByteSequence(val),
530            BareItemFromInput::Boolean(val) => BareItem::Boolean(val),
531            BareItemFromInput::Token(val) => BareItem::Token(val.to_owned()),
532            BareItemFromInput::Date(val) => BareItem::Date(val),
533            BareItemFromInput::DisplayString(val) => BareItem::DisplayString(val.into_owned()),
534        }
535    }
536}
537
538impl<'a> From<RefBareItem<'a>> for BareItem {
539    fn from(val: RefBareItem<'a>) -> BareItem {
540        match val {
541            RefBareItem::Integer(val) => BareItem::Integer(val),
542            RefBareItem::Decimal(val) => BareItem::Decimal(val),
543            RefBareItem::String(val) => BareItem::String(val.to_owned()),
544            RefBareItem::ByteSequence(val) => BareItem::ByteSequence(val.to_owned()),
545            RefBareItem::Boolean(val) => BareItem::Boolean(val),
546            RefBareItem::Token(val) => BareItem::Token(val.to_owned()),
547            RefBareItem::Date(val) => BareItem::Date(val),
548            RefBareItem::DisplayString(val) => BareItem::DisplayString(val.to_owned()),
549        }
550    }
551}
552
553impl<'a, S, T, D> From<&'a [u8]> for GenericBareItem<S, &'a [u8], T, D> {
554    fn from(val: &'a [u8]) -> Self {
555        Self::ByteSequence(val)
556    }
557}
558
559impl<'a, S, B, D> From<&'a Token> for GenericBareItem<S, B, &'a TokenRef, D> {
560    fn from(val: &'a Token) -> Self {
561        Self::Token(val)
562    }
563}
564
565impl<'a, S, B, D> From<&'a TokenRef> for GenericBareItem<S, B, &'a TokenRef, D> {
566    fn from(val: &'a TokenRef) -> Self {
567        Self::Token(val)
568    }
569}
570
571impl<'a, B, T, D> From<&'a String> for GenericBareItem<&'a StringRef, B, T, D> {
572    fn from(val: &'a String) -> Self {
573        Self::String(val)
574    }
575}
576
577impl<'a, B, T, D> From<&'a StringRef> for GenericBareItem<&'a StringRef, B, T, D> {
578    fn from(val: &'a StringRef) -> Self {
579        Self::String(val)
580    }
581}
582
583impl<S1, B1, T1, D1, S2, B2, T2, D2> PartialEq<GenericBareItem<S2, B2, T2, D2>>
584    for GenericBareItem<S1, B1, T1, D1>
585where
586    for<'a> RefBareItem<'a>: From<&'a Self>,
587    for<'a> RefBareItem<'a>: From<&'a GenericBareItem<S2, B2, T2, D2>>,
588{
589    fn eq(&self, other: &GenericBareItem<S2, B2, T2, D2>) -> bool {
590        match (RefBareItem::from(self), RefBareItem::from(other)) {
591            (RefBareItem::Integer(a), RefBareItem::Integer(b)) => a == b,
592            (RefBareItem::Decimal(a), RefBareItem::Decimal(b)) => a == b,
593            (RefBareItem::String(a), RefBareItem::String(b)) => a == b,
594            (RefBareItem::ByteSequence(a), RefBareItem::ByteSequence(b)) => a == b,
595            (RefBareItem::Boolean(a), RefBareItem::Boolean(b)) => a == b,
596            (RefBareItem::Token(a), RefBareItem::Token(b)) => a == b,
597            (RefBareItem::Date(a), RefBareItem::Date(b)) => a == b,
598            (RefBareItem::DisplayString(a), RefBareItem::DisplayString(b)) => a == b,
599            _ => false,
600        }
601    }
602}
603
/// A version for serialized structured field values.
///
/// Each HTTP specification that uses structured field values must indicate
/// which version it uses. See [the guidance from RFC 9651] for details.
///
/// The [`Display`](fmt::Display) implementation renders the name of the RFC,
/// e.g. `RFC 9651`.
///
/// [the guidance from RFC 9651]: <https://httpwg.org/specs/rfc9651.html#using-new-structured-types-in-extensions>
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Version {
    /// [RFC 8941], which does not support dates or display strings.
    ///
    /// [RFC 8941]: <https://httpwg.org/specs/rfc8941.html>
    Rfc8941,
    /// [RFC 9651], which supports dates and display strings.
    ///
    /// [RFC 9651]: <https://httpwg.org/specs/rfc9651.html>
    Rfc9651,
}

impl fmt::Display for Version {
    /// Writes the name of the RFC that defines this version (`RFC 8941` or
    /// `RFC 9651`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Rfc8941 => "RFC 8941",
            Self::Rfc9651 => "RFC 9651",
        };
        f.write_str(name)
    }
}
631
// Private module exposing a public marker trait: code outside this crate
// cannot name `private::Sealed`.
// NOTE(review): presumably used as a supertrait so that some public trait
// cannot be implemented downstream -- no use is visible in this file; confirm.
mod private {
    // NOTE(review): `unused` is allowed here presumably because the trait is
    // only referenced under some feature configurations -- confirm.
    #[allow(unused)]
    pub trait Sealed {}
}