xso/
text.rs

1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! Module containing implementations for conversions to/from XML text.
8
9#[cfg(feature = "base64")]
10use core::marker::PhantomData;
11
12use std::borrow::Cow;
13
14use crate::{error::Error, AsXmlText, FromXmlText};
15
16#[cfg(feature = "base64")]
17use base64::engine::{general_purpose::STANDARD as StandardBase64Engine, Engine as _};
18
// Implement `FromXmlText` and `AsXmlText` for every listed type by
// delegating to the type's `FromStr` and `Display` implementations.
//
// Each entry is a type followed by a comma. An entry may be preceded by a
// single `#[cfg(...)]` attribute; that attribute is copied onto both
// generated impls, together with a matching `doc(cfg(...))` hint which is
// only active when the `docsrs` cfg is set.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $gate:tt])?$target:ty,)+) => {
        $(
            $(
                #[cfg $gate]
                #[cfg_attr(docsrs, doc(cfg $gate))]
            )?
            impl FromXmlText for $target {
                #[doc = concat!("Parse [`", stringify!($target), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    // Name the trait explicitly instead of relying on
                    // `str::parse` inferring the target from the return type.
                    <$target as core::str::FromStr>::from_str(s.as_str())
                        .map_err(Error::text_parse_error)
                }
            }

            $(
                #[cfg $gate]
                #[cfg_attr(docsrs, doc(cfg $gate))]
            )?
            impl AsXmlText for $target {
                #[doc = concat!("Convert [`", stringify!($target), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    let rendered = self.to_string();
                    Ok(Cow::Owned(rendered))
                }
            }
        )+
    }
}
46
47/// This provides an implementation compliant with xsd::bool.
48impl FromXmlText for bool {
49    /// Parse a boolean from XML text.
50    ///
51    /// The values `"1"` and `"true"` are considered true. The values `"0"`
52    /// and `"false"` are considered `false`. Any other value is invalid and
53    /// will return an error.
54    fn from_xml_text(s: String) -> Result<Self, Error> {
55        match s.as_str() {
56            "1" => "true",
57            "0" => "false",
58            other => other,
59        }
60        .parse()
61        .map_err(Error::text_parse_error)
62    }
63}
64
65/// This provides an implementation compliant with xsd::bool.
66impl AsXmlText for bool {
67    /// Convert a boolean to XML text.
68    ///
69    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
70    /// This implementation never fails.
71    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
72        match self {
73            true => Ok(Cow::Borrowed("true")),
74            false => Ok(Cow::Borrowed("false")),
75        }
76    }
77}
78
// Generate `FromXmlText` / `AsXmlText` implementations for all types below
// via `convert_via_fromstr_and_display!`.
convert_via_fromstr_and_display! {
    // Primitive integers.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    // Floating point numbers and single characters.
    f32,
    f64,
    char,
    // IP and socket addresses.
    std::net::IpAddr,
    std::net::Ipv4Addr,
    std::net::Ipv6Addr,
    std::net::SocketAddr,
    std::net::SocketAddrV4,
    std::net::SocketAddrV6,
    // Non-zero integers.
    std::num::NonZeroU8,
    std::num::NonZeroU16,
    std::num::NonZeroU32,
    std::num::NonZeroU64,
    std::num::NonZeroU128,
    std::num::NonZeroUsize,
    std::num::NonZeroI8,
    std::num::NonZeroI16,
    std::num::NonZeroI32,
    std::num::NonZeroI64,
    std::num::NonZeroI128,
    std::num::NonZeroIsize,

    // Only compiled when the `uuid` feature is enabled.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    // Only compiled when the `jid` feature is enabled.
    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
}
124
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
///
/// Both functions are associated functions without a `self` receiver, so a
/// codec is selected purely through its type and carries no runtime state.
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// Takes ownership of the text `s`. Returns the decoded value, or an
    /// [`Error`] if the codec cannot convert the text.
    fn decode(s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    ///
    /// The returned text may borrow from `value`. An [`Error`] is returned
    /// if the codec cannot represent `value` as text.
    fn encode(value: &T) -> Result<Option<Cow<'_, str>>, Error>;
}
148
149/// Text codec which does no transform.
150pub struct Plain;
151
152impl TextCodec<String> for Plain {
153    fn decode(s: String) -> Result<String, Error> {
154        Ok(s)
155    }
156
157    fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
158        Ok(Some(Cow::Borrowed(value.as_str())))
159    }
160}
161
162/// Text codec which returns None instead of the empty string.
163pub struct EmptyAsNone;
164
165impl TextCodec<Option<String>> for EmptyAsNone {
166    fn decode(s: String) -> Result<Option<String>, Error> {
167        if s.is_empty() {
168            Ok(None)
169        } else {
170            Ok(Some(s))
171        }
172    }
173
174    fn encode(value: &Option<String>) -> Result<Option<Cow<'_, str>>, Error> {
175        Ok(match value.as_ref() {
176            Some(v) if !v.is_empty() => Some(Cow::Borrowed(v.as_str())),
177            Some(_) | None => None,
178        })
179    }
180}
181
182/// Text codec which returns None instead of the empty string.
183pub struct EmptyAsError;
184
185impl TextCodec<String> for EmptyAsError {
186    fn decode(s: String) -> Result<String, Error> {
187        if s.is_empty() {
188            Err(Error::Other("Empty text node."))
189        } else {
190            Ok(s)
191        }
192    }
193
194    fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
195        if value.is_empty() {
196            Err(Error::Other("Empty text node."))
197        } else {
198            Ok(Some(Cow::Borrowed(value.as_str())))
199        }
200    }
201}
202
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
///
/// The filter is an associated function without a `self` receiver, so a
/// filter is selected purely through its type. It cannot fail: it always
/// returns a string.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// Takes ownership of `s`, which allows implementations to return or
    /// modify the string without copying it.
    fn preprocess(s: String) -> String;
}
210
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Hand the owned string straight back; nothing is inspected or copied.
    fn preprocess(s: String) -> String {
        s
    }
}
219
220/// Text preprocessor to remove all whitespace.
221pub struct StripWhitespace;
222
223impl TextFilter for StripWhitespace {
224    fn preprocess(s: String) -> String {
225        let s: String = s
226            .chars()
227            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
228            .collect();
229        s
230    }
231}
232
/// Text codec transforming text to binary using standard base64.
///
/// The `Filter` type argument selects a [`TextFilter`] which is applied to
/// incoming text before it is decoded. It defaults to [`NoFilter`]; passing
/// [`StripWhitespace`] makes decoding ignore any whitespace within the text.
/// Encoding is not affected by the filter.
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
pub struct Base64<Filter: TextFilter = NoFilter>(PhantomData<Filter>);

#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<Filter: TextFilter> TextCodec<Vec<u8>> for Base64<Filter> {
    /// Run the text through `Filter`, then decode it as standard base64.
    ///
    /// Decoder failures are reported as text parse errors.
    fn decode(s: String) -> Result<Vec<u8>, Error> {
        let filtered = Filter::preprocess(s);
        let outcome = StandardBase64Engine.decode(filtered.as_bytes());
        outcome.map_err(Error::text_parse_error)
    }

    /// Encode the bytes as standard base64. The text is always emitted and
    /// this never fails.
    fn encode(value: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
        let encoded = StandardBase64Engine.encode(value.as_slice());
        Ok(Some(Cow::Owned(encoded)))
    }
}
256
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<'x, Filter: TextFilter> TextCodec<Cow<'x, [u8]>> for Base64<Filter> {
    /// Run the text through `Filter`, then decode it as standard base64.
    ///
    /// The result is always an owned buffer. Decoder failures are reported
    /// as text parse errors.
    fn decode(s: String) -> Result<Cow<'x, [u8]>, Error> {
        let filtered = Filter::preprocess(s);
        let raw = StandardBase64Engine
            .decode(filtered.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(raw))
    }

    /// Encode the bytes as standard base64. The text is always emitted and
    /// this never fails.
    fn encode<'a>(value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        let encoded = StandardBase64Engine.encode(raw);
        Ok(Some(Cow::Owned(encoded)))
    }
}
272
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<T, Filter: TextFilter> TextCodec<Option<T>> for Base64<Filter>
where
    Base64<Filter>: TextCodec<T>,
{
    /// Decode empty text as `None`; hand anything else to the codec for `T`.
    ///
    /// The emptiness check looks at the text exactly as received, before the
    /// codec for `T` gets to apply its filter.
    fn decode(s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(s).map(Some)
        }
    }

    /// Encode `Some` through the codec for `T`; emit nothing for `None`.
    fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(inner),
            None => Ok(None),
        }
    }
}
294
295/// Text codec transforming text to binary using hexadecimal nibbles.
296///
297/// The length must be known at compile-time.
298pub struct FixedHex<const N: usize>;
299
300impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
301    fn decode(s: String) -> Result<[u8; N], Error> {
302        if s.len() != 2 * N {
303            return Err(Error::Other("Invalid length"));
304        }
305
306        let mut bytes = [0u8; N];
307        for i in 0..N {
308            bytes[i] =
309                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
310        }
311
312        Ok(bytes)
313    }
314
315    fn encode(value: &[u8; N]) -> Result<Option<Cow<'_, str>>, Error> {
316        let mut bytes = String::with_capacity(N * 2);
317        for byte in value {
318            bytes.extend(format!("{:02x}", byte).chars());
319        }
320        Ok(Some(Cow::Owned(bytes)))
321    }
322}
323
324impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
325where
326    FixedHex<N>: TextCodec<T>,
327{
328    fn decode(s: String) -> Result<Option<T>, Error> {
329        if s.is_empty() {
330            return Ok(None);
331        }
332        Ok(Some(Self::decode(s)?))
333    }
334
335    fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
336        decoded
337            .as_ref()
338            .map(Self::encode)
339            .transpose()
340            .map(Option::flatten)
341    }
342}
343
344/// Text codec for colon-separated bytes of uppercase hexadecimal.
345pub struct ColonSeparatedHex;
346
347impl TextCodec<Vec<u8>> for ColonSeparatedHex {
348    fn decode(s: String) -> Result<Vec<u8>, Error> {
349        assert_eq!((s.len() + 1) % 3, 0);
350        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
351        for i in 0..(1 + s.len()) / 3 {
352            let byte =
353                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
354            if 3 * i + 2 < s.len() {
355                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
356            }
357            bytes.push(byte);
358        }
359        Ok(bytes)
360    }
361
362    fn encode(decoded: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
363        // TODO: Super inefficient!
364        let mut bytes = Vec::with_capacity(decoded.len());
365        for byte in decoded {
366            bytes.push(format!("{:02X}", byte));
367        }
368        Ok(Some(Cow::Owned(bytes.join(":"))))
369    }
370}