mail_core/
iri.rs

1use std::{
2    str::FromStr
3};
4
5#[cfg(feature="serde")]
6use std::fmt;
7#[cfg(feature="serde")]
8use serde::{
9    ser::{Serialize, Serializer},
10    de::{self, Deserialize, Deserializer, Visitor}
11};
12
13//TODO consider adding a str_context
14#[derive(Copy, Clone, Debug, Fail)]
15#[fail(display = "invalid syntax for iri/uri scheme")]
16pub struct InvalidIRIScheme;
17
/// A minimal IRI (Internationalized Resource Identifier) type.
///
/// Only the scheme is parsed. The scheme specific part is kept as an opaque
/// "tail" (this includes fragments, even for definitions in which fragments
/// are not considered part of the scheme specific part).
///
/// **No normalization or any other IRI specific processing is performed;
/// this is basically just a `String` split into two parts.**
///
/// Additionally this implementation requires every URI to be valid utf8.
///
/// # Example
///
/// ```
/// # use mail_core::IRI;
/// let uri = IRI::new("file:/random/logo.png").unwrap();
/// assert_eq!(uri.scheme(), "file");
/// assert_eq!(uri.tail(), "/random/logo.png");
/// ```
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct IRI {
    /// The complete IRI: lower case scheme, `:` separator and tail.
    iri: String,
    /// Byte index of the `:` separator in `iri` (equal to the scheme length).
    scheme_end_idx: usize,
}
40
41impl IRI {
42
43    /// Create a new IRI from a scheme part and a tail part.
44    ///
45    /// This will convert the scheme part into lower case before
46    /// using it.
47    pub fn from_parts(scheme: &str, tail: &str) -> Result<Self, InvalidIRIScheme> {
48        Self::validate_scheme(scheme)?;
49        let scheme_len = scheme.len();
50        let mut buffer = String::with_capacity(scheme_len + 1 + tail.len());
51        for ch in scheme.chars() {
52            let ch = ch.to_ascii_lowercase();
53            buffer.push(ch);
54        }
55        buffer.push(':');
56        buffer.push_str(tail);
57        Ok(IRI {
58            iri: buffer,
59            scheme_end_idx: scheme_len
60        })
61    }
62
63    /// crates a new a IRI
64    ///
65    /// 1. this determines the first occurrence of `:` to split the input into scheme and tail
66    /// 2. it validates that the scheme name is [RFC 3986](https://tools.ietf.org/html/rfc3986)
67    ///    compatible, i.e. is ascii, starting with a letter followed by alpha numeric characters
68    ///    (or `"+"`,`"-"`,`"."`).
69    /// 3. converts the scheme part to lower case
70    pub fn new<I>(iri: I) -> Result<Self, InvalidIRIScheme>
71        where I: Into<String>
72    {
73        let mut buffer = iri.into();
74        let split_pos = buffer.bytes().position(|b| b == b':')
75            //TODO error type
76            .ok_or_else(|| InvalidIRIScheme)?;
77        {
78            let scheme = &mut buffer[..split_pos];
79            {
80                Self::validate_scheme(scheme)?;
81            }
82
83            scheme.make_ascii_lowercase();
84        }
85
86        Ok(IRI {
87            iri: buffer,
88            scheme_end_idx: split_pos
89        })
90    }
91
92    fn validate_scheme(scheme: &str) -> Result<(), InvalidIRIScheme> {
93        let mut iter = scheme.bytes();
94        let valid = iter.next()
95            .map(|bch| bch.is_ascii_alphabetic()).unwrap_or(false)
96            && iter.all(|bch|
97                bch.is_ascii_alphanumeric() || bch == b'+' || bch == b'-' || bch == b'.');
98
99        if !valid {
100            return Err(InvalidIRIScheme);
101        }
102        Ok(())
103    }
104
105    /// Creates a new IRI with the same schema but a different tail.
106    pub fn with_tail(&self, new_tail: &str) -> Self {
107        IRI::from_parts(self.scheme(), new_tail)
108            .unwrap()
109    }
110
111    /// The scheme part of the uri excluding the `:` seperator.
112    ///
113    /// The scheme is guaranteed to be lower case.
114    ///
115    /// # Example
116    ///
117    /// ```
118    /// # use mail_core::IRI;
119    /// let uri = IRI::new("file:///opt/share/logo.png").unwrap();
120    /// assert_eq!(uri.scheme(), "file");
121    /// ```
122    pub fn scheme(&self) -> &str {
123        &self.iri[..self.scheme_end_idx]
124    }
125
126    /// the scheme specific part of the uri
127    ///
128    /// # Example
129    ///
130    /// ```
131    /// # use mail_core::IRI;
132    /// let uri = IRI::new("file:///opt/share/logo.png").unwrap();
133    /// assert_eq!(uri.scheme(), "file");
134    /// ```
135    pub fn tail(&self) -> &str {
136        &self.iri[self.scheme_end_idx+1..]
137    }
138
139    /// returns the underlying string representation
140    ///
141    /// Note that it does not implement Display even through
142    /// it implements `as_str` and `Into<String>` as displaying
143    /// a IRI is more complex then just displaying a string (mainly due to
144    /// bidirectional IRI's).
145    pub fn as_str(&self) -> &str {
146        &self.iri
147    }
148}
149
150impl FromStr for IRI {
151    type Err = InvalidIRIScheme;
152
153    fn from_str(inp: &str) -> Result<Self, Self::Err> {
154        IRI::new(inp)
155    }
156}
157
158impl Into<String> for IRI {
159    fn into(self) -> String {
160        self.iri
161    }
162}
163
#[cfg(feature="serde")]
impl Serialize for IRI {
    /// Serializes the IRI as a single string (scheme, `:` and tail).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where S: Serializer
    {
        let text: &str = &self.iri;
        serializer.serialize_str(text)
    }
}
172
#[cfg(feature="serde")]
impl<'de> Deserialize<'de> for IRI {

    /// Deserializes an IRI from a string by parsing it the same way
    /// `str::parse` does.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where D: Deserializer<'de>
    {
        /// Visitor which turns a visited string into an `IRI`.
        struct IRIVisitor;

        impl<'de> Visitor<'de> for IRIVisitor {
            type Value = IRI;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing a IRI")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
                where E: de::Error
            {
                // A parse failure is reported as a custom deserialization error.
                s.parse::<IRI>().map_err(E::custom)
            }
        }

        deserializer.deserialize_str(IRIVisitor)
    }
}
201
#[cfg(test)]
mod test {
    use super::IRI;

    /// Only the first `:` splits scheme and tail, and it belongs to neither.
    #[test]
    fn split_correctly_excluding_colon() {
        let input = "scheme:other:parts/yeha?z=r#frak";
        let parsed = IRI::new(input).unwrap();
        assert_eq!(parsed.scheme(), "scheme");
        assert_eq!(parsed.tail(), "other:parts/yeha?z=r#frak");
        assert_eq!(parsed.as_str(), input);
    }

    /// An upper case scheme is lower-cased in both accessors.
    #[test]
    fn scheme_is_lowercase() {
        let parsed = IRI::new("FILE:///opt/share/logo.png").unwrap();
        assert_eq!(parsed.scheme(), "file");
        assert_eq!(parsed.as_str(), "file:///opt/share/logo.png");
    }

    #[test]
    fn scheme_name_has_to_be_valid() {
        let rejected = [
            // empty scheme
            ":ups",
            // scheme starting with a digit
            "1aim.path:/logo",
            // scheme with invalid chars
            "g ap:ups",
            "s{trang}e:ups",
        ];
        for input in rejected.iter() {
            assert!(IRI::new(*input).is_err());
        }

        // some strange but valid names
        let accepted = ["c++:is valid", "c1+-.:is valid"];
        for input in accepted.iter() {
            assert!(IRI::new(*input).is_ok());
        }
    }

    /// Both constructors lower-case the scheme and leave the tail untouched.
    #[test]
    fn scheme_is_always_lower_case() {
        let check = |iri: &IRI| {
            assert_eq!(iri.scheme(), "foo");
            assert_eq!(iri.tail(), "bAr");
        };

        check(&IRI::new("FoO:bAr").unwrap());
        check(&IRI::from_parts("FoO", "bAr").unwrap());
    }

    /// `with_tail` returns a new value and leaves the original unchanged.
    #[test]
    fn replacing_tail_does_that() {
        let original = IRI::new("foo:bar/bazz").unwrap();
        let replaced = original.with_tail("zoobar");

        assert_eq!(replaced.as_str(), "foo:zoobar");
        assert_eq!(original.as_str(), "foo:bar/bazz");
    }

    #[cfg(feature="serde")]
    #[test]
    fn serde_works_for_str_iri() {
        use serde_test::{Token, assert_tokens, assert_de_tokens};

        let text = "path:./my/joke.txt";
        let iri: IRI = text.parse().unwrap();

        // round trip through the plain `str` token
        assert_tokens(&iri, &[Token::Str(text)]);

        // owned and borrowed string tokens have to deserialize too
        assert_de_tokens(&iri, &[Token::String(text)]);
        assert_de_tokens(&iri, &[Token::BorrowedStr(text)]);
    }
}