utf7_imap/
lib.rs

//! A Rust library for encoding and decoding [UTF-7](https://datatracker.ietf.org/doc/html/rfc2152) strings as defined by the [IMAP](https://datatracker.ietf.org/doc/html/rfc3501) standard in [RFC 3501 (#5.1.3)](https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3).
//!
//! The idea is based on the Python [mutf7](https://github.com/cheshire-mouse/mutf7) library.
4
5extern crate base64;
6extern crate encoding_rs;
7extern crate regex;
8
9use encoding_rs::UTF_16BE;
10use regex::{Captures, Regex};
11
12/// Encode UTF-7 IMAP mailbox name
13///
14/// <https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3>
15///
16/// # Usage:
17///
18/// ```
19/// use utf7_imap::encode_utf7_imap;
20///
21/// let test_string = String::from("Отправленные");
22/// assert_eq!(utf7_imap::encode_utf7_imap(test_string), "&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-");
23/// ```
24pub fn encode_utf7_imap(text: String) -> String {
25    let mut result = "".to_string();
26    let text = text.replace('&', "&-");
27    let mut text = text.as_str();
28    while !text.is_empty() {
29        result = format!("{}{}", result, get_ascii(text));
30        text = remove_ascii(text);
31        if !text.is_empty() {
32            let tmp = get_nonascii(text);
33            result = format!("{}{}", result, encode_modified_utf7(tmp.to_string()));
34            text = remove_nonascii(text);
35        }
36    }
37    result
38}
/// Returns `true` when byte `c` is a printable US-ASCII octet
/// (`0x20..=0x7e`), i.e. one that may appear verbatim in a modified UTF-7
/// mailbox name.
///
/// Every other byte (control characters, DEL, and the bytes of multi-byte
/// UTF-8 sequences) yields `false`.
fn is_ascii_custom(c: u8) -> bool {
    // RFC 3501 section 5.1.3 lets only the printable range 0x20-0x7e
    // represent itself; DEL (0x7f) is a control character and is excluded.
    (0x20..=0x7e).contains(&c)
}
42
43fn get_ascii(s: &str) -> &str {
44    let bytes = s.as_bytes();
45    for (i, &item) in bytes.iter().enumerate() {
46        if !is_ascii_custom(item) {
47            return &s[0..i];
48        }
49    }
50    s
51}
52
53fn get_nonascii(s: &str) -> &str {
54    let bytes = s.as_bytes();
55    for (i, &item) in bytes.iter().enumerate() {
56        if is_ascii_custom(item) {
57            return &s[0..i];
58        }
59    }
60    s
61}
62
63fn remove_ascii(s: &str) -> &str {
64    let bytes = s.as_bytes();
65    for (i, &item) in bytes.iter().enumerate() {
66        if !is_ascii_custom(item) {
67            return &s[i..];
68        }
69    }
70    ""
71}
72
73fn remove_nonascii(s: &str) -> &str {
74    let bytes = s.as_bytes();
75    for (i, &item) in bytes.iter().enumerate() {
76        if is_ascii_custom(item) {
77            return &s[i..];
78        }
79    }
80    ""
81}
82
83fn encode_modified_utf7(text: String) -> String {
84    let capacity = 2 * text.len();
85    let mut input = Vec::with_capacity(capacity);
86    let text_u16 = text.encode_utf16();
87    for value in text_u16 {
88        input.extend_from_slice(&value.to_be_bytes());
89    }
90    let text_u16 = base64::encode(input);
91    let text_u16 = text_u16.trim_end_matches('=');
92    let result = text_u16.replace('/', ",");
93    format!("&{}-", result)
94}
95
96/// Decode UTF-7 IMAP mailbox name
97///
98/// <https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3>
99///
100/// # Usage:
101///
102/// ```
103/// use utf7_imap::decode_utf7_imap;
104///
105/// let test_string = String::from("&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-");
106/// assert_eq!(decode_utf7_imap(test_string), "Отправленные");
107/// ```
108pub fn decode_utf7_imap(text: String) -> String {
109    let pattern = Regex::new(r"&([^-]*)-").unwrap();
110    pattern.replace_all(&text, expand).to_string()
111}
112
113fn expand(cap: &Captures) -> String {
114    if cap.get(1).unwrap().as_str() == "" {
115        "&".to_string()
116    } else {
117        decode_utf7_part(cap.get(0).unwrap().as_str().to_string())
118    }
119}
120
121fn decode_utf7_part(text: String) -> String {
122    if text == "&-" {
123        return String::from("&");
124    }
125
126    let text_mb64 = &text[1..text.len() - 1];
127    let mut text_b64 = text_mb64.replace(',', "/");
128
129    while (text_b64.len() % 4) != 0 {
130        text_b64 += "=";
131    }
132
133    let text_u16 = base64::decode(text_b64).unwrap();
134    let (cow, _encoding_used, _had_errors) = UTF_16BE.decode(&text_u16);
135    let result = cow.as_ref();
136
137    String::from(result)
138}
139
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// A name with no pass-through characters becomes one shift sequence.
    #[test]
    fn encode_test() {
        let encoded = encode_utf7_imap(String::from("Отправленные"));
        assert_eq!(encoded, "&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-");
    }

    /// Encoded runs separated by plain ASCII get separate shift sequences.
    #[test]
    fn encode_test_split() {
        let encoded = encode_utf7_imap(String::from("Šiukšliadėžė"));
        assert_eq!(encoded, "&AWA-iuk&AWE-liad&ARcBfgEX-");
    }

    /// Adjacent accented characters share a single shift sequence.
    #[test]
    fn encode_consecutive_accents() {
        let encoded = encode_utf7_imap(String::from("théâtre"));
        assert_eq!(encoded, "th&AOkA4g-tre");
    }

    /// Inverse of `encode_test`.
    #[test]
    fn decode_test() {
        let decoded = decode_utf7_imap(String::from("&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-"));
        assert_eq!(decoded, "Отправленные");
    }

    /// Input with UTF-7 encoded parts separated by plain ASCII.
    #[test]
    fn decode_test_split() {
        let decoded = decode_utf7_imap(String::from("&AWA-iuk&AWE-liad&ARcBfgEX-"));
        assert_eq!(decoded, "Šiukšliadėžė");
    }

    /// Inverse of `encode_consecutive_accents`.
    #[test]
    fn decode_consecutive_accents() {
        let decoded = decode_utf7_imap(String::from("th&AOkA4g-tre"));
        assert_eq!(decoded, "théâtre");
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(10000))]

        /// Encoding and then decoding a generated string gives it back.
        #[test]
        fn fuzzy_dec_enc_check(s in "\\PC*") {
            let round_tripped = decode_utf7_imap(encode_utf7_imap(s.clone()));
            assert_eq!(round_tripped, s);
        }
    }
}