cyware_convert/encoding/
url.rs

1use bstr::ByteSlice;
2use percent_encoding::{self, percent_encode_byte};
3#[cfg(target_family = "wasm")]
4use serde::{Deserialize, Serialize};
5
6use crate::Operation;
7use crate::OperationError;
8
/// Operation that percent-decodes (URL-decodes) its input.
///
/// The type has no fields, so all instances are interchangeable;
/// `UrlDecode::default()` builds one without any arguments.
#[derive(Clone, Debug, Default)]
#[cfg_attr(target_family = "wasm", derive(Serialize, Deserialize))]
pub struct UrlDecode {}
12
13impl Operation for UrlDecode {
14    fn execute(&self, input: &[u8]) -> Result<Vec<u8>, OperationError> {
15        let decoded: Vec<u8> = percent_encoding::percent_decode(input).collect();
16        Ok(decoded)
17    }
18}
19
20impl UrlDecode {
21    pub fn new() -> Self {
22        UrlDecode {}
23    }
24}
25
/// Operation that percent-encodes selected graphemes of its input.
///
/// The default value encodes nothing: `non_ascii` is `false` and `charset` is empty,
/// which is the same configuration as `UrlEncode::new(false, None)`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(target_family = "wasm", derive(Serialize, Deserialize))]
pub struct UrlEncode {
    /// When `true`, every grapheme that is not pure ASCII is percent-encoded.
    non_ascii: bool,
    /// Graphemes that are always percent-encoded; an empty string selects none.
    charset: String,
}
32
33fn extend_with_grapheme_encode(output: &mut Vec<u8>, grapheme: &[u8]) {
34    grapheme
35        .iter()
36        .for_each(|b| output.extend_from_slice(percent_encode_byte(*b).as_bytes()));
37}
38
39impl Operation for UrlEncode {
40    fn execute(&self, input: &[u8]) -> Result<Vec<u8>, OperationError> {
41        let charset = self.charset.as_bytes().graphemes();
42        let input_graphemes = input.grapheme_indices();
43        let mut output: Vec<u8> = vec![];
44
45        for (start, stop, grapheme) in input_graphemes {
46            let mut charset_graphemes = charset.clone();
47            if charset_graphemes.any(|charset_grapheme| charset_grapheme == grapheme)
48                || (self.non_ascii && !grapheme.is_ascii())
49            {
50                extend_with_grapheme_encode(&mut output, &input[start..stop]);
51            } else {
52                output.extend_from_slice(&input[start..stop]);
53            }
54        }
55        Ok(output)
56    }
57}
58
59impl UrlEncode {
60    pub fn new(non_ascii: bool, charset: Option<String>) -> Self {
61        UrlEncode {
62            non_ascii,
63            charset: charset.unwrap_or_default(),
64        }
65    }
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Malformed UTF-8 shared by the invalid-input tests.
    const INVALID_UTF_8: [u8; 10] = [0x98, 0xfd, 0xe0, 0xbf, 0xb8, 0xa7, 0xd6, 0xe1, 0x74, 0xa0];

    /// Runs a `UrlEncode` configured with the given settings over `input`.
    fn encode(non_ascii: bool, charset: Option<&str>, input: &[u8]) -> Vec<u8> {
        UrlEncode::new(non_ascii, charset.map(String::from))
            .execute(input)
            .unwrap()
    }

    #[test]
    fn url_decode() {
        let decoded = UrlDecode::new()
            .execute("caido @%C3%A9%C3%A9%F0%9F%A5%96".as_bytes())
            .unwrap();
        assert_eq!(decoded, "caido @éé🥖".as_bytes().to_vec());
    }

    // A multi-byte grapheme listed in the charset is encoded byte by byte.
    #[test]
    fn url_encode_unicode_char() {
        let encoded = encode(false, Some("🥖"), "a🥖🥖st".as_bytes());
        assert_eq!(encoded, "a%F0%9F%A5%96%F0%9F%A5%96st".as_bytes().to_vec());
    }

    // With `non_ascii` set and no charset, only non-ASCII graphemes are encoded.
    #[test]
    fn url_encode_non_ascii() {
        let encoded = encode(true, None, "caido @éé".as_bytes());
        assert_eq!(encoded, "caido @%C3%A9%C3%A9".as_bytes().to_vec());
    }

    // The charset and the non-ASCII rule apply together.
    #[test]
    fn url_encode_non_ascii_and_charset() {
        let encoded = encode(true, Some("c"), "caido @éé🥖".as_bytes());
        assert_eq!(
            encoded,
            "%63aido @%C3%A9%C3%A9%F0%9F%A5%96".as_bytes().to_vec()
        );
    }

    // ASCII graphemes listed in the charset are encoded; other ASCII is untouched.
    #[test]
    fn url_encode_charset() {
        let encoded = encode(true, Some("@t"), "a@ test".as_bytes());
        assert_eq!(encoded, "a%40 %74es%74".as_bytes().to_vec());
    }

    // With nothing selected, malformed input passes through byte for byte.
    #[test]
    fn url_encode_invalid_utf_8() {
        let encoded = encode(false, None, &INVALID_UTF_8);
        assert_eq!(encoded, INVALID_UTF_8.to_vec());
    }

    // Malformed bytes count as non-ASCII; the lone ASCII `t` (0x74) is left alone.
    #[test]
    fn url_encode_non_ascii_invalid_utf_8() {
        let encoded = encode(true, None, &INVALID_UTF_8);
        assert_eq!(encoded, "%98%FD%E0%BF%B8%A7%D6%E1t%A0".as_bytes().to_vec());
    }
}