zwnbsp/
zero_width.rs

1use crate::{Binary, Error};
2use std::str::FromStr;
3
/// # Zero-width space (ZWSP)
///
/// Pair holding the Unicode `char` of the ZWSP character and its numeric
/// HTML character reference.
///
/// ## Representations
///
/// Encoding | Code
/// --- | ---
/// Unicode | `U+200B`
/// HTML | `&#8203;` `&ZeroWidthSpace;`
/// TeX | `\hskip0pt`
/// LaTeX | `\hspace{0pt}`
/// groff | `\:`
///
/// ## Prohibited in URLs
///
/// ICANN rules prohibit domain names from including non-displayed characters
/// such as zero-width space, and most browsers prohibit their use within domain
/// names, because they can be used to create a homograph attack, where a malicious
/// URL is visually indistinguishable from a legitimate one.
///
/// ## Reference
///
/// [Wikipedia](https://en.wikipedia.org/wiki/Zero-width_space)
pub const ZERO_WIDTH_SPACE: (char, &str) = ('\u{200B}', "&#8203;");
30
/// # Zero-width non-joiner (ZWNJ)
///
/// Pair holding the Unicode `char` of the ZWNJ character and its numeric
/// HTML character reference.
///
/// ## Representations
///
/// Encoding | Code
/// --- | ---
/// Unicode | `U+200C`
/// HTML | `&#8204;` `&zwnj;`
///
/// ## Reference
///
/// [Wikipedia](https://en.wikipedia.org/wiki/Zero-width_non-joiner)
pub const ZERO_WIDTH_NON_JOINER: (char, &str) = ('\u{200C}', "&#8204;");
47
/// # Zero-width joiner (ZWJ)
///
/// Pair holding the Unicode `char` of the ZWJ character and its numeric
/// HTML character reference.
///
/// ## Representations
///
/// Encoding | Code
/// --- | ---
/// Unicode | `U+200D`
/// HTML | `&#8205;` `&zwj;`
///
/// ## Reference
///
/// [Wikipedia](https://en.wikipedia.org/wiki/Zero-width_joiner)
pub const ZERO_WIDTH_JOINER: (char, &str) = ('\u{200D}', "&#8205;");
64
/// # Word joiner (WJ)
///
/// Pair holding the Unicode `char` of the WJ character and its numeric
/// HTML character reference.
///
/// ## Representations
///
/// Encoding | Code
/// --- | ---
/// Unicode | `U+2060`
/// HTML | `&#8288;` `&NoBreak;`
///
/// ## Reference
///
/// [Wikipedia](https://en.wikipedia.org/wiki/Word_joiner)
pub const WORD_JOINER: (char, &str) = ('\u{2060}', "&#8288;");
79
/// # Zero-width No-Break Space (ZWNBSP)
///
/// Pair holding the Unicode `char` of the ZWNBSP character and its numeric
/// HTML character reference.
///
/// ## Representations
///
/// Encoding | Code
/// --- | ---
/// Unicode | `U+FEFF`
/// HTML | `&#65279;`
///
/// ## Reference
///
/// [FileFormat.info](https://www.fileformat.info/info/unicode/char/feff/index.htm)
pub const ZERO_WIDTH_NO_BREAK_SPACE: (char, &str) = ('\u{FEFF}', "&#65279;");
96
97pub struct ZeroWidth(Binary);
98
99impl From<Binary> for ZeroWidth {
100    fn from(binary: Binary) -> Self {
101        Self(binary)
102    }
103}
104
105impl ZeroWidth {
106    /// Creates a new `ZeroWidth` instance given a string slice
107    pub fn new(string: &str) -> Result<Self, Error> {
108        let binary = Binary::from_str(string)?;
109
110        Ok(ZeroWidth::from(binary))
111    }
112
113    /// Retrieve the binary representation of the ASCII
114    /// value provided
115    pub fn get_binary_string(&self) -> String {
116        self.0.to_string()
117    }
118
119    /// Creates the Unicode zero width character representation
120    /// from the binary representation of the ASCII value
121    pub fn to_unicode(&self) -> String {
122        let string_value = self.get_binary_string();
123        let mut zero_width = String::default();
124
125        // split each ascii letter representation
126        let chars: Vec<&str> = string_value.split(' ').collect();
127
128        chars.into_iter().for_each(|character| {
129            character.split("").for_each(|ch| {
130                if ch.eq("0") {
131                    zero_width.push(ZERO_WIDTH_SPACE.0);
132                } else {
133                    zero_width.push(ZERO_WIDTH_NON_JOINER.0);
134                }
135            });
136
137            zero_width.push(ZERO_WIDTH_JOINER.0);
138        });
139
140        // remove trailing zero width character
141        zero_width.pop();
142
143        zero_width
144    }
145
146    /// Creates the Unicode zero width character representation
147    /// from the binary representation of the ASCII value
148    pub fn to_html(&self) -> String {
149        let string_value = self.get_binary_string();
150        let mut zero_width = String::default();
151
152        // split each ascii letter representation
153        let chars: Vec<&str> = string_value.split(' ').collect();
154
155        chars.into_iter().for_each(|character| {
156            character.split("").for_each(|ch| {
157                if ch.eq("0") {
158                    zero_width.push_str(ZERO_WIDTH_SPACE.1);
159                } else {
160                    zero_width.push_str(ZERO_WIDTH_NON_JOINER.1);
161                }
162            });
163
164            zero_width.push_str(ZERO_WIDTH_JOINER.1);
165        });
166
167        // remove trailing zero width character
168        zero_width.pop();
169
170        zero_width
171    }
172}
173
174#[cfg(test)]
175mod test {
176    use super::*;
177    const RUSTACEANS_ZW_UNICODE: &str = "\u{200c}\u{200b}\u{200c}\u{200b}\u{200c}\u{200b}\u{200b}\u{200c}\u{200b}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200c}\u{200b}\u{200c}\u{200b}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200c}\u{200b}\u{200b}\u{200c}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200c}\u{200b}\u{200c}\u{200b}\u{200b}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200b}\u{200b}\u{200b}\u{200b}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200b}\u{200b}\u{200b}\u{200c}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200b}\u{200b}\u{200c}\u{200b}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200b}\u{200b}\u{200b}\u{200b}\u{200c}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200b}\u{200c}\u{200c}\u{200c}\u{200b}\u{200c}\u{200d}\u{200c}\u{200b}\u{200c}\u{200c}\u{200c}\u{200b}\u{200b}\u{200c}\u{200c}\u{200c}";
178    const RUSTACEANS_ZW_HTML: &str = "&#8204;&#8203;&#8204;&#8203;&#8204;&#8203;&#8203;&#8204;&#8203;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8204;&#8203;&#8204;&#8203;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8204;&#8203;&#8203;&#8204;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8204;&#8203;&#8204;&#8203;&#8203;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8203;&#8203;&#8203;&#8203;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8203;&#8203;&#8203;&#8204;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8203;&#8203;&#8204;&#8203;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8203;&#8203;&#8203;&#8203;&#8204;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8203;&#8204;&#8204;&#8204;&#8203;&#8204;&#8205;&#8204;&#8203;&#8204;&#8204;&#8204;&#8203;&#8203;&#8204;&#8204;&#8204;&#8205";
179
180    #[test]
181    fn it_zw_into_unicode() {
182        let have = ZeroWidth::new("Rustaceans").unwrap().to_unicode();
183        let want = RUSTACEANS_ZW_UNICODE.to_string();
184
185        assert_eq!(have, want);
186    }
187
188    #[test]
189    fn it_zw_into_html() {
190        let have = ZeroWidth::new("Rustaceans").unwrap().to_html();
191        let want = RUSTACEANS_ZW_HTML.to_string();
192
193        assert_eq!(have, want);
194    }
195}