zalgo_codec/
lib.rs

1//! This crate lets you convert an ASCII text string into a single unicode grapheme cluster and back.
2//! It also provides a procedural macro that lets you embed such a grapheme cluster and decode it into source code at compile time.  
3//! This lets you reach new lows in the field of self-documenting code.
4//!
5//! The encoded string will be ~2 times larger than the original in terms of bytes.
6//!
7//! Additionally the crate provides a function to encode Python code and wrap the result in a decoder that
8//! decodes and executes the encoded string, retaining the functionality of the original code.
9//!
10//! There are two ways of interacting with the codec.
11//! The first one is to call the encoding and decoding functions directly,
12//! and the second one is to use the [`ZalgoString`] wrapper type.
13//!
14//! # Examples
15//!
16//! Encode a string to a grapheme cluster with [`zalgo_encode`]:
17//! ```
18//! # use zalgo_codec::{EncodeError, zalgo_encode};
19//! let s = "Zalgo";
20//! let encoded = zalgo_encode(s)?;
21//! assert_eq!(encoded, "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}");
22//! # Ok::<(), EncodeError>(())
23//! ```
24//! Decode the grapheme cluster back into a string with [`zalgo_decode`]:
25//! ```
26//! # use zalgo_codec::{zalgo_decode, DecodeError};
27//! # extern crate alloc;
28//! let encoded = "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}";
29//! let s = zalgo_decode(encoded)?;
30//! assert_eq!(s, "Zalgo");
31//! # Ok::<(), DecodeError>(())
32//! ```
33//! The [`ZalgoString`] type can be used to encode a string and handle the result in various ways:
34//! ```
35//! # use zalgo_codec::{EncodeError, ZalgoString};
36//! let s = "Zalgo";
37//! let zstr = ZalgoString::new(s)?;
38//! assert_eq!(zstr, "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}");
39//! assert_eq!(zstr.len(), 2 * s.len() + 1);
40//! assert_eq!(zstr.decoded_len(), s.len());
41//! assert_eq!(zstr.bytes().next(), Some(69));
42//! assert_eq!(zstr.decoded_chars().next_back(), Some('o'));
43//! # Ok::<(), EncodeError>(())
44//! ```
45//! Encode Rust source code and embed it in your program with the [`zalgo_embed!`] proc-macro:
46//! ```
47//! # #[cfg(feature = "macro")]
48//! # {
49//! # use zalgo_codec::zalgo_embed;
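50//! // This grapheme cluster was made by encoding "fn add(x: i32, y: i32) -> i32 {x + y}"
51//! zalgo_embed!("E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}");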
52//!
53//! // The `add` function is now available
54//! assert_eq!(add(10, 20), 30);
55//! # }
56//! ```
57//!
58//! # Feature flags
59//!
60//! `std`: enables [`EncodeError`] and [`DecodeError`] to capture a [`Backtrace`](std::backtrace::Backtrace).
61//! If this feature is not enabled the library is `no_std` compatible, but still uses the `alloc` crate.
62//!
63//! `serde`: derives the `Serialize` and `Deserialize` traits from [`serde`](https://docs.rs/serde) for [`ZalgoString`].
64//!
65//! `rkyv`: derives the `Serialize`, `Deserialize`, and `Archive` traits from [`rkyv`](https://docs.rs/rkyv) for [`ZalgoString`].
66//!
67//! `macro` *(enabled by default)*: exports the procedural macros [`zalgo_embed!`] and [`zalgofy!`].
68//!   
69//! # Explanation
70//!
71//! Characters U+0300–U+036F are the combining characters for unicode Latin.
72//! The fun thing about combining characters is that you can add as many of these characters
73//! as you like to the original character and it does not create any new symbols,
74//! it only adds symbols on top of the character. It's supposed to be used in order to
75//! create characters such as `á` by taking a normal `a` and adding another character
76//! to give it the mark (U+301, in this case). Fun fact: Unicode doesn't specify
77//! any limit on the number of these characters.
78//! Conveniently, this gives us 112 different characters we can map to,
79//! which nicely maps to the ASCII character range 0x20 -> 0x7F, aka all the non-control characters.
80//! The only issue is that we can't have new lines in this system, so to fix that,
81//! we can simply map 0x7F (DEL) to 0x0A (LF).
82//! This can be represented as `(CHARACTER - 11) % 133 - 21`, and decoded with `(CHARACTER + 22) % 133 + 10`.  
83//!
84//! <details><summary><b>Full conversion table</b></summary>
85//!
86//! | ASCII character | Encoded |  
87//! |---|---|  
88//! | A | U+321 |  
89//! | B | U+322 |  
90//! | C | U+323 |  
91//! | D | U+324 |  
92//! | E | U+325 |  
93//! | F | U+326 |  
94//! | G | U+327 |  
95//! | H | U+328 |  
96//! | I | U+329 |  
97//! | J | U+32A |  
98//! | K | U+32B |  
99//! | L | U+32C |  
100//! | M | U+32D |  
101//! | N | U+32E |  
102//! | O | U+32F |  
103//! | P | U+330 |  
104//! | Q | U+331 |  
105//! | R | U+332 |  
106//! | S | U+333 |  
107//! | T | U+334 |  
108//! | U | U+335 |  
109//! | V | U+336 |  
110//! | W | U+337 |  
111//! | X | U+338 |  
112//! | Y | U+339 |  
113//! | Z | U+33A |  
114//! | a | U+341 |  
115//! | b | U+342 |  
116//! | c | U+343 |  
117//! | d | U+344 |  
118//! | e | U+345 |  
119//! | f | U+346 |  
120//! | g | U+347 |  
121//! | h | U+348 |  
122//! | i | U+349 |  
123//! | j | U+34A |  
124//! | k | U+34B |  
125//! | l | U+34C |  
126//! | m | U+34D |  
127//! | n | U+34E |  
128//! | o | U+34F |  
129//! | p | U+350 |  
130//! | q | U+351 |  
131//! | r | U+352 |  
132//! | s | U+353 |  
133//! | t | U+354 |  
134//! | u | U+355 |  
135//! | v | U+356 |  
136//! | w | U+357 |  
137//! | x | U+358 |  
138//! | y | U+359 |  
139//! | z | U+35A |  
140//! | 1 | U+311 |  
141//! | 2 | U+312 |  
142//! | 3 | U+313 |  
143//! | 4 | U+314 |  
144//! | 5 | U+315 |  
145//! | 6 | U+316 |  
146//! | 7 | U+317 |  
147//! | 8 | U+318 |  
148//! | 9 | U+319 |  
149//! | 0 | U+310 |  
150//! |   | U+300 |  
151//! | ! | U+301 |  
152//! | " | U+302 |  
153//! | # | U+303 |  
154//! | $ | U+304 |  
155//! | % | U+305 |  
156//! | & | U+306 |  
157//! | ' | U+307 |  
158//! | ( | U+308 |  
159//! | ) | U+309 |  
160//! | * | U+30A |  
161//! | + | U+30B |  
162//! | , | U+30C |  
163//! | - | U+30D |  
164//! | \ | U+33C |  
165//! | . | U+30E |  
166//! | / | U+30F |  
167//! | : | U+31A |  
168//! | ; | U+31B |  
169//! | < | U+31C |  
170//! | = | U+31D |  
171//! | > | U+31E |  
172//! | ? | U+31F |  
173//! | @ | U+320 |  
174//! | \n| U+36F |  
175//!
176//! </details>
177//!
178//! # Experiment with the codec
179//!
180//! There is an executable available for experimenting with the codec on text and files.
181//! It can also be used to generate grapheme clusters from source code for use with [`zalgo_embed!`].
182//! It can be installed with `cargo install zalgo-codec --features binary`.
183//! You can optionally enable the `gui` feature during installation to include a rudimentary GUI mode for the program.
184
185#![no_std]
186#![cfg_attr(docsrs, feature(doc_auto_cfg))]
187
188pub use zalgo_codec_common::{
189    zalgo_decode, zalgo_encode, zalgo_string, zalgo_wrap_python, DecodeError, EncodeError,
190    ZalgoString,
191};
192
193#[cfg(feature = "macro")]
194pub use zalgo_codec_macro::{zalgo_embed, zalgofy};
195
#[cfg(test)]
mod tests {
    //! Round-trip, error-path and macro tests for the re-exported codec API.
    //!
    //! Expected zalgo strings are written with `\u{…}` escapes (or raw UTF-8 bytes) instead of
    //! literal combining characters. Tooling that applies Unicode normalization reorders and
    //! precomposes combining marks, which silently changes the encoded payload and makes
    //! literal-based assertions fail.

    extern crate alloc;

    use super::*;
    use alloc::string::String;
    use core::str;
    use rand::{
        distributions::{DistString, Distribution},
        Rng,
    };
    use unicode_segmentation::UnicodeSegmentation;

    /// `"fn add(x: i32, y: i32) -> i32 {x + y}"` in encoded form.
    #[cfg(feature = "macro")]
    const ENCODED_ADD_FN: &str = "E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}";

    /// Distribution over the printable ASCII characters (`' '..='~'`) and `'\n'`.
    struct PrintableAsciiAndNewline;

    impl Distribution<char> for PrintableAsciiAndNewline {
        /// Draws one character uniformly from the 95 printable ASCII characters plus newline.
        fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char {
            // Sampling from the byte range (rather than a hand-typed alphabet literal) guarantees
            // that no character is accidentally left out. The one extra slot, 0x7F, is not
            // printable and is used as the stand-in for '\n'.
            match rng.gen_range(b' '..=0x7f_u8) {
                0x7f => '\n',
                printable => char::from(printable),
            }
        }
    }

    impl DistString for PrintableAsciiAndNewline {
        /// Appends `len` randomly sampled characters to `string`.
        fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) {
            // Every sampled character is ASCII, so `len` bytes is exactly what is needed.
            string.reserve(len);
            for _ in 0..len {
                string.push(self.sample(rng));
            }
        }
    }

    /// A function definition embedded via `zalgo_embed!` becomes callable.
    #[cfg(feature = "macro")]
    #[test]
    fn test_embed_function() {
        let code = "fn add(x: i32, y: i32) -> i32 {x + y}";

        let encoded = zalgo_encode(code).unwrap();
        assert_eq!(encoded, ENCODED_ADD_FN);

        // Same payload as `ENCODED_ADD_FN`; the macro needs a string literal, not a constant.
        zalgo_embed!("E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}");

        // Now the `add` function is available
        assert_eq!(add(10, 20), 30)
    }

    /// An expression embedded via `zalgo_embed!` is evaluated in the surrounding scope.
    #[cfg(feature = "macro")]
    #[test]
    fn test_embed_expression() {
        let x = 20;
        let y = -10;

        let expr = "x + y";

        let encoded = zalgo_encode(expr).unwrap();

        assert_eq!(encoded, "E\u{358}\u{300}\u{30b}\u{300}\u{359}");

        // It works on expressions, too!
        let z = zalgo_embed!("E\u{358}\u{300}\u{30b}\u{300}\u{359}");
        assert_eq!(z, x + y);
    }

    /// Checks a known encoding byte-for-byte and that encoding followed by decoding is lossless.
    #[test]
    fn verify() {
        const TEST_STRING_1: &str = "the greatest adventure is going to bed";
        let out_string = str::from_utf8(b"E\xcd\x94\xcd\x88\xcd\x85\xcc\x80\xcd\x87\xcd\x92\xcd\x85\xcd\x81\xcd\x94\xcd\x85\xcd\x93\xcd\x94\xcc\x80\xcd\x81\xcd\x84\xcd\x96\xcd\x85\xcd\x8e\xcd\x94\xcd\x95\xcd\x92\xcd\x85\xcc\x80\xcd\x89\xcd\x93\xcc\x80\xcd\x87\xcd\x8f\xcd\x89\xcd\x8e\xcd\x87\xcc\x80\xcd\x94\xcd\x8f\xcc\x80\xcd\x82\xcd\x85\xcd\x84").unwrap();
        assert_eq!(zalgo_encode(TEST_STRING_1).unwrap(), out_string);

        const TEST_STRING_2: &str =
            "I'll have you know I graduated top of my class in the Navy Seals";
        assert_eq!(
            zalgo_decode(&zalgo_encode(TEST_STRING_2).unwrap()).unwrap(),
            TEST_STRING_2
        );

        // Every printable ASCII character, generated from the range so none can be forgotten.
        let ascii_char_table: String = (b' '..=b'~').map(char::from).collect();
        assert_eq!(
            zalgo_decode(&zalgo_encode(&ascii_char_table).unwrap()).unwrap(),
            ascii_char_table
        );

        // Checking that randomly generated strings of printable ASCII and newlines are encoded
        // in a lossless fashion, and that they contain a single grapheme cluster
        for _ in 0..100 {
            let s = PrintableAsciiAndNewline.sample_string(&mut rand::thread_rng(), 100);
            let encoded = zalgo_encode(&s).unwrap();
            assert_eq!(zalgo_decode(&encoded).unwrap(), s);
            assert_eq!(encoded.as_str().graphemes(true).count(), 1)
        }
    }

    /// Newlines are encodable and survive a round trip.
    #[test]
    fn newlines() {
        assert_eq!(zalgo_encode("\n").unwrap(), "E\u{36f}");
        const TEST_STRING: &str = "The next sentence is true.\nThe previous sentence is false.";
        assert_eq!(
            zalgo_decode(&zalgo_encode(TEST_STRING).unwrap()).unwrap(),
            TEST_STRING,
        );
    }

    /// Non-ASCII text and control characters other than `'\n'` are rejected by the encoder.
    #[test]
    fn check_errors() {
        assert!(zalgo_encode("We got the Ä Ö Å, you aint got the Ä Ö Å").is_err());
        assert!(zalgo_encode("\t").is_err());
        assert!(zalgo_encode("\r").is_err());
        assert!(zalgo_encode("\0").is_err());
    }

    /// `zalgofy!` yields a string that can be used in a const context and decodes back to its input.
    #[cfg(feature = "macro")]
    #[test]
    fn check_zalgofy() {
        const ZS: &str = zalgofy!("Zalgo");
        assert_eq!(zalgo_decode(ZS).unwrap(), "Zalgo");
    }
}