zalgo_codec/lib.rs
1//! This crate lets you convert an ASCII text string into a single unicode grapheme cluster and back.
2//! It also provides a procedural macro that lets you embed such a grapheme cluster and decode it into source code at compile time.
3//! This lets you reach new lows in the field of self-documenting code.
4//!
5//! The encoded string will be ~2 times larger than the original in terms of bytes.
6//!
7//! Additionally the crate provides a function to encode Python code and wrap the result in a decoder that
8//! decodes and executes the encoded string, retaining the functionality of the original code.
9//!
10//! There are two ways of interacting with the codec.
11//! The first one is to call the encoding and decoding functions directly,
12//! and the second one is to use the [`ZalgoString`] wrapper type.
13//!
14//! # Examples
15//!
16//! Encode a string to a grapheme cluster with [`zalgo_encode`]:
17//! ```
18//! # use zalgo_codec::{EncodeError, zalgo_encode};
19//! let s = "Zalgo";
20//! let encoded = zalgo_encode(s)?;
21//! assert_eq!(encoded, "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}");
22//! # Ok::<(), EncodeError>(())
23//! ```
24//! Decode the grapheme cluster back into a string with [`zalgo_decode`]:
25//! ```
26//! # use zalgo_codec::{zalgo_decode, DecodeError};
27//! # extern crate alloc;
28//! let encoded = "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}";
29//! let s = zalgo_decode(encoded)?;
30//! assert_eq!(s, "Zalgo");
31//! # Ok::<(), DecodeError>(())
32//! ```
33//! The [`ZalgoString`] type can be used to encode a string and handle the result in various ways:
34//! ```
35//! # use zalgo_codec::{EncodeError, ZalgoString};
36//! let s = "Zalgo";
37//! let zstr = ZalgoString::new(s)?;
38//! assert_eq!(zstr, "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}");
39//! assert_eq!(zstr.len(), 2 * s.len() + 1);
40//! assert_eq!(zstr.decoded_len(), s.len());
41//! assert_eq!(zstr.bytes().next(), Some(69));
42//! assert_eq!(zstr.decoded_chars().next_back(), Some('o'));
43//! # Ok::<(), EncodeError>(())
44//! ```
45//! Encode Rust source code and embed it in your program with the [`zalgo_embed!`] proc-macro:
46//! ```
47//! # #[cfg(feature = "macro")]
48//! # {
49//! # use zalgo_codec::zalgo_embed;
50//! // This grapheme cluster was made by encoding "fn add(x: i32, y: i32) -> i32 {x + y}"
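51//! zalgo_embed!("E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}");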
52//!
53//! // The `add` function is now available
54//! assert_eq!(add(10, 20), 30);
55//! # }
56//! ```
57//!
58//! # Feature flags
59//!
60//! `std`: enables [`EncodeError`] and [`DecodeError`] to capture a [`Backtrace`](std::backtrace::Backtrace).
61//! If this feature is not enabled the library is `no_std` compatible, but still uses the `alloc` crate.
62//!
63//! `serde`: derives the `Serialize` and `Deserialize` traits from [`serde`](https://docs.rs/serde) for [`ZalgoString`].
64//!
65//! `rkyv`: derives the `Serialize`, `Deserialize`, and `Archive` traits from [`rkyv`](https://docs.rs/rkyv) for [`ZalgoString`].
66//!
67//! `macro` *(enabled by default)*: exports the procedural macros [`zalgo_embed!`] and [`zalgofy!`].
68//!
69//! # Explanation
70//!
71//! Characters U+0300–U+036F are the combining characters for unicode Latin.
72//! The fun thing about combining characters is that you can add as many of these characters
73//! as you like to the original character and it does not create any new symbols,
74//! it only adds symbols on top of the character. It's supposed to be used in order to
75//! create characters such as `á` by taking a normal `a` and adding another character
76//! to give it the mark (U+301, in this case). Fun fact: Unicode doesn't specify
77//! any limit on the number of these characters.
78//! Conveniently, this gives us 112 different characters we can map to,
79//! which comfortably covers the ASCII character range 0x20 -> 0x7E, aka all the non-control characters.
80//! The only issue is that we can't have new lines in this system, so to fix that,
81//! we can simply map 0x0A (LF) to the otherwise unused last character of the block, U+36F.
82//! This can be represented as `(CHARACTER - 11) % 133 - 21`, and decoded with `(CHARACTER + 22) % 133 + 10` (where `%` is the non-negative remainder and the encoded value is counted as an offset from U+300).
83//!
84//! <details><summary><b>Full conversion table</b></summary>
85//!
86//! | ASCII character | Encoded |
87//! |---|---|
88//! | A | U+321 |
89//! | B | U+322 |
90//! | C | U+323 |
91//! | D | U+324 |
92//! | E | U+325 |
93//! | F | U+326 |
94//! | G | U+327 |
95//! | H | U+328 |
96//! | I | U+329 |
97//! | J | U+32A |
98//! | K | U+32B |
99//! | L | U+32C |
100//! | M | U+32D |
101//! | N | U+32E |
102//! | O | U+32F |
103//! | P | U+330 |
104//! | Q | U+331 |
105//! | R | U+332 |
106//! | S | U+333 |
107//! | T | U+334 |
108//! | U | U+335 |
109//! | V | U+336 |
110//! | W | U+337 |
111//! | X | U+338 |
112//! | Y | U+339 |
113//! | Z | U+33A |
114//! | a | U+341 |
115//! | b | U+342 |
116//! | c | U+343 |
117//! | d | U+344 |
118//! | e | U+345 |
119//! | f | U+346 |
120//! | g | U+347 |
121//! | h | U+348 |
122//! | i | U+349 |
123//! | j | U+34A |
124//! | k | U+34B |
125//! | l | U+34C |
126//! | m | U+34D |
127//! | n | U+34E |
128//! | o | U+34F |
129//! | p | U+350 |
130//! | q | U+351 |
131//! | r | U+352 |
132//! | s | U+353 |
133//! | t | U+354 |
134//! | u | U+355 |
135//! | v | U+356 |
136//! | w | U+357 |
137//! | x | U+358 |
138//! | y | U+359 |
139//! | z | U+35A |
140//! | 1 | U+311 |
141//! | 2 | U+312 |
142//! | 3 | U+313 |
143//! | 4 | U+314 |
144//! | 5 | U+315 |
145//! | 6 | U+316 |
146//! | 7 | U+317 |
147//! | 8 | U+318 |
148//! | 9 | U+319 |
149//! | 0 | U+310 |
150//! | (space) | U+300 |
151//! | ! | U+301 |
152//! | " | U+302 |
153//! | # | U+303 |
154//! | $ | U+304 |
155//! | % | U+305 |
156//! | & | U+306 |
157//! | ' | U+307 |
158//! | ( | U+308 |
159//! | ) | U+309 |
160//! | * | U+30A |
161//! | + | U+30B |
162//! | , | U+30C |
163//! | - | U+30D |
164//! | \ | U+33C |
165//! | . | U+30E |
166//! | / | U+30F |
167//! | : | U+31A |
168//! | ; | U+31B |
169//! | < | U+31C |
170//! | = | U+31D |
171//! | > | U+31E |
172//! | ? | U+31F |
173//! | @ | U+320 |
174//! | \n| U+36F |
175//!
176//! </details>
177//!
178//! # Experiment with the codec
179//!
180//! There is an executable available for experimenting with the codec on text and files.
181//! It can also be used to generate grapheme clusters from source code for use with [`zalgo_embed!`].
182//! It can be installed with `cargo install zalgo-codec --features binary`.
183//! You can optionally enable the `gui` feature during installation to include a rudimentary GUI mode for the program.
184
185#![no_std]
186#![cfg_attr(docsrs, feature(doc_auto_cfg))]
187
188#[cfg(feature = "std")]
189extern crate std;
190
191pub use zalgo_codec_common::{
192 zalgo_decode, zalgo_encode, zalgo_string, zalgo_wrap_python, DecodeError, EncodeError,
193 ZalgoString,
194};
195
196#[cfg(feature = "macro")]
197pub use zalgo_codec_macro::{zalgo_embed, zalgofy};
198
#[cfg(test)]
mod tests {
    // Round-trip, error-path and macro tests for the items re-exported by this crate.
    //
    // Expected encodings are spelled with `\u{...}` / `\x..` escapes rather than raw
    // combining characters: a raw sequence of combining marks can be silently reordered
    // or precomposed by Unicode normalization in editors and tooling, which would make
    // the literal differ from what the encoder actually produces.
    extern crate alloc;

    use super::*;
    use alloc::string::String;
    use core::str;
    use rand::{
        distr::{Distribution, SampleString},
        seq::IndexedRandom,
        Rng,
    };
    use unicode_segmentation::UnicodeSegmentation;

    /// The 95 printable ASCII bytes (0x20..=0x7E) in ascending order, followed by a line feed.
    const ALPHABET: &[u8] = b" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n";

    /// Distribution that yields characters drawn uniformly from [`ALPHABET`].
    struct PrintableAsciiAndNewline;

    impl Distribution<char> for PrintableAsciiAndNewline {
        /// Picks one byte of [`ALPHABET`] at random and returns it as a `char`.
        fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char {
            let byte = *ALPHABET
                .choose(rng)
                .expect("ALPHABET is a non-empty constant");
            char::from(byte)
        }
    }

    impl SampleString for PrintableAsciiAndNewline {
        /// Appends `len` randomly sampled characters to `string`.
        fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) {
            string.reserve(len);
            for _ in 0..len {
                string.push(self.sample(rng));
            }
        }
    }

    /// Guards the sampling alphabet itself: every printable ASCII byte and the line feed
    /// must be present exactly once, so the randomized test cannot silently skip a character.
    #[test]
    fn alphabet_covers_printable_ascii_and_newline() {
        assert_eq!(ALPHABET.len(), 96);
        assert!((b' '..=b'~').all(|byte| ALPHABET.contains(&byte)));
        assert!(ALPHABET.contains(&b'\n'));
    }

    /// Encodes a function definition, checks the exact encoding, and embeds it with the macro.
    #[cfg(feature = "macro")]
    #[test]
    fn test_embed_function() {
        let code = "fn add(x: i32, y: i32) -> i32 {x + y}";

        let encoded = zalgo_encode(code).unwrap();
        // 'E' followed by U+0300 + (byte - 0x20) for every byte of `code`.
        assert_eq!(encoded, "E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}");

        zalgo_embed!("E\u{346}\u{34e}\u{300}\u{341}\u{344}\u{344}\u{308}\u{358}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{30c}\u{300}\u{359}\u{31a}\u{300}\u{349}\u{313}\u{312}\u{309}\u{300}\u{30d}\u{31e}\u{300}\u{349}\u{313}\u{312}\u{300}\u{35b}\u{358}\u{300}\u{30b}\u{300}\u{359}\u{35d}");

        // Now the `add` function is available
        assert_eq!(add(10, 20), 30)
    }

    /// Same as `test_embed_function`, but for an expression that refers to local variables.
    #[cfg(feature = "macro")]
    #[test]
    fn test_embed_expression() {
        let x = 20;
        let y = -10;

        let expr = "x + y";

        let encoded = zalgo_encode(expr).unwrap();

        assert_eq!(encoded, "E\u{358}\u{300}\u{30b}\u{300}\u{359}");

        // It works on expressions, too!
        let z = zalgo_embed!("E\u{358}\u{300}\u{30b}\u{300}\u{359}");
        assert_eq!(z, x + y);
    }

    /// Checks a known encoding and several encode/decode round trips.
    #[test]
    fn verify() {
        const TEST_STRING_1: &str = "the greatest adventure is going to bed";
        let out_string = str::from_utf8(b"E\xcd\x94\xcd\x88\xcd\x85\xcc\x80\xcd\x87\xcd\x92\xcd\x85\xcd\x81\xcd\x94\xcd\x85\xcd\x93\xcd\x94\xcc\x80\xcd\x81\xcd\x84\xcd\x96\xcd\x85\xcd\x8e\xcd\x94\xcd\x95\xcd\x92\xcd\x85\xcc\x80\xcd\x89\xcd\x93\xcc\x80\xcd\x87\xcd\x8f\xcd\x89\xcd\x8e\xcd\x87\xcc\x80\xcd\x94\xcd\x8f\xcc\x80\xcd\x82\xcd\x85\xcd\x84").unwrap();
        assert_eq!(zalgo_encode(TEST_STRING_1).unwrap(), out_string);

        const TEST_STRING_2: &str =
            "I'll have you know I graduated top of my class in the Navy Seals";
        assert_eq!(
            zalgo_decode(&zalgo_encode(TEST_STRING_2).unwrap()).unwrap(),
            TEST_STRING_2
        );

        const ASCII_CHAR_TABLE: &str = r##"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890 !"#$%&'()*+,-\./:;<=>?@"##;
        assert_eq!(
            zalgo_decode(&zalgo_encode(ASCII_CHAR_TABLE).unwrap()).unwrap(),
            ASCII_CHAR_TABLE
        );

        // Deterministic round trip over every character of the sampling alphabet, so full
        // coverage does not depend on what the random strings below happen to contain.
        let every_character: String = ALPHABET.iter().copied().map(char::from).collect();
        assert_eq!(
            zalgo_decode(&zalgo_encode(&every_character).unwrap()).unwrap(),
            every_character
        );

        // Checking that randomly generated strings of printable ASCII and newlines are encoded
        // in a lossless fashion, and that they contain a single grapheme cluster
        let mut rng = rand::rng();
        for _ in 0..100 {
            let s = PrintableAsciiAndNewline.sample_string(&mut rng, 100);
            let encoded = zalgo_encode(&s).unwrap();
            assert_eq!(zalgo_decode(&encoded).unwrap(), s);
            assert_eq!(encoded.as_str().graphemes(true).count(), 1)
        }
    }

    /// A line feed has its own encoding and survives a round trip inside longer text.
    #[test]
    fn newlines() {
        assert_eq!(&zalgo_encode("\n").unwrap(), "E\u{36f}");
        const TEST_STRING: &str = "The next sentence is true.\nThe previous sentence is false.";
        assert_eq!(
            zalgo_decode(&zalgo_encode(TEST_STRING).unwrap()).unwrap(),
            TEST_STRING,
        );
    }

    /// Non-ASCII text and control characters other than the line feed are rejected.
    #[test]
    fn check_errors() {
        assert!(zalgo_encode("We got the Ä Ö Å, you aint got the Ä Ö Å").is_err());
        assert!(zalgo_encode("\t").is_err());
        assert!(zalgo_encode("\r").is_err());
        assert!(zalgo_encode("\0").is_err());
    }

    /// `zalgofy!` produces, at compile time, a string that decodes back to its input.
    #[cfg(feature = "macro")]
    #[test]
    fn check_zalgofy() {
        const ZS: &str = zalgofy!("Zalgo");
        assert_eq!(zalgo_decode(ZS).unwrap(), "Zalgo");
    }
}