1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
//! Contains the implementation of [`ZalgoString`] as well as related iterators.

use crate::{decode_byte_pair, fmt, zalgo_encode, Error};

use core::iter::{ExactSizeIterator, FusedIterator};

#[cfg(not(feature = "std"))]
use alloc::{borrow::Cow, string::String, vec::Vec};

#[cfg(feature = "std")]
use std::borrow::Cow;

/// A [`String`] that has been encoded with [`zalgo_encode`].
///
/// This struct can be decoded in-place and also allows iteration over its characters and bytes, both in
/// decoded and encoded form.
///
/// If the `serde` feature is enabled this struct implements the
/// [`Serialize`](serde::Serialize) and [`Deserialize`](serde::Deserialize) traits.
// NOTE(review): the derived `Deserialize` presumably accepts any string without checking that
// it is a valid encoding, while `into_decoded_string` relies on that invariant inside an
// `unsafe` block. TODO confirm whether deserialized input needs to be validated.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ZalgoString(String);

impl ZalgoString {
    /// Encodes the given string slice with [`zalgo_encode`] and stores the result in a new allocation.
    ///
    /// # Errors
    ///
    /// Returns an error if the input string contains bytes that don't correspond to printable
    /// ASCII characters or newlines.
    ///
    /// # Examples
    ///
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// // The expected value is written with escapes so that the exact code points
    /// // survive tools that apply Unicode normalization to source text.
    /// assert_eq!(ZalgoString::new("Zalgo")?, "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}");
    /// # Ok::<(), Error>(())
    /// ```
    /// Can only encode printable ASCII and newlines:
    /// ```
    /// # use zalgo_codec_common::ZalgoString;
    /// assert!(ZalgoString::new("❤️").is_err());
    /// assert!(ZalgoString::new("\r").is_err());
    /// ```
    #[must_use = "this function returns a new `ZalgoString` and does not modify the input"]
    pub fn new(s: &str) -> Result<Self, Error> {
        // Wrap the encoded `String` on success; an encoding error is passed through unchanged.
        zalgo_encode(s).map(Self)
    }

    /// Returns the *encoded* contents of `self` as a string slice.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Oh boy!")?;
    /// // Escapes keep the exact code points intact even if the source text gets normalized.
    /// assert_eq!(zs.as_str(), "E\u{32f}\u{348}\u{300}\u{342}\u{34f}\u{359}\u{301}");
    /// # Ok::<(), Error>(())
    /// ```
    /// Note that `ZalgoString` implements [`PartialEq`] with common string types,
    /// so the comparison in the above example could also be done directly
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// # let zs = ZalgoString::new("Oh boy!")?;
    /// assert_eq!(zs, "E\u{32f}\u{348}\u{300}\u{342}\u{34f}\u{359}\u{301}");
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "the method returns a reference and does not modify `self`"]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Iterates over the characters of the *encoded* string.
    ///
    /// The first character is an "E", the others are unicode combining characters.
    ///
    /// # Example
    ///
    /// Iterate through the encoded [`char`]s:
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("42")?;
    /// let mut chars = zs.chars();
    /// assert_eq!(chars.next(), Some('E'));
    /// assert_eq!(chars.next(), Some('\u{314}'));
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn chars(&self) -> core::str::Chars<'_> {
        self.as_str().chars()
    }

    /// Iterates over the characters of the *encoded* string together with their byte positions.
    ///
    /// # Example
    ///
    /// Combining characters lie deep in the dark depths of Unicode,
    /// and may not match with your intuition of what a character is.
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo")?;
    /// let mut ci = zs.char_indices();
    /// assert_eq!(ci.next(), Some((0, 'E')));
    /// assert_eq!(ci.next(), Some((1, '\u{33a}')));
    /// // Note the 3 here, the combining characters take up two bytes.
    /// assert_eq!(ci.next(), Some((3, '\u{341}')));
    /// // The final character begins at position 9
    /// assert_eq!(ci.next_back(), Some((9, '\u{34f}')));
    /// // even though the length in bytes is 11
    /// assert_eq!(zs.len(), 11);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn char_indices(&self) -> core::str::CharIndices<'_> {
        self.as_str().char_indices()
    }

    /// Iterates over the characters that the `ZalgoString` decodes to.
    ///
    /// The iterator wraps [`decoded_bytes`](ZalgoString::decoded_bytes) and turns every
    /// decoded byte into a [`char`]. These characters are guaranteed to be valid ASCII.
    ///
    /// # Example
    ///
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zlgoa")?;
    /// let mut decoded_chars = zs.decoded_chars();
    /// assert_eq!(decoded_chars.next(), Some('Z'));
    /// assert_eq!(decoded_chars.next_back(), Some('a'));
    /// assert_eq!(decoded_chars.next(), Some('l'));
    /// assert_eq!(decoded_chars.next(), Some('g'));
    /// assert_eq!(decoded_chars.next_back(), Some('o'));
    /// assert_eq!(decoded_chars.next(), None);
    /// assert_eq!(decoded_chars.next_back(), None);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn decoded_chars(&self) -> DecodedChars<'_> {
        let decoded_bytes = self.decoded_bytes();
        DecodedChars(decoded_bytes)
    }

    /// Converts `self` into a `String`.
    ///
    /// This simply returns the underlying `String` without any cloning or decoding.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo\n He comes!")?;
    /// // Escapes keep the exact code points intact even if the source text gets normalized.
    /// let expected = "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}\u{36f}\u{300}\u{328}\
    ///                 \u{345}\u{300}\u{343}\u{34f}\u{34d}\u{345}\u{353}\u{301}";
    /// assert_eq!(zs.into_string(), expected);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "`self` will be dropped if the result is not used"]
    pub fn into_string(self) -> String {
        self.0
    }

    /// Consumes `self` and decodes it into a `String`, reusing the existing buffer.
    ///
    /// This method has no effect on the allocated capacity.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let s = "Zalgo";
    /// let zs = ZalgoString::new(s)?;
    /// assert_eq!(s, zs.into_decoded_string());
    /// # Ok::<(), Error>(())
    /// ```
    #[must_use = "`self` will be dropped if the result is not used"]
    pub fn into_decoded_string(self) -> String {
        let decoded = self.into_decoded_bytes();
        // SAFETY: the string this value was encoded from was valid ASCII to begin with,
        // so every decoded byte is a valid UTF-8 character.
        unsafe { String::from_utf8_unchecked(decoded) }
    }

    /// Borrows the *encoded* contents of `self` as a byte slice.
    ///
    /// The first byte is always 69, after that the bytes no longer correspond to ASCII characters.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo")?;
    /// let bytes = zs.as_bytes();
    /// assert_eq!(bytes[0], 69);
    /// assert_eq!(&bytes[1..5], &[204, 186, 205, 129]);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "the method returns a reference and does not modify `self`"]
    pub fn as_bytes(&self) -> &[u8] {
        self.as_str().as_bytes()
    }

    /// Iterates over the bytes of the *encoded* string.
    ///
    /// Since a `ZalgoString` always begins with an "E", the first byte is always 69.
    /// After that the bytes no longer correspond to ASCII values.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Bytes")?;
    /// let mut bytes = zs.bytes();
    /// assert_eq!(bytes.next(), Some(69));
    /// assert_eq!(bytes.nth(5), Some(148));
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn bytes(&self) -> core::str::Bytes<'_> {
        self.as_str().bytes()
    }

    /// Iterates over the bytes that the `ZalgoString` decodes to.
    ///
    /// These bytes are guaranteed to represent valid ASCII.
    ///
    /// # Example
    ///
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo")?;
    /// let mut decoded_bytes = zs.decoded_bytes();
    /// assert_eq!(decoded_bytes.next(), Some(90));
    /// assert_eq!(decoded_bytes.next_back(), Some(111));
    /// assert_eq!(decoded_bytes.collect::<Vec<u8>>(), vec![97, 108, 103]);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn decoded_bytes(&self) -> DecodedBytes<'_> {
        // The leading 'E' byte carries no data, so the iterator starts right after it.
        let combining_bytes = self.0.bytes().skip(1);
        DecodedBytes(combining_bytes)
    }

    /// Consumes `self` and returns its *encoded* contents as a byte vector.
    ///
    /// This simply returns the underlying buffer without any cloning or decoding.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo")?;
    /// assert_eq!(zs.into_bytes(), vec![69, 204, 186, 205, 129, 205, 140, 205, 135, 205, 143]);
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "`self` will be dropped if the result is not used"]
    pub fn into_bytes(self) -> Vec<u8> {
        self.into_string().into_bytes()
    }

    /// Consumes `self` and decodes it into a byte vector, reusing the existing buffer.
    ///
    /// This method has no effect on the allocated capacity.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Zalgo")?;
    /// assert_eq!(b"Zalgo".to_vec(), zs.into_decoded_bytes());
    /// # Ok::<(), Error>(())
    /// ```
    #[must_use = "`self` will be dropped if the result is not used"]
    pub fn into_decoded_bytes(self) -> Vec<u8> {
        let mut buffer = self.into_bytes();
        // After the leading 'E', every pair of encoded bytes decodes to a single byte.
        let pair_count = buffer.len() / 2;
        for i in 0..pair_count {
            // The pair is read from positions `2i + 1` and `2i + 2`, which are always
            // ahead of the write position `i`, so no unread input is overwritten.
            buffer[i] = decode_byte_pair(buffer[2 * i + 1], buffer[2 * i + 2]);
        }
        // Drop the leftover encoded bytes behind the decoded prefix.
        buffer.truncate(pair_count);
        buffer
    }

    /// Returns the length of the *encoded* string in bytes.
    ///
    /// This length is twice the length of the original `String` plus one.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Z")?;
    /// assert_eq!(zs.len(), 3);
    /// # Ok::<(), Error>(())
    /// ```
    // The encoded string is never empty, so an `is_empty` method would make no sense.
    // The decoded form can be empty though, so `decoded_is_empty` is provided instead.
    #[inline]
    #[allow(clippy::len_without_is_empty)]
    #[must_use = "the method returns a new value and does not modify `self`"]
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns the capacity of the underlying encoded string in bytes.
    ///
    /// The `ZalgoString` is preallocated to the needed capacity of twice the length
    /// of the original unencoded `String` plus one.
    /// However, this size is not guaranteed since the allocator can choose to allocate more space.
    #[inline]
    #[must_use = "the method returns a new value and does not modify `self`"]
    pub fn capacity(&self) -> usize {
        self.0.capacity()
    }

    /// Returns the length in bytes that the `ZalgoString` would have if it were decoded.
    ///
    /// This is computed without any decoding.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let s = "Zalgo, He comes!";
    /// let zs = ZalgoString::new(s)?;
    /// assert_eq!(s.len(), zs.decoded_len());
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "the method returns a new value and does not modify `self`"]
    pub fn decoded_len(&self) -> usize {
        // One byte for the leading 'E', then two encoded bytes per decoded byte.
        let encoded_len = self.len();
        (encoded_len - 1) / 2
    }

    /// Returns `true` if decoding the `ZalgoString` would yield an empty string.
    ///
    /// # Example
    ///
    /// Basic usage
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("")?;
    /// assert!(zs.decoded_is_empty());
    /// let zs = ZalgoString::new("Blargh")?;
    /// assert!(!zs.decoded_is_empty());
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "the method returns a new value and does not modify `self`"]
    pub fn decoded_is_empty(&self) -> bool {
        let decoded_len = self.decoded_len();
        decoded_len == 0
    }

    /// Appends the combining characters of another `ZalgoString` to the end of `self`.
    ///
    /// The leading 'E' of `zalgo_string` is not copied.
    ///
    /// # Example
    ///
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let (s1, s2) = ("Zalgo", ", He comes!");
    ///
    /// let mut zs1 = ZalgoString::new(s1)?;
    /// let zs2 = ZalgoString::new(s2)?;
    ///
    /// zs1.push_zalgo_str(&zs2);
    ///
    /// assert_eq!(zs1.into_decoded_string(), format!("{s1}{s2}"));
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    pub fn push_zalgo_str(&mut self, zalgo_string: &ZalgoString) {
        let combining_chars = zalgo_string.as_combining_chars();
        self.0.push_str(combining_chars);
    }

    /// Returns a string slice of just the combining characters of the `ZalgoString`,
    /// without the initial 'E'.
    ///
    /// Note that [`zalgo_decode`](crate::zalgo_decode) assumes that the initial 'E' is present,
    /// and can not decode the result of this method.
    ///
    /// # Example
    ///
    /// ```
    /// # use zalgo_codec_common::{Error, ZalgoString};
    /// let zs = ZalgoString::new("Hi")?;
    /// assert_eq!(zs.as_combining_chars(), "\u{328}\u{349}");
    /// # Ok::<(), Error>(())
    /// ```
    #[inline]
    #[must_use = "the method returns a new value and does not modify `self`"]
    pub fn as_combining_chars(&self) -> &str {
        // The first byte is the one-byte 'E'; everything after it is combining characters.
        let (_, combining_chars) = self.0.split_at(1);
        combining_chars
    }
}

/// Implements the `+` operator for concatenating two `ZalgoString`s.
///
/// Memory-wise it works the same as the `Add` implementation for the normal
/// `String` type: it consumes the left-hand side, extends its buffer, and
/// copies the combining characters of the right-hand side into it.
impl core::ops::Add<&ZalgoString> for ZalgoString {
    type Output = ZalgoString;

    #[inline]
    fn add(self, rhs: &ZalgoString) -> ZalgoString {
        let mut sum = self;
        sum.push_zalgo_str(rhs);
        sum
    }
}

/// Implements the `+=` operator for appending to a `ZalgoString`.
///
/// This just calls [`push_zalgo_str`](ZalgoString::push_zalgo_str).
impl core::ops::AddAssign<&ZalgoString> for ZalgoString {
    #[inline]
    fn add_assign(&mut self, rhs: &ZalgoString) {
        self.push_zalgo_str(rhs);
    }
}

/// An iterator over the decoded bytes of a [`ZalgoString`].
///
/// Every item is produced by handing two consecutive encoded bytes to `decode_byte_pair`.
///
/// This struct is obtained by calling the [`decoded_bytes`](ZalgoString::decoded_bytes) method on a [`ZalgoString`].
/// See its documentation for more.
#[derive(Debug, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct DecodedBytes<'a>(core::iter::Skip<core::str::Bytes<'a>>);

impl Iterator for DecodedBytes<'_> {
    type Item = u8;

    /// Decodes the next pair of encoded bytes from the front.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        let odd = self.0.next()?;
        let even = self.0.next()?;
        Some(decode_byte_pair(odd, even))
    }

    /// Reports the number of remaining decoded bytes: half of the remaining encoded bytes.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (encoded_remaining, _) = self.0.size_hint();
        let pairs = encoded_remaining / 2;
        (pairs, Some(pairs))
    }
}

impl DoubleEndedIterator for DecodedBytes<'_> {
    /// Decodes the last pair of encoded bytes.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        // Coming from the back, the second byte of the pair is seen first.
        let even = self.0.next_back()?;
        let odd = self.0.next_back()?;
        Some(decode_byte_pair(odd, even))
    }
}

impl FusedIterator for DecodedBytes<'_> {}
impl ExactSizeIterator for DecodedBytes<'_> {}

/// An iterator over the decoded characters of a [`ZalgoString`].
///
/// It wraps a [`DecodedBytes`] iterator and converts every decoded byte into a [`char`].
///
/// This struct is obtained by calling the [`decoded_chars`](ZalgoString::decoded_chars) method on a [`ZalgoString`].
/// See its documentation for more.
#[derive(Debug, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct DecodedChars<'a>(DecodedBytes<'a>);

impl Iterator for DecodedChars<'_> {
    type Item = char;

    /// Decodes the next character from the front.
    #[inline]
    fn next(&mut self) -> Option<char> {
        let byte = self.0.next()?;
        Some(char::from(byte))
    }

    /// One character per decoded byte, so the hint of the inner iterator is passed on.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

impl DoubleEndedIterator for DecodedChars<'_> {
    /// Decodes the last character.
    #[inline]
    fn next_back(&mut self) -> Option<char> {
        let byte = self.0.next_back()?;
        Some(char::from(byte))
    }
}

impl FusedIterator for DecodedChars<'_> {}
impl ExactSizeIterator for DecodedChars<'_> {}

macro_rules! impl_partial_eq {
    ($($rhs:ty),+) => {
        $(
            impl PartialEq<$rhs> for ZalgoString {
                #[inline]
                fn eq(&self, other: &$rhs) -> bool {
                    &self.0 == other
                }
            }

            impl PartialEq<ZalgoString> for $rhs {
                #[inline]
                fn eq(&self, other: &ZalgoString) -> bool {
                    self == &other.0
                }
            }
        )+
    };
}
impl_partial_eq! {String, &str, str, Cow<'_, str>}

/// Displays the encoded form of the `ZalgoString`.
///
/// The encoded text is written verbatim to the formatter.
impl fmt::Display for ZalgoString {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::{format, string::ToString};

    /// Encoding followed by in-place decoding gives back the input, also for the empty string.
    #[test]
    fn check_into_decoded_string() {
        for original in ["Zalgo\n He comes!", ""] {
            let encoded: ZalgoString = ZalgoString::new(original).unwrap();
            assert_eq!(encoded.into_decoded_string(), original);
        }
    }

    /// `to_string` and `into_string` both give the encoded text.
    #[test]
    fn check_string_from_zalgo_string() {
        // The expected encoding is written with escapes: a literal containing the combining
        // characters can be silently altered by Unicode normalization (for example U+0341 and
        // U+0343 are replaced, and marks are reordered), which makes the comparison fail.
        let enc = "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}\u{36f}\u{300}\u{328}\
                   \u{345}\u{300}\u{343}\u{34f}\u{34d}\u{345}\u{353}\u{301}";
        let zs = ZalgoString::new("Zalgo\n He comes!").unwrap();
        assert_eq!(zs.to_string(), enc);
        assert_eq!(zs.into_string(), enc);

        // The empty string encodes to just the leading 'E'.
        let zs = ZalgoString::new("").unwrap();
        assert_eq!(zs.into_string(), "E");
    }

    /// A `ZalgoString` compares equal to its encoded text for all supported string types,
    /// with the `ZalgoString` on either side of the comparison.
    #[test]
    fn check_partial_eq() {
        // Written with escapes so that Unicode normalization of the source file cannot
        // change the code points that the encoder is expected to produce.
        let enc = "E\u{33a}\u{341}\u{34c}\u{347}\u{34f}\u{36f}\u{300}\u{328}\
                   \u{345}\u{300}\u{343}\u{34f}\u{34d}\u{345}\u{353}\u{301}";
        let zs = ZalgoString::new("Zalgo\n He comes!").unwrap();
        assert_eq!(zs, enc);
        assert_eq!(zs, String::from(enc));
        assert_eq!(zs, Cow::from(enc));
        assert_eq!(String::from(enc), zs);
        assert_eq!(Cow::from(enc), zs);
    }

    /// `push_zalgo_str`, `+=` and `+` each append the decoded content of the right-hand side.
    #[test]
    fn check_push_str() {
        let (first, second) = ("Zalgo", ", He comes");
        let mut encoded = ZalgoString::new(first).unwrap();
        let suffix = ZalgoString::new(second).unwrap();

        encoded.push_zalgo_str(&suffix);
        let expected_once = format!("{first}{second}");
        assert_eq!(encoded.clone().into_decoded_string(), expected_once);

        encoded += &suffix;
        let sum = encoded + &suffix;
        let expected_thrice = format!("{first}{second}{second}{second}");
        assert_eq!(sum.into_decoded_string(), expected_thrice);
    }

    /// The combining characters exclude the leading 'E'; an empty input has none.
    #[test]
    fn check_as_combining_chars() {
        let hi = ZalgoString::new("Hi").unwrap();
        assert_eq!(hi.as_combining_chars(), "\u{328}\u{349}");

        let empty = ZalgoString::new("").unwrap();
        assert_eq!(empty.as_combining_chars(), "");
    }

    /// The decoded characters can be iterated from the back.
    #[test]
    fn check_decoded_chars() {
        let zs = ZalgoString::new("Zalgo").unwrap();
        let reversed: String = zs.decoded_chars().rev().collect();
        assert_eq!(reversed, "oglaZ");
    }
}