1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#![deny(missing_docs, missing_debug_implementations, missing_copy_implementations,
trivial_casts, trivial_numeric_casts, unsafe_code, unstable_features,
unused_import_braces, unused_qualifications)]

#![cfg_attr(feature="clippy", allow(unstable_features))]
#![cfg_attr(feature="clippy", feature(plugin))]
#![cfg_attr(feature="clippy", plugin(clippy))]
#![cfg_attr(feature="clippy", deny(clippy_pedantic))]
#![cfg_attr(feature="clippy", allow(non_ascii_literal))]

//! # encoji
//!
//! Encode a byte buffer into emojis, and decode an emoji string into a byte vector.
//!
//! Reimplementation of [`base_emoji`](https://github.com/pfrazee/base-emoji) including both
//! encoding and decoding.
//!
//! ## Example
//!
//! ```rust
//! let bytes = [0x6e, 0x6e, 0x6e, 0xcd];
//! let emojis = "🔥🔥🔥🚀";
//!
//! assert_eq!(encoji::to_string(&bytes), emojis);
//! assert_eq!(encoji::from_string(&emojis)[..], bytes);
//! ```
//!
//! ## Encoding (same as original implementation)
//!
//! Citing [the README](https://github.com/pfrazee/base-emoji/blob/master/README.md):
//!
//! > The emojis used are in `emojis.json`. There are 843 emojis there, but the
//! > converter reads sequences of 8 bits at a time, and so only maps the value to
//! > the first 256 of them. To stay consistent with other renderings, make sure you
//! > don't change the order of your emojis.json.
//!
//! ## Decoding
//!
//! The input string is split into grapheme clusters by the
//! [`unicode-segmentation`](https://crates.io/crates/unicode-segmentation) crate, in accordance
//! with [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/). A grapheme that maps
//! to an emoji in the table is converted to its byte; any other grapheme is skipped.
//!
//! ## License
//!
//! MIT. See included `LICENSE` file.
extern crate phf;
extern crate unicode_segmentation;

use unicode_segmentation::UnicodeSegmentation;

include!(concat!(env!("OUT_DIR"), "/emojis.rs"));

/// Encode a byte buffer as an encoji string.
///
/// Every byte of `buf` is looked up in the generated `EMOJIS` table and the
/// matching emoji is appended to the result, preserving input order. A byte
/// without a table entry contributes nothing to the output.
///
/// ```rust
/// let bytes = [0x6e, 0x6e, 0x6e, 0xcd];
/// let emojis = "🔥🔥🔥🚀";
///
/// assert_eq!(encoji::to_string(&bytes), emojis);
/// ```
pub fn to_string<T: AsRef<[u8]>>(buf: T) -> String {
    buf.as_ref()
        .iter()
        // `filter_map` states the intent directly: keep only bytes that have an entry.
        .filter_map(|byte| EMOJIS.get(byte))
        // The first tuple field is the emoji itself.
        .map(|entry| entry.0)
        .collect()
}

/// Decode an encoji string into a byte buffer.
///
/// `input` is split into grapheme clusters; each cluster found in the
/// generated `EMOJIS_REV` table yields its byte, in input order. Clusters
/// without a table entry (plain text, whitespace, unknown emojis) are skipped.
///
/// ```rust
/// let emojis = "🔥🔥🔥🚀";
/// let bytes = vec![0x6e, 0x6e, 0x6e, 0xcd];
///
/// assert_eq!(encoji::from_string(&emojis), bytes);
/// ```
pub fn from_string(input: &str) -> Vec<u8> {
    UnicodeSegmentation::graphemes(input, true)
        // Unmappable graphemes produce `None` and are dropped here.
        .filter_map(|grapheme| EMOJIS_REV.get(grapheme))
        .cloned()
        .collect()
}

/// Render a byte buffer as a run of `:name:` emoji shortcodes.
///
/// Delegates to `to_custom`, ignoring the emoji itself and wrapping each
/// emoji name in colons.
///
/// ```rust
/// let input = [0x6e, 0x6e, 0x6e, 0xcd];
/// let output = ":fire::fire::fire::rocket:";
/// assert_eq!(encoji::to_names(&input), output);
/// ```
pub fn to_names<T: AsRef<[u8]>>(buf: T) -> String {
    // Formatter handed to `to_custom`; only the name is used.
    fn shortcode(_emoji: &str, name: &str) -> String {
        format!(":{}:", name)
    }

    to_custom(buf, shortcode)
}

/// Encode a byte buffer as a custom-mapped string.
///
/// For every byte of `buf`, `f` is called with the emoji and the emoji name
/// taken from the generated `EMOJIS` table, and the returned fragments are
/// concatenated in input order. A byte without a table entry is skipped
/// instead of causing a panic, the same way `to_string` treats it.
///
/// ```rust
/// let input = [0xde];
/// let output = "<img src='/img/snowflake.png' alt='❄️' title='snowflake'>";
/// assert_eq!(
///     encoji::to_custom(&input, |ch, name| {
///         format!("<img src='/img/{}.png' alt='{}' title='{}'>",
///                 name, ch, name)
///     }),
///     output);
/// ```
pub fn to_custom<T: AsRef<[u8]>, F: Fn(&str, &str) -> String>(buf: T, f: F) -> String {
    buf.as_ref()
        .iter()
        .filter_map(|byte| EMOJIS.get(byte))
        // Tuple layout: `.0` is the emoji, `.1` is its name.
        .map(|entry| f(entry.0, entry.1))
        // Collect straight into one `String`; no intermediate `Vec<String>` is needed.
        .collect::<String>()
}

#[cfg(test)]
mod tests {
    use super::*;

    // Expected encoding of the bytes 0x00..=0xff, in order.
    //
    // NOTE(review): the emoji literals in this module look mis-encoded (mojibake) in this copy
    // of the file. They are kept verbatim; verify them against the upstream emoji table.
    static ALL_EMOJIS: &'static str = "๐Ÿ’ฏ๐Ÿ”ข๐Ÿ‘๐Ÿ‘Ž๐ŸŽฑ๐Ÿ…ฐ๐Ÿšกโœˆ๏ธ๐Ÿ‘ฝ๐Ÿš‘โš“๏ธ๐Ÿ‘ผ๐Ÿ’ข๐Ÿ˜ ๐Ÿœ๐ŸŽโฌ‡๏ธโฌ…๏ธโžก๏ธโฌ†๏ธ๐ŸŽจ๐Ÿง๐Ÿ‘ถ๐Ÿผ๐ŸŽˆ๐ŸŽ๐ŸŒโ€ผ๏ธ๐Ÿ“Š๐Ÿ’ˆ๐Ÿ€๐Ÿ›€๐Ÿ”‹๐Ÿป๐Ÿ๐Ÿบ๐Ÿž๐Ÿ””๐Ÿšด๐Ÿ‘™๐ŸŽ‚๐Ÿƒ๐ŸŒผ๐Ÿ“˜๐Ÿš™๐Ÿ’™๐Ÿ—โ›ต๏ธ๐Ÿ’ฃ๐Ÿ“–๐Ÿ“š๐Ÿ’ฅ\
                                       ๐Ÿ‘ข๐Ÿ’๐ŸŽณ๐Ÿ‘ฆ๐Ÿž๐Ÿ‘ฐ๐Ÿ’ผ๐Ÿ’”๐Ÿ›๐Ÿ’ก๐ŸšŒ๐Ÿ‘ค๐ŸŒต๐Ÿฐ๐Ÿ“†๐Ÿซ๐Ÿฌ๐Ÿš—๐Ÿˆ๐Ÿ’ฟ๐Ÿ๐Ÿ’๐Ÿ”๐Ÿซ๐ŸŽ„๐Ÿ‘๐ŸŽฌโ˜๏ธ๐Ÿธโ˜•๏ธ๐Ÿ’ป๐ŸŽŠ๐Ÿšง๐Ÿช๐ŸŒฝ๐Ÿ‘ซ๐Ÿ„๐ŸŠ๐Ÿ‘‘๐Ÿ”ฎ๐Ÿ’˜๐ŸŒ€๐Ÿ’ƒ๐ŸŽฏ๐Ÿ’จ๐ŸŒณ๐Ÿ’ซ๐Ÿ•๐Ÿ’ต๐Ÿฌ๐Ÿฉ๐Ÿšช๐Ÿ’ง๐Ÿ‘‚๐ŸŒ๐Ÿ‘“๐Ÿ‘Š๐Ÿ‘ฃ\
                                       ๐Ÿ”ฅ๐Ÿ”ฆ๐Ÿ’พ๐Ÿˆ๐Ÿ€๐Ÿค๐ŸŸ๐Ÿธ๐ŸŽฒ๐Ÿ’Ž๐Ÿ‘ป๐ŸŽ๐ŸŒโ›ณ๏ธ๐Ÿ‡๐Ÿ๐ŸŽธ๐Ÿ”ซ๐Ÿ”๐Ÿ”จ๐Ÿ‘œ๐Ÿฃ๐ŸŒฟ๐Ÿ”†๐Ÿ‘ ๐ŸดโŒ›๏ธ๐Ÿจ๐ŸŽƒ๐Ÿ”‘๐Ÿ’‹๐Ÿจ๐Ÿƒ๐Ÿ”—๐Ÿ”’๐Ÿ“ข๐Ÿ”๐Ÿ“ซ๐Ÿ๐Ÿ“ฃ๐Ÿ“๐ŸŽค๐Ÿ”ฌ๐Ÿ’ฐ๐Ÿ’๐ŸŒ™๐Ÿ๐ŸŽฅ๐Ÿ’ช๐Ÿ„๐ŸŽน๐ŸŽต๐Ÿ‘”๐Ÿ“ฐ๐Ÿ”•โ›”๏ธ๐Ÿšซ\
                                       ๐Ÿ‘ƒ๐Ÿ”ฉ๐Ÿ™๐Ÿ‘Œ๐Ÿ‘๐Ÿ‚๐Ÿผโ›…๏ธ๐Ÿพ๐Ÿ‘๐Ÿ๐Ÿง๐ŸŽญโ˜Ž๏ธ๐Ÿ–๐Ÿฝ๐Ÿ’Š๐Ÿ๐Ÿ•๐Ÿ‘‡๐Ÿ‘ˆ๐Ÿ‘‰๐Ÿ‘†๐Ÿš“๐Ÿฉ๐Ÿ’ฉ๐Ÿ“ฏ๐Ÿ—๐Ÿ™๐Ÿ‘›๐Ÿ“Œ๐Ÿ‡๐ŸŽ๐Ÿ“ป๐Ÿ€๐ŸŽ€๐Ÿš๐Ÿ’๐Ÿš€๐Ÿ“๐ŸŒน๐Ÿšจ๐Ÿ“๐Ÿƒ๐ŸŽ…๐Ÿ“ก๐ŸŽทโœ‚๏ธ๐Ÿš๐Ÿ‘•๐Ÿšฟ๐Ÿ’€๐Ÿ˜„๐ŸŒ๐Ÿโ„๏ธ\
                                       โ›„๏ธ๐Ÿ˜ญโšฝ๏ธ๐Ÿ”‰๐Ÿ‘พ๐Ÿ’ฌโญ๏ธ๐Ÿ“๐Ÿ˜Ž๐Ÿ’ฆ๐ŸŠ๐Ÿ’‰๐Ÿ”ญ๐ŸŽพ๐Ÿ’ญ๐Ÿšฝ๐Ÿ‘…๐ŸŽฉ๐Ÿšฅ๐Ÿ†๐ŸŽบ๐Ÿข๐Ÿšฆ๐Ÿ“ผ๐ŸŽฎ๐ŸŽปโŒš๏ธ๐Ÿ‹๐Ÿท๐Ÿบ๐Ÿ”งโšก๏ธ๐Ÿ’ค";

    /// Every byte value from 0x00 to 0xff, in ascending order.
    fn all_bytes() -> Vec<u8> {
        // The cast cannot truncate: the range stops at 255.
        (0u16..256).map(|value| value as u8).collect()
    }

    /// Encoding all 256 byte values yields the full emoji table in order.
    #[test]
    fn all_bytes_to_emojis() {
        assert_eq!(to_string(all_bytes()), ALL_EMOJIS);
    }

    /// Decoding the full emoji table yields all 256 byte values in order.
    #[test]
    fn all_emojis_to_bytes() {
        assert_eq!(from_string(ALL_EMOJIS), all_bytes());
    }

    /// A `&str` can be encoded directly through its UTF-8 bytes.
    #[test]
    fn string_to_emojis() {
        let input = "encoji is the way of the future!";
        let output = "๐Ÿฌ๐Ÿ”ฅ๐Ÿ•๐Ÿ”ฆ๐ŸŒ๐Ÿ‘‚๐Ÿ”‹๐Ÿ‘‚๐Ÿค๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌ๐Ÿ”‹๐Ÿ’Ž๐ŸŒณ๐ŸŽ๐Ÿ”‹๐Ÿ”ฆ๐Ÿฉ๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌ๐Ÿ”‹๐Ÿฉ๐Ÿธ๐ŸŸ๐Ÿธ๐Ÿ€๐Ÿฌ๐Ÿป";

        assert_eq!(to_string(input), output);
    }

    /// Decoding an encoded sentence restores the original text.
    #[test]
    fn emojis_to_string() {
        let input = "๐Ÿฌ๐Ÿ”ฅ๐Ÿ•๐Ÿ”ฆ๐ŸŒ๐Ÿ‘‚๐Ÿ”‹๐Ÿ‘‚๐Ÿค๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌ๐Ÿ”‹๐Ÿ’Ž๐ŸŒณ๐ŸŽ๐Ÿ”‹๐Ÿ”ฆ๐Ÿฉ๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌ๐Ÿ”‹๐Ÿฉ๐Ÿธ๐ŸŸ๐Ÿธ๐Ÿ€๐Ÿฌ๐Ÿป";
        let output = "encoji is the way of the future!";

        assert_eq!(String::from_utf8_lossy(&from_string(input)), output);
    }

    /// Graphemes that are not in the table (ASCII text, trailing newline) are skipped.
    #[test]
    fn emojis_to_string_ignore_invalid_codepoints() {
        let input = "๐Ÿฌ๐Ÿ”ฅ๐Ÿ•๐Ÿ”ฆ๐ŸŒ๐Ÿ‘‚๐Ÿ”‹๐Ÿ‘‚๐Ÿค๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌasdfasdf๐Ÿ”‹๐Ÿ’Ž๐ŸŒณ๐ŸŽ๐Ÿ”‹๐Ÿ”ฆ๐Ÿฉ๐Ÿ”‹๐ŸŸ๐Ÿ’ง๐Ÿฌ๐Ÿ”‹๐Ÿฉ๐Ÿธ๐ŸŸ๐Ÿธ๐Ÿ€๐Ÿฌ๐Ÿป\n";
        let output = "encoji is the way of the future!";

        assert_eq!(String::from_utf8_lossy(&from_string(input)), output);
    }
}