//! A Rust implementation of [Ecoji](https://github.com/keith-turner/ecoji), a base-1024 encoding
//! with an emoji alphabet.
//!
//! This crate includes both encoding and decoding functionality, as well as a binary with an
//! interface similar to the `base64` tool to perform Ecoji encoding and/or decoding from the
//! command line.
//!
//! ## Features
//!
//! Features of the Ecoji encoding are described in depth in the
//! [original implementation's](https://github.com/keith-turner/ecoji) repository. In short, it has
//! the following key characteristics:
//!
//! * While Ecoji-encoded strings take more bytes than their Base64 or other ASCII-based
//!   counterparts, they take fewer *visible* characters. More specifically, each visible
//!   character in Ecoji encodes 10 bits of data, whereas each visible character in Base64
//!   encodes only 6 bits.
//! * Ecoji-encoded strings can be concatenated and then decoded, giving the concatenation of the
//!   original strings:
//!
//!   ```
//!   use ecoji::{encode_to_string, decode_to_string};
//!
//!   # fn test() -> ::std::io::Result<()> {
//!   let (input1, input2) = ("hello ", "world");
//!
//!   // Encode both input strings and concatenate the encoded output
//!   let output1 = encode_to_string(&mut input1.as_bytes())?;
//!   let output2 = encode_to_string(&mut input2.as_bytes())?;
//!   let output = output1 + &output2;
//!
//!   // Then decode the concatenated output
//!   let input = decode_to_string(&mut output.as_bytes())?;
//!
//!   // The result is the same as concatenation of the input strings
//!   assert_eq!(input, input1.to_owned() + input2);
//!   # Ok(())
//!   # }
//!   # test().unwrap();
//!   ```
//! * Data encoded with Ecoji has the same sorting order as the input data:
//!
//!   ```
//!   use ecoji::{encode_to_string, decode_to_string};
//!
//!   # fn test() -> ::std::io::Result<()> {
//!   // The input vector is sorted
//!   let inputs = vec![
//!       "a", "ab", "abc", "abcd",
//!       "ac",
//!       "b", "ba"
//!   ];
//!
//!   // Encode each element of input and sort the resulting strings again
//!   let mut outputs: Vec<_> = inputs.iter().cloned()
//!     .map(|s| encode_to_string(&mut s.as_bytes()))
//!     .collect::<Result<_, _>>()?;
//!   outputs.sort_unstable();
//!
//!   // Decode each output item back
//!   let mut inputs2: Vec<_> = outputs.iter()
//!     .map(|mut s| decode_to_string(&mut s.as_bytes()))
//!     .collect::<Result<_, _>>()?;
//!   let mut inputs2: Vec<_> = inputs2.iter()
//!     .map(|s| s.as_str())
//!     .collect();  // to have a Vec<&str> instead of Vec<String> for assert below
//!
//!   // Input (which is sorted) and decoded output (whose source is sorted) should be the same
//!   assert_eq!(inputs, inputs2);
//!
//!   # Ok(())
//!   # }
//!   # test().unwrap();
//!   ```
//!
//! ## Usage
//!
//! The two main functions provided by this library are [`encode`](fn.encode.html) and
//! [`decode`](fn.decode.html), which both have the same signature: they accept a reference
//! to an `std::io::Read` and a reference to `std::io::Write` and return an `std::io::Result<usize>`
//! with the number of bytes written to the output `std::io::Write`.
//!
//! Additionally, this library provides shortcut functions,
//! [`encode_to_string`](fn.encode_to_string.html), [`decode_to_vec`](fn.decode_to_vec.html) and
//! [`decode_to_string`](fn.decode_to_string.html), whose output is an in-memory `String` or
//! `Vec<u8>`. Note that there is no need to support special versions of the encode/decode
//! operations which would *accept* strings or vectors, because slices of bytes (`&[u8]`) implement
//! the `std::io::Read` trait by default. Therefore, if you have a string or a byte vector, you
//! can invoke the encoding/decoding functions like this:
//!
//! ```
//! # fn test() -> ::std::io::Result<()> {
//! let input_1: &str = "some data";
//! let input_2: &[u8] = b"some data";
//!
//! // Pass a mutable reference to the intermediate &[u8] object returned by `str::as_bytes()`
//! let result_1 = ecoji::encode_to_string(&mut input_1.as_bytes())?;
//!
//! // Pass a mutable reference to a cloned &[u8] object if you already have a byte slice
//! let result_2 = ecoji::encode_to_string(&mut input_2.clone())?;
//! #   Ok(())
//! # }
//! ```
//!
//! ## Command line tool
//!
//! This crate also provides an executable binary, `ecoji`, which provides a command line
//! interface similar to that of the standard `base64` command and which can encode or decode data
//! coming on the standard input and write the results of this processing to the standard output.
//! You can install it by invoking the following command:
//!
//! ```none
//! $ cargo install --bin ecoji --features build-binary ecoji
//! ```
//!
//! It will be installed in your default Cargo binaries directory (usually `~/.cargo/bin` on Unix
//! systems). Run `ecoji --help` (assuming the aforementioned directory is in your `PATH`) to
//! see documentation on how to invoke it.
//!
//! ## Issues and limitations
//!
//! Currently, this crate cannot wrap the encoded text the way the `base64` command does with
//! its `-w` flag. It is possible that this feature will be implemented in the future; pull
//! requests for this functionality are welcome!
//!
//! This library is almost a direct line-by-line reimplementation of the original algorithm
//! which is implemented in Go. There were almost zero attempts at optimization, therefore
//! performance characteristics may not be stellar. No benchmarking is done either. This is another
//! area where contributions are very welcome.
//!
//! The core API of this library expects `std::io::Read` and `std::io::Write` instances. This
//! implies that the only supported encoding for the emoji output is UTF-8.

extern crate phf;
#[cfg(test)] #[macro_use] extern crate quickcheck;

mod emojis;
mod encode;
mod decode;
mod chars;

pub use encode::{encode, encode_to_string};
pub use decode::{decode, decode_to_vec, decode_to_string};

#[cfg(test)]
mod test {
    use super::*;

    quickcheck! {
        // Round-trip property: decoding the encoding of arbitrary bytes must give back
        // exactly those bytes.
        fn encode_then_decode_identity(input: Vec<u8>) -> bool {
            let emoji = encode_to_string(&mut &input[..]).unwrap();
            let round_tripped = decode_to_vec(&mut emoji.as_bytes()).unwrap();
            round_tripped == input
        }

        // Ordering property: sorting the encoded strings and decoding them must produce the
        // same sequence as sorting the raw inputs directly.
        //
        //   input    --encode--> encoded --sort--> encoded (sorted) --decode--> decoded
        //   input    ---sort---> expected
        //   expected == decoded
        fn encoded_data_has_the_same_sort_order(input: Vec<Vec<u8>>) -> bool {
            // Encode every item (borrowing, so the raw input stays available), then order
            // the encoded strings.
            let mut encoded: Vec<_> = input.iter()
                .map(|bytes| encode_to_string(&mut bytes.as_slice()).unwrap())
                .collect();
            encoded.sort_unstable();

            // Decode the strings in their sorted order.
            let decoded: Vec<_> = encoded.iter()
                .map(|text| decode_to_vec(&mut text.as_bytes()).unwrap())
                .collect();

            // Order the raw input itself to obtain the reference sequence.
            let mut expected = input;
            expected.sort_unstable();

            expected == decoded
        }
    }
}