ecoji/
encode.rs

1use emojis::*;
2use std::io::{self, Read, Write};
3
4fn encode_chunk<W: Write + ?Sized>(s: &[u8], out: &mut W) -> io::Result<usize> {
5    assert!(s.len() > 0 && s.len() <= 5, "Unexpected slice length");
6
7    let (b0, b1, b2, b3, b4) = (
8        s[0] as usize,
9        s.get(1).cloned().unwrap_or(0) as usize,
10        s.get(2).cloned().unwrap_or(0) as usize,
11        s.get(3).cloned().unwrap_or(0) as usize,
12        s.get(4).cloned().unwrap_or(0) as usize,
13    );
14
15    let mut chars = [
16        EMOJIS[b0 << 2 | b1 >> 6] as char,
17        PADDING,
18        PADDING,
19        PADDING,
20    ];
21
22    match s.len() {
23        1 => {}
24        2 => {
25            chars[1] = EMOJIS[(b1 & 0x3f) << 4 | b2 >> 4]
26        }
27        3 => {
28            chars[1] = EMOJIS[(b1 & 0x3f) << 4 | b2 >> 4];
29            chars[2] = EMOJIS[(b2 & 0x0f) << 6 | b3 >> 2];
30        }
31        4 => {
32            chars[1] = EMOJIS[(b1 & 0x3f) << 4 | b2 >> 4];
33            chars[2] = EMOJIS[(b2 & 0x0f) << 6 | b3 >> 2];
34
35            chars[3] = match b3 & 0x03 {
36                0 => PADDING_40,
37                1 => PADDING_41,
38                2 => PADDING_42,
39                3 => PADDING_43,
40                _ => unreachable!(),
41            }
42        }
43        5 => {
44            chars[1] = EMOJIS[(b1 & 0x3f) << 4 | b2 >> 4];
45            chars[2] = EMOJIS[(b2 & 0x0f) << 6 | b3 >> 2];
46            chars[3] = EMOJIS[(b3 & 0x03) << 8 | b4];
47        }
48        _ => unreachable!(),
49    }
50
51    let mut buf = [0; 4];
52    let mut bytes_written = 0;
53    for c in chars.iter() {
54        let s = c.encode_utf8(&mut buf).as_bytes();
55        out.write_all(s)?;
56        bytes_written += s.len();
57    }
58
59    Ok(bytes_written)
60}
61
/// Reads from `source` until `buf` is completely filled or the source reports end of input.
///
/// Unlike `Read::read_exact`, running out of input before the buffer is full is not an error:
/// the function simply returns how many leading bytes of `buf` were filled, which is less than
/// `buf.len()` only when a read returned `Ok(0)`.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried; any other error is returned to
/// the caller as is.
fn read_exact<R: Read + ?Sized>(source: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;

    // `!=` rather than `<`: if a misbehaving reader ever reports more bytes than it was given
    // room for, the slice expression below panics instead of the loop silently finishing.
    while filled != buf.len() {
        match source.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(count) => filled += count,
            Err(err) => {
                if err.kind() != io::ErrorKind::Interrupted {
                    return Err(err);
                }
                // Interrupted: nothing was read, just try again.
            }
        }
    }

    Ok(filled)
}
78
79/// Encodes the entire source into the Ecoji format and writes a UTF-8 representation of
80/// the encoded data to the provided destination.
81///
82/// If successful, returns the number of bytes which were written to the destination writer.
83///
84/// Returns an error when either source or destination operation has failed. No guarantees are
85/// made about the state of the destination if an error occurs, so it is possible for the
86/// destination to contain only a part of the encoded data.
87///
88/// # Examples
89///
90/// Successful encoding:
91///
92/// ```
93/// # fn test() -> ::std::io::Result<()> {
94/// let input = "input data";
95///
96/// let mut output: Vec<u8> = Vec::new();
97/// ecoji::encode(&mut input.as_bytes(), &mut output)?;
98///
99/// assert_eq!(output, "πŸ‘ΆπŸ˜²πŸ‡²πŸ‘…πŸ‰πŸ”™πŸŒ₯🌩".as_bytes());
100/// #  Ok(())
101/// # }
102/// # test().unwrap();
103/// ```
104pub fn encode<R: Read + ?Sized, W: Write + ?Sized>(source: &mut R, destination: &mut W) -> io::Result<usize> {
105    let mut buf = [0; 5];
106    let mut bytes_written = 0;
107
108    loop {
109        let n = read_exact(source, &mut buf)?;
110
111        // EOF
112        if n == 0 {
113            break;
114        }
115
116        bytes_written += encode_chunk(&buf[..n], destination)?;
117    }
118
119    Ok(bytes_written)
120}
121
122/// Encodes the entire source into the Ecoji format, storing the result of the encoding to a
123/// new owned string.
124///
125/// Returns a string with the encoded data if successful.
126///
127/// Failure conditions are exactly the same as those of the [`encode`](fn.encode.html) function;
128/// because the encoding output is always a valid sequence of emoji code points, it is guaranteed
129/// to be representable as a valid UTF-8 sequence.
130///
131/// # Examples
132///
133/// Successful encoding:
134///
135/// ```
136/// # fn test() -> ::std::io::Result<()> {
137/// let input = "input data";
138/// let output: String = ecoji::encode_to_string(&mut input.as_bytes())?;
139///
140/// assert_eq!(output, "πŸ‘ΆπŸ˜²πŸ‡²πŸ‘…πŸ‰πŸ”™πŸŒ₯🌩");
141/// #  Ok(())
142/// # }
143/// # test().unwrap();
144/// ```
145pub fn encode_to_string<R: Read + ?Sized>(source: &mut R) -> io::Result<String> {
146    let mut output = Vec::new();
147    encode(source, &mut output)?;
148    // encoded output is guaranteed to be valid UTF-8
149    Ok(unsafe { String::from_utf8_unchecked(output) })
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `input` and asserts that the UTF-8 bytes of the result equal `output`.
    fn check(input: &[u8], output: &[u8]) {
        // A byte slice is itself a reader; the local binding gives us something to borrow
        // mutably without touching the caller's slice.
        let mut reader = input;
        let buf = encode_to_string(&mut reader).unwrap();
        assert_eq!(output, buf.as_bytes());
    }

    /// Encodes `input` and asserts that the result consists of exactly the characters in
    /// `output`, in order.
    fn check_chars(input: &[u8], output: &[char]) {
        let mut reader = input;
        let buf = encode_to_string(&mut reader).unwrap();
        let chars: Vec<_> = buf.chars().collect();
        assert_eq!(output, chars.as_slice());
    }

    #[test]
    fn test_random() {
        check(b"abc", "👖📸🎈☕".as_bytes());
    }

    #[test]
    fn test_one_byte() {
        check_chars(b"k", &[EMOJIS[('k' as usize) << 2], PADDING, PADDING, PADDING]);
    }

    #[test]
    fn test_two_bytes() {
        check_chars(&[0, 1], &[EMOJIS[0], EMOJIS[16], PADDING, PADDING]);
    }

    #[test]
    fn test_three_bytes() {
        check_chars(&[0, 1, 2], &[EMOJIS[0], EMOJIS[16], EMOJIS[128], PADDING]);
    }

    #[test]
    fn test_four_bytes() {
        // The two low bits of the fourth byte select the padding character.
        check_chars(&[0, 1, 2, 0], &[EMOJIS[0], EMOJIS[16], EMOJIS[128], PADDING_40]);
        check_chars(&[0, 1, 2, 1], &[EMOJIS[0], EMOJIS[16], EMOJIS[128], PADDING_41]);
        check_chars(&[0, 1, 2, 2], &[EMOJIS[0], EMOJIS[16], EMOJIS[128], PADDING_42]);
        check_chars(&[0, 1, 2, 3], &[EMOJIS[0], EMOJIS[16], EMOJIS[128], PADDING_43]);
    }

    #[test]
    fn test_five_bytes() {
        // 0xABCDEF0123 split into 10-bit groups: 687, 222, 960, 291.
        check_chars(&[0xAB, 0xCD, 0xEF, 0x01, 0x23], &[EMOJIS[687], EMOJIS[222], EMOJIS[960], EMOJIS[291]]);
    }
}