emojito/
lib.rs

//! # Emojito
//!
//! Find Emoji in strings. Supports complex emoji such as 👨‍👩‍👧‍👦.
//! Uses the `unic-emoji-char` crate in the background, and does not rely on regexes.
//!
//! ## Usage
//!
//! ``` rs
//! let content = "Test 😘❤️! 😻💓 👨‍👩‍👦  kk 👨‍👩‍👧‍👦";
//! let emojis = emojito::find_emoji(content);
//! assert_eq!(emojis.len(), 6);
//! ```
13use std::ops::Range;
14
15use emoji::lookup_by_glyph;
16
17pub use emoji::Emoji;
18
19/// Find all the emoji in a string. Returns the emoji in a `Vec`.
20/// ``` rs
21/// let content = "Test 😘❀️! πŸ˜»πŸ’“ πŸ‘¨β€πŸ‘©β€πŸ‘¦  kk πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦";
22/// let emojis = emojito::find_emoji(content);
23/// assert_eq!(emojis.len(), 6);
24/// ```
25pub fn find_emoji(content: impl AsRef<str>) -> Vec<&'static Emoji> {
26    find_emoji_ranges(content)
27        .into_iter()
28        .map(|(emoji, _)| emoji)
29        .collect()
30}
31
32/// Find all the emoji in a string. Returns a struct containing
33/// the range of the Emoji as well as the Emoji
34/// ``` rs
35/// let content = "Test 😘❀️! πŸ˜»πŸ’“ πŸ‘¨β€πŸ‘©β€πŸ‘¦  kk πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦";
36/// let emojis = emojito::find_emoji(content);
37/// assert_eq!(emojis.len(), 6);
38/// ```
39fn find_emoji_ranges(
40    content: impl AsRef<str>,
41) -> impl ExactSizeIterator<Item = (&'static Emoji, Range<usize>)> {
42    let zwj = '\u{200d}';
43    let variation_selectors = ['\u{fe0f}', '\u{fe0e}'];
44    let mut emoji_list = Vec::with_capacity(128);
45    let mut container = String::with_capacity(8);
46
47    fn compose(
48        position: usize,
49        container: &mut String,
50        emoji_list: &mut Vec<(&'static Emoji, Range<usize>)>,
51    ) {
52        if let Some(emoji) = lookup_by_glyph::lookup(container) {
53            emoji_list.push((emoji, position..(position + container.len())));
54        }
55        container.clear();
56    }
57
58    let mut previous_zwj = false;
59    let mut previous_emoji = false;
60    let mut last_begin = 0usize;
61    for (index, char) in content.as_ref().char_indices() {
62        let is_emoji_presentation = unic_emoji_char::is_emoji_presentation(char);
63        let is_ascii = char.is_ascii();
64        // Shortcut to ignore ascii characters which don't have a unicode presentation
65        // for a good speed boost
66        if !is_emoji_presentation && is_ascii {
67            if !container.is_empty() {
68                compose(last_begin, &mut container, &mut emoji_list);
69            } else {
70                last_begin = index;
71            }
72            continue;
73        }
74        let is_emoji = unic_emoji_char::is_emoji(char);
75        let is_emoji_component = unic_emoji_char::is_emoji_component(char);
76        let is_emoji_modifier_base = unic_emoji_char::is_emoji_modifier_base(char);
77        let is_emoji_modifier = unic_emoji_char::is_emoji_modifier(char);
78        let is_emoji_variant = variation_selectors.contains(&char);
79        if !previous_zwj {
80            let is_empty = container.is_empty();
81            if is_empty {
82                last_begin = index;
83            }
84            // For Zero width joiners, we continue
85            if !is_empty && (char == zwj || is_emoji_variant) {
86                container.push(char);
87                previous_zwj = true;
88                continue;
89            } else if !is_empty && char != zwj {
90                // If this character is an emoji component and the previous character
91                // was an emoji, don't compose just yet
92                if !(previous_emoji && (is_emoji_component)) {
93                    compose(last_begin, &mut container, &mut emoji_list);
94                }
95            }
96        }
97        previous_zwj = false;
98        if is_emoji
99            || is_emoji_component
100            || is_emoji_modifier_base
101            || is_emoji_modifier
102            || is_emoji_presentation
103        {
104            if container.is_empty() {
105                last_begin = index;
106            }
107            container.push(char);
108        }
109        previous_emoji = is_emoji;
110    }
111    if !container.is_empty() {
112        compose(last_begin, &mut container, &mut emoji_list);
113    }
114    emoji_list.into_iter()
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    // Invisible code points (zero width joiner U+200D, variation selector U+FE0F) are
    // written as escapes so the fixtures survive editors and re-encoding.

    /// Mixed text with simple emoji, a heart with variation selector and two ZWJ families.
    #[test]
    fn find_all() {
        let content =
            "Test 😘❤\u{fe0f}! 😻💓 👨\u{200d}👩\u{200d}👦  kk 👨\u{200d}👩\u{200d}👧\u{200d}👦";
        let emojis = find_emoji(content);
        assert_eq!(emojis.len(), 6);
        assert_eq!(emojis[0].name, "face blowing a kiss");
        assert_eq!(emojis[5].name, "family: man, woman, girl, boy");
    }

    /// Flags are pairs of regional indicators and must each be found as one emoji.
    #[test]
    fn fun_with_flags() {
        let content = "🇦🇩 🇪🇸";
        let emojis = find_emoji(content);
        assert_eq!(emojis.len(), 2);
    }

    /// Every reported byte range must slice exactly the emoji's glyph out of the input.
    #[test]
    fn test_ranges() {
        let content = "🇦🇩 🇪🇸Test ❤\u{fe0f} 😘 😻💓 👨\u{200d}👩\u{200d}👦  kk 👨\u{200d}👩\u{200d}👧\u{200d}👦";
        let emojis = find_emoji_ranges(content);
        for (emoji, range) in emojis {
            assert_eq!(&content[range], emoji.glyph);
        }
    }

    /// A red heart followed by its variation selector is a single emoji.
    #[test]
    fn test_read_heart() {
        let content = "❤\u{fe0f}";
        let emojis = find_emoji(content);
        assert_eq!(emojis.len(), 1);
        assert_eq!(emojis[0].glyph, content);
    }
}