1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
pub mod error;

use failure::Error;
use image::RgbaImage;
use lz4::block::{compress, decompress, CompressionMode};
use rayon::prelude::*;
use rmp_serde::{encode::write_named as mp_to_writer, from_read as mp_from_reader};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;

/// A single emoji entry of the search index.
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Emoji {
	/// Text printed in front of the name by `Index::items`
	/// (presumably the emoji character sequence itself; confirm against the index builder).
	pub value: String,
	/// Display names, keyed by language code (looked up via `get_by_lang`).
	pub name: HashMap<String, String>,
	/// Search keywords, keyed by language code; each language maps to a list of keywords.
	pub keywords: HashMap<String, Vec<String>>,
	/// SVG source text that `get_image` parses and rasterizes.
	pub svg: String,

	/// Score assigned by the most recent `update_rank` call.
	/// Never (de)serialized; it takes its `Default` value after deserialization.
	#[serde(skip)]
	pub rank: f32,
}

/// Looks up the entry of `h` that best matches the language code `lang`.
///
/// Resolution order:
/// 1. the exact code, e.g. `"en_GB"`;
/// 2. each "parent" code obtained by repeatedly stripping the last subtag at a
///    `_`, `-`, `.` or `@` separator, e.g. `"zh_Hant_TW"` -> `"zh_Hant"` -> `"zh"`;
/// 3. English (`"en"`) as the last resort.
///
/// Returns `None` only if none of these keys is present in `h`.
///
/// Parents are derived from separators rather than from a fixed byte offset, so
/// three-letter codes such as `"fil_PH"` resolve to `"fil"` (not `"fi"`), and
/// codes containing multi-byte characters cannot cause a slicing panic.
fn get_by_lang<'a, T, S: AsRef<str>>(h: &'a HashMap<String, T>, lang: S) -> Option<&'a T> {
	let lang = lang.as_ref();
	if let Some(v) = h.get(lang) {
		return Some(v);
	}

	// fall back to "parent" langs, most specific first. E.g. for "en_GB" try "en"
	let is_separator = |c: char| c == '_' || c == '-' || c == '.' || c == '@';
	let mut candidate = lang;
	while let Some(idx) = candidate.rfind(is_separator) {
		// `idx` is the byte offset of an ASCII separator, hence always a char boundary
		candidate = &candidate[..idx];
		if let Some(v) = h.get(candidate) {
			return Some(v);
		}
	}

	// last resort, English
	h.get("en")
}

impl Emoji {
	/// Returns the display name of this emoji for `lang`.
	///
	/// The lookup goes through `get_by_lang`, so its language fallbacks apply.
	/// If no name can be resolved at all, the literal `"Unknown"` is returned.
	pub fn name<'a, S: AsRef<str>>(&'a self, lang: S) -> &'a str {
		get_by_lang(&self.name, lang)
			.map(String::as_str)
			.unwrap_or("Unknown")
	}

	/// Recomputes `self.rank` for the search `query` in language `lang`.
	///
	/// The query is split on whitespace. Every query term is scored with
	/// `rank_similarity` against the localized name (as a whole) and against
	/// every whitespace-separated word of every localized keyword; the rank is
	/// the sum of all these scores. An empty or all-whitespace query yields `0.0`.
	///
	/// Both the name and the keywords are resolved through `get_by_lang`, so a
	/// regional code such as `"en_GB"` ranks against the same name that
	/// `Emoji::name` would display.
	pub fn update_rank<S: AsRef<str>, T: AsRef<str>>(&mut self, lang: S, query: T) {
		let lang = lang.as_ref();

		// The localized data does not depend on the query term: resolve it once.
		let name = get_by_lang(&self.name, lang);
		let keywords = get_by_lang(&self.keywords, lang);

		let mut rank = 0.0_f32;
		for slice in query.as_ref().split_whitespace() {
			if let Some(name) = name {
				rank += rank_similarity(slice, name);
			}

			if let Some(keywords) = keywords {
				for kw_slice in keywords.iter().flat_map(|kw| kw.split_whitespace()) {
					rank += rank_similarity(slice, kw_slice);
				}
			}
		}

		self.rank = rank;
	}

	/// Rasterizes the emoji's SVG so that it fits inside an area of
	/// `area_width` x `area_height` pixels while keeping its aspect ratio.
	///
	/// # Errors
	///
	/// Returns an error if the SVG text cannot be parsed or if rasterizing fails.
	///
	/// NOTE(review): the area dimensions are not validated; a zero width or
	/// height leads to a zero/non-finite scale being handed to the rasterizer.
	pub fn get_image(&self, area_width: usize, area_height: usize) -> Result<RgbaImage, Error> {
		let svg = nsvg::parse_str(&self.svg, nsvg::Units::Pixel, 96.0)?;

		let area_aspect = area_width as f32 / area_height as f32;
		let svg_aspect = svg.width() / svg.height();

		// If the area is relatively wider than the image, the height is the
		// limiting dimension; otherwise the width is.
		let scale = if area_aspect > svg_aspect {
			area_height as f32 / svg.height()
		} else {
			area_width as f32 / svg.width()
		};

		Ok(svg.rasterize(scale)?)
	}
}

/// Scores how well `query` matches `subject`.
///
/// * `5.0` when both strings are identical,
/// * `3.0` when `subject` begins with `query`,
/// * otherwise whatever `trigram::similarity` reports for the pair.
///
/// The equality and prefix checks are plain, case-sensitive `str` comparisons.
fn rank_similarity<S: AsRef<str>, T: AsRef<str>>(query: S, subject: T) -> f32 {
	let (needle, haystack) = (query.as_ref(), subject.as_ref());

	if needle == haystack {
		5.0
	} else if haystack.starts_with(needle) {
		3.0
	} else {
		trigram::similarity(needle, haystack)
	}
}

/// The searchable emoji index, as stored on disk by `Index::to_file` and
/// loaded by `Index::from_bytes`.
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct Index {
	/// All emojis of the index; `Index::search` re-ranks and re-orders this list in place.
	pub emojis: Vec<Arc<Emoji>>,
	/// NOTE(review): not read anywhere in this file; presumably the language
	/// codes for which names/keywords are available. Confirm against the index builder.
	pub locale_codes: Vec<String>,
}

impl Index {
	/// Decodes an index from its serialized form: an LZ4 block containing a
	/// MessagePack document (the format written by `Index::to_file`).
	///
	/// # Errors
	///
	/// Returns an error if decompression or MessagePack decoding fails.
	pub fn from_bytes(bytes: &[u8]) -> Result<Index, Error> {
		// No explicit size is passed: `to_file` asks the compressor to prepend
		// the uncompressed size, which the decompressor is expected to read back.
		let uncompressed: Vec<u8> = decompress(bytes, None)?;

		Ok(mp_from_reader(uncompressed.as_slice())?)
	}

	/// Serializes the index as MessagePack, compresses it with LZ4
	/// (high-compression mode, level 9, size prefix enabled) and writes the
	/// result to `path`.
	///
	/// # Errors
	///
	/// Returns an error if encoding, compression or writing the file fails.
	pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<(), Error> {
		let mut uncompressed: Vec<u8> = Vec::new();
		mp_to_writer(&mut uncompressed, self)?;

		let compressed = compress(
			uncompressed.as_slice(),
			Some(CompressionMode::HIGHCOMPRESSION(9)),
			true,
		)?;

		Ok(std::fs::write(path, compressed)?)
	}

	/// Returns one `"<value>: <name>"` line per emoji, in the current order of
	/// `self.emojis`, using the name localized for `lang`.
	pub fn items<S: AsRef<str>>(&self, lang: S) -> Vec<String> {
		let lang = lang.as_ref();

		self.emojis
			.iter()
			.map(|e| format!("{}: {}", e.value, e.name(lang)))
			.collect()
	}

	/// Ranks every emoji against `query` (in language `lang`) and sorts
	/// `self.emojis` by descending rank.
	///
	/// Ranking runs in parallel via rayon. Each emoji is accessed through
	/// `Arc::make_mut`, so an emoji whose `Arc` is shared elsewhere is cloned
	/// before its rank is updated. The sort is stable: emojis with equal rank
	/// keep their relative order.
	pub fn search<S: AsRef<str>, T: AsRef<str>>(&mut self, lang: S, query: T) {
		// Plain `&str`s are `Send + Sync`, so the worker closure can borrow
		// them directly; no owned copies are needed.
		let lang = lang.as_ref();
		let query = query.as_ref();

		self.emojis
			.par_iter_mut()
			.for_each(|emoji| Arc::make_mut(emoji).update_rank(lang, query));

		// A NaN rank has no ordering relative to other floats. Map it to
		// negative infinity so the comparator is a total order (no panic) and
		// such entries end up at the bottom of the list.
		let sort_key = |rank: f32| {
			if rank.is_nan() {
				std::f32::NEG_INFINITY
			} else {
				rank
			}
		};

		self.emojis.sort_by(|a, b| {
			sort_key(b.rank)
				.partial_cmp(&sort_key(a.rank))
				.unwrap_or(std::cmp::Ordering::Equal)
		});
	}
}