Skip to main content

visual_hashing/
emojihash.rs

// SPDX-FileCopyrightText: 2026 Blackcat Informatics® Inc. <paudley@blackcatinformatics.ca>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! A nameable 64-emoji visual hash.
5//!
6//! A BLAKE3-XOF digest is sliced into 6-bit symbols, each indexing a fixed
7//! alphabet that favours common animals and then familiar foods over abstract,
//! confusable symbols — a fingerprint only helps if a human can read it back.
9
/// The 64-entry emoji alphabet (a 6-bit digit set).
///
/// Each entry is a single Unicode scalar value: animals first, then fruit and
/// vegetables. Entry `i` is named by the entry at the same index in
/// [`LABELS`], so the two tables must be kept in the same order.
pub const EMOJI: [&str; 64] = [
    "🐵", "🐶", "🐺", "🦊", "🐱", "🦁", "🐯", "🐴", "🦄", "🦓", "🦌", "🐮", "🐷", "🐗", "🐭", "🐹",
    "🐰", "🐻", "🐼", "🐨", "🐸", "🐲", "🐔", "🐧", "🦆", "🦅", "🦉", "🦇", "🐢", "🐍", "🦎", "🐊",
    "🐳", "🐬", "🐟", "🐠", "🐡", "🦈", "🐙", "🦑", "🦀", "🦞", "🦐", "🦋", "🐌", "🐞", "🐝", "🐜",
    "🦂", "🍎", "🍐", "🍊", "🍋", "🍌", "🍉", "🍇", "🍓", "🍒", "🍍", "🥝", "🍑", "🥥", "🥕", "🌽",
];
17
/// The stable label for each entry in [`EMOJI`], by index.
///
/// `LABELS[i]` is the lowercase, hyphen-separated name of `EMOJI[i]`; the two
/// tables must be kept in the same order.
#[rustfmt::skip] // eight names per row keeps the index of any entry easy to count
pub const LABELS: [&str; 64] = [
    // 0..8
    "monkey", "dog", "wolf", "fox", "cat", "lion", "tiger", "horse",
    // 8..16
    "unicorn", "zebra", "deer", "cow", "pig", "boar", "mouse", "hamster",
    // 16..24
    "rabbit", "bear", "panda", "koala", "frog", "dragon", "chicken", "penguin",
    // 24..32
    "duck", "eagle", "owl", "bat", "turtle", "snake", "lizard", "crocodile",
    // 32..40
    "whale", "dolphin", "fish", "tropical-fish", "blowfish", "shark", "octopus", "squid",
    // 40..48
    "crab", "lobster", "shrimp", "butterfly", "snail", "lady-beetle", "bee", "ant",
    // 48..56
    "scorpion", "apple", "pear", "orange", "lemon", "banana", "watermelon", "grapes",
    // 56..64
    "strawberry", "cherries", "pineapple", "kiwi", "peach", "coconut", "carrot", "corn",
];
85
/// How many distinct emoji digits exist: one per value of a 6-bit symbol.
pub const ALPHABET_SIZE: usize = 1 << 6;
88
89/// Return `length` 6-bit digest symbols (each in `0..64`).
90pub fn emoji_indices(data: &[u8], length: usize) -> Vec<usize> {
91    let wanted = length.max(1);
92    let nbytes = (wanted * 6).div_ceil(8);
93    let mut digest = vec![0u8; nbytes];
94    blake3::Hasher::new()
95        .update(data)
96        .finalize_xof()
97        .fill(&mut digest);
98
99    let mut out = Vec::with_capacity(wanted);
100    let mut acc: u64 = 0;
101    let mut bits: u32 = 0;
102    for byte in digest {
103        acc = (acc << 8) | u64::from(byte);
104        bits += 8;
105        while bits >= 6 && out.len() < wanted {
106            bits -= 6;
107            out.push(((acc >> bits) & 0x3f) as usize);
108        }
109        acc &= (1u64 << bits) - 1; // keep only the unconsumed low bits
110    }
111    out.truncate(wanted);
112    out
113}
114
115/// Map `data` to a space-joined string of `length` emoji digits.
116pub fn emojihash(data: &[u8], length: usize) -> String {
117    emoji_indices(data, length)
118        .into_iter()
119        .map(|i| EMOJI[i])
120        .collect::<Vec<_>>()
121        .join(" ")
122}
123
124/// The stable label names for [`emojihash`] output (space-joined).
125pub fn emojihash_labels(data: &[u8], length: usize) -> String {
126    emoji_indices(data, length)
127        .into_iter()
128        .map(|i| LABELS[i])
129        .collect::<Vec<_>>()
130        .join(" ")
131}