1use core::{
2 fmt::{self, Write},
3 ops::Deref,
4};
5
6use blake2::Blake2b256;
7use digest::Digest;
8
9use crate::fmt::{DigestWriter, Lines, TsvField};
10
/// A word list paired with a 32-byte hash.
///
/// The `Send + Sync` supertraits mean every implementor (and therefore every
/// `dyn Dict` trait object) can be shared between threads.
pub trait Dict: Send + Sync {
    /// Returns the dictionary's words as a slice of string slices.
    fn words(&self) -> &[&str];

    /// Returns the 32-byte hash associated with this dictionary.
    fn hash(&self) -> &[u8; 32];
}
21
/// Owning dictionary: a boxed slice of words borrowed for `'a`, plus the
/// 32-byte hash stored inline.
///
/// Field 0 holds the words, field 1 the hash. Values are built through
/// `from_lines`, `from_sep`, or the `FromIterator<&str>` impl, which computes
/// the hash from the words.
pub struct BoxDict<'a>(Box<[&'a str]>, [u8; 32]);
25
/// Borrowing dictionary: a reference to an existing word slice (field 0) and a
/// reference to an existing 32-byte hash (field 1), both valid for `'a`.
///
/// Created through the `unsafe` constructor `RefDict::new`, which stores both
/// references as given without computing or checking the hash.
pub struct RefDict<'a>(&'a [&'a str], &'a [u8; 32]);
30
31impl<'a> BoxDict<'a> {
32 pub fn from_lines(s: &'a str) -> Self {
35 Self::from_iter(s.lines().map(str::trim))
36 }
37
38 pub fn from_sep(s: &'a str, sep: &str) -> Self {
41 Self::from_iter(s.split(sep))
42 }
43}
44
45impl<'a> FromIterator<&'a str> for BoxDict<'a> {
46 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
47 let mut items: Vec<_> = iter.into_iter().filter(|&l| !l.is_empty()).collect();
48 items.sort_unstable();
49 items.dedup();
50 let mut w = DigestWriter(Blake2b256::new());
51 write!(w, "{}", Lines(items.iter().map(TsvField))).unwrap();
53 BoxDict(items.into(), w.0.finalize().into())
54 }
55}
56
57impl<'a> RefDict<'a> {
58 pub const unsafe fn new(words: &'a [&'a str], hash: &'a [u8; 32]) -> Self {
63 RefDict(words, hash)
64 }
65}
66
67impl<'a> Deref for BoxDict<'a> {
68 type Target = dyn Dict + 'a;
69 fn deref(&self) -> &Self::Target {
70 self
71 }
72}
73
74impl<'a> Deref for RefDict<'a> {
75 type Target = dyn Dict + 'a;
76 fn deref(&self) -> &Self::Target {
77 self
78 }
79}
80
81impl Dict for BoxDict<'_> {
82 fn words(&self) -> &[&str] {
83 &self.0
84 }
85 fn hash(&self) -> &[u8; 32] {
86 &self.1
87 }
88}
89
90impl Dict for RefDict<'_> {
91 fn words(&self) -> &[&str] {
92 self.0
93 }
94 fn hash(&self) -> &[u8; 32] {
95 self.1
96 }
97}
98
99impl<'a> fmt::Debug for dyn Dict + 'a {
100 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101 let mut out = [0u8; 64];
102 hex::encode_to_slice(self.hash(), &mut out).unwrap();
103 let hash = str::from_utf8(&out).unwrap();
104 write!(f, "Dict({hash})")
105 }
106}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// One hash vector: expected hex digest, raw input, and an optional
    /// separator (`None` means the input is split into lines).
    type Vector = (&'static str, &'static str, Option<&'static str>);

    const VECTORS: &[Vector] = &[
        (
            "749a7ee32cf838199eae943516767f7ef02d49b212202f1aad74cacd645e2edf",
            "bob\ndole",
            None,
        ),
        // Empty and whitespace-only line input share one digest.
        (
            "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
            "",
            None,
        ),
        (
            "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
            " \n",
            None,
        ),
        // The separator never occurs, so the whole input is a single word.
        (
            "f9a96c938288e95ab3b8804104a69daf44e925fd962565233d9de5d26e951068",
            "bob\ndole",
            Some("\0"),
        ),
        // Order, padding, blank lines, duplicates and the choice of separator
        // must not change the digest of {a, b, c}.
        (
            "3b4312af5a1f7e9eb79c27b4503f734d303e6664d2df2796ec034b4c34195dbf",
            "a\nb\nc",
            None,
        ),
        (
            "3b4312af5a1f7e9eb79c27b4503f734d303e6664d2df2796ec034b4c34195dbf",
            "b\nc\na",
            None,
        ),
        (
            "3b4312af5a1f7e9eb79c27b4503f734d303e6664d2df2796ec034b4c34195dbf",
            " b \na \nc\n\n\n",
            None,
        ),
        (
            "3b4312af5a1f7e9eb79c27b4503f734d303e6664d2df2796ec034b4c34195dbf",
            "a\0b\0c",
            Some("\0"),
        ),
        (
            "3b4312af5a1f7e9eb79c27b4503f734d303e6664d2df2796ec034b4c34195dbf",
            "c\0b\0a\0a\0a",
            Some("\0"),
        ),
        // Separator-split pieces are not trimmed, so " " is a word here.
        (
            "3b42ee5c745153f2fe8533b19c35411d8d45c70bbecf0dc3ac9e60b7eb5ea07d",
            " \0",
            Some("\0"),
        ),
        // A single word made of backslash, CR, LF and tab.
        (
            "ff11901891de4daf46c9ffc4a5c23ae22c4fa2597dc1beb86d2ef5bf87d9c878",
            "\\\r\n\t",
            Some("\0"),
        ),
        // Interior whitespace is kept by line trimming.
        (
            "dec3a7b8941401737abb9ff3f37cde4b47c79c5be60bba8ba2ffb02fb84864ba",
            "a a",
            None,
        ),
    ];

    /// Checks every vector's digest against `BoxDict`'s computed hash.
    #[test]
    fn box_dict_hash_vectors() {
        for &(expected, input, separator) in VECTORS {
            let dict = if let Some(sep) = separator {
                BoxDict::from_sep(input, sep)
            } else {
                BoxDict::from_lines(input)
            };
            let actual = hex::encode(dict.hash());
            assert_eq!(expected, actual, "{:?}", dict.words());
        }
    }
}