1use std::collections::BTreeSet;
7
8use crate::{
9 error::{Error, Result},
10 expr::{Expr, NumberLiteral},
11 id::Symbol,
12 ref_id::ContentId,
13};
14
15pub const DATUM_CONTENT_ALGORITHM_NAMESPACE: &str = "core";
17pub const DATUM_CONTENT_ALGORITHM_NAME: &str = "sha256-datum-v1";
19
20#[derive(Clone, Debug, PartialEq, Eq, Hash)]
38pub enum Datum {
39 Nil,
41 Bool(bool),
43 Number(NumberLiteral),
45 Symbol(Symbol),
47 String(String),
49 Bytes(Vec<u8>),
51 List(Vec<Datum>),
53 Vector(Vec<Datum>),
55 Map(Vec<(Datum, Datum)>),
58 Set(Vec<Datum>),
60 Node {
63 tag: Symbol,
65 fields: Vec<(Symbol, Datum)>,
67 },
68}
69
70impl Datum {
71 pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
78 let mut out = Vec::new();
79 write_bytes(&mut out, b"sim6:datum:v1");
80 self.write_canonical(&mut out)?;
81 Ok(out)
82 }
83
84 pub fn content_id(&self) -> Result<ContentId> {
86 Ok(ContentId::from_bytes(
87 datum_content_algorithm(),
88 sha256(&self.canonical_bytes()?),
89 ))
90 }
91
92 fn write_canonical(&self, out: &mut Vec<u8>) -> Result<()> {
93 match self {
94 Self::Nil => write_tag(out, "sim6:datum:nil"),
95 Self::Bool(value) => {
96 write_tag(out, "sim6:datum:bool");
97 out.push(u8::from(*value));
98 }
99 Self::Number(value) => {
100 write_tag(out, "sim6:datum:number");
101 write_symbol(out, &value.domain);
102 write_string(out, &value.canonical);
103 }
104 Self::Symbol(symbol) => {
105 write_tag(out, "sim6:datum:symbol");
106 write_symbol(out, symbol);
107 }
108 Self::String(value) => {
109 write_tag(out, "sim6:datum:string");
110 write_string(out, value);
111 }
112 Self::Bytes(value) => {
113 write_tag(out, "sim6:datum:bytes");
114 write_bytes(out, value);
115 }
116 Self::List(items) => {
117 write_tag(out, "sim6:datum:list");
118 write_records(out, ordered_child_records(items)?)?;
119 }
120 Self::Vector(items) => {
121 write_tag(out, "sim6:datum:vector");
122 write_records(out, ordered_child_records(items)?)?;
123 }
124 Self::Map(entries) => {
125 write_tag(out, "sim6:datum:map");
126 write_records(out, map_records(entries)?)?;
127 }
128 Self::Set(items) => {
129 write_tag(out, "sim6:datum:set");
130 write_records(out, set_records(items)?)?;
131 }
132 Self::Node { tag, fields } => {
133 write_tag(out, "sim6:datum:node");
134 write_symbol(out, tag);
135 write_records(out, node_field_records(fields)?)?;
136 }
137 }
138 Ok(())
139 }
140}
141
142pub fn datum_content_algorithm() -> Symbol {
144 Symbol::qualified(
145 DATUM_CONTENT_ALGORITHM_NAMESPACE,
146 DATUM_CONTENT_ALGORITHM_NAME,
147 )
148}
149
150impl TryFrom<Expr> for Datum {
151 type Error = Error;
152
153 fn try_from(expr: Expr) -> Result<Self> {
154 match expr {
155 Expr::Nil => Ok(Self::Nil),
156 Expr::Bool(value) => Ok(Self::Bool(value)),
157 Expr::Number(value) => Ok(Self::Number(value)),
158 Expr::Symbol(value) => Ok(Self::Symbol(value)),
159 Expr::String(value) => Ok(Self::String(value)),
160 Expr::Bytes(value) => Ok(Self::Bytes(value)),
161 Expr::List(items) => items
162 .into_iter()
163 .map(Self::try_from)
164 .collect::<Result<Vec<_>>>()
165 .map(Self::List),
166 Expr::Vector(items) => items
167 .into_iter()
168 .map(Self::try_from)
169 .collect::<Result<Vec<_>>>()
170 .map(Self::Vector),
171 Expr::Map(entries) => entries
172 .into_iter()
173 .map(|(key, value)| Ok((Self::try_from(key)?, Self::try_from(value)?)))
174 .collect::<Result<Vec<_>>>()
175 .map(Self::Map),
176 Expr::Set(items) => items
177 .into_iter()
178 .map(Self::try_from)
179 .collect::<Result<Vec<_>>>()
180 .map(Self::Set),
181 Expr::Extension { tag, payload } => extension_to_node(tag, *payload),
182 other => Err(Error::TypeMismatch {
183 expected: "datum expression",
184 found: expr_variant_name(&other),
185 }),
186 }
187 }
188}
189
190impl From<Datum> for Expr {
191 fn from(datum: Datum) -> Self {
192 match datum {
193 Datum::Nil => Self::Nil,
194 Datum::Bool(value) => Self::Bool(value),
195 Datum::Number(value) => Self::Number(value),
196 Datum::Symbol(value) => Self::Symbol(value),
197 Datum::String(value) => Self::String(value),
198 Datum::Bytes(value) => Self::Bytes(value),
199 Datum::List(items) => Self::List(items.into_iter().map(Self::from).collect()),
200 Datum::Vector(items) => Self::Vector(items.into_iter().map(Self::from).collect()),
201 Datum::Map(entries) => Self::Map(
202 entries
203 .into_iter()
204 .map(|(key, value)| (Self::from(key), Self::from(value)))
205 .collect(),
206 ),
207 Datum::Set(items) => Self::Set(items.into_iter().map(Self::from).collect()),
208 Datum::Node { tag, fields } => Self::Extension {
209 tag,
210 payload: Box::new(Self::Map(
211 fields
212 .into_iter()
213 .map(|(field, value)| (Self::Symbol(field), Self::from(value)))
214 .collect(),
215 )),
216 },
217 }
218 }
219}
220
221fn extension_to_node(tag: Symbol, payload: Expr) -> Result<Datum> {
222 let Expr::Map(entries) = payload else {
223 return Err(Error::TypeMismatch {
224 expected: "datum node field map",
225 found: expr_variant_name(&payload),
226 });
227 };
228
229 let fields = entries
230 .into_iter()
231 .map(|(key, value)| {
232 let Expr::Symbol(field) = key else {
233 return Err(Error::TypeMismatch {
234 expected: "datum node field symbol",
235 found: expr_variant_name(&key),
236 });
237 };
238 Ok((field, Datum::try_from(value)?))
239 })
240 .collect::<Result<Vec<_>>>()?;
241 Ok(Datum::Node { tag, fields })
242}
243
244fn expr_variant_name(expr: &Expr) -> &'static str {
245 match expr {
246 Expr::Nil => "nil expression",
247 Expr::Bool(_) => "bool expression",
248 Expr::Number(_) => "number expression",
249 Expr::Symbol(_) => "symbol expression",
250 Expr::Local(_) => "local expression",
251 Expr::String(_) => "string expression",
252 Expr::Bytes(_) => "bytes expression",
253 Expr::List(_) => "list expression",
254 Expr::Vector(_) => "vector expression",
255 Expr::Map(_) => "map expression",
256 Expr::Set(_) => "set expression",
257 Expr::Call { .. } => "call expression",
258 Expr::Infix { .. } => "infix expression",
259 Expr::Prefix { .. } => "prefix expression",
260 Expr::Postfix { .. } => "postfix expression",
261 Expr::Block(_) => "block expression",
262 Expr::Quote { .. } => "quote expression",
263 Expr::Annotated { .. } => "annotated expression",
264 Expr::Extension { .. } => "extension expression",
265 }
266}
267
268fn ordered_child_records(items: &[Datum]) -> Result<Vec<Vec<u8>>> {
269 items.iter().map(Datum::canonical_bytes).collect()
270}
271
272fn map_records(entries: &[(Datum, Datum)]) -> Result<Vec<Vec<u8>>> {
273 let mut keys = BTreeSet::new();
274 let mut records = Vec::with_capacity(entries.len());
275 for (key, value) in entries {
276 let key_bytes = key.canonical_bytes()?;
277 if !keys.insert(key_bytes.clone()) {
278 return Err(Error::Eval("duplicate datum map key".to_owned()));
279 }
280 let value_bytes = value.canonical_bytes()?;
281 let mut record = Vec::new();
282 write_tag(&mut record, "sim6:datum:map-entry");
283 write_bytes(&mut record, &key_bytes);
284 write_bytes(&mut record, &value_bytes);
285 records.push(record);
286 }
287 sort_records(&mut records);
288 Ok(records)
289}
290
291fn set_records(items: &[Datum]) -> Result<Vec<Vec<u8>>> {
292 let mut seen = BTreeSet::new();
293 let mut records = Vec::with_capacity(items.len());
294 for item in items {
295 let bytes = item.canonical_bytes()?;
296 if !seen.insert(bytes.clone()) {
297 return Err(Error::Eval("duplicate datum set entry".to_owned()));
298 }
299 records.push(bytes);
300 }
301 sort_records(&mut records);
302 Ok(records)
303}
304
305fn node_field_records(fields: &[(Symbol, Datum)]) -> Result<Vec<Vec<u8>>> {
306 let mut names = BTreeSet::new();
307 let mut records = Vec::with_capacity(fields.len());
308 for (name, value) in fields {
309 let mut name_bytes = Vec::new();
310 write_symbol(&mut name_bytes, name);
311 if !names.insert(name_bytes.clone()) {
312 return Err(Error::Eval("duplicate datum node field".to_owned()));
313 }
314 let value_bytes = value.canonical_bytes()?;
315 let mut record = Vec::new();
316 write_tag(&mut record, "sim6:datum:node-field");
317 write_bytes(&mut record, &name_bytes);
318 write_bytes(&mut record, &value_bytes);
319 records.push(record);
320 }
321 sort_records(&mut records);
322 Ok(records)
323}
324
325fn sort_records(records: &mut [Vec<u8>]) {
326 records.sort_by(|left, right| {
327 sha256(left)
328 .cmp(&sha256(right))
329 .then_with(|| left.cmp(right))
330 });
331}
332
333fn write_records(out: &mut Vec<u8>, records: Vec<Vec<u8>>) -> Result<()> {
334 write_len(out, records.len())?;
335 for record in records {
336 write_bytes(out, &record);
337 }
338 Ok(())
339}
340
341fn write_tag(out: &mut Vec<u8>, tag: &str) {
342 write_bytes(out, tag.as_bytes());
343}
344
345fn write_symbol(out: &mut Vec<u8>, symbol: &Symbol) {
346 match &symbol.namespace {
347 Some(namespace) => {
348 out.push(1);
349 write_string(out, namespace);
350 }
351 None => out.push(0),
352 }
353 write_string(out, &symbol.name);
354}
355
356fn write_string(out: &mut Vec<u8>, value: &str) {
357 write_bytes(out, value.as_bytes());
358}
359
360fn write_bytes(out: &mut Vec<u8>, value: &[u8]) {
361 write_len(out, value.len()).expect("canonical datum record length exceeded u64");
362 out.extend_from_slice(value);
363}
364
365fn write_len(out: &mut Vec<u8>, len: usize) -> Result<()> {
366 let len = u64::try_from(len).map_err(|_| Error::Eval("datum record too large".to_owned()))?;
367 out.extend_from_slice(&len.to_be_bytes());
368 Ok(())
369}
370
371fn sha256(input: &[u8]) -> [u8; 32] {
372 const H0: [u32; 8] = [
373 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab,
374 0x5be0cd19,
375 ];
376 const K: [u32; 64] = [
377 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
378 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
379 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
380 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
381 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
382 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
383 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
384 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
385 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
386 0xc67178f2,
387 ];
388
389 let mut h = H0;
390 let mut message = input.to_vec();
391 let bit_len = u64::try_from(input.len())
392 .unwrap_or(u64::MAX)
393 .wrapping_mul(8);
394 message.push(0x80);
395 while message.len() % 64 != 56 {
396 message.push(0);
397 }
398 message.extend_from_slice(&bit_len.to_be_bytes());
399
400 for chunk in message.chunks_exact(64) {
401 let mut w = [0_u32; 64];
402 for (word, bytes) in w.iter_mut().take(16).zip(chunk.chunks_exact(4)) {
403 *word = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
404 }
405 for i in 16..64 {
406 let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3);
407 let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10);
408 w[i] = w[i - 16]
409 .wrapping_add(s0)
410 .wrapping_add(w[i - 7])
411 .wrapping_add(s1);
412 }
413
414 let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut hh] = h;
415 for i in 0..64 {
416 let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
417 let ch = (e & f) ^ ((!e) & g);
418 let temp1 = hh
419 .wrapping_add(s1)
420 .wrapping_add(ch)
421 .wrapping_add(K[i])
422 .wrapping_add(w[i]);
423 let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
424 let maj = (a & b) ^ (a & c) ^ (b & c);
425 let temp2 = s0.wrapping_add(maj);
426
427 hh = g;
428 g = f;
429 f = e;
430 e = d.wrapping_add(temp1);
431 d = c;
432 c = b;
433 b = a;
434 a = temp1.wrapping_add(temp2);
435 }
436
437 h[0] = h[0].wrapping_add(a);
438 h[1] = h[1].wrapping_add(b);
439 h[2] = h[2].wrapping_add(c);
440 h[3] = h[3].wrapping_add(d);
441 h[4] = h[4].wrapping_add(e);
442 h[5] = h[5].wrapping_add(f);
443 h[6] = h[6].wrapping_add(g);
444 h[7] = h[7].wrapping_add(hh);
445 }
446
447 let mut out = [0_u8; 32];
448 for (slot, word) in out.chunks_exact_mut(4).zip(h) {
449 slot.copy_from_slice(&word.to_be_bytes());
450 }
451 out
452}
453
454#[cfg(test)]
455#[path = "datum_tests.rs"]
456mod tests;