1use std::marker::PhantomData;
9
10use anybytes::{Bytes, View};
11use digest::Digest;
12use winnow::stream::Stream;
13
14use crate::blob::schemas::longstring::LongString;
15use crate::blob::Blob;
16use crate::blob::ToBlob;
17use crate::id::{ExclusiveId, Id, RawId, ID_LEN};
18use crate::import::ImportAttribute;
19use crate::macros::{entity, id_hex};
20use crate::metadata;
21use crate::metadata::{ConstDescribe, Describe};
22use crate::repo::BlobStore;
23use crate::trible::Fragment;
24use crate::trible::TribleSet;
25use crate::value::schemas::boolean::Boolean;
26use crate::value::schemas::genid::GenId;
27use crate::value::schemas::hash::{Blake3, Handle, HashProtocol};
28use crate::value::schemas::iu256::U256BE;
29use crate::value::Value;
30use triblespace_core_macros::attributes;
31
32use crate::import::json::{
33 parse_number_common, parse_string_common, parse_unicode_escape, EncodeError, JsonImportError,
34};
35
/// A decoded JSON string: a zero-copy UTF-8 view over the source bytes.
type ParsedString = View<str>;
37
// Attribute ids for the lossless JSON tree representation. Scalar payloads
// (strings, raw number text) are stored out-of-line as LongString blobs;
// object fields and array entries are modeled as their own entities so that
// field names, positions, and values survive a round trip.
attributes! {
    // Which node-kind constant (kind_object, kind_string, ...) this entity is.
    "D78B9D5A96029FDBBB327E377418AF51" as pub kind: GenId;
    // Payload of a string node.
    "40BC51924FD5D2058A48D1FA6073F871" as pub string: Handle<Blake3, LongString>;
    // Unparsed textual payload of a number node (keeps full precision).
    "428E02672FFD0D010D95AE641ADE1730" as pub number_raw: Handle<Blake3, LongString>;
    // Payload of a boolean node.
    "6F43FC771207574BF4CC58D3080C313C" as pub boolean: Boolean;
    // Field entry -> owning object node.
    "97A4ACD83EC9EA29EE7E487BB058C437" as pub field_parent: GenId;
    // Field entry -> field name blob.
    "2B9FCF2A60C9B05FADDA9F022762B822" as pub field_name: Handle<Blake3, LongString>;
    // Field entry -> zero-based position within the object.
    "38C7B1CDEA580DE70A520B2C8CBC4F14" as pub field_index: U256BE;
    // Field entry -> value node.
    "6E6CA175F925B6AA0844D357B409F15A" as pub field_value: GenId;
    // Array entry -> owning array node.
    "B49E6499D0A2CF5DD9A1E72D9D047747" as pub array_parent: GenId;
    // Array entry -> zero-based position within the array.
    "D5DA41A093BD0DE490925126D1150B57" as pub array_index: U256BE;
    // Array entry -> value node.
    "33535F41827B476B1EC0CACECE9BEED0" as pub array_value: GenId;
}
62
/// Kind id for JSON object nodes.
#[allow(non_upper_case_globals)]
pub const kind_object: Id = id_hex!("64D8981414502BF750387C617F1F9D09");
/// Kind id for JSON array nodes.
#[allow(non_upper_case_globals)]
pub const kind_array: Id = id_hex!("5DC7096A184E658C8E16C54EB207C386");
/// Kind id for JSON string nodes.
#[allow(non_upper_case_globals)]
pub const kind_string: Id = id_hex!("58A5EAC244801C5E26AD9178C784781A");
/// Kind id for JSON number nodes.
#[allow(non_upper_case_globals)]
pub const kind_number: Id = id_hex!("711555ADF72B9499E6A7F68E0BD3B4B8");
/// Kind id for JSON boolean nodes.
#[allow(non_upper_case_globals)]
pub const kind_bool: Id = id_hex!("7D3079C5E20658B6CA5F54771B5D0D30");
/// Kind id for JSON null nodes.
#[allow(non_upper_case_globals)]
pub const kind_null: Id = id_hex!("FC1DCF98A3A8418D6090EBD367CFFD7A");
/// Kind id for object-field entry entities.
#[allow(non_upper_case_globals)]
pub const kind_field: Id = id_hex!("890FC1F34B9FAD18F93E6EDF1B69A1A2");
/// Kind id for array-entry entities.
#[allow(non_upper_case_globals)]
pub const kind_array_entry: Id = id_hex!("EB325EABEA8C35DE7E5D700A5EF9207B");
87
88pub fn build_json_tree_metadata<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
91where
92 B: BlobStore<Blake3>,
93{
94 let mut metadata = Fragment::default();
95 let name = |value: &'static str| {
96 Bytes::from_source(value)
97 .view::<str>()
98 .expect("static JSON attribute names are valid UTF-8")
99 };
100
101 metadata += <GenId as ConstDescribe>::describe(blobs)?;
102 metadata += <Boolean as ConstDescribe>::describe(blobs)?;
103 metadata += <U256BE as ConstDescribe>::describe(blobs)?;
104 metadata += <Handle<Blake3, LongString> as ConstDescribe>::describe(blobs)?;
105
106 metadata +=
107 ImportAttribute::<GenId>::from_raw(kind.raw(), Some(name("json.kind"))).describe(blobs)?;
108 metadata += ImportAttribute::<Handle<Blake3, LongString>>::from_raw(
109 string.raw(),
110 Some(name("json.string")),
111 )
112 .describe(blobs)?;
113 metadata += ImportAttribute::<Handle<Blake3, LongString>>::from_raw(
114 number_raw.raw(),
115 Some(name("json.number_raw")),
116 )
117 .describe(blobs)?;
118 metadata += ImportAttribute::<Boolean>::from_raw(boolean.raw(), Some(name("json.boolean")))
119 .describe(blobs)?;
120 metadata +=
121 ImportAttribute::<GenId>::from_raw(field_parent.raw(), Some(name("json.field_parent")))
122 .describe(blobs)?;
123 metadata += ImportAttribute::<Handle<Blake3, LongString>>::from_raw(
124 field_name.raw(),
125 Some(name("json.field_name")),
126 )
127 .describe(blobs)?;
128 metadata +=
129 ImportAttribute::<U256BE>::from_raw(field_index.raw(), Some(name("json.field_index")))
130 .describe(blobs)?;
131 metadata +=
132 ImportAttribute::<GenId>::from_raw(field_value.raw(), Some(name("json.field_value")))
133 .describe(blobs)?;
134 metadata +=
135 ImportAttribute::<GenId>::from_raw(array_parent.raw(), Some(name("json.array_parent")))
136 .describe(blobs)?;
137 metadata +=
138 ImportAttribute::<U256BE>::from_raw(array_index.raw(), Some(name("json.array_index")))
139 .describe(blobs)?;
140 metadata +=
141 ImportAttribute::<GenId>::from_raw(array_value.raw(), Some(name("json.array_value")))
142 .describe(blobs)?;
143
144 metadata += describe_kind(blobs, kind_object, "json.kind.object", "JSON object node.")?;
145 metadata += describe_kind(blobs, kind_array, "json.kind.array", "JSON array node.")?;
146 metadata += describe_kind(blobs, kind_string, "json.kind.string", "JSON string node.")?;
147 metadata += describe_kind(blobs, kind_number, "json.kind.number", "JSON number node.")?;
148 metadata += describe_kind(blobs, kind_bool, "json.kind.bool", "JSON boolean node.")?;
149 metadata += describe_kind(blobs, kind_null, "json.kind.null", "JSON null node.")?;
150 metadata += describe_kind(
151 blobs,
152 kind_field,
153 "json.kind.field",
154 "JSON object field entry.",
155 )?;
156 metadata += describe_kind(
157 blobs,
158 kind_array_entry,
159 "json.kind.array_entry",
160 "JSON array entry.",
161 )?;
162
163 Ok(metadata)
164}
165
166fn describe_kind<B>(
167 blobs: &mut B,
168 kind_id: Id,
169 name: &str,
170 description: &str,
171) -> Result<Fragment, B::PutError>
172where
173 B: BlobStore<Blake3>,
174{
175 let name_handle = blobs.put(name.to_owned())?;
176
177 let tribles = entity! { ExclusiveId::force_ref(&kind_id) @
178 metadata::name: name_handle,
179 metadata::description: blobs.put(description.to_owned())?,
180 };
181 Ok(tribles)
182}
183
/// A parsed object field, buffered until the parent object's id is known.
#[derive(Clone)]
struct FieldEntry {
    // Decoded field name; feeds into the object/field hashes.
    name: View<str>,
    // Blob-store handle for the name, emitted on the field entity.
    name_handle: Value<Handle<Blake3, LongString>>,
    // Zero-based position of the field within its object.
    index: u64,
    // Id of the already-imported value node.
    value: Id,
}
191
/// A parsed array element, buffered until the parent array's id is known.
#[derive(Clone)]
struct ArrayEntry {
    // Zero-based position of the element within its array.
    index: u64,
    // Id of the already-imported value node.
    value: Id,
}
197
/// Imports JSON documents as lossless trees of content-addressed entities.
///
/// Node ids are derived by hashing each node's content (plus the optional
/// `id_salt`), so importing the same document with the same salt yields the
/// same ids (see the `lossless_ids_are_content_based` test).
///
/// NOTE(review): `Store` is pinned to `BlobStore<Blake3>` while node-id
/// hashing is generic over `Hasher` — confirm this asymmetry is intentional.
pub struct JsonTreeImporter<'a, Store, Hasher = Blake3>
where
    Store: BlobStore<Blake3>,
    Hasher: HashProtocol,
{
    // Destination for string/number payload blobs.
    store: &'a mut Store,
    // Optional salt mixed into every node-id hash.
    id_salt: Option<[u8; 32]>,
    // Selects the hash used for node ids without storing a hasher.
    _hasher: PhantomData<Hasher>,
}
211
212impl<'a, Store, Hasher> JsonTreeImporter<'a, Store, Hasher>
213where
214 Store: BlobStore<Blake3>,
215 Hasher: HashProtocol,
216{
217 pub fn new(store: &'a mut Store, id_salt: Option<[u8; 32]>) -> Self {
220 Self {
221 store,
222 id_salt,
223 _hasher: PhantomData,
224 }
225 }
226
227 pub fn import_str(&mut self, input: &str) -> Result<Fragment, JsonImportError> {
229 self.import_blob(input.to_owned().to_blob())
230 }
231
232 pub fn import_blob(&mut self, blob: Blob<LongString>) -> Result<Fragment, JsonImportError> {
235 let mut data = TribleSet::new();
236 let mut bytes = blob.bytes.clone();
237 self.skip_ws(&mut bytes);
238 let root = self.parse_value(&mut bytes, &mut data)?;
239 self.skip_ws(&mut bytes);
240 if bytes.peek_token().is_some() {
241 return Err(JsonImportError::Syntax("trailing tokens".into()));
242 }
243 Ok(Fragment::rooted(root, data))
244 }
245
    /// Describes the JSON tree schema (value schemas, attributes, and kind
    /// entities) into the underlying blob store and returns the fragment.
    pub fn metadata(&mut self) -> Result<Fragment, Store::PutError> {
        build_json_tree_metadata(self.store)
    }
251
    /// Parses a single JSON value at the cursor, records its entity (and any
    /// descendants) in `data`, and returns the content-derived node id.
    ///
    /// Scalars are hashed from a tag plus their textual payload, so equal
    /// scalars get equal ids; objects and arrays are delegated to their own
    /// parsers.
    fn parse_value(
        &mut self,
        bytes: &mut Bytes,
        data: &mut TribleSet,
    ) -> Result<Id, JsonImportError> {
        // Dispatch on the first byte of the value.
        match bytes.peek_token() {
            Some(b'n') => {
                self.consume_literal(bytes, b"null")?;
                // All nulls share one id (per salt): tag only, no payload.
                let id = self.hash_tagged(b"null", &[]);
                *data += entity! { ExclusiveId::force_ref(&id) @
                    kind: kind_null,
                };
                Ok(id)
            }
            Some(b't') => {
                self.consume_literal(bytes, b"true")?;
                let id = self.hash_tagged(b"bool", &[b"true"]);
                *data += entity! { ExclusiveId::force_ref(&id) @
                    kind: kind_bool,
                    boolean: true,
                };
                Ok(id)
            }
            Some(b'f') => {
                self.consume_literal(bytes, b"false")?;
                let id = self.hash_tagged(b"bool", &[b"false"]);
                *data += entity! { ExclusiveId::force_ref(&id) @
                    kind: kind_bool,
                    boolean: false,
                };
                Ok(id)
            }
            Some(b'"') => {
                let text = self.parse_string(bytes)?;
                // Id from the decoded text; payload stored as a blob.
                let id = self.hash_tagged(b"string", &[text.as_ref().as_bytes()]);
                let handle = self
                    .store
                    .put(text)
                    .map_err(|err| JsonImportError::EncodeString {
                        field: "string".to_string(),
                        source: EncodeError::from_error(err),
                    })?;
                *data += entity! { ExclusiveId::force_ref(&id) @
                    kind: kind_string,
                    string: handle,
                };
                Ok(id)
            }
            Some(b'{') => self.parse_object(bytes, data),
            Some(b'[') => self.parse_array(bytes, data),
            // Anything else must be a number (or a syntax error).
            _ => {
                let number = self.parse_number(bytes)?;
                let number_view = number
                    .view::<str>()
                    .map_err(|_| JsonImportError::Syntax("invalid number".into()))?;
                // The raw digits are kept verbatim — no lossy float parse.
                let id = self.hash_tagged(b"number", &[number_view.as_ref().as_bytes()]);
                let handle =
                    self.store
                        .put(number_view)
                        .map_err(|err| JsonImportError::EncodeNumber {
                            field: "number".to_string(),
                            source: EncodeError::from_error(err),
                        })?;
                *data += entity! { ExclusiveId::force_ref(&id) @
                    kind: kind_number,
                    number_raw: handle,
                };
                Ok(id)
            }
        }
    }
323
    /// Parses a JSON object, recording the object node plus one field-entry
    /// entity per member (order preserved via `field_index`).
    fn parse_object(
        &mut self,
        bytes: &mut Bytes,
        data: &mut TribleSet,
    ) -> Result<Id, JsonImportError> {
        self.consume_byte(bytes, b'{')?;
        self.skip_ws(bytes);

        // Fields are buffered so the object id can be derived from all of
        // them before any field-entry entity is emitted.
        let mut fields: Vec<FieldEntry> = Vec::new();
        if bytes.peek_token() == Some(b'}') {
            self.consume_byte(bytes, b'}')?;
        } else {
            let mut index: u64 = 0;
            loop {
                let name = self.parse_string(bytes)?;
                self.skip_ws(bytes);
                self.consume_byte(bytes, b':')?;
                self.skip_ws(bytes);
                // Values are imported (and their facts recorded) eagerly.
                let value = self.parse_value(bytes, data)?;
                let name_handle =
                    self.store
                        .put(name.clone())
                        .map_err(|err| JsonImportError::EncodeString {
                            field: "field".to_string(),
                            source: EncodeError::from_error(err),
                        })?;
                fields.push(FieldEntry {
                    name,
                    name_handle,
                    index,
                    value,
                });
                // saturating_add avoids a panic path on (implausible) overflow.
                index = index.saturating_add(1);

                self.skip_ws(bytes);
                match bytes.peek_token() {
                    Some(b',') => {
                        self.consume_byte(bytes, b',')?;
                        self.skip_ws(bytes);
                    }
                    Some(b'}') => {
                        self.consume_byte(bytes, b'}')?;
                        break;
                    }
                    _ => return Err(JsonImportError::Syntax("unexpected token".into())),
                }
            }
        }

        // Object node, id derived from the ordered (name, index, value) list.
        let object_id = self.hash_object(&fields);
        *data += entity! { ExclusiveId::force_ref(&object_id) @
            kind: kind_object,
        };

        // One field-entry entity per member, linking parent, name, position,
        // and value.
        for field in fields {
            let entry_id = self.hash_field_entry(&object_id, &field);
            *data += entity! { ExclusiveId::force_ref(&entry_id) @
                kind: kind_field,
                field_parent: object_id,
                field_name: field.name_handle,
                field_index: field.index,
                field_value: field.value,
            };
        }

        Ok(object_id)
    }
391
    /// Parses a JSON array, recording the array node plus one entry entity
    /// per element (order preserved via `array_index`).
    fn parse_array(
        &mut self,
        bytes: &mut Bytes,
        data: &mut TribleSet,
    ) -> Result<Id, JsonImportError> {
        self.consume_byte(bytes, b'[')?;
        self.skip_ws(bytes);

        // Entries are buffered so the array id can be derived from all of
        // them before any entry entity is emitted.
        let mut entries: Vec<ArrayEntry> = Vec::new();
        if bytes.peek_token() == Some(b']') {
            self.consume_byte(bytes, b']')?;
        } else {
            let mut index: u64 = 0;
            loop {
                // Elements are imported (and their facts recorded) eagerly.
                let value = self.parse_value(bytes, data)?;
                entries.push(ArrayEntry { index, value });
                // saturating_add avoids a panic path on (implausible) overflow.
                index = index.saturating_add(1);

                self.skip_ws(bytes);
                match bytes.peek_token() {
                    Some(b',') => {
                        self.consume_byte(bytes, b',')?;
                        self.skip_ws(bytes);
                    }
                    Some(b']') => {
                        self.consume_byte(bytes, b']')?;
                        break;
                    }
                    _ => return Err(JsonImportError::Syntax("unexpected token".into())),
                }
            }
        }

        // Array node, id derived from the ordered (index, value) list.
        let array_id = self.hash_array(&entries);
        *data += entity! { ExclusiveId::force_ref(&array_id) @
            kind: kind_array,
        };

        // One entry entity per element, linking parent, position, and value.
        for entry in entries {
            let entry_id = self.hash_array_entry(&array_id, &entry);
            *data += entity! { ExclusiveId::force_ref(&entry_id) @
                kind: kind_array_entry,
                array_parent: array_id,
                array_index: entry.index,
                array_value: entry.value,
            };
        }

        Ok(array_id)
    }
442
443 fn hash_object(&self, fields: &[FieldEntry]) -> Id {
444 let mut hasher = self.seeded_hasher();
445 hash_chunk(&mut hasher, b"object");
446 for field in fields {
447 let index_bytes = field.index.to_be_bytes();
448 hash_chunk(&mut hasher, field.name.as_ref().as_bytes());
449 hash_chunk(&mut hasher, &index_bytes);
450 hash_chunk(&mut hasher, field.value.as_ref());
451 }
452 self.finish_hash(hasher)
453 }
454
455 fn hash_array(&self, entries: &[ArrayEntry]) -> Id {
456 let mut hasher = self.seeded_hasher();
457 hash_chunk(&mut hasher, b"array");
458 for entry in entries {
459 let index_bytes = entry.index.to_be_bytes();
460 hash_chunk(&mut hasher, &index_bytes);
461 hash_chunk(&mut hasher, entry.value.as_ref());
462 }
463 self.finish_hash(hasher)
464 }
465
466 fn hash_field_entry(&self, parent: &Id, entry: &FieldEntry) -> Id {
467 let mut hasher = self.seeded_hasher();
468 hash_chunk(&mut hasher, b"field");
469 let index_bytes = entry.index.to_be_bytes();
470 hash_chunk(&mut hasher, parent.as_ref());
471 hash_chunk(&mut hasher, entry.name.as_ref().as_bytes());
472 hash_chunk(&mut hasher, &index_bytes);
473 hash_chunk(&mut hasher, entry.value.as_ref());
474 self.finish_hash(hasher)
475 }
476
477 fn hash_array_entry(&self, parent: &Id, entry: &ArrayEntry) -> Id {
478 let mut hasher = self.seeded_hasher();
479 hash_chunk(&mut hasher, b"array_entry");
480 let index_bytes = entry.index.to_be_bytes();
481 hash_chunk(&mut hasher, parent.as_ref());
482 hash_chunk(&mut hasher, &index_bytes);
483 hash_chunk(&mut hasher, entry.value.as_ref());
484 self.finish_hash(hasher)
485 }
486
487 fn hash_tagged(&self, tag: &[u8], parts: &[&[u8]]) -> Id {
488 let mut hasher = self.seeded_hasher();
489 hash_chunk(&mut hasher, tag);
490 for part in parts {
491 hash_chunk(&mut hasher, part);
492 }
493 self.finish_hash(hasher)
494 }
495
496 fn seeded_hasher(&self) -> Hasher {
497 let mut hasher = Hasher::new();
498 if let Some(salt) = self.id_salt {
499 hasher.update(salt.as_ref());
500 }
501 hasher
502 }
503
504 fn finish_hash(&self, hasher: Hasher) -> Id {
505 let digest = hasher.finalize();
506 id_from_digest(digest.as_ref())
507 }
508
509 fn skip_ws(&self, bytes: &mut Bytes) {
510 while matches!(bytes.peek_token(), Some(b) if b.is_ascii_whitespace()) {
511 bytes.pop_front();
512 }
513 }
514
515 fn consume_byte(&self, bytes: &mut Bytes, expected: u8) -> Result<(), JsonImportError> {
516 match bytes.pop_front() {
517 Some(b) if b == expected => Ok(()),
518 _ => Err(JsonImportError::Syntax("unexpected token".into())),
519 }
520 }
521
522 fn consume_literal(&self, bytes: &mut Bytes, literal: &[u8]) -> Result<(), JsonImportError> {
523 for expected in literal {
524 self.consume_byte(bytes, *expected)?;
525 }
526 Ok(())
527 }
528
529 fn parse_string(&self, bytes: &mut Bytes) -> Result<ParsedString, JsonImportError> {
530 let raw = parse_string_common(bytes, &mut parse_unicode_escape)?;
531 raw.view::<str>()
532 .map_err(|_| JsonImportError::Syntax("invalid utf-8".into()))
533 }
534
    /// Parses a JSON number literal, returning its raw bytes verbatim so no
    /// precision is lost to float conversion.
    fn parse_number(&self, bytes: &mut Bytes) -> Result<Bytes, JsonImportError> {
        parse_number_common(bytes)
    }
538}
539
540fn hash_chunk<H: Digest>(hasher: &mut H, bytes: &[u8]) {
541 let len = (bytes.len() as u64).to_be_bytes();
542 hasher.update(len);
543 hasher.update(bytes);
544}
545
546fn id_from_digest(digest: &[u8]) -> Id {
547 let mut raw: RawId = [0u8; ID_LEN];
548 raw.copy_from_slice(&digest[digest.len() - ID_LEN..]);
549 if raw == [0; ID_LEN] {
550 raw[0] = 1;
551 }
552 Id::new(raw).unwrap_or_else(|| unsafe { Id::force(raw) })
553}
554
#[cfg(test)]
mod tests {
    use super::{kind_array_entry, JsonTreeImporter};
    use crate::blob::MemoryBlobStore;
    use crate::blob::ToBlob;
    use crate::id::Id;
    use crate::macros::{find, pattern};
    use crate::value::schemas::hash::Blake3;

    /// Importing the same document twice (with no salt) must yield the same
    /// root id, since node ids are derived purely from content.
    #[test]
    fn lossless_ids_are_content_based() {
        let input = r#"{ "a": [1, 2] }"#;
        let mut blobs = MemoryBlobStore::<Blake3>::new();
        let mut importer = JsonTreeImporter::<_, Blake3>::new(&mut blobs, None);
        let root = importer
            .import_blob(input.to_blob())
            .unwrap()
            .root()
            .expect("import_blob returns a rooted fragment");
        // Release the first importer's borrow of `blobs` before re-importing.
        drop(importer);
        let mut other = JsonTreeImporter::<_, Blake3>::new(&mut blobs, None);
        let other_root = other
            .import_blob(input.to_blob())
            .unwrap()
            .root()
            .expect("import_blob returns a rooted fragment");
        assert_eq!(root, other_root);
    }

    /// Array elements must round-trip with their positions intact: querying
    /// the entry entities by `array_index` recovers 0 and 1 in order.
    #[test]
    fn lossless_preserves_array_order() {
        let input = r#"[1, 2]"#;
        let mut blobs = MemoryBlobStore::<Blake3>::new();
        let mut importer = JsonTreeImporter::<_, Blake3>::new(&mut blobs, None);
        let fragment = importer.import_blob(input.to_blob()).unwrap();
        let root = fragment
            .root()
            .expect("import_blob returns a rooted fragment");
        let catalog = fragment.facts();
        // Query all array-entry entities hanging off the root array.
        let mut entries = find!(
            (index: ethnum::U256, value: Id),
            pattern!(catalog, [{
                _?entry @
                super::kind: kind_array_entry,
                super::array_parent: root,
                super::array_index: ?index,
                super::array_value: ?value,
            }])
        )
        .collect::<Vec<_>>();
        entries.sort_by_key(|(index, _)| *index);
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].0, ethnum::U256::new(0));
        assert_eq!(entries[1].0, ethnum::U256::new(1));
    }
}
610}