Skip to main content

triblespace_core/import/
json.rs

1//! Deterministic JSON *object* importer built on a winnow-based streaming parser.
2//!
3//! This importer hashes attribute/value pairs to derive entity identifiers.
4//! Identical JSON objects therefore converge to the same id, enabling structural
5//! deduplication.
6//!
7//! Note: this importer only accepts a top-level JSON object, or a top-level JSON
8//! array containing objects. Primitive roots are rejected.
9
10use std::collections::{HashMap, HashSet};
11use std::fmt;
12use std::marker::PhantomData;
13use std::str::FromStr;
14
15use anybytes::{Bytes, View};
16use winnow::stream::Stream;
17
18use crate::blob::schemas::longstring::LongString;
19use crate::blob::Blob;
20use crate::blob::ToBlob;
21use crate::id::{ExclusiveId, Id, RawId, ID_LEN};
22use crate::import::ImportAttribute;
23use crate::macros::entity;
24use crate::metadata;
25use crate::metadata::{ConstDescribe, Describe};
26use crate::repo::BlobStore;
27use crate::trible::{Fragment, Trible, TribleSet};
28use crate::value::schemas::boolean::Boolean;
29use crate::value::schemas::f64::F64;
30use crate::value::schemas::genid::GenId;
31use crate::value::schemas::hash::{Blake3, Handle, HashProtocol};
32use crate::value::schemas::UnknownValue;
33use crate::value::{RawValue, ToValue, Value, ValueSchema};
34
35/// Error returned by [`JsonObjectImporter`] when importing a JSON document.
36#[derive(Debug)]
37pub enum JsonImportError {
38    /// The document root is a primitive (string, number, bool, null) — only
39    /// objects and arrays of objects are accepted.
40    PrimitiveRoot,
41    /// A string field could not be encoded into the target value schema.
42    EncodeString {
43        /// Name of the JSON field.
44        field: String,
45        /// Underlying encoding error.
46        source: EncodeError,
47    },
48    /// A number field could not be encoded into the target value schema.
49    EncodeNumber {
50        /// Name of the JSON field.
51        field: String,
52        /// Underlying encoding error.
53        source: EncodeError,
54    },
55    /// The JSON input is syntactically invalid.
56    Syntax(String),
57}
58
59impl fmt::Display for JsonImportError {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        match self {
62            Self::PrimitiveRoot => write!(f, "cannot import JSON primitives as the document root"),
63            Self::EncodeString { field, source } => {
64                write!(f, "failed to encode string field {field:?}: {source}")
65            }
66            Self::EncodeNumber { field, source } => {
67                write!(f, "failed to encode number field {field:?}: {source}")
68            }
69            Self::Syntax(msg) => write!(f, "failed to parse JSON: {msg}"),
70        }
71    }
72}
73
74impl std::error::Error for JsonImportError {
75    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
76        match self {
77            Self::PrimitiveRoot | Self::Syntax(_) => None,
78            Self::EncodeString { source, .. } | Self::EncodeNumber { source, .. } => {
79                Some(source.as_error())
80            }
81        }
82    }
83}
84
/// Type-erased cause of a value-encoding failure during JSON import.
///
/// The concrete error is boxed, so callers only interact with it through
/// [`fmt::Display`] and [`std::error::Error`].
#[derive(Debug)]
pub struct EncodeError(Box<dyn std::error::Error + Send + Sync + 'static>);

impl EncodeError {
    /// Wraps an existing error as an encode error.
    pub fn from_error(err: impl std::error::Error + Send + Sync + 'static) -> Self {
        let boxed: Box<dyn std::error::Error + Send + Sync + 'static> = Box::new(err);
        Self(boxed)
    }

    /// Creates an encode error from a plain message string.
    pub fn message(message: impl Into<String>) -> Self {
        /// Minimal error whose display output is the message itself.
        #[derive(Debug)]
        struct Message(String);

        impl fmt::Display for Message {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(text) = self;
                f.write_str(text)
            }
        }

        impl std::error::Error for Message {}

        Self::from_error(Message(message.into()))
    }

    /// Borrows the wrapped error as a plain `dyn Error`.
    fn as_error(&self) -> &(dyn std::error::Error + 'static) {
        &*self.0
    }
}

impl fmt::Display for EncodeError {
    /// Delegates to the wrapped error, forwarding the caller's formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_error(), f)
    }
}

impl std::error::Error for EncodeError {
    /// Always reports the wrapped error as the source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        Some(self.as_error())
    }
}
127
128type ParsedString = View<str>;
129
130/// Deterministic JSON importer that derives entity ids from attribute/value pairs.
131///
132/// This importer expects either:
133/// - a top-level JSON object, or
134/// - a top-level array of JSON objects.
135///
136/// Use [`crate::import::json_tree::JsonTreeImporter`] when you need a lossless
137/// representation of arbitrary JSON values (including primitive roots).
138pub struct JsonObjectImporter<'a, Store, Hasher = Blake3>
139where
140    Store: BlobStore<Blake3>,
141    Hasher: HashProtocol,
142{
143    store: &'a mut Store,
144    bool_attrs: HashMap<View<str>, ImportAttribute<Boolean>>,
145    num_attrs: HashMap<View<str>, ImportAttribute<F64>>,
146    str_attrs: HashMap<View<str>, ImportAttribute<Handle<Blake3, LongString>>>,
147    genid_attrs: HashMap<View<str>, ImportAttribute<GenId>>,
148    id_salt: Option<[u8; 32]>,
149    _hasher: PhantomData<Hasher>,
150    array_fields: HashSet<View<str>>,
151}
152
153impl<'a, Store, Hasher> JsonObjectImporter<'a, Store, Hasher>
154where
155    Store: BlobStore<Blake3>,
156    Hasher: HashProtocol,
157{
158    fn attr_from_field<S: ValueSchema>(
159        &mut self,
160        field: &ParsedString,
161    ) -> Result<ImportAttribute<S>, JsonImportError> {
162        let handle =
163            self.store
164                .put(field.clone())
165                .map_err(|err| JsonImportError::EncodeString {
166                    field: field.as_ref().to_owned(),
167                    source: EncodeError::from_error(err),
168                })?;
169        Ok(ImportAttribute::<S>::from_handle(handle, field.clone()))
170    }
171
172    fn bool_attr(
173        &mut self,
174        field: &ParsedString,
175    ) -> Result<ImportAttribute<Boolean>, JsonImportError> {
176        let key = field.clone();
177        if let Some(attr) = self.bool_attrs.get(&key) {
178            return Ok(attr.clone());
179        }
180        let attr = self.attr_from_field::<Boolean>(field)?;
181        self.bool_attrs.insert(key, attr.clone());
182        Ok(attr)
183    }
184
185    fn num_attr(&mut self, field: &ParsedString) -> Result<ImportAttribute<F64>, JsonImportError> {
186        let key = field.clone();
187        if let Some(attr) = self.num_attrs.get(&key) {
188            return Ok(attr.clone());
189        }
190        let attr = self.attr_from_field::<F64>(field)?;
191        self.num_attrs.insert(key, attr.clone());
192        Ok(attr)
193    }
194
195    fn str_attr(
196        &mut self,
197        field: &ParsedString,
198    ) -> Result<ImportAttribute<Handle<Blake3, LongString>>, JsonImportError> {
199        let key = field.clone();
200        if let Some(attr) = self.str_attrs.get(&key) {
201            return Ok(attr.clone());
202        }
203        let attr = self.attr_from_field::<Handle<Blake3, LongString>>(field)?;
204        self.str_attrs.insert(key, attr.clone());
205        Ok(attr)
206    }
207
208    fn genid_attr(
209        &mut self,
210        field: &ParsedString,
211    ) -> Result<ImportAttribute<GenId>, JsonImportError> {
212        let key = field.clone();
213        if let Some(attr) = self.genid_attrs.get(&key) {
214            return Ok(attr.clone());
215        }
216        let attr = self.attr_from_field::<GenId>(field)?;
217        self.genid_attrs.insert(key, attr.clone());
218        Ok(attr)
219    }
220
221    /// Creates a new importer backed by `store`. Pass an optional 32-byte
222    /// salt to namespace the deterministic entity ids.
223    pub fn new(store: &'a mut Store, id_salt: Option<[u8; 32]>) -> Self {
224        Self {
225            store,
226            bool_attrs: HashMap::new(),
227            num_attrs: HashMap::new(),
228            str_attrs: HashMap::new(),
229            genid_attrs: HashMap::new(),
230            id_salt,
231            _hasher: PhantomData,
232            array_fields: HashSet::new(),
233        }
234    }
235
236    /// Imports a JSON string. Convenience wrapper around [`import_blob`](Self::import_blob).
237    pub fn import_str(&mut self, input: &str) -> Result<Fragment, JsonImportError> {
238        self.import_blob(input.to_owned().to_blob())
239    }
240
241    /// Imports a JSON document from a [`LongString`] blob, returning a
242    /// [`Fragment`] with the root entity ids as exports.
243    pub fn import_blob(&mut self, blob: Blob<LongString>) -> Result<Fragment, JsonImportError> {
244        let mut bytes = blob.bytes.clone();
245        self.skip_ws(&mut bytes);
246
247        let mut roots = Vec::new();
248        let mut staged = TribleSet::new();
249        match bytes.peek_token() {
250            Some(b'{') => {
251                let (root, obj_staged) = self.parse_object(&mut bytes)?;
252                staged += obj_staged;
253                roots.push(root.forget());
254            }
255            Some(b'[') => {
256                self.consume_byte(&mut bytes, b'[')?;
257                self.skip_ws(&mut bytes);
258                if bytes.peek_token() == Some(b']') {
259                    self.consume_byte(&mut bytes, b']')?;
260                } else {
261                    loop {
262                        self.skip_ws(&mut bytes);
263                        if bytes.peek_token() != Some(b'{') {
264                            return Err(JsonImportError::PrimitiveRoot);
265                        }
266                        let (root, obj_staged) = self.parse_object(&mut bytes)?;
267                        staged += obj_staged;
268                        roots.push(root.forget());
269                        self.skip_ws(&mut bytes);
270                        match bytes.peek_token() {
271                            Some(b',') => {
272                                self.consume_byte(&mut bytes, b',')?;
273                                continue;
274                            }
275                            Some(b']') => {
276                                self.consume_byte(&mut bytes, b']')?;
277                                break;
278                            }
279                            _ => return Err(JsonImportError::PrimitiveRoot),
280                        }
281                    }
282                }
283            }
284            _ => return Err(JsonImportError::PrimitiveRoot),
285        }
286
287        self.skip_ws(&mut bytes);
288        Ok(Fragment::new(roots, staged))
289    }
290
291    fn parse_object(
292        &mut self,
293        bytes: &mut Bytes,
294    ) -> Result<(ExclusiveId, TribleSet), JsonImportError> {
295        self.consume_byte(bytes, b'{')?;
296        self.skip_ws(bytes);
297        let mut pairs: Vec<(RawId, RawValue)> = Vec::new();
298        let mut staged = TribleSet::new();
299
300        if bytes.peek_token() == Some(b'}') {
301            self.consume_byte(bytes, b'}')?;
302        } else {
303            loop {
304                let field = self.parse_string(bytes)?;
305                self.skip_ws(bytes);
306                self.consume_byte(bytes, b':')?;
307                self.skip_ws(bytes);
308                self.parse_value(bytes, &field, &mut pairs, &mut staged)?;
309                self.skip_ws(bytes);
310                match bytes.peek_token() {
311                    Some(b',') => {
312                        self.consume_byte(bytes, b',')?;
313                        self.skip_ws(bytes);
314                    }
315                    Some(b'}') => {
316                        self.consume_byte(bytes, b'}')?;
317                        break;
318                    }
319                    _ => return Err(JsonImportError::Syntax("unexpected token".into())),
320                }
321            }
322        }
323
324        let entity = self.derive_id(&pairs)?;
325        for (attr_raw, value_raw) in pairs {
326            let attr_id = Id::new(attr_raw).ok_or(JsonImportError::PrimitiveRoot)?;
327            let value = Value::<UnknownValue>::new(value_raw);
328            staged.insert(&Trible::new(&entity, &attr_id, &value));
329        }
330
331        Ok((entity, staged))
332    }
333
334    fn parse_array(
335        &mut self,
336        bytes: &mut Bytes,
337        field: &ParsedString,
338        pairs: &mut Vec<(RawId, RawValue)>,
339        staged: &mut TribleSet,
340    ) -> Result<(), JsonImportError> {
341        self.consume_byte(bytes, b'[')?;
342        self.array_fields.insert(field.clone());
343        self.skip_ws(bytes);
344        if bytes.peek_token() == Some(b']') {
345            self.consume_byte(bytes, b']')?;
346            return Ok(());
347        }
348
349        loop {
350            self.parse_value(bytes, field, pairs, staged)?;
351            self.skip_ws(bytes);
352            match bytes.peek_token() {
353                Some(b',') => {
354                    self.consume_byte(bytes, b',')?;
355                    self.skip_ws(bytes);
356                }
357                Some(b']') => {
358                    self.consume_byte(bytes, b']')?;
359                    break;
360                }
361                _ => return Err(JsonImportError::Syntax("unexpected token".into())),
362            }
363        }
364        Ok(())
365    }
366
367    fn parse_value(
368        &mut self,
369        bytes: &mut Bytes,
370        field: &ParsedString,
371        pairs: &mut Vec<(RawId, RawValue)>,
372        staged: &mut TribleSet,
373    ) -> Result<(), JsonImportError> {
374        match bytes.peek_token() {
375            Some(b'n') => {
376                self.consume_literal(bytes, b"null")?;
377                Ok(())
378            }
379            Some(b't') => {
380                self.consume_literal(bytes, b"true")?;
381                let attr = self.bool_attr(field)?;
382                pairs.push((attr.raw(), true.to_value().raw));
383                Ok(())
384            }
385            Some(b'f') => {
386                self.consume_literal(bytes, b"false")?;
387                let attr = self.bool_attr(field)?;
388                pairs.push((attr.raw(), false.to_value().raw));
389                Ok(())
390            }
391            Some(b'"') => {
392                let text = self.parse_string(bytes)?;
393                let field_name = field.as_ref().to_owned();
394                let attr = self.str_attr(field)?;
395                let handle = self
396                    .store
397                    .put(text)
398                    .map_err(|err| JsonImportError::EncodeString {
399                        field: field_name,
400                        source: EncodeError::from_error(err),
401                    })?;
402                pairs.push((attr.raw(), handle.raw));
403                Ok(())
404            }
405            Some(b'{') => {
406                let (child, child_staged) = self.parse_object(bytes)?;
407                *staged += child_staged;
408                let attr = self.genid_attr(field)?;
409                let value = GenId::value_from(&child);
410                pairs.push((attr.raw(), value.raw));
411                Ok(())
412            }
413            Some(b'[') => self.parse_array(bytes, field, pairs, staged),
414            _ => {
415                let num = self.parse_number(bytes)?;
416                let num_str = num
417                    .view::<str>()
418                    .map_err(|_| JsonImportError::Syntax("invalid number".into()))?;
419                let number: f64 = f64::from_str(num_str.as_ref()).map_err(|err| {
420                    JsonImportError::EncodeNumber {
421                        field: field.as_ref().to_owned(),
422                        source: EncodeError::from_error(err),
423                    }
424                })?;
425                if !number.is_finite() {
426                    return Err(JsonImportError::EncodeNumber {
427                        field: field.as_ref().to_owned(),
428                        source: EncodeError::message("non-finite number"),
429                    });
430                }
431                let attr = self.num_attr(field)?;
432                let encoded: Value<F64> = number.to_value();
433                pairs.push((attr.raw(), encoded.raw));
434                Ok(())
435            }
436        }
437    }
438
439    fn derive_id(&self, pairs: &[(RawId, RawValue)]) -> Result<ExclusiveId, JsonImportError> {
440        let mut sorted = pairs.to_vec();
441        sorted
442            .sort_by(|(a_attr, a_val), (b_attr, b_val)| a_attr.cmp(b_attr).then(a_val.cmp(b_val)));
443
444        let mut hasher = Hasher::new();
445        if let Some(salt) = self.id_salt {
446            hasher.update(salt.as_ref());
447        }
448        for (attr, value) in &sorted {
449            hasher.update(attr);
450            hasher.update(value);
451        }
452        let digest: [u8; 32] = hasher.finalize().into();
453        let mut raw = [0u8; ID_LEN];
454        raw.copy_from_slice(&digest[digest.len() - ID_LEN..]);
455        let id = Id::new(raw).ok_or(JsonImportError::PrimitiveRoot)?;
456        Ok(ExclusiveId::force(id))
457    }
458
459    fn skip_ws(&self, bytes: &mut Bytes) {
460        while matches!(bytes.peek_token(), Some(b) if b.is_ascii_whitespace()) {
461            bytes.pop_front();
462        }
463    }
464
465    fn consume_byte(&self, bytes: &mut Bytes, expected: u8) -> Result<(), JsonImportError> {
466        match bytes.pop_front() {
467            Some(b) if b == expected => Ok(()),
468            _ => Err(JsonImportError::Syntax("unexpected token".into())),
469        }
470    }
471
472    fn consume_literal(&self, bytes: &mut Bytes, literal: &[u8]) -> Result<(), JsonImportError> {
473        for expected in literal {
474            self.consume_byte(bytes, *expected)?;
475        }
476        Ok(())
477    }
478
479    fn parse_string(&self, bytes: &mut Bytes) -> Result<ParsedString, JsonImportError> {
480        let raw = parse_string_common(bytes, &mut parse_unicode_escape)?;
481        raw.view::<str>()
482            .map_err(|_| JsonImportError::Syntax("invalid utf-8".into()))
483    }
484
485    fn parse_number(&self, bytes: &mut Bytes) -> Result<Bytes, JsonImportError> {
486        parse_number_common(bytes)
487    }
488
489    /// Returns a [`Fragment`] describing every attribute and schema
490    /// encountered so far, suitable for committing alongside the data.
491    pub fn metadata(&mut self) -> Result<Fragment, Store::PutError> {
492        let mut meta = Fragment::default();
493        meta += <Boolean as ConstDescribe>::describe(self.store)?;
494        meta += <F64 as ConstDescribe>::describe(self.store)?;
495        meta += <GenId as ConstDescribe>::describe(self.store)?;
496        meta += <Handle<Blake3, LongString> as ConstDescribe>::describe(self.store)?;
497        for (key, attr) in self.bool_attrs.iter() {
498            meta += attr.describe(self.store)?;
499            if self.array_fields.contains(key) {
500                let attr_id = attr.id();
501                let entity = ExclusiveId::force_ref(&attr_id);
502                meta += entity! { &entity @ metadata::tag: metadata::KIND_MULTI };
503            }
504        }
505        for (key, attr) in self.num_attrs.iter() {
506            meta += attr.describe(self.store)?;
507            if self.array_fields.contains(key) {
508                let attr_id = attr.id();
509                let entity = ExclusiveId::force_ref(&attr_id);
510                meta += entity! { &entity @ metadata::tag: metadata::KIND_MULTI };
511            }
512        }
513        for (key, attr) in self.str_attrs.iter() {
514            meta += attr.describe(self.store)?;
515            if self.array_fields.contains(key) {
516                let attr_id = attr.id();
517                let entity = ExclusiveId::force_ref(&attr_id);
518                meta += entity! { &entity @ metadata::tag: metadata::KIND_MULTI };
519            }
520        }
521        for (key, attr) in self.genid_attrs.iter() {
522            meta += attr.describe(self.store)?;
523            if self.array_fields.contains(key) {
524                let attr_id = attr.id();
525                let entity = ExclusiveId::force_ref(&attr_id);
526                meta += entity! { &entity @ metadata::tag: metadata::KIND_MULTI };
527            }
528        }
529        Ok(meta)
530    }
531
532    /// Resets the cached attribute mappings. Call between unrelated import
533    /// batches if you want field names to be re-derived.
534    pub fn clear(&mut self) {
535        self.bool_attrs.clear();
536        self.num_attrs.clear();
537        self.str_attrs.clear();
538        self.genid_attrs.clear();
539        self.array_fields.clear();
540    }
541}
542
543pub(crate) fn parse_unicode_escape(bytes: &mut Bytes) -> Result<Vec<u8>, JsonImportError> {
544    use winnow::error::InputError;
545    use winnow::token::take;
546    use winnow::Parser;
547
548    let mut grab = take::<_, _, InputError<Bytes>>(4usize);
549    let hex = grab
550        .parse_next(bytes)
551        .map_err(|_| JsonImportError::Syntax("unterminated unicode escape".into()))?;
552
553    let mut code: u32 = 0;
554    for h in hex.as_ref() {
555        code = (code << 4)
556            | match h {
557                b'0'..=b'9' => (h - b'0') as u32,
558                b'a'..=b'f' => (h - b'a' + 10) as u32,
559                b'A'..=b'F' => (h - b'A' + 10) as u32,
560                _ => return Err(JsonImportError::Syntax("invalid unicode escape".into())),
561            };
562    }
563
564    if let Some(ch) = char::from_u32(code) {
565        let mut buf = [0u8; 4];
566        let encoded = ch.encode_utf8(&mut buf);
567        Ok(encoded.as_bytes().to_vec())
568    } else {
569        Err(JsonImportError::Syntax("invalid unicode escape".into()))
570    }
571}
572
573pub(crate) fn parse_string_common(
574    bytes: &mut Bytes,
575    unicode_escape: &mut impl FnMut(&mut Bytes) -> Result<Vec<u8>, JsonImportError>,
576) -> Result<Bytes, JsonImportError> {
577    let consume_byte = |bytes: &mut Bytes, expected: u8| -> Result<(), JsonImportError> {
578        match bytes.pop_front() {
579            Some(b) if b == expected => Ok(()),
580            _ => Err(JsonImportError::Syntax("unexpected token".into())),
581        }
582    };
583
584    consume_byte(bytes, b'"')?;
585    {
586        use winnow::error::InputError;
587        use winnow::token::take_while;
588        use winnow::Parser;
589
590        let mut tentative = bytes.clone();
591        let mut segment = take_while::<_, _, InputError<Bytes>>(0.., |b: u8| {
592            b != b'"' && b != b'\\' && b != b'\n' && b != b'\r'
593        });
594
595        if let Ok(prefix) = segment.parse_next(&mut tentative) {
596            if tentative.peek_token() == Some(b'"') {
597                tentative.pop_front();
598                *bytes = tentative;
599                return Ok(prefix);
600            }
601        }
602    }
603
604    let mut out = Vec::new();
605    loop {
606        use winnow::error::InputError;
607        use winnow::token::take_while;
608        use winnow::Parser;
609
610        let mut segment = take_while::<_, _, InputError<Bytes>>(0.., |b: u8| {
611            b != b'\\' && b != b'"' && b != b'\n' && b != b'\r'
612        });
613        let chunk = segment
614            .parse_next(bytes)
615            .map_err(|_| JsonImportError::Syntax("unterminated string".into()))?;
616        out.extend_from_slice(chunk.as_ref());
617
618        match bytes.peek_token() {
619            Some(b'"') => {
620                bytes.pop_front();
621                return Ok(Bytes::from(out));
622            }
623            Some(b'\\') => {
624                bytes.pop_front();
625                let esc = bytes
626                    .pop_front()
627                    .ok_or_else(|| JsonImportError::Syntax("unterminated escape".into()))?;
628                match esc {
629                    b'"' => out.push(b'"'),
630                    b'\\' => out.push(b'\\'),
631                    b'/' => out.push(b'/'),
632                    b'b' => out.push(0x08),
633                    b'f' => out.push(0x0c),
634                    b'n' => out.push(b'\n'),
635                    b'r' => out.push(b'\r'),
636                    b't' => out.push(b'\t'),
637                    b'u' => out.extend_from_slice(&unicode_escape(bytes)?),
638                    _ => return Err(JsonImportError::Syntax("invalid escape sequence".into())),
639                }
640            }
641            Some(b'\n') | Some(b'\r') | None => {
642                return Err(JsonImportError::Syntax("unterminated string".into()))
643            }
644            _ => unreachable!("peek_token only yields bytes"),
645        }
646    }
647}
648
649pub(crate) fn parse_number_common(bytes: &mut Bytes) -> Result<Bytes, JsonImportError> {
650    use winnow::error::InputError;
651    use winnow::token::take_while;
652    use winnow::Parser;
653
654    let mut number = take_while::<_, _, InputError<Bytes>>(1.., |b: u8| {
655        b.is_ascii_digit() || b == b'-' || b == b'+' || b == b'.' || b == b'e' || b == b'E'
656    });
657
658    number
659        .parse_next(bytes)
660        .map_err(|_: InputError<Bytes>| JsonImportError::Syntax("expected number".into()))
661}
662
#[cfg(test)]
mod tests {
    use super::*;
    use crate::blob::MemoryBlobStore;
    use crate::blob::ToBlob;
    use crate::prelude::Attribute;
    use crate::value::schemas::hash::Blake3;
    use anybytes::View;

    /// Returns the raw handle of the first trible whose attribute is the
    /// string attribute named `expected_attr`.
    fn extract_handle_raw(facts: &TribleSet, expected_attr: &str) -> RawValue {
        let wanted = Attribute::<Handle<Blake3, LongString>>::from_name(expected_attr).id();
        facts
            .iter()
            .find(|t| *t.a() == wanted)
            .expect("missing string trible")
            .v::<Handle<Blake3, LongString>>()
            .raw
    }

    /// Looks up `handle_raw` in the blob store and decodes the blob as text.
    fn read_text(blobs: &mut MemoryBlobStore<Blake3>, handle_raw: RawValue) -> String {
        let entries: Vec<_> = blobs.reader().unwrap().into_iter().collect();
        let (_, blob) = entries
            .iter()
            .find(|(h, _)| {
                let h: Value<Handle<Blake3, LongString>> = (*h).transmute();
                h.raw == handle_raw
            })
            .expect("handle not found in blob store");

        let text: View<str> = blob
            .clone()
            .transmute::<LongString>()
            .try_from_blob()
            .expect("blob should decode as string");
        text.as_ref().to_owned()
    }

    /// Imports `input` and returns the decoded string stored for its `text`
    /// field.
    fn imported_text(input: &'static str) -> String {
        let mut blobs = MemoryBlobStore::<Blake3>::new();
        let handle = {
            // Scoped so the importer's borrow of `blobs` ends before reading.
            let mut importer = JsonObjectImporter::<_, Blake3>::new(&mut blobs, None);
            let fragment = importer.import_blob(input.to_blob()).unwrap();
            extract_handle_raw(fragment.facts(), "text")
        };
        read_text(&mut blobs, handle)
    }

    #[test]
    fn deterministic_imports_simple_object() {
        let input = r#"{ "title": "Dune", "pages": 412 }"#;
        let mut blobs = MemoryBlobStore::<Blake3>::new();
        let mut importer = JsonObjectImporter::<_, Blake3>::new(&mut blobs, None);
        let fragment = importer.import_blob(input.to_blob()).unwrap();

        let root_count = fragment.exports().count();
        assert_eq!(root_count, 1);
        assert_eq!(fragment.facts().len(), 2);

        let meta = importer.metadata().expect("metadata set");
        assert!(!meta.facts().is_empty());
    }

    #[test]
    fn parses_escaped_string() {
        let text = imported_text(r#"{ "text": "hello\nworld" }"#);
        assert_eq!(text, "hello\nworld");
    }

    #[test]
    fn parses_unicode_escape() {
        let text = imported_text(r#"{ "text": "smile: \u263A" }"#);
        assert_eq!(text, "smile: \u{263A}");
    }
}
738}