hydrate_data/
json_storage.rs

1use crate::value::ValueEnum;
2use crate::{
3    AssetId, BuildInfo, DataSetAssetInfo, HashMap, HashSet, ImportInfo, ImporterId, NullOverride,
4    PathReference, PathReferenceHash, PathReferenceNamespaceResolver, Schema, SchemaFingerprint,
5    SchemaNamedType, SchemaSet, SingleObject, Value,
6};
7use crate::{AssetLocation, AssetName, DataSetResult, ImportableName, OrderedSet};
8use hydrate_schema::{CachedSchemaNamedType, DataSetError, SchemaRecord};
9use serde::{Deserialize, Serialize};
10use std::hash::Hash;
11use std::str::FromStr;
12use std::sync::Arc;
13use uuid::Uuid;
14
15fn property_value_to_json(
16    value: &Value,
17    buffers: &mut Option<Vec<Arc<Vec<u8>>>>,
18) -> serde_json::Value {
19    match value {
20        Value::Nullable(_) => unimplemented!(),
21        Value::Boolean(x) => serde_json::Value::from(*x),
22        Value::I32(x) => serde_json::Value::from(*x),
23        Value::I64(x) => serde_json::Value::from(*x),
24        Value::U32(x) => serde_json::Value::from(*x),
25        Value::U64(x) => serde_json::Value::from(*x),
26        Value::F32(x) => serde_json::Value::from(*x),
27        Value::F64(x) => serde_json::Value::from(*x),
28        Value::Bytes(x) => {
29            if let Some(buffers) = buffers {
30                // Copy the data into a new buffer and create a json value that indexes into it
31                let buffer_index = buffers.len();
32                buffers.push(x.clone());
33                serde_json::Value::from(buffer_index)
34            } else {
35                // Encode the data inline as a base64 string
36                serde_json::Value::from(base64::encode(&**x))
37            }
38        }
39        Value::String(x) => serde_json::Value::from(x.to_string()),
40        Value::StaticArray(_) => unimplemented!(),
41        Value::DynamicArray(_) => unimplemented!(),
42        Value::Map(_) => unimplemented!(),
43        Value::AssetRef(x) => serde_json::Value::from(x.as_uuid().to_string()),
44        Value::Record(_) => unimplemented!(),
45        Value::Enum(x) => serde_json::Value::from(x.symbol_name().to_string()),
46    }
47}
48
49fn json_to_i64(value: &serde_json::Value) -> Option<i64> {
50    match value {
51        serde_json::Value::Bool(b) => {
52            if *b {
53                Some(1)
54            } else {
55                Some(0)
56            }
57        }
58        serde_json::Value::Number(number) => {
59            if let Some(i) = number.as_i64() {
60                Some(i)
61            } else if let Some(u) = number.as_u64() {
62                Some(u as i64)
63            } else if let Some(f) = number.as_f64() {
64                Some(f as i64)
65            } else {
66                None
67            }
68        }
69        serde_json::Value::String(s) => s.parse::<i64>().ok(),
70        _ => None,
71    }
72}
73
74fn json_to_u64(value: &serde_json::Value) -> Option<u64> {
75    match value {
76        serde_json::Value::Bool(b) => {
77            if *b {
78                Some(1)
79            } else {
80                Some(0)
81            }
82        }
83        serde_json::Value::Number(number) => {
84            if let Some(u) = number.as_u64() {
85                Some(u)
86            } else if let Some(i) = number.as_i64() {
87                Some(i as u64)
88            } else if let Some(f) = number.as_f64() {
89                Some(f as u64)
90            } else {
91                None
92            }
93        }
94        serde_json::Value::String(s) => s.parse::<u64>().ok(),
95        _ => None,
96    }
97}
98
99fn json_to_f64(value: &serde_json::Value) -> Option<f64> {
100    match value {
101        serde_json::Value::Bool(b) => {
102            if *b {
103                Some(1.0)
104            } else {
105                Some(0.0)
106            }
107        }
108        serde_json::Value::Number(number) => {
109            if let Some(f) = number.as_f64() {
110                Some(f)
111            } else if let Some(u) = number.as_u64() {
112                Some(u as f64)
113            } else if let Some(i) = number.as_i64() {
114                Some(i as f64)
115            } else {
116                None
117            }
118        }
119        serde_json::Value::String(s) => s.parse::<f64>().ok(),
120        _ => None,
121    }
122}
123
124fn json_to_property_value_with_schema(
125    new_named_types: &HashMap<SchemaFingerprint, SchemaNamedType>,
126    old_named_types: &Option<HashMap<SchemaFingerprint, SchemaNamedType>>,
127    new_schema: &Schema,
128    old_schema: &Schema,
129    json_value: &serde_json::Value,
130    buffers: &Option<Vec<Arc<Vec<u8>>>>,
131) -> Value {
132    match new_schema {
133        // These schema types are never given property values in memory, even if some of them appear
134        // to be assignable properties in json.
135        Schema::Nullable(_) => unimplemented!(),
136        Schema::StaticArray(_) => unimplemented!(),
137        Schema::DynamicArray(_) => unimplemented!(),
138        Schema::Map(_) => unimplemented!(),
139        Schema::Record(_) => unimplemented!(),
140
141        // Simple scalar values
142        Schema::Boolean => Value::Boolean(json_value.as_bool().unwrap()),
143        Schema::I32 => Value::I32(json_to_i64(json_value).unwrap() as i32),
144        Schema::I64 => Value::I64(json_to_i64(json_value).unwrap()),
145        Schema::U32 => Value::U32(json_to_u64(json_value).unwrap() as u32),
146        Schema::U64 => Value::U64(json_to_u64(json_value).unwrap()),
147        Schema::F32 => Value::F32(json_to_f64(json_value).unwrap() as f32),
148        Schema::F64 => Value::F64(json_to_f64(json_value).unwrap()),
149        Schema::Bytes => {
150            if let Some(buffers) = buffers {
151                // The data is an index into a buffer, take the data from the buffer
152                let buffer_index = json_value.as_u64().unwrap() as usize;
153                Value::Bytes(buffers[buffer_index].clone())
154            } else {
155                // The data is encoded inline as a base64 string, decode and return the value
156                let data = base64::decode(json_value.as_str().unwrap()).unwrap();
157                Value::Bytes(Arc::new(data))
158            }
159        }
160        Schema::String => Value::String(Arc::new(json_value.as_str().unwrap().to_string())),
161        Schema::AssetRef(_) => Value::AssetRef(AssetId::from_uuid(
162            Uuid::parse_str(json_value.as_str().unwrap()).unwrap(),
163        )),
164        Schema::Enum(x) => {
165            let named_type = new_named_types.get(x).unwrap();
166            match named_type {
167                SchemaNamedType::Record(_) => {
168                    panic!("A Schema::Enum is matching a named type that is not an enum")
169                }
170                SchemaNamedType::Enum(new_enum) => {
171                    // Special handling to migrate enums
172                    if let Some(old_named_types) = old_named_types {
173                        match old_schema {
174                            Schema::Enum(old_enum_fingerprint) => {
175                                // Fix up using enum symbol UUID
176                                let old_named_type =
177                                    old_named_types.get(old_enum_fingerprint).unwrap();
178                                let old_enum = old_named_type.as_enum().unwrap();
179                                let old_symbol = old_enum
180                                    .find_symbol_from_name(json_value.as_str().unwrap())
181                                    .unwrap();
182                                let new_symbol = new_enum
183                                    .find_symbol_from_uuid(old_symbol.symbol_uuid())
184                                    .unwrap();
185                                Value::Enum(ValueEnum::new(new_symbol.name().to_string()))
186                            }
187                            Schema::String => {
188                                // Just try and match an enum string value
189                                Value::enum_value_from_string(
190                                    new_enum,
191                                    json_value.as_str().unwrap(),
192                                )
193                                .unwrap()
194                            }
195                            _ => {
196                                panic!("Cannot migrate schema {:?} into an enum schema", old_schema)
197                            }
198                        }
199                    } else {
200                        Value::enum_value_from_string(new_enum, json_value.as_str().unwrap())
201                            .unwrap()
202                    }
203                }
204            }
205        }
206    }
207}
208
209fn null_override_to_string_value(null_override: NullOverride) -> &'static str {
210    match null_override {
211        NullOverride::SetNull => "SetNull",
212        NullOverride::SetNonNull => "SetNonNull",
213        NullOverride::Unset => unreachable!(), // Should not be in the map
214    }
215}
216
217fn string_to_null_override_value(s: &str) -> Option<NullOverride> {
218    match s {
219        "SetNull" => Some(NullOverride::SetNull),
220        "SetNonNull" => Some(NullOverride::SetNonNull),
221        _ => None,
222    }
223}
224
225fn ordered_map_cached_schemas<S>(
226    value: &HashMap<Uuid, String>,
227    serializer: S,
228) -> Result<S::Ok, S::Error>
229where
230    S: serde::Serializer,
231{
232    let ordered: std::collections::BTreeMap<_, _> = value.iter().collect();
233    ordered.serialize(serializer)
234}
235
236fn ordered_map_file_references<S>(
237    value: &HashMap<Uuid, String>,
238    serializer: S,
239) -> Result<S::Ok, S::Error>
240where
241    S: serde::Serializer,
242{
243    let ordered: std::collections::BTreeMap<_, _> = value.iter().collect();
244    ordered.serialize(serializer)
245}
246
247fn ordered_map_json_value<S>(
248    value: &HashMap<String, serde_json::Value>,
249    serializer: S,
250) -> Result<S::Ok, S::Error>
251where
252    S: serde::Serializer,
253{
254    let ordered: std::collections::BTreeMap<_, _> = value.iter().collect();
255    ordered.serialize(serializer)
256}
257
258fn ordered_map_uuid<S>(
259    value: &HashMap<String, Uuid>,
260    serializer: S,
261) -> Result<S::Ok, S::Error>
262where
263    S: serde::Serializer,
264{
265    let ordered: std::collections::BTreeMap<_, _> = value.iter().collect();
266    ordered.serialize(serializer)
267}
268
269fn load_json_properties(
270    new_root_named_type: &SchemaNamedType,
271    new_named_types: &HashMap<SchemaFingerprint, SchemaNamedType>,
272    new_named_types_by_uuid: &HashMap<Uuid, SchemaFingerprint>,
273    // If we are not doing a schema migration, this will also happen to be the new schema fingerprint
274    old_schema_fingerprint: SchemaFingerprint,
275    // None, unless we are doing a schema migration
276    old_named_types: Option<HashMap<SchemaFingerprint, SchemaNamedType>>,
277
278    // The properties to parse
279    json_properties: &HashMap<String, serde_json::Value>,
280
281    // The out parameters
282    properties: &mut HashMap<String, Value>,
283    property_null_overrides: &mut HashMap<String, NullOverride>,
284    mut properties_in_replace_mode: Option<&mut HashSet<String>>,
285    dynamic_collection_entries: &mut HashMap<String, OrderedSet<Uuid>>,
286    buffers: &mut Option<Vec<Arc<Vec<u8>>>>,
287) {
288    // We could allow arbitrary migrations by handing off the schema information and json properties
289    // and expecting back the refreshed json properties. It's far from elegant but much simpler than
290    // true arbitrary schema migrations.
291    for (old_path, json_value) in json_properties {
292        let mut property_handled = false;
293
294        // First, some special handling for "control" fields on special types like collections/nullables
295        // This data is stored to disk as properties, but loaded in memory these values are represented
296        // differently.
297        let old_split_path = old_path.rsplit_once('.');
298        if let Some((old_parent_path, path_end)) = old_split_path {
299            //
300            // Handle the possibility of a property path changing due to schema migration
301            //
302            let fixed_parent_path_by_value;
303            let new_parent_path = if let Some(old_named_types) = &old_named_types {
304                let old_root_named_type = old_named_types.get(&old_schema_fingerprint).unwrap();
305
306                let new_parent_path = SchemaNamedType::find_post_migration_property_path(
307                    old_root_named_type,
308                    old_parent_path,
309                    old_named_types,
310                    new_root_named_type,
311                    new_named_types,
312                    new_named_types_by_uuid,
313                );
314
315                log::trace!(
316                    "Migrate property path {} -> {:?}",
317                    old_parent_path,
318                    new_parent_path
319                );
320
321                // This may return none, which probably means the field was deleted
322                fixed_parent_path_by_value = new_parent_path;
323                fixed_parent_path_by_value.as_deref()
324            } else {
325                Some(old_parent_path)
326            };
327
328            //
329            // Check for cases where properties in the json are "control" values and affect the in-memory
330            // representation of the loaded asset
331            //
332            if let Some(new_parent_path) = new_parent_path {
333                let parent_schema = new_root_named_type
334                    .find_property_schema(new_parent_path, new_named_types)
335                    .unwrap();
336
337                if parent_schema.is_nullable() && path_end == "null_override" {
338                    let null_override =
339                        string_to_null_override_value(json_value.as_str().unwrap()).unwrap();
340                    log::trace!(
341                        "set null override {} to {:?}",
342                        new_parent_path,
343                        null_override
344                    );
345                    property_null_overrides.insert(new_parent_path.to_string(), null_override);
346                    property_handled = true;
347                }
348
349                if parent_schema.is_dynamic_array() && path_end == "replace" {
350                    if let Some(properties_in_replace_mode) = &mut properties_in_replace_mode {
351                        if json_value.as_bool() == Some(true) {
352                            log::trace!("set property {} to replace", new_parent_path);
353                            properties_in_replace_mode.insert(new_parent_path.to_string());
354                        }
355                    }
356
357                    property_handled = true;
358                }
359            }
360        }
361
362        // Handle actual property values (some of these may still be "control" values for special types
363        // like collections, and aren't true properties)
364        if !property_handled {
365            //
366            // Handle the possibility of a property path changing due to schema migration
367            //
368            let fixed_path_by_value;
369            let new_path = if let Some(old_named_types) = &old_named_types {
370                let old_root_named_type = old_named_types.get(&old_schema_fingerprint).unwrap();
371
372                let new_property_path = SchemaNamedType::find_post_migration_property_path(
373                    old_root_named_type,
374                    old_path,
375                    old_named_types,
376                    new_root_named_type,
377                    new_named_types,
378                    new_named_types_by_uuid,
379                );
380
381                log::info!(
382                    "Migrate property path {} -> {:?}",
383                    old_path,
384                    new_property_path
385                );
386
387                fixed_path_by_value = new_property_path;
388                fixed_path_by_value.as_deref()
389            } else {
390                Some(old_path.as_str())
391            };
392
393            //
394            // Finally, we are loading properties, possibly with a modified path for schema migration
395            //
396            // new_path could be none if the field has been removed and we are migrating schema
397            //
398            if let Some(new_path) = new_path {
399                let new_property_schema = new_root_named_type
400                    .find_property_schema(&new_path, new_named_types)
401                    .unwrap();
402
403                let old_property_schema = if let Some(old_named_types) = &old_named_types {
404                    let old_root_named_type = old_named_types.get(&old_schema_fingerprint).unwrap();
405                    old_root_named_type
406                        .find_property_schema(&old_path, old_named_types)
407                        .unwrap()
408                        .clone()
409                } else {
410                    new_property_schema.clone()
411                };
412
413                // If it's a dynamic array, then don't treat this as a property. Instead read the
414                // list of element UUIDs and store them into dynamic_collection_entries
415                if new_property_schema.is_dynamic_array() || new_property_schema.is_map() {
416                    let json_array = json_value.as_array().unwrap();
417                    for json_array_element in json_array {
418                        let element = json_array_element.as_str().unwrap();
419                        let element = Uuid::from_str(element).unwrap();
420                        let existing_entries = dynamic_collection_entries
421                            .entry(new_path.to_string())
422                            .or_default();
423                        if !existing_entries.contains(&element) {
424                            log::trace!("add dynamic array element {} to {:?}", element, new_path);
425                            let newly_inserted = existing_entries.try_insert_at_end(element);
426                            assert!(newly_inserted);
427                        }
428                    }
429                } else {
430                    let new_property_value = json_to_property_value_with_schema(
431                        new_named_types,
432                        &old_named_types,
433                        &new_property_schema,
434                        &old_property_schema,
435                        &json_value,
436                        buffers,
437                    );
438
439                    log::trace!("set {} to {:?}", new_path, new_property_value);
440                    properties.insert(new_path.to_string(), new_property_value);
441                }
442            }
443        }
444    }
445}
446
447fn store_json_properties(
448    properties: &HashMap<String, Value>,
449    property_null_overrides: &HashMap<String, NullOverride>,
450    properties_in_replace_mode: Option<&HashSet<String>>,
451    dynamic_collection_entries: &HashMap<String, OrderedSet<Uuid>>,
452    buffers: &mut Option<Vec<Arc<Vec<u8>>>>,
453) -> HashMap<String, serde_json::Value> {
454    let mut saved_properties: HashMap<String, serde_json::Value> = Default::default();
455
456    for (path, null_override) in property_null_overrides {
457        saved_properties.insert(
458            format!("{}.null_override", path),
459            serde_json::Value::from(null_override_to_string_value(*null_override)),
460        );
461    }
462
463    if let Some(properties_in_replace_mode) = properties_in_replace_mode {
464        for path in properties_in_replace_mode {
465            saved_properties.insert(format!("{}.replace", path), serde_json::Value::from(true));
466        }
467    }
468
469    for (path, elements) in dynamic_collection_entries {
470        let elements_json: Vec<_> = elements
471            .iter()
472            .map(|x| serde_json::Value::from(x.to_string()))
473            .collect();
474        let elements_json_array = serde_json::Value::from(elements_json);
475        saved_properties.insert(path.to_string(), elements_json_array);
476    }
477
478    for (k, v) in properties {
479        saved_properties.insert(k.to_string(), property_value_to_json(v, buffers));
480    }
481
482    saved_properties
483}
484
485// Import Info, part of AssetJson
486#[derive(Debug, Serialize, Deserialize)]
487pub struct AssetImportInfoJson {
488    importer_id: Uuid,
489
490    //source_file_root: String,
491    source_file_path: String,
492    importable_name: String,
493
494    #[serde(serialize_with = "ordered_map_file_references")]
495    file_references: HashMap<Uuid, String>,
496
497    // These are all encoded as hex to avoid json/u64 weirdness
498    source_file_modified_timestamp: String,
499    source_file_size: String,
500    import_data_contents_hash: String,
501}
502
503impl AssetImportInfoJson {
504    pub fn new(import_info: &ImportInfo) -> Self {
505        let source_file_path = format!(
506            "{}",
507            PathReference::new(
508                import_info.source_file().namespace().to_string(),
509                import_info.source_file().path().to_string(),
510                ImportableName::default()
511            )
512        );
513
514        AssetImportInfoJson {
515            importer_id: import_info.importer_id().0,
516            source_file_path,
517            importable_name: import_info
518                .importable_name()
519                .name()
520                .map(|x| x.to_string())
521                .unwrap_or_default(),
522            file_references: import_info
523                .path_references()
524                .iter()
525                .map(|(k, v)| (k.0, v.to_string()))
526                .collect(),
527            source_file_modified_timestamp: format!(
528                "{:0>16x}",
529                import_info.source_file_modified_timestamp()
530            ),
531            source_file_size: format!("{:0>16x}", import_info.source_file_size()),
532            import_data_contents_hash: format!("{:0>16x}", import_info.import_data_contents_hash()),
533        }
534    }
535
536    pub fn to_import_info(
537        &self,
538        _schema_set: &SchemaSet,
539        namespace_resolver: &dyn PathReferenceNamespaceResolver,
540    ) -> DataSetResult<ImportInfo> {
541        let mut path_references = HashMap::default();
542        for (key, value) in &self.file_references {
543            let path_reference: PathReference = value.into();
544            path_references.insert(
545                PathReferenceHash(*key),
546                path_reference.simplify(namespace_resolver),
547            );
548        }
549
550        let path_reference: PathReference = self.source_file_path.clone().into();
551        let source_file = PathReference::new(
552            path_reference.namespace().to_string(),
553            path_reference.path().to_string(),
554            ImportableName::new(self.importable_name.clone()),
555        )
556        .simplify(namespace_resolver);
557
558        let source_file_modified_timestamp =
559            u64::from_str_radix(&self.source_file_modified_timestamp, 16)
560                .map_err(|_| (DataSetError::StorageFormatError))?;
561        let source_file_size = u64::from_str_radix(&self.source_file_size, 16)
562            .map_err(|_| (DataSetError::StorageFormatError))?;
563        let import_data_contents_hash = u64::from_str_radix(&self.import_data_contents_hash, 16)
564            .map_err(|_| (DataSetError::StorageFormatError))?;
565
566        Ok(ImportInfo::new(
567            ImporterId(self.importer_id),
568            source_file,
569            path_references,
570            source_file_modified_timestamp,
571            source_file_size,
572            import_data_contents_hash,
573        ))
574    }
575}
576
577// Build Info, part of AssetJson
578#[derive(Debug, Serialize, Deserialize)]
579pub struct AssetBuildInfoJson {
580    #[serde(serialize_with = "ordered_map_uuid")]
581    file_reference_overrides: HashMap<String, Uuid>,
582}
583
584impl AssetBuildInfoJson {
585    pub fn new(import_info: &BuildInfo) -> Self {
586        let mut file_reference_overrides = HashMap::default();
587        for (k, v) in &import_info.path_reference_overrides {
588            file_reference_overrides.insert(k.to_string(), v.as_uuid());
589        }
590
591        AssetBuildInfoJson {
592            file_reference_overrides,
593        }
594    }
595
596    pub fn to_build_info(
597        &self,
598        _schema_set: &SchemaSet,
599        namespace_resolver: &dyn PathReferenceNamespaceResolver,
600    ) -> BuildInfo {
601        let mut file_reference_overrides = HashMap::default();
602        for (k, v) in &self.file_reference_overrides {
603            let path_reference: PathReference = k.into();
604            file_reference_overrides.insert(
605                path_reference.simplify(namespace_resolver),
606                AssetId::from_uuid(*v),
607            );
608        }
609
610        BuildInfo {
611            path_reference_overrides: file_reference_overrides,
612        }
613    }
614}
615
/// Receiver for assets that have been read back from storage.
///
/// `AssetJson::load_asset_from_string` parses a stored asset and hands the decoded pieces to an
/// implementation of this trait.
pub trait RestoreAssetFromStorageImpl {
    /// Accepts one fully decoded asset.
    ///
    /// `schema` is the fingerprint of the named type the properties were loaded as. The four
    /// property collections are the in-memory forms filled in while loading the stored
    /// properties: plain values, null overrides, paths in replace mode, and the element UUIDs
    /// of dynamic collections.
    fn restore_asset(
        &mut self,
        asset_id: AssetId,
        asset_name: AssetName,
        asset_location: AssetLocation,
        import_info: Option<ImportInfo>,
        build_info: BuildInfo,
        prototype: Option<AssetId>,
        schema: SchemaFingerprint,
        properties: HashMap<String, Value>,
        property_null_overrides: HashMap<String, NullOverride>,
        properties_in_replace_mode: HashSet<String>,
        dynamic_collection_entries: HashMap<String, OrderedSet<Uuid>>,
    ) -> DataSetResult<()>;

    /// The resolver used to simplify path references found in stored import/build info.
    fn namespace_resolver(&self) -> &dyn PathReferenceNamespaceResolver;
}
634
/// The JSON document stored for a single asset.
#[derive(Debug, Serialize, Deserialize)]
pub struct AssetJson {
    /// UUID of the asset. On load, an explicit override takes priority; if neither an override
    /// nor this field is present, a new UUID is generated.
    id: Option<Uuid>,
    /// Asset name; an empty string is loaded as an empty asset name.
    name: String,
    /// UUID of the asset this asset is located under. On load, an explicit override location
    /// takes priority, and a default location is used when this is absent.
    parent_dir: Option<Uuid>,
    /// Fingerprint (as a UUID) of the root schema the properties were saved with.
    root_schema: Uuid,
    /// Name of the root schema; used in log/panic messages when loading.
    schema_name: String,
    import_info: Option<AssetImportInfoJson>,
    build_info: AssetBuildInfoJson,
    /// UUID of the prototype asset, if any.
    prototype: Option<Uuid>,
    /// Flat map of property path to stored value; serialized sorted by path.
    #[serde(serialize_with = "ordered_map_json_value")]
    properties: HashMap<String, serde_json::Value>,
    /// Cache of the schemas referenced by this asset. Only unpacked on load when the root
    /// schema fingerprint is not found in the loaded schema set, to drive a schema migration.
    /// Defaults to empty when missing from the file; serialized sorted by key.
    #[serde(default)]
    #[serde(serialize_with = "ordered_map_cached_schemas")]
    schemas: HashMap<Uuid, String>,
}
651
652impl AssetJson {
653    #[profiling::function]
654    pub fn load_asset_from_string(
655        restore_asset_impl: &mut dyn RestoreAssetFromStorageImpl,
656        schema_set: &SchemaSet,
657        override_asset_id: Option<Uuid>,
658        // If the file doesn't claim a location and we don't override it, we will default to this
659        default_asset_location: AssetLocation,
660        // If set, we use this instead of what the file says to use
661        override_asset_location: Option<AssetLocation>,
662        json: &str,
663    ) -> DataSetResult<AssetId> {
664        let stored_asset: AssetJson = {
665            profiling::scope!("serde_json::from_str");
666            serde_json::from_str(json).unwrap()
667        };
668
669        // Use the provided override, or what's in the file, or worst case default to asset_source_id
670        let asset_location = if let Some(override_asset_location) = override_asset_location {
671            override_asset_location
672        } else {
673            // If no parent is specified, default it to the root node for this data source
674            stored_asset
675                .parent_dir
676                .map(|x| AssetLocation::new(AssetId::from_uuid(x)))
677                .unwrap_or(default_asset_location)
678        };
679
680        let asset_name = if stored_asset.name.is_empty() {
681            AssetName::empty()
682        } else {
683            AssetName::new(stored_asset.name)
684        };
685
686        let asset_id = if let Some(override_asset_id) = override_asset_id {
687            // If an ID was provided, use it
688            AssetId::from_uuid(override_asset_id)
689        } else {
690            // Otherwise read it from the file. If there was no ID specified, generate a new one
691            AssetId::from_uuid(stored_asset.id.unwrap_or_else(Uuid::new_v4))
692        };
693
694        let root_schema_fingerprint = SchemaFingerprint::from_uuid(stored_asset.root_schema);
695        let prototype = stored_asset.prototype.map(|x| AssetId::from_uuid(x));
696
697        //
698        // In this chunk of code, we determine what the loaded object's type will be.
699        // - The fast/happy path is that the data was saved with an identical schema to the schema
700        //   we currently have loaded (i.e. fingerprints match)
701        // - The slow path is a schema migration (fingerprints do not match). This could be due to
702        //   added/modified/removed fields, enum symbols, etc.
703        //
704        // If we need to do schema migration, we will unpack the schema cache in the data file.
705        // This allows us to get the UUIDs for all the fields/enum symbols, etc.
706        //
707        let new_named_type = schema_set.find_named_type_by_fingerprint(root_schema_fingerprint);
708        let (new_named_type, old_named_types) = if let Some(new_named_type) = new_named_type {
709            // The object was saved using the identical schema that we already loaded. This is the
710            // fast/happy path
711            (new_named_type.clone(), None)
712        } else if !stored_asset.schemas.is_empty() {
713            // There's a schema cache in the asset file. We can try to locate the corresponding type in our schema set
714            // and try to migrate the data
715            log::info!(
716                "Can't load asset {} type {} by fingerprint, trying by UUID",
717                asset_id,
718                stored_asset.schema_name
719            );
720
721            // Parse all the schemas in the cache
722            let old_named_types = parse_referenced_schemas(&stored_asset.schemas);
723
724            // Find the schema we want to migrate the data to
725            let old_root_schema = old_named_types
726                .get(&SchemaFingerprint::from_uuid(stored_asset.root_schema))
727                .unwrap();
728            let root_type_uuid = old_root_schema.type_uuid();
729            let new_named_type = schema_set.find_named_type_by_type_uuid(root_type_uuid)?;
730            (new_named_type.clone(), Some(old_named_types))
731        } else {
732            panic!(
733                "Can't load asset {} type {} by fingerprint, not stored schemas found.",
734                asset_id, stored_asset.schema_name
735            );
736            //let named_type = schema_set.find_named_type(stored_asset.schema_name)?;
737            //(named_type.clone(), None)
738        };
739
740        let mut properties: HashMap<String, Value> = Default::default();
741        let mut property_null_overrides: HashMap<String, NullOverride> = Default::default();
742        let mut properties_in_replace_mode: HashSet<String> = Default::default();
743        let mut dynamic_collection_entries: HashMap<String, OrderedSet<Uuid>> = Default::default();
744        let mut buffers = None;
745
746        load_json_properties(
747            &new_named_type,
748            schema_set.schemas(),
749            schema_set.schemas_by_type_uuid(),
750            SchemaFingerprint::from_uuid(stored_asset.root_schema),
751            old_named_types,
752            &stored_asset.properties,
753            &mut properties,
754            &mut property_null_overrides,
755            Some(&mut properties_in_replace_mode),
756            &mut dynamic_collection_entries,
757            &mut buffers,
758        );
759
760        let import_info = if let Some(import_info) = stored_asset.import_info {
761            Some(import_info.to_import_info(schema_set, restore_asset_impl.namespace_resolver())?)
762        } else {
763            None
764        };
765
766        let build_info = stored_asset
767            .build_info
768            .to_build_info(schema_set, restore_asset_impl.namespace_resolver());
769
770        restore_asset_impl.restore_asset(
771            asset_id,
772            asset_name,
773            asset_location,
774            import_info,
775            build_info,
776            prototype,
777            new_named_type.fingerprint(),
778            properties,
779            property_null_overrides,
780            properties_in_replace_mode,
781            dynamic_collection_entries,
782        )?;
783
784        Ok(asset_id)
785    }
786
787    #[profiling::function]
788    pub fn save_asset_to_string(
789        schema_set: &SchemaSet,
790        assets: &HashMap<AssetId, DataSetAssetInfo>,
791        asset_id: AssetId,
792        // We only save the ID in the file if using path-based file system storage. Otherwise the
793        // id is the file path/name
794        include_asset_id_in_file: bool,
795        asset_location: Option<AssetLocation>,
796    ) -> String {
797        let obj = assets.get(&asset_id).unwrap();
798        let mut buffers = None;
799
800        let schemas = gather_referenced_schemas(schema_set, obj.schema());
801
802        let json_properties = store_json_properties(
803            obj.properties(),
804            obj.property_null_overrides(),
805            Some(obj.properties_in_replace_mode()),
806            obj.dynamic_collection_entries(),
807            &mut buffers,
808        );
809
810        let import_info = obj
811            .import_info()
812            .as_ref()
813            .map(|x| AssetImportInfoJson::new(&x));
814        let build_info = AssetBuildInfoJson::new(obj.build_info());
815
816        let written_asset_id = if include_asset_id_in_file {
817            Some(asset_id.as_uuid())
818        } else {
819            None
820        };
821        let stored_asset = AssetJson {
822            id: written_asset_id,
823            name: obj.asset_name().as_string().cloned().unwrap_or_default(),
824            parent_dir: asset_location.map(|x| x.path_node_id().as_uuid()),
825            root_schema: obj.schema().fingerprint().as_uuid(),
826            schema_name: obj.schema().name().to_string(),
827            import_info,
828            build_info,
829            prototype: obj.prototype().map(|x| x.as_uuid()),
830            properties: json_properties,
831            schemas,
832        };
833
834        profiling::scope!("serde_json::to_string_pretty");
835        serde_json::to_string_pretty(&stored_asset).unwrap()
836    }
837}
838
839pub fn parse_referenced_schemas(
840    stored_schemas: &HashMap<Uuid, String>
841) -> HashMap<SchemaFingerprint, SchemaNamedType> {
842    // Parse all the schemas in the cache
843    let mut old_named_types = HashMap::default();
844    for (k, v) in stored_schemas {
845        let cached_schema_json = String::from_utf8(base64::decode(v).unwrap()).unwrap();
846        let cached_schema: CachedSchemaNamedType =
847            serde_json::from_str(&cached_schema_json).unwrap();
848        old_named_types.insert(SchemaFingerprint::from_uuid(*k), cached_schema.to_schema());
849    }
850
851    old_named_types
852}
853
854pub fn gather_referenced_schemas(
855    schema_set: &SchemaSet,
856    root_schema: &SchemaRecord,
857) -> HashMap<Uuid, String> {
858    // Find relevant schema/fingerprints so they can be stored alongside the object data
859    let mut referenced_schema_fingerprints = HashSet::default();
860    let mut visit_stack = Vec::default();
861    Schema::find_referenced_schemas(
862        schema_set.schemas(),
863        &Schema::Record(root_schema.fingerprint()),
864        &mut referenced_schema_fingerprints,
865        &mut visit_stack,
866    );
867
868    // Build the schema cache to save alongside the object data
869    let mut referenced_schemas = HashMap::default();
870    for fingerprint in referenced_schema_fingerprints {
871        let named_type = schema_set
872            .find_named_type_by_fingerprint(fingerprint)
873            .unwrap();
874        let cached_schema = CachedSchemaNamedType::new_from_schema(named_type);
875        let cached_schema_json = serde_json::to_string(&cached_schema).unwrap();
876        let cached_schema_json64 = base64::encode(cached_schema_json.into_bytes());
877        referenced_schemas.insert(fingerprint.as_uuid(), cached_schema_json64);
878    }
879
880    referenced_schemas
881}
882
883// You can create this with SingleObjectJson::new and serialize it to disk to save
884// You can deserialize this and read using SingleObjectJson::to_single_object
885#[derive(Debug, Serialize, Deserialize)]
886pub struct SingleObjectJson {
887    //contents_hash: u64,
888    //TODO: Rnemae to root_schema
889    //TODO: Add schemas
890    root_schema: Uuid,
891    schema_name: String,
892    #[serde(serialize_with = "ordered_map_json_value")]
893    properties: HashMap<String, serde_json::Value>,
894    #[serde(default)]
895    #[serde(serialize_with = "ordered_map_cached_schemas")]
896    schemas: HashMap<Uuid, String>,
897}
898
899impl SingleObjectJson {
900    pub fn new(
901        schema_set: &SchemaSet,
902        object: &SingleObject,
903        // If buffers are provided, the bulk data is stored here instead of inline with the rest of the properties
904        buffers: &mut Option<Vec<Arc<Vec<u8>>>>,
905    ) -> SingleObjectJson {
906        let schemas = gather_referenced_schemas(schema_set, object.schema());
907
908        let json_properties = store_json_properties(
909            &object.properties(),
910            &object.property_null_overrides(),
911            None,
912            &object.dynamic_collection_entries(),
913            buffers,
914        );
915
916        let mut hasher = siphasher::sip::SipHasher::default();
917        // This includes schema, all other contents of the asset
918        object.hash(&mut hasher);
919
920        SingleObjectJson {
921            //contents_hash: hasher.finish().into(),
922            root_schema: object.schema().fingerprint().as_uuid(),
923            schema_name: object.schema().name().to_string(),
924            properties: json_properties,
925            schemas,
926        }
927    }
928
929    pub fn to_single_object(
930        &self,
931        schema_set: &SchemaSet,
932        // If buffers are provided, then we read bulk data from here instead from inline
933        buffers: &mut Option<Vec<Arc<Vec<u8>>>>,
934    ) -> SingleObject {
935        let root_schema_fingerprint = SchemaFingerprint::from_uuid(self.root_schema);
936
937        //
938        // In this chunk of code, we determine what the loaded object's type will be.
939        // - The fast/happy path is that the data was saved with an identical schema to the schema
940        //   we currently have loaded (i.e. fingerprints match)
941        // - The slow path is a schema migration (fingerprints do not match). This could be due to
942        //   added/modified/removed fields, enum symbols, etc.
943        //
944        // If we need to do schema migration, we will unpack the schema cache in the data file.
945        // This allows us to get the UUIDs for all the fields/enum symbols, etc.
946        //
947        let new_named_type = schema_set.find_named_type_by_fingerprint(root_schema_fingerprint);
948        let new_named_type = if let Some(new_named_type) = new_named_type {
949            // The object was saved using the identical schema that we already loaded. This is the
950            // fast/happy path
951            new_named_type.clone()
952        } else if !self.schemas.is_empty() {
953            // There's a schema cache in the asset file. We can try to locate the corresponding type in our schema set
954            // and try to migrate the data
955            log::info!(
956                "Can't load single object type {} by fingerprint, trying by UUID",
957                self.schema_name
958            );
959
960            // Parse all the schemas in the cache
961            let old_named_types = parse_referenced_schemas(&self.schemas);
962
963            // Find the schema we want to migrate the data to
964            let old_root_schema = old_named_types
965                .get(&SchemaFingerprint::from_uuid(self.root_schema))
966                .unwrap();
967            let root_type_uuid = old_root_schema.type_uuid();
968            let new_named_type = schema_set
969                .find_named_type_by_type_uuid(root_type_uuid)
970                .unwrap();
971            new_named_type.clone()
972        } else {
973            panic!(
974                "Can't load single object type {} by fingerprint, no stored schemas found",
975                self.schema_name
976            );
977            //let named_type = schema_set.find_named_type(stored_asset.schema_name)?;
978            //(named_type.clone(), None)
979        };
980
981        let mut properties: HashMap<String, Value> = Default::default();
982        let mut property_null_overrides: HashMap<String, NullOverride> = Default::default();
983        let mut dynamic_collection_entries: HashMap<String, OrderedSet<Uuid>> = Default::default();
984
985        load_json_properties(
986            &new_named_type,
987            schema_set.schemas(),
988            schema_set.schemas_by_type_uuid(),
989            root_schema_fingerprint,
990            None,
991            &self.properties,
992            &mut properties,
993            &mut property_null_overrides,
994            None,
995            &mut dynamic_collection_entries,
996            buffers,
997        );
998
999        SingleObject::restore(
1000            schema_set,
1001            new_named_type.fingerprint(),
1002            properties,
1003            property_null_overrides,
1004            dynamic_collection_entries,
1005        )
1006    }
1007}
1008
1009#[derive(Default, Clone)]
1010pub struct MetaFile {
1011    pub past_id_assignments: HashMap<ImportableName, AssetId>,
1012    pub persisted_assets: HashSet<AssetId>,
1013}
1014
1015#[derive(Debug, Serialize, Deserialize)]
1016pub struct ImportableInfoJson {
1017    id: Uuid,
1018    persisted: bool,
1019}
1020
1021fn ordered_map_importable_info<S>(
1022    value: &HashMap<String, ImportableInfoJson>,
1023    serializer: S,
1024) -> Result<S::Ok, S::Error>
1025where
1026    S: serde::Serializer,
1027{
1028    let ordered: std::collections::BTreeMap<_, _> = value.iter().collect();
1029    ordered.serialize(serializer)
1030}
1031
1032#[derive(Debug, Serialize, Deserialize)]
1033pub struct MetaFileJson {
1034    #[serde(serialize_with = "ordered_map_importable_info")]
1035    pub importables: HashMap<String, ImportableInfoJson>,
1036}
1037
1038impl MetaFileJson {
1039    #[profiling::function]
1040    pub fn load_from_string(json: &str) -> MetaFile {
1041        let meta_file: MetaFileJson = {
1042            profiling::scope!("serde_json::from_str");
1043            serde_json::from_str(json).unwrap()
1044        };
1045        let mut past_id_assignments = HashMap::default();
1046        let mut persisted_assets = HashSet::default();
1047        for (importable_name, importable_info) in meta_file.importables {
1048            let asset_id = AssetId::from_uuid(importable_info.id);
1049            past_id_assignments.insert(ImportableName::new(importable_name), asset_id);
1050            if importable_info.persisted {
1051                persisted_assets.insert(asset_id);
1052            }
1053        }
1054
1055        MetaFile {
1056            past_id_assignments,
1057            persisted_assets,
1058        }
1059    }
1060
1061    #[profiling::function]
1062    pub fn store_to_string(meta_file: &MetaFile) -> String {
1063        let mut importables = HashMap::default();
1064        for (importable_name, asset_id) in &meta_file.past_id_assignments {
1065            let persisted = meta_file.persisted_assets.contains(&asset_id);
1066
1067            let importable_info = ImportableInfoJson {
1068                id: asset_id.as_uuid(),
1069                persisted,
1070            };
1071
1072            importables.insert(
1073                importable_name
1074                    .name()
1075                    .map(|x| x.to_string())
1076                    .unwrap_or_default(),
1077                importable_info,
1078            );
1079        }
1080
1081        let json_object = MetaFileJson { importables };
1082        profiling::scope!("serde_json::to_string_pretty");
1083        serde_json::to_string_pretty(&json_object).unwrap()
1084    }
1085}