Skip to main content

kora_doc/
recompose.rs

1//! Packed document to JSON reconstruction pipeline.
2//!
3//! [`Recomposer`] is the inverse of
4//! [`Decomposer`](crate::decompose::Decomposer). Given a [`PackedDoc`], the
5//! [`IdRegistry`], and the collection's [`ValueDictionary`], it rebuilds a
6//! `serde_json::Value` by:
7//!
8//! 1. Iterating over packed fields in field-ID order.
9//! 2. Resolving each numeric `FieldId` back to its dot-separated path string
10//!    via the registry.
11//! 3. Decoding the [`FieldValue`](crate::packed::FieldValue) into a JSON
12//!    primitive, looking up `DictRef` values through the dictionary.
13//! 4. Inserting each value into a nested `serde_json::Map` tree by splitting
14//!    the dotted path and creating intermediate objects as needed.
15//!
16//! ## Projection
17//!
18//! [`Recomposer::project`] reconstructs only a caller-specified subset of
19//! fields, leveraging [`PackedDoc::read_field`]'s O(log F) binary search to
20//! skip unneeded data. This is the fast path for queries that select a small
21//! number of fields from large documents.
22
23use std::str;
24
25use serde_json::{Map, Value};
26use thiserror::Error;
27
28use crate::dictionary::{DictionaryError, ValueDictionary};
29use crate::packed::{FieldValue, PackedDoc, PackedDocError};
30use crate::registry::{CollectionId, FieldId, IdRegistry};
31
/// Errors returned when reconstructing JSON from a packed document.
///
/// Every fallible step of [`Recomposer::recompose`] and
/// [`Recomposer::project`] surfaces through this type: registry lookups,
/// dictionary decoding, packed-payload reads, value decoding, and insertion
/// into the JSON tree.
#[derive(Debug, Error)]
pub enum RecomposeError {
    /// Registry does not know one of the field IDs in the packed payload.
    ///
    /// Raised when `IdRegistry::field_path` returns `None` for a field that is
    /// present in the packed document.
    #[error("unknown field id {field_id} in collection {collection_id}")]
    UnknownFieldId {
        /// Collection that owns the field.
        collection_id: CollectionId,
        /// Unknown field id.
        field_id: FieldId,
    },
    /// Dictionary operation failed.
    ///
    /// Converted automatically (via `#[from]`) from the error returned while
    /// decoding a `FieldValue::DictRef` through the value dictionary.
    #[error(transparent)]
    Dictionary(#[from] DictionaryError),
    /// Packed document read failed.
    ///
    /// Converted automatically (via `#[from]`) from errors produced by
    /// `PackedDoc::iter_fields`, its yielded entries, or `PackedDoc::read_field`.
    #[error(transparent)]
    Packed(#[from] PackedDocError),
    /// Stored UTF-8 bytes are invalid.
    ///
    /// Raised for both inline string bytes and dictionary-decoded bytes when
    /// `str::from_utf8` rejects them.
    #[error("invalid utf-8 string at field id {field_id}: {message}")]
    InvalidUtf8 {
        /// Field ID with invalid bytes.
        field_id: FieldId,
        /// UTF-8 decoder error.
        message: String,
    },
    /// Structured payload bytes are invalid JSON.
    ///
    /// Also used when an `F64` field holds a value that
    /// `serde_json::Number::from_f64` rejects; in that case `message` is the
    /// fixed text "non-finite float cannot be represented in JSON".
    #[error("invalid structured payload at field id {field_id}: {message}")]
    InvalidStructuredPayload {
        /// Field ID with invalid payload bytes.
        field_id: FieldId,
        /// serde_json parser error string.
        message: String,
    },
    /// Field path conflicts with an existing scalar/object shape.
    ///
    /// Raised while walking a dotted path when a node that must be a JSON
    /// object (the current container or an intermediate segment) is some other
    /// kind of value.
    #[error("path conflict while inserting '{path}'")]
    PathConflict {
        /// Dotted path being inserted.
        path: String,
    },
}
72
73/// Rebuild full or partial JSON documents from `PackedDoc`.
74pub struct Recomposer;
75
76impl Recomposer {
77    /// Reconstruct a full JSON document.
78    pub fn recompose(
79        packed: &PackedDoc,
80        registry: &IdRegistry,
81        dictionary: &ValueDictionary,
82        collection_id: CollectionId,
83    ) -> Result<Value, RecomposeError> {
84        let mut root = Value::Object(Map::new());
85        for entry in packed.iter_fields()? {
86            let (field_id, field_value) = entry?;
87            let path = registry.field_path(collection_id, field_id).ok_or(
88                RecomposeError::UnknownFieldId {
89                    collection_id,
90                    field_id,
91                },
92            )?;
93            let json_value = field_to_json(field_id, field_value, dictionary)?;
94            insert_at_path(&mut root, path, json_value)?;
95        }
96        Ok(root)
97    }
98
99    /// Reconstruct only selected fields by field ID.
100    pub fn project(
101        packed: &PackedDoc,
102        field_ids: &[FieldId],
103        registry: &IdRegistry,
104        dictionary: &ValueDictionary,
105        collection_id: CollectionId,
106    ) -> Result<Value, RecomposeError> {
107        let mut root = Value::Object(Map::new());
108        for field_id in field_ids {
109            if let Some(field_value) = packed.read_field(*field_id)? {
110                let path = registry.field_path(collection_id, *field_id).ok_or(
111                    RecomposeError::UnknownFieldId {
112                        collection_id,
113                        field_id: *field_id,
114                    },
115                )?;
116                let json_value = field_to_json(*field_id, field_value, dictionary)?;
117                insert_at_path(&mut root, path, json_value)?;
118            }
119        }
120        Ok(root)
121    }
122}
123
124fn field_to_json(
125    field_id: FieldId,
126    value: FieldValue,
127    dictionary: &ValueDictionary,
128) -> Result<Value, RecomposeError> {
129    match value {
130        FieldValue::Null => Ok(Value::Null),
131        FieldValue::Bool(value) => Ok(Value::Bool(value)),
132        FieldValue::I64(value) => Ok(Value::Number(value.into())),
133        FieldValue::F64(value) => serde_json::Number::from_f64(value)
134            .map(Value::Number)
135            .ok_or_else(|| RecomposeError::InvalidStructuredPayload {
136                field_id,
137                message: "non-finite float cannot be represented in JSON".to_string(),
138            }),
139        FieldValue::InlineBytes(bytes) => {
140            let string = str::from_utf8(&bytes).map_err(|err| RecomposeError::InvalidUtf8 {
141                field_id,
142                message: err.to_string(),
143            })?;
144            Ok(Value::String(string.to_string()))
145        }
146        FieldValue::DictRef(dict_id) => {
147            let bytes = dictionary.decode(&crate::dictionary::StoredValue::DictRef(dict_id))?;
148            let string = str::from_utf8(&bytes).map_err(|err| RecomposeError::InvalidUtf8 {
149                field_id,
150                message: err.to_string(),
151            })?;
152            Ok(Value::String(string.to_string()))
153        }
154        FieldValue::ArrayBytes(bytes) => serde_json::from_slice::<Value>(&bytes).map_err(|err| {
155            RecomposeError::InvalidStructuredPayload {
156                field_id,
157                message: err.to_string(),
158            }
159        }),
160    }
161}
162
163fn insert_at_path(root: &mut Value, path: &str, value: Value) -> Result<(), RecomposeError> {
164    if path.is_empty() {
165        *root = value;
166        return Ok(());
167    }
168
169    let parts: Vec<&str> = path.split('.').collect();
170    insert_path_parts(root, &parts, value, path)
171}
172
173fn insert_path_parts(
174    current: &mut Value,
175    parts: &[&str],
176    value: Value,
177    full_path: &str,
178) -> Result<(), RecomposeError> {
179    if parts.is_empty() {
180        *current = value;
181        return Ok(());
182    }
183
184    let key = parts[0];
185    if parts.len() == 1 {
186        let map = current
187            .as_object_mut()
188            .ok_or_else(|| RecomposeError::PathConflict {
189                path: full_path.to_string(),
190            })?;
191        map.insert(key.to_string(), value);
192        return Ok(());
193    }
194
195    let map = current
196        .as_object_mut()
197        .ok_or_else(|| RecomposeError::PathConflict {
198            path: full_path.to_string(),
199        })?;
200
201    let child = map
202        .entry(key.to_string())
203        .or_insert_with(|| Value::Object(Map::new()));
204
205    if !child.is_object() {
206        return Err(RecomposeError::PathConflict {
207            path: full_path.to_string(),
208        });
209    }
210
211    insert_path_parts(child, &parts[1..], value, full_path)
212}
213
#[cfg(test)]
mod tests {
    use serde_json::json;

    use crate::decompose::Decomposer;
    use crate::dictionary::ValueDictionary;
    use crate::registry::IdRegistry;

    use super::*;

    /// Decomposes a small nested document, then projects one nested field and
    /// one top-level field and checks that nothing else appears in the output.
    #[test]
    fn projection_returns_requested_fields_only() {
        let mut registry = IdRegistry::new();
        let mut dictionary = ValueDictionary::default();
        let collection_id = registry
            .get_or_create_collection_id("users")
            .expect("collection id should allocate");

        let document = json!({
            "name": "Augustus",
            "age": 30,
            "address": {"city": "Accra", "zip": "00233"}
        });
        let mut decomposer = Decomposer::new(collection_id, &mut registry, &mut dictionary, 1);
        let packed = decomposer
            .decompose(&document, 1)
            .expect("decompose should work");

        // Look up the numeric ID the registry assigned to a dotted path.
        let lookup = |path: &str| {
            registry
                .segment(collection_id)
                .and_then(|segment| segment.field_id(path))
        };
        let city_id = lookup("address.city").expect("city field id should exist");
        let age_id = lookup("age").expect("age field id should exist");

        let wanted = [city_id, age_id];
        let projected =
            Recomposer::project(&packed, &wanted, &registry, &dictionary, collection_id)
                .expect("projection should work");
        assert_eq!(projected, json!({"address": {"city": "Accra"}, "age": 30}));
    }
}
263}