compress_json_rs/memory.rs
1//! Memory management for compression state.
2//!
3//! This module provides the [`Memory`] struct and related functions for managing
4//! the compression state. It handles value deduplication through caching and
5//! schema sharing for objects with identical keys.
6//!
7//! # Architecture
8//!
9//! The memory system consists of:
10//! - **Store**: A vector of encoded string values
11//! - **Value Cache**: HashMap for deduplicating identical values
12//! - **Schema Cache**: HashMap for deduplicating object schemas (key lists)
13//!
14//! # Deduplication
15//!
//! When a value is added:
//! 1. It is encoded to a string, and that encoding is looked up in the value cache
//! 2. If found, the existing key is returned (no new storage)
//! 3. If not found, the next sequential key is generated, and the encoding is
//!    appended to the store and recorded in the cache
20//!
21//! This ensures that identical values (like repeated strings or numbers)
22//! are only stored once.
23//!
24//! # Example
25//!
26//! ```rust
27//! use compress_json_rs::{make_memory, add_value, mem_to_values};
28//! use serde_json::json;
29//!
30//! let mut mem = make_memory();
31//!
32//! // Adding the same value twice returns the same key
33//! let key1 = add_value(&mut mem, &json!("repeated"));
34//! let key2 = add_value(&mut mem, &json!("repeated"));
35//! assert_eq!(key1, key2);
36//!
37//! // The value is only stored once
38//! let values = mem_to_values(&mem);
39//! assert_eq!(values.len(), 1);
40//! ```
41
42use crate::config::CONFIG;
43use crate::debug::throw_unsupported_data;
44use crate::encode::{encode_bool, encode_num, encode_str};
45use crate::number::int_to_s;
46use serde_json::Value;
47use std::collections::HashMap;
48
/// Reference key into the compressed values array.
///
/// A key is the base-62 rendering of a position in the values array. The
/// digit alphabet is `0-9`, followed by `A-Z`, followed by `a-z`.
///
/// # Examples
///
/// - `"0"` refers to index 0 (the first value)
/// - `"A"` refers to index 10 (the eleventh value)
/// - `"10"` refers to index 62 (the sixty-third value)
pub type Key = String;
60
/// In-memory structure holding the store and caches used during compression.
///
/// This struct carries all state needed while compressing. Its caches let
/// identical encoded values and identical object schemas share a single key.
///
/// The type derives [`Debug`], [`Clone`] and [`Default`]; `Memory::default()`
/// produces an empty store, empty caches and a key counter of zero.
///
/// # Fields (Internal)
///
/// | Field | Type | Description |
/// |-------|------|-------------|
/// | `store` | `Vec<String>` | Encoded string values |
/// | `value_cache` | `HashMap<String, String>` | Maps encoded values to keys |
/// | `schema_cache` | `HashMap<String, String>` | Maps schemas to keys |
/// | `key_count` | `usize` | Key counter |
///
/// # Usage
///
/// Create with [`make_memory`], add values with [`add_value`], and extract
/// the final values array with [`mem_to_values`].
///
/// # Example
///
/// ```rust
/// use compress_json_rs::{Memory, make_memory, add_value, mem_to_values};
/// use serde_json::json;
///
/// // Create memory store
/// let mut mem: Memory = make_memory();
///
/// // Add values (duplicates are deduplicated)
/// let k1 = add_value(&mut mem, &json!("hello"));
/// let k2 = add_value(&mut mem, &json!("hello"));
/// assert_eq!(k1, k2);
///
/// // Extract values
/// let values = mem_to_values(&mem);
/// assert_eq!(values.len(), 1);
/// ```
#[derive(Debug, Clone, Default)]
pub struct Memory {
    /// The actual stored values (encoded strings), in key order
    pub(crate) store: Vec<String>,
    /// Cache mapping encoded values to their keys
    pub(crate) value_cache: HashMap<String, String>,
    /// Cache mapping object schemas to their keys
    pub(crate) schema_cache: HashMap<String, String>,
    /// Counter for generating sequential keys
    pub(crate) key_count: usize,
}
108
109/// Convert internal store to values array.
110///
111/// Extracts the values vector from a `Memory` instance. This is typically
112/// called after all values have been added to get the final compressed output.
113///
114/// # Arguments
115///
116/// * `mem` - Reference to the Memory instance
117///
118/// # Returns
119///
120/// A clone of the internal values vector
121///
122/// # Example
123///
124/// ```rust
125/// use compress_json_rs::{make_memory, add_value, mem_to_values};
126/// use serde_json::json;
127///
128/// let mut mem = make_memory();
129/// add_value(&mut mem, &json!({"key": "value"}));
130/// let values = mem_to_values(&mem);
131/// assert!(!values.is_empty());
132/// ```
133pub fn mem_to_values(mem: &Memory) -> Vec<String> {
134 mem.store.clone()
135}
136
137/// Create a new in-memory Memory instance.
138///
139/// Initializes an empty `Memory` struct ready to accept values.
140///
141/// # Returns
142///
143/// A new, empty Memory instance
144///
145/// # Example
146///
147/// ```rust
148/// use compress_json_rs::make_memory;
149///
150/// let mem = make_memory();
151/// // Ready to use with add_value()
152/// ```
153pub fn make_memory() -> Memory {
154 Memory {
155 store: Vec::new(),
156 value_cache: HashMap::new(),
157 schema_cache: HashMap::new(),
158 key_count: 0,
159 }
160}
161
162/// Get or insert a value in the store, returning its key.
163///
164/// This is the core deduplication function. It checks if the encoded value
165/// already exists in the cache, returning the existing key if so. Otherwise,
166/// it generates a new key, stores the value, and caches the mapping.
167fn get_value_key(mem: &mut Memory, value: &str) -> String {
168 if let Some(key) = mem.value_cache.get(value) {
169 return key.clone();
170 }
171 let id = mem.key_count;
172 let key = int_to_s(id);
173 mem.key_count += 1;
174 mem.store.push(value.to_string());
175 mem.value_cache.insert(value.to_string(), key.clone());
176 key
177}
178
179/// Get or insert a schema (object keys), returning its key.
180///
181/// Schemas are stored as arrays of key strings. Objects with identical
182/// keys share the same schema, reducing storage for arrays of similar objects.
183fn get_schema(mem: &mut Memory, keys: &[String]) -> String {
184 let mut schema_keys = keys.to_vec();
185 if CONFIG.sort_key {
186 schema_keys.sort();
187 }
188 let schema = schema_keys.join(",");
189 if let Some(key) = mem.schema_cache.get(&schema) {
190 return key.clone();
191 }
192 // Represent schema as an array of strings
193 let arr = Value::Array(
194 schema_keys
195 .iter()
196 .map(|k| Value::String(k.clone()))
197 .collect(),
198 );
199 let key_id = add_value(mem, &arr);
200 mem.schema_cache.insert(schema, key_id.clone());
201 key_id
202}
203
204/// Recursively add a JSON value to memory, returning its key.
205///
206/// This function handles all JSON value types and recursively processes
207/// nested arrays and objects. Values are deduplicated through the internal
208/// cache.
209///
210/// # Arguments
211///
212/// * `mem` - Mutable reference to the Memory instance
213/// * `o` - Reference to the JSON value to add
214///
215/// # Returns
216///
217/// A base-62 encoded key string referencing the stored value
218///
219/// # Value Encoding
220///
221/// | Type | Encoding | Example |
222/// |------|----------|---------|
223/// | Null | Empty string | `""` |
224/// | Bool | `b\|T` or `b\|F` | `"b\|T"` |
225/// | Number | `n\|<value>` | `"n\|42.5"` |
226/// | String | Plain or `s\|<escaped>` | `"hello"` or `"s\|n\|123"` |
227/// | Array | `a\|<refs>` | `"a\|0\|1\|2"` |
228/// | Object | `o\|<schema>\|<refs>` | `"o\|0\|1\|2"` |
229///
230/// # Example
231///
232/// ```rust
233/// use compress_json_rs::{make_memory, add_value, mem_to_values, decode};
234/// use serde_json::json;
235///
236/// let mut mem = make_memory();
237///
238/// // Add a complex value
239/// let key = add_value(&mut mem, &json!({
240/// "name": "Alice",
241/// "scores": [95, 87, 92]
242/// }));
243///
244/// // The key can be used to decode back
245/// let values = mem_to_values(&mem);
246/// let decoded = decode(&values, &key);
247/// assert_eq!(decoded["name"], "Alice");
248/// ```
249///
250/// # Special Cases (v3.4.0+)
251///
252/// Special value handling depends on configuration:
253///
254/// | Value | `preserve_*` = true | `preserve_*` = false, `error_*` = true | Both false |
255/// |-------|---------------------|----------------------------------------|------------|
256/// | NaN | Encoded as `N\|0` | Panic | Returns `""` (null) |
257/// | Infinity | Encoded as `N\|+` | Panic | Returns `""` (null) |
258/// | -Infinity | Encoded as `N\|-` | Panic | Returns `""` (null) |
259///
260/// - **Null in arrays**: Encoded as `_` to distinguish from empty references
261pub fn add_value(mem: &mut Memory, o: &Value) -> Key {
262 match o {
263 Value::Null => "".to_string(),
264 Value::Bool(b) => get_value_key(mem, &encode_bool(*b)),
265 Value::Number(n) => {
266 // Convert number to f64
267 let f = n.as_f64().unwrap_or_else(|| {
268 // integer fallback
269 n.as_i64()
270 .map(|i| i as f64)
271 .or_else(|| n.as_u64().map(|u| u as f64))
272 .unwrap_or(0.0)
273 });
274
275 // Handle NaN (v3.4.0 logic)
276 if f.is_nan() {
277 if CONFIG.preserve_nan {
278 return get_value_key(mem, "N|0");
279 }
280 if CONFIG.error_on_nan {
281 throw_unsupported_data("[number NaN]");
282 }
283 // Convert to null like JSON.stringify
284 return "".to_string();
285 }
286
287 // Handle Infinity (v3.4.0 logic)
288 if f.is_infinite() {
289 if CONFIG.preserve_infinite {
290 if f.is_sign_positive() {
291 return get_value_key(mem, "N|+");
292 } else {
293 return get_value_key(mem, "N|-");
294 }
295 }
296 if CONFIG.error_on_infinite {
297 if f.is_sign_positive() {
298 throw_unsupported_data("[number Infinity]");
299 } else {
300 throw_unsupported_data("[number -Infinity]");
301 }
302 }
303 // Convert to null like JSON.stringify
304 return "".to_string();
305 }
306
307 // Regular number
308 get_value_key(mem, &encode_num(f))
309 }
310 Value::String(s) => get_value_key(mem, &encode_str(s)),
311 Value::Array(arr) => {
312 let mut acc = String::from("a");
313 for v in arr.iter() {
314 let key = if v.is_null() {
315 "_".to_string()
316 } else {
317 add_value(mem, v)
318 };
319 acc.push('|');
320 acc.push_str(&key);
321 }
322 if acc == "a" {
323 acc = "a|".to_string();
324 }
325 get_value_key(mem, &acc)
326 }
327 Value::Object(map) => {
328 let keys: Vec<String> = map.keys().cloned().collect();
329 if keys.is_empty() {
330 return get_value_key(mem, "o|");
331 }
332 let key_id = get_schema(mem, &keys);
333 let mut acc = String::from("o|");
334 acc.push_str(&key_id);
335 for key in keys.iter() {
336 let v = &map[key];
337 let val_key = add_value(mem, v);
338 acc.push('|');
339 acc.push_str(&val_key);
340 }
341 get_value_key(mem, &acc)
342 }
343 }
344}