compress_json_rs/
memory.rs

1//! Memory management for compression state.
2//!
3//! This module provides the [`Memory`] struct and related functions for managing
4//! the compression state. It handles value deduplication through caching and
5//! schema sharing for objects with identical keys.
6//!
7//! # Architecture
8//!
9//! The memory system consists of:
10//! - **Store**: A vector of encoded string values
11//! - **Value Cache**: HashMap for deduplicating identical values
12//! - **Schema Cache**: HashMap for deduplicating object schemas (key lists)
13//!
14//! # Deduplication
15//!
16//! When a value is added:
17//! 1. It's first checked against the value cache
18//! 2. If found, the existing key is returned (no new storage)
19//! 3. If not found, a new key is generated and the value is stored
20//!
21//! This ensures that identical values (like repeated strings or numbers)
22//! are only stored once.
23//!
24//! # Example
25//!
26//! ```rust
27//! use compress_json_rs::{make_memory, add_value, mem_to_values};
28//! use serde_json::json;
29//!
30//! let mut mem = make_memory();
31//!
32//! // Adding the same value twice returns the same key
33//! let key1 = add_value(&mut mem, &json!("repeated"));
34//! let key2 = add_value(&mut mem, &json!("repeated"));
35//! assert_eq!(key1, key2);
36//!
37//! // The value is only stored once
38//! let values = mem_to_values(&mem);
39//! assert_eq!(values.len(), 1);
40//! ```
41
42use crate::config::CONFIG;
43use crate::debug::throw_unsupported_data;
44use crate::encode::{encode_bool, encode_num, encode_str};
45use crate::number::int_to_s;
46use serde_json::Value;
47use std::collections::HashMap;
48
/// Key type for compressed references.
///
/// A key is a base-62 encoded string that references a position in the values
/// array. The base-62 alphabet is `0-9`, then `A-Z`, then `a-z`.
///
/// Within this module, keys are produced by `int_to_s` from a sequential
/// counter, so the n-th distinct value stored receives the encoding of `n - 1`.
///
/// # Examples
///
/// - `"0"` - First value (index 0)
/// - `"A"` - Eleventh value (index 10)
/// - `"10"` - Sixty-third value (index 62)
pub type Key = String;
60
/// In-memory structure holding store and caches for compression.
///
/// This struct maintains the state needed during compression. It uses
/// internal caching to deduplicate values and object schemas.
///
/// # Fields (Internal)
///
/// | Field | Type | Description |
/// |-------|------|-------------|
/// | `store` | `Vec<String>` | Encoded string values, in insertion order |
/// | `value_cache` | `HashMap<String, String>` | Maps an encoded value to its key |
/// | `schema_cache` | `HashMap<String, String>` | Maps an object's key list to its schema key |
/// | `key_count` | `usize` | Index used for the next generated key |
///
/// # Usage
///
/// Create with [`make_memory`], add values with [`add_value`], and extract
/// the final values array with [`mem_to_values`].
///
/// # Example
///
/// ```rust
/// use compress_json_rs::{Memory, make_memory, add_value, mem_to_values};
/// use serde_json::json;
///
/// // Create memory store
/// let mut mem: Memory = make_memory();
///
/// // Add values (duplicates are deduplicated)
/// let k1 = add_value(&mut mem, &json!("hello"));
/// let k2 = add_value(&mut mem, &json!("hello"));
/// assert_eq!(k1, k2);
///
/// // Extract values
/// let values = mem_to_values(&mem);
/// assert_eq!(values.len(), 1);
/// ```
pub struct Memory {
    /// The stored values as encoded strings. Within this module a value is
    /// pushed here at the same moment its key is generated, so the position
    /// of an entry matches the index its key encodes.
    pub(crate) store: Vec<String>,
    /// Cache mapping an encoded value string to the key it was stored under;
    /// consulted before storing so identical values are kept only once.
    pub(crate) value_cache: HashMap<String, String>,
    /// Cache mapping a string built from an object's key list to the key of
    /// the stored schema array, so objects with the same keys share a schema.
    pub(crate) schema_cache: HashMap<String, String>,
    /// Counter for generating sequential keys; incremented each time a new
    /// value is stored.
    pub(crate) key_count: usize,
}
108
109/// Convert internal store to values array.
110///
111/// Extracts the values vector from a `Memory` instance. This is typically
112/// called after all values have been added to get the final compressed output.
113///
114/// # Arguments
115///
116/// * `mem` - Reference to the Memory instance
117///
118/// # Returns
119///
120/// A clone of the internal values vector
121///
122/// # Example
123///
124/// ```rust
125/// use compress_json_rs::{make_memory, add_value, mem_to_values};
126/// use serde_json::json;
127///
128/// let mut mem = make_memory();
129/// add_value(&mut mem, &json!({"key": "value"}));
130/// let values = mem_to_values(&mem);
131/// assert!(!values.is_empty());
132/// ```
133pub fn mem_to_values(mem: &Memory) -> Vec<String> {
134    mem.store.clone()
135}
136
137/// Create a new in-memory Memory instance.
138///
139/// Initializes an empty `Memory` struct ready to accept values.
140///
141/// # Returns
142///
143/// A new, empty Memory instance
144///
145/// # Example
146///
147/// ```rust
148/// use compress_json_rs::make_memory;
149///
150/// let mem = make_memory();
151/// // Ready to use with add_value()
152/// ```
153pub fn make_memory() -> Memory {
154    Memory {
155        store: Vec::new(),
156        value_cache: HashMap::new(),
157        schema_cache: HashMap::new(),
158        key_count: 0,
159    }
160}
161
162/// Get or insert a value in the store, returning its key.
163///
164/// This is the core deduplication function. It checks if the encoded value
165/// already exists in the cache, returning the existing key if so. Otherwise,
166/// it generates a new key, stores the value, and caches the mapping.
167fn get_value_key(mem: &mut Memory, value: &str) -> String {
168    if let Some(key) = mem.value_cache.get(value) {
169        return key.clone();
170    }
171    let id = mem.key_count;
172    let key = int_to_s(id);
173    mem.key_count += 1;
174    mem.store.push(value.to_string());
175    mem.value_cache.insert(value.to_string(), key.clone());
176    key
177}
178
179/// Get or insert a schema (object keys), returning its key.
180///
181/// Schemas are stored as arrays of key strings. Objects with identical
182/// keys share the same schema, reducing storage for arrays of similar objects.
183fn get_schema(mem: &mut Memory, keys: &[String]) -> String {
184    let mut schema_keys = keys.to_vec();
185    if CONFIG.sort_key {
186        schema_keys.sort();
187    }
188    let schema = schema_keys.join(",");
189    if let Some(key) = mem.schema_cache.get(&schema) {
190        return key.clone();
191    }
192    // Represent schema as an array of strings
193    let arr = Value::Array(
194        schema_keys
195            .iter()
196            .map(|k| Value::String(k.clone()))
197            .collect(),
198    );
199    let key_id = add_value(mem, &arr);
200    mem.schema_cache.insert(schema, key_id.clone());
201    key_id
202}
203
204/// Recursively add a JSON value to memory, returning its key.
205///
206/// This function handles all JSON value types and recursively processes
207/// nested arrays and objects. Values are deduplicated through the internal
208/// cache.
209///
210/// # Arguments
211///
212/// * `mem` - Mutable reference to the Memory instance
213/// * `o` - Reference to the JSON value to add
214///
215/// # Returns
216///
217/// A base-62 encoded key string referencing the stored value
218///
219/// # Value Encoding
220///
221/// | Type | Encoding | Example |
222/// |------|----------|---------|
223/// | Null | Empty string | `""` |
224/// | Bool | `b\|T` or `b\|F` | `"b\|T"` |
225/// | Number | `n\|<value>` | `"n\|42.5"` |
226/// | String | Plain or `s\|<escaped>` | `"hello"` or `"s\|n\|123"` |
227/// | Array | `a\|<refs>` | `"a\|0\|1\|2"` |
228/// | Object | `o\|<schema>\|<refs>` | `"o\|0\|1\|2"` |
229///
230/// # Example
231///
232/// ```rust
233/// use compress_json_rs::{make_memory, add_value, mem_to_values, decode};
234/// use serde_json::json;
235///
236/// let mut mem = make_memory();
237///
238/// // Add a complex value
239/// let key = add_value(&mut mem, &json!({
240///     "name": "Alice",
241///     "scores": [95, 87, 92]
242/// }));
243///
244/// // The key can be used to decode back
245/// let values = mem_to_values(&mem);
246/// let decoded = decode(&values, &key);
247/// assert_eq!(decoded["name"], "Alice");
248/// ```
249///
250/// # Special Cases (v3.4.0+)
251///
252/// Special value handling depends on configuration:
253///
254/// | Value | `preserve_*` = true | `preserve_*` = false, `error_*` = true | Both false |
255/// |-------|---------------------|----------------------------------------|------------|
256/// | NaN | Encoded as `N\|0` | Panic | Returns `""` (null) |
257/// | Infinity | Encoded as `N\|+` | Panic | Returns `""` (null) |
258/// | -Infinity | Encoded as `N\|-` | Panic | Returns `""` (null) |
259///
260/// - **Null in arrays**: Encoded as `_` to distinguish from empty references
261pub fn add_value(mem: &mut Memory, o: &Value) -> Key {
262    match o {
263        Value::Null => "".to_string(),
264        Value::Bool(b) => get_value_key(mem, &encode_bool(*b)),
265        Value::Number(n) => {
266            // Convert number to f64
267            let f = n.as_f64().unwrap_or_else(|| {
268                // integer fallback
269                n.as_i64()
270                    .map(|i| i as f64)
271                    .or_else(|| n.as_u64().map(|u| u as f64))
272                    .unwrap_or(0.0)
273            });
274
275            // Handle NaN (v3.4.0 logic)
276            if f.is_nan() {
277                if CONFIG.preserve_nan {
278                    return get_value_key(mem, "N|0");
279                }
280                if CONFIG.error_on_nan {
281                    throw_unsupported_data("[number NaN]");
282                }
283                // Convert to null like JSON.stringify
284                return "".to_string();
285            }
286
287            // Handle Infinity (v3.4.0 logic)
288            if f.is_infinite() {
289                if CONFIG.preserve_infinite {
290                    if f.is_sign_positive() {
291                        return get_value_key(mem, "N|+");
292                    } else {
293                        return get_value_key(mem, "N|-");
294                    }
295                }
296                if CONFIG.error_on_infinite {
297                    if f.is_sign_positive() {
298                        throw_unsupported_data("[number Infinity]");
299                    } else {
300                        throw_unsupported_data("[number -Infinity]");
301                    }
302                }
303                // Convert to null like JSON.stringify
304                return "".to_string();
305            }
306
307            // Regular number
308            get_value_key(mem, &encode_num(f))
309        }
310        Value::String(s) => get_value_key(mem, &encode_str(s)),
311        Value::Array(arr) => {
312            let mut acc = String::from("a");
313            for v in arr.iter() {
314                let key = if v.is_null() {
315                    "_".to_string()
316                } else {
317                    add_value(mem, v)
318                };
319                acc.push('|');
320                acc.push_str(&key);
321            }
322            if acc == "a" {
323                acc = "a|".to_string();
324            }
325            get_value_key(mem, &acc)
326        }
327        Value::Object(map) => {
328            let keys: Vec<String> = map.keys().cloned().collect();
329            if keys.is_empty() {
330                return get_value_key(mem, "o|");
331            }
332            let key_id = get_schema(mem, &keys);
333            let mut acc = String::from("o|");
334            acc.push_str(&key_id);
335            for key in keys.iter() {
336                let v = &map[key];
337                let val_key = add_value(mem, v);
338                acc.push('|');
339                acc.push_str(&val_key);
340            }
341            get_value_key(mem, &acc)
342        }
343    }
344}