json_packer/
dict.rs

1use std::collections::HashMap;
2use serde_json::Value;
3use crate::{bitstream::{BitWriter, BitReader}, varint, Error};
4
5/// 收集 JSON 中所有对象键的频率统计
6pub fn collect_keys(json: &Value) -> HashMap<String, u64> {
7    let mut freq_map = HashMap::new();
8    collect_keys_recursive(json, &mut freq_map);
9    freq_map
10}
11
12/// 递归遍历 JSON 值,统计对象键的频率
13fn collect_keys_recursive(value: &Value, freq_map: &mut HashMap<String, u64>) {
14    match value {
15        Value::Object(map) => {
16            // 统计当前对象的所有键
17            for (key, val) in map {
18                *freq_map.entry(key.clone()).or_insert(0) += 1;
19                // 递归处理值
20                collect_keys_recursive(val, freq_map);
21            }
22        },
23        Value::Array(arr) => {
24            // 递归处理数组中的每个元素
25            for item in arr {
26                collect_keys_recursive(item, freq_map);
27            }
28        },
29        _ => {
30            // 其他类型(null, bool, number, string)无需处理
31        }
32    }
33}
34
35/// 写入字典表到位流
36/// 格式:[KEY_COUNT(uleb128)] + 对每个键: [KEY_LEN(uleb128)][KEY_UTF8...][FREQ(uleb128)]
37pub fn write_dictionary(writer: &mut BitWriter, freq_map: &HashMap<String, u64>) {
38    // 写入键的总数
39    varint::write_uleb128(writer, freq_map.len() as u64);
40    
41    // 按字典序排序键名,确保确定性输出
42    let mut sorted_keys: Vec<_> = freq_map.iter().collect();
43    sorted_keys.sort_by(|a, b| a.0.cmp(b.0));
44    
45    // 写入每个键的信息
46    for (key, &freq) in sorted_keys {
47        let key_bytes = key.as_bytes();
48        
49        // 键长度 (ULEB128)
50        varint::write_uleb128(writer, key_bytes.len() as u64);
51        
52        // 键内容 (UTF-8 字节)
53        for &byte in key_bytes {
54            writer.write_byte(byte);
55        }
56        
57        // 键频率 (ULEB128)
58        varint::write_uleb128(writer, freq);
59    }
60}
61
62/// 从位流读取字典表
63/// 返回键频率映射表
64pub fn read_dictionary(reader: &mut BitReader) -> Result<HashMap<String, u64>, Error> {
65    let mut freq_map = HashMap::new();
66    
67    // 读取键的总数
68    let key_count = varint::read_uleb128(reader)?;
69    
70    for _ in 0..key_count {
71        // 读取键长度
72        let key_len = varint::read_uleb128(reader)? as usize;
73        
74        // 读取键内容
75        let mut key_bytes = Vec::with_capacity(key_len);
76        for _ in 0..key_len {
77            key_bytes.push(reader.read_byte()?);
78        }
79        
80        // 转换为 UTF-8 字符串
81        let key = String::from_utf8(key_bytes)?;
82        
83        // 读取频率
84        let freq = varint::read_uleb128(reader)?;
85        
86        freq_map.insert(key, freq);
87    }
88    
89    Ok(freq_map)
90}