// json_packer/encode.rs

1use serde_json::Value;
2use crate::{bitstream::BitWriter, header, dict, huffman::HuffmanCodec, types::tag, varint, Error, pool::{collect_string_pool, PoolConfig, write_string_pool}};
3
4fn encode_value(value: &Value, writer: &mut BitWriter, huffman: &HuffmanCodec) -> Result<(), Error> {
5    match value {
6        Value::Null => {
7            writer.write_bits(tag::NULL as u64, 3);
8        }
9        Value::Bool(b) => {
10            writer.write_bits((if *b { tag::BOOL_TRUE } else { tag::BOOL_FALSE }) as u64, 3);
11        }
12        Value::Number(n) => {
13            if let Some(i) = n.as_i64() {
14                writer.write_bits(tag::INT as u64, 3);
15                // is_unsigned = 0
16                writer.write_bits(0, 1);
17                varint::write_sleb128(writer, i);
18            } else if let Some(u) = n.as_u64() {
19                writer.write_bits(tag::INT as u64, 3);
20                // is_unsigned = 1
21                writer.write_bits(1, 1);
22                varint::write_uleb128(writer, u);
23            } else if let Some(f) = n.as_f64() {
24                if !f.is_finite() { return Err(Error::IllegalFloat); }
25                writer.write_bits(tag::FLOAT as u64, 3);
26                writer.write_bits(f.to_bits(), 64);
27            } else {
28                return Err(Error::IllegalFloat);
29            }
30        }
31        Value::String(s) => {
32            writer.write_bits(tag::STRING as u64, 3);
33            let bytes = s.as_bytes();
34            varint::write_uleb128(writer, bytes.len() as u64);
35            for &b in bytes { writer.write_byte(b); }
36        }
37        Value::Array(arr) => {
38            writer.write_bits(tag::ARRAY as u64, 3);
39            varint::write_uleb128(writer, arr.len() as u64);
40            for item in arr { encode_value(item, writer, huffman)?; }
41        }
42        Value::Object(map) => {
43            writer.write_bits(tag::OBJECT as u64, 3);
44            varint::write_uleb128(writer, map.len() as u64);
45            for (k, v) in map {
46                huffman.write_key_code(k, writer)?;
47                encode_value(v, writer, huffman)?;
48            }
49        }
50    }
51    Ok(())
52}
53
54
/// Tuning knobs for `compress_with_options`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressOptions {
    /// When `true`, `compress_with_options` collects a string pool and emits
    /// the v2 header; when `false` it emits the v1 header with no pool.
    pub enable_value_pool: bool,
    /// Passed through as `PoolConfig::min_repeats` when the pool is enabled.
    /// NOTE(review): presumably the minimum number of occurrences a string
    /// needs before it is pooled — confirm against `collect_string_pool`.
    pub pool_min_repeats: u32,
    /// Passed through as `PoolConfig::min_string_len` when the pool is enabled.
    /// NOTE(review): presumably the minimum string length eligible for
    /// pooling — confirm against `collect_string_pool`.
    pub pool_min_string_len: usize,
}

impl Default for CompressOptions {
    /// Returns the defaults: value pool disabled, `pool_min_repeats = 3`,
    /// `pool_min_string_len = 8`.
    fn default() -> Self {
        Self {
            enable_value_pool: false,
            pool_min_repeats: 3,
            pool_min_string_len: 8,
        }
    }
}
65
66pub fn compress_with_options(value: &Value, opt: &CompressOptions) -> Result<Vec<u8>, Error> {
67    // 1) 统计键频
68    let freq = dict::collect_keys(value);
69    // 2) 构建 canonical Huffman
70    let codec = HuffmanCodec::from_frequencies(&freq)?;
71
72    // 3) 值池(仅字符串,按需)
73    let (version, pool_len, string_pool) = if opt.enable_value_pool {
74        let pool = collect_string_pool(value, PoolConfig { min_repeats: opt.pool_min_repeats, min_string_len: opt.pool_min_string_len });
75        (header::VERSION_V2, pool.entries.len() as u64, Some(pool))
76    } else {
77        (header::VERSION_V1, 0, None)
78    };
79
80    // 4) 写包头 + 字典表 + 值池 + 数据
81    let mut writer = BitWriter::new();
82    header::write_header(&mut writer, version, freq.len() as u64, pool_len);
83    dict::write_dictionary(&mut writer, &freq);
84    if let Some(pool) = &string_pool {
85        write_string_pool(&mut writer, pool);
86    }
87    // 统一分派编码(启用/禁用值池皆可)
88    encode_value_dispatch(value, &mut writer, &codec, string_pool.as_ref())?;
89
90    Ok(writer.into_bytes())
91}
92
93fn encode_value_dispatch(value: &Value, writer: &mut BitWriter, huffman: &HuffmanCodec, string_pool: Option<&crate::pool::StringPool>) -> Result<(), Error> {
94    match value {
95        Value::String(s) => {
96            if let Some(pool) = string_pool {
97                if let Some(&id) = pool.index.get(s) {
98                    writer.write_bits(tag::STRING as u64, 3);
99                    writer.write_bits(1, 1); // is_pool_ref
100                    varint::write_uleb128(writer, id);
101                    return Ok(());
102                }
103            }
104            // 非引用路径(与原逻辑相同,但需要写 is_pool_ref=0 于 v2)
105            if string_pool.is_some() {
106                writer.write_bits(tag::STRING as u64, 3);
107                writer.write_bits(0, 1);
108                let bytes = s.as_bytes();
109                varint::write_uleb128(writer, bytes.len() as u64);
110                for &b in bytes { writer.write_byte(b); }
111                return Ok(());
112            }
113            // v1:保持原逻辑(无额外 is_pool_ref)
114            writer.write_bits(tag::STRING as u64, 3);
115            let bytes = s.as_bytes();
116            varint::write_uleb128(writer, bytes.len() as u64);
117            for &b in bytes { writer.write_byte(b); }
118            Ok(())
119        }
120        Value::Array(a) => {
121            writer.write_bits(tag::ARRAY as u64, 3);
122            varint::write_uleb128(writer, a.len() as u64);
123            for x in a { encode_value_dispatch(x, writer, huffman, string_pool)?; }
124            Ok(())
125        }
126        Value::Object(m) => {
127            writer.write_bits(tag::OBJECT as u64, 3);
128            varint::write_uleb128(writer, m.len() as u64);
129            for (k, v) in m {
130                huffman.write_key_code(k, writer)?;
131                encode_value_dispatch(v, writer, huffman, string_pool)?;
132            }
133            Ok(())
134        }
135        _ => encode_value(value, writer, huffman),
136    }
137}