1use std::collections::HashMap;
2
3use serde_json::Value;
4
5use crate::{bitstream::BitWriter, varint, types::tag};
6
/// A table of pooled strings together with a reverse lookup into that table.
///
/// `collect_string_pool` fills both fields so that they agree: every string in
/// `entries` appears as a key of `index`, mapped to its position in `entries`.
/// Both fields are public, so code that builds a pool by hand is responsible
/// for keeping them consistent.
#[derive(Clone, Debug)]
pub struct StringPool {
    /// Pooled strings, in the order `write_string_pool` emits them.
    pub entries: Vec<String>,
    /// Maps a pooled string to its zero-based position in `entries`.
    pub index: HashMap<String, u64>,
}
14
/// Thresholds deciding which strings `collect_string_pool` admits to the pool.
#[derive(Clone, Copy, Debug)]
pub struct PoolConfig {
    /// A string must occur at least this many times to be pooled.
    pub min_repeats: u32,
    /// A string must be at least this many bytes long to be pooled.
    pub min_string_len: usize,
}

impl Default for PoolConfig {
    /// Returns the stock thresholds: three or more occurrences, eight or more bytes.
    fn default() -> PoolConfig {
        PoolConfig {
            min_repeats: 3,
            min_string_len: 8,
        }
    }
}
24
25pub fn collect_string_pool(root: &Value, cfg: PoolConfig) -> StringPool {
26 let mut counter: HashMap<String, u32> = HashMap::new();
27 fn walk(v: &Value, counter: &mut HashMap<String, u32>) {
28 match v {
29 Value::String(s) => {
30 *counter.entry(s.clone()).or_insert(0) += 1;
31 }
32 Value::Array(a) => for x in a { walk(x, counter); },
33 Value::Object(m) => for (_k, x) in m { walk(x, counter); },
34 _ => {}
35 }
36 }
37 walk(root, &mut counter);
38
39 let mut candidates: Vec<(String, u32)> = counter
41 .into_iter()
42 .filter(|(s, c)| *c >= cfg.min_repeats && s.len() >= cfg.min_string_len)
43 .collect();
44 candidates.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
45
46 let mut entries: Vec<String> = Vec::with_capacity(candidates.len());
47 let mut index: HashMap<String, u64> = HashMap::with_capacity(candidates.len());
48 for (i, (s, _)) in candidates.into_iter().enumerate() {
49 index.insert(s.clone(), i as u64);
50 entries.push(s);
51 }
52 StringPool { entries, index }
53}
54
55pub fn write_string_pool(writer: &mut BitWriter, pool: &StringPool) {
56 for s in &pool.entries {
57 writer.write_bits(tag::STRING as u64, 3);
59 let bytes = s.as_bytes();
60 varint::write_uleb128(writer, bytes.len() as u64);
61 for &b in bytes { writer.write_byte(b); }
62 }
63}
64
65