nodedb_columnar/memtable/column_data/
dict_encode.rs1use super::types::ColumnData;
6
7impl ColumnData {
8 pub fn try_dict_encode(col: &ColumnData, max_cardinality: u32) -> Option<ColumnData> {
10 let (data, offsets, valid) = match col {
11 ColumnData::String {
12 data,
13 offsets,
14 valid,
15 } => (data, offsets, valid),
16 _ => return None,
17 };
18
19 let row_count = col.len();
20 let mut dictionary: Vec<String> = Vec::new();
21 let mut reverse: std::collections::HashMap<String, u32> = std::collections::HashMap::new();
22 let mut ids: Vec<u32> = Vec::with_capacity(row_count);
24
25 for i in 0..row_count {
26 if valid.as_ref().is_some_and(|v| !v[i]) {
27 ids.push(0);
28 continue;
29 }
30 let start = offsets[i] as usize;
31 let end = offsets[i + 1] as usize;
32 let s = match std::str::from_utf8(&data[start..end]) {
33 Ok(s) => s,
34 Err(_) => return None,
35 };
36 let id = if let Some(&existing) = reverse.get(s) {
37 existing
38 } else {
39 if dictionary.len() as u32 >= max_cardinality {
40 return None;
41 }
42 let new_id = dictionary.len() as u32;
43 dictionary.push(s.to_string());
44 reverse.insert(s.to_string(), new_id);
45 new_id
46 };
47 ids.push(id);
48 }
49
50 Some(ColumnData::DictEncoded {
51 ids,
52 dictionary,
53 reverse,
54 valid: valid.clone(),
55 })
56 }
57}