use indexmap::IndexMap;
use serde::Serialize;
use zerocopy::{BigEndian, Immutable, IntoBytes, KnownLayout, TryFromBytes, Unaligned, U32};
use crate::tag::tagdata::DictData;
/// Fixed 16-byte header at the start of the encoded dictionary tag data.
///
/// All numeric fields are stored as big-endian `u32` values. The struct is
/// `repr(C, packed)` so it can be viewed directly over a byte slice and
/// written back out with `as_bytes()`.
#[derive(TryFromBytes, IntoBytes, KnownLayout, Immutable, Unaligned)]
#[repr(C, packed)]
struct Header {
/// Type signature; the serializer in this file writes `DataSignature::DictData`.
type_signature: U32<BigEndian>,
/// Reserved bytes; the serializer in this file writes zeros.
reserved: [u8; 4],
/// Number of entries in the record table that follows the header.
num_records: U32<BigEndian>,
/// Byte stride between consecutive records in the table.
record_size: U32<BigEndian>,
}
/// Key/value locator stored in the record table (16 bytes, big-endian fields).
///
/// The parser in this file uses the offsets to index the same byte buffer
/// that begins with the header, and treats the lengths as byte counts of
/// UTF-16BE encoded text.
#[derive(TryFromBytes, IntoBytes, KnownLayout, Immutable, Unaligned, Clone, Copy)]
#[repr(C, packed)]
struct Record {
/// Byte offset of the key string.
key_offset: U32<BigEndian>,
/// Length of the key string in bytes.
key_length: U32<BigEndian>,
/// Byte offset of the value string.
value_offset: U32<BigEndian>,
/// Length of the value string in bytes.
value_length: U32<BigEndian>,
}
/// Decoded dictionary: a map from key strings to value strings that keeps
/// the order in which entries were inserted.
#[derive(Serialize)]
pub struct DictType {
// `flatten` makes serde emit the map entries directly instead of nesting
// them under an `entries` field.
#[serde(flatten)]
entries: IndexMap<String, String>,
}
/// Decodes big-endian UTF-16 bytes into a `String`.
///
/// Returns `None` when `bytes` has an odd length, since it cannot be split
/// into whole 16-bit code units. Unpaired surrogates are not an error: each
/// one is replaced with U+FFFD.
fn decode_utf16be(bytes: &[u8]) -> Option<String> {
    let pairs = bytes.chunks_exact(2);
    // A leftover byte means the input is not a whole number of code units.
    if !pairs.remainder().is_empty() {
        return None;
    }
    let code_units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));
    let text = char::decode_utf16(code_units)
        .map(|decoded| decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect();
    Some(text)
}
impl From<&DictData> for DictType {
    /// Parses encoded dictionary tag bytes into an ordered key/value map.
    ///
    /// Parsing is best-effort and never panics on malformed input:
    ///
    /// * If the header is missing, the record size is not one of the layouts
    ///   allowed by the ICC `dictType` definition (16, 24 or 32 bytes), or the
    ///   record table extends past the end of the data, an empty dictionary
    ///   is returned.
    /// * An individual record whose key or value range lies outside the data,
    ///   or whose key or value has an odd byte length, is skipped.
    ///
    /// Only the leading 16 bytes of each record (key and value locators) are
    /// read; any further per-record fields present in the 24- and 32-byte
    /// layouts are ignored. When a key occurs more than once, the last value
    /// wins and the key keeps its first position.
    fn from(data: &DictData) -> Self {
        // Size of the fixed header that precedes the record table.
        const HEADER_SIZE: usize = 16;
        // Size of the key/value locator block at the start of every record.
        const CORE_RECORD_SIZE: usize = 16;

        let bytes = &data.0;
        let mut entries = IndexMap::new();

        let Ok(header) = Header::try_ref_from_bytes(bytes.get(..HEADER_SIZE).unwrap_or(&[]))
        else {
            return DictType { entries };
        };
        let n = header.num_records.get() as usize;
        let rec_size = header.record_size.get() as usize;

        // The ICC dictType definition permits record lengths of 16, 24 and 32
        // bytes. All three begin with the same 16-byte key/value locator, so
        // every layout can be read through `Record`.
        if !matches!(rec_size, 16 | 24 | 32) {
            return DictType { entries };
        }

        // Saturating arithmetic keeps a hostile `num_records` from wrapping
        // around and slipping past the bounds check below.
        let table_end = HEADER_SIZE.saturating_add(n.saturating_mul(rec_size));
        if table_end > bytes.len() {
            return DictType { entries };
        }

        for i in 0..n {
            // Cannot overflow: the whole table was shown to fit in `bytes`.
            let rec_start = HEADER_SIZE + i * rec_size;
            let Some(rec_bytes) = bytes.get(rec_start..rec_start + CORE_RECORD_SIZE) else {
                continue;
            };
            let Ok(rec) = Record::try_ref_from_bytes(rec_bytes) else {
                continue;
            };

            let key_off = rec.key_offset.get() as usize;
            let key_len = rec.key_length.get() as usize;
            let val_off = rec.value_offset.get() as usize;
            let val_len = rec.value_length.get() as usize;

            // `checked_add` guards against offset + length overflow; `get`
            // rejects ranges that run past the end of the data.
            let (Some(key_bytes), Some(val_bytes)) = (
                key_off
                    .checked_add(key_len)
                    .and_then(|end| bytes.get(key_off..end)),
                val_off
                    .checked_add(val_len)
                    .and_then(|end| bytes.get(val_off..end)),
            ) else {
                continue;
            };

            let (Some(key), Some(value)) = (decode_utf16be(key_bytes), decode_utf16be(val_bytes))
            else {
                continue;
            };
            entries.insert(key, value);
        }

        DictType { entries }
    }
}
impl From<&DictType> for DictData {
    /// Encodes a dictionary into tag bytes.
    ///
    /// Output layout: the 16-byte header, one 16-byte record per entry, then
    /// each entry's key immediately followed by its value, all as UTF-16BE.
    /// Offsets stored in the records count from the start of the header.
    ///
    /// Offsets and lengths are stored as `u32`. Debug builds assert that they
    /// fit; release builds truncate them with `as u32`.
    fn from(dict: &DictType) -> Self {
        const HEADER_SIZE: usize = 16;
        const RECORD_SIZE: usize = 16;

        /// Encodes `text` as big-endian UTF-16 bytes.
        fn utf16be_bytes(text: &str) -> Vec<u8> {
            text.encode_utf16().flat_map(u16::to_be_bytes).collect()
        }

        let count = dict.entries.len();
        let payloads: Vec<(Vec<u8>, Vec<u8>)> = dict
            .entries
            .iter()
            .map(|(name, value)| (utf16be_bytes(name), utf16be_bytes(value)))
            .collect();

        // String data begins right after the header and the record table.
        let mut offset = HEADER_SIZE + count * RECORD_SIZE;
        let mut table: Vec<Record> = Vec::with_capacity(count);
        for (name, value) in &payloads {
            let name_offset = offset;
            let value_offset = name_offset + name.len();
            debug_assert!(
                name_offset <= u32::MAX as usize,
                "dictType key offset overflows u32"
            );
            debug_assert!(
                value_offset <= u32::MAX as usize,
                "dictType value offset overflows u32"
            );
            debug_assert!(
                name.len() <= u32::MAX as usize,
                "dictType key length overflows u32"
            );
            debug_assert!(
                value.len() <= u32::MAX as usize,
                "dictType value length overflows u32"
            );
            table.push(Record {
                key_offset: U32::new(name_offset as u32),
                key_length: U32::new(name.len() as u32),
                value_offset: U32::new(value_offset as u32),
                value_length: U32::new(value.len() as u32),
            });
            offset += name.len() + value.len();
        }
        // After the loop the running offset equals the full encoded size.
        let expected_len = offset;

        let header = Header {
            type_signature: U32::new(super::DataSignature::DictData.to_u32()),
            reserved: [0; 4],
            num_records: U32::new(count as u32),
            record_size: U32::new(RECORD_SIZE as u32),
        };

        let mut out = Vec::with_capacity(expected_len);
        out.extend_from_slice(header.as_bytes());
        table
            .iter()
            .for_each(|record| out.extend_from_slice(record.as_bytes()));
        payloads.iter().for_each(|(name, value)| {
            out.extend_from_slice(name);
            out.extend_from_slice(value);
        });
        debug_assert_eq!(
            out.len(),
            expected_len,
            "dictType serialisation: buffer size mismatch"
        );
        DictData(out)
    }
}
impl DictData {
pub fn clear(&mut self) {
*self = DictData::from(&DictType {
entries: IndexMap::new(),
});
}
pub fn insert(&mut self, key: &str, value: &str) {
let mut dict: DictType = (&*self).into();
dict.entries.insert(key.to_string(), value.to_string());
*self = DictData::from(&dict);
}
pub fn remove(&mut self, key: &str) {
let mut dict: DictType = (&*self).into();
dict.entries.shift_remove(key);
*self = DictData::from(&dict);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds encoded tag data by inserting `pairs`, in order, into empty data.
    fn encoded(pairs: &[(&str, &str)]) -> DictData {
        let mut data = DictData(Vec::new());
        for &(key, value) in pairs {
            data.insert(key, value);
        }
        data
    }

    #[test]
    fn roundtrip_empty() {
        let mut data = DictData(Vec::new());
        data.clear();

        let decoded = DictType::from(&data);
        assert!(decoded.entries.is_empty());

        let reencoded = DictData::from(&decoded);
        assert_eq!(DictType::from(&reencoded).entries, decoded.entries);
    }

    #[test]
    fn roundtrip_entries() {
        let data = encoded(&[
            ("CMF_product", "DisplayCAL"),
            ("CMF_version", "3.8.8.0"),
            ("unicode_key_\u{00e9}", "caf\u{00e9}"),
        ]);

        let decoded = DictType::from(&data);
        assert_eq!(decoded.entries["CMF_product"], "DisplayCAL");
        assert_eq!(decoded.entries["CMF_version"], "3.8.8.0");
        assert_eq!(decoded.entries["unicode_key_\u{00e9}"], "caf\u{00e9}");

        let reencoded = DictData::from(&decoded);
        let redecoded = DictType::from(&reencoded);
        assert_eq!(redecoded.entries, decoded.entries);
    }

    #[test]
    fn insert_preserves_order() {
        let mut data = DictData(Vec::new());
        for i in 0..5u8 {
            data.insert(&format!("key{i}"), &format!("val{i}"));
        }

        let decoded = DictType::from(&data);
        let keys: Vec<&str> = decoded.entries.keys().map(String::as_str).collect();
        assert_eq!(keys, ["key0", "key1", "key2", "key3", "key4"]);
    }

    #[test]
    fn remove_entry() {
        let mut data = encoded(&[("a", "1"), ("b", "2"), ("c", "3")]);
        data.remove("b");

        let decoded = DictType::from(&data);
        assert!(!decoded.entries.contains_key("b"));
        assert_eq!(decoded.entries.len(), 2);
    }

    #[test]
    fn malformed_data_returns_empty() {
        // Five bytes only: too short to contain the 16-byte header.
        let data = DictData(b"dict\0".to_vec());
        let decoded = DictType::from(&data);
        assert!(decoded.entries.is_empty());
    }

    #[test]
    fn odd_length_string_record_is_skipped() {
        let mut data = encoded(&[("ab", "cd")]);
        // Bytes 20..24 are the first record's key length (header is 16 bytes,
        // key offset occupies 16..20). An odd length cannot be UTF-16.
        data.0[20..24].copy_from_slice(&3u32.to_be_bytes());

        let decoded = DictType::from(&data);
        assert!(decoded.entries.is_empty());
    }
}