Skip to main content

dicom_map/
schema.rs

1//! Shared schema for the `.dmap` archive file format.
2//!
3//! Layout on disk (all little-endian):
4//!
5//! ```text
6//! +---------------------------------------+
7//! | FileHeader (fixed, 32 bytes)          |
8//! +---------------------------------------+
9//! | rkyv-archived Dictionary              |
10//! |   .index  : Vec<IndexEntry>  sorted   |
11//! |   .records: Vec<TagRecord>            |
12//! |   .strings: Vec<u8> pool              |
13//! +---------------------------------------+
14//! ```
15//!
16//! The index is sorted by `(group, element, creator_hash)` so lookup is a
17//! plain binary search. String fields inside `TagRecord` are stored as
18//! `(offset, len)` into `strings`; the reader slices without copying.
19
20use rkyv::{Archive, Deserialize, Serialize};
21
/// Four-byte signature stored at the very start of every `.dmap` file.
pub const MAGIC: &[u8; 4] = b"DMAP";
/// Format version stamped into headers built by [`FileHeader::new`].
pub const VERSION: u16 = 2;

/// 32-byte file prefix. Kept outside rkyv so we can sanity-check before
/// attempting to deserialize the body.
///
/// Encoded layout (integers little-endian), as written by
/// [`FileHeader::to_bytes`]:
///
/// ```text
///  0.. 4  magic
///  4.. 6  version
///  6.. 8  _reserved
///  8..16  body_len
/// 16..24  body_sha256_lo
/// 24..32  _pad
/// ```
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct FileHeader {
    /// File signature; [`MAGIC`] for headers built by [`FileHeader::new`].
    pub magic: [u8; 4],
    /// Format version; [`VERSION`] for headers built by [`FileHeader::new`].
    pub version: u16,
    /// Reserved bytes, zero in freshly built headers.
    pub _reserved: [u8; 2],
    /// Length of the body that follows the header (presumably in bytes).
    pub body_len: u64,
    pub body_sha256_lo: u64, // first 8 bytes of body sha256 (integrity hint)
    /// Trailing reserved bytes, zero in freshly built headers.
    pub _pad: [u8; 8],
}

impl FileHeader {
    /// Size of the encoded header in bytes.
    pub const SIZE: usize = 32;

    /// Builds a header for a body of `body_len`, using the current
    /// [`MAGIC`] and [`VERSION`].
    ///
    /// `body_sha256_lo` starts at zero; the caller fills it in once the body
    /// digest is known.
    pub fn new(body_len: u64) -> Self {
        Self {
            magic: *MAGIC,
            version: VERSION,
            _reserved: [0; 2],
            body_len,
            body_sha256_lo: 0,
            _pad: [0; 8],
        }
    }

    /// Encodes the header into its fixed 32-byte little-endian form.
    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
        let mut out = [0u8; Self::SIZE];
        out[0..4].copy_from_slice(&self.magic);
        out[4..6].copy_from_slice(&self.version.to_le_bytes());
        out[6..8].copy_from_slice(&self._reserved);
        out[8..16].copy_from_slice(&self.body_len.to_le_bytes());
        out[16..24].copy_from_slice(&self.body_sha256_lo.to_le_bytes());
        out[24..32].copy_from_slice(&self._pad);
        out
    }

    /// Decodes a header from the first [`Self::SIZE`] bytes of `buf`.
    ///
    /// Returns `None` when `buf` is shorter than [`Self::SIZE`]; any bytes
    /// past the header are ignored. Neither the magic nor the version is
    /// validated here, so callers are expected to compare them against
    /// [`MAGIC`] and [`VERSION`] themselves.
    ///
    /// Every field, including `_reserved` and `_pad`, is taken from the
    /// buffer, so `from_bytes(b)?.to_bytes()` reproduces the first 32 bytes
    /// of `b` exactly.
    pub fn from_bytes(buf: &[u8]) -> Option<Self> {
        // One checked slice up front; the fixed sub-ranges below are then
        // always in bounds and the array conversions cannot fail.
        let head = buf.get(..Self::SIZE)?;
        Some(Self {
            magic: head[0..4].try_into().ok()?,
            version: u16::from_le_bytes(head[4..6].try_into().ok()?),
            _reserved: head[6..8].try_into().ok()?,
            body_len: u64::from_le_bytes(head[8..16].try_into().ok()?),
            body_sha256_lo: u64::from_le_bytes(head[16..24].try_into().ok()?),
            _pad: head[24..32].try_into().ok()?,
        })
    }
}
79
/// One entry in the sorted lookup index.
///
/// The index is kept sorted by `(group, element, creator_hash)` so a lookup
/// can binary-search for the key and then follow `record_idx` into the
/// record table.
#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy)]
#[archive(check_bytes)]
pub struct IndexEntry {
    /// Group half of the tag; first component of the sort key.
    pub group: u16,
    /// Element half of the tag; second component of the sort key.
    pub element: u16,
    /// FNV-1a 32-bit hash of the canonical (uppercase, single-space) private
    /// creator string. `0` means "no private creator" (public tag).
    pub creator_hash: u32,
    /// Index into `records`.
    pub record_idx: u32,
}
92
/// One fully resolved tag record.
///
/// Text fields are not stored inline: each is an `(offset, len)` pair that
/// points into the shared string pool, so a reader can slice the pool
/// without copying.
#[derive(Archive, Serialize, Deserialize, Debug, Clone)]
#[archive(check_bytes)]
pub struct TagRecord {
    /// Group half of the tag.
    pub group: u16,
    /// Element half of the tag (see `element_is_block_offset`).
    pub element: u16,
    /// Whether `element` is the low byte of a private block offset (PS3.5 §7.8.1).
    pub element_is_block_offset: bool,
    /// Whether the tag is marked as retired.
    pub retired: bool,
    /// Value representation of the tag.
    pub vr: VrCode,
    /// Lower bound of the value multiplicity.
    pub vm_min: u8,
    /// Upper bound of the value multiplicity.
    pub vm_max: u8, // 0xFF == 'n' (unbounded)
    /// Offset of the keyword in the string pool.
    pub keyword_off: u32,
    /// Length of the keyword (presumably in bytes; confirm against the writer).
    pub keyword_len: u16,
    /// Offset of the display name in the string pool.
    pub name_off: u32,
    /// Length of the display name.
    pub name_len: u16,
    /// Offset of the private creator string in the string pool.
    pub creator_off: u32, // 0 for public tags
    /// Length of the private creator string.
    // NOTE(review): presumably 0 for public tags as well, since offset 0 is
    // also a valid pool position; confirm against the writer.
    pub creator_len: u16,
    /// Offset of the free-text description in the string pool.
    pub description_off: u32,
    /// Length of the description (32-bit, unlike the shorter text fields).
    pub description_len: u32,
    /// Pipe-delimited list of source PDF filenames (with `#pN` page anchors).
    /// Points into the shared string pool. Empty for public (PS3.6) tags.
    pub sources_off: u32,
    /// Length of the sources list.
    pub sources_len: u32,
}
118
/// DICOM value representation, 2-byte ASCII packed into one byte via a
/// fixed enum so records stay small.
///
/// Only `Unknown` has an explicit discriminant. Every other variant takes
/// its `u8` value from its position in the list below (starting at 0), so
/// inserting, removing or reordering variants changes those values.
// NOTE(review): presumably these discriminants are what ends up in the
// archived file, so append new variants just before `Unknown` rather than
// reordering; confirm against the rkyv layout and bump `VERSION` if needed.
#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
#[archive(check_bytes)]
#[repr(u8)]
pub enum VrCode {
    AE,
    AS,
    AT,
    CS,
    DA,
    DS,
    DT,
    FL,
    FD,
    IS,
    LO,
    LT,
    OB,
    OD,
    OF,
    OL,
    OV,
    OW,
    PN,
    SH,
    SL,
    SQ,
    SS,
    ST,
    SV,
    TM,
    UC,
    UI,
    UL,
    UN,
    UR,
    US,
    UT,
    UV,
    /// For rows where the VR couldn't be determined.
    Unknown = 0xFF,
}
162
163impl VrCode {
164    pub fn from_str2(s: &str) -> Self {
165        match s {
166            "AE" => Self::AE,
167            "AS" => Self::AS,
168            "AT" => Self::AT,
169            "CS" => Self::CS,
170            "DA" => Self::DA,
171            "DS" => Self::DS,
172            "DT" => Self::DT,
173            "FL" => Self::FL,
174            "FD" => Self::FD,
175            "IS" => Self::IS,
176            "LO" => Self::LO,
177            "LT" => Self::LT,
178            "OB" => Self::OB,
179            "OD" => Self::OD,
180            "OF" => Self::OF,
181            "OL" => Self::OL,
182            "OV" => Self::OV,
183            "OW" => Self::OW,
184            "PN" => Self::PN,
185            "SH" => Self::SH,
186            "SL" => Self::SL,
187            "SQ" => Self::SQ,
188            "SS" => Self::SS,
189            "ST" => Self::ST,
190            "SV" => Self::SV,
191            "TM" => Self::TM,
192            "UC" => Self::UC,
193            "UI" => Self::UI,
194            "UL" => Self::UL,
195            "UN" => Self::UN,
196            "UR" => Self::UR,
197            "US" => Self::US,
198            "UT" => Self::UT,
199            "UV" => Self::UV,
200            _ => Self::Unknown,
201        }
202    }
203
204    pub fn as_str(&self) -> &'static str {
205        match self {
206            Self::AE => "AE",
207            Self::AS => "AS",
208            Self::AT => "AT",
209            Self::CS => "CS",
210            Self::DA => "DA",
211            Self::DS => "DS",
212            Self::DT => "DT",
213            Self::FL => "FL",
214            Self::FD => "FD",
215            Self::IS => "IS",
216            Self::LO => "LO",
217            Self::LT => "LT",
218            Self::OB => "OB",
219            Self::OD => "OD",
220            Self::OF => "OF",
221            Self::OL => "OL",
222            Self::OV => "OV",
223            Self::OW => "OW",
224            Self::PN => "PN",
225            Self::SH => "SH",
226            Self::SL => "SL",
227            Self::SQ => "SQ",
228            Self::SS => "SS",
229            Self::ST => "ST",
230            Self::SV => "SV",
231            Self::TM => "TM",
232            Self::UC => "UC",
233            Self::UI => "UI",
234            Self::UL => "UL",
235            Self::UN => "UN",
236            Self::UR => "UR",
237            Self::US => "US",
238            Self::UT => "UT",
239            Self::UV => "UV",
240            Self::Unknown => "??",
241        }
242    }
243}
244
/// Root archived object in the `.dmap` file body.
///
/// This is what follows the fixed-size file header on disk.
#[derive(Archive, Serialize, Deserialize, Debug)]
#[archive(check_bytes)]
pub struct Dictionary {
    /// Lookup index, sorted by `(group, element, creator_hash)` so it can be
    /// binary-searched; each entry points at one element of `records`.
    pub index: Vec<IndexEntry>,
    /// Tag records, addressed by `IndexEntry::record_idx`.
    pub records: Vec<TagRecord>,
    /// Shared string pool that the `*_off` / `*_len` fields of `TagRecord`
    /// slice into.
    pub strings: Vec<u8>,
}
253
/// FNV-1a 32-bit. Used for creator-hash in the index key.
///
/// Starts from the 32-bit FNV offset basis and, for each input byte, XORs
/// the byte in and then multiplies by the FNV prime with wrapping
/// arithmetic. An empty input returns the offset basis unchanged.
pub fn fnv1a32(bytes: &[u8]) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    bytes.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u32::from(byte)).wrapping_mul(PRIME)
    })
}
263
/// Canonicalize a private creator string the same way the Python normalize
/// stage does: uppercase, collapse internal whitespace.
///
/// Leading and trailing whitespace is dropped and every run of whitespace
/// between words becomes a single ASCII space. An empty or all-whitespace
/// input yields an empty string.
pub fn canonicalize_creator(s: &str) -> String {
    let mut canonical = String::with_capacity(s.len());
    let mut words = s.split_whitespace();

    // The first word goes in bare; each later word is preceded by one space.
    if let Some(first) = words.next() {
        canonical.push_str(&first.to_uppercase());
    }
    for word in words {
        canonical.push(' ');
        canonical.push_str(&word.to_uppercase());
    }
    canonical
}
272
273/// Compute the index key hash for a creator (0 for public tags).
274pub fn creator_hash(s: Option<&str>) -> u32 {
275    match s {
276        None => 0,
277        Some(c) => fnv1a32(canonicalize_creator(c).as_bytes()),
278    }
279}