Skip to main content

dicom_map/
lookup.rs

1//! Reader: mmap a `.dmap` file and perform O(log n) lookups.
2
3use std::fs::File;
4use std::path::Path;
5
6use memmap2::Mmap;
7use rkyv::{archived_root, check_archived_root};
8
9use crate::schema::{
10    creator_hash as compute_creator_hash, ArchivedDictionary, ArchivedTagRecord, ArchivedVrCode,
11    Dictionary, FileHeader, MAGIC, VERSION,
12};
13
/// Errors produced while opening or validating a `.dmap` image.
#[derive(Debug, thiserror::Error)]
pub enum DmapError {
    /// An I/O operation failed; converted automatically from `std::io::Error`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
    /// The input is too short to hold a header, or the header could not be
    /// parsed from its leading bytes. Carries the total input length in bytes.
    #[error("file too small ({0} bytes) to contain a header")]
    TooSmall(usize),
    /// The header's magic bytes differ from the expected `MAGIC`.
    #[error("bad magic bytes {got:?}")]
    BadMagic { got: [u8; 4] },
    /// The header's version differs from the `VERSION` this reader accepts.
    #[error("unsupported version {got}")]
    UnsupportedVersion { got: u16 },
    /// Validation of the archived body failed; carries the validator's
    /// error rendered as a string.
    #[error("archive integrity check failed: {0}")]
    BadArchive(String),
}
27
/// Read-only handle to a validated `.dmap` dictionary image.
///
/// Built by `DmapDict::open` (memory-mapped file) or `DmapDict::from_static`
/// (a `'static` byte slice); both validate the header and the archived body
/// before returning a value of this type.
pub struct DmapDict {
    // When loaded from disk we hold an mmap. When constructed from a byte
    // slice (e.g. `include_bytes!`), the slice itself is the backing store.
    backing: Backing,
    // Byte offset into the backing bytes at which the archived body starts.
    // Both constructors set this to `FileHeader::SIZE`.
    body_off: usize,
}
34
35enum Backing {
36    Mmap { _file: File, mmap: Mmap },
37    Static(&'static [u8]),
38}
39
40impl Backing {
41    fn bytes(&self) -> &[u8] {
42        match self {
43            Backing::Mmap { mmap, .. } => &mmap[..],
44            Backing::Static(b) => b,
45        }
46    }
47}
48
49impl DmapDict {
50    pub fn open(path: impl AsRef<Path>) -> Result<Self, DmapError> {
51        let file = File::open(path)?;
52        // SAFETY: file is held for the life of the mapping.
53        let mmap = unsafe { Mmap::map(&file)? };
54
55        if mmap.len() < FileHeader::SIZE {
56            return Err(DmapError::TooSmall(mmap.len()));
57        }
58        let hdr = FileHeader::from_bytes(&mmap[..FileHeader::SIZE])
59            .ok_or(DmapError::TooSmall(mmap.len()))?;
60        if &hdr.magic != MAGIC {
61            return Err(DmapError::BadMagic { got: hdr.magic });
62        }
63        if hdr.version != VERSION {
64            return Err(DmapError::UnsupportedVersion { got: hdr.version });
65        }
66
67        let body = &mmap[FileHeader::SIZE..];
68        check_archived_root::<Dictionary>(body)
69            .map_err(|e| DmapError::BadArchive(e.to_string()))?;
70
71        Ok(Self {
72            backing: Backing::Mmap { _file: file, mmap },
73            body_off: FileHeader::SIZE,
74        })
75    }
76
77    /// Construct from a `'static` byte slice, typically `include_bytes!`.
78    pub fn from_static(bytes: &'static [u8]) -> Result<Self, DmapError> {
79        if bytes.len() < FileHeader::SIZE {
80            return Err(DmapError::TooSmall(bytes.len()));
81        }
82        let hdr = FileHeader::from_bytes(&bytes[..FileHeader::SIZE])
83            .ok_or(DmapError::TooSmall(bytes.len()))?;
84        if &hdr.magic != MAGIC {
85            return Err(DmapError::BadMagic { got: hdr.magic });
86        }
87        if hdr.version != VERSION {
88            return Err(DmapError::UnsupportedVersion { got: hdr.version });
89        }
90        let body = &bytes[FileHeader::SIZE..];
91        check_archived_root::<Dictionary>(body)
92            .map_err(|e| DmapError::BadArchive(e.to_string()))?;
93        Ok(Self {
94            backing: Backing::Static(bytes),
95            body_off: FileHeader::SIZE,
96        })
97    }
98
99    fn archived(&self) -> &ArchivedDictionary {
100        // SAFETY: validated in `open()` / `from_static()`.
101        unsafe { archived_root::<Dictionary>(&self.backing.bytes()[self.body_off..]) }
102    }
103
104    pub fn len(&self) -> usize {
105        self.archived().index.len()
106    }
107
108    pub fn is_empty(&self) -> bool {
109        self.len() == 0
110    }
111
112    pub fn lookup(&self, group: u16, element: u16, creator: Option<&str>) -> Option<TagView<'_>> {
113        let ch = compute_creator_hash(creator);
114        let dict = self.archived();
115        let index = &dict.index;
116
117        let key = (group, element, ch);
118        let idx = index
119            .binary_search_by(|e| (e.group, e.element, e.creator_hash).cmp(&key))
120            .ok()?;
121
122        let rec_idx = index[idx].record_idx as usize;
123        let rec = &dict.records[rec_idx];
124        Some(TagView {
125            rec,
126            strings: dict.strings.as_slice(),
127        })
128    }
129}
130
131pub struct TagView<'a> {
132    rec: &'a ArchivedTagRecord,
133    strings: &'a [u8],
134}
135
136impl<'a> TagView<'a> {
137    fn slice(&self, off: u32, len: u32) -> &'a str {
138        let s = off as usize;
139        let e = s + len as usize;
140        std::str::from_utf8(&self.strings[s..e]).unwrap_or("")
141    }
142
143    pub fn group(&self) -> u16 {
144        self.rec.group
145    }
146
147    pub fn element(&self) -> u16 {
148        self.rec.element
149    }
150
151    pub fn keyword(&self) -> &'a str {
152        self.slice(self.rec.keyword_off, self.rec.keyword_len as u32)
153    }
154
155    pub fn name(&self) -> &'a str {
156        self.slice(self.rec.name_off, self.rec.name_len as u32)
157    }
158
159    pub fn creator(&self) -> &'a str {
160        let len = self.rec.creator_len;
161        if len == 0 {
162            return "";
163        }
164        self.slice(self.rec.creator_off, len as u32)
165    }
166
167    pub fn description(&self) -> &'a str {
168        self.slice(self.rec.description_off, self.rec.description_len)
169    }
170
171    /// Pipe-delimited list of source PDF filenames that document this tag.
172    /// Each entry is a filename (optionally with a `#pN` page anchor) from
173    /// `data/sources.json`. Empty for public PS3.6 tags.
174    pub fn sources_raw(&self) -> &'a str {
175        self.slice(self.rec.sources_off, self.rec.sources_len)
176    }
177
178    /// Source PDF filenames split on `|`. Empty iterator for public PS3.6 tags.
179    pub fn sources(&self) -> impl Iterator<Item = &'a str> {
180        self.sources_raw().split('|').filter(|s| !s.is_empty())
181    }
182
183    pub fn vr(&self) -> &'static str {
184        archived_vr_as_str(&self.rec.vr)
185    }
186
187    pub fn retired(&self) -> bool {
188        self.rec.retired
189    }
190
191    pub fn is_block_offset(&self) -> bool {
192        self.rec.element_is_block_offset
193    }
194}
195
196fn archived_vr_as_str(v: &ArchivedVrCode) -> &'static str {
197    match v {
198        ArchivedVrCode::AE => "AE",
199        ArchivedVrCode::AS => "AS",
200        ArchivedVrCode::AT => "AT",
201        ArchivedVrCode::CS => "CS",
202        ArchivedVrCode::DA => "DA",
203        ArchivedVrCode::DS => "DS",
204        ArchivedVrCode::DT => "DT",
205        ArchivedVrCode::FL => "FL",
206        ArchivedVrCode::FD => "FD",
207        ArchivedVrCode::IS => "IS",
208        ArchivedVrCode::LO => "LO",
209        ArchivedVrCode::LT => "LT",
210        ArchivedVrCode::OB => "OB",
211        ArchivedVrCode::OD => "OD",
212        ArchivedVrCode::OF => "OF",
213        ArchivedVrCode::OL => "OL",
214        ArchivedVrCode::OV => "OV",
215        ArchivedVrCode::OW => "OW",
216        ArchivedVrCode::PN => "PN",
217        ArchivedVrCode::SH => "SH",
218        ArchivedVrCode::SL => "SL",
219        ArchivedVrCode::SQ => "SQ",
220        ArchivedVrCode::SS => "SS",
221        ArchivedVrCode::ST => "ST",
222        ArchivedVrCode::SV => "SV",
223        ArchivedVrCode::TM => "TM",
224        ArchivedVrCode::UC => "UC",
225        ArchivedVrCode::UI => "UI",
226        ArchivedVrCode::UL => "UL",
227        ArchivedVrCode::UN => "UN",
228        ArchivedVrCode::UR => "UR",
229        ArchivedVrCode::US => "US",
230        ArchivedVrCode::UT => "UT",
231        ArchivedVrCode::UV => "UV",
232        ArchivedVrCode::Unknown => "??",
233    }
234}