unifont_bitmap/lib.rs
1//! This crate incorporates the data for [GNU Unifont][1] in compressed binary
2//! form. It concerns itself with glyph lookup and caching only. It does not
3//! provide any rendering (like [`sdl2-unifont`][2] does) or even pixel lookup
4//! (like [`unifont`][3] does). It is nothing more than a compression scheme
5//! for the raw binary data represented in the `.hex` files that comprise
6//! GNU Unifont's "source code".
7//!
8//! [1]: http://unifoundry.com/unifont/index.html
9//! [2]: https://crates.io/crates/sdl2-unifont
10//! [3]: https://crates.io/crates/unifont
11//!
12//! # Background
13//!
14//! GNU Unifont is a bitmap font covering every character in Unicode. Narrow
15//! characters are 8x16 pixels, and wide characters are 16x16 pixels. GNU
16//! Unifont can be used to render any text that can be represented entirely
17//! without combining characters, ligatures, or other frippery. For example, it
18//! can render "ÿ", since that is encoded in Unicode as a single character:
19//!
20//! 1. `U+00FF LATIN SMALL LETTER Y WITH DIAERESIS` ("ÿ")
21//!
22//! But it could *not* render "ÿ̰́", which is a sequence of:
23//!
24//! 1. `U+0079 LATIN SMALL LETTER Y` ("y")
25//! 2. `U+0308 COMBINING DIAERESIS` ("◌̈")
26//! 3. `U+0301 COMBINING ACUTE ACCENT` ("◌́")
27//! 4. `U+0330 COMBINING TILDE BELOW` ("◌̰")
28//!
29//! In addition to basic concerns about putting pixels on the screen, any text
30//! rendering system may also have to account for [bidirectional text][4] (and
31//! right-to-left scripts in general) and take special care when [breaking
32//! lines of text][5]. Not to mention "invisible characters". All of these
33//! concerns are outside the scope of this crate, which, again, has the sole
34//! and simple purpose of retrieving the individual GNU Unifont glyph that
35//! represents a given Unicode code point.
36//!
37//! [4]: https://unicode.org/reports/tr9/
38//! [5]: https://unicode.org/reports/tr14/
39//!
40//! The font data is embedded in your executable, in compressed form. The whole
41//! thing is less than a megabyte in size when compressed, and if you somehow
42//! end up using every page, it adds about 2.3 megabytes of runtime memory
43//! overhead. This is a small price to pay for a font that covers every Unicode
44//! character.
45//!
46//! # Usage
47//!
48//! Single-threaded usage is simple, via the [`Unifont`](struct.Unifont.html)
49//! struct:
50//!
51//! ```rust
52//! use unifont_bitmap::Unifont;
53//! let mut unifont = Unifont::open();
54//! // Get a bitmap, loading its page if necessary. Requires mut.
55//! let my_bitmap = unifont.load_bitmap('井' as u32);
56//! println!("{} pixels wide.", if my_bitmap.is_wide() { 16 } else { 8 });
57//! println!("Bytes: {:?}", my_bitmap.get_bytes());
58//! // Get a bitmap, iff its page is already loaded. Does not require mut.
59//! let my_bitmap = unifont.get_bitmap('井' as u32).unwrap();
60//! println!("{} pixels wide.", if my_bitmap.is_wide() { 16 } else { 8 });
61//! println!("Bytes: {:?}", my_bitmap.get_bytes());
62//! ```
63//!
64//! What you do from here is complicated, and outside this crate's pay grade.
65//!
66//! # Legalese
67//!
68//! The `unifont-bitmap` crate is copyright 2021, Solra Bizna, and licensed
69//! under either of:
70//!
71//! * Apache License, Version 2.0
72//! ([LICENSE-APACHE](LICENSE-APACHE) or
73//! <http://www.apache.org/licenses/LICENSE-2.0>)
74//! * MIT license
75//! ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
76//!
77//! at your option.
78//!
79//! As for GNU Unifont:
80//!
81//! > Copyright (C) 1998-2021 Roman Czyborra, Paul Hardy, Qianqian Fang,
82//! > Andrew Miller, Johnnie Weaver, David Corbett, Nils Moskopp, Rebecca
83//! > Bettencourt, et al. License: SIL Open Font License version 1.1 and
84//! > GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
85//! > with the GNU Font Embedding Exception.
86//!
87//! I believe that this license is compatible with `unifont-bitmap`'s use of
88//! the font. If the font ends up statically linked into a non-GPL-compatible
89//! application, e.g. for its own use in UI elements, my interpretation of the
90//! license is that this is equivalent to embedding it into a document; thus
91//! explicitly permitted by the Font Embedding Exception. If one of the
92//! copyright holders and/or the Free Software Foundation disagrees with this
93//! interpretation, I'd be open to discuss the issue.
94//!
95//! ## Contribution
96//!
97//! Unless you explicitly state otherwise, any contribution intentionally
98//! submitted for inclusion in the `unifont-bitmap` crate by you, as defined
99//! in the Apache-2.0 license, shall be dual licensed as above, without any
100//! additional terms or conditions.
101
102use byteorder::{ReadBytesExt, BigEndian};
103
/// The compressed font data, embedded in the executable at build time.
///
/// Layout, as consumed by `Unifont::populate_page_infos` and
/// `Unifont::load_page`:
///
/// 1. A big-endian `u32` giving the length in bytes of the compressed page
///    table that immediately follows.
/// 2. The page table itself (decompressed with `flate2::Decompress`): one
///    4-byte entry per Unicode page, holding a big-endian `u16` uncompressed
///    size followed by a big-endian `u16` compressed size.
/// 3. The compressed data of every page whose uncompressed size is nonzero,
///    back to back, in page order.
const UNIFONT_DATA: &[u8] = include_bytes!("unifont.dat");
105
/// The highest code point value that is, or ever will be, valid in Unicode
/// (`U+10FFFF`).
pub const MAX_UNICODE_CODEPOINT: u32 = 0x0010_FFFF;
/// How many code point values exist in Unicode, counting from zero up to and
/// including `MAX_UNICODE_CODEPOINT`.
pub const NUM_UNICODE_CODEPOINTS: u32 = 1 + MAX_UNICODE_CODEPOINT;
/// How many 256-code-point "pages" it takes to span all of Unicode.
pub const NUM_UNICODE_PAGES: u32 = NUM_UNICODE_CODEPOINTS / 256;
/// The index of the last 256-code-point "page" in Unicode.
pub const MAX_UNICODE_PAGE: u32 = NUM_UNICODE_PAGES - 1;
114
/// One glyph image from the font: either 8x16 ("narrow") or 16x16 ("wide")
/// pixels, stored at one bit per pixel.
///
/// A bitmap corresponds to a single code point. The crate-level
/// documentation explains why that is not the same thing as a fully rendered
/// character (combining characters, invisible characters, and so on).
#[derive(PartialEq, Eq)]
pub struct Bitmap<'a> {
    /// The raw rows of the glyph: 16 bytes when narrow, 32 bytes when wide.
    bytes: &'a [u8],
}

impl core::fmt::Debug for Bitmap<'_> {
    /// Writes the raw bytes wrapped in `unifont bitmap {...}`. The formatter
    /// is handed straight to the byte slice, so flags such as `{:#?}` still
    /// shape the byte list.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("unifont bitmap {")?;
        <[u8] as core::fmt::Debug>::fmt(self.bytes, f)?;
        f.write_str("}")
    }
}

impl<'a> Bitmap<'a> {
    /// Returns the bytes that make up this bitmap.
    ///
    /// Every byte holds 8 horizontally adjacent pixels, with the most
    /// significant bit being the leftmost pixel. A wide glyph (see
    /// `is_wide`) uses two bytes per row; a narrow glyph uses one.
    pub fn get_bytes(&self) -> &'a [u8] {
        self.bytes
    }

    /// Returns `true` for a wide (16x16) bitmap and `false` for a narrow
    /// (8x16) one.
    pub fn is_wide(&self) -> bool {
        let len = self.bytes.len();
        if len == 32 {
            return true;
        }
        if len == 16 {
            return false;
        }
        // A `Bitmap` is only ever built over a 16- or 32-byte slice.
        unreachable!()
    }

    /// Returns the dimensions of the bitmap as `(width, height)`.
    ///
    /// The result is always `(8, 16)` or `(16, 16)`, converted into whatever
    /// numeric type the caller asks for.
    pub fn get_dimensions<T: From<u8>>(&self) -> (T, T) {
        let width: u8 = if self.is_wide() { 16 } else { 8 };
        (T::from(width), T::from(16u8))
    }
}
156
/// Bookkeeping for one 256-code-point page of the font.
#[derive(Default)]
struct PageInfo {
    /// The decompressed page, cached after it is first loaded; `None` until
    /// then.
    raw_data: Option<Vec<u8>>,
    /// Byte offset into the embedded font data at which this page's
    /// compressed data begins. Left at zero for pages with no stored data.
    compressed_offset: u32,
    /// Size in bytes of the page once decompressed. Zero marks a page that
    /// has no stored data.
    uncompressed_size: u32,
}
163
164/// A data structure for caching Unifont character bitmaps. Decompresses the
165/// compressed font data in the executable on demand, and caches it in blocks
166/// ("pages") of 256 code points each.
167pub struct Unifont {
168 pages: [PageInfo; NUM_UNICODE_PAGES as usize],
169}
170
171impl Unifont {
172 /// Loads the Unifont bitmap corresponding to the given Unicode codepoint
173 /// (if necessary), and returns it.
174 ///
175 /// Will return the bitmap for U+FFFD REPLACEMENT CHAR (�) if Unifont does
176 /// not include a glyph for this bitmap.
177 ///
178 /// **PANICS** if you pass a `codepoint` larger than
179 /// `MAX_UNICODE_CODEPOINT`.
180 pub fn load_bitmap(&mut self, codepoint: u32) -> Bitmap {
181 assert!(codepoint <= MAX_UNICODE_CODEPOINT);
182 let page = codepoint >> 8;
183 self.load_page(page);
184 let ret = self.get_bitmap(codepoint);
185 // Justification for this unsafe block:
186 //
187 // Once loaded, the decompressed data for a given page will never be
188 // freed or moved until (and unless) this Unifont instance is dropped.
189 // Therefore, the implied lifetime constraint is met.
190 //
191 // A previous iteration of this API had a "purge_page" call, but that
192 // broke this safety assumption and was therefore removed.
193 if let Some(x) = ret { return unsafe { std::mem::transmute(x) } }
194 drop(ret);
195 if codepoint == 0xFFFD {
196 panic!("U+FFFD should have been loaded but wasn't!");
197 }
198 else {
199 // this will happen if U+FFFD was needed but not yet loaded
200 self.load_bitmap(0xFFFD)
201 }
202 }
203 /// Gets the Unifont bitmap corresponding to the given Unicode codepoint,
204 /// if and only if it is already loaded.
205 ///
206 /// Will return the bitmap for `U+FFFD REPLACEMENT CHAR` (�) if Unifont
207 /// does not include a glyph for this bitmap, iff the respective page of
208 /// the font is already loaded.
209 ///
210 /// **PANICS** if you pass a `codepoint` larger than
211 /// `MAX_UNICODE_CODEPOINT`.
212 pub fn get_bitmap(&self, codepoint: u32) -> Option<Bitmap> {
213 assert!(codepoint <= MAX_UNICODE_CODEPOINT);
214 let page = codepoint >> 8;
215 let ch = codepoint & 255;
216 let raw_data = match self.pages[page as usize].raw_data.as_ref() {
217 None => return None,
218 Some(x) => &x[..],
219 };
220 let offset_offset = (ch as usize) * 2;
221 let char_offset =
222 u16::from_ne_bytes(raw_data[offset_offset .. offset_offset + 2]
223 .try_into().unwrap());
224 if char_offset == 0 {
225 if codepoint == 0xFFFD {
226 panic!("U+FFFD should have been present but wasn't!");
227 }
228 else {
229 self.get_bitmap(0xFFFD)
230 }
231 }
232 else {
233 let is_wide = (char_offset & 1) != 0;
234 let real_offset = (char_offset & !1) as usize;
235 let region = &raw_data[real_offset .. real_offset +
236 if is_wide { 32 } else { 16 }];
237 Some(Bitmap { bytes: region })
238 }
239 }
240 /// Loads a given page, if it's not loaded already. (Since loading is
241 /// usually done transparently, this isn't usually needed.)
242 pub fn load_page(&mut self, page: u32) {
243 assert!(page <= MAX_UNICODE_PAGE);
244 let target_page = &mut self.pages[page as usize];
245 if target_page.raw_data.is_none() {
246 if target_page.uncompressed_size == 0 {
247 target_page.raw_data = Some(vec![0u8; 512]);
248 }
249 else {
250 let mut inflater = flate2::Decompress::new(true);
251 let mut buf = vec![0; target_page.uncompressed_size as usize];
252 inflater.decompress(&UNIFONT_DATA[target_page.compressed_offset as usize ..], &mut buf[..], flate2::FlushDecompress::Finish).expect("The Unifont bitmap data in this application appears to be corrupted!");
253 let mut running_offset = 512u16;
254 for n in 0 .. 256 {
255 let i = (n * 2) as usize;
256 let in_offset = u16::from_be_bytes(buf[i..i+2].try_into().unwrap());
257 let out_offset;
258 match in_offset {
259 0x0000 => {
260 // narrow char,
261 out_offset = running_offset;
262 running_offset += 16;
263 },
264 0x0001 => {
265 // wide char
266 out_offset = running_offset | 1;
267 running_offset += 32;
268 },
269 0x0101 => {
270 // invalid char
271 out_offset = 0;
272 },
273 _ => {
274 panic!("The Unifont bitmap data in this application appears to be corrupted!");
275 },
276 }
277 buf[i..i+2].copy_from_slice(&out_offset.to_ne_bytes());
278 }
279 target_page.raw_data = Some(buf)
280 }
281 }
282 }
283 /// Creates a new instance of this class, with no glyphs cached yet.
284 ///
285 /// The font data is embedded in your executable, and does not need to be
286 /// provided any other way.
287 pub fn open() -> Unifont {
288 // oh boy, this pain point hasn't been resolved yet
289 let mut pages: [std::mem::MaybeUninit<PageInfo>;
290 NUM_UNICODE_PAGES as usize]
291 = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
292 for el in &mut pages[..] {
293 unsafe { std::ptr::write(el.as_mut_ptr(), PageInfo {
294 compressed_offset: 0, uncompressed_size: 0, raw_data: None
295 }) }
296 }
297 let mut ret = Unifont { pages: unsafe { std::mem::transmute(pages) } };
298 ret.populate_page_infos();
299 ret
300 }
301 fn populate_page_infos(&mut self) {
302 let mut input = UNIFONT_DATA;
303 let start_offset: u32
304 = input.read_u32::<BigEndian>().unwrap() + 4;
305 let mut running_offset = start_offset;
306 let mut buf = [0u8; NUM_UNICODE_PAGES as usize * 4];
307 let mut fish = flate2::Decompress::new(true);
308 fish.decompress(&UNIFONT_DATA[4..(running_offset as usize)],
309 &mut buf, flate2::FlushDecompress::Finish).unwrap();
310 let mut i = &buf[..];
311 for el in &mut self.pages[..] {
312 let uncompressed_size = i.read_u16::<BigEndian>().unwrap();
313 let compressed_size = i.read_u16::<BigEndian>().unwrap();
314 el.uncompressed_size = uncompressed_size as u32;
315 if el.uncompressed_size > 0 {
316 el.compressed_offset = running_offset;
317 running_offset += compressed_size as u32;
318 }
319 else {
320 el.compressed_offset = 0;
321 }
322 }
323 }
324}
325
#[cfg(test)]
mod test {
    use super::*;

    /// Once both pages are loaded, looking up a code point from the "bogus"
    /// page must give the same result as looking up `U+FFFD` itself.
    #[test]
    fn bogus_page() {
        const REPLACEMENT: u32 = 0xFFFD;
        const BOGUS: u32 = 0x104560;

        let mut font = Unifont::open();
        // Only the side effect (getting the pages loaded) matters here; the
        // returned bitmaps are discarded right away.
        let _ = font.load_bitmap(REPLACEMENT);
        let _ = font.load_bitmap(BOGUS);

        let expected = font.get_bitmap(REPLACEMENT);
        let actual = font.get_bitmap(BOGUS);
        assert_eq!(expected, actual);
    }
}
340}