1pub mod cff;
2pub mod cjk;
3pub mod cmap;
4mod encoding;
5pub mod opentype;
6pub mod recovery;
7mod standard14;
8pub mod subset;
9pub mod type3;
10
11pub use cmap::ToUnicodeCMap;
12pub use encoding::{Encoding, decode_text};
13pub use standard14::{is_standard14, standard14_widths};
14
15use crate::object::{IndirectRef, PdfDict, PdfObject};
16
17#[derive(Debug, Clone)]
19pub struct FontInfo {
20 pub base_font: Vec<u8>,
22 pub subtype: Vec<u8>,
24 pub encoding: Encoding,
26 pub widths: FontWidths,
28 pub to_unicode: Option<Vec<u8>>,
30 pub is_standard14: bool,
32 pub descriptor: Option<FontDescriptor>,
34}
35
36#[derive(Debug, Clone)]
38pub struct FontDescriptor {
39 pub font_name: Vec<u8>,
41 pub font_family: Option<Vec<u8>>,
43 pub flags: u32,
45 pub font_b_box: Option<[f64; 4]>,
47 pub italic_angle: f64,
49 pub ascent: f64,
51 pub descent: f64,
53 pub cap_height: Option<f64>,
55 pub x_height: Option<f64>,
57 pub stem_v: f64,
59 pub stem_h: Option<f64>,
61 pub avg_width: Option<f64>,
63 pub max_width: Option<f64>,
65 pub missing_width: Option<f64>,
67 pub leading: Option<f64>,
69 pub font_file_ref: Option<IndirectRef>,
71 pub font_file2_ref: Option<IndirectRef>,
73 pub font_file3_ref: Option<IndirectRef>,
75}
76
77impl FontDescriptor {
78 pub const FIXED_PITCH: u32 = 1;
81 pub const SERIF: u32 = 1 << 1;
83 pub const SYMBOLIC: u32 = 1 << 2;
85 pub const SCRIPT: u32 = 1 << 3;
87 pub const NONSYMBOLIC: u32 = 1 << 5;
89 pub const ITALIC: u32 = 1 << 6;
91 pub const ALL_CAP: u32 = 1 << 16;
93 pub const SMALL_CAP: u32 = 1 << 17;
95 pub const FORCE_BOLD: u32 = 1 << 18;
97
98 pub fn has_flag(&self, flag: u32) -> bool {
100 self.flags & flag != 0
101 }
102
103 pub fn is_fixed_pitch(&self) -> bool {
105 self.has_flag(Self::FIXED_PITCH)
106 }
107
108 pub fn is_symbolic(&self) -> bool {
110 self.has_flag(Self::SYMBOLIC)
111 }
112
113 pub fn is_italic(&self) -> bool {
115 self.has_flag(Self::ITALIC)
116 }
117}
118
/// Advance-width table for a font, looked up by character code / CID.
#[derive(Debug, Clone)]
pub enum FontWidths {
    /// Contiguous table: `widths[i]` belongs to code `first_char + i`.
    Simple {
        first_char: u32,
        widths: Vec<f64>,
        default_width: f64,
    },
    /// Table described by an ordered list of [`CIDWidthEntry`] items.
    CID {
        default_width: f64,
        w_entries: Vec<CIDWidthEntry>,
    },
    /// No per-code data; every lookup yields `default_width`.
    None { default_width: f64 },
}

/// One entry of a [`FontWidths::CID`] table.
#[derive(Debug, Clone)]
pub enum CIDWidthEntry {
    /// Every code in `first..=last` shares `width`.
    Range { first: u32, last: u32, width: f64 },
    /// `widths[i]` belongs to code `first + i`.
    List { first: u32, widths: Vec<f64> },
}

impl FontWidths {
    /// Returns the width recorded for `char_code`, or the variant's
    /// `default_width` when the code is not covered.
    ///
    /// For [`FontWidths::CID`] the entries are scanned in order and the first
    /// one covering the code wins.
    pub fn get_width(&self, char_code: u32) -> f64 {
        /// Looks up `code` in a table whose first slot corresponds to `base`.
        fn width_at(table: &[f64], base: u32, code: u32) -> Option<f64> {
            let offset = code.checked_sub(base)?;
            table.get(offset as usize).copied()
        }

        match self {
            Self::None { default_width } => *default_width,
            Self::Simple {
                first_char,
                widths,
                default_width,
            } => width_at(widths, *first_char, char_code).unwrap_or(*default_width),
            Self::CID {
                default_width,
                w_entries,
            } => w_entries
                .iter()
                .find_map(|entry| match entry {
                    CIDWidthEntry::Range { first, last, width } => {
                        if (*first..=*last).contains(&char_code) {
                            Some(*width)
                        } else {
                            None
                        }
                    }
                    CIDWidthEntry::List { first, widths } => width_at(widths, *first, char_code),
                })
                .unwrap_or(*default_width),
        }
    }
}
188
189pub fn parse_font_info(dict: &PdfDict) -> FontInfo {
191 let base_font = dict
192 .get(b"BaseFont")
193 .and_then(|o| o.as_name())
194 .unwrap_or(b"Unknown")
195 .to_vec();
196
197 let subtype = dict
198 .get(b"Subtype")
199 .and_then(|o| o.as_name())
200 .unwrap_or(b"Type1")
201 .to_vec();
202
203 let is_std14 = is_standard14(&base_font);
204
205 let encoding = parse_encoding(dict);
206 let widths = parse_widths(dict, &base_font, is_std14);
207
208 let descriptor = dict
209 .get_dict(b"FontDescriptor")
210 .and_then(parse_font_descriptor);
211
212 FontInfo {
213 base_font,
214 subtype,
215 encoding,
216 widths,
217 to_unicode: None, is_standard14: is_std14,
219 descriptor,
220 }
221}
222
223pub fn parse_font_descriptor(dict: &PdfDict) -> Option<FontDescriptor> {
227 let font_name = dict
228 .get(b"FontName")
229 .and_then(|o| o.as_name())
230 .map(|n| n.to_vec())?;
231
232 let font_family = dict
233 .get(b"FontFamily")
234 .and_then(|o| match o {
235 PdfObject::String(s) => Some(s.clone()),
236 _ => o.as_name().map(|n| n.to_vec()),
237 });
238
239 let flags = dict.get_i64(b"Flags").unwrap_or(0) as u32;
240 let italic_angle = dict.get_f64(b"ItalicAngle").unwrap_or(0.0);
241 let ascent = dict.get_f64(b"Ascent").unwrap_or(0.0);
242 let descent = dict.get_f64(b"Descent").unwrap_or(0.0);
243 let stem_v = dict.get_f64(b"StemV").unwrap_or(0.0);
244
245 let font_b_box = dict.get_array(b"FontBBox").and_then(|arr| {
246 if arr.len() == 4 {
247 Some([
248 arr[0].as_f64().unwrap_or(0.0),
249 arr[1].as_f64().unwrap_or(0.0),
250 arr[2].as_f64().unwrap_or(0.0),
251 arr[3].as_f64().unwrap_or(0.0),
252 ])
253 } else {
254 None
255 }
256 });
257
258 let cap_height = dict.get_f64(b"CapHeight");
259 let x_height = dict.get_f64(b"XHeight");
260 let stem_h = dict.get_f64(b"StemH");
261 let avg_width = dict.get_f64(b"AvgWidth");
262 let max_width = dict.get_f64(b"MaxWidth");
263 let missing_width = dict.get_f64(b"MissingWidth");
264 let leading = dict.get_f64(b"Leading");
265
266 let font_file_ref = dict.get_ref(b"FontFile").cloned();
267 let font_file2_ref = dict.get_ref(b"FontFile2").cloned();
268 let font_file3_ref = dict.get_ref(b"FontFile3").cloned();
269
270 Some(FontDescriptor {
271 font_name,
272 font_family,
273 flags,
274 font_b_box,
275 italic_angle,
276 ascent,
277 descent,
278 cap_height,
279 x_height,
280 stem_v,
281 stem_h,
282 avg_width,
283 max_width,
284 missing_width,
285 leading,
286 font_file_ref,
287 font_file2_ref,
288 font_file3_ref,
289 })
290}
291
292fn parse_encoding(dict: &PdfDict) -> Encoding {
293 match dict.get(b"Encoding") {
294 Some(PdfObject::Name(name)) => Encoding::from_name(name),
295 _ => Encoding::StandardEncoding,
297 }
298}
299
300fn parse_widths(dict: &PdfDict, base_font: &[u8], is_std14: bool) -> FontWidths {
301 if let (Some(first_char), Some(widths_arr)) =
303 (dict.get_i64(b"FirstChar"), dict.get_array(b"Widths"))
304 {
305 let widths: Vec<f64> = widths_arr
306 .iter()
307 .map(|o| o.as_f64().unwrap_or(0.0))
308 .collect();
309 return FontWidths::Simple {
310 first_char: first_char as u32,
311 widths,
312 default_width: if is_std14 { 600.0 } else { 1000.0 },
313 };
314 }
315
316 if is_std14 {
318 let widths = standard14_widths(base_font);
319 if !widths.is_empty() {
320 return FontWidths::Simple {
321 first_char: 0,
322 widths,
323 default_width: 600.0,
324 };
325 }
326 }
327
328 FontWidths::None {
329 default_width: 1000.0,
330 }
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// Stores `value` under `key`, copying the key bytes into an owned vector.
    fn put(dict: &mut PdfDict, key: &[u8], value: PdfObject) {
        dict.insert(key.to_vec(), value);
    }

    /// Wraps `bytes` in a `PdfObject::Name`.
    fn name(bytes: &[u8]) -> PdfObject {
        PdfObject::Name(bytes.to_vec())
    }

    #[test]
    fn test_simple_widths() {
        let table = FontWidths::Simple {
            first_char: 32,
            widths: vec![250.0, 333.0, 408.0],
            default_width: 0.0,
        };
        // The last two codes fall outside the table (past the end / before
        // `first_char`) and must yield the default width.
        let cases = [(32, 250.0), (33, 333.0), (34, 408.0), (35, 0.0), (0, 0.0)];
        for &(code, expected) in &cases {
            assert_eq!(table.get_width(code), expected);
        }
    }

    #[test]
    fn test_cid_widths() {
        let table = FontWidths::CID {
            default_width: 1000.0,
            w_entries: vec![
                CIDWidthEntry::Range {
                    first: 1,
                    last: 10,
                    width: 500.0,
                },
                CIDWidthEntry::List {
                    first: 20,
                    widths: vec![600.0, 700.0, 800.0],
                },
            ],
        };
        // 99 is covered by no entry, so it maps to the default width.
        let cases = [(5, 500.0), (20, 600.0), (21, 700.0), (99, 1000.0)];
        for &(code, expected) in &cases {
            assert_eq!(table.get_width(code), expected);
        }
    }

    #[test]
    fn test_parse_font_info_standard14() {
        let mut font = PdfDict::new();
        put(&mut font, b"Type", name(b"Font"));
        put(&mut font, b"Subtype", name(b"Type1"));
        put(&mut font, b"BaseFont", name(b"Helvetica"));
        put(&mut font, b"Encoding", name(b"WinAnsiEncoding"));

        let info = parse_font_info(&font);
        assert_eq!(info.base_font, b"Helvetica");
        assert!(info.is_standard14);
        assert_eq!(info.encoding, Encoding::WinAnsiEncoding);
        assert!(info.descriptor.is_none());
    }

    #[test]
    fn test_parse_font_descriptor_full() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"FontName", name(b"ArialMT"));
        put(&mut desc, b"FontFamily", PdfObject::String(b"Arial".to_vec()));
        // 32 == FontDescriptor::NONSYMBOLIC (1 << 5).
        put(&mut desc, b"Flags", PdfObject::Integer(32));
        put(
            &mut desc,
            b"FontBBox",
            PdfObject::Array(vec![
                PdfObject::Integer(-665),
                PdfObject::Integer(-210),
                PdfObject::Integer(2000),
                PdfObject::Integer(728),
            ]),
        );
        put(&mut desc, b"ItalicAngle", PdfObject::Integer(0));
        put(&mut desc, b"Ascent", PdfObject::Integer(905));
        put(&mut desc, b"Descent", PdfObject::Integer(-212));
        put(&mut desc, b"CapHeight", PdfObject::Integer(728));
        put(&mut desc, b"XHeight", PdfObject::Integer(517));
        put(&mut desc, b"StemV", PdfObject::Integer(88));
        put(&mut desc, b"StemH", PdfObject::Integer(76));
        put(&mut desc, b"AvgWidth", PdfObject::Integer(441));
        put(&mut desc, b"MaxWidth", PdfObject::Integer(2000));
        put(&mut desc, b"MissingWidth", PdfObject::Integer(250));
        put(&mut desc, b"Leading", PdfObject::Integer(33));
        put(
            &mut desc,
            b"FontFile2",
            PdfObject::Reference(IndirectRef {
                obj_num: 42,
                gen_num: 0,
            }),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");

        // Identity and flags.
        assert_eq!(fd.font_name, b"ArialMT");
        assert_eq!(fd.font_family.as_deref(), Some(b"Arial".as_slice()));
        assert_eq!(fd.flags, 32);
        assert!(fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert!(!fd.has_flag(FontDescriptor::SYMBOLIC));
        assert!(!fd.is_italic());
        assert!(!fd.is_fixed_pitch());
        assert!(!fd.is_symbolic());

        // Bounding box.
        let bbox = fd.font_b_box.expect("should have bbox");
        assert_eq!(bbox, [-665.0, -210.0, 2000.0, 728.0]);

        // Metrics.
        assert_eq!(fd.italic_angle, 0.0);
        assert_eq!(fd.ascent, 905.0);
        assert_eq!(fd.descent, -212.0);
        assert_eq!(fd.cap_height, Some(728.0));
        assert_eq!(fd.x_height, Some(517.0));
        assert_eq!(fd.stem_v, 88.0);
        assert_eq!(fd.stem_h, Some(76.0));
        assert_eq!(fd.avg_width, Some(441.0));
        assert_eq!(fd.max_width, Some(2000.0));
        assert_eq!(fd.missing_width, Some(250.0));
        assert_eq!(fd.leading, Some(33.0));

        // Only FontFile2 was provided.
        assert!(fd.font_file_ref.is_none());
        let ff2 = fd.font_file2_ref.as_ref().expect("should have FontFile2");
        assert_eq!(ff2.obj_num, 42);
        assert_eq!(ff2.gen_num, 0);
        assert!(fd.font_file3_ref.is_none());
    }

    #[test]
    fn test_parse_font_descriptor_minimal() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"FontName", name(b"MyFont"));

        let fd = parse_font_descriptor(&desc).expect("should parse minimal");
        assert_eq!(fd.font_name, b"MyFont");

        // Mandatory numeric fields fall back to zero.
        assert_eq!(fd.flags, 0);
        assert_eq!(fd.italic_angle, 0.0);
        assert_eq!(fd.ascent, 0.0);
        assert_eq!(fd.descent, 0.0);
        assert_eq!(fd.stem_v, 0.0);

        // Everything optional stays unset.
        assert!(fd.font_family.is_none());
        assert!(fd.font_b_box.is_none());
        assert!(fd.cap_height.is_none());
        assert!(fd.x_height.is_none());
        assert!(fd.stem_h.is_none());
        assert!(fd.avg_width.is_none());
        assert!(fd.max_width.is_none());
        assert!(fd.missing_width.is_none());
        assert!(fd.leading.is_none());
        assert!(fd.font_file_ref.is_none());
        assert!(fd.font_file2_ref.is_none());
        assert!(fd.font_file3_ref.is_none());
    }

    #[test]
    fn test_parse_font_descriptor_missing_font_name() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"Flags", PdfObject::Integer(32));
        put(&mut desc, b"Ascent", PdfObject::Integer(800));

        assert!(parse_font_descriptor(&desc).is_none());
    }

    #[test]
    fn test_font_descriptor_flags() {
        let fd = FontDescriptor {
            font_name: b"TestFont".to_vec(),
            font_family: None,
            flags: FontDescriptor::FIXED_PITCH
                | FontDescriptor::SERIF
                | FontDescriptor::ITALIC
                | FontDescriptor::FORCE_BOLD,
            font_b_box: None,
            italic_angle: -12.0,
            ascent: 800.0,
            descent: -200.0,
            cap_height: None,
            x_height: None,
            stem_v: 80.0,
            stem_h: None,
            avg_width: None,
            max_width: None,
            missing_width: None,
            leading: None,
            font_file_ref: None,
            font_file2_ref: None,
            font_file3_ref: None,
        };

        // Bits that were set.
        assert!(fd.is_fixed_pitch());
        assert!(fd.has_flag(FontDescriptor::SERIF));
        assert!(fd.is_italic());
        assert!(fd.has_flag(FontDescriptor::FORCE_BOLD));

        // Bits that were not.
        assert!(!fd.is_symbolic());
        assert!(!fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert!(!fd.has_flag(FontDescriptor::SCRIPT));
        assert!(!fd.has_flag(FontDescriptor::ALL_CAP));
        assert!(!fd.has_flag(FontDescriptor::SMALL_CAP));
    }

    #[test]
    fn test_parse_font_descriptor_bbox_wrong_length() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"FontName", name(b"Test"));
        put(
            &mut desc,
            b"FontBBox",
            PdfObject::Array(vec![PdfObject::Integer(0), PdfObject::Integer(0)]),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");
        assert!(fd.font_b_box.is_none());
    }

    #[test]
    fn test_parse_font_info_with_descriptor() {
        let mut desc_dict = PdfDict::new();
        put(&mut desc_dict, b"FontName", name(b"TimesNewRomanPSMT"));
        // 34 == FontDescriptor::SERIF (1 << 1) | FontDescriptor::NONSYMBOLIC (1 << 5).
        put(&mut desc_dict, b"Flags", PdfObject::Integer(34));
        put(&mut desc_dict, b"Ascent", PdfObject::Integer(891));
        put(&mut desc_dict, b"Descent", PdfObject::Integer(-216));
        put(&mut desc_dict, b"StemV", PdfObject::Integer(82));
        put(&mut desc_dict, b"ItalicAngle", PdfObject::Integer(0));
        put(&mut desc_dict, b"CapHeight", PdfObject::Integer(662));
        put(
            &mut desc_dict,
            b"FontFile2",
            PdfObject::Reference(IndirectRef {
                obj_num: 100,
                gen_num: 0,
            }),
        );

        let mut font_dict = PdfDict::new();
        put(&mut font_dict, b"Type", name(b"Font"));
        put(&mut font_dict, b"Subtype", name(b"TrueType"));
        put(&mut font_dict, b"BaseFont", name(b"TimesNewRomanPSMT"));
        put(&mut font_dict, b"Encoding", name(b"WinAnsiEncoding"));
        put(&mut font_dict, b"FontDescriptor", PdfObject::Dict(desc_dict));

        let info = parse_font_info(&font_dict);
        assert_eq!(info.base_font, b"TimesNewRomanPSMT");
        assert_eq!(info.subtype, b"TrueType");

        let fd = info.descriptor.expect("should have descriptor");
        assert_eq!(fd.font_name, b"TimesNewRomanPSMT");
        assert_eq!(fd.flags, 34);
        assert!(fd.has_flag(FontDescriptor::SERIF));
        assert!(fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert_eq!(fd.ascent, 891.0);
        assert_eq!(fd.descent, -216.0);
        assert_eq!(fd.stem_v, 82.0);
        assert_eq!(fd.cap_height, Some(662.0));
        let ff2 = fd.font_file2_ref.as_ref().expect("should have FontFile2");
        assert_eq!(ff2.obj_num, 100);
    }

    #[test]
    fn test_font_descriptor_all_font_file_refs() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"FontName", name(b"Test"));
        put(
            &mut desc,
            b"FontFile",
            PdfObject::Reference(IndirectRef {
                obj_num: 10,
                gen_num: 0,
            }),
        );
        put(
            &mut desc,
            b"FontFile2",
            PdfObject::Reference(IndirectRef {
                obj_num: 20,
                gen_num: 0,
            }),
        );
        put(
            &mut desc,
            b"FontFile3",
            PdfObject::Reference(IndirectRef {
                obj_num: 30,
                gen_num: 0,
            }),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");
        assert_eq!(fd.font_file_ref.as_ref().unwrap().obj_num, 10);
        assert_eq!(fd.font_file2_ref.as_ref().unwrap().obj_num, 20);
        assert_eq!(fd.font_file3_ref.as_ref().unwrap().obj_num, 30);
    }
}