1use crate::cmap::ToUnicodeCMap;
4use crate::encoding::Encoding;
5use folio_core::Result;
6use folio_cos::{CosDoc, PdfObject};
7use std::collections::HashMap;
8
/// Font kind, as selected by the `/Subtype` name of a font dictionary.
///
/// `PdfFont::from_dict` maps the subtype name with the same spelling onto each
/// variant; any other (or missing) subtype becomes `FontType::Unknown`.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used
/// as a map/set key and in `Eq`-bounded contexts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FontType {
    /// `/Subtype /Type1`.
    Type1,
    /// `/Subtype /TrueType`.
    TrueType,
    /// `/Subtype /Type0`; the only variant `PdfFont::from_dict` flags as CID-keyed.
    Type0,
    /// `/Subtype /Type3`.
    Type3,
    /// `/Subtype /CIDFontType0`.
    CIDFontType0,
    /// `/Subtype /CIDFontType2`.
    CIDFontType2,
    /// `/Subtype /MMType1`.
    MMType1,
    /// Subtype missing or not one of the names above.
    Unknown,
}
21
22#[derive(Debug)]
24pub struct PdfFont {
25 pub font_type: FontType,
27 pub base_font: String,
29 pub encoding: Encoding,
31 pub to_unicode: Option<ToUnicodeCMap>,
33 pub widths: HashMap<u32, f64>,
35 pub default_width: f64,
37 pub first_char: u32,
39 pub is_cid: bool,
41}
42
43impl PdfFont {
44 pub fn from_dict(dict: &PdfObject, doc: &mut CosDoc) -> Result<Self> {
46 let subtype = dict.dict_get_name_str(b"Subtype").unwrap_or_default();
47 let base_font = dict.dict_get_name_str(b"BaseFont").unwrap_or_default();
48
49 let font_type = match subtype.as_str() {
50 "Type1" => FontType::Type1,
51 "TrueType" => FontType::TrueType,
52 "Type0" => FontType::Type0,
53 "Type3" => FontType::Type3,
54 "CIDFontType0" => FontType::CIDFontType0,
55 "CIDFontType2" => FontType::CIDFontType2,
56 "MMType1" => FontType::MMType1,
57 _ => FontType::Unknown,
58 };
59
60 let is_cid = matches!(font_type, FontType::Type0);
61
62 let encoding = load_encoding(dict, doc);
64
65 let to_unicode = load_tounicode(dict, doc);
67
68 let (widths, first_char, default_width) = if is_cid {
70 load_cid_widths(dict, doc)
71 } else {
72 load_simple_widths(dict)
73 };
74
75 Ok(PdfFont {
76 font_type,
77 base_font,
78 encoding,
79 to_unicode,
80 widths,
81 default_width,
82 first_char,
83 is_cid,
84 })
85 }
86
87 pub fn char_width(&self, code: u32) -> f64 {
89 self.widths
90 .get(&code)
91 .copied()
92 .unwrap_or(self.default_width)
93 }
94
95 pub fn decode_text(&self, data: &[u8]) -> String {
97 crate::encoding::decode_text(data, &self.encoding, self.to_unicode.as_ref())
98 }
99}
100
101fn load_encoding(dict: &PdfObject, doc: &mut CosDoc) -> Encoding {
102 let base_font = dict.dict_get_name_str(b"BaseFont").unwrap_or_default();
103 let subtype = dict.dict_get_name_str(b"Subtype").unwrap_or_default();
104
105 let is_zapf = base_font == "ZapfDingbats" || base_font.ends_with("+ZapfDingbats");
107 let is_symbol = base_font == "Symbol" || base_font.ends_with("+Symbol");
108
109 match dict.dict_get(b"Encoding") {
110 Some(PdfObject::Name(name)) => Encoding::from_name(name),
111 Some(PdfObject::Dict(d)) => {
112 let default_base = if is_zapf {
113 b"ZapfDingbatsEncoding".as_slice()
114 } else if is_symbol {
115 b"SymbolEncoding".as_slice()
116 } else {
117 b"WinAnsiEncoding".as_slice()
118 };
119 let base_name = d
120 .get(b"BaseEncoding".as_slice())
121 .and_then(|o| o.as_name())
122 .unwrap_or(default_base);
123 let mut enc = Encoding::from_name(base_name);
124
125 if let Some(PdfObject::Array(diffs)) = d.get(b"Differences".as_slice()) {
126 enc.apply_differences(diffs);
127 }
128
129 enc
130 }
131 Some(PdfObject::Reference(id)) => {
132 if let Ok(Some(obj)) = doc.get_object(id.num) {
133 let obj = obj.clone();
134 return load_encoding_from_obj(&obj, doc, is_zapf, is_symbol);
135 }
136 Encoding::win_ansi()
137 }
138 None => {
139 if is_zapf {
141 Encoding::zapf_dingbats()
142 } else if is_symbol {
143 Encoding::symbol()
144 } else if subtype == "TrueType" && is_subset_font(&base_font) {
145 Encoding::mac_roman()
148 } else {
149 Encoding::win_ansi()
150 }
151 }
152 _ => Encoding::win_ansi(),
153 }
154}
155
156fn load_encoding_from_obj(
157 obj: &PdfObject,
158 _doc: &mut CosDoc,
159 is_zapf: bool,
160 is_symbol: bool,
161) -> Encoding {
162 match obj {
163 PdfObject::Name(name) => Encoding::from_name(name),
164 PdfObject::Dict(d) => {
165 let default_base = if is_zapf {
166 b"ZapfDingbatsEncoding".as_slice()
167 } else if is_symbol {
168 b"SymbolEncoding".as_slice()
169 } else {
170 b"WinAnsiEncoding".as_slice()
171 };
172 let base_name = d
173 .get(b"BaseEncoding".as_slice())
174 .and_then(|o| o.as_name())
175 .unwrap_or(default_base);
176 let mut enc = Encoding::from_name(base_name);
177 if let Some(PdfObject::Array(diffs)) = d.get(b"Differences".as_slice()) {
178 enc.apply_differences(diffs);
179 }
180 enc
181 }
182 _ => Encoding::win_ansi(),
183 }
184}
185
186fn load_tounicode(dict: &PdfObject, doc: &mut CosDoc) -> Option<ToUnicodeCMap> {
187 let tu_ref = dict.dict_get(b"ToUnicode")?;
188
189 let stream = match tu_ref {
190 PdfObject::Reference(id) => {
191 let obj = doc.get_object(id.num).ok()??;
192 obj.clone()
193 }
194 other => other.clone(),
195 };
196
197 let stream_data = match &stream {
198 PdfObject::Stream(s) => doc.decode_stream(s).ok()?,
199 _ => return None,
200 };
201
202 ToUnicodeCMap::parse(&stream_data).ok()
203}
204
205fn load_simple_widths(dict: &PdfObject) -> (HashMap<u32, f64>, u32, f64) {
206 let first_char = dict.dict_get_i64(b"FirstChar").unwrap_or(0) as u32;
207 let default_width = dict.dict_get_f64(b"MissingWidth").unwrap_or(1000.0);
208
209 let mut widths = HashMap::new();
210
211 if let Some(PdfObject::Array(w_arr)) = dict.dict_get(b"Widths") {
212 for (i, w) in w_arr.iter().enumerate() {
213 if let Some(width) = w.as_f64() {
214 widths.insert(first_char + i as u32, width);
215 }
216 }
217 }
218
219 (widths, first_char, default_width)
220}
221
222fn load_cid_widths(dict: &PdfObject, doc: &mut CosDoc) -> (HashMap<u32, f64>, u32, f64) {
223 let descendant = dict
225 .dict_get(b"DescendantFonts")
226 .and_then(|o| o.as_array())
227 .and_then(|a| a.first())
228 .cloned();
229
230 let cid_dict = match descendant {
231 Some(PdfObject::Reference(id)) => doc.get_object(id.num).ok().flatten().cloned(),
232 Some(obj) => Some(obj),
233 None => None,
234 };
235
236 let cid_dict = match cid_dict {
237 Some(d) => d,
238 None => return (HashMap::new(), 0, 1000.0),
239 };
240
241 let default_width = cid_dict.dict_get_f64(b"DW").unwrap_or(1000.0);
242 let mut widths = HashMap::new();
243
244 if let Some(PdfObject::Array(w_arr)) = cid_dict.dict_get(b"W") {
246 let mut i = 0;
247 while i < w_arr.len() {
248 let cid_start = match w_arr[i].as_i64() {
249 Some(n) => n as u32,
250 None => {
251 i += 1;
252 continue;
253 }
254 };
255
256 if i + 1 < w_arr.len() {
257 match &w_arr[i + 1] {
258 PdfObject::Array(widths_arr) => {
259 for (j, w) in widths_arr.iter().enumerate() {
261 if let Some(width) = w.as_f64() {
262 widths.insert(cid_start + j as u32, width);
263 }
264 }
265 i += 2;
266 }
267 PdfObject::Integer(_) | PdfObject::Real(_) if i + 2 < w_arr.len() => {
268 let cid_end = w_arr[i + 1].as_i64().unwrap_or(0) as u32;
270 let width = w_arr[i + 2].as_f64().unwrap_or(default_width);
271 for cid in cid_start..=cid_end {
272 widths.insert(cid, width);
273 }
274 i += 3;
275 }
276 _ => {
277 i += 1;
278 }
279 }
280 } else {
281 i += 1;
282 }
283 }
284 }
285
286 (widths, 0, default_width)
287}
288
/// Reports whether `name` starts with a font-subset tag: six uppercase ASCII
/// letters, then `+`, then at least one more byte (e.g. `ABCDEF+Arial`).
///
/// The check is done on raw bytes, so a name containing multi-byte UTF-8
/// characters simply fails the test instead of panicking on a slice that
/// does not fall on a character boundary.
fn is_subset_font(name: &str) -> bool {
    let bytes = name.as_bytes();
    // 6-letter tag + '+' + at least one byte of actual font name.
    if bytes.len() < 8 {
        return false;
    }
    bytes[6] == b'+' && bytes[..6].iter().all(u8::is_ascii_uppercase)
}