Skip to main content

pdfplumber_parse/
interpreter.rs

1//! Content stream interpreter.
2//!
3//! Interprets tokenized PDF content stream operators, maintaining graphics and
4//! text state, and emitting events to a [`ContentHandler`]. Handles Form XObject
5//! recursion via the `Do` operator.
6
7use std::collections::HashMap;
8
9use crate::cid_font::{
10    CidFontMetrics, extract_cid_font_metrics, get_descendant_font, get_type0_encoding,
11    is_type0_font, parse_predefined_cmap_name, strip_subset_prefix,
12};
13use crate::cmap::CMap;
14use crate::color_space::resolve_color_space_name;
15use crate::error::BackendError;
16use crate::font_metrics::{FontMetrics, extract_font_metrics};
17use crate::handler::{CharEvent, ContentHandler, ImageEvent};
18use crate::interpreter_state::InterpreterState;
19use crate::lopdf_backend::object_to_f64;
20use crate::text_renderer::{
21    TjElement, show_string, show_string_cid, show_string_with_positioning_mode,
22};
23use crate::text_state::TextState;
24use crate::tokenizer::{Operand, tokenize};
25use pdfplumber_core::{ExtractOptions, ExtractWarning};
26
27/// Cached font information for the interpreter.
28struct CachedFont {
29    metrics: FontMetrics,
30    cmap: Option<CMap>,
31    base_name: String,
32    /// CID font metrics (present for Type0/CID fonts).
33    cid_metrics: Option<CidFontMetrics>,
34    /// Whether this is a CID (composite/Type0) font.
35    is_cid_font: bool,
36    /// Writing mode: 0 = horizontal, 1 = vertical.
37    /// Used in US-041 for vertical writing mode support.
38    #[allow(dead_code)]
39    writing_mode: u8,
40}
41
42/// Interpret a content stream and emit events to the handler.
43///
44/// Processes tokenized PDF operators, updates graphics/text state, and calls
45/// handler methods for text, path, and image events. Handles Form XObject
46/// recursion via the `Do` operator.
47///
48/// # Arguments
49///
50/// * `doc` - The lopdf document (for resolving references)
51/// * `stream_bytes` - Decoded content stream bytes
52/// * `resources` - Resources dictionary for this scope
53/// * `handler` - Event callback handler
54/// * `options` - Resource limits and settings
55/// * `depth` - Current recursion depth (0 for page-level)
56/// * `gstate` - Current graphics/interpreter state
57/// * `tstate` - Current text state
58#[allow(clippy::too_many_arguments)]
59pub(crate) fn interpret_content_stream(
60    doc: &lopdf::Document,
61    stream_bytes: &[u8],
62    resources: &lopdf::Dictionary,
63    handler: &mut dyn ContentHandler,
64    options: &ExtractOptions,
65    depth: usize,
66    gstate: &mut InterpreterState,
67    tstate: &mut TextState,
68) -> Result<(), BackendError> {
69    if depth > options.max_recursion_depth {
70        return Err(BackendError::Interpreter(format!(
71            "Form XObject recursion depth {} exceeds limit {}",
72            depth, options.max_recursion_depth
73        )));
74    }
75
76    let operators = tokenize(stream_bytes)?;
77    let mut font_cache: HashMap<String, CachedFont> = HashMap::new();
78
79    for (op_index, op) in operators.iter().enumerate() {
80        match op.name.as_str() {
81            // --- Graphics state operators ---
82            "q" => gstate.save_state(),
83            "Q" => {
84                gstate.restore_state();
85            }
86            "cm" => {
87                if op.operands.len() >= 6 {
88                    let a = get_f64(&op.operands, 0).unwrap_or(1.0);
89                    let b = get_f64(&op.operands, 1).unwrap_or(0.0);
90                    let c = get_f64(&op.operands, 2).unwrap_or(0.0);
91                    let d = get_f64(&op.operands, 3).unwrap_or(1.0);
92                    let e = get_f64(&op.operands, 4).unwrap_or(0.0);
93                    let f = get_f64(&op.operands, 5).unwrap_or(0.0);
94                    gstate.concat_matrix(a, b, c, d, e, f);
95                }
96            }
97            "w" => {
98                if let Some(v) = get_f64(&op.operands, 0) {
99                    gstate.set_line_width(v);
100                }
101            }
102
103            // --- Color operators ---
104            "G" => {
105                if let Some(g) = get_f32(&op.operands, 0) {
106                    gstate.set_stroking_gray(g);
107                }
108            }
109            "g" => {
110                if let Some(g) = get_f32(&op.operands, 0) {
111                    gstate.set_non_stroking_gray(g);
112                }
113            }
114            "RG" => {
115                if op.operands.len() >= 3 {
116                    let r = get_f32(&op.operands, 0).unwrap_or(0.0);
117                    let g = get_f32(&op.operands, 1).unwrap_or(0.0);
118                    let b = get_f32(&op.operands, 2).unwrap_or(0.0);
119                    gstate.set_stroking_rgb(r, g, b);
120                }
121            }
122            "rg" => {
123                if op.operands.len() >= 3 {
124                    let r = get_f32(&op.operands, 0).unwrap_or(0.0);
125                    let g = get_f32(&op.operands, 1).unwrap_or(0.0);
126                    let b = get_f32(&op.operands, 2).unwrap_or(0.0);
127                    gstate.set_non_stroking_rgb(r, g, b);
128                }
129            }
130            "K" => {
131                if op.operands.len() >= 4 {
132                    let c = get_f32(&op.operands, 0).unwrap_or(0.0);
133                    let m = get_f32(&op.operands, 1).unwrap_or(0.0);
134                    let y = get_f32(&op.operands, 2).unwrap_or(0.0);
135                    let k = get_f32(&op.operands, 3).unwrap_or(0.0);
136                    gstate.set_stroking_cmyk(c, m, y, k);
137                }
138            }
139            "k" => {
140                if op.operands.len() >= 4 {
141                    let c = get_f32(&op.operands, 0).unwrap_or(0.0);
142                    let m = get_f32(&op.operands, 1).unwrap_or(0.0);
143                    let y = get_f32(&op.operands, 2).unwrap_or(0.0);
144                    let k = get_f32(&op.operands, 3).unwrap_or(0.0);
145                    gstate.set_non_stroking_cmyk(c, m, y, k);
146                }
147            }
148            "CS" => {
149                if let Some(Operand::Name(name)) = op.operands.first() {
150                    if let Some(cs) = resolve_color_space_name(name, doc, resources) {
151                        gstate.set_stroking_color_space(cs);
152                    }
153                }
154            }
155            "cs" => {
156                if let Some(Operand::Name(name)) = op.operands.first() {
157                    if let Some(cs) = resolve_color_space_name(name, doc, resources) {
158                        gstate.set_non_stroking_color_space(cs);
159                    }
160                }
161            }
162            "SC" | "SCN" => {
163                let components: Vec<f32> = op.operands.iter().filter_map(operand_to_f32).collect();
164                gstate.set_stroking_color(&components);
165            }
166            "sc" | "scn" => {
167                let components: Vec<f32> = op.operands.iter().filter_map(operand_to_f32).collect();
168                gstate.set_non_stroking_color(&components);
169            }
170
171            // --- Text state operators ---
172            "BT" => tstate.begin_text(),
173            "ET" => tstate.end_text(),
174            "Tf" => {
175                if op.operands.len() >= 2 {
176                    let font_name = operand_to_name(&op.operands[0]);
177                    let size = get_f64(&op.operands, 1).unwrap_or(0.0);
178                    tstate.set_font(font_name.clone(), size);
179                    load_font_if_needed(
180                        doc,
181                        resources,
182                        &font_name,
183                        &mut font_cache,
184                        handler,
185                        options,
186                        op_index,
187                    );
188                }
189            }
190            "Tm" => {
191                if op.operands.len() >= 6 {
192                    let a = get_f64(&op.operands, 0).unwrap_or(1.0);
193                    let b = get_f64(&op.operands, 1).unwrap_or(0.0);
194                    let c = get_f64(&op.operands, 2).unwrap_or(0.0);
195                    let d = get_f64(&op.operands, 3).unwrap_or(1.0);
196                    let e = get_f64(&op.operands, 4).unwrap_or(0.0);
197                    let f = get_f64(&op.operands, 5).unwrap_or(0.0);
198                    tstate.set_text_matrix(a, b, c, d, e, f);
199                }
200            }
201            "Td" => {
202                if op.operands.len() >= 2 {
203                    let tx = get_f64(&op.operands, 0).unwrap_or(0.0);
204                    let ty = get_f64(&op.operands, 1).unwrap_or(0.0);
205                    tstate.move_text_position(tx, ty);
206                }
207            }
208            "TD" => {
209                if op.operands.len() >= 2 {
210                    let tx = get_f64(&op.operands, 0).unwrap_or(0.0);
211                    let ty = get_f64(&op.operands, 1).unwrap_or(0.0);
212                    tstate.move_text_position_and_set_leading(tx, ty);
213                }
214            }
215            "T*" => tstate.move_to_next_line(),
216            "Tc" => {
217                if let Some(v) = get_f64(&op.operands, 0) {
218                    tstate.set_char_spacing(v);
219                }
220            }
221            "Tw" => {
222                if let Some(v) = get_f64(&op.operands, 0) {
223                    tstate.set_word_spacing(v);
224                }
225            }
226            "Tz" => {
227                if let Some(v) = get_f64(&op.operands, 0) {
228                    tstate.set_h_scaling(v);
229                }
230            }
231            "TL" => {
232                if let Some(v) = get_f64(&op.operands, 0) {
233                    tstate.set_leading(v);
234                }
235            }
236            "Tr" => {
237                if let Some(v) = get_i64(&op.operands, 0) {
238                    if let Some(mode) = crate::text_state::TextRenderMode::from_i64(v) {
239                        tstate.set_render_mode(mode);
240                    }
241                }
242            }
243            "Ts" => {
244                if let Some(v) = get_f64(&op.operands, 0) {
245                    tstate.set_rise(v);
246                }
247            }
248
249            // --- Text rendering operators ---
250            "Tj" => {
251                handle_tj(tstate, gstate, handler, &op.operands, &font_cache);
252            }
253            "TJ" => {
254                handle_tj_array(tstate, gstate, handler, &op.operands, &font_cache);
255            }
256            "'" => {
257                // T* then Tj
258                tstate.move_to_next_line();
259                handle_tj(tstate, gstate, handler, &op.operands, &font_cache);
260            }
261            "\"" => {
262                // aw ac (string) "
263                if op.operands.len() >= 3 {
264                    if let Some(aw) = get_f64(&op.operands, 0) {
265                        tstate.set_word_spacing(aw);
266                    }
267                    if let Some(ac) = get_f64(&op.operands, 1) {
268                        tstate.set_char_spacing(ac);
269                    }
270                    tstate.move_to_next_line();
271                    // Show the string (3rd operand)
272                    let string_operands = vec![op.operands[2].clone()];
273                    handle_tj(tstate, gstate, handler, &string_operands, &font_cache);
274                }
275            }
276
277            // --- XObject operator ---
278            "Do" => {
279                if let Some(Operand::Name(name)) = op.operands.first() {
280                    handle_do(
281                        doc, resources, handler, options, depth, gstate, tstate, name,
282                    )?;
283                }
284            }
285
286            // Other operators (paths, etc.) - not yet handled for this story
287            _ => {}
288        }
289    }
290
291    Ok(())
292}
293
294// --- Operand extraction helpers ---
295
296fn get_f64(operands: &[Operand], index: usize) -> Option<f64> {
297    operands.get(index).and_then(|o| match o {
298        Operand::Integer(i) => Some(*i as f64),
299        Operand::Real(f) => Some(*f),
300        _ => None,
301    })
302}
303
304fn get_f32(operands: &[Operand], index: usize) -> Option<f32> {
305    get_f64(operands, index).map(|v| v as f32)
306}
307
308fn get_i64(operands: &[Operand], index: usize) -> Option<i64> {
309    operands.get(index).and_then(|o| match o {
310        Operand::Integer(i) => Some(*i),
311        Operand::Real(f) => Some(*f as i64),
312        _ => None,
313    })
314}
315
316fn operand_to_f32(o: &Operand) -> Option<f32> {
317    match o {
318        Operand::Integer(i) => Some(*i as f32),
319        Operand::Real(f) => Some(*f as f32),
320        _ => None,
321    }
322}
323
324fn operand_to_name(o: &Operand) -> String {
325    match o {
326        Operand::Name(n) => n.clone(),
327        _ => String::new(),
328    }
329}
330
331fn operand_to_string_bytes(o: &Operand) -> Option<&[u8]> {
332    match o {
333        Operand::LiteralString(s) | Operand::HexString(s) => Some(s),
334        _ => None,
335    }
336}
337
338// --- Font loading ---
339
340#[allow(clippy::too_many_arguments)]
341fn load_font_if_needed(
342    doc: &lopdf::Document,
343    resources: &lopdf::Dictionary,
344    font_name: &str,
345    cache: &mut HashMap<String, CachedFont>,
346    handler: &mut dyn ContentHandler,
347    options: &ExtractOptions,
348    op_index: usize,
349) {
350    if cache.contains_key(font_name) {
351        return;
352    }
353
354    // Look up /Resources/Font/<font_name>
355    let font_dict = (|| -> Option<&lopdf::Dictionary> {
356        let fonts_obj = resources.get(b"Font").ok()?;
357        let fonts_obj = resolve_ref(doc, fonts_obj);
358        let fonts_dict = fonts_obj.as_dict().ok()?;
359        let font_obj = fonts_dict.get(font_name.as_bytes()).ok()?;
360        let font_obj = resolve_ref(doc, font_obj);
361        font_obj.as_dict().ok()
362    })();
363
364    let (metrics, cmap, base_name, cid_metrics, is_cid_font, writing_mode) =
365        if let Some(fd) = font_dict {
366            if is_type0_font(fd) {
367                // Type0 (composite/CID) font
368                let (cid_met, wm) = load_cid_font(doc, fd);
369                let metrics = if let Some(ref cm) = cid_met {
370                    // Create a FontMetrics from CID font data for backward compat
371                    FontMetrics::new(
372                        Vec::new(),
373                        0,
374                        0,
375                        cm.default_width(),
376                        cm.ascent(),
377                        cm.descent(),
378                        cm.font_bbox(),
379                    )
380                } else {
381                    if options.collect_warnings {
382                        handler.on_warning(ExtractWarning::with_operator_context(
383                            "CID font metrics not available, using defaults",
384                            op_index,
385                            font_name,
386                        ));
387                    }
388                    FontMetrics::default_metrics()
389                };
390
391                // Extract ToUnicode CMap if present
392                let cmap = extract_tounicode_cmap(doc, fd);
393
394                let raw_base_name = fd
395                    .get(b"BaseFont")
396                    .ok()
397                    .and_then(|o| o.as_name_str().ok())
398                    .unwrap_or(font_name);
399                let base_name = strip_subset_prefix(raw_base_name).to_string();
400
401                (metrics, cmap, base_name, cid_met, true, wm)
402            } else {
403                // Simple font
404                let metrics = match extract_font_metrics(doc, fd) {
405                    Ok(m) => m,
406                    Err(_) => {
407                        if options.collect_warnings {
408                            handler.on_warning(ExtractWarning::with_operator_context(
409                                "failed to extract font metrics, using defaults",
410                                op_index,
411                                font_name,
412                            ));
413                        }
414                        FontMetrics::default_metrics()
415                    }
416                };
417                let cmap = extract_tounicode_cmap(doc, fd);
418                let raw_base_name = fd
419                    .get(b"BaseFont")
420                    .ok()
421                    .and_then(|o| o.as_name_str().ok())
422                    .unwrap_or(font_name);
423                let base_name = strip_subset_prefix(raw_base_name).to_string();
424
425                (metrics, cmap, base_name, None, false, 0)
426            }
427        } else {
428            // Font not found in page resources — use defaults
429            if options.collect_warnings {
430                handler.on_warning(ExtractWarning::with_operator_context(
431                    "font not found in page resources, using defaults",
432                    op_index,
433                    font_name,
434                ));
435            }
436            (
437                FontMetrics::default_metrics(),
438                None,
439                font_name.to_string(),
440                None,
441                false,
442                0,
443            )
444        };
445
446    cache.insert(
447        font_name.to_string(),
448        CachedFont {
449            metrics,
450            cmap,
451            base_name,
452            cid_metrics,
453            is_cid_font,
454            writing_mode,
455        },
456    );
457}
458
459/// Extract ToUnicode CMap from a font dictionary.
460fn extract_tounicode_cmap(doc: &lopdf::Document, fd: &lopdf::Dictionary) -> Option<CMap> {
461    let tounicode_obj = fd.get(b"ToUnicode").ok()?;
462    let tounicode_obj = resolve_ref(doc, tounicode_obj);
463    let stream = tounicode_obj.as_stream().ok()?;
464    let data = decode_stream(stream).ok()?;
465    CMap::parse(&data).ok()
466}
467
468/// Load CID font information from a Type0 font dictionary.
469fn load_cid_font(
470    doc: &lopdf::Document,
471    type0_dict: &lopdf::Dictionary,
472) -> (Option<CidFontMetrics>, u8) {
473    // Determine writing mode from encoding name
474    let writing_mode = get_type0_encoding(type0_dict)
475        .and_then(|enc| parse_predefined_cmap_name(&enc))
476        .map(|info| info.writing_mode)
477        .unwrap_or(0);
478
479    // Get descendant CIDFont dictionary
480    let cid_metrics = get_descendant_font(doc, type0_dict)
481        .and_then(|desc| extract_cid_font_metrics(doc, desc).ok());
482
483    (cid_metrics, writing_mode)
484}
485
486// --- Text rendering ---
487
488/// Build a width lookup function for a cached font.
489/// For CID fonts, uses CidFontMetrics; for simple fonts, uses FontMetrics.
490fn get_width_fn(cached: Option<&CachedFont>) -> Box<dyn Fn(u32) -> f64 + '_> {
491    match cached {
492        Some(cf) if cf.is_cid_font => {
493            if let Some(ref cid_met) = cf.cid_metrics {
494                Box::new(move |code: u32| cid_met.get_width(code))
495            } else {
496                Box::new(move |code: u32| cf.metrics.get_width(code))
497            }
498        }
499        Some(cf) => Box::new(move |code: u32| cf.metrics.get_width(code)),
500        None => {
501            let default_metrics = FontMetrics::default_metrics();
502            Box::new(move |code: u32| default_metrics.get_width(code))
503        }
504    }
505}
506
507fn handle_tj(
508    tstate: &mut TextState,
509    gstate: &InterpreterState,
510    handler: &mut dyn ContentHandler,
511    operands: &[Operand],
512    font_cache: &HashMap<String, CachedFont>,
513) {
514    let string_bytes = match operands.first().and_then(operand_to_string_bytes) {
515        Some(bytes) => bytes,
516        None => return,
517    };
518
519    let cached = font_cache.get(&tstate.font_name);
520    let width_fn = get_width_fn(cached);
521    let is_cid = cached.is_some_and(|c| c.is_cid_font);
522    let raw_chars = if is_cid {
523        show_string_cid(tstate, string_bytes, &*width_fn)
524    } else {
525        show_string(tstate, string_bytes, &*width_fn)
526    };
527
528    emit_char_events(raw_chars, tstate, gstate, handler, cached);
529}
530
531fn handle_tj_array(
532    tstate: &mut TextState,
533    gstate: &InterpreterState,
534    handler: &mut dyn ContentHandler,
535    operands: &[Operand],
536    font_cache: &HashMap<String, CachedFont>,
537) {
538    let array = match operands.first() {
539        Some(Operand::Array(arr)) => arr,
540        _ => return,
541    };
542
543    // Convert Operand array to TjElement array
544    let elements: Vec<TjElement> = array
545        .iter()
546        .filter_map(|o| match o {
547            Operand::LiteralString(s) | Operand::HexString(s) => Some(TjElement::String(s.clone())),
548            Operand::Integer(i) => Some(TjElement::Adjustment(*i as f64)),
549            Operand::Real(f) => Some(TjElement::Adjustment(*f)),
550            _ => None,
551        })
552        .collect();
553
554    let cached = font_cache.get(&tstate.font_name);
555    let width_fn = get_width_fn(cached);
556    let is_cid = cached.is_some_and(|c| c.is_cid_font);
557    let raw_chars = show_string_with_positioning_mode(tstate, &elements, &*width_fn, is_cid);
558
559    emit_char_events(raw_chars, tstate, gstate, handler, cached);
560}
561
562fn emit_char_events(
563    raw_chars: Vec<crate::text_renderer::RawChar>,
564    tstate: &TextState,
565    gstate: &InterpreterState,
566    handler: &mut dyn ContentHandler,
567    cached: Option<&CachedFont>,
568) {
569    let ctm = gstate.ctm_array();
570    let font_name = cached.map_or_else(|| tstate.font_name.clone(), |c| c.base_name.clone());
571
572    for rc in raw_chars {
573        let unicode = cached.and_then(|c| {
574            c.cmap
575                .as_ref()
576                .and_then(|cm| cm.lookup(rc.char_code).map(|s| s.to_string()))
577        });
578
579        // Use CID font metrics for displacement if available
580        let displacement = match cached {
581            Some(cf) if cf.is_cid_font => cf
582                .cid_metrics
583                .as_ref()
584                .map_or(600.0, |cm| cm.get_width(rc.char_code)),
585            Some(cf) => cf.metrics.get_width(rc.char_code),
586            None => 600.0,
587        };
588
589        handler.on_char(CharEvent {
590            char_code: rc.char_code,
591            unicode,
592            font_name: font_name.clone(),
593            font_size: tstate.font_size,
594            text_matrix: rc.text_matrix,
595            ctm,
596            displacement,
597            char_spacing: tstate.char_spacing,
598            word_spacing: tstate.word_spacing,
599            h_scaling: tstate.h_scaling_normalized(),
600            rise: tstate.rise,
601        });
602    }
603}
604
605// --- Do operator: XObject handling ---
606
607#[allow(clippy::too_many_arguments)]
608fn handle_do(
609    doc: &lopdf::Document,
610    resources: &lopdf::Dictionary,
611    handler: &mut dyn ContentHandler,
612    options: &ExtractOptions,
613    depth: usize,
614    gstate: &mut InterpreterState,
615    tstate: &mut TextState,
616    name: &str,
617) -> Result<(), BackendError> {
618    // Look up /Resources/XObject/<name>
619    let xobj_dict = resources.get(b"XObject").map_err(|_| {
620        BackendError::Interpreter(format!(
621            "no /XObject dictionary in resources for Do /{name}"
622        ))
623    })?;
624    let xobj_dict = resolve_ref(doc, xobj_dict);
625    let xobj_dict = xobj_dict.as_dict().map_err(|_| {
626        BackendError::Interpreter("/XObject resource is not a dictionary".to_string())
627    })?;
628
629    let xobj_entry = xobj_dict.get(name.as_bytes()).map_err(|_| {
630        BackendError::Interpreter(format!("XObject /{name} not found in resources"))
631    })?;
632
633    let xobj_id = xobj_entry.as_reference().map_err(|_| {
634        BackendError::Interpreter(format!("XObject /{name} is not an indirect reference"))
635    })?;
636
637    let xobj = doc.get_object(xobj_id).map_err(|e| {
638        BackendError::Interpreter(format!("failed to resolve XObject /{name}: {e}"))
639    })?;
640
641    let stream = xobj
642        .as_stream()
643        .map_err(|e| BackendError::Interpreter(format!("XObject /{name} is not a stream: {e}")))?;
644
645    let subtype = stream
646        .dict
647        .get(b"Subtype")
648        .ok()
649        .and_then(|o| o.as_name_str().ok())
650        .unwrap_or("");
651
652    match subtype {
653        "Form" => handle_form_xobject(
654            doc, stream, name, resources, handler, options, depth, gstate, tstate,
655        ),
656        "Image" => {
657            handle_image_xobject(stream, name, gstate, handler);
658            Ok(())
659        }
660        _ => {
661            // Unknown XObject subtype — ignore
662            Ok(())
663        }
664    }
665}
666
667#[allow(clippy::too_many_arguments)]
668fn handle_form_xobject(
669    doc: &lopdf::Document,
670    stream: &lopdf::Stream,
671    name: &str,
672    parent_resources: &lopdf::Dictionary,
673    handler: &mut dyn ContentHandler,
674    options: &ExtractOptions,
675    depth: usize,
676    gstate: &mut InterpreterState,
677    tstate: &mut TextState,
678) -> Result<(), BackendError> {
679    // Save graphics state
680    gstate.save_state();
681
682    // Apply /Matrix if present (transforms Form XObject space to parent space)
683    if let Ok(matrix_obj) = stream.dict.get(b"Matrix") {
684        if let Ok(arr) = matrix_obj.as_array() {
685            if arr.len() == 6 {
686                let vals: Result<Vec<f64>, _> = arr.iter().map(object_to_f64).collect();
687                if let Ok(vals) = vals {
688                    gstate.concat_matrix(vals[0], vals[1], vals[2], vals[3], vals[4], vals[5]);
689                }
690            }
691        }
692    }
693
694    // Get Form XObject's resources (fall back to parent resources)
695    let form_resources_dict;
696    let form_resources = if let Ok(res_obj) = stream.dict.get(b"Resources") {
697        let res_obj = resolve_ref(doc, res_obj);
698        match res_obj.as_dict() {
699            Ok(d) => d,
700            Err(_) => parent_resources,
701        }
702    } else {
703        // Check if /Resources is an inline dictionary (common for Form XObjects)
704        // The dict.get already handles this, so use parent as fallback
705        // But also check if it's an indirect reference in the dict
706        if let Ok(res_ref) = stream.dict.get(b"Resources") {
707            if let Ok(id) = res_ref.as_reference() {
708                if let Ok(obj) = doc.get_object(id) {
709                    if let Ok(d) = obj.as_dict() {
710                        form_resources_dict = d.clone();
711                        &form_resources_dict
712                    } else {
713                        parent_resources
714                    }
715                } else {
716                    parent_resources
717                }
718            } else {
719                parent_resources
720            }
721        } else {
722            parent_resources
723        }
724    };
725
726    // Decode stream content
727    let content_bytes = decode_stream(stream).map_err(|e| {
728        BackendError::Interpreter(format!("failed to decode Form XObject /{name} stream: {e}"))
729    })?;
730
731    // Recursively interpret the Form XObject content stream
732    interpret_content_stream(
733        doc,
734        &content_bytes,
735        form_resources,
736        handler,
737        options,
738        depth + 1,
739        gstate,
740        tstate,
741    )?;
742
743    // Restore graphics state
744    gstate.restore_state();
745
746    Ok(())
747}
748
749fn handle_image_xobject(
750    stream: &lopdf::Stream,
751    name: &str,
752    gstate: &InterpreterState,
753    handler: &mut dyn ContentHandler,
754) {
755    let width = stream
756        .dict
757        .get(b"Width")
758        .ok()
759        .and_then(|o| o.as_i64().ok())
760        .unwrap_or(0) as u32;
761
762    let height = stream
763        .dict
764        .get(b"Height")
765        .ok()
766        .and_then(|o| o.as_i64().ok())
767        .unwrap_or(0) as u32;
768
769    let colorspace = stream
770        .dict
771        .get(b"ColorSpace")
772        .ok()
773        .and_then(|o| o.as_name_str().ok())
774        .map(|s| s.to_string());
775
776    let bits_per_component = stream
777        .dict
778        .get(b"BitsPerComponent")
779        .ok()
780        .and_then(|o| o.as_i64().ok())
781        .map(|v| v as u32);
782
783    handler.on_image(ImageEvent {
784        name: name.to_string(),
785        ctm: gstate.ctm_array(),
786        width,
787        height,
788        colorspace,
789        bits_per_component,
790    });
791}
792
793// --- Helpers ---
794
795/// Resolve an indirect reference, returning the referenced object.
796/// If the object is not a reference, returns it as-is.
797fn resolve_ref<'a>(doc: &'a lopdf::Document, obj: &'a lopdf::Object) -> &'a lopdf::Object {
798    match obj {
799        lopdf::Object::Reference(id) => doc.get_object(*id).unwrap_or(obj),
800        _ => obj,
801    }
802}
803
804/// Decode a PDF stream, decompressing if necessary.
805fn decode_stream(stream: &lopdf::Stream) -> Result<Vec<u8>, BackendError> {
806    // Check if stream has filters
807    if stream.dict.get(b"Filter").is_ok() {
808        stream
809            .decompressed_content()
810            .map_err(|e| BackendError::Interpreter(format!("stream decompression failed: {e}")))
811    } else {
812        Ok(stream.content.clone())
813    }
814}
815
816#[cfg(test)]
817mod tests {
818    use super::*;
819    use crate::handler::{CharEvent, ContentHandler, ImageEvent};
820
821    // --- Collecting handler ---
822
823    struct CollectingHandler {
824        chars: Vec<CharEvent>,
825        images: Vec<ImageEvent>,
826        warnings: Vec<ExtractWarning>,
827    }
828
829    impl CollectingHandler {
830        fn new() -> Self {
831            Self {
832                chars: Vec::new(),
833                images: Vec::new(),
834                warnings: Vec::new(),
835            }
836        }
837    }
838
839    impl ContentHandler for CollectingHandler {
840        fn on_char(&mut self, event: CharEvent) {
841            self.chars.push(event);
842        }
843        fn on_image(&mut self, event: ImageEvent) {
844            self.images.push(event);
845        }
846        fn on_warning(&mut self, warning: ExtractWarning) {
847            self.warnings.push(warning);
848        }
849    }
850
851    // --- Helper to create a minimal lopdf document for testing ---
852
853    fn empty_resources() -> lopdf::Dictionary {
854        lopdf::Dictionary::new()
855    }
856
857    fn default_options() -> ExtractOptions {
858        ExtractOptions::default()
859    }
860
861    // --- Basic text interpretation tests ---
862
863    #[test]
864    fn interpret_simple_text() {
865        let doc = lopdf::Document::with_version("1.5");
866        let resources = empty_resources();
867        let stream = b"BT /F1 12 Tf 72 700 Td (Hello) Tj ET";
868
869        let mut handler = CollectingHandler::new();
870        let mut gstate = InterpreterState::new();
871        let mut tstate = TextState::new();
872
873        interpret_content_stream(
874            &doc,
875            stream,
876            &resources,
877            &mut handler,
878            &default_options(),
879            0,
880            &mut gstate,
881            &mut tstate,
882        )
883        .unwrap();
884
885        // "Hello" = 5 characters
886        assert_eq!(handler.chars.len(), 5);
887        assert_eq!(handler.chars[0].char_code, b'H' as u32);
888        assert_eq!(handler.chars[1].char_code, b'e' as u32);
889        assert_eq!(handler.chars[4].char_code, b'o' as u32);
890        assert_eq!(handler.chars[0].font_size, 12.0);
891    }
892
893    #[test]
894    fn interpret_tj_array() {
895        let doc = lopdf::Document::with_version("1.5");
896        let resources = empty_resources();
897        let stream = b"BT /F1 12 Tf [(H) -20 (i)] TJ ET";
898
899        let mut handler = CollectingHandler::new();
900        let mut gstate = InterpreterState::new();
901        let mut tstate = TextState::new();
902
903        interpret_content_stream(
904            &doc,
905            stream,
906            &resources,
907            &mut handler,
908            &default_options(),
909            0,
910            &mut gstate,
911            &mut tstate,
912        )
913        .unwrap();
914
915        assert_eq!(handler.chars.len(), 2);
916        assert_eq!(handler.chars[0].char_code, b'H' as u32);
917        assert_eq!(handler.chars[1].char_code, b'i' as u32);
918    }
919
920    #[test]
921    fn interpret_ctm_passed_to_char_events() {
922        let doc = lopdf::Document::with_version("1.5");
923        let resources = empty_resources();
924        let stream = b"1 0 0 1 10 20 cm BT /F1 12 Tf (A) Tj ET";
925
926        let mut handler = CollectingHandler::new();
927        let mut gstate = InterpreterState::new();
928        let mut tstate = TextState::new();
929
930        interpret_content_stream(
931            &doc,
932            stream,
933            &resources,
934            &mut handler,
935            &default_options(),
936            0,
937            &mut gstate,
938            &mut tstate,
939        )
940        .unwrap();
941
942        assert_eq!(handler.chars.len(), 1);
943        assert_eq!(handler.chars[0].ctm, [1.0, 0.0, 0.0, 1.0, 10.0, 20.0]);
944    }
945
946    // --- Recursion limit tests ---
947
948    #[test]
949    fn recursion_depth_zero_allowed() {
950        let doc = lopdf::Document::with_version("1.5");
951        let resources = empty_resources();
952        let stream = b"BT ET";
953
954        let mut handler = CollectingHandler::new();
955        let mut gstate = InterpreterState::new();
956        let mut tstate = TextState::new();
957
958        let result = interpret_content_stream(
959            &doc,
960            stream,
961            &resources,
962            &mut handler,
963            &default_options(),
964            0,
965            &mut gstate,
966            &mut tstate,
967        );
968        assert!(result.is_ok());
969    }
970
971    #[test]
972    fn recursion_depth_exceeds_limit() {
973        let doc = lopdf::Document::with_version("1.5");
974        let resources = empty_resources();
975        let stream = b"BT ET";
976
977        let mut handler = CollectingHandler::new();
978        let mut gstate = InterpreterState::new();
979        let mut tstate = TextState::new();
980
981        let mut opts = ExtractOptions::default();
982        opts.max_recursion_depth = 3;
983
984        let result = interpret_content_stream(
985            &doc,
986            stream,
987            &resources,
988            &mut handler,
989            &opts,
990            4, // depth > max
991            &mut gstate,
992            &mut tstate,
993        );
994        assert!(result.is_err());
995        let err_msg = result.unwrap_err().to_string();
996        assert!(err_msg.contains("recursion depth"));
997    }
998
999    // --- Graphics state tests ---
1000
1001    #[test]
1002    fn interpret_q_q_state_save_restore() {
1003        let doc = lopdf::Document::with_version("1.5");
1004        let resources = empty_resources();
1005        // Set color, save, change color, restore
1006        let stream = b"0.5 g q 1 0 0 rg Q";
1007
1008        let mut handler = CollectingHandler::new();
1009        let mut gstate = InterpreterState::new();
1010        let mut tstate = TextState::new();
1011
1012        interpret_content_stream(
1013            &doc,
1014            stream,
1015            &resources,
1016            &mut handler,
1017            &default_options(),
1018            0,
1019            &mut gstate,
1020            &mut tstate,
1021        )
1022        .unwrap();
1023
1024        // After Q, fill color should be restored to gray 0.5
1025        assert_eq!(
1026            gstate.graphics_state().fill_color,
1027            pdfplumber_core::Color::Gray(0.5)
1028        );
1029    }
1030
1031    // --- CID font / Identity-H tests ---
1032
1033    /// Build a resources dictionary containing a Type0 font with Identity-H encoding.
1034    fn make_cid_font_resources(doc: &mut lopdf::Document) -> lopdf::Dictionary {
1035        use lopdf::{Object, Stream, dictionary};
1036
1037        // ToUnicode CMap: map 0x4E2D → U+4E2D (中), 0x6587 → U+6587 (文)
1038        let tounicode_data = b"\
1039            /CIDInit /ProcSet findresource begin\n\
1040            12 dict begin\n\
1041            begincmap\n\
1042            /CMapName /Adobe-Identity-UCS def\n\
1043            /CMapType 2 def\n\
1044            1 begincodespacerange\n\
1045            <0000> <FFFF>\n\
1046            endcodespacerange\n\
1047            2 beginbfchar\n\
1048            <4E2D> <4E2D>\n\
1049            <6587> <6587>\n\
1050            endbfchar\n\
1051            endcmap\n";
1052        let tounicode_stream = Stream::new(dictionary! {}, tounicode_data.to_vec());
1053        let tounicode_id = doc.add_object(Object::Stream(tounicode_stream));
1054
1055        // CIDFont dictionary
1056        let cid_font_dict = dictionary! {
1057            "Type" => "Font",
1058            "Subtype" => "CIDFontType2",
1059            "BaseFont" => "MSGothic",
1060            "DW" => Object::Integer(1000),
1061            "CIDToGIDMap" => "Identity",
1062            "CIDSystemInfo" => Object::Dictionary(dictionary! {
1063                "Registry" => Object::String("Adobe".as_bytes().to_vec(), lopdf::StringFormat::Literal),
1064                "Ordering" => Object::String("Identity".as_bytes().to_vec(), lopdf::StringFormat::Literal),
1065                "Supplement" => Object::Integer(0),
1066            }),
1067        };
1068        let cid_font_id = doc.add_object(Object::Dictionary(cid_font_dict));
1069
1070        // Type0 font dictionary with Identity-H encoding
1071        let type0_dict = dictionary! {
1072            "Type" => "Font",
1073            "Subtype" => "Type0",
1074            "BaseFont" => "MSGothic",
1075            "Encoding" => "Identity-H",
1076            "DescendantFonts" => Object::Array(vec![Object::Reference(cid_font_id)]),
1077            "ToUnicode" => Object::Reference(tounicode_id),
1078        };
1079        let type0_id = doc.add_object(Object::Dictionary(type0_dict));
1080
1081        // Resources with Font entry
1082        dictionary! {
1083            "Font" => Object::Dictionary(dictionary! {
1084                "F1" => Object::Reference(type0_id),
1085            }),
1086        }
1087    }
1088
1089    #[test]
1090    fn interpret_cid_font_identity_h_two_byte_codes() {
1091        let mut doc = lopdf::Document::with_version("1.5");
1092        let resources = make_cid_font_resources(&mut doc);
1093
1094        // Content stream: use CID font F1 and show 2-byte character codes
1095        // 0x4E2D = 中, 0x6587 = 文
1096        let stream = b"BT /F1 12 Tf <4E2D6587> Tj ET";
1097
1098        let mut handler = CollectingHandler::new();
1099        let mut gstate = InterpreterState::new();
1100        let mut tstate = TextState::new();
1101
1102        interpret_content_stream(
1103            &doc,
1104            stream,
1105            &resources,
1106            &mut handler,
1107            &default_options(),
1108            0,
1109            &mut gstate,
1110            &mut tstate,
1111        )
1112        .unwrap();
1113
1114        // Should produce 2 characters (2-byte codes), not 4 (1-byte)
1115        assert_eq!(handler.chars.len(), 2);
1116        assert_eq!(handler.chars[0].char_code, 0x4E2D);
1117        assert_eq!(handler.chars[1].char_code, 0x6587);
1118        // Unicode should be resolved via ToUnicode CMap
1119        assert_eq!(handler.chars[0].unicode, Some("中".to_string()));
1120        assert_eq!(handler.chars[1].unicode, Some("文".to_string()));
1121        assert_eq!(handler.chars[0].font_name, "MSGothic");
1122    }
1123
1124    #[test]
1125    fn interpret_cid_font_tj_array_two_byte_codes() {
1126        let mut doc = lopdf::Document::with_version("1.5");
1127        let resources = make_cid_font_resources(&mut doc);
1128
1129        // TJ array with 2-byte CID strings and adjustments
1130        let stream = b"BT /F1 12 Tf [<4E2D> -100 <6587>] TJ ET";
1131
1132        let mut handler = CollectingHandler::new();
1133        let mut gstate = InterpreterState::new();
1134        let mut tstate = TextState::new();
1135
1136        interpret_content_stream(
1137            &doc,
1138            stream,
1139            &resources,
1140            &mut handler,
1141            &default_options(),
1142            0,
1143            &mut gstate,
1144            &mut tstate,
1145        )
1146        .unwrap();
1147
1148        assert_eq!(handler.chars.len(), 2);
1149        assert_eq!(handler.chars[0].char_code, 0x4E2D);
1150        assert_eq!(handler.chars[1].char_code, 0x6587);
1151    }
1152
1153    #[test]
1154    fn interpret_subset_font_name_stripped() {
1155        let mut doc = lopdf::Document::with_version("1.5");
1156
1157        use lopdf::{Object, Stream, dictionary};
1158
1159        // Create a ToUnicode CMap
1160        let tounicode_data = b"\
1161            beginbfchar\n\
1162            <4E2D> <4E2D>\n\
1163            endbfchar\n";
1164        let tounicode_stream = Stream::new(dictionary! {}, tounicode_data.to_vec());
1165        let tounicode_id = doc.add_object(Object::Stream(tounicode_stream));
1166
1167        // CIDFont with subset prefix
1168        let cid_font_dict = dictionary! {
1169            "Type" => "Font",
1170            "Subtype" => "CIDFontType2",
1171            "BaseFont" => "ABCDEF+MSGothic",
1172            "DW" => Object::Integer(1000),
1173            "CIDToGIDMap" => "Identity",
1174        };
1175        let cid_font_id = doc.add_object(Object::Dictionary(cid_font_dict));
1176
1177        // Type0 font with subset prefix in BaseFont
1178        let type0_dict = dictionary! {
1179            "Type" => "Font",
1180            "Subtype" => "Type0",
1181            "BaseFont" => "ABCDEF+MSGothic",
1182            "Encoding" => "Identity-H",
1183            "DescendantFonts" => Object::Array(vec![Object::Reference(cid_font_id)]),
1184            "ToUnicode" => Object::Reference(tounicode_id),
1185        };
1186        let type0_id = doc.add_object(Object::Dictionary(type0_dict));
1187
1188        let resources = dictionary! {
1189            "Font" => Object::Dictionary(dictionary! {
1190                "F1" => Object::Reference(type0_id),
1191            }),
1192        };
1193
1194        let stream = b"BT /F1 12 Tf <4E2D> Tj ET";
1195
1196        let mut handler = CollectingHandler::new();
1197        let mut gstate = InterpreterState::new();
1198        let mut tstate = TextState::new();
1199
1200        interpret_content_stream(
1201            &doc,
1202            stream,
1203            &resources,
1204            &mut handler,
1205            &default_options(),
1206            0,
1207            &mut gstate,
1208            &mut tstate,
1209        )
1210        .unwrap();
1211
1212        assert_eq!(handler.chars.len(), 1);
1213        // Subset prefix should be stripped
1214        assert_eq!(handler.chars[0].font_name, "MSGothic");
1215    }
1216
1217    /// Build resources for Identity-V (vertical writing mode).
1218    fn make_cid_font_resources_identity_v(doc: &mut lopdf::Document) -> lopdf::Dictionary {
1219        use lopdf::{Object, Stream, dictionary};
1220
1221        let tounicode_data = b"\
1222            beginbfchar\n\
1223            <4E2D> <4E2D>\n\
1224            endbfchar\n";
1225        let tounicode_stream = Stream::new(dictionary! {}, tounicode_data.to_vec());
1226        let tounicode_id = doc.add_object(Object::Stream(tounicode_stream));
1227
1228        let cid_font_dict = dictionary! {
1229            "Type" => "Font",
1230            "Subtype" => "CIDFontType2",
1231            "BaseFont" => "MSGothic",
1232            "DW" => Object::Integer(1000),
1233            "CIDToGIDMap" => "Identity",
1234        };
1235        let cid_font_id = doc.add_object(Object::Dictionary(cid_font_dict));
1236
1237        let type0_dict = dictionary! {
1238            "Type" => "Font",
1239            "Subtype" => "Type0",
1240            "BaseFont" => "MSGothic",
1241            "Encoding" => "Identity-V",
1242            "DescendantFonts" => Object::Array(vec![Object::Reference(cid_font_id)]),
1243            "ToUnicode" => Object::Reference(tounicode_id),
1244        };
1245        let type0_id = doc.add_object(Object::Dictionary(type0_dict));
1246
1247        dictionary! {
1248            "Font" => Object::Dictionary(dictionary! {
1249                "F1" => Object::Reference(type0_id),
1250            }),
1251        }
1252    }
1253
1254    #[test]
1255    fn interpret_cid_font_identity_v_detected() {
1256        let mut doc = lopdf::Document::with_version("1.5");
1257        let resources = make_cid_font_resources_identity_v(&mut doc);
1258
1259        // Show a CID character with Identity-V encoding
1260        let stream = b"BT /F1 12 Tf <4E2D> Tj ET";
1261
1262        let mut handler = CollectingHandler::new();
1263        let mut gstate = InterpreterState::new();
1264        let mut tstate = TextState::new();
1265
1266        interpret_content_stream(
1267            &doc,
1268            stream,
1269            &resources,
1270            &mut handler,
1271            &default_options(),
1272            0,
1273            &mut gstate,
1274            &mut tstate,
1275        )
1276        .unwrap();
1277
1278        // Should still produce characters (Identity-V uses same CID=charcode mapping)
1279        assert_eq!(handler.chars.len(), 1);
1280        assert_eq!(handler.chars[0].char_code, 0x4E2D);
1281        assert_eq!(handler.chars[0].unicode, Some("中".to_string()));
1282    }
1283
1284    // --- Warning emission tests ---
1285
1286    #[test]
1287    fn interpret_missing_font_emits_warning() {
1288        let doc = lopdf::Document::with_version("1.5");
1289        let resources = empty_resources(); // No fonts defined
1290        // Use font F1 which is not in resources
1291        let stream = b"BT /F1 12 Tf (Hi) Tj ET";
1292
1293        let mut handler = CollectingHandler::new();
1294        let mut gstate = InterpreterState::new();
1295        let mut tstate = TextState::new();
1296
1297        interpret_content_stream(
1298            &doc,
1299            stream,
1300            &resources,
1301            &mut handler,
1302            &default_options(),
1303            0,
1304            &mut gstate,
1305            &mut tstate,
1306        )
1307        .unwrap();
1308
1309        // Should emit a warning about missing font
1310        assert!(!handler.warnings.is_empty());
1311        assert!(
1312            handler.warnings[0]
1313                .description
1314                .contains("font not found in page resources"),
1315            "expected 'font not found' warning, got: {}",
1316            handler.warnings[0].description
1317        );
1318        assert_eq!(
1319            handler.warnings[0].font_name,
1320            Some("F1".to_string()),
1321            "warning should include font name"
1322        );
1323        assert!(
1324            handler.warnings[0].operator_index.is_some(),
1325            "warning should include operator index"
1326        );
1327
1328        // Characters should still be extracted (using default metrics)
1329        assert_eq!(handler.chars.len(), 2);
1330    }
1331
1332    #[test]
1333    fn interpret_no_warnings_when_collection_disabled() {
1334        let doc = lopdf::Document::with_version("1.5");
1335        let resources = empty_resources();
1336        let stream = b"BT /F1 12 Tf (Hi) Tj ET";
1337
1338        let mut handler = CollectingHandler::new();
1339        let mut gstate = InterpreterState::new();
1340        let mut tstate = TextState::new();
1341
1342        let opts = ExtractOptions {
1343            collect_warnings: false,
1344            ..ExtractOptions::default()
1345        };
1346
1347        interpret_content_stream(
1348            &doc,
1349            stream,
1350            &resources,
1351            &mut handler,
1352            &opts,
1353            0,
1354            &mut gstate,
1355            &mut tstate,
1356        )
1357        .unwrap();
1358
1359        // No warnings should be collected
1360        assert!(handler.warnings.is_empty());
1361
1362        // Characters should still be extracted normally
1363        assert_eq!(handler.chars.len(), 2);
1364    }
1365
1366    #[test]
1367    fn interpret_warnings_do_not_affect_output() {
1368        let doc = lopdf::Document::with_version("1.5");
1369        let resources = empty_resources();
1370        let stream = b"BT /F1 12 Tf (AB) Tj ET";
1371
1372        // With warnings enabled
1373        let mut handler_on = CollectingHandler::new();
1374        let mut gstate_on = InterpreterState::new();
1375        let mut tstate_on = TextState::new();
1376        let opts_on = ExtractOptions {
1377            collect_warnings: true,
1378            ..ExtractOptions::default()
1379        };
1380        interpret_content_stream(
1381            &doc,
1382            stream,
1383            &resources,
1384            &mut handler_on,
1385            &opts_on,
1386            0,
1387            &mut gstate_on,
1388            &mut tstate_on,
1389        )
1390        .unwrap();
1391
1392        // With warnings disabled
1393        let mut handler_off = CollectingHandler::new();
1394        let mut gstate_off = InterpreterState::new();
1395        let mut tstate_off = TextState::new();
1396        let opts_off = ExtractOptions {
1397            collect_warnings: false,
1398            ..ExtractOptions::default()
1399        };
1400        interpret_content_stream(
1401            &doc,
1402            stream,
1403            &resources,
1404            &mut handler_off,
1405            &opts_off,
1406            0,
1407            &mut gstate_off,
1408            &mut tstate_off,
1409        )
1410        .unwrap();
1411
1412        // Same output regardless of warning collection
1413        assert_eq!(handler_on.chars.len(), handler_off.chars.len());
1414        for (a, b) in handler_on.chars.iter().zip(handler_off.chars.iter()) {
1415            assert_eq!(a.char_code, b.char_code);
1416        }
1417    }
1418
1419    #[test]
1420    fn interpret_valid_font_no_warnings() {
1421        let mut doc = lopdf::Document::with_version("1.5");
1422        let resources = make_cid_font_resources(&mut doc);
1423        let stream = b"BT /F1 12 Tf <4E2D> Tj ET";
1424
1425        let mut handler = CollectingHandler::new();
1426        let mut gstate = InterpreterState::new();
1427        let mut tstate = TextState::new();
1428
1429        interpret_content_stream(
1430            &doc,
1431            stream,
1432            &resources,
1433            &mut handler,
1434            &default_options(),
1435            0,
1436            &mut gstate,
1437            &mut tstate,
1438        )
1439        .unwrap();
1440
1441        // Valid font should not produce warnings
1442        assert!(
1443            handler.warnings.is_empty(),
1444            "expected no warnings for valid font, got: {:?}",
1445            handler.warnings
1446        );
1447        assert_eq!(handler.chars.len(), 1);
1448    }
1449}