use harumi::Document;
/// Bytes of the `fixtures/NotoSansJP-Regular.ttf` fixture, embedded at compile time.
const FONT: &[u8] = include_bytes!("fixtures/NotoSansJP-Regular.ttf");
/// Re-serialises `pdf_bytes` with the first splittable hex `Tj` operator broken in two.
///
/// The content streams referenced by the `Contents` entry of the first page yielded
/// by `get_pages()` are visited in order. Each one is decoded (when it carries a
/// `Filter` entry) and handed to `try_split_hex_tj`; the first stream for which that
/// succeeds is replaced by a new stream with an empty dictionary holding the rewritten
/// text, and the search stops. Objects that are not streams, or whose content is not
/// valid UTF-8, are skipped.
///
/// # Panics
///
/// Panics if the PDF cannot be loaded or saved, has no pages, the page has no
/// `Contents` entry, or `Contents` is neither a reference nor an array.
// Not called by the tests visible in this file, hence the lint allowance.
#[allow(dead_code)]
fn split_first_tj(pdf_bytes: &[u8], split_at_char: usize) -> Vec<u8> {
    let mut pdf = lopdf::Document::load_from(pdf_bytes).unwrap();
    let first_page = *pdf.get_pages().values().next().unwrap();

    let contents = pdf
        .get_object(first_page)
        .unwrap()
        .as_dict()
        .unwrap()
        .get(b"Contents")
        .unwrap()
        .clone();

    // Only indirect references are considered; any other array element is ignored.
    let candidates: Vec<lopdf::ObjectId> = match contents {
        lopdf::Object::Reference(id) => vec![id],
        lopdf::Object::Array(items) => items
            .into_iter()
            .filter_map(|item| match item {
                lopdf::Object::Reference(id) => Some(id),
                _ => None,
            })
            .collect(),
        _ => panic!("unexpected Contents type"),
    };

    for id in candidates {
        let object = pdf.get_object(id).unwrap().clone();
        let mut decoded = match object.as_stream() {
            Ok(stream) => stream.clone(),
            Err(_) => continue,
        };
        if decoded.dict.get(b"Filter").is_ok() {
            // Best effort: a failed decode leaves the content as it was.
            decoded.decompress().ok();
        }
        let rewritten = match std::str::from_utf8(&decoded.content) {
            Ok(text) => try_split_hex_tj(text, split_at_char),
            Err(_) => continue,
        };
        if let Some(rewritten) = rewritten {
            // The replacement is stored unfiltered, so it gets a fresh, empty dictionary.
            let replacement = lopdf::Stream::new(lopdf::Dictionary::new(), rewritten.into_bytes());
            pdf.objects.insert(id, lopdf::Object::Stream(replacement));
            break;
        }
    }

    let mut serialized = Vec::new();
    pdf.save_to(&mut serialized).unwrap();
    serialized
}
/// Rewrites the first hex-string `Tj` operator in `content` as two consecutive
/// `Tj` operators.
///
/// The operand is located by finding the first `"> Tj"` and the nearest `<` before
/// it. It is treated as a run of 4-hex-digit codes and is cut after `split_at_char`
/// codes. Everything before and after the operator is copied through unchanged, and
/// the two halves are separated by a newline.
///
/// Returns `None` when:
/// - no `<...> Tj` operator is found;
/// - the operand is empty, is not a multiple of 4 characters long, or contains
///   anything other than ASCII hex digits;
/// - the cut would leave either half empty (this includes `split_at_char == 0`), or
///   `split_at_char * 4` overflows `usize`.
// Only reached through `split_first_tj`, which no test visible in this file calls,
// hence the lint allowance.
#[allow(dead_code)]
fn try_split_hex_tj(content: &str, split_at_char: usize) -> Option<String> {
    const HEX_DIGITS_PER_CODE: usize = 4;
    const TJ_SUFFIX: &str = "> Tj";

    let tj_idx = content.find(TJ_SUFFIX)?;
    let lt_idx = content[..tj_idx].rfind('<')?;
    let hex_str = &content[lt_idx + 1..tj_idx];

    // Requiring ASCII hex digits also guarantees that every byte offset is a char
    // boundary, so the slicing below cannot panic on multi-byte input.
    let well_formed = !hex_str.is_empty()
        && hex_str.len() % HEX_DIGITS_PER_CODE == 0
        && hex_str.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }

    // `checked_mul` turns an absurdly large `split_at_char` into `None` instead of an
    // overflow panic (debug) or a wrapped offset (release).
    let split_pos = split_at_char.checked_mul(HEX_DIGITS_PER_CODE)?;
    if split_pos == 0 || split_pos >= hex_str.len() {
        return None;
    }

    Some(format!(
        "{}<{}> Tj\n<{}> Tj{}",
        &content[..lt_idx],
        &hex_str[..split_pos],
        &hex_str[split_pos..],
        &content[tj_idx + TJ_SUFFIX.len()..],
    ))
}
/// Builds a one-page 595x842 PDF containing `text` as invisible 14pt text at
/// (72, 700), set in the embedded `FONT`, and returns the serialised bytes.
fn pdf_with_text(text: &str) -> Vec<u8> {
    let mut builder = Document::new((595.0, 842.0)).unwrap();
    let embedded = builder.embed_font(FONT).unwrap();
    builder
        .page(1)
        .unwrap()
        .add_invisible_text(text, embedded, [72.0, 700.0], 14.0)
        .unwrap();
    let bytes = builder.save_to_bytes().unwrap();
    bytes
}
/// `replace_text_resubset` swaps "Hello" for "世界", reports one match, and the
/// new text is extractable after a save/reload round trip.
#[test]
fn replace_text_resubset_basic() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset("Hello", "世界", FONT)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("世界"),
        "expected '世界' in output: {}",
        extracted
    );
}
/// `replace_text_resubset` reports zero matches when the needle is absent.
#[test]
fn replace_text_resubset_no_match() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset("Goodbye", "世界", FONT)
        .unwrap();

    assert_eq!(replaced, 0);
}
/// Replacing with an empty string via `replace_text_resubset` counts as one match
/// and removes the original text from the extracted output.
#[test]
fn replace_text_resubset_empty_replacement() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset("Hello", "", FONT)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        !extracted.contains("Hello"),
        "expected 'Hello' to be removed: {}",
        extracted
    );
}
/// `replace_text_preserve_font` swaps "Hello" for "Helo", reports one match, and
/// the new text is extractable after a save/reload round trip.
#[test]
fn replace_text_preserve_basic() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_preserve_font("Hello", "Helo")
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("Helo"),
        "expected 'Helo' in output: {}",
        extracted
    );
}
/// `replace_text_preserve_font` reports zero matches when the needle is absent.
#[test]
fn replace_text_preserve_no_match() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_preserve_font("Goodbye", "Hi")
        .unwrap();

    assert_eq!(replaced, 0);
}
/// Replacing with an empty string via `replace_text_preserve_font` counts as one
/// match and removes the original text from the extracted output.
#[test]
fn replace_text_preserve_empty_replacement() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_preserve_font("Hello", "")
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        !extracted.contains("Hello"),
        "expected 'Hello' to be removed: {}",
        extracted
    );
}
/// `replace_text_preserve_font` fails with `Error::FontCharNotMapped` when asked to
/// replace "Hello" with the Cyrillic text "Привет".
#[test]
fn replace_text_preserve_char_not_in_font() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let failure = editable
        .page(1)
        .unwrap()
        .replace_text_preserve_font("Hello", "Привет")
        .unwrap_err();

    let is_unmapped_char = matches!(failure, harumi::Error::FontCharNotMapped { .. });
    assert!(is_unmapped_char);
}
/// `replace_text_resubset` with the Japanese replacement "日本語テスト" reports one
/// match, and "日本語" is found in the text extracted after reloading.
#[test]
fn replace_text_resubset_japanese() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset("Hello", "日本語テスト", FONT)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("日本語"),
        "expected '日本語' in output: {:?}",
        extracted
    );
}
/// Contrasts the two replacement strategies on the replacement text "中文字":
/// `replace_text_preserve_font` fails with `Error::FontCharNotMapped`, while
/// `replace_text_resubset` succeeds with one match and the text is extractable
/// after a save/reload round trip.
#[test]
fn replace_text_resubset_chinese() {
    // Same fixture as the other tests: invisible "Hello" at (72, 700), 14pt.
    let source_pdf = pdf_with_text("Hello");

    // Preserve-font path: expected to be rejected.
    let mut preserve_doc = Document::from_bytes(&source_pdf).unwrap();
    let failure = preserve_doc
        .page(1)
        .unwrap()
        .replace_text_preserve_font("Hello", "中文字")
        .unwrap_err();
    assert!(matches!(failure, harumi::Error::FontCharNotMapped { .. }));

    // Resubset path: expected to succeed on a fresh copy of the same PDF.
    let mut resubset_doc = Document::from_bytes(&source_pdf).unwrap();
    let replaced = resubset_doc
        .page(1)
        .unwrap()
        .replace_text_resubset("Hello", "中文字", FONT)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = resubset_doc.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("中文字"),
        "expected '中文字' in output: {:?}",
        extracted
    );
}
/// `replace_text_resubset_with_wrap` replaces "Hi" with a much longer Latin
/// sentence (line height 14.4), reports one match, and words from the sentence
/// appear in the text extracted after reloading.
#[test]
fn replace_text_resubset_with_wrap_simple() {
    let source_pdf = pdf_with_text("Hi");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let long_text = "This is a much longer replacement text";
    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hi", long_text, FONT, 14.4)
        .unwrap();
    assert_eq!(replaced, 1, "Expected 1 match for 'Hi'");

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    let has_expected_words = extracted.contains("This") && extracted.contains("longer");
    assert!(
        has_expected_words,
        "expected wrapped text components in output: {}",
        extracted
    );
}
/// `replace_text_resubset_with_wrap` replaces "Hello" with a long Japanese sentence
/// (line height 14.4), reports one match, and parts of the sentence appear in the
/// text extracted after reloading.
#[test]
fn replace_text_resubset_with_wrap_cjk() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let long_text = "日本語テスト文字列は複数行に折り返されるはずです";
    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hello", long_text, FONT, 14.4)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    let has_expected_parts = extracted.contains("日本語") && extracted.contains("文字");
    assert!(
        has_expected_parts,
        "expected Japanese text components in output: {}",
        extracted
    );
}
/// `replace_text_resubset_with_wrap` with a line height of 20.0 replaces "X" with a
/// sequence of single letters, reports one match, and the first and last letters
/// appear in the text extracted after reloading.
#[test]
fn replace_text_resubset_with_wrap_custom_line_height() {
    let source_pdf = pdf_with_text("X");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let letters = "A B C D E F G H I J K L M N O P";
    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("X", letters, FONT, 20.0)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    let has_both_ends = extracted.contains("A") && extracted.contains("P");
    assert!(
        has_both_ends,
        "expected wrapped text in output: {}",
        extracted
    );
}
/// A NaN line height passed to `replace_text_resubset_with_wrap` must yield an error.
#[test]
fn replace_text_resubset_with_wrap_invalid_line_height_nan() {
    let source_pdf = pdf_with_text("Hi");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let outcome = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hi", "Replacement", FONT, f32::NAN);

    assert!(
        outcome.is_err(),
        "Expected error for NaN line_height, got {:?}",
        outcome
    );
}
/// A negative line height passed to `replace_text_resubset_with_wrap` must yield an
/// error.
#[test]
fn replace_text_resubset_with_wrap_invalid_line_height_negative() {
    let source_pdf = pdf_with_text("Hi");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let outcome = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hi", "Replacement", FONT, -5.0);

    assert!(
        outcome.is_err(),
        "Expected error for negative line_height, got {:?}",
        outcome
    );
}
/// A line height of 0.0 is accepted by `replace_text_resubset_with_wrap`: the call
/// reports one match and the replacement text is extractable after reloading.
///
/// The test name and assertion message say the library falls back to 14.4 in this
/// case; the fallback value itself is not checked here.
#[test]
fn replace_text_resubset_with_wrap_zero_line_height_defaults_to_14_4() {
    let source_pdf = pdf_with_text("Hi");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let long_text = "A longer replacement text";
    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hi", long_text, FONT, 0.0)
        .unwrap();
    assert_eq!(replaced, 1, "Expected wrap with default line_height=14.4");

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("longer"),
        "expected replacement text in output: {}",
        extracted
    );
}
/// `replace_text_resubset_with_wrap` reports zero matches when the needle is absent.
#[test]
fn replace_text_resubset_with_wrap_no_match_returns_zero() {
    let source_pdf = pdf_with_text("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("NotPresent", "Replacement", FONT, 14.4)
        .unwrap();

    assert_eq!(replaced, 0, "Expected no matches");
}
/// `replace_text_resubset_with_wrap` with the short replacement "OK" reports one
/// match, and "OK" is extractable after a save/reload round trip.
#[test]
fn replace_text_resubset_with_wrap_single_line_fits() {
    let source_pdf = pdf_with_text("Hi");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let replaced = editable
        .page(1)
        .unwrap()
        .replace_text_resubset_with_wrap("Hi", "OK", FONT, 14.4)
        .unwrap();
    assert_eq!(replaced, 1);

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let extracted = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        extracted.contains("OK"),
        "expected 'OK' in output: {}",
        extracted
    );
}
/// `replace_text` finds and replaces text that lives inside a Form XObject which is
/// only reachable through resources declared on the parent `Pages` node.
///
/// The fixture is assembled by hand with lopdf:
/// - a Type0 font with a `CIDFontType2` descendant and a ToUnicode CMap mapping
///   `<0048>` and `<0069>` to themselves ("H" and "i");
/// - a Form XObject with its own `Font` resource whose content shows `<00480069>`;
/// - a page whose content stream is `q Q` and which has no `Resources` of its own;
/// - a `Pages` node whose `Resources` hold the `XObject` dictionary (`X1`).
///
/// The test first checks that "Hi" is extractable, then replaces it with "Bye" and
/// checks the reloaded document contains "Bye" and no longer contains "Hi".
#[test]
fn replace_text_in_form_xobject_inherited_resources() {
    use lopdf::{Object, Stream, StringFormat};

    // Small constructors to keep the dictionary set-up readable.
    let name = |bytes: &[u8]| Object::Name(bytes.to_vec());
    let literal = |bytes: &[u8]| Object::String(bytes.to_vec(), StringFormat::Literal);
    let page_rect = || {
        Object::Array(
            vec![0, 0, 595, 842]
                .into_iter()
                .map(Object::Integer)
                .collect(),
        )
    };

    let cmap_bytes = b"/CIDInit /ProcSet findresource begin\n\
        12 dict begin\n\
        begincmap\n\
        /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> def\n\
        /CMapName /Adobe-Identity-H def\n\
        /CMapType 1 def\n\
        2 beginbfchar\n\
        <0048> <0048>\n\
        <0069> <0069>\n\
        endbfchar\n\
        endcmap\n\
        end end\n"
        .to_vec();

    let mut ldoc = lopdf::Document::new();

    // ToUnicode CMap stream.
    let cmap_stream = Stream::new(lopdf::Dictionary::new(), cmap_bytes);
    let cmap_id = ldoc.add_object(Object::Stream(cmap_stream));

    // Descendant CID font.
    let mut cid_system_info = lopdf::Dictionary::new();
    cid_system_info.set("Registry", literal(b"Adobe"));
    cid_system_info.set("Ordering", literal(b"Identity"));
    cid_system_info.set("Supplement", Object::Integer(0));

    let mut cid_font = lopdf::Dictionary::new();
    cid_font.set("Type", name(b"Font"));
    cid_font.set("Subtype", name(b"CIDFontType2"));
    cid_font.set("BaseFont", name(b"TestCIDFont"));
    cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
    cid_font.set("DW", Object::Integer(1000));
    let cid_font_id = ldoc.add_object(Object::Dictionary(cid_font));

    // Composite (Type0) font referencing the CID font and the CMap.
    let mut type0_font = lopdf::Dictionary::new();
    type0_font.set("Type", name(b"Font"));
    type0_font.set("Subtype", name(b"Type0"));
    type0_font.set("BaseFont", name(b"TestCIDFont"));
    type0_font.set("Encoding", name(b"Identity-H"));
    type0_font.set(
        "DescendantFonts",
        Object::Array(vec![Object::Reference(cid_font_id)]),
    );
    type0_font.set("ToUnicode", Object::Reference(cmap_id));
    let type0_font_id = ldoc.add_object(Object::Dictionary(type0_font));

    // Form XObject carrying the text and its own font resource.
    let mut form_fonts = lopdf::Dictionary::new();
    form_fonts.set("F1", Object::Reference(type0_font_id));
    let mut form_resources = lopdf::Dictionary::new();
    form_resources.set("Font", Object::Dictionary(form_fonts));

    let mut form_dict = lopdf::Dictionary::new();
    form_dict.set("Type", name(b"XObject"));
    form_dict.set("Subtype", name(b"Form"));
    form_dict.set("BBox", page_rect());
    form_dict.set("Resources", Object::Dictionary(form_resources));
    let form_stream = Stream::new(form_dict, b"BT /F1 12 Tf <00480069> Tj ET".to_vec());
    let form_id = ldoc.add_object(Object::Stream(form_stream));

    // Page content stream.
    let page_stream = Stream::new(lopdf::Dictionary::new(), b"q Q".to_vec());
    let content_id = ldoc.add_object(Object::Stream(page_stream));

    // Page without its own Resources entry.
    let mut page_dict = lopdf::Dictionary::new();
    page_dict.set("Type", name(b"Page"));
    page_dict.set("MediaBox", page_rect());
    page_dict.set("Contents", Object::Reference(content_id));
    let page_id = ldoc.add_object(Object::Dictionary(page_dict));

    // Pages node holding the XObject resources.
    let mut xobjects = lopdf::Dictionary::new();
    xobjects.set("X1", Object::Reference(form_id));
    let mut inherited_resources = lopdf::Dictionary::new();
    inherited_resources.set("XObject", Object::Dictionary(xobjects));

    let mut pages_dict = lopdf::Dictionary::new();
    pages_dict.set("Type", name(b"Pages"));
    pages_dict.set("Kids", Object::Array(vec![Object::Reference(page_id)]));
    pages_dict.set("Count", Object::Integer(1));
    pages_dict.set("Resources", Object::Dictionary(inherited_resources));
    let pages_id = ldoc.add_object(Object::Dictionary(pages_dict));

    // Back-link the page to its parent now that the parent id is known.
    if let Ok(page) = ldoc
        .get_object_mut(page_id)
        .and_then(|obj| obj.as_dict_mut())
    {
        page.set("Parent", Object::Reference(pages_id));
    }

    let mut catalog = lopdf::Dictionary::new();
    catalog.set("Type", name(b"Catalog"));
    catalog.set("Pages", Object::Reference(pages_id));
    let catalog_id = ldoc.add_object(Object::Dictionary(catalog));
    ldoc.trailer.set("Root", Object::Reference(catalog_id));

    let mut raw = Vec::new();
    ldoc.save_to(&mut raw).unwrap();

    let mut doc = Document::from_bytes(&raw).unwrap();
    let before = doc
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(before.contains("Hi"), "P0 extraction failed, got: {before:?}");

    let replacement_font = doc.embed_font(FONT).unwrap();
    let replaced = doc
        .page(1)
        .unwrap()
        .replace_text("Hi", "Bye", replacement_font)
        .unwrap();
    assert!(replaced > 0, "replace_text returned 0 matches in XObject stream");

    let saved = doc.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let after = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(!after.contains("Hi"), "old text still present after replace: {after:?}");
    assert!(after.contains("Bye"), "new text not found after replace: {after:?}");
}
fn make_per_char_pdf(text: &str) -> Vec<u8> {
use lopdf::{Dictionary, Document as LDoc, Object, Stream};
let mut font_d = Dictionary::new();
font_d.set("Type", Object::Name(b"Font".to_vec()));
font_d.set("Subtype", Object::Name(b"Type1".to_vec()));
font_d.set("BaseFont", Object::Name(b"Helvetica".to_vec()));
let mut content = Vec::<u8>::new();
for (i, ch) in text.chars().enumerate() {
let x = 72.0 + i as f32 * 8.0;
let line = format!("BT /F1 12 Tf {} 700 Td ({ch}) Tj ET\n", x);
content.extend_from_slice(line.as_bytes());
}
let mut doc = LDoc::new();
let font_id = doc.add_object(Object::Dictionary(font_d));
let content_id =
doc.add_object(Object::Stream(Stream::new(Dictionary::new(), content)));
let mut font_dict = Dictionary::new();
font_dict.set("F1", Object::Reference(font_id));
let mut page_res = Dictionary::new();
page_res.set("Font", Object::Dictionary(font_dict));
let mut page_d = Dictionary::new();
page_d.set("Type", Object::Name(b"Page".to_vec()));
page_d.set(
"MediaBox",
Object::Array(vec![
Object::Integer(0),
Object::Integer(0),
Object::Integer(595),
Object::Integer(842),
]),
);
page_d.set("Resources", Object::Dictionary(page_res));
page_d.set("Contents", Object::Reference(content_id));
let page_id = doc.add_object(Object::Dictionary(page_d));
let mut pages_d = Dictionary::new();
pages_d.set("Type", Object::Name(b"Pages".to_vec()));
pages_d.set("Kids", Object::Array(vec![Object::Reference(page_id)]));
pages_d.set("Count", Object::Integer(1));
let pages_id = doc.add_object(Object::Dictionary(pages_d));
if let Ok(obj) = doc.get_object_mut(page_id)
&& let Ok(d) = obj.as_dict_mut()
{
d.set("Parent", Object::Reference(pages_id));
}
let mut catalog = Dictionary::new();
catalog.set("Type", Object::Name(b"Catalog".to_vec()));
catalog.set("Pages", Object::Reference(pages_id));
let catalog_id = doc.add_object(Object::Dictionary(catalog));
doc.trailer.set("Root", Object::Reference(catalog_id));
let mut buf = Vec::new();
doc.save_to(&mut buf).unwrap();
buf
}
/// `replace_text_fragments` suppresses every source operator of a per-character
/// layout and writes the replacement text.
///
/// The fixture places each letter of "Hello" in its own text object, so extraction
/// is expected to yield five fragments, each carrying `source_stream` and
/// `source_op_start`. After replacing them with "World", the call must report five
/// suppressed operators and the reloaded page must contain "World" but not "Hello".
#[test]
fn replace_text_fragments_suppresses_source_ops() {
    let source_pdf = make_per_char_pdf("Hello");
    let mut editable = Document::from_bytes(&source_pdf).unwrap();

    let frags = editable.extract_text_runs(1).unwrap();
    assert_eq!(frags.len(), 5, "expected 5 per-char fragments, got: {frags:?}");
    for fragment in &frags {
        assert!(
            fragment.source_stream.is_some(),
            "fragment {:?} has no source_stream",
            fragment.text
        );
        assert!(
            fragment.source_op_start.is_some(),
            "fragment {:?} has no source_op_start",
            fragment.text
        );
    }

    let replacement_font = editable.embed_font(FONT).unwrap();
    let suppressed = editable
        .page(1)
        .unwrap()
        .replace_text_fragments(&frags, "World", replacement_font)
        .unwrap();
    assert_eq!(suppressed, 5, "expected 5 ops suppressed, got {suppressed}");

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let after = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        !after.contains("Hello"),
        "original text still present after replace_text_fragments: {after:?}"
    );
    assert!(
        after.contains("World"),
        "new text not found after replace_text_fragments: {after:?}"
    );
}
/// `replace_text_fragments` works on fragments that originate from a Form XObject.
///
/// The fixture is a page whose content stream is `/X1 Do`, where `X1` is a Form
/// XObject showing "(Hello)" in Type1 Helvetica. Extracted fragments are expected to
/// carry `source_xobject` and no `source_stream`. After replacing them with "World",
/// at least one operator must be reported as suppressed and the reloaded page must
/// contain "World" but not "Hello".
#[test]
fn replace_text_fragments_xobject() {
    use lopdf::{Dictionary, Document as LDoc, Object, Stream};

    let name = |bytes: &[u8]| Object::Name(bytes.to_vec());
    let page_rect = || {
        Object::Array(
            vec![0, 0, 595, 842]
                .into_iter()
                .map(Object::Integer)
                .collect(),
        )
    };

    let mut ldoc = LDoc::new();

    // Font used inside the XObject.
    let mut helvetica = Dictionary::new();
    helvetica.set("Type", name(b"Font"));
    helvetica.set("Subtype", name(b"Type1"));
    helvetica.set("BaseFont", name(b"Helvetica"));
    let font_id = ldoc.add_object(Object::Dictionary(helvetica));

    // Form XObject holding the text.
    let form_content = b"BT /F1 12 Tf 0 0 Td (Hello) Tj ET".to_vec();
    let mut form_fonts = Dictionary::new();
    form_fonts.set("F1", Object::Reference(font_id));
    let mut form_resources = Dictionary::new();
    form_resources.set("Font", Object::Dictionary(form_fonts));

    let mut form_dict = Dictionary::new();
    form_dict.set("Type", name(b"XObject"));
    form_dict.set("Subtype", name(b"Form"));
    form_dict.set("BBox", page_rect());
    form_dict.set("Resources", Object::Dictionary(form_resources));
    let form_id = ldoc.add_object(Object::Stream(Stream::new(form_dict, form_content)));

    // Page content simply paints the XObject.
    let page_stream = Stream::new(Dictionary::new(), b"/X1 Do".to_vec());
    let page_content_id = ldoc.add_object(Object::Stream(page_stream));

    let mut xobjects = Dictionary::new();
    xobjects.set("X1", Object::Reference(form_id));
    let mut page_resources = Dictionary::new();
    page_resources.set("XObject", Object::Dictionary(xobjects));

    let mut page_dict = Dictionary::new();
    page_dict.set("Type", name(b"Page"));
    page_dict.set("MediaBox", page_rect());
    page_dict.set("Resources", Object::Dictionary(page_resources));
    page_dict.set("Contents", Object::Reference(page_content_id));
    let page_id = ldoc.add_object(Object::Dictionary(page_dict));

    let mut pages_dict = Dictionary::new();
    pages_dict.set("Type", name(b"Pages"));
    pages_dict.set("Kids", Object::Array(vec![Object::Reference(page_id)]));
    pages_dict.set("Count", Object::Integer(1));
    let pages_id = ldoc.add_object(Object::Dictionary(pages_dict));

    // Back-link the page to its parent now that the parent id is known.
    if let Ok(page) = ldoc
        .get_object_mut(page_id)
        .and_then(|obj| obj.as_dict_mut())
    {
        page.set("Parent", Object::Reference(pages_id));
    }

    let mut catalog = Dictionary::new();
    catalog.set("Type", name(b"Catalog"));
    catalog.set("Pages", Object::Reference(pages_id));
    let catalog_id = ldoc.add_object(Object::Dictionary(catalog));
    ldoc.trailer.set("Root", Object::Reference(catalog_id));

    let mut raw = Vec::new();
    ldoc.save_to(&mut raw).unwrap();

    let mut editable = Document::from_bytes(&raw).unwrap();
    let frags = editable.extract_text_runs(1).unwrap();
    assert!(!frags.is_empty(), "expected fragments from XObject");
    for fragment in &frags {
        assert!(
            fragment.source_xobject.is_some(),
            "XObject fragment missing source_xobject: {:?}",
            fragment.text
        );
        assert!(
            fragment.source_stream.is_none(),
            "XObject fragment should not have source_stream"
        );
    }

    let replacement_font = editable.embed_font(FONT).unwrap();
    let suppressed = editable
        .page(1)
        .unwrap()
        .replace_text_fragments(&frags, "World", replacement_font)
        .unwrap();
    assert!(suppressed > 0, "expected at least 1 op suppressed, got 0");

    let saved = editable.save_to_bytes().unwrap();
    let reloaded = Document::from_bytes(&saved).unwrap();
    let after = reloaded
        .extract_text_runs(1)
        .unwrap()
        .iter()
        .map(|run| run.text.as_str())
        .collect::<String>();
    assert!(
        !after.contains("Hello"),
        "XObject text still present after replace: {after:?}"
    );
    assert!(
        after.contains("World"),
        "replacement text not found: {after:?}"
    );
}