use super::region::RegionSet;
use super::text_prune::{prune_run, Glyph, TextPruneResult};
use crate::content::graphics_state::{GraphicsStateStack, Matrix};
use crate::content::operators::{Operator, TextElement};
use crate::geometry::Rect;
/// Font information the text engine needs to place and measure glyphs.
///
/// Only [`FontMetrics::width`] is required; every other method has a default
/// that treats the font as a single-byte font.
pub trait FontMetrics {
    /// Advance width of character `code` in `font`.
    ///
    /// The engine divides this value by 1000 before using it.
    fn width(&self, font: &str, code: u32) -> f32;

    /// Splits a shown string into `(character code, source bytes)` pairs.
    ///
    /// The default maps every input byte to its own code.
    fn decode(&self, _font: &str, s: &[u8]) -> Vec<(u32, Vec<u8>)> {
        let mut pairs = Vec::with_capacity(s.len());
        for &byte in s {
            pairs.push((u32::from(byte), vec![byte]));
        }
        pairs
    }

    /// Whether word spacing is added after `code`; by default only code 32.
    fn is_word_space(&self, _font: &str, code: u32) -> bool {
        matches!(code, 32)
    }

    /// `(ascent, descent)` used as the vertical extent of every glyph box.
    fn ascent_descent(&self, _font: &str) -> (f32, f32) {
        let ascent = 1.0;
        let descent = -0.30;
        (ascent, descent)
    }

    /// Whether `font` is a simple font; text shown in a non-simple font is
    /// refused rather than rewritten when redaction regions are present.
    fn is_simple(&self, _font: &str) -> bool {
        true
    }
}
/// A list of removed glyph codes.
// NOTE(review): this type is not used anywhere in the visible part of the file.
// The pairs presumably have the same `(font id, character code)` shape as
// `TextEngineResult::removed_codes` — confirm against its users.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RemovedGlyphs {
/// Pairs of `u32` identifying the removed glyphs.
pub codes: Vec<(u32, u32)>,
}
/// Outcome of running `redact_text_stream` over one operator list.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct TextEngineResult {
/// The rewritten operator list.
pub operators: Vec<Operator>,
/// Total number of glyphs removed, summed over every processed string.
pub glyphs_removed: usize,
/// Original string bytes minus the bytes kept in surviving runs, summed over
/// every processed string.
pub bytes_removed: u64,
/// Distinct codes reported as removed by the pruning step, in first-seen
/// order. Glyphs are tagged with `(font_id(font name), character code)`.
pub removed_codes: Vec<(u32, u32)>,
/// Set when a text-showing operator used a font for which
/// `FontMetrics::is_simple` returned `false` while redaction regions were
/// present; such operators are passed through unmodified.
pub unsupported_font: bool,
}
/// Hashes a font resource name to a 32-bit identifier using FNV-1a over the
/// name's UTF-8 bytes.
fn font_id(name: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;
    name.bytes().fold(OFFSET_BASIS, |hash, byte| {
        (hash ^ u32::from(byte)).wrapping_mul(PRIME)
    })
}
/// Builds the text rendering matrix: the text-parameter matrix
/// `[tfs*th 0 0 tfs 0 trise]` multiplied by the text matrix `tm` and then by
/// the current transformation matrix `ctm`.
fn text_rendering_matrix(tfs: f32, th: f32, trise: f32, tm: &Matrix, ctm: &Matrix) -> Matrix {
    let horizontal = tfs * th;
    let text_params = Matrix {
        a: horizontal,
        b: 0.0,
        c: 0.0,
        d: tfs,
        e: 0.0,
        f: trise,
    };
    let through_text_matrix = text_params.multiply(tm);
    through_text_matrix.multiply(ctm)
}
/// Axis-aligned bounding box of one glyph after transformation by `trm`.
///
/// The glyph is modelled as the rectangle `[0, w0] x [descent, ascent]`; all
/// four corners are transformed so that rotated or skewed text is still
/// enclosed.
fn glyph_box(w0: f32, ascent: f32, descent: f32, trm: &Matrix) -> Rect {
    let mut lo = (f32::INFINITY, f32::INFINITY);
    let mut hi = (f32::NEG_INFINITY, f32::NEG_INFINITY);
    for (gx, gy) in [(0.0, descent), (w0, descent), (w0, ascent), (0.0, ascent)] {
        let corner = trm.transform_point(gx, gy);
        lo = (lo.0.min(corner.x), lo.1.min(corner.y));
        hi = (hi.0.max(corner.x), hi.1.max(corner.y));
    }
    Rect::from_points(lo.0, lo.1, hi.0, hi.1)
}
/// Text state tracked by `redact_text_stream` while it walks the operators.
#[derive(Clone)]
struct TextState {
// Text matrix; reset by BeginText/Tm and advanced per glyph in `show_string`.
tm: Matrix,
// Text line matrix; moved by Td/TD/T*/'/" and copied into `tm` afterwards.
tlm: Matrix,
// Character spacing (Tc, or the `char_space` operand of `"`).
tc: f32,
// Word spacing (Tw, or the `word_space` operand of `"`).
tw: f32,
// Horizontal scaling as a fraction: the Tz operand divided by 100.
th: f32,
// Font size from Tf.
tfs: f32,
// Text rise from Ts.
trise: f32,
// Leading from TL, or the negated `ty` of TD.
leading: f32,
// Font name from Tf; empty until the first Tf.
font: String,
}
impl Default for TextState {
    /// Initial text state: identity matrices, 100% horizontal scaling, no
    /// font selected, and every other parameter zero.
    fn default() -> Self {
        Self {
            // No font selected yet.
            font: String::new(),
            tfs: 0.0,
            // Spacing, scaling, leading and rise.
            tc: 0.0,
            tw: 0.0,
            th: 1.0,
            leading: 0.0,
            trise: 0.0,
            // Text and text-line matrices.
            tm: Matrix::identity(),
            tlm: Matrix::identity(),
        }
    }
}
/// Lays out one shown string glyph by glyph and prunes it against `regions`.
///
/// For every decoded code this records the source bytes, the glyph's bounding
/// box under the current text rendering matrix and the text matrix in effect
/// when the glyph starts. It then advances `ts.tm` horizontally by
/// `(w0 * tfs + tc + tw) * th`, where `tw` only applies to codes the font
/// reports as word spaces. The collected glyphs are handed to `prune_run`
/// with `regions` and `min_padding`, and its result is returned.
///
/// `ts.tm` is left at the position following the last glyph.
fn show_string(
    bytes: &[u8],
    ts: &mut TextState,
    ctm: &Matrix,
    fonts: &dyn FontMetrics,
    regions: &RegionSet,
    min_padding: f32,
) -> TextPruneResult {
    let font_key = font_id(&ts.font);
    let (ascent, descent) = fonts.ascent_descent(&ts.font);
    let codes = fonts.decode(&ts.font, bytes);
    let mut placed: Vec<Glyph> = Vec::with_capacity(codes.len());
    for (code, source_bytes) in codes {
        // Text matrix at the start of this glyph; it doubles as the anchor a
        // surviving run can be re-positioned with.
        let anchor = ts.tm;
        let trm = text_rendering_matrix(ts.tfs, ts.th, ts.trise, &anchor, ctm);
        let w0 = fonts.width(&ts.font, code) / 1000.0;
        placed.push(Glyph {
            bytes: source_bytes,
            bbox: glyph_box(w0, ascent, descent, &trm),
            render_matrix: [anchor.a, anchor.b, anchor.c, anchor.d, anchor.e, anchor.f],
            code: (font_key, code),
        });
        let word_gap = if fonts.is_word_space(&ts.font, code) {
            ts.tw
        } else {
            0.0
        };
        let advance = (w0 * ts.tfs + ts.tc + word_gap) * ts.th;
        let shift = Matrix {
            a: 1.0,
            b: 0.0,
            c: 0.0,
            d: 1.0,
            e: advance,
            f: 0.0,
        };
        ts.tm = shift.multiply(&anchor);
    }
    prune_run(&placed, regions, min_padding)
}
/// Appends every surviving run to `out` as a `Tm` carrying the run's anchor
/// matrix followed by a `Tj` with the run's bytes.
fn emit_runs(out: &mut Vec<Operator>, res: &TextPruneResult) {
    out.extend(res.runs.iter().flat_map(|run| {
        let [a, b, c, d, e, f] = run.anchor;
        [
            Operator::Tm { a, b, c, d, e, f },
            Operator::Tj {
                text: run.bytes.clone(),
            },
        ]
    }));
}
pub fn redact_text_stream(
ops: &[Operator],
regions: &RegionSet,
min_padding: f32,
fonts: &dyn FontMetrics,
) -> TextEngineResult {
let mut stack = GraphicsStateStack::new();
let mut ts = TextState::default();
let mut out: Vec<Operator> = Vec::with_capacity(ops.len());
let mut result = TextEngineResult::default();
for op in ops {
match op {
Operator::SaveState | Operator::RestoreState | Operator::Cm { .. } => {
super::classify::apply_ctm(&mut stack, op);
out.push(op.clone());
},
Operator::BeginText => {
ts.tm = Matrix::identity();
ts.tlm = Matrix::identity();
out.push(op.clone());
},
Operator::EndText => out.push(op.clone()),
Operator::Tf { font, size } => {
ts.font = font.clone();
ts.tfs = *size;
out.push(op.clone());
},
Operator::Tc { char_space } => {
ts.tc = *char_space;
out.push(op.clone());
},
Operator::Tw { word_space } => {
ts.tw = *word_space;
out.push(op.clone());
},
Operator::Tz { scale } => {
ts.th = *scale / 100.0;
out.push(op.clone());
},
Operator::TL { leading } => {
ts.leading = *leading;
out.push(op.clone());
},
Operator::Ts { rise } => {
ts.trise = *rise;
out.push(op.clone());
},
Operator::Td { tx, ty } => {
ts.tlm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: *tx,
f: *ty,
}
.multiply(&ts.tlm);
ts.tm = ts.tlm;
out.push(op.clone());
},
Operator::TD { tx, ty } => {
ts.leading = -*ty;
ts.tlm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: *tx,
f: *ty,
}
.multiply(&ts.tlm);
ts.tm = ts.tlm;
out.push(op.clone());
},
Operator::Tm { a, b, c, d, e, f } => {
let m = Matrix {
a: *a,
b: *b,
c: *c,
d: *d,
e: *e,
f: *f,
};
ts.tm = m;
ts.tlm = m;
out.push(op.clone());
},
Operator::TStar => {
ts.tlm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 0.0,
f: -ts.leading,
}
.multiply(&ts.tlm);
ts.tm = ts.tlm;
out.push(op.clone());
},
Operator::Tj { text } => {
if refuse_unsupported(fonts, &ts.font, regions, &mut result, &mut out, op) {
continue;
}
let ctm = stack.current().ctm;
let res = show_string(text, &mut ts, &ctm, fonts, regions, min_padding);
account(&mut result, text.len(), &res);
emit_runs(&mut out, &res);
},
Operator::Quote { text } => {
if refuse_unsupported(fonts, &ts.font, regions, &mut result, &mut out, op) {
continue;
}
ts.tlm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 0.0,
f: -ts.leading,
}
.multiply(&ts.tlm);
ts.tm = ts.tlm;
let ctm = stack.current().ctm;
let res = show_string(text, &mut ts, &ctm, fonts, regions, min_padding);
account(&mut result, text.len(), &res);
emit_runs(&mut out, &res);
},
Operator::DoubleQuote {
word_space,
char_space,
text,
} => {
if refuse_unsupported(fonts, &ts.font, regions, &mut result, &mut out, op) {
continue;
}
ts.tw = *word_space;
ts.tc = *char_space;
ts.tlm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 0.0,
f: -ts.leading,
}
.multiply(&ts.tlm);
ts.tm = ts.tlm;
let ctm = stack.current().ctm;
let res = show_string(text, &mut ts, &ctm, fonts, regions, min_padding);
account(&mut result, text.len(), &res);
emit_runs(&mut out, &res);
},
Operator::TJ { array } => {
if refuse_unsupported(fonts, &ts.font, regions, &mut result, &mut out, op) {
continue;
}
let ctm = stack.current().ctm;
let mut any_removed = false;
let mut tj_orig = 0usize;
let mut survived_runs = TextPruneResult::default();
for el in array {
match el {
TextElement::String(s) => {
tj_orig += s.len();
let r = show_string(s, &mut ts, &ctm, fonts, regions, min_padding);
if r.glyphs_removed > 0 {
any_removed = true;
}
for c in &r.removed_codes {
if !survived_runs.removed_codes.contains(c) {
survived_runs.removed_codes.push(*c);
}
}
survived_runs.glyphs_removed += r.glyphs_removed;
survived_runs.runs.extend(r.runs);
},
TextElement::Offset(off) => {
let dx = (-*off / 1000.0) * ts.tfs * ts.th;
ts.tm = Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: dx,
f: 0.0,
}
.multiply(&ts.tm);
},
}
}
account(&mut result, tj_orig, &survived_runs);
if any_removed {
emit_runs(&mut out, &survived_runs);
} else {
out.push(op.clone());
}
},
other => out.push(other.clone()),
}
}
result.operators = out;
result
}
/// Decides whether a text-showing operator must be left untouched because its
/// font cannot be handled.
///
/// Refusal happens only when there are regions to redact and
/// `fonts.is_simple(font)` is `false`. In that case `op` is copied to `out`
/// unchanged, `result.unsupported_font` is set and `true` is returned.
/// Otherwise nothing is modified and `false` is returned.
fn refuse_unsupported(
    fonts: &dyn FontMetrics,
    font: &str,
    regions: &RegionSet,
    result: &mut TextEngineResult,
    out: &mut Vec<Operator>,
    op: &Operator,
) -> bool {
    // Without regions nothing can be removed, so the font is not consulted.
    if regions.is_empty() || fonts.is_simple(font) {
        return false;
    }
    result.unsupported_font = true;
    out.push(op.clone());
    true
}
/// Folds the statistics of one pruned string into the running totals.
///
/// `orig_len` is the byte length of the original string. Bytes removed are
/// that length minus the bytes kept in surviving runs, clamped at zero.
/// Removed codes are appended only if not already recorded.
fn account(result: &mut TextEngineResult, orig_len: usize, res: &TextPruneResult) {
    let surviving: usize = res.runs.iter().fold(0, |total, run| total + run.bytes.len());
    result.bytes_removed += orig_len.saturating_sub(surviving) as u64;
    result.glyphs_removed += res.glyphs_removed;
    for code in res.removed_codes.iter().copied() {
        let already_seen = result.removed_codes.iter().any(|seen| *seen == code);
        if !already_seen {
            result.removed_codes.push(code);
        }
    }
}
// Unit tests for `redact_text_stream` and `text_rendering_matrix`.
#[cfg(test)]
mod tests {
use super::*;
use crate::redaction::region::{RedactionRegion, RegionSet, DEFAULT_EDGE_PADDING};
// Simple-font stub: every glyph is 500 units wide; all other methods use the
// trait defaults.
struct Stub;
impl FontMetrics for Stub {
fn width(&self, _f: &str, _c: u32) -> f32 {
500.0
}
}
// Same widths as `Stub`, but reports every font as non-simple.
struct CompositeStub;
impl FontMetrics for CompositeStub {
fn width(&self, _f: &str, _c: u32) -> f32 {
500.0
}
fn is_simple(&self, _f: &str) -> bool {
false
}
}
// Builds a region set containing a single rectangular region.
fn regions(x0: f32, y0: f32, x1: f32, y1: f32) -> RegionSet {
let mut rs = RegionSet::new(0);
rs.push(RedactionRegion::from_rect(x0, y0, x1, y1, None));
rs
}
// Minimal text object: BT, font "F1" at size 10, the given Tm, one Tj, ET.
fn doc(tm: [f32; 6], text: &[u8]) -> Vec<Operator> {
vec![
Operator::BeginText,
Operator::Tf {
font: "F1".into(),
size: 10.0,
},
Operator::Tm {
a: tm[0],
b: tm[1],
c: tm[2],
d: tm[3],
e: tm[4],
f: tm[5],
},
Operator::Tj {
text: text.to_vec(),
},
Operator::EndText,
]
}
// Collects the byte strings of every `Tj` operator, in order.
fn tj_text(ops: &[Operator]) -> Vec<Vec<u8>> {
ops.iter()
.filter_map(|o| match o {
Operator::Tj { text } => Some(text.clone()),
_ => None,
})
.collect()
}
// A string entirely inside the region loses all six glyphs; no Tj or TJ
// remains and the BT/ET bracket is preserved.
#[test]
fn text_fully_in_region_is_removed_no_offset() {
let ops = doc([1.0, 0.0, 0.0, 1.0, 100.0, 100.0], b"SECRET");
let r = regions(90.0, 95.0, 140.0, 115.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 6);
assert!(tj_text(&out.operators).is_empty());
assert!(!out
.operators
.iter()
.any(|o| matches!(o, Operator::TJ { .. })));
assert!(matches!(out.operators.first(), Some(Operator::BeginText)));
assert!(matches!(out.operators.last(), Some(Operator::EndText)));
}
// A region far from the text removes nothing and the string survives intact.
#[test]
fn text_fully_outside_is_untouched() {
let ops = doc([1.0, 0.0, 0.0, 1.0, 100.0, 100.0], b"PUBLIC");
let r = regions(0.0, 0.0, 10.0, 10.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 0);
assert_eq!(tj_text(&out.operators), vec![b"PUBLIC".to_vec()]);
}
// A string straddling the region edge keeps only "PUB", and the first Tm in
// the output still positions text at (100, 100).
#[test]
fn straddle_splits_and_reanchors_absolutely() {
let ops = doc([1.0, 0.0, 0.0, 1.0, 100.0, 100.0], b"PUBxSECRET");
let r = regions(120.0, 95.0, 400.0, 115.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert!(out.glyphs_removed >= 7, "removed={}", out.glyphs_removed);
let surv = tj_text(&out.operators);
assert_eq!(surv, vec![b"PUB".to_vec()]);
let tm = out
.operators
.iter()
.find_map(|o| match o {
Operator::Tm { e, f, .. } => Some((*e, *f)),
_ => None,
})
.unwrap();
assert!((tm.0 - 100.0).abs() < 1e-3 && (tm.1 - 100.0).abs() < 1e-3);
}
// Text placed at (10, 10) under a 10x CTM must be matched against a region
// given in the scaled coordinates; q/Q stay the outermost operators.
#[test]
fn ctm_scaled_block_still_caught_no_under_redaction() {
let mut ops = vec![
Operator::SaveState,
Operator::Cm {
a: 10.0,
b: 0.0,
c: 0.0,
d: 10.0,
e: 0.0,
f: 0.0,
},
];
ops.extend(doc([1.0, 0.0, 0.0, 1.0, 10.0, 10.0], b"HIDE"));
ops.push(Operator::RestoreState);
let r = regions(90.0, 90.0, 300.0, 300.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 4);
assert!(tj_text(&out.operators).is_empty());
assert!(matches!(out.operators.first(), Some(Operator::SaveState)));
assert!(matches!(out.operators.last(), Some(Operator::RestoreState)));
}
// A TJ array whose strings are all inside the region disappears completely:
// four glyphs removed, no TJ and no Tj left.
#[test]
fn tj_array_with_offsets_redacted_drops_deltas() {
let ops = vec![
Operator::BeginText,
Operator::Tf {
font: "F1".into(),
size: 10.0,
},
Operator::Tm {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 100.0,
f: 100.0,
},
Operator::TJ {
array: vec![
TextElement::String(b"AB".to_vec()),
TextElement::Offset(-200.0),
TextElement::String(b"CD".to_vec()),
],
},
Operator::EndText,
];
let r = regions(90.0, 95.0, 300.0, 115.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 4);
assert!(!out
.operators
.iter()
.any(|o| matches!(o, Operator::TJ { .. })));
assert!(tj_text(&out.operators).is_empty());
}
// A TJ array that is not touched by any region is kept as exactly one TJ.
#[test]
fn untouched_tj_array_emitted_byte_identical() {
let ops = vec![
Operator::BeginText,
Operator::Tf {
font: "F1".into(),
size: 10.0,
},
Operator::Tm {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 100.0,
f: 100.0,
},
Operator::TJ {
array: vec![
TextElement::String(b"AB".to_vec()),
TextElement::Offset(-200.0),
TextElement::String(b"CD".to_vec()),
],
},
Operator::EndText,
];
let r = regions(0.0, 0.0, 5.0, 5.0); let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 0);
let tj = out
.operators
.iter()
.filter(|o| matches!(o, Operator::TJ { .. }))
.count();
assert_eq!(tj, 1);
}
// With regions present, a non-simple font is refused: the flag is set,
// nothing is removed and the original string is kept.
#[test]
fn composite_font_with_regions_refuses_and_keeps_original() {
let ops = doc([1.0, 0.0, 0.0, 1.0, 100.0, 100.0], b"SECRET");
let r = regions(0.0, 0.0, 1000.0, 1000.0);
let out = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &CompositeStub);
assert!(out.unsupported_font, "must flag refusal");
assert_eq!(out.glyphs_removed, 0);
assert_eq!(tj_text(&out.operators), vec![b"SECRET".to_vec()]);
}
// Without regions a non-simple font is not flagged and the text survives.
#[test]
fn composite_font_without_regions_is_not_a_refusal() {
let ops = doc([1.0, 0.0, 0.0, 1.0, 100.0, 100.0], b"hello");
let out =
redact_text_stream(&ops, &RegionSet::new(0), DEFAULT_EDGE_PADDING, &CompositeStub);
assert!(!out.unsupported_font);
assert_eq!(tj_text(&out.operators), vec![b"hello".to_vec()]);
}
// Empty operator list yields the default result; an empty region set leaves
// text intact.
#[test]
fn empty_and_no_region_are_safe() {
let out = redact_text_stream(&[], &RegionSet::new(0), DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out, TextEngineResult::default());
let ops = doc([1.0, 0.0, 0.0, 1.0, 10.0, 10.0], b"abc");
let out = redact_text_stream(&ops, &RegionSet::new(0), DEFAULT_EDGE_PADDING, &Stub);
assert_eq!(out.glyphs_removed, 0);
assert_eq!(tj_text(&out.operators), vec![b"abc".to_vec()]);
}
// With font size 12, no scaling change, a translation-only text matrix and an
// identity CTM, the origin maps to (50, 60) and one unit in x maps to 12.
#[test]
fn text_rendering_matrix_matches_spec_9_4_4() {
let trm = text_rendering_matrix(
12.0,
1.0,
0.0,
&Matrix {
a: 1.0,
b: 0.0,
c: 0.0,
d: 1.0,
e: 50.0,
f: 60.0,
},
&Matrix::identity(),
);
let o = trm.transform_point(0.0, 0.0);
let x1 = trm.transform_point(1.0, 0.0);
assert!((o.x - 50.0).abs() < 1e-3 && (o.y - 60.0).abs() < 1e-3);
assert!((x1.x - 62.0).abs() < 1e-3 && (x1.y - 60.0).abs() < 1e-3);
}
// Unmatched Q operators and a Tj outside any text object must not panic; the
// result itself is not inspected.
#[test]
fn unbalanced_q_and_malformed_do_not_panic() {
let ops = vec![
Operator::RestoreState,
Operator::Tj {
text: b"x".to_vec(),
},
Operator::RestoreState,
];
let r = regions(0.0, 0.0, 1000.0, 1000.0);
let _ = redact_text_stream(&ops, &r, DEFAULT_EDGE_PADDING, &Stub);
}
}